From ffba9604d10fa0df27eea5e1930a215086608581 Mon Sep 17 00:00:00 2001 From: Corinna Vinschen Date: Mon, 20 Feb 2023 22:29:37 +0100 Subject: [PATCH] Cygwin: add more UTF-32 helper functions wcintowcs: convert UTF-32 to UTF-16 string wcilen: return number of characters in a UTF-32 string wcicmp: compare two UTF-32 strings wcincmp: compare two UTF-32 strings up to a given number of characters Used in followup patches introducing collating symbols Signed-off-by: Corinna Vinschen --- winsup/cygwin/local_includes/wchar.h | 46 ++++++++++++++++++++++++++++ winsup/cygwin/strfuncs.cc | 17 ++++++++++ 2 files changed, 63 insertions(+) diff --git a/winsup/cygwin/local_includes/wchar.h b/winsup/cygwin/local_includes/wchar.h index 10b44791c..6f2a4ad10 100644 --- a/winsup/cygwin/local_includes/wchar.h +++ b/winsup/cygwin/local_includes/wchar.h @@ -39,10 +39,56 @@ extern wctomb_f __utf8_wctomb; #define __WCTOMB (__get_current_locale ()->wctomb) +/* convert wint_t string to wchar_t string. Make sure dest + has room for at least twice as many characters to account + for surrogate pairs, plus a wchar_t NUL. */ +void wcintowcs (wchar_t *, wint_t *, size_t); + /* replacement function for mbrtowc, returning a wint_t representing a UTF-32 value. 
Defined in strfuncs.cc */ extern size_t mbrtowi (wint_t *, const char *, size_t, mbstate_t *); +/* like wcslen, just for wint_t: returns the number of elements preceding the terminating 0. A NULL pointer is accepted and yields 0. */ +static inline size_t +wcilen (const wint_t *wcs) +{ + size_t ret = 0; + + if (wcs) + while (*wcs++) + ++ret; + return ret; +} + +/* like wcscmp, just for wint_t: walks both strings in lockstep and returns 0 if they are equal up to and including the terminating 0, otherwise *s1 - *s2 for the first pair of differing elements. NOTE(review): the difference is computed in wint_t and then converted to int, so its sign presumably relies on the values staying within the UTF-32 range - confirm. */ +static inline int +wcicmp (const wint_t *s1, const wint_t *s2) +{ + while (*s1 == *s2++) + if (*s1++ == 0) + return (0); + return (*s1 - *--s2); +} + +/* like wcsncmp, just for wint_t: compares at most n elements and stops early at a terminating 0. Returns 0 if n is 0 or if no difference was found, otherwise *s1 - *s2 for the first pair of differing elements. NOTE(review): same wint_t-to-int conversion of the difference as in wcicmp - confirm the value range. */ +static inline int +wcincmp (const wint_t *s1, const wint_t *s2, size_t n) +{ + if (n == 0) + return (0); + do + { + if (*s1 != *s2++) + { + return (*s1 - *--s2); + } + if (*s1++ == 0) + break; + } + while (--n != 0); + return (0); +} + #ifdef __cplusplus } #endif diff --git a/winsup/cygwin/strfuncs.cc b/winsup/cygwin/strfuncs.cc index d62179a1d..e343a2fcc 100644 --- a/winsup/cygwin/strfuncs.cc +++ b/winsup/cygwin/strfuncs.cc @@ -112,6 +112,23 @@ transform_chars_af_unix (PWCHAR out, const char *path, __socklen_t len) return out; } +/* convert wint_t string to wchar_t string. At most len elements of src are converted, stopping early at a 0 element. Make sure dest + has room for at least twice as many characters to account + for surrogate pairs, plus a wchar_t NUL: every src value above 0xffff is written as two wchar_t (a 0xd800-based one followed by a 0xdc00-based one), and dest is always NUL-terminated. */ +extern "C" void +wcintowcs (wchar_t *dest, wint_t *src, size_t len) +{ + while (*src && len-- > 0) + if (*src > 0xffff) + { + *dest++ = ((*src - 0x10000) >> 10) + 0xd800; + *dest++ = ((*src++ - 0x10000) & 0x3ff) + 0xdc00; + } + else + *dest++ = *src++; + *dest = '\0'; +} + /* replacement function for mbrtowc, returning a wint_t representing a UTF-32 value. */ extern "C" size_t