Cygwin: add more UTF-32 helper functions

wcintowcs: convert UTF-32 string to UTF-16 string
wcilen: return number of characters in a UTF-32 string
wcincmp: compare two fixed-size UTF-32 strings

Used in followup patches introducing collating symbols

Signed-off-by: Corinna Vinschen <corinna@vinschen.de>
This commit is contained in:
Corinna Vinschen 2023-02-20 22:29:37 +01:00
parent 1cbe4b3dcc
commit ffba9604d1
2 changed files with 63 additions and 0 deletions

View File

@ -39,10 +39,56 @@ extern wctomb_f __utf8_wctomb;
#define __WCTOMB (__get_current_locale ()->wctomb)
/* Convert a wint_t (UTF-32) string to a wchar_t string.  The size_t
argument limits the number of source characters converted.  Make sure
dest has room for at least twice as many characters to account
for surrogate pairs, plus a wchar_t NUL. */
void wcintowcs (wchar_t *, wint_t *, size_t);
/* replacement function for mbrtowc, returning a wint_t representing
a UTF-32 value. Defined in strfuncs.cc */
extern size_t mbrtowi (wint_t *, const char *, size_t, mbstate_t *);
/* wcslen counterpart for wint_t strings: returns the number of elements
   preceding the terminating 0.  A NULL pointer is treated like an empty
   string and yields 0. */
static inline size_t
wcilen (const wint_t *wcs)
{
  const wint_t *end = wcs;

  if (!wcs)
    return 0;
  /* Walk to the terminator; the pointer distance is the length. */
  while (*end)
    ++end;
  return (size_t) (end - wcs);
}
/* wcscmp counterpart for wint_t strings.  Returns 0 if both strings are
   equal up to and including the terminating 0, otherwise the difference
   *s1 - *s2 of the first pair of elements that differ, converted to int. */
static inline int
wcicmp (const wint_t *s1, const wint_t *s2)
{
  /* Advance both strings in lockstep for as long as they agree. */
  for (; *s1 == *s2; ++s1, ++s2)
    if (*s1 == 0)
      return 0;
  return (*s1 - *s2);
}
/* wcsncmp counterpart for wint_t strings.  Compares at most n elements
   and stops early once a terminating 0 has been matched in both strings.
   Returns 0 if no difference was found, otherwise the difference
   *s1 - *s2 of the first pair of elements that differ, converted to int. */
static inline int
wcincmp (const wint_t *s1, const wint_t *s2, size_t n)
{
  for (size_t idx = 0; idx < n; ++idx)
    {
      if (s1[idx] != s2[idx])
        return (s1[idx] - s2[idx]);
      /* Both elements are equal here, so a 0 ends both strings. */
      if (s1[idx] == 0)
        break;
    }
  return 0;
}
#ifdef __cplusplus
}
#endif

View File

@ -112,6 +112,23 @@ transform_chars_af_unix (PWCHAR out, const char *path, __socklen_t len)
return out;
}
/* Convert the wint_t (UTF-32) string src to a wchar_t string in dest.
   At most len source characters are converted; conversion also stops at
   the first NUL in src.  Values above 0xffff are written as a UTF-16
   surrogate pair, so make sure dest has room for at least twice as many
   characters as are converted, plus the terminating wchar_t NUL, which
   is always written. */
extern "C" void
wcintowcs (wchar_t *dest, wint_t *src, size_t len)
{
  /* Check len before dereferencing src, so that a source buffer holding
     exactly len characters without a trailing NUL is never read past
     its end. */
  while (len-- > 0 && *src)
    if (*src > 0xffff)
      {
        /* High surrogate first, then low surrogate. */
        *dest++ = ((*src - 0x10000) >> 10) + 0xd800;
        *dest++ = ((*src++ - 0x10000) & 0x3ff) + 0xdc00;
      }
    else
      *dest++ = *src++;
  *dest = '\0';
}
/* replacement function for mbrtowc, returning a wint_t representing
a UTF-32 value. */
extern "C" size_t