From f3f20038c480e43ae264b760f4d8b14b618ca973 Mon Sep 17 00:00:00 2001 From: Corinna Vinschen Date: Wed, 22 Feb 2023 10:03:03 +0100 Subject: [PATCH] Cygwin: glob: perform correct utf-32 -> multibyte conversion g_Ctoc, converting the UTF-32 filenames to multibyte, still used UTF-16 to multibyte conversion. Introduce a wirtomb helper and fix that. Signed-off-by: Corinna Vinschen --- winsup/cygwin/glob.cc | 2 +- winsup/cygwin/local_includes/wchar.h | 4 ++++ winsup/cygwin/strfuncs.cc | 19 +++++++++++++++++++ 3 files changed, 24 insertions(+), 1 deletion(-) diff --git a/winsup/cygwin/glob.cc b/winsup/cygwin/glob.cc index 8bcdd61b2..2f86e4698 100644 --- a/winsup/cygwin/glob.cc +++ b/winsup/cygwin/glob.cc @@ -1024,7 +1024,7 @@ g_Ctoc(const Char *str, char *buf, size_t len) memset(&mbs, 0, sizeof(mbs)); while (len >= (size_t) MB_CUR_MAX) { - clen = wcrtomb(buf, *str, &mbs); + clen = wirtomb(buf, *str, &mbs); if (clen == (size_t)-1) return (1); if (*str == L'\0') diff --git a/winsup/cygwin/local_includes/wchar.h b/winsup/cygwin/local_includes/wchar.h index 6f2a4ad10..3907732a2 100644 --- a/winsup/cygwin/local_includes/wchar.h +++ b/winsup/cygwin/local_includes/wchar.h @@ -44,6 +44,10 @@ extern wctomb_f __utf8_wctomb; for surrogate pairs, plus a wchar_t NUL. */ void wcintowcs (wchar_t *, wint_t *, size_t); +/* replacement function for wcrtomb, converting a UTF-32 char to a + multibyte string. */ +size_t wirtomb (char *, wint_t, mbstate_t *); + /* replacement function for mbrtowc, returning a wint_t representing a UTF-32 value. Defined in strfuncs.cc */ extern size_t mbrtowi (wint_t *, const char *, size_t, mbstate_t *); diff --git a/winsup/cygwin/strfuncs.cc b/winsup/cygwin/strfuncs.cc index e343a2fcc..80e3eb0ad 100644 --- a/winsup/cygwin/strfuncs.cc +++ b/winsup/cygwin/strfuncs.cc @@ -129,6 +129,25 @@ wcintowcs (wchar_t *dest, wint_t *src, size_t len) *dest = '\0'; } +/* replacement function for wcrtomb, converting a UTF-32 char to a + multibyte string. 
*/
+extern "C" size_t
+wirtomb (char *s, wint_t wi, mbstate_t *ps)
+{	/* s: output buffer; wi: UTF-32 value to convert; ps: conversion state handed on to wcsnrtombs */
+  wchar_t wc[3] = { (wchar_t) wi, '\0', '\0' };	/* default: wi cast to one wchar_t unit, NUL-padded */
+  const wchar_t *wcp = wc;	/* source pointer, passed by address to wcsnrtombs below */
+  size_t nwc = 1;	/* number of wchar_t units to convert; 1 unless the branch below runs */
+
+  if (wi >= 0x10000)	/* values from 0x10000 up are split into a surrogate pair */
+    {
+      wi -= 0x10000;	/* offset that the pair encodes */
+      wc[0] = (wi >> 10) + 0xd800;	/* high surrogate: bits above the low 10 */
+      wc[1] = (wi & 0x3ff) + 0xdc00;	/* low surrogate: low 10 bits */
+      nwc = 2;
+    }	/* NOTE(review): no check that wi <= 0x10ffff; confirm callers only pass valid code points */
+  return wcsnrtombs (s, &wcp, nwc, SIZE_MAX, ps);	/* result of wcsnrtombs is returned unchanged; SIZE_MAX means no output limit is enforced here, so the caller must provide room in s */
+}	/* NOTE(review): for wi == 0 the return value follows wcsnrtombs, which may differ from what wcrtomb returns for L'\0' -- confirm callers do not rely on it */
+
 /* replacement function for mbrtowc, returning a wint_t representing
    a UTF-32 value. */
 extern "C" size_t