From eac830e0feac1e5f4fbb9637506bd071e7530a1f Mon Sep 17 00:00:00 2001 From: Corinna Vinschen Date: Tue, 14 Feb 2023 12:22:36 +0100 Subject: [PATCH] Cygwin: __collate_range_cmp: handle Unicode values >= 0x10000 So far the input to __collate_range_cmp was handled as a wchar_t. Change that to handle it as wint_t holding a UTF-32 value and create surrogate pairs for the call to wcscoll. Signed-off-by: Corinna Vinschen --- winsup/cygwin/nlsfuncs.cc | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/winsup/cygwin/nlsfuncs.cc b/winsup/cygwin/nlsfuncs.cc index ddd85bea1..0d204929d 100644 --- a/winsup/cygwin/nlsfuncs.cc +++ b/winsup/cygwin/nlsfuncs.cc @@ -1176,8 +1176,20 @@ strcoll (const char *__restrict s1, const char *__restrict s2) extern "C" int __collate_range_cmp (int c1, int c2) { - wchar_t s1[2] = { (wchar_t) c1, L'\0' }; - wchar_t s2[2] = { (wchar_t) c2, L'\0' }; + wchar_t s1[3] = { (wchar_t) c1, L'\0', L'\0' }; + wchar_t s2[3] = { (wchar_t) c2, L'\0', L'\0' }; + + /* Handle Unicode values >= 0x10000, convert to surrogate pair */ + if (c1 > 0xffff) + { + s1[0] = ((c1 - 0x10000) >> 10) + 0xd800; + s1[1] = ((c1 - 0x10000) & 0x3ff) + 0xdc00; + } + if (c2 > 0xffff) + { + s2[0] = ((c2 - 0x10000) >> 10) + 0xd800; + s2[1] = ((c2 - 0x10000) & 0x3ff) + 0xdc00; + } return wcscoll (s1, s2); }