* regex/regcomp.c (xwcrtomb): Don't always convert Unicode chars outside
the base plane to UTF-8.  Call wcsnrtombs instead to allow arbitrary
multibyte charsets.
This commit is contained in:
Corinna Vinschen 2010-02-13 12:26:06 +00:00
parent 03ac74c168
commit 15a9e17656
2 changed files with 17 additions and 8 deletions

View File

@ -1,3 +1,9 @@
2010-02-13 Corinna Vinschen <corinna@vinschen.de>
* regex/regcomp.c (xwcrtomb): Don't always convert Unicode chars outside
the base plane to UTF-8. Call wcsnrtombs instead to allow arbitrary
multibyte charsets.
2010-02-12 Corinna Vinschen <corinna@vinschen.de> 2010-02-12 Corinna Vinschen <corinna@vinschen.de>
* regex/regcomp.c (wgetnext): Use size_t as type for n2 since that's * regex/regcomp.c (wgetnext): Use size_t as type for n2 since that's

View File

@ -1171,14 +1171,17 @@ xwcrtomb (char *s, wint_t wc, mbstate_t *ps)
{ {
if (sizeof (wchar_t) == 2 && wc >= 0x10000) if (sizeof (wchar_t) == 2 && wc >= 0x10000)
{ {
/* UTF-16 systems can't handle these values directly. Since the /* UTF-16 wcrtomb can't handle these values directly. The rest of the
rest of the code isn't surrogate pair aware, we handle this here, code isn't surrogate pair aware, so we handle this here. Convert
invisible for the rest of the code. */ value to UTF-16 surrogate and call wcsnrtombs to convert the "string"
*s++ = 0xf0 | ((wc & 0x1c0000) >> 18); to the correct multibyte representation, if any. */
*s++ = 0x80 | ((wc & 0x3f000) >> 12); wchar_t ws[2], *wsp = ws;
*s++ = 0x80 | ((wc & 0xfc0) >> 6); size_t n;
*s = 0x80 | (wc & 0x3f);
return 4; wc -= 0x10000;
ws[0] = 0xd800 | (wc >> 10);
ws[1] = 0xdc00 | (wc & 0x3ff);
return wcsnrtombs (s, &wsp, 2, MB_CUR_MAX, ps);
} }
return wcrtomb (s, wc, ps); return wcrtomb (s, wc, ps);
} }