* regex/regcomp.c (xwcrtomb): Don't always convert Unicode chars outside the
base plane to UTF-8. Call wcsnrtombs instead to allow arbitrary multibyte charsets.
This commit is contained in:
parent
03ac74c168
commit
15a9e17656
|
@ -1,3 +1,9 @@
|
||||||
|
2010-02-13 Corinna Vinschen <corinna@vinschen.de>
|
||||||
|
|
||||||
|
* regex/regcomp.c (xwcrtomb): Don't convert Unicode chars outside the
|
||||||
|
base plane always to UTF-8. Call wcsnrtombs instead to allow arbitrary
|
||||||
|
multibyte charsets.
|
||||||
|
|
||||||
2010-02-12 Corinna Vinschen <corinna@vinschen.de>
|
2010-02-12 Corinna Vinschen <corinna@vinschen.de>
|
||||||
|
|
||||||
* regex/regcomp.c (wgetnext): Use size_t as type for n2 since that's
|
* regex/regcomp.c (wgetnext): Use size_t as type for n2 since that's
|
||||||
|
|
|
@ -1171,14 +1171,17 @@ xwcrtomb (char *s, wint_t wc, mbstate_t *ps)
|
||||||
{
|
{
|
||||||
if (sizeof (wchar_t) == 2 && wc >= 0x10000)
|
if (sizeof (wchar_t) == 2 && wc >= 0x10000)
|
||||||
{
|
{
|
||||||
/* UTF-16 systems can't handle these values directly. Since the
|
/* UTF-16 wcrtomb can't handle these values directly. The rest of the
|
||||||
rest of the code isn't surrogate pair aware, we handle this here,
|
code isn't surrogate pair aware, so we handle this here. Convert
|
||||||
invisible for the rest of the code. */
|
value to a UTF-16 surrogate pair and call wcsnrtombs to convert the "string"
|
||||||
*s++ = 0xf0 | ((wc & 0x1c0000) >> 18);
|
to the correct multibyte representation, if any. */
|
||||||
*s++ = 0x80 | ((wc & 0x3f000) >> 12);
|
wchar_t ws[2], *wsp = ws;
|
||||||
*s++ = 0x80 | ((wc & 0xfc0) >> 6);
|
size_t n;
|
||||||
*s = 0x80 | (wc & 0x3f);
|
|
||||||
return 4;
|
wc -= 0x10000;
|
||||||
|
ws[0] = 0xd800 | (wc >> 10);
|
||||||
|
ws[1] = 0xdc00 | (wc & 0x3ff);
|
||||||
|
return wcsnrtombs (s, &wsp, 2, MB_CUR_MAX, ps);
|
||||||
}
|
}
|
||||||
return wcrtomb (s, wc, ps);
|
return wcrtomb (s, wc, ps);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue