* strfuncs.cc (sys_cp_wcstombs): Implement reverse functionality
of the change to sys_cp_mbstowcs from 2009-05-30.
(sys_cp_mbstowcs): Slightly reformat. Fix comment to accommodate
change to sys_cp_wcstombs. Don't write to *ptr if dst is NULL.
This commit is contained in:
parent
c48a1e46bf
commit
397775c6f6
|
@ -1,3 +1,10 @@
|
||||||
|
2009-06-03 Corinna Vinschen <corinna@vinschen.de>
|
||||||
|
|
||||||
|
* strfuncs.cc (sys_cp_wcstombs): Implement reverse functionality
|
||||||
|
of the change to sys_cp_mbstowcs from 2009-05-30.
|
||||||
|
(sys_cp_mbstowcs): Slightly reformat. Fix comment to accommodate
|
||||||
|
change to sys_cp_wcstombs. Don't write to *ptr if dst is NULL.
|
||||||
|
|
||||||
2009-06-03 Corinna Vinschen <corinna@vinschen.de>
|
2009-06-03 Corinna Vinschen <corinna@vinschen.de>
|
||||||
|
|
||||||
* fhandler_console.cc (fhandler_console::read): Convert Alt-Backspace
|
* fhandler_console.cc (fhandler_console::read): Convert Alt-Backspace
|
||||||
|
|
|
@ -427,10 +427,19 @@ sys_cp_wcstombs (wctomb_p f_wctomb, char *charset, char *dst, size_t len,
|
||||||
if ((pw & 0xff00) == 0xf000)
|
if ((pw & 0xff00) == 0xf000)
|
||||||
pw &= 0xff;
|
pw &= 0xff;
|
||||||
int bytes = f_wctomb (_REENT, buf, pw, charset, &ps);
|
int bytes = f_wctomb (_REENT, buf, pw, charset, &ps);
|
||||||
|
if (bytes == -1 && (pw & 0xff00) == 0xdc00)
|
||||||
|
{
|
||||||
|
/* Reverse functionality of the single invalid second half of a
|
||||||
|
surrogate pair in the 0xDCxx range specifying an invalid byte
|
||||||
|
value when converting from MB to WC.
|
||||||
|
The comment in sys_cp_mbstowcs below explains it. */
|
||||||
|
buf[0] = (char) (pw & 0xff);
|
||||||
|
bytes = 1;
|
||||||
|
}
|
||||||
|
else if (bytes == -1 && *charset != 'U'/*TF-8*/)
|
||||||
|
{
|
||||||
/* Convert chars invalid in the current codepage to a sequence
|
/* Convert chars invalid in the current codepage to a sequence
|
||||||
ASCII SO; UTF-8 representation of invalid char. */
|
ASCII SO; UTF-8 representation of invalid char. */
|
||||||
if (bytes == -1 && *charset != 'U'/*TF-8*/)
|
|
||||||
{
|
|
||||||
buf[0] = 0x0e; /* ASCII SO */
|
buf[0] = 0x0e; /* ASCII SO */
|
||||||
bytes = __utf8_wctomb (_REENT, buf + 1, pw, charset, &ps);
|
bytes = __utf8_wctomb (_REENT, buf + 1, pw, charset, &ps);
|
||||||
if (bytes == -1)
|
if (bytes == -1)
|
||||||
|
@ -561,7 +570,8 @@ sys_cp_mbstowcs (mbtowc_p f_mbtowc, char *charset, wchar_t *dst, size_t dlen,
|
||||||
if (ps.__count == 4) /* First half of a surrogate. */
|
if (ps.__count == 4) /* First half of a surrogate. */
|
||||||
{
|
{
|
||||||
wchar_t *ptr2 = dst ? ptr + 1 : NULL;
|
wchar_t *ptr2 = dst ? ptr + 1 : NULL;
|
||||||
int bytes2 = __utf8_mbtowc (_REENT, ptr2, (const char *) pmbs + bytes,
|
int bytes2 = __utf8_mbtowc (_REENT, ptr2,
|
||||||
|
(const char *) pmbs + bytes,
|
||||||
nms - bytes, charset, &ps);
|
nms - bytes, charset, &ps);
|
||||||
if (bytes2 < 0)
|
if (bytes2 < 0)
|
||||||
break;
|
break;
|
||||||
|
@ -572,7 +582,9 @@ sys_cp_mbstowcs (mbtowc_p f_mbtowc, char *charset, wchar_t *dst, size_t dlen,
|
||||||
--len;
|
--len;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if ((bytes = f_mbtowc (_REENT, ptr, (const char *) pmbs, nms, charset, &ps)) < 0 && *pmbs > '\x80')
|
else if ((bytes = f_mbtowc (_REENT, ptr, (const char *) pmbs, nms,
|
||||||
|
charset, &ps)) < 0
|
||||||
|
&& *pmbs > '\x80')
|
||||||
{
|
{
|
||||||
/* This should probably be handled in f_mbtowc which can operate
|
/* This should probably be handled in f_mbtowc which can operate
|
||||||
on sequences rather than individual characters.
|
on sequences rather than individual characters.
|
||||||
|
@ -581,12 +593,10 @@ sys_cp_mbstowcs (mbtowc_p f_mbtowc, char *charset, wchar_t *dst, size_t dlen,
|
||||||
http://www.mail-archive.com/linux-utf8@nl.linux.org/msg00080.html
|
http://www.mail-archive.com/linux-utf8@nl.linux.org/msg00080.html
|
||||||
|
|
||||||
This is hardly perfect. Windows doesn't do anything sensical with
|
This is hardly perfect. Windows doesn't do anything sensical with
|
||||||
characters converted to this format and (currently) we don't convert
|
characters converted to this format. It does allow processing of
|
||||||
them back into their original single byte form. It does allow
|
src to continue, however, which, since there is no way to signal
|
||||||
processing of src to continue, however, which, since there is no
|
decoding errors, seems like the best we can do. */
|
||||||
way to signal decoding errors, seems like the best we can do.
|
if (dst)
|
||||||
|
|
||||||
*/
|
|
||||||
*ptr = L'\xdc80' | *pmbs;
|
*ptr = L'\xdc80' | *pmbs;
|
||||||
bytes = 1;
|
bytes = 1;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue