4
0
mirror of git://sourceware.org/git/newlib-cygwin.git synced 2025-02-02 04:20:28 +08:00

drop ambiguous-wide behaviour from Unicode CJK locales

This commit is contained in:
Thomas Wolff 2020-10-07 18:35:54 +02:00 committed by Ken Brown
parent c294f50f3d
commit a51546be1a

View File

@@ -78,12 +78,9 @@ This implementation also supports the modifiers <<"cjknarrow">> and
<<"cjkwide">>, which affect how the functions <<wcwidth>> and <<wcswidth>> <<"cjkwide">>, which affect how the functions <<wcwidth>> and <<wcswidth>>
handle characters from the "CJK Ambiguous Width" category of characters handle characters from the "CJK Ambiguous Width" category of characters
described at http://www.unicode.org/reports/tr11/#Ambiguous. described at http://www.unicode.org/reports/tr11/#Ambiguous.
These characters have a width of 1 for singlebyte charsets and a width of 2 These characters have a width of 1 for singlebyte charsets and UTF-8,
for multibyte charsets other than UTF-8. and a width of 2 for multibyte charsets other than UTF-8. Specifying
For UTF-8, their width depends on the language specifier: <<"cjknarrow">> or <<"cjkwide">> forces a width of 1 or 2, respectively.
it is 2 for <<"zh">> (Chinese), <<"ja">> (Japanese), and <<"ko">> (Korean),
and 1 for everything else. Specifying <<"cjknarrow">> or <<"cjkwide">>
forces a width of 1 or 2, respectively, independent of charset and language.
This implementation also supports the modifier <<"cjksingle">> This implementation also supports the modifier <<"cjksingle">>
to enforce single-width character properties. to enforce single-width character properties.
@@ -903,17 +900,12 @@ restart:
/* Determine the width for the "CJK Ambiguous Width" category of /* Determine the width for the "CJK Ambiguous Width" category of
characters. This is used in wcwidth(). Assume single width for characters. This is used in wcwidth(). Assume single width for
single-byte charsets, and double width for multi-byte charsets single-byte charsets, and double width for multi-byte charsets
other than UTF-8. For UTF-8, use double width for the East Asian other than UTF-8. For UTF-8, use single width.
languages ("ja", "ko", "zh"), and single width for everything else.
Single width can also be forced with the "@cjknarrow" modifier. Single width can also be forced with the "@cjknarrow" modifier.
Double width can also be forced with the "@cjkwide" modifier. Double width can also be forced with the "@cjkwide" modifier.
*/ */
loc->cjk_lang = cjkwide || loc->cjk_lang = cjkwide ||
(!cjknarrow && mbc_max > 1 (!cjknarrow && mbc_max > 1 && charset[0] != 'U');
&& (charset[0] != 'U'
|| strncmp (locale, "ja", 2) == 0
|| strncmp (locale, "ko", 2) == 0
|| strncmp (locale, "zh", 2) == 0));
if (cjksingle) if (cjksingle)
loc->cjk_lang = -1; /* Disable CJK dual-width */ loc->cjk_lang = -1; /* Disable CJK dual-width */
#ifdef __HAVE_LOCALE_INFO__ #ifdef __HAVE_LOCALE_INFO__