* libc/locale/locale.c (loadlocale): Add handling of "@cjknarrow"

modifier on _MB_CAPABLE targets.  Add comment to explain.  Improve
	documentation.
This commit is contained in:
Corinna Vinschen 2009-06-18 09:13:39 +00:00
parent 339dde8fe5
commit e53c92a80e
2 changed files with 73 additions and 27 deletions

View File

@ -1,3 +1,9 @@
2009-06-18 Corinna Vinschen <corinna@vinschen.de>
* libc/locale/locale.c (loadlocale): Add handling of "@cjknarrow"
modifier on _MB_CAPABLE targets. Add comment to explain. Improve
documentation.
2009-06-17 Michael Eager <eager@eagercon.com> 2009-06-17 Michael Eager <eager@eagercon.com>
* libc/include/pthread.h: Support XMK (Xilinx) BSP, add RTEMS to * libc/include/pthread.h: Support XMK (Xilinx) BSP, add RTEMS to

View File

@ -44,29 +44,49 @@ locale.
This is a minimal implementation, supporting only the required <<"POSIX">> This is a minimal implementation, supporting only the required <<"POSIX">>
and <<"C">> values for <[locale]>; strings representing other locales are not and <<"C">> values for <[locale]>; strings representing other locales are not
honored unless _MB_CAPABLE is defined in which case POSIX locale strings honored unless _MB_CAPABLE is defined.
are allowed, plus five extensions supported for backward compatibility with
older implementations using newlib: <<"C-UTF-8">>, <<"C-JIS">>, If _MB_CAPABLE is defined, POSIX locale strings are allowed, following
<<"C-EUCJP">>/<<"C-eucJP">>, <<"C-SJIS">>, <<"C-ISO-8859-x">> with the form
1 <= x <= 15, or <<"C-CPxxx">> with xxx in [437, 720, 737, 775, 850, 852,
855, 857, 858, 862, 866, 874, 1125, 1250, 1251, 1252, 1253, 1254, 1255, 1256, language[_TERRITORY][.charset][@@modifier]
1257, 1258]. Even when using POSIX locale strings, the only charsets allowed
are <<"UTF-8">>, <<"JIS">>, <<"EUCJP">>/<<"eucJP">>, <<"SJIS">>, <<"language">> is a two character string per ISO 639. <<"TERRITORY">> is a
<<"ISO-8859-x">> with 1 <= x <= 15, or <<"CPxxx">> with xxx in [437, 720, country code per ISO 3166. For <<"charset">> and <<"modifier">> see below.
737, 775, 850, 852, 855, 857, 858, 862, 866, 874, 1125, 1250, 1251, 1252,
1253, 1254, 1255, 1256, 1257, 1258]. In addition to the POSIX specifier, six extensions are supported for
backward compatibility with older implementations using newlib:
<<"C-UTF-8">>, <<"C-JIS">>, <<"C-EUCJP">>/<<"C-eucJP">>, <<"C-SJIS">>,
<<"C-ISO-8859-x">> with 1 <= x <= 15, or <<"C-CPxxx">> with xxx in [437,
720, 737, 775, 850, 852, 855, 857, 858, 862, 866, 874, 1125, 1250, 1251,
1252, 1253, 1254, 1255, 1256, 1257, 1258].
Even when using POSIX locale strings, the only charsets allowed are
<<"UTF-8">>, <<"JIS">>, <<"EUCJP">>/<<"eucJP">>, <<"SJIS">>, <<"ISO-8859-x">>
with 1 <= x <= 15, or <<"CPxxx">> with xxx in [437, 720, 737, 775, 850,
852, 855, 857, 858, 862, 866, 874, 1125, 1250, 1251, 1252, 1253, 1254,
1255, 1256, 1257, 1258].
(<<"">> is also accepted; if given, the settings are read from the (<<"">> is also accepted; if given, the settings are read from the
corresponding LC_* environment variables and $LANG according to POSIX rules.) corresponding LC_* environment variables and $LANG according to POSIX rules.)
Under Cygwin, this implementation additionally supports the charsets Under Cygwin, this implementation additionally supports the charsets
<<"GBK">>, <<"eucKR">>, and <<"Big5">>. <<"GBK">>, <<"eucKR">>, and <<"Big5">>.
If you use <<NULL>> as the <[locale]> argument, <<setlocale>> returns This implementation also supports a single modifier, <<"cjknarrow">>.
a pointer to the string representing the current locale (always Any other modifier is ignored. <<"cjknarrow">>, in conjunction with one
<<"C">> in this implementation). The acceptable values for of the language specifiers <<"ja">>, <<"ko">>, and <<"zh">> specifies
<[category]> are defined in `<<locale.h>>' as macros beginning with how the functions <<wcwidth>> and <<wcswidth>> handle characters from
<<"LC_">>, but this implementation does not check the values you pass the "CJK Ambiguous Width" character class described in
in the <[category]> argument. http://www.unicode.org/unicode/reports/tr11/. Usually these characters
have a width of 1, unless you specify one of the aforementioned
languages, in which case these characters have a width of 2. By
specifying the <<"cjknarrow">> modifier, these characters will have a
width of one in the languages <<"ja">>, <<"ko">>, and <<"zh">> as well.
If you use <<NULL>> as the <[locale]> argument, <<setlocale>> returns a
pointer to the string representing the current locale. The acceptable
values for <[category]> are defined in `<<locale.h>>' as macros
beginning with <<"LC_">>.
<<localeconv>> returns a pointer to a structure (also defined in <<localeconv>> returns a pointer to a structure (also defined in
`<<locale.h>>') describing the locale-specific conventions currently `<<locale.h>>') describing the locale-specific conventions currently
@ -399,6 +419,9 @@ loadlocale(struct _reent *p, int category)
int (*l_wctomb) (struct _reent *, char *, wchar_t, const char *, mbstate_t *); int (*l_wctomb) (struct _reent *, char *, wchar_t, const char *, mbstate_t *);
int (*l_mbtowc) (struct _reent *, wchar_t *, const char *, size_t, int (*l_mbtowc) (struct _reent *, wchar_t *, const char *, size_t,
const char *, mbstate_t *); const char *, mbstate_t *);
#ifdef _MB_CAPABLE
int cjknarrow = 0;
#endif
/* "POSIX" is translated to "C", as on Linux. */ /* "POSIX" is translated to "C", as on Linux. */
if (!strcmp (locale, "POSIX")) if (!strcmp (locale, "POSIX"))
@ -429,10 +452,14 @@ loadlocale(struct _reent *p, int category)
if (c[0] == '.') if (c[0] == '.')
{ {
/* Charset */ /* Charset */
strcpy (charset, c + 1); char *chp;
if ((c = strchr (charset, '@')))
++c;
strcpy (charset, c);
if ((chp = strchr (charset, '@')))
/* Strip off modifier */ /* Strip off modifier */
*c = '\0'; *chp = '\0';
c += strlen (charset);
} }
else if (c[0] == '\0' || c[0] == '@') else if (c[0] == '\0' || c[0] == '@')
/* End of string or just a modifier */ /* End of string or just a modifier */
@ -444,6 +471,17 @@ loadlocale(struct _reent *p, int category)
else else
/* Invalid string */ /* Invalid string */
return NULL; return NULL;
#ifdef _MB_CAPABLE
if (c[0] == '@')
{
/* Modifier */
/* Only one modifier is recognized right now. "cjknarrow" is used
to modify the behaviour of wcwidth() for East Asian languages.
For details see the comment at the end of this function. */
if (!strcmp (c + 1, "cjknarrow"))
cjknarrow = 1;
}
#endif
} }
/* We only support this subset of charsets. */ /* We only support this subset of charsets. */
switch (charset[0]) switch (charset[0])
@ -606,13 +644,15 @@ loadlocale(struct _reent *p, int category)
__mbtowc = l_mbtowc; __mbtowc = l_mbtowc;
__set_ctype (charset); __set_ctype (charset);
/* Check for the language part of the locale specifier. In case /* Check for the language part of the locale specifier. In case
of "ja", "ko", or "zh", assume the use of CJK fonts. This is of "ja", "ko", or "zh", assume the use of CJK fonts, unless the
stored in lc_ctype_cjk_lang and tested in wcwidth() to figure "@cjknarrow" modifier has been specified.
out the width to return (1 or 2) for the "CJK Ambiguous Width" The result is stored in lc_ctype_cjk_lang and tested in wcwidth()
category of characters. */ to figure out the width to return (1 or 2) for the "CJK Ambiguous
lc_ctype_cjk_lang = (strncmp (locale, "ja", 2) == 0 Width" category of characters. */
|| strncmp (locale, "ko", 2) == 0 lc_ctype_cjk_lang = !cjknarrow
|| strncmp (locale, "zh", 2) == 0); && ((strncmp (locale, "ja", 2) == 0
|| strncmp (locale, "ko", 2) == 0
|| strncmp (locale, "zh", 2) == 0));
#endif #endif
} }
else if (category == LC_MESSAGES) else if (category == LC_MESSAGES)