* libc/locale/locale.c (loadlocale): Add handling of "@cjknarrow"
modifier on _MB_CAPABLE targets. Add comment to explain. Improve documentation.
This commit is contained in:
parent
339dde8fe5
commit
e53c92a80e
|
@ -1,3 +1,9 @@
|
||||||
|
2009-06-18 Corinna Vinschen <corinna@vinschen.de>
|
||||||
|
|
||||||
|
* libc/locale/locale.c (loadlocale): Add handling of "@cjknarrow"
|
||||||
|
modifier on _MB_CAPABLE targets. Add comment to explain. Improve
|
||||||
|
documentation.
|
||||||
|
|
||||||
2009-06-17 Michael Eager <eager@eagercon.com>
|
2009-06-17 Michael Eager <eager@eagercon.com>
|
||||||
|
|
||||||
* libc/include/pthread.h: Support XMK (Xilinx) BSP, add RTEMS to
|
* libc/include/pthread.h: Support XMK (Xilinx) BSP, add RTEMS to
|
||||||
|
|
|
@ -44,29 +44,49 @@ locale.
|
||||||
|
|
||||||
This is a minimal implementation, supporting only the required <<"POSIX">>
|
This is a minimal implementation, supporting only the required <<"POSIX">>
|
||||||
and <<"C">> values for <[locale]>; strings representing other locales are not
|
and <<"C">> values for <[locale]>; strings representing other locales are not
|
||||||
honored unless _MB_CAPABLE is defined in which case POSIX locale strings
|
honored unless _MB_CAPABLE is defined.
|
||||||
are allowed, plus five extensions supported for backward compatibility with
|
|
||||||
older implementations using newlib: <<"C-UTF-8">>, <<"C-JIS">>,
|
If _MB_CAPABLE is defined, POSIX locale strings are allowed, following
|
||||||
<<"C-EUCJP">>/<<"C-eucJP">>, <<"C-SJIS">>, <<"C-ISO-8859-x">> with
|
the form
|
||||||
1 <= x <= 15, or <<"C-CPxxx">> with xxx in [437, 720, 737, 775, 850, 852,
|
|
||||||
855, 857, 858, 862, 866, 874, 1125, 1250, 1251, 1252, 1253, 1254, 1255, 1256,
|
language[_TERRITORY][.charset][@@modifier]
|
||||||
1257, 1258]. Even when using POSIX locale strings, the only charsets allowed
|
|
||||||
are <<"UTF-8">>, <<"JIS">>, <<"EUCJP">>/<<"eucJP">>, <<"SJIS">>,
|
<<"language">> is a two-character string per ISO 639. <<"TERRITORY">> is a
|
||||||
<<"ISO-8859-x">> with 1 <= x <= 15, or <<"CPxxx">> with xxx in [437, 720,
|
country code per ISO 3166. For <<"charset">> and <<"modifier">> see below.
|
||||||
737, 775, 850, 852, 855, 857, 858, 862, 866, 874, 1125, 1250, 1251, 1252,
|
|
||||||
1253, 1254, 1255, 1256, 1257, 1258].
|
In addition to the POSIX specifier, five extensions are supported for
|
||||||
|
backward compatibility with older implementations using newlib:
|
||||||
|
<<"C-UTF-8">>, <<"C-JIS">>, <<"C-EUCJP">>/<<"C-eucJP">>, <<"C-SJIS">>,
|
||||||
|
<<"C-ISO-8859-x">> with 1 <= x <= 15, or <<"C-CPxxx">> with xxx in [437,
|
||||||
|
720, 737, 775, 850, 852, 855, 857, 858, 862, 866, 874, 1125, 1250, 1251,
|
||||||
|
1252, 1253, 1254, 1255, 1256, 1257, 1258].
|
||||||
|
|
||||||
|
Even when using POSIX locale strings, the only charsets allowed are
|
||||||
|
<<"UTF-8">>, <<"JIS">>, <<"EUCJP">>/<<"eucJP">>, <<"SJIS">>, <<"ISO-8859-x">>
|
||||||
|
with 1 <= x <= 15, or <<"CPxxx">> with xxx in [437, 720, 737, 775, 850,
|
||||||
|
852, 855, 857, 858, 862, 866, 874, 1125, 1250, 1251, 1252, 1253, 1254,
|
||||||
|
1255, 1256, 1257, 1258].
|
||||||
(<<"">> is also accepted; if given, the settings are read from the
|
(<<"">> is also accepted; if given, the settings are read from the
|
||||||
corresponding LC_* environment variables and $LANG according to POSIX rules.)
|
corresponding LC_* environment variables and $LANG according to POSIX rules.)
|
||||||
|
|
||||||
Under Cygwin, this implementation additionally supports the charsets
|
Under Cygwin, this implementation additionally supports the charsets
|
||||||
<<"GBK">>, <<"eucKR">>, and <<"Big5">>.
|
<<"GBK">>, <<"eucKR">>, and <<"Big5">>.
|
||||||
|
|
||||||
If you use <<NULL>> as the <[locale]> argument, <<setlocale>> returns
|
This implementation also supports a single modifier, <<"cjknarrow">>.
|
||||||
a pointer to the string representing the current locale (always
|
Any other modifier is ignored. <<"cjknarrow">>, in conjunction with one
|
||||||
<<"C">> in this implementation). The acceptable values for
|
of the language specifiers <<"ja">>, <<"ko">>, and <<"zh">> specifies
|
||||||
<[category]> are defined in `<<locale.h>>' as macros beginning with
|
how the functions <<wcwidth>> and <<wcswidth>> handle characters from
|
||||||
<<"LC_">>, but this implementation does not check the values you pass
|
the "CJK Ambiguous Width" character class described in
|
||||||
in the <[category]> argument.
|
http://www.unicode.org/unicode/reports/tr11/. Usually these characters
|
||||||
|
have a width of 1, unless you specify one of the aforementioned
|
||||||
|
languages, in which case these characters have a width of 2. By
|
||||||
|
specifying the <<"cjknarrow">> modifier, these characters will have a
|
||||||
|
width of one in the languages <<"ja">>, <<"ko">>, and <<"zh">> as well.
|
||||||
|
|
||||||
|
If you use <<NULL>> as the <[locale]> argument, <<setlocale>> returns a
|
||||||
|
pointer to the string representing the current locale. The acceptable
|
||||||
|
values for <[category]> are defined in `<<locale.h>>' as macros
|
||||||
|
beginning with <<"LC_">>.
|
||||||
|
|
||||||
<<localeconv>> returns a pointer to a structure (also defined in
|
<<localeconv>> returns a pointer to a structure (also defined in
|
||||||
`<<locale.h>>') describing the locale-specific conventions currently
|
`<<locale.h>>') describing the locale-specific conventions currently
|
||||||
|
@ -399,6 +419,9 @@ loadlocale(struct _reent *p, int category)
|
||||||
int (*l_wctomb) (struct _reent *, char *, wchar_t, const char *, mbstate_t *);
|
int (*l_wctomb) (struct _reent *, char *, wchar_t, const char *, mbstate_t *);
|
||||||
int (*l_mbtowc) (struct _reent *, wchar_t *, const char *, size_t,
|
int (*l_mbtowc) (struct _reent *, wchar_t *, const char *, size_t,
|
||||||
const char *, mbstate_t *);
|
const char *, mbstate_t *);
|
||||||
|
#ifdef _MB_CAPABLE
|
||||||
|
int cjknarrow = 0;
|
||||||
|
#endif
|
||||||
|
|
||||||
/* "POSIX" is translated to "C", as on Linux. */
|
/* "POSIX" is translated to "C", as on Linux. */
|
||||||
if (!strcmp (locale, "POSIX"))
|
if (!strcmp (locale, "POSIX"))
|
||||||
|
@ -429,10 +452,14 @@ loadlocale(struct _reent *p, int category)
|
||||||
if (c[0] == '.')
|
if (c[0] == '.')
|
||||||
{
|
{
|
||||||
/* Charset */
|
/* Charset */
|
||||||
strcpy (charset, c + 1);
|
char *chp;
|
||||||
if ((c = strchr (charset, '@')))
|
|
||||||
|
++c;
|
||||||
|
strcpy (charset, c);
|
||||||
|
if ((chp = strchr (charset, '@')))
|
||||||
/* Strip off modifier */
|
/* Strip off modifier */
|
||||||
*c = '\0';
|
*chp = '\0';
|
||||||
|
c += strlen (charset);
|
||||||
}
|
}
|
||||||
else if (c[0] == '\0' || c[0] == '@')
|
else if (c[0] == '\0' || c[0] == '@')
|
||||||
/* End of string or just a modifier */
|
/* End of string or just a modifier */
|
||||||
|
@ -444,6 +471,17 @@ loadlocale(struct _reent *p, int category)
|
||||||
else
|
else
|
||||||
/* Invalid string */
|
/* Invalid string */
|
||||||
return NULL;
|
return NULL;
|
||||||
|
#ifdef _MB_CAPABLE
|
||||||
|
if (c[0] == '@')
|
||||||
|
{
|
||||||
|
/* Modifier */
|
||||||
|
/* Only one modifier is recognized right now. "cjknarrow" is used
|
||||||
|
to modify the behaviour of wcwidth() for East Asian languages.
|
||||||
|
For details see the comment at the end of this function. */
|
||||||
|
if (!strcmp (c + 1, "cjknarrow"))
|
||||||
|
cjknarrow = 1;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
/* We only support this subset of charsets. */
|
/* We only support this subset of charsets. */
|
||||||
switch (charset[0])
|
switch (charset[0])
|
||||||
|
@ -606,13 +644,15 @@ loadlocale(struct _reent *p, int category)
|
||||||
__mbtowc = l_mbtowc;
|
__mbtowc = l_mbtowc;
|
||||||
__set_ctype (charset);
|
__set_ctype (charset);
|
||||||
/* Check for the language part of the locale specifier. In case
|
/* Check for the language part of the locale specifier. In case
|
||||||
of "ja", "ko", or "zh", assume the use of CJK fonts. This is
|
of "ja", "ko", or "zh", assume the use of CJK fonts, unless the
|
||||||
stored in lc_ctype_cjk_lang and tested in wcwidth() to figure
|
"@cjknarrow" modifier has been specified.
|
||||||
out the width to return (1 or 2) for the "CJK Ambiguous Width"
|
The result is stored in lc_ctype_cjk_lang and tested in wcwidth()
|
||||||
category of characters. */
|
to figure out the width to return (1 or 2) for the "CJK Ambiguous
|
||||||
lc_ctype_cjk_lang = (strncmp (locale, "ja", 2) == 0
|
Width" category of characters. */
|
||||||
|| strncmp (locale, "ko", 2) == 0
|
lc_ctype_cjk_lang = !cjknarrow
|
||||||
|| strncmp (locale, "zh", 2) == 0);
|
&& ((strncmp (locale, "ja", 2) == 0
|
||||||
|
|| strncmp (locale, "ko", 2) == 0
|
||||||
|
|| strncmp (locale, "zh", 2) == 0));
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
else if (category == LC_MESSAGES)
|
else if (category == LC_MESSAGES)
|
||||||
|
|
Loading…
Reference in New Issue