mirror of
git://sourceware.org/git/newlib-cygwin.git
synced 2025-01-25 16:47:20 +08:00
c3798a2f7d
Signed-off-by: Corinna Vinschen <corinna@vinschen.de>
1809 lines
55 KiB
C++
1809 lines
55 KiB
C++
/* nlsfuncs.cc: NLS helper functions
|
|
|
|
This file is part of Cygwin.
|
|
|
|
This software is a copyrighted work licensed under the terms of the
|
|
Cygwin license. Please consult the file "CYGWIN_LICENSE" for
|
|
details. */
|
|
|
|
#include "winsup.h"
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <locale.h>
|
|
#include <wchar.h>
|
|
#include <wctype.h>
|
|
#include "path.h"
|
|
#include "fhandler.h"
|
|
#include "dtable.h"
|
|
#include "cygheap.h"
|
|
#include "tls_pbuf.h"
|
|
#include "collate.h"
|
|
#include "lc_msg.h"
|
|
#include "lc_era.h"
|
|
#include "lc_collelem.h"
|
|
#include "lc_def_codesets.h"
|
|
|
|
/* Expands to the argument pair "address of the write pointer, remaining
   bytes" for the buffer of locale category x.  Requires the local variables
   lc_<x>_ptr and lc_<x>_end to be in scope where the macro is used. */
#define _LC(x)  &lc_##x##_ptr,lc_##x##_end-lc_##x##_ptr

/* Shorthands for the static helpers defined further down in this file.
   They implicitly use the locals win_locale, f_wctomb, _<category>_locale
   and the lc_<category>_ptr/lc_<category>_end pair of the calling function. */
#define getlocaleinfo(category,type) \
            __getlocaleinfo(win_locale,(type),_LC(category))
#define getlocaleint(type) \
            __getlocaleint(win_locale,(type))
#define setlocaleinfo(category,val) \
            __setlocaleinfo(_LC(category),(val))
#define eval_datetimefmt(type,flags) \
            __eval_datetimefmt(win_locale,(type),(flags),&lc_time_ptr,\
                               lc_time_end-lc_time_ptr)
#define charfromwchar(category,in) \
            __charfromwchar (_##category##_locale->in,_LC(category),f_wctomb)

/* Check for @cjk* modifier.  Try to be as fast as possible.
   _in points to the '@' introducing the modifier, _cmp is the string
   comparison function matching the character type, _L is the literal
   prefix (empty or L).  The argument is parenthesized so that pointer
   expressions such as "p + 1" expand correctly. */
#define __is_cjk_modifier(_in, _cmp, _L) ({ \
          (_in)[1] == 'c' \
          && (_in)[2] == 'j' \
          && (_in)[3] == 'k' \
          && (_cmp ((_in) + 4, _L##"narrow") == 0 \
              || _cmp ((_in) + 4, _L##"wide") == 0 \
              || _cmp ((_in) + 4, _L##"single") == 0); \
        })
#define is_cjk_modifier(_in)    __is_cjk_modifier(_in, strcmp, )
#define w_is_cjk_modifier(_in)  __is_cjk_modifier(_in, wcscmp, L)
|
|
|
|
/* ResolveLocaleName does not do what we want.  It converts anything which
   vaguely resembles a locale into some other locale it supports.  Bad
   examples are: "en-XY" gets converted to "en-US", and worse, "ff-BF" gets
   converted to "ff-Latn-SN", even though "ff-Adlm-BF" exists!  Useless.
   To check if a locale is supported, we have to enumerate all valid
   Windows locales, and return the match, even if the locale in Windows
   requires a script.

   res_loc_t carries the search request into the enumeration callback and
   the result back out of it. */
struct res_loc_t {
  /* Start of the searched "language-TERRITORY" string; the callback only
     compares its language part. */
  const wchar_t *search_iso639;
  /* Territory part of the searched string, i.e. the text following the
     first dash. */
  const wchar_t *search_iso3166;
  /* Caller-supplied buffer the matching Windows locale name is written to. */
  wchar_t *resolved_locale;
  /* Size of resolved_locale as passed by the caller of resolve_locale_name. */
  int res_len;
};
|
|
|
|
/* Callback for EnumSystemLocalesEx, invoked once per Windows locale name.
   param points to the res_loc_t describing the search.

   Returns TRUE to continue the enumeration when win_locale does not match,
   FALSE to stop it after a match has been appended to
   loc->resolved_locale.  info is unused. */
static BOOL
resolve_locale_proc (LPWSTR win_locale, DWORD info, LPARAM param)
{
  res_loc_t *loc = (res_loc_t *) param;
  wchar_t *iso639_end;
  wchar_t *iso3166;
  size_t iso639_len;

  /* Locale names without a dash carry no territory, skip them. */
  iso639_end = wcschr (win_locale, L'-');
  if (!iso639_end)
    return TRUE;
  iso639_len = iso639_end - win_locale;
  /* The language has to match in full.  Comparing only iso639_len chars
     would let a longer search language sharing the same prefix ("fil" vs.
     "fi") pass, so also require the dash in the search string right after
     the compared part. */
  if (wcsncmp (loc->search_iso639, win_locale, iso639_len) != 0
      || loc->search_iso639[iso639_len] != L'-')
    return TRUE;
  iso3166 = iso639_end + 1;
  /* Territory is all upper case.  Skip any other dash-separated subtag
     (e.g. a script) until two upper case chars are found. */
  while (!iswupper (iso3166[0]) || !iswupper (iso3166[1]))
    {
      iso3166 = wcschr (iso3166, L'-');
      if (!iso3166)
        return TRUE;
      ++iso3166;
    }
  if (wcsncmp (loc->search_iso3166, iso3166, wcslen (loc->search_iso3166)))
    return TRUE;
  /* resolved_locale was emptied by the caller, so this acts as a bounded
     copy. */
  wcsncat (loc->resolved_locale, win_locale, loc->res_len - 1);
  return FALSE;
}
|
|
|
|
static int
|
|
resolve_locale_name (const wchar_t *search, wchar_t *result, int rlen)
|
|
{
|
|
res_loc_t loc;
|
|
|
|
loc.search_iso639 = search;
|
|
loc.search_iso3166 = wcschr (search, L'-') + 1;
|
|
loc.resolved_locale = result;
|
|
loc.res_len = rlen;
|
|
result[0] = L'\0';
|
|
EnumSystemLocalesEx (resolve_locale_proc,
|
|
LOCALE_WINDOWS | LOCALE_SUPPLEMENTAL,
|
|
(LPARAM) &loc, NULL);
|
|
return wcslen (result);
|
|
}
|
|
|
|
/* Fetch Windows RFC 5646 locale from POSIX locale specifier.
|
|
Return values:
|
|
|
|
-1: Invalid locale
|
|
0: C or POSIX
|
|
1: valid locale
|
|
*/
|
|
static int
|
|
__get_rfc5646_from_locale (const char *name, wchar_t *win_locale)
|
|
{
|
|
wchar_t wlocale[ENCODING_LEN + 1] = { 0 };
|
|
wchar_t locale[ENCODING_LEN + 1];
|
|
wchar_t *c;
|
|
|
|
win_locale[0] = L'\0';
|
|
mbstowcs (locale, name, ENCODING_LEN + 1);
|
|
/* Remember modifier for later use. */
|
|
const char *modifier = strchr (name, '@') ? : "";
|
|
/* Drop charset and modifier */
|
|
c = wcschr (locale, L'.');
|
|
if (!c)
|
|
c = wcschr (locale, L'@');
|
|
if (c)
|
|
*c = L'\0';
|
|
/* "POSIX" already converted to "C" in loadlocale. */
|
|
if (!wcscmp (locale, L"C"))
|
|
return 0;
|
|
c = wcschr (locale, '_');
|
|
if (!c)
|
|
{
|
|
/* try if the locale can be resolved from the language tag
|
|
fix up Linux-only locale first */
|
|
if (!wcscmp (locale, L"ber"))
|
|
wcscpy (locale, L"tzm");
|
|
if (ResolveLocaleName (locale, wlocale, ENCODING_LEN + 1) <= 1)
|
|
{
|
|
set_errno (ENOENT);
|
|
return -1;
|
|
}
|
|
wcpcpy (win_locale, wlocale);
|
|
return 1;
|
|
}
|
|
|
|
/* Convert to RFC 5646 syntax. */
|
|
*c = '-';
|
|
/* Override a few locales with a different default script as used
|
|
on Linux. Linux also supports no_NO which is equivalent to nb_NO,
|
|
but Windows can resolve that nicely. Also, "tzm" and "zgh" are
|
|
subsumed under "ber" on Linux. */
|
|
struct {
|
|
const wchar_t *loc;
|
|
const wchar_t *wloc;
|
|
} override_locale[] = {
|
|
{ L"ber-DZ" , L"tzm-Latn-DZ" },
|
|
{ L"ber-MA" , L"zgh-Tfng-MA" },
|
|
{ L"mn-CN" , L"mn-Mong-CN" },
|
|
{ L"mn-MN" , L"mn-Mong-MN" },
|
|
{ L"pa-PK" , L"pa-Arab-PK" },
|
|
{ L"sd-IN" , L"sd-Deva-IN" },
|
|
{ L"sr-BA" , L"sr-Cyrl-BA" },
|
|
{ L"sr-ME" , L"sr-Cyrl-ME" },
|
|
{ L"sr-RS" , L"sr-Cyrl-RS" },
|
|
{ L"sr-XK" , L"sr-Cyrl-XK" },
|
|
{ L"tzm-MA", L"tzm-Tfng-MA" },
|
|
{ NULL , NULL }
|
|
};
|
|
|
|
for (int i = 0; override_locale[i].loc
|
|
&& override_locale[i].loc[0] <= locale[0]; ++i)
|
|
{
|
|
if (!wcscmp (locale, override_locale[i].loc))
|
|
{
|
|
wcscpy (wlocale, override_locale[i].wloc);
|
|
break;
|
|
}
|
|
}
|
|
/* If resolve_locale_name returns with error, or if it returns a
|
|
locale other than the input locale, we don't support this locale. */
|
|
if (!wlocale[0]
|
|
&& !resolve_locale_name (locale, wlocale, ENCODING_LEN + 1))
|
|
{
|
|
set_errno (ENOENT);
|
|
return -1;
|
|
}
|
|
|
|
/* Check for modifiers changing the script */
|
|
const wchar_t *iso15924_script[] = { L"Latn-", L"Cyrl-", L"Deva-", L"Adlm-" };
|
|
int idx = -1;
|
|
|
|
if (modifier[0])
|
|
{
|
|
if (!strcmp (++modifier, "latin"))
|
|
idx = 0;
|
|
else if (!strcmp (modifier, "cyrillic"))
|
|
idx = 1;
|
|
else if (!strcmp (modifier, "devanagari"))
|
|
idx = 2;
|
|
else if (!strcmp (modifier, "adlam"))
|
|
idx = 3;
|
|
}
|
|
if (idx >= 0)
|
|
{
|
|
wchar_t *iso3166 = wcschr (wlocale, L'-') + 1;
|
|
wchar_t *wlp;
|
|
|
|
/* Copy iso639 language part including dash */
|
|
wlp = wcpncpy (win_locale, wlocale, iso3166 - wlocale);
|
|
/* Concat new iso15924 script */
|
|
wlp = wcpcpy (wlp, iso15924_script[idx]);
|
|
/* Concat iso3166 territory. Skip script, if already in the locale */
|
|
wchar_t *skip_script = wcschr (iso3166, L'-');
|
|
if (skip_script)
|
|
iso3166 = skip_script + 1;
|
|
wcpcpy (wlp, iso3166);
|
|
}
|
|
else
|
|
wcpcpy (win_locale, wlocale);
|
|
return 1;
|
|
}
|
|
|
|
/* Never returns -1. Just skips invalid chars instead. Only if return_invalid
|
|
is set, s==NULL returns -1 since then it's used to recognize invalid strings
|
|
in the used charset. */
|
|
static size_t
|
|
lc_wcstombs (wctomb_p f_wctomb, char *s, const wchar_t *pwcs, size_t n,
|
|
bool return_invalid = false)
|
|
{
|
|
char *ptr = s;
|
|
size_t max = n;
|
|
char buf[8];
|
|
size_t i, bytes, num_to_copy;
|
|
mbstate_t state;
|
|
|
|
memset (&state, 0, sizeof state);
|
|
if (s == NULL)
|
|
{
|
|
size_t num_bytes = 0;
|
|
while (*pwcs != 0)
|
|
{
|
|
bytes = f_wctomb (_REENT, buf, *pwcs++, &state);
|
|
if (bytes != (size_t) -1)
|
|
num_bytes += bytes;
|
|
else if (return_invalid)
|
|
return (size_t) -1;
|
|
}
|
|
return num_bytes;
|
|
}
|
|
while (n > 0)
|
|
{
|
|
bytes = f_wctomb (_REENT, buf, *pwcs, &state);
|
|
if (bytes == (size_t) -1)
|
|
{
|
|
memset (&state, 0, sizeof state);
|
|
++pwcs;
|
|
continue;
|
|
}
|
|
num_to_copy = (n > bytes ? bytes : n);
|
|
for (i = 0; i < num_to_copy; ++i)
|
|
*ptr++ = buf[i];
|
|
|
|
if (*pwcs == 0x00)
|
|
return ptr - s - (n >= bytes);
|
|
++pwcs;
|
|
n -= num_to_copy;
|
|
}
|
|
return max;
|
|
}
|
|
|
|
/* Never returns -1. Invalid sequences are translated to replacement
|
|
wide-chars. */
|
|
static size_t
|
|
lc_mbstowcs (mbtowc_p f_mbtowc, wchar_t *pwcs, const char *s, size_t n)
|
|
{
|
|
size_t ret = 0;
|
|
char *t = (char *) s;
|
|
size_t bytes;
|
|
mbstate_t state;
|
|
|
|
memset (&state, 0, sizeof state);
|
|
if (!pwcs)
|
|
n = 1;
|
|
while (n > 0)
|
|
{
|
|
bytes = f_mbtowc (_REENT, pwcs, t, 6 /* fake, always enough */, &state);
|
|
if (bytes == (size_t) -1)
|
|
{
|
|
state.__count = 0;
|
|
bytes = 1;
|
|
if (pwcs)
|
|
*pwcs = L' ';
|
|
}
|
|
else if (bytes == 0)
|
|
break;
|
|
t += bytes;
|
|
++ret;
|
|
if (pwcs)
|
|
{
|
|
++pwcs;
|
|
--n;
|
|
}
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
/* bsearch comparison callback.  Both arguments point to an object whose
   first member is a char pointer to a locale name; the names are compared
   with strcmp. */
static int
locale_cmp (const void *a, const void *b)
{
  const char *lhs = *(char **) a;
  const char *rhs = *(char **) b;

  return strcmp (lhs, rhs);
}
|
|
|
|
/* Helper function to work around reallocs which move blocks even if they
   shrink.  Cygwin's realloc is not doing this, but tcsh's, for instance.
   All lc_foo structures consist entirely of pointers so they are
   practically pointer arrays.  So the range ptrv up to, but excluding,
   ptrvend is treated as an array of char pointers, and every pointer
   pointing into the old buffer, oldbase <= p < oldend, is moved by the
   distance between newbase and oldbase.  All other pointers are left
   untouched. */
static void
rebase_locale_buf (const void *ptrv, const void *ptrvend, const char *newbase,
                   const char *oldbase, const char *oldend)
{
  const char **slot = (const char **) ptrv;
  const char **const stop = (const char **) ptrvend;

  while (slot < stop)
    {
      if (*slot >= oldbase && *slot < oldend)
        *slot += newbase - oldbase;
      ++slot;
    }
}
|
|
|
|
static wchar_t *
|
|
__getlocaleinfo (wchar_t *loc, LCTYPE type, char **ptr, size_t size)
|
|
{
|
|
size_t num;
|
|
wchar_t *ret;
|
|
|
|
if ((uintptr_t) *ptr % 1)
|
|
++*ptr;
|
|
ret = (wchar_t *) *ptr;
|
|
num = GetLocaleInfoEx (loc, type, ret, size / sizeof (wchar_t));
|
|
*ptr = (char *) (ret + num);
|
|
return ret;
|
|
}
|
|
|
|
static wchar_t *
|
|
__setlocaleinfo (char **ptr, size_t size, wchar_t val)
|
|
{
|
|
wchar_t *ret;
|
|
|
|
if ((uintptr_t) *ptr % 1)
|
|
++*ptr;
|
|
ret = (wchar_t *) *ptr;
|
|
ret[0] = val;
|
|
ret[1] = L'\0';
|
|
*ptr = (char *) (ret + 2);
|
|
return ret;
|
|
}
|
|
|
|
static char *
|
|
__charfromwchar (const wchar_t *in, char **ptr, size_t size, wctomb_p f_wctomb)
|
|
{
|
|
size_t num;
|
|
char *ret;
|
|
|
|
num = lc_wcstombs (f_wctomb, ret = *ptr, in, size);
|
|
*ptr += num + 1;
|
|
return ret;
|
|
}
|
|
|
|
/* Fetch the locale information type of the Windows locale loc as number.
   Returns the value, or 0 if the lookup failed. */
static UINT
__getlocaleint (wchar_t *loc, LCTYPE type)
{
  UINT val;

  /* The buffer size is counted in wide chars, not in bytes, even with
     LOCALE_RETURN_NUMBER. */
  if (!GetLocaleInfoEx (loc, type | LOCALE_RETURN_NUMBER, (PWCHAR) &val,
                        sizeof val / sizeof (WCHAR)))
    return 0;
  return val;
}
|
|
|
|
/* Flags modifying how __eval_datetimefmt translates a Windows format. */
enum dt_flags {
  DT_DEFAULT = 0,       /* Take the Windows format as is. */
  DT_AMPM    = 1 << 0,  /* Enforce 12 hour time format. */
  DT_ABBREV  = 1 << 1,  /* Enforce abbreviated month and day names. */
};
|
|
|
|
static wchar_t *
|
|
__eval_datetimefmt (wchar_t *loc, LCTYPE type, dt_flags flags, char **ptr,
|
|
size_t size)
|
|
{
|
|
wchar_t buf[80];
|
|
wchar_t fc;
|
|
size_t idx;
|
|
const wchar_t *day_str = L"edaA";
|
|
const wchar_t *mon_str = L"mmbB";
|
|
const wchar_t *year_str = L"yyyY";
|
|
const wchar_t *hour12_str = L"lI";
|
|
const wchar_t *hour24_str = L"kH";
|
|
const wchar_t *t_str;
|
|
|
|
if ((uintptr_t) *ptr % 1)
|
|
++*ptr;
|
|
wchar_t *ret = (wchar_t *) *ptr;
|
|
wchar_t *p = (wchar_t *) *ptr;
|
|
GetLocaleInfoEx (loc, type, buf, 80);
|
|
for (wchar_t *fmt = buf; *fmt; ++fmt)
|
|
switch (fc = *fmt)
|
|
{
|
|
case L'\'':
|
|
if (fmt[1] == L'\'')
|
|
*p++ = L'\'';
|
|
else
|
|
while (fmt[1] && *++fmt != L'\'')
|
|
*p++ = *fmt;
|
|
break;
|
|
case L'd':
|
|
case L'M':
|
|
case L'y':
|
|
t_str = (fc == L'd' ? day_str : fc == L'M' ? mon_str : year_str);
|
|
for (idx = 0; fmt[1] == fc; ++idx, ++fmt);
|
|
if (idx > 3)
|
|
idx = 3;
|
|
if ((flags & DT_ABBREV) && fc != L'y' && idx == 3)
|
|
idx = 2;
|
|
*p++ = L'%';
|
|
*p++ = t_str[idx];
|
|
break;
|
|
case L'g':
|
|
/* TODO */
|
|
break;
|
|
case L'h':
|
|
case L'H':
|
|
t_str = (fc == L'h' || (flags & DT_AMPM) ? hour12_str : hour24_str);
|
|
idx = 0;
|
|
if (fmt[1] == fc)
|
|
{
|
|
++fmt;
|
|
idx = 1;
|
|
}
|
|
*p++ = L'%';
|
|
*p++ = t_str[idx];
|
|
break;
|
|
case L'm':
|
|
case L's':
|
|
case L't':
|
|
if (fmt[1] == fc)
|
|
++fmt;
|
|
*p++ = L'%';
|
|
*p++ = (fc == L'm' ? L'M' : fc == L's' ? L'S' : L'p');
|
|
break;
|
|
case L'\t':
|
|
case L'\n':
|
|
case L'%':
|
|
*p++ = L'%';
|
|
*p++ = fc;
|
|
break;
|
|
default:
|
|
*p++ = *fmt;
|
|
break;
|
|
}
|
|
*p++ = L'\0';
|
|
*ptr = (char *) p;
|
|
return ret;
|
|
}
|
|
|
|
/* Convert Windows grouping format into POSIX grouping format. */
|
|
static char *
|
|
conv_grouping (wchar_t *loc, LCTYPE type, char **lc_ptr)
|
|
{
|
|
wchar_t buf[10]; /* Per MSDN max size of LOCALE_SGROUPING element incl. NUL */
|
|
bool repeat = false;
|
|
char *ptr = *lc_ptr;
|
|
char *ret = ptr;
|
|
|
|
GetLocaleInfoEx (loc, type, buf, 10);
|
|
/* Convert Windows grouping format into POSIX grouping format. Note that
|
|
only ASCII chars are used in the grouping format. */
|
|
for (wchar_t *c = buf; *c; ++c)
|
|
{
|
|
if (*c < L'0' || *c > L'9')
|
|
continue;
|
|
char val = *c - L'0';
|
|
if (!val)
|
|
{
|
|
repeat = true;
|
|
break;
|
|
}
|
|
*ptr++ = val;
|
|
}
|
|
if (!repeat)
|
|
*ptr++ = CHAR_MAX;
|
|
*ptr++ = '\0';
|
|
*lc_ptr = ptr;
|
|
return ret;
|
|
}
|
|
|
|
/* Called from newlib's setlocale() via __time_load_locale() if category
|
|
is LC_TIME. Returns LC_TIME values fetched from Windows locale data
|
|
in the structure pointed to by _time_locale. This is subsequently
|
|
accessed by functions like nl_langinfo, strftime, strptime. */
|
|
extern "C" int
|
|
__set_lc_time_from_win (const char *name,
|
|
const struct lc_time_T *_C_time_locale,
|
|
struct lc_time_T *_time_locale,
|
|
char **lc_time_buf, wctomb_p f_wctomb,
|
|
const char *charset)
|
|
{
|
|
wchar_t win_locale[ENCODING_LEN + 1];
|
|
int ret = __get_rfc5646_from_locale (name, win_locale);
|
|
if (ret < 0)
|
|
return ret;
|
|
if (!ret && !strcmp (charset, "ASCII"))
|
|
return 0;
|
|
|
|
# define MAX_TIME_BUFFER_SIZE 4096
|
|
|
|
char *new_lc_time_buf = (char *) malloc (MAX_TIME_BUFFER_SIZE);
|
|
const char *lc_time_end = new_lc_time_buf + MAX_TIME_BUFFER_SIZE;
|
|
|
|
if (!new_lc_time_buf)
|
|
return -1;
|
|
char *lc_time_ptr = new_lc_time_buf;
|
|
|
|
/* C.foo is just a copy of "C" with fixed charset. */
|
|
if (!ret)
|
|
memcpy (_time_locale, _C_time_locale, sizeof (struct lc_time_T));
|
|
/* codeset */
|
|
_time_locale->codeset = lc_time_ptr;
|
|
lc_time_ptr = stpcpy (lc_time_ptr, charset) + 1;
|
|
|
|
if (ret)
|
|
{
|
|
char locale[ENCODING_LEN + 1];
|
|
strcpy (locale, name);
|
|
/* Removes the charset from the locale and attach the modifier to the
|
|
language_TERRITORY part. */
|
|
char *c = strchr (locale, '.');
|
|
if (c)
|
|
{
|
|
*c = '\0';
|
|
char *c2 = strchr (c + 1, '@');
|
|
/* Ignore @cjk* modifiers, they are newlib specials. */
|
|
if (c2 && !is_cjk_modifier (c2))
|
|
memmove (c, c2, strlen (c2) + 1);
|
|
}
|
|
/* Now search in the alphabetically order lc_era array for the
|
|
locale. */
|
|
lc_era_t locale_key = { locale, NULL, NULL, NULL, NULL, NULL ,
|
|
NULL, NULL, NULL, NULL, NULL };
|
|
lc_era_t *era = (lc_era_t *) bsearch ((void *) &locale_key,
|
|
(void *) lc_era,
|
|
sizeof lc_era / sizeof *lc_era,
|
|
sizeof *lc_era, locale_cmp);
|
|
|
|
/* mon */
|
|
/* Windows has a bug in "ja-JP" and "ko-KR" (but not in "ko-KP").
|
|
In these locales, strings returned for LOCALE_SABBREVMONTHNAME*
|
|
are missing the suffix representing a month.
|
|
|
|
A Japanese article describing the problem was
|
|
https://msdn.microsoft.com/ja-jp/library/cc422084.aspx, which is
|
|
only available via
|
|
https://web.archive.org/web/20110922195821/https://msdn.microsoft.com/ja-jp/library/cc422084.aspx
|
|
these days. Testing indicates that this problem is still present
|
|
in Windows 11.
|
|
|
|
The workaround is to use LOCALE_SMONTHNAME* in these locales,
|
|
even for the abbreviated month name. */
|
|
const LCTYPE mon_base = !wcscmp (win_locale, L"ja-JP")
|
|
|| !wcscmp (win_locale, L"ko-KR")
|
|
? LOCALE_SMONTHNAME1 : LOCALE_SABBREVMONTHNAME1;
|
|
for (int i = 0; i < 12; ++i)
|
|
{
|
|
_time_locale->wmon[i] = getlocaleinfo (time, mon_base + i);
|
|
_time_locale->mon[i] = charfromwchar (time, wmon[i]);
|
|
}
|
|
/* month and alt_month */
|
|
for (int i = 0; i < 12; ++i)
|
|
{
|
|
_time_locale->wmonth[i] = getlocaleinfo (time,
|
|
LOCALE_SMONTHNAME1 + i);
|
|
_time_locale->month[i] = _time_locale->alt_month[i]
|
|
= charfromwchar (time, wmonth[i]);
|
|
}
|
|
/* wday */
|
|
_time_locale->wwday[0] = getlocaleinfo (time, LOCALE_SABBREVDAYNAME7);
|
|
_time_locale->wday[0] = charfromwchar (time, wwday[0]);
|
|
for (int i = 0; i < 6; ++i)
|
|
{
|
|
_time_locale->wwday[i + 1] = getlocaleinfo (time,
|
|
LOCALE_SABBREVDAYNAME1 + i);
|
|
_time_locale->wday[i + 1] = charfromwchar (time, wwday[i + 1]);
|
|
}
|
|
/* weekday */
|
|
_time_locale->wweekday[0] = getlocaleinfo (time, LOCALE_SDAYNAME7);
|
|
_time_locale->weekday[0] = charfromwchar (time, wweekday[0]);
|
|
for (int i = 0; i < 6; ++i)
|
|
{
|
|
_time_locale->wweekday[i + 1] = getlocaleinfo (time,
|
|
LOCALE_SDAYNAME1 + i);
|
|
_time_locale->weekday[i + 1] = charfromwchar (time, wweekday[i + 1]);
|
|
}
|
|
size_t len;
|
|
/* X_fmt */
|
|
if (era && *era->t_fmt)
|
|
{
|
|
_time_locale->wX_fmt = (const wchar_t *) lc_time_ptr;
|
|
lc_time_ptr = (char *) (wcpcpy ((wchar_t *) _time_locale->wX_fmt,
|
|
era->t_fmt) + 1);
|
|
}
|
|
else
|
|
_time_locale->wX_fmt = eval_datetimefmt (LOCALE_STIMEFORMAT, DT_DEFAULT);
|
|
_time_locale->X_fmt = charfromwchar (time, wX_fmt);
|
|
/* x_fmt */
|
|
if (era && *era->d_fmt)
|
|
{
|
|
_time_locale->wx_fmt = (const wchar_t *) lc_time_ptr;
|
|
lc_time_ptr = (char *) (wcpcpy ((wchar_t *) _time_locale->wx_fmt,
|
|
era->d_fmt) + 1);
|
|
}
|
|
else
|
|
_time_locale->wx_fmt = eval_datetimefmt (LOCALE_SSHORTDATE, DT_DEFAULT);
|
|
_time_locale->x_fmt = charfromwchar (time, wx_fmt);
|
|
/* c_fmt */
|
|
if (era && *era->d_t_fmt)
|
|
{
|
|
_time_locale->wc_fmt = (const wchar_t *) lc_time_ptr;
|
|
lc_time_ptr = (char *) (wcpcpy ((wchar_t *) _time_locale->wc_fmt,
|
|
era->d_t_fmt) + 1);
|
|
}
|
|
else
|
|
{
|
|
_time_locale->wc_fmt = eval_datetimefmt (LOCALE_SLONGDATE, DT_ABBREV);
|
|
((wchar_t *) lc_time_ptr)[-1] = L' ';
|
|
eval_datetimefmt (LOCALE_STIMEFORMAT, DT_DEFAULT);
|
|
}
|
|
_time_locale->c_fmt = charfromwchar (time, wc_fmt);
|
|
/* AM/PM */
|
|
_time_locale->wam_pm[0] = getlocaleinfo (time, LOCALE_S1159);
|
|
_time_locale->wam_pm[1] = getlocaleinfo (time, LOCALE_S2359);
|
|
_time_locale->am_pm[0] = charfromwchar (time, wam_pm[0]);
|
|
_time_locale->am_pm[1] = charfromwchar (time, wam_pm[1]);
|
|
/* date_fmt */
|
|
if (era && *era->date_fmt)
|
|
{
|
|
_time_locale->wdate_fmt = (const wchar_t *) lc_time_ptr;
|
|
lc_time_ptr = (char *) (wcpcpy ((wchar_t *) _time_locale->wdate_fmt,
|
|
era->date_fmt) + 1);
|
|
}
|
|
else
|
|
_time_locale->wdate_fmt = _time_locale->wc_fmt;
|
|
_time_locale->date_fmt = charfromwchar (time, wdate_fmt);
|
|
/* md */
|
|
{
|
|
wchar_t buf[80];
|
|
GetLocaleInfoEx (win_locale, LOCALE_IDATE, buf, 80);
|
|
_time_locale->md_order = (const char *) lc_time_ptr;
|
|
lc_time_ptr = stpcpy (lc_time_ptr, *buf == L'1' ? "dm" : "md") + 1;
|
|
}
|
|
/* ampm_fmt */
|
|
if (era)
|
|
{
|
|
_time_locale->wampm_fmt = (const wchar_t *) lc_time_ptr;
|
|
lc_time_ptr = (char *) (wcpcpy ((wchar_t *) _time_locale->wampm_fmt,
|
|
era->t_fmt_ampm) + 1);
|
|
}
|
|
else
|
|
_time_locale->wampm_fmt = eval_datetimefmt (LOCALE_STIMEFORMAT, DT_AMPM);
|
|
_time_locale->ampm_fmt = charfromwchar (time, wampm_fmt);
|
|
|
|
if (era)
|
|
{
|
|
/* Evaluate string length in target charset. Characters invalid in the
|
|
target charset are simply ignored, as on Linux. */
|
|
len = 0;
|
|
len += lc_wcstombs (f_wctomb, NULL, era->era, 0) + 1;
|
|
len += lc_wcstombs (f_wctomb, NULL, era->era_d_fmt, 0) + 1;
|
|
len += lc_wcstombs (f_wctomb, NULL, era->era_d_t_fmt, 0) + 1;
|
|
len += lc_wcstombs (f_wctomb, NULL, era->era_t_fmt, 0) + 1;
|
|
len += lc_wcstombs (f_wctomb, NULL, era->alt_digits, 0) + 1;
|
|
len += (wcslen (era->era) + 1) * sizeof (wchar_t);
|
|
len += (wcslen (era->era_d_fmt) + 1) * sizeof (wchar_t);
|
|
len += (wcslen (era->era_d_t_fmt) + 1) * sizeof (wchar_t);
|
|
len += (wcslen (era->era_t_fmt) + 1) * sizeof (wchar_t);
|
|
len += (wcslen (era->alt_digits) + 1) * sizeof (wchar_t);
|
|
|
|
/* Make sure data fits into the buffer */
|
|
if (lc_time_ptr + len > lc_time_end)
|
|
{
|
|
len = lc_time_ptr + len - new_lc_time_buf;
|
|
char *tmp = (char *) realloc (new_lc_time_buf, len);
|
|
if (!tmp)
|
|
era = NULL;
|
|
else
|
|
{
|
|
if (tmp != new_lc_time_buf)
|
|
rebase_locale_buf (_time_locale, _time_locale + 1, tmp,
|
|
new_lc_time_buf, lc_time_ptr);
|
|
lc_time_ptr = tmp + (lc_time_ptr - new_lc_time_buf);
|
|
new_lc_time_buf = tmp;
|
|
lc_time_end = new_lc_time_buf + len;
|
|
}
|
|
}
|
|
/* Copy over */
|
|
if (era)
|
|
{
|
|
/* era */
|
|
_time_locale->wera = (const wchar_t *) lc_time_ptr;
|
|
lc_time_ptr = (char *) (wcpcpy ((wchar_t *) _time_locale->wera,
|
|
era->era) + 1);
|
|
_time_locale->era = charfromwchar (time, wera);
|
|
/* era_d_fmt */
|
|
_time_locale->wera_d_fmt = (const wchar_t *) lc_time_ptr;
|
|
lc_time_ptr = (char *) (wcpcpy ((wchar_t *) _time_locale->wera_d_fmt,
|
|
era->era_d_fmt) + 1);
|
|
_time_locale->era_d_fmt = charfromwchar (time, wera_d_fmt);
|
|
/* era_d_t_fmt */
|
|
_time_locale->wera_d_t_fmt = (const wchar_t *) lc_time_ptr;
|
|
lc_time_ptr = (char *) (wcpcpy ((wchar_t *) _time_locale->wera_d_t_fmt,
|
|
era->era_d_t_fmt) + 1);
|
|
_time_locale->era_d_t_fmt = charfromwchar (time, wera_d_t_fmt);
|
|
/* era_t_fmt */
|
|
_time_locale->wera_t_fmt = (const wchar_t *) lc_time_ptr;
|
|
lc_time_ptr = (char *) (wcpcpy ((wchar_t *) _time_locale->wera_t_fmt,
|
|
era->era_t_fmt) + 1);
|
|
_time_locale->era_t_fmt = charfromwchar (time, wera_t_fmt);
|
|
/* alt_digits */
|
|
_time_locale->walt_digits = (const wchar_t *) lc_time_ptr;
|
|
lc_time_ptr = (char *) (wcpcpy ((wchar_t *) _time_locale->walt_digits,
|
|
era->alt_digits) + 1);
|
|
_time_locale->alt_digits = charfromwchar (time, walt_digits);
|
|
}
|
|
}
|
|
if (!era)
|
|
{
|
|
_time_locale->wera =
|
|
_time_locale->wera_d_fmt =
|
|
_time_locale->wera_d_t_fmt =
|
|
_time_locale->wera_t_fmt =
|
|
_time_locale->walt_digits = (const wchar_t *) lc_time_ptr;
|
|
_time_locale->era =
|
|
_time_locale->era_d_fmt =
|
|
_time_locale->era_d_t_fmt =
|
|
_time_locale->era_t_fmt =
|
|
_time_locale->alt_digits = (const char *) lc_time_ptr;
|
|
/* Twice, to make sure wide char strings are correctly terminated. */
|
|
*lc_time_ptr++ = '\0';
|
|
*lc_time_ptr++ = '\0';
|
|
}
|
|
}
|
|
|
|
char *tmp = (char *) realloc (new_lc_time_buf, lc_time_ptr - new_lc_time_buf);
|
|
if (!tmp)
|
|
{
|
|
free (new_lc_time_buf);
|
|
return -1;
|
|
}
|
|
if (tmp != new_lc_time_buf)
|
|
rebase_locale_buf (_time_locale, _time_locale + 1, tmp,
|
|
new_lc_time_buf, lc_time_ptr);
|
|
*lc_time_buf = tmp;
|
|
return 1;
|
|
}
|
|
|
|
/* Called from newlib's setlocale() via __ctype_load_locale() if category
|
|
is LC_CTYPE. Returns LC_CTYPE values fetched from Windows locale data
|
|
in the structure pointed to by _ctype_locale. This is subsequently
|
|
accessed by functions like nl_langinfo, localeconv, printf, etc. */
|
|
extern "C" int
|
|
__set_lc_ctype_from_win (const char *name,
|
|
const struct lc_ctype_T *_C_ctype_locale,
|
|
struct lc_ctype_T *_ctype_locale,
|
|
char **lc_ctype_buf, wctomb_p f_wctomb,
|
|
const char *charset, int mb_cur_max)
|
|
{
|
|
wchar_t win_locale[ENCODING_LEN + 1];
|
|
int ret = __get_rfc5646_from_locale (name, win_locale);
|
|
if (ret < 0)
|
|
return ret;
|
|
if (!ret && !strcmp (charset, "ASCII"))
|
|
return 0;
|
|
|
|
# define MAX_CTYPE_BUFFER_SIZE 256
|
|
|
|
char *new_lc_ctype_buf = (char *) malloc (MAX_CTYPE_BUFFER_SIZE);
|
|
|
|
if (!new_lc_ctype_buf)
|
|
return -1;
|
|
char *lc_ctype_ptr = new_lc_ctype_buf;
|
|
/* C.foo is just a copy of "C" with fixed charset. */
|
|
if (!ret)
|
|
memcpy (_ctype_locale, _C_ctype_locale, sizeof (struct lc_ctype_T));
|
|
/* codeset */
|
|
_ctype_locale->codeset = lc_ctype_ptr;
|
|
lc_ctype_ptr = stpcpy (lc_ctype_ptr, charset) + 1;
|
|
/* mb_cur_max */
|
|
_ctype_locale->mb_cur_max = lc_ctype_ptr;
|
|
*lc_ctype_ptr++ = mb_cur_max;
|
|
*lc_ctype_ptr++ = '\0';
|
|
if (ret)
|
|
{
|
|
/* outdigits and woutdigits */
|
|
wchar_t digits[11];
|
|
GetLocaleInfoEx (win_locale, LOCALE_SNATIVEDIGITS, digits, 11);
|
|
for (int i = 0; i <= 9; ++i)
|
|
{
|
|
mbstate_t state;
|
|
|
|
/* Make sure the wchar_t's are always 2 byte aligned. */
|
|
if ((uintptr_t) lc_ctype_ptr % 2)
|
|
++lc_ctype_ptr;
|
|
wchar_t *woutdig = (wchar_t *) lc_ctype_ptr;
|
|
_ctype_locale->woutdigits[i] = (const wchar_t *) woutdig;
|
|
*woutdig++ = digits[i];
|
|
*woutdig++ = L'\0';
|
|
lc_ctype_ptr = (char *) woutdig;
|
|
_ctype_locale->outdigits[i] = lc_ctype_ptr;
|
|
memset (&state, 0, sizeof state);
|
|
lc_ctype_ptr += f_wctomb (_REENT, lc_ctype_ptr, digits[i], &state);
|
|
*lc_ctype_ptr++ = '\0';
|
|
}
|
|
}
|
|
|
|
char *tmp = (char *) realloc (new_lc_ctype_buf,
|
|
lc_ctype_ptr - new_lc_ctype_buf);
|
|
if (!tmp)
|
|
{
|
|
free (new_lc_ctype_buf);
|
|
return -1;
|
|
}
|
|
if (tmp != new_lc_ctype_buf)
|
|
rebase_locale_buf (_ctype_locale, _ctype_locale + 1, tmp,
|
|
new_lc_ctype_buf, lc_ctype_ptr);
|
|
*lc_ctype_buf = tmp;
|
|
return 1;
|
|
}
|
|
|
|
/* Called from newlib's setlocale() via __numeric_load_locale() if category
|
|
is LC_NUMERIC. Returns LC_NUMERIC values fetched from Windows locale data
|
|
in the structure pointed to by _numeric_locale. This is subsequently
|
|
accessed by functions like nl_langinfo, localeconv, printf, etc. */
|
|
extern "C" int
|
|
__set_lc_numeric_from_win (const char *name,
|
|
const struct lc_numeric_T *_C_numeric_locale,
|
|
struct lc_numeric_T *_numeric_locale,
|
|
char **lc_numeric_buf, wctomb_p f_wctomb,
|
|
const char *charset)
|
|
{
|
|
wchar_t win_locale[ENCODING_LEN + 1];
|
|
int ret = __get_rfc5646_from_locale (name, win_locale);
|
|
if (ret < 0)
|
|
return ret;
|
|
if (!ret && !strcmp (charset, "ASCII"))
|
|
return 0;
|
|
|
|
# define MAX_NUMERIC_BUFFER_SIZE 256
|
|
|
|
char *new_lc_numeric_buf = (char *) malloc (MAX_NUMERIC_BUFFER_SIZE);
|
|
const char *lc_numeric_end = new_lc_numeric_buf + MAX_NUMERIC_BUFFER_SIZE;
|
|
|
|
if (!new_lc_numeric_buf)
|
|
return -1;
|
|
char *lc_numeric_ptr = new_lc_numeric_buf;
|
|
/* C.foo is just a copy of "C" with fixed charset. */
|
|
if (!ret)
|
|
memcpy (_numeric_locale, _C_numeric_locale, sizeof (struct lc_numeric_T));
|
|
else
|
|
{
|
|
/* decimal_point and thousands_sep */
|
|
/* fa_IR. Windows decimal_point is slash, correct is dot */
|
|
if (!wcscmp (win_locale, L"fa-IR"))
|
|
{
|
|
_numeric_locale->wdecimal_point = setlocaleinfo (numeric, L'.');
|
|
_numeric_locale->wthousands_sep = setlocaleinfo (numeric, L',');
|
|
}
|
|
/* ps_AF. Windows decimal_point is dot, thousands_sep is comma,
|
|
correct are arabic separators. */
|
|
else if (!wcscmp (win_locale, L"ps-AF"))
|
|
{
|
|
_numeric_locale->wdecimal_point = setlocaleinfo (numeric, 0x066b);
|
|
_numeric_locale->wthousands_sep = setlocaleinfo (numeric, 0x066c);
|
|
}
|
|
else
|
|
{
|
|
_numeric_locale->wdecimal_point = getlocaleinfo (numeric,
|
|
LOCALE_SDECIMAL);
|
|
_numeric_locale->wthousands_sep = getlocaleinfo (numeric,
|
|
LOCALE_STHOUSAND);
|
|
}
|
|
_numeric_locale->decimal_point = charfromwchar (numeric, wdecimal_point);
|
|
_numeric_locale->thousands_sep = charfromwchar (numeric, wthousands_sep);
|
|
/* grouping */
|
|
_numeric_locale->grouping = conv_grouping (win_locale, LOCALE_SGROUPING,
|
|
&lc_numeric_ptr);
|
|
}
|
|
/* codeset */
|
|
_numeric_locale->codeset = lc_numeric_ptr;
|
|
lc_numeric_ptr = stpcpy (lc_numeric_ptr, charset) + 1;
|
|
|
|
char *tmp = (char *) realloc (new_lc_numeric_buf,
|
|
lc_numeric_ptr - new_lc_numeric_buf);
|
|
if (!tmp)
|
|
{
|
|
free (new_lc_numeric_buf);
|
|
return -1;
|
|
}
|
|
if (tmp != new_lc_numeric_buf)
|
|
rebase_locale_buf (_numeric_locale, _numeric_locale + 1, tmp,
|
|
new_lc_numeric_buf, lc_numeric_ptr);
|
|
*lc_numeric_buf = tmp;
|
|
return 1;
|
|
}
|
|
|
|
/* Called from newlib's setlocale() via __monetary_load_locale() if category
|
|
is LC_MONETARY. Returns LC_MONETARY values fetched from Windows locale data
|
|
in the structure pointed to by _monetary_locale. This is subsequently
|
|
accessed by functions like nl_langinfo, localeconv, printf, etc. */
|
|
extern "C" int
|
|
__set_lc_monetary_from_win (const char *name,
|
|
const struct lc_monetary_T *_C_monetary_locale,
|
|
struct lc_monetary_T *_monetary_locale,
|
|
char **lc_monetary_buf, wctomb_p f_wctomb,
|
|
const char *charset)
|
|
{
|
|
wchar_t win_locale[ENCODING_LEN + 1];
|
|
int ret = __get_rfc5646_from_locale (name, win_locale);
|
|
if (ret < 0)
|
|
return ret;
|
|
if (!ret && !strcmp (charset, "ASCII"))
|
|
return 0;
|
|
|
|
# define MAX_MONETARY_BUFFER_SIZE 512
|
|
|
|
char *new_lc_monetary_buf = (char *) malloc (MAX_MONETARY_BUFFER_SIZE);
|
|
const char *lc_monetary_end = new_lc_monetary_buf + MAX_MONETARY_BUFFER_SIZE;
|
|
|
|
if (!new_lc_monetary_buf)
|
|
return -1;
|
|
char *lc_monetary_ptr = new_lc_monetary_buf;
|
|
/* C.foo is just a copy of "C" with fixed charset. */
|
|
if (!ret)
|
|
memcpy (_monetary_locale, _C_monetary_locale, sizeof (struct lc_monetary_T));
|
|
else
|
|
{
|
|
/* int_curr_symbol */
|
|
_monetary_locale->wint_curr_symbol = getlocaleinfo (monetary,
|
|
LOCALE_SINTLSYMBOL);
|
|
/* No spacing char means space. */
|
|
if (!_monetary_locale->wint_curr_symbol[3])
|
|
{
|
|
wchar_t *wc = (wchar_t *) _monetary_locale->wint_curr_symbol + 3;
|
|
*wc++ = L' ';
|
|
*wc++ = L'\0';
|
|
lc_monetary_ptr = (char *) wc;
|
|
}
|
|
_monetary_locale->int_curr_symbol = charfromwchar (monetary,
|
|
wint_curr_symbol);
|
|
/* currency_symbol */
|
|
_monetary_locale->wcurrency_symbol = getlocaleinfo (monetary,
|
|
LOCALE_SCURRENCY);
|
|
/* As on Linux: If the currency_symbol can't be represented in the
|
|
given charset, use int_curr_symbol. */
|
|
if (lc_wcstombs (f_wctomb, NULL, _monetary_locale->wcurrency_symbol,
|
|
0, true) == (size_t) -1)
|
|
_monetary_locale->currency_symbol = _monetary_locale->int_curr_symbol;
|
|
else
|
|
_monetary_locale->currency_symbol = charfromwchar (monetary,
|
|
wcurrency_symbol);
|
|
/* mon_decimal_point and mon_thousands_sep */
|
|
/* fa_IR or ps_AF. Windows mon_decimal_point is slash and comma,
|
|
mon_thousands_sep is comma and dot, correct
|
|
are arabic separators. */
|
|
if (!wcscmp (win_locale, L"fa-IR")
|
|
|| !wcscmp (win_locale, L"ps-AF"))
|
|
{
|
|
_monetary_locale->wmon_decimal_point = setlocaleinfo (monetary,
|
|
0x066b);
|
|
_monetary_locale->wmon_thousands_sep = setlocaleinfo (monetary,
|
|
0x066c);
|
|
}
|
|
else
|
|
{
|
|
_monetary_locale->wmon_decimal_point = getlocaleinfo (monetary,
|
|
LOCALE_SMONDECIMALSEP);
|
|
_monetary_locale->wmon_thousands_sep = getlocaleinfo (monetary,
|
|
LOCALE_SMONTHOUSANDSEP);
|
|
}
|
|
_monetary_locale->mon_decimal_point = charfromwchar (monetary,
|
|
wmon_decimal_point);
|
|
_monetary_locale->mon_thousands_sep = charfromwchar (monetary,
|
|
wmon_thousands_sep);
|
|
/* mon_grouping */
|
|
_monetary_locale->mon_grouping = conv_grouping (win_locale,
|
|
LOCALE_SMONGROUPING,
|
|
&lc_monetary_ptr);
|
|
/* positive_sign */
|
|
_monetary_locale->wpositive_sign = getlocaleinfo (monetary,
|
|
LOCALE_SPOSITIVESIGN);
|
|
_monetary_locale->positive_sign = charfromwchar (monetary, wpositive_sign);
|
|
/* negative_sign */
|
|
_monetary_locale->wnegative_sign = getlocaleinfo (monetary,
|
|
LOCALE_SNEGATIVESIGN);
|
|
_monetary_locale->negative_sign = charfromwchar (monetary, wnegative_sign);
|
|
/* int_frac_digits */
|
|
*lc_monetary_ptr = (char) getlocaleint (LOCALE_IINTLCURRDIGITS);
|
|
_monetary_locale->int_frac_digits = lc_monetary_ptr++;
|
|
/* frac_digits */
|
|
*lc_monetary_ptr = (char) getlocaleint (LOCALE_ICURRDIGITS);
|
|
_monetary_locale->frac_digits = lc_monetary_ptr++;
|
|
/* p_cs_precedes and int_p_cs_precedes */
|
|
*lc_monetary_ptr = (char) getlocaleint (LOCALE_IPOSSYMPRECEDES);
|
|
_monetary_locale->p_cs_precedes
|
|
= _monetary_locale->int_p_cs_precedes = lc_monetary_ptr++;
|
|
/* p_sep_by_space and int_p_sep_by_space */
|
|
*lc_monetary_ptr = (char) getlocaleint (LOCALE_IPOSSEPBYSPACE);
|
|
_monetary_locale->p_sep_by_space
|
|
= _monetary_locale->int_p_sep_by_space = lc_monetary_ptr++;
|
|
/* n_cs_precedes and int_n_cs_precedes */
|
|
*lc_monetary_ptr = (char) getlocaleint (LOCALE_INEGSYMPRECEDES);
|
|
_monetary_locale->n_cs_precedes
|
|
= _monetary_locale->int_n_cs_precedes = lc_monetary_ptr++;
|
|
/* n_sep_by_space and int_n_sep_by_space */
|
|
*lc_monetary_ptr = (char) getlocaleint (LOCALE_INEGSEPBYSPACE);
|
|
_monetary_locale->n_sep_by_space
|
|
= _monetary_locale->int_n_sep_by_space = lc_monetary_ptr++;
|
|
/* p_sign_posn and int_p_sign_posn */
|
|
*lc_monetary_ptr = (char) getlocaleint (LOCALE_IPOSSIGNPOSN);
|
|
_monetary_locale->p_sign_posn
|
|
= _monetary_locale->int_p_sign_posn = lc_monetary_ptr++;
|
|
/* n_sign_posn and int_n_sign_posn */
|
|
*lc_monetary_ptr = (char) getlocaleint (LOCALE_INEGSIGNPOSN);
|
|
_monetary_locale->n_sign_posn
|
|
= _monetary_locale->int_n_sign_posn = lc_monetary_ptr++;
|
|
}
|
|
/* codeset */
|
|
_monetary_locale->codeset = lc_monetary_ptr;
|
|
lc_monetary_ptr = stpcpy (lc_monetary_ptr, charset) + 1;
|
|
|
|
char *tmp = (char *) realloc (new_lc_monetary_buf,
|
|
lc_monetary_ptr - new_lc_monetary_buf);
|
|
if (!tmp)
|
|
{
|
|
free (new_lc_monetary_buf);
|
|
return -1;
|
|
}
|
|
if (tmp != new_lc_monetary_buf)
|
|
rebase_locale_buf (_monetary_locale, _monetary_locale + 1, tmp,
|
|
new_lc_monetary_buf, lc_monetary_ptr);
|
|
*lc_monetary_buf = tmp;
|
|
return 1;
|
|
}
|
|
|
|
extern "C" int
|
|
__set_lc_messages_from_win (const char *name,
|
|
const struct lc_messages_T *_C_messages_locale,
|
|
struct lc_messages_T *_messages_locale,
|
|
char **lc_messages_buf,
|
|
wctomb_p f_wctomb, const char *charset)
|
|
{
|
|
wchar_t win_locale[ENCODING_LEN + 1];
|
|
int ret = __get_rfc5646_from_locale (name, win_locale);
|
|
if (ret < 0)
|
|
return ret;
|
|
if (!ret && !strcmp (charset, "ASCII"))
|
|
return 0;
|
|
|
|
char locale[ENCODING_LEN + 1];
|
|
char *c, *c2;
|
|
lc_msg_t *msg = NULL;
|
|
|
|
/* C.foo is just a copy of "C" with fixed charset. */
|
|
if (!ret)
|
|
memcpy (_messages_locale, _C_messages_locale, sizeof (struct lc_messages_T));
|
|
else
|
|
{
|
|
strcpy (locale, name);
|
|
/* Removes the charset from the locale and attach the modifer to the
|
|
language_TERRITORY part. */
|
|
c = strchr (locale, '.');
|
|
if (c)
|
|
{
|
|
*c = '\0';
|
|
c2 = strchr (c + 1, '@');
|
|
/* Ignore @cjk* modifiers, they are newlib specials. */
|
|
if (c2 && !is_cjk_modifier (c2))
|
|
memmove (c, c2, strlen (c2) + 1);
|
|
}
|
|
/* Now search in the alphabetically order lc_msg array for the
|
|
locale. */
|
|
lc_msg_t locale_key = { locale, NULL, NULL, NULL, NULL };
|
|
msg = (lc_msg_t *) bsearch ((void *) &locale_key, (void *) lc_msg,
|
|
sizeof lc_msg / sizeof *lc_msg,
|
|
sizeof *lc_msg, locale_cmp);
|
|
if (!msg)
|
|
return 0;
|
|
}
|
|
|
|
/* Evaluate string length in target charset. Characters invalid in the
|
|
target charset are simply ignored, as on Linux. */
|
|
size_t len = 0;
|
|
len += (strlen (charset) + 1);
|
|
if (ret)
|
|
{
|
|
len += lc_wcstombs (f_wctomb, NULL, msg->yesexpr, 0) + 1;
|
|
len += lc_wcstombs (f_wctomb, NULL, msg->noexpr, 0) + 1;
|
|
len += lc_wcstombs (f_wctomb, NULL, msg->yesstr, 0) + 1;
|
|
len += lc_wcstombs (f_wctomb, NULL, msg->nostr, 0) + 1;
|
|
len += (wcslen (msg->yesexpr) + 1) * sizeof (wchar_t);
|
|
len += (wcslen (msg->noexpr) + 1) * sizeof (wchar_t);
|
|
len += (wcslen (msg->yesstr) + 1) * sizeof (wchar_t);
|
|
len += (wcslen (msg->nostr) + 1) * sizeof (wchar_t);
|
|
if (len % 1)
|
|
++len;
|
|
}
|
|
/* Allocate. */
|
|
char *new_lc_messages_buf = (char *) malloc (len);
|
|
const char *lc_messages_end = new_lc_messages_buf + len;
|
|
|
|
if (!new_lc_messages_buf)
|
|
return -1;
|
|
/* Copy over. */
|
|
c = new_lc_messages_buf;
|
|
/* codeset */
|
|
_messages_locale->codeset = c;
|
|
c = stpcpy (c, charset) + 1;
|
|
if (ret)
|
|
{
|
|
_messages_locale->yesexpr = (const char *) c;
|
|
len = lc_wcstombs (f_wctomb, c, msg->yesexpr, lc_messages_end - c);
|
|
_messages_locale->noexpr = (const char *) (c += len + 1);
|
|
len = lc_wcstombs (f_wctomb, c, msg->noexpr, lc_messages_end - c);
|
|
_messages_locale->yesstr = (const char *) (c += len + 1);
|
|
len = lc_wcstombs (f_wctomb, c, msg->yesstr, lc_messages_end - c);
|
|
_messages_locale->nostr = (const char *) (c += len + 1);
|
|
len = lc_wcstombs (f_wctomb, c, msg->nostr, lc_messages_end - c);
|
|
c += len + 1;
|
|
if ((uintptr_t) c % 1)
|
|
++c;
|
|
wchar_t *wc = (wchar_t *) c;
|
|
_messages_locale->wyesexpr = (const wchar_t *) wc;
|
|
wc = wcpcpy (wc, msg->yesexpr) + 1;
|
|
_messages_locale->wnoexpr = (const wchar_t *) wc;
|
|
wc = wcpcpy (wc, msg->noexpr) + 1;
|
|
_messages_locale->wyesstr = (const wchar_t *) wc;
|
|
wc = wcpcpy (wc, msg->yesstr) + 1;
|
|
_messages_locale->wnostr = (const wchar_t *) wc;
|
|
wcpcpy (wc, msg->nostr);
|
|
}
|
|
*lc_messages_buf = new_lc_messages_buf;
|
|
return 1;
|
|
}
|
|
|
|
const struct lc_collate_T _C_collate_locale =
|
|
{
|
|
L"",
|
|
__ascii_mbtowc,
|
|
"ASCII"
|
|
};
|
|
|
|
/* Called from newlib's setlocale() if category is LC_COLLATE. Stores
|
|
LC_COLLATE locale information. This is subsequently accessed by the
|
|
below functions strcoll, strxfrm, wcscoll, wcsxfrm. */
|
|
extern "C" int
|
|
__collate_load_locale (struct __locale_t *locale, const char *name,
|
|
void *f_mbtowc, const char *charset)
|
|
{
|
|
char *bufp = NULL;
|
|
struct lc_collate_T *cop = NULL;
|
|
|
|
wchar_t win_locale[ENCODING_LEN + 1];
|
|
int ret = __get_rfc5646_from_locale (name, win_locale);
|
|
if (ret < 0)
|
|
return ret;
|
|
if (ret)
|
|
{
|
|
bufp = (char *) malloc (1); /* dummy */
|
|
if (!bufp)
|
|
return -1;
|
|
cop = (struct lc_collate_T *) calloc (1, sizeof (struct lc_collate_T));
|
|
if (!cop)
|
|
{
|
|
free (bufp);
|
|
return -1;
|
|
}
|
|
wcscpy (cop->win_locale, win_locale);
|
|
cop->mbtowc = (mbtowc_p) f_mbtowc;
|
|
stpcpy (cop->codeset, charset);
|
|
}
|
|
struct __lc_cats tmp = locale->lc_cat[LC_COLLATE];
|
|
locale->lc_cat[LC_COLLATE].ptr = !win_locale[0] ? &_C_collate_locale : cop;
|
|
locale->lc_cat[LC_COLLATE].buf = bufp;
|
|
/* If buf is not NULL, both pointers have been alloc'ed */
|
|
if (tmp.buf)
|
|
{
|
|
free ((void *) tmp.ptr);
|
|
free (tmp.buf);
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/* We use the Windows functions for locale-specific string comparison and
|
|
transformation. The advantage is that we don't need any files with
|
|
collation information. */
|
|
|
|
extern "C" int
|
|
wcscoll_l (const wchar_t *__restrict ws1, const wchar_t *__restrict ws2,
|
|
struct __locale_t *locale)
|
|
{
|
|
int ret;
|
|
const wchar_t *collate_locale = __get_collate_locale (locale)->win_locale;
|
|
|
|
if (!collate_locale[0])
|
|
return wcscmp (ws1, ws2);
|
|
ret = CompareStringEx (collate_locale, 0, ws1, -1, ws2, -1, NULL, NULL, 0);
|
|
if (!ret)
|
|
set_errno (EINVAL);
|
|
return ret - CSTR_EQUAL;
|
|
}
|
|
|
|
extern "C" int
|
|
wcscoll (const wchar_t *__restrict ws1, const wchar_t *__restrict ws2)
|
|
{
|
|
return wcscoll_l (ws1, ws2, __get_current_locale ());
|
|
}
|
|
|
|
extern "C" int
|
|
strcoll_l (const char *__restrict s1, const char *__restrict s2,
|
|
struct __locale_t *locale)
|
|
{
|
|
size_t n1, n2;
|
|
wchar_t *ws1, *ws2;
|
|
tmp_pathbuf tp;
|
|
int ret;
|
|
const wchar_t *collate_locale = __get_collate_locale (locale)->win_locale;
|
|
|
|
if (!collate_locale[0])
|
|
return strcmp (s1, s2);
|
|
mbtowc_p collate_mbtowc = __get_collate_locale (locale)->mbtowc;
|
|
n1 = lc_mbstowcs (collate_mbtowc, NULL, s1, 0) + 1;
|
|
ws1 = (n1 > NT_MAX_PATH ? (wchar_t *) malloc (n1 * sizeof (wchar_t))
|
|
: tp.w_get ());
|
|
lc_mbstowcs (collate_mbtowc, ws1, s1, n1);
|
|
n2 = lc_mbstowcs (collate_mbtowc, NULL, s2, 0) + 1;
|
|
ws2 = (n2 > NT_MAX_PATH ? (wchar_t *) malloc (n2 * sizeof (wchar_t))
|
|
: tp.w_get ());
|
|
lc_mbstowcs (collate_mbtowc, ws2, s2, n2);
|
|
ret = CompareStringEx (collate_locale, 0, ws1, -1, ws2, -1, NULL, NULL, 0);
|
|
if (n1 > NT_MAX_PATH)
|
|
free (ws1);
|
|
if (n2 > NT_MAX_PATH)
|
|
free (ws2);
|
|
if (!ret)
|
|
set_errno (EINVAL);
|
|
return ret - CSTR_EQUAL;
|
|
}
|
|
|
|
extern "C" int
|
|
strcoll (const char *__restrict s1, const char *__restrict s2)
|
|
{
|
|
return strcoll_l (s1, s2, __get_current_locale ());
|
|
}
|
|
|
|
/* BSD.  Used from glob.cc, fnmatch.c and regcomp.c.

   Compare the two characters c1 and c2 with wcscoll.  Each character is
   turned into a NUL-terminated wide string first; values above 0xffff are
   stored as a surrogate pair. */
extern "C" int
__wcollate_range_cmp (wint_t c1, wint_t c2)
{
  const wint_t in[2] = { c1, c2 };
  wchar_t out[2][3] = { { L'\0', L'\0', L'\0' }, { L'\0', L'\0', L'\0' } };

  for (int i = 0; i < 2; ++i)
    {
      if (in[i] > 0xffff)
        {
          /* Unicode value >= 0x10000, convert to surrogate pair */
          out[i][0] = ((in[i] - 0x10000) >> 10) + 0xd800;
          out[i][1] = ((in[i] - 0x10000) & 0x3ff) + 0xdc00;
        }
      else
        out[i][0] = (wchar_t) in[i];
    }
  return wcscoll (out[0], out[1]);
}
|
|
|
|
/* Not so much BSD. Used from glob.cc, fnmatch.c and regcomp.c.
|
|
|
|
The args are pointers to wint_t strings. This allows to compare
|
|
against collating symbols. */
|
|
extern "C" int
|
|
__wscollate_range_cmp (wint_t *c1, wint_t *c2,
|
|
size_t c1len, size_t c2len)
|
|
{
|
|
wchar_t s1[c1len * 2 + 1] = { 0 }; /* # of chars if all are surrogates */
|
|
wchar_t s2[c2len * 2 + 1] = { 0 };
|
|
|
|
/* wcscoll() ignores case in many locales. but we don't want that
|
|
for filenames... */
|
|
if ((iswupper (*c1) && !iswupper (*c2))
|
|
|| (iswlower (*c1) && !iswlower (*c2)))
|
|
return *c1 - *c2;
|
|
|
|
wcintowcs (s1, c1, c1len);
|
|
wcintowcs (s2, c2, c2len);
|
|
return wcscoll_l (s1, s2, __get_current_locale ());
|
|
}
|
|
|
|
/* Number of entries in the collating_element table and size of a single
   entry, as passed to bsearch. */
const size_t ce_size = sizeof collating_element / sizeof collating_element[0];
const size_t ce_e_size = sizeof collating_element[0];
|
|
|
|
/* Check if UTF-32 input character `test' is in the same equivalence class
|
|
as UTF-32 character 'eqv'.
|
|
Note that we only recognize input in Unicode normalization form C, that
|
|
is, we expect all letters to be composed. A single character is all we
|
|
look at.
|
|
To check equivalence, decompose pattern letter and input letter into
|
|
normalization form KD and check the base character for equality. Also,
|
|
convert all digits to the ASCII digits 0 - 9 and compare. */
|
|
extern "C" int
|
|
is_unicode_equiv (wint_t test, wint_t eqv)
|
|
{
|
|
wchar_t decomp_testc[24] = { 0 };
|
|
wchar_t decomp_eqvc[24] = { 0 };
|
|
wchar_t testc[3] = { 0 };
|
|
wchar_t eqvc[3] = { 0 };
|
|
|
|
/* For equivalence classes, case doesn't matter. However, be careful.
|
|
Only convert chars which have a "upper" to "lower". */
|
|
if (iswupper (eqv))
|
|
eqv = towlower (eqv);
|
|
if (iswupper (test))
|
|
test = towlower (test);
|
|
/* Convert to UTF-16 string */
|
|
if (eqv > 0x10000) {
|
|
eqvc[0] = ((eqv - 0x10000) >> 10) + 0xd800;
|
|
eqvc[1] = ((eqv - 0x10000) & 0x3ff) + 0xdc00;
|
|
} else
|
|
eqvc[0] = eqv;
|
|
if (test > 0x10000) {
|
|
testc[0] = ((test - 0x10000) >> 10) + 0xd800;
|
|
testc[1] = ((test - 0x10000) & 0x3ff) + 0xdc00;
|
|
} else
|
|
testc[0] = test;
|
|
/* Convert to decomposed form */
|
|
FoldStringW (MAP_COMPOSITE | MAP_FOLDCZONE | MAP_FOLDDIGITS,
|
|
eqvc, -1, decomp_eqvc, 24);
|
|
FoldStringW (MAP_COMPOSITE | MAP_FOLDCZONE | MAP_FOLDDIGITS,
|
|
testc, -1, decomp_testc, 24);
|
|
/* If they are equivalent, the base char must be the same. */
|
|
if (decomp_eqvc[0] != decomp_testc[0])
|
|
return 0;
|
|
/* If it's a surrogate pair, check the second char, too */
|
|
if (decomp_eqvc[0] >= 0xd800 && decomp_eqvc[0] <= 0xdbff &&
|
|
decomp_eqvc[1] != decomp_testc[1])
|
|
return 0;
|
|
return 1;
|
|
}
|
|
|
|
static int
|
|
comp_coll_elem (const void *key, const void *array_member)
|
|
{
|
|
collating_element_t *ckey = (collating_element_t *) key;
|
|
collating_element_t *carray_member = (collating_element_t *) array_member;
|
|
|
|
int ret = wcicmp ((const wint_t *) ckey->element,
|
|
(const wint_t *) carray_member->element);
|
|
/* The locale in the collating_element array never has a codeset
|
|
attached. So the length of the collating_element locale is
|
|
always <= length of the key locale, and that's all we need to
|
|
check. Also, if the collating_element locale is empty, we're
|
|
all set. */
|
|
if (ret == 0 && carray_member->locale[0])
|
|
ret = strncmp (ckey->locale, carray_member->locale,
|
|
strlen (carray_member->locale));
|
|
return ret;
|
|
}
|
|
|
|
extern "C" int
|
|
is_unicode_coll_elem (const wint_t *test)
|
|
{
|
|
collating_element_t ct = {
|
|
(const char32_t *) test,
|
|
__get_current_locale ()->categories[LC_COLLATE]
|
|
};
|
|
collating_element_t *cmatch;
|
|
|
|
if (wcilen (test) == 1)
|
|
return 1;
|
|
cmatch = (collating_element_t *)
|
|
bsearch (&ct, collating_element, ce_size, ce_e_size, comp_coll_elem);
|
|
return !!cmatch;
|
|
}
|
|
|
|
static int
|
|
comp_coll_elem_n (const void *key, const void *array_member)
|
|
{
|
|
collating_element_t *ckey = (collating_element_t *) key;
|
|
collating_element_t *carray_member = (collating_element_t *) array_member;
|
|
|
|
int ret = wcincmp ((const wint_t *) ckey->element,
|
|
(const wint_t *) carray_member->element,
|
|
wcilen ((const wint_t *) carray_member->element));
|
|
/* The locale in the collating_element array never has a codeset
|
|
attached. So the length of the collating_element locale is
|
|
always <= length of the key locale, and that's all we need to
|
|
check. Also, if the collating_element locale is empty, we're
|
|
all set. */
|
|
if (ret == 0 && carray_member->locale[0])
|
|
ret = strncmp (ckey->locale, carray_member->locale,
|
|
strlen (carray_member->locale));
|
|
return ret;
|
|
}
|
|
|
|
/* Return the number of UTF-32 chars making up the next full character in
|
|
inp, taking valid collation elements in the current locale into account. */
|
|
extern "C" size_t
|
|
next_unicode_char (wint_t *inp)
|
|
{
|
|
collating_element_t ct = {
|
|
(const char32_t *) inp,
|
|
__get_current_locale ()->categories[LC_COLLATE]
|
|
};
|
|
collating_element_t *cmatch;
|
|
|
|
if (wcilen (inp) > 1)
|
|
{
|
|
cmatch = (collating_element_t *)
|
|
bsearch (&ct, collating_element, ce_size, ce_e_size,
|
|
comp_coll_elem_n);
|
|
if (cmatch)
|
|
return wcilen ((const wint_t *) cmatch->element);
|
|
}
|
|
return 1;
|
|
}
|
|
|
|
extern "C" size_t
|
|
wcsxfrm_l (wchar_t *__restrict ws1, const wchar_t *__restrict ws2, size_t wsn,
|
|
struct __locale_t *locale)
|
|
{
|
|
size_t ret;
|
|
const wchar_t *collate_locale = __get_collate_locale (locale)->win_locale;
|
|
|
|
if (!collate_locale[0])
|
|
return wcslcpy (ws1, ws2, wsn);
|
|
/* Don't use LCMAP_SORTKEY in conjunction with LCMAP_BYTEREV. The cchDest
|
|
parameter is used as byte count with LCMAP_SORTKEY but as char count with
|
|
LCMAP_BYTEREV. */
|
|
ret = LCMapStringEx (collate_locale, LCMAP_SORTKEY, ws2, -1, ws1,
|
|
wsn * sizeof (wchar_t), NULL, NULL, 0);
|
|
if (ret)
|
|
{
|
|
ret /= sizeof (wchar_t);
|
|
if (wsn)
|
|
{
|
|
/* Byte swap the array ourselves here. */
|
|
for (size_t idx = 0; idx < ret; ++idx)
|
|
ws1[idx] = __builtin_bswap16 (ws1[idx]);
|
|
/* LCMapStringW returns byte count including the terminating NUL char.
|
|
wcsxfrm is supposed to return length in wchar_t excluding the NUL.
|
|
Since the array is only single byte NUL-terminated yet, make sure
|
|
the result is wchar_t-NUL terminated. */
|
|
if (ret < wsn)
|
|
ws1[ret] = L'\0';
|
|
}
|
|
return ret;
|
|
}
|
|
if (GetLastError () != ERROR_INSUFFICIENT_BUFFER)
|
|
set_errno (EINVAL);
|
|
else
|
|
{
|
|
ret = LCMapStringEx (collate_locale, LCMAP_SORTKEY, ws2, -1,
|
|
NULL, 0, NULL, NULL, 0);
|
|
if (ret)
|
|
wsn = ret / sizeof (wchar_t);
|
|
}
|
|
return wsn;
|
|
}
|
|
|
|
extern "C" size_t
|
|
wcsxfrm (wchar_t *__restrict ws1, const wchar_t *__restrict ws2, size_t wsn)
|
|
{
|
|
return wcsxfrm_l (ws1, ws2, wsn, __get_current_locale ());
|
|
}
|
|
|
|
extern "C" size_t
|
|
strxfrm_l (char *__restrict s1, const char *__restrict s2, size_t sn,
|
|
struct __locale_t *locale)
|
|
{
|
|
size_t ret = 0;
|
|
size_t n2;
|
|
wchar_t *ws2;
|
|
tmp_pathbuf tp;
|
|
const wchar_t *collate_locale = __get_collate_locale (locale)->win_locale;
|
|
|
|
if (!collate_locale[0])
|
|
return strlcpy (s1, s2, sn);
|
|
mbtowc_p collate_mbtowc = __get_collate_locale (locale)->mbtowc;
|
|
n2 = lc_mbstowcs (collate_mbtowc, NULL, s2, 0) + 1;
|
|
ws2 = (n2 > NT_MAX_PATH ? (wchar_t *) malloc (n2 * sizeof (wchar_t))
|
|
: tp.w_get ());
|
|
if (ws2)
|
|
{
|
|
lc_mbstowcs (collate_mbtowc, ws2, s2, n2);
|
|
/* The sort key is a NUL-terminated byte string. */
|
|
ret = LCMapStringEx (collate_locale, LCMAP_SORTKEY, ws2, -1,
|
|
(PWCHAR) s1, sn, NULL, NULL, 0);
|
|
}
|
|
if (ret == 0)
|
|
{
|
|
ret = sn + 1;
|
|
if (!ws2 || GetLastError () != ERROR_INSUFFICIENT_BUFFER)
|
|
set_errno (EINVAL);
|
|
else
|
|
ret = LCMapStringEx (collate_locale, LCMAP_SORTKEY, ws2, -1,
|
|
NULL, 0, NULL, NULL, 0);
|
|
}
|
|
if (ws2 && n2 > NT_MAX_PATH)
|
|
free (ws2);
|
|
/* LCMapStringW returns byte count including the terminating NUL character.
|
|
strxfrm is supposed to return length excluding the NUL. */
|
|
return ret - 1;
|
|
}
|
|
|
|
extern "C" size_t
|
|
strxfrm (char *__restrict s1, const char *__restrict s2, size_t sn)
|
|
{
|
|
return strxfrm_l (s1, s2, sn, __get_current_locale ());
|
|
}
|
|
|
|
/* Fetch default ANSI codepage from locale info and generate a setlocale
|
|
compatible character set code. Called from newlib's setlocale(), if the
|
|
charset isn't given explicitely in the POSIX compatible locale specifier. */
|
|
extern "C" void
|
|
__set_charset_from_locale (const char *loc, char *charset)
|
|
{
|
|
wchar_t win_locale[ENCODING_LEN + 1];
|
|
char locale[ENCODING_LEN + 1];
|
|
char *modifier;
|
|
char *c;
|
|
UINT cp;
|
|
|
|
/* Cut out explicit codeset */
|
|
stpcpy (locale, loc);
|
|
modifier = strchr (loc, '@');
|
|
if ((c = strchr (locale, '.')))
|
|
stpcpy (c, modifier ?: "");
|
|
/* Ignore @cjk* modifiers, they are newlib specials. */
|
|
modifier = strchr (locale, '@');
|
|
if (modifier && is_cjk_modifier (modifier))
|
|
*modifier = '\0';
|
|
|
|
default_codeset_t srch_dc = { locale, NULL };
|
|
default_codeset_t *dc = (default_codeset_t *)
|
|
bsearch ((void *) &srch_dc, (void *) default_codeset,
|
|
sizeof default_codeset / sizeof *default_codeset,
|
|
sizeof *default_codeset, locale_cmp);
|
|
if (dc)
|
|
{
|
|
stpcpy (charset, dc->codeset);
|
|
return;
|
|
}
|
|
|
|
/* "C" locale, or invalid locale? */
|
|
if (__get_rfc5646_from_locale (locale, win_locale) <= 0)
|
|
cp = 20127;
|
|
else if (GetLocaleInfoEx (win_locale,
|
|
LOCALE_IDEFAULTANSICODEPAGE | LOCALE_RETURN_NUMBER,
|
|
(PWCHAR) &cp, sizeof cp))
|
|
cp = 0;
|
|
/* Translate codepage and lcid to a charset closely aligned with the default
|
|
charsets defined in Glibc. */
|
|
const char *cs;
|
|
switch (cp)
|
|
{
|
|
case 20127:
|
|
cs = "ASCII";
|
|
break;
|
|
case 874:
|
|
cs = "CP874";
|
|
break;
|
|
case 932:
|
|
cs = "EUCJP";
|
|
break;
|
|
case 936:
|
|
cs = "GB2312";
|
|
break;
|
|
case 949:
|
|
cs = "EUCKR";
|
|
break;
|
|
case 950:
|
|
cs = "BIG5";
|
|
break;
|
|
case 1250:
|
|
cs = "ISO-8859-2";
|
|
break;
|
|
case 1251:
|
|
cs = "ISO-8859-5";
|
|
break;
|
|
case 1252:
|
|
cs = "ISO-8859-1";
|
|
break;
|
|
case 1253:
|
|
cs = "ISO-8859-7";
|
|
break;
|
|
case 1254:
|
|
cs = "ISO-8859-9";
|
|
break;
|
|
case 1255:
|
|
cs = "ISO-8859-8";
|
|
break;
|
|
case 1256:
|
|
cs = "ISO-8859-6";
|
|
break;
|
|
case 1257:
|
|
cs = "ISO-8859-13";
|
|
break;
|
|
case 1258:
|
|
cs = "UTF-8";
|
|
break;
|
|
default:
|
|
/* Some (pretty new) EU locales don't exist in GLibc and haven't been
|
|
catched above. Check for @euro modifier again and make these locales
|
|
always use ISO-8859-15. */
|
|
if (modifier && !strcmp (modifier + 1, "euro"))
|
|
cs = "ISO-8859-15";
|
|
else
|
|
cs = "UTF-8";
|
|
break;
|
|
}
|
|
stpcpy (charset, cs);
|
|
}
|
|
|
|
/* Called from fhandler_tty::setup_locale. Set a codepage which reflects the
|
|
internal charset setting. This is *not* necessarily the Windows
|
|
codepage connected to a locale by default, so we have to set this
|
|
up explicitely. */
|
|
UINT
|
|
__eval_codepage_from_internal_charset ()
|
|
{
|
|
const char *charset = __locale_charset (__get_global_locale ());
|
|
UINT codepage = CP_UTF8; /* Default UTF8 */
|
|
|
|
/* The internal charset names are well defined, so we can use shortcuts. */
|
|
switch (charset[0])
|
|
{
|
|
case 'B': /* BIG5 */
|
|
codepage = 950;
|
|
break;
|
|
case 'C': /* CPxxx */
|
|
codepage = strtoul (charset + 2, NULL, 10);
|
|
break;
|
|
case 'E': /* EUCxx */
|
|
switch (charset[3])
|
|
{
|
|
case 'J': /* EUCJP */
|
|
codepage = 20932;
|
|
break;
|
|
case 'K': /* EUCKR */
|
|
codepage = 949;
|
|
break;
|
|
case 'C': /* EUCCN */
|
|
codepage = 936;
|
|
break;
|
|
}
|
|
break;
|
|
case 'G': /* GBK/GB2312/GB18030 */
|
|
codepage = (charset[2] == '1') ? 54936 : 936;
|
|
break;
|
|
case 'I': /* ISO-8859-x */
|
|
codepage = strtoul (charset + 9, NULL, 10) + 28590;
|
|
break;
|
|
case 'S': /* SJIS */
|
|
codepage = 932;
|
|
break;
|
|
default: /* All set to UTF8 already */
|
|
break;
|
|
}
|
|
return codepage;
|
|
}
|
|
|
|
/* This function is called from newlib's loadlocale if the locale identifier
|
|
was invalid, one way or the other. It looks for the file
|
|
|
|
/usr/share/locale/locale.alias
|
|
|
|
which is part of the gettext package, and if it finds the locale alias
|
|
in that file, it replaces the locale with the correct locale string from
|
|
that file.
|
|
|
|
If successful, it returns a pointer to new_locale, NULL otherwise.*/
|
|
extern "C" char *
|
|
__set_locale_from_locale_alias (const char *locale, char *new_locale)
|
|
{
|
|
wchar_t wlocale[ENCODING_LEN + 1];
|
|
wchar_t walias[ENCODING_LEN + 1];
|
|
#define LOCALE_ALIAS_LINE_LEN 255
|
|
char alias_buf[LOCALE_ALIAS_LINE_LEN + 1], *c;
|
|
wchar_t *wc;
|
|
const char *alias, *replace;
|
|
char *ret = NULL;
|
|
|
|
FILE *fp = fopen ("/usr/share/locale/locale.alias", "rt");
|
|
if (!fp)
|
|
return NULL;
|
|
/* The incoming locale is given in the application charset, or in
|
|
the Cygwin internal charset. We try both. */
|
|
if (mbstowcs (wlocale, locale, ENCODING_LEN + 1) == (size_t) -1)
|
|
sys_mbstowcs (wlocale, ENCODING_LEN + 1, locale);
|
|
wlocale[ENCODING_LEN] = L'\0';
|
|
/* Ignore @cjk* modifiers, they are newlib specials. */
|
|
wc = wcschr (wlocale, L'@');
|
|
if (wc && w_is_cjk_modifier (wc))
|
|
*wc = L'\0';
|
|
while (fgets (alias_buf, LOCALE_ALIAS_LINE_LEN + 1, fp))
|
|
{
|
|
alias_buf[LOCALE_ALIAS_LINE_LEN] = '\0';
|
|
c = strrchr (alias_buf, '\n');
|
|
if (c)
|
|
*c = '\0';
|
|
c = alias_buf;
|
|
c += strspn (c, " \t");
|
|
if (!*c || *c == '#')
|
|
continue;
|
|
alias = c;
|
|
c += strcspn (c, " \t");
|
|
*c++ = '\0';
|
|
c += strspn (c, " \t");
|
|
if (*c == '#')
|
|
continue;
|
|
replace = c;
|
|
c += strcspn (c, " \t");
|
|
*c++ = '\0';
|
|
if (strlen (replace) > ENCODING_LEN)
|
|
continue;
|
|
/* The file is latin1 encoded */
|
|
lc_mbstowcs (__iso_mbtowc (1), walias, alias, ENCODING_LEN + 1);
|
|
walias[ENCODING_LEN] = L'\0';
|
|
if (!wcscmp (wlocale, walias))
|
|
{
|
|
ret = strcpy (new_locale, replace);
|
|
break;
|
|
}
|
|
}
|
|
fclose (fp);
|
|
return ret;
|
|
}
|
|
|
|
/* Can be called via cygwin_internal (CW_INTERNAL_SETLOCALE) for application
|
|
which really (think they) know what they are doing. */
|
|
extern "C" void
|
|
internal_setlocale ()
|
|
{
|
|
/* Each setlocale from the environment potentially changes the
|
|
multibyte representation of the CWD. Therefore we have to
|
|
reevaluate the CWD's posix path and store in the new charset.
|
|
Same for the PATH environment variable. */
|
|
/* FIXME: Other buffered paths might be affected as well. */
|
|
/* FIXME: It could be necessary to convert the entire environment,
|
|
not just PATH. */
|
|
tmp_pathbuf tp;
|
|
char *path;
|
|
wchar_t *w_path = NULL, *w_cwd;
|
|
|
|
/* Don't do anything if the charset hasn't actually changed. */
|
|
if (cygheap->locale.mbtowc == __get_global_locale ()->mbtowc)
|
|
return;
|
|
|
|
debug_printf ("Global charset set to %s",
|
|
__locale_charset (__get_global_locale ()));
|
|
/* Fetch PATH and CWD and convert to wchar_t in previous charset. */
|
|
path = getenv ("PATH");
|
|
if (path && *path) /* $PATH can be potentially unset. */
|
|
{
|
|
w_path = tp.w_get ();
|
|
_sys_mbstowcs (cygheap->locale.mbtowc, w_path, 32768, path);
|
|
}
|
|
w_cwd = tp.w_get ();
|
|
cwdstuff::acquire_write ();
|
|
_sys_mbstowcs (cygheap->locale.mbtowc, w_cwd, 32768,
|
|
cygheap->cwd.get_posix ());
|
|
/* Set charset for internal conversion functions. */
|
|
cygheap->locale.mbtowc = __get_global_locale ()->mbtowc;
|
|
if (cygheap->locale.mbtowc == __ascii_mbtowc)
|
|
cygheap->locale.mbtowc = __utf8_mbtowc;
|
|
/* Restore CWD and PATH in new charset. */
|
|
cygheap->cwd.reset_posix (w_cwd);
|
|
cwdstuff::release_write ();
|
|
if (w_path)
|
|
{
|
|
char *c_path = tp.c_get ();
|
|
sys_wcstombs (c_path, 32768, w_path);
|
|
setenv ("PATH", c_path, 1);
|
|
}
|
|
}
|
|
|
|
/* Called from dll_crt0_1, before fetching the command line from Windows.
|
|
Set the internal charset according to the environment locale settings.
|
|
Check if a required codepage is available, and only switch internal
|
|
charset if so.
|
|
Make sure to reset the application locale to "C" per POSIX. */
|
|
void
|
|
initial_setlocale ()
|
|
{
|
|
char *ret = _setlocale_r (_REENT, LC_CTYPE, "");
|
|
if (ret)
|
|
internal_setlocale ();
|
|
}
|