mirror of
git://sourceware.org/git/newlib-cygwin.git
synced 2025-02-01 12:00:35 +08:00
Cygwin: pty: move codepage evaluation to nlsfuncs.cc
The new function __eval_codepage_from_internal_charset is a simplified version of the former code in fhandler_tty.cc. It probably needs some extension, but the gist is to use knowledge of internals to be as quick as possible. Signed-off-by: Corinna Vinschen <corinna@vinschen.de>
This commit is contained in:
parent
9fddfa3d16
commit
66d8857677
@ -1614,8 +1614,8 @@ fhandler_pty_master::write (const void *ptr, size_t len)
|
||||
if (to_be_read_from_pcon () && get_ttyp ()->h_pseudo_console)
|
||||
{
|
||||
size_t nlen;
|
||||
char *buf = convert_mb_str
|
||||
(CP_UTF8, &nlen, get_ttyp ()->term_code_page, (const char *) ptr, len);
|
||||
char *buf = convert_mb_str (CP_UTF8, &nlen, get_ttyp ()->term_code_page,
|
||||
(const char *) ptr, len);
|
||||
|
||||
WaitForSingleObject (input_mutex, INFINITE);
|
||||
|
||||
@ -1782,183 +1782,13 @@ fhandler_pty_common::set_close_on_exec (bool val)
|
||||
close_on_exec (val);
|
||||
}
|
||||
|
||||
/* This table is borrowed from mintty: charset.c */
|
||||
static const struct {
|
||||
UINT cp;
|
||||
const char *name;
|
||||
}
|
||||
cs_names[] = {
|
||||
{ CP_UTF8, "UTF-8"},
|
||||
{ CP_UTF8, "UTF8"},
|
||||
{ 20127, "ASCII"},
|
||||
{ 20127, "US-ASCII"},
|
||||
{ 20127, "ANSI_X3.4-1968"},
|
||||
{ 20866, "KOI8-R"},
|
||||
{ 20866, "KOI8R"},
|
||||
{ 20866, "KOI8"},
|
||||
{ 21866, "KOI8-U"},
|
||||
{ 21866, "KOI8U"},
|
||||
{ 20932, "EUCJP"},
|
||||
{ 20932, "EUC-JP"},
|
||||
{ 874, "TIS620"},
|
||||
{ 874, "TIS-620"},
|
||||
{ 932, "SJIS"},
|
||||
{ 936, "GBK"},
|
||||
{ 936, "GB2312"},
|
||||
{ 936, "EUCCN"},
|
||||
{ 936, "EUC-CN"},
|
||||
{ 949, "EUCKR"},
|
||||
{ 949, "EUC-KR"},
|
||||
{ 950, "BIG5"},
|
||||
{ 0, "NULL"}
|
||||
};
|
||||
|
||||
static void
|
||||
get_locale_from_env (char *locale)
|
||||
{
|
||||
const char *env = NULL;
|
||||
char lang[ENCODING_LEN + 1] = {0, }, country[ENCODING_LEN + 1] = {0, };
|
||||
env = getenv ("LC_ALL");
|
||||
if (env == NULL || !*env)
|
||||
env = getenv ("LC_CTYPE");
|
||||
if (env == NULL || !*env)
|
||||
env = getenv ("LANG");
|
||||
if (env == NULL || !*env)
|
||||
{
|
||||
if (GetLocaleInfo (LOCALE_CUSTOM_UI_DEFAULT,
|
||||
LOCALE_SISO639LANGNAME,
|
||||
lang, sizeof (lang)))
|
||||
GetLocaleInfo (LOCALE_CUSTOM_UI_DEFAULT,
|
||||
LOCALE_SISO3166CTRYNAME,
|
||||
country, sizeof (country));
|
||||
else if (GetLocaleInfo (LOCALE_CUSTOM_DEFAULT,
|
||||
LOCALE_SISO639LANGNAME,
|
||||
lang, sizeof (lang)))
|
||||
GetLocaleInfo (LOCALE_CUSTOM_DEFAULT,
|
||||
LOCALE_SISO3166CTRYNAME,
|
||||
country, sizeof (country));
|
||||
else if (GetLocaleInfo (LOCALE_USER_DEFAULT,
|
||||
LOCALE_SISO639LANGNAME,
|
||||
lang, sizeof (lang)))
|
||||
GetLocaleInfo (LOCALE_USER_DEFAULT,
|
||||
LOCALE_SISO3166CTRYNAME,
|
||||
country, sizeof (country));
|
||||
else if (GetLocaleInfo (LOCALE_SYSTEM_DEFAULT,
|
||||
LOCALE_SISO639LANGNAME,
|
||||
lang, sizeof (lang)))
|
||||
GetLocaleInfo (LOCALE_SYSTEM_DEFAULT,
|
||||
LOCALE_SISO3166CTRYNAME,
|
||||
country, sizeof (country));
|
||||
if (strlen (lang) && strlen (country))
|
||||
__small_sprintf (lang + strlen (lang), "_%s.UTF-8", country);
|
||||
else
|
||||
strcpy (lang , "C.UTF-8");
|
||||
env = lang;
|
||||
}
|
||||
strcpy (locale, env);
|
||||
}
|
||||
|
||||
static void
|
||||
get_langinfo (char *locale_out, char *charset_out)
|
||||
{
|
||||
/* Get locale from environment */
|
||||
char new_locale[ENCODING_LEN + 1];
|
||||
get_locale_from_env (new_locale);
|
||||
|
||||
__locale_t loc;
|
||||
memset (&loc, 0, sizeof (loc));
|
||||
const char *locale = __loadlocale (&loc, LC_CTYPE, new_locale);
|
||||
if (!locale)
|
||||
locale = "C";
|
||||
|
||||
const char *charset;
|
||||
struct lc_ctype_T *lc_ctype = (struct lc_ctype_T *) loc.lc_cat[LC_CTYPE].ptr;
|
||||
if (!lc_ctype)
|
||||
charset = "ASCII";
|
||||
else
|
||||
charset = lc_ctype->codeset;
|
||||
|
||||
/* The following code is borrowed from nl_langinfo()
|
||||
in newlib/libc/locale/nl_langinfo.c */
|
||||
/* Convert charset to Linux compatible codeset string. */
|
||||
if (charset[0] == 'A'/*SCII*/)
|
||||
charset = "ANSI_X3.4-1968";
|
||||
else if (charset[0] == 'E')
|
||||
{
|
||||
if (strcmp (charset, "EUCJP") == 0)
|
||||
charset = "EUC-JP";
|
||||
else if (strcmp (charset, "EUCKR") == 0)
|
||||
charset = "EUC-KR";
|
||||
else if (strcmp (charset, "EUCCN") == 0)
|
||||
charset = "GB2312";
|
||||
}
|
||||
else if (charset[0] == 'C'/*Pxxxx*/)
|
||||
{
|
||||
if (strcmp (charset + 2, "874") == 0)
|
||||
charset = "TIS-620";
|
||||
else if (strcmp (charset + 2, "20866") == 0)
|
||||
charset = "KOI8-R";
|
||||
else if (strcmp (charset + 2, "21866") == 0)
|
||||
charset = "KOI8-U";
|
||||
else if (strcmp (charset + 2, "101") == 0)
|
||||
charset = "GEORGIAN-PS";
|
||||
else if (strcmp (charset + 2, "102") == 0)
|
||||
charset = "PT154";
|
||||
}
|
||||
else if (charset[0] == 'S'/*JIS*/)
|
||||
{
|
||||
/* Cygwin uses MSFT's implementation of SJIS, which differs
|
||||
in some codepoints from the real thing, especially
|
||||
0x5c: yen sign instead of backslash,
|
||||
0x7e: overline instead of tilde.
|
||||
We can't use the real SJIS since otherwise Win32
|
||||
pathnames would become invalid. OTOH, if we return
|
||||
"SJIS" here, then libiconv will do mb<->wc conversion
|
||||
differently to our internal functions. Therefore we
|
||||
return what we really implement, CP932. This is handled
|
||||
fine by libiconv. */
|
||||
charset = "CP932";
|
||||
}
|
||||
|
||||
/* Set results */
|
||||
strcpy (locale_out, new_locale);
|
||||
strcpy (charset_out, charset);
|
||||
}
|
||||
|
||||
void
|
||||
fhandler_pty_slave::setup_locale (void)
|
||||
{
|
||||
if (get_ttyp ()->term_code_page != 0)
|
||||
return;
|
||||
extern UINT __eval_codepage_from_internal_charset ();
|
||||
|
||||
char locale[ENCODING_LEN + 1] = "C";
|
||||
char charset[ENCODING_LEN + 1] = "ASCII";
|
||||
get_langinfo (locale, charset);
|
||||
|
||||
/* Set terminal code page from locale */
|
||||
/* This code is borrowed from mintty: charset.c */
|
||||
get_ttyp ()->term_code_page = 20127; /* Default ASCII */
|
||||
char charset_u[ENCODING_LEN + 1] = {0, };
|
||||
for (int i=0; charset[i] && i<ENCODING_LEN; i++)
|
||||
charset_u[i] = toupper (charset[i]);
|
||||
unsigned int iso;
|
||||
UINT cp = 20127; /* Default for fallback */
|
||||
if (sscanf (charset_u, "ISO-8859-%u", &iso) == 1
|
||||
|| sscanf (charset_u, "ISO8859-%u", &iso) == 1
|
||||
|| sscanf (charset_u, "ISO8859%u", &iso) == 1)
|
||||
{
|
||||
if (iso && iso <= 16 && iso !=12)
|
||||
get_ttyp ()->term_code_page = 28590 + iso;
|
||||
}
|
||||
else if (sscanf (charset_u, "CP%u", &cp) == 1)
|
||||
get_ttyp ()->term_code_page = cp;
|
||||
else
|
||||
for (int i=0; cs_names[i].cp; i++)
|
||||
if (strcasecmp (charset_u, cs_names[i].name) == 0)
|
||||
{
|
||||
get_ttyp ()->term_code_page = cs_names[i].cp;
|
||||
break;
|
||||
}
|
||||
if (!get_ttyp ()->term_code_page)
|
||||
get_ttyp ()->term_code_page = __eval_codepage_from_internal_charset ();
|
||||
}
|
||||
|
||||
void
|
||||
@ -1977,9 +1807,6 @@ fhandler_pty_slave::fixup_after_exec ()
|
||||
if (!close_on_exec ())
|
||||
fixup_after_fork (NULL); /* No parent handle required. */
|
||||
|
||||
/* Set locale */
|
||||
setup_locale ();
|
||||
|
||||
/* Hook Console API */
|
||||
#define DO_HOOK(module, name) \
|
||||
if (!name##_Orig) \
|
||||
@ -2205,8 +2032,8 @@ fhandler_pty_master::pty_master_fwd_thread ()
|
||||
state = 0;
|
||||
|
||||
size_t nlen;
|
||||
char *buf = convert_mb_str
|
||||
(get_ttyp ()->term_code_page, &nlen, CP_UTF8, ptr, wlen);
|
||||
char *buf = convert_mb_str (get_ttyp ()->term_code_page,
|
||||
&nlen, CP_UTF8, ptr, wlen);
|
||||
|
||||
ptr = buf;
|
||||
wlen = rlen = nlen;
|
||||
@ -2228,8 +2055,8 @@ fhandler_pty_master::pty_master_fwd_thread ()
|
||||
continue;
|
||||
}
|
||||
size_t nlen;
|
||||
char *buf = convert_mb_str
|
||||
(get_ttyp ()->term_code_page, &nlen, GetConsoleOutputCP (), ptr, wlen);
|
||||
char *buf = convert_mb_str (get_ttyp ()->term_code_page, &nlen,
|
||||
GetConsoleOutputCP (), ptr, wlen);
|
||||
|
||||
ptr = buf;
|
||||
wlen = rlen = nlen;
|
||||
|
@ -1448,6 +1448,54 @@ __set_charset_from_locale (const char *locale, char *charset)
|
||||
stpcpy (charset, cs);
|
||||
}
|
||||
|
||||
/* Called from fhandler_tty::setup_locale. Set a codepage which reflects the
|
||||
internal charset setting. This is *not* necessarily the Windows
|
||||
codepage connected to a locale by default, so we have to set this
|
||||
up explicitely. */
|
||||
UINT
|
||||
__eval_codepage_from_internal_charset ()
|
||||
{
|
||||
const char *charset = __locale_charset (__get_global_locale ());
|
||||
UINT codepage = CP_UTF8; /* Default UTF8 */
|
||||
|
||||
/* The internal charset names are well defined, so we can use shortcuts. */
|
||||
switch (charset[0])
|
||||
{
|
||||
case 'B': /* BIG5 */
|
||||
codepage = 950;
|
||||
break;
|
||||
case 'C': /* CPxxx */
|
||||
codepage = strtoul (charset + 2, NULL, 10);
|
||||
break;
|
||||
case 'E': /* EUCxx */
|
||||
switch (charset[3])
|
||||
{
|
||||
case 'J': /* EUCJP */
|
||||
codepage = 20932;
|
||||
break;
|
||||
case 'K': /* EUCKR */
|
||||
codepage = 949;
|
||||
break;
|
||||
case 'C': /* EUCCN */
|
||||
codepage = 936;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case 'G': /* GBK/GB2312 */
|
||||
codepage = 936;
|
||||
break;
|
||||
case 'I': /* ISO-8859-x */
|
||||
codepage = strtoul (charset + 9, NULL, 10) + 28590;
|
||||
break;
|
||||
case 'S': /* SJIS */
|
||||
codepage = 932;
|
||||
break;
|
||||
default: /* All set to UTF8 already */
|
||||
break;
|
||||
}
|
||||
return codepage;
|
||||
}
|
||||
|
||||
/* This function is called from newlib's loadlocale if the locale identifier
|
||||
was invalid, one way or the other. It looks for the file
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user