mirror of
git://sourceware.org/git/newlib-cygwin.git
synced 2025-02-12 20:19:14 +08:00
Cygwin: pty: move codepage evaluation to nlsfuncs.cc
The new function __eval_codepage_from_internal_charset is a simplified version of the former code in fhandler_tty.cc. It probably needs some extension, but the gist is to use knowledge of internals to be as quick as possible. Signed-off-by: Corinna Vinschen <corinna@vinschen.de>
This commit is contained in:
parent
9fddfa3d16
commit
66d8857677
@ -1614,8 +1614,8 @@ fhandler_pty_master::write (const void *ptr, size_t len)
|
|||||||
if (to_be_read_from_pcon () && get_ttyp ()->h_pseudo_console)
|
if (to_be_read_from_pcon () && get_ttyp ()->h_pseudo_console)
|
||||||
{
|
{
|
||||||
size_t nlen;
|
size_t nlen;
|
||||||
char *buf = convert_mb_str
|
char *buf = convert_mb_str (CP_UTF8, &nlen, get_ttyp ()->term_code_page,
|
||||||
(CP_UTF8, &nlen, get_ttyp ()->term_code_page, (const char *) ptr, len);
|
(const char *) ptr, len);
|
||||||
|
|
||||||
WaitForSingleObject (input_mutex, INFINITE);
|
WaitForSingleObject (input_mutex, INFINITE);
|
||||||
|
|
||||||
@ -1782,183 +1782,13 @@ fhandler_pty_common::set_close_on_exec (bool val)
|
|||||||
close_on_exec (val);
|
close_on_exec (val);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* This table is borrowed from mintty: charset.c */
|
|
||||||
static const struct {
|
|
||||||
UINT cp;
|
|
||||||
const char *name;
|
|
||||||
}
|
|
||||||
cs_names[] = {
|
|
||||||
{ CP_UTF8, "UTF-8"},
|
|
||||||
{ CP_UTF8, "UTF8"},
|
|
||||||
{ 20127, "ASCII"},
|
|
||||||
{ 20127, "US-ASCII"},
|
|
||||||
{ 20127, "ANSI_X3.4-1968"},
|
|
||||||
{ 20866, "KOI8-R"},
|
|
||||||
{ 20866, "KOI8R"},
|
|
||||||
{ 20866, "KOI8"},
|
|
||||||
{ 21866, "KOI8-U"},
|
|
||||||
{ 21866, "KOI8U"},
|
|
||||||
{ 20932, "EUCJP"},
|
|
||||||
{ 20932, "EUC-JP"},
|
|
||||||
{ 874, "TIS620"},
|
|
||||||
{ 874, "TIS-620"},
|
|
||||||
{ 932, "SJIS"},
|
|
||||||
{ 936, "GBK"},
|
|
||||||
{ 936, "GB2312"},
|
|
||||||
{ 936, "EUCCN"},
|
|
||||||
{ 936, "EUC-CN"},
|
|
||||||
{ 949, "EUCKR"},
|
|
||||||
{ 949, "EUC-KR"},
|
|
||||||
{ 950, "BIG5"},
|
|
||||||
{ 0, "NULL"}
|
|
||||||
};
|
|
||||||
|
|
||||||
static void
|
|
||||||
get_locale_from_env (char *locale)
|
|
||||||
{
|
|
||||||
const char *env = NULL;
|
|
||||||
char lang[ENCODING_LEN + 1] = {0, }, country[ENCODING_LEN + 1] = {0, };
|
|
||||||
env = getenv ("LC_ALL");
|
|
||||||
if (env == NULL || !*env)
|
|
||||||
env = getenv ("LC_CTYPE");
|
|
||||||
if (env == NULL || !*env)
|
|
||||||
env = getenv ("LANG");
|
|
||||||
if (env == NULL || !*env)
|
|
||||||
{
|
|
||||||
if (GetLocaleInfo (LOCALE_CUSTOM_UI_DEFAULT,
|
|
||||||
LOCALE_SISO639LANGNAME,
|
|
||||||
lang, sizeof (lang)))
|
|
||||||
GetLocaleInfo (LOCALE_CUSTOM_UI_DEFAULT,
|
|
||||||
LOCALE_SISO3166CTRYNAME,
|
|
||||||
country, sizeof (country));
|
|
||||||
else if (GetLocaleInfo (LOCALE_CUSTOM_DEFAULT,
|
|
||||||
LOCALE_SISO639LANGNAME,
|
|
||||||
lang, sizeof (lang)))
|
|
||||||
GetLocaleInfo (LOCALE_CUSTOM_DEFAULT,
|
|
||||||
LOCALE_SISO3166CTRYNAME,
|
|
||||||
country, sizeof (country));
|
|
||||||
else if (GetLocaleInfo (LOCALE_USER_DEFAULT,
|
|
||||||
LOCALE_SISO639LANGNAME,
|
|
||||||
lang, sizeof (lang)))
|
|
||||||
GetLocaleInfo (LOCALE_USER_DEFAULT,
|
|
||||||
LOCALE_SISO3166CTRYNAME,
|
|
||||||
country, sizeof (country));
|
|
||||||
else if (GetLocaleInfo (LOCALE_SYSTEM_DEFAULT,
|
|
||||||
LOCALE_SISO639LANGNAME,
|
|
||||||
lang, sizeof (lang)))
|
|
||||||
GetLocaleInfo (LOCALE_SYSTEM_DEFAULT,
|
|
||||||
LOCALE_SISO3166CTRYNAME,
|
|
||||||
country, sizeof (country));
|
|
||||||
if (strlen (lang) && strlen (country))
|
|
||||||
__small_sprintf (lang + strlen (lang), "_%s.UTF-8", country);
|
|
||||||
else
|
|
||||||
strcpy (lang , "C.UTF-8");
|
|
||||||
env = lang;
|
|
||||||
}
|
|
||||||
strcpy (locale, env);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void
|
|
||||||
get_langinfo (char *locale_out, char *charset_out)
|
|
||||||
{
|
|
||||||
/* Get locale from environment */
|
|
||||||
char new_locale[ENCODING_LEN + 1];
|
|
||||||
get_locale_from_env (new_locale);
|
|
||||||
|
|
||||||
__locale_t loc;
|
|
||||||
memset (&loc, 0, sizeof (loc));
|
|
||||||
const char *locale = __loadlocale (&loc, LC_CTYPE, new_locale);
|
|
||||||
if (!locale)
|
|
||||||
locale = "C";
|
|
||||||
|
|
||||||
const char *charset;
|
|
||||||
struct lc_ctype_T *lc_ctype = (struct lc_ctype_T *) loc.lc_cat[LC_CTYPE].ptr;
|
|
||||||
if (!lc_ctype)
|
|
||||||
charset = "ASCII";
|
|
||||||
else
|
|
||||||
charset = lc_ctype->codeset;
|
|
||||||
|
|
||||||
/* The following code is borrowed from nl_langinfo()
|
|
||||||
in newlib/libc/locale/nl_langinfo.c */
|
|
||||||
/* Convert charset to Linux compatible codeset string. */
|
|
||||||
if (charset[0] == 'A'/*SCII*/)
|
|
||||||
charset = "ANSI_X3.4-1968";
|
|
||||||
else if (charset[0] == 'E')
|
|
||||||
{
|
|
||||||
if (strcmp (charset, "EUCJP") == 0)
|
|
||||||
charset = "EUC-JP";
|
|
||||||
else if (strcmp (charset, "EUCKR") == 0)
|
|
||||||
charset = "EUC-KR";
|
|
||||||
else if (strcmp (charset, "EUCCN") == 0)
|
|
||||||
charset = "GB2312";
|
|
||||||
}
|
|
||||||
else if (charset[0] == 'C'/*Pxxxx*/)
|
|
||||||
{
|
|
||||||
if (strcmp (charset + 2, "874") == 0)
|
|
||||||
charset = "TIS-620";
|
|
||||||
else if (strcmp (charset + 2, "20866") == 0)
|
|
||||||
charset = "KOI8-R";
|
|
||||||
else if (strcmp (charset + 2, "21866") == 0)
|
|
||||||
charset = "KOI8-U";
|
|
||||||
else if (strcmp (charset + 2, "101") == 0)
|
|
||||||
charset = "GEORGIAN-PS";
|
|
||||||
else if (strcmp (charset + 2, "102") == 0)
|
|
||||||
charset = "PT154";
|
|
||||||
}
|
|
||||||
else if (charset[0] == 'S'/*JIS*/)
|
|
||||||
{
|
|
||||||
/* Cygwin uses MSFT's implementation of SJIS, which differs
|
|
||||||
in some codepoints from the real thing, especially
|
|
||||||
0x5c: yen sign instead of backslash,
|
|
||||||
0x7e: overline instead of tilde.
|
|
||||||
We can't use the real SJIS since otherwise Win32
|
|
||||||
pathnames would become invalid. OTOH, if we return
|
|
||||||
"SJIS" here, then libiconv will do mb<->wc conversion
|
|
||||||
differently to our internal functions. Therefore we
|
|
||||||
return what we really implement, CP932. This is handled
|
|
||||||
fine by libiconv. */
|
|
||||||
charset = "CP932";
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Set results */
|
|
||||||
strcpy (locale_out, new_locale);
|
|
||||||
strcpy (charset_out, charset);
|
|
||||||
}
|
|
||||||
|
|
||||||
void
|
void
|
||||||
fhandler_pty_slave::setup_locale (void)
|
fhandler_pty_slave::setup_locale (void)
|
||||||
{
|
{
|
||||||
if (get_ttyp ()->term_code_page != 0)
|
extern UINT __eval_codepage_from_internal_charset ();
|
||||||
return;
|
|
||||||
|
|
||||||
char locale[ENCODING_LEN + 1] = "C";
|
if (!get_ttyp ()->term_code_page)
|
||||||
char charset[ENCODING_LEN + 1] = "ASCII";
|
get_ttyp ()->term_code_page = __eval_codepage_from_internal_charset ();
|
||||||
get_langinfo (locale, charset);
|
|
||||||
|
|
||||||
/* Set terminal code page from locale */
|
|
||||||
/* This code is borrowed from mintty: charset.c */
|
|
||||||
get_ttyp ()->term_code_page = 20127; /* Default ASCII */
|
|
||||||
char charset_u[ENCODING_LEN + 1] = {0, };
|
|
||||||
for (int i=0; charset[i] && i<ENCODING_LEN; i++)
|
|
||||||
charset_u[i] = toupper (charset[i]);
|
|
||||||
unsigned int iso;
|
|
||||||
UINT cp = 20127; /* Default for fallback */
|
|
||||||
if (sscanf (charset_u, "ISO-8859-%u", &iso) == 1
|
|
||||||
|| sscanf (charset_u, "ISO8859-%u", &iso) == 1
|
|
||||||
|| sscanf (charset_u, "ISO8859%u", &iso) == 1)
|
|
||||||
{
|
|
||||||
if (iso && iso <= 16 && iso !=12)
|
|
||||||
get_ttyp ()->term_code_page = 28590 + iso;
|
|
||||||
}
|
|
||||||
else if (sscanf (charset_u, "CP%u", &cp) == 1)
|
|
||||||
get_ttyp ()->term_code_page = cp;
|
|
||||||
else
|
|
||||||
for (int i=0; cs_names[i].cp; i++)
|
|
||||||
if (strcasecmp (charset_u, cs_names[i].name) == 0)
|
|
||||||
{
|
|
||||||
get_ttyp ()->term_code_page = cs_names[i].cp;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
@ -1977,9 +1807,6 @@ fhandler_pty_slave::fixup_after_exec ()
|
|||||||
if (!close_on_exec ())
|
if (!close_on_exec ())
|
||||||
fixup_after_fork (NULL); /* No parent handle required. */
|
fixup_after_fork (NULL); /* No parent handle required. */
|
||||||
|
|
||||||
/* Set locale */
|
|
||||||
setup_locale ();
|
|
||||||
|
|
||||||
/* Hook Console API */
|
/* Hook Console API */
|
||||||
#define DO_HOOK(module, name) \
|
#define DO_HOOK(module, name) \
|
||||||
if (!name##_Orig) \
|
if (!name##_Orig) \
|
||||||
@ -2205,8 +2032,8 @@ fhandler_pty_master::pty_master_fwd_thread ()
|
|||||||
state = 0;
|
state = 0;
|
||||||
|
|
||||||
size_t nlen;
|
size_t nlen;
|
||||||
char *buf = convert_mb_str
|
char *buf = convert_mb_str (get_ttyp ()->term_code_page,
|
||||||
(get_ttyp ()->term_code_page, &nlen, CP_UTF8, ptr, wlen);
|
&nlen, CP_UTF8, ptr, wlen);
|
||||||
|
|
||||||
ptr = buf;
|
ptr = buf;
|
||||||
wlen = rlen = nlen;
|
wlen = rlen = nlen;
|
||||||
@ -2228,8 +2055,8 @@ fhandler_pty_master::pty_master_fwd_thread ()
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
size_t nlen;
|
size_t nlen;
|
||||||
char *buf = convert_mb_str
|
char *buf = convert_mb_str (get_ttyp ()->term_code_page, &nlen,
|
||||||
(get_ttyp ()->term_code_page, &nlen, GetConsoleOutputCP (), ptr, wlen);
|
GetConsoleOutputCP (), ptr, wlen);
|
||||||
|
|
||||||
ptr = buf;
|
ptr = buf;
|
||||||
wlen = rlen = nlen;
|
wlen = rlen = nlen;
|
||||||
|
@ -1448,6 +1448,54 @@ __set_charset_from_locale (const char *locale, char *charset)
|
|||||||
stpcpy (charset, cs);
|
stpcpy (charset, cs);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Called from fhandler_pty_slave::setup_locale. Set a codepage which reflects the
|
||||||
|
internal charset setting. This is *not* necessarily the Windows
|
||||||
|
codepage connected to a locale by default, so we have to set this
|
||||||
|
up explicitly. */
|
||||||
|
UINT
|
||||||
|
__eval_codepage_from_internal_charset ()
|
||||||
|
{
|
||||||
|
const char *charset = __locale_charset (__get_global_locale ());
|
||||||
|
UINT codepage = CP_UTF8; /* Default UTF8 */
|
||||||
|
|
||||||
|
/* The internal charset names are well defined, so we can use shortcuts. */
|
||||||
|
switch (charset[0])
|
||||||
|
{
|
||||||
|
case 'B': /* BIG5 */
|
||||||
|
codepage = 950;
|
||||||
|
break;
|
||||||
|
case 'C': /* CPxxx */
|
||||||
|
codepage = strtoul (charset + 2, NULL, 10);
|
||||||
|
break;
|
||||||
|
case 'E': /* EUCxx */
|
||||||
|
switch (charset[3])
|
||||||
|
{
|
||||||
|
case 'J': /* EUCJP */
|
||||||
|
codepage = 20932;
|
||||||
|
break;
|
||||||
|
case 'K': /* EUCKR */
|
||||||
|
codepage = 949;
|
||||||
|
break;
|
||||||
|
case 'C': /* EUCCN */
|
||||||
|
codepage = 936;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case 'G': /* GBK/GB2312 */
|
||||||
|
codepage = 936;
|
||||||
|
break;
|
||||||
|
case 'I': /* ISO-8859-x */
|
||||||
|
codepage = strtoul (charset + 9, NULL, 10) + 28590;
|
||||||
|
break;
|
||||||
|
case 'S': /* SJIS */
|
||||||
|
codepage = 932;
|
||||||
|
break;
|
||||||
|
default: /* All set to UTF8 already */
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
return codepage;
|
||||||
|
}
|
||||||
|
|
||||||
/* This function is called from newlib's loadlocale if the locale identifier
|
/* This function is called from newlib's loadlocale if the locale identifier
|
||||||
was invalid, one way or the other. It looks for the file
|
was invalid, one way or the other. It looks for the file
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user