4
0
mirror of git://sourceware.org/git/newlib-cygwin.git synced 2025-02-12 20:19:14 +08:00

Cygwin: pty: move codepage evaluation to nlsfuncs.cc

The new function __eval_codepage_from_internal_charset
is a simplified version of the former code in
fhandler_tty.cc.  It probably needs some extension,
but the gist is to use knowledge of internals to
be as quick as possible.

Signed-off-by: Corinna Vinschen <corinna@vinschen.de>
This commit is contained in:
Corinna Vinschen 2020-09-08 10:36:04 +02:00 committed by Ken Brown
parent 9fddfa3d16
commit 66d8857677
2 changed files with 57 additions and 182 deletions

View File

@ -1614,8 +1614,8 @@ fhandler_pty_master::write (const void *ptr, size_t len)
if (to_be_read_from_pcon () && get_ttyp ()->h_pseudo_console) if (to_be_read_from_pcon () && get_ttyp ()->h_pseudo_console)
{ {
size_t nlen; size_t nlen;
char *buf = convert_mb_str char *buf = convert_mb_str (CP_UTF8, &nlen, get_ttyp ()->term_code_page,
(CP_UTF8, &nlen, get_ttyp ()->term_code_page, (const char *) ptr, len); (const char *) ptr, len);
WaitForSingleObject (input_mutex, INFINITE); WaitForSingleObject (input_mutex, INFINITE);
@ -1782,183 +1782,13 @@ fhandler_pty_common::set_close_on_exec (bool val)
close_on_exec (val); close_on_exec (val);
} }
/* Lookup table mapping charset names to Windows code page numbers.
   This table is borrowed from mintty: charset.c

   Several spellings of the same charset (e.g. "UTF-8" and "UTF8") map
   to the same code page.  All names are stored in upper case.  The
   final entry with cp == 0 is a sentinel: the lookup loop in
   setup_locale stops as soon as it encounters a zero code page. */
static const struct {
/* Windows code page number. */
UINT cp;
/* Charset name belonging to that code page. */
const char *name;
}
cs_names[] = {
{ CP_UTF8, "UTF-8"},
{ CP_UTF8, "UTF8"},
{ 20127, "ASCII"},
{ 20127, "US-ASCII"},
{ 20127, "ANSI_X3.4-1968"},
{ 20866, "KOI8-R"},
{ 20866, "KOI8R"},
{ 20866, "KOI8"},
{ 21866, "KOI8-U"},
{ 21866, "KOI8U"},
{ 20932, "EUCJP"},
{ 20932, "EUC-JP"},
{ 874, "TIS620"},
{ 874, "TIS-620"},
{ 932, "SJIS"},
{ 936, "GBK"},
{ 936, "GB2312"},
{ 936, "EUCCN"},
{ 936, "EUC-CN"},
{ 949, "EUCKR"},
{ 949, "EUC-KR"},
{ 950, "BIG5"},
/* Sentinel terminating the table. */
{ 0, "NULL"}
};
/* Determine the locale name to use and store it in LOCALE.

   The environment variables LC_ALL, LC_CTYPE and LANG are checked in
   that order; the first one which is set and non-empty wins.  If none
   of them is usable, a name of the form "<lang>_<country>.UTF-8" is
   built from the Windows locale information, trying the custom UI
   default, custom default, user default and system default locales in
   turn.  If Windows yields no language/country pair, "C.UTF-8" is used.

   LOCALE must point to a buffer of at least ENCODING_LEN + 1 bytes.
   An environment value which does not fit into that buffer is replaced
   by "C" rather than being copied past the end of the buffer. */
static void
get_locale_from_env (char *locale)
{
  char lang[ENCODING_LEN + 1] = {0, }, country[ENCODING_LEN + 1] = {0, };
  const char *env = getenv ("LC_ALL");

  if (env == NULL || !*env)
    env = getenv ("LC_CTYPE");
  if (env == NULL || !*env)
    env = getenv ("LANG");
  if (env == NULL || !*env)
    {
      /* Nothing in the environment.  Ask Windows, most specific locale
	 first.  The country is only fetched from the locale which
	 delivered the language name. */
      if (GetLocaleInfo (LOCALE_CUSTOM_UI_DEFAULT,
			 LOCALE_SISO639LANGNAME,
			 lang, sizeof (lang)))
	GetLocaleInfo (LOCALE_CUSTOM_UI_DEFAULT,
		       LOCALE_SISO3166CTRYNAME,
		       country, sizeof (country));
      else if (GetLocaleInfo (LOCALE_CUSTOM_DEFAULT,
			      LOCALE_SISO639LANGNAME,
			      lang, sizeof (lang)))
	GetLocaleInfo (LOCALE_CUSTOM_DEFAULT,
		       LOCALE_SISO3166CTRYNAME,
		       country, sizeof (country));
      else if (GetLocaleInfo (LOCALE_USER_DEFAULT,
			      LOCALE_SISO639LANGNAME,
			      lang, sizeof (lang)))
	GetLocaleInfo (LOCALE_USER_DEFAULT,
		       LOCALE_SISO3166CTRYNAME,
		       country, sizeof (country));
      else if (GetLocaleInfo (LOCALE_SYSTEM_DEFAULT,
			      LOCALE_SISO639LANGNAME,
			      lang, sizeof (lang)))
	GetLocaleInfo (LOCALE_SYSTEM_DEFAULT,
		       LOCALE_SISO3166CTRYNAME,
		       country, sizeof (country));
      if (strlen (lang) && strlen (country))
	__small_sprintf (lang + strlen (lang), "_%s.UTF-8", country);
      else
	strcpy (lang , "C.UTF-8");
      env = lang;
    }
  /* The environment is arbitrary user input, while the destination
     only holds ENCODING_LEN + 1 bytes.  Never copy more than fits. */
  if (strlen (env) > ENCODING_LEN)
    env = "C";
  strcpy (locale, env);
}
static void
get_langinfo (char *locale_out, char *charset_out)
{
/* Get locale from environment */
char new_locale[ENCODING_LEN + 1];
get_locale_from_env (new_locale);
__locale_t loc;
memset (&loc, 0, sizeof (loc));
const char *locale = __loadlocale (&loc, LC_CTYPE, new_locale);
if (!locale)
locale = "C";
const char *charset;
struct lc_ctype_T *lc_ctype = (struct lc_ctype_T *) loc.lc_cat[LC_CTYPE].ptr;
if (!lc_ctype)
charset = "ASCII";
else
charset = lc_ctype->codeset;
/* The following code is borrowed from nl_langinfo()
in newlib/libc/locale/nl_langinfo.c */
/* Convert charset to Linux compatible codeset string. */
if (charset[0] == 'A'/*SCII*/)
charset = "ANSI_X3.4-1968";
else if (charset[0] == 'E')
{
if (strcmp (charset, "EUCJP") == 0)
charset = "EUC-JP";
else if (strcmp (charset, "EUCKR") == 0)
charset = "EUC-KR";
else if (strcmp (charset, "EUCCN") == 0)
charset = "GB2312";
}
else if (charset[0] == 'C'/*Pxxxx*/)
{
if (strcmp (charset + 2, "874") == 0)
charset = "TIS-620";
else if (strcmp (charset + 2, "20866") == 0)
charset = "KOI8-R";
else if (strcmp (charset + 2, "21866") == 0)
charset = "KOI8-U";
else if (strcmp (charset + 2, "101") == 0)
charset = "GEORGIAN-PS";
else if (strcmp (charset + 2, "102") == 0)
charset = "PT154";
}
else if (charset[0] == 'S'/*JIS*/)
{
/* Cygwin uses MSFT's implementation of SJIS, which differs
in some codepoints from the real thing, especially
0x5c: yen sign instead of backslash,
0x7e: overline instead of tilde.
We can't use the real SJIS since otherwise Win32
pathnames would become invalid. OTOH, if we return
"SJIS" here, then libiconv will do mb<->wc conversion
differently to our internal functions. Therefore we
return what we really implement, CP932. This is handled
fine by libiconv. */
charset = "CP932";
}
/* Set results */
strcpy (locale_out, new_locale);
strcpy (charset_out, charset);
}
void void
fhandler_pty_slave::setup_locale (void) fhandler_pty_slave::setup_locale (void)
{ {
if (get_ttyp ()->term_code_page != 0) extern UINT __eval_codepage_from_internal_charset ();
return;
char locale[ENCODING_LEN + 1] = "C"; if (!get_ttyp ()->term_code_page)
char charset[ENCODING_LEN + 1] = "ASCII"; get_ttyp ()->term_code_page = __eval_codepage_from_internal_charset ();
get_langinfo (locale, charset);
/* Set terminal code page from locale */
/* This code is borrowed from mintty: charset.c */
get_ttyp ()->term_code_page = 20127; /* Default ASCII */
char charset_u[ENCODING_LEN + 1] = {0, };
for (int i=0; charset[i] && i<ENCODING_LEN; i++)
charset_u[i] = toupper (charset[i]);
unsigned int iso;
UINT cp = 20127; /* Default for fallback */
if (sscanf (charset_u, "ISO-8859-%u", &iso) == 1
|| sscanf (charset_u, "ISO8859-%u", &iso) == 1
|| sscanf (charset_u, "ISO8859%u", &iso) == 1)
{
if (iso && iso <= 16 && iso !=12)
get_ttyp ()->term_code_page = 28590 + iso;
}
else if (sscanf (charset_u, "CP%u", &cp) == 1)
get_ttyp ()->term_code_page = cp;
else
for (int i=0; cs_names[i].cp; i++)
if (strcasecmp (charset_u, cs_names[i].name) == 0)
{
get_ttyp ()->term_code_page = cs_names[i].cp;
break;
}
} }
void void
@ -1977,9 +1807,6 @@ fhandler_pty_slave::fixup_after_exec ()
if (!close_on_exec ()) if (!close_on_exec ())
fixup_after_fork (NULL); /* No parent handle required. */ fixup_after_fork (NULL); /* No parent handle required. */
/* Set locale */
setup_locale ();
/* Hook Console API */ /* Hook Console API */
#define DO_HOOK(module, name) \ #define DO_HOOK(module, name) \
if (!name##_Orig) \ if (!name##_Orig) \
@ -2205,8 +2032,8 @@ fhandler_pty_master::pty_master_fwd_thread ()
state = 0; state = 0;
size_t nlen; size_t nlen;
char *buf = convert_mb_str char *buf = convert_mb_str (get_ttyp ()->term_code_page,
(get_ttyp ()->term_code_page, &nlen, CP_UTF8, ptr, wlen); &nlen, CP_UTF8, ptr, wlen);
ptr = buf; ptr = buf;
wlen = rlen = nlen; wlen = rlen = nlen;
@ -2228,8 +2055,8 @@ fhandler_pty_master::pty_master_fwd_thread ()
continue; continue;
} }
size_t nlen; size_t nlen;
char *buf = convert_mb_str char *buf = convert_mb_str (get_ttyp ()->term_code_page, &nlen,
(get_ttyp ()->term_code_page, &nlen, GetConsoleOutputCP (), ptr, wlen); GetConsoleOutputCP (), ptr, wlen);
ptr = buf; ptr = buf;
wlen = rlen = nlen; wlen = rlen = nlen;

View File

@ -1448,6 +1448,54 @@ __set_charset_from_locale (const char *locale, char *charset)
stpcpy (charset, cs); stpcpy (charset, cs);
} }
/* Called from fhandler_pty_slave::setup_locale.  Return a codepage which
   reflects the internal charset setting of the global locale.  This is
   *not* necessarily the Windows codepage connected to a locale by
   default, so we have to set this up explicitly.

   The mapping relies on the internal charset names being well defined,
   so the first character (plus one more for EUCxx) is sufficient to
   identify the charset.  Every charset which is not recognized, or
   which has no usable Windows codepage, results in CP_UTF8. */
UINT
__eval_codepage_from_internal_charset ()
{
  const char *charset = __locale_charset (__get_global_locale ());
  UINT codepage = CP_UTF8; /* Default UTF8 */
  unsigned long num;

  /* The internal charset names are well defined, so we can use shortcuts. */
  switch (charset[0])
    {
    case 'B': /* BIG5 */
      codepage = 950;
      break;
    case 'C': /* CPxxx */
      /* Don't read beyond the end of a name which isn't "CP...". */
      if (charset[1] != 'P')
	break;
      num = strtoul (charset + 2, NULL, 10);
      /* CP101 (GEORGIAN-PS) and CP102 (PT154) are internal pseudo names
	 without a matching Windows codepage, and 0 means no number
	 could be parsed at all (0 is CP_ACP on Windows, not an explicit
	 codepage).  Stick to UTF-8 in these cases. */
      if (num != 0 && num != 101 && num != 102)
	codepage = (UINT) num;
      break;
    case 'E': /* EUCxx */
      /* Only look at the fourth character if the name is long enough. */
      if (strncmp (charset, "EUC", 3) != 0)
	break;
      switch (charset[3])
	{
	case 'J': /* EUCJP */
	  codepage = 20932;
	  break;
	case 'K': /* EUCKR */
	  codepage = 949;
	  break;
	case 'C': /* EUCCN */
	  codepage = 936;
	  break;
	default: /* Unknown EUC variant, keep UTF8 */
	  break;
	}
      break;
    case 'G': /* GBK/GB2312 */
      codepage = 936;
      break;
    case 'I': /* ISO-8859-x */
      /* Make sure the part number really follows at offset 9. */
      if (strncmp (charset, "ISO-8859-", 9) != 0)
	break;
      num = strtoul (charset + 9, NULL, 10);
      /* Valid parts are 1 to 16, except for the non-existent part 12.
	 They map to the Windows codepages 28591 and up. */
      if (num >= 1 && num <= 16 && num != 12)
	codepage = 28590 + (UINT) num;
      break;
    case 'S': /* SJIS */
      codepage = 932;
      break;
    default: /* All set to UTF8 already */
      break;
    }
  return codepage;
}
/* This function is called from newlib's loadlocale if the locale identifier /* This function is called from newlib's loadlocale if the locale identifier
was invalid, one way or the other. It looks for the file was invalid, one way or the other. It looks for the file