Cygwin: support KOI8-T codeset
KOI8-T is used on Linux as the default codeset for Tajik. There's no matching Windows codepage, so fake it as CP103. Signed-off-by: Corinna Vinschen <corinna@vinschen.de>
This commit is contained in:
parent
ac405ab9bc
commit
89eb4bce15
|
@ -50,10 +50,10 @@ but uses the UTF-8 charset.
|
|||
|
||||
The following charsets are recognized:
|
||||
<<"UTF-8">>, <<"JIS">>, <<"EUCJP">>, <<"SJIS">>, <<"KOI8-R">>, <<"KOI8-U">>,
|
||||
<<"GEORGIAN-PS">>, <<"PT154">>, <<"TIS-620">>, <<"ISO-8859-x">> with
|
||||
1 <= x <= 16, or <<"CPxxx">> with xxx in [437, 720, 737, 775, 850, 852, 855,
|
||||
857, 858, 862, 866, 874, 932, 1125, 1250, 1251, 1252, 1253, 1254, 1255, 1256,
|
||||
1257, 1258].
|
||||
<<"KOI8-T">>, <<"GEORGIAN-PS">>, <<"PT154">>, <<"TIS-620">>, <<"ISO-8859-x">>
|
||||
with 1 <= x <= 16, or <<"CPxxx">> with xxx in [437, 720, 737, 775, 850, 852,
|
||||
855, 857, 858, 862, 866, 874, 932, 1125, 1250, 1251, 1252, 1253, 1254, 1255,
|
||||
1256, 1257, 1258].
|
||||
|
||||
Charsets are case insensitive. For instance, <<"EUCJP">> and <<"eucJP">>
|
||||
are equivalent. Charset names with dashes can also be written without
|
||||
|
@ -769,7 +769,7 @@ restart:
|
|||
break;
|
||||
case 'K':
|
||||
case 'k':
|
||||
/* KOI8-R, KOI8-U and the aliases without dash */
|
||||
/* KOI8-R, KOI8-U, KOI8-T and the aliases without dash */
|
||||
if (strncasecmp (charset, "KOI8", 4))
|
||||
FAIL;
|
||||
c = charset + 4;
|
||||
|
@ -785,6 +785,11 @@ restart:
|
|||
val = 21866;
|
||||
strcpy (charset, "CP21866");
|
||||
}
|
||||
else if (*c == 'T' || *c == 't')
|
||||
{
|
||||
val = 103;
|
||||
strcpy (charset, "CP103");
|
||||
}
|
||||
else
|
||||
FAIL;
|
||||
mbc_max = 1;
|
||||
|
|
|
@ -233,6 +233,8 @@ do_codeset:
|
|||
ret = "GEORGIAN-PS";
|
||||
else if (strcmp (ret + 2, "102") == 0)
|
||||
ret = "PT154";
|
||||
else if (strcmp (ret + 2, "103") == 0)
|
||||
ret = "KOI8-T";
|
||||
}
|
||||
else if (ret[0] == 'S'/*JIS*/)
|
||||
{
|
||||
|
|
|
@ -201,7 +201,7 @@ wchar_t __iso_8859_conv[14][0x60] = {
|
|||
value (function __cp_index), the second index is the value of the
|
||||
incoming character - 0x80.
|
||||
Values < 0x80 don't have to be converted anyway. */
|
||||
wchar_t __cp_conv[26][0x80] = {
|
||||
wchar_t __cp_conv[27][0x80] = {
|
||||
/* CP437 */
|
||||
{ 0xc7, 0xfc, 0xe9, 0xe2, 0xe4, 0xe0, 0xe5, 0xe7,
|
||||
0xea, 0xeb, 0xe8, 0xef, 0xee, 0xec, 0xc4, 0xc5,
|
||||
|
@ -649,7 +649,24 @@ wchar_t __cp_conv[26][0x80] = {
|
|||
0x430, 0x431, 0x432, 0x433, 0x434, 0x435, 0x436, 0x437,
|
||||
0x438, 0x439, 0x43a, 0x43b, 0x43c, 0x43d, 0x43e, 0x43f,
|
||||
0x440, 0x441, 0x442, 0x443, 0x444, 0x445, 0x446, 0x447,
|
||||
0x448, 0x449, 0x44a, 0x44b, 0x44c, 0x44d, 0x44e, 0x44f }
|
||||
0x448, 0x449, 0x44a, 0x44b, 0x44c, 0x44d, 0x44e, 0x44f },
|
||||
/* CP103 (KOI8-T) */
|
||||
{ 0x049b, 0x0493, 0x201a, 0x0492, 0x201e, 0x2026, 0x2020, 0x2021,
|
||||
0x88, 0x2030, 0x04b3, 0x2039, 0x04b2, 0x04b7, 0x04b6, 0x8f,
|
||||
0x049a, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014,
|
||||
0x98, 0x2122, 0x9a, 0x203a, 0x9c, 0x9d, 0x9e, 0x9f,
|
||||
0xa0, 0x04ef, 0x04ee, 0x0451, 0xa4, 0x04e3, 0xa6, 0xa7,
|
||||
0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
|
||||
0xb0, 0xb1, 0xb2, 0x401, 0xb4, 0x04e2, 0xb6, 0xb7,
|
||||
0xb8, 0x2116, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xa9,
|
||||
0x44e, 0x430, 0x431, 0x446, 0x434, 0x435, 0x444, 0x433,
|
||||
0x445, 0x438, 0x439, 0x43a, 0x43b, 0x43c, 0x43d, 0x43e,
|
||||
0x43f, 0x44f, 0x440, 0x441, 0x442, 0x443, 0x436, 0x432,
|
||||
0x44c, 0x44b, 0x437, 0x448, 0x44d, 0x449, 0x447, 0x44a,
|
||||
0x42e, 0x410, 0x411, 0x426, 0x414, 0x415, 0x424, 0x413,
|
||||
0x425, 0x418, 0x419, 0x41a, 0x41b, 0x41c, 0x41d, 0x41e,
|
||||
0x41f, 0x42f, 0x420, 0x421, 0x422, 0x423, 0x416, 0x412,
|
||||
0x42c, 0x42b, 0x417, 0x428, 0x42d, 0x429, 0x427, 0x42a },
|
||||
};
|
||||
#endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
|
||||
|
||||
|
@ -776,6 +793,9 @@ __cp_val_index (int val)
|
|||
case 102:
|
||||
cp_idx = 25;
|
||||
break;
|
||||
case 103:
|
||||
cp_idx = 26;
|
||||
break;
|
||||
default:
|
||||
cp_idx = -1;
|
||||
break;
|
||||
|
|
|
@ -1510,6 +1510,8 @@ __set_charset_from_locale (const char *locale, char *charset)
|
|||
cs = "CP1251";
|
||||
else if (lcid == 0x0422) /* uk_UA (Ukrainian/Ukraine) */
|
||||
cs = "KOI8-U";
|
||||
else if (lcid == 0x0428) /* tg_TJ (Tajik/Tajikistan) */
|
||||
cs = "KOI8-T";
|
||||
else
|
||||
cs = "ISO-8859-5";
|
||||
break;
|
||||
|
|
|
@ -486,6 +486,7 @@ print_charmaps ()
|
|||
"ISO-8859-8",
|
||||
"ISO-8859-9",
|
||||
"KOI8-R",
|
||||
"KOI8-T",
|
||||
"KOI8-U",
|
||||
"PT154",
|
||||
"SJIS",
|
||||
|
|
Loading…
Reference in New Issue