Cygwin: support KOI8-T codeset

KOI8-T is used on Linux as the default codeset for Tajik. There's no
matching Windows codepage, so it is faked as CP103.

Signed-off-by: Corinna Vinschen <corinna@vinschen.de>
This commit is contained in:
Corinna Vinschen 2023-02-24 16:07:26 +01:00
parent ac405ab9bc
commit 89eb4bce15
5 changed files with 55 additions and 25 deletions

View File

@ -50,10 +50,10 @@ but uses the UTF-8 charset.
The following charsets are recognized: The following charsets are recognized:
<<"UTF-8">>, <<"JIS">>, <<"EUCJP">>, <<"SJIS">>, <<"KOI8-R">>, <<"KOI8-U">>, <<"UTF-8">>, <<"JIS">>, <<"EUCJP">>, <<"SJIS">>, <<"KOI8-R">>, <<"KOI8-U">>,
<<"GEORGIAN-PS">>, <<"PT154">>, <<"TIS-620">>, <<"ISO-8859-x">> with <<"KOI8-T">>, <<"GEORGIAN-PS">>, <<"PT154">>, <<"TIS-620">>, <<"ISO-8859-x">>
1 <= x <= 16, or <<"CPxxx">> with xxx in [437, 720, 737, 775, 850, 852, 855, with 1 <= x <= 16, or <<"CPxxx">> with xxx in [437, 720, 737, 775, 850, 852,
857, 858, 862, 866, 874, 932, 1125, 1250, 1251, 1252, 1253, 1254, 1255, 1256, 855, 857, 858, 862, 866, 874, 932, 1125, 1250, 1251, 1252, 1253, 1254, 1255,
1257, 1258]. 1256, 1257, 1258].
Charsets are case insensitive. For instance, <<"EUCJP">> and <<"eucJP">> Charsets are case insensitive. For instance, <<"EUCJP">> and <<"eucJP">>
are equivalent. Charset names with dashes can also be written without are equivalent. Charset names with dashes can also be written without
@ -769,7 +769,7 @@ restart:
break; break;
case 'K': case 'K':
case 'k': case 'k':
/* KOI8-R, KOI8-U and the aliases without dash */ /* KOI8-R, KOI8-U, KOI8-T and the aliases without dash */
if (strncasecmp (charset, "KOI8", 4)) if (strncasecmp (charset, "KOI8", 4))
FAIL; FAIL;
c = charset + 4; c = charset + 4;
@ -785,6 +785,11 @@ restart:
val = 21866; val = 21866;
strcpy (charset, "CP21866"); strcpy (charset, "CP21866");
} }
else if (*c == 'T' || *c == 't')
{
val = 103;
strcpy (charset, "CP103");
}
else else
FAIL; FAIL;
mbc_max = 1; mbc_max = 1;

View File

@ -233,6 +233,8 @@ do_codeset:
ret = "GEORGIAN-PS"; ret = "GEORGIAN-PS";
else if (strcmp (ret + 2, "102") == 0) else if (strcmp (ret + 2, "102") == 0)
ret = "PT154"; ret = "PT154";
else if (strcmp (ret + 2, "103") == 0)
ret = "KOI8-T";
} }
else if (ret[0] == 'S'/*JIS*/) else if (ret[0] == 'S'/*JIS*/)
{ {

View File

@ -201,7 +201,7 @@ wchar_t __iso_8859_conv[14][0x60] = {
value (function __cp_index), the second index is the value of the value (function __cp_index), the second index is the value of the
incoming character - 0x80. incoming character - 0x80.
Values < 0x80 don't have to be converted anyway. */ Values < 0x80 don't have to be converted anyway. */
wchar_t __cp_conv[26][0x80] = { wchar_t __cp_conv[27][0x80] = {
/* CP437 */ /* CP437 */
{ 0xc7, 0xfc, 0xe9, 0xe2, 0xe4, 0xe0, 0xe5, 0xe7, { 0xc7, 0xfc, 0xe9, 0xe2, 0xe4, 0xe0, 0xe5, 0xe7,
0xea, 0xeb, 0xe8, 0xef, 0xee, 0xec, 0xc4, 0xc5, 0xea, 0xeb, 0xe8, 0xef, 0xee, 0xec, 0xc4, 0xc5,
@ -649,7 +649,24 @@ wchar_t __cp_conv[26][0x80] = {
0x430, 0x431, 0x432, 0x433, 0x434, 0x435, 0x436, 0x437, 0x430, 0x431, 0x432, 0x433, 0x434, 0x435, 0x436, 0x437,
0x438, 0x439, 0x43a, 0x43b, 0x43c, 0x43d, 0x43e, 0x43f, 0x438, 0x439, 0x43a, 0x43b, 0x43c, 0x43d, 0x43e, 0x43f,
0x440, 0x441, 0x442, 0x443, 0x444, 0x445, 0x446, 0x447, 0x440, 0x441, 0x442, 0x443, 0x444, 0x445, 0x446, 0x447,
0x448, 0x449, 0x44a, 0x44b, 0x44c, 0x44d, 0x44e, 0x44f } 0x448, 0x449, 0x44a, 0x44b, 0x44c, 0x44d, 0x44e, 0x44f },
/* CP103 (KOI8-T) */
{ 0x049b, 0x0493, 0x201a, 0x0492, 0x201e, 0x2026, 0x2020, 0x2021,
0x88, 0x2030, 0x04b3, 0x2039, 0x04b2, 0x04b7, 0x04b6, 0x8f,
0x049a, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014,
0x98, 0x2122, 0x9a, 0x203a, 0x9c, 0x9d, 0x9e, 0x9f,
0xa0, 0x04ef, 0x04ee, 0x0451, 0xa4, 0x04e3, 0xa6, 0xa7,
0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
0xb0, 0xb1, 0xb2, 0x401, 0xb4, 0x04e2, 0xb6, 0xb7,
0xb8, 0x2116, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xa9,
0x44e, 0x430, 0x431, 0x446, 0x434, 0x435, 0x444, 0x433,
0x445, 0x438, 0x439, 0x43a, 0x43b, 0x43c, 0x43d, 0x43e,
0x43f, 0x44f, 0x440, 0x441, 0x442, 0x443, 0x436, 0x432,
0x44c, 0x44b, 0x437, 0x448, 0x44d, 0x449, 0x447, 0x44a,
0x42e, 0x410, 0x411, 0x426, 0x414, 0x415, 0x424, 0x413,
0x425, 0x418, 0x419, 0x41a, 0x41b, 0x41c, 0x41d, 0x41e,
0x41f, 0x42f, 0x420, 0x421, 0x422, 0x423, 0x416, 0x412,
0x42c, 0x42b, 0x417, 0x428, 0x42d, 0x429, 0x427, 0x42a },
}; };
#endif /* _MB_EXTENDED_CHARSETS_WINDOWS */ #endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
@ -776,6 +793,9 @@ __cp_val_index (int val)
case 102: case 102:
cp_idx = 25; cp_idx = 25;
break; break;
case 103:
cp_idx = 26;
break;
default: default:
cp_idx = -1; cp_idx = -1;
break; break;

View File

@ -1510,6 +1510,8 @@ __set_charset_from_locale (const char *locale, char *charset)
cs = "CP1251"; cs = "CP1251";
else if (lcid == 0x0422) /* uk_UA (Ukrainian/Ukraine) */ else if (lcid == 0x0422) /* uk_UA (Ukrainian/Ukraine) */
cs = "KOI8-U"; cs = "KOI8-U";
else if (lcid == 0x0428) /* tg_TJ (Tajik/Tajikistan) */
cs = "KOI8-T";
else else
cs = "ISO-8859-5"; cs = "ISO-8859-5";
break; break;

View File

@ -486,6 +486,7 @@ print_charmaps ()
"ISO-8859-8", "ISO-8859-8",
"ISO-8859-9", "ISO-8859-9",
"KOI8-R", "KOI8-R",
"KOI8-T",
"KOI8-U", "KOI8-U",
"PT154", "PT154",
"SJIS", "SJIS",