442 lines
8.7 KiB
Plaintext
442 lines
8.7 KiB
Plaintext
#
|
|
# This file describes dependencies between encodings, CES, CCS, etc.
|
|
# File relates only to UCS-based conversions and is needed for automatic
|
|
# generation of C source files and C header files.
|
|
#
|
|
# This configuration file consists of sections, each section consists of
|
|
# entries.
|
|
#
|
|
# Use only normalized names.
|
|
#
|
|
|
|
#
|
|
# The first section named "ENCODINGS" describes:
|
|
# 1. CES converter corresponding to each encoding;
|
|
# 2. Each encoding's aliases;
|
|
# 3. CCS tables corresponding to each encoding.
|
|
#
|
|
SECTION ENCODINGS
|
|
|
|
# ISO-10646-UCS-2. Big Endian, NBSP is always interpreted as NBSP (BOM isn't supported).
|
|
ENTRY
|
|
ENCODING: ucs_2
|
|
CES: ucs_2
|
|
ALIASES: ucs2 iso_10646_ucs_2 iso10646_ucs_2 iso_10646_ucs2 iso10646_ucs2 iso10646ucs2 csUnicode
|
|
ENTRY END
|
|
|
|
# Big Endian version of ISO-10646-UCS-2 (in fact, equivalent to ucs_2).
|
|
# Big Endian, NBSP is always interpreted as NBSP (BOM isn't supported).
|
|
ENTRY
|
|
ENCODING: ucs_2be
|
|
CES: ucs_2
|
|
ALIASES: ucs2be
|
|
ENTRY END
|
|
|
|
# Little Endian version of ISO-10646-UCS-2.
|
|
# Little Endian, NBSP is always interpreted as NBSP (BOM isn't supported).
|
|
ENTRY
|
|
ENCODING: ucs_2le
|
|
CES: ucs_2
|
|
ALIASES: ucs2le
|
|
ENTRY END
|
|
|
|
# ISO-10646-UCS-2 in system byte order.
|
|
# NBSP is always interpreted as NBSP (BOM isn't supported).
|
|
# NOTE: Don't delete or rename this since it is used as widechar's
|
|
# encoding when sizeof(wchar_t) == 2
|
|
ENTRY
|
|
ENCODING: ucs_2_internal
|
|
CES: ucs_2_internal
|
|
ALIASES: ucs2_internal ucs_2internal ucs2internal
|
|
ENTRY END
|
|
|
|
# ISO-10646-UCS-4. Big Endian, NBSP is always interpreted as NBSP (BOM isn't supported).
|
|
ENTRY
|
|
ENCODING: ucs_4
|
|
CES: ucs_4
|
|
ALIASES: ucs4 iso_10646_ucs_4 iso10646_ucs_4 iso_10646_ucs4 iso10646_ucs4 iso10646ucs4
|
|
ENTRY END
|
|
|
|
# Big Endian version of ISO-10646-UCS-4 (in fact, equivalent to ucs_4).
|
|
# Big Endian, NBSP is always interpreted as NBSP (BOM isn't supported).
|
|
ENTRY
|
|
ENCODING: ucs_4be
|
|
CES: ucs_4
|
|
ALIASES: ucs4be
|
|
ENTRY END
|
|
|
|
# Little Endian version of ISO-10646-UCS-4.
|
|
# Little Endian, NBSP is always interpreted as NBSP (BOM isn't supported).
|
|
ENTRY
|
|
ENCODING: ucs_4le
|
|
CES: ucs_4
|
|
ALIASES: ucs4le
|
|
ENTRY END
|
|
|
|
# ISO-10646-UCS-4 in system byte order.
|
|
# NBSP is always interpreted as NBSP (BOM isn't supported).
|
|
# NOTE: Don't delete or rename this since it is used as widechar's
|
|
# encoding when sizeof(wchar_t) == 4
|
|
ENTRY
|
|
ENCODING: ucs_4_internal
|
|
CES: ucs_4_internal
|
|
ALIASES: ucs4_internal ucs_4internal ucs4internal
|
|
ENTRY END
|
|
|
|
# RFC 3629 UTF-8
|
|
ENTRY
|
|
ENCODING: utf_8
|
|
CES: utf_8
|
|
ALIASES: utf8
|
|
ENTRY END
|
|
|
|
# RFC 2781 UTF-16. The very first NBSP code in stream is interpreted as BOM.
|
|
ENTRY
|
|
ENCODING: utf_16
|
|
CES: utf_16
|
|
ALIASES: utf16
|
|
ENTRY END
|
|
|
|
# Big Endian version of RFC 2781 UTF-16.
|
|
# NBSP is always interpreted as NBSP (BOM isn't supported).
|
|
ENTRY
|
|
ENCODING: utf_16be
|
|
CES: utf_16
|
|
ALIASES: utf16be
|
|
ENTRY END
|
|
|
|
# Little Endian version of RFC 2781 UTF-16.
|
|
# NBSP is always interpreted as NBSP (BOM isn't supported).
|
|
ENTRY
|
|
ENCODING: utf_16le
|
|
CES: utf_16
|
|
ALIASES: utf16le
|
|
ENTRY END
|
|
|
|
# 7-bit ASCII.
|
|
ENTRY
|
|
ENCODING: us_ascii
|
|
CES: us_ascii
|
|
ALIASES: ansi_x3.4_1968 ansi_x3.4_1986 iso_646.irv:1991 ascii iso646_us us ibm367 cp367 csascii
|
|
ENTRY END
|
|
|
|
# RFC 1489 Cyrillic
|
|
ENTRY
|
|
ENCODING: koi8_r
|
|
CES: table
|
|
CCS: koi8_r
|
|
ALIASES: cskoi8r koi8r koi8
|
|
ENTRY END
|
|
|
|
# Obsoleted Ukrainian
|
|
ENTRY
|
|
ENCODING: koi8_ru
|
|
CES: table
|
|
CCS: koi8_ru
|
|
ALIASES: koi8ru
|
|
ENTRY END
|
|
|
|
# RFC 2319 Ukrainian
|
|
ENTRY
|
|
ENCODING: koi8_u
|
|
CES: table
|
|
CCS: koi8_u
|
|
ALIASES: koi8u
|
|
ENTRY END
|
|
|
|
# KOI8 Unified
|
|
ENTRY
|
|
ENCODING: koi8_uni
|
|
CES: table
|
|
CCS: koi8_uni
|
|
ALIASES: koi8uni
|
|
ENTRY END
|
|
|
|
# ISO IR 111/ECMA Cyrillic.
|
|
ENTRY
|
|
ENCODING: iso_ir_111
|
|
CES: table
|
|
CCS: iso_ir_111
|
|
ALIASES: ecma_cyrillic koi8_e koi8e csiso111ecmacyrillic
|
|
ENTRY END
|
|
|
|
# ISO 8859-1:1987 - Latin 1, West European
|
|
ENTRY
|
|
ENCODING: iso_8859_1
|
|
CES: table
|
|
CCS: iso_8859_1
|
|
ALIASES: iso8859_1 iso88591 iso_8859_1:1987 iso_ir_100 latin1 l1 ibm819 cp819 csisolatin1
|
|
ENTRY END
|
|
|
|
# ISO 8859-2:1987 - Latin 2, East European
|
|
ENTRY
|
|
ENCODING: iso_8859_2
|
|
CES: table
|
|
CCS: iso_8859_2
|
|
ALIASES: iso8859_2 iso88592 iso_8859_2:1987 iso_ir_101 latin2 l2 csisolatin2
|
|
ENTRY END
|
|
|
|
# ISO 8859-3:1988 - Latin 3, South European
|
|
ENTRY
|
|
ENCODING: iso_8859_3
|
|
CES: table
|
|
CCS: iso_8859_3
|
|
ALIASES: iso_8859_3:1988 iso_ir_109 iso8859_3 latin3 l3 csisolatin3 iso88593
|
|
ENTRY END
|
|
|
|
# ISO 8859-4:1988 - Latin 4, North European
|
|
ENTRY
|
|
ENCODING: iso_8859_4
|
|
CES: table
|
|
CCS: iso_8859_4
|
|
ALIASES: iso8859_4 iso88594 iso_8859_4:1988 iso_ir_110 latin4 l4 csisolatin4
|
|
ENTRY END
|
|
|
|
# ISO 8859-5:1988 - Cyrillic
|
|
ENTRY
|
|
ENCODING: iso_8859_5
|
|
CES: table
|
|
CCS: iso_8859_5
|
|
ALIASES: iso8859_5 iso88595 iso_8859_5:1988 iso_ir_144 cyrillic csisolatincyrillic
|
|
ENTRY END
|
|
|
|
# ISO 8859-6:1987 - Arabic
|
|
ENTRY
|
|
ENCODING: iso_8859_6
|
|
CES: table
|
|
CCS: iso_8859_6
|
|
ALIASES: iso_8859_6:1987 iso_ir_127 iso8859_6 ecma_114 asmo_708 arabic csisolatinarabic iso88596
|
|
ENTRY END
|
|
|
|
# ISO 8859-7:1987 - Greek
|
|
ENTRY
|
|
ENCODING: iso_8859_7
|
|
CES: table
|
|
CCS: iso_8859_7
|
|
ALIASES: iso_8859_7:1987 iso_ir_126 iso8859_7 elot_928 ecma_118 greek greek8 csisolatingreek iso88597
|
|
ENTRY END
|
|
|
|
# ISO 8859-8:1988 - Hebrew
|
|
ENTRY
|
|
ENCODING: iso_8859_8
|
|
CES: table
|
|
CCS: iso_8859_8
|
|
ALIASES: iso_8859_8:1988 iso_ir_138 iso8859_8 hebrew csisolatinhebrew iso88598
|
|
ENTRY END
|
|
|
|
# ISO 8859-9:1989 - Latin 5, Turkish
|
|
ENTRY
|
|
ENCODING: iso_8859_9
|
|
CES: table
|
|
CCS: iso_8859_9
|
|
ALIASES: iso_8859_9:1989 iso_ir_148 iso8859_9 latin5 l5 csisolatin5 iso88599
|
|
ENTRY END
|
|
|
|
# ISO 8859-10:1992 - Latin 6, Nordic
|
|
ENTRY
|
|
ENCODING: iso_8859_10
|
|
CES: table
|
|
CCS: iso_8859_10
|
|
ALIASES: iso_8859_10:1992 iso_ir_157 iso885910 latin6 l6 csisolatin6 iso8859_10
|
|
ENTRY END
|
|
|
|
# ISO 8859-11 - Thai
|
|
ENTRY
|
|
ENCODING: iso_8859_11
|
|
CES: table
|
|
CCS: iso_8859_11
|
|
ALIASES: iso8859_11 iso885911
|
|
ENTRY END
|
|
|
|
# ISO 8859-13:1998 - Latin 7, Baltic Rim
|
|
ENTRY
|
|
ENCODING: iso_8859_13
|
|
CES: table
|
|
CCS: iso_8859_13
|
|
ALIASES: iso_8859_13:1998 iso8859_13 iso885913
|
|
ENTRY END
|
|
|
|
# ISO 8859-14:1998 - Latin 8, Celtic
|
|
ENTRY
|
|
ENCODING: iso_8859_14
|
|
CES: table
|
|
CCS: iso_8859_14
|
|
ALIASES: iso_8859_14:1998 iso885914 iso8859_14
|
|
ENTRY END
|
|
|
|
# ISO 8859-15:1998 - Latin 9, West Europe, successor of Latin 1
|
|
ENTRY
|
|
ENCODING: iso_8859_15
|
|
CES: table
|
|
CCS: iso_8859_15
|
|
ALIASES: iso885915 iso_8859_15:1998 iso8859_15
|
|
ENTRY END
|
|
|
|
# Win-1250
|
|
ENTRY
|
|
ENCODING: win_1250
|
|
CES: table
|
|
CCS: win_1250
|
|
ALIASES: cp1250
|
|
ENTRY END
|
|
|
|
# Win-1251 - Cyrillic
|
|
ENTRY
|
|
ENCODING: win_1251
|
|
CES: table
|
|
CCS: win_1251
|
|
ALIASES: cp1251
|
|
ENTRY END
|
|
|
|
# Win-1252 - Latin 1
|
|
ENTRY
|
|
ENCODING: win_1252
|
|
CES: table
|
|
CCS: win_1252
|
|
ALIASES: cp1252
|
|
ENTRY END
|
|
|
|
# Win-1253 - Greek
|
|
ENTRY
|
|
ENCODING: win_1253
|
|
CES: table
|
|
CCS: win_1253
|
|
ALIASES: cp1253
|
|
ENTRY END
|
|
|
|
# Win-1254 - Turkish
|
|
ENTRY
|
|
ENCODING: win_1254
|
|
CES: table
|
|
CCS: win_1254
|
|
ALIASES: cp1254
|
|
ENTRY END
|
|
|
|
# Win-1255 - Hebrew
|
|
ENTRY
|
|
ENCODING: win_1255
|
|
CES: table
|
|
CCS: win_1255
|
|
ALIASES: cp1255
|
|
ENTRY END
|
|
|
|
# Win-1256 - Arabic
|
|
ENTRY
|
|
ENCODING: win_1256
|
|
CES: table
|
|
CCS: win_1256
|
|
ALIASES: cp1256
|
|
ENTRY END
|
|
|
|
# Win-1257 - Baltic
|
|
ENTRY
|
|
ENCODING: win_1257
|
|
CES: table
|
|
CCS: win_1257
|
|
ALIASES: cp1257
|
|
ENTRY END
|
|
|
|
# Win-1258 - Vietnamese
|
|
ENTRY
|
|
ENCODING: win_1258
|
|
CES: table
|
|
CCS: win_1258
|
|
ALIASES: cp1258
|
|
ENTRY END
|
|
|
|
# big5 - an encoding for Traditional Chinese
|
|
ENTRY
|
|
ENCODING: big5
|
|
CES: table_pcs
|
|
CCS: big5
|
|
ALIASES: csbig5 big_five bigfive cn_big5 cp950
|
|
ENTRY END
|
|
|
|
# IBM 775 - an updated version of CP 437 that supports Baltic languages.
|
|
ENTRY
|
|
ENCODING: cp775
|
|
CES: table
|
|
CCS: cp775
|
|
ALIASES: ibm775 cspc775baltic
|
|
ENTRY END
|
|
|
|
# IBM 850 - an updated version of CP 437 where several Latin 1 characters have been
|
|
# added instead of some less-often used characters like line-drawing and Greek ones.
|
|
ENTRY
|
|
ENCODING: cp850
|
|
CES: table
|
|
CCS: cp850
|
|
ALIASES: ibm850 850 cspc850multilingual
|
|
ENTRY END
|
|
|
|
# IBM 852 - an updated version of CP 437 where several Latin 2 characters have been added
|
|
# instead of some less-often used characters like line-drawing and Greek ones.
|
|
ENTRY
|
|
ENCODING: cp852
|
|
CES: table
|
|
CCS: cp852
|
|
ALIASES: ibm852 852 cspcp852
|
|
ENTRY END
|
|
|
|
# IBM 855 - an updated version of CP 437 that supports Cyrillic.
|
|
ENTRY
|
|
ENCODING: cp855
|
|
CES: table
|
|
CCS: cp855
|
|
ALIASES: ibm855 855 csibm855
|
|
ENTRY END
|
|
|
|
# IBM 866 - an updated version of CP 855 which follows the more logical Russian alphabet
|
|
# ordering of the alternativny variant that is preferred by many Russian users.
|
|
ENTRY
|
|
ENCODING: cp866
|
|
CES: table
|
|
CCS: cp866
|
|
ALIASES: 866 IBM866 CSIBM866
|
|
ENTRY END
|
|
|
|
# EUC-JP - The EUC for Japanese
|
|
ENTRY
|
|
ENCODING: euc_jp
|
|
CES: euc
|
|
CCS: jis_x0208_1990 jis_x0201_1976 jis_x0212_1990
|
|
ALIASES: eucjp
|
|
ENTRY END
|
|
|
|
# EUC-KR - The EUC for Korean
|
|
ENTRY
|
|
ENCODING: euc_kr
|
|
CES: euc
|
|
CCS: ksx1001
|
|
ALIASES: euckr
|
|
ENTRY END
|
|
|
|
# EUC-TW - The EUC for Traditional Chinese
|
|
ENTRY
|
|
ENCODING: euc_tw
|
|
CES: euc
|
|
CCS: cns11643_plane1 cns11643_plane2 cns11643_plane14
|
|
ALIASES: euctw
|
|
ENTRY END
|
|
|
|
SECTION END
|
|
|
|
#
|
|
# This section is named "CES_DEPENDENCIES" and describes dependencies
|
|
# between CES converters (some CES converters may use other CES converters).
|
|
#
|
|
SECTION CES_DEPENDENCIES
|
|
|
|
ENTRY
|
|
CES: table_pcs
|
|
USED_CES: table
|
|
ENTRY END
|
|
|
|
ENTRY
|
|
CES: euc
|
|
USED_CES: table us_ascii
|
|
ENTRY END
|
|
|
|
SECTION END
|
|
|