newlib-cygwin/newlib/libc/iconv/ccs/iconv_mktbl

270 lines
7.5 KiB
Perl

#! /usr/bin/perl
#
# Copyright (c) 1999, 2000
# Konstantin Chuguev. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
#
# iconv (Charset Conversion Library) v2.0
#
require 'getopts.pl';
use integer;
# Render a packed byte string as a C macro invocation.
#   $_[0] - macro suffix; the output starts with "_<suffix>("
#   $_[1] - packed string; up to its first four bytes are listed as 0xNN
# Returns text such as "_1l(0x00, 0x00, 0x00, 0x08)".
sub pack_hex {
    my ($suffix, $bytes) = @_;
    my @octets = unpack('C4', $bytes);
    my @hex    = map { sprintf("0x%02X", $_) } @octets;
    return '_' . $suffix . '(' . join(', ', @hex) . ')';
}
# Serialize the first $size elements of @$array_ref.
#   $size      - number of elements to emit
#   $format    - 'N' for 32-bit values; anything else is handled as 16-bit
#   $array_ref - reference to the array of integers
# When the global $opt_C is false the result is the binary string produced
# by pack("$format$size", ...).  When $opt_C is true the result is C
# initializer text built with pack_hex(), and the global $array_size grows
# by the number of bytes those elements occupy (4 or 2 per element).
sub pack_array {
    my ($size, $format, $array_ref) = @_;

    # Binary output: pack() does all the work.
    if (!$opt_C) {
        return pack($format . $size, @$array_ref);
    }

    my $text;
    my $pos = 0;
    if ($format eq 'N') {
        # Two 32-bit values per source line, each wrapped in _1l(...).
        while ($pos < $size) {
            my $first  = pack_hex('1l', pack("N", $array_ref->[$pos]));
            my $second = pack_hex('1l', pack("N", $array_ref->[$pos + 1]));
            $text .= "\t" . $first . ", " . $second . ",\n";
            $pos += 2;
        }
        $array_size += $size * 4;
    } else {
        # Four 16-bit values per source line, as two _2s(...) pairs.
        while ($pos < $size) {
            my $first = pack_hex('2s',
                pack("n2", $array_ref->[$pos], $array_ref->[$pos + 1]));
            my $second = pack_hex('2s',
                pack("n2", $array_ref->[$pos + 2], $array_ref->[$pos + 3]));
            $text .= "\t" . $first . ", " . $second . ",\n";
            $pos += 4;
        }
        $array_size += $size * 2;
    }
    return $text;
}
# Create an array of short/long values in network byte order.
# Every undefined slot among the first $size elements of @$array_ref is set
# to $default (the caller's array is modified in place); the elements are
# then serialized by pack_array() using $format.
sub build_array {
    my ($size, $format, $default, $array_ref) = @_;
    foreach my $slot (0 .. $size - 1) {
        next if defined($array_ref->[$slot]);
        $array_ref->[$slot] = $default;
    }
    return pack_array($size, $format, $array_ref);
}
# Build a one-level table: $size 16-bit entries taken from the array
# referenced by the second argument, with undefined entries filled by 0xFFFE.
sub build_table1 {
    my ($entries, $codes) = @_;
    my $filler = 0xFFFE;
    return build_array($entries, "n", $filler, $codes);
}
# Build a two-level table from an array of array references.
# The result starts with an index of $size 32-bit entries: for each defined
# row the entry is the byte offset of its subtable (counted from the start
# of the index), and 0 for rows that are absent.  The index is followed by
# one build_table1() subtable of $size entries per defined row, in row
# order.  The number of subtables is reported on STDERR.
sub build_table2 {
    my ($size, $array_ref) = @_;
    my $subtable_bytes = $size * 2;    # $size entries of 2 bytes each
    my $next_offset    = $size * 4;    # first subtable follows the index
    my @offsets;

    my @present = grep { defined($array_ref->[$_]) } 0 .. $size - 1;
    foreach my $row (@present) {
        $offsets[$row] = $next_offset;
        $next_offset += $subtable_bytes;
    }

    my $table = build_array($size, "N", 0, \@offsets);
    foreach my $row (@present) {
        $table .= build_table1($size, $array_ref->[$row]);
    }

    printf STDERR "%d subtables.\n", scalar(@present);
    return $table;
}
# Charset property flags; set_val() raises them as codes are recorded.
($control0, $control1, $delete) = (0, 0, 0);
# Conversion tables indexed as [high byte][low byte]:
# local charset code -> Unicode, and Unicode -> local charset code.
@to_ucs   = ();
@from_ucs = ();
# Record one mapping in both conversion tables and update the charset
# property flags.
#   $cs  - local charset code
#   $ucs - Unicode code
# With -a ($opt_a) codes above 0x7F are ignored entirely.
sub set_val {
    my ($cs, $ucs) = @_;
    return if $opt_a && $cs > 0x7F;

    my ($cs_row, $cs_col)   = ($cs >> 8, $cs & 0xFF);
    my ($ucs_row, $ucs_col) = ($ucs >> 8, $ucs & 0xFF);
    $to_ucs[$cs_row][$cs_col]     = $ucs;
    $from_ucs[$ucs_row][$ucs_col] = $cs;

    # Only bits 5-7 of the code are inspected here, so for multi-byte codes
    # the test is effectively applied to the low byte.
    my $top_bits = $cs & 0xE0;
    if ($top_bits == 0x80) {
        $control1 = 1;              # 0x80..0x9F
    } elsif ($top_bits == 0) {
        $control0 = 1;              # 0x00..0x1F
    }

    $delete = 1 if $cs == 0x7F;

    # Remember which code widths have been seen.
    if ($cs >= 0x100) {
        if ($cs & 0x8080) {
            $_16bit = 1;
        } else {
            $_14bit = 1;
        }
    } elsif ($cs >= 0x80) {
        $_8bit = 1;
    } else {
        $_7bit = 1;
    }
}
# Map every code passed in to the identical Unicode value, i.e. call
# set_val($code, $code) for each argument.
sub set_range {
    foreach my $code (@_) {
        set_val($code, $code);
    }
}
# Parse the command line; each option ends up in a global $opt_<letter>.
&Getopts('aCc:Mm:o:p:u:');
#         ||| || | | +- u N: field number for Unicode character codes
#         ||| || | +--- p str: prefix
#         ||| || +----- o file: output file name
#         ||| |+------- m file: character mnemonic table from RFC1345
#         ||| +-------- M: Macintosh newline (<CR> only)
#         ||+---------- c N: field number for charset character codes
#         |+----------- C: make C source file
#         +------------ a: ignore 8 bit (for ASCII)

# Defaults for options that were not given.
$opt_c = 0    unless defined($opt_c);
$opt_p = '0x' unless defined($opt_p);
$opt_u = 1    unless defined($opt_u);

if ($opt_o) {
    # Hyphens in the output name are turned into underscores before the
    # file is created, so the file on disk carries the normalized name.
    $opt_o =~ tr/-/_/;
    # Fail loudly: otherwise the table would silently go to the original
    # standard output when the file cannot be created.
    open(STDOUT, '>', $opt_o)
        or die "$0: cannot open $opt_o for writing: $!\n";
    # Strip a literal ".c" suffix only.  The dot must be escaped, or any
    # name ending in "<char>c" (e.g. "ebcdic") would lose two characters.
    $opt_o =~ s/\.c$//;
}

# Mnemonic table (mnemonic => hex code string); filled when -m is given.
%map = ();

if ($opt_M) {
    # Read input records terminated by carriage return.
    $/ = "\cM";
}
if ($opt_m) {
    # RFC1345 mode.  First load the mnemonic table: only lines that start
    # with exactly one space followed by a non-space are used; the first
    # field is the mnemonic and the second its hexadecimal code.
    open(my $map_fh, '<', $opt_m)
        or die "$0: cannot open mnemonic table $opt_m: $!\n";
    while (<$map_fh>) {
        # chomp, not chop: a final line without a terminator must not lose
        # its last character.
        chomp;
        next unless /^ [^ ]/;
        # Split into a named array; split no longer fills @_ implicitly.
        my @fields = split;
        next if @fields < 2;
        $map{$fields[0]} = $fields[1];
    }
    close($map_fh);

    # Then read the charset description.  Lines starting with "&" and a
    # lowercase letter are directives, of which only "&code N" (set the
    # current charset code) is acted upon.  Every other line is a list of
    # mnemonics for consecutive codes; "??" leaves a code unassigned.
    my $code = 0;
    while (<>) {
        chomp;
        s/^ *//;
        if (/^&[a-z]/) {
            my @directive = split(' ', substr($_, 1));
            if ($directive[0] eq 'code') {
                $code = $directive[1];
            }
        } else {
            foreach (split) {
                set_val($code, hex "0x$map{$_}") if $_ ne '??';
                $code ++;
            }
        }
    }
} else {
    # Plain table mode: whitespace-separated fields, "#" starts a comment.
    # Field $opt_c holds the charset code and field $opt_u the Unicode code.
    while (<>) {
        s/[#\n].*//;
        my @fields = split;
        next if @fields < 2;            # too few fields
        # Replace the charset code prefix by "0x"; skip the line when the
        # prefix is missing (local charset code prefix is invalid).
        next if ($fields[$opt_c] =~ s/^$opt_p/0x/o) != 1;
        set_val(hex $fields[$opt_c], hex $fields[$opt_u]);
    }
}
# Choose the charset width from the flags gathered by set_val(), add the
# standard codes the input left out, and build the charset -> Unicode
# table in $to.  $nbits/$type become 16/3, 14/2, 8/1 or 7/0.
if ($_16bit) {
    print STDERR "16bit charset";
    if ($_7bit || $_8bit) {
        # Single-byte codes are present; only control0 may be missing.
        if (!$control0) {
            set_range(0 .. 0x1F);
            print STDERR "; control0 chars added";
        }
    } else {
        set_range(0 .. 0x7F);
        print STDERR "; ASCII subset added";
    }
    print STDERR ".\n";
    $to = build_table2(256, \@to_ucs);
    ($nbits, $type) = (16, 3);
} elsif ($_14bit) {
    print STDERR "14bit charset.\n";
    $to = build_table2(128, \@to_ucs);
    ($nbits, $type) = (14, 2);
} else {
    # Only single-byte codes were seen.
    if ($_8bit) {
        print STDERR "8bit charset";
        # Each entry: flag to test, codes to add when it is unset, note.
        foreach my $fill (
            [\$control0, [0 .. 0x1F],    "; control0 chars added"],
            [\$control1, [0x80 .. 0x9F], "; control1 chars added"],
            [\$delete,   [0x7F],         "; delete char added"],
        ) {
            my ($flag, $codes, $note) = @$fill;
            next if $$flag;
            set_range(@$codes);
            print STDERR $note;
        }
        ($nbits, $type) = (8, 1);
    } else {
        print STDERR "7bit charset";
        ($nbits, $type) = (7, 0);
    }
    print STDERR ".\n";
    $to = build_table1($_8bit ? 256 : 128, $to_ucs[0]);
}

# Size of the first table: in -C mode the byte count accumulated by
# pack_array() so far, otherwise the length of the binary string.  It must
# be taken before the second table is built.
$to_size = $opt_C ? $array_size : length($to);

# Unicode -> charset table, always two-level with 256 rows.
$from = build_table2(256, \@from_ucs);
# Write the result to STDOUT: a C source file with -C, otherwise the
# binary table.
if ($opt_C) {
    die "-o option is mandatory with -C" unless $opt_o;
    # Derive the C identifier from the output name: drop a ".c" suffix and
    # turn hyphens into underscores; the guard macro uses it in upper case.
    $opt_o =~ s/\.c$//;
    $opt_o =~ tr/-/_/;
    $name = $opt_o;
    $name =~ tr/a-z/A-Z/;
    print "#include \"../lib/deps.h\"\n\n";
    print "#ifdef _ICONV_CONVERTER_$name\n";
    print "#include \"../lib/endian.h\"\n\n";
    print "_CONST unsigned char _iconv_ccs_table_$opt_o" . "[] = {\n";
    # Header bytes, followed by the two 32-bit values 8 and 8 + $to_size.
    print "\t3, 'C', 'S', 'C', 'T', ICONV_ORDER, $nbits, $type,\n";
    print pack_array(2, 'N', [8, 8 + $to_size]);
    print $to;
    print $from;
    print "};\n\n";
    print "#endif /* #ifdef _ICONV_CONVERTER_$name */\n\n";
} else {
    # The table is raw binary and may contain 0x0A bytes; switch off any
    # newline translation so they are written unchanged on every platform.
    binmode(STDOUT);
    # Header: "\003CSCT", a zero byte, $nbits, $type, then the big-endian
    # 32-bit values 8 and 8 + $to_size.
    print pack("A5CCCNN", "\003CSCT", 0, $nbits, $type, 8, 8 + $to_size);
    print $to;
    print $from;
}