270 lines
7.5 KiB
Perl
270 lines
7.5 KiB
Perl
#! /usr/bin/perl
|
|
#
|
|
# Copyright (c) 1999, 2000
|
|
# Konstantin Chuguev. All rights reserved.
|
|
#
|
|
# Redistribution and use in source and binary forms, with or without
|
|
# modification, are permitted provided that the following conditions
|
|
# are met:
|
|
# 1. Redistributions of source code must retain the above copyright
|
|
# notice, this list of conditions and the following disclaimer.
|
|
# 2. Redistributions in binary form must reproduce the above copyright
|
|
# notice, this list of conditions and the following disclaimer in the
|
|
# documentation and/or other materials provided with the distribution.
|
|
#
|
|
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
|
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
|
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
# SUCH DAMAGE.
|
|
#
|
|
# iconv (Charset Conversion Library) v2.0
|
|
#
|
|
|
|
require 'getopts.pl';
|
|
use integer;
|
|
|
|
# Format a packed byte string as a call-like token for C source output.
#   $_[0]  suffix placed after the leading underscore (callers pass '1l' or '2s')
#   $_[1]  packed string; at most its first four bytes are used
# Returns text such as "_1l(0x00, 0x00, 0x00, 0x08)".
# NOTE(review): _1l/_2s are presumably macros from ../lib/endian.h - confirm.
sub pack_hex {
    my($suffix, $packed) = @_;
    my @octets = unpack('C4', $packed);
    my @literals = map { sprintf("0x%02X", $_) } @octets;
    return "_" . $suffix . "(" . join(", ", @literals) . ")";
}
|
|
|
|
# Serialize the first $size elements of @$array_ref.
#   $size       number of elements to emit
#   $format     pack() template letter: 'N' (32-bit) or anything else,
#               which is treated as 16-bit values
#   $array_ref  reference to the array of values
# Without -C ($opt_C false) the result is the binary string produced by
# pack("$format$size", ...).  With -C the result is C initializer text: one
# tab-indented line per pair of pack_hex() cells, and the number of bytes
# those cells stand for is added to the global $array_size.
sub pack_array {
    my($size, $format, $array_ref) = @_;

    # Binary output needs nothing more than a single pack().
    unless ($opt_C) {
        return pack($format . $size, @$array_ref);
    }

    # Each text cell always covers four bytes: one 32-bit value, or two
    # 16-bit values.
    my($suffix, $template, $per_cell, $width) =
        $format eq 'N' ? ('1l', "N", 1, 4) : ('2s', "n2", 2, 2);

    my $text;
    for (my $pos = 0; $pos < $size; $pos += 2 * $per_cell) {
        my @cells;
        foreach my $start ($pos, $pos + $per_cell) {
            my @values;
            foreach my $idx ($start .. $start + $per_cell - 1) {
                push @values, $$array_ref[$idx];
            }
            push @cells, &pack_hex($suffix, pack($template, @values));
        }
        $text .= "\t" . join(", ", @cells) . ",\n";
    }
    $array_size += $size * $width;
    return $text;
}
|
|
|
|
# Create an array of short/long values in network byte order.
#   $size       number of elements in the finished table
#   $format     pack() template letter handed on to pack_array
#   $default    value stored into every undefined slot below $size
#   $array_ref  reference to the (possibly sparse) array; filled in place
# Returns whatever pack_array produces for the completed array.
sub build_array {
    my($size, $format, $default, $array_ref) = @_;
    my $idx = 0;
    while ($idx < $size) {
        if (!defined($array_ref->[$idx])) {
            $array_ref->[$idx] = $default;
        }
        $idx++;
    }
    return &pack_array($size, $format, $array_ref);
}
|
|
|
|
# Build a one-level table of $size 16-bit entries from @$array_ref.
# Slots that were never set are filled with 0xFFFE by build_array.
sub build_table1 {
    my($size, $array_ref) = @_;
    my $filler = 0xFFFE;
    return &build_array($size, "n", $filler, $array_ref);
}
|
|
|
|
# Build a two-level table from an array of array references.
#   $size       number of first-level slots and of entries per subtable
#   $array_ref  reference to the array whose defined elements are
#               references to the second-level arrays
# The result starts with $size 32-bit offsets (0 for an absent subtable),
# followed by one build_table1() subtable per defined slot, in slot order.
# The first subtable offset is $size * 4 and each subtable advances it by
# $size * 2.  The number of subtables is reported on STDERR.
sub build_table2 {
    my($size, $array_ref) = @_;

    # Slots that actually carry a subtable.
    my @present = grep { defined($$array_ref[$_]) } 0 .. $size-1;

    my @offs;
    my $offset = $size * 4;
    foreach my $slot (@present) {
        $offs[$slot] = $offset;
        $offset += $size * 2;
    }

    my $data = &build_array($size, "N", 0, \@offs);
    foreach my $slot (@present) {
        $data .= &build_table1($size, $$array_ref[$slot]);
    }

    printf STDERR "%d subtables.\n", scalar(@present);
    return $data;
}
|
|
|
|
# Flags raised by set_val once a code in the matching range has been seen:
# 0x00-0x1F (control0), 0x80-0x9F (control1) and 0x7F (delete).
($control0, $control1, $delete) = (0, 0, 0);

# Conversion tables, indexed as [code >> 8][code & 0xFF].
@to_ucs = ();       # charset code -> Unicode
@from_ucs = ();     # Unicode -> charset code
|
|
|
|
# Record one mapping in both conversion tables and update the charset
# properties derived from the charset code.
#   ($cs, $ucs) = (local charset code, Unicode)
# With -a ($opt_a) every charset code above 0x7F is ignored.
# Globals updated: @to_ucs, @from_ucs, $control0, $control1, $delete,
# $_7bit, $_8bit, $_14bit, $_16bit.
#
sub set_val {
    my($cs, $ucs) = @_;
    if ($opt_a && $cs > 0x7F) {
        return;
    }

    my($cs_hi, $cs_lo) = ($cs >> 8, $cs & 0xFF);
    my($ucs_hi, $ucs_lo) = ($ucs >> 8, $ucs & 0xFF);
    $to_ucs[$cs_hi][$cs_lo] = $ucs;
    $from_ucs[$ucs_hi][$ucs_lo] = $cs;

    # Bits 0x60 both clear: the low bits fall into 0x00-0x1F or 0x80-0x9F.
    unless ($cs & 0x60) {
        if ($cs & 0x80) {
            $control1 = 1;
        } else {
            $control0 = 1;
        }
    }
    if ($cs == 0x7F) {
        $delete = 1;
    }

    # Classify the code by magnitude; two-byte codes count as 16bit when
    # either byte has its top bit set, otherwise as 14bit.
    if ($cs >= 0x100) {
        if ($cs & 0x8080) {
            $_16bit = 1;
        } else {
            $_14bit = 1;
        }
    } elsif ($cs >= 0x80) {
        $_8bit = 1;
    } else {
        $_7bit = 1;
    }
}
|
|
|
|
# Enter every code of the argument list into the conversion tables as an
# identity mapping (charset code == Unicode code).
#
sub set_range {
    foreach my $code (@_) {
        &set_val($code, $code);
    }
}
|
|
|
|
&Getopts('aCc:Mm:o:p:u:');
#         ||| || | | +- u N: field number for Unicode character codes
#         ||| || | +--- p str: prefix
#         ||| || +----- o file: output file name
#         ||| |+------- m file: character mnemonic table from RFC1345
#         ||| +-------- M: Macintosh newline (<CR> only)
#         ||+---------- c N: field number for charset character codes
#         |+----------- C: make C source file
#         +------------ a: ignore 8 bit (for ASCII)

# Defaults: charset codes in field 0 with a '0x' prefix, Unicode in field 1.
$opt_c = 0 unless defined($opt_c);
$opt_p = '0x' unless defined($opt_p);
$opt_u = 1 unless defined($opt_u);

if ($opt_o) {
    # Dashes in the output name are turned into underscores before the
    # file is created; the same name later becomes part of a C identifier.
    $opt_o =~ tr/-/_/;
    # Fail loudly rather than silently writing the table to the old STDOUT.
    open(STDOUT, '>', $opt_o)
        or die "Cannot open output file $opt_o: $!\n";
    # Strip a literal ".c" suffix only.  The dot must be escaped: an
    # unescaped one also removes the tail of names that merely end in 'c'.
    $opt_o =~ s/\.c$//;
}
|
|
|
|
# RFC1345 mnemonic -> Unicode code (hexadecimal text), filled from the -m file.
%map = ();

# -M: input lines are terminated by a bare carriage return.
if ($opt_M) {
    $/ = "\cM";
}

if ($opt_m) {
    # Mnemonic mode: read the mnemonic table first, then charset
    # descriptions that list one mnemonic per consecutive charset code.
    open(MAP, '<', $opt_m)
        or die "Cannot open mnemonic table $opt_m: $!\n";
    while (<MAP>) {
        chomp;          # unlike chop, never eats data on an unterminated last line
        next unless /^ [^ ]/;
        # Split into an explicit array: implicit split to @_ is gone
        # from Perl 5.12 onwards.
        my @fields = split;
        next if @fields < 2;
        $map{$fields[0]} = $fields[1];
    }
    close(MAP);

    my $code = 0;       # charset code of the next mnemonic
    while (<>) {
        chomp;
        s/^ *//;
        if (/^&[a-z]/) {
            # Directive line; only "&code N" is acted upon: it resets
            # the running charset code.
            my @fields = split(' ', substr($_, 1));
            if ($fields[0] eq 'code') {
                $code = $fields[1];
            }
        } else {
            # Every word is a mnemonic for the next code; '??' leaves
            # that code unmapped.
            foreach (split) {
                &set_val($code, hex "0x$map{$_}") if $_ ne '??';
                $code ++;
            }
        }
    }
} else {
    # Table mode: whitespace-separated fields, '#' starts a comment.
    while (<>) {
        s/[#\n].*//;
        my @fields = split;
        next if @fields < 2;    # too few fields
        next if ($fields[$opt_c] =~ s/^$opt_p/0x/o) != 1;
                                # local charset code prefix is invalid
        &set_val(hex $fields[$opt_c], hex $fields[$opt_u]);
    }
}
|
|
|
|
# Pick the charset class from the flags set_val collected, add the codes
# the class implies but the input left out, and build the charset ->
# Unicode table in $to.  $nbits and $type later go into the table header.
if ($_16bit) {
    print STDERR "16bit charset";
    if ($_7bit || $_8bit) {
        unless ($control0) {
            &set_range(0 .. 0x1F);
            print STDERR "; control0 chars added";
        }
    } else {
        # No single-byte codes at all: supply the whole ASCII range.
        &set_range(0 .. 0x7F);
        print STDERR "; ASCII subset added";
    }
    print STDERR ".\n";
    $to = &build_table2(256, \@to_ucs);
    ($nbits, $type) = (16, 3);
} elsif ($_14bit) {
    print STDERR "14bit charset.\n";
    $to = &build_table2(128, \@to_ucs);
    ($nbits, $type) = (14, 2);
} else {
    # Single-byte charset: one flat table of 128 or 256 entries.
    my $table_size;
    if ($_8bit) {
        print STDERR "8bit charset";
        # (flag, message, codes): the codes are added when the flag is unset.
        my @implied = (
            [\$control0, "; control0 chars added", 0 .. 0x1F],
            [\$control1, "; control1 chars added", 0x80 .. 0x9F],
            [\$delete,   "; delete char added",    0x7F],
        );
        foreach my $entry (@implied) {
            my($flag_ref, $note, @codes) = @$entry;
            next if $$flag_ref;
            &set_range(@codes);
            print STDERR $note;
        }
        ($nbits, $type, $table_size) = (8, 1, 256);
    } else {
        print STDERR "7bit charset";
        ($nbits, $type, $table_size) = (7, 0, 128);
    }
    print STDERR ".\n";
    $to = &build_table1($table_size, $to_ucs[0]);
}
|
|
|
|
# Byte size of the charset -> Unicode table.  In -C mode $to holds C text,
# so the size comes from $array_size; it has to be sampled here because
# building $from makes pack_array add to $array_size again.
$to_size = $opt_C ? $array_size : length($to);

$from = &build_table2(256, \@from_ucs);

if (!$opt_C) {
    # Binary table: signature, a zero byte, bit width and type, then the
    # two offsets, followed by both tables.
    print pack("A5CCCNN", "\003CSCT", 0, $nbits, $type, 8, 8 + $to_size),
        $to,
        $from;
} else {
    die "-o option is mandatory with -C" unless $opt_o;

    # Derive the C identifier from the output name, and its upper-case
    # form for the guard macro.
    $opt_o =~ s/\.c$//;
    $opt_o =~ tr/-/_/;
    ($name = $opt_o) =~ tr/a-z/A-Z/;

    print "#include \"..\/lib\/deps.h\"\n\n",
        "#ifdef _ICONV_CONVERTER_$name\n",
        "#include \"..\/lib\/endian.h\"\n\n",
        "_CONST unsigned char _iconv_ccs_table_$opt_o" . "[] = {\n",
        "\t3, 'C', 'S', 'C', 'T', ICONV_ORDER, $nbits, $type,\n",
        &pack_array(2, 'N', [8, 8 + $to_size]),
        $to,
        $from,
        "};\n\n",
        "#endif /* #ifdef _ICONV_CONVERTER_$name */\n\n";
}
|
|
|