#! /usr/bin/perl
#
# Copyright (c) 1999, 2000
#	Konstantin Chuguev.  All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
#
#	iconv (Charset Conversion Library) v2.0
#
# Builds a pair of charset conversion tables (local charset -> Unicode and
# Unicode -> local charset) from a textual mapping file read from the files
# named on the command line (or standard input), and writes them either as
# a binary table or, with -C, as a C source file.
#
# Options:
#   -a       ignore 8 bit codes (for ASCII): codes above 0x7F are dropped
#   -C       make a C source file instead of a binary table (requires -o)
#   -c N     field number for charset character codes (default 0)
#   -M       Macintosh newline (CR only) as the input line terminator
#   -m file  character mnemonic table from RFC1345
#   -o file  output file name ('-' is replaced by '_'; with -C the name,
#            minus a trailing ".c", also names the generated C array)
#   -p str   prefix of charset codes in the input (default '0x'); it is
#            used as a regular expression anchored at the field start
#   -u N     field number for Unicode character codes (default 1)
#

use strict;
use warnings;
use integer;

use Getopt::Std;

# Command-line options; filled in by getopts() in the main program below.
my %opt;

# Number of table bytes emitted so far as C initialisers (only counted in
# -C mode, where the length of the generated text is not the table size).
my $array_size = 0;

# Format four bytes as a C macro call, e.g. pack_hex('1l', "\0\0\0\1")
# gives "_1l(0x00, 0x00, 0x00, 0x01)".
#   ($suffix, $bytes) = (macro name without the leading '_', 4-byte string)
#
sub pack_hex {
    my ($suffix, $bytes) = @_;

    my @octets = map { sprintf('0x%02X', $_) } unpack('C4', $bytes);
    return '_' . $suffix . '(' . join(', ', @octets) . ')';
}

# Serialise the first $size elements of @$array_ref.
#   $format is 'N' (32-bit) or 'n' (16-bit), both in network byte order.
# Without -C the result is the packed binary string.  With -C the result is
# C initialiser text, four bytes per macro call and two calls per line, and
# $array_size is advanced by the number of table bytes represented.
# $size is expected to be even for 'N' and a multiple of 4 for 'n'.
#
sub pack_array {
    my ($size, $format, $array_ref) = @_;

    return pack("$format$size", @$array_ref) unless $opt{C};

    my $text = '';
    if ($format eq 'N') {
        for (my $i = 0; $i < $size; $i += 2) {
            $text .= "\t"
                   . pack_hex('1l', pack('N', $array_ref->[$i])) . ', '
                   . pack_hex('1l', pack('N', $array_ref->[$i + 1])) . ",\n";
        }
        $array_size += $size * 4;
    }
    else {
        for (my $i = 0; $i < $size; $i += 4) {
            $text .= "\t"
                   . pack_hex('2s', pack('n2', $array_ref->[$i],
                                               $array_ref->[$i + 1])) . ', '
                   . pack_hex('2s', pack('n2', $array_ref->[$i + 2],
                                               $array_ref->[$i + 3])) . ",\n";
        }
        $array_size += $size * 2;
    }
    return $text;
}

# create an array of short/long values in network byte order
# Undefined slots among the first $size elements are filled with $default
# (the referenced array is modified in place) before serialisation.
#
sub build_array {
    my ($size, $format, $default, $array_ref) = @_;

    for my $i (0 .. $size - 1) {
        $array_ref->[$i] = $default unless defined $array_ref->[$i];
    }
    return pack_array($size, $format, $array_ref);
}

# Build a one-level table of $size 16-bit entries; unmapped entries are
# written as 0xFFFE.
#
sub build_table1 {
    my ($size, $array_ref) = @_;

    return build_array($size, 'n', 0xFFFE, $array_ref);
}

# Build a two-level table from an array of row references.
# The result starts with $size 32-bit offsets (0 for an absent row, otherwise
# the byte offset of that row counted from the start of this table), followed
# by one build_table1() subtable for every row that is present.  The number
# of subtables is reported on STDERR.
#
sub build_table2 {
    my ($size, $array_ref) = @_;

    my $offset = $size * 4;         # first subtable follows the offset array
    my @offs;
    for my $i (0 .. $size - 1) {
        next unless defined $array_ref->[$i];
        $offs[$i] = $offset;
        $offset += $size * 2;       # each subtable is $size 16-bit entries
    }

    my $data      = build_array($size, 'N', 0, \@offs);
    my $subtables = 0;
    for my $i (0 .. $size - 1) {
        next unless defined $array_ref->[$i];
        $subtables++;
        $data .= build_table1($size, $array_ref->[$i]);
    }
    printf STDERR "%d subtables.\n", $subtables;
    return $data;
}

# Charset properties collected by set_val().
my $control0 = 0;                   # a code in 0x00..0x1F style range was seen
my $control1 = 0;                   # a code in 0x80..0x9F style range was seen
my $delete   = 0;                   # code 0x7F was seen
my ($has_7bit, $has_8bit, $has_14bit, $has_16bit);

# Conversion tables, indexed [high byte][low byte] of the source code.
my @to_ucs;                         # local charset code -> Unicode
my @from_ucs;                       # Unicode -> local charset code

# set a value in two charset conversion tables; update charset properties
#   ($cs, $ucs) = (local charset code, Unicode)
#
sub set_val {
    my ($cs, $ucs) = @_;

    return if $opt{a} && $cs > 0x7F;

    $to_ucs[$cs >> 8][$cs & 0xFF]     = $ucs;
    $from_ucs[$ucs >> 8][$ucs & 0xFF] = $cs;

    # Codes with both 0x20 and 0x40 clear count as control characters.
    if (($cs & 0x60) == 0) {
        if ($cs & 0x80) {
            $control1 = 1;
        }
        else {
            $control0 = 1;
        }
    }
    $delete = 1 if $cs == 0x7F;

    if ($cs < 0x80) {
        $has_7bit = 1;
    }
    elsif ($cs < 0x100) {
        $has_8bit = 1;
    }
    elsif ($cs & 0x8080) {
        $has_16bit = 1;
    }
    else {
        $has_14bit = 1;
    }
    return;
}

# set a range of equal codes to charset conversion tables
#
sub set_range {
    set_val($_, $_) for @_;
    return;
}

#
# Main program
#

getopts('aCc:Mm:o:p:u:', \%opt)
    or die "usage: $0 [-aCM] [-c N] [-u N] [-p prefix] [-m mnemonics]"
         . " [-o output] [file ...]\n";

$opt{c} = 0    unless defined $opt{c};
$opt{p} = '0x' unless defined $opt{p};
$opt{u} = 1    unless defined $opt{u};

die "-o option is mandatory with -C" if $opt{C} && !$opt{o};

# Name of the generated C array: the output file name without ".c".
my $table_name;
if ($opt{o}) {
    $opt{o} =~ tr/-/_/;
    open(STDOUT, '>', $opt{o})
        or die "$0: cannot open $opt{o} for writing: $!\n";
    ($table_name = $opt{o}) =~ s/\.c$//;
}

# Deliberately global: applies to every input file read below.
$/ = "\cM" if $opt{M};

if ($opt{m}) {
    # RFC1345 mode: first load the mnemonic -> hex code table ...
    my %map;
    open(my $map_fh, '<', $opt{m})
        or die "$0: cannot open $opt{m}: $!\n";
    while (my $line = <$map_fh>) {
        chomp $line;
        next unless $line =~ /^ [^ ]/;
        my @fields = split ' ', $line;
        next if @fields < 2;
        $map{$fields[0]} = $fields[1];
    }
    close($map_fh);

    # ... then read the charset description: "&code N" sets the current
    # code, other "&..." lines are skipped, and every other line is a list
    # of mnemonics for consecutive codes ('??' marks an unassigned code).
    my $code = 0;
    while (my $line = <>) {
        chomp $line;
        $line =~ s/^ *//;
        if ($line =~ /^&[a-z]/) {
            my @directive = split ' ', substr($line, 1);
            if ($directive[0] eq 'code') {
                $code = defined $directive[1] ? $directive[1] : 0;
            }
        }
        else {
            for my $mnemonic (split ' ', $line) {
                if ($mnemonic ne '??') {
                    if (defined $map{$mnemonic}) {
                        set_val($code, hex "0x$map{$mnemonic}");
                    }
                    else {
                        # Mapping it to U+0000 would clobber NUL's entry.
                        warn "$0: unknown mnemonic '$mnemonic' skipped\n";
                    }
                }
                $code++;
            }
        }
    }
}
else {
    # Plain mode: whitespace-separated fields, '#' starts a comment.
    # The prefix is intentionally interpolated as a pattern, not quoted.
    my $prefix    = $opt{p};
    my $prefix_re = qr/^$prefix/;
    while (my $line = <>) {
        $line =~ s/[#\n].*//;
        my @fields = split ' ', $line;
        next if @fields < 2;                    # too few fields
        next unless defined $fields[$opt{c}]
                 && defined $fields[$opt{u}];   # requested field is missing
        next unless $fields[$opt{c}] =~ s/$prefix_re/0x/;
                                                # local charset code prefix is invalid
        set_val(hex $fields[$opt{c}], hex $fields[$opt{u}]);
    }
}

# Classify the charset, add the implied codes and build the
# "local charset -> Unicode" table.
my ($nbits, $type, $to);
if (!$has_16bit && !$has_14bit) {
    if ($has_8bit) {
        print STDERR "8bit charset";
        if (!$control0) {
            set_range(0 .. 0x1F);
            print STDERR "; control0 chars added";
        }
        if (!$control1) {
            set_range(0x80 .. 0x9F);
            print STDERR "; control1 chars added";
        }
        if (!$delete) {
            set_range(0x7F);
            print STDERR "; delete char added";
        }
        $nbits = 8;
        $type  = 1;
    }
    else {
        print STDERR "7bit charset";
        $nbits = 7;
        $type  = 0;
    }
    print STDERR ".\n";
    # $to_ucs[0] is undefined when no code at all was read.
    $to = build_table1($has_8bit ? 256 : 128, $to_ucs[0] || []);
}
elsif ($has_16bit) {
    print STDERR "16bit charset";
    if (!$has_7bit && !$has_8bit) {
        set_range(0 .. 0x7F);
        print STDERR "; ASCII subset added";
    }
    elsif (!$control0) {
        set_range(0 .. 0x1F);
        print STDERR "; control0 chars added";
    }
    print STDERR ".\n";
    $to    = build_table2(256, \@to_ucs);
    $nbits = 16;
    $type  = 3;
}
else {
    print STDERR "14bit charset.\n";
    $to    = build_table2(128, \@to_ucs);
    $nbits = 14;
    $type  = 2;
}

# Must be taken before the next table is built: in -C mode $array_size
# keeps growing with every pack_array() call.
my $to_size = $opt{C} ? $array_size : length($to);

my $from = build_table2(256, \@from_ucs);

# Output: an 8-byte header ("\003CSCT", byte-order flag, bit width, type),
# two 32-bit offsets (8 and 8 + size of the first table), then both tables.
if ($opt{C}) {
    (my $macro_name = $table_name) =~ tr/a-z/A-Z/;

    print "#include \"../lib/deps.h\"\n\n";
    print "#ifdef _ICONV_CONVERTER_$macro_name\n";
    print "#include \"../lib/endian.h\"\n\n";
    print "_CONST unsigned char _iconv_ccs_table_$table_name" . "[] = {\n";
    print "\t3, 'C', 'S', 'C', 'T', ICONV_ORDER, $nbits, $type,\n";
    print pack_array(2, 'N', [8, 8 + $to_size]);
    print $to;
    print $from;
    print "};\n\n";
    print "#endif /* #ifdef _ICONV_CONVERTER_$macro_name */\n\n";
}
else {
    binmode(STDOUT);                # the table is binary data
    print pack('A5CCCNN', "\003CSCT", 0, $nbits, $type, 8, 8 + $to_size);
    print $to;
    print $from;
}

close(STDOUT) or die "$0: error writing output: $!\n";