fix and amend scripts and makefile rules to generate Unicode data

This commit is contained in:
Thomas Wolff 2021-04-17 00:00:00 +02:00 committed by Corinna Vinschen
parent 11fdae24b7
commit 204ee3cf6a
6 changed files with 33 additions and 25 deletions

View File

@ -420,10 +420,6 @@ endif
# Generate Unicode data tables for libc/string/wcwidth and libc/ctype/??w*
unidata:
cd $(srcdir)/libc/string; ./mkunidata
cd $(srcdir)/libc/ctype; ./mkunidata
unidate-download:
cd $(srcdir)/libc/string; ./mkunidata -u
cd $(srcdir)/libc/ctype; ./mkunidata -u

View File

@ -1168,10 +1168,6 @@ install-data-local: install-toollibLIBRARIES
# Generate Unicode data tables for libc/string/wcwidth and libc/ctype/??w*
unidata:
cd $(srcdir)/libc/string; ./mkunidata
cd $(srcdir)/libc/ctype; ./mkunidata
unidate-download:
cd $(srcdir)/libc/string; ./mkunidata -u
cd $(srcdir)/libc/ctype; ./mkunidata -u

View File

@ -0,0 +1,11 @@
# unidata: generate Unicode data tables for string/wcwidth and ctype/??w*,
# downloading the Unicode data files (mkunidata -u); '&&' aborts on a failed cd
unidata:
	cd string && ./mkunidata -u
	cd ctype && ./mkunidata -u
# unidata-local: generate Unicode data tables for string/wcwidth and ctype/??w*,
# using installed Unicode data files from package unicode-ucd (mkunidata -i)
unidata-local:
	cd string && ./mkunidata -i
	cd ctype && ./mkunidata -i

View File

@ -20,7 +20,9 @@ case "$1" in
;;
-u)
wget () {
curl -R -O --connect-timeout 55 -z "`basename $1`" "$1"
ref=`basename $1`
ref=`ls "$ref" 2> /dev/null || echo 01-Jan-1970`
curl -R -O --connect-timeout 55 -z "$ref" "$1"
}
echo downloading data from unicode.org

View File

@ -21,11 +21,13 @@ case "$1" in
;;
-u)
wget () {
curl -R -O --connect-timeout 55 -z "`basename $1`" "$1"
ref=`basename $1`
ref=`ls "$ref" 2> /dev/null || echo 01-Jan-1970`
curl -R -O --connect-timeout 55 -z "$ref" "$1"
}
echo downloading uniset tool
wget http://www.cl.cam.ac.uk/~mgk25/download/uniset.tar.gz
wget https://www.cl.cam.ac.uk/~mgk25/download/uniset.tar.gz
gzip -dc uniset.tar.gz | tar xvf - uniset
echo downloading data from unicode.org

View File

@ -2,7 +2,7 @@
# Uniset -- Unicode subset manager -- Markus Kuhn
# http://www.cl.cam.ac.uk/~mgk25/download/uniset.tar.gz
require 5.008;
require 5.014;
use open ':utf8';
use FindBin qw($RealBin); # to find directory where this file is located
@ -147,11 +147,12 @@ sub is_unicode {
return exists $name{$ucs};
}
my @search_path;
push @search_path, "$ENV{HOME}/local/share/uniset"
if -d "$ENV{HOME}/local/share/uniset";
push @search_path, "/usr/share/uniset" if -d "/usr/share/uniset";
push @search_path, $RealBin unless $RealBin =~ m|^/usr/bin|;
my @search_path = ();
if ($RealBin =~ m|^(.*)/bin\z| && -d "$1/share/uniset") {
push @search_path, "$1/share/uniset";
} else {
push @search_path, $RealBin;
}
sub search_open {
my ($mode, $fn) = @_;
@ -186,7 +187,7 @@ while (<$data>) {
$category{$ucs} = $3;
$comment{$ucs} = $12;
} else {
die("Syntax error in line '$_' in file '$unicodedata'");
die("Syntax error in line '$_' in file '$unicodedata'\n");
}
}
close($data);
@ -209,7 +210,7 @@ while (<$data>) {
} elsif (/^\s*\#/ || /^\s*$/) {
# ignore comments and empty lines
} else {
die("Syntax error in line '$_' in file '$blockdata'");
die("Syntax error in line '$_' in file '$blockdata'\n");
}
}
close($data);
@ -231,16 +232,16 @@ while ($_ = shift(@ARGV)) {
$image = 1;
} elsif (/^template$/) {
$template = shift(@ARGV);
open(TEMPLATE, $template) || die("Can't open template file '$template': '$!'");
open(TEMPLATE, $template) || die("Can't open template file '$template': $!\n");
while (<TEMPLATE>) {
if (/^\#\s*include\s+\"([^\"]*)\"\s*$/) {
open(INCLUDE, $1) || die("Can't open template include file '$1': '$!'");
open(INCLUDE, $1) || die("Can't open template include file '$1': $!\n");
while (<INCLUDE>) {
print $_;
}
close(INCLUDE);
} elsif (/^\#\s*quote\s+\"([^\"]*)\"\s*$/) {
open(INCLUDE, $1) || die("Can't open template include file '$1': '$!'");
open(INCLUDE, $1) || die("Can't open template include file '$1': $!\n");
while (<INCLUDE>) {
s/&/&amp;/g;
s/</&lt;/g;
@ -275,7 +276,7 @@ while ($_ = shift(@ARGV)) {
$setfile = $2;
$setfile = shift(@ARGV) if $setfile eq "";
push(@SETS, $setfile);
open(SET, $setfile) || die("Can't open set file '$setfile': '$!'");
open(SET, $setfile) || die("Can't open set file '$setfile': $!\n");
$setname = $setfile;
while (<SET>) {
while ($_) {
@ -303,7 +304,7 @@ while ($_ = shift(@ARGV)) {
$setfile = shift(@ARGV) if $setfile eq "";
push(@SETS, $setfile);
my $setf = search_open('<', $setfile);
die("Can't open set file '$setfile': '$!'") unless $setf;
die("Can't open set file '$setfile': $!\n") unless $setf;
$cedf = ($setfile =~ /cedf/); # detect Kosta Kosti's trans CEDF format by path name
$setname = $setfile;
$setname =~ s/([^.\[\]]*)\..*/$1/;
@ -691,6 +692,6 @@ while ($_ = shift(@ARGV)) {
delete $used{$i} if is_unicode($i);
}
} else {
die("Unknown command line command '$_'");
die("Unknown command line command '$_'\n");
};
}