Cygwin: linux-locale-helpers: helper tools to generate locale data from Linux

Signed-off-by: Corinna Vinschen <corinna@vinschen.de>
This commit is contained in:
Corinna Vinschen 2023-02-20 23:00:04 +01:00
parent ce5aa09807
commit 4ab778df24
4 changed files with 477 additions and 0 deletions

View File

@ -0,0 +1,10 @@
These scripts and helper applications are used to create locale data
required for complete locale support, but either missing in Windows
or implemented in a non-POSIXy way.
The script has to run from inside a glibc git clone.
The C tools can be built without any special options.
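All three tools generate the new locale headers (lc_collelem.h,
lc_era.h, lc_msg.h). The C tools write their header to the current
working directory; the script writes lc_collelem.h to the top-level
directory of the glibc git clone. The headers can just be copied to
local_includes and committed without further changes.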

View File

@ -0,0 +1,61 @@
#!/bin/bash
#
# SPDX-License-Identifier: BSD-2-Clause
#
# Generate lc_collelem.h: a table of all "collating-element" definitions
# found in the GLibc locale sources below localedata/locales.
#
# Assuming
#
# git clone https://sourceware.org/git/glibc.git
# cd glibc
#
# Note that the script changes into the top-level directory of the git
# checkout before generating anything, so lc_collelem.h is created there.
#

# Locate the top-level directory of the surrounding git checkout.
topdir="$(git rev-parse --show-toplevel)"
if [ -z "${topdir}" ]
then
echo "Not a git dir? Exit."
exit 1
fi
cd "${topdir}"
# Sanity check: configure.ac has to mention the GNU C Library.
glibc_conf="$(grep 'GNU C Library' configure.ac)"
if [ -z "${glibc_conf}" ]
then
echo "No GLibc configure.ac? Wrong git repo? Exit."
exit 1
fi
if [ ! -f version.h ]
then
echo "No version.h file? Exit."
exit 1
fi
# Extract the quoted string from the '#define VERSION "..."' line; it is
# only used in the comment heading the generated file.
version=$(sed -n -e 's/#define VERSION "\(.*\)"/\1/p' version.h)
if [ -z "${version}" ]
then
echo "Malformed version.h file. Exit."
exit 1
fi
if [ ! -d localedata/locales ]
then
echo "No localedata/locales subdir. Broken repo? Exit."
exit 1
fi
# Everything printed inside the subshell ends up in lc_collelem.h.  The
# redirection is set up while the current directory is still ${topdir}.
(
# Change into the locale sources so the file names printed by grep are
# bare locale names.
cd localedata/locales
# Emit the file header: comment, struct declaration and array opener.
cat <<-EOF
/* This struct of collating elements data has been generated by fetching
locale data from a GLibc ${version} source dir on $(date +%F). */
struct collating_element_t
{
const char32_t *element;
const char *locale;
};
collating_element_t collating_element[] =
{
EOF
# grep prints 'FILE:collating-element <NAME> from "<SEQ>"' lines.  The sed
# script then
#  - rewrites each line into an initializer '{ U"SEQ", "FILE" }, /* NAME */',
#  - turns <Uxxxx>, <Uxxxxx> and <Uxxxxxx> symbols into \U escapes padded
#    to eight hex digits,
#  - removes the string iso14651_t1_common, leaving an empty locale name
#    for entries coming from that file.
# The resulting initializers are sorted.
grep -r collating-element * \
| sed -e 's#^\([^:]*\):collating-element[ \t]*\([^ \t]*\)[ \t]*from[ \t]*"\(.*\)".*$# { U"\3", "\1" }, /* \2 */#
s/<U\([[:xdigit:]]\{4\}\)>/\\U0000\1/g
s/<U\([[:xdigit:]]\{5\}\)>/\\U000\1/g
s/<U\([[:xdigit:]]\{6\}\)>/\\U00\1/g
s/iso14651_t1_common//g' \
| sort
# Close the array initializer.
echo "};"
) > lc_collelem.h

View File

@ -0,0 +1,169 @@
/* SPDX-License-Identifier: BSD-2-Clause */
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <dirent.h>
#include <time.h>
#include <locale.h>
#include <langinfo.h>
#include <wchar.h>
/* LC_MESSAGES data of a single locale.  All strings except the locale
   name are stored in the escaped form returned by xfrm_utf, ready to be
   pasted between the quotes of a wide string literal.  */
struct lc_msg_t
{
  char locale[64];
  char yesstr[256];
  char nostr[256];
  char yesexpr[256];
  char noexpr[256];
};

/* Table of collected locale records.  */
struct lc_msg_t msg[512];

/* Number of entries of msg[] in use.  */
int mcnt = 0;
/* Convert the wide-char string WS into text suitable for the body of a C
   wide string literal.

   Characters below 0x80 are copied verbatim, all other characters are
   written as \xNNNN hex escapes.  A hex escape in C extends over all
   following hex digits, so a character which is itself a hex digit
   (0-9, a-f, A-F) is escaped as well if it immediately follows an escape.

   The result is stored in a static buffer which is overwritten by the
   next call.  If the result doesn't fit into the buffer, it is cut off at
   a character boundary and a warning is printed to stderr.  */
char *
xfrm_utf (const wchar_t *ws)
{
  static char xfrm[256];
  char *const end = xfrm + sizeof xfrm;
  char *p = xfrm;
  int wconst = 0;	/* Set while the preceding output is a hex escape. */

  for (; *ws; ++ws)
    {
      char esc[24];
      size_t need;

      if (*ws < 0x80
          && (!wconst || !wcschr (L"0123456789aAbBcCdDeEfF", *ws)))
        {
          esc[0] = (char) *ws;
          need = 1;
          wconst = 0;
        }
      else
        {
          /* %lx requires an unsigned long argument, wchar_t is not. */
          need = (size_t) snprintf (esc, sizeof esc, "\\x%04lx",
                                    (unsigned long) *ws);
          wconst = 1;
        }
      /* Always keep room for the trailing NUL. */
      if (need >= (size_t) (end - p))
        {
          fprintf (stderr, "xfrm_utf: result too long, truncated\n");
          break;
        }
      memcpy (p, esc, need);
      p += need;
    }
  *p = '\0';
  return xfrm;
}
/* Fetch the langinfo string ITEM of the current locale, convert it with
   xfrm_utf and store the result in DST, a buffer of DSTLEN bytes.  A
   string which can't be converted to wide chars is stored as empty
   string.  */
static void
fetch_msg_item (nl_item item, char *dst, size_t dstlen)
{
  wchar_t nlbuf[256];
  size_t len;

  /* Convert one char less than the buffer holds, so there's always room
     for the terminator which mbstowcs omits if the buffer gets full. */
  len = mbstowcs (nlbuf, nl_langinfo (item),
                  sizeof nlbuf / sizeof nlbuf[0] - 1);
  if (len == (size_t) -1)
    {
      fprintf (stderr, "invalid multibyte string in langinfo item %ld\n",
               (long) item);
      len = 0;
    }
  nlbuf[len] = L'\0';
  snprintf (dst, dstlen, "%s", xfrm_utf (nlbuf));
}

/* Collect the LC_MESSAGES strings (YESSTR, NOSTR, YESEXPR, NOEXPR) of the
   locale NAME and append them to msg[].

   NAME is a locale name without codeset, optionally with a modifier
   ("sr_RS@latin").  The data is read from the UTF-8 variant of the
   locale, so ".utf8" is inserted in front of the modifier.  Locales
   which can't be activated are skipped with a warning, rather than
   recording the strings of the previously active locale under their
   name.  */
void
read_locale_messages (char *name)
{
  const size_t max_entries = sizeof msg / sizeof msg[0];
  const char *modifier = strchr (name, '@');
  char locale[64];

  if (modifier)
    snprintf (locale, sizeof locale, "%.*s.utf8%s",
              (int) (modifier - name), name, modifier);
  else
    snprintf (locale, sizeof locale, "%s.utf8", name);
  printf ("%s\n", locale);

  /* A single locale may create up to three entries, see below. */
  if ((size_t) mcnt + 3 > max_entries)
    {
      fprintf (stderr, "locale table full, skipping %s\n", locale);
      return;
    }
  if (!setlocale (LC_ALL, locale))
    {
      fprintf (stderr, "can't set locale %s, skipping\n", locale);
      return;
    }

  snprintf (msg[mcnt].locale, sizeof msg[mcnt].locale, "%s", name);
  fetch_msg_item (YESSTR, msg[mcnt].yesstr, sizeof msg[mcnt].yesstr);
  fetch_msg_item (NOSTR, msg[mcnt].nostr, sizeof msg[mcnt].nostr);
  fetch_msg_item (YESEXPR, msg[mcnt].yesexpr, sizeof msg[mcnt].yesexpr);
  fetch_msg_item (NOEXPR, msg[mcnt].noexpr, sizeof msg[mcnt].noexpr);

  /* Serbian locale rename weirdness */
  if (!strncmp (msg[mcnt].locale, "sr_RS", 5))
    {
      /* Create additional equivalent entry for the old locale sr_SP. */
      ++mcnt;
      memcpy (&msg[mcnt], &msg[mcnt - 1], sizeof msg[mcnt]);
      msg[mcnt].locale[3] = 'S';
      msg[mcnt].locale[4] = 'P';
      /* Create additional equivalent entry for sr_ME@latin missing in
         Linux. */
      if (!strcmp (msg[mcnt].locale, "sr_SP@latin"))
        {
          ++mcnt;
          memcpy (&msg[mcnt], &msg[mcnt - 1], sizeof msg[mcnt]);
          msg[mcnt].locale[3] = 'M';
          msg[mcnt].locale[4] = 'E';
        }
    }
  ++mcnt;
}
int
locale_cmp (const void *a, const void *b)
{
struct lc_msg_t *la = (struct lc_msg_t *) a;
struct lc_msg_t *lb = (struct lc_msg_t *) b;
return strcmp (la->locale, lb->locale);
}
/* Write the collected LC_MESSAGES records, sorted by locale name, as C
   source to the file lc_msg.h in the current working directory.

   The comment heading the file names the glibc package as reported by
   "rpm -q glibc" and the current date (UTC).  If the package name can't
   be determined, a placeholder text is used instead.  The program exits
   with status 1 if lc_msg.h can't be created or written.  */
void
create_list ()
{
  FILE *fp;
  FILE *pp;
  char vers[64] = "an unknown glibc version";
  char tstr[64] = "an unknown date";
  char line[64];
  int i;
  struct tm *tm;
  time_t tim;

  fp = fopen ("lc_msg.h", "w");
  if (!fp)
    {
      perror ("lc_msg.h");
      exit (1);
    }

  pp = popen ("rpm -q glibc", "r");
  if (pp)
    {
      char *got = fgets (line, sizeof line, pp);

      /* Only trust the output if rpm succeeded and printed something. */
      if (pclose (pp) == 0 && got)
        {
          line[strcspn (line, "\n")] = '\0';
          if (line[0])
            strcpy (vers, line);
        }
    }

  tim = time (NULL);
  tm = gmtime (&tim);
  if (tm)
    strftime (tstr, sizeof tstr, "%F", tm);

  fprintf (fp,
"/* This struct of LC_MESSAGES data has been generated by fetching locale\n"
" data from a Linux system using %s on %s. */\n"
"\n"
"struct lc_msg_t\n"
"{\n"
" const char *locale;\n"
" const wchar_t *yesexpr;\n"
" const wchar_t *noexpr;\n"
" const wchar_t *yesstr;\n"
" const wchar_t *nostr;\n"
"};\n"
"\n"
"static struct lc_msg_t lc_msg[] =\n"
"{\n", vers, tstr);

  qsort (msg, mcnt, sizeof (struct lc_msg_t), locale_cmp);
  /* The order of the strings has to match the struct printed above. */
  for (i = 0; i < mcnt; ++i)
    fprintf (fp, " { \"%s\", L\"%s\", L\"%s\", L\"%s\", L\"%s\" },\n",
             msg[i].locale,
             msg[i].yesexpr, msg[i].noexpr,
             msg[i].yesstr, msg[i].nostr);
  fputs ("};\n", fp);

  /* fclose flushes, so write errors may only show up here. */
  if (ferror (fp) | fclose (fp))
    {
      perror ("lc_msg.h");
      exit (1);
    }
}
/* Read the locale names printed by "locale -a", restricted to names
   containing an underscore but no dot, collect the LC_MESSAGES data of
   each of them and finally write the result via create_list.  */
int
main ()
{
  char name[32];
  FILE *pp = popen ("locale -a | grep -a '_' | fgrep -v .", "r");

  if (pp == NULL)
    {
      perror ("popen failed");
      return 1;
    }
  while (fgets (name, sizeof name, pp) != NULL)
    {
      /* Cut off the line at the newline, if any. */
      name[strcspn (name, "\n")] = '\0';
      read_locale_messages (name);
    }
  pclose (pp);
  create_list ();
  return 0;
}

View File

@ -0,0 +1,237 @@
/* SPDX-License-Identifier: BSD-2-Clause */
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <dirent.h>
#include <time.h>
#include <locale.h>
#include <langinfo.h>
#include <wchar.h>
/* LC_TIME format and era data of a single locale.  The string members
   point to text in escaped form, ready to be pasted between the quotes
   of a wide string literal.  */
struct lc_era_t
{
  char locale[64];
  char *date_fmt;
  char *d_fmt;
  char *d_t_fmt;
  char *t_fmt;
  char *t_fmt_ampm;
  char *era;
  char *era_d_fmt;
  char *era_d_t_fmt;
  char *era_t_fmt;
  char *alt_digits;
};

/* Table of collected locale records.  */
struct lc_era_t era[512];

/* Number of entries of era[] in use.  */
int ecnt = 0;
/* Convert the wide-char string WS into text suitable for the body of a C
   wide string literal.

   Characters below 0x80 are copied verbatim, all other characters are
   written as \xNNNN hex escapes.  A hex escape in C extends over all
   following hex digits, so a character which is itself a hex digit
   (0-9, a-f, A-F) is escaped as well if it immediately follows an escape.

   If SLIST is non-zero, WS is a list of strings, each terminated by a
   NUL, with an additional NUL terminating the list.  The strings are
   joined into a single string, separated by semicolons.

   The result is stored in a static buffer which is overwritten by the
   next call.  If the result doesn't fit into the buffer, it is cut off at
   a character boundary and a warning is printed to stderr.  */
char *
xfrm_utf (const wchar_t *ws, int slist)
{
  static char xfrm[4096];
  char *const end = xfrm + sizeof xfrm;
  char *p = xfrm;
  int wconst = 0;	/* Set while the preceding output is a hex escape. */

  while (*ws)
    {
      char esc[24];
      size_t need;

      if (*ws < 0x80
          && (!wconst || !wcschr (L"0123456789aAbBcCdDeEfF", *ws)))
        {
          esc[0] = (char) *ws;
          need = 1;
          wconst = 0;
        }
      else
        {
          /* %lx requires an unsigned long argument, wchar_t is not. */
          need = (size_t) snprintf (esc, sizeof esc, "\\x%04lx",
                                    (unsigned long) *ws);
          wconst = 1;
        }
      /* Always keep room for the trailing NUL. */
      if (need >= (size_t) (end - p))
        {
          fprintf (stderr, "xfrm_utf: result too long, truncated\n");
          break;
        }
      memcpy (p, esc, need);
      p += need;
      ++ws;
      if (!*ws && slist)
        {
          /* End of a list entry.  Step over the NUL and check if
             another entry follows. */
          ++ws;
          if (*ws)
            {
              if (end - p < 2)
                {
                  fprintf (stderr,
                           "xfrm_utf: result too long, truncated\n");
                  break;
                }
              *p++ = ';';
              /* The semicolon terminates a preceding hex escape. */
              wconst = 0;
            }
        }
    }
  *p = '\0';
  return xfrm;
}
/* Convert SLIST, a list of multibyte strings, into a single escaped
   string with the entries separated by semicolons.

   Each entry of SLIST is terminated by a NUL, the list is terminated by
   an empty entry.  Entries which can't be converted to wide chars are
   skipped, entries not fitting into the conversion buffer are dropped,
   both with a warning on stderr.

   Returns a malloc'ed copy of the result.  The program exits with
   status 1 if the allocation fails.  */
char *
xfrm_slist (const char *slist)
{
  static wchar_t wxfrm[4096];
  wchar_t *const end = wxfrm + sizeof wxfrm / sizeof wxfrm[0];
  wchar_t *wp = wxfrm;
  const char *xfrm;
  size_t size;
  char *ret;

  while (*slist)
    {
      size_t room = (size_t) (end - wp);
      size_t len;

      /* An entry needs at least one char and its terminator, and one
         more slot has to stay free for the list terminator. */
      if (room < 3)
        {
          fprintf (stderr, "xfrm_slist: list too long, truncated\n");
          break;
        }
      len = mbstowcs (wp, slist, room - 2);
      if (len == (size_t) -1 || len == 0)
        fprintf (stderr, "xfrm_slist: skipping unconvertible entry\n");
      else
        {
          /* mbstowcs doesn't terminate the string if it fills the
             available space completely. */
          wp[len] = L'\0';
          wp += len + 1;
        }
      slist += strlen (slist) + 1;
    }
  *wp = L'\0';

  /* xfrm_utf returns one NUL-terminated string in a static buffer, so
     copy exactly that string. */
  xfrm = xfrm_utf (wxfrm, 1);
  size = strlen (xfrm) + 1;
  ret = (char *) malloc (size);
  if (!ret)
    {
      perror ("malloc");
      exit (1);
    }
  memcpy (ret, xfrm, size);
  return ret;
}
/* Fetch the langinfo string ITEM of the current locale, convert it with
   xfrm_utf and return a malloc'ed copy of the result.  A string which
   can't be converted to wide chars results in an empty string.  The
   program exits with status 1 if the allocation fails.  */
static char *
fetch_era_item (nl_item item)
{
  wchar_t nlbuf[256];
  size_t len;
  char *ret;

  /* Convert one char less than the buffer holds, so there's always room
     for the terminator which mbstowcs omits if the buffer gets full. */
  len = mbstowcs (nlbuf, nl_langinfo (item),
                  sizeof nlbuf / sizeof nlbuf[0] - 1);
  if (len == (size_t) -1)
    {
      fprintf (stderr, "invalid multibyte string in langinfo item %ld\n",
               (long) item);
      len = 0;
    }
  nlbuf[len] = L'\0';
  ret = strdup (xfrm_utf (nlbuf, 0));
  if (!ret)
    {
      perror ("strdup");
      exit (1);
    }
  return ret;
}

/* Collect the LC_TIME format strings, the era definitions and the
   alternative digits of the locale NAME and append them to era[].
   Locales defining neither ERA nor ALT_DIGITS are not recorded.

   NAME is a locale name without codeset, optionally with a modifier
   ("sr_RS@latin").  The data is read from the UTF-8 variant of the
   locale, so ".utf8" is inserted in front of the modifier.  Locales
   which can't be activated are skipped with a warning, rather than
   recording the strings of the previously active locale under their
   name.  */
void
read_locale_era (char *name)
{
  const size_t max_entries = sizeof era / sizeof era[0];
  const char *modifier = strchr (name, '@');
  char *nlera, *altd;
  char locale[64];

  if (modifier)
    snprintf (locale, sizeof locale, "%.*s.utf8%s",
              (int) (modifier - name), name, modifier);
  else
    snprintf (locale, sizeof locale, "%s.utf8", name);
  printf ("%s\n", locale);

  if (!setlocale (LC_ALL, locale))
    {
      fprintf (stderr, "can't set locale %s, skipping\n", locale);
      return;
    }
  nlera = nl_langinfo (ERA);
  altd = nl_langinfo (ALT_DIGITS);
  if (!*nlera && !*altd)
    return;

  /* A single locale may create up to three entries, see below. */
  if ((size_t) ecnt + 3 > max_entries)
    {
      fprintf (stderr, "locale table full, skipping %s\n", locale);
      return;
    }

  snprintf (era[ecnt].locale, sizeof era[ecnt].locale, "%s", name);
  era[ecnt].date_fmt = fetch_era_item (_DATE_FMT);
  era[ecnt].d_fmt = fetch_era_item (D_FMT);
  era[ecnt].d_t_fmt = fetch_era_item (D_T_FMT);
  era[ecnt].t_fmt = fetch_era_item (T_FMT);
  era[ecnt].t_fmt_ampm = fetch_era_item (T_FMT_AMPM);
  /* ERA and ALT_DIGITS are lists of strings, not single strings. */
  era[ecnt].era = *nlera ? xfrm_slist (nlera) : "\0";
  era[ecnt].alt_digits = *altd ? xfrm_slist (altd) : "\0";
  era[ecnt].era_d_fmt = fetch_era_item (ERA_D_FMT);
  era[ecnt].era_d_t_fmt = fetch_era_item (ERA_D_T_FMT);
  era[ecnt].era_t_fmt = fetch_era_item (ERA_T_FMT);

  /* Serbian locale rename weirdness */
  if (!strncmp (era[ecnt].locale, "sr_RS", 5))
    {
      /* Create additional equivalent entries for the old locale sr_SP.
         The copies share the string pointers of the original entry. */
      ++ecnt;
      memcpy (&era[ecnt], &era[ecnt - 1], sizeof era[ecnt]);
      era[ecnt].locale[3] = 'S';
      era[ecnt].locale[4] = 'P';
      /* Create additional equivalent entry for sr_ME@latin missing in
         Linux. */
      if (!strcmp (era[ecnt].locale, "sr_SP@latin"))
        {
          ++ecnt;
          memcpy (&era[ecnt], &era[ecnt - 1], sizeof era[ecnt]);
          era[ecnt].locale[3] = 'M';
          era[ecnt].locale[4] = 'E';
        }
    }
  ++ecnt;
}
int
locale_cmp (const void *a, const void *b)
{
struct lc_era_t *la = (struct lc_era_t *) a;
struct lc_era_t *lb = (struct lc_era_t *) b;
return strcmp (la->locale, lb->locale);
}
/* Write the collected LC_TIME era records, sorted by locale name, as C
   source to the file lc_era.h in the current working directory.

   The comment heading the file names the glibc package as reported by
   "rpm -q glibc" and the current date (UTC).  If the package name can't
   be determined, a placeholder text is used instead.  The program exits
   with status 1 if lc_era.h can't be created or written.  */
void
create_list ()
{
  FILE *fp;
  FILE *pp;
  char vers[64] = "an unknown glibc version";
  char tstr[64] = "an unknown date";
  char line[64];
  int i;
  struct tm *tm;
  time_t tim;

  fp = fopen ("lc_era.h", "w");
  if (!fp)
    {
      perror ("lc_era.h");
      exit (1);
    }

  pp = popen ("rpm -q glibc", "r");
  if (pp)
    {
      char *got = fgets (line, sizeof line, pp);

      /* Only trust the output if rpm succeeded and printed something. */
      if (pclose (pp) == 0 && got)
        {
          line[strcspn (line, "\n")] = '\0';
          if (line[0])
            strcpy (vers, line);
        }
    }

  tim = time (NULL);
  tm = gmtime (&tim);
  if (tm)
    strftime (tstr, sizeof tstr, "%F", tm);

  fprintf (fp,
"/* This struct of LC_TIME ERA data has been generated by fetching locale\n"
" data from a Linux system using %s on %s. */\n"
"\n"
"struct lc_era_t\n"
"{\n"
" const char *locale;\n"
" const wchar_t *date_fmt;\n"
" const wchar_t *d_fmt;\n"
" const wchar_t *d_t_fmt;\n"
" const wchar_t *t_fmt;\n"
" const wchar_t *t_fmt_ampm;\n"
" const wchar_t *era;\n"
" const wchar_t *era_d_fmt;\n"
" const wchar_t *era_d_t_fmt;\n"
" const wchar_t *era_t_fmt;\n"
" const wchar_t *alt_digits;\n"
"};\n"
"\n"
"static struct lc_era_t lc_era[] =\n"
"{\n", vers, tstr);

  qsort (era, ecnt, sizeof (struct lc_era_t), locale_cmp);
  /* The order of the strings has to match the struct printed above. */
  for (i = 0; i < ecnt; ++i)
    fprintf (fp, " { \"%s\", L\"%s\", L\"%s\", L\"%s\", L\"%s\", L\"%s\", "
             "L\"%s\", L\"%s\", L\"%s\", L\"%s\", L\"%s\" },\n",
             era[i].locale, era[i].date_fmt,
             era[i].d_fmt, era[i].d_t_fmt,
             era[i].t_fmt, era[i].t_fmt_ampm,
             era[i].era, era[i].era_d_fmt,
             era[i].era_d_t_fmt, era[i].era_t_fmt,
             era[i].alt_digits);
  fputs ("};\n", fp);

  /* fclose flushes, so write errors may only show up here. */
  if (ferror (fp) | fclose (fp))
    {
      perror ("lc_era.h");
      exit (1);
    }
}
/* Read the locale names printed by "locale -a", restricted to names
   containing an underscore but no dot, collect the LC_TIME era data of
   each of them and finally write the result via create_list.  */
int
main ()
{
  char name[32];
  FILE *pp = popen ("locale -a | grep -a '_' | fgrep -v .", "r");

  if (pp == NULL)
    {
      perror ("popen failed");
      return 1;
    }
  while (fgets (name, sizeof name, pp) != NULL)
    {
      /* Cut off the line at the newline, if any. */
      name[strcspn (name, "\n")] = '\0';
      read_locale_era (name);
    }
  pclose (pp);
  create_list ();
  return 0;
}