From 4ab778df242efdc364b9a42b225c071e0ecc3cb7 Mon Sep 17 00:00:00 2001
From: Corinna Vinschen
Date: Mon, 20 Feb 2023 23:00:04 +0100
Subject: [PATCH] Cygwin: linux-locale-helpers: helper tools to generate locale data from Linux

Signed-off-by: Corinna Vinschen
---
 winsup/cygwin/linux-locale-helpers/README     |  10 +
 .../fetch-lc_collate-elements-from-glibc      |  67 ++++
 .../fetch-lc_messages-from-linux.c            | 235 +++++++++++++
 .../fetch-lc_time_era-from-linux.c            | 324 ++++++++++++++++++
 4 files changed, 636 insertions(+)
 create mode 100644 winsup/cygwin/linux-locale-helpers/README
 create mode 100755 winsup/cygwin/linux-locale-helpers/fetch-lc_collate-elements-from-glibc
 create mode 100644 winsup/cygwin/linux-locale-helpers/fetch-lc_messages-from-linux.c
 create mode 100644 winsup/cygwin/linux-locale-helpers/fetch-lc_time_era-from-linux.c

diff --git a/winsup/cygwin/linux-locale-helpers/README b/winsup/cygwin/linux-locale-helpers/README
new file mode 100644
index 000000000..2489416bc
--- /dev/null
+++ b/winsup/cygwin/linux-locale-helpers/README
@@ -0,0 +1,10 @@
+These scripts and helper applications are used to create locale data
+required for complete locale support, but either missing in Windows
+or implemented in a non-POSIXy way.
+
+The script has to run from inside a glibc git clone.
+The C tools can be built without any special options.
+
+All three tools generate the new locale headers (lc_collelem.h,
+lc_era.h, lc_msg.h) in the current working directory. They can just
+be copied to local_includes and committed without further changes.
diff --git a/winsup/cygwin/linux-locale-helpers/fetch-lc_collate-elements-from-glibc b/winsup/cygwin/linux-locale-helpers/fetch-lc_collate-elements-from-glibc
new file mode 100755
index 000000000..a0ff0e62f
--- /dev/null
+++ b/winsup/cygwin/linux-locale-helpers/fetch-lc_collate-elements-from-glibc
@@ -0,0 +1,67 @@
+#!/bin/bash
+#
+# SPDX-License-Identifier: BSD-2-Clause
+#
+# Generate lc_collelem.h, a table of all collating elements defined in the
+# glibc locale sources.  Assuming
+#
+#   git clone https://sourceware.org/git/glibc.git
+#   cd glibc
+#
+topdir="$(git rev-parse --show-toplevel)"
+if [ -z "${topdir}" ]
+then
+  echo "Not a git dir? Exit." >&2
+  exit 1
+fi
+cd "${topdir}" || exit 1
+glibc_conf="$(grep 'GNU C Library' configure.ac)"
+if [ -z "${glibc_conf}" ]
+then
+  echo "No GLibc configure.ac? Wrong git repo? Exit." >&2
+  exit 1
+fi
+if [ ! -f version.h ]
+then
+  echo "No version.h file? Exit." >&2
+  exit 1
+fi
+version=$(sed -n -e 's/#define VERSION "\(.*\)"/\1/p' version.h)
+if [ -z "${version}" ]
+then
+  echo "Malformed version.h file. Exit." >&2
+  exit 1
+fi
+if [ ! -d localedata/locales ]
+then
+  echo "No localedata/locales subdir. Broken repo? Exit." >&2
+  exit 1
+fi
+(
+  cd localedata/locales || exit 1
+  # The here-document body starts in column 0 on purpose: it is copied
+  # verbatim into the generated header.
+  cat <<EOF
+/* This struct of collating elements data has been generated by fetching
+   locale data from a GLibc ${version} source dir on $(date +%F). */
+struct collating_element_t
+{
+  const char32_t *element;
+  const char *locale;
+};
+
+collating_element_t collating_element[] =
+{
+EOF
+  # Turn every 'collating-element NAME from "<Uxxxx>..."' line into a table
+  # row, then rewrite the glibc <Uxxxx> code point symbols as C universal
+  # character names, zero-padding 4-, 5- and 6-digit values to 8 digits.
+  grep -r collating-element * \
+  | sed -e 's#^\([^:]*\):collating-element[ \t]*\([^ \t]*\)[ \t]*from[ \t]*"\(.*\)".*$#  { U"\3", "\1" }, /* \2 */#
+            s/<U\(....\)>/\\U0000\1/g
+            s/<U\(.....\)>/\\U000\1/g
+            s/<U\(......\)>/\\U00\1/g
+            s/iso14651_t1_common//g' \
+  | sort
+  echo "};"
+) > lc_collelem.h
diff --git a/winsup/cygwin/linux-locale-helpers/fetch-lc_messages-from-linux.c b/winsup/cygwin/linux-locale-helpers/fetch-lc_messages-from-linux.c
new file mode 100644
index 000000000..03755c6aa
--- /dev/null
+++ b/winsup/cygwin/linux-locale-helpers/fetch-lc_messages-from-linux.c
@@ -0,0 +1,235 @@
+/* SPDX-License-Identifier: BSD-2-Clause */
+/* Fetch the LC_MESSAGES strings of all locales installed on this Linux
+   system and write them as a C table to lc_msg.h in the current directory.  */
+#define _GNU_SOURCE
+#include <langinfo.h>
+#include <locale.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <wchar.h>
+
+#define MAX_LOCALES 512
+
+struct lc_msg_t {
+  char locale[64];
+  char yesstr[256];
+  char nostr[256];
+  char yesexpr[256];
+  char noexpr[256];
+} msg[MAX_LOCALES];
+int mcnt = 0;
+
+/* Convert the wide string WS into the body of a C wide string literal.
+   ASCII characters are copied, with double quote and backslash getting a
+   backslash prefix; everything else becomes a \x escape.  A hex digit
+   directly after a \x escape is escaped as well, otherwise the compiler
+   would take it as part of the preceding escape sequence.  Returns a
+   pointer to a static buffer which is overwritten by the next call; input
+   not fitting into the buffer is dropped with a warning.  */
+char *
+xfrm_utf (const wchar_t *ws)
+{
+  static char xfrm[256];
+  char *p = xfrm;
+  char *const end = xfrm + sizeof xfrm;
+  int wconst = 0;
+
+  while (*ws)
+    {
+      /* Worst case per character: "\x" plus 8 hex digits, plus the NUL.  */
+      if (end - p < 11)
+        {
+          fprintf (stderr, "xfrm_utf: string too long, truncated\n");
+          break;
+        }
+      if (*ws == L'"' || *ws == L'\\')
+        {
+          *p++ = '\\';
+          *p++ = (char) *ws;
+          wconst = 0;
+        }
+      else if (*ws < 0x80
+               && (!wconst || !wcschr (L"0123456789aAbBcCdDeEfF", *ws)))
+        {
+          *p++ = (char) *ws;
+          wconst = 0;
+        }
+      else
+        {
+          p += sprintf (p, "\\x%04lx", (unsigned long) *ws & 0xffffffffUL);
+          wconst = 1;
+        }
+      ++ws;
+    }
+  *p = '\0';
+  return xfrm;
+}
+
+/* Store the escaped form of langinfo ITEM of the current locale in DST, a
+   buffer of DSTSIZE bytes.  An item which cannot be converted to wide
+   characters is stored as an empty string.  */
+static void
+fetch_item (nl_item item, char *dst, size_t dstsize)
+{
+  wchar_t nlbuf[256];
+  /* Convert at most 255 characters so there is always room for the NUL
+     which mbstowcs omits when it runs out of space.  */
+  size_t len = mbstowcs (nlbuf, nl_langinfo (item), 255);
+
+  if (len == (size_t) -1)
+    len = 0;
+  nlbuf[len] = L'\0';
+  snprintf (dst, dstsize, "%s", xfrm_utf (nlbuf));
+}
+
+/* Record the LC_MESSAGES data of locale NAME (no codeset, e.g. "de_DE" or
+   "sr_RS@latin") in the msg table.  The data is read from the UTF-8
+   variant of the locale; a locale which cannot be activated is skipped
+   with a warning.  */
+void
+read_locale_messages (char *name)
+{
+  const char *mod = strchr (name, '@');
+  char locale[64];
+
+  /* Up to three entries are added per call, see the Serbian case below.  */
+  if (mcnt + 3 > MAX_LOCALES)
+    {
+      fprintf (stderr, "Too many locales, increase MAX_LOCALES\n");
+      exit (1);
+    }
+  /* The codeset goes in front of the modifier, e.g. sr_RS.utf8@latin.  */
+  if (mod)
+    snprintf (locale, sizeof locale, "%.*s.utf8%s",
+              (int) (mod - name), name, mod);
+  else
+    snprintf (locale, sizeof locale, "%s.utf8", name);
+  printf ("%s\n", locale);
+  if (!setlocale (LC_ALL, locale))
+    {
+      /* Otherwise the data of the previously active locale would be
+         stored under this name.  */
+      fprintf (stderr, "setlocale (%s) failed, skipping\n", locale);
+      return;
+    }
+
+  snprintf (msg[mcnt].locale, sizeof msg[mcnt].locale, "%s", name);
+  fetch_item (YESSTR, msg[mcnt].yesstr, sizeof msg[mcnt].yesstr);
+  fetch_item (NOSTR, msg[mcnt].nostr, sizeof msg[mcnt].nostr);
+  fetch_item (YESEXPR, msg[mcnt].yesexpr, sizeof msg[mcnt].yesexpr);
+  fetch_item (NOEXPR, msg[mcnt].noexpr, sizeof msg[mcnt].noexpr);
+  /* Serbian locale rename weirdness */
+  if (!strncmp (msg[mcnt].locale, "sr_RS", 5))
+    {
+      /* Create additional equivalent entry for the old locale sr_SP. */
+      ++mcnt;
+      memcpy (&msg[mcnt], &msg[mcnt - 1], sizeof msg[mcnt]);
+      msg[mcnt].locale[3] = 'S';
+      msg[mcnt].locale[4] = 'P';
+      /* Create additional equivalent entry for sr_ME@latin missing in Linux. */
+      if (!strcmp (msg[mcnt].locale, "sr_SP@latin"))
+        {
+          ++mcnt;
+          memcpy (&msg[mcnt], &msg[mcnt - 1], sizeof msg[mcnt]);
+          msg[mcnt].locale[3] = 'M';
+          msg[mcnt].locale[4] = 'E';
+        }
+    }
+  ++mcnt;
+}
+
+/* qsort comparator ordering table entries by locale name.  */
+int
+locale_cmp (const void *a, const void *b)
+{
+  const struct lc_msg_t *la = a;
+  const struct lc_msg_t *lb = b;
+  return strcmp (la->locale, lb->locale);
+}
+
+/* Sort the collected entries and write them to lc_msg.h.  The comment in
+   the generated file names the glibc package as reported by rpm.  */
+void
+create_list ()
+{
+  FILE *fp;
+  FILE *pp;
+  char vers[64];
+  char tstr[64];
+  char *c;
+  time_t tim;
+  int i;
+
+  fp = fopen ("lc_msg.h", "w");
+  if (!fp)
+    {
+      perror ("lc_msg.h");
+      exit (1);
+    }
+  /* Don't use VERS uninitialized if rpm is missing or prints nothing.  */
+  pp = popen ("rpm -q glibc", "r");
+  if (!pp || !fgets (vers, sizeof vers, pp))
+    strcpy (vers, "an unknown glibc version");
+  if (pp)
+    pclose (pp);
+  c = strchr (vers, '\n');
+  if (c)
+    *c = '\0';
+  tim = time (NULL);
+  strftime (tstr, sizeof tstr, "%F", gmtime (&tim));
+  fprintf (fp,
+"/* This struct of LC_MESSAGES data has been generated by fetching locale\n"
+"   data from a Linux system using %s on %s. */\n"
+"\n"
+"struct lc_msg_t\n"
+"{\n"
+"  const char *locale;\n"
+"  const wchar_t *yesexpr;\n"
+"  const wchar_t *noexpr;\n"
+"  const wchar_t *yesstr;\n"
+"  const wchar_t *nostr;\n"
+"};\n"
+"\n"
+"static struct lc_msg_t lc_msg[] =\n"
+"{\n", vers, tstr);
+
+  qsort (msg, mcnt, sizeof (struct lc_msg_t), locale_cmp);
+  for (i = 0; i < mcnt; ++i)
+    fprintf (fp, "  { \"%s\", L\"%s\", L\"%s\", L\"%s\", L\"%s\" },\n",
+             msg[i].locale,
+             msg[i].yesexpr, msg[i].noexpr,
+             msg[i].yesstr, msg[i].nostr);
+  fputs ("};\n", fp);
+  if (fclose (fp))
+    {
+      perror ("lc_msg.h");
+      exit (1);
+    }
+}
+
+int
+main ()
+{
+  char name[32], *c;
+  FILE *pp;
+
+  /* List all locales with a territory but without an explicit codeset.  */
+  pp = popen ("locale -a | grep -a '_' | grep -F -v .", "r");
+  if (!pp)
+    {
+      perror ("popen failed");
+      return 1;
+    }
+  while (fgets (name, 32, pp))
+    {
+      c = strchr (name, '\n');
+      if (c)
+        *c = '\0';
+      read_locale_messages (name);
+    }
+  pclose (pp);
+  create_list ();
+  return 0;
+}
diff --git a/winsup/cygwin/linux-locale-helpers/fetch-lc_time_era-from-linux.c b/winsup/cygwin/linux-locale-helpers/fetch-lc_time_era-from-linux.c
new file mode 100644
index 000000000..1ee75ca6b
--- /dev/null
+++ b/winsup/cygwin/linux-locale-helpers/fetch-lc_time_era-from-linux.c
@@ -0,0 +1,324 @@
+/* SPDX-License-Identifier: BSD-2-Clause */
+/* Fetch the LC_TIME era data of all locales installed on this Linux system
+   which define ERA or ALT_DIGITS and write it as a C table to lc_era.h in
+   the current directory.  */
+#define _GNU_SOURCE
+#include <langinfo.h>
+#include <locale.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <wchar.h>
+
+#define MAX_LOCALES 512
+
+struct lc_era_t {
+  char locale[64];
+  char *date_fmt;
+  char *d_fmt;
+  char *d_t_fmt;
+  char *t_fmt;
+  char *t_fmt_ampm;
+  char *era;
+  char *era_d_fmt;
+  char *era_d_t_fmt;
+  char *era_t_fmt;
+  char *alt_digits;
+} era[MAX_LOCALES];
+int ecnt = 0;
+
+/* Duplicate S, terminating the program if memory is exhausted.  */
+static char *
+xstrdup (const char *s)
+{
+  char *ret = strdup (s);
+
+  if (!ret)
+    {
+      perror ("strdup");
+      exit (1);
+    }
+  return ret;
+}
+
+/* Convert the wide string WS into the body of a C wide string literal.
+   ASCII characters are copied, with double quote and backslash getting a
+   backslash prefix; everything else becomes a \x escape.  A hex digit
+   directly after a \x escape is escaped as well, otherwise the compiler
+   would take it as part of the preceding escape sequence.  If SLIST is
+   non-zero, WS is a sequence of NUL-terminated strings ended by an empty
+   string, and the strings are joined with ';'.  Returns a pointer to a
+   static buffer which is overwritten by the next call; input not fitting
+   into the buffer is dropped with a warning.  */
+char *
+xfrm_utf (const wchar_t *ws, int slist)
+{
+  static char xfrm[4096];
+  char *p = xfrm;
+  char *const end = xfrm + sizeof xfrm;
+  int wconst = 0;
+
+  while (*ws)
+    {
+      /* Worst case per character: "\x" plus 8 hex digits and a ';', plus
+         the NUL.  */
+      if (end - p < 12)
+        {
+          fprintf (stderr, "xfrm_utf: string too long, truncated\n");
+          break;
+        }
+      if (*ws == L'"' || *ws == L'\\')
+        {
+          *p++ = '\\';
+          *p++ = (char) *ws;
+          wconst = 0;
+        }
+      else if (*ws < 0x80
+               && (!wconst || !wcschr (L"0123456789aAbBcCdDeEfF", *ws)))
+        {
+          *p++ = (char) *ws;
+          wconst = 0;
+        }
+      else
+        {
+          p += sprintf (p, "\\x%04lx", (unsigned long) *ws & 0xffffffffUL);
+          wconst = 1;
+        }
+      ++ws;
+      if (!*ws && slist)
+        {
+          /* End of one list element: step over its NUL and add a
+             separator if another element follows.  */
+          ++ws;
+          if (*ws)
+            *p++ = ';';
+        }
+    }
+  *p = '\0';
+  return xfrm;
+}
+
+/* Convert SLIST, a sequence of NUL-terminated multibyte strings ended by
+   an empty string (the layout this tool expects from nl_langinfo for ERA
+   and ALT_DIGITS), into a single malloc'ed string with the escaped
+   elements joined by ';'.  Elements which cannot be converted to wide
+   characters are skipped.  */
+char *
+xfrm_slist (const char *slist)
+{
+  static wchar_t wxfrm[4096];
+  wchar_t *wp = wxfrm;
+  wchar_t *const wend = wxfrm + sizeof wxfrm / sizeof wxfrm[0];
+
+  /* Stop while there is still room for one character, its NUL and the
+     empty string ending the list.  */
+  while (*slist && wend - wp >= 3)
+    {
+      size_t room = wend - wp - 2;
+      size_t len = mbstowcs (wp, slist, room);
+
+      if (len != (size_t) -1)
+        {
+          /* mbstowcs omits the NUL if it runs out of space.  */
+          wp[len] = L'\0';
+          wp += len + 1;
+        }
+      slist += strlen (slist) + 1;
+    }
+  *wp = L'\0';
+  /* The joined result is one ordinary string, so a plain copy suffices.  */
+  return xstrdup (xfrm_utf (wxfrm, 1));
+}
+
+/* Return a malloc'ed copy of the escaped form of langinfo ITEM of the
+   current locale.  An item which cannot be converted to wide characters
+   yields an empty string.  */
+static char *
+fetch_item (nl_item item)
+{
+  wchar_t nlbuf[256];
+  /* Convert at most 255 characters so there is always room for the NUL
+     which mbstowcs omits when it runs out of space.  */
+  size_t len = mbstowcs (nlbuf, nl_langinfo (item), 255);
+
+  if (len == (size_t) -1)
+    len = 0;
+  nlbuf[len] = L'\0';
+  return xstrdup (xfrm_utf (nlbuf, 0));
+}
+
+/* Record the LC_TIME formats, ERA and ALT_DIGITS data of locale NAME (no
+   codeset, e.g. "ja_JP") in the era table.  The data is read from the
+   UTF-8 variant of the locale.  Locales defining neither ERA nor
+   ALT_DIGITS are ignored; a locale which cannot be activated is skipped
+   with a warning.  */
+void
+read_locale_era (char *name)
+{
+  const char *mod = strchr (name, '@');
+  char *nl, *era_str, *altd_str;
+  char locale[64];
+
+  /* Up to three entries are added per call, see the Serbian case below.  */
+  if (ecnt + 3 > MAX_LOCALES)
+    {
+      fprintf (stderr, "Too many locales, increase MAX_LOCALES\n");
+      exit (1);
+    }
+  /* The codeset goes in front of the modifier, e.g. sr_RS.utf8@latin.  */
+  if (mod)
+    snprintf (locale, sizeof locale, "%.*s.utf8%s",
+              (int) (mod - name), name, mod);
+  else
+    snprintf (locale, sizeof locale, "%s.utf8", name);
+  printf ("%s\n", locale);
+  if (!setlocale (LC_ALL, locale))
+    {
+      /* Otherwise the data of the previously active locale would be
+         stored under this name.  */
+      fprintf (stderr, "setlocale (%s) failed, skipping\n", locale);
+      return;
+    }
+
+  /* Convert the lists right away, before further nl_langinfo calls.  */
+  nl = nl_langinfo (ERA);
+  era_str = *nl ? xfrm_slist (nl) : NULL;
+  nl = nl_langinfo (ALT_DIGITS);
+  altd_str = *nl ? xfrm_slist (nl) : NULL;
+  if (!era_str && !altd_str)
+    return;
+
+  snprintf (era[ecnt].locale, sizeof era[ecnt].locale, "%s", name);
+  era[ecnt].date_fmt = fetch_item (_DATE_FMT);
+  era[ecnt].d_fmt = fetch_item (D_FMT);
+  era[ecnt].d_t_fmt = fetch_item (D_T_FMT);
+  era[ecnt].t_fmt = fetch_item (T_FMT);
+  era[ecnt].t_fmt_ampm = fetch_item (T_FMT_AMPM);
+  era[ecnt].era = era_str ? era_str : "";
+  era[ecnt].alt_digits = altd_str ? altd_str : "";
+  era[ecnt].era_d_fmt = fetch_item (ERA_D_FMT);
+  era[ecnt].era_d_t_fmt = fetch_item (ERA_D_T_FMT);
+  era[ecnt].era_t_fmt = fetch_item (ERA_T_FMT);
+  /* Serbian locale rename weirdness */
+  if (!strncmp (era[ecnt].locale, "sr_RS", 5))
+    {
+      /* Create additional equivalent entries for the old locale sr_SP. */
+      ++ecnt;
+      memcpy (&era[ecnt], &era[ecnt - 1], sizeof era[ecnt]);
+      era[ecnt].locale[3] = 'S';
+      era[ecnt].locale[4] = 'P';
+      /* Create additional equivalent entry for sr_ME@latin missing in Linux. */
+      if (!strcmp (era[ecnt].locale, "sr_SP@latin"))
+        {
+          ++ecnt;
+          memcpy (&era[ecnt], &era[ecnt - 1], sizeof era[ecnt]);
+          era[ecnt].locale[3] = 'M';
+          era[ecnt].locale[4] = 'E';
+        }
+    }
+  ++ecnt;
+}
+
+/* qsort comparator ordering table entries by locale name.  */
+int
+locale_cmp (const void *a, const void *b)
+{
+  const struct lc_era_t *la = a;
+  const struct lc_era_t *lb = b;
+  return strcmp (la->locale, lb->locale);
+}
+
+/* Sort the collected entries and write them to lc_era.h.  The comment in
+   the generated file names the glibc package as reported by rpm.  */
+void
+create_list ()
+{
+  FILE *fp;
+  FILE *pp;
+  char vers[64];
+  char tstr[64];
+  char *c;
+  time_t tim;
+  int i;
+
+  fp = fopen ("lc_era.h", "w");
+  if (!fp)
+    {
+      perror ("lc_era.h");
+      exit (1);
+    }
+  /* Don't use VERS uninitialized if rpm is missing or prints nothing.  */
+  pp = popen ("rpm -q glibc", "r");
+  if (!pp || !fgets (vers, sizeof vers, pp))
+    strcpy (vers, "an unknown glibc version");
+  if (pp)
+    pclose (pp);
+  c = strchr (vers, '\n');
+  if (c)
+    *c = '\0';
+  tim = time (NULL);
+  strftime (tstr, sizeof tstr, "%F", gmtime (&tim));
+  fprintf (fp,
+"/* This struct of LC_TIME ERA data has been generated by fetching locale\n"
+"   data from a Linux system using %s on %s. */\n"
+"\n"
+"struct lc_era_t\n"
+"{\n"
+"  const char *locale;\n"
+"  const wchar_t *date_fmt;\n"
+"  const wchar_t *d_fmt;\n"
+"  const wchar_t *d_t_fmt;\n"
+"  const wchar_t *t_fmt;\n"
+"  const wchar_t *t_fmt_ampm;\n"
+"  const wchar_t *era;\n"
+"  const wchar_t *era_d_fmt;\n"
+"  const wchar_t *era_d_t_fmt;\n"
+"  const wchar_t *era_t_fmt;\n"
+"  const wchar_t *alt_digits;\n"
+"};\n"
+"\n"
+"static struct lc_era_t lc_era[] =\n"
+"{\n", vers, tstr);
+
+  qsort (era, ecnt, sizeof (struct lc_era_t), locale_cmp);
+  for (i = 0; i < ecnt; ++i)
+    fprintf (fp, "  { \"%s\", L\"%s\", L\"%s\", L\"%s\", L\"%s\", L\"%s\", "
+                 "L\"%s\", L\"%s\", L\"%s\", L\"%s\", L\"%s\" },\n",
+             era[i].locale, era[i].date_fmt,
+             era[i].d_fmt, era[i].d_t_fmt,
+             era[i].t_fmt, era[i].t_fmt_ampm,
+             era[i].era, era[i].era_d_fmt,
+             era[i].era_d_t_fmt, era[i].era_t_fmt,
+             era[i].alt_digits);
+  fputs ("};\n", fp);
+  if (fclose (fp))
+    {
+      perror ("lc_era.h");
+      exit (1);
+    }
+}
+
+int
+main ()
+{
+  char name[32], *c;
+  FILE *pp;
+
+  /* List all locales with a territory but without an explicit codeset.  */
+  pp = popen ("locale -a | grep -a '_' | grep -F -v .", "r");
+  if (!pp)
+    {
+      perror ("popen failed");
+      return 1;
+    }
+  while (fgets (name, 32, pp))
+    {
+      c = strchr (name, '\n');
+      if (c)
+        *c = '\0';
+      read_locale_era (name);
+    }
+  pclose (pp);
+  create_list ();
+  return 0;
+}