Cygwin: add support for GB18030 codeset

Signed-off-by: Corinna Vinschen <corinna@vinschen.de>
2023-03-16 18:25:09 +01:00 · 2023-03-16 18:25:09 +01:00 · 5da71b6059
parent 2285cf0d1c
commit 5da71b6059
7 changed files with 37 additions and 7 deletions
--- a/newlib/libc/locale/locale.c
+++ b/newlib/libc/locale/locale.c
@ -65,8 +65,8 @@ build with multibyte support and support for all ISO and Windows Codepage.
 Otherwise all singlebyte charsets are simply mapped to ASCII.  Right now,
 only newlib for Cygwin is built with full charset support by default.
 Under Cygwin, this implementation additionally supports the charsets
-<<"GBK">>, <<"GB2312">>, <<"eucCN">>, <<"eucKR">>, and <<"Big5">>.  Cygwin
+<<"GB18030">>, <<"GBK">>, <<"GB2312">>, <<"eucCN">>, <<"eucKR">>, and
-does not support <<"JIS">>.
+<<"Big5">>.  Cygwin does not support <<"JIS">>.
 Cygwin additionally supports locales from the file
 /usr/share/locale/locale.alias.
@ -657,7 +657,7 @@ restart:
 	}
 #ifdef __CYGWIN__
      /* Newlib provides neither EUC-KR nor EUC-CN, and Cygwin's
-      	 implementation requires Windows support. */
+	 implementation requires Windows support. */
      else if (!strcasecmp (c, "KR"))
 	{
 	  strcpy (charset, "EUCKR");
@ -817,12 +817,19 @@ restart:
 	 requires Windows support. */
      if (!strcasecmp (charset, "GBK")
 	  || !strcasecmp (charset, "GB2312"))
-      	{
+	{
 	  strcpy (charset, charset[2] == '2' ? "GB2312" : "GBK");
 	  mbc_max = 2;
 	  l_wctomb = __gbk_wctomb;
 	  l_mbtowc = __gbk_mbtowc;
 	}
      else if (!strcasecmp (charset, "GB18030"))
 	{
 	  strcpy (charset, "GB18030");
 	  mbc_max = 4;
 	  l_wctomb = __gb18030_wctomb;
 	  l_mbtowc = __gb18030_mbtowc;
 	}
      else
 #endif /* __CYGWIN__ */
      /* GEORGIAN-PS and the alias without dash */
--- a/newlib/libc/stdlib/local.h
+++ b/newlib/libc/stdlib/local.h
@ -24,6 +24,7 @@ wctomb_p __iso_wctomb (int val);
 wctomb_p __cp_wctomb (int val);
 #ifdef __CYGWIN__
 /* wctomb conversion functions that are declared only when building for
    Cygwin (__CYGWIN__ defined).  locale.c assigns __gbk_wctomb for the
    GBK and GB2312 charsets and __gb18030_wctomb for GB18030. */
 wctomb_f __gbk_wctomb;
 wctomb_f __gb18030_wctomb;
 wctomb_f __kr_wctomb;
 wctomb_f __big5_wctomb;
 #endif
@ -45,6 +46,7 @@ mbtowc_p __iso_mbtowc (int val);
 mbtowc_p __cp_mbtowc (int val);
 #ifdef __CYGWIN__
 /* mbtowc conversion functions that are declared only when building for
    Cygwin (__CYGWIN__ defined).  locale.c assigns __gbk_mbtowc for the
    GBK and GB2312 charsets and __gb18030_mbtowc for GB18030. */
 mbtowc_f __gbk_mbtowc;
 mbtowc_f __gb18030_mbtowc;
 mbtowc_f __kr_mbtowc;
 mbtowc_f __big5_mbtowc;
 #endif
--- a/winsup/cygwin/fhandler/proc.cc
+++ b/winsup/cygwin/fhandler/proc.cc
@ -2323,6 +2323,7 @@ format_proc_codesets (void *, char *&destbuf)
 			 "EUC-CN\n"
 			 "EUC-JP\n"
 			 "EUC-KR\n"
 			 "GB18030\n"
 			 "GB2312\n"
 			 "GBK\n"
 			 "GEORGIAN-PS\n"
--- a/winsup/cygwin/nlsfuncs.cc
+++ b/winsup/cygwin/nlsfuncs.cc
@ -1578,8 +1578,8 @@ __eval_codepage_from_internal_charset ()
 	  break;
 	}
      break;
-    case 'G': /* GBK/GB2312 */
+    case 'G': /* GBK/GB2312/GB18030 */
-      codepage = 936;
+      codepage = (charset[2] == '1') ? 54936 : 936;
      break;
    case 'I': /* ISO-8859-x */
      codepage = strtoul (charset + 9, NULL, 10) + 28590;
--- a/winsup/cygwin/release/3.5.0
+++ b/winsup/cygwin/release/3.5.0
@ -21,3 +21,5 @@ What's new:
  supported codesets and locales for all interested parties.  Locale(1)
  opens these files and uses the info for printing locale info like any
  other process could do.
 - Add support for GB18030 codeset.
--- a/winsup/cygwin/strfuncs.cc
+++ b/winsup/cygwin/strfuncs.cc
@ -245,7 +245,8 @@ mbsnrtowci(wint_t *dst, const char **src, size_t nms, size_t len, mbstate_t *ps)
   eucJP, the two most used Japanese charset encodings, this shouldn't
   be such a big problem. */
-/* GBK, eucKR, and Big5 conversions are not available so far in newlib. */
+/* GBK, GB18030, eucKR, and Big5 conversions are not available so far
   in newlib. */
 static int
 __db_wctomb (struct _reent *r, char *s, wchar_t wchar, UINT cp)
@ -325,6 +326,12 @@ __gbk_wctomb (struct _reent *r, char *s, wchar_t wchar, mbstate_t *state)
  return __db_wctomb (r,s, wchar, 936);
 }
 extern "C" int
 __gb18030_wctomb (struct _reent *r, char *s, wchar_t wchar, mbstate_t *state)
 {
  return __db_wctomb (r,s, wchar, 54936);
 }
 extern "C" int
 __kr_wctomb (struct _reent *r, char *s, wchar_t wchar, mbstate_t *state)
 {
@ -482,6 +489,13 @@ __gbk_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
  return __db_mbtowc (r, pwc, s, n, 936, state);
 }
 extern "C" int
 __gb18030_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
 		  mbstate_t *state)
 {
  return __db_mbtowc (r, pwc, s, n, 54936, state);
 }
 extern "C" int
 __kr_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
 	     mbstate_t *state)
--- a/winsup/doc/new-features.xml
+++ b/winsup/doc/new-features.xml
@ -41,6 +41,10 @@ files and uses the info for printing locale info like any other process
 could do.
 </para></listitem>
 <listitem><para>
 Add support for GB18030 codeset.
 </para></listitem>
 </itemizedlist>
 </sect2>