From d856640e1c8dc008113a6865826d9459710f9857 Mon Sep 17 00:00:00 2001 From: Corinna Vinschen Date: Mon, 28 Sep 2009 10:43:49 +0000 Subject: [PATCH] * cygheap.cc (cygheap_init): Default locale.charset to "UTF-8". * dcrt0.cc (dll_crt0_1): Call setlocale (LC_CTYPE, "") here, just before resetting application locale to "C". * environ.cc (environ_init): Drop code setting locale here. * syscalls.cc (check_codepage): Break out check for required codepage from internal_setlocale. (internal_setlocale): Just convert CWD and $PATH from old charset to new charset and set internal charset. (setlocale): Change accordingly. Don't require LC_ALL or LC_CTYPE to store old locale value. Always call internal_setlocale if charset changed due to locale environment changes. --- winsup/cygwin/ChangeLog | 14 +++++ winsup/cygwin/cygheap.cc | 2 +- winsup/cygwin/dcrt0.cc | 4 +- winsup/cygwin/environ.cc | 17 ------ winsup/cygwin/syscalls.cc | 106 +++++++++++++++++++++----------------- 5 files changed, 76 insertions(+), 67 deletions(-) diff --git a/winsup/cygwin/ChangeLog b/winsup/cygwin/ChangeLog index 76c9e151e..b47845acb 100644 --- a/winsup/cygwin/ChangeLog +++ b/winsup/cygwin/ChangeLog @@ -1,3 +1,17 @@ +2009-09-28 Corinna Vinschen + + * cygheap.cc (cygheap_init): Default locale.charset to "UTF-8". + * dcrt0.cc (dll_crt0_1): Call setlocale (LC_CTYPE, "") here, just + before resetting application locale to "C". + * environ.cc (environ_init): Drop code setting locale here. + * syscalls.cc (check_codepage): Break out check for required codepage + from internal_setlocale. + (internal_setlocale): Just convert CWD and $PATH from old charset to + new charset and set internal charset. + (setlocale): Change accordingly. Don't require LC_ALL or LC_CTYPE to + store old locale value. Always call internal_setlocale if charset + changed due to locale environment changes. + 2009-09-26 Eric Blake * exec.cc (fexecve): New function. diff --git a/winsup/cygwin/cygheap.cc b/winsup/cygwin/cygheap.cc index af5140a56..7c66d736a 100644 --- a/winsup/cygwin/cygheap.cc +++ b/winsup/cygwin/cygheap.cc @@ -164,7 +164,7 @@ cygheap_init () /* Default locale settings. */ cygheap->locale.mbtowc = __utf8_mbtowc; cygheap->locale.wctomb = __utf8_wctomb; - strcpy (cygheap->locale.charset, "ASCII"); + strcpy (cygheap->locale.charset, "UTF-8"); /* Set umask to a sane default. */ cygheap->umask = 022; } diff --git a/winsup/cygwin/dcrt0.cc b/winsup/cygwin/dcrt0.cc index 2a023456f..e142b74c5 100644 --- a/winsup/cygwin/dcrt0.cc +++ b/winsup/cygwin/dcrt0.cc @@ -939,7 +939,9 @@ dll_crt0_1 (void *) do this for noncygwin case since the signal thread is blocked due to LoadLibrary serialization. */ ld_preload (); - /* Reset current application locale to "C" per POSIX */ + /* Set internal locale to the environment settings. */ + setlocale (LC_CTYPE, ""); + /* Reset application locale to "C" per POSIX */ _setlocale_r (_REENT, LC_CTYPE, "C"); if (user_data->main) cygwin_exit (user_data->main (__argc, __argv, *user_data->envptr)); diff --git a/winsup/cygwin/environ.cc b/winsup/cygwin/environ.cc index d4e003f04..bc1130372 100644 --- a/winsup/cygwin/environ.cc +++ b/winsup/cygwin/environ.cc @@ -732,7 +732,6 @@ environ_init (char **envp, int envc) static char NO_COPY cygterm[] = "TERM=cygwin"; myfault efault; tmp_pathbuf tp; - static const char *lc_arr[] = { "LC_ALL", "LC_CTYPE", "LANG", NULL }; if (efault.faulted ()) api_fatal ("internal error reading the windows environment - too many environment variables?"); @@ -777,22 +776,6 @@ environ_init (char **envp, int envc) /* Allocate space for environment + trailing NULL + CYGWIN env. */ lastenviron = envp = (char **) malloc ((4 + (envc = 100)) * sizeof (char *)); - /* We need the locale variables' content before we can loop through - the whole environment, so that the wide-char to multibyte conversion - can be done according to the $LC_ALL/$LC_CTYPE/$LANG/current_codepage - setting, as well as the uppercasing according to the "upcaseenv" - setting. Note that we have to reset the LC_CTYPE setting to "C" - before calling main() for POSIX compatibility. */ - for (int lc = 0; lc_arr[lc]; ++lc) - { - if ((i = GetEnvironmentVariableA (lc_arr[lc], NULL, 0))) - { - char *buf = (char *) alloca (i); - GetEnvironmentVariableA (lc_arr[lc], buf, i); - if (setlocale (LC_CTYPE, buf)) - break; - } - } /* We also need the CYGWIN variable early to know the value of the CYGWIN=upcaseenv setting for the below loop. */ if ((i = GetEnvironmentVariableA ("CYGWIN", NULL, 0))) diff --git a/winsup/cygwin/syscalls.cc b/winsup/cygwin/syscalls.cc index 94410fed3..67dddf3d9 100644 --- a/winsup/cygwin/syscalls.cc +++ b/winsup/cygwin/syscalls.cc @@ -4151,69 +4151,79 @@ unlinkat (int dirfd, const char *pathname, int flags) } static char * -internal_setlocale (char *ret) +check_codepage (char *ret) { - tmp_pathbuf tp; + if (!wincap.has_always_all_codepages ()) + { + /* Prior to Windows Vista, many codepages are not installed by + default, or can be deinstalled. The following codepages require + that the respective conversion tables are installed into the OS. + So we check if they are installed and if not, setlocale should + fail. */ + CPINFO cpi; + UINT cp = 0; + if (__mbtowc == __sjis_mbtowc) + cp = 932; + else if (__mbtowc == __eucjp_mbtowc) + cp = 20932; + else if (__mbtowc == __gbk_mbtowc) + cp = 936; + else if (__mbtowc == __kr_mbtowc) + cp = 949; + else if (__mbtowc == __big5_mbtowc) + cp = 950; + if (cp && !GetCPInfo (cp, &cpi) + && GetLastError () == ERROR_INVALID_PARAMETER) + return NULL; + } + return ret; +} - /* Each setlocale potentially changes the multibyte representation - of the CWD. Therefore we have to reevaluate the CWD's posix path and - store in the new charset. */ +static void +internal_setlocale () +{ + /* Each setlocale from the environment potentially changes the + multibyte representation of the CWD. Therefore we have to + reevaluate the CWD's posix path and store in the new charset. + Same for the PATH environment variable. */ /* FIXME: Other buffered paths might be affected as well. */ - wchar_t *w_cwd = tp.w_get (); + tmp_pathbuf tp; + wchar_t *w_path, *w_cwd; + + debug_printf ("Cygwin charset changed from %s to %s", + cygheap->locale.charset, __locale_charset ()); + /* Fetch CWD and PATH and convert to wchar_t in previous charset. */ + w_path = tp.w_get (); + sys_mbstowcs (w_path, 32768, getenv ("PATH")); + w_cwd = tp.w_get (); cwdstuff::cwd_lock.acquire (); sys_mbstowcs (w_cwd, 32768, cygheap->cwd.get_posix ()); - - if (*__locale_charset () == 'A') - { - cygheap->locale.mbtowc = __utf8_mbtowc; - cygheap->locale.wctomb = __utf8_wctomb; - } - else - { - if (!wincap.has_always_all_codepages ()) - { - /* Prior to Windows Vista, many codepages are not installed by - default, or can be deinstalled. The following codepages require - that the respective conversion tables are installed into the OS. - So we check if they are installed and if not, setlocale should - fail. */ - CPINFO cpi; - UINT cp = 0; - if (__mbtowc == __sjis_mbtowc) - cp = 932; - else if (__mbtowc == __eucjp_mbtowc) - cp = 20932; - else if (__mbtowc == __gbk_mbtowc) - cp = 936; - else if (__mbtowc == __kr_mbtowc) - cp = 949; - else if (__mbtowc == __big5_mbtowc) - cp = 950; - if (cp && !GetCPInfo (cp, &cpi) - && GetLastError () == ERROR_INVALID_PARAMETER) - return NULL; - } - cygheap->locale.mbtowc = __mbtowc; - cygheap->locale.wctomb = __wctomb; - } + /* Set charset for internal conversion functions. */ + cygheap->locale.mbtowc = __mbtowc; + cygheap->locale.wctomb = __wctomb; strcpy (cygheap->locale.charset, __locale_charset ()); - - /* See above. */ + /* Restore CWD and PATH in new charset. */ cygheap->cwd.reset_posix (w_cwd); cwdstuff::cwd_lock.release (); - return ret; + char *c_path = tp.c_get (); + sys_wcstombs (c_path, 32768, w_path); + setenv ("PATH", c_path, 1); } extern "C" char * setlocale (int category, const char *locale) { char old[(LC_MESSAGES + 1) * (ENCODING_LEN + 1/*"/"*/ + 1)]; - if (locale && (category == LC_ALL || category == LC_CTYPE) - && !wincap.has_always_all_codepages ()) + if (locale && !wincap.has_always_all_codepages ()) stpcpy (old, _setlocale_r (_REENT, category, NULL)); char *ret = _setlocale_r (_REENT, category, locale); - if (ret && locale && (category == LC_ALL || category == LC_CTYPE) - && !(ret = internal_setlocale (ret))) - _setlocale_r (_REENT, category, old); + if (ret && locale) + { + if (!(ret = check_codepage (ret))) + _setlocale_r (_REENT, category, old); + else if (!*locale && strcmp (cygheap->locale.charset, + __locale_charset ()) != 0) + internal_setlocale (); + } return ret; }