* cygheap.cc (cygheap_init): Default locale.charset to "UTF-8".

* dcrt0.cc (dll_crt0_1): Call setlocale (LC_CTYPE, "") here, just
	before resetting application locale to "C".
	* environ.cc (environ_init): Drop code setting locale here.
	* syscalls.cc (check_codepage): Break out check for required codepage
	from internal_setlocale.
	(internal_setlocale): Just convert CWD and $PATH from old charset to
	new charset and set internal charset.
	(setlocale): Change accordingly.  Don't require LC_ALL or LC_CTYPE to
	store old locale value.  Always call internal_setlocale if charset
	changed due to locale environment changes.
This commit is contained in:
Corinna Vinschen 2009-09-28 10:43:49 +00:00
parent a58d484bcf
commit d856640e1c
5 changed files with 76 additions and 67 deletions

View File

@ -1,3 +1,17 @@
2009-09-28 Corinna Vinschen <corinna@vinschen.de>
* cygheap.cc (cygheap_init): Default locale.charset to "UTF-8".
* dcrt0.cc (dll_crt0_1): Call setlocale (LC_CTYPE, "") here, just
before resetting application locale to "C".
* environ.cc (environ_init): Drop code setting locale here.
* syscalls.cc (check_codepage): Break out check for required codepage
from internal_setlocale.
(internal_setlocale): Just convert CWD and $PATH from old charset to
new charset and set internal charset.
(setlocale): Change accordingly. Don't require LC_ALL or LC_CTYPE to
store old locale value. Always call internal_setlocale if charset
changed due to locale environment changes.
2009-09-26 Eric Blake <ebb9@byu.net>
* exec.cc (fexecve): New function.

View File

@ -164,7 +164,7 @@ cygheap_init ()
/* Default locale settings. */ /* Default locale settings. */
cygheap->locale.mbtowc = __utf8_mbtowc; cygheap->locale.mbtowc = __utf8_mbtowc;
cygheap->locale.wctomb = __utf8_wctomb; cygheap->locale.wctomb = __utf8_wctomb;
strcpy (cygheap->locale.charset, "ASCII"); strcpy (cygheap->locale.charset, "UTF-8");
/* Set umask to a sane default. */ /* Set umask to a sane default. */
cygheap->umask = 022; cygheap->umask = 022;
} }

View File

@ -939,7 +939,9 @@ dll_crt0_1 (void *)
do this for noncygwin case since the signal thread is blocked due to do this for noncygwin case since the signal thread is blocked due to
LoadLibrary serialization. */ LoadLibrary serialization. */
ld_preload (); ld_preload ();
/* Reset current application locale to "C" per POSIX */ /* Set internal locale to the environment settings. */
setlocale (LC_CTYPE, "");
/* Reset application locale to "C" per POSIX */
_setlocale_r (_REENT, LC_CTYPE, "C"); _setlocale_r (_REENT, LC_CTYPE, "C");
if (user_data->main) if (user_data->main)
cygwin_exit (user_data->main (__argc, __argv, *user_data->envptr)); cygwin_exit (user_data->main (__argc, __argv, *user_data->envptr));

View File

@ -732,7 +732,6 @@ environ_init (char **envp, int envc)
static char NO_COPY cygterm[] = "TERM=cygwin"; static char NO_COPY cygterm[] = "TERM=cygwin";
myfault efault; myfault efault;
tmp_pathbuf tp; tmp_pathbuf tp;
static const char *lc_arr[] = { "LC_ALL", "LC_CTYPE", "LANG", NULL };
if (efault.faulted ()) if (efault.faulted ())
api_fatal ("internal error reading the windows environment - too many environment variables?"); api_fatal ("internal error reading the windows environment - too many environment variables?");
@ -777,22 +776,6 @@ environ_init (char **envp, int envc)
/* Allocate space for environment + trailing NULL + CYGWIN env. */ /* Allocate space for environment + trailing NULL + CYGWIN env. */
lastenviron = envp = (char **) malloc ((4 + (envc = 100)) * sizeof (char *)); lastenviron = envp = (char **) malloc ((4 + (envc = 100)) * sizeof (char *));
/* We need the locale variables' content before we can loop through
the whole environment, so that the wide-char to multibyte conversion
can be done according to the $LC_ALL/$LC_CTYPE/$LANG/current_codepage
setting, as well as the uppercasing according to the "upcaseenv"
setting. Note that we have to reset the LC_CTYPE setting to "C"
before calling main() for POSIX compatibility. */
for (int lc = 0; lc_arr[lc]; ++lc)
{
if ((i = GetEnvironmentVariableA (lc_arr[lc], NULL, 0)))
{
char *buf = (char *) alloca (i);
GetEnvironmentVariableA (lc_arr[lc], buf, i);
if (setlocale (LC_CTYPE, buf))
break;
}
}
/* We also need the CYGWIN variable early to know the value of the /* We also need the CYGWIN variable early to know the value of the
CYGWIN=upcaseenv setting for the below loop. */ CYGWIN=upcaseenv setting for the below loop. */
if ((i = GetEnvironmentVariableA ("CYGWIN", NULL, 0))) if ((i = GetEnvironmentVariableA ("CYGWIN", NULL, 0)))

View File

@ -4151,24 +4151,7 @@ unlinkat (int dirfd, const char *pathname, int flags)
} }
static char * static char *
internal_setlocale (char *ret) check_codepage (char *ret)
{
tmp_pathbuf tp;
/* Each setlocale potentially changes the multibyte representation
of the CWD. Therefore we have to reevaluate the CWD's posix path and
store in the new charset. */
/* FIXME: Other buffered paths might be affected as well. */
wchar_t *w_cwd = tp.w_get ();
cwdstuff::cwd_lock.acquire ();
sys_mbstowcs (w_cwd, 32768, cygheap->cwd.get_posix ());
if (*__locale_charset () == 'A')
{
cygheap->locale.mbtowc = __utf8_mbtowc;
cygheap->locale.wctomb = __utf8_wctomb;
}
else
{ {
if (!wincap.has_always_all_codepages ()) if (!wincap.has_always_all_codepages ())
{ {
@ -4193,27 +4176,54 @@ internal_setlocale (char *ret)
&& GetLastError () == ERROR_INVALID_PARAMETER) && GetLastError () == ERROR_INVALID_PARAMETER)
return NULL; return NULL;
} }
return ret;
}
static void
internal_setlocale ()
{
/* Each setlocale from the environment potentially changes the
multibyte representation of the CWD. Therefore we have to
reevaluate the CWD's posix path and store in the new charset.
Same for the PATH environment variable. */
/* FIXME: Other buffered paths might be affected as well. */
tmp_pathbuf tp;
wchar_t *w_path, *w_cwd;
debug_printf ("Cygwin charset changed from %s to %s",
cygheap->locale.charset, __locale_charset ());
/* Fetch CWD and PATH and convert to wchar_t in previous charset. */
w_path = tp.w_get ();
sys_mbstowcs (w_path, 32768, getenv ("PATH"));
w_cwd = tp.w_get ();
cwdstuff::cwd_lock.acquire ();
sys_mbstowcs (w_cwd, 32768, cygheap->cwd.get_posix ());
/* Set charset for internal conversion functions. */
cygheap->locale.mbtowc = __mbtowc; cygheap->locale.mbtowc = __mbtowc;
cygheap->locale.wctomb = __wctomb; cygheap->locale.wctomb = __wctomb;
}
strcpy (cygheap->locale.charset, __locale_charset ()); strcpy (cygheap->locale.charset, __locale_charset ());
/* Restore CWD and PATH in new charset. */
/* See above. */
cygheap->cwd.reset_posix (w_cwd); cygheap->cwd.reset_posix (w_cwd);
cwdstuff::cwd_lock.release (); cwdstuff::cwd_lock.release ();
return ret; char *c_path = tp.c_get ();
sys_wcstombs (c_path, 32768, w_path);
setenv ("PATH", c_path, 1);
} }
extern "C" char * extern "C" char *
setlocale (int category, const char *locale) setlocale (int category, const char *locale)
{ {
char old[(LC_MESSAGES + 1) * (ENCODING_LEN + 1/*"/"*/ + 1)]; char old[(LC_MESSAGES + 1) * (ENCODING_LEN + 1/*"/"*/ + 1)];
if (locale && (category == LC_ALL || category == LC_CTYPE) if (locale && !wincap.has_always_all_codepages ())
&& !wincap.has_always_all_codepages ())
stpcpy (old, _setlocale_r (_REENT, category, NULL)); stpcpy (old, _setlocale_r (_REENT, category, NULL));
char *ret = _setlocale_r (_REENT, category, locale); char *ret = _setlocale_r (_REENT, category, locale);
if (ret && locale && (category == LC_ALL || category == LC_CTYPE) if (ret && locale)
&& !(ret = internal_setlocale (ret))) {
if (!(ret = check_codepage (ret)))
_setlocale_r (_REENT, category, old); _setlocale_r (_REENT, category, old);
else if (!*locale && strcmp (cygheap->locale.charset,
__locale_charset ()) != 0)
internal_setlocale ();
}
return ret; return ret;
} }