4
0
mirror of git://sourceware.org/git/newlib-cygwin.git synced 2025-03-02 13:05:42 +08:00

* cygheap.cc (cygheap_init): Default locale.charset to "UTF-8".

* dcrt0.cc (dll_crt0_1): Call setlocale (LC_CTYPE, "") here, just
	before resetting application locale to "C".
	* environ.cc (environ_init): Drop code setting locale here.
	* syscalls.cc (check_codepage): Break out check for required codepage
	from internal_setlocale.
	(internal_setlocale): Just convert CWD and $PATH from old charset to
	new charset and set internal charset.
	(setlocale): Change accordingly.  Don't require LC_ALL or LC_CTYPE to
	store old locale value.  Always call internal_setlocale if charset
	changed due to locale environment changes.
This commit is contained in:
Corinna Vinschen 2009-09-28 10:43:49 +00:00
parent a58d484bcf
commit d856640e1c
5 changed files with 76 additions and 67 deletions

View File

@ -1,3 +1,17 @@
2009-09-28 Corinna Vinschen <corinna@vinschen.de>
* cygheap.cc (cygheap_init): Default locale.charset to "UTF-8".
* dcrt0.cc (dll_crt0_1): Call setlocale (LC_CTYPE, "") here, just
before resetting application locale to "C".
* environ.cc (environ_init): Drop code setting locale here.
* syscalls.cc (check_codepage): Break out check for required codepage
from internal_setlocale.
(internal_setlocale): Just convert CWD and $PATH from old charset to
new charset and set internal charset.
(setlocale): Change accordingly. Don't require LC_ALL or LC_CTYPE to
store old locale value. Always call internal_setlocale if charset
changed due to locale environment changes.
2009-09-26 Eric Blake <ebb9@byu.net>
* exec.cc (fexecve): New function.

View File

@ -164,7 +164,7 @@ cygheap_init ()
/* Default locale settings. */ /* Default locale settings. */
cygheap->locale.mbtowc = __utf8_mbtowc; cygheap->locale.mbtowc = __utf8_mbtowc;
cygheap->locale.wctomb = __utf8_wctomb; cygheap->locale.wctomb = __utf8_wctomb;
strcpy (cygheap->locale.charset, "ASCII"); strcpy (cygheap->locale.charset, "UTF-8");
/* Set umask to a sane default. */ /* Set umask to a sane default. */
cygheap->umask = 022; cygheap->umask = 022;
} }

View File

@ -939,7 +939,9 @@ dll_crt0_1 (void *)
do this for noncygwin case since the signal thread is blocked due to do this for noncygwin case since the signal thread is blocked due to
LoadLibrary serialization. */ LoadLibrary serialization. */
ld_preload (); ld_preload ();
/* Reset current application locale to "C" per POSIX */ /* Set internal locale to the environment settings. */
setlocale (LC_CTYPE, "");
/* Reset application locale to "C" per POSIX */
_setlocale_r (_REENT, LC_CTYPE, "C"); _setlocale_r (_REENT, LC_CTYPE, "C");
if (user_data->main) if (user_data->main)
cygwin_exit (user_data->main (__argc, __argv, *user_data->envptr)); cygwin_exit (user_data->main (__argc, __argv, *user_data->envptr));

View File

@ -732,7 +732,6 @@ environ_init (char **envp, int envc)
static char NO_COPY cygterm[] = "TERM=cygwin"; static char NO_COPY cygterm[] = "TERM=cygwin";
myfault efault; myfault efault;
tmp_pathbuf tp; tmp_pathbuf tp;
static const char *lc_arr[] = { "LC_ALL", "LC_CTYPE", "LANG", NULL };
if (efault.faulted ()) if (efault.faulted ())
api_fatal ("internal error reading the windows environment - too many environment variables?"); api_fatal ("internal error reading the windows environment - too many environment variables?");
@ -777,22 +776,6 @@ environ_init (char **envp, int envc)
/* Allocate space for environment + trailing NULL + CYGWIN env. */ /* Allocate space for environment + trailing NULL + CYGWIN env. */
lastenviron = envp = (char **) malloc ((4 + (envc = 100)) * sizeof (char *)); lastenviron = envp = (char **) malloc ((4 + (envc = 100)) * sizeof (char *));
/* We need the locale variables' content before we can loop through
the whole environment, so that the wide-char to multibyte conversion
can be done according to the $LC_ALL/$LC_CTYPE/$LANG/current_codepage
setting, as well as the uppercasing according to the "upcaseenv"
setting. Note that we have to reset the LC_CTYPE setting to "C"
before calling main() for POSIX compatibility. */
for (int lc = 0; lc_arr[lc]; ++lc)
{
if ((i = GetEnvironmentVariableA (lc_arr[lc], NULL, 0)))
{
char *buf = (char *) alloca (i);
GetEnvironmentVariableA (lc_arr[lc], buf, i);
if (setlocale (LC_CTYPE, buf))
break;
}
}
/* We also need the CYGWIN variable early to know the value of the /* We also need the CYGWIN variable early to know the value of the
CYGWIN=upcaseenv setting for the below loop. */ CYGWIN=upcaseenv setting for the below loop. */
if ((i = GetEnvironmentVariableA ("CYGWIN", NULL, 0))) if ((i = GetEnvironmentVariableA ("CYGWIN", NULL, 0)))

View File

@ -4151,69 +4151,79 @@ unlinkat (int dirfd, const char *pathname, int flags)
} }
static char * static char *
internal_setlocale (char *ret) check_codepage (char *ret)
{ {
tmp_pathbuf tp; if (!wincap.has_always_all_codepages ())
{
/* Prior to Windows Vista, many codepages are not installed by
default, or can be deinstalled. The following codepages require
that the respective conversion tables are installed into the OS.
So we check if they are installed and if not, setlocale should
fail. */
CPINFO cpi;
UINT cp = 0;
if (__mbtowc == __sjis_mbtowc)
cp = 932;
else if (__mbtowc == __eucjp_mbtowc)
cp = 20932;
else if (__mbtowc == __gbk_mbtowc)
cp = 936;
else if (__mbtowc == __kr_mbtowc)
cp = 949;
else if (__mbtowc == __big5_mbtowc)
cp = 950;
if (cp && !GetCPInfo (cp, &cpi)
&& GetLastError () == ERROR_INVALID_PARAMETER)
return NULL;
}
return ret;
}
/* Each setlocale potentially changes the multibyte representation static void
of the CWD. Therefore we have to reevaluate the CWD's posix path and internal_setlocale ()
store in the new charset. */ {
/* Each setlocale from the environment potentially changes the
multibyte representation of the CWD. Therefore we have to
reevaluate the CWD's POSIX path and store it in the new charset.
The same applies to the PATH environment variable. */
/* FIXME: Other buffered paths might be affected as well. */ /* FIXME: Other buffered paths might be affected as well. */
wchar_t *w_cwd = tp.w_get (); tmp_pathbuf tp;
wchar_t *w_path, *w_cwd;
debug_printf ("Cygwin charset changed from %s to %s",
cygheap->locale.charset, __locale_charset ());
/* Fetch CWD and PATH and convert to wchar_t in previous charset. */
w_path = tp.w_get ();
sys_mbstowcs (w_path, 32768, getenv ("PATH"));
w_cwd = tp.w_get ();
cwdstuff::cwd_lock.acquire (); cwdstuff::cwd_lock.acquire ();
sys_mbstowcs (w_cwd, 32768, cygheap->cwd.get_posix ()); sys_mbstowcs (w_cwd, 32768, cygheap->cwd.get_posix ());
/* Set charset for internal conversion functions. */
if (*__locale_charset () == 'A') cygheap->locale.mbtowc = __mbtowc;
{ cygheap->locale.wctomb = __wctomb;
cygheap->locale.mbtowc = __utf8_mbtowc;
cygheap->locale.wctomb = __utf8_wctomb;
}
else
{
if (!wincap.has_always_all_codepages ())
{
/* Prior to Windows Vista, many codepages are not installed by
default, or can be deinstalled. The following codepages require
that the respective conversion tables are installed into the OS.
So we check if they are installed and if not, setlocale should
fail. */
CPINFO cpi;
UINT cp = 0;
if (__mbtowc == __sjis_mbtowc)
cp = 932;
else if (__mbtowc == __eucjp_mbtowc)
cp = 20932;
else if (__mbtowc == __gbk_mbtowc)
cp = 936;
else if (__mbtowc == __kr_mbtowc)
cp = 949;
else if (__mbtowc == __big5_mbtowc)
cp = 950;
if (cp && !GetCPInfo (cp, &cpi)
&& GetLastError () == ERROR_INVALID_PARAMETER)
return NULL;
}
cygheap->locale.mbtowc = __mbtowc;
cygheap->locale.wctomb = __wctomb;
}
strcpy (cygheap->locale.charset, __locale_charset ()); strcpy (cygheap->locale.charset, __locale_charset ());
/* Restore CWD and PATH in new charset. */
/* See above. */
cygheap->cwd.reset_posix (w_cwd); cygheap->cwd.reset_posix (w_cwd);
cwdstuff::cwd_lock.release (); cwdstuff::cwd_lock.release ();
return ret; char *c_path = tp.c_get ();
sys_wcstombs (c_path, 32768, w_path);
setenv ("PATH", c_path, 1);
} }
extern "C" char * extern "C" char *
setlocale (int category, const char *locale) setlocale (int category, const char *locale)
{ {
char old[(LC_MESSAGES + 1) * (ENCODING_LEN + 1/*"/"*/ + 1)]; char old[(LC_MESSAGES + 1) * (ENCODING_LEN + 1/*"/"*/ + 1)];
if (locale && (category == LC_ALL || category == LC_CTYPE) if (locale && !wincap.has_always_all_codepages ())
&& !wincap.has_always_all_codepages ())
stpcpy (old, _setlocale_r (_REENT, category, NULL)); stpcpy (old, _setlocale_r (_REENT, category, NULL));
char *ret = _setlocale_r (_REENT, category, locale); char *ret = _setlocale_r (_REENT, category, locale);
if (ret && locale && (category == LC_ALL || category == LC_CTYPE) if (ret && locale)
&& !(ret = internal_setlocale (ret))) {
_setlocale_r (_REENT, category, old); if (!(ret = check_codepage (ret)))
_setlocale_r (_REENT, category, old);
else if (!*locale && strcmp (cygheap->locale.charset,
__locale_charset ()) != 0)
internal_setlocale ();
}
return ret; return ret;
} }