* cygheap.cc (cygheap_init): Default locale.charset to "UTF-8".

* dcrt0.cc (dll_crt0_1): Call setlocale (LC_CTYPE, "") here, just
	before resetting application locale to "C".
	* environ.cc (environ_init): Drop code setting locale here.
	* syscalls.cc (check_codepage): Break out check for required codepage
	from internal_setlocale.
	(internal_setlocale): Just convert CWD and $PATH from old charset to
	new charset and set internal charset.
	(setlocale): Change accordingly.  Don't require LC_ALL or LC_CTYPE to
	store old locale value.  Always call internal_setlocale if charset
	changed due to locale environment changes.
This commit is contained in:
Corinna Vinschen 2009-09-28 10:43:49 +00:00
parent a58d484bcf
commit d856640e1c
5 changed files with 76 additions and 67 deletions

View File

@ -1,3 +1,17 @@
2009-09-28 Corinna Vinschen <corinna@vinschen.de>
* cygheap.cc (cygheap_init): Default locale.charset to "UTF-8".
* dcrt0.cc (dll_crt0_1): Call setlocale (LC_CTYPE, "") here, just
before resetting application locale to "C".
* environ.cc (environ_init): Drop code setting locale here.
* syscalls.cc (check_codepage): Break out check for required codepage
from internal_setlocale.
(internal_setlocale): Just convert CWD and $PATH from old charset to
new charset and set internal charset.
(setlocale): Change accordingly. Don't require LC_ALL or LC_CTYPE to
store old locale value. Always call internal_setlocale if charset
changed due to locale environment changes.
2009-09-26 Eric Blake <ebb9@byu.net>
* exec.cc (fexecve): New function.

View File

@ -164,7 +164,7 @@ cygheap_init ()
/* Default locale settings. */
cygheap->locale.mbtowc = __utf8_mbtowc;
cygheap->locale.wctomb = __utf8_wctomb;
strcpy (cygheap->locale.charset, "ASCII");
strcpy (cygheap->locale.charset, "UTF-8");
/* Set umask to a sane default. */
cygheap->umask = 022;
}

View File

@ -939,7 +939,9 @@ dll_crt0_1 (void *)
do this for noncygwin case since the signal thread is blocked due to
LoadLibrary serialization. */
ld_preload ();
/* Reset current application locale to "C" per POSIX */
/* Set internal locale to the environment settings. */
setlocale (LC_CTYPE, "");
/* Reset application locale to "C" per POSIX */
_setlocale_r (_REENT, LC_CTYPE, "C");
if (user_data->main)
cygwin_exit (user_data->main (__argc, __argv, *user_data->envptr));

View File

@ -732,7 +732,6 @@ environ_init (char **envp, int envc)
static char NO_COPY cygterm[] = "TERM=cygwin";
myfault efault;
tmp_pathbuf tp;
static const char *lc_arr[] = { "LC_ALL", "LC_CTYPE", "LANG", NULL };
if (efault.faulted ())
api_fatal ("internal error reading the windows environment - too many environment variables?");
@ -777,22 +776,6 @@ environ_init (char **envp, int envc)
/* Allocate space for environment + trailing NULL + CYGWIN env. */
lastenviron = envp = (char **) malloc ((4 + (envc = 100)) * sizeof (char *));
/* We need the locale variables' content before we can loop through
the whole environment, so that the wide-char to multibyte conversion
can be done according to the $LC_ALL/$LC_CTYPE/$LANG/current_codepage
setting, as well as the uppercasing according to the "upcaseenv"
setting. Note that we have to reset the LC_CTYPE setting to "C"
before calling main() for POSIX compatibility. */
for (int lc = 0; lc_arr[lc]; ++lc)
{
if ((i = GetEnvironmentVariableA (lc_arr[lc], NULL, 0)))
{
char *buf = (char *) alloca (i);
GetEnvironmentVariableA (lc_arr[lc], buf, i);
if (setlocale (LC_CTYPE, buf))
break;
}
}
/* We also need the CYGWIN variable early to know the value of the
CYGWIN=upcaseenv setting for the below loop. */
if ((i = GetEnvironmentVariableA ("CYGWIN", NULL, 0)))

View File

@ -4151,69 +4151,79 @@ unlinkat (int dirfd, const char *pathname, int flags)
}
static char *
internal_setlocale (char *ret)
check_codepage (char *ret)
{
tmp_pathbuf tp;
if (!wincap.has_always_all_codepages ())
{
/* Prior to Windows Vista, many codepages are not installed by
default, or can be deinstalled. The following codepages require
that the respective conversion tables are installed into the OS.
So we check if they are installed and if not, setlocale should
fail. */
CPINFO cpi;
UINT cp = 0;
if (__mbtowc == __sjis_mbtowc)
cp = 932;
else if (__mbtowc == __eucjp_mbtowc)
cp = 20932;
else if (__mbtowc == __gbk_mbtowc)
cp = 936;
else if (__mbtowc == __kr_mbtowc)
cp = 949;
else if (__mbtowc == __big5_mbtowc)
cp = 950;
if (cp && !GetCPInfo (cp, &cpi)
&& GetLastError () == ERROR_INVALID_PARAMETER)
return NULL;
}
return ret;
}
/* Each setlocale potentially changes the multibyte representation
of the CWD. Therefore we have to reevaluate the CWD's POSIX path and
store it in the new charset. */
static void
internal_setlocale ()
{
/* Each setlocale from the environment potentially changes the
multibyte representation of the CWD. Therefore we have to
reevaluate the CWD's POSIX path and store it in the new charset.
The same applies to the PATH environment variable. */
/* FIXME: Other buffered paths might be affected as well. */
wchar_t *w_cwd = tp.w_get ();
tmp_pathbuf tp;
wchar_t *w_path, *w_cwd;
debug_printf ("Cygwin charset changed from %s to %s",
cygheap->locale.charset, __locale_charset ());
/* Fetch CWD and PATH and convert to wchar_t in previous charset. */
w_path = tp.w_get ();
sys_mbstowcs (w_path, 32768, getenv ("PATH"));
w_cwd = tp.w_get ();
cwdstuff::cwd_lock.acquire ();
sys_mbstowcs (w_cwd, 32768, cygheap->cwd.get_posix ());
if (*__locale_charset () == 'A')
{
cygheap->locale.mbtowc = __utf8_mbtowc;
cygheap->locale.wctomb = __utf8_wctomb;
}
else
{
if (!wincap.has_always_all_codepages ())
{
/* Prior to Windows Vista, many codepages are not installed by
default, or can be deinstalled. The following codepages require
that the respective conversion tables are installed into the OS.
So we check if they are installed and if not, setlocale should
fail. */
CPINFO cpi;
UINT cp = 0;
if (__mbtowc == __sjis_mbtowc)
cp = 932;
else if (__mbtowc == __eucjp_mbtowc)
cp = 20932;
else if (__mbtowc == __gbk_mbtowc)
cp = 936;
else if (__mbtowc == __kr_mbtowc)
cp = 949;
else if (__mbtowc == __big5_mbtowc)
cp = 950;
if (cp && !GetCPInfo (cp, &cpi)
&& GetLastError () == ERROR_INVALID_PARAMETER)
return NULL;
}
cygheap->locale.mbtowc = __mbtowc;
cygheap->locale.wctomb = __wctomb;
}
/* Set charset for internal conversion functions. */
cygheap->locale.mbtowc = __mbtowc;
cygheap->locale.wctomb = __wctomb;
strcpy (cygheap->locale.charset, __locale_charset ());
/* See above. */
/* Restore CWD and PATH in new charset. */
cygheap->cwd.reset_posix (w_cwd);
cwdstuff::cwd_lock.release ();
return ret;
char *c_path = tp.c_get ();
sys_wcstombs (c_path, 32768, w_path);
setenv ("PATH", c_path, 1);
}
extern "C" char *
setlocale (int category, const char *locale)
{
char old[(LC_MESSAGES + 1) * (ENCODING_LEN + 1/*"/"*/ + 1)];
if (locale && (category == LC_ALL || category == LC_CTYPE)
&& !wincap.has_always_all_codepages ())
if (locale && !wincap.has_always_all_codepages ())
stpcpy (old, _setlocale_r (_REENT, category, NULL));
char *ret = _setlocale_r (_REENT, category, locale);
if (ret && locale && (category == LC_ALL || category == LC_CTYPE)
&& !(ret = internal_setlocale (ret)))
_setlocale_r (_REENT, category, old);
if (ret && locale)
{
if (!(ret = check_codepage (ret)))
_setlocale_r (_REENT, category, old);
else if (!*locale && strcmp (cygheap->locale.charset,
__locale_charset ()) != 0)
internal_setlocale ();
}
return ret;
}