* cygheap.cc (cygheap_init): Default locale.charset to "UTF-8".
* dcrt0.cc (dll_crt0_1): Call setlocale (LC_CTYPE, "") here, just before resetting application locale to "C".
* environ.cc (environ_init): Drop code setting locale here.
* syscalls.cc (check_codepage): Break out check for required codepage from internal_setlocale.
(internal_setlocale): Just convert CWD and $PATH from old charset to new charset and set internal charset.
(setlocale): Change accordingly. Don't require LC_ALL or LC_CTYPE to store old locale value. Always call internal_setlocale if charset changed due to locale environment changes.
This commit is contained in:
parent
a58d484bcf
commit
d856640e1c
|
@ -1,3 +1,17 @@
|
||||||
|
2009-09-28 Corinna Vinschen <corinna@vinschen.de>
|
||||||
|
|
||||||
|
* cygheap.cc (cygheap_init): Default locale.charset to "UTF-8".
|
||||||
|
* dcrt0.cc (dll_crt0_1): Call setlocale (LC_CTYPE, "") here, just
|
||||||
|
before resetting application locale to "C".
|
||||||
|
* environ.cc (environ_init): Drop code setting locale here.
|
||||||
|
* syscalls.cc (check_codepage): Break out check for required codepage
|
||||||
|
from internal_setlocale.
|
||||||
|
(internal_setlocale): Just convert CWD and $PATH from old charset to
|
||||||
|
new charset and set internal charset.
|
||||||
|
(setlocale): Change accordingly. Don't require LC_ALL or LC_CTYPE to
|
||||||
|
store old locale value. Always call internal_setlocale if charset
|
||||||
|
changed due to locale environment changes.
|
||||||
|
|
||||||
2009-09-26 Eric Blake <ebb9@byu.net>
|
2009-09-26 Eric Blake <ebb9@byu.net>
|
||||||
|
|
||||||
* exec.cc (fexecve): New function.
|
* exec.cc (fexecve): New function.
|
||||||
|
|
|
@ -164,7 +164,7 @@ cygheap_init ()
|
||||||
/* Default locale settings. */
|
/* Default locale settings. */
|
||||||
cygheap->locale.mbtowc = __utf8_mbtowc;
|
cygheap->locale.mbtowc = __utf8_mbtowc;
|
||||||
cygheap->locale.wctomb = __utf8_wctomb;
|
cygheap->locale.wctomb = __utf8_wctomb;
|
||||||
strcpy (cygheap->locale.charset, "ASCII");
|
strcpy (cygheap->locale.charset, "UTF-8");
|
||||||
/* Set umask to a sane default. */
|
/* Set umask to a sane default. */
|
||||||
cygheap->umask = 022;
|
cygheap->umask = 022;
|
||||||
}
|
}
|
||||||
|
|
|
@ -939,7 +939,9 @@ dll_crt0_1 (void *)
|
||||||
do this for noncygwin case since the signal thread is blocked due to
|
do this for noncygwin case since the signal thread is blocked due to
|
||||||
LoadLibrary serialization. */
|
LoadLibrary serialization. */
|
||||||
ld_preload ();
|
ld_preload ();
|
||||||
/* Reset current application locale to "C" per POSIX */
|
/* Set internal locale to the environment settings. */
|
||||||
|
setlocale (LC_CTYPE, "");
|
||||||
|
/* Reset application locale to "C" per POSIX */
|
||||||
_setlocale_r (_REENT, LC_CTYPE, "C");
|
_setlocale_r (_REENT, LC_CTYPE, "C");
|
||||||
if (user_data->main)
|
if (user_data->main)
|
||||||
cygwin_exit (user_data->main (__argc, __argv, *user_data->envptr));
|
cygwin_exit (user_data->main (__argc, __argv, *user_data->envptr));
|
||||||
|
|
|
@ -732,7 +732,6 @@ environ_init (char **envp, int envc)
|
||||||
static char NO_COPY cygterm[] = "TERM=cygwin";
|
static char NO_COPY cygterm[] = "TERM=cygwin";
|
||||||
myfault efault;
|
myfault efault;
|
||||||
tmp_pathbuf tp;
|
tmp_pathbuf tp;
|
||||||
static const char *lc_arr[] = { "LC_ALL", "LC_CTYPE", "LANG", NULL };
|
|
||||||
|
|
||||||
if (efault.faulted ())
|
if (efault.faulted ())
|
||||||
api_fatal ("internal error reading the windows environment - too many environment variables?");
|
api_fatal ("internal error reading the windows environment - too many environment variables?");
|
||||||
|
@ -777,22 +776,6 @@ environ_init (char **envp, int envc)
|
||||||
/* Allocate space for environment + trailing NULL + CYGWIN env. */
|
/* Allocate space for environment + trailing NULL + CYGWIN env. */
|
||||||
lastenviron = envp = (char **) malloc ((4 + (envc = 100)) * sizeof (char *));
|
lastenviron = envp = (char **) malloc ((4 + (envc = 100)) * sizeof (char *));
|
||||||
|
|
||||||
/* We need the locale variables' content before we can loop through
|
|
||||||
the whole environment, so that the wide-char to multibyte conversion
|
|
||||||
can be done according to the $LC_ALL/$LC_CTYPE/$LANG/current_codepage
|
|
||||||
setting, as well as the uppercasing according to the "upcaseenv"
|
|
||||||
setting. Note that we have to reset the LC_CTYPE setting to "C"
|
|
||||||
before calling main() for POSIX compatibility. */
|
|
||||||
for (int lc = 0; lc_arr[lc]; ++lc)
|
|
||||||
{
|
|
||||||
if ((i = GetEnvironmentVariableA (lc_arr[lc], NULL, 0)))
|
|
||||||
{
|
|
||||||
char *buf = (char *) alloca (i);
|
|
||||||
GetEnvironmentVariableA (lc_arr[lc], buf, i);
|
|
||||||
if (setlocale (LC_CTYPE, buf))
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
/* We also need the CYGWIN variable early to know the value of the
|
/* We also need the CYGWIN variable early to know the value of the
|
||||||
CYGWIN=upcaseenv setting for the below loop. */
|
CYGWIN=upcaseenv setting for the below loop. */
|
||||||
if ((i = GetEnvironmentVariableA ("CYGWIN", NULL, 0)))
|
if ((i = GetEnvironmentVariableA ("CYGWIN", NULL, 0)))
|
||||||
|
|
|
@ -4151,24 +4151,7 @@ unlinkat (int dirfd, const char *pathname, int flags)
|
||||||
}
|
}
|
||||||
|
|
||||||
static char *
|
static char *
|
||||||
internal_setlocale (char *ret)
|
check_codepage (char *ret)
|
||||||
{
|
|
||||||
tmp_pathbuf tp;
|
|
||||||
|
|
||||||
/* Each setlocale potentially changes the multibyte representation
|
|
||||||
of the CWD. Therefore we have to reevaluate the CWD's posix path and
|
|
||||||
store in the new charset. */
|
|
||||||
/* FIXME: Other buffered paths might be affected as well. */
|
|
||||||
wchar_t *w_cwd = tp.w_get ();
|
|
||||||
cwdstuff::cwd_lock.acquire ();
|
|
||||||
sys_mbstowcs (w_cwd, 32768, cygheap->cwd.get_posix ());
|
|
||||||
|
|
||||||
if (*__locale_charset () == 'A')
|
|
||||||
{
|
|
||||||
cygheap->locale.mbtowc = __utf8_mbtowc;
|
|
||||||
cygheap->locale.wctomb = __utf8_wctomb;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
{
|
||||||
if (!wincap.has_always_all_codepages ())
|
if (!wincap.has_always_all_codepages ())
|
||||||
{
|
{
|
||||||
|
@ -4193,27 +4176,54 @@ internal_setlocale (char *ret)
|
||||||
&& GetLastError () == ERROR_INVALID_PARAMETER)
|
&& GetLastError () == ERROR_INVALID_PARAMETER)
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
internal_setlocale ()
|
||||||
|
{
|
||||||
|
/* Each setlocale from the environment potentially changes the
|
||||||
|
multibyte representation of the CWD. Therefore we have to
|
||||||
|
reevaluate the CWD's posix path and store in the new charset.
|
||||||
|
Same for the PATH environment variable. */
|
||||||
|
/* FIXME: Other buffered paths might be affected as well. */
|
||||||
|
tmp_pathbuf tp;
|
||||||
|
wchar_t *w_path, *w_cwd;
|
||||||
|
|
||||||
|
debug_printf ("Cygwin charset changed from %s to %s",
|
||||||
|
cygheap->locale.charset, __locale_charset ());
|
||||||
|
/* Fetch CWD and PATH and convert to wchar_t in previous charset. */
|
||||||
|
w_path = tp.w_get ();
|
||||||
|
sys_mbstowcs (w_path, 32768, getenv ("PATH"));
|
||||||
|
w_cwd = tp.w_get ();
|
||||||
|
cwdstuff::cwd_lock.acquire ();
|
||||||
|
sys_mbstowcs (w_cwd, 32768, cygheap->cwd.get_posix ());
|
||||||
|
/* Set charset for internal conversion functions. */
|
||||||
cygheap->locale.mbtowc = __mbtowc;
|
cygheap->locale.mbtowc = __mbtowc;
|
||||||
cygheap->locale.wctomb = __wctomb;
|
cygheap->locale.wctomb = __wctomb;
|
||||||
}
|
|
||||||
strcpy (cygheap->locale.charset, __locale_charset ());
|
strcpy (cygheap->locale.charset, __locale_charset ());
|
||||||
|
/* Restore CWD and PATH in new charset. */
|
||||||
/* See above. */
|
|
||||||
cygheap->cwd.reset_posix (w_cwd);
|
cygheap->cwd.reset_posix (w_cwd);
|
||||||
cwdstuff::cwd_lock.release ();
|
cwdstuff::cwd_lock.release ();
|
||||||
return ret;
|
char *c_path = tp.c_get ();
|
||||||
|
sys_wcstombs (c_path, 32768, w_path);
|
||||||
|
setenv ("PATH", c_path, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
extern "C" char *
|
extern "C" char *
|
||||||
setlocale (int category, const char *locale)
|
setlocale (int category, const char *locale)
|
||||||
{
|
{
|
||||||
char old[(LC_MESSAGES + 1) * (ENCODING_LEN + 1/*"/"*/ + 1)];
|
char old[(LC_MESSAGES + 1) * (ENCODING_LEN + 1/*"/"*/ + 1)];
|
||||||
if (locale && (category == LC_ALL || category == LC_CTYPE)
|
if (locale && !wincap.has_always_all_codepages ())
|
||||||
&& !wincap.has_always_all_codepages ())
|
|
||||||
stpcpy (old, _setlocale_r (_REENT, category, NULL));
|
stpcpy (old, _setlocale_r (_REENT, category, NULL));
|
||||||
char *ret = _setlocale_r (_REENT, category, locale);
|
char *ret = _setlocale_r (_REENT, category, locale);
|
||||||
if (ret && locale && (category == LC_ALL || category == LC_CTYPE)
|
if (ret && locale)
|
||||||
&& !(ret = internal_setlocale (ret)))
|
{
|
||||||
|
if (!(ret = check_codepage (ret)))
|
||||||
_setlocale_r (_REENT, category, old);
|
_setlocale_r (_REENT, category, old);
|
||||||
|
else if (!*locale && strcmp (cygheap->locale.charset,
|
||||||
|
__locale_charset ()) != 0)
|
||||||
|
internal_setlocale ();
|
||||||
|
}
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue