* cygheap.cc (cygheap_init): Set Cygwin default locale values.

* cygheap.h (struct cygheap_locale): New structure.
	(struct user_heap_info): Add cygheap_locale member locale.
	* dcrt0.cc (dll_crt0_1): Revert to calling _setlocale_r so that only
	the application's locale is reverted to "C".
	* environ.cc (environ_init): Remove unused got_lc variable.
	* fhandler.h (class dev_console): Remove now unused locale variables.
	* fhandler_console.cc (fhandler_console::get_tty_stuff): Remove
	setting dev_console's locale members.
	(dev_console::con_to_str): Use internal locale settings.  Default to
	__ascii_wctomb if charset is "ASCII".
	(fhandler_console::write_normal): Ditto.
	* strfuncs.cc (__ascii_wctomb): Drop declaration.
	(__db_wctomb): Use fixed value 2 instead of not
	necessarily matching MB_CUR_MAX.
	(__eucjp_wctomb): Use 3 instead of MB_CUR_MAX.
	(sys_cp_wcstombs): Remove special case for "C" locale.
	(sys_wcstombs): Implement here.  Use internal locale data stored on
	cygheap.
	(sys_cp_mbstowcs): Remove special case for "C" locale.
	(sys_mbstowcs): Implement here.  Use internal locale data stored on
	cygheap.
	* syscalls.cc (internal_setlocale): New function to set cygheap locale
	data and to reset CWD posix path.
	(setlocale): Just call internal_setlocale from here if necessary.
	* wchar.h (__ascii_wctomb): Declare.
	(sys_wcstombs): Don't define inline, just declare.
	(sys_mbstowcs): Ditto.
This commit is contained in:
Corinna Vinschen 2009-05-14 19:49:37 +00:00
parent ef5da523a9
commit 6f401eccfb
10 changed files with 95 additions and 57 deletions

View File

@ -1,3 +1,34 @@
2009-05-14 Corinna Vinschen <corinna@vinschen.de>
* cygheap.cc (cygheap_init): Set Cygwin default locale values.
* cygheap.h (struct cygheap_locale): New structure.
(struct user_heap_info): Add cygheap_locale member locale.
* dcrt0.cc (dll_crt0_1): Revert to calling _setlocale_r so that only
the application's locale is reverted to "C".
* environ.cc (environ_init): Remove unused got_lc variable.
* fhandler.h (class dev_console): Remove now unused locale variables.
* fhandler_console.cc (fhandler_console::get_tty_stuff): Remove
setting dev_console's locale members.
(dev_console::con_to_str): Use internal locale settings. Default to
__ascii_wctomb if charset is "ASCII".
(fhandler_console::write_normal): Ditto.
* strfuncs.cc (__ascii_wctomb): Drop declaration.
(__db_wctomb): Use fixed value 2 instead of not
necessarily matching MB_CUR_MAX.
(__eucjp_wctomb): Use 3 instead of MB_CUR_MAX.
(sys_cp_wcstombs): Remove special case for "C" locale.
(sys_wcstombs): Implement here. Use internal locale data stored on
cygheap.
(sys_cp_mbstowcs): Remove special case for "C" locale.
(sys_mbstowcs): Implement here. Use internal locale data stored on
cygheap.
* syscalls.cc (internal_setlocale): New function to set cygheap locale
data and to reset CWD posix path.
(setlocale): Just call internal_setlocale from here if necessary.
* wchar.h (__ascii_wctomb): Declare.
(sys_wcstombs): Don't define inline, just declare.
(sys_mbstowcs): Ditto.
2009-05-14 Christopher Faylor <me+cygwin@cgf.cx>
* mount.cc (mount_info::init): Remove MOUNT_CYGWIN_EXEC setting when

View File

@ -156,6 +156,10 @@ cygheap_init ()
_cygheap_mid - _cygheap_start);
cygheap_max = cygheap;
_csbrk (sizeof (*cygheap));
/* Default locale settings. */
cygheap->locale.mbtowc = __utf8_mbtowc;
cygheap->locale.wctomb = __utf8_wctomb;
strcpy (cygheap->locale.charset, "ASCII");
/* Set umask to a sane default. */
cygheap->umask = 022;
}

View File

@ -258,6 +258,13 @@ struct cygheap_debug
};
#endif
/* Locale data Cygwin keeps for its own multibyte <-> wide character
   conversions.  One instance lives in init_cygheap as member `locale';
   cygheap_init fills in the defaults. */
struct cygheap_locale
{
/* Multibyte-to-wide-char conversion function; sys_mbstowcs hands it to
   sys_cp_mbstowcs. */
mbtowc_p mbtowc;
/* Wide-char-to-multibyte conversion function; sys_wcstombs hands it to
   sys_cp_wcstombs. */
wctomb_p wctomb;
/* NUL-terminated charset name passed along with the functions above.
   cygheap_init sets it to "ASCII". */
char charset[ENCODING_LEN + 1];
};
struct user_heap_info
{
void *base;
@ -281,6 +288,7 @@ struct init_cygheap
char *buckets[32];
cygheap_root root;
cygheap_user user;
cygheap_locale locale;
user_heap_info user_heap;
mode_t umask;
HANDLE console_h;

View File

@ -930,8 +930,8 @@ dll_crt0_1 (void *)
do this for noncygwin case since the signal thread is blocked due to
LoadLibrary serialization. */
ld_preload ();
/* Reset current locale to "C" per POSIX */
setlocale (LC_CTYPE, "C");
/* Reset current application locale to "C" per POSIX */
_setlocale_r (_REENT, LC_CTYPE, "C");
if (user_data->main)
cygwin_exit (user_data->main (__argc, __argv, *user_data->envptr));
__asm__ (" \n\

View File

@ -732,7 +732,6 @@ environ_init (char **envp, int envc)
static char NO_COPY cygterm[] = "TERM=cygwin";
myfault efault;
tmp_pathbuf tp;
bool got_lc = false;
static const char *lc_arr[] = { "LC_ALL", "LC_CTYPE", "LANG", NULL };
if (efault.faulted ())
@ -791,10 +790,7 @@ environ_init (char **envp, int envc)
char *buf = (char *) alloca (i);
GetEnvironmentVariableA (lc_arr[lc], buf, i);
if (setlocale (LC_CTYPE, buf))
{
got_lc = true;
break;
}
break;
}
}
/* We also need the CYGWIN variable early to know the value of the

View File

@ -894,10 +894,6 @@ class dev_console
bool use_mouse;
bool raw_win32_keyboard_mode;
mbtowc_p con_mbtowc;
wctomb_p con_wctomb;
char con_charset[ENCODING_LEN + 1];
inline UINT get_console_cp ();
DWORD con_to_str (char *d, int dlen, WCHAR w);
DWORD str_to_con (mbtowc_p, char *, PWCHAR d, const char *s, DWORD sz);

View File

@ -99,25 +99,6 @@ fhandler_console::get_tty_stuff (int flags = 0)
dev_state->meta_mask |= RIGHT_ALT_PRESSED;
dev_state->set_default_attr ();
shared_console_info->tty_min_state.sethwnd ((HWND) INVALID_HANDLE_VALUE);
/* Set the console charset and the mb<->wc conversion functions from
the current locale the first time the shared console info is created.
When this initialization is called, the current locale is the one
used when reading the environment. This way we get a console setting
which matches the setting of LC_ALL/LC_CTYPE/LANG at the time the
first Cygwin process in this console starts.
This has an interesting effect. If none of the above environment
variables is set, the setting is equivalent to before when
CYGWIN=codepage was not set: The console charset will be the
default ANSI codepage. So it's sort of backward compatible.
TODO: Find out if that's a feasible approach. It might be better
in the long run to have a distinct console charset environment
variable. */
dev_state->con_mbtowc = __mbtowc;
dev_state->con_wctomb = __wctomb;
strcpy (dev_state->con_charset, __locale_charset ());
}
return &shared_console_info->tty_min_state;
@ -146,7 +127,9 @@ tty_list::get_tty (int n)
inline DWORD
dev_console::con_to_str (char *d, int dlen, WCHAR w)
{
return sys_cp_wcstombs (con_wctomb, con_charset, d, dlen, &w, 1);
return sys_cp_wcstombs (*cygheap->locale.charset == 'A'
? __ascii_wctomb : cygheap->locale.wctomb,
cygheap->locale.charset, d, dlen, &w, 1);
}
inline UINT
@ -1467,8 +1450,8 @@ fhandler_console::write_normal (const unsigned char *src,
f_mbtowc = __set_charset_from_codepage (cp, charset = charsetbuf);
else
{
f_mbtowc = dev_state->con_mbtowc;
charset = dev_state->con_charset;
charset = cygheap->locale.charset;
f_mbtowc = (*charset == 'A') ? __ascii_mbtowc : cygheap->locale.mbtowc;
}
/* First check if we have cached lead bytes of a former try to write

View File

@ -50,7 +50,7 @@ __db_wctomb (struct _reent *r, char *s, wchar_t wchar, UINT cp)
BOOL def_used = false;
int ret = WideCharToMultiByte (cp, WC_NO_BEST_FIT_CHARS, &wchar, 1, s,
MB_CUR_MAX, NULL, &def_used);
2, NULL, &def_used);
if (ret > 0 && !def_used)
return ret;
@ -65,8 +65,6 @@ __sjis_wctomb (struct _reent *r, char *s, wchar_t wchar, const char *charset,
return __db_wctomb (r,s, wchar, 932);
}
extern "C" int __ascii_wctomb (struct _reent *, char *, wchar_t, const char *,
mbstate_t *);
extern "C" int
__jis_wctomb (struct _reent *r, char *s, wchar_t wchar, const char *charset,
mbstate_t *state)
@ -101,7 +99,7 @@ __eucjp_wctomb (struct _reent *r, char *s, wchar_t wchar, const char *charset,
BOOL def_used = false;
int ret = WideCharToMultiByte (20932, WC_NO_BEST_FIT_CHARS, &wchar, 1, s,
MB_CUR_MAX, NULL, &def_used);
3, NULL, &def_used);
if (ret > 0 && !def_used)
{
/* CP20932 representation of JIS-X-0212 character? */
@ -418,8 +416,6 @@ sys_cp_wcstombs (wctomb_p f_wctomb, char *charset, char *dst, size_t len,
mbstate_t ps;
save_errno save;
if (f_wctomb == __ascii_wctomb)
f_wctomb = __utf8_wctomb;
memset (&ps, 0, sizeof ps);
if (dst == NULL)
len = (size_t) -1;
@ -479,6 +475,13 @@ sys_cp_wcstombs (wctomb_p f_wctomb, char *charset, char *dst, size_t len,
return n;
}
/* Convert the wide character string SRC to a multibyte string in DST using
   the conversion function and charset currently stored in the cygheap
   locale data.  All arguments are forwarded unchanged to sys_cp_wcstombs,
   whose result is returned. */
size_t __stdcall
sys_wcstombs (char *dst, size_t len, const wchar_t * src, size_t nwc)
{
  cygheap_locale &loc = cygheap->locale;

  return sys_cp_wcstombs (loc.wctomb, loc.charset, dst, len, src, nwc);
}
/* Allocate a buffer big enough for the string, always including the
terminating '\0'. The buffer pointer is returned in *dst_p, the return
value is the number of bytes written to the buffer, as usual.
@ -527,8 +530,6 @@ sys_cp_mbstowcs (mbtowc_p f_mbtowc, char *charset, wchar_t *dst, size_t dlen,
mbstate_t ps;
save_errno save;
if (f_mbtowc == __ascii_mbtowc)
f_mbtowc = __utf8_mbtowc;
memset (&ps, 0, sizeof ps);
if (dst == NULL)
len = (size_t)-1;
@ -597,6 +598,13 @@ sys_cp_mbstowcs (mbtowc_p f_mbtowc, char *charset, wchar_t *dst, size_t dlen,
return count;
}
/* Convert the multibyte string SRC to a wide character string in DST using
   the conversion function and charset currently stored in the cygheap
   locale data.  All arguments are forwarded unchanged to sys_cp_mbstowcs,
   whose result is returned. */
size_t __stdcall
sys_mbstowcs (wchar_t * dst, size_t dlen, const char *src, size_t nms)
{
  cygheap_locale &loc = cygheap->locale;

  return sys_cp_mbstowcs (loc.mbtowc, loc.charset, dst, dlen, src, nms);
}
/* Same as sys_wcstombs_alloc, just backwards. */
size_t __stdcall
sys_mbstowcs_alloc (wchar_t **dst_p, int type, const char *src, size_t nms)

View File

@ -4006,15 +4006,32 @@ unlinkat (int dirfd, const char *pathname, int flags)
return (flags & AT_REMOVEDIR) ? rmdir (path) : unlink (path);
}
extern "C" char *
setlocale (int category, const char *locale)
/* Bring the locale data on the cygheap in line with the locale that was
   just established, and invalidate the cached POSIX form of the CWD.

   The conversion functions must be chosen from the NEW charset as reported
   by __locale_charset ().  Testing cygheap->locale.charset before it is
   overwritten would pick the functions for the previous locale, e.g. pair
   the UTF-8 functions with a non-ASCII, non-UTF-8 charset on the first
   switch away from the "ASCII" default.  */
static void
internal_setlocale ()
{
  char *charset = __locale_charset ();

  if (*charset == 'A')
    {
      /* "ASCII" charset: use the UTF-8 conversion functions internally,
	 same as the defaults set up in cygheap_init.  */
      cygheap->locale.mbtowc = __utf8_mbtowc;
      cygheap->locale.wctomb = __utf8_wctomb;
    }
  else
    {
      cygheap->locale.mbtowc = __mbtowc;
      cygheap->locale.wctomb = __wctomb;
    }
  strcpy (cygheap->locale.charset, charset);
  /* Each setlocale potentially changes the multibyte representation
     of the CWD.  Therefore we have to reset the CWD's posix path and
     reevaluate the next time it's used.  */
  /* FIXME: Other buffered paths might be affected as well.  */
  cygheap->cwd.reset_posix ();
}
extern "C" char *
setlocale (int category, const char *locale)
{
char *ret = _setlocale_r (_REENT, category, locale);
if (ret)
cygheap->cwd.reset_posix ();
if (ret && locale && (category == LC_ALL || category == LC_CTYPE))
internal_setlocale ();
return ret;
}

View File

@ -34,6 +34,7 @@ typedef int wctomb_f (struct _reent *, char *, wchar_t, const char *,
typedef wctomb_f *wctomb_p;
extern wctomb_p __wctomb;
extern wctomb_f __ascii_wctomb;
extern wctomb_f __utf8_wctomb;
extern char *__locale_charset ();
@ -48,12 +49,9 @@ extern mbtowc_p __set_charset_from_codepage (unsigned int cp, char *charset);
size_t __stdcall sys_cp_wcstombs (wctomb_p, char *, char *, size_t,
const wchar_t *, size_t = (size_t) -1)
__attribute__ ((regparm(3)));
inline size_t
__stdcall sys_wcstombs (char *dst, size_t len, const wchar_t * src,
size_t nwc = (size_t) -1)
{
return sys_cp_wcstombs (__wctomb, __locale_charset (), dst, len, src, nwc);
}
size_t __stdcall sys_wcstombs (char *dst, size_t len, const wchar_t * src,
size_t nwc = (size_t) -1)
__attribute__ ((regparm(3)));
size_t __stdcall sys_wcstombs_alloc (char **, int, const wchar_t *,
size_t = (size_t) -1)
__attribute__ ((regparm(3)));
@ -61,12 +59,9 @@ size_t __stdcall sys_wcstombs_alloc (char **, int, const wchar_t *,
size_t __stdcall sys_cp_mbstowcs (mbtowc_p, char *, wchar_t *, size_t,
const char *, size_t = (size_t) -1)
__attribute__ ((regparm(3)));
inline size_t
sys_mbstowcs (wchar_t * dst, size_t dlen, const char *src,
size_t nms = (size_t) -1)
{
return sys_cp_mbstowcs (__mbtowc, __locale_charset (), dst, dlen, src, nms);
}
size_t sys_mbstowcs (wchar_t * dst, size_t dlen, const char *src,
size_t nms = (size_t) -1)
__attribute__ ((regparm(3)));
size_t __stdcall sys_mbstowcs_alloc (wchar_t **, int, const char *,
size_t = (size_t) -1)
__attribute__ ((regparm(3)));