From 6f401eccfb260b64a35a55e54e0e403324921690 Mon Sep 17 00:00:00 2001 From: Corinna Vinschen Date: Thu, 14 May 2009 19:49:37 +0000 Subject: [PATCH] * cygheap.cc (cygheap_init): Set Cygwin default locale values. * cygheap.h (struct cygheap_locale): New structure. (struct init_cygheap): Add cygheap_locale member locale. * dcrt0.cc (dll_crt0_1): Revert to calling _setlocale_r so that only the application's locale is reverted to "C". * environ.cc (environ_init): Remove unused got_lc variable. * fhandler.h (class dev_console): Remove now unused locale variables. * fhandler_console.cc (fhandler_console::get_tty_stuff): Remove setting dev_console's locale members. (dev_console::con_to_str): Use internal locale settings. Default to __ascii_wctomb if charset is "ASCII". (fhandler_console::write_normal): Ditto. * strfuncs.cc (__ascii_wctomb): Drop declaration. (__db_wctomb): Use fixed value 2 instead of not necessarily matching MB_CUR_MAX. (__eucjp_wctomb): Use 3 instead of MB_CUR_MAX. (sys_cp_wcstombs): Remove special case for "C" locale. (sys_wcstombs): Implement here. Use internal locale data stored on cygheap. (sys_cp_mbstowcs): Remove special case for "C" locale. (sys_mbstowcs): Implement here. Use internal locale data stored on cygheap. * syscalls.cc (internal_setlocale): New function to set cygheap locale data and to reset CWD posix path. (setlocale): Just call internal_setlocale from here if necessary. * wchar.h (__ascii_wctomb): Declare. (sys_wcstombs): Don't define inline, just declare. (sys_mbstowcs): Ditto.
--- winsup/cygwin/ChangeLog | 31 +++++++++++++++++++++++++++++++ winsup/cygwin/cygheap.cc | 4 ++++ winsup/cygwin/cygheap.h | 8 ++++++++ winsup/cygwin/dcrt0.cc | 4 ++-- winsup/cygwin/environ.cc | 6 +----- winsup/cygwin/fhandler.h | 4 ---- winsup/cygwin/fhandler_console.cc | 27 +++++---------------------- winsup/cygwin/strfuncs.cc | 24 ++++++++++++++++-------- winsup/cygwin/syscalls.cc | 25 +++++++++++++++++++++---- winsup/cygwin/wchar.h | 19 +++++++------------ 10 files changed, 95 insertions(+), 57 deletions(-) diff --git a/winsup/cygwin/ChangeLog b/winsup/cygwin/ChangeLog index bf8e9edb1..ea9a6ef22 100644 --- a/winsup/cygwin/ChangeLog +++ b/winsup/cygwin/ChangeLog @@ -1,3 +1,34 @@ +2009-05-14 Corinna Vinschen + + * cygheap.cc (cygheap_init): Set Cygwin default locale values. + * cygheap.h (struct cygheap_locale): New structure. + (struct init_cygheap): Add cygheap_locale member locale. + * dcrt0.cc (dll_crt0_1): Revert to calling _setlocale_r so that only + the application's locale is reverted to "C". + * environ.cc (environ_init): Remove unused got_lc variable. + * fhandler.h (class dev_console): Remove now unused locale variables. + * fhandler_console.cc (fhandler_console::get_tty_stuff): Remove + setting dev_console's locale members. + (dev_console::con_to_str): Use internal locale settings. Default to + __ascii_wctomb if charset is "ASCII". + (fhandler_console::write_normal): Ditto. + * strfuncs.cc (__ascii_wctomb): Drop declaration. + (__db_wctomb): Use fixed value 2 instead of not + necessarily matching MB_CUR_MAX. + (__eucjp_wctomb): Use 3 instead of MB_CUR_MAX. + (sys_cp_wcstombs): Remove special case for "C" locale. + (sys_wcstombs): Implement here. Use internal locale data stored on + cygheap. + (sys_cp_mbstowcs): Remove special case for "C" locale. + (sys_mbstowcs): Implement here. Use internal locale data stored on + cygheap. + * syscalls.cc (internal_setlocale): New function to set cygheap locale + data and to reset CWD posix path.
+ (setlocale): Just call internal_setlocale from here if necessary. + * wchar.h (__ascii_wctomb): Declare. + (sys_wcstombs): Don't define inline, just declare. + (sys_mbstowcs): Ditto. + 2009-05-14 Christopher Faylor * mount.cc (mount_info::init): Remove MOUNT_CYGWIN_EXEC setting when diff --git a/winsup/cygwin/cygheap.cc b/winsup/cygwin/cygheap.cc index 1e579f341..1a25b6f8e 100644 --- a/winsup/cygwin/cygheap.cc +++ b/winsup/cygwin/cygheap.cc @@ -156,6 +156,10 @@ cygheap_init () _cygheap_mid - _cygheap_start); cygheap_max = cygheap; _csbrk (sizeof (*cygheap)); + /* Default locale settings. */ + cygheap->locale.mbtowc = __utf8_mbtowc; + cygheap->locale.wctomb = __utf8_wctomb; + strcpy (cygheap->locale.charset, "ASCII"); /* Set umask to a sane default. */ cygheap->umask = 022; } diff --git a/winsup/cygwin/cygheap.h b/winsup/cygwin/cygheap.h index 0932f865d..2c878f0c9 100644 --- a/winsup/cygwin/cygheap.h +++ b/winsup/cygwin/cygheap.h @@ -258,6 +258,13 @@ struct cygheap_debug }; #endif +struct cygheap_locale +{ + mbtowc_p mbtowc; + wctomb_p wctomb; + char charset[ENCODING_LEN + 1]; +}; + struct user_heap_info { void *base; @@ -281,6 +288,7 @@ struct init_cygheap char *buckets[32]; cygheap_root root; cygheap_user user; + cygheap_locale locale; user_heap_info user_heap; mode_t umask; HANDLE console_h; diff --git a/winsup/cygwin/dcrt0.cc b/winsup/cygwin/dcrt0.cc index 12365b7b7..3088b5020 100644 --- a/winsup/cygwin/dcrt0.cc +++ b/winsup/cygwin/dcrt0.cc @@ -930,8 +930,8 @@ dll_crt0_1 (void *) do this for noncygwin case since the signal thread is blocked due to LoadLibrary serialization. 
*/ ld_preload (); - /* Reset current locale to "C" per POSIX */ - setlocale (LC_CTYPE, "C"); + /* Reset current application locale to "C" per POSIX */ + _setlocale_r (_REENT, LC_CTYPE, "C"); if (user_data->main) cygwin_exit (user_data->main (__argc, __argv, *user_data->envptr)); __asm__ (" \n\ diff --git a/winsup/cygwin/environ.cc b/winsup/cygwin/environ.cc index 161b9d98f..d4e003f04 100644 --- a/winsup/cygwin/environ.cc +++ b/winsup/cygwin/environ.cc @@ -732,7 +732,6 @@ environ_init (char **envp, int envc) static char NO_COPY cygterm[] = "TERM=cygwin"; myfault efault; tmp_pathbuf tp; - bool got_lc = false; static const char *lc_arr[] = { "LC_ALL", "LC_CTYPE", "LANG", NULL }; if (efault.faulted ()) @@ -791,10 +790,7 @@ environ_init (char **envp, int envc) char *buf = (char *) alloca (i); GetEnvironmentVariableA (lc_arr[lc], buf, i); if (setlocale (LC_CTYPE, buf)) - { - got_lc = true; - break; - } + break; } } /* We also need the CYGWIN variable early to know the value of the diff --git a/winsup/cygwin/fhandler.h b/winsup/cygwin/fhandler.h index 4102e47e3..4731c3089 100644 --- a/winsup/cygwin/fhandler.h +++ b/winsup/cygwin/fhandler.h @@ -894,10 +894,6 @@ class dev_console bool use_mouse; bool raw_win32_keyboard_mode; - mbtowc_p con_mbtowc; - wctomb_p con_wctomb; - char con_charset[ENCODING_LEN + 1]; - inline UINT get_console_cp (); DWORD con_to_str (char *d, int dlen, WCHAR w); DWORD str_to_con (mbtowc_p, char *, PWCHAR d, const char *s, DWORD sz); diff --git a/winsup/cygwin/fhandler_console.cc b/winsup/cygwin/fhandler_console.cc index a2b2cd38c..70f8b12cc 100644 --- a/winsup/cygwin/fhandler_console.cc +++ b/winsup/cygwin/fhandler_console.cc @@ -99,25 +99,6 @@ fhandler_console::get_tty_stuff (int flags = 0) dev_state->meta_mask |= RIGHT_ALT_PRESSED; dev_state->set_default_attr (); shared_console_info->tty_min_state.sethwnd ((HWND) INVALID_HANDLE_VALUE); - - /* Set the console charset and the mb<->wc conversion functions from - the current locale the first time the 
shared console info is created. - When this initialization is called, the current locale is the one - used when reading the environment. This way we get a console setting - which matches the setting of LC_ALL/LC_CTYPE/LANG at the time the - first Cygwin process in this console starts. - - This has an interesting effect. If none of the above environment - variables is set, the setting is equivalent to before when - CYGWIN=codepage was not set: The console charset will be the - default ANSI codepage. So it's sort of backward compatible. - - TODO: Find out if that's a feasible approach. It might be better - in the long run to have a distinct console charset environment - variable. */ - dev_state->con_mbtowc = __mbtowc; - dev_state->con_wctomb = __wctomb; - strcpy (dev_state->con_charset, __locale_charset ()); } return &shared_console_info->tty_min_state; @@ -146,7 +127,9 @@ tty_list::get_tty (int n) inline DWORD dev_console::con_to_str (char *d, int dlen, WCHAR w) { - return sys_cp_wcstombs (con_wctomb, con_charset, d, dlen, &w, 1); + return sys_cp_wcstombs (*cygheap->locale.charset == 'A' + ? __ascii_wctomb : cygheap->locale.wctomb, + cygheap->locale.charset, d, dlen, &w, 1); } inline UINT @@ -1467,8 +1450,8 @@ fhandler_console::write_normal (const unsigned char *src, f_mbtowc = __set_charset_from_codepage (cp, charset = charsetbuf); else { - f_mbtowc = dev_state->con_mbtowc; - charset = dev_state->con_charset; + charset = cygheap->locale.charset; + f_mbtowc = (*charset == 'A') ? 
__ascii_mbtowc : cygheap->locale.mbtowc; } /* First check if we have cached lead bytes of a former try to write diff --git a/winsup/cygwin/strfuncs.cc b/winsup/cygwin/strfuncs.cc index 150f51ff4..965bdeefd 100644 --- a/winsup/cygwin/strfuncs.cc +++ b/winsup/cygwin/strfuncs.cc @@ -50,7 +50,7 @@ __db_wctomb (struct _reent *r, char *s, wchar_t wchar, UINT cp) BOOL def_used = false; int ret = WideCharToMultiByte (cp, WC_NO_BEST_FIT_CHARS, &wchar, 1, s, - MB_CUR_MAX, NULL, &def_used); + 2, NULL, &def_used); if (ret > 0 && !def_used) return ret; @@ -65,8 +65,6 @@ __sjis_wctomb (struct _reent *r, char *s, wchar_t wchar, const char *charset, return __db_wctomb (r,s, wchar, 932); } -extern "C" int __ascii_wctomb (struct _reent *, char *, wchar_t, const char *, - mbstate_t *); extern "C" int __jis_wctomb (struct _reent *r, char *s, wchar_t wchar, const char *charset, mbstate_t *state) @@ -101,7 +99,7 @@ __eucjp_wctomb (struct _reent *r, char *s, wchar_t wchar, const char *charset, BOOL def_used = false; int ret = WideCharToMultiByte (20932, WC_NO_BEST_FIT_CHARS, &wchar, 1, s, - MB_CUR_MAX, NULL, &def_used); + 3, NULL, &def_used); if (ret > 0 && !def_used) { /* CP20932 representation of JIS-X-0212 character? */ @@ -418,8 +416,6 @@ sys_cp_wcstombs (wctomb_p f_wctomb, char *charset, char *dst, size_t len, mbstate_t ps; save_errno save; - if (f_wctomb == __ascii_wctomb) - f_wctomb = __utf8_wctomb; memset (&ps, 0, sizeof ps); if (dst == NULL) len = (size_t) -1; @@ -479,6 +475,13 @@ sys_cp_wcstombs (wctomb_p f_wctomb, char *charset, char *dst, size_t len, return n; } +size_t __stdcall +sys_wcstombs (char *dst, size_t len, const wchar_t * src, size_t nwc) +{ + return sys_cp_wcstombs (cygheap->locale.wctomb, cygheap->locale.charset, + dst, len, src, nwc); +} + /* Allocate a buffer big enough for the string, always including the terminating '\0'. The buffer pointer is returned in *dst_p, the return value is the number of bytes written to the buffer, as usual. 
@@ -527,8 +530,6 @@ sys_cp_mbstowcs (mbtowc_p f_mbtowc, char *charset, wchar_t *dst, size_t dlen, mbstate_t ps; save_errno save; - if (f_mbtowc == __ascii_mbtowc) - f_mbtowc = __utf8_mbtowc; memset (&ps, 0, sizeof ps); if (dst == NULL) len = (size_t)-1; @@ -597,6 +598,13 @@ sys_cp_mbstowcs (mbtowc_p f_mbtowc, char *charset, wchar_t *dst, size_t dlen, return count; } +size_t __stdcall +sys_mbstowcs (wchar_t * dst, size_t dlen, const char *src, size_t nms) +{ + return sys_cp_mbstowcs (cygheap->locale.mbtowc, cygheap->locale.charset, + dst, dlen, src, nms); +} + /* Same as sys_wcstombs_alloc, just backwards. */ size_t __stdcall sys_mbstowcs_alloc (wchar_t **dst_p, int type, const char *src, size_t nms) diff --git a/winsup/cygwin/syscalls.cc b/winsup/cygwin/syscalls.cc index ab319225c..fa0c607a6 100644 --- a/winsup/cygwin/syscalls.cc +++ b/winsup/cygwin/syscalls.cc @@ -4006,15 +4006,32 @@ unlinkat (int dirfd, const char *pathname, int flags) return (flags & AT_REMOVEDIR) ? rmdir (path) : unlink (path); } -extern "C" char * -setlocale (int category, const char *locale) +static void +internal_setlocale () { + strcpy (cygheap->locale.charset, __locale_charset ()); + if (*cygheap->locale.charset == 'A') + { + cygheap->locale.mbtowc = __utf8_mbtowc; + cygheap->locale.wctomb = __utf8_wctomb; + } + else + { + cygheap->locale.mbtowc = __mbtowc; + cygheap->locale.wctomb = __wctomb; + } /* Each setlocale potentially changes the multibyte representation of the CWD. Therefore we have to rest the CWD's posix path and reevaluate the next time it's used. */ /* FIXME: Other buffered paths might be affected as well.
*/ + cygheap->cwd.reset_posix (); +} + +extern "C" char * +setlocale (int category, const char *locale) +{ char *ret = _setlocale_r (_REENT, category, locale); - if (ret) - cygheap->cwd.reset_posix (); + if (ret && locale && (category == LC_ALL || category == LC_CTYPE)) + internal_setlocale (); return ret; } diff --git a/winsup/cygwin/wchar.h b/winsup/cygwin/wchar.h index 064b8df62..4c720cbc8 100644 --- a/winsup/cygwin/wchar.h +++ b/winsup/cygwin/wchar.h @@ -34,6 +34,7 @@ typedef int wctomb_f (struct _reent *, char *, wchar_t, const char *, typedef wctomb_f *wctomb_p; extern wctomb_p __wctomb; +extern wctomb_f __ascii_wctomb; extern wctomb_f __utf8_wctomb; extern char *__locale_charset (); @@ -48,12 +49,9 @@ extern mbtowc_p __set_charset_from_codepage (unsigned int cp, char *charset); size_t __stdcall sys_cp_wcstombs (wctomb_p, char *, char *, size_t, const wchar_t *, size_t = (size_t) -1) __attribute__ ((regparm(3))); -inline size_t -__stdcall sys_wcstombs (char *dst, size_t len, const wchar_t * src, - size_t nwc = (size_t) -1) -{ - return sys_cp_wcstombs (__wctomb, __locale_charset (), dst, len, src, nwc); -} +size_t __stdcall sys_wcstombs (char *dst, size_t len, const wchar_t * src, + size_t nwc = (size_t) -1) + __attribute__ ((regparm(3))); size_t __stdcall sys_wcstombs_alloc (char **, int, const wchar_t *, size_t = (size_t) -1) __attribute__ ((regparm(3))); @@ -61,12 +59,9 @@ size_t __stdcall sys_wcstombs_alloc (char **, int, const wchar_t *, size_t __stdcall sys_cp_mbstowcs (mbtowc_p, char *, wchar_t *, size_t, const char *, size_t = (size_t) -1) __attribute__ ((regparm(3))); -inline size_t -sys_mbstowcs (wchar_t * dst, size_t dlen, const char *src, - size_t nms = (size_t) -1) -{ - return sys_cp_mbstowcs (__mbtowc, __locale_charset (), dst, dlen, src, nms); -} +size_t __stdcall sys_mbstowcs (wchar_t * dst, size_t dlen, const char *src, + size_t nms = (size_t) -1) + __attribute__ ((regparm(3))); size_t __stdcall sys_mbstowcs_alloc (wchar_t **, int, const char *, size_t
= (size_t) -1) __attribute__ ((regparm(3)));