* ctype.cc (_CTYPE_DATA_0_127): Add _B class to TAB character.
(__ctype_default): New character class array for default ASCII character set. (__ctype_iso): New array of character class array for ISO charsets. (__ctype_cp): Ditto for singlebyte Windows codepages. (tolower): Implement as distinct function to support any singlebyte charset. (toupper): Ditto. (__set_ctype): New function to copy singlebyte character classes corresponding to current charset to ctype_b array. Align copyright text to upstream. * dcrt0.cc (dll_crt0_1): Reset current locale to "C" per POSIX. * environ.cc (set_file_api_mode): Remove. (codepage_init): Remove. (parse_thing): Remove "codepage" setting. (environ_init): Set locale according to environment settings, or to current codepage, before converting environment to multibyte. * fhandler.h (fhandler_console::write_replacement_char): Drop argument. * fhandler_console.cc (dev_console::str_to_con): Call sys_cp_mbstowcs rather than MultiByteToWideChar. (fhandler_console::write_replacement_char): Always print a funny half filled square if a character isn't in the current charset. (fhandler_console::write_normal): Convert to using __mbtowc rather than next_char. * fork.cc (frok::child): Drop call to set_file_api_mode. * globals.cc (enum codepage_type) Remove. (current_codepage): Remove. * miscfuncs.cc (cygwin_wcslwr): Unused, dangerous. Remove. (cygwin_wcsupr): Ditto. (is_cp_multibyte): Remove. (next_char): Remove. * miscfuncs.h (is_cp_multibyte): Drop declaration. (next_char): Ditto. * strfuncs.cc (get_cp): Remove. (__db_wctomb): New function to implement _wctomb_r functionality for doublebyte charsets using WideCharToMultiByte. (__sjis_wctomb): New function to replace unusable newlib function. (__jis_wctomb): Ditto. (__eucjp_wctomb): Ditto. (__gbk_wctomb): New function. (__kr_wctomb): Ditto. (__big5_wctomb): Ditto. (__db_mbtowc): New function to implement _mbtowc_r functionality for doublebyte charsets using MultiByteToWideChar. (__sjis_mbtowc): New function to replace unusable newlib function. 
(__jis_mbtowc): Ditto. (__eucjp_mbtowc): Ditto. (__gbk_mbtowc): New function. (__kr_mbtowc): New function (__big5_mbtowc): New function (__set_charset_from_codepage): New function. (sys_wcstombs): Reimplement, basically using same wide char to multibyte conversion as newlib's application level functions. Plus extras. Add lengthy comment to explain. Change return type to size_t. (sys_wcstombs_alloc): Just use sys_wcstombs. Change return type to size_t. (sys_cp_mbstowcs): Replace sys_mbstowcs, take additional codepage argument. Explain why. Change return type to size_t. (sys_mbstowcs_alloc): Just use sys_mbstowcs. Change return type to size_t. * wchar.h: Declare internal functions implemented in strfuncs.cc. (wcscasecmp): Remove. (wcsncasecmp): Remove. (wcslwr): Remove. (wcsupr): Remove. * winsup.h (codepage_init): Remove declaration. (get_cp): Ditto. (sys_wcstombs): Align declaration to new implementation. (sys_wcstombs_alloc): Ditto. (sys_cp_mbstowcs): Add declaration. (sys_mbstowcs): Define as inline function. (sys_mbstowcs_alloc): Align declaration to new implementation. (set_file_api_mode): Remove declaration. * include/ctype.h (isblank): Redefine to use _B character class. (toupper): Remove ASCII-only definition. (tolower): Ditto.
This commit is contained in:
parent
6a32d500a9
commit
161211d186
|
@ -1,4 +1,83 @@
|
|||
2009-03-23 Corinna Vinschen <corinna@vinschen.de>
|
||||
2009-03-24 Corinna Vinschen <corinna@vinschen.de>
|
||||
|
||||
* ctype.cc (_CTYPE_DATA_0_127): Add _B class to TAB character.
|
||||
(__ctype_default): New character class array for default ASCII
|
||||
character set.
|
||||
(__ctype_iso): New array of character class arrays for ISO charsets.
|
||||
(__ctype_cp): Ditto for singlebyte Windows codepages.
|
||||
(tolower): Implement as distinct function to support any singlebyte
|
||||
charset.
|
||||
(toupper): Ditto.
|
||||
(__set_ctype): New function to copy singlebyte character classes
|
||||
corresponding to current charset to ctype_b array.
|
||||
Align copyright text to upstream.
|
||||
* dcrt0.cc (dll_crt0_1): Reset current locale to "C" per POSIX.
|
||||
* environ.cc (set_file_api_mode): Remove.
|
||||
(codepage_init): Remove.
|
||||
(parse_thing): Remove "codepage" setting.
|
||||
(environ_init): Set locale according to environment settings, or
|
||||
to current codepage, before converting environment to multibyte.
|
||||
* fhandler.h (fhandler_console::write_replacement_char): Drop argument.
|
||||
* fhandler_console.cc (dev_console::str_to_con): Call sys_cp_mbstowcs
|
||||
rather than MultiByteToWideChar.
|
||||
(fhandler_console::write_replacement_char): Always print a funny
|
||||
half filled square if a character isn't in the current charset.
|
||||
(fhandler_console::write_normal): Convert to using __mbtowc
|
||||
rather than next_char.
|
||||
* fork.cc (frok::child): Drop call to set_file_api_mode.
|
||||
* globals.cc (enum codepage_type): Remove.
|
||||
(current_codepage): Remove.
|
||||
* miscfuncs.cc (cygwin_wcslwr): Unused, dangerous. Remove.
|
||||
(cygwin_wcsupr): Ditto.
|
||||
(is_cp_multibyte): Remove.
|
||||
(next_char): Remove.
|
||||
* miscfuncs.h (is_cp_multibyte): Drop declaration.
|
||||
(next_char): Ditto.
|
||||
* strfuncs.cc (get_cp): Remove.
|
||||
(__db_wctomb): New function to implement _wctomb_r functionality for
|
||||
doublebyte charsets using WideCharToMultiByte.
|
||||
(__sjis_wctomb): New function to replace unusable newlib function.
|
||||
(__jis_wctomb): Ditto.
|
||||
(__eucjp_wctomb): Ditto.
|
||||
(__gbk_wctomb): New function.
|
||||
(__kr_wctomb): Ditto.
|
||||
(__big5_wctomb): Ditto.
|
||||
(__db_mbtowc): New function to implement _mbtowc_r functionality for
|
||||
doublebyte charsets using MultiByteToWideChar.
|
||||
(__sjis_mbtowc): New function to replace unusable newlib function.
|
||||
(__jis_mbtowc): Ditto.
|
||||
(__eucjp_mbtowc): Ditto.
|
||||
(__gbk_mbtowc): New function.
|
||||
(__kr_mbtowc): New function.
|
||||
(__big5_mbtowc): New function.
|
||||
(__set_charset_from_codepage): New function.
|
||||
(sys_wcstombs): Reimplement, basically using same wide char to multibyte
|
||||
conversion as newlib's application level functions. Plus extras.
|
||||
Add lengthy comment to explain. Change return type to size_t.
|
||||
(sys_wcstombs_alloc): Just use sys_wcstombs. Change return type to
|
||||
size_t.
|
||||
(sys_cp_mbstowcs): Replace sys_mbstowcs, take additional codepage
|
||||
argument. Explain why. Change return type to size_t.
|
||||
(sys_mbstowcs_alloc): Just use sys_mbstowcs. Change return type to
|
||||
size_t.
|
||||
* wchar.h: Declare internal functions implemented in strfuncs.cc.
|
||||
(wcscasecmp): Remove.
|
||||
(wcsncasecmp): Remove.
|
||||
(wcslwr): Remove.
|
||||
(wcsupr): Remove.
|
||||
* winsup.h (codepage_init): Remove declaration.
|
||||
(get_cp): Ditto.
|
||||
(sys_wcstombs): Align declaration to new implementation.
|
||||
(sys_wcstombs_alloc): Ditto.
|
||||
(sys_cp_mbstowcs): Add declaration.
|
||||
(sys_mbstowcs): Define as inline function.
|
||||
(sys_mbstowcs_alloc): Align declaration to new implementation.
|
||||
(set_file_api_mode): Remove declaration.
|
||||
* include/ctype.h (isblank): Redefine to use _B character class.
|
||||
(toupper): Remove ASCII-only definition.
|
||||
(tolower): Ditto.
|
||||
|
||||
2009-03-24 Corinna Vinschen <corinna@vinschen.de>
|
||||
|
||||
* sec_auth.cc (str2buf2uni): Remove.
|
||||
* security.h (str2buf2uni): Remove declaration.
|
||||
|
|
|
@ -1,10 +1,12 @@
|
|||
#include "winsup.h"
|
||||
extern "C" {
|
||||
#include <ctype.h>
|
||||
#include <stdio.h>
#include <stdlib.h>
|
||||
#include <wctype.h>
|
||||
|
||||
#define _CTYPE_DATA_0_127 \
|
||||
_C, _C, _C, _C, _C, _C, _C, _C, \
|
||||
_C, _C|_S, _C|_S, _C|_S, _C|_S, _C|_S, _C, _C, \
|
||||
_C, _B|_C|_S, _C|_S, _C|_S, _C|_S, _C|_S, _C, _C, \
|
||||
_C, _C, _C, _C, _C, _C, _C, _C, \
|
||||
_C, _C, _C, _C, _C, _C, _C, _C, \
|
||||
_S|_B, _P, _P, _P, _P, _P, _P, _P, \
|
||||
|
@ -36,7 +38,645 @@ extern "C" {
|
|||
0, 0, 0, 0, 0, 0, 0, 0, \
|
||||
0, 0, 0, 0, 0, 0, 0, 0, \
|
||||
0, 0, 0, 0, 0, 0, 0, 0, \
|
||||
0, 0, 0, 0, 0, 0, 0, 0
|
||||
0, 0, 0, 0, 0, 0, 0, 0
|
||||
|
||||
/* FIXME: These tables should rather be defined in newlib and we should
|
||||
switch to the newer __ctype_ptr method from newlib for new applications. */
|
||||
|
||||
/* Character class array for the default ASCII character set: 128 entries
   initialized from _CTYPE_DATA_128_256.
   NOTE(review): presumably the fallback used by __set_ctype when no
   ISO/codepage table applies; that part of the function is not visible
   here -- confirm.  */
static char __ctype_default[128] = { _CTYPE_DATA_128_256 };
|
||||
/* Charset specific character class data for the ISO-8859 charsets, one row
   of 128 entries per charset.  Rows are in the order given by the row
   comments: ISO-8859-1 through -11, then -13 through -16 (there is no row
   for -12).  __set_ctype copies the selected row into ctype_b.  */
static char __ctype_iso[15][128] = {
|
||||
/* ISO-8859-1 */
|
||||
{ _C, _C, _C, _C, _C, _C, _C, _C,
|
||||
_C, _C, _C, _C, _C, _C, _C, _C,
|
||||
_C, _C, _C, _C, _C, _C, _C, _C,
|
||||
_C, _C, _C, _C, _C, _C, _C, _C,
|
||||
_S|_B, _P, _P, _P, _P, _P, _P, _P,
|
||||
_P, _P, _P, _P, _P, _P, _P, _P,
|
||||
_P, _P, _P, _P, _P, _P, _P, _P,
|
||||
_P, _P, _P, _P, _P, _P, _P, _P,
|
||||
_U, _U, _U, _U, _U, _U, _U, _U,
|
||||
_U, _U, _U, _U, _U, _U, _U, _U,
|
||||
_U, _U, _U, _U, _U, _U, _U, _P,
|
||||
_U, _U, _U, _U, _U, _U, _U, _L,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L,
|
||||
_L, _L, _L, _L, _L, _L, _L, _P,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L },
|
||||
/* ISO-8859-2 */
|
||||
{ _C, _C, _C, _C, _C, _C, _C, _C,
|
||||
_C, _C, _C, _C, _C, _C, _C, _C,
|
||||
_C, _C, _C, _C, _C, _C, _C, _C,
|
||||
_C, _C, _C, _C, _C, _C, _C, _C,
|
||||
_S|_B, _U, _P, _U, _P, _U, _U, _P,
|
||||
_P, _U, _U, _U, _U, _P, _U, _U,
|
||||
_P, _L, _P, _L, _P, _L, _L, _P,
|
||||
_P, _L, _L, _L, _L, _P, _L, _L,
|
||||
_U, _U, _U, _U, _U, _U, _U, _U,
|
||||
_U, _U, _U, _U, _U, _U, _U, _U,
|
||||
_U, _U, _U, _U, _U, _U, _U, _P,
|
||||
_U, _U, _U, _U, _U, _U, _U, _L,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L,
|
||||
_L, _L, _L, _L, _L, _L, _L, _P,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L },
|
||||
/* ISO-8859-3 */
|
||||
{ _C, _C, _C, _C, _C, _C, _C, _C,
|
||||
_C, _C, _C, _C, _C, _C, _C, _C,
|
||||
_C, _C, _C, _C, _C, _C, _C, _C,
|
||||
_C, _C, _C, _C, _C, _C, _C, _C,
|
||||
_S|_B, _U, _P, _P, _P, 0, _U, _P,
|
||||
_P, _U, _U, _U, _U, _P, 0, _U,
|
||||
_P, _L, _P, _P, _P, _L, _L, _P,
|
||||
_P, _L, _L, _L, _L, _P, 0, _L,
|
||||
_U, _U, _U, 0, _U, _U, _U, _U,
|
||||
_U, _U, _U, _U, _U, _U, _U, _U,
|
||||
0, _U, _U, _U, _U, _U, _U, _P,
|
||||
_U, _U, _U, _U, _U, _U, _U, _L,
|
||||
_L, _L, _L, 0, _L, _L, _L, _L,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L,
|
||||
0, _L, _L, _L, _L, _L, _L, _P,
|
||||
_L, _L, _L, _L, _L, _L, _L, _P },
|
||||
/* ISO-8859-4 */
|
||||
{ _C, _C, _C, _C, _C, _C, _C, _C,
|
||||
_C, _C, _C, _C, _C, _C, _C, _C,
|
||||
_C, _C, _C, _C, _C, _C, _C, _C,
|
||||
_C, _C, _C, _C, _C, _C, _C, _C,
|
||||
_S|_B, _U, _L, _U, _P, _U, _U, _P,
|
||||
_P, _U, _U, _U, _U, _P, _U, _P,
|
||||
_P, _L, _P, _L, _P, _L, _L, _P,
|
||||
_P, _L, _L, _L, _L, _P, _L, _L,
|
||||
_U, _U, _U, _U, _U, _U, _U, _U,
|
||||
_U, _U, _U, _U, _U, _U, _U, _U,
|
||||
_U, _U, _U, _U, _U, _U, _U, _P,
|
||||
_U, _U, _U, _U, _U, _U, _U, _L,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L,
|
||||
_L, _L, _L, _L, _L, _L, _L, _P,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L },
|
||||
/* ISO-8859-5 */
|
||||
{ _C, _C, _C, _C, _C, _C, _C, _C,
|
||||
_C, _C, _C, _C, _C, _C, _C, _C,
|
||||
_C, _C, _C, _C, _C, _C, _C, _C,
|
||||
_C, _C, _C, _C, _C, _C, _C, _C,
|
||||
_S|_B, _U, _U, _U, _U, _U, _U, _U,
|
||||
_U, _U, _U, _U, _U, _P, _U, _U,
|
||||
_U, _U, _U, _U, _U, _U, _U, _U,
|
||||
_U, _U, _U, _U, _U, _U, _U, _U,
|
||||
_U, _U, _U, _U, _U, _U, _U, _U,
|
||||
_U, _U, _U, _U, _U, _U, _U, _U,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L,
|
||||
_P, _L, _L, _L, _L, _L, _L, _L,
|
||||
_L, _L, _L, _L, _L, _P, _L, _L },
|
||||
/* ISO-8859-6 */
|
||||
{ _C, _C, _C, _C, _C, _C, _C, _C,
|
||||
_C, _C, _C, _C, _C, _C, _C, _C,
|
||||
_C, _C, _C, _C, _C, _C, _C, _C,
|
||||
_C, _C, _C, _C, _C, _C, _C, _C,
|
||||
_S|_B, 0, 0, 0, _P, 0, 0, 0,
|
||||
0, 0, 0, 0, _P, _P, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, _P, 0, 0, 0, _P,
|
||||
0, _L, _L, _L, _L, _L, _L, _L,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L,
|
||||
0, _L, _L, _L, _L, _L, _L, _L,
|
||||
_L, _L, _L, 0, 0, 0, 0, 0,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L,
|
||||
_L, _L, _L, _P, _P, _P, _P, _P,
|
||||
_P, _P, _P, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0 },
|
||||
/* ISO-8859-7 */
|
||||
{ _C, _C, _C, _C, _C, _C, _C, _C,
|
||||
_C, _C, _C, _C, _C, _C, _C, _C,
|
||||
_C, _C, _C, _C, _C, _C, _C, _C,
|
||||
_C, _C, _C, _C, _C, _C, _C, _C,
|
||||
_S|_B, _P, _P, _P, _P, _P, _P, _P,
|
||||
_P, _P, _P, _P, _P, _P, _P, _P,
|
||||
_P, _P, _P, _P, _P, _P, _U, _P,
|
||||
_U, _U, _U, _P, _U, _P, _U, _U,
|
||||
_L, _U, _U, _U, _U, _U, _U, _U,
|
||||
_U, _U, _U, _U, _U, _U, _U, _U,
|
||||
_U, _U, _U, _U, _U, _U, _U, _U,
|
||||
_U, _U, _U, _U, _L, _L, _L, _L,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L,
|
||||
_L, _L, _L, _L, _L, _L, _L, _P },
|
||||
/* ISO-8859-8 */
|
||||
{ _C, _C, _C, _C, _C, _C, _C, _C,
|
||||
_C, _C, _C, _C, _C, _C, _C, _C,
|
||||
_C, _C, _C, _C, _C, _C, _C, _C,
|
||||
_C, _C, _C, _C, _C, _C, _C, _C,
|
||||
_S|_B, 0, _P, _P, _P, _P, _P, _P,
|
||||
_P, _P, _P, _P, _P, _P, _P, _P,
|
||||
_P, _P, _P, _P, _P, _P, _P, _P,
|
||||
_P, _P, _P, _P, _P, _P, _P, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, _L,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L,
|
||||
_L, _L, _L, 0, 0, _P, _P, 0 },
|
||||
/* ISO-8859-9 */
|
||||
{ _C, _C, _C, _C, _C, _C, _C, _C,
|
||||
_C, _C, _C, _C, _C, _C, _C, _C,
|
||||
_C, _C, _C, _C, _C, _C, _C, _C,
|
||||
_C, _C, _C, _C, _C, _C, _C, _C,
|
||||
_S|_B, _P, _P, _P, _P, _P, _P, _P,
|
||||
_P, _P, _P, _P, _P, _P, _P, _P,
|
||||
_P, _P, _P, _P, _P, _P, _P, _P,
|
||||
_P, _P, _P, _P, _P, _P, _P, _P,
|
||||
_U, _U, _U, _U, _U, _U, _U, _U,
|
||||
_U, _U, _U, _U, _U, _U, _U, _U,
|
||||
_U, _U, _U, _U, _U, _U, _U, _P,
|
||||
_U, _U, _U, _U, _U, _U, _U, _L,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L,
|
||||
_L, _L, _L, _L, _L, _L, _L, _P,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L },
|
||||
/* ISO-8859-10 */
|
||||
{ _C, _C, _C, _C, _C, _C, _C, _C,
|
||||
_C, _C, _C, _C, _C, _C, _C, _C,
|
||||
_C, _C, _C, _C, _C, _C, _C, _C,
|
||||
_C, _C, _C, _C, _C, _C, _C, _C,
|
||||
_S|_B, _U, _U, _U, _U, _U, _U, _P,
|
||||
_U, _U, _U, _U, _U, _P, _U, _U,
|
||||
_P, _L, _L, _L, _L, _L, _L, _P,
|
||||
_L, _L, _L, _L, _L, _P, _L, _L,
|
||||
_U, _U, _U, _U, _U, _U, _U, _U,
|
||||
_U, _U, _U, _U, _U, _U, _U, _U,
|
||||
_U, _U, _U, _U, _U, _U, _U, _U,
|
||||
_U, _U, _U, _U, _U, _U, _U, _L,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L },
|
||||
/* ISO-8859-11 */
|
||||
{ _C, _C, _C, _C, _C, _C, _C, _C,
|
||||
_C, _C, _C, _C, _C, _C, _C, _C,
|
||||
_C, _C, _C, _C, _C, _C, _C, _C,
|
||||
_C, _C, _C, _C, _C, _C, _C, _C,
|
||||
_S|_B, _L, _L, _L, _L, _L, _L, _L,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L,
|
||||
_L, _P, _L, _L, _P, _P, _P, _P,
|
||||
_P, _P, _P, 0, 0, 0, 0, _P,
|
||||
_L, _L, _L, _L, _L, _L, _L, _P,
|
||||
_P, _P, _P, _P, _P, _P, _P, _L,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L,
|
||||
_L, _L, _L, _L, 0, 0, 0, 0 },
|
||||
/* ISO-8859-13 */
|
||||
{ _C, _C, _C, _C, _C, _C, _C, _C,
|
||||
_C, _C, _C, _C, _C, _C, _C, _C,
|
||||
_C, _C, _C, _C, _C, _C, _C, _C,
|
||||
_C, _C, _C, _C, _C, _C, _C, _C,
|
||||
_S|_B, _P, _P, _P, _P, _P, _P, _P,
|
||||
_P, _P, _U, _P, _P, _P, _P, _P,
|
||||
_P, _P, _P, _P, _P, _P, _P, _P,
|
||||
_P, _P, _L, _P, _P, _P, _P, _P,
|
||||
_U, _U, _U, _U, _U, _U, _U, _U,
|
||||
_U, _U, _U, _U, _U, _U, _U, _U,
|
||||
_U, _U, _U, _U, _U, _U, _U, _P,
|
||||
_U, _U, _U, _U, _U, _U, _U, _L,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L,
|
||||
_L, _L, _L, _L, _L, _L, _L, _P,
|
||||
_L, _L, _L, _L, _L, _L, _L, _P },
|
||||
/* ISO-8859-14 */
|
||||
{ _C, _C, _C, _C, _C, _C, _C, _C,
|
||||
_C, _C, _C, _C, _C, _C, _C, _C,
|
||||
_C, _C, _C, _C, _C, _C, _C, _C,
|
||||
_C, _C, _C, _C, _C, _C, _C, _C,
|
||||
_S|_B, _U, _L, _P, _U, _L, _U, _P,
|
||||
_U, _P, _U, _L, _U, _P, _P, _U,
|
||||
_U, _L, _U, _L, _U, _L, _P, _U,
|
||||
_L, _L, _L, _U, _L, _U, _L, _L,
|
||||
_U, _U, _U, _U, _U, _U, _U, _U,
|
||||
_U, _U, _U, _U, _U, _U, _U, _U,
|
||||
_U, _U, _U, _U, _U, _U, _U, _U,
|
||||
_U, _U, _U, _U, _U, _U, _U, _L,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L },
|
||||
/* ISO-8859-15 */
|
||||
{ _C, _C, _C, _C, _C, _C, _C, _C,
|
||||
_C, _C, _C, _C, _C, _C, _C, _C,
|
||||
_C, _C, _C, _C, _C, _C, _C, _C,
|
||||
_C, _C, _C, _C, _C, _C, _C, _C,
|
||||
_S|_B, _P, _P, _P, _P, _P, _P, _P,
|
||||
_P, _P, _P, _P, _P, _P, _P, _P,
|
||||
_P, _P, _P, _P, _P, _P, _P, _P,
|
||||
_P, _P, _P, _P, _P, _P, _P, _P,
|
||||
_U, _U, _U, _U, _U, _U, _U, _U,
|
||||
_U, _U, _U, _U, _U, _U, _U, _U,
|
||||
_U, _U, _U, _U, _U, _U, _U, _P,
|
||||
_U, _U, _U, _U, _U, _U, _U, _L,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L,
|
||||
_L, _L, _L, _L, _L, _L, _L, _P,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L },
|
||||
/* ISO-8859-16 */
|
||||
{ _C, _C, _C, _C, _C, _C, _C, _C,
|
||||
_C, _C, _C, _C, _C, _C, _C, _C,
|
||||
_C, _C, _C, _C, _C, _C, _C, _C,
|
||||
_C, _C, _C, _C, _C, _C, _C, _C,
|
||||
_S|_B, _U, _L, _U, _P, _P, _U, _P,
|
||||
_L, _P, _U, _P, _U, _P, _L, _U,
|
||||
_P, _P, _U, _U, _U, _P, _P, _P,
|
||||
_L, _L, _L, _P, _U, _L, _U, _L,
|
||||
_U, _U, _U, _U, _U, _U, _U, _U,
|
||||
_U, _U, _U, _U, _U, _U, _U, _U,
|
||||
_U, _U, _U, _U, _U, _U, _U, _U,
|
||||
_U, _U, _U, _U, _U, _U, _U, _L,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L }
|
||||
};
|
||||
/* Charset specific character class data for singlebyte Windows codepages,
   one row of 128 entries per codepage.  Each row is labelled with its
   codepage in the row comment (CP437 ... CP1258).
   NOTE(review): presumably indexed via __cp_index from __set_ctype; the
   code doing so is not visible here -- confirm.  */
static char __ctype_cp[22][128] = {
|
||||
/* CP437 */
|
||||
{ _U, _L, _L, _L, _L, _L, _L, _L,
|
||||
_L, _L, _L, _L, _L, _L, _U, _U,
|
||||
_U, _L, _U, _L, _L, _L, _L, _L,
|
||||
_L, _U, _U, _P, _P, _P, _P, _P,
|
||||
_L, _L, _L, _L, _L, _L, _P, _P,
|
||||
_P, _P, _P, _P, _P, _P, _P, _P,
|
||||
_P, _P, _P, _P, _P, _P, _P, _P,
|
||||
_P, _P, _P, _P, _P, _P, _P, _P,
|
||||
_P, _P, _P, _P, _P, _P, _P, _P,
|
||||
_P, _P, _P, _P, _P, _P, _P, _P,
|
||||
_P, _P, _P, _P, _P, _P, _P, _P,
|
||||
_P, _P, _P, _P, _P, _P, _P, _P,
|
||||
_L, _L, _U, _L, _U, _L, _P, _L,
|
||||
_U, _U, _U, _L, _P, _L, _L, _P,
|
||||
_P, _P, _P, _P, _P, _P, _P, _P,
|
||||
_P, _P, _P, _P, _P, _P, _P, _S|_B },
|
||||
/* CP720 */
|
||||
{ 0, 0, _L, _L, 0, _L, 0, _L,
|
||||
_L, _L, _L, _L, _L, 0, 0, 0,
|
||||
0, _P, _P, _L, _P, _P, _L, _L,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L,
|
||||
0, _L, _L, _L, _L, _L, _L, _L,
|
||||
_L, _L, _L, _L, _L, _L, _P, _P,
|
||||
_P, _P, _P, _P, _P, _P, _P, _P,
|
||||
_P, _P, _P, _P, _P, _P, _P, _P,
|
||||
_P, _P, _P, _P, _P, _P, _P, _P,
|
||||
_P, _P, _P, _P, _P, _P, _P, _P,
|
||||
_P, _P, _P, _P, _P, _P, _P, _P,
|
||||
_P, _P, _P, _P, _P, _P, _P, _P,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L,
|
||||
_P, _P, _P, _P, _P, _P, _P, _P,
|
||||
_P, _P, _P, _P, _P, _P, _P, _S|_B },
|
||||
/* CP737 */
|
||||
{ _U, _U, _U, _U, _U, _U, _U, _U,
|
||||
_U, _U, _U, _U, _U, _U, _U, _U,
|
||||
_U, _U, _U, _U, _U, _U, _U, _U,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L,
|
||||
_P, _P, _P, _P, _P, _P, _P, _P,
|
||||
_P, _P, _P, _P, _P, _P, _P, _P,
|
||||
_P, _P, _P, _P, _P, _P, _P, _P,
|
||||
_P, _P, _P, _P, _P, _P, _P, _P,
|
||||
_P, _P, _P, _P, _P, _P, _P, _P,
|
||||
_P, _P, _P, _P, _P, _P, _P, _P,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L,
|
||||
_L, _L, _U, _U, _U, _U, _U, _U,
|
||||
_U, _P, _P, _P, _P, _U, _U, _P,
|
||||
_P, _P, _P, _P, _P, _P, _P, _S|_B },
|
||||
/* CP775 */
|
||||
{ _U, _L, _L, _L, _L, _L, _L, _L,
|
||||
_L, _L, _U, _L, _L, _U, _U, _U,
|
||||
_U, _L, _U, _L, _L, _U, _P, _U,
|
||||
_L, _U, _U, _P, _P, _P, _P, _P,
|
||||
_U, _U, _L, _U, _L, _L, _P, _P,
|
||||
_P, _P, _P, _P, _P, _U, _P, _P,
|
||||
_P, _P, _P, _P, _P, _U, _U, _U,
|
||||
_U, _P, _P, _P, _P, _U, _U, _P,
|
||||
_P, _P, _P, _P, _P, _P, _U, _U,
|
||||
_P, _P, _P, _P, _P, _P, _P, _U,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L,
|
||||
_L, _P, _P, _P, _P, _P, _P, _P,
|
||||
_U, _L, _U, _U, _L, _U, _L, _L,
|
||||
_U, _L, _U, _L, _L, _U, _U, _P,
|
||||
_P, _P, _P, _P, _P, _P, _P, _P,
|
||||
_P, _P, _P, _P, _P, _P, _P, _S|_B },
|
||||
/* CP850 */
|
||||
{ _U, _L, _L, _L, _L, _L, _L, _L,
|
||||
_L, _L, _L, _L, _L, _L, _U, _U,
|
||||
_U, _L, _U, _L, _L, _L, _L, _L,
|
||||
_L, _U, _U, _L, _P, _U, _P, _P,
|
||||
_L, _L, _L, _L, _L, _U, _P, _P,
|
||||
_P, _P, _P, _P, _P, _P, _P, _P,
|
||||
_P, _P, _P, _P, _P, _U, _U, _U,
|
||||
_P, _P, _P, _P, _P, _P, _P, _P,
|
||||
_P, _P, _P, _P, _P, _P, _L, _U,
|
||||
_P, _P, _P, _P, _P, _P, _P, _P,
|
||||
_L, _U, _U, _U, _U, _L, _U, _U,
|
||||
_U, _P, _P, _P, _P, _P, _U, _P,
|
||||
_U, _L, _U, _U, _L, _U, _L, _U,
|
||||
_L, _U, _U, _U, _L, _U, _P, _P,
|
||||
_P, _P, _P, _P, _P, _P, _P, _P,
|
||||
_P, _P, _P, _P, _P, _P, _P, _S|_B },
|
||||
/* CP852 */
|
||||
{ _U, _L, _L, _L, _L, _L, _L, _L,
|
||||
_L, _L, _U, _L, _L, _U, _U, _U,
|
||||
_U, _U, _L, _L, _L, _U, _L, _U,
|
||||
_L, _U, _U, _U, _L, _U, _P, _L,
|
||||
_L, _L, _L, _L, _U, _L, _U, _L,
|
||||
_U, _L, _P, _L, _U, _L, _P, _P,
|
||||
_P, _P, _P, _P, _P, _U, _U, _U,
|
||||
_U, _P, _P, _P, _P, _U, _L, _P,
|
||||
_P, _P, _P, _P, _P, _P, _U, _L,
|
||||
_P, _P, _P, _P, _P, _P, _P, _P,
|
||||
_L, _U, _U, _U, _L, _U, _U, _U,
|
||||
_L, _P, _P, _P, _P, _U, _U, _P,
|
||||
_U, _L, _U, _U, _L, _L, _U, _L,
|
||||
_U, _U, _L, _U, _L, _U, _L, _P,
|
||||
_P, _P, _P, _P, _P, _P, _P, _P,
|
||||
_P, _P, _P, _L, _U, _L, _P, _S|_B },
|
||||
/* CP855 */
|
||||
{ _L, _U, _L, _U, _L, _U, _L, _U,
|
||||
_L, _U, _L, _U, _L, _U, _L, _U,
|
||||
_L, _U, _L, _U, _L, _U, _L, _U,
|
||||
_L, _U, _L, _U, _L, _U, _L, _U,
|
||||
_L, _U, _L, _U, _L, _U, _L, _U,
|
||||
_L, _U, _L, _U, _L, _U, _P, _P,
|
||||
_P, _P, _P, _P, _P, _L, _U, _L,
|
||||
_U, _P, _P, _P, _P, _L, _U, _P,
|
||||
_P, _P, _P, _P, _P, _P, _L, _U,
|
||||
_P, _P, _P, _P, _P, _P, _P, _P,
|
||||
_L, _U, _L, _U, _L, _U, _L, _U,
|
||||
_L, _P, _P, _P, _P, _U, _L, _P,
|
||||
_U, _L, _U, _L, _U, _L, _U, _L,
|
||||
_U, _L, _U, _L, _U, _L, _U, _P,
|
||||
_P, _L, _U, _L, _U, _L, _U, _L,
|
||||
_U, _L, _U, _L, _U, _P, _P, _S|_B },
|
||||
/* CP857 */
|
||||
{ _U, _L, _L, _L, _L, _L, _L, _L,
|
||||
_L, _L, _L, _L, _L, _L, _U, _U,
|
||||
_U, _L, _U, _L, _L, _L, _L, _L,
|
||||
_U, _U, _U, _L, _P, _U, _U, _L,
|
||||
_L, _L, _L, _L, _L, _U, _U, _L,
|
||||
_P, _P, _P, _P, _P, _P, _P, _P,
|
||||
_P, _P, _P, _P, _U, _U, _U, _P,
|
||||
_P, _P, _P, _P, _P, _P, _P, _P,
|
||||
_P, _P, _P, _P, _P, _P, _L, _U,
|
||||
_P, _P, _P, _P, _P, _P, _P, _P,
|
||||
_P, _P, _U, _U, _U, _L, _U, _U,
|
||||
_U, _P, _P, _P, _P, _P, _U, _P,
|
||||
_U, _L, _U, _U, _L, _U, _L, _L,
|
||||
_P, _U, _U, _U, _L, _L, _P, _P,
|
||||
_P, _P, _L, _P, _P, _P, _P, _P,
|
||||
_P, _P, _P, _P, _P, _P, _P, _S|_B },
|
||||
/* CP858 */
|
||||
{ _U, _L, _L, _L, _L, _L, _L, _L,
|
||||
_L, _L, _L, _L, _L, _L, _U, _U,
|
||||
_U, _L, _U, _L, _L, _L, _L, _L,
|
||||
_L, _U, _U, _L, _P, _U, _P, _P,
|
||||
_L, _L, _L, _L, _L, _U, _P, _P,
|
||||
_P, _P, _P, _P, _P, _P, _P, _P,
|
||||
_P, _P, _P, _P, _P, _U, _U, _U,
|
||||
_P, _P, _P, _P, _P, _P, _P, _P,
|
||||
_P, _P, _P, _P, _P, _P, _L, _U,
|
||||
_P, _P, _P, _P, _P, _P, _P, _P,
|
||||
_L, _U, _U, _U, _U, _P, _U, _U,
|
||||
_U, _P, _P, _P, _P, _P, _U, _P,
|
||||
_U, _L, _U, _U, _L, _U, _L, _U,
|
||||
_L, _U, _U, _U, _L, _U, _P, _P,
|
||||
_P, _P, _P, _P, _P, _P, _P, _P,
|
||||
_P, _P, _P, _P, _P, _P, _P, _S|_B },
|
||||
/* CP862 */
|
||||
{ _L, _L, _L, _L, _L, _L, _L, _L,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L,
|
||||
_L, _L, _L, _P, _P, _P, _P, _P,
|
||||
_L, _L, _L, _L, _L, _U, _P, _P,
|
||||
_P, _P, _P, _P, _P, _P, _P, _P,
|
||||
_P, _P, _P, _P, _P, _P, _P, _P,
|
||||
_P, _P, _P, _P, _P, _P, _P, _P,
|
||||
_P, _P, _P, _P, _P, _P, _P, _P,
|
||||
_P, _P, _P, _P, _P, _P, _P, _P,
|
||||
_P, _P, _P, _P, _P, _P, _P, _P,
|
||||
_P, _P, _P, _P, _P, _P, _P, _P,
|
||||
_L, _L, _U, _L, _U, _L, _P, _L,
|
||||
_U, _U, _U, _L, _P, _L, _L, _P,
|
||||
_P, _P, _P, _P, _P, _P, _P, _P,
|
||||
_P, _P, _P, _P, _P, _P, _P, _S|_B },
|
||||
/* CP866 */
|
||||
{ _U, _U, _U, _U, _U, _U, _U, _U,
|
||||
_U, _U, _U, _U, _U, _U, _U, _U,
|
||||
_U, _U, _U, _U, _U, _U, _U, _U,
|
||||
_U, _U, _U, _U, _U, _U, _U, _U,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L,
|
||||
_P, _P, _P, _P, _P, _P, _P, _P,
|
||||
_P, _P, _P, _P, _P, _P, _P, _P,
|
||||
_P, _P, _P, _P, _P, _P, _P, _P,
|
||||
_P, _P, _P, _P, _P, _P, _P, _P,
|
||||
_P, _P, _P, _P, _P, _P, _P, _P,
|
||||
_P, _P, _P, _P, _P, _P, _P, _P,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L,
|
||||
_U, _L, _U, _L, _U, _L, _U, _L,
|
||||
_P, _P, _P, _P, _P, _P, _P, _S|_B },
|
||||
/* CP874 */
|
||||
{ _P, 0, 0, 0, 0, _P, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, _P, _P, _P, _P, _P, _P, _P,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
_S|_B, _L, _L, _L, _L, _L, _L, _L,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L,
|
||||
_L, _L, _L, 0, 0, 0, 0, _P,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L,
|
||||
_P, _P, _P, _P, _P, _P, _P, _P,
|
||||
_P, _P, _L, _L, 0, 0, 0, 0 },
|
||||
/* CP1125 */
|
||||
{ _U, _U, _U, _U, _U, _U, _U, _U,
|
||||
_U, _U, _U, _U, _U, _U, _U, _U,
|
||||
_U, _U, _U, _U, _U, _U, _U, _U,
|
||||
_U, _U, _U, _U, _U, _U, _U, _U,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L,
|
||||
_P, _P, _P, _P, _P, _P, _P, _P,
|
||||
_P, _P, _P, _P, _P, _P, _P, _P,
|
||||
_P, _P, _P, _P, _P, _P, _P, _P,
|
||||
_P, _P, _P, _P, _P, _P, _P, _P,
|
||||
_P, _P, _P, _P, _P, _P, _P, _P,
|
||||
_P, _P, _P, _P, _P, _P, _P, _P,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L,
|
||||
_U, _L, _U, _L, _U, _L, _U, _L,
|
||||
_U, _L, _P, _P, _P, _P, _P, _S|_B },
|
||||
/* CP1250 */
|
||||
{ _P, 0, _P, 0, _P, _P, _P, _P,
|
||||
0, _P, _U, _P, _U, _U, _U, _U,
|
||||
0, _P, _P, _P, _P, _P, _P, _P,
|
||||
0, _P, _L, _P, _L, _L, _L, _L,
|
||||
_S|_B, _P, _P, _U, _P, _U, _P, _P,
|
||||
_P, _P, _U, _P, _P, _P, _P, _U,
|
||||
_P, _P, _P, _L, _P, _P, _P, _P,
|
||||
_P, _L, _L, _P, _U, _P, _L, _L,
|
||||
_U, _U, _U, _U, _U, _U, _U, _U,
|
||||
_U, _U, _U, _U, _U, _U, _U, _U,
|
||||
_U, _U, _U, _U, _U, _U, _U, _P,
|
||||
_U, _U, _U, _U, _U, _U, _U, _L,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L,
|
||||
_L, _L, _L, _L, _L, _L, _L, _P,
|
||||
_L, _L, _L, _L, _L, _L, _L, _P },
|
||||
/* CP1251 */
|
||||
{ _U, _U, _P, _L, _P, _P, _P, _P,
|
||||
_P, _P, _U, _P, _U, _U, _U, _U,
|
||||
_L, _P, _P, _P, _P, _P, _P, _P,
|
||||
_L, _P, _L, _L, _L, _L, _P, _U,
|
||||
_S|_B, _U, _L, _U, _P, _U, _P, _P,
|
||||
_U, _P, _U, _P, _P, _P, _P, _U,
|
||||
_P, _P, _U, _L, _L, _P, _P, _P,
|
||||
_L, _P, _L, _P, _L, _U, _L, _L,
|
||||
_U, _U, _U, _U, _U, _U, _U, _U,
|
||||
_U, _U, _U, _U, _U, _U, _U, _U,
|
||||
_U, _U, _U, _U, _U, _U, _U, _U,
|
||||
_U, _U, _U, _U, _U, _U, _U, _U,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L },
|
||||
/* CP1252 */
|
||||
{ _P, 0, _P, _L, _P, _P, _P, _P,
|
||||
_P, _P, _U, _P, _U, _U, 0, 0,
|
||||
0, _P, _P, _P, _P, _P, _P, _P,
|
||||
_P, _P, _L, _P, _L, 0, _L, _U,
|
||||
_S|_B, _P, _P, _P, _P, _P, _P, _P,
|
||||
_P, _P, _P, _P, _P, _P, _P, _P,
|
||||
_P, _P, _P, _P, _P, _P, _P, _P,
|
||||
_P, _P, _P, _P, _P, _P, _P, _P,
|
||||
_U, _U, _U, _U, _U, _U, _U, _U,
|
||||
_U, _U, _U, _U, _U, _U, _U, _U,
|
||||
_U, _U, _U, _U, _U, _U, _U, _P,
|
||||
_U, _U, _U, _U, _U, _U, _U, _L,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L,
|
||||
_L, _L, _L, _L, _L, _L, _L, _P,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L },
|
||||
/* CP1253 */
|
||||
{ _P, 0, _P, _L, _P, _P, _P, _P,
|
||||
0, _P, 0, _P, 0, 0, 0, 0,
|
||||
0, _P, _P, _P, _P, _P, _P, _P,
|
||||
0, _P, _P, 0, 0, 0, 0, 0,
|
||||
_S|_B, _P, _U, _P, _P, _P, _P, _P,
|
||||
_P, _P, 0, _P, _P, _P, _P, _P,
|
||||
_P, _P, _P, _P, _P, _P, _P, _P,
|
||||
_U, _U, _U, _P, _U, _P, _U, _U,
|
||||
_L, _U, _U, _U, _U, _U, _U, _U,
|
||||
_U, _U, _U, _U, _U, _U, _U, _U,
|
||||
_U, _U, _U, _U, _U, _U, _U, _U,
|
||||
_U, _U, _U, _U, _L, _L, _L, _L,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L },
|
||||
/* CP1254 */
|
||||
{ _P, 0, _P, _L, _P, _P, _P, _P,
|
||||
_P, _P, _U, _P, _U, 0, 0, 0,
|
||||
0, _P, _P, _P, _P, _P, _P, _P,
|
||||
_P, _P, _L, _P, _L, 0, 0, _U,
|
||||
_S|_B, _P, _P, _P, _P, _P, _P, _P,
|
||||
_P, _P, _P, _P, _P, _P, _P, _P,
|
||||
_P, _P, _P, _P, _P, _P, _P, _P,
|
||||
_P, _P, _P, _P, _P, _P, _P, _P,
|
||||
_U, _U, _U, _U, _U, _U, _U, _U,
|
||||
_U, _U, _U, _U, _U, _U, _U, _U,
|
||||
_U, _U, _U, _U, _U, _U, _U, _P,
|
||||
_U, _U, _U, _U, _U, _U, _U, _L,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L,
|
||||
_L, _L, _L, _L, _L, _L, _L, _P,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L },
|
||||
/* CP1255 */
|
||||
{ _P, 0, _P, _L, _P, _P, _P, _P,
|
||||
_P, _P, 0, _P, 0, 0, 0, 0,
|
||||
0, _P, _P, _P, _P, _P, _P, _P,
|
||||
_P, _P, 0, _P, 0, 0, 0, 0,
|
||||
_S|_B, _P, _P, _P, _P, _P, _P, _P,
|
||||
_P, _P, _P, _P, _P, _P, _P, _P,
|
||||
_P, _P, _P, _P, _P, _P, _P, _P,
|
||||
_P, _P, _P, _P, _P, _P, _P, _P,
|
||||
_P, _P, _P, _P, _P, _P, _P, _P,
|
||||
_P, _P, _P, _P, _P, _P, _P, _P,
|
||||
_P, _P, _P, _P, _P, _P, _P, _P,
|
||||
_P, 0, 0, 0, 0, 0, 0, 0,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L,
|
||||
_L, _L, _L, 0, 0, _P, _P, 0 },
|
||||
/* CP1256 */
|
||||
{ _P, _L, _P, _L, _P, _P, _P, _P,
|
||||
_P, _P, _L, _P, _U, _L, _L, _L,
|
||||
_L, _P, _P, _P, _P, _P, _P, _P,
|
||||
_L, _P, _L, _P, _L, _P, _P, _L,
|
||||
_S|_B, _P, _P, _P, _P, _P, _P, _P,
|
||||
_P, _P, _L, _P, _P, _P, _P, _P,
|
||||
_P, _P, _P, _P, _P, _P, _P, _P,
|
||||
_P, _P, _P, _P, _P, _P, _P, _P,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L,
|
||||
_L, _L, _L, _L, _L, _L, _L, _P,
|
||||
_L, _L, _L, _L, _P, _L, _L, _L,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L,
|
||||
_P, _P, _P, _P, _L, _P, _P, _P,
|
||||
_P, _L, _P, _L, _L, _P, _P, _L },
|
||||
/* CP1257 */
|
||||
{ _P, 0, _P, 0, _P, _P, _P, _P,
|
||||
0, _P, 0, _P, 0, _P, _P, _P,
|
||||
0, _P, _P, _P, _P, _P, _P, _P,
|
||||
0, _P, 0, _P, 0, _P, _P, 0,
|
||||
_S|_B, 0, _P, _P, _P, 0, _P, _P,
|
||||
_U, _P, _U, _P, _P, _P, _P, _U,
|
||||
_P, _P, _P, _P, _P, _P, _P, _P,
|
||||
_L, _P, _L, _P, _P, _P, _P, _L,
|
||||
_U, _U, _U, _U, _U, _U, _U, _U,
|
||||
_U, _U, _U, _U, _U, _U, _U, _U,
|
||||
_U, _U, _U, _U, _U, _U, _U, _P,
|
||||
_U, _U, _U, _U, _U, _U, _U, _L,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L,
|
||||
_L, _L, _L, _L, _L, _L, _L, _P,
|
||||
_L, _L, _L, _L, _L, _L, _L, _P },
|
||||
/* CP1258 */
|
||||
{ _P, 0, _P, _L, _P, _P, _P, _P,
|
||||
_P, _P, 0, _P, _U, 0, 0, 0,
|
||||
0, _P, _P, _P, _P, _P, _P, _P,
|
||||
_P, _P, 0, _P, _L, 0, 0, _U,
|
||||
_S|_B, _P, _P, _P, _P, _P, _P, _P,
|
||||
_P, _P, _P, _P, _P, _P, _P, _P,
|
||||
_P, _P, _P, _P, _P, _P, _P, _P,
|
||||
_P, _P, _P, _P, _P, _P, _P, _P,
|
||||
_U, _U, _U, _U, _U, _U, _U, _U,
|
||||
_U, _U, _U, _U, _P, _U, _U, _U,
|
||||
_U, _U, _P, _U, _U, _U, _U, _P,
|
||||
_U, _U, _U, _U, _U, _U, _P, _L,
|
||||
_L, _L, _L, _L, _L, _L, _L, _L,
|
||||
_L, _L, _L, _L, _P, _L, _L, _L,
|
||||
_L, _L, _P, _L, _L, _L, _L, _P,
|
||||
_L, _L, _L, _L, _L, _L, _P, _L }
|
||||
};
|
||||
|
||||
char ctype_b[128 + 256] = {
|
||||
_CTYPE_DATA_128_256,
|
||||
|
@ -70,12 +710,81 @@ makefunc(ispunct)
|
|||
makefunc(isspace)
|
||||
makefunc(isupper)
|
||||
makefunc(isxdigit)
|
||||
makefunc(tolower)
|
||||
makefunc(toupper)
|
||||
makefunc(isblank)
|
||||
makefunc(isascii)
|
||||
makefunc(toascii)
|
||||
|
||||
/* tolower for any singlebyte charset.  Bytes up to 0x7f are handled
   with isupper and the fixed ASCII case offset.  Any other byte is
   converted to a wide character with mbtowc, lowercased with towlower
   and converted back with wctomb.  If a conversion fails, or the
   lowercase form is not exactly one byte long, C is returned
   unchanged.  */
static int __cdecl
c_tolower (int c)
{
  if ((unsigned char) c <= 0x7f)
    return isupper (c) ? c + 0x20 : c;

  char s[8] = { (char) c, '\0' };
  wchar_t wc;
  if (mbtowc (&wc, s, 1) >= 0
      && wctomb (s, (wchar_t) towlower ((wint_t) wc)) == 1)
    /* Plain char may be signed.  Go through unsigned char so that a
       byte above 0x7f is returned as a positive value rather than a
       sign-extended negative one.  */
    c = (unsigned char) s[0];
  return c;
}
|
||||
EXPORT_ALIAS(c_tolower, tolower)
|
||||
|
||||
/* toupper for any singlebyte charset.  Bytes up to 0x7f are handled
   with islower and the fixed ASCII case offset.  Any other byte is
   converted to a wide character with mbtowc, uppercased with towupper
   and converted back with wctomb.  If a conversion fails, or the
   uppercase form is not exactly one byte long, C is returned
   unchanged.  */
static int __cdecl
c_toupper (int c)
{
  if ((unsigned char) c <= 0x7f)
    return islower (c) ? c - 0x20 : c;

  char s[8] = { (char) c, '\0' };
  wchar_t wc;
  if (mbtowc (&wc, s, 1) >= 0
      && wctomb (s, (wchar_t) towupper ((wint_t) wc)) == 1)
    /* Plain char may be signed.  Go through unsigned char so that a
       byte above 0x7f is returned as a positive value rather than a
       sign-extended negative one.  */
    c = (unsigned char) s[0];
  return c;
}
|
||||
EXPORT_ALIAS(c_toupper, toupper)
|
||||
|
||||
/* Called from newlib's setlocale(). What we do here is to copy the
|
||||
128 bytes of charset specific ctype data into the array at _ctype_b.
|
||||
Given that the functionality is usually implemented locally in the
|
||||
application, that's the only backward compatible way to do it.
|
||||
Setlocale is usually only called once in an application, so this isn't
|
||||
time-critical anyway. */
|
||||
int __iso_8859_index (const char *charset_ext); /* Newlib */
|
||||
int __cp_index (const char *charset_ext); /* Newlib */
|
||||
|
||||
void
|
||||
__set_ctype (const char *charset)
|
||||
{
|
||||
int idx;
|
||||
|
||||
switch (*charset)
|
||||
{
|
||||
case 'I':
|
||||
idx = __iso_8859_index (charset + 9);
|
||||
/* Our ctype table has a leading ISO-8859-1 element. */
|
||||
if (idx < 0)
|
||||
idx = 0;
|
||||
else
|
||||
++idx;
|
||||
memcpy (ctype_b, __ctype_iso[idx], 128);
|
||||
memcpy (ctype_b + 256, __ctype_iso[idx], 128);
|
||||
return;
|
||||
case 'C':
|
||||
idx = __cp_index (charset + 2);
|
||||
if (idx < 0)
|
||||
break;
|
||||
memcpy (ctype_b, __ctype_cp[idx], 128);
|
||||
memcpy (ctype_b + 256, __ctype_cp[idx], 128);
|
||||
return;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
memcpy (ctype_b, __ctype_default, 128);
|
||||
memcpy (ctype_b + 256, __ctype_default, 128);
|
||||
}
|
||||
|
||||
} /* extern "C" */
|
||||
|
||||
/*
|
||||
* Copyright (c) 1989 The Regents of the University of California.
|
||||
|
@ -89,13 +798,6 @@ makefunc(toascii)
|
|||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by the University of
|
||||
* California, Berkeley and its contributors.
|
||||
* 4. Neither the name of the University nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
|
|
|
@ -16,6 +16,7 @@ details. */
|
|||
#include <stdlib.h>
|
||||
#include "glob.h"
|
||||
#include <ctype.h>
|
||||
#include <locale.h>
|
||||
#include "environ.h"
|
||||
#include "sigproc.h"
|
||||
#include "pinfo.h"
|
||||
|
@ -921,6 +922,8 @@ dll_crt0_1 (void *)
|
|||
do this for noncygwin case since the signal thread is blocked due to
|
||||
LoadLibrary serialization. */
|
||||
ld_preload ();
|
||||
/* Reset current locale to "C" per POSIX */
|
||||
_setlocale_r (_GLOBAL_REENT, LC_CTYPE, "C");
|
||||
if (user_data->main)
|
||||
cygwin_exit (user_data->main (__argc, __argv, *user_data->envptr));
|
||||
__asm__ (" \n\
|
||||
|
|
|
@ -13,6 +13,7 @@ details. */
|
|||
#include <wchar.h>
|
||||
#include <wctype.h>
|
||||
#include <ctype.h>
|
||||
#include <locale.h>
|
||||
#include <assert.h>
|
||||
#include <cygwin/version.h>
|
||||
#include <winnls.h>
|
||||
|
@ -552,48 +553,6 @@ glob_init (const char *buf)
|
|||
}
|
||||
}
|
||||
|
||||
void
|
||||
set_file_api_mode (codepage_type cp)
|
||||
{
|
||||
if (cp == oem_cp)
|
||||
{
|
||||
SetFileApisToOEM ();
|
||||
debug_printf ("File APIs set to OEM");
|
||||
}
|
||||
else
|
||||
{
|
||||
SetFileApisToANSI ();
|
||||
debug_printf ("File APIs set to ANSI");
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
codepage_init (const char *buf)
|
||||
{
|
||||
if (!buf)
|
||||
buf = "ansi";
|
||||
|
||||
if (ascii_strcasematch (buf, "oem"))
|
||||
{
|
||||
current_codepage = oem_cp;
|
||||
active_codepage = GetOEMCP ();
|
||||
}
|
||||
else if (ascii_strcasematch (buf, "utf8"))
|
||||
{
|
||||
current_codepage = utf8_cp;
|
||||
active_codepage = CP_UTF8;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (!ascii_strcasematch (buf, "ansi"))
|
||||
debug_printf ("Wrong codepage name: %s", buf);
|
||||
/* Fallback to ANSI */
|
||||
current_codepage = ansi_cp;
|
||||
active_codepage = GetACP ();
|
||||
}
|
||||
set_file_api_mode (current_codepage);
|
||||
}
|
||||
|
||||
static void
|
||||
set_chunksize (const char *buf)
|
||||
{
|
||||
|
@ -629,7 +588,6 @@ static struct parse_thing
|
|||
} values[2];
|
||||
} known[] NO_COPY =
|
||||
{
|
||||
{"codepage", {func: &codepage_init}, isfunc, NULL, {{0}, {0}}},
|
||||
{"dosfilewarning", {&dos_file_warning}, justset, NULL, {{false}, {true}}},
|
||||
{"envcache", {&envcache}, justset, NULL, {{true}, {false}}},
|
||||
{"error_start", {func: &error_start_init}, isfunc, NULL, {{0}, {0}}},
|
||||
|
@ -774,6 +732,8 @@ environ_init (char **envp, int envc)
|
|||
static char NO_COPY cygterm[] = "TERM=cygwin";
|
||||
myfault efault;
|
||||
tmp_pathbuf tp;
|
||||
bool got_lc = false;
|
||||
static const char *lc_arr[] = { "LC_ALL", "LC_CTYPE", "LANG", NULL };
|
||||
|
||||
if (efault.faulted ())
|
||||
api_fatal ("internal error reading the windows environment - too many environment variables?");
|
||||
|
@ -818,10 +778,27 @@ environ_init (char **envp, int envc)
|
|||
/* Allocate space for environment + trailing NULL + CYGWIN env. */
|
||||
lastenviron = envp = (char **) malloc ((4 + (envc = 100)) * sizeof (char *));
|
||||
|
||||
/* We need the CYGWIN variable content before we can loop through
|
||||
/* We need the locale variables' content before we can loop through
|
||||
the whole environment, so that the wide-char to multibyte conversion
|
||||
can be done according to the "codepage" setting, as well as the
|
||||
uppercasing according to the "upcaseenv" setting. */
|
||||
can be done according to the $LC_ALL/$LC_CTYPE/$LANG/current_codepage
|
||||
setting, as well as the uppercasing according to the "upcaseenv"
|
||||
setting. Note that we have to reset the LC_CTYPE setting to "C"
|
||||
before calling main() for POSIX compatibility. */
|
||||
for (int lc = 0; lc_arr[lc]; ++lc)
|
||||
{
|
||||
if ((i = GetEnvironmentVariableA (lc_arr[lc], NULL, 0)))
|
||||
{
|
||||
char *buf = (char *) alloca (i);
|
||||
GetEnvironmentVariableA (lc_arr[lc], buf, i);
|
||||
if (_setlocale_r (_GLOBAL_REENT, LC_CTYPE, buf))
|
||||
got_lc = true;
|
||||
}
|
||||
}
|
||||
/* No matching POSIX environment variable, use current codepage. */
|
||||
if (!got_lc)
|
||||
_setlocale_r (_GLOBAL_REENT, LC_CTYPE, "en_US");
|
||||
/* We also need the CYGWIN variable early to know the value of the
|
||||
CYGWIN=upcaseenv setting for the below loop. */
|
||||
if ((i = GetEnvironmentVariableA ("CYGWIN", NULL, 0)))
|
||||
{
|
||||
char *buf = (char *) alloca (i);
|
||||
|
|
|
@ -927,7 +927,7 @@ class fhandler_console: public fhandler_termios
|
|||
void cursor_set (bool, int, int);
|
||||
void cursor_get (int *, int *);
|
||||
void cursor_rel (int, int);
|
||||
void write_replacement_char (const unsigned char *);
|
||||
void write_replacement_char ();
|
||||
const unsigned char *write_normal (unsigned const char*, unsigned const char *);
|
||||
void char_command (char);
|
||||
bool set_raw_win32_keyboard_mode (bool);
|
||||
|
|
|
@ -13,6 +13,7 @@ details. */
|
|||
#include "miscfuncs.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <wchar.h>
|
||||
#include <wingdi.h>
|
||||
#include <winuser.h>
|
||||
#include <winnls.h>
|
||||
|
@ -133,13 +134,13 @@ dev_console::con_to_str (char *d, int dlen, WCHAR w)
|
|||
/* Return the codepage to use for console output conversion: the
   console's output codepage while the alternate charset is active,
   0 otherwise.  The callers visible here (str_to_con, write_normal)
   treat 0 as "no explicit codepage".  */
inline UINT
dev_console::get_console_cp ()
{
  return alternate_charset_active ? GetConsoleOutputCP () : 0;
}
|
||||
|
||||
/* Convert the SZ bytes at S to wide characters in D, using the codepage
   reported by get_console_cp.  At most CONVERT_LIMIT wide characters
   are written.  Returns the result of sys_cp_mbstowcs.  */
inline DWORD
dev_console::str_to_con (PWCHAR d, const char *s, DWORD sz)
{
  return sys_cp_mbstowcs (get_console_cp (), d, CONVERT_LIMIT, s, sz);
}
|
||||
|
||||
bool
|
||||
|
@ -1400,22 +1401,15 @@ beep ()
|
|||
MessageBeep (MB_OK);
|
||||
}
|
||||
|
||||
/* This gets called when we found an invalid UTF-8 character. We try with
|
||||
the default ANSI codepage. If that fails we just print a question mark.
|
||||
Looks ugly but is a neat and alomst sane fallback for many languages. */
|
||||
/* This gets called when we found an invalid input character. We just
|
||||
print a half filled square (UTF 0x2592). We have no chance to figure
|
||||
out the "meaning" of the input char anyway. */
|
||||
void
|
||||
fhandler_console::write_replacement_char (const unsigned char *char_p)
|
||||
fhandler_console::write_replacement_char ()
|
||||
{
|
||||
int n;
|
||||
WCHAR def_cp_chars[2];
|
||||
static const wchar_t replacement_char = 0x2592; /* Half filled square */
|
||||
DWORD done;
|
||||
|
||||
n = MultiByteToWideChar (GetACP (), 0, (const CHAR *) char_p, 1,
|
||||
def_cp_chars, 2);
|
||||
if (n)
|
||||
WriteConsoleW (get_output_handle (), def_cp_chars, n, &done, 0);
|
||||
else
|
||||
WriteConsoleW (get_output_handle (), L"?", 1, &done, 0);
|
||||
WriteConsoleW (get_output_handle (), &replacement_char, 1, &done, 0);
|
||||
}
|
||||
|
||||
const unsigned char *
|
||||
|
@ -1426,22 +1420,46 @@ fhandler_console::write_normal (const unsigned char *src,
|
|||
DWORD done;
|
||||
DWORD buf_len;
|
||||
const unsigned char *found = src;
|
||||
const unsigned char *nfound;
|
||||
size_t ret;
|
||||
mbstate_t ps;
|
||||
UINT cp = dev_state->get_console_cp ();
|
||||
char charsetbuf[32];
|
||||
char *charset = __locale_charset ();
|
||||
mbtowc_p f_mbtowc = __mbtowc;
|
||||
|
||||
if (cp)
|
||||
f_mbtowc = __set_charset_from_codepage (cp, charset = charsetbuf);
|
||||
|
||||
/* First check if we have cached lead bytes of a former try to write
|
||||
a truncated multibyte sequence. If so, process it. */
|
||||
if (trunc_buf.len)
|
||||
{
|
||||
const unsigned char *nfound;
|
||||
int cp_len = min (end - src, 4 - trunc_buf.len);
|
||||
memcpy (trunc_buf.buf + trunc_buf.len, src, cp_len);
|
||||
nfound = next_char (cp, trunc_buf.buf,
|
||||
trunc_buf.buf + trunc_buf.len + cp_len);
|
||||
/* Still truncated multibyte sequence? Keep in trunc_buf. */
|
||||
if (nfound == trunc_buf.buf)
|
||||
memset (&ps, 0, sizeof ps);
|
||||
switch (ret = f_mbtowc (_REENT, NULL, (const char *) trunc_buf.buf,
|
||||
trunc_buf.len + cp_len, charset, &ps))
|
||||
{
|
||||
case -2:
|
||||
/* Still truncated multibyte sequence? Keep in trunc_buf. */
|
||||
trunc_buf.len += cp_len;
|
||||
return end;
|
||||
case -1:
|
||||
/* Give up, print replacement chars for trunc_buf... */
|
||||
for (int i = 0; i < trunc_buf.len; ++i)
|
||||
write_replacement_char ();
|
||||
/* ... mark trunc_buf as unused... */
|
||||
trunc_buf.len = 0;
|
||||
/* ... and proceed. */
|
||||
nfound = NULL;
|
||||
break;
|
||||
case 0:
|
||||
nfound = trunc_buf.buf + 1;
|
||||
break;
|
||||
default:
|
||||
nfound = trunc_buf.buf + ret;
|
||||
break;
|
||||
}
|
||||
/* Valid multibyte sequence? Process. */
|
||||
if (nfound)
|
||||
|
@ -1454,28 +1472,32 @@ fhandler_console::write_normal (const unsigned char *src,
|
|||
trunc_buf.len = 0;
|
||||
return found;
|
||||
}
|
||||
/* Give up, print replacement chars for trunc_buf... */
|
||||
for (int i = 0; i < trunc_buf.len; ++i)
|
||||
write_replacement_char (trunc_buf.buf + i);
|
||||
/* ... mark trunc_buf as unused... */
|
||||
trunc_buf.len = 0;
|
||||
/* ... and proceed. */
|
||||
}
|
||||
|
||||
memset (&ps, 0, sizeof ps);
|
||||
while (found < end
|
||||
&& found - src < CONVERT_LIMIT
|
||||
&& base_chars[*found] == NOR)
|
||||
{
|
||||
nfound = next_char (cp, found, end);
|
||||
if (!nfound) /* Invalid multibyte sequence. */
|
||||
break;
|
||||
if (nfound == found) /* Truncated multibyte sequence. */
|
||||
{ /* Stick to it until the next write. */
|
||||
switch (ret = f_mbtowc (_REENT, NULL, (const char *) found,
|
||||
end - found, charset, &ps))
|
||||
{
|
||||
case -2:
|
||||
/* Truncated multibyte sequence. Stick to it until the next write. */
|
||||
trunc_buf.len = end - found;
|
||||
memcpy (trunc_buf.buf, found, trunc_buf.len);
|
||||
return end;
|
||||
case -1:
|
||||
break;
|
||||
case 0:
|
||||
found++;
|
||||
break;
|
||||
default:
|
||||
found += ret;
|
||||
break;
|
||||
}
|
||||
found = nfound;
|
||||
if (ret == (size_t) -1) /* Invalid multibyte sequence. */
|
||||
break;
|
||||
}
|
||||
|
||||
/* Print all the base ones out */
|
||||
|
@ -1558,7 +1580,7 @@ fhandler_console::write_normal (const unsigned char *src,
|
|||
cursor_set (false, 8 * (x / 8 + 1), y);
|
||||
break;
|
||||
case NOR:
|
||||
write_replacement_char (found);
|
||||
write_replacement_char ();
|
||||
break;
|
||||
}
|
||||
found++;
|
||||
|
|
|
@ -205,8 +205,6 @@ frok::child (volatile char * volatile here)
|
|||
}
|
||||
#endif
|
||||
|
||||
set_file_api_mode (current_codepage);
|
||||
|
||||
MALLOC_CHECK;
|
||||
|
||||
/* Incredible but true: If we use sockets and SYSV IPC shared memory,
|
||||
|
|
|
@ -25,8 +25,6 @@ HMODULE NO_COPY cygwin_hmodule;
|
|||
HANDLE hExeced;
|
||||
|
||||
/* Codepage and multibyte string specific stuff. */
|
||||
enum codepage_type {ansi_cp, oem_cp, utf8_cp};
|
||||
codepage_type current_codepage = ansi_cp;
|
||||
UINT active_codepage;
|
||||
|
||||
/* program exit the program */
|
||||
|
|
|
@ -49,7 +49,7 @@ extern const __declspec(dllimport) char _ctype_[];
|
|||
|
||||
#if !defined(__cplusplus) || defined(__INSIDE_CYGWIN__)
|
||||
#define isalpha(c) ((_ctype_+1)[(unsigned)(c)]&(_U|_L))
|
||||
#define isblank(c) ((c) == ' ' || (c) == '\t')
|
||||
#define isblank(c) ((_ctype_+1)[(unsigned)(c)]&_B)
|
||||
#define isupper(c) ((_ctype_+1)[(unsigned)(c)]&_U)
|
||||
#define islower(c) ((_ctype_+1)[(unsigned)(c)]&_L)
|
||||
#define isdigit(c) ((_ctype_+1)[(unsigned)(c)]&_N)
|
||||
|
@ -60,12 +60,6 @@ extern const __declspec(dllimport) char _ctype_[];
|
|||
#define isprint(c) ((_ctype_+1)[(unsigned)(c)]&(_P|_U|_L|_N|_B))
|
||||
#define isgraph(c) ((_ctype_+1)[(unsigned)(c)]&(_P|_U|_L|_N))
|
||||
#define iscntrl(c) ((_ctype_+1)[(unsigned)(c)]&_C)
|
||||
/* Non-gcc versions will get the library versions, and will be
|
||||
slightly slower */
|
||||
# define toupper(c) \
|
||||
__extension__ ({ int __x = (c); islower(__x) ? (__x - 'a' + 'A') : __x;})
|
||||
# define tolower(c) \
|
||||
__extension__ ({ int __x = (c); isupper(__x) ? (__x - 'A' + 'a') : __x;})
|
||||
#endif /* !__cplusplus */
|
||||
|
||||
#if !defined(__STRICT_ANSI__) || defined(__INSIDE_CYGWIN__)
|
||||
|
|
|
@ -141,26 +141,6 @@ cygwin_strncasecmp (const char *cs, const char *ct, size_t n)
|
|||
return RtlCompareUnicodeString (&us, &ut, TRUE);
|
||||
}
|
||||
|
||||
extern "C" wchar_t * __stdcall
|
||||
cygwin_wcslwr (wchar_t *string)
|
||||
{
|
||||
UNICODE_STRING us;
|
||||
|
||||
RtlInitUnicodeString (&us, string);
|
||||
RtlDowncaseUnicodeString (&us, &us, FALSE);
|
||||
return string;
|
||||
}
|
||||
|
||||
extern "C" wchar_t * __stdcall
|
||||
cygwin_wcsupr (wchar_t *string)
|
||||
{
|
||||
UNICODE_STRING us;
|
||||
|
||||
RtlInitUnicodeString (&us, string);
|
||||
RtlUpcaseUnicodeString (&us, &us, FALSE);
|
||||
return string;
|
||||
}
|
||||
|
||||
extern "C" char * __stdcall
|
||||
cygwin_strlwr (char *string)
|
||||
{
|
||||
|
@ -189,118 +169,6 @@ cygwin_strupr (char *string)
|
|||
return string;
|
||||
}
|
||||
|
||||
/* FIXME? We only support standard ANSI/OEM codepages according to
|
||||
http://www.microsoft.com/globaldev/reference/cphome.mspx as well
|
||||
as UTF-8 and codepage 1361, which is also mentioned as valid
|
||||
doublebyte codepage in MSDN man pages (e.g. IsDBCSLeadByteEx).
|
||||
Everything else will be hosed. */
|
||||
|
||||
/* Return true if CP is one of the codepages treated as multibyte here:
   932, 936, 949, 950, 1361 or 65001.  Every other value yields false.  */
bool
is_cp_multibyte (UINT cp)
{
  return cp == 932 || cp == 936 || cp == 949 || cp == 950
	 || cp == 1361 || cp == 65001;
}
|
||||
|
||||
/* OMYGOD! CharNextExA is not UTF-8 aware! It only works fine with
|
||||
double byte charsets. So we have to do it ourselves for UTF-8.
|
||||
|
||||
While being at it, we do more. If a double-byte or multibyte
|
||||
sequence is truncated due to an early end, we need a way to recognize
|
||||
it. The reason is that multiple buffered write statements might
|
||||
accidentally stop and start in the middle of a single character byte
|
||||
sequence. If we have to interpret the byte sequences (as in
|
||||
fhandler_console), we would print wrong output in these cases.
|
||||
|
||||
So we have four possible return values here:
|
||||
|
||||
ret = end if str >= end
|
||||
ret = NULL if we encounter an invalid byte sequence
|
||||
ret = str if we encounter the start byte of a truncated byte sequence
|
||||
ret = str + n if we encounter a valid byte sequence
|
||||
*/
|
||||
|
||||
const unsigned char *
|
||||
next_char (UINT cp, const unsigned char *str, const unsigned char *end)
|
||||
{
|
||||
const unsigned char *ret = NULL;
|
||||
|
||||
if (str >= end)
|
||||
return end;
|
||||
|
||||
switch (cp)
|
||||
{
|
||||
case 932:
|
||||
case 936:
|
||||
case 949:
|
||||
case 950:
|
||||
case 1361:
|
||||
if (*str <= 0x7f)
|
||||
ret = str + 1;
|
||||
else if (str == end - 1 && IsDBCSLeadByteEx (cp, *str))
|
||||
ret = str;
|
||||
else
|
||||
ret = (const unsigned char *) CharNextExA (cp, (const CHAR *) str, 0);
|
||||
break;
|
||||
case CP_UTF8:
|
||||
switch (str[0] >> 4)
|
||||
{
|
||||
case 0x0 ... 0x7: /* One byte character. */
|
||||
ret = str + 1;
|
||||
break;
|
||||
case 0x8 ... 0xb: /* Followup byte. Invalid as first byte. */
|
||||
ret = NULL;
|
||||
break;
|
||||
case 0xc ... 0xd: /* Two byte character. */
|
||||
/* Check followup bytes for validity. */
|
||||
if (str >= end - 1)
|
||||
ret = str;
|
||||
else if (str[1] <= 0xbf)
|
||||
ret = str + 2;
|
||||
else
|
||||
ret = NULL;
|
||||
break;
|
||||
case 0xe: /* Three byte character. */
|
||||
if (str >= end - 2)
|
||||
ret = str;
|
||||
else if ((str[1] & 0xc0) == 0x80 && (str[2] & 0xc0) == 0x80
|
||||
&& (str[0] != 0xe0 || str[1] >= 0xa0)
|
||||
&& (str[0] != 0xed || str[1] <= 0x9f))
|
||||
ret = str + 3;
|
||||
else
|
||||
ret = NULL;
|
||||
break;
|
||||
case 0xf: /* Four byte character. */
|
||||
if (str[0] >= 0xf8)
|
||||
ret = NULL;
|
||||
else if (str >= end - 3)
|
||||
ret = str;
|
||||
else if ((str[1] & 0xc0) == 0x80 && (str[2] & 0xc0) == 0x80
|
||||
&& (str[3] & 0xc0) == 0x80
|
||||
&& (str[0] == 0xf0 || str[1] >= 0x90)
|
||||
&& (str[0] == 0xf4 || str[1] <= 0x8f))
|
||||
ret = str + 4;
|
||||
else
|
||||
ret = NULL;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
ret = str + 1;
|
||||
break;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
int __stdcall
|
||||
check_invalid_virtual_addr (const void *s, unsigned sz)
|
||||
{
|
||||
|
|
|
@ -25,11 +25,6 @@ void backslashify (const char *, char *, bool);
|
|||
void slashify (const char *, char *, bool);
|
||||
#define isslash(c) ((c) == '/')
|
||||
|
||||
/* multibyte stuff */
|
||||
bool is_cp_multibyte (UINT cp);
|
||||
const unsigned char *next_char (UINT cp, const unsigned char *str,
|
||||
const unsigned char *end);
|
||||
|
||||
/* Memory checking */
|
||||
int __stdcall check_invalid_virtual_addr (const void *s, unsigned sz) __attribute__ ((regparm(2)));
|
||||
|
||||
|
|
|
@ -20,45 +20,356 @@ details. */
|
|||
#include "fhandler.h"
|
||||
#include "dtable.h"
|
||||
#include "cygheap.h"
|
||||
#include "tls_pbuf.h"
|
||||
|
||||
UINT
|
||||
get_cp ()
|
||||
/* The SJIS, JIS and EUCJP conversion in newlib does not use UTF as
|
||||
wchar_t character representation. That's unfortunate for us since
|
||||
we require UTF for the OS. What we do here is to have our own
|
||||
implementation of the base functions for the conversion using
|
||||
the MultiByteToWideChar/WideCharToMultiByte functions. */
|
||||
|
||||
/* GBK, CP949, and Big5 conversions are not available so far in newlib. */
|
||||
|
||||
static int
|
||||
__db_wctomb (struct _reent *r, char *s, wchar_t wchar, UINT cp)
|
||||
{
|
||||
if (!active_codepage)
|
||||
codepage_init ("ansi");
|
||||
return active_codepage;
|
||||
if (s == NULL)
|
||||
return 0;
|
||||
|
||||
if (wchar < 0x80)
|
||||
{
|
||||
*s = (char) wchar;
|
||||
return 1;
|
||||
}
|
||||
|
||||
BOOL def_used = false;
|
||||
int ret = WideCharToMultiByte (cp, cp > 50000 ? 0 : WC_NO_BEST_FIT_CHARS,
|
||||
&wchar, 1, s, MB_CUR_MAX, NULL, &def_used);
|
||||
if (ret > 0 && !def_used)
|
||||
return ret;
|
||||
|
||||
r->_errno = EILSEQ;
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* tlen is always treated as the maximum buffer size, including the '\0'
|
||||
character. sys_wcstombs will always return a 0-terminated result, no
|
||||
matter what. */
|
||||
int __stdcall
|
||||
sys_wcstombs (char *tgt, int tlen, const PWCHAR src, int slen)
|
||||
extern "C" int
|
||||
__sjis_wctomb (struct _reent *r, char *s, wchar_t wchar, const char *charset,
|
||||
mbstate_t *state)
|
||||
{
|
||||
int ret;
|
||||
return __db_wctomb (r,s, wchar, 932);
|
||||
}
|
||||
|
||||
/* Convert UNICODE private use area. Reverse functionality (only for
|
||||
path names) is transform_chars in path.cc. */
|
||||
if (slen < 0)
|
||||
slen = wcslen (src) + 1;
|
||||
WCHAR sbuf[slen];
|
||||
memcpy (sbuf, src, slen * sizeof (WCHAR));
|
||||
const unsigned char *end = (unsigned char *) (sbuf + slen);
|
||||
for (unsigned char *s = ((unsigned char *) sbuf) + 1; s < end;
|
||||
s += sizeof (WCHAR))
|
||||
if (*s == 0xf0)
|
||||
*s = 0;
|
||||
ret = WideCharToMultiByte (get_cp (), 0, sbuf, slen, tgt, tlen, NULL, NULL);
|
||||
if (ret && tgt)
|
||||
extern "C" int
|
||||
__jis_wctomb (struct _reent *r, char *s, wchar_t wchar, const char *charset,
|
||||
mbstate_t *state)
|
||||
{
|
||||
return __db_wctomb (r,s, wchar, 50220);
|
||||
}
|
||||
|
||||
extern "C" int
|
||||
__eucjp_wctomb (struct _reent *r, char *s, wchar_t wchar, const char *charset,
|
||||
mbstate_t *state)
|
||||
{
|
||||
return __db_wctomb (r,s, wchar, 51932);
|
||||
}
|
||||
|
||||
extern "C" int
|
||||
__gbk_wctomb (struct _reent *r, char *s, wchar_t wchar, const char *charset,
|
||||
mbstate_t *state)
|
||||
{
|
||||
return __db_wctomb (r,s, wchar, 936);
|
||||
}
|
||||
|
||||
extern "C" int
|
||||
__kr_wctomb (struct _reent *r, char *s, wchar_t wchar, const char *charset,
|
||||
mbstate_t *state)
|
||||
{
|
||||
return __db_wctomb (r,s, wchar, 949);
|
||||
}
|
||||
|
||||
extern "C" int
|
||||
__big5_wctomb (struct _reent *r, char *s, wchar_t wchar, const char *charset,
|
||||
mbstate_t *state)
|
||||
{
|
||||
return __db_wctomb (r,s, wchar, 950);
|
||||
}
|
||||
|
||||
static int
|
||||
__db_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
|
||||
UINT cp, mbstate_t *state)
|
||||
{
|
||||
wchar_t dummy;
|
||||
char buf[2];
|
||||
int ret;
|
||||
|
||||
if (pwc == NULL)
|
||||
pwc = &dummy;
|
||||
|
||||
if (s == NULL)
|
||||
return 0; /* not state-dependent */
|
||||
|
||||
if (n == 0)
|
||||
return -2;
|
||||
|
||||
if (state->__count == 0)
|
||||
{
|
||||
ret = (ret < tlen) ? ret : tlen - 1;
|
||||
tgt[ret] = '\0';
|
||||
if (*(unsigned char *) s < 0x80)
|
||||
{
|
||||
*pwc = *(unsigned char *) s;
|
||||
return *s ? 1 : 0;
|
||||
}
|
||||
ret = MultiByteToWideChar (cp, cp > 50000 ? 0 : MB_ERR_INVALID_CHARS,
|
||||
s, 2, pwc, 1);
|
||||
if (ret)
|
||||
return *s ? 2 : 0;
|
||||
if (n == 1)
|
||||
{
|
||||
state->__count = 1;
|
||||
state->__value.__wchb[0] = *s;
|
||||
return -2;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* These Win32 functions are really crappy. Assuming n is 2
|
||||
but the first byte is a singlebyte charcode, the function
|
||||
does not convert that byte and return 1, rather it just
|
||||
returns 0. So, what we do here is to check if the first
|
||||
byte returns a valid value... */
|
||||
ret = MultiByteToWideChar (cp,
|
||||
cp > 50000 ? 0 : MB_ERR_INVALID_CHARS,
|
||||
s, 1, pwc, 1);
|
||||
if (ret)
|
||||
return *s ? 1 : 0;
|
||||
}
|
||||
r->_errno = EILSEQ;
|
||||
return -1;
|
||||
}
|
||||
if (!*s)
|
||||
return -2;
|
||||
buf[0] = state->__value.__wchb[0];
|
||||
buf[1] = *s;
|
||||
ret = MultiByteToWideChar (cp, cp > 50000 ? 0 : MB_ERR_INVALID_CHARS,
|
||||
buf, 2, pwc, 1);
|
||||
if (!ret)
|
||||
{
|
||||
r->_errno = EILSEQ;
|
||||
return -1;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
extern "C" int
|
||||
__sjis_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
|
||||
const char *charset, mbstate_t *state)
|
||||
{
|
||||
return __db_mbtowc (r, pwc, s, n, 932, state);
|
||||
}
|
||||
|
||||
extern "C" int
|
||||
__jis_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
|
||||
const char *charset, mbstate_t *state)
|
||||
{
|
||||
return __db_mbtowc (r, pwc, s, n, 50220, state);
|
||||
}
|
||||
|
||||
extern "C" int
|
||||
__eucjp_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
|
||||
const char *charset, mbstate_t *state)
|
||||
{
|
||||
return __db_mbtowc (r, pwc, s, n, 51932, state);
|
||||
}
|
||||
|
||||
extern "C" int
|
||||
__gbk_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
|
||||
const char *charset, mbstate_t *state)
|
||||
{
|
||||
return __db_mbtowc (r, pwc, s, n, 936, state);
|
||||
}
|
||||
|
||||
extern "C" int
|
||||
__kr_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
|
||||
const char *charset, mbstate_t *state)
|
||||
{
|
||||
return __db_mbtowc (r, pwc, s, n, 949, state);
|
||||
}
|
||||
|
||||
extern "C" int
|
||||
__big5_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
|
||||
const char *charset, mbstate_t *state)
|
||||
{
|
||||
return __db_mbtowc (r, pwc, s, n, 950, state);
|
||||
}
|
||||
|
||||
/* Convert Windows codepage to a setlocale compatible character set code.
|
||||
Called from newlib's setlocale() with the current ANSI codepage, if the
|
||||
charset isn't given explicitly in the POSIX compatible locale specifier.
|
||||
The function also returns a pointer to the corresponding _mbtowc_r
|
||||
function. This is used below in the sys_cp_mbstowcs function which
|
||||
is called directly from fhandler_console if the "Alternate Charset" has
|
||||
been switched on by an escape sequence. */
|
||||
/* Translate the Windows codepage CP into a charset name, written to
   CHARSET, and return the matching _mbtowc_r style conversion function.
   Singlebyte Windows/OEM codepages become "CP<n>", 28591 and up become
   "ISO-8859-<n>", the doublebyte and UTF-8 codepages get fixed names,
   and everything else is reported as "ASCII".  CHARSET must be large
   enough for the longest name written here.  */
extern "C" mbtowc_p
__set_charset_from_codepage (UINT cp, char *charset)
{
  /* Singlebyte codepages handled by __cp_mbtowc. */
  if (cp == 437 || cp == 720 || cp == 737 || cp == 775 || cp == 850
      || cp == 852 || cp == 855 || cp == 857 || cp == 858 || cp == 862
      || cp == 866 || cp == 874 || cp == 1125
      || (cp >= 1250 && cp <= 1258))
    {
      __small_sprintf (charset, "CP%u", cp);
      return __cp_mbtowc;
    }

  /* ISO-8859-1 to -9, -13 and -15; the part number is CP - 28590. */
  if ((cp >= 28591 && cp <= 28599) || cp == 28603 || cp == 28605)
    {
      __small_sprintf (charset, "ISO-8859-%u", cp - 28590);
      return __iso_mbtowc;
    }

  /* Fixed names; anything not listed falls back to ASCII. */
  const char *name = "ASCII";
  mbtowc_p conv = __ascii_mbtowc;

  switch (cp)
    {
    case 932:
      name = "SJIS";
      conv = __sjis_mbtowc;
      break;
    case 936:
      name = "GBK";
      conv = __gbk_mbtowc;
      break;
    case 949:
      name = "CP949";
      conv = __kr_mbtowc;
      break;
    case 950:
      name = "BIG5";
      conv = __big5_mbtowc;
      break;
    case 50220:
      name = "JIS";
      conv = __jis_mbtowc;
      break;
    case 51932:
      name = "EUCJP";
      conv = __eucjp_mbtowc;
      break;
    case 65001:
      name = "UTF-8";
      conv = __utf8_mbtowc;
      break;
    default:
      break;
    }
  strcpy (charset, name);
  return conv;
}
|
||||
|
||||
/* Our own sys_wcstombs/sys_mbstowcs functions differ from the
|
||||
wcstombs/mbstowcs API in three ways:
|
||||
|
||||
- The UNICODE private use area is used in filenames to specify
|
||||
characters not allowed in Windows filenames ('*', '?', etc).
|
||||
The sys_wcstombs converts characters in the private use area
|
||||
back to the corresponding ASCII chars.
|
||||
|
||||
- If a wide character in a filename has no representation in the current
|
||||
multibyte charset, then usually you wouldn't be able to access the
|
||||
file. To fix this problem, sys_wcstombs creates a replacement multibyte
|
||||
sequences for the non-representable wide-char. The sequence starts with
|
||||
an ASCII SO (0x0e, Ctrl-N), followed by the UTF-8 representation of the
|
||||
character. The sys_(cp_)mbstowcs function detects ASCII SO characters
|
||||
in the input multibyte string and converts the following multibyte
|
||||
sequence in by treating it as an UTF-8 char. If that fails, the ASCII
|
||||
SO was probably standalone and it gets just copied over as ASCII SO.
|
||||
|
||||
- The functions always create 0-terminated results, no matter what.
|
||||
If the result is truncated due to buffer size, it's a bug in Cygwin
|
||||
and the buffer in the calling function should be raised. */
|
||||
size_t __stdcall
|
||||
sys_wcstombs (char *dst, size_t len, const PWCHAR src, size_t nwc)
|
||||
{
|
||||
char buf[10];
|
||||
char *ptr = dst;
|
||||
wchar_t *pwcs = (wchar_t *) src;
|
||||
size_t n = 0;
|
||||
mbstate_t ps;
|
||||
|
||||
memset (&ps, 0, sizeof ps);
|
||||
if (dst == NULL)
|
||||
len = (size_t) -1;
|
||||
while (n < len && nwc-- > 0)
|
||||
{
|
||||
wchar_t pw = *pwcs;
|
||||
/* Convert UNICODE private use area. Reverse functionality (only for
|
||||
path names) is transform_chars in path.cc. */
|
||||
if ((pw & 0xff00) == 0xf000)
|
||||
pw &= 0xff;
|
||||
int bytes = _wctomb_r (_REENT, buf, pw, &ps);
|
||||
/* Convert chars invalid in the current codepage to a sequence
|
||||
ASCII SO; UTF-8 representation of invalid char.
|
||||
Do the same for ASCII SO itself. */
|
||||
if ((bytes == -1 || pw == 0x0e) && *__locale_charset () != 'U'/*TF-8*/)
|
||||
{
|
||||
buf[0] = 0x0e; /* ASCII SO */
|
||||
bytes = __utf8_wctomb (_REENT, buf + 1, pw, __locale_charset (), &ps);
|
||||
if (bytes == -1)
|
||||
{
|
||||
++pwcs;
|
||||
ps.__count = 0;
|
||||
continue;
|
||||
}
|
||||
++bytes; /* Add the ASCII SO to the byte count. */
|
||||
if (ps.__count == -4) /* First half of a surrogate pair. */
|
||||
{
|
||||
++pwcs;
|
||||
if ((*pwcs & 0xfc00) != 0xdc00) /* Invalid second half. */
|
||||
{
|
||||
++pwcs;
|
||||
ps.__count = 0;
|
||||
continue;
|
||||
}
|
||||
bytes += __utf8_wctomb (_REENT, buf + bytes, *pwcs,
|
||||
__locale_charset (), &ps);
|
||||
}
|
||||
}
|
||||
if (n + bytes <= len)
|
||||
{
|
||||
n += bytes;
|
||||
if (dst)
|
||||
{
|
||||
for (int i = 0; i < bytes; ++i)
|
||||
*ptr++ = buf[i];
|
||||
}
|
||||
if (*pwcs++ == 0x00)
|
||||
break;
|
||||
}
|
||||
else
|
||||
break;
|
||||
}
|
||||
if (n && dst)
|
||||
{
|
||||
n = (n < len) ? n : len - 1;
|
||||
dst[n] = '\0';
|
||||
}
|
||||
|
||||
return n;
|
||||
}
|
||||
|
||||
/* Allocate a buffer big enough for the string, always including the
|
||||
terminating '\0'. The buffer pointer is returned in *tgt_p, the return
|
||||
terminating '\0'. The buffer pointer is returned in *dst_p, the return
|
||||
value is the number of bytes written to the buffer, as usual.
|
||||
The "type" argument determines where the resulting buffer is stored.
|
||||
It's either one of the cygheap_types values, or it's "HEAP_NOTHEAP".
|
||||
|
@ -67,57 +378,129 @@ sys_wcstombs (char *tgt, int tlen, const PWCHAR src, int slen)
|
|||
Note that this code is shared by cygserver (which requires it via
|
||||
__small_vsprintf) and so when built there plain calloc is the
|
||||
only choice. */
|
||||
int __stdcall
|
||||
sys_wcstombs_alloc (char **tgt_p, int type, const PWCHAR src, int slen)
|
||||
size_t __stdcall
|
||||
sys_wcstombs_alloc (char **dst_p, int type, const PWCHAR src, size_t nwc)
|
||||
{
|
||||
int ret;
|
||||
size_t ret;
|
||||
|
||||
ret = WideCharToMultiByte (get_cp (), 0, src, slen, NULL, 0 ,NULL, NULL);
|
||||
if (ret)
|
||||
ret = sys_wcstombs (NULL, (size_t) -1, src, nwc);
|
||||
if (ret > 0)
|
||||
{
|
||||
size_t tlen = (slen == -1) ? ret : ret + 1;
|
||||
size_t dlen = ret + 1;
|
||||
|
||||
if (type == HEAP_NOTHEAP)
|
||||
*tgt_p = (char *) calloc (tlen, sizeof (char));
|
||||
*dst_p = (char *) calloc (dlen, sizeof (char));
|
||||
else
|
||||
*tgt_p = (char *) ccalloc ((cygheap_types) type, tlen, sizeof (char));
|
||||
if (!*tgt_p)
|
||||
*dst_p = (char *) ccalloc ((cygheap_types) type, dlen, sizeof (char));
|
||||
if (!*dst_p)
|
||||
return 0;
|
||||
ret = sys_wcstombs (*tgt_p, tlen, src, slen);
|
||||
ret = sys_wcstombs (*dst_p, dlen, src, nwc);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
int __stdcall
|
||||
sys_mbstowcs (PWCHAR tgt, int tlen, const char *src, int slen)
|
||||
/* sys_cp_mbstowcs is actually most of the time called as sys_mbstowcs with
|
||||
a 0 codepage. If cp is not 0, the codepage is evaluated and used for the
|
||||
conversion. This is so that fhandler_console can switch to an alternate
|
||||
charset, which is the charset returned by GetConsoleCP (). Most of the
|
||||
time this is used for box and line drawing characters. */
|
||||
size_t __stdcall
|
||||
sys_cp_mbstowcs (UINT cp, PWCHAR dst, size_t dlen, const char *src, size_t nms)
|
||||
{
|
||||
int ret = MultiByteToWideChar (get_cp (), 0, src, slen, tgt, tlen);
|
||||
if (ret && tgt)
|
||||
wchar_t *ptr = dst;
|
||||
char *pmbs = (char *) src;
|
||||
size_t count = 0;
|
||||
size_t len = dlen;
|
||||
int bytes;
|
||||
mbstate_t ps;
|
||||
char charsetbuf[32];
|
||||
char *charset = __locale_charset ();
|
||||
mbtowc_p f_mbtowc = __mbtowc;
|
||||
|
||||
if (cp)
|
||||
f_mbtowc = __set_charset_from_codepage (cp, charset = charsetbuf);
|
||||
|
||||
memset (&ps, 0, sizeof ps);
|
||||
if (dst == NULL)
|
||||
len = (size_t)-1;
|
||||
while (len > 0)
|
||||
{
|
||||
ret = (ret < tlen) ? ret : tlen - 1;
|
||||
tgt[ret] = L'\0';
|
||||
/* ASCII SO. Convert following UTF-8 sequence (if not UTF-8 anyway). */
|
||||
if (*pmbs == 0x0e && *charset != 'U'/*TF-8*/)
|
||||
{
|
||||
pmbs++;
|
||||
bytes = __utf8_mbtowc (_REENT, ptr, pmbs, nms, charset, &ps);
|
||||
if (bytes < 0)
|
||||
{
|
||||
/* Invalid UTF-8 sequence? Treat the ASCII SO character as
|
||||
stand-alone ASCII SO char. */
|
||||
bytes = 1;
|
||||
if (dst)
|
||||
*ptr = 0x0e;
|
||||
memset (&ps, 0, sizeof ps);
|
||||
break;
|
||||
}
|
||||
if (bytes == 0)
|
||||
break;
|
||||
if (ps.__count == 4) /* First half of a surrogate. */
|
||||
{
|
||||
wchar_t *ptr2 = dst ? ptr + 1 : NULL;
|
||||
int bytes2 = __utf8_mbtowc (_REENT, ptr2, pmbs + bytes,
|
||||
nms - bytes, charset, &ps);
|
||||
if (bytes2 < 0)
|
||||
break;
|
||||
pmbs += bytes2;
|
||||
nms -= bytes2;
|
||||
++count;
|
||||
ptr = dst ? ptr + 1 : NULL;
|
||||
--len;
|
||||
}
|
||||
}
|
||||
else
|
||||
bytes = f_mbtowc (_REENT, ptr, pmbs, nms, charset, &ps);
|
||||
if (bytes > 0)
|
||||
{
|
||||
pmbs += bytes;
|
||||
nms -= bytes;
|
||||
++count;
|
||||
ptr = dst ? ptr + 1 : NULL;
|
||||
--len;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (bytes == 0)
|
||||
++count;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
|
||||
if (count && dst)
|
||||
{
|
||||
count = (count < dlen) ? count : dlen - 1;
|
||||
dst[count] = L'\0';
|
||||
}
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
/* Same as sys_wcstombs_alloc, just backwards. */
|
||||
int __stdcall
|
||||
sys_mbstowcs_alloc (PWCHAR *tgt_p, int type, const char *src, int slen)
|
||||
size_t __stdcall
|
||||
sys_mbstowcs_alloc (PWCHAR *dst_p, int type, const char *src, size_t nms)
|
||||
{
|
||||
int ret;
|
||||
size_t ret;
|
||||
|
||||
ret = MultiByteToWideChar (get_cp (), 0, src, slen, NULL, 0);
|
||||
if (ret)
|
||||
ret = sys_mbstowcs (NULL, (size_t) -1, src, nms);
|
||||
if (ret > 0)
|
||||
{
|
||||
size_t tlen = (slen == -1 ? ret : ret + 1);
|
||||
size_t dlen = ret + 1;
|
||||
|
||||
if (type == HEAP_NOTHEAP)
|
||||
*tgt_p = (PWCHAR) calloc (tlen, sizeof (WCHAR));
|
||||
*dst_p = (PWCHAR) calloc (dlen, sizeof (WCHAR));
|
||||
else
|
||||
*tgt_p = (PWCHAR) ccalloc ((cygheap_types) type, tlen, sizeof (WCHAR));
|
||||
if (!*tgt_p)
|
||||
*dst_p = (PWCHAR) ccalloc ((cygheap_types) type, dlen, sizeof (WCHAR));
|
||||
if (!*dst_p)
|
||||
return 0;
|
||||
ret = sys_mbstowcs (*tgt_p, tlen, src, slen);
|
||||
ret = sys_mbstowcs (*dst_p, dlen, src, nms);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/* wchar.h: Extra wchar defs
|
||||
|
||||
Copyright 2007 Red Hat, Inc.
|
||||
Copyright 2007, 2009 Red Hat, Inc.
|
||||
|
||||
This file is part of Cygwin.
|
||||
|
||||
|
@ -17,21 +17,22 @@ details. */
|
|||
extern "C" {
|
||||
#endif
|
||||
|
||||
#undef wcscasecmp
|
||||
#define wcscasecmp cygwin_wcscasecmp
|
||||
int __stdcall cygwin_wcscasecmp (const wchar_t *, const wchar_t *);
|
||||
extern "C" int __utf8_wctomb (struct _reent *, char *, wchar_t,
|
||||
const char *, mbstate_t *);
|
||||
|
||||
#undef wcsncasecmp
|
||||
#define wcsncasecmp cygwin_wcsncasecmp
|
||||
int __stdcall cygwin_wcsncasecmp (const wchar_t *, const wchar_t *, size_t);
|
||||
typedef int mbtowc_f (struct _reent *, wchar_t *, const char *, size_t,
|
||||
const char *, mbstate_t *);
|
||||
typedef mbtowc_f *mbtowc_p;
|
||||
|
||||
#undef wcslwr
|
||||
#define wcslwr cygwin_wcslwr
|
||||
wchar_t * __stdcall cygwin_wcslwr (wchar_t *);
|
||||
extern "C" mbtowc_p __mbtowc;
|
||||
extern "C" mbtowc_f __ascii_mbtowc;
|
||||
extern "C" mbtowc_f __utf8_mbtowc;
|
||||
extern "C" mbtowc_f __iso_mbtowc;
|
||||
extern "C" mbtowc_f __cp_mbtowc;
|
||||
|
||||
#undef wcsupr
|
||||
#define wcsupr cygwin_wcsupr
|
||||
wchar_t * __stdcall cygwin_wcsupr (wchar_t *);
|
||||
extern "C" char *__locale_charset ();
|
||||
|
||||
extern "C" mbtowc_p __set_charset_from_codepage (UINT cp, char *charset);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
|
|
@ -104,21 +104,24 @@ extern const char case_folded_upper[];
|
|||
/* The one function we use from winuser.h most of the time */
|
||||
extern "C" DWORD WINAPI GetLastError (void);
|
||||
|
||||
void codepage_init (const char *buf);
|
||||
UINT get_cp ();
|
||||
|
||||
/* Used as type by sys_wcstombs_alloc and sys_mbstowcs_alloc. For a
|
||||
description see there. */
|
||||
#define HEAP_NOTHEAP -1
|
||||
|
||||
int __stdcall sys_wcstombs (char *, int, const PWCHAR, int = -1)
|
||||
size_t __stdcall sys_wcstombs (char *, size_t, const PWCHAR, size_t = (size_t) -1)
|
||||
__attribute__ ((regparm(3)));
|
||||
int __stdcall sys_wcstombs_alloc (char **, int, const PWCHAR, int = -1)
|
||||
size_t __stdcall sys_wcstombs_alloc (char **, int, const PWCHAR, size_t = (size_t) -1)
|
||||
__attribute__ ((regparm(3)));
|
||||
|
||||
int __stdcall sys_mbstowcs (PWCHAR, int, const char *, int = -1)
|
||||
size_t __stdcall sys_cp_mbstowcs (UINT, PWCHAR, size_t, const char *, size_t = (size_t) -1)
|
||||
__attribute__ ((regparm(3)));
|
||||
int __stdcall sys_mbstowcs_alloc (PWCHAR *, int, const char *, int = -1)
|
||||
inline size_t
|
||||
sys_mbstowcs (PWCHAR dst, size_t dlen, const char *src,
|
||||
size_t nms = (size_t) -1)
|
||||
{
|
||||
return sys_cp_mbstowcs (0, dst, dlen, src, nms);
|
||||
}
|
||||
size_t __stdcall sys_mbstowcs_alloc (PWCHAR *, int, const char *, size_t = (size_t) -1)
|
||||
__attribute__ ((regparm(3)));
|
||||
|
||||
/* Used to check if Cygwin DLL is dynamically loaded. */
|
||||
|
@ -333,8 +336,6 @@ inline void clear_procimptoken ()
|
|||
CloseHandle (old_procimp);
|
||||
}
|
||||
}
|
||||
|
||||
void set_file_api_mode (codepage_type);
|
||||
#endif
|
||||
|
||||
#endif /* defined __cplusplus */
|
||||
|
|
Loading…
Reference in New Issue