From 1597484cb589da409832f64db94c8ac79ccf468c Mon Sep 17 00:00:00 2001 From: Corinna Vinschen Date: Thu, 31 Jan 2008 20:26:01 +0000 Subject: [PATCH] * dcrt0.cc (dll_crt0_1): Use GetCommandLineW and convert to current codepage. * environ.cc (set_file_api_mode): Always set file api to ANSI if not using the OEM codepage. (codepage_init): Allow "utf8" codepage. * fhandler_clipboard.cc (set_clipboard): Convert clipbuf to void and cast as needed. Always convert input to wide char and write CF_UNICODETEXT to clipboard. (fhandler_dev_clipboard::read): Read CF_UNICODETEXT from clipboard and convert to current codepage if CYGWIN_NATIVE format is not available. * fhandler_console.cc: Drop redundant undef. * smallprint.cc (__small_vsprintf): Convert PWCHAR and UNICODE_STRING to current codepage for printing. * strfuncs.cc: Use PWCHAR throughout. (get_cp): Return CP_UTF8 for utf8_cp codepage setting. (sys_wcstombs): Allow NULL target buffer. (sys_wcstombs_alloc): New function. (sys_mbstowcs_alloc): Ditto. * winsup.h (codepage_type): Add utf8_cp. (HEAP_NOTHEAP): Define. (sys_wcstombs_alloc): Declare. (sys_mbstowcs_alloc): Declare. --- winsup/cygwin/ChangeLog | 25 ++++++++++ winsup/cygwin/dcrt0.cc | 10 ++-- winsup/cygwin/environ.cc | 15 +++--- winsup/cygwin/fhandler_clipboard.cc | 55 ++++++++++++++-------- winsup/cygwin/fhandler_console.cc | 1 - winsup/cygwin/smallprint.cc | 19 +++----- winsup/cygwin/strfuncs.cc | 71 +++++++++++++++++++++++++++-- winsup/cygwin/winsup.h | 14 ++++-- 8 files changed, 155 insertions(+), 55 deletions(-) diff --git a/winsup/cygwin/ChangeLog b/winsup/cygwin/ChangeLog index 20af6ca60..debfe630e 100644 --- a/winsup/cygwin/ChangeLog +++ b/winsup/cygwin/ChangeLog @@ -1,3 +1,28 @@ +2008-01-31 Corinna Vinschen + + * dcrt0.cc (dll_crt0_1): Use GetCommandLineW and convert to current + codepage. + * environ.cc (set_file_api_mode): Always set file api to ANSI if not + using the OEM codepage. + (codepage_init): Allow "utf8" codepage. 
+ * fhandler_clipboard.cc (set_clipboard): Convert clipbuf to void and + cast as needed. Always convert input to wide char and write + CF_UNICODETEXT to clipboard. + (fhandler_dev_clipboard::read): Read CF_UNICODETEXT from clipboard and + convert to current codepage if CYGWIN_NATIVE format is not available. + * fhandler_console.cc: Drop redundant undef. + * smallprint.cc (__small_vsprintf): Convert PWCHAR and UNICODE_STRING + to current codepage for printing. + * strfuncs.cc: Use PWCHAR throughout. + (get_cp): Return CP_UTF8 for utf8_cp codepage setting. + (sys_wcstombs): Allow NULL target buffer. + (sys_wcstombs_alloc): New function. + (sys_mbstowcs_alloc): Ditto. + * winsup.h (codepage_type): Add utf8_cp. + (HEAP_NOTHEAP): Define. + (sys_wcstombs_alloc): Declare. + (sys_mbstowcs_alloc): Declare. + 2008-01-31 Corinna Vinschen * cygheap.h (struct cwdstuff): Add dir member to store cwd handle. diff --git a/winsup/cygwin/dcrt0.cc b/winsup/cygwin/dcrt0.cc index a022c8305..a9679c90c 100644 --- a/winsup/cygwin/dcrt0.cc +++ b/winsup/cygwin/dcrt0.cc @@ -18,6 +18,7 @@ details. */ #include "exceptions.h" #include #include +#include #include #include #include "sigproc.h" @@ -855,11 +856,10 @@ dll_crt0_1 (void *) if (!__argc) { - char *line = GetCommandLineA (); - line = strcpy ((char *) alloca (strlen (line) + 1), line); - - if (current_codepage == oem_cp) - CharToOemA (line, line); + PWCHAR wline = GetCommandLineW (); + size_t size = sys_wcstombs (NULL, 0, wline); + char *line = (char *) alloca (size); + sys_wcstombs (line, size, wline); /* Scan the command line and build argv. Expand wildcards if not called from another cygwin process. 
*/ diff --git a/winsup/cygwin/environ.cc b/winsup/cygwin/environ.cc index 9c68dcaaf..d21e6f726 100644 --- a/winsup/cygwin/environ.cc +++ b/winsup/cygwin/environ.cc @@ -506,7 +506,7 @@ set_file_api_mode (codepage_type cp) SetFileApisToOEM (); debug_printf ("File APIs set to OEM"); } - else if (cp == ansi_cp) + else { SetFileApisToANSI (); debug_printf ("File APIs set to ANSI"); @@ -520,17 +520,14 @@ codepage_init (const char *buf) return; if (strcasematch (buf, "oem")) - { - current_codepage = oem_cp; - set_file_api_mode (current_codepage); - } + current_codepage = oem_cp; else if (strcasematch (buf, "ansi")) - { - current_codepage = ansi_cp; - set_file_api_mode (current_codepage); - } + current_codepage = ansi_cp; + else if (strcasematch (buf, "utf8")) + current_codepage = utf8_cp; else debug_printf ("Wrong codepage name: %s", buf); + set_file_api_mode (current_codepage); } static void diff --git a/winsup/cygwin/fhandler_clipboard.cc b/winsup/cygwin/fhandler_clipboard.cc index 15ae5f936..070c545af 100644 --- a/winsup/cygwin/fhandler_clipboard.cc +++ b/winsup/cygwin/fhandler_clipboard.cc @@ -14,7 +14,9 @@ details. 
*/ #include #include #include +#include #include +#include #include #include #include "cygerrno.h" @@ -82,7 +84,7 @@ static int set_clipboard (const void *buf, size_t len) { HGLOBAL hmem; - unsigned char *clipbuf; + void *clipbuf; /* Native CYGWIN format */ OpenClipboard (0); hmem = GlobalAlloc (GMEM_MOVEABLE, len + sizeof (size_t)); @@ -91,8 +93,8 @@ set_clipboard (const void *buf, size_t len) system_printf ("Couldn't allocate global buffer for write"); return -1; } - clipbuf = (unsigned char *) GlobalLock (hmem); - memcpy (clipbuf + sizeof (size_t), buf, len); + clipbuf = GlobalLock (hmem); + memcpy ((unsigned char *) clipbuf + sizeof (size_t), buf, len); *(size_t *) (clipbuf) = len; GlobalUnlock (hmem); EmptyClipboard (); @@ -118,19 +120,24 @@ set_clipboard (const void *buf, size_t len) /* CF_TEXT/CF_OEMTEXT for copying to wordpad and the like */ OpenClipboard (0); - hmem = GlobalAlloc (GMEM_MOVEABLE, len + 2); + + len = MultiByteToWideChar (get_cp (), 0, (const char *) buf, len, NULL, 0); + if (!len) + { + system_printf ("Invalid string"); + return -1; + } + hmem = GlobalAlloc (GMEM_MOVEABLE, (len + 1) * sizeof (WCHAR)); if (!hmem) { system_printf ("Couldn't allocate global buffer for write"); return -1; } - clipbuf = (unsigned char *) GlobalLock (hmem); - memcpy (clipbuf, buf, len); - *(clipbuf + len) = '\0'; - *(clipbuf + len + 1) = '\0'; + clipbuf = GlobalLock (hmem); + sys_mbstowcs ((PWCHAR) clipbuf, (const char *) buf, len); + *((PWCHAR) clipbuf + len) = L'\0'; GlobalUnlock (hmem); - if (!SetClipboardData - ((current_codepage == ansi_cp ? CF_TEXT : CF_OEMTEXT), hmem)) + if (!SetClipboardData (CF_UNICODETEXT, hmem)) { system_printf ("Couldn't write to the clipboard"); /* FIXME: return an appriate error code &| set_errno(); */ @@ -196,7 +203,7 @@ fhandler_dev_clipboard::read (void *ptr, size_t& len) else { formatlist[0] = cygnativeformat; - formatlist[1] = current_codepage == ansi_cp ? 
CF_TEXT : CF_OEMTEXT; + formatlist[1] = CF_UNICODETEXT; OpenClipboard (0); if ((format = GetPriorityClipboardFormat (formatlist, 2)) <= 0) { @@ -222,16 +229,24 @@ fhandler_dev_clipboard::read (void *ptr, size_t& len) } else { - LPSTR lpstr; - lpstr = (LPSTR) GlobalLock (hglb); - - ret = ((len > (strlen (lpstr) - pos)) ? (strlen (lpstr) - pos) - : len); - - memcpy (ptr, lpstr + pos, ret); - //ret = snprintf((char *) ptr, len, "%s", lpstr);//+pos); + int wret; + PWCHAR buf; + buf = (PWCHAR) GlobalLock (hglb); + size_t glen = GlobalSize (hglb) / sizeof (WCHAR) - 1; + + /* This loop is necessary because the number of bytes returned + by WideCharToMultiByte does not indicate the number of wide + chars used for it, so we could potentially drop wide chars. */ + if (glen - pos > len) + glen = pos + len; + while ((wret = sys_wcstombs (NULL, 0, buf + pos, glen - pos)) + != -1 + && (size_t) wret > len) + --glen; + ret = sys_wcstombs ((char *) ptr, len, buf + pos, glen - pos); + //ret = snprintf((char *) ptr, len, "%s", buf);//+pos); pos += ret; - if (pos + len - ret >= strlen (lpstr)) + if (pos + len - ret >= wcslen (buf)) eof = true; GlobalUnlock (hglb); } diff --git a/winsup/cygwin/fhandler_console.cc b/winsup/cygwin/fhandler_console.cc index e4afe16d5..9bc6b0fd2 100644 --- a/winsup/cygwin/fhandler_console.cc +++ b/winsup/cygwin/fhandler_console.cc @@ -527,7 +527,6 @@ fhandler_console::read (void *pv, size_t& buflen) else if (res == line_edit_input_done) break; } -#undef ich } while (buflen) diff --git a/winsup/cygwin/smallprint.cc b/winsup/cygwin/smallprint.cc index 9208947e2..36cf6db88 100644 --- a/winsup/cygwin/smallprint.cc +++ b/winsup/cygwin/smallprint.cc @@ -195,24 +195,17 @@ __small_vsprintf (char *dst, const char *fmt, va_list ap) us = va_arg (ap, PUNICODE_STRING); wfillin: { - ANSI_STRING as = { 0, 0, NULL }; - NTSTATUS status; + char *tmp; - if (current_codepage == ansi_cp) - status = RtlUnicodeStringToAnsiString (&as, us, TRUE); - else - status = 
RtlUnicodeStringToOemString (&as, us, TRUE); - if (!NT_SUCCESS (status)) + if (!sys_wcstombs_alloc (&tmp, HEAP_NOTHEAP, us->Buffer, + us->Length / sizeof (WCHAR))) { s = "invalid UNICODE_STRING"; goto fillin; } - for (i = 0; i < as.Length; ++i) - *dst++ = as.Buffer[i]; - if (current_codepage == ansi_cp) - RtlFreeAnsiString (&as); - else - RtlFreeOemString (&as); + for (i = 0; tmp[i] && i < n; i++) + *dst++ = tmp[i]; + free (tmp); } break; default: diff --git a/winsup/cygwin/strfuncs.cc b/winsup/cygwin/strfuncs.cc index 9d0e3ec08..0dac85e99 100644 --- a/winsup/cygwin/strfuncs.cc +++ b/winsup/cygwin/strfuncs.cc @@ -10,28 +10,44 @@ Cygwin license. Please consult the file "CYGWIN_LICENSE" for details. */ #include "winsup.h" +#include #include #include #include +#include "cygerrno.h" +#include "security.h" +#include "path.h" +#include "fhandler.h" +#include "dtable.h" +#include "cygheap.h" codepage_type current_codepage = ansi_cp; UINT get_cp () { - return current_codepage == ansi_cp ? GetACP() : GetOEMCP(); + switch (current_codepage) + { + case oem_cp: + return GetOEMCP (); + case utf8_cp: + return CP_UTF8; + case ansi_cp: + default: + return GetACP (); + } } /* tlen is always treated as the maximum buffer size, including the '\0' character. sys_wcstombs will always return a 0-terminated result, no matter what. */ int __stdcall -sys_wcstombs (char *tgt, int tlen, const WCHAR *src, int slen) +sys_wcstombs (char *tgt, int tlen, const PWCHAR src, int slen) { int ret; ret = WideCharToMultiByte (get_cp (), 0, src, slen, tgt, tlen, NULL, NULL); - if (ret) + if (ret && tgt) { ret = (ret < tlen) ? ret : tlen - 1; tgt[ret] = '\0'; @@ -39,13 +55,60 @@ sys_wcstombs (char *tgt, int tlen, const WCHAR *src, int slen) return ret; } +/* Allocate a buffer big enough for the string, always including the + terminating '\0'. The buffer pointer is returned in *tgt_p, the return + value is the number of bytes written to the buffer, as usual. 
+ The "type" argument determines where the resulting buffer is stored. + It's either one of the cygheap_types values, or it's "HEAP_NOTHEAP". + In the latter case the allocation uses simple calloc. */ int __stdcall -sys_mbstowcs (WCHAR *tgt, const char *src, int len) +sys_wcstombs_alloc (char **tgt_p, int type, const PWCHAR src, int slen) +{ + int ret; + + ret = WideCharToMultiByte (get_cp (), 0, src, slen, NULL, 0,NULL, NULL); + if (ret) + { + size_t tlen = (slen == -1 ? ret : ret + 1); + + if (type == HEAP_NOTHEAP) + *tgt_p = (char *) calloc (tlen, sizeof (char)); + else + *tgt_p = (char *) ccalloc ((cygheap_types) type, tlen, sizeof (char)); + if (!*tgt_p) + return 0; + ret = sys_wcstombs (*tgt_p, tlen, src, slen); + } + return ret; +} + +int __stdcall +sys_mbstowcs (PWCHAR tgt, const char *src, int len) { int res = MultiByteToWideChar (get_cp (), 0, src, -1, tgt, len); return res; } +/* Same as sys_wcstombs_alloc, just backwards. */ +int __stdcall +sys_mbstowcs_alloc (PWCHAR *tgt_p, int type, const char *src) +{ + int ret; + + ret = MultiByteToWideChar (get_cp (), 0, src, -1, NULL, 0); + if (ret) + { + if (type == HEAP_NOTHEAP) + *tgt_p = (PWCHAR) calloc (ret, sizeof (WCHAR)); + else + *tgt_p = (PWCHAR) ccalloc ((cygheap_types) type, ret, sizeof (WCHAR)); + if (!*tgt_p) + return 0; + ret = sys_mbstowcs (*tgt_p, src, ret); + } + return ret; +} + static WCHAR hex_wchars[] = L"0123456789abcdef"; NTSTATUS NTAPI diff --git a/winsup/cygwin/winsup.h b/winsup/cygwin/winsup.h index 5b4b573f5..505326cec 100644 --- a/winsup/cygwin/winsup.h +++ b/winsup/cygwin/winsup.h @@ -110,15 +110,23 @@ extern const char case_folded_upper[]; /* The one function we use from winuser.h most of the time */ extern "C" DWORD WINAPI GetLastError (void); -enum codepage_type {ansi_cp, oem_cp}; +enum codepage_type {ansi_cp, oem_cp, utf8_cp}; extern codepage_type current_codepage; UINT get_cp (); -int __stdcall sys_wcstombs(char *, int, const WCHAR *, int = -1) +/* Used as type by 
sys_wcstombs_alloc and sys_mbstowcs_alloc. For a + description see there. */ +#define HEAP_NOTHEAP -1 + +int __stdcall sys_wcstombs (char *, int, const PWCHAR, int = -1) + __attribute__ ((regparm(3))); +int __stdcall sys_wcstombs_alloc (char **, int, const PWCHAR, int = -1) __attribute__ ((regparm(3))); -int __stdcall sys_mbstowcs(WCHAR *, const char *, int) +int __stdcall sys_mbstowcs (PWCHAR, const char *, int) + __attribute__ ((regparm(3))); +int __stdcall sys_mbstowcs_alloc (PWCHAR *, int, const char *) __attribute__ ((regparm(3))); /* Used to check if Cygwin DLL is dynamically loaded. */