Do not treat the command line or environment like paths
* dcrt0.cc (dll_crt0_1), environ.cc (environ_init, getwinenveq, build_env), strfuncs.cc (sys_wcstombs, sys_wcstombs_alloc), wchar.c (sys_wcstombs, sys_wcstombs_alloc): avoid mis-conversions of text that does not actually refer to a path or file name. Detailed explanation: Our WCS -> UTF conversion handles the Unicode private use page (0xF0xx) specially to allow for otherwise invalid file names. However, this handling makes no sense for command lines or environment variables, which we would rather convert verbatim. As a stop-gap solution, let's just introduce a version of the sys_wcstombs() function that specifically excludes that file name conversion magic. The proper solution is to change sys_wcstombs() to assume that its input is not a path, and to introduce a sys_wcstombs_path() that does the path-specific conversion, but that is a bigger task which we leave for another patch. Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
This commit is contained in:
parent
9ee2624240
commit
e0d4e3fec7
|
@ -953,9 +953,9 @@ dll_crt0_1 (void *)
|
||||||
if (!__argc)
|
if (!__argc)
|
||||||
{
|
{
|
||||||
PWCHAR wline = GetCommandLineW ();
|
PWCHAR wline = GetCommandLineW ();
|
||||||
size_t size = sys_wcstombs (NULL, 0, wline) + 1;
|
size_t size = sys_wcstombs_no_path (NULL, 0, wline) + 1;
|
||||||
char *line = (char *) alloca (size);
|
char *line = (char *) alloca (size);
|
||||||
sys_wcstombs (line, size, wline);
|
sys_wcstombs_no_path (line, size, wline);
|
||||||
|
|
||||||
/* Scan the command line and build argv. Expand wildcards if not
|
/* Scan the command line and build argv. Expand wildcards if not
|
||||||
called from another cygwin process. */
|
called from another cygwin process. */
|
||||||
|
|
|
@ -833,7 +833,7 @@ environ_init (char **envp, int envc)
|
||||||
eventually want to use them). */
|
eventually want to use them). */
|
||||||
for (i = 0, w = rawenv; *w != L'\0'; w = wcschr (w, L'\0') + 1, i++)
|
for (i = 0, w = rawenv; *w != L'\0'; w = wcschr (w, L'\0') + 1, i++)
|
||||||
{
|
{
|
||||||
sys_wcstombs_alloc (&newp, HEAP_NOTHEAP, w);
|
sys_wcstombs_alloc_no_path (&newp, HEAP_NOTHEAP, w);
|
||||||
if (i >= envc)
|
if (i >= envc)
|
||||||
envp = (char **) realloc (envp, (4 + (envc += 100)) * sizeof (char *));
|
envp = (char **) realloc (envp, (4 + (envc += 100)) * sizeof (char *));
|
||||||
envp[i] = newp;
|
envp[i] = newp;
|
||||||
|
@ -895,7 +895,7 @@ getwinenveq (const char *name, size_t namelen, int x)
|
||||||
int totlen = GetEnvironmentVariableW (name0, valbuf, 32768);
|
int totlen = GetEnvironmentVariableW (name0, valbuf, 32768);
|
||||||
if (totlen > 0)
|
if (totlen > 0)
|
||||||
{
|
{
|
||||||
totlen = sys_wcstombs (NULL, 0, valbuf) + 1;
|
totlen = sys_wcstombs_no_path (NULL, 0, valbuf) + 1;
|
||||||
if (x == HEAP_1_STR)
|
if (x == HEAP_1_STR)
|
||||||
totlen += namelen;
|
totlen += namelen;
|
||||||
else
|
else
|
||||||
|
@ -903,7 +903,7 @@ getwinenveq (const char *name, size_t namelen, int x)
|
||||||
char *p = (char *) cmalloc_abort ((cygheap_types) x, totlen);
|
char *p = (char *) cmalloc_abort ((cygheap_types) x, totlen);
|
||||||
if (namelen)
|
if (namelen)
|
||||||
strcpy (p, name);
|
strcpy (p, name);
|
||||||
sys_wcstombs (p + namelen, totlen, valbuf);
|
sys_wcstombs_no_path (p + namelen, totlen, valbuf);
|
||||||
debug_printf ("using value from GetEnvironmentVariable for '%W'", name0);
|
debug_printf ("using value from GetEnvironmentVariable for '%W'", name0);
|
||||||
return p;
|
return p;
|
||||||
}
|
}
|
||||||
|
@ -1055,7 +1055,7 @@ build_env (const char * const *envp, PWCHAR &envblock, int &envc,
|
||||||
for (winnum = 0, var = cwinenv;
|
for (winnum = 0, var = cwinenv;
|
||||||
*var;
|
*var;
|
||||||
++winnum, var = wcschr (var, L'\0') + 1)
|
++winnum, var = wcschr (var, L'\0') + 1)
|
||||||
sys_wcstombs_alloc (&winenv[winnum], HEAP_NOTHEAP, var);
|
sys_wcstombs_alloc_no_path (&winenv[winnum], HEAP_NOTHEAP, var);
|
||||||
}
|
}
|
||||||
DestroyEnvironmentBlock (cwinenv);
|
DestroyEnvironmentBlock (cwinenv);
|
||||||
/* Eliminate variables which are already available in envp, as well as
|
/* Eliminate variables which are already available in envp, as well as
|
||||||
|
|
|
@ -409,8 +409,9 @@ __big5_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
|
||||||
to buffer size, it's a bug in Cygwin and the buffer in the calling
|
to buffer size, it's a bug in Cygwin and the buffer in the calling
|
||||||
function should be raised.
|
function should be raised.
|
||||||
*/
|
*/
|
||||||
size_t __reg3
|
static size_t __reg3
|
||||||
sys_wcstombs (char *dst, size_t len, const wchar_t *src, size_t nwc)
|
sys_wcstombs (char *dst, size_t len, const wchar_t *src, size_t nwc,
|
||||||
|
bool is_path)
|
||||||
{
|
{
|
||||||
char buf[10];
|
char buf[10];
|
||||||
char *ptr = dst;
|
char *ptr = dst;
|
||||||
|
@ -434,7 +435,7 @@ sys_wcstombs (char *dst, size_t len, const wchar_t *src, size_t nwc)
|
||||||
ASCII area <= 0x7f (only for path names) is transform_chars above.
|
ASCII area <= 0x7f (only for path names) is transform_chars above.
|
||||||
Reverse functionality for invalid bytes in a multibyte sequence is
|
Reverse functionality for invalid bytes in a multibyte sequence is
|
||||||
in sys_cp_mbstowcs below. */
|
in sys_cp_mbstowcs below. */
|
||||||
if ((pw & 0xff00) == 0xf000
|
if (is_path && (pw & 0xff00) == 0xf000
|
||||||
&& (((cwc = (pw & 0xff)) <= 0x7f && tfx_rev_chars[cwc] >= 0xf000)
|
&& (((cwc = (pw & 0xff)) <= 0x7f && tfx_rev_chars[cwc] >= 0xf000)
|
||||||
|| (cwc >= 0x80 && MB_CUR_MAX > 1)))
|
|| (cwc >= 0x80 && MB_CUR_MAX > 1)))
|
||||||
{
|
{
|
||||||
|
@ -496,6 +497,18 @@ sys_wcstombs (char *dst, size_t len, const wchar_t *src, size_t nwc)
|
||||||
return n;
|
return n;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
size_t __reg3
|
||||||
|
sys_wcstombs (char *dst, size_t len, const wchar_t * src, size_t nwc)
|
||||||
|
{
|
||||||
|
return sys_wcstombs (dst, len, src, nwc, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t __reg3
|
||||||
|
sys_wcstombs_no_path (char *dst, size_t len, const wchar_t * src, size_t nwc)
|
||||||
|
{
|
||||||
|
return sys_wcstombs (dst, len, src, nwc, false);
|
||||||
|
}
|
||||||
|
|
||||||
/* Allocate a buffer big enough for the string, always including the
|
/* Allocate a buffer big enough for the string, always including the
|
||||||
terminating '\0'. The buffer pointer is returned in *dst_p, the return
|
terminating '\0'. The buffer pointer is returned in *dst_p, the return
|
||||||
value is the number of bytes written to the buffer, as usual.
|
value is the number of bytes written to the buffer, as usual.
|
||||||
|
@ -506,12 +519,13 @@ sys_wcstombs (char *dst, size_t len, const wchar_t *src, size_t nwc)
|
||||||
Note that this code is shared by cygserver (which requires it via
|
Note that this code is shared by cygserver (which requires it via
|
||||||
__small_vsprintf) and so when built there plain calloc is the
|
__small_vsprintf) and so when built there plain calloc is the
|
||||||
only choice. */
|
only choice. */
|
||||||
size_t __reg3
|
static size_t __reg3
|
||||||
sys_wcstombs_alloc (char **dst_p, int type, const wchar_t *src, size_t nwc)
|
sys_wcstombs_alloc (char **dst_p, int type, const wchar_t *src, size_t nwc,
|
||||||
|
bool is_path)
|
||||||
{
|
{
|
||||||
size_t ret;
|
size_t ret;
|
||||||
|
|
||||||
ret = sys_wcstombs (NULL, (size_t) -1, src, nwc);
|
ret = sys_wcstombs (NULL, (size_t) -1, src, nwc, is_path);
|
||||||
if (ret > 0)
|
if (ret > 0)
|
||||||
{
|
{
|
||||||
size_t dlen = ret + 1;
|
size_t dlen = ret + 1;
|
||||||
|
@ -522,11 +536,24 @@ sys_wcstombs_alloc (char **dst_p, int type, const wchar_t *src, size_t nwc)
|
||||||
*dst_p = (char *) ccalloc ((cygheap_types) type, dlen, sizeof (char));
|
*dst_p = (char *) ccalloc ((cygheap_types) type, dlen, sizeof (char));
|
||||||
if (!*dst_p)
|
if (!*dst_p)
|
||||||
return 0;
|
return 0;
|
||||||
ret = sys_wcstombs (*dst_p, dlen, src, nwc);
|
ret = sys_wcstombs (*dst_p, dlen, src, nwc, is_path);
|
||||||
}
|
}
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
size_t __reg3
|
||||||
|
sys_wcstombs_alloc (char **dst_p, int type, const wchar_t *src, size_t nwc)
|
||||||
|
{
|
||||||
|
return sys_wcstombs_alloc (dst_p, type, src, nwc, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t __reg3
|
||||||
|
sys_wcstombs_alloc_no_path (char **dst_p, int type, const wchar_t *src,
|
||||||
|
size_t nwc)
|
||||||
|
{
|
||||||
|
return sys_wcstombs_alloc (dst_p, type, src, nwc, false);
|
||||||
|
}
|
||||||
|
|
||||||
/* sys_cp_mbstowcs is actually most of the time called as sys_mbstowcs with
|
/* sys_cp_mbstowcs is actually most of the time called as sys_mbstowcs with
|
||||||
a 0 codepage. If cp is not 0, the codepage is evaluated and used for the
|
a 0 codepage. If cp is not 0, the codepage is evaluated and used for the
|
||||||
conversion. This is so that fhandler_console can switch to an alternate
|
conversion. This is so that fhandler_console can switch to an alternate
|
||||||
|
|
|
@ -52,8 +52,12 @@ extern char *__locale_charset ();
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
size_t __reg3 sys_wcstombs (char *dst, size_t len, const wchar_t * src,
|
size_t __reg3 sys_wcstombs (char *dst, size_t len, const wchar_t * src,
|
||||||
size_t nwc = (size_t) -1);
|
size_t nwc = (size_t) -1);
|
||||||
|
size_t __reg3 sys_wcstombs_no_path (char *dst, size_t len,
|
||||||
|
const wchar_t * src, size_t nwc = (size_t) -1);
|
||||||
size_t __reg3 sys_wcstombs_alloc (char **, int, const wchar_t *,
|
size_t __reg3 sys_wcstombs_alloc (char **, int, const wchar_t *,
|
||||||
size_t = (size_t) -1);
|
size_t = (size_t) -1);
|
||||||
|
size_t __reg3 sys_wcstombs_alloc_no_path (char **, int, const wchar_t *,
|
||||||
|
size_t = (size_t) -1);
|
||||||
|
|
||||||
size_t __reg3 sys_cp_mbstowcs (mbtowc_p, const char *, wchar_t *, size_t,
|
size_t __reg3 sys_cp_mbstowcs (mbtowc_p, const char *, wchar_t *, size_t,
|
||||||
const char *, size_t = (size_t) -1);
|
const char *, size_t = (size_t) -1);
|
||||||
|
|
Loading…
Reference in New Issue