From 8802178fddfd28038bb95867869bb5bc6ac9743a Mon Sep 17 00:00:00 2001 From: Corinna Vinschen Date: Fri, 23 Apr 2010 11:07:35 +0000 Subject: [PATCH] * path.h (get_nt_native_path): Add third parameter to declaration and declare with regparms. * path.cc (get_nt_native_path): Add third parameter to allow conversion of leading and trailing dots and spaces on filesystems only supporting filenames following DOS rules. (path_conv::get_nt_native_path): Call get_nt_native_path according to fs.has_dos_filenames_only flag. (getfileattr): Accommodate new parameter to get_nt_native_path. (symlink_info::check): Revamp fs_update_called handling to call fs.update only once per call. Call get_nt_native_path according to fs.has_dos_filenames_only flag. Streamline filesystem dependent code not to be called more than once unnecessarily. Drop code tweaking incoming path for broken filesystems only allowing DOS pathnames. Rely on changed get_nt_native_path instead. * mount.cc (fillout_mntent): Accommodate new parameter to get_nt_native_path. * strfuncs.cc (tfx_rev_chars): New conversion table with comment. (sys_cp_wcstombs): Use tfx_rev_chars rather than tfx_chars. --- winsup/cygwin/ChangeLog | 21 +++++++++++ winsup/cygwin/mount.cc | 2 +- winsup/cygwin/path.cc | 74 ++++++++++++++++++++++++--------------- winsup/cygwin/path.h | 2 +- winsup/cygwin/strfuncs.cc | 44 +++++++++++++++++++++-- 5 files changed, 110 insertions(+), 33 deletions(-) diff --git a/winsup/cygwin/ChangeLog b/winsup/cygwin/ChangeLog index ef716d747..b2c4ce624 100644 --- a/winsup/cygwin/ChangeLog +++ b/winsup/cygwin/ChangeLog @@ -1,3 +1,24 @@ +2010-04-23 Corinna Vinschen + + * path.h (get_nt_native_path): Add third parameter to declaration and + declare with regparms. + * path.cc (get_nt_native_path): Add third parameter to allow conversion + of leading and trailing dots and spaces on filesystems only supporting + filenames following DOS rules. 
+ (path_conv::get_nt_native_path): Call get_nt_native_path according to + fs.has_dos_filenames_only flag. + (getfileattr): Accommodate new parameter to get_nt_native_path. + (symlink_info::check): Revamp fs_update_called handling to call + fs.update only once per call. Call get_nt_native_path according to + fs.has_dos_filenames_only flag. Streamline filesystem dependent code + not to be called more than once unnecessarily. Drop code tweaking + incoming path for broken filesystems only allowing DOS pathnames. + Rely on changed get_nt_native_path instead. + * mount.cc (fillout_mntent): Accommodate new parameter to + get_nt_native_path. + * strfuncs.cc (tfx_rev_chars): New conversion table with comment. + (sys_cp_wcstombs): Use tfx_rev_chars rather than tfx_chars. + 2010-04-22 Corinna Vinschen * path.cc (symlink_info::check): Make sure to restart only once. diff --git a/winsup/cygwin/mount.cc b/winsup/cygwin/mount.cc index 74268f617..ee9c94f19 100644 --- a/winsup/cygwin/mount.cc +++ b/winsup/cygwin/mount.cc @@ -1470,7 +1470,7 @@ fillout_mntent (const char *native_path, const char *posix_path, unsigned flags) tmp_pathbuf tp; UNICODE_STRING unat; tp.u_get (&unat); - get_nt_native_path (native_path, unat); + get_nt_native_path (native_path, unat, false); if (append_bs) RtlAppendUnicodeToString (&unat, L"\\"); mntinfo.update (&unat, NULL); diff --git a/winsup/cygwin/path.cc b/winsup/cygwin/path.cc index fd9c3a01a..6b0b1d1c4 100644 --- a/winsup/cygwin/path.cc +++ b/winsup/cygwin/path.cc @@ -393,7 +393,7 @@ str2uni_cat (UNICODE_STRING &tgt, const char *srcstr) } PUNICODE_STRING -get_nt_native_path (const char *path, UNICODE_STRING& upath) +get_nt_native_path (const char *path, UNICODE_STRING& upath, bool dos) { upath.Length = 0; if (path[0] == '/') /* special path w/o NT path representation. 
*/ @@ -425,6 +425,26 @@ get_nt_native_path (const char *path, UNICODE_STRING& upath) RtlAppendUnicodeStringToString (&upath, &ro_u_natp); str2uni_cat (upath, path + 4); } + if (dos) + { + /* Unfortunately we can't just use transform_chars with the tfx_rev_chars + table since only leading and trailing spaces and dots are affected. + So we step to every backslash and fix surrounding dots and spaces. + That makes these broken filesystems a bit slower, but, hey. */ + PWCHAR cp = upath.Buffer + 7; + PWCHAR cend = upath.Buffer + upath.Length / sizeof (WCHAR); + while (++cp < cend) + if (*cp == L'\\') + { + PWCHAR ccp = cp - 1; + while (*ccp == L'.' || *ccp == L' ') + *ccp-- |= 0xf000; + while (cp[1] == L' ') + *++cp |= 0xf000; + } + while (*--cp == L'.' || *cp == L' ') + *cp |= 0xf000; + } return &upath; } @@ -437,7 +457,7 @@ path_conv::get_nt_native_path () uni_path.MaximumLength = (strlen (path) + 10) * sizeof (WCHAR); wide_path = (PWCHAR) cmalloc_abort (HEAP_STR, uni_path.MaximumLength); uni_path.Buffer = wide_path; - ::get_nt_native_path (path, uni_path); + ::get_nt_native_path (path, uni_path, fs.has_dos_filenames_only ()); } return &uni_path; } @@ -501,7 +521,7 @@ getfileattr (const char *path, bool caseinsensitive) /* path has to be always ab InitializeObjectAttributes (&attr, &upath, caseinsensitive ? OBJ_CASE_INSENSITIVE : 0, NULL, NULL); - get_nt_native_path (path, upath); + get_nt_native_path (path, upath, false); status = NtQueryAttributesFile (&attr, &fbi); if (NT_SUCCESS (status)) @@ -2178,9 +2198,10 @@ symlink_info::check (char *path, const suffix_info *suffixes, unsigned opt, IO_STATUS_BLOCK io; FILE_BASIC_INFORMATION fbi; suffix_scan suffix; + bool fs_update_called = false; - ULONG ci_flag = cygwin_shared->obcaseinsensitive || (pflags & PATH_NOPOSIX) - ? OBJ_CASE_INSENSITIVE : 0; + const ULONG ci_flag = cygwin_shared->obcaseinsensitive + || (pflags & PATH_NOPOSIX) ? OBJ_CASE_INSENSITIVE : 0; /* TODO: Temporarily do all char->UNICODE conversion here. 
This should already be slightly faster than using Ascii functions. */ tmp_pathbuf tp; @@ -2212,10 +2233,9 @@ restart: while (suffix.next ()) { bool no_ea = false; - bool fs_update_called = false; error = 0; - get_nt_native_path (suffix.path, upath); + get_nt_native_path (suffix.path, upath, fs.has_dos_filenames_only ()); if (h) { NtClose (h); @@ -2261,7 +2281,8 @@ restart: } if (status == STATUS_OBJECT_NAME_NOT_FOUND) { - if (ci_flag == 0 && wincap.has_broken_udf ()) + if (ci_flag == 0 && wincap.has_broken_udf () + && (!fs_update_called || fs.is_udf ())) { /* On NT 5.x UDF is broken (at least) in terms of case sensitivity. When trying to open a file case sensitive, @@ -2276,10 +2297,9 @@ restart: attr.Attributes = 0; if (NT_SUCCESS (status)) { - fs.update (&upath, h); - if (fs.is_udf ()) - fs_update_called = true; - else + if (!fs_update_called) + fs_update_called = fs.update (&upath, h); + if (!fs.is_udf ()) { NtClose (h); status = STATUS_OBJECT_NAME_NOT_FOUND; @@ -2295,31 +2315,29 @@ restart: we encountered a STATUS_OBJECT_NAME_NOT_FOUND *and* we didn't already attach a suffix *and* the above special case for UDF on XP didn't succeeed. */ - if (!restarted && !*ext_here && !fs_update_called) + if (!restarted && !*ext_here + && (!fs_update_called || fs.has_dos_filenames_only ())) { /* Check for leading space or trailing dot or space in last component. */ char *pend = ext_here; - while (pend[-1] == '.' || pend[-1] == ' ') + if (pend[-1] == '.' || pend[-1] == ' ') --pend; char *pbeg = pend; while (pbeg[-1] != '\\') --pbeg; /* If so, call fs.update to check if the filesystem is one of the broken ones. */ - if ((*pbeg == ' ' || *pend != '\0') - && fs.update (&upath, NULL) - && fs.has_dos_filenames_only ()) + if (*pbeg == ' ' || *pend != '\0') { - /* If so, strip leading spaces and trailing dots and spaces - from filename and... */ - if (pbeg) - while (*pbeg == ' ') - memmove (pbeg, pbeg + 1, --pend - pbeg); - *pend = '\0'; - /* ...try again. 
*/ - restarted = true; - goto restart; + if (!fs_update_called) + fs_update_called = fs.update (&upath, NULL); + if (fs.has_dos_filenames_only ()) + { + /* If so, try again. */ + restarted = true; + goto restart; + } } } } @@ -2327,7 +2345,7 @@ restart: if (NT_SUCCESS (status) /* Check file system while we're having the file open anyway. This speeds up path_conv noticably (~10%). */ - && (fs_update_called || fs.update (&upath, h)) + && (fs_update_called || (fs_update_called = fs.update (&upath, h))) && NT_SUCCESS (status = fs.has_buggy_basic_info () ? NtQueryAttributesFile (&attr, &fbi) : NtQueryInformationFile (h, &io, &fbi, sizeof fbi, @@ -2397,7 +2415,7 @@ restart: TRUE, &basename, TRUE); /* Take the opportunity to check file system while we're having the handle to the parent dir. */ - fs.update (&upath, h); + fs_update_called = fs.update (&upath, h); NtClose (dir); if (!NT_SUCCESS (status)) { diff --git a/winsup/cygwin/path.h b/winsup/cygwin/path.h index 855f4a26d..a0c7df3f3 100644 --- a/winsup/cygwin/path.h +++ b/winsup/cygwin/path.h @@ -302,7 +302,7 @@ int path_prefix_p (const char *path1, const char *path2, int len1, bool is_floppy (const char *); int normalize_win32_path (const char *, char *, char *&); int normalize_posix_path (const char *, char *, char *&); -PUNICODE_STRING get_nt_native_path (const char *, UNICODE_STRING&); +PUNICODE_STRING get_nt_native_path (const char *, UNICODE_STRING&, bool) __attribute__ ((regparm (3))); /* FIXME: Move to own include file eventually */ diff --git a/winsup/cygwin/strfuncs.cc b/winsup/cygwin/strfuncs.cc index 6d9f4a232..097421703 100644 --- a/winsup/cygwin/strfuncs.cc +++ b/winsup/cygwin/strfuncs.cc @@ -23,8 +23,7 @@ details. */ use area in the U+f0XX range. The affected characters are all control chars 1 <= c <= 31, as well as the characters " * : < > ? |. The backslash is affected as well, but we can't transform it as long as we accept Win32 - paths as input. 
- The reverse functionality is in function sys_cp_wcstombs. */ + paths as input. */ static const WCHAR tfx_chars[] = { 0, 0xf000 | 1, 0xf000 | 2, 0xf000 | 3, 0xf000 | 4, 0xf000 | 5, 0xf000 | 6, 0xf000 | 7, @@ -60,6 +59,45 @@ static const WCHAR tfx_chars[] = { 0xf000 | '|', '}', '~', 127 }; +/* This is the table for the reverse functionality in sys_cp_wcstombs. + It differs deliberately in two code places (space and dot) to allow + converting back space and dot on filesystems only supporting DOS + filenames. */ +static const WCHAR tfx_rev_chars[] = { + 0, 0xf000 | 1, 0xf000 | 2, 0xf000 | 3, + 0xf000 | 4, 0xf000 | 5, 0xf000 | 6, 0xf000 | 7, + 0xf000 | 8, 0xf000 | 9, 0xf000 | 10, 0xf000 | 11, + 0xf000 | 12, 0xf000 | 13, 0xf000 | 14, 0xf000 | 15, + 0xf000 | 16, 0xf000 | 17, 0xf000 | 18, 0xf000 | 19, + 0xf000 | 20, 0xf000 | 21, 0xf000 | 22, 0xf000 | 23, + 0xf000 | 24, 0xf000 | 25, 0xf000 | 26, 0xf000 | 27, + 0xf000 | 28, 0xf000 | 29, 0xf000 | 30, 0xf000 | 31, + 0xf000 | ' ', '!', 0xf000 | '"', '#', + '$', '%', '&', 39, + '(', ')', 0xf000 | '*', '+', + ',', '-', 0xf000 | '.', '\\', + '0', '1', '2', '3', + '4', '5', '6', '7', + '8', '9', 0xf000 | ':', ';', + 0xf000 | '<', '=', 0xf000 | '>', 0xf000 | '?', + '@', 'A', 'B', 'C', + 'D', 'E', 'F', 'G', + 'H', 'I', 'J', 'K', + 'L', 'M', 'N', 'O', + 'P', 'Q', 'R', 'S', + 'T', 'U', 'V', 'W', + 'X', 'Y', 'Z', '[', + '\\', ']', '^', '_', + '`', 'a', 'b', 'c', + 'd', 'e', 'f', 'g', + 'h', 'i', 'j', 'k', + 'l', 'm', 'n', 'o', + 'p', 'q', 'r', 's', + 't', 'u', 'v', 'w', + 'x', 'y', 'z', '{', + 0xf000 | '|', '}', '~', 127 +}; + void transform_chars (PWCHAR path, PWCHAR path_end) { @@ -382,7 +420,7 @@ sys_cp_wcstombs (wctomb_p f_wctomb, const char *charset, char *dst, size_t len, Reverse functionality for invalid bytes in a multibyte sequence is in sys_cp_mbstowcs below. 
*/ if ((pw & 0xff00) == 0xf000 - && (((cwc = (pw & 0xff)) <= 0x7f && tfx_chars[cwc] >= 0xf000) + && (((cwc = (pw & 0xff)) <= 0x7f && tfx_rev_chars[cwc] >= 0xf000) || (cwc >= 0x80 && MB_CUR_MAX > 1))) { buf[0] = (char) cwc;