4
0
mirror of git://sourceware.org/git/newlib-cygwin.git synced 2025-02-21 16:26:12 +08:00

Make basename and dirname functions work with path names containing multibyte character strings.
This commit is contained in:
Keith Marshall 2007-03-08 23:15:58 +00:00
parent 062bfdacdb
commit af8e63023a
3 changed files with 210 additions and 79 deletions

View File

@ -1,3 +1,9 @@
2007-03-08 Keith Marshall <keithmarshall@users.sourceforge.net>
* mingwex/basename.c: Make it work with path names containing
multibyte character strings.
* mingwex/dirname.c: Likewise.
2007-03-05 Danny Smith <dannysmith@users.sourceforge.net> 2007-03-05 Danny Smith <dannysmith@users.sourceforge.net>
* include/io.h (__mingw_access): New static inline wrapper to restore * include/io.h (__mingw_access): New static inline wrapper to restore

View File

@ -20,8 +20,10 @@
*/ */
#include <stdio.h> #include <stdio.h>
#include <stdlib.h>
#include <string.h> #include <string.h>
#include <libgen.h> #include <libgen.h>
#include <locale.h>
#ifndef __cdecl /* If compiling on any non-Win32 platform ... */ #ifndef __cdecl /* If compiling on any non-Win32 platform ... */
#define __cdecl /* this may not be defined. */ #define __cdecl /* this may not be defined. */
@ -29,61 +31,114 @@
__cdecl char *basename( char *path ) __cdecl char *basename( char *path )
{ {
char *retname; size_t len;
static char retfail[] = "."; static char *retfail = NULL;
/* to handle path names for files in multibyte character locales,
* we need to set up LC_CTYPE to match the host file system locale
*/
char *locale = setlocale( LC_CTYPE, NULL );
if( locale != NULL ) locale = strdup( locale );
setlocale( LC_CTYPE, "" );
if( path && *path ) if( path && *path )
{ {
/* step over the drive designator, if present ... /* allocate sufficient local storage space,
* (FIXME: maybe should confirm *path is a valid drive designator). * in which to create a wide character reference copy of path
*/ */
if( path[1] == ':' ) wchar_t refcopy[1 + (len = mbstowcs( NULL, path, 0 ))];
path += 2;
/* create the wide character reference copy of path,
* and step over the drive designator, if present ...
*/
wchar_t *refpath = refcopy;
if( ((len = mbstowcs( refpath, path, len )) > 1) && (refpath[1] == L':') )
{
/* FIXME: maybe should confirm *refpath is a valid drive designator */
refpath += 2;
}
/* ensure that our wide character reference path is NUL terminated */
refcopy[ len ] = L'\0';
/* check again, just to ensure we still have a non-empty path name ... */ /* check again, just to ensure we still have a non-empty path name ... */
if( *path ) if( *refpath )
{ {
/* and, when we do ... /* and, when we do, process it in the wide character domain ...
* scan from left to right, to the char after the final dir separator * scanning from left to right, to the char after the final dir separator
*/ */
for( retname = path ; *path ; ++path ) wchar_t *refname;
for( refname = refpath ; *refpath ; ++refpath )
{ {
if( (*path == '/') || (*path == '\\') ) if( (*refpath == L'/') || (*refpath == L'\\') )
{ {
/* we found a dir separator ... /* we found a dir separator ...
* step over it, and any others which immediately follow it * step over it, and any others which immediately follow it
*/ */
while( (*path == '/') || (*path == '\\') ) while( (*refpath == L'/') || (*refpath == L'\\') )
++path; ++refpath;
/* if we didn't reach the end of the path string ... */ /* if we didn't reach the end of the path string ... */
if( *path ) if( *refpath )
/* then we have a new candidate for the base name */ /* then we have a new candidate for the base name */
retname = path; refname = refpath;
/* otherwise ... /* otherwise ...
* strip off any trailing dir separators which we found * strip off any trailing dir separators which we found
*/ */
else while( (path > retname) && ((*--path == '/') || (*path == '\\')) ) else while( (refpath > refname)
*path = '\0'; && ((*--refpath == L'/') || (*refpath == L'\\')) )
*refpath = L'\0';
} }
} }
/* retname now points at the resolved base name ... /* in the wide character domain ...
* if it's not empty, then we return it as it is, otherwise ... * refname now points at the resolved base name ...
* we must have had only dir separators in the original path name,
* so we return "/".
*/ */
return *retname ? retname : strcpy( retfail, "/" ); if( *refname )
{
/* if it's not empty,
* then we transform the full normalised path back into
* the multibyte character domain, and skip over the dirname,
* to return the resolved basename.
*/
if( (len = wcstombs( path, refcopy, len )) != (size_t)(-1) )
path[ len ] = '\0';
*refname = L'\0';
if( (len = wcstombs( NULL, refcopy, 0 )) != (size_t)(-1) )
path += len;
}
else
{
/* the basename is empty, so return the default value of "/",
* transforming from wide char to multibyte char domain, and
* returning it in our own buffer.
*/
retfail = realloc( retfail, len = 1 + wcstombs( NULL, L"/", 0 ));
wcstombs( path = retfail, L"/", len );
}
/* restore the caller's locale, clean up, and return the result */
setlocale( LC_CTYPE, locale );
free( locale );
return( path );
} }
/* or we had an empty residual path name, after the drive designator, /* or we had an empty residual path name, after the drive designator,
@ -93,11 +148,20 @@ __cdecl char *basename( char *path )
/* and, if we get to here ... /* and, if we get to here ...
* the path name is either NULL, or it decomposes to an empty string; * the path name is either NULL, or it decomposes to an empty string;
* in either case, we return the default value of "." in our static buffer, * in either case, we return the default value of "." in our own buffer,
* (but strcpy it, just in case the caller trashed it after a previous call). * reloading it with the correct value, transformed from the wide char
* to the multibyte char domain, just in case the caller trashed it
* after a previous call.
*/ */
return strcpy( retfail, "." ); retfail = realloc( retfail, len = 1 + wcstombs( NULL, L".", 0 ));
wcstombs( retfail, L".", len );
/* restore the caller's locale, clean up, and return the result */
setlocale( LC_CTYPE, locale );
free( locale );
return( retfail );
} }
/* $RCSfile$: end of file */ /* $RCSfile$$Revision$: end of file */

View File

@ -20,8 +20,10 @@
*/ */
#include <stdio.h> #include <stdio.h>
#include <stdlib.h>
#include <string.h> #include <string.h>
#include <libgen.h> #include <libgen.h>
#include <locale.h>
#ifndef __cdecl /* If compiling on any non-Win32 platform ... */ #ifndef __cdecl /* If compiling on any non-Win32 platform ... */
#define __cdecl /* this may not be defined. */ #define __cdecl /* this may not be defined. */
@ -29,12 +31,30 @@
__cdecl char *dirname( char *path ) __cdecl char *dirname( char *path )
{ {
static char retfail[] = "?:."; size_t len;
char *retname, *basename, *copyptr = retfail; static char *retfail = NULL;
/* to handle path names for files in multibyte character locales,
* we need to set up LC_CTYPE to match the host file system locale.
*/
char *locale = setlocale( LC_CTYPE, NULL );
if( locale != NULL ) locale = strdup( locale );
setlocale( LC_CTYPE, "" );
if( path && *path ) if( path && *path )
{ {
retname = path; /* allocate sufficient local storage space,
* in which to create a wide character reference copy of path
*/
wchar_t refcopy[1 + (len = mbstowcs( NULL, path, 0 ))];
/* create the wide character reference copy of path */
wchar_t *refpath = refcopy;
len = mbstowcs( refpath, path, len );
refcopy[ len ] = L'\0';
/* SUSv3 identifies a special case, where path is exactly equal to "//"; /* SUSv3 identifies a special case, where path is exactly equal to "//";
* (we will also accept "\\" in the Win32 context, but not "/\" or "\/", * (we will also accept "\\" in the Win32 context, but not "/\" or "\/",
@ -44,48 +64,55 @@ __cdecl char *dirname( char *path )
* simply return the path unchanged, (i.e. "//" or "\\"). * simply return the path unchanged, (i.e. "//" or "\\").
*/ */
if( (*path == '/') || (*path == '\\') ) if( (len > 1) && ((refpath[0] == L'/') || (refpath[0] == L'\\')) )
{ {
if( (path[1] == *retname) && (path[2] == '\0') ) if( (refpath[1] == refpath[0]) && (refpath[2] == L'\0') )
return retname; {
setlocale( LC_CTYPE, locale );
free( locale );
return path;
}
} }
/* For all other cases ... /* For all other cases ...
* step over the drive designator, if present, copying it to retfail ... * step over the drive designator, if present ...
* (FIXME: maybe should confirm *path is a valid drive designator).
*/ */
else if( *path && (path[1] == ':') ) else if( (len > 1) && (refpath[1] == L':') )
{ {
*copyptr++ = *path++; /* FIXME: maybe should confirm *refpath is a valid drive designator */
*copyptr++ = *path++;
refpath += 2;
} }
if( *path ) /* check again, just to ensure we still have a non-empty path name ... */
if( *refpath )
{ {
/* reproduce the scanning logic of the "basename" function /* reproduce the scanning logic of the "basename" function
* to locate the basename component of the current path string, * to locate the basename component of the current path string,
* (but also remember where the dirname component starts). * (but also remember where the dirname component starts).
*/ */
for( retname = basename = path ; *path ; ++path ) wchar_t *refname, *basename;
for( refname = basename = refpath ; *refpath ; ++refpath )
{ {
if( (*path == '/') || (*path == '\\') ) if( (*refpath == L'/') || (*refpath == L'\\') )
{ {
/* we found a dir separator ... /* we found a dir separator ...
* step over it, and any others which immediately follow it * step over it, and any others which immediately follow it
*/ */
while( (*path == '/') || (*path == '\\') ) while( (*refpath == L'/') || (*refpath == L'\\') )
++path; ++refpath;
/* if we didn't reach the end of the path string ... */ /* if we didn't reach the end of the path string ... */
if( *path ) if( *refpath )
/* then we have a new candidate for the base name */ /* then we have a new candidate for the base name */
basename = path; basename = refpath;
else else
@ -102,73 +129,107 @@ __cdecl char *dirname( char *path )
* to confirm that we have distinct dirname and basename components * to confirm that we have distinct dirname and basename components
*/ */
if( basename > retname ) if( basename > refname )
{ {
/* and, when we do ... /* and, when we do ...
* backtrack over all trailing separators on the dirname component, * backtrack over all trailing separators on the dirname component,
* (but preserve exactly two initial dirname separators, if identical), * (but preserve exactly two initial dirname separators, if identical),
* and add a NULL terminator in their place. * and add a NUL terminator in their place.
*/ */
--basename; do --basename;
while( (basename > retname) && ((*basename == '/') || (*basename == '\\')) ) while( (basename > refname) && ((*basename == L'/') || (*basename == L'\\')) );
--basename; if( (basename == refname) && ((refname[0] == L'/') || (refname[0] == L'\\'))
if( (basename == retname) && ((*retname == '/') || (*retname == '\\')) && (refname[1] == refname[0]) && (refname[2] != L'/') && (refname[2] != L'\\') )
&& (retname[1] == *retname) && (retname[2] != '/') && (retname[2] != '\\') )
++basename; ++basename;
*++basename = '\0'; *++basename = L'\0';
/* adjust the start point of the dirname,
* to accommodate the Win32 drive designator, if it was present.
*/
if( copyptr > retfail )
retname -= 2;
/* if the resultant dirname begins with EXACTLY two dir separators, /* if the resultant dirname begins with EXACTLY two dir separators,
* AND both are identical, then we preserve them. * AND both are identical, then we preserve them.
*/ */
path = copyptr = retname; refpath = refcopy;
while( ((*path == '/') || (*path == '\\')) ) while( ((*refpath == L'/') || (*refpath == L'\\')) )
++path; ++refpath;
if( ((path - retname) == 2) && (*++copyptr == *retname) ) if( ((refpath - refcopy) > 2) || (refcopy[1] != refcopy[0]) )
++copyptr; refpath = refcopy;
/* and finally ... /* and finally ...
* we remove any residual, redundantly duplicated separators from the dirname, * we remove any residual, redundantly duplicated separators from the dirname,
* reterminate, and return it. * reterminate, and return it.
*/ */
path = copyptr; refname = refpath;
while( *path ) while( *refpath )
{ {
if( ((*copyptr++ = *path) == '/') || (*path++ == '\\') ) if( ((*refname++ = *refpath) == L'/') || (*refpath++ == L'\\') )
{ {
while( (*path == '/') || (*path == '\\') ) while( (*refpath == L'/') || (*refpath == L'\\') )
++path; ++refpath;
} }
} }
*copyptr = '\0'; *refname = L'\0';
return retname;
/* finally ...
* transform the resolved dirname back into the multibyte char domain,
* restore the caller's locale, and return the resultant dirname
*/
if( (len = wcstombs( path, refcopy, len )) != (size_t)(-1) )
path[ len ] = '\0';
} }
else if( (*retname == '/') || (*retname == '\\') ) else
{ {
*copyptr++ = *retname; /* either there were no dirname separators in the path name,
*copyptr = '\0'; * or there was nothing else ...
return retfail; */
if( (*refname == L'/') || (*refname == L'\\') )
{
/* it was all separators, so return one */
++refname;
}
else
{
/* there were no separators, so return '.' */
*refname++ = L'.';
}
/* add a NUL terminator, in either case,
* then transform to the multibyte char domain,
* using our own buffer
*/
*refname = L'\0';
retfail = realloc( retfail, len = 1 + wcstombs( NULL, refcopy, 0 ));
wcstombs( path = retfail, refcopy, len );
} }
/* restore caller's locale, clean up, and return the resolved dirname */
setlocale( LC_CTYPE, locale );
free( locale );
return path;
} }
} }
/* path is NULL, or an empty string; default return value is "." ... /* path is NULL, or an empty string; default return value is "." ...
* return this in our own static buffer, but strcpy it, just in case * return this in our own buffer, regenerated by wide char transform,
* the caller trashed it after a previous call. * in case the caller trashed it after a previous call.
*/ */
strcpy( copyptr, "." ); retfail = realloc( retfail, len = 1 + wcstombs( NULL, L".", 0 ));
wcstombs( retfail, L".", len );
/* restore caller's locale, clean up, and return the default dirname */
setlocale( LC_CTYPE, locale );
free( locale );
return retfail; return retfail;
} }
/* $RCSfile$: end of file */ /* $RCSfile$$Revision$: end of file */