mirror of
git://sourceware.org/git/newlib-cygwin.git
synced 2025-02-21 16:26:12 +08:00
Make basename and dirname functions work with path names
containing multibyte character strings.
This commit is contained in:
parent
062bfdacdb
commit
af8e63023a
@ -1,3 +1,9 @@
|
|||||||
|
2007-03-08 Keith Marshall <keithmarshall@users.sourceforge.net>
|
||||||
|
|
||||||
|
* mingwex/basename.c: Make it work with path names containing
|
||||||
|
multibyte character strings.
|
||||||
|
* mingwex/dirname.c: Likewise.
|
||||||
|
|
||||||
2007-03-05 Danny Smith <dannysmith@users.sourceforge.net>
|
2007-03-05 Danny Smith <dannysmith@users.sourceforge.net>
|
||||||
|
|
||||||
* include/io.h (__mingw_access): New static inline wrapper to restore
|
* include/io.h (__mingw_access): New static inline wrapper to restore
|
||||||
|
@ -20,8 +20,10 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <libgen.h>
|
#include <libgen.h>
|
||||||
|
#include <locale.h>
|
||||||
|
|
||||||
#ifndef __cdecl /* If compiling on any non-Win32 platform ... */
|
#ifndef __cdecl /* If compiling on any non-Win32 platform ... */
|
||||||
#define __cdecl /* this may not be defined. */
|
#define __cdecl /* this may not be defined. */
|
||||||
@ -29,61 +31,114 @@
|
|||||||
|
|
||||||
__cdecl char *basename( char *path )
|
__cdecl char *basename( char *path )
|
||||||
{
|
{
|
||||||
char *retname;
|
size_t len;
|
||||||
static char retfail[] = ".";
|
static char *retfail = NULL;
|
||||||
|
|
||||||
|
/* to handle path names for files in multibyte character locales,
|
||||||
|
* we need to set up LC_CTYPE to match the host file system locale
|
||||||
|
*/
|
||||||
|
|
||||||
|
char *locale = setlocale( LC_CTYPE, NULL );
|
||||||
|
if( locale != NULL ) locale = strdup( locale );
|
||||||
|
setlocale( LC_CTYPE, "" );
|
||||||
|
|
||||||
if( path && *path )
|
if( path && *path )
|
||||||
{
|
{
|
||||||
/* step over the drive designator, if present ...
|
/* allocate sufficient local storage space,
|
||||||
* (FIXME: maybe should confirm *path is a valid drive designator).
|
* in which to create a wide character reference copy of path
|
||||||
*/
|
*/
|
||||||
|
|
||||||
if( path[1] == ':' )
|
wchar_t refcopy[1 + (len = mbstowcs( NULL, path, 0 ))];
|
||||||
path += 2;
|
|
||||||
|
/* create the wide character reference copy of path,
|
||||||
|
* and step over the drive designator, if present ...
|
||||||
|
*/
|
||||||
|
|
||||||
|
wchar_t *refpath = refcopy;
|
||||||
|
if( ((len = mbstowcs( refpath, path, len )) > 1) && (refpath[1] == L':') )
|
||||||
|
{
|
||||||
|
/* FIXME: maybe should confirm *refpath is a valid drive designator */
|
||||||
|
|
||||||
|
refpath += 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ensure that our wide character reference path is NUL terminated */
|
||||||
|
|
||||||
|
refcopy[ len ] = L'\0';
|
||||||
|
|
||||||
/* check again, just to ensure we still have a non-empty path name ... */
|
/* check again, just to ensure we still have a non-empty path name ... */
|
||||||
|
|
||||||
if( *path )
|
if( *refpath )
|
||||||
{
|
{
|
||||||
/* and, when we do ...
|
/* and, when we do, process it in the wide character domain ...
|
||||||
* scan from left to right, to the char after the final dir separator
|
* scanning from left to right, to the char after the final dir separator
|
||||||
*/
|
*/
|
||||||
|
|
||||||
for( retname = path ; *path ; ++path )
|
wchar_t *refname;
|
||||||
|
for( refname = refpath ; *refpath ; ++refpath )
|
||||||
{
|
{
|
||||||
if( (*path == '/') || (*path == '\\') )
|
if( (*refpath == L'/') || (*refpath == L'\\') )
|
||||||
{
|
{
|
||||||
/* we found a dir separator ...
|
/* we found a dir separator ...
|
||||||
* step over it, and any others which immediately follow it
|
* step over it, and any others which immediately follow it
|
||||||
*/
|
*/
|
||||||
|
|
||||||
while( (*path == '/') || (*path == '\\') )
|
while( (*refpath == L'/') || (*refpath == L'\\') )
|
||||||
++path;
|
++refpath;
|
||||||
|
|
||||||
/* if we didn't reach the end of the path string ... */
|
/* if we didn't reach the end of the path string ... */
|
||||||
|
|
||||||
if( *path )
|
if( *refpath )
|
||||||
|
|
||||||
/* then we have a new candidate for the base name */
|
/* then we have a new candidate for the base name */
|
||||||
|
|
||||||
retname = path;
|
refname = refpath;
|
||||||
|
|
||||||
/* otherwise ...
|
/* otherwise ...
|
||||||
* strip off any trailing dir separators which we found
|
* strip off any trailing dir separators which we found
|
||||||
*/
|
*/
|
||||||
|
|
||||||
else while( (path > retname) && ((*--path == '/') || (*path == '\\')) )
|
else while( (refpath > refname)
|
||||||
*path = '\0';
|
&& ((*--refpath == L'/') || (*refpath == L'\\')) )
|
||||||
|
*refpath = L'\0';
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* retname now points at the resolved base name ...
|
/* in the wide character domain ...
|
||||||
* if it's not empty, then we return it as it is, otherwise ...
|
* refname now points at the resolved base name ...
|
||||||
* we must have had only dir separators in the original path name,
|
|
||||||
* so we return "/".
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
return *retname ? retname : strcpy( retfail, "/" );
|
if( *refname )
|
||||||
|
{
|
||||||
|
/* if it's not empty,
|
||||||
|
* then we transform the full normalised path back into
|
||||||
|
* the multibyte character domain, and skip over the dirname,
|
||||||
|
* to return the resolved basename.
|
||||||
|
*/
|
||||||
|
|
||||||
|
if( (len = wcstombs( path, refcopy, len )) != (size_t)(-1) )
|
||||||
|
path[ len ] = '\0';
|
||||||
|
*refname = L'\0';
|
||||||
|
if( (len = wcstombs( NULL, refcopy, 0 )) != (size_t)(-1) )
|
||||||
|
path += len;
|
||||||
|
}
|
||||||
|
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/* the basename is empty, so return the default value of "/",
|
||||||
|
* transforming from wide char to multibyte char domain, and
|
||||||
|
* returning it in our own buffer.
|
||||||
|
*/
|
||||||
|
|
||||||
|
retfail = realloc( retfail, len = 1 + wcstombs( NULL, L"/", 0 ));
|
||||||
|
wcstombs( path = retfail, L"/", len );
|
||||||
|
}
|
||||||
|
|
||||||
|
/* restore the caller's locale, clean up, and return the result */
|
||||||
|
|
||||||
|
setlocale( LC_CTYPE, locale );
|
||||||
|
free( locale );
|
||||||
|
return( path );
|
||||||
}
|
}
|
||||||
|
|
||||||
/* or we had an empty residual path name, after the drive designator,
|
/* or we had an empty residual path name, after the drive designator,
|
||||||
@ -93,11 +148,20 @@ __cdecl char *basename( char *path )
|
|||||||
|
|
||||||
/* and, if we get to here ...
|
/* and, if we get to here ...
|
||||||
* the path name is either NULL, or it decomposes to an empty string;
|
* the path name is either NULL, or it decomposes to an empty string;
|
||||||
* in either case, we return the default value of "." in our static buffer,
|
* in either case, we return the default value of "." in our own buffer,
|
||||||
* (but strcpy it, just in case the caller trashed it after a previous call).
|
* reloading it with the correct value, transformed from the wide char
|
||||||
|
* to the multibyte char domain, just in case the caller trashed it
|
||||||
|
* after a previous call.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
return strcpy( retfail, "." );
|
retfail = realloc( retfail, len = 1 + wcstombs( NULL, L".", 0 ));
|
||||||
|
wcstombs( retfail, L".", len );
|
||||||
|
|
||||||
|
/* restore the caller's locale, clean up, and return the result */
|
||||||
|
|
||||||
|
setlocale( LC_CTYPE, locale );
|
||||||
|
free( locale );
|
||||||
|
return( retfail );
|
||||||
}
|
}
|
||||||
|
|
||||||
/* $RCSfile$: end of file */
|
/* $RCSfile$$Revision$: end of file */
|
||||||
|
@ -20,8 +20,10 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <libgen.h>
|
#include <libgen.h>
|
||||||
|
#include <locale.h>
|
||||||
|
|
||||||
#ifndef __cdecl /* If compiling on any non-Win32 platform ... */
|
#ifndef __cdecl /* If compiling on any non-Win32 platform ... */
|
||||||
#define __cdecl /* this may not be defined. */
|
#define __cdecl /* this may not be defined. */
|
||||||
@ -29,12 +31,30 @@
|
|||||||
|
|
||||||
__cdecl char *dirname( char *path )
|
__cdecl char *dirname( char *path )
|
||||||
{
|
{
|
||||||
static char retfail[] = "?:.";
|
size_t len;
|
||||||
char *retname, *basename, *copyptr = retfail;
|
static char *retfail = NULL;
|
||||||
|
|
||||||
|
/* to handle path names for files in multibyte character locales,
|
||||||
|
* we need to set up LC_CTYPE to match the host file system locale.
|
||||||
|
*/
|
||||||
|
|
||||||
|
char *locale = setlocale( LC_CTYPE, NULL );
|
||||||
|
if( locale != NULL ) locale = strdup( locale );
|
||||||
|
setlocale( LC_CTYPE, "" );
|
||||||
|
|
||||||
if( path && *path )
|
if( path && *path )
|
||||||
{
|
{
|
||||||
retname = path;
|
/* allocate sufficient local storage space,
|
||||||
|
* in which to create a wide character reference copy of path
|
||||||
|
*/
|
||||||
|
|
||||||
|
wchar_t refcopy[1 + (len = mbstowcs( NULL, path, 0 ))];
|
||||||
|
|
||||||
|
/* create the wide character reference copy of path */
|
||||||
|
|
||||||
|
wchar_t *refpath = refcopy;
|
||||||
|
len = mbstowcs( refpath, path, len );
|
||||||
|
refcopy[ len ] = L'\0';
|
||||||
|
|
||||||
/* SUSv3 identifies a special case, where path is exactly equal to "//";
|
/* SUSv3 identifies a special case, where path is exactly equal to "//";
|
||||||
* (we will also accept "\\" in the Win32 context, but not "/\" or "\/",
|
* (we will also accept "\\" in the Win32 context, but not "/\" or "\/",
|
||||||
@ -44,48 +64,55 @@ __cdecl char *dirname( char *path )
|
|||||||
* simply return the path unchanged, (i.e. "//" or "\\").
|
* simply return the path unchanged, (i.e. "//" or "\\").
|
||||||
*/
|
*/
|
||||||
|
|
||||||
if( (*path == '/') || (*path == '\\') )
|
if( (len > 1) && ((refpath[0] == L'/') || (refpath[0] == L'\\')) )
|
||||||
{
|
{
|
||||||
if( (path[1] == *retname) && (path[2] == '\0') )
|
if( (refpath[1] == refpath[0]) && (refpath[2] == L'\0') )
|
||||||
return retname;
|
{
|
||||||
|
setlocale( LC_CTYPE, locale );
|
||||||
|
free( locale );
|
||||||
|
return path;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* For all other cases ...
|
/* For all other cases ...
|
||||||
* step over the drive designator, if present, copying it to retfail ...
|
* step over the drive designator, if present ...
|
||||||
* (FIXME: maybe should confirm *path is a valid drive designator).
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
else if( *path && (path[1] == ':') )
|
else if( (len > 1) && (refpath[1] == L':') )
|
||||||
{
|
{
|
||||||
*copyptr++ = *path++;
|
/* FIXME: maybe should confirm *refpath is a valid drive designator */
|
||||||
*copyptr++ = *path++;
|
|
||||||
|
refpath += 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
if( *path )
|
/* check again, just to ensure we still have a non-empty path name ... */
|
||||||
|
|
||||||
|
if( *refpath )
|
||||||
{
|
{
|
||||||
/* reproduce the scanning logic of the "basename" function
|
/* reproduce the scanning logic of the "basename" function
|
||||||
* to locate the basename component of the current path string,
|
* to locate the basename component of the current path string,
|
||||||
* (but also remember where the dirname component starts).
|
* (but also remember where the dirname component starts).
|
||||||
*/
|
*/
|
||||||
|
|
||||||
for( retname = basename = path ; *path ; ++path )
|
wchar_t *refname, *basename;
|
||||||
|
for( refname = basename = refpath ; *refpath ; ++refpath )
|
||||||
{
|
{
|
||||||
if( (*path == '/') || (*path == '\\') )
|
if( (*refpath == L'/') || (*refpath == L'\\') )
|
||||||
{
|
{
|
||||||
/* we found a dir separator ...
|
/* we found a dir separator ...
|
||||||
* step over it, and any others which immediately follow it
|
* step over it, and any others which immediately follow it
|
||||||
*/
|
*/
|
||||||
|
|
||||||
while( (*path == '/') || (*path == '\\') )
|
while( (*refpath == L'/') || (*refpath == L'\\') )
|
||||||
++path;
|
++refpath;
|
||||||
|
|
||||||
/* if we didn't reach the end of the path string ... */
|
/* if we didn't reach the end of the path string ... */
|
||||||
|
|
||||||
if( *path )
|
if( *refpath )
|
||||||
|
|
||||||
/* then we have a new candidate for the base name */
|
/* then we have a new candidate for the base name */
|
||||||
|
|
||||||
basename = path;
|
basename = refpath;
|
||||||
|
|
||||||
else
|
else
|
||||||
|
|
||||||
@ -102,73 +129,107 @@ __cdecl char *dirname( char *path )
|
|||||||
* to confirm that we have distinct dirname and basename components
|
* to confirm that we have distinct dirname and basename components
|
||||||
*/
|
*/
|
||||||
|
|
||||||
if( basename > retname )
|
if( basename > refname )
|
||||||
{
|
{
|
||||||
/* and, when we do ...
|
/* and, when we do ...
|
||||||
* backtrack over all trailing separators on the dirname component,
|
* backtrack over all trailing separators on the dirname component,
|
||||||
* (but preserve exactly two initial dirname separators, if identical),
|
* (but preserve exactly two initial dirname separators, if identical),
|
||||||
* and add a NULL terminator in their place.
|
* and add a NUL terminator in their place.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
--basename;
|
do --basename;
|
||||||
while( (basename > retname) && ((*basename == '/') || (*basename == '\\')) )
|
while( (basename > refname) && ((*basename == L'/') || (*basename == L'\\')) );
|
||||||
--basename;
|
if( (basename == refname) && ((refname[0] == L'/') || (refname[0] == L'\\'))
|
||||||
if( (basename == retname) && ((*retname == '/') || (*retname == '\\'))
|
&& (refname[1] == refname[0]) && (refname[2] != L'/') && (refname[2] != L'\\') )
|
||||||
&& (retname[1] == *retname) && (retname[2] != '/') && (retname[2] != '\\') )
|
|
||||||
++basename;
|
++basename;
|
||||||
*++basename = '\0';
|
*++basename = L'\0';
|
||||||
|
|
||||||
/* adjust the start point of the dirname,
|
|
||||||
* to accommodate the Win32 drive designator, if it was present.
|
|
||||||
*/
|
|
||||||
|
|
||||||
if( copyptr > retfail )
|
|
||||||
retname -= 2;
|
|
||||||
|
|
||||||
/* if the resultant dirname begins with EXACTLY two dir separators,
|
/* if the resultant dirname begins with EXACTLY two dir separators,
|
||||||
* AND both are identical, then we preserve them.
|
* AND both are identical, then we preserve them.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
path = copyptr = retname;
|
refpath = refcopy;
|
||||||
while( ((*path == '/') || (*path == '\\')) )
|
while( ((*refpath == L'/') || (*refpath == L'\\')) )
|
||||||
++path;
|
++refpath;
|
||||||
if( ((path - retname) == 2) && (*++copyptr == *retname) )
|
if( ((refpath - refcopy) > 2) || (refcopy[1] != refcopy[0]) )
|
||||||
++copyptr;
|
refpath = refcopy;
|
||||||
|
|
||||||
/* and finally ...
|
/* and finally ...
|
||||||
* we remove any residual, redundantly duplicated separators from the dirname,
|
* we remove any residual, redundantly duplicated separators from the dirname,
|
||||||
* reterminate, and return it.
|
* reterminate, and return it.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
path = copyptr;
|
refname = refpath;
|
||||||
while( *path )
|
while( *refpath )
|
||||||
{
|
{
|
||||||
if( ((*copyptr++ = *path) == '/') || (*path++ == '\\') )
|
if( ((*refname++ = *refpath) == L'/') || (*refpath++ == L'\\') )
|
||||||
{
|
{
|
||||||
while( (*path == '/') || (*path == '\\') )
|
while( (*refpath == L'/') || (*refpath == L'\\') )
|
||||||
++path;
|
++refpath;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
*copyptr = '\0';
|
*refname = L'\0';
|
||||||
return retname;
|
|
||||||
|
/* finally ...
|
||||||
|
* transform the resolved dirname back into the multibyte char domain,
|
||||||
|
* restore the caller's locale, and return the resultant dirname
|
||||||
|
*/
|
||||||
|
|
||||||
|
if( (len = wcstombs( path, refcopy, len )) != (size_t)(-1) )
|
||||||
|
path[ len ] = '\0';
|
||||||
}
|
}
|
||||||
|
|
||||||
else if( (*retname == '/') || (*retname == '\\') )
|
else
|
||||||
{
|
{
|
||||||
*copyptr++ = *retname;
|
/* either there were no dirname separators in the path name,
|
||||||
*copyptr = '\0';
|
* or there was nothing else ...
|
||||||
return retfail;
|
*/
|
||||||
|
|
||||||
|
if( (*refname == L'/') || (*refname == L'\\') )
|
||||||
|
{
|
||||||
|
/* it was all separators, so return one */
|
||||||
|
|
||||||
|
++refname;
|
||||||
|
}
|
||||||
|
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/* there were no separators, so return '.' */
|
||||||
|
|
||||||
|
*refname++ = L'.';
|
||||||
|
}
|
||||||
|
|
||||||
|
/* add a NUL terminator, in either case,
|
||||||
|
* then transform to the multibyte char domain,
|
||||||
|
* using our own buffer
|
||||||
|
*/
|
||||||
|
|
||||||
|
*refname = L'\0';
|
||||||
|
retfail = realloc( retfail, len = 1 + wcstombs( NULL, refcopy, 0 ));
|
||||||
|
wcstombs( path = retfail, refcopy, len );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* restore caller's locale, clean up, and return the resolved dirname */
|
||||||
|
|
||||||
|
setlocale( LC_CTYPE, locale );
|
||||||
|
free( locale );
|
||||||
|
return path;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* path is NULL, or an empty string; default return value is "." ...
|
/* path is NULL, or an empty string; default return value is "." ...
|
||||||
* return this in our own static buffer, but strcpy it, just in case
|
* return this in our own buffer, regenerated by wide char transform,
|
||||||
* the caller trashed it after a previous call.
|
* in case the caller trashed it after a previous call.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
strcpy( copyptr, "." );
|
retfail = realloc( retfail, len = 1 + wcstombs( NULL, L".", 0 ));
|
||||||
|
wcstombs( retfail, L".", len );
|
||||||
|
|
||||||
|
/* restore caller's locale, clean up, and return the default dirname */
|
||||||
|
|
||||||
|
setlocale( LC_CTYPE, locale );
|
||||||
|
free( locale );
|
||||||
return retfail;
|
return retfail;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* $RCSfile$: end of file */
|
/* $RCSfile$$Revision$: end of file */
|
||||||
|
Loading…
x
Reference in New Issue
Block a user