mirror of
git://sourceware.org/git/newlib-cygwin.git
synced 2025-03-02 04:55:35 +08:00
Cygwin: glob: implement collating symbol support
Allow the [.<sym>.] expression. This requires a string comparison rather than a character comparison. Introduce and use __wscollate_range_cmp. Signed-off-by: Corinna Vinschen <corinna@vinschen.de>
This commit is contained in:
parent
244faaea8e
commit
ce5aa09807
@ -160,6 +160,9 @@ typedef char Char;
|
|||||||
#define M_SET META('[')
|
#define M_SET META('[')
|
||||||
#define M_NAMED META(':')
|
#define M_NAMED META(':')
|
||||||
#define M_EQUIV META('=')
|
#define M_EQUIV META('=')
|
||||||
|
#define M_COLL(_ccnt) META('.' | ((_ccnt) << 8))
|
||||||
|
#define M_COLL_P(_c) (((_c) & M_COLL_MASK) == META('.'))
|
||||||
|
#define M_COLL_CNT(_c) (((_c) & ~M_COLL_MASK) >> 8)
|
||||||
#define ismeta(c) (((c)&M_QUOTE) != 0)
|
#define ismeta(c) (((c)&M_QUOTE) != 0)
|
||||||
|
|
||||||
static int compare(const void *, const void *);
|
static int compare(const void *, const void *);
|
||||||
@ -528,41 +531,61 @@ glob0(const Char *pattern, glob_t *pglob, size_t *limit)
|
|||||||
*bufnext++ = M_SET;
|
*bufnext++ = M_SET;
|
||||||
if (c == NOT)
|
if (c == NOT)
|
||||||
*bufnext++ = M_NOT;
|
*bufnext++ = M_NOT;
|
||||||
c = *qpatnext;
|
c = *qpatnext++;
|
||||||
do {
|
do {
|
||||||
wint_t wclass[64];
|
wint_t wclass[64];
|
||||||
Char ctype;
|
Char ctype;
|
||||||
|
|
||||||
ctype = check_classes_expr(qpatnext, wclass,
|
ctype = check_classes_expr(--qpatnext, wclass,
|
||||||
64);
|
64);
|
||||||
if (ctype) {
|
++qpatnext;
|
||||||
|
if (ctype == COLON) {
|
||||||
wctype_t type;
|
wctype_t type;
|
||||||
|
char cclass[64];
|
||||||
|
|
||||||
if (ctype == COLON) {
|
/* No worries, char classes are
|
||||||
char cclass[64];
|
ASCII-only anyway */
|
||||||
|
wcitoascii (cclass, wclass);
|
||||||
/* No worries, char classes are
|
if ((type = wctype (cclass))) {
|
||||||
ASCII-only anyway */
|
*bufnext++ = M_NAMED;
|
||||||
wcitoascii (cclass, wclass);
|
*bufnext++ = CHAR (type);
|
||||||
if ((type = wctype (cclass))) {
|
}
|
||||||
*bufnext++ = M_NAMED;
|
continue;
|
||||||
*bufnext++ = CHAR (type);
|
}
|
||||||
}
|
if (ctype == EQUALS) {
|
||||||
} else if (ctype == EQUALS &&
|
if (wclass[0] && !wclass[1]) {
|
||||||
wclass[0] && !wclass[1]) {
|
|
||||||
*bufnext++ = M_EQUIV;
|
*bufnext++ = M_EQUIV;
|
||||||
*bufnext++ = CHAR (wclass[0]);
|
*bufnext++ = CHAR (wclass[0]);
|
||||||
}
|
}
|
||||||
/* TODO: [. is ignored yet */
|
|
||||||
qpatnext++;
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
*bufnext++ = CHAR(c);
|
if (ctype == DOT &&
|
||||||
|
is_unicode_coll_elem (wclass)) {
|
||||||
|
*bufnext++ =
|
||||||
|
M_COLL (wcilen (wclass));
|
||||||
|
wint_t *wcp = wclass;
|
||||||
|
while ((*bufnext++ = *wcp++))
|
||||||
|
;
|
||||||
|
--bufnext; /* drop NUL */
|
||||||
|
} else
|
||||||
|
*bufnext++ = CHAR(c);
|
||||||
if (*qpatnext == RANGE &&
|
if (*qpatnext == RANGE &&
|
||||||
(c = qpatnext[1]) != RBRACKET) {
|
(c = qpatnext[1]) != RBRACKET) {
|
||||||
*bufnext++ = M_RNG;
|
*bufnext++ = M_RNG;
|
||||||
*bufnext++ = CHAR(c);
|
|
||||||
qpatnext += 2;
|
ctype = check_classes_expr(++qpatnext,
|
||||||
|
wclass, 64);
|
||||||
|
if (ctype == DOT &&
|
||||||
|
is_unicode_coll_elem (wclass)) {
|
||||||
|
*bufnext++ =
|
||||||
|
M_COLL (wcilen (wclass));
|
||||||
|
wint_t *wcp = wclass;
|
||||||
|
while ((*bufnext++ = *wcp++))
|
||||||
|
;
|
||||||
|
--bufnext; /* drop NUL */
|
||||||
|
} else
|
||||||
|
*bufnext++ = CHAR(c);
|
||||||
|
++qpatnext;
|
||||||
}
|
}
|
||||||
} while ((c = *qpatnext++) != RBRACKET);
|
} while ((c = *qpatnext++) != RBRACKET);
|
||||||
pglob->gl_flags |= GLOB_MAGCHAR;
|
pglob->gl_flags |= GLOB_MAGCHAR;
|
||||||
@ -849,11 +872,12 @@ static int
|
|||||||
match(Char *name, Char *pat, Char *patend)
|
match(Char *name, Char *pat, Char *patend)
|
||||||
{
|
{
|
||||||
int ok, negate_range;
|
int ok, negate_range;
|
||||||
Char c, k;
|
Char *c, *k;
|
||||||
|
size_t k_len;
|
||||||
|
|
||||||
while (pat < patend) {
|
while (pat < patend) {
|
||||||
c = *pat++;
|
c = pat++;
|
||||||
switch (c & M_MASK) {
|
switch (*c & M_MASK) {
|
||||||
case M_ALL:
|
case M_ALL:
|
||||||
if (pat == patend)
|
if (pat == patend)
|
||||||
return(1);
|
return(1);
|
||||||
@ -868,36 +892,53 @@ match(Char *name, Char *pat, Char *patend)
|
|||||||
break;
|
break;
|
||||||
case M_SET:
|
case M_SET:
|
||||||
ok = 0;
|
ok = 0;
|
||||||
if ((k = *name++) == EOS)
|
if (*(k = name) == EOS)
|
||||||
return(0);
|
return(0);
|
||||||
|
k_len = next_unicode_char (k);
|
||||||
|
name += k_len;
|
||||||
if ((negate_range = ((*pat & M_MASK) == M_NOT)) != EOS)
|
if ((negate_range = ((*pat & M_MASK) == M_NOT)) != EOS)
|
||||||
++pat;
|
++pat;
|
||||||
while (((c = *pat++) & M_MASK) != M_END)
|
while ((*(c = pat++) & M_MASK) != M_END) {
|
||||||
if ((c & M_MASK) == M_NAMED) {
|
size_t len1 = 1, len2 = 1;
|
||||||
if (iswctype (k, *pat++))
|
|
||||||
|
if ((*c & M_MASK) == M_NAMED) {
|
||||||
|
if (iswctype (*k, *pat++))
|
||||||
ok = 1;
|
ok = 1;
|
||||||
} else if ((c & M_MASK) == M_EQUIV) {
|
continue;
|
||||||
if (is_unicode_equiv (k, *pat++))
|
}
|
||||||
|
if ((*c & M_MASK) == M_EQUIV) {
|
||||||
|
if (is_unicode_equiv (*k, *pat++))
|
||||||
ok = 1;
|
ok = 1;
|
||||||
} else if ((*pat & M_MASK) == M_RNG) {
|
continue;
|
||||||
|
}
|
||||||
|
if (M_COLL_P(*c)) {
|
||||||
|
len1 = M_COLL_CNT(*c);
|
||||||
|
++c;
|
||||||
|
pat += len1;
|
||||||
|
}
|
||||||
|
if ((*pat & M_MASK) == M_RNG) {
|
||||||
|
if (M_COLL_P(pat[1]))
|
||||||
|
len2 = M_COLL_CNT(*++pat);
|
||||||
#ifdef __CYGWIN__
|
#ifdef __CYGWIN__
|
||||||
if ((!__get_current_collate_locale ()->lcid) ?
|
if ((!__get_current_collate_locale ()->lcid) ?
|
||||||
#else
|
#else
|
||||||
if (__collate_load_error ?
|
if (__collate_load_error ?
|
||||||
#endif
|
#endif
|
||||||
CCHAR(c) <= CCHAR(k) && CCHAR(k) <= CCHAR(pat[1]) :
|
*c <= *k && *k <= pat[1] :
|
||||||
__wcollate_range_cmp(CCHAR(c), CCHAR(k)) <= 0
|
__wscollate_range_cmp(c, k, len1, k_len) <= 0
|
||||||
&& __wcollate_range_cmp(CCHAR(k), CCHAR(pat[1])) <= 0
|
&& __wscollate_range_cmp(k, pat + 1, k_len, len2) <= 0
|
||||||
)
|
)
|
||||||
ok = 1;
|
ok = 1;
|
||||||
pat += 2;
|
pat += len2 + 1;
|
||||||
} else if (c == k)
|
} else if (len1 == k_len &&
|
||||||
|
wcincmp (c, k, len1) == 0)
|
||||||
ok = 1;
|
ok = 1;
|
||||||
|
}
|
||||||
if (ok == negate_range)
|
if (ok == negate_range)
|
||||||
return(0);
|
return(0);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
if (Cchar(*name++) != Cchar(c))
|
if (Cchar(*name++) != Cchar(*c))
|
||||||
return(0);
|
return(0);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -14,6 +14,7 @@ extern "C" {
|
|||||||
extern const int __collate_load_error;
|
extern const int __collate_load_error;
|
||||||
|
|
||||||
extern int __wcollate_range_cmp (wint_t, wint_t);
|
extern int __wcollate_range_cmp (wint_t, wint_t);
|
||||||
|
extern int __wscollate_range_cmp (wint_t *, wint_t *, size_t, size_t);
|
||||||
|
|
||||||
int is_unicode_equiv (wint_t, wint_t);
|
int is_unicode_equiv (wint_t, wint_t);
|
||||||
|
|
||||||
|
@ -1195,6 +1195,25 @@ __wcollate_range_cmp (wint_t c1, wint_t c2)
|
|||||||
return wcscoll (s1, s2);
|
return wcscoll (s1, s2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Not so much BSD. Used from glob.cc, fnmatch.c and regcomp.c.
|
||||||
|
|
||||||
|
Either arg may come from pattern space or from the tested string.
|
||||||
|
c1len and c2len are the lengths of c1 and c2, respectively. */
|
||||||
|
extern "C" int
|
||||||
|
__wscollate_range_cmp (wint_t *c1, wint_t *c2,
|
||||||
|
size_t c1len, size_t c2len)
|
||||||
|
{
|
||||||
|
wchar_t s1[c1len * 2 + 1] = { 0 }; /* # of chars if all are surrogates */
|
||||||
|
wchar_t s2[c2len * 2 + 1] = { 0 };
|
||||||
|
|
||||||
|
wcintowcs (s1, c1, c1len);
|
||||||
|
wcintowcs (s2, c2, c2len);
|
||||||
|
return wcscoll_l (s1, s2, __get_current_locale ());
|
||||||
|
}
|
||||||
|
|
||||||
|
const size_t ce_size = sizeof collating_element / sizeof *collating_element;
|
||||||
|
const size_t ce_e_size = sizeof *collating_element;
|
||||||
|
|
||||||
/* Check if UTF-32 input character `test' is in the same equivalence class
|
/* Check if UTF-32 input character `test' is in the same equivalence class
|
||||||
as UTF-32 character 'eqv'.
|
as UTF-32 character 'eqv'.
|
||||||
Note that we only recognize input in Unicode normalization form C, that
|
Note that we only recognize input in Unicode normalization form C, that
|
||||||
|
Loading…
x
Reference in New Issue
Block a user