* regex/engine.c (step): Drop Cygwin-specific definition.
(NONCHAR): Better cast here to make the test work. Move comment from step here. (matcher): Disable skipping initial string in multibyte case. * regex/regcomp.c (p_bracket): Don't simplify singleton in the invert case. (p_b_term): Handle early end of pattern after dash in bracket expression. (singleton): Don't ignore the wides just because there's already a singleton in the single byte chars. Fix condition for a singleton wide accordingly. (findmust): Check for LC_CTYPE charset, rather than LC_COLLATE charset. * regex2.h (CHIN): Fix condition in the icase & invert case. (ISWORD): Fix wrong cast to unsigned char.
This commit is contained in:
parent
45c8c6469a
commit
44caccfca2
|
@ -1,3 +1,20 @@
|
||||||
|
2010-02-11 Corinna Vinschen <corinna@vinschen.de>
|
||||||
|
|
||||||
|
* regex/engine.c (step): Drop Cygwin-specific definition.
|
||||||
|
(NONCHAR): Better cast here to make the test work. Move comment
|
||||||
|
from step here.
|
||||||
|
(matcher): Disable skipping initial string in multibyte case.
|
||||||
|
* regex/regcomp.c (p_bracket): Don't simplify singleton in the invert
|
||||||
|
case.
|
||||||
|
(p_b_term): Handle early end of pattern after dash in bracket
|
||||||
|
expression.
|
||||||
|
(singleton): Don't ignore the wides just because there's already a
|
||||||
|
singleton in the single byte chars. Fix condition for a singleton
|
||||||
|
wide accordingly.
|
||||||
|
(findmust): Check for LC_CTYPE charset, rather than LC_COLLATE charset.
|
||||||
|
* regex2.h (CHIN): Fix condition in the icase & invert case.
|
||||||
|
(ISWORD): Fix wrong cast to unsigned char.
|
||||||
|
|
||||||
2010-02-11 Andy Koppe <andy.koppe@gmail.com>
|
2010-02-11 Andy Koppe <andy.koppe@gmail.com>
|
||||||
|
|
||||||
* nlsfuncs.cc (initial_setlocale): Move check whether charset has
|
* nlsfuncs.cc (initial_setlocale): Move check whether charset has
|
||||||
|
|
|
@ -106,11 +106,7 @@ static const char *dissect(struct match *m, const char *start, const char *stop,
|
||||||
static const char *backref(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst, sopno lev, int);
|
static const char *backref(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst, sopno lev, int);
|
||||||
static const char *fast(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst);
|
static const char *fast(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst);
|
||||||
static const char *slow(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst);
|
static const char *slow(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst);
|
||||||
#ifdef __CYGWIN__
|
|
||||||
static states step(struct re_guts *g, sopno start, sopno stop, states bef, int ch, states aft);
|
|
||||||
#else
|
|
||||||
static states step(struct re_guts *g, sopno start, sopno stop, states bef, wint_t ch, states aft);
|
static states step(struct re_guts *g, sopno start, sopno stop, states bef, wint_t ch, states aft);
|
||||||
#endif
|
|
||||||
#define MAX_RECURSION 100
|
#define MAX_RECURSION 100
|
||||||
#define BOL (OUT-1)
|
#define BOL (OUT-1)
|
||||||
#define EOL (BOL-1)
|
#define EOL (BOL-1)
|
||||||
|
@ -119,7 +115,10 @@ static states step(struct re_guts *g, sopno start, sopno stop, states bef, wint_
|
||||||
#define BOW (BOL-4)
|
#define BOW (BOL-4)
|
||||||
#define EOW (BOL-5)
|
#define EOW (BOL-5)
|
||||||
#define BADCHAR (BOL-6)
|
#define BADCHAR (BOL-6)
|
||||||
#define NONCHAR(c) ((c) <= OUT)
|
/* When using wint_t, which is defined as unsigned int on BSD,
|
||||||
|
as well as on Cygwin or Linux, the NONCHAR test is broken without
|
||||||
|
the below cast. I'm wondering how this is supposed to work at all... */
|
||||||
|
#define NONCHAR(c) ((int)(c) <= OUT)
|
||||||
#ifdef REDEBUG
|
#ifdef REDEBUG
|
||||||
static void print(struct match *m, const char *caption, states st, int ch, FILE *d);
|
static void print(struct match *m, const char *caption, states st, int ch, FILE *d);
|
||||||
#endif
|
#endif
|
||||||
|
@ -248,9 +247,12 @@ matcher(struct re_guts *g,
|
||||||
ZAPSTATE(&m->mbs);
|
ZAPSTATE(&m->mbs);
|
||||||
|
|
||||||
/* Adjust start according to moffset, to speed things up */
|
/* Adjust start according to moffset, to speed things up */
|
||||||
|
#ifndef MNAMES
|
||||||
|
/* The code evaluating moffset doesn't seem to work right
|
||||||
|
in the multibyte case. */
|
||||||
if (g->moffset > -1)
|
if (g->moffset > -1)
|
||||||
start = ((dp - g->moffset) < start) ? start : dp - g->moffset;
|
start = ((dp - g->moffset) < start) ? start : dp - g->moffset;
|
||||||
|
#endif
|
||||||
SP("mloop", m->st, *start);
|
SP("mloop", m->st, *start);
|
||||||
|
|
||||||
/* this loop does only one repetition except for backrefs */
|
/* this loop does only one repetition except for backrefs */
|
||||||
|
@ -993,14 +995,7 @@ step(struct re_guts *g,
|
||||||
sopno start, /* start state within strip */
|
sopno start, /* start state within strip */
|
||||||
sopno stop, /* state after stop state within strip */
|
sopno stop, /* state after stop state within strip */
|
||||||
states bef, /* states reachable before */
|
states bef, /* states reachable before */
|
||||||
#ifdef __CYGWIN__
|
|
||||||
/* When using wint_t, which is defined as unsigned int on BSD,
|
|
||||||
as well as on Cygwin or Linux, the NONCHAR test is broken.
|
|
||||||
I'm wondering how this is supposed to work at all... */
|
|
||||||
int ch, /* character or NONCHAR code */
|
|
||||||
#else
|
|
||||||
wint_t ch, /* character or NONCHAR code */
|
wint_t ch, /* character or NONCHAR code */
|
||||||
#endif
|
|
||||||
states aft) /* states already known reachable after */
|
states aft) /* states already known reachable after */
|
||||||
{
|
{
|
||||||
cset *cs;
|
cset *cs;
|
||||||
|
|
|
@ -762,7 +762,8 @@ p_bracket(struct parse *p)
|
||||||
if (cs->invert && p->g->cflags&REG_NEWLINE)
|
if (cs->invert && p->g->cflags&REG_NEWLINE)
|
||||||
cs->bmp['\n' >> 3] |= 1 << ('\n' & 7);
|
cs->bmp['\n' >> 3] |= 1 << ('\n' & 7);
|
||||||
|
|
||||||
if ((ch = singleton(cs)) != OUT) { /* optimize singleton sets */
|
if ((ch = singleton(cs)) != OUT /* optimize singleton sets */
|
||||||
|
&& cs->invert == 0) { /* But not in invert case. */
|
||||||
ordinary(p, ch);
|
ordinary(p, ch);
|
||||||
freeset(p, cs);
|
freeset(p, cs);
|
||||||
} else
|
} else
|
||||||
|
@ -833,6 +834,9 @@ p_b_term(struct parse *p, cset *cs)
|
||||||
finish = '-';
|
finish = '-';
|
||||||
else
|
else
|
||||||
finish = p_b_symbol(p);
|
finish = p_b_symbol(p);
|
||||||
|
} else if (SEE('-') && !MORE2()) {
|
||||||
|
SETERROR(REG_EBRACK);
|
||||||
|
return;
|
||||||
} else
|
} else
|
||||||
finish = start;
|
finish = start;
|
||||||
if (start == finish)
|
if (start == finish)
|
||||||
|
@ -1212,9 +1216,9 @@ singleton(cset *cs)
|
||||||
n++;
|
n++;
|
||||||
s = i;
|
s = i;
|
||||||
}
|
}
|
||||||
if (n == 1)
|
if (n == 1 && cs->nwides == 0)
|
||||||
return (s);
|
return (s);
|
||||||
if (cs->nwides == 1 && cs->nranges == 0 && cs->ntypes == 0 &&
|
if (n == 0 && cs->nwides == 1 && cs->nranges == 0 && cs->ntypes == 0 &&
|
||||||
cs->icase == 0)
|
cs->icase == 0)
|
||||||
return (cs->wides[0]);
|
return (cs->wides[0]);
|
||||||
/* Don't bother handling the other cases. */
|
/* Don't bother handling the other cases. */
|
||||||
|
@ -1467,7 +1471,7 @@ findmust(struct parse *p, struct re_guts *g)
|
||||||
*/
|
*/
|
||||||
if (MB_CUR_MAX > 1 &&
|
if (MB_CUR_MAX > 1 &&
|
||||||
#ifdef __CYGWIN__
|
#ifdef __CYGWIN__
|
||||||
strcmp(collate_charset, "UTF-8") != 0)
|
strcmp(__locale_charset (), "UTF-8") != 0)
|
||||||
#else
|
#else
|
||||||
strcmp(_CurrentRuneLocale->__encoding, "UTF-8") != 0)
|
strcmp(_CurrentRuneLocale->__encoding, "UTF-8") != 0)
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -151,10 +151,14 @@ CHIN(cset *cs, wint_t ch)
|
||||||
if (ch < NC)
|
if (ch < NC)
|
||||||
return (((cs->bmp[ch >> 3] & (1 << (ch & 7))) != 0) ^
|
return (((cs->bmp[ch >> 3] & (1 << (ch & 7))) != 0) ^
|
||||||
cs->invert);
|
cs->invert);
|
||||||
else if (cs->icase)
|
else if (cs->icase) {
|
||||||
return (CHIN1(cs, ch) || CHIN1(cs, towlower(ch)) ||
|
if (cs->invert)
|
||||||
CHIN1(cs, towupper(ch)));
|
return (CHIN1(cs, ch) && CHIN1(cs, towlower(ch)) &&
|
||||||
else
|
CHIN1(cs, towupper(ch)));
|
||||||
|
else
|
||||||
|
return (CHIN1(cs, ch) || CHIN1(cs, towlower(ch)) ||
|
||||||
|
CHIN1(cs, towupper(ch)));
|
||||||
|
} else
|
||||||
return (CHIN1(cs, ch));
|
return (CHIN1(cs, ch));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -189,4 +193,4 @@ struct re_guts {
|
||||||
|
|
||||||
/* misc utilities */
|
/* misc utilities */
|
||||||
#define OUT (CHAR_MIN - 1) /* a non-character value */
|
#define OUT (CHAR_MIN - 1) /* a non-character value */
|
||||||
#define ISWORD(c) (iswalnum((uch)(c)) || (c) == '_')
|
#define ISWORD(c) (iswalnum((wint_t)(c)) || (c) == '_')
|
||||||
|
|
Loading…
Reference in New Issue