* regex/regcomp.c (xwcrtomb): New function to convert wide chars
outside of the base plane to UTF-8.  Call throughout instead of wcrtomb.
(wgetnext): Handle surrogate pairs on UTF-16 systems.
* regex/regexec.c (xmbrtowc): Ditto.
This commit is contained in:
parent
e122c47112
commit
7bd2296c83
|
@ -1,3 +1,11 @@
|
||||||
|
2010-02-12 Corinna Vinschen <corinna@vinschen.de>
|
||||||
|
|
||||||
|
* regex/regcomp.c (xwcrtomb): New function to convert wide chars
|
||||||
|
outside of the base plane to UTF-8. Call throughout instead of
|
||||||
|
wcrtomb.
|
||||||
|
(wgetnext): Handle surrogate pairs on UTF-16 systems.
|
||||||
|
* regex/regexec.c (xmbrtowc): Ditto.
|
||||||
|
|
||||||
2010-02-12 Corinna Vinschen <corinna@vinschen.de>
|
2010-02-12 Corinna Vinschen <corinna@vinschen.de>
|
||||||
|
|
||||||
* sec_auth.cc (get_user_local_groups): Retrieve name of well known
|
* sec_auth.cc (get_user_local_groups): Retrieve name of well known
|
||||||
|
|
|
@ -140,6 +140,7 @@ static void computejumps(struct parse *p, struct re_guts *g);
|
||||||
static void computematchjumps(struct parse *p, struct re_guts *g);
|
static void computematchjumps(struct parse *p, struct re_guts *g);
|
||||||
static sopno pluscount(struct parse *p, struct re_guts *g);
|
static sopno pluscount(struct parse *p, struct re_guts *g);
|
||||||
static wint_t wgetnext(struct parse *p);
|
static wint_t wgetnext(struct parse *p);
|
||||||
|
static size_t xwcrtomb (char *s, wint_t wc, mbstate_t *ps);
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
|
@ -994,7 +995,7 @@ bothcases(struct parse *p, wint_t ch)
|
||||||
assert(othercase(ch) != ch); /* p_bracket() would recurse */
|
assert(othercase(ch) != ch); /* p_bracket() would recurse */
|
||||||
p->next = bracket;
|
p->next = bracket;
|
||||||
memset(&mbs, 0, sizeof(mbs));
|
memset(&mbs, 0, sizeof(mbs));
|
||||||
n = wcrtomb(bracket, ch, &mbs);
|
n = xwcrtomb(bracket, ch, &mbs);
|
||||||
assert(n != (size_t)-1);
|
assert(n != (size_t)-1);
|
||||||
bracket[n] = ']';
|
bracket[n] = ']';
|
||||||
bracket[n + 1] = '\0';
|
bracket[n + 1] = '\0';
|
||||||
|
@ -1136,6 +1137,7 @@ wgetnext(struct parse *p)
|
||||||
{
|
{
|
||||||
mbstate_t mbs;
|
mbstate_t mbs;
|
||||||
wchar_t wc;
|
wchar_t wc;
|
||||||
|
wint_t ret;
|
||||||
size_t n;
|
size_t n;
|
||||||
|
|
||||||
memset(&mbs, 0, sizeof(mbs));
|
memset(&mbs, 0, sizeof(mbs));
|
||||||
|
@ -1144,12 +1146,43 @@ wgetnext(struct parse *p)
|
||||||
SETERROR(REG_ILLSEQ);
|
SETERROR(REG_ILLSEQ);
|
||||||
return (0);
|
return (0);
|
||||||
}
|
}
|
||||||
|
ret = wc;
|
||||||
if (n == 0)
|
if (n == 0)
|
||||||
n = 1;
|
n = 1;
|
||||||
|
else if (sizeof (wchar_t) == 2 && wc >= 0xd800 && wc <= 0xdbff) {
|
||||||
|
/* UTF-16 surrogate pair. Fetch second half and
|
||||||
|
compute UTF-32 value */
|
||||||
|
int n2 = mbrtowc(&wc, p->next + n, p->end - p->next - n, &mbs);
|
||||||
|
if (n2 == 0 || n2 == (size_t)-1 || n2 == (size_t)-2) {
|
||||||
|
SETERROR(REG_ILLSEQ);
|
||||||
|
return (0);
|
||||||
|
}
|
||||||
|
ret = (((ret & 0x3ff) << 10) | (wc & 0x3ff))
|
||||||
|
+ 0x10000;
|
||||||
|
n += n2;
|
||||||
|
}
|
||||||
p->next += n;
|
p->next += n;
|
||||||
return (wc);
|
return (ret);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 - xwcrtomb - wcrtomb replacement that copes with code points >= 0x10000
 *
 * s   receives the multibyte sequence; the non-BMP path writes exactly
 *     four bytes and no terminator.
 * wc  the character to convert, passed as a wint_t so that values which
 *     do not fit a 16 bit wchar_t can be handed in as a single value.
 * ps  conversion state; only forwarded to wcrtomb, the non-BMP path
 *     neither reads nor modifies it.
 *
 * Returns the number of bytes stored, or (size_t)-1 if wc cannot be
 * converted.
 */
static size_t
xwcrtomb (char *s, wint_t wc, mbstate_t *ps)
{
  if (sizeof (wchar_t) == 2 && wc >= 0x10000)
    {
      /* UTF-16 systems can't handle these values directly.  Since the
	 rest of the code isn't surrogate pair aware, we handle this here,
	 invisibly to the rest of the code.  The bytes are always emitted
	 as UTF-8; the current locale's charset is not consulted.  */

      /* Nothing above U+10FFFF is a valid code point.  Encoding it anyway
	 would yield a lead byte of 0xf5..0xf7, which is not valid UTF-8,
	 so report a conversion failure the same way wcrtomb does.  This
	 also rejects a 32 bit WEOF.  */
      if (wc > 0x10ffff)
	return (size_t) -1;

      /* 4-byte UTF-8 sequence: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx.  */
      *s++ = 0xf0 | ((wc & 0x1c0000) >> 18);
      *s++ = 0x80 | ((wc & 0x3f000) >> 12);
      *s++ = 0x80 | ((wc & 0xfc0) >> 6);
      *s = 0x80 | (wc & 0x3f);
      return 4;
    }
  return wcrtomb (s, wc, ps);
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
- seterr - set an error condition
|
- seterr - set an error condition
|
||||||
== static int seterr(struct parse *p, int e);
|
== static int seterr(struct parse *p, int e);
|
||||||
|
@ -1490,7 +1523,7 @@ findmust(struct parse *p, struct re_guts *g)
|
||||||
memset(&mbs, 0, sizeof(mbs));
|
memset(&mbs, 0, sizeof(mbs));
|
||||||
newstart = scan - 1;
|
newstart = scan - 1;
|
||||||
}
|
}
|
||||||
clen = wcrtomb(buf, OPND(s), &mbs);
|
clen = xwcrtomb(buf, OPND(s), &mbs);
|
||||||
if (clen == (size_t)-1)
|
if (clen == (size_t)-1)
|
||||||
goto toohard;
|
goto toohard;
|
||||||
newlen += clen;
|
newlen += clen;
|
||||||
|
@ -1609,7 +1642,7 @@ findmust(struct parse *p, struct re_guts *g)
|
||||||
while (cp < g->must + g->mlen) {
|
while (cp < g->must + g->mlen) {
|
||||||
while (OP(s = *scan++) != OCHAR)
|
while (OP(s = *scan++) != OCHAR)
|
||||||
continue;
|
continue;
|
||||||
clen = wcrtomb(cp, OPND(s), &mbs);
|
clen = xwcrtomb(cp, OPND(s), &mbs);
|
||||||
assert(clen != (size_t)-1);
|
assert(clen != (size_t)-1);
|
||||||
cp += clen;
|
cp += clen;
|
||||||
}
|
}
|
||||||
|
|
|
@ -84,8 +84,24 @@ xmbrtowc(wint_t *wi, const char *s, size_t n, mbstate_t *mbs, wint_t dummy)
|
||||||
if (wi != NULL)
|
if (wi != NULL)
|
||||||
*wi = dummy;
|
*wi = dummy;
|
||||||
return (1);
|
return (1);
|
||||||
} else
|
} else {
|
||||||
|
if (sizeof (wchar_t) == 2 && wc >= 0xd800 && wc <= 0xdbff) {
|
||||||
|
/* UTF-16 surrogate pair. Fetch second half and
|
||||||
|
compute UTF-32 value */
|
||||||
|
int n2 = mbrtowc(&wc, s + nr, n - nr, mbs);
|
||||||
|
if (n2 == 0 || n2 == (size_t)-1 || n2 == (size_t)-2) {
|
||||||
|
memset(mbs, 0, sizeof(*mbs));
|
||||||
|
if (wi != NULL)
|
||||||
|
*wi = dummy;
|
||||||
|
return (1);
|
||||||
|
}
|
||||||
|
if (wi != NULL)
|
||||||
|
*wi = (((*wi & 0x3ff) << 10) | (wc & 0x3ff))
|
||||||
|
+ 0x10000;
|
||||||
|
nr += n2;
|
||||||
|
}
|
||||||
return (nr);
|
return (nr);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline size_t
|
static __inline size_t
|
||||||
|
|
Loading…
Reference in New Issue