Cygwin: regex: convert wchar_t to wint_t

- call mbrtowi instead of mbrtowc, so converted characters are stored
  as wint_t rather than wchar_t
- drop the Cygwin-only UTF-16 surrogate pair handling from wgetnext and
  xmbrtowc, since it is now encapsulated in mbrtowi.

Signed-off-by: Corinna Vinschen <corinna@vinschen.de>
This commit is contained in:
Corinna Vinschen 2023-02-14 13:02:15 +01:00
parent 0321fb5736
commit 588624da2b
2 changed files with 8 additions and 39 deletions

View File

@ -921,7 +921,7 @@ p_b_coll_elem(struct parse *p,
struct cname *cp; struct cname *cp;
int len; int len;
mbstate_t mbs; mbstate_t mbs;
wchar_t wc; wint_t wc;
size_t clen; size_t clen;
while (MORE() && !SEETWO(endc, ']')) while (MORE() && !SEETWO(endc, ']'))
@ -935,7 +935,7 @@ p_b_coll_elem(struct parse *p,
if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0') if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
return(cp->code); /* known name */ return(cp->code); /* known name */
memset(&mbs, 0, sizeof(mbs)); memset(&mbs, 0, sizeof(mbs));
if ((clen = mbrtowc(&wc, sp, len, &mbs)) == len) if ((clen = mbrtowi(&wc, sp, len, &mbs)) == len)
return (wc); /* single character */ return (wc); /* single character */
else if (clen == (size_t)-1 || clen == (size_t)-2) else if (clen == (size_t)-1 || clen == (size_t)-2)
SETERROR(REG_ILLSEQ); SETERROR(REG_ILLSEQ);
@ -1119,8 +1119,7 @@ static wint_t
wgetnext(struct parse *p) wgetnext(struct parse *p)
{ {
mbstate_t mbs; mbstate_t mbs;
wchar_t wc; wint_t wc;
wint_t ret;
size_t n; size_t n;
#ifdef __CYGWIN__ #ifdef __CYGWIN__
@ -1136,29 +1135,15 @@ wgetnext(struct parse *p)
return (wint_t) (unsigned char) *p->next++; return (wint_t) (unsigned char) *p->next++;
#endif #endif
memset(&mbs, 0, sizeof(mbs)); memset(&mbs, 0, sizeof(mbs));
n = mbrtowc(&wc, p->next, p->end - p->next, &mbs); n = mbrtowi(&wc, p->next, p->end - p->next, &mbs);
if (n == (size_t)-1 || n == (size_t)-2) { if (n == (size_t)-1 || n == (size_t)-2) {
SETERROR(REG_ILLSEQ); SETERROR(REG_ILLSEQ);
return (0); return (0);
} }
ret = wc;
if (n == 0) if (n == 0)
n = 1; n = 1;
else if (sizeof (wchar_t) == 2 && wc >= 0xd800 && wc <= 0xdbff) {
/* UTF-16 surrogate pair. Fetch second half and
compute UTF-32 value */
size_t n2 = mbrtowc(&wc, p->next + n,
p->end - p->next - n, &mbs);
if (n2 == 0 || n2 == (size_t)-1 || n2 == (size_t)-2) {
SETERROR(REG_ILLSEQ);
return (0);
}
ret = (((ret & 0x3ff) << 10) | (wc & 0x3ff))
+ 0x10000;
n += n2;
}
p->next += n; p->next += n;
return (ret); return (wc);
} }
static size_t static size_t

View File

@ -68,9 +68,9 @@ static __inline size_t
xmbrtowc(wint_t *wi, const char *s, size_t n, mbstate_t *mbs, wint_t dummy) xmbrtowc(wint_t *wi, const char *s, size_t n, mbstate_t *mbs, wint_t dummy)
{ {
size_t nr; size_t nr;
wchar_t wc; wint_t wc;
nr = mbrtowc(&wc, s, n, mbs); nr = mbrtowi(&wc, s, n, mbs);
if (wi != NULL) if (wi != NULL)
*wi = wc; *wi = wc;
if (nr == 0) if (nr == 0)
@ -80,25 +80,9 @@ xmbrtowc(wint_t *wi, const char *s, size_t n, mbstate_t *mbs, wint_t dummy)
if (wi != NULL) if (wi != NULL)
*wi = dummy; *wi = dummy;
return (1); return (1);
} else { } else
if (sizeof (wchar_t) == 2 && wc >= 0xd800 && wc <= 0xdbff) {
/* UTF-16 surrogate pair. Fetch second half and
compute UTF-32 value */
size_t n2 = mbrtowc(&wc, s + nr, n - nr, mbs);
if (n2 == 0 || n2 == (size_t)-1 || n2 == (size_t)-2) {
memset(mbs, 0, sizeof(*mbs));
if (wi != NULL)
*wi = dummy;
return (1);
}
if (wi != NULL)
*wi = (((*wi & 0x3ff) << 10) | (wc & 0x3ff))
+ 0x10000;
nr += n2;
}
return (nr); return (nr);
} }
}
static __inline size_t static __inline size_t
xmbrtowc_dummy(wint_t *wi, xmbrtowc_dummy(wint_t *wi,