newlib: vf[w]scanf: Fix conversion multibyte <-> wchar_t
* vfscanf: per POSIX, if the target type is wchar_t, the width is
  counted in (multibyte) characters, not in bytes.
* vfscanf: Handle UTF-8 multibyte sequences converted to surrogate
  pairs on UTF-16 systems.
* vfwscanf: Don't count high surrogates in input against field width
  counting. Per POSIX, input is [... sentence truncated in this copy of the commit message]

Signed-off-by: Corinna Vinschen <corinna@vinschen.de>
This commit is contained in:
parent
9638c07527
commit
a49209d2bc
|
@ -488,10 +488,15 @@ _DEFUN(__SVFSCANF_R, (rptr, fp, fmt0, ap),
|
|||
_p = _p0; \
|
||||
_w; \
|
||||
})
|
||||
/* For systems with sizeof (wchar_t) == 2 (UTF-16) check if there's room for
|
||||
at least 2 wchar_t's (surrogate pairs). */
|
||||
#define realloc_m_ptr(_type, _p, _p0, _p_p, _w) \
|
||||
({ \
|
||||
size_t _nw = (_w); \
|
||||
if (_p_p && _p - _p0 == _nw) \
|
||||
ptrdiff_t _dif = _p - _p0; \
|
||||
if (_p_p && \
|
||||
((sizeof (_type) == 2 && _dif >= _nw - 1) \
|
||||
|| _dif >= _nw)) \
|
||||
{ \
|
||||
_p0 = (_type *) realloc (_p0, (_nw << 1) * sizeof (_type)); \
|
||||
if (!_p0) \
|
||||
|
@ -499,7 +504,7 @@ _DEFUN(__SVFSCANF_R, (rptr, fp, fmt0, ap),
|
|||
nassigned = EOF; \
|
||||
goto match_failure; \
|
||||
} \
|
||||
_p = _p0 + _nw; \
|
||||
_p = _p0 + _dif; \
|
||||
*_p_p = _p0; \
|
||||
_nw <<= 1; \
|
||||
} \
|
||||
|
@ -948,7 +953,6 @@ _DEFUN(__SVFSCANF_R, (rptr, fp, fmt0, ap),
|
|||
size_t wcp_siz = 0;
|
||||
#endif
|
||||
mbstate_t state;
|
||||
memset (&state, 0, sizeof (mbstate_t));
|
||||
if (flags & SUPPRESS)
|
||||
wcp = NULL;
|
||||
#ifdef _WANT_IO_POSIX_EXTENSIONS
|
||||
|
@ -958,13 +962,17 @@ _DEFUN(__SVFSCANF_R, (rptr, fp, fmt0, ap),
|
|||
else
|
||||
wcp = GET_ARG (N, ap, wchar_t *);
|
||||
n = 0;
|
||||
while (width-- != 0)
|
||||
while (width != 0)
|
||||
{
|
||||
if (n == MB_CUR_MAX)
|
||||
goto input_failure;
|
||||
buf[n++] = *fp->_p;
|
||||
fp->_r -= 1;
|
||||
fp->_p += 1;
|
||||
/* Got a high surrogate, allow low surrogate to slip
|
||||
through */
|
||||
if (mbslen != 3 || state.__count != 4)
|
||||
memset (&state, 0, sizeof (mbstate_t));
|
||||
if ((mbslen = _mbrtowc_r (rptr, wcp, buf, n, &state))
|
||||
== (size_t)-1)
|
||||
goto input_failure; /* Invalid sequence */
|
||||
|
@ -973,6 +981,9 @@ _DEFUN(__SVFSCANF_R, (rptr, fp, fmt0, ap),
|
|||
if (mbslen != (size_t)-2) /* Incomplete sequence */
|
||||
{
|
||||
nread += n;
|
||||
/* Handle high surrogate */
|
||||
if (mbslen != 3 || state.__count != 4)
|
||||
width -= 1;
|
||||
if (!(flags & SUPPRESS))
|
||||
{
|
||||
#ifdef _WANT_IO_POSIX_EXTENSIONS
|
||||
|
@ -1122,7 +1133,6 @@ _DEFUN(__SVFSCANF_R, (rptr, fp, fmt0, ap),
|
|||
#endif
|
||||
/* Process %S and %ls placeholders */
|
||||
mbstate_t state;
|
||||
memset (&state, 0, sizeof (mbstate_t));
|
||||
if (flags & SUPPRESS)
|
||||
wcp = &wc;
|
||||
#ifdef _WANT_IO_POSIX_EXTENSIONS
|
||||
|
@ -1139,7 +1149,10 @@ _DEFUN(__SVFSCANF_R, (rptr, fp, fmt0, ap),
|
|||
buf[n++] = *fp->_p;
|
||||
fp->_r -= 1;
|
||||
fp->_p += 1;
|
||||
width--;
|
||||
/* Got a high surrogate, allow low surrogate to slip
|
||||
through */
|
||||
if (mbslen != 3 || state.__count != 4)
|
||||
memset (&state, 0, sizeof (mbstate_t));
|
||||
if ((mbslen = _mbrtowc_r (rptr, wcp, buf, n, &state))
|
||||
== (size_t)-1)
|
||||
goto input_failure;
|
||||
|
@ -1154,6 +1167,9 @@ _DEFUN(__SVFSCANF_R, (rptr, fp, fmt0, ap),
|
|||
break;
|
||||
}
|
||||
nread += n;
|
||||
/* Handle high surrogate */
|
||||
if (mbslen != 3 || state.__count != 4)
|
||||
width -= 1;
|
||||
if ((flags & SUPPRESS) == 0)
|
||||
{
|
||||
wcp += 1;
|
||||
|
|
|
@ -376,6 +376,7 @@ _DEFUN(__SVFWSCANF_R, (rptr, fp, fmt0, ap),
|
|||
wint_t wi; /* handy wint_t */
|
||||
char *mbp = NULL; /* multibyte string pointer for %c %s %[ */
|
||||
size_t nconv; /* number of bytes in mb. conversion */
|
||||
char mbbuf[MB_LEN_MAX]; /* temporary mb. character buffer */
|
||||
|
||||
char *cp;
|
||||
short *sp;
|
||||
|
@ -458,13 +459,15 @@ _DEFUN(__SVFWSCANF_R, (rptr, fp, fmt0, ap),
|
|||
_p = _p0; \
|
||||
_w; \
|
||||
})
|
||||
/* For char output, check if there's room for at least MB_CUR_MAX
|
||||
characters. */
|
||||
#define realloc_m_ptr(_type, _p, _p0, _p_p, _w) \
|
||||
({ \
|
||||
size_t _nw = (_w); \
|
||||
ptrdiff_t _dif = _p - _p0; \
|
||||
if (_p_p && \
|
||||
((sizeof (_type) == 1 && _dif >= _nw - MB_CUR_MAX) \
|
||||
|| (sizeof (_type) != 1 && _dif == _nw))) \
|
||||
|| _dif >= _nw)) \
|
||||
{ \
|
||||
_p0 = (_type *) realloc (_p0, (_nw << 1) * sizeof (_type)); \
|
||||
if (!_p0) \
|
||||
|
@ -925,7 +928,7 @@ _DEFUN(__SVFWSCANF_R, (rptr, fp, fmt0, ap),
|
|||
#endif
|
||||
|
||||
if (flags & SUPPRESS)
|
||||
;
|
||||
mbp = mbbuf;
|
||||
#ifdef _WANT_IO_POSIX_EXTENSIONS
|
||||
else if (flags & MALLOC)
|
||||
mbp_siz = alloc_m_ptr (char, mbp, mbp0, mbp_p, 32);
|
||||
|
@ -934,16 +937,19 @@ _DEFUN(__SVFWSCANF_R, (rptr, fp, fmt0, ap),
|
|||
mbp = GET_ARG(N, ap, char *);
|
||||
n = 0;
|
||||
memset ((_PTR)&mbs, '\0', sizeof (mbstate_t));
|
||||
while (width-- != 0 && (wi = _fgetwc_r (rptr, fp)) != WEOF)
|
||||
while (width != 0 && (wi = _fgetwc_r (rptr, fp)) != WEOF)
|
||||
{
|
||||
#ifdef _WANT_IO_POSIX_EXTENSIONS
|
||||
mbp_siz = realloc_m_ptr (char, mbp, mbp0, mbp_p, mbp_siz);
|
||||
#endif
|
||||
nconv = _wcrtomb_r (rptr, mbp, wi, &mbs);
|
||||
if (nconv == (size_t) -1)
|
||||
goto input_failure;
|
||||
/* Ignore high surrogate in width counting */
|
||||
if (nconv != 0 || mbs.__count != -4)
|
||||
width--;
|
||||
if (!(flags & SUPPRESS))
|
||||
{
|
||||
nconv = _wcrtomb_r (rptr, mbp, wi, &mbs);
|
||||
if (nconv == (size_t) -1)
|
||||
goto input_failure;
|
||||
#ifdef _WANT_IO_POSIX_EXTENSIONS
|
||||
mbp_siz = realloc_m_ptr (char, mbp, mbp0, mbp_p, mbp_siz);
|
||||
#endif
|
||||
mbp += nconv;
|
||||
}
|
||||
n++;
|
||||
|
@ -1014,7 +1020,7 @@ _DEFUN(__SVFWSCANF_R, (rptr, fp, fmt0, ap),
|
|||
#endif
|
||||
|
||||
if (flags & SUPPRESS)
|
||||
;
|
||||
mbp = mbbuf;
|
||||
#ifdef _WANT_IO_POSIX_EXTENSIONS
|
||||
else if (flags & MALLOC)
|
||||
mbp_siz = alloc_m_ptr (char, mbp, mbp0, mbp_p, 32);
|
||||
|
@ -1024,13 +1030,16 @@ _DEFUN(__SVFWSCANF_R, (rptr, fp, fmt0, ap),
|
|||
n = 0;
|
||||
memset ((_PTR) &mbs, '\0', sizeof (mbstate_t));
|
||||
while ((wi = _fgetwc_r (rptr, fp)) != WEOF
|
||||
&& width-- != 0 && INCCL (wi))
|
||||
&& width != 0 && INCCL (wi))
|
||||
{
|
||||
nconv = _wcrtomb_r (rptr, mbp, wi, &mbs);
|
||||
if (nconv == (size_t) -1)
|
||||
goto input_failure;
|
||||
/* Ignore high surrogate in width counting */
|
||||
if (nconv != 0 || mbs.__count != -4)
|
||||
width--;
|
||||
if (!(flags & SUPPRESS))
|
||||
{
|
||||
nconv = _wcrtomb_r (rptr, mbp, wi, &mbs);
|
||||
if (nconv == (size_t) -1)
|
||||
goto input_failure;
|
||||
mbp += nconv;
|
||||
#ifdef _WANT_IO_POSIX_EXTENSIONS
|
||||
mbp_siz = realloc_m_ptr (char, mbp, mbp0, mbp_p, mbp_siz);
|
||||
|
@ -1101,7 +1110,7 @@ _DEFUN(__SVFWSCANF_R, (rptr, fp, fmt0, ap),
|
|||
#endif
|
||||
|
||||
if (flags & SUPPRESS)
|
||||
;
|
||||
mbp = mbbuf;
|
||||
#ifdef _WANT_IO_POSIX_EXTENSIONS
|
||||
else if (flags & MALLOC)
|
||||
mbp_siz = alloc_m_ptr (char, mbp, mbp0, mbp_p, 32);
|
||||
|
@ -1110,13 +1119,16 @@ _DEFUN(__SVFWSCANF_R, (rptr, fp, fmt0, ap),
|
|||
mbp = GET_ARG(N, ap, char *);
|
||||
memset ((_PTR) &mbs, '\0', sizeof (mbstate_t));
|
||||
while ((wi = _fgetwc_r (rptr, fp)) != WEOF
|
||||
&& width-- != 0 && !iswspace (wi))
|
||||
&& width != 0 && !iswspace (wi))
|
||||
{
|
||||
nconv = wcrtomb(mbp, wi, &mbs);
|
||||
if (nconv == (size_t)-1)
|
||||
goto input_failure;
|
||||
/* Ignore high surrogate in width counting */
|
||||
if (nconv != 0 || mbs.__count != -4)
|
||||
width--;
|
||||
if (!(flags & SUPPRESS))
|
||||
{
|
||||
nconv = wcrtomb(mbp, wi, &mbs);
|
||||
if (nconv == (size_t)-1)
|
||||
goto input_failure;
|
||||
mbp += nconv;
|
||||
#ifdef _WANT_IO_POSIX_EXTENSIONS
|
||||
mbp_siz = realloc_m_ptr (char, mbp, mbp0, mbp_p, mbp_siz);
|
||||
|
|
Loading…
Reference in New Issue