2002-09-09 Jeff Johnston <jjohnstn@redhat.com>

* libc/include/sys/_types.h (_mbstate_t): Changed to use
        unsigned char internally.
        * libc/sys/linux/sys/_types.h: Ditto.
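        * libc/include/sys/reent.h: Add function-specific _mbstate_t
        fields for mbrlen, mbrtowc, mbsrtowcs, wcrtomb and wcsrtombs,
        together with their initializers and _REENT_*_STATE accessor macros.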
        * libc/stdlib/mblen.c (mblen): Use function-specific state
        value from default reentrancy structure.
        * libc/stdlib/mblen_r.c (_mblen_r):  If return code from
        _mbtowc_r is less than 0, reset state __count value and
        return -1.
        * libc/stdlib/mbrlen.c (mbrlen): If the input state pointer
        is NULL, use the function-specific pointer provided in the
        default reentrancy structure.
        * libc/stdlib/mbrtowc.c: Add reentrant form of function.
        If input state pointer is NULL, use function-specific area
        provided in reentrancy structure.
        * libc/stdlib/mbsrtowcs.c: Ditto.
        * libc/stdlib/wcrtomb.c: Ditto.
        * libc/stdlib/wcsrtombs.c: Ditto.
        * libc/stdlib/mbstowcs.c: Reformat.
        * libc/stdlib/wcstombs.c: Ditto.
        * libc/stdlib/mbstowcs_r.c (_mbstowcs_r): If an error occurs,
        reset the state's __count value and return -1.
        * libc/stdlib/mbtowc.c: Ditto.
        * libc/stdlib/mbtowc_r.c (_mbtowc_r): Add restartable functionality.
        If number of bytes is used up before completing a valid multibyte
        character, return -2 and save the state.
        * libc/stdlib/wctomb_r.c (_wctomb_r): Define __state as __count
        and change some __count references to __state for clarity.
This commit is contained in:
Jeff Johnston 2002-09-09 21:42:14 +00:00
parent b0591c89af
commit 9c64d2a7ba
17 changed files with 665 additions and 347 deletions

View File

@ -1,3 +1,34 @@
2002-09-09 Jeff Johnston <jjohnstn@redhat.com>
* libc/include/sys/_types.h (_mbstate_t): Changed to use
unsigned char internally.
* libc/sys/linux/sys/_types.h: Ditto.
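* libc/include/sys/reent.h: Add function-specific _mbstate_t
fields for mbrlen, mbrtowc, mbsrtowcs, wcrtomb and wcsrtombs,
together with their initializers and _REENT_*_STATE accessor macros.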
* libc/stdlib/mblen.c (mblen): Use function-specific state
value from default reentrancy structure.
* libc/stdlib/mblen_r.c (_mblen_r): If return code from
_mbtowc_r is less than 0, reset state __count value and
return -1.
* libc/stdlib/mbrlen.c (mbrlen): If the input state pointer
is NULL, use the function-specific pointer provided in the
default reentrancy structure.
* libc/stdlib/mbrtowc.c: Add reentrant form of function.
If input state pointer is NULL, use function-specific area
provided in reentrancy structure.
* libc/stdlib/mbsrtowcs.c: Ditto.
* libc/stdlib/wcrtomb.c: Ditto.
* libc/stdlib/wcsrtombs.c: Ditto.
* libc/stdlib/mbstowcs.c: Reformat.
* libc/stdlib/wcstombs.c: Ditto.
* libc/stdlib/mbstowcs_r.c (_mbstowcs_r): If an error occurs,
reset the state's __count value and return -1.
* libc/stdlib/mbtowc.c: Ditto.
* libc/stdlib/mbtowc_r.c (_mbtowc_r): Add restartable functionality.
If number of bytes is used up before completing a valid multibyte
character, return -2 and save the state.
* libc/stdlib/wctomb_r.c (_wctomb_r): Define __state as __count
and change some __count references to __state for clarity.
2002-09-06 Jeff Johnston <jjohnstn@redhat.com>
* libc/include/sys/config.h (MB_LEN_MAX): Removed as this

View File

@ -30,7 +30,7 @@ typedef struct
union
{
wint_t __wch;
char __wchb[4];
unsigned char __wchb[4];
} __value; /* Value so far. */
} _mbstate_t;
#endif /* _MBSTATE_T */

View File

@ -321,6 +321,11 @@ struct _misc_reent
_mbstate_t _mbtowc_state;
char _l64a_buf[8];
int _getdate_err;
_mbstate_t _mbrlen_state;
_mbstate_t _mbrtowc_state;
_mbstate_t _mbsrtowcs_state;
_mbstate_t _wcrtomb_state;
_mbstate_t _wcsrtombs_state;
};
/* This version of _reent is laid out with "int"s in pairs, to help
@ -478,6 +483,16 @@ struct _reent
_r->_misc->_wctomb_state.__value.__wch = 0; \
_r->_misc->_mbtowc_state.__count = 0; \
_r->_misc->_mbtowc_state.__value.__wch = 0; \
_r->_misc->_mbrlen_state.__count = 0; \
_r->_misc->_mbrlen_state.__value.__wch = 0; \
_r->_misc->_mbrtowc_state.__count = 0; \
_r->_misc->_mbrtowc_state.__value.__wch = 0; \
_r->_misc->_mbsrtowcs_state.__count = 0; \
_r->_misc->_mbsrtowcs_state.__value.__wch = 0; \
_r->_misc->_wcrtomb_state.__count = 0; \
_r->_misc->_wcrtomb_state.__value.__wch = 0; \
_r->_misc->_wcsrtombs_state.__count = 0; \
_r->_misc->_wcsrtombs_state.__value.__wch = 0; \
_r->_misc->_l64a_buf[0] = '\0'; \
_r->_misc->_getdate_err = 0; \
} while (0)
@ -503,6 +518,11 @@ struct _reent
#define _REENT_MBLEN_STATE(ptr) ((ptr)->_misc->_mblen_state)
#define _REENT_MBTOWC_STATE(ptr)((ptr)->_misc->_mbtowc_state)
#define _REENT_WCTOMB_STATE(ptr)((ptr)->_misc->_wctomb_state)
#define _REENT_MBRLEN_STATE(ptr) ((ptr)->_misc->_mbrlen_state)
#define _REENT_MBRTOWC_STATE(ptr) ((ptr)->_misc->_mbrtowc_state)
#define _REENT_MBSRTOWCS_STATE(ptr) ((ptr)->_misc->_mbsrtowcs_state)
#define _REENT_WCRTOMB_STATE(ptr) ((ptr)->_misc->_wcrtomb_state)
#define _REENT_WCSRTOMBS_STATE(ptr) ((ptr)->_misc->_wcsrtombs_state)
#define _REENT_L64A_BUF(ptr) ((ptr)->_misc->_l64a_buf)
#define _REENT_GETDATE_ERR_P(ptr) (&((ptr)->_misc->_getdate_err))
#define _REENT_SIGNAL_BUF(ptr) ((ptr)->_signal_buf)
@ -555,6 +575,11 @@ struct _reent
char _l64a_buf[8];
char _signal_buf[_REENT_SIGNAL_SIZE];
int _getdate_err;
_mbstate_t _mbrlen_state;
_mbstate_t _mbrtowc_state;
_mbstate_t _mbsrtowcs_state;
_mbstate_t _wcrtomb_state;
_mbstate_t _wcsrtombs_state;
} _reent;
/* The next two fields were once used by malloc. They are no longer
used. They are used to preserve the space used before so as to
@ -587,7 +612,7 @@ struct _reent
{ 0,0,0,0,0,0,0,0}, 0, 1, \
{{_RAND48_SEED_0, _RAND48_SEED_1, _RAND48_SEED_2}, \
{_RAND48_MULT_0, _RAND48_MULT_1, _RAND48_MULT_2}, _RAND48_ADD}, \
{0, {0}}, {0, {0}}, {0, {0}}, "", "", 0} } }
{0, {0}}, {0, {0}}, {0, {0}}, "", "", 0, {0, {0}}, {0, {0}}, {0, {0}}, {0, {0}}, {0, {0}} } } }
#define _REENT_INIT_PTR(var) \
{ int i; \
@ -630,6 +655,16 @@ struct _reent
var->_new._reent._mbtowc_state.__value.__wch = 0; \
var->_new._reent._wctomb_state.__count = 0; \
var->_new._reent._wctomb_state.__value.__wch = 0; \
var->_new._reent._mbrlen_state.__count = 0; \
var->_new._reent._mbrlen_state.__value.__wch = 0; \
var->_new._reent._mbrtowc_state.__count = 0; \
var->_new._reent._mbrtowc_state.__value.__wch = 0; \
var->_new._reent._mbsrtowcs_state.__count = 0; \
var->_new._reent._mbsrtowcs_state.__value.__wch = 0; \
var->_new._reent._wcrtomb_state.__count = 0; \
var->_new._reent._wcrtomb_state.__value.__wch = 0; \
var->_new._reent._wcsrtombs_state.__count = 0; \
var->_new._reent._wcsrtombs_state.__value.__wch = 0; \
var->_new._reent._l64a_buf[0] = '\0'; \
var->_new._reent._signal_buf[0] = '\0'; \
var->_new._reent._getdate_err = 0; \
@ -668,6 +703,11 @@ struct _reent
#define _REENT_MBLEN_STATE(ptr) ((ptr)->_new._reent._mblen_state)
#define _REENT_MBTOWC_STATE(ptr)((ptr)->_new._reent._mbtowc_state)
#define _REENT_WCTOMB_STATE(ptr)((ptr)->_new._reent._wctomb_state)
#define _REENT_MBRLEN_STATE(ptr)((ptr)->_new._reent._mbrlen_state)
#define _REENT_MBRTOWC_STATE(ptr)((ptr)->_new._reent._mbrtowc_state)
#define _REENT_MBSRTOWCS_STATE(ptr)((ptr)->_new._reent._mbsrtowcs_state)
#define _REENT_WCRTOMB_STATE(ptr)((ptr)->_new._reent._wcrtomb_state)
#define _REENT_WCSRTOMBS_STATE(ptr)((ptr)->_new._reent._wcsrtombs_state)
#define _REENT_L64A_BUF(ptr) ((ptr)->_new._reent._l64a_buf)
#define _REENT_SIGNAL_BUF(ptr) ((ptr)->_new._reent._signal_buf)
#define _REENT_GETDATE_ERR_P(ptr) (&((ptr)->_new._reent._getdate_err))

View File

@ -52,21 +52,26 @@ _DEFUN (mblen, (s, n),
size_t n)
{
#ifdef MB_CAPABLE
int retval = 0;
_REENT_CHECK_MISC(_REENT);
retval = _mbtowc_r (_REENT, NULL, s, n, &(_REENT_MBLEN_STATE(_REENT)));
if (retval < 0)
return -1;
else
return retval;
int retval = 0;
mbstate_t *state;
_REENT_CHECK_MISC(_REENT);
state = &(_REENT_MBLEN_STATE(_REENT));
retval = _mbtowc_r (_REENT, NULL, s, n, state);
if (retval < 0)
{
state->__count = 0;
return -1;
}
else
return retval;
#else /* not MB_CAPABLE */
if (s == NULL || *s == '\0')
return 0;
if (n == 0)
return -1;
return 1;
if (s == NULL || *s == '\0')
return 0;
if (n == 0)
return -1;
return 1;
#endif /* not MB_CAPABLE */
}

View File

@ -54,14 +54,22 @@ _DEFUN (_mblen_r, (r, s, n, state),
mbstate_t *state)
{
#ifdef MB_CAPABLE
int retval;
retval = _mbtowc_r (r, NULL, s, n, state);
return _mbtowc_r (r, NULL, s, n, state);
if (retval < 0)
{
state->__count = 0;
return -1;
}
return retval;
#else /* not MB_CAPABLE */
if (s == NULL || *s == '\0')
return 0;
if (n == 0)
return -1;
return 1;
if (s == NULL || *s == '\0')
return 0;
if (n == 0)
return -1;
return 1;
#endif /* not MB_CAPABLE */
}

View File

@ -7,7 +7,13 @@
size_t
mbrlen(const char *s, size_t n, mbstate_t *ps)
{
mbstate_t internal;
#ifdef MB_CAPABLE
if (ps == NULL)
{
_REENT_CHECK_MISC(_REENT);
ps = &(_REENT_MBRLEN_STATE(_REENT));
}
#endif
return mbrtowc(NULL, s, n, ps != NULL ? ps : &internal);
return mbrtowc(NULL, s, n, ps);
}

View File

@ -6,24 +6,46 @@
#include <string.h>
size_t
mbrtowc(wchar_t *pwc, const char *s, size_t n, mbstate_t *ps)
_DEFUN (_mbrtowc_r, (ptr, pwc, s, n, ps),
struct _reent *ptr _AND
wchar_t *pwc _AND
const char *s _AND
size_t n _AND
mbstate_t *ps)
{
int retval = 0;
_REENT_CHECK_MISC(_REENT);
#ifdef MB_CAPABLE
if (ps == NULL)
{
_REENT_CHECK_MISC(ptr);
ps = &(_REENT_MBRTOWC_STATE(ptr));
}
#endif
if (s == NULL)
retval = _mbtowc_r (_REENT, pwc, "", 1, ps);
retval = _mbtowc_r (ptr, pwc, "", 1, ps);
else
retval = _mbtowc_r (_REENT, pwc, s, n, ps);
if (*pwc == NULL)
memset (ps, '\0', sizeof (mbstate_t));
retval = _mbtowc_r (ptr, pwc, s, n, ps);
if (retval == -1)
{
_REENT->_errno = EILSEQ;
ps->__count = 0;
ptr->_errno = EILSEQ;
return (size_t)(-1);
}
else
return (size_t)retval;
}
#ifndef _REENT_ONLY
size_t
_DEFUN (mbrtowc, (pwc, s, n, ps),
wchar_t *pwc _AND
const char *s _AND
size_t n _AND
mbstate_t *ps)
{
return _mbrtowc_r (_REENT, pwc, s, n, ps);
}
#endif /* !_REENT_ONLY */

View File

@ -5,20 +5,62 @@
#include <errno.h>
size_t
mbsrtowcs(wchar_t *dst, const char **src, size_t len, mbstate_t *ps)
_DEFUN (_mbsrtowcs_r, (r, dst, src, n, ps),
struct _reent *r _AND
wchar_t *dst _AND
const char **src _AND
size_t n _AND
mbstate_t *ps)
{
int retval = 0;
mbstate_t internal;
wchar_t *ptr = dst;
size_t max = n;
int bytes;
_REENT_CHECK_MISC(_REENT);
retval = _mbstowcs_r (_REENT, dst, *src, len, ps != NULL ? ps : &internal);
if (retval == -1)
#ifdef MB_CAPABLE
if (ps == NULL)
{
_REENT->_errno = EILSEQ;
return (size_t)(-1);
_REENT_CHECK_MISC(r);
ps = &(_REENT_MBSRTOWCS_STATE(r));
}
else
return (size_t)retval;
#endif
while (n > 0)
{
bytes = _mbtowc_r (r, ptr, *src, MB_CUR_MAX, ps);
if (bytes > 0)
{
*src += bytes;
++ptr;
--n;
}
else if (bytes == -2)
{
*src += MB_CUR_MAX;
}
else if (bytes == 0)
{
*src = NULL;
return (size_t)(ptr - dst);
}
else
{
ps->__count = 0;
r->_errno = EILSEQ;
return (size_t)-1;
}
}
return (size_t)max;
}
#ifndef _REENT_ONLY
size_t
_DEFUN (mbsrtowcs, (dst, src, len, ps),
wchar_t *dst _AND
const char **src _AND
size_t len _AND
mbstate_t *ps)
{
return _mbsrtowcs_r (_REENT, dst, src, len, ps);
}
#endif /* !_REENT_ONLY */

View File

@ -59,23 +59,23 @@ _DEFUN (mbstowcs, (pwcs, s, n),
size_t n)
{
#ifdef MB_CAPABLE
mbstate_t state;
state.__count = 0;
return _mbstowcs_r (_REENT, pwcs, s, n, &state);
mbstate_t state;
state.__count = 0;
return _mbstowcs_r (_REENT, pwcs, s, n, &state);
#else /* not MB_CAPABLE */
int count = 0;
if (n != 0) {
do {
if ((*pwcs++ = (wchar_t) *s++) == 0)
break;
count++;
} while (--n != 0);
}
return count;
int count = 0;
if (n != 0) {
do {
if ((*pwcs++ = (wchar_t) *s++) == 0)
break;
count++;
} while (--n != 0);
}
return count;
#endif /* not MB_CAPABLE */
}

View File

@ -18,7 +18,10 @@ _DEFUN (_mbstowcs_r, (reent, pwcs, s, n, state),
{
bytes = _mbtowc_r (r, ptr, t, MB_CUR_MAX, state);
if (bytes < 0)
return -1;
{
state->__count = 0;
return -1;
}
else if (bytes == 0)
return ptr - pwcs;
t += bytes;

View File

@ -52,6 +52,7 @@ effects vary with the locale.
#ifndef _REENT_ONLY
#include <stdlib.h>
#include <wchar.h>
int
_DEFUN (mbtowc, (pwc, s, n),
@ -60,23 +61,28 @@ _DEFUN (mbtowc, (pwc, s, n),
size_t n)
{
#ifdef MB_CAPABLE
int retval = 0;
_REENT_CHECK_MISC(_REENT);
int retval = 0;
mbstate_t *ps;
retval = _mbtowc_r (_REENT, pwc, s, n, &(_REENT_MBTOWC_STATE(_REENT)));
if (retval < 0)
return -1;
else
return retval;
_REENT_CHECK_MISC(_REENT);
ps = &(_REENT_MBTOWC_STATE(_REENT));
retval = _mbtowc_r (_REENT, pwc, s, n, ps);
if (retval < 0)
{
ps->__count = 0;
return -1;
}
return retval;
#else /* not MB_CAPABLE */
if (s == NULL)
return 0;
if (n == 0)
return -1;
if (pwc)
*pwc = (wchar_t) *s;
return (*s != '\0');
if (s == NULL)
return 0;
if (n == 0)
return -1;
if (pwc)
*pwc = (wchar_t) *s;
return (*s != '\0');
#endif /* not MB_CAPABLE */
}

View File

@ -7,9 +7,9 @@
#ifdef MB_CAPABLE
typedef enum { ESCAPE, DOLLAR, BRACKET, AT, B, J,
NUL, JIS_CHAR, OTHER, JIS_C_NUM } JIS_CHAR_TYPE;
typedef enum { ASCII, A_ESC, A_ESC_DL, JIS, JIS_1, JIS_2, J_ESC, J_ESC_BR,
J2_ESC, J2_ESC_BR, DONE, INV, JIS_S_NUM } JIS_STATE;
typedef enum { COPY_A, COPY_J, COPY_J2, MAKE_A, MAKE_J, NOOP, EMPTY, ERROR } JIS_ACTION;
typedef enum { ASCII, JIS, A_ESC, A_ESC_DL, JIS_1, J_ESC, J_ESC_BR,
INV, JIS_S_NUM } JIS_STATE;
typedef enum { COPY_A, COPY_J1, COPY_J2, MAKE_A, NOOP, EMPTY, ERROR } JIS_ACTION;
/**************************************************************************************
* state/action tables for processing JIS encoding
@ -20,33 +20,30 @@ typedef enum { COPY_A, COPY_J, COPY_J2, MAKE_A, MAKE_J, NOOP, EMPTY, ERROR } JIS
static JIS_STATE JIS_state_table[JIS_S_NUM][JIS_C_NUM] = {
/* ESCAPE DOLLAR BRACKET AT B J NUL JIS_CHAR OTHER */
/* ASCII */ { A_ESC, DONE, DONE, DONE, DONE, DONE, DONE, DONE, DONE },
/* A_ESC */ { DONE, A_ESC_DL, DONE, DONE, DONE, DONE, DONE, DONE, DONE },
/* A_ESC_DL */{ DONE, DONE, DONE, JIS, JIS, DONE, DONE, DONE, DONE },
/* ASCII */ { A_ESC, ASCII, ASCII, ASCII, ASCII, ASCII, ASCII, ASCII, ASCII },
/* JIS */ { J_ESC, JIS_1, JIS_1, JIS_1, JIS_1, JIS_1, INV, JIS_1, INV },
/* JIS_1 */ { INV, JIS_2, JIS_2, JIS_2, JIS_2, JIS_2, INV, JIS_2, INV },
/* JIS_2 */ { J2_ESC, DONE, DONE, DONE, DONE, DONE, INV, DONE, DONE },
/* A_ESC */ { ASCII, A_ESC_DL, ASCII, ASCII, ASCII, ASCII, ASCII, ASCII, ASCII },
/* A_ESC_DL */{ ASCII, ASCII, ASCII, JIS, JIS, ASCII, ASCII, ASCII, ASCII },
/* JIS_1 */ { INV, JIS, JIS, JIS, JIS, JIS, INV, JIS, INV },
/* J_ESC */ { INV, INV, J_ESC_BR, INV, INV, INV, INV, INV, INV },
/* J_ESC_BR */{ INV, INV, INV, INV, ASCII, ASCII, INV, INV, INV },
/* J2_ESC */ { INV, INV, J2_ESC_BR,INV, INV, INV, INV, INV, INV },
/* J2_ESC_BR*/{ INV, INV, INV, INV, DONE, DONE, INV, INV, INV },
};
static JIS_ACTION JIS_action_table[JIS_S_NUM][JIS_C_NUM] = {
/* ESCAPE DOLLAR BRACKET AT B J NUL JIS_CHAR OTHER */
/* ASCII */ { NOOP, COPY_A, COPY_A, COPY_A, COPY_A, COPY_A, EMPTY, COPY_A, COPY_A},
/* JIS */ { NOOP, COPY_J1, COPY_J1, COPY_J1, COPY_J1, COPY_J1, ERROR, COPY_J1, ERROR },
/* A_ESC */ { COPY_A, NOOP, COPY_A, COPY_A, COPY_A, COPY_A, COPY_A, COPY_A, COPY_A},
/* A_ESC_DL */{ COPY_A, COPY_A, COPY_A, MAKE_J, MAKE_J, COPY_A, COPY_A, COPY_A, COPY_A},
/* JIS */ { NOOP, NOOP, NOOP, NOOP, NOOP, NOOP, ERROR, NOOP, ERROR },
/* JIS_1 */ { ERROR, NOOP, NOOP, NOOP, NOOP, NOOP, ERROR, NOOP, ERROR },
/* JIS_2 */ { NOOP, COPY_J2, COPY_J2, COPY_J2, COPY_J2, COPY_J2, ERROR, COPY_J2, COPY_J2},
/* A_ESC_DL */{ COPY_A, COPY_A, COPY_A, NOOP, NOOP, COPY_A, COPY_A, COPY_A, COPY_A},
/* JIS_1 */ { ERROR, COPY_J2, COPY_J2, COPY_J2, COPY_J2, COPY_J2, ERROR, COPY_J2, ERROR },
/* J_ESC */ { ERROR, ERROR, NOOP, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR },
/* J_ESC_BR */{ ERROR, ERROR, ERROR, ERROR, NOOP, NOOP, ERROR, ERROR, ERROR },
/* J2_ESC */ { ERROR, ERROR, NOOP, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR },
/* J2_ESC_BR*/{ ERROR, ERROR, ERROR, ERROR, COPY_J, COPY_J, ERROR, ERROR, ERROR },
/* J_ESC_BR */{ ERROR, ERROR, ERROR, ERROR, MAKE_A, MAKE_A, ERROR, ERROR, ERROR },
};
#endif /* MB_CAPABLE */
/* we override the mbstate_t __count field for more complex encodings and use it to store a state value */
#define __state __count
int
_DEFUN (_mbtowc_r, (r, pwc, s, n, state),
struct _reent *r _AND
@ -70,230 +67,305 @@ _DEFUN (_mbtowc_r, (r, pwc, s, n, state),
{ /* fall-through */ }
else if (!strcmp (r->_current_locale, "C-UTF-8"))
{
wchar_t char1 = 0;
int ch;
int i = 0;
if (s == NULL)
return 0; /* UTF-8 character encodings are not state-dependent */
/* we know n >= 1 if we get here */
*pwc = 0;
char1 = (wchar_t)*t;
if (state->__count == 0)
ch = t[i++];
else
{
++n;
ch = state->__value.__wchb[0];
}
if (char1 == '\0')
return 0; /* s points to the null character */
if (ch == '\0')
{
*pwc = 0;
state->__count = 0;
return 0; /* s points to the null character */
}
if (char1 >= 0x0 && char1 <= 0x7f)
{
/* single-byte sequence */
*pwc = char1;
return 1;
}
else if (char1 >= 0xc0 && char1 <= 0xdf)
{
/* two-byte sequence */
if (n >= 2)
{
wchar_t char2 = (wchar_t)*(t+1);
if (char2 < 0x80 || char2 > 0xbf)
return -1;
if (char1 < 0xc2)
/* overlong UTF-8 sequence */
return -1;
*pwc = ((char1 & 0x1f) << 6)
| (char2 & 0x3f);
return 2;
}
else
return -1;
}
else if (char1 >= 0xe0 && char1 <= 0xef)
{
/* three-byte sequence */
if (n >= 3)
{
wchar_t char2 = (wchar_t)*(t+1);
wchar_t char3 = (wchar_t)*(t+2);
if (char2 < 0x80 || char2 > 0xbf)
return -1;
if (char3 < 0x80 || char3 > 0xbf)
return -1;
if (char1 == 0xe0)
{
if (char2 < 0xa0)
/* overlong UTF-8 sequence */
return -1;
}
*pwc = ((char1 & 0x0f) << 12)
| ((char2 & 0x3f) << 6)
| (char3 & 0x3f);
if (*pwc >= 0xd800 && *pwc <= 0xdfff)
{
return -1;
}
else
return 3;
}
else
return -2;
}
else if (char1 >= 0xf0 && char1 <= 0xf7)
{
/* four-byte sequence */
if (n >= 4)
{
wchar_t char2 = (wchar_t)*(t+1);
wchar_t char3 = (wchar_t)*(t+2);
wchar_t char4 = (wchar_t)*(t+3);
if (char2 < 0x80 || char2 > 0xbf)
return -1;
if (char3 < 0x80 || char3 > 0xbf)
return -1;
if (char4 < 0x80 || char4 > 0xbf)
return -1;
if (char1 == 0xf0)
{
if (char2 < 0x90)
/* overlong UTF-8 sequence */
return -1;
}
*pwc = ((char1 & 0x07) << 18)
| ((char2 & 0x3f) << 12)
| ((char3 & 0x3f) << 6)
| (char4 & 0x3f);
return 4;
}
else
return -2;
}
else if (char1 >= 0xf8 && char1 <= 0xfb)
{
/* five-byte sequence */
if (n >= 5)
{
wchar_t char2 = (wchar_t)*(t+1);
wchar_t char3 = (wchar_t)*(t+2);
wchar_t char4 = (wchar_t)*(t+3);
wchar_t char5 = (wchar_t)*(t+4);
if (char2 < 0x80 || char2 > 0xbf)
return -1;
if (char3 < 0x80 || char3 > 0xbf)
return -1;
if (char4 < 0x80 || char4 > 0xbf)
return -1;
if (char5 < 0x80 || char5 > 0xbf)
return -1;
if (char1 == 0xf8)
{
if (char2 < 0x88)
/* overlong UTF-8 sequence */
return -1;
}
*pwc = ((char1 & 0x03) << 24)
| ((char2 & 0x3f) << 18)
| ((char3 & 0x3f) << 12)
| ((char4 & 0x3f) << 6)
| (char5 & 0x3f);
return 5;
}
else
return -2;
}
else if (char1 >= 0xfc && char1 <= 0xfd)
if (ch >= 0x0 && ch <= 0x7f)
{
/* single-byte sequence */
state->__count = 0;
*pwc = ch;
return 1;
}
else if (ch >= 0xc0 && ch <= 0xdf)
{
/* two-byte sequence */
state->__value.__wchb[0] = ch;
state->__count = 1;
if (n < 2)
return -2;
ch = t[i++];
if (ch < 0x80 || ch > 0xbf)
return -1;
if (state->__value.__wchb[0] < 0xc2)
/* overlong UTF-8 sequence */
return -1;
state->__count = 0;
*pwc = (wchar_t)((state->__value.__wchb[0] & 0x1f) << 6)
| (wchar_t)(ch & 0x3f);
return i;
}
else if (ch >= 0xe0 && ch <= 0xef)
{
/* three-byte sequence */
wchar_t tmp;
state->__value.__wchb[0] = ch;
if (state->__count == 0)
state->__count = 1;
else
++n;
if (n < 2)
return -2;
ch = (state->__count == 1) ? t[i++] : state->__value.__wchb[1];
if (state->__value.__wchb[0] == 0xe0 && ch < 0xa0)
/* overlong UTF-8 sequence */
return -1;
if (ch < 0x80 || ch > 0xbf)
return -1;
state->__value.__wchb[1] = ch;
state->__count = 2;
if (n < 3)
return -2;
ch = t[i++];
if (ch < 0x80 || ch > 0xbf)
return -1;
state->__count = 0;
tmp = (wchar_t)((state->__value.__wchb[0] & 0x0f) << 12)
| (wchar_t)((state->__value.__wchb[1] & 0x3f) << 6)
| (wchar_t)(ch & 0x3f);
if (tmp >= 0xd800 && tmp <= 0xdfff)
return -1;
*pwc = tmp;
return i;
}
else if (ch >= 0xf0 && ch <= 0xf7)
{
/* four-byte sequence */
if (sizeof(wchar_t) < 4)
return -1; /* we can't store such a value */
state->__value.__wchb[0] = ch;
if (state->__count == 0)
state->__count = 1;
else
++n;
if (n < 2)
return -2;
ch = (state->__count == 1) ? t[i++] : state->__value.__wchb[1];
if (state->__value.__wchb[0] == 0xf0 && ch < 0x90)
/* overlong UTF-8 sequence */
return -1;
if (ch < 0x80 || ch > 0xbf)
return -1;
state->__value.__wchb[1] = ch;
if (state->__count == 1)
state->__count = 2;
else
++n;
if (n < 3)
return -2;
ch = (state->__count == 2) ? t[i++] : state->__value.__wchb[2];
if (ch < 0x80 || ch > 0xbf)
return -1;
state->__value.__wchb[2] = ch;
state->__count = 3;
if (n < 4)
return -2;
ch = t[i++];
if (ch < 0x80 || ch > 0xbf)
return -1;
*pwc = (wchar_t)((state->__value.__wchb[0] & 0x07) << 18)
| (wchar_t)((state->__value.__wchb[1] & 0x3f) << 12)
| (wchar_t)((state->__value.__wchb[2] & 0x3f) << 6)
| (wchar_t)(ch & 0x3f);
state->__count = 0;
return i;
}
else if (ch >= 0xf8 && ch <= 0xfb)
{
/* five-byte sequence */
if (sizeof(wchar_t) < 4)
return -1; /* we can't store such a value */
state->__value.__wchb[0] = ch;
if (state->__count == 0)
state->__count = 1;
else
++n;
if (n < 2)
return -2;
ch = (state->__count == 1) ? t[i++] : state->__value.__wchb[1];
if (state->__value.__wchb[0] == 0xf8 && ch < 0x88)
/* overlong UTF-8 sequence */
return -1;
if (ch < 0x80 || ch > 0xbf)
return -1;
state->__value.__wchb[1] = ch;
if (state->__count == 1)
state->__count = 2;
else
++n;
if (n < 3)
return -2;
ch = (state->__count == 2) ? t[i++] : state->__value.__wchb[2];
if (ch < 0x80 || ch > 0xbf)
return -1;
state->__value.__wchb[2] = ch;
if (state->__count == 2)
state->__count = 3;
else
++n;
if (n < 4)
return -2;
ch = (state->__count == 3) ? t[i++] : state->__value.__wchb[3];
if (ch < 0x80 || ch > 0xbf)
return -1;
state->__value.__wchb[3] = ch;
state->__count = 4;
if (n < 5)
return -2;
ch = t[i++];
*pwc = (wchar_t)((state->__value.__wchb[0] & 0x03) << 24)
| (wchar_t)((state->__value.__wchb[1] & 0x3f) << 18)
| (wchar_t)((state->__value.__wchb[2] & 0x3f) << 12)
| (wchar_t)((state->__value.__wchb[3] & 0x3f) << 6)
| (wchar_t)(ch & 0x3f);
state->__count = 0;
return i;
}
else if (ch >= 0xfc && ch <= 0xfd)
{
/* six-byte sequence */
if (n >= 6)
{
wchar_t char2 = (wchar_t)*(t+1);
wchar_t char3 = (wchar_t)*(t+2);
wchar_t char4 = (wchar_t)*(t+3);
wchar_t char5 = (wchar_t)*(t+4);
wchar_t char6 = (wchar_t)*(t+5);
if (char2 < 0x80 || char2 > 0xbf)
return -1;
if (char3 < 0x80 || char3 > 0xbf)
return -1;
if (char4 < 0x80 || char4 > 0xbf)
return -1;
if (char5 < 0x80 || char5 > 0xbf)
return -1;
if (char6 < 0x80 || char6 > 0xbf)
return -1;
if (char1 == 0xfc)
{
if (char2 < 0x84)
/* overlong UTF-8 sequence */
return -1;
}
*pwc = ((char1 & 0x01) << 30)
| ((char2 & 0x3f) << 24)
| ((char3 & 0x3f) << 18)
| ((char4 & 0x3f) << 12)
| ((char5 & 0x3f) << 6)
| (char6 & 0x3f);
return 6;
}
else
return -2;
}
int ch2;
if (sizeof(wchar_t) < 4)
return -1; /* we can't store such a value */
state->__value.__wchb[0] = ch;
if (state->__count == 0)
state->__count = 1;
else
++n;
if (n < 2)
return -2;
ch = (state->__count == 1) ? t[i++] : state->__value.__wchb[1];
if (state->__value.__wchb[0] == 0xfc && ch < 0x84)
/* overlong UTF-8 sequence */
return -1;
if (ch < 0x80 || ch > 0xbf)
return -1;
state->__value.__wchb[1] = ch;
if (state->__count == 1)
state->__count = 2;
else
++n;
if (n < 3)
return -2;
ch = (state->__count == 2) ? t[i++] : state->__value.__wchb[2];
if (ch < 0x80 || ch > 0xbf)
return -1;
state->__value.__wchb[2] = ch;
if (state->__count == 2)
state->__count = 3;
else
++n;
if (n < 4)
return -2;
ch = (state->__count == 3) ? t[i++] : state->__value.__wchb[3];
if (ch < 0x80 || ch > 0xbf)
return -1;
state->__value.__wchb[3] = ch;
if (state->__count == 3)
state->__count = 4;
else
++n;
if (n < 5)
return -2;
if (n == 5)
return -1; /* at this point we can't save enough to restart */
ch = t[i++];
if (ch < 0x80 || ch > 0xbf)
return -1;
ch2 = t[i++];
*pwc = (wchar_t)((state->__value.__wchb[0] & 0x01) << 30)
| (wchar_t)((state->__value.__wchb[1] & 0x3f) << 24)
| (wchar_t)((state->__value.__wchb[2] & 0x3f) << 18)
| (wchar_t)((state->__value.__wchb[3] & 0x3f) << 12)
| (wchar_t)((ch & 0x3f) << 6)
| (wchar_t)(ch2 & 0x3f);
state->__count = 0;
return i;
}
else
return -1;
return -1;
}
else if (!strcmp (r->_current_locale, "C-SJIS"))
{
int char1;
int ch;
int i = 0;
if (s == NULL)
return 0; /* not state-dependent */
char1 = *t;
if (_issjis1 (char1))
{
int char2 = t[1];
if (n <= 1)
return -2;
if (_issjis2 (char2))
{
*pwc = (((wchar_t)*t) << 8) + (wchar_t)(*(t+1));
return 2;
}
else
return -1;
}
ch = t[i++];
if (state->__count == 0)
{
if (_issjis1 (ch))
{
state->__value.__wchb[0] = ch;
state->__count = 1;
if (n <= 1)
return -2;
ch = t[i++];
}
}
if (state->__count == 1)
{
if (_issjis2 (ch))
{
*pwc = (((wchar_t)state->__value.__wchb[0]) << 8) + (wchar_t)ch;
state->__count = 0;
return i;
}
else
return -1;
}
}
else if (!strcmp (r->_current_locale, "C-EUCJP"))
{
int char1;
int ch;
int i = 0;
if (s == NULL)
return 0; /* not state-dependent */
char1 = *t;
if (_iseucjp (char1))
{
int char2 = t[1];
if (n <= 1)
return -2;
if (_iseucjp (char2))
{
*pwc = (((wchar_t)*t) << 8) + (wchar_t)(*(t+1));
return 2;
}
else
return -1;
}
ch = t[i++];
if (state->__count == 0)
{
if (_iseucjp (ch))
{
state->__value.__wchb[0] = ch;
state->__count = 1;
if (n <= 1)
return -2;
ch = t[i++];
}
}
if (state->__count == 1)
{
if (_iseucjp (ch))
{
*pwc = (((wchar_t)state->__value.__wchb[0]) << 8) + (wchar_t)ch;
state->__count = 0;
return i;
}
else
return -1;
}
}
else if (!strcmp (r->_current_locale, "C-JIS"))
{
@ -301,15 +373,16 @@ _DEFUN (_mbtowc_r, (r, pwc, s, n, state),
JIS_ACTION action;
JIS_CHAR_TYPE ch;
unsigned char *ptr;
int i, curr_ch;
unsigned int i;
int curr_ch;
if (s == NULL)
{
state->__count = 0;
state->__state = ASCII;
return 1; /* state-dependent */
}
curr_state = (state->__count == 0 ? ASCII : JIS);
curr_state = state->__state;
ptr = t;
for (i = 0; i < n; ++i)
@ -353,23 +426,21 @@ _DEFUN (_mbtowc_r, (r, pwc, s, n, state),
case NOOP:
break;
case EMPTY:
state->__count = 0;
state->__state = ASCII;
*pwc = (wchar_t)0;
return i;
return 0;
case COPY_A:
state->__count = 0;
state->__state = ASCII;
*pwc = (wchar_t)*ptr;
return (i + 1);
case COPY_J:
state->__count = 0;
*pwc = (((wchar_t)*ptr) << 8) + (wchar_t)(*(ptr+1));
return (i + 1);
case COPY_J1:
state->__value.__wchb[0] = t[i];
break;
case COPY_J2:
state->__count = 1;
*pwc = (((wchar_t)*ptr) << 8) + (wchar_t)(*(ptr+1));
return (ptr - t) + 2;
state->__state = JIS;
*pwc = (((wchar_t)state->__value.__wchb[0]) << 8) + (wchar_t)(t[i]);
return (i + 1);
case MAKE_A:
case MAKE_J:
ptr = (char *)(t + i + 1);
break;
case ERROR:
@ -379,6 +450,7 @@ _DEFUN (_mbtowc_r, (r, pwc, s, n, state),
}
state->__state = curr_state;
return -2; /* n < bytes needed */
}
#endif /* MB_CAPABLE */

View File

@ -5,21 +5,45 @@
#include <errno.h>
size_t
wcrtomb(char *s, wchar_t wc, mbstate_t *ps)
_DEFUN (_wcrtomb_r, (ptr, s, wc, ps),
struct _reent *ptr _AND
char *s _AND
wchar_t wc _AND
mbstate_t *ps)
{
int retval = 0;
_REENT_CHECK_MISC(_REENT);
char buf[10];
#ifdef MB_CAPABLE
if (ps == NULL)
{
_REENT_CHECK_MISC(ptr);
ps = &(_REENT_WCRTOMB_STATE(ptr));
}
#endif
if (s == NULL)
retval = _wctomb_r (_REENT, "", wc, ps);
retval = _wctomb_r (ptr, buf, L'\0', ps);
else
retval = _wctomb_r (_REENT, s, wc, ps);
retval = _wctomb_r (ptr, s, wc, ps);
if (retval == -1)
{
_REENT->_errno = EILSEQ;
ps->__count = 0;
ptr->_errno = EILSEQ;
return (size_t)(-1);
}
else
return (size_t)retval;
}
#ifndef _REENT_ONLY
size_t
_DEFUN (wcrtomb, (s, wc, ps),
char *s _AND
wchar_t wc _AND
mbstate_t *ps)
{
return _wcrtomb_r (_REENT, s, wc, ps);
}
#endif /* !_REENT_ONLY */

View File

@ -5,18 +5,74 @@
#include <errno.h>
size_t
wcsrtombs (char *dst, const wchar_t **src, size_t len, mbstate_t *ps)
_DEFUN (_wcsrtombs_r, (r, dst, src, len, ps),
struct _reent *r _AND
char *dst _AND
const wchar_t **src _AND
size_t len _AND
mbstate_t *ps)
{
int retval = 0;
_REENT_CHECK_MISC(_REENT);
char *ptr = dst;
char buff[10];
int i, n;
int count;
wint_t wch;
retval = _wcstombs_r (_REENT, dst, *src, len, ps);
if (retval == -1)
#ifdef MB_CAPABLE
if (ps == NULL)
{
_REENT->_errno = EILSEQ;
return (size_t)(-1);
_REENT_CHECK_MISC(r);
ps = &(_REENT_WCSRTOMBS_STATE(r));
}
else
return (size_t)retval;
#endif
n = (int)len;
while (n > 0)
{
wchar_t *pwcs = (wchar_t *)(*src);
int count = ps->__count;
wint_t wch = ps->__value.__wch;
int bytes = _wctomb_r (r, buff, *pwcs, ps);
if (bytes == -1)
{
r->_errno = EILSEQ;
ps->__count = 0;
return (size_t)-1;
}
if (bytes <= n)
{
for (i = 0; i < bytes; ++i)
*ptr++ = buff[i];
if (*pwcs == 0x00)
{
*src = NULL;
ps->__count = 0;
return (size_t)(ptr - dst - 1);
}
++(*src);
}
else
{
/* not enough room, we must back up state to before _wctomb_r call */
ps->__count = count;
ps->__value.__wch = wch;
}
n -= bytes;
}
return (size_t)(ptr - dst);
}
#ifndef _REENT_ONLY
size_t
_DEFUN (wcsrtombs, (dst, src, len, ps),
char *dst _AND
const wchar_t **src _AND
size_t len _AND
mbstate_t *ps)
{
return _wcsrtombs_r (_REENT, dst, src, len, ps);
}
#endif /* !_REENT_ONLY */

View File

@ -60,22 +60,22 @@ _DEFUN (wcstombs, (s, pwcs, n),
size_t n)
{
#ifdef MB_CAPABLE
mbstate_t state;
state.__count = 0;
return _wcstombs_r (_REENT, s, pwcs, n, &state);
mbstate_t state;
state.__count = 0;
return _wcstombs_r (_REENT, s, pwcs, n, &state);
#else /* not MB_CAPABLE */
int count = 0;
if (n != 0) {
do {
if ((*s++ = (char) *pwcs++) == 0)
break;
count++;
} while (--n != 0);
}
return count;
int count = 0;
if (n != 0) {
do {
if ((*s++ = (char) *pwcs++) == 0)
break;
count++;
} while (--n != 0);
}
return count;
#endif /* not MB_CAPABLE */
}

View File

@ -4,6 +4,9 @@
#include <locale.h>
#include "mbctype.h"
/* for some conversions, we use the __count field as a place to store a state value */
#define __state __count
int
_DEFUN (_wctomb_r, (r, s, wchar, state),
struct _reent *r _AND
@ -126,10 +129,10 @@ _DEFUN (_wctomb_r, (r, s, wchar, state),
/* first byte is non-zero..validate multi-byte char */
if (_isjis (char1) && _isjis (char2))
{
if (state->__count == 0)
if (state->__state == 0)
{
/* must switch from ASCII to JIS state */
state->__count = 1;
state->__state = 1;
*s++ = ESC_CHAR;
*s++ = '$';
*s++ = 'B';
@ -144,10 +147,10 @@ _DEFUN (_wctomb_r, (r, s, wchar, state),
}
else
{
if (state->__count != 0)
if (state->__state != 0)
{
/* must switch from JIS to ASCII state */
state->__count = 0;
state->__state = 0;
*s++ = ESC_CHAR;
*s++ = '(';
*s++ = 'B';

View File

@ -30,7 +30,7 @@ typedef struct
union
{
wint_t __wch;
char __wchb[4];
unsigned char __wchb[4];
} __value; /* Value so far. */
} _mbstate_t;
#endif /* _MBSTATE_T */