4
0
mirror of git://sourceware.org/git/newlib-cygwin.git synced 2025-01-22 23:17:28 +08:00
Corinna Vinschen 8a43189438 Revert "* libc/stdlib/mbtowc_r.c (__ascii_mbtowc): Disallow conversion of"
This reverts commit 2b77087a48ea56e77fca5aeab478c922f6473d7c.

For some reason lost in time, commit 2b77087a48ea5 introduced
Cygwin-specific code treating single byte characters outside the
portable character set as illegal chars.  However, Cygwin was
always alone with this over-correct behaviour and it leads to
stuff like gnulib replacing functions defined in Cygwin with
their own implementation just due to that.

Revert this change, sans the changes to ChangeLog.

Fixes: 2b77087a48ea ("* libc/stdlib/mbtowc_r.c (__ascii_mbtowc): Disallow conversion of")
Signed-off-by: Corinna Vinschen <corinna@vinschen.de>
2023-07-31 22:39:09 +02:00

969 lines
21 KiB
C

#include <newlib.h>
#include <stdlib.h>
#include <locale.h>
#include "mbctype.h"
#include <wchar.h>
#include <string.h>
#include <errno.h>
#include "local.h"
/*
 * Reentrant multibyte-to-wide-character conversion entry point.
 *
 * Forwards all five arguments unchanged to __MBTOWC and returns whatever
 * that yields.  __MBTOWC is not defined in this file; presumably it
 * resolves to the conversion routine of the active locale -- confirm in
 * the header that defines it.
 */
int
_mbtowc_r (struct _reent *r,
wchar_t *__restrict pwc,
const char *__restrict s,
size_t n,
mbstate_t *state)
{
return __MBTOWC (r, pwc, s, n, state);
}
/*
 * Single-byte converter: the byte at s is widened to the wide character
 * with the same numeric value (bytes >= 0x80 are passed through as-is).
 *
 * Returns 0 when s is NULL or points at a NUL byte, -2 when n is 0, and 1
 * otherwise.  The result is stored through pwc unless pwc is NULL.
 * Neither r nor state is used.
 */
int
__ascii_mbtowc (struct _reent *r,
                wchar_t *pwc,
                const char *s,
                size_t n,
                mbstate_t *state)
{
  wchar_t discard;
  wchar_t *out = (pwc != NULL) ? pwc : &discard;
  const unsigned char *src = (const unsigned char *) s;

  if (src == NULL)
    return 0;
  if (n == 0)
    return -2;

  *out = (wchar_t) *src;
  return (*src != '\0') ? 1 : 0;
}
#ifdef _MB_CAPABLE
/* Input byte classes distinguished by the JIS decoder.  A value of this
   type is the column index into JIS_state_table and JIS_action_table.
   JIS_C_NUM is not a class; it is the number of classes and serves as the
   column dimension of both tables. */
typedef enum __packed { ESCAPE, DOLLAR, BRACKET, AT, B, J,
NUL, JIS_CHAR, OTHER, JIS_C_NUM } JIS_CHAR_TYPE;
/* States of the JIS decoder.  A value of this type is the row index into
   JIS_state_table and JIS_action_table.  JIS_S_NUM is not a state; it is
   the number of states and serves as the row dimension of both tables. */
typedef enum __packed { ASCII, JIS, A_ESC, A_ESC_DL, JIS_1, J_ESC, J_ESC_BR,
INV, JIS_S_NUM } JIS_STATE;
/* Actions stored in JIS_action_table, as carried out by __jis_mbtowc:
   COPY_A  - store an ASCII-mode character and return,
   COPY_J1 - remember the first byte of a two-byte JIS character,
   COPY_J2 - combine the remembered byte with the current one and return,
   MAKE_A  - an escape back to ASCII is complete; the next byte starts a
             new character,
   NOOP    - consume the byte without producing a character,
   EMPTY   - NUL byte: store a zero wide character and return 0,
   ERROR   - invalid sequence; fail with EILSEQ. */
typedef enum __packed { COPY_A, COPY_J1, COPY_J2, MAKE_A, NOOP, EMPTY, ERROR } JIS_ACTION;
/**************************************************************************************
* state/action tables for processing JIS encoding
* Where possible, switches to JIS are grouped with the following JIS characters and switches
* to ASCII are grouped with the preceding JIS characters. Thus, maximum returned length
* is 2 (switch to JIS) + 2 (JIS characters) + 2 (switch back to ASCII) = 6.
*************************************************************************************/
#ifndef __CYGWIN__
/*
 * Next-state table for the JIS decoder:
 *   next = JIS_state_table[current state][class of the current byte].
 *
 * The table is only ever read, so it is declared const.  Rows are keyed
 * by state name.  No row is given for INV, so that row is zero-initialised
 * (every entry is ASCII, the first enumerator); it is not consulted by
 * __jis_mbtowc because every transition into INV is paired with ERROR in
 * JIS_action_table, which makes the decoder return immediately.
 */
static const JIS_STATE JIS_state_table[JIS_S_NUM][JIS_C_NUM] = {
  /*              ESCAPE  DOLLAR    BRACKET   AT     B      J      NUL    JIS_CHAR OTHER */
  [ASCII]    = { A_ESC,  ASCII,    ASCII,    ASCII, ASCII, ASCII, ASCII, ASCII,   ASCII },
  [JIS]      = { J_ESC,  JIS_1,    JIS_1,    JIS_1, JIS_1, JIS_1, INV,   JIS_1,   INV   },
  [A_ESC]    = { ASCII,  A_ESC_DL, ASCII,    ASCII, ASCII, ASCII, ASCII, ASCII,   ASCII },
  [A_ESC_DL] = { ASCII,  ASCII,    ASCII,    JIS,   JIS,   ASCII, ASCII, ASCII,   ASCII },
  [JIS_1]    = { INV,    JIS,      JIS,      JIS,   JIS,   JIS,   INV,   JIS,     INV   },
  [J_ESC]    = { INV,    INV,      J_ESC_BR, INV,   INV,   INV,   INV,   INV,     INV   },
  [J_ESC_BR] = { INV,    INV,      INV,      INV,   ASCII, ASCII, INV,   INV,     INV   },
};
static JIS_ACTION JIS_action_table[JIS_S_NUM][JIS_C_NUM] = {
/* ESCAPE DOLLAR BRACKET AT B J NUL JIS_CHAR OTHER */
/* ASCII */ { NOOP, COPY_A, COPY_A, COPY_A, COPY_A, COPY_A, EMPTY, COPY_A, COPY_A},
/* JIS */ { NOOP, COPY_J1, COPY_J1, COPY_J1, COPY_J1, COPY_J1, ERROR, COPY_J1, ERROR },
/* A_ESC */ { COPY_A, NOOP, COPY_A, COPY_A, COPY_A, COPY_A, COPY_A, COPY_A, COPY_A},
/* A_ESC_DL */{ COPY_A, COPY_A, COPY_A, NOOP, NOOP, COPY_A, COPY_A, COPY_A, COPY_A},
/* JIS_1 */ { ERROR, COPY_J2, COPY_J2, COPY_J2, COPY_J2, COPY_J2, ERROR, COPY_J2, ERROR },
/* J_ESC */ { ERROR, ERROR, NOOP, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR },
/* J_ESC_BR */{ ERROR, ERROR, ERROR, ERROR, MAKE_A, MAKE_A, ERROR, ERROR, ERROR },
};
#endif /* !__CYGWIN__ */
/* We override the mbstate_t __count field for more complex encodings and use it to store a state value. */
#define __state __count
#ifdef _MB_EXTENDED_CHARSETS_ISO
/*
 * Shared worker behind every ISO-8859-x converter.
 *
 * iso_idx selects the row of __iso_8859_conv used to translate bytes
 * >= 0xa0.  When iso_idx is negative no table is consulted and such bytes
 * map to themselves, as bytes below 0xa0 always do.
 *
 * Returns 0 when s is NULL or points at a NUL byte, -2 when n is 0, 1 on a
 * successful conversion, and -1 with the reentrant errno set to EILSEQ
 * when the table entry for the byte is 0.  state is not used.
 */
static int
___iso_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
               int iso_idx, mbstate_t *state)
{
  wchar_t discard;
  const unsigned char *src = (const unsigned char *) s;

  if (pwc == NULL)
    pwc = &discard;
  if (src == NULL)
    return 0;
  if (n == 0)
    return -2;

  if (*src < 0xa0 || iso_idx < 0)
    {
      /* Identity mapping. */
      *pwc = (wchar_t) *src;
      return (*src == '\0') ? 0 : 1;
    }

  /* Upper half of a charset that has a conversion table. */
  *pwc = __iso_8859_conv[iso_idx][*src - 0xa0];
  if (*pwc == 0)
    {
      /* A zero entry marks a byte with no assigned character. */
      _REENT_ERRNO (r) = EILSEQ;
      return -1;
    }
  return 1;
}
/*
 * Per-charset entry points __iso_8859_<N>_mbtowc.
 *
 * Each one only calls ___iso_mbtowc with the fixed table index for its
 * charset: -1 for ISO-8859-1 (no table lookup), 0..9 for ISO-8859-2..11
 * and 10..13 for ISO-8859-13..16.  As the bodies are otherwise identical
 * they are generated from a single template.
 */
#define DEFINE_ISO_8859_MBTOWC(NUM, IDX)                                  \
  static int                                                              \
  __iso_8859_##NUM##_mbtowc (struct _reent *r, wchar_t *pwc,              \
                             const char *s, size_t n, mbstate_t *state)   \
  {                                                                       \
    return ___iso_mbtowc (r, pwc, s, n, (IDX), state);                    \
  }

DEFINE_ISO_8859_MBTOWC (1, -1)
DEFINE_ISO_8859_MBTOWC (2, 0)
DEFINE_ISO_8859_MBTOWC (3, 1)
DEFINE_ISO_8859_MBTOWC (4, 2)
DEFINE_ISO_8859_MBTOWC (5, 3)
DEFINE_ISO_8859_MBTOWC (6, 4)
DEFINE_ISO_8859_MBTOWC (7, 5)
DEFINE_ISO_8859_MBTOWC (8, 6)
DEFINE_ISO_8859_MBTOWC (9, 7)
DEFINE_ISO_8859_MBTOWC (10, 8)
DEFINE_ISO_8859_MBTOWC (11, 9)
DEFINE_ISO_8859_MBTOWC (13, 10)
DEFINE_ISO_8859_MBTOWC (14, 11)
DEFINE_ISO_8859_MBTOWC (15, 12)
DEFINE_ISO_8859_MBTOWC (16, 13)

#undef DEFINE_ISO_8859_MBTOWC
/*
 * Dispatch table of ISO-8859 converters; entry N holds the converter
 * named for ISO-8859-N.  Slot 0 is unused and slot 12 is empty because
 * there is no ISO 8859-12; both are NULL.
 *
 * The table is never modified, so it is declared const.
 */
static const mbtowc_p __iso_8859_mbtowc[17] = {
  [0]  = NULL,
  [1]  = __iso_8859_1_mbtowc,
  [2]  = __iso_8859_2_mbtowc,
  [3]  = __iso_8859_3_mbtowc,
  [4]  = __iso_8859_4_mbtowc,
  [5]  = __iso_8859_5_mbtowc,
  [6]  = __iso_8859_6_mbtowc,
  [7]  = __iso_8859_7_mbtowc,
  [8]  = __iso_8859_8_mbtowc,
  [9]  = __iso_8859_9_mbtowc,
  [10] = __iso_8859_10_mbtowc,
  [11] = __iso_8859_11_mbtowc,
  [12] = NULL,                  /* No ISO 8859-12 */
  [13] = __iso_8859_13_mbtowc,
  [14] = __iso_8859_14_mbtowc,
  [15] = __iso_8859_15_mbtowc,
  [16] = __iso_8859_16_mbtowc
};
/* Return entry val of the __iso_8859_mbtowc dispatch table, i.e. the
   converter for the ISO-8859 charset selected by val.
   val *MUST* be valid! All checks for validity are supposed to be
   performed before calling this function; the index is used as-is, with
   no bounds check here. */
mbtowc_p
__iso_mbtowc (int val)
{
return __iso_8859_mbtowc[val];
}
#endif /* _MB_EXTENDED_CHARSETS_ISO */
#ifdef _MB_EXTENDED_CHARSETS_WINDOWS
/*
 * Shared worker behind every single-byte codepage converter.
 *
 * cp_idx selects the row of __cp_conv used to translate bytes >= 0x80.
 * When cp_idx is negative no table is consulted and such bytes map to
 * themselves, as bytes below 0x80 always do.
 *
 * Returns 0 when s is NULL or points at a NUL byte, -2 when n is 0, 1 on a
 * successful conversion, and -1 with the reentrant errno set to EILSEQ
 * when the table entry for the byte is 0.  state is not used.
 */
static int
___cp_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
              int cp_idx, mbstate_t *state)
{
  wchar_t discard;
  const unsigned char *src = (const unsigned char *) s;

  if (pwc == NULL)
    pwc = &discard;
  if (src == NULL)
    return 0;
  if (n == 0)
    return -2;

  if (*src < 0x80 || cp_idx < 0)
    {
      /* Identity mapping. */
      *pwc = (wchar_t) *src;
      return (*src == '\0') ? 0 : 1;
    }

  /* Upper half of a codepage that has a conversion table. */
  *pwc = __cp_conv[cp_idx][*src - 0x80];
  if (*pwc == 0)
    {
      /* A zero entry marks a byte with no assigned character. */
      _REENT_ERRNO (r) = EILSEQ;
      return -1;
    }
  return 1;
}
/*
 * Per-codepage entry points __cp_<N>_mbtowc.
 *
 * Each one only calls ___cp_mbtowc with the fixed table index for its
 * codepage (0 for CP437 up to 26 for CP103, in the order listed below).
 * As the bodies are otherwise identical they are generated from a single
 * template.
 */
#define DEFINE_CP_MBTOWC(NUM, IDX)                                        \
  static int                                                              \
  __cp_##NUM##_mbtowc (struct _reent *r, wchar_t *pwc, const char *s,     \
                       size_t n, mbstate_t *state)                        \
  {                                                                       \
    return ___cp_mbtowc (r, pwc, s, n, (IDX), state);                     \
  }

DEFINE_CP_MBTOWC (437, 0)
DEFINE_CP_MBTOWC (720, 1)
DEFINE_CP_MBTOWC (737, 2)
DEFINE_CP_MBTOWC (775, 3)
DEFINE_CP_MBTOWC (850, 4)
DEFINE_CP_MBTOWC (852, 5)
DEFINE_CP_MBTOWC (855, 6)
DEFINE_CP_MBTOWC (857, 7)
DEFINE_CP_MBTOWC (858, 8)
DEFINE_CP_MBTOWC (862, 9)
DEFINE_CP_MBTOWC (866, 10)
DEFINE_CP_MBTOWC (874, 11)
DEFINE_CP_MBTOWC (1125, 12)
DEFINE_CP_MBTOWC (1250, 13)
DEFINE_CP_MBTOWC (1251, 14)
DEFINE_CP_MBTOWC (1252, 15)
DEFINE_CP_MBTOWC (1253, 16)
DEFINE_CP_MBTOWC (1254, 17)
DEFINE_CP_MBTOWC (1255, 18)
DEFINE_CP_MBTOWC (1256, 19)
DEFINE_CP_MBTOWC (1257, 20)
DEFINE_CP_MBTOWC (1258, 21)
DEFINE_CP_MBTOWC (20866, 22)
DEFINE_CP_MBTOWC (21866, 23)
DEFINE_CP_MBTOWC (101, 24)
DEFINE_CP_MBTOWC (102, 25)
DEFINE_CP_MBTOWC (103, 26)

#undef DEFINE_CP_MBTOWC
/*
 * Dispatch table of codepage converters.  Entry k is the converter that
 * passes table index k to ___cp_mbtowc; __cp_mbtowc picks an entry using
 * the index returned by __cp_val_index.
 *
 * The table is never modified, so it is declared const.
 */
static const mbtowc_p __cp_xxx_mbtowc[27] = {
  __cp_437_mbtowc,      /*  0 */
  __cp_720_mbtowc,      /*  1 */
  __cp_737_mbtowc,      /*  2 */
  __cp_775_mbtowc,      /*  3 */
  __cp_850_mbtowc,      /*  4 */
  __cp_852_mbtowc,      /*  5 */
  __cp_855_mbtowc,      /*  6 */
  __cp_857_mbtowc,      /*  7 */
  __cp_858_mbtowc,      /*  8 */
  __cp_862_mbtowc,      /*  9 */
  __cp_866_mbtowc,      /* 10 */
  __cp_874_mbtowc,      /* 11 */
  __cp_1125_mbtowc,     /* 12 */
  __cp_1250_mbtowc,     /* 13 */
  __cp_1251_mbtowc,     /* 14 */
  __cp_1252_mbtowc,     /* 15 */
  __cp_1253_mbtowc,     /* 16 */
  __cp_1254_mbtowc,     /* 17 */
  __cp_1255_mbtowc,     /* 18 */
  __cp_1256_mbtowc,     /* 19 */
  __cp_1257_mbtowc,     /* 20 */
  __cp_1258_mbtowc,     /* 21 */
  __cp_20866_mbtowc,    /* 22 */
  __cp_21866_mbtowc,    /* 23 */
  __cp_101_mbtowc,      /* 24 */
  __cp_102_mbtowc,      /* 25 */
  __cp_103_mbtowc,      /* 26 */
};
/* Return the converter for the codepage selected by val: val is mapped to
   a table position by __cp_val_index (defined outside this file) and the
   matching entry of __cp_xxx_mbtowc is returned.
   val *MUST* be valid! All checks for validity are supposed to be
   performed before calling this function; the computed index is used
   as-is, with no bounds check here. */
mbtowc_p
__cp_mbtowc (int val)
{
return __cp_xxx_mbtowc[__cp_val_index (val)];
}
#endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
/*
 * Restartable UTF-8 decoder.
 *
 * Examines at most n bytes at s and stores the decoded character through
 * pwc (the result is discarded when pwc is NULL).
 *
 * Return value:
 *    0   s is NULL, or the byte at s is NUL;
 *   -2   n is 0, or the n bytes supplied end in the middle of a sequence.
 *        The bytes seen so far are kept in state->__value.__wchb[] and
 *        their number in state->__count, so a later call can continue;
 *   -1   invalid or overlong sequence, or a value above 0x10ffff; the
 *        reentrant errno is set to EILSEQ.  *state is left as it was at
 *        the point of failure;
 *   >0   number of bytes read from s by this call.
 *
 * When state->__count is non-zero on entry the saved lead/continuation
 * bytes are reused instead of being read from s, and n is incremented
 * once per saved byte so the "n < k" length checks still count them.
 *
 * Accepted lead bytes: 0x00-0x7f (one byte), 0xc0-0xdf (two bytes, 0xc0
 * and 0xc1 are then rejected as overlong), 0xe0-0xef (three bytes) and
 * 0xf0-0xf4 (four bytes).  Any other lead byte is an error.
 *
 * If wchar_t is two bytes wide, a four-byte sequence is returned as a
 * surrogate pair over two calls; state->__count == 4 marks that the
 * second half is still pending.
 */
int
__utf8_mbtowc (struct _reent *r,
wchar_t *pwc,
const char *s,
size_t n,
mbstate_t *state)
{
wchar_t dummy;
unsigned char *t = (unsigned char *)s;
int ch;
/* i counts the bytes actually read from s during this call. */
int i = 0;
if (pwc == NULL)
pwc = &dummy;
if (s == NULL)
return 0;
if (n == 0)
return -2;
/* Fresh start: take the lead byte from s.  Continuation: reuse the lead
byte saved by the previous call. */
if (state->__count == 0)
ch = t[i++];
else
ch = state->__value.__wchb[0];
if (ch == '\0')
{
*pwc = 0;
state->__count = 0;
return 0; /* s points to the null character */
}
if (ch <= 0x7f)
{
/* single-byte sequence */
state->__count = 0;
*pwc = ch;
return 1;
}
if (ch >= 0xc0 && ch <= 0xdf)
{
/* two-byte sequence */
state->__value.__wchb[0] = ch;
/* Either record that the lead byte is now saved, or, if it was saved by
an earlier call, count it towards n (without wrapping at SIZE_MAX). */
if (state->__count == 0)
state->__count = 1;
else if (n < (size_t)-1)
++n;
if (n < 2)
return -2;
ch = t[i++];
if (ch < 0x80 || ch > 0xbf)
{
_REENT_ERRNO(r) = EILSEQ;
return -1;
}
if (state->__value.__wchb[0] < 0xc2)
{
/* overlong UTF-8 sequence */
_REENT_ERRNO(r) = EILSEQ;
return -1;
}
state->__count = 0;
*pwc = (wchar_t)((state->__value.__wchb[0] & 0x1f) << 6)
| (wchar_t)(ch & 0x3f);
return i;
}
if (ch >= 0xe0 && ch <= 0xef)
{
/* three-byte sequence */
wchar_t tmp;
state->__value.__wchb[0] = ch;
if (state->__count == 0)
state->__count = 1;
else if (n < (size_t)-1)
++n;
if (n < 2)
return -2;
/* Second byte: read it from s unless an earlier call already saved it. */
ch = (state->__count == 1) ? t[i++] : state->__value.__wchb[1];
if (state->__value.__wchb[0] == 0xe0 && ch < 0xa0)
{
/* overlong UTF-8 sequence */
_REENT_ERRNO(r) = EILSEQ;
return -1;
}
if (ch < 0x80 || ch > 0xbf)
{
_REENT_ERRNO(r) = EILSEQ;
return -1;
}
state->__value.__wchb[1] = ch;
if (state->__count == 1)
state->__count = 2;
else if (n < (size_t)-1)
++n;
if (n < 3)
return -2;
ch = t[i++];
if (ch < 0x80 || ch > 0xbf)
{
_REENT_ERRNO(r) = EILSEQ;
return -1;
}
state->__count = 0;
tmp = (wchar_t)((state->__value.__wchb[0] & 0x0f) << 12)
| (wchar_t)((state->__value.__wchb[1] & 0x3f) << 6)
| (wchar_t)(ch & 0x3f);
*pwc = tmp;
return i;
}
if (ch >= 0xf0 && ch <= 0xf4)
{
/* four-byte sequence */
wint_t tmp;
state->__value.__wchb[0] = ch;
if (state->__count == 0)
state->__count = 1;
else if (n < (size_t)-1)
++n;
if (n < 2)
return -2;
/* Second byte: read it from s unless an earlier call already saved it. */
ch = (state->__count == 1) ? t[i++] : state->__value.__wchb[1];
if ((state->__value.__wchb[0] == 0xf0 && ch < 0x90)
|| (state->__value.__wchb[0] == 0xf4 && ch >= 0x90))
{
/* overlong UTF-8 sequence or result is > 0x10ffff */
_REENT_ERRNO(r) = EILSEQ;
return -1;
}
if (ch < 0x80 || ch > 0xbf)
{
_REENT_ERRNO(r) = EILSEQ;
return -1;
}
state->__value.__wchb[1] = ch;
if (state->__count == 1)
state->__count = 2;
else if (n < (size_t)-1)
++n;
if (n < 3)
return -2;
/* Third byte: read it from s unless an earlier call already saved it. */
ch = (state->__count == 2) ? t[i++] : state->__value.__wchb[2];
if (ch < 0x80 || ch > 0xbf)
{
_REENT_ERRNO(r) = EILSEQ;
return -1;
}
state->__value.__wchb[2] = ch;
if (state->__count == 2)
state->__count = 3;
else if (n < (size_t)-1)
++n;
if (state->__count == 3 && sizeof(wchar_t) == 2)
{
/* On systems which have wchar_t being UTF-16 values, the value
doesn't fit into a single wchar_t in this case. So what we
do here is to store the state with a special value of __count
and return the first half of a surrogate pair. The first
three bytes of a UTF-8 sequence are enough to generate the
first half of a UTF-16 surrogate pair. As return value we
choose to return the number of bytes actually read up to
here.
The second half of the surrogate pair is returned in case we
recognize the special __count value of four, and the next
byte is actually a valid value. See below. */
tmp = (wint_t)((state->__value.__wchb[0] & 0x07) << 18)
| (wint_t)((state->__value.__wchb[1] & 0x3f) << 12)
| (wint_t)((state->__value.__wchb[2] & 0x3f) << 6);
state->__count = 4;
*pwc = 0xd800 | ((tmp - 0x10000) >> 10);
return i;
}
if (n < 4)
return -2;
/* The fourth byte is never saved in the state; it always comes from s. */
ch = t[i++];
if (ch < 0x80 || ch > 0xbf)
{
_REENT_ERRNO(r) = EILSEQ;
return -1;
}
tmp = (wint_t)((state->__value.__wchb[0] & 0x07) << 18)
| (wint_t)((state->__value.__wchb[1] & 0x3f) << 12)
| (wint_t)((state->__value.__wchb[2] & 0x3f) << 6)
| (wint_t)(ch & 0x3f);
if (state->__count == 4 && sizeof(wchar_t) == 2)
/* Create the second half of the surrogate pair for systems with
wchar_t == UTF-16 . */
*pwc = 0xdc00 | (tmp & 0x3ff);
else
*pwc = tmp;
state->__count = 0;
return i;
}
/* Lead byte is a stray continuation byte (0x80-0xbf) or 0xf5-0xff. */
_REENT_ERRNO(r) = EILSEQ;
return -1;
}
/* Cygwin defines its own doublebyte charset conversion functions
because the underlying OS requires wchar_t == UTF-16. */
#ifndef __CYGWIN__
/*
 * Shift-JIS decoder.
 *
 * A byte accepted by _issjis1 starts a two-byte character whose second
 * byte must be accepted by _issjis2; the result is (first << 8) + second.
 * Any other byte is returned unchanged as a single-byte character.
 *
 * Returns 0 when s is NULL or a NUL byte is decoded, -2 when n is 0 or
 * only the first byte of a pair is available (it is then kept in
 * state->__value.__wchb[0] with state->__count == 1), -1 with the
 * reentrant errno set to EILSEQ for a bad second byte, and otherwise the
 * number of bytes read from s.
 */
int
__sjis_mbtowc (struct _reent *r,
               wchar_t *pwc,
               const char *s,
               size_t n,
               mbstate_t *state)
{
  wchar_t discard;
  const unsigned char *src = (const unsigned char *) s;
  int byte;
  int used = 0;

  if (pwc == NULL)
    pwc = &discard;
  if (src == NULL)
    return 0;  /* not state-dependent */
  if (n == 0)
    return -2;

  byte = src[used++];

  /* No pending lead byte: see whether this byte starts a pair. */
  if (state->__count == 0 && _issjis1 (byte))
    {
      state->__value.__wchb[0] = byte;
      state->__count = 1;
      if (n <= 1)
        return -2;
      byte = src[used++];
    }

  /* A lead byte is pending (from this call or an earlier one). */
  if (state->__count == 1)
    {
      if (!_issjis2 (byte))
        {
          _REENT_ERRNO (r) = EILSEQ;
          return -1;
        }
      *pwc = (((wchar_t) state->__value.__wchb[0]) << 8) + (wchar_t) byte;
      state->__count = 0;
      return used;
    }

  /* Plain single-byte character. */
  *pwc = (wchar_t) *src;
  return (*src == '\0') ? 0 : 1;
}
/*
 * EUC-JP decoder.
 *
 * A byte accepted by _iseucjp1 starts a multibyte character and every
 * following byte must be accepted by _iseucjp2.  With a lead byte of 0x8f
 * the character has three bytes and the result is
 * (second << 8) + (third & 0x7f); otherwise it has two bytes and the
 * result is (first << 8) + second.  Any other byte is returned unchanged
 * as a single-byte character.
 *
 * Returns 0 when s is NULL or a NUL byte is decoded, -2 when n is 0 or the
 * input ends inside a character (the bytes seen are kept in
 * state->__value.__wchb[] and counted in state->__count), -1 with the
 * reentrant errno set to EILSEQ for a bad trailing byte, and otherwise
 * the number of bytes read from s.
 */
int
__eucjp_mbtowc (struct _reent *r,
                wchar_t *pwc,
                const char *s,
                size_t n,
                mbstate_t *state)
{
  wchar_t discard;
  const unsigned char *src = (const unsigned char *) s;
  int byte;
  int used = 0;

  if (pwc == NULL)
    pwc = &discard;
  if (src == NULL)
    return 0;
  if (n == 0)
    return -2;

  byte = src[used++];

  /* No pending bytes: see whether this byte starts a multibyte character. */
  if (state->__count == 0 && _iseucjp1 (byte))
    {
      state->__value.__wchb[0] = byte;
      state->__count = 1;
      if (n <= 1)
        return -2;
      byte = src[used++];
    }

  /* Lead byte pending: handle the second byte. */
  if (state->__count == 1)
    {
      if (!_iseucjp2 (byte))
        {
          _REENT_ERRNO (r) = EILSEQ;
          return -1;
        }
      if (state->__value.__wchb[0] != 0x8f)
        {
          /* Two-byte character is complete. */
          *pwc = (((wchar_t) state->__value.__wchb[0]) << 8) + (wchar_t) byte;
          state->__count = 0;
          return used;
        }
      /* Lead byte 0x8f: a third byte is required. */
      state->__value.__wchb[1] = byte;
      state->__count = 2;
      if (n <= used)
        return -2;
      byte = src[used++];
    }

  /* Two bytes pending: handle the third byte. */
  if (state->__count == 2)
    {
      if (!_iseucjp2 (byte))
        {
          _REENT_ERRNO (r) = EILSEQ;
          return -1;
        }
      *pwc = (((wchar_t) state->__value.__wchb[1]) << 8)
             + (wchar_t) (byte & 0x7f);
      state->__count = 0;
      return used;
    }

  /* Plain single-byte character. */
  *pwc = (wchar_t) *src;
  return (*src == '\0') ? 0 : 1;
}
/*
 * JIS decoder driven by JIS_state_table and JIS_action_table.
 *
 * Starting from the state saved in state->__state, each of the first n
 * bytes at s is classified, looked up in both tables, and the resulting
 * action is carried out until one of them produces a return value.
 *
 * Returns 1 after resetting the state to ASCII when s is NULL
 * (state-dependent encoding), 0 when a NUL byte is decoded in ASCII state,
 * -2 when n is 0 or the n bytes end before a character is complete (the
 * state reached is saved), -1 with the reentrant errno set to EILSEQ on
 * an invalid sequence, and otherwise the number of bytes consumed.
 */
int
__jis_mbtowc (struct _reent *r,
              wchar_t *pwc,
              const char *s,
              size_t n,
              mbstate_t *state)
{
  wchar_t discard;
  const unsigned char *src = (const unsigned char *) s;
  const unsigned char *ascii_at;  /* byte that COPY_A hands back */
  JIS_STATE st;
  JIS_ACTION act;
  JIS_CHAR_TYPE cls;
  unsigned int pos;
  int byte;

  if (pwc == NULL)
    pwc = &discard;
  if (src == NULL)
    {
      state->__state = ASCII;
      return 1;  /* state-dependent */
    }
  if (n == 0)
    return -2;

  st = state->__state;
  ascii_at = src;

  for (pos = 0; pos < n; ++pos)
    {
      byte = src[pos];

      /* Classify the byte into a table column. */
      if (byte == ESC_CHAR)
        cls = ESCAPE;
      else if (byte == '$')
        cls = DOLLAR;
      else if (byte == '@')
        cls = AT;
      else if (byte == '(')
        cls = BRACKET;
      else if (byte == 'B')
        cls = B;
      else if (byte == 'J')
        cls = J;
      else if (byte == '\0')
        cls = NUL;
      else if (_isjis (byte))
        cls = JIS_CHAR;
      else
        cls = OTHER;

      /* Both lookups use the state held before this byte. */
      act = JIS_action_table[st][cls];
      st = JIS_state_table[st][cls];

      switch (act)
        {
        case COPY_A:
          state->__state = ASCII;
          *pwc = (wchar_t) *ascii_at;
          return (pos + 1);

        case COPY_J2:
          state->__state = JIS;
          *pwc = (((wchar_t) state->__value.__wchb[0]) << 8)
                 + (wchar_t) (src[pos]);
          return (pos + 1);

        case EMPTY:
          state->__state = ASCII;
          *pwc = (wchar_t) 0;
          return 0;

        case COPY_J1:
          /* Keep the first half of a two-byte character. */
          state->__value.__wchb[0] = src[pos];
          break;

        case MAKE_A:
          /* Escape to ASCII finished; the character starts after it. */
          ascii_at = src + pos + 1;
          break;

        case NOOP:
          break;

        case ERROR:
        default:
          _REENT_ERRNO (r) = EILSEQ;
          return -1;
        }
    }

  /* Ran out of input mid-sequence: remember how far we got. */
  state->__state = st;
  return -2;  /* n < bytes needed */
}
#endif /* !__CYGWIN__*/
#endif /* _MB_CAPABLE */