mirror of
git://sourceware.org/git/newlib-cygwin.git
synced 2025-01-25 16:47:20 +08:00
8a43189438
This reverts commit 2b77087a48ea56e77fca5aeab478c922f6473d7c. For some reason lost in time, commit 2b77087a48ea5 introduced Cygwin-specific code treating single byte characters outside the portable character set as illegal chars. However, Cygwin was always alone with this over-correct behaviour and it leads to stuff like gnulib replacing functions defined in Cygwin with their own implementation just due to that. Revert this change, sans the changes to ChangeLog. Fixes: 2b77087a48ea ("* libc/stdlib/mbtowc_r.c (__ascii_mbtowc): Disallow conversion of") Signed-off-by: Corinna Vinschen <corinna@vinschen.de>
969 lines
21 KiB
C
969 lines
21 KiB
C
#include <newlib.h>
|
|
#include <stdlib.h>
|
|
#include <locale.h>
|
|
#include "mbctype.h"
|
|
#include <wchar.h>
|
|
#include <string.h>
|
|
#include <errno.h>
|
|
#include "local.h"
|
|
|
|
/* Reentrant mbtowc entry point.  All five arguments are handed unchanged
   to __MBTOWC (defined outside this part of the file) and its result is
   returned as-is.  */
int
_mbtowc_r (struct _reent *r,
           wchar_t       *__restrict pwc,
           const char    *__restrict s,
           size_t         n,
           mbstate_t     *state)
{
  return __MBTOWC (r, pwc, s, n, state);
}
|
|
|
|
/* Single-byte converter: every byte maps to the wide character with the
   same numeric value (bytes above 0x7f are passed through, not rejected).

   r      unused.
   pwc    destination for the wide character; may be NULL, in which case
          the result goes to a local scratch variable.
   s      input byte; a NULL pointer yields 0.
   n      number of bytes available at s; 0 yields -2.
   state  unused.

   Returns 0 if the byte is the null character, otherwise 1.  */
int
__ascii_mbtowc (struct _reent *r,
                wchar_t       *pwc,
                const char    *s,
                size_t         n,
                mbstate_t     *state)
{
  wchar_t dummy;
  /* Read through an unsigned view so bytes >= 0x80 never sign-extend.  */
  const unsigned char *t = (const unsigned char *) s;

  if (pwc == NULL)
    pwc = &dummy;

  if (s == NULL)
    return 0;

  if (n == 0)
    return -2;

  *pwc = (wchar_t) *t;

  if (*t == '\0')
    return 0;

  return 1;
}
|
|
|
|
#ifdef _MB_CAPABLE
|
|
typedef enum __packed { ESCAPE, DOLLAR, BRACKET, AT, B, J,
|
|
NUL, JIS_CHAR, OTHER, JIS_C_NUM } JIS_CHAR_TYPE;
|
|
typedef enum __packed { ASCII, JIS, A_ESC, A_ESC_DL, JIS_1, J_ESC, J_ESC_BR,
|
|
INV, JIS_S_NUM } JIS_STATE;
|
|
typedef enum __packed { COPY_A, COPY_J1, COPY_J2, MAKE_A, NOOP, EMPTY, ERROR } JIS_ACTION;
|
|
|
|
/**************************************************************************************
 * state/action tables for processing JIS encoding
 * Where possible, switches to JIS are grouped with the following JIS characters and
 * switches to ASCII are grouped with preceding JIS characters.  Thus, maximum returned
 * length is 2 (switch to JIS) + 2 (JIS characters) + 2 (switch back to ASCII) = 6.
 *************************************************************************************/
|
|
|
|
#ifndef __CYGWIN__
|
|
static JIS_STATE JIS_state_table[JIS_S_NUM][JIS_C_NUM] = {
|
|
/* ESCAPE DOLLAR BRACKET AT B J NUL JIS_CHAR OTHER */
|
|
/* ASCII */ { A_ESC, ASCII, ASCII, ASCII, ASCII, ASCII, ASCII, ASCII, ASCII },
|
|
/* JIS */ { J_ESC, JIS_1, JIS_1, JIS_1, JIS_1, JIS_1, INV, JIS_1, INV },
|
|
/* A_ESC */ { ASCII, A_ESC_DL, ASCII, ASCII, ASCII, ASCII, ASCII, ASCII, ASCII },
|
|
/* A_ESC_DL */{ ASCII, ASCII, ASCII, JIS, JIS, ASCII, ASCII, ASCII, ASCII },
|
|
/* JIS_1 */ { INV, JIS, JIS, JIS, JIS, JIS, INV, JIS, INV },
|
|
/* J_ESC */ { INV, INV, J_ESC_BR, INV, INV, INV, INV, INV, INV },
|
|
/* J_ESC_BR */{ INV, INV, INV, INV, ASCII, ASCII, INV, INV, INV },
|
|
};
|
|
|
|
static JIS_ACTION JIS_action_table[JIS_S_NUM][JIS_C_NUM] = {
|
|
/* ESCAPE DOLLAR BRACKET AT B J NUL JIS_CHAR OTHER */
|
|
/* ASCII */ { NOOP, COPY_A, COPY_A, COPY_A, COPY_A, COPY_A, EMPTY, COPY_A, COPY_A},
|
|
/* JIS */ { NOOP, COPY_J1, COPY_J1, COPY_J1, COPY_J1, COPY_J1, ERROR, COPY_J1, ERROR },
|
|
/* A_ESC */ { COPY_A, NOOP, COPY_A, COPY_A, COPY_A, COPY_A, COPY_A, COPY_A, COPY_A},
|
|
/* A_ESC_DL */{ COPY_A, COPY_A, COPY_A, NOOP, NOOP, COPY_A, COPY_A, COPY_A, COPY_A},
|
|
/* JIS_1 */ { ERROR, COPY_J2, COPY_J2, COPY_J2, COPY_J2, COPY_J2, ERROR, COPY_J2, ERROR },
|
|
/* J_ESC */ { ERROR, ERROR, NOOP, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR },
|
|
/* J_ESC_BR */{ ERROR, ERROR, ERROR, ERROR, MAKE_A, MAKE_A, ERROR, ERROR, ERROR },
|
|
};
|
|
#endif /* !__CYGWIN__ */
|
|
|
|
/* We override the mbstate_t __count field for more complex encodings and use it to store a state value. */
|
|
#define __state __count
|
|
|
|
#ifdef _MB_EXTENDED_CHARSETS_ISO
|
|
static int
|
|
___iso_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
|
|
int iso_idx, mbstate_t *state)
|
|
{
|
|
wchar_t dummy;
|
|
unsigned char *t = (unsigned char *)s;
|
|
|
|
if (pwc == NULL)
|
|
pwc = &dummy;
|
|
|
|
if (s == NULL)
|
|
return 0;
|
|
|
|
if (n == 0)
|
|
return -2;
|
|
|
|
if (*t >= 0xa0)
|
|
{
|
|
if (iso_idx >= 0)
|
|
{
|
|
*pwc = __iso_8859_conv[iso_idx][*t - 0xa0];
|
|
if (*pwc == 0) /* Invalid character */
|
|
{
|
|
_REENT_ERRNO(r) = EILSEQ;
|
|
return -1;
|
|
}
|
|
return 1;
|
|
}
|
|
}
|
|
|
|
*pwc = (wchar_t) *t;
|
|
|
|
if (*t == '\0')
|
|
return 0;
|
|
|
|
return 1;
|
|
}
|
|
|
|
/* ISO 8859-1 converter: ___iso_mbtowc with index -1, i.e. no table
   lookup; each byte maps to the wide character of the same value.  */
static int
__iso_8859_1_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
                     mbstate_t *state)
{
  return ___iso_mbtowc (r, pwc, s, n, -1, state);
}
|
|
|
|
/* ISO 8859-2 converter: ___iso_mbtowc using conversion index 0.  */
static int
__iso_8859_2_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
                     mbstate_t *state)
{
  return ___iso_mbtowc (r, pwc, s, n, 0, state);
}
|
|
|
|
/* ISO 8859-3 converter: ___iso_mbtowc using conversion index 1.  */
static int
__iso_8859_3_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
                     mbstate_t *state)
{
  return ___iso_mbtowc (r, pwc, s, n, 1, state);
}
|
|
|
|
/* ISO 8859-4 converter: ___iso_mbtowc using conversion index 2.  */
static int
__iso_8859_4_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
                     mbstate_t *state)
{
  return ___iso_mbtowc (r, pwc, s, n, 2, state);
}
|
|
|
|
/* ISO 8859-5 converter: ___iso_mbtowc using conversion index 3.  */
static int
__iso_8859_5_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
                     mbstate_t *state)
{
  return ___iso_mbtowc (r, pwc, s, n, 3, state);
}
|
|
|
|
/* ISO 8859-6 converter: ___iso_mbtowc using conversion index 4.  */
static int
__iso_8859_6_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
                     mbstate_t *state)
{
  return ___iso_mbtowc (r, pwc, s, n, 4, state);
}
|
|
|
|
/* ISO 8859-7 converter: ___iso_mbtowc using conversion index 5.  */
static int
__iso_8859_7_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
                     mbstate_t *state)
{
  return ___iso_mbtowc (r, pwc, s, n, 5, state);
}
|
|
|
|
/* ISO 8859-8 converter: ___iso_mbtowc using conversion index 6.  */
static int
__iso_8859_8_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
                     mbstate_t *state)
{
  return ___iso_mbtowc (r, pwc, s, n, 6, state);
}
|
|
|
|
/* ISO 8859-9 converter: ___iso_mbtowc using conversion index 7.  */
static int
__iso_8859_9_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
                     mbstate_t *state)
{
  return ___iso_mbtowc (r, pwc, s, n, 7, state);
}
|
|
|
|
/* ISO 8859-10 converter: ___iso_mbtowc using conversion index 8.  */
static int
__iso_8859_10_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
                      mbstate_t *state)
{
  return ___iso_mbtowc (r, pwc, s, n, 8, state);
}
|
|
|
|
/* ISO 8859-11 converter: ___iso_mbtowc using conversion index 9.  */
static int
__iso_8859_11_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
                      mbstate_t *state)
{
  return ___iso_mbtowc (r, pwc, s, n, 9, state);
}
|
|
|
|
/* ISO 8859-13 converter: ___iso_mbtowc using conversion index 10.  */
static int
__iso_8859_13_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
                      mbstate_t *state)
{
  return ___iso_mbtowc (r, pwc, s, n, 10, state);
}
|
|
|
|
/* ISO 8859-14 converter: ___iso_mbtowc using conversion index 11.  */
static int
__iso_8859_14_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
                      mbstate_t *state)
{
  return ___iso_mbtowc (r, pwc, s, n, 11, state);
}
|
|
|
|
/* ISO 8859-15 converter: ___iso_mbtowc using conversion index 12.  */
static int
__iso_8859_15_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
                      mbstate_t *state)
{
  return ___iso_mbtowc (r, pwc, s, n, 12, state);
}
|
|
|
|
/* ISO 8859-16 converter: ___iso_mbtowc using conversion index 13.  */
static int
__iso_8859_16_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
                      mbstate_t *state)
{
  return ___iso_mbtowc (r, pwc, s, n, 13, state);
}
|
|
|
|
static mbtowc_p __iso_8859_mbtowc[17] = {
|
|
NULL,
|
|
__iso_8859_1_mbtowc,
|
|
__iso_8859_2_mbtowc,
|
|
__iso_8859_3_mbtowc,
|
|
__iso_8859_4_mbtowc,
|
|
__iso_8859_5_mbtowc,
|
|
__iso_8859_6_mbtowc,
|
|
__iso_8859_7_mbtowc,
|
|
__iso_8859_8_mbtowc,
|
|
__iso_8859_9_mbtowc,
|
|
__iso_8859_10_mbtowc,
|
|
__iso_8859_11_mbtowc,
|
|
NULL, /* No ISO 8859-12 */
|
|
__iso_8859_13_mbtowc,
|
|
__iso_8859_14_mbtowc,
|
|
__iso_8859_15_mbtowc,
|
|
__iso_8859_16_mbtowc
|
|
};
|
|
|
|
/* val *MUST* be valid! All checks for validity are supposed to be
|
|
performed before calling this function. */
|
|
mbtowc_p
|
|
__iso_mbtowc (int val)
|
|
{
|
|
return __iso_8859_mbtowc[val];
|
|
}
|
|
#endif /* _MB_EXTENDED_CHARSETS_ISO */
|
|
|
|
#ifdef _MB_EXTENDED_CHARSETS_WINDOWS
|
|
static int
|
|
___cp_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
|
|
int cp_idx, mbstate_t *state)
|
|
{
|
|
wchar_t dummy;
|
|
unsigned char *t = (unsigned char *)s;
|
|
|
|
if (pwc == NULL)
|
|
pwc = &dummy;
|
|
|
|
if (s == NULL)
|
|
return 0;
|
|
|
|
if (n == 0)
|
|
return -2;
|
|
|
|
if (*t >= 0x80)
|
|
{
|
|
if (cp_idx >= 0)
|
|
{
|
|
*pwc = __cp_conv[cp_idx][*t - 0x80];
|
|
if (*pwc == 0) /* Invalid character */
|
|
{
|
|
_REENT_ERRNO(r) = EILSEQ;
|
|
return -1;
|
|
}
|
|
return 1;
|
|
}
|
|
}
|
|
|
|
*pwc = (wchar_t)*t;
|
|
|
|
if (*t == '\0')
|
|
return 0;
|
|
|
|
return 1;
|
|
}
|
|
|
|
/* Code page 437 converter: ___cp_mbtowc using conversion index 0.  */
static int
__cp_437_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
                 mbstate_t *state)
{
  return ___cp_mbtowc (r, pwc, s, n, 0, state);
}
|
|
|
|
/* Code page 720 converter: ___cp_mbtowc using conversion index 1.  */
static int
__cp_720_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
                 mbstate_t *state)
{
  return ___cp_mbtowc (r, pwc, s, n, 1, state);
}
|
|
|
|
/* Code page 737 converter: ___cp_mbtowc using conversion index 2.  */
static int
__cp_737_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
                 mbstate_t *state)
{
  return ___cp_mbtowc (r, pwc, s, n, 2, state);
}
|
|
|
|
/* Code page 775 converter: ___cp_mbtowc using conversion index 3.  */
static int
__cp_775_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
                 mbstate_t *state)
{
  return ___cp_mbtowc (r, pwc, s, n, 3, state);
}
|
|
|
|
/* Code page 850 converter: ___cp_mbtowc using conversion index 4.  */
static int
__cp_850_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
                 mbstate_t *state)
{
  return ___cp_mbtowc (r, pwc, s, n, 4, state);
}
|
|
|
|
/* Code page 852 converter: ___cp_mbtowc using conversion index 5.  */
static int
__cp_852_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
                 mbstate_t *state)
{
  return ___cp_mbtowc (r, pwc, s, n, 5, state);
}
|
|
|
|
/* Code page 855 converter: ___cp_mbtowc using conversion index 6.  */
static int
__cp_855_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
                 mbstate_t *state)
{
  return ___cp_mbtowc (r, pwc, s, n, 6, state);
}
|
|
|
|
/* Code page 857 converter: ___cp_mbtowc using conversion index 7.  */
static int
__cp_857_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
                 mbstate_t *state)
{
  return ___cp_mbtowc (r, pwc, s, n, 7, state);
}
|
|
|
|
/* Code page 858 converter: ___cp_mbtowc using conversion index 8.  */
static int
__cp_858_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
                 mbstate_t *state)
{
  return ___cp_mbtowc (r, pwc, s, n, 8, state);
}
|
|
|
|
/* Code page 862 converter: ___cp_mbtowc using conversion index 9.  */
static int
__cp_862_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
                 mbstate_t *state)
{
  return ___cp_mbtowc (r, pwc, s, n, 9, state);
}
|
|
|
|
/* Code page 866 converter: ___cp_mbtowc using conversion index 10.  */
static int
__cp_866_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
                 mbstate_t *state)
{
  return ___cp_mbtowc (r, pwc, s, n, 10, state);
}
|
|
|
|
/* Code page 874 converter: ___cp_mbtowc using conversion index 11.  */
static int
__cp_874_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
                 mbstate_t *state)
{
  return ___cp_mbtowc (r, pwc, s, n, 11, state);
}
|
|
|
|
/* Code page 1125 converter: ___cp_mbtowc using conversion index 12.  */
static int
__cp_1125_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
                  mbstate_t *state)
{
  return ___cp_mbtowc (r, pwc, s, n, 12, state);
}
|
|
|
|
/* Code page 1250 converter: ___cp_mbtowc using conversion index 13.  */
static int
__cp_1250_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
                  mbstate_t *state)
{
  return ___cp_mbtowc (r, pwc, s, n, 13, state);
}
|
|
|
|
/* Code page 1251 converter: ___cp_mbtowc using conversion index 14.  */
static int
__cp_1251_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
                  mbstate_t *state)
{
  return ___cp_mbtowc (r, pwc, s, n, 14, state);
}
|
|
|
|
/* Code page 1252 converter: ___cp_mbtowc using conversion index 15.  */
static int
__cp_1252_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
                  mbstate_t *state)
{
  return ___cp_mbtowc (r, pwc, s, n, 15, state);
}
|
|
|
|
/* Code page 1253 converter: ___cp_mbtowc using conversion index 16.  */
static int
__cp_1253_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
                  mbstate_t *state)
{
  return ___cp_mbtowc (r, pwc, s, n, 16, state);
}
|
|
|
|
/* Code page 1254 converter: ___cp_mbtowc using conversion index 17.  */
static int
__cp_1254_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
                  mbstate_t *state)
{
  return ___cp_mbtowc (r, pwc, s, n, 17, state);
}
|
|
|
|
/* Code page 1255 converter: ___cp_mbtowc using conversion index 18.  */
static int
__cp_1255_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
                  mbstate_t *state)
{
  return ___cp_mbtowc (r, pwc, s, n, 18, state);
}
|
|
|
|
/* Code page 1256 converter: ___cp_mbtowc using conversion index 19.  */
static int
__cp_1256_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
                  mbstate_t *state)
{
  return ___cp_mbtowc (r, pwc, s, n, 19, state);
}
|
|
|
|
/* Code page 1257 converter: ___cp_mbtowc using conversion index 20.  */
static int
__cp_1257_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
                  mbstate_t *state)
{
  return ___cp_mbtowc (r, pwc, s, n, 20, state);
}
|
|
|
|
/* Code page 1258 converter: ___cp_mbtowc using conversion index 21.  */
static int
__cp_1258_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
                  mbstate_t *state)
{
  return ___cp_mbtowc (r, pwc, s, n, 21, state);
}
|
|
|
|
/* Code page 20866 converter: ___cp_mbtowc using conversion index 22.  */
static int
__cp_20866_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
                   mbstate_t *state)
{
  return ___cp_mbtowc (r, pwc, s, n, 22, state);
}
|
|
|
|
/* Code page 21866 converter: ___cp_mbtowc using conversion index 23.  */
static int
__cp_21866_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
                   mbstate_t *state)
{
  return ___cp_mbtowc (r, pwc, s, n, 23, state);
}
|
|
|
|
/* Code page 101 converter: ___cp_mbtowc using conversion index 24.  */
static int
__cp_101_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
                 mbstate_t *state)
{
  return ___cp_mbtowc (r, pwc, s, n, 24, state);
}
|
|
|
|
/* Code page 102 converter: ___cp_mbtowc using conversion index 25.  */
static int
__cp_102_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
                 mbstate_t *state)
{
  return ___cp_mbtowc (r, pwc, s, n, 25, state);
}
|
|
|
|
/* Code page 103 converter: ___cp_mbtowc using conversion index 26.  */
static int
__cp_103_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
                 mbstate_t *state)
{
  return ___cp_mbtowc (r, pwc, s, n, 26, state);
}
|
|
|
|
static mbtowc_p __cp_xxx_mbtowc[27] = {
|
|
__cp_437_mbtowc,
|
|
__cp_720_mbtowc,
|
|
__cp_737_mbtowc,
|
|
__cp_775_mbtowc,
|
|
__cp_850_mbtowc,
|
|
__cp_852_mbtowc,
|
|
__cp_855_mbtowc,
|
|
__cp_857_mbtowc,
|
|
__cp_858_mbtowc,
|
|
__cp_862_mbtowc,
|
|
__cp_866_mbtowc,
|
|
__cp_874_mbtowc,
|
|
__cp_1125_mbtowc,
|
|
__cp_1250_mbtowc,
|
|
__cp_1251_mbtowc,
|
|
__cp_1252_mbtowc,
|
|
__cp_1253_mbtowc,
|
|
__cp_1254_mbtowc,
|
|
__cp_1255_mbtowc,
|
|
__cp_1256_mbtowc,
|
|
__cp_1257_mbtowc,
|
|
__cp_1258_mbtowc,
|
|
__cp_20866_mbtowc,
|
|
__cp_21866_mbtowc,
|
|
__cp_101_mbtowc,
|
|
__cp_102_mbtowc,
|
|
__cp_103_mbtowc,
|
|
};
|
|
|
|
/* val *MUST* be valid! All checks for validity are supposed to be
|
|
performed before calling this function. */
|
|
mbtowc_p
|
|
__cp_mbtowc (int val)
|
|
{
|
|
return __cp_xxx_mbtowc[__cp_val_index (val)];
|
|
}
|
|
#endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
|
|
|
|
int
|
|
__utf8_mbtowc (struct _reent *r,
|
|
wchar_t *pwc,
|
|
const char *s,
|
|
size_t n,
|
|
mbstate_t *state)
|
|
{
|
|
wchar_t dummy;
|
|
unsigned char *t = (unsigned char *)s;
|
|
int ch;
|
|
int i = 0;
|
|
|
|
if (pwc == NULL)
|
|
pwc = &dummy;
|
|
|
|
if (s == NULL)
|
|
return 0;
|
|
|
|
if (n == 0)
|
|
return -2;
|
|
|
|
if (state->__count == 0)
|
|
ch = t[i++];
|
|
else
|
|
ch = state->__value.__wchb[0];
|
|
|
|
if (ch == '\0')
|
|
{
|
|
*pwc = 0;
|
|
state->__count = 0;
|
|
return 0; /* s points to the null character */
|
|
}
|
|
|
|
if (ch <= 0x7f)
|
|
{
|
|
/* single-byte sequence */
|
|
state->__count = 0;
|
|
*pwc = ch;
|
|
return 1;
|
|
}
|
|
if (ch >= 0xc0 && ch <= 0xdf)
|
|
{
|
|
/* two-byte sequence */
|
|
state->__value.__wchb[0] = ch;
|
|
if (state->__count == 0)
|
|
state->__count = 1;
|
|
else if (n < (size_t)-1)
|
|
++n;
|
|
if (n < 2)
|
|
return -2;
|
|
ch = t[i++];
|
|
if (ch < 0x80 || ch > 0xbf)
|
|
{
|
|
_REENT_ERRNO(r) = EILSEQ;
|
|
return -1;
|
|
}
|
|
if (state->__value.__wchb[0] < 0xc2)
|
|
{
|
|
/* overlong UTF-8 sequence */
|
|
_REENT_ERRNO(r) = EILSEQ;
|
|
return -1;
|
|
}
|
|
state->__count = 0;
|
|
*pwc = (wchar_t)((state->__value.__wchb[0] & 0x1f) << 6)
|
|
| (wchar_t)(ch & 0x3f);
|
|
return i;
|
|
}
|
|
if (ch >= 0xe0 && ch <= 0xef)
|
|
{
|
|
/* three-byte sequence */
|
|
wchar_t tmp;
|
|
state->__value.__wchb[0] = ch;
|
|
if (state->__count == 0)
|
|
state->__count = 1;
|
|
else if (n < (size_t)-1)
|
|
++n;
|
|
if (n < 2)
|
|
return -2;
|
|
ch = (state->__count == 1) ? t[i++] : state->__value.__wchb[1];
|
|
if (state->__value.__wchb[0] == 0xe0 && ch < 0xa0)
|
|
{
|
|
/* overlong UTF-8 sequence */
|
|
_REENT_ERRNO(r) = EILSEQ;
|
|
return -1;
|
|
}
|
|
if (ch < 0x80 || ch > 0xbf)
|
|
{
|
|
_REENT_ERRNO(r) = EILSEQ;
|
|
return -1;
|
|
}
|
|
state->__value.__wchb[1] = ch;
|
|
if (state->__count == 1)
|
|
state->__count = 2;
|
|
else if (n < (size_t)-1)
|
|
++n;
|
|
if (n < 3)
|
|
return -2;
|
|
ch = t[i++];
|
|
if (ch < 0x80 || ch > 0xbf)
|
|
{
|
|
_REENT_ERRNO(r) = EILSEQ;
|
|
return -1;
|
|
}
|
|
state->__count = 0;
|
|
tmp = (wchar_t)((state->__value.__wchb[0] & 0x0f) << 12)
|
|
| (wchar_t)((state->__value.__wchb[1] & 0x3f) << 6)
|
|
| (wchar_t)(ch & 0x3f);
|
|
*pwc = tmp;
|
|
return i;
|
|
}
|
|
if (ch >= 0xf0 && ch <= 0xf4)
|
|
{
|
|
/* four-byte sequence */
|
|
wint_t tmp;
|
|
state->__value.__wchb[0] = ch;
|
|
if (state->__count == 0)
|
|
state->__count = 1;
|
|
else if (n < (size_t)-1)
|
|
++n;
|
|
if (n < 2)
|
|
return -2;
|
|
ch = (state->__count == 1) ? t[i++] : state->__value.__wchb[1];
|
|
if ((state->__value.__wchb[0] == 0xf0 && ch < 0x90)
|
|
|| (state->__value.__wchb[0] == 0xf4 && ch >= 0x90))
|
|
{
|
|
/* overlong UTF-8 sequence or result is > 0x10ffff */
|
|
_REENT_ERRNO(r) = EILSEQ;
|
|
return -1;
|
|
}
|
|
if (ch < 0x80 || ch > 0xbf)
|
|
{
|
|
_REENT_ERRNO(r) = EILSEQ;
|
|
return -1;
|
|
}
|
|
state->__value.__wchb[1] = ch;
|
|
if (state->__count == 1)
|
|
state->__count = 2;
|
|
else if (n < (size_t)-1)
|
|
++n;
|
|
if (n < 3)
|
|
return -2;
|
|
ch = (state->__count == 2) ? t[i++] : state->__value.__wchb[2];
|
|
if (ch < 0x80 || ch > 0xbf)
|
|
{
|
|
_REENT_ERRNO(r) = EILSEQ;
|
|
return -1;
|
|
}
|
|
state->__value.__wchb[2] = ch;
|
|
if (state->__count == 2)
|
|
state->__count = 3;
|
|
else if (n < (size_t)-1)
|
|
++n;
|
|
if (state->__count == 3 && sizeof(wchar_t) == 2)
|
|
{
|
|
/* On systems which have wchar_t being UTF-16 values, the value
|
|
doesn't fit into a single wchar_t in this case. So what we
|
|
do here is to store the state with a special value of __count
|
|
and return the first half of a surrogate pair. The first
|
|
three bytes of a UTF-8 sequence are enough to generate the
|
|
first half of a UTF-16 surrogate pair. As return value we
|
|
choose to return the number of bytes actually read up to
|
|
here.
|
|
The second half of the surrogate pair is returned in case we
|
|
recognize the special __count value of four, and the next
|
|
byte is actually a valid value. See below. */
|
|
tmp = (wint_t)((state->__value.__wchb[0] & 0x07) << 18)
|
|
| (wint_t)((state->__value.__wchb[1] & 0x3f) << 12)
|
|
| (wint_t)((state->__value.__wchb[2] & 0x3f) << 6);
|
|
state->__count = 4;
|
|
*pwc = 0xd800 | ((tmp - 0x10000) >> 10);
|
|
return i;
|
|
}
|
|
if (n < 4)
|
|
return -2;
|
|
ch = t[i++];
|
|
if (ch < 0x80 || ch > 0xbf)
|
|
{
|
|
_REENT_ERRNO(r) = EILSEQ;
|
|
return -1;
|
|
}
|
|
tmp = (wint_t)((state->__value.__wchb[0] & 0x07) << 18)
|
|
| (wint_t)((state->__value.__wchb[1] & 0x3f) << 12)
|
|
| (wint_t)((state->__value.__wchb[2] & 0x3f) << 6)
|
|
| (wint_t)(ch & 0x3f);
|
|
if (state->__count == 4 && sizeof(wchar_t) == 2)
|
|
/* Create the second half of the surrogate pair for systems with
|
|
wchar_t == UTF-16 . */
|
|
*pwc = 0xdc00 | (tmp & 0x3ff);
|
|
else
|
|
*pwc = tmp;
|
|
state->__count = 0;
|
|
return i;
|
|
}
|
|
|
|
_REENT_ERRNO(r) = EILSEQ;
|
|
return -1;
|
|
}
|
|
|
|
/* Cygwin defines its own doublebyte charset conversion functions
|
|
because the underlying OS requires wchar_t == UTF-16. */
|
|
#ifndef __CYGWIN__
|
|
int
|
|
__sjis_mbtowc (struct _reent *r,
|
|
wchar_t *pwc,
|
|
const char *s,
|
|
size_t n,
|
|
mbstate_t *state)
|
|
{
|
|
wchar_t dummy;
|
|
unsigned char *t = (unsigned char *)s;
|
|
int ch;
|
|
int i = 0;
|
|
|
|
if (pwc == NULL)
|
|
pwc = &dummy;
|
|
|
|
if (s == NULL)
|
|
return 0; /* not state-dependent */
|
|
|
|
if (n == 0)
|
|
return -2;
|
|
|
|
ch = t[i++];
|
|
if (state->__count == 0)
|
|
{
|
|
if (_issjis1 (ch))
|
|
{
|
|
state->__value.__wchb[0] = ch;
|
|
state->__count = 1;
|
|
if (n <= 1)
|
|
return -2;
|
|
ch = t[i++];
|
|
}
|
|
}
|
|
if (state->__count == 1)
|
|
{
|
|
if (_issjis2 (ch))
|
|
{
|
|
*pwc = (((wchar_t)state->__value.__wchb[0]) << 8) + (wchar_t)ch;
|
|
state->__count = 0;
|
|
return i;
|
|
}
|
|
else
|
|
{
|
|
_REENT_ERRNO(r) = EILSEQ;
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
*pwc = (wchar_t)*t;
|
|
|
|
if (*t == '\0')
|
|
return 0;
|
|
|
|
return 1;
|
|
}
|
|
|
|
int
|
|
__eucjp_mbtowc (struct _reent *r,
|
|
wchar_t *pwc,
|
|
const char *s,
|
|
size_t n,
|
|
mbstate_t *state)
|
|
{
|
|
wchar_t dummy;
|
|
unsigned char *t = (unsigned char *)s;
|
|
int ch;
|
|
int i = 0;
|
|
|
|
if (pwc == NULL)
|
|
pwc = &dummy;
|
|
|
|
if (s == NULL)
|
|
return 0;
|
|
|
|
if (n == 0)
|
|
return -2;
|
|
|
|
ch = t[i++];
|
|
if (state->__count == 0)
|
|
{
|
|
if (_iseucjp1 (ch))
|
|
{
|
|
state->__value.__wchb[0] = ch;
|
|
state->__count = 1;
|
|
if (n <= 1)
|
|
return -2;
|
|
ch = t[i++];
|
|
}
|
|
}
|
|
if (state->__count == 1)
|
|
{
|
|
if (_iseucjp2 (ch))
|
|
{
|
|
if (state->__value.__wchb[0] == 0x8f)
|
|
{
|
|
state->__value.__wchb[1] = ch;
|
|
state->__count = 2;
|
|
if (n <= i)
|
|
return -2;
|
|
ch = t[i++];
|
|
}
|
|
else
|
|
{
|
|
*pwc = (((wchar_t)state->__value.__wchb[0]) << 8) + (wchar_t)ch;
|
|
state->__count = 0;
|
|
return i;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
_REENT_ERRNO(r) = EILSEQ;
|
|
return -1;
|
|
}
|
|
}
|
|
if (state->__count == 2)
|
|
{
|
|
if (_iseucjp2 (ch))
|
|
{
|
|
*pwc = (((wchar_t)state->__value.__wchb[1]) << 8)
|
|
+ (wchar_t)(ch & 0x7f);
|
|
state->__count = 0;
|
|
return i;
|
|
}
|
|
else
|
|
{
|
|
_REENT_ERRNO(r) = EILSEQ;
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
*pwc = (wchar_t)*t;
|
|
|
|
if (*t == '\0')
|
|
return 0;
|
|
|
|
return 1;
|
|
}
|
|
|
|
int
|
|
__jis_mbtowc (struct _reent *r,
|
|
wchar_t *pwc,
|
|
const char *s,
|
|
size_t n,
|
|
mbstate_t *state)
|
|
{
|
|
wchar_t dummy;
|
|
unsigned char *t = (unsigned char *)s;
|
|
JIS_STATE curr_state;
|
|
JIS_ACTION action;
|
|
JIS_CHAR_TYPE ch;
|
|
unsigned char *ptr;
|
|
unsigned int i;
|
|
int curr_ch;
|
|
|
|
if (pwc == NULL)
|
|
pwc = &dummy;
|
|
|
|
if (s == NULL)
|
|
{
|
|
state->__state = ASCII;
|
|
return 1; /* state-dependent */
|
|
}
|
|
|
|
if (n == 0)
|
|
return -2;
|
|
|
|
curr_state = state->__state;
|
|
ptr = t;
|
|
|
|
for (i = 0; i < n; ++i)
|
|
{
|
|
curr_ch = t[i];
|
|
switch (curr_ch)
|
|
{
|
|
case ESC_CHAR:
|
|
ch = ESCAPE;
|
|
break;
|
|
case '$':
|
|
ch = DOLLAR;
|
|
break;
|
|
case '@':
|
|
ch = AT;
|
|
break;
|
|
case '(':
|
|
ch = BRACKET;
|
|
break;
|
|
case 'B':
|
|
ch = B;
|
|
break;
|
|
case 'J':
|
|
ch = J;
|
|
break;
|
|
case '\0':
|
|
ch = NUL;
|
|
break;
|
|
default:
|
|
if (_isjis (curr_ch))
|
|
ch = JIS_CHAR;
|
|
else
|
|
ch = OTHER;
|
|
}
|
|
|
|
action = JIS_action_table[curr_state][ch];
|
|
curr_state = JIS_state_table[curr_state][ch];
|
|
|
|
switch (action)
|
|
{
|
|
case NOOP:
|
|
break;
|
|
case EMPTY:
|
|
state->__state = ASCII;
|
|
*pwc = (wchar_t)0;
|
|
return 0;
|
|
case COPY_A:
|
|
state->__state = ASCII;
|
|
*pwc = (wchar_t)*ptr;
|
|
return (i + 1);
|
|
case COPY_J1:
|
|
state->__value.__wchb[0] = t[i];
|
|
break;
|
|
case COPY_J2:
|
|
state->__state = JIS;
|
|
*pwc = (((wchar_t)state->__value.__wchb[0]) << 8) + (wchar_t)(t[i]);
|
|
return (i + 1);
|
|
case MAKE_A:
|
|
ptr = (unsigned char *)(t + i + 1);
|
|
break;
|
|
case ERROR:
|
|
default:
|
|
_REENT_ERRNO(r) = EILSEQ;
|
|
return -1;
|
|
}
|
|
|
|
}
|
|
|
|
state->__state = curr_state;
|
|
return -2; /* n < bytes needed */
|
|
}
|
|
#endif /* !__CYGWIN__*/
|
|
#endif /* _MB_CAPABLE */
|