mirror of
git://sourceware.org/git/newlib-cygwin.git
synced 2025-01-25 08:37:33 +08:00
2b052b3c50
Commit 89eb4bce152f was pretty half-hearted, missing the codepage character type tables and wctomb/mbtowc mappings. Fixes: 89eb4bce152f ("Cygwin: support KOI8-T codeset") Signed-off-by: Corinna Vinschen <corinna@vinschen.de>
977 lines
22 KiB
C
977 lines
22 KiB
C
#include <newlib.h>
|
|
#include <stdlib.h>
|
|
#include <locale.h>
|
|
#include "mbctype.h"
|
|
#include <wchar.h>
|
|
#include <string.h>
|
|
#include <errno.h>
|
|
#include "local.h"
|
|
|
|
/* Reentrant multibyte -> wide character conversion entry point.

   All five arguments are forwarded untouched to __MBTOWC and its result is
   returned as-is.  __MBTOWC is defined outside this file; presumably it
   resolves to the conversion routine of the active locale (one of the
   __*_mbtowc functions below) -- confirm against its definition.  */
int
_mbtowc_r (struct _reent *r,
           wchar_t       *__restrict pwc,
           const char    *__restrict s,
           size_t         n,
           mbstate_t     *state)
{
  int result = __MBTOWC (r, pwc, s, n, state);

  return result;
}
|
|
|
|
/* Convert one byte of a single-byte "ASCII" encoding to a wide character.

   r      reentrancy structure; only used to report EILSEQ (Cygwin only).
   pwc    where to store the result; may be NULL, in which case the result
          is discarded.
   s      input bytes; NULL asks whether the encoding is state-dependent.
   n      number of bytes available at s.
   state  unused by this converter.

   Returns 0 if s is NULL (not state-dependent) or if *s is the null
   character, -2 if n is 0, 1 if one byte was converted.  On Cygwin a byte
   >= 0x80 is rejected with -1 and EILSEQ; elsewhere it is passed through
   unchanged.  */
int
__ascii_mbtowc (struct _reent *r,
                wchar_t       *pwc,
                const char    *s,
                size_t         n,
                mbstate_t     *state)
{
  wchar_t scratch;
  wchar_t *out = (pwc != NULL) ? pwc : &scratch;
  unsigned char byte;

  if (s == NULL)
    return 0;

  if (n == 0)
    return -2;

  byte = *(const unsigned char *) s;

#ifdef __CYGWIN__
  if ((wchar_t) byte >= 0x80)
    {
      _REENT_ERRNO(r) = EILSEQ;
      return -1;
    }
#endif

  *out = (wchar_t) byte;

  return (byte == '\0') ? 0 : 1;
}
|
|
|
|
#ifdef _MB_CAPABLE
|
|
typedef enum __packed { ESCAPE, DOLLAR, BRACKET, AT, B, J,
|
|
NUL, JIS_CHAR, OTHER, JIS_C_NUM } JIS_CHAR_TYPE;
|
|
typedef enum __packed { ASCII, JIS, A_ESC, A_ESC_DL, JIS_1, J_ESC, J_ESC_BR,
|
|
INV, JIS_S_NUM } JIS_STATE;
|
|
typedef enum __packed { COPY_A, COPY_J1, COPY_J2, MAKE_A, NOOP, EMPTY, ERROR } JIS_ACTION;
|
|
|
|
/**************************************************************************************
|
|
* state/action tables for processing JIS encoding
|
|
* Where possible, switches to JIS are grouped with following JIS characters and switches
|
|
* to ASCII are grouped with preceding JIS characters. Thus, maximum returned length
|
|
* is 2 (switch to JIS) + 2 (JIS characters) + 2 (switch back to ASCII) = 6.
|
|
*************************************************************************************/
|
|
|
|
#ifndef __CYGWIN__
|
|
static JIS_STATE JIS_state_table[JIS_S_NUM][JIS_C_NUM] = {
|
|
/* ESCAPE DOLLAR BRACKET AT B J NUL JIS_CHAR OTHER */
|
|
/* ASCII */ { A_ESC, ASCII, ASCII, ASCII, ASCII, ASCII, ASCII, ASCII, ASCII },
|
|
/* JIS */ { J_ESC, JIS_1, JIS_1, JIS_1, JIS_1, JIS_1, INV, JIS_1, INV },
|
|
/* A_ESC */ { ASCII, A_ESC_DL, ASCII, ASCII, ASCII, ASCII, ASCII, ASCII, ASCII },
|
|
/* A_ESC_DL */{ ASCII, ASCII, ASCII, JIS, JIS, ASCII, ASCII, ASCII, ASCII },
|
|
/* JIS_1 */ { INV, JIS, JIS, JIS, JIS, JIS, INV, JIS, INV },
|
|
/* J_ESC */ { INV, INV, J_ESC_BR, INV, INV, INV, INV, INV, INV },
|
|
/* J_ESC_BR */{ INV, INV, INV, INV, ASCII, ASCII, INV, INV, INV },
|
|
};
|
|
|
|
static JIS_ACTION JIS_action_table[JIS_S_NUM][JIS_C_NUM] = {
|
|
/* ESCAPE DOLLAR BRACKET AT B J NUL JIS_CHAR OTHER */
|
|
/* ASCII */ { NOOP, COPY_A, COPY_A, COPY_A, COPY_A, COPY_A, EMPTY, COPY_A, COPY_A},
|
|
/* JIS */ { NOOP, COPY_J1, COPY_J1, COPY_J1, COPY_J1, COPY_J1, ERROR, COPY_J1, ERROR },
|
|
/* A_ESC */ { COPY_A, NOOP, COPY_A, COPY_A, COPY_A, COPY_A, COPY_A, COPY_A, COPY_A},
|
|
/* A_ESC_DL */{ COPY_A, COPY_A, COPY_A, NOOP, NOOP, COPY_A, COPY_A, COPY_A, COPY_A},
|
|
/* JIS_1 */ { ERROR, COPY_J2, COPY_J2, COPY_J2, COPY_J2, COPY_J2, ERROR, COPY_J2, ERROR },
|
|
/* J_ESC */ { ERROR, ERROR, NOOP, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR },
|
|
/* J_ESC_BR */{ ERROR, ERROR, ERROR, ERROR, MAKE_A, MAKE_A, ERROR, ERROR, ERROR },
|
|
};
|
|
#endif /* !__CYGWIN__ */
|
|
|
|
/* we override the mbstate_t __count field for more complex encodings and use it to store a state value */
|
|
#define __state __count
|
|
|
|
#ifdef _MB_EXTENDED_CHARSETS_ISO
|
|
static int
|
|
___iso_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
|
|
int iso_idx, mbstate_t *state)
|
|
{
|
|
wchar_t dummy;
|
|
unsigned char *t = (unsigned char *)s;
|
|
|
|
if (pwc == NULL)
|
|
pwc = &dummy;
|
|
|
|
if (s == NULL)
|
|
return 0;
|
|
|
|
if (n == 0)
|
|
return -2;
|
|
|
|
if (*t >= 0xa0)
|
|
{
|
|
if (iso_idx >= 0)
|
|
{
|
|
*pwc = __iso_8859_conv[iso_idx][*t - 0xa0];
|
|
if (*pwc == 0) /* Invalid character */
|
|
{
|
|
_REENT_ERRNO(r) = EILSEQ;
|
|
return -1;
|
|
}
|
|
return 1;
|
|
}
|
|
}
|
|
|
|
*pwc = (wchar_t) *t;
|
|
|
|
if (*t == '\0')
|
|
return 0;
|
|
|
|
return 1;
|
|
}
|
|
|
|
/* Per-charset entry points for ISO-8859-N.

   Each generated function has the common mbtowc signature and simply calls
   ___iso_mbtowc with the fixed conversion table index of its charset.
   ISO-8859-1 passes -1 (no table, identity mapping); there is no
   ISO-8859-12, so the indices of parts 13..16 are shifted down by one.  */
#define ISO_8859_MBTOWC(part, idx)                                      \
static int                                                              \
__iso_8859_ ## part ## _mbtowc (struct _reent *r, wchar_t *pwc,         \
                                const char *s, size_t n,                \
                                mbstate_t *state)                       \
{                                                                       \
  return ___iso_mbtowc (r, pwc, s, n, (idx), state);                    \
}

ISO_8859_MBTOWC (1, -1)
ISO_8859_MBTOWC (2, 0)
ISO_8859_MBTOWC (3, 1)
ISO_8859_MBTOWC (4, 2)
ISO_8859_MBTOWC (5, 3)
ISO_8859_MBTOWC (6, 4)
ISO_8859_MBTOWC (7, 5)
ISO_8859_MBTOWC (8, 6)
ISO_8859_MBTOWC (9, 7)
ISO_8859_MBTOWC (10, 8)
ISO_8859_MBTOWC (11, 9)
ISO_8859_MBTOWC (13, 10)
ISO_8859_MBTOWC (14, 11)
ISO_8859_MBTOWC (15, 12)
ISO_8859_MBTOWC (16, 13)

#undef ISO_8859_MBTOWC
|
|
|
|
static mbtowc_p __iso_8859_mbtowc[17] = {
|
|
NULL,
|
|
__iso_8859_1_mbtowc,
|
|
__iso_8859_2_mbtowc,
|
|
__iso_8859_3_mbtowc,
|
|
__iso_8859_4_mbtowc,
|
|
__iso_8859_5_mbtowc,
|
|
__iso_8859_6_mbtowc,
|
|
__iso_8859_7_mbtowc,
|
|
__iso_8859_8_mbtowc,
|
|
__iso_8859_9_mbtowc,
|
|
__iso_8859_10_mbtowc,
|
|
__iso_8859_11_mbtowc,
|
|
NULL, /* No ISO 8859-12 */
|
|
__iso_8859_13_mbtowc,
|
|
__iso_8859_14_mbtowc,
|
|
__iso_8859_15_mbtowc,
|
|
__iso_8859_16_mbtowc
|
|
};
|
|
|
|
/* val *MUST* be valid! All checks for validity are supposed to be
|
|
performed before calling this function. */
|
|
mbtowc_p
|
|
__iso_mbtowc (int val)
|
|
{
|
|
return __iso_8859_mbtowc[val];
|
|
}
|
|
#endif /* _MB_EXTENDED_CHARSETS_ISO */
|
|
|
|
#ifdef _MB_EXTENDED_CHARSETS_WINDOWS
|
|
static int
|
|
___cp_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
|
|
int cp_idx, mbstate_t *state)
|
|
{
|
|
wchar_t dummy;
|
|
unsigned char *t = (unsigned char *)s;
|
|
|
|
if (pwc == NULL)
|
|
pwc = &dummy;
|
|
|
|
if (s == NULL)
|
|
return 0;
|
|
|
|
if (n == 0)
|
|
return -2;
|
|
|
|
if (*t >= 0x80)
|
|
{
|
|
if (cp_idx >= 0)
|
|
{
|
|
*pwc = __cp_conv[cp_idx][*t - 0x80];
|
|
if (*pwc == 0) /* Invalid character */
|
|
{
|
|
_REENT_ERRNO(r) = EILSEQ;
|
|
return -1;
|
|
}
|
|
return 1;
|
|
}
|
|
}
|
|
|
|
*pwc = (wchar_t)*t;
|
|
|
|
if (*t == '\0')
|
|
return 0;
|
|
|
|
return 1;
|
|
}
|
|
|
|
/* Per-codepage entry points.

   Each generated function has the common mbtowc signature and simply calls
   ___cp_mbtowc with the fixed conversion table index of its codepage.  The
   indices are consecutive, in the order the codepages are listed here.  */
#define CP_MBTOWC(cp, idx)                                              \
static int                                                              \
__cp_ ## cp ## _mbtowc (struct _reent *r, wchar_t *pwc,                 \
                        const char *s, size_t n,                        \
                        mbstate_t *state)                               \
{                                                                       \
  return ___cp_mbtowc (r, pwc, s, n, (idx), state);                     \
}

CP_MBTOWC (437, 0)
CP_MBTOWC (720, 1)
CP_MBTOWC (737, 2)
CP_MBTOWC (775, 3)
CP_MBTOWC (850, 4)
CP_MBTOWC (852, 5)
CP_MBTOWC (855, 6)
CP_MBTOWC (857, 7)
CP_MBTOWC (858, 8)
CP_MBTOWC (862, 9)
CP_MBTOWC (866, 10)
CP_MBTOWC (874, 11)
CP_MBTOWC (1125, 12)
CP_MBTOWC (1250, 13)
CP_MBTOWC (1251, 14)
CP_MBTOWC (1252, 15)
CP_MBTOWC (1253, 16)
CP_MBTOWC (1254, 17)
CP_MBTOWC (1255, 18)
CP_MBTOWC (1256, 19)
CP_MBTOWC (1257, 20)
CP_MBTOWC (1258, 21)
CP_MBTOWC (20866, 22)
CP_MBTOWC (21866, 23)
CP_MBTOWC (101, 24)
CP_MBTOWC (102, 25)
CP_MBTOWC (103, 26)

#undef CP_MBTOWC
|
|
|
|
static mbtowc_p __cp_xxx_mbtowc[27] = {
|
|
__cp_437_mbtowc,
|
|
__cp_720_mbtowc,
|
|
__cp_737_mbtowc,
|
|
__cp_775_mbtowc,
|
|
__cp_850_mbtowc,
|
|
__cp_852_mbtowc,
|
|
__cp_855_mbtowc,
|
|
__cp_857_mbtowc,
|
|
__cp_858_mbtowc,
|
|
__cp_862_mbtowc,
|
|
__cp_866_mbtowc,
|
|
__cp_874_mbtowc,
|
|
__cp_1125_mbtowc,
|
|
__cp_1250_mbtowc,
|
|
__cp_1251_mbtowc,
|
|
__cp_1252_mbtowc,
|
|
__cp_1253_mbtowc,
|
|
__cp_1254_mbtowc,
|
|
__cp_1255_mbtowc,
|
|
__cp_1256_mbtowc,
|
|
__cp_1257_mbtowc,
|
|
__cp_1258_mbtowc,
|
|
__cp_20866_mbtowc,
|
|
__cp_21866_mbtowc,
|
|
__cp_101_mbtowc,
|
|
__cp_102_mbtowc,
|
|
__cp_103_mbtowc,
|
|
};
|
|
|
|
/* val *MUST* be valid! All checks for validity are supposed to be
|
|
performed before calling this function. */
|
|
mbtowc_p
|
|
__cp_mbtowc (int val)
|
|
{
|
|
return __cp_xxx_mbtowc[__cp_val_index (val)];
|
|
}
|
|
#endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
|
|
|
|
int
|
|
__utf8_mbtowc (struct _reent *r,
|
|
wchar_t *pwc,
|
|
const char *s,
|
|
size_t n,
|
|
mbstate_t *state)
|
|
{
|
|
wchar_t dummy;
|
|
unsigned char *t = (unsigned char *)s;
|
|
int ch;
|
|
int i = 0;
|
|
|
|
if (pwc == NULL)
|
|
pwc = &dummy;
|
|
|
|
if (s == NULL)
|
|
return 0;
|
|
|
|
if (n == 0)
|
|
return -2;
|
|
|
|
if (state->__count == 0)
|
|
ch = t[i++];
|
|
else
|
|
ch = state->__value.__wchb[0];
|
|
|
|
if (ch == '\0')
|
|
{
|
|
*pwc = 0;
|
|
state->__count = 0;
|
|
return 0; /* s points to the null character */
|
|
}
|
|
|
|
if (ch <= 0x7f)
|
|
{
|
|
/* single-byte sequence */
|
|
state->__count = 0;
|
|
*pwc = ch;
|
|
return 1;
|
|
}
|
|
if (ch >= 0xc0 && ch <= 0xdf)
|
|
{
|
|
/* two-byte sequence */
|
|
state->__value.__wchb[0] = ch;
|
|
if (state->__count == 0)
|
|
state->__count = 1;
|
|
else if (n < (size_t)-1)
|
|
++n;
|
|
if (n < 2)
|
|
return -2;
|
|
ch = t[i++];
|
|
if (ch < 0x80 || ch > 0xbf)
|
|
{
|
|
_REENT_ERRNO(r) = EILSEQ;
|
|
return -1;
|
|
}
|
|
if (state->__value.__wchb[0] < 0xc2)
|
|
{
|
|
/* overlong UTF-8 sequence */
|
|
_REENT_ERRNO(r) = EILSEQ;
|
|
return -1;
|
|
}
|
|
state->__count = 0;
|
|
*pwc = (wchar_t)((state->__value.__wchb[0] & 0x1f) << 6)
|
|
| (wchar_t)(ch & 0x3f);
|
|
return i;
|
|
}
|
|
if (ch >= 0xe0 && ch <= 0xef)
|
|
{
|
|
/* three-byte sequence */
|
|
wchar_t tmp;
|
|
state->__value.__wchb[0] = ch;
|
|
if (state->__count == 0)
|
|
state->__count = 1;
|
|
else if (n < (size_t)-1)
|
|
++n;
|
|
if (n < 2)
|
|
return -2;
|
|
ch = (state->__count == 1) ? t[i++] : state->__value.__wchb[1];
|
|
if (state->__value.__wchb[0] == 0xe0 && ch < 0xa0)
|
|
{
|
|
/* overlong UTF-8 sequence */
|
|
_REENT_ERRNO(r) = EILSEQ;
|
|
return -1;
|
|
}
|
|
if (ch < 0x80 || ch > 0xbf)
|
|
{
|
|
_REENT_ERRNO(r) = EILSEQ;
|
|
return -1;
|
|
}
|
|
state->__value.__wchb[1] = ch;
|
|
if (state->__count == 1)
|
|
state->__count = 2;
|
|
else if (n < (size_t)-1)
|
|
++n;
|
|
if (n < 3)
|
|
return -2;
|
|
ch = t[i++];
|
|
if (ch < 0x80 || ch > 0xbf)
|
|
{
|
|
_REENT_ERRNO(r) = EILSEQ;
|
|
return -1;
|
|
}
|
|
state->__count = 0;
|
|
tmp = (wchar_t)((state->__value.__wchb[0] & 0x0f) << 12)
|
|
| (wchar_t)((state->__value.__wchb[1] & 0x3f) << 6)
|
|
| (wchar_t)(ch & 0x3f);
|
|
*pwc = tmp;
|
|
return i;
|
|
}
|
|
if (ch >= 0xf0 && ch <= 0xf4)
|
|
{
|
|
/* four-byte sequence */
|
|
wint_t tmp;
|
|
state->__value.__wchb[0] = ch;
|
|
if (state->__count == 0)
|
|
state->__count = 1;
|
|
else if (n < (size_t)-1)
|
|
++n;
|
|
if (n < 2)
|
|
return -2;
|
|
ch = (state->__count == 1) ? t[i++] : state->__value.__wchb[1];
|
|
if ((state->__value.__wchb[0] == 0xf0 && ch < 0x90)
|
|
|| (state->__value.__wchb[0] == 0xf4 && ch >= 0x90))
|
|
{
|
|
/* overlong UTF-8 sequence or result is > 0x10ffff */
|
|
_REENT_ERRNO(r) = EILSEQ;
|
|
return -1;
|
|
}
|
|
if (ch < 0x80 || ch > 0xbf)
|
|
{
|
|
_REENT_ERRNO(r) = EILSEQ;
|
|
return -1;
|
|
}
|
|
state->__value.__wchb[1] = ch;
|
|
if (state->__count == 1)
|
|
state->__count = 2;
|
|
else if (n < (size_t)-1)
|
|
++n;
|
|
if (n < 3)
|
|
return -2;
|
|
ch = (state->__count == 2) ? t[i++] : state->__value.__wchb[2];
|
|
if (ch < 0x80 || ch > 0xbf)
|
|
{
|
|
_REENT_ERRNO(r) = EILSEQ;
|
|
return -1;
|
|
}
|
|
state->__value.__wchb[2] = ch;
|
|
if (state->__count == 2)
|
|
state->__count = 3;
|
|
else if (n < (size_t)-1)
|
|
++n;
|
|
if (state->__count == 3 && sizeof(wchar_t) == 2)
|
|
{
|
|
/* On systems which have wchar_t being UTF-16 values, the value
|
|
doesn't fit into a single wchar_t in this case. So what we
|
|
do here is to store the state with a special value of __count
|
|
and return the first half of a surrogate pair. The first
|
|
three bytes of a UTF-8 sequence are enough to generate the
|
|
first half of a UTF-16 surrogate pair. As return value we
|
|
choose to return the number of bytes actually read up to
|
|
here.
|
|
The second half of the surrogate pair is returned in case we
|
|
recognize the special __count value of four, and the next
|
|
byte is actually a valid value. See below. */
|
|
tmp = (wint_t)((state->__value.__wchb[0] & 0x07) << 18)
|
|
| (wint_t)((state->__value.__wchb[1] & 0x3f) << 12)
|
|
| (wint_t)((state->__value.__wchb[2] & 0x3f) << 6);
|
|
state->__count = 4;
|
|
*pwc = 0xd800 | ((tmp - 0x10000) >> 10);
|
|
return i;
|
|
}
|
|
if (n < 4)
|
|
return -2;
|
|
ch = t[i++];
|
|
if (ch < 0x80 || ch > 0xbf)
|
|
{
|
|
_REENT_ERRNO(r) = EILSEQ;
|
|
return -1;
|
|
}
|
|
tmp = (wint_t)((state->__value.__wchb[0] & 0x07) << 18)
|
|
| (wint_t)((state->__value.__wchb[1] & 0x3f) << 12)
|
|
| (wint_t)((state->__value.__wchb[2] & 0x3f) << 6)
|
|
| (wint_t)(ch & 0x3f);
|
|
if (state->__count == 4 && sizeof(wchar_t) == 2)
|
|
/* Create the second half of the surrogate pair for systems with
|
|
wchar_t == UTF-16 . */
|
|
*pwc = 0xdc00 | (tmp & 0x3ff);
|
|
else
|
|
*pwc = tmp;
|
|
state->__count = 0;
|
|
return i;
|
|
}
|
|
|
|
_REENT_ERRNO(r) = EILSEQ;
|
|
return -1;
|
|
}
|
|
|
|
/* Cygwin defines its own doublebyte charset conversion functions
|
|
because the underlying OS requires wchar_t == UTF-16. */
|
|
#ifndef __CYGWIN__
|
|
int
|
|
__sjis_mbtowc (struct _reent *r,
|
|
wchar_t *pwc,
|
|
const char *s,
|
|
size_t n,
|
|
mbstate_t *state)
|
|
{
|
|
wchar_t dummy;
|
|
unsigned char *t = (unsigned char *)s;
|
|
int ch;
|
|
int i = 0;
|
|
|
|
if (pwc == NULL)
|
|
pwc = &dummy;
|
|
|
|
if (s == NULL)
|
|
return 0; /* not state-dependent */
|
|
|
|
if (n == 0)
|
|
return -2;
|
|
|
|
ch = t[i++];
|
|
if (state->__count == 0)
|
|
{
|
|
if (_issjis1 (ch))
|
|
{
|
|
state->__value.__wchb[0] = ch;
|
|
state->__count = 1;
|
|
if (n <= 1)
|
|
return -2;
|
|
ch = t[i++];
|
|
}
|
|
}
|
|
if (state->__count == 1)
|
|
{
|
|
if (_issjis2 (ch))
|
|
{
|
|
*pwc = (((wchar_t)state->__value.__wchb[0]) << 8) + (wchar_t)ch;
|
|
state->__count = 0;
|
|
return i;
|
|
}
|
|
else
|
|
{
|
|
_REENT_ERRNO(r) = EILSEQ;
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
*pwc = (wchar_t)*t;
|
|
|
|
if (*t == '\0')
|
|
return 0;
|
|
|
|
return 1;
|
|
}
|
|
|
|
int
|
|
__eucjp_mbtowc (struct _reent *r,
|
|
wchar_t *pwc,
|
|
const char *s,
|
|
size_t n,
|
|
mbstate_t *state)
|
|
{
|
|
wchar_t dummy;
|
|
unsigned char *t = (unsigned char *)s;
|
|
int ch;
|
|
int i = 0;
|
|
|
|
if (pwc == NULL)
|
|
pwc = &dummy;
|
|
|
|
if (s == NULL)
|
|
return 0;
|
|
|
|
if (n == 0)
|
|
return -2;
|
|
|
|
ch = t[i++];
|
|
if (state->__count == 0)
|
|
{
|
|
if (_iseucjp1 (ch))
|
|
{
|
|
state->__value.__wchb[0] = ch;
|
|
state->__count = 1;
|
|
if (n <= 1)
|
|
return -2;
|
|
ch = t[i++];
|
|
}
|
|
}
|
|
if (state->__count == 1)
|
|
{
|
|
if (_iseucjp2 (ch))
|
|
{
|
|
if (state->__value.__wchb[0] == 0x8f)
|
|
{
|
|
state->__value.__wchb[1] = ch;
|
|
state->__count = 2;
|
|
if (n <= i)
|
|
return -2;
|
|
ch = t[i++];
|
|
}
|
|
else
|
|
{
|
|
*pwc = (((wchar_t)state->__value.__wchb[0]) << 8) + (wchar_t)ch;
|
|
state->__count = 0;
|
|
return i;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
_REENT_ERRNO(r) = EILSEQ;
|
|
return -1;
|
|
}
|
|
}
|
|
if (state->__count == 2)
|
|
{
|
|
if (_iseucjp2 (ch))
|
|
{
|
|
*pwc = (((wchar_t)state->__value.__wchb[1]) << 8)
|
|
+ (wchar_t)(ch & 0x7f);
|
|
state->__count = 0;
|
|
return i;
|
|
}
|
|
else
|
|
{
|
|
_REENT_ERRNO(r) = EILSEQ;
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
*pwc = (wchar_t)*t;
|
|
|
|
if (*t == '\0')
|
|
return 0;
|
|
|
|
return 1;
|
|
}
|
|
|
|
int
|
|
__jis_mbtowc (struct _reent *r,
|
|
wchar_t *pwc,
|
|
const char *s,
|
|
size_t n,
|
|
mbstate_t *state)
|
|
{
|
|
wchar_t dummy;
|
|
unsigned char *t = (unsigned char *)s;
|
|
JIS_STATE curr_state;
|
|
JIS_ACTION action;
|
|
JIS_CHAR_TYPE ch;
|
|
unsigned char *ptr;
|
|
unsigned int i;
|
|
int curr_ch;
|
|
|
|
if (pwc == NULL)
|
|
pwc = &dummy;
|
|
|
|
if (s == NULL)
|
|
{
|
|
state->__state = ASCII;
|
|
return 1; /* state-dependent */
|
|
}
|
|
|
|
if (n == 0)
|
|
return -2;
|
|
|
|
curr_state = state->__state;
|
|
ptr = t;
|
|
|
|
for (i = 0; i < n; ++i)
|
|
{
|
|
curr_ch = t[i];
|
|
switch (curr_ch)
|
|
{
|
|
case ESC_CHAR:
|
|
ch = ESCAPE;
|
|
break;
|
|
case '$':
|
|
ch = DOLLAR;
|
|
break;
|
|
case '@':
|
|
ch = AT;
|
|
break;
|
|
case '(':
|
|
ch = BRACKET;
|
|
break;
|
|
case 'B':
|
|
ch = B;
|
|
break;
|
|
case 'J':
|
|
ch = J;
|
|
break;
|
|
case '\0':
|
|
ch = NUL;
|
|
break;
|
|
default:
|
|
if (_isjis (curr_ch))
|
|
ch = JIS_CHAR;
|
|
else
|
|
ch = OTHER;
|
|
}
|
|
|
|
action = JIS_action_table[curr_state][ch];
|
|
curr_state = JIS_state_table[curr_state][ch];
|
|
|
|
switch (action)
|
|
{
|
|
case NOOP:
|
|
break;
|
|
case EMPTY:
|
|
state->__state = ASCII;
|
|
*pwc = (wchar_t)0;
|
|
return 0;
|
|
case COPY_A:
|
|
state->__state = ASCII;
|
|
*pwc = (wchar_t)*ptr;
|
|
return (i + 1);
|
|
case COPY_J1:
|
|
state->__value.__wchb[0] = t[i];
|
|
break;
|
|
case COPY_J2:
|
|
state->__state = JIS;
|
|
*pwc = (((wchar_t)state->__value.__wchb[0]) << 8) + (wchar_t)(t[i]);
|
|
return (i + 1);
|
|
case MAKE_A:
|
|
ptr = (unsigned char *)(t + i + 1);
|
|
break;
|
|
case ERROR:
|
|
default:
|
|
_REENT_ERRNO(r) = EILSEQ;
|
|
return -1;
|
|
}
|
|
|
|
}
|
|
|
|
state->__state = curr_state;
|
|
return -2; /* n < bytes needed */
|
|
}
|
|
#endif /* !__CYGWIN__*/
|
|
#endif /* _MB_CAPABLE */
|