mirror of
git://sourceware.org/git/newlib-cygwin.git
synced 2025-01-22 15:07:43 +08:00
f70d9ae6ad
Some architectures like ARM encode the short enum option state in the object file and the linker checks that this option is consistent for all objects of an executable. In case applications use -fno-short-enums, then this leads to linker warnings. Use the enum __packed attribute for the relevant enums to avoid the -fshort-enums compiler option. This attribute is at least available on GCC, LLVM/clang and the Intel compiler. Signed-off-by: Sebastian Huber <sebastian.huber@embedded-brains.de>
975 lines
22 KiB
C
975 lines
22 KiB
C
#include <newlib.h>
|
|
#include <stdlib.h>
|
|
#include <locale.h>
|
|
#include "mbctype.h"
|
|
#include <wchar.h>
|
|
#include <string.h>
|
|
#include <errno.h>
|
|
#include "local.h"
|
|
|
|
int
|
|
_DEFUN (_mbtowc_r, (r, pwc, s, n, state),
|
|
struct _reent *r _AND
|
|
wchar_t *__restrict pwc _AND
|
|
const char *__restrict s _AND
|
|
size_t n _AND
|
|
mbstate_t *state)
|
|
{
|
|
return __MBTOWC (r, pwc, s, n, state);
|
|
}
|
|
|
|
int
|
|
_DEFUN (__ascii_mbtowc, (r, pwc, s, n, state),
|
|
struct _reent *r _AND
|
|
wchar_t *pwc _AND
|
|
const char *s _AND
|
|
size_t n _AND
|
|
mbstate_t *state)
|
|
{
|
|
wchar_t dummy;
|
|
unsigned char *t = (unsigned char *)s;
|
|
|
|
if (pwc == NULL)
|
|
pwc = &dummy;
|
|
|
|
if (s == NULL)
|
|
return 0;
|
|
|
|
if (n == 0)
|
|
return -2;
|
|
|
|
#ifdef __CYGWIN__
|
|
if ((wchar_t)*t >= 0x80)
|
|
{
|
|
r->_errno = EILSEQ;
|
|
return -1;
|
|
}
|
|
#endif
|
|
|
|
*pwc = (wchar_t)*t;
|
|
|
|
if (*t == '\0')
|
|
return 0;
|
|
|
|
return 1;
|
|
}
|
|
|
|
#ifdef _MB_CAPABLE
|
|
typedef enum __packed { ESCAPE, DOLLAR, BRACKET, AT, B, J,
|
|
NUL, JIS_CHAR, OTHER, JIS_C_NUM } JIS_CHAR_TYPE;
|
|
typedef enum __packed { ASCII, JIS, A_ESC, A_ESC_DL, JIS_1, J_ESC, J_ESC_BR,
|
|
INV, JIS_S_NUM } JIS_STATE;
|
|
typedef enum __packed { COPY_A, COPY_J1, COPY_J2, MAKE_A, NOOP, EMPTY, ERROR } JIS_ACTION;
|
|
|
|
/**************************************************************************************
|
|
* state/action tables for processing JIS encoding
|
|
* Where possible, switches to JIS are grouped with the following JIS characters and switches
|
|
* to ASCII are grouped with preceding JIS characters. Thus, maximum returned length
|
|
* is 2 (switch to JIS) + 2 (JIS characters) + 2 (switch back to ASCII) = 6.
|
|
*************************************************************************************/
|
|
|
|
#ifndef __CYGWIN__
|
|
static JIS_STATE JIS_state_table[JIS_S_NUM][JIS_C_NUM] = {
|
|
/* ESCAPE DOLLAR BRACKET AT B J NUL JIS_CHAR OTHER */
|
|
/* ASCII */ { A_ESC, ASCII, ASCII, ASCII, ASCII, ASCII, ASCII, ASCII, ASCII },
|
|
/* JIS */ { J_ESC, JIS_1, JIS_1, JIS_1, JIS_1, JIS_1, INV, JIS_1, INV },
|
|
/* A_ESC */ { ASCII, A_ESC_DL, ASCII, ASCII, ASCII, ASCII, ASCII, ASCII, ASCII },
|
|
/* A_ESC_DL */{ ASCII, ASCII, ASCII, JIS, JIS, ASCII, ASCII, ASCII, ASCII },
|
|
/* JIS_1 */ { INV, JIS, JIS, JIS, JIS, JIS, INV, JIS, INV },
|
|
/* J_ESC */ { INV, INV, J_ESC_BR, INV, INV, INV, INV, INV, INV },
|
|
/* J_ESC_BR */{ INV, INV, INV, INV, ASCII, ASCII, INV, INV, INV },
|
|
};
|
|
|
|
static JIS_ACTION JIS_action_table[JIS_S_NUM][JIS_C_NUM] = {
|
|
/* ESCAPE DOLLAR BRACKET AT B J NUL JIS_CHAR OTHER */
|
|
/* ASCII */ { NOOP, COPY_A, COPY_A, COPY_A, COPY_A, COPY_A, EMPTY, COPY_A, COPY_A},
|
|
/* JIS */ { NOOP, COPY_J1, COPY_J1, COPY_J1, COPY_J1, COPY_J1, ERROR, COPY_J1, ERROR },
|
|
/* A_ESC */ { COPY_A, NOOP, COPY_A, COPY_A, COPY_A, COPY_A, COPY_A, COPY_A, COPY_A},
|
|
/* A_ESC_DL */{ COPY_A, COPY_A, COPY_A, NOOP, NOOP, COPY_A, COPY_A, COPY_A, COPY_A},
|
|
/* JIS_1 */ { ERROR, COPY_J2, COPY_J2, COPY_J2, COPY_J2, COPY_J2, ERROR, COPY_J2, ERROR },
|
|
/* J_ESC */ { ERROR, ERROR, NOOP, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR },
|
|
/* J_ESC_BR */{ ERROR, ERROR, ERROR, ERROR, MAKE_A, MAKE_A, ERROR, ERROR, ERROR },
|
|
};
|
|
#endif /* !__CYGWIN__ */
|
|
|
|
/* We override the mbstate_t __count field for more complex encodings and use it to store a state value. */
|
|
#define __state __count
|
|
|
|
#ifdef _MB_EXTENDED_CHARSETS_ISO
|
|
static int
|
|
___iso_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
|
|
int iso_idx, mbstate_t *state)
|
|
{
|
|
wchar_t dummy;
|
|
unsigned char *t = (unsigned char *)s;
|
|
|
|
if (pwc == NULL)
|
|
pwc = &dummy;
|
|
|
|
if (s == NULL)
|
|
return 0;
|
|
|
|
if (n == 0)
|
|
return -2;
|
|
|
|
if (*t >= 0xa0)
|
|
{
|
|
if (iso_idx >= 0)
|
|
{
|
|
*pwc = __iso_8859_conv[iso_idx][*t - 0xa0];
|
|
if (*pwc == 0) /* Invalid character */
|
|
{
|
|
r->_errno = EILSEQ;
|
|
return -1;
|
|
}
|
|
return 1;
|
|
}
|
|
}
|
|
|
|
*pwc = (wchar_t) *t;
|
|
|
|
if (*t == '\0')
|
|
return 0;
|
|
|
|
return 1;
|
|
}
|
|
|
|
static int
|
|
__iso_8859_1_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
|
|
mbstate_t *state)
|
|
{
|
|
return ___iso_mbtowc (r, pwc, s, n, -1, state);
|
|
}
|
|
|
|
static int
|
|
__iso_8859_2_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
|
|
mbstate_t *state)
|
|
{
|
|
return ___iso_mbtowc (r, pwc, s, n, 0, state);
|
|
}
|
|
|
|
static int
|
|
__iso_8859_3_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
|
|
mbstate_t *state)
|
|
{
|
|
return ___iso_mbtowc (r, pwc, s, n, 1, state);
|
|
}
|
|
|
|
static int
|
|
__iso_8859_4_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
|
|
mbstate_t *state)
|
|
{
|
|
return ___iso_mbtowc (r, pwc, s, n, 2, state);
|
|
}
|
|
|
|
static int
|
|
__iso_8859_5_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
|
|
mbstate_t *state)
|
|
{
|
|
return ___iso_mbtowc (r, pwc, s, n, 3, state);
|
|
}
|
|
|
|
static int
|
|
__iso_8859_6_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
|
|
mbstate_t *state)
|
|
{
|
|
return ___iso_mbtowc (r, pwc, s, n, 4, state);
|
|
}
|
|
|
|
static int
|
|
__iso_8859_7_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
|
|
mbstate_t *state)
|
|
{
|
|
return ___iso_mbtowc (r, pwc, s, n, 5, state);
|
|
}
|
|
|
|
static int
|
|
__iso_8859_8_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
|
|
mbstate_t *state)
|
|
{
|
|
return ___iso_mbtowc (r, pwc, s, n, 6, state);
|
|
}
|
|
|
|
static int
|
|
__iso_8859_9_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
|
|
mbstate_t *state)
|
|
{
|
|
return ___iso_mbtowc (r, pwc, s, n, 7, state);
|
|
}
|
|
|
|
static int
|
|
__iso_8859_10_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
|
|
mbstate_t *state)
|
|
{
|
|
return ___iso_mbtowc (r, pwc, s, n, 8, state);
|
|
}
|
|
|
|
static int
|
|
__iso_8859_11_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
|
|
mbstate_t *state)
|
|
{
|
|
return ___iso_mbtowc (r, pwc, s, n, 9, state);
|
|
}
|
|
|
|
static int
|
|
__iso_8859_13_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
|
|
mbstate_t *state)
|
|
{
|
|
return ___iso_mbtowc (r, pwc, s, n, 10, state);
|
|
}
|
|
|
|
static int
|
|
__iso_8859_14_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
|
|
mbstate_t *state)
|
|
{
|
|
return ___iso_mbtowc (r, pwc, s, n, 11, state);
|
|
}
|
|
|
|
static int
|
|
__iso_8859_15_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
|
|
mbstate_t *state)
|
|
{
|
|
return ___iso_mbtowc (r, pwc, s, n, 12, state);
|
|
}
|
|
|
|
static int
|
|
__iso_8859_16_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
|
|
mbstate_t *state)
|
|
{
|
|
return ___iso_mbtowc (r, pwc, s, n, 13, state);
|
|
}
|
|
|
|
static mbtowc_p __iso_8859_mbtowc[17] = {
|
|
NULL,
|
|
__iso_8859_1_mbtowc,
|
|
__iso_8859_2_mbtowc,
|
|
__iso_8859_3_mbtowc,
|
|
__iso_8859_4_mbtowc,
|
|
__iso_8859_5_mbtowc,
|
|
__iso_8859_6_mbtowc,
|
|
__iso_8859_7_mbtowc,
|
|
__iso_8859_8_mbtowc,
|
|
__iso_8859_9_mbtowc,
|
|
__iso_8859_10_mbtowc,
|
|
__iso_8859_11_mbtowc,
|
|
NULL, /* No ISO 8859-12 */
|
|
__iso_8859_13_mbtowc,
|
|
__iso_8859_14_mbtowc,
|
|
__iso_8859_15_mbtowc,
|
|
__iso_8859_16_mbtowc
|
|
};
|
|
|
|
/* val *MUST* be valid! All checks for validity are supposed to be
|
|
performed before calling this function. */
|
|
mbtowc_p
|
|
__iso_mbtowc (int val)
|
|
{
|
|
return __iso_8859_mbtowc[val];
|
|
}
|
|
#endif /* _MB_EXTENDED_CHARSETS_ISO */
|
|
|
|
#ifdef _MB_EXTENDED_CHARSETS_WINDOWS
|
|
static int
|
|
___cp_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
|
|
int cp_idx, mbstate_t *state)
|
|
{
|
|
wchar_t dummy;
|
|
unsigned char *t = (unsigned char *)s;
|
|
|
|
if (pwc == NULL)
|
|
pwc = &dummy;
|
|
|
|
if (s == NULL)
|
|
return 0;
|
|
|
|
if (n == 0)
|
|
return -2;
|
|
|
|
if (*t >= 0x80)
|
|
{
|
|
if (cp_idx >= 0)
|
|
{
|
|
*pwc = __cp_conv[cp_idx][*t - 0x80];
|
|
if (*pwc == 0) /* Invalid character */
|
|
{
|
|
r->_errno = EILSEQ;
|
|
return -1;
|
|
}
|
|
return 1;
|
|
}
|
|
}
|
|
|
|
*pwc = (wchar_t)*t;
|
|
|
|
if (*t == '\0')
|
|
return 0;
|
|
|
|
return 1;
|
|
}
|
|
|
|
static int
|
|
__cp_437_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
|
|
mbstate_t *state)
|
|
{
|
|
return ___cp_mbtowc (r, pwc, s, n, 0, state);
|
|
}
|
|
|
|
static int
|
|
__cp_720_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
|
|
mbstate_t *state)
|
|
{
|
|
return ___cp_mbtowc (r, pwc, s, n, 1, state);
|
|
}
|
|
|
|
static int
|
|
__cp_737_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
|
|
mbstate_t *state)
|
|
{
|
|
return ___cp_mbtowc (r, pwc, s, n, 2, state);
|
|
}
|
|
|
|
static int
|
|
__cp_775_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
|
|
mbstate_t *state)
|
|
{
|
|
return ___cp_mbtowc (r, pwc, s, n, 3, state);
|
|
}
|
|
|
|
static int
|
|
__cp_850_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
|
|
mbstate_t *state)
|
|
{
|
|
return ___cp_mbtowc (r, pwc, s, n, 4, state);
|
|
}
|
|
|
|
static int
|
|
__cp_852_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
|
|
mbstate_t *state)
|
|
{
|
|
return ___cp_mbtowc (r, pwc, s, n, 5, state);
|
|
}
|
|
|
|
static int
|
|
__cp_855_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
|
|
mbstate_t *state)
|
|
{
|
|
return ___cp_mbtowc (r, pwc, s, n, 6, state);
|
|
}
|
|
|
|
static int
|
|
__cp_857_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
|
|
mbstate_t *state)
|
|
{
|
|
return ___cp_mbtowc (r, pwc, s, n, 7, state);
|
|
}
|
|
|
|
static int
|
|
__cp_858_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
|
|
mbstate_t *state)
|
|
{
|
|
return ___cp_mbtowc (r, pwc, s, n, 8, state);
|
|
}
|
|
|
|
static int
|
|
__cp_862_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
|
|
mbstate_t *state)
|
|
{
|
|
return ___cp_mbtowc (r, pwc, s, n, 9, state);
|
|
}
|
|
|
|
static int
|
|
__cp_866_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
|
|
mbstate_t *state)
|
|
{
|
|
return ___cp_mbtowc (r, pwc, s, n, 10, state);
|
|
}
|
|
|
|
static int
|
|
__cp_874_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
|
|
mbstate_t *state)
|
|
{
|
|
return ___cp_mbtowc (r, pwc, s, n, 11, state);
|
|
}
|
|
|
|
static int
|
|
__cp_1125_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
|
|
mbstate_t *state)
|
|
{
|
|
return ___cp_mbtowc (r, pwc, s, n, 12, state);
|
|
}
|
|
|
|
static int
|
|
__cp_1250_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
|
|
mbstate_t *state)
|
|
{
|
|
return ___cp_mbtowc (r, pwc, s, n, 13, state);
|
|
}
|
|
|
|
static int
|
|
__cp_1251_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
|
|
mbstate_t *state)
|
|
{
|
|
return ___cp_mbtowc (r, pwc, s, n, 14, state);
|
|
}
|
|
|
|
static int
|
|
__cp_1252_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
|
|
mbstate_t *state)
|
|
{
|
|
return ___cp_mbtowc (r, pwc, s, n, 15, state);
|
|
}
|
|
|
|
static int
|
|
__cp_1253_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
|
|
mbstate_t *state)
|
|
{
|
|
return ___cp_mbtowc (r, pwc, s, n, 16, state);
|
|
}
|
|
|
|
static int
|
|
__cp_1254_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
|
|
mbstate_t *state)
|
|
{
|
|
return ___cp_mbtowc (r, pwc, s, n, 17, state);
|
|
}
|
|
|
|
static int
|
|
__cp_1255_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
|
|
mbstate_t *state)
|
|
{
|
|
return ___cp_mbtowc (r, pwc, s, n, 18, state);
|
|
}
|
|
|
|
static int
|
|
__cp_1256_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
|
|
mbstate_t *state)
|
|
{
|
|
return ___cp_mbtowc (r, pwc, s, n, 19, state);
|
|
}
|
|
|
|
static int
|
|
__cp_1257_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
|
|
mbstate_t *state)
|
|
{
|
|
return ___cp_mbtowc (r, pwc, s, n, 20, state);
|
|
}
|
|
|
|
static int
|
|
__cp_1258_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
|
|
mbstate_t *state)
|
|
{
|
|
return ___cp_mbtowc (r, pwc, s, n, 21, state);
|
|
}
|
|
|
|
static int
|
|
__cp_20866_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
|
|
mbstate_t *state)
|
|
{
|
|
return ___cp_mbtowc (r, pwc, s, n, 22, state);
|
|
}
|
|
|
|
static int
|
|
__cp_21866_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
|
|
mbstate_t *state)
|
|
{
|
|
return ___cp_mbtowc (r, pwc, s, n, 23, state);
|
|
}
|
|
|
|
static int
|
|
__cp_101_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
|
|
mbstate_t *state)
|
|
{
|
|
return ___cp_mbtowc (r, pwc, s, n, 24, state);
|
|
}
|
|
|
|
static int
|
|
__cp_102_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
|
|
mbstate_t *state)
|
|
{
|
|
return ___cp_mbtowc (r, pwc, s, n, 25, state);
|
|
}
|
|
|
|
static mbtowc_p __cp_xxx_mbtowc[26] = {
|
|
__cp_437_mbtowc,
|
|
__cp_720_mbtowc,
|
|
__cp_737_mbtowc,
|
|
__cp_775_mbtowc,
|
|
__cp_850_mbtowc,
|
|
__cp_852_mbtowc,
|
|
__cp_855_mbtowc,
|
|
__cp_857_mbtowc,
|
|
__cp_858_mbtowc,
|
|
__cp_862_mbtowc,
|
|
__cp_866_mbtowc,
|
|
__cp_874_mbtowc,
|
|
__cp_1125_mbtowc,
|
|
__cp_1250_mbtowc,
|
|
__cp_1251_mbtowc,
|
|
__cp_1252_mbtowc,
|
|
__cp_1253_mbtowc,
|
|
__cp_1254_mbtowc,
|
|
__cp_1255_mbtowc,
|
|
__cp_1256_mbtowc,
|
|
__cp_1257_mbtowc,
|
|
__cp_1258_mbtowc,
|
|
__cp_20866_mbtowc,
|
|
__cp_21866_mbtowc,
|
|
__cp_101_mbtowc,
|
|
__cp_102_mbtowc
|
|
};
|
|
|
|
/* val *MUST* be valid! All checks for validity are supposed to be
|
|
performed before calling this function. */
|
|
mbtowc_p
|
|
__cp_mbtowc (int val)
|
|
{
|
|
return __cp_xxx_mbtowc[__cp_val_index (val)];
|
|
}
|
|
#endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
|
|
|
|
int
|
|
_DEFUN (__utf8_mbtowc, (r, pwc, s, n, state),
|
|
struct _reent *r _AND
|
|
wchar_t *pwc _AND
|
|
const char *s _AND
|
|
size_t n _AND
|
|
mbstate_t *state)
|
|
{
|
|
wchar_t dummy;
|
|
unsigned char *t = (unsigned char *)s;
|
|
int ch;
|
|
int i = 0;
|
|
|
|
if (pwc == NULL)
|
|
pwc = &dummy;
|
|
|
|
if (s == NULL)
|
|
return 0;
|
|
|
|
if (n == 0)
|
|
return -2;
|
|
|
|
if (state->__count == 0)
|
|
ch = t[i++];
|
|
else
|
|
ch = state->__value.__wchb[0];
|
|
|
|
if (ch == '\0')
|
|
{
|
|
*pwc = 0;
|
|
state->__count = 0;
|
|
return 0; /* s points to the null character */
|
|
}
|
|
|
|
if (ch <= 0x7f)
|
|
{
|
|
/* single-byte sequence */
|
|
state->__count = 0;
|
|
*pwc = ch;
|
|
return 1;
|
|
}
|
|
if (ch >= 0xc0 && ch <= 0xdf)
|
|
{
|
|
/* two-byte sequence */
|
|
state->__value.__wchb[0] = ch;
|
|
if (state->__count == 0)
|
|
state->__count = 1;
|
|
else if (n < (size_t)-1)
|
|
++n;
|
|
if (n < 2)
|
|
return -2;
|
|
ch = t[i++];
|
|
if (ch < 0x80 || ch > 0xbf)
|
|
{
|
|
r->_errno = EILSEQ;
|
|
return -1;
|
|
}
|
|
if (state->__value.__wchb[0] < 0xc2)
|
|
{
|
|
/* overlong UTF-8 sequence */
|
|
r->_errno = EILSEQ;
|
|
return -1;
|
|
}
|
|
state->__count = 0;
|
|
*pwc = (wchar_t)((state->__value.__wchb[0] & 0x1f) << 6)
|
|
| (wchar_t)(ch & 0x3f);
|
|
return i;
|
|
}
|
|
if (ch >= 0xe0 && ch <= 0xef)
|
|
{
|
|
/* three-byte sequence */
|
|
wchar_t tmp;
|
|
state->__value.__wchb[0] = ch;
|
|
if (state->__count == 0)
|
|
state->__count = 1;
|
|
else if (n < (size_t)-1)
|
|
++n;
|
|
if (n < 2)
|
|
return -2;
|
|
ch = (state->__count == 1) ? t[i++] : state->__value.__wchb[1];
|
|
if (state->__value.__wchb[0] == 0xe0 && ch < 0xa0)
|
|
{
|
|
/* overlong UTF-8 sequence */
|
|
r->_errno = EILSEQ;
|
|
return -1;
|
|
}
|
|
if (ch < 0x80 || ch > 0xbf)
|
|
{
|
|
r->_errno = EILSEQ;
|
|
return -1;
|
|
}
|
|
state->__value.__wchb[1] = ch;
|
|
if (state->__count == 1)
|
|
state->__count = 2;
|
|
else if (n < (size_t)-1)
|
|
++n;
|
|
if (n < 3)
|
|
return -2;
|
|
ch = t[i++];
|
|
if (ch < 0x80 || ch > 0xbf)
|
|
{
|
|
r->_errno = EILSEQ;
|
|
return -1;
|
|
}
|
|
state->__count = 0;
|
|
tmp = (wchar_t)((state->__value.__wchb[0] & 0x0f) << 12)
|
|
| (wchar_t)((state->__value.__wchb[1] & 0x3f) << 6)
|
|
| (wchar_t)(ch & 0x3f);
|
|
*pwc = tmp;
|
|
return i;
|
|
}
|
|
if (ch >= 0xf0 && ch <= 0xf4)
|
|
{
|
|
/* four-byte sequence */
|
|
wint_t tmp;
|
|
state->__value.__wchb[0] = ch;
|
|
if (state->__count == 0)
|
|
state->__count = 1;
|
|
else if (n < (size_t)-1)
|
|
++n;
|
|
if (n < 2)
|
|
return -2;
|
|
ch = (state->__count == 1) ? t[i++] : state->__value.__wchb[1];
|
|
if ((state->__value.__wchb[0] == 0xf0 && ch < 0x90)
|
|
|| (state->__value.__wchb[0] == 0xf4 && ch >= 0x90))
|
|
{
|
|
/* overlong UTF-8 sequence or result is > 0x10ffff */
|
|
r->_errno = EILSEQ;
|
|
return -1;
|
|
}
|
|
if (ch < 0x80 || ch > 0xbf)
|
|
{
|
|
r->_errno = EILSEQ;
|
|
return -1;
|
|
}
|
|
state->__value.__wchb[1] = ch;
|
|
if (state->__count == 1)
|
|
state->__count = 2;
|
|
else if (n < (size_t)-1)
|
|
++n;
|
|
if (n < 3)
|
|
return -2;
|
|
ch = (state->__count == 2) ? t[i++] : state->__value.__wchb[2];
|
|
if (ch < 0x80 || ch > 0xbf)
|
|
{
|
|
r->_errno = EILSEQ;
|
|
return -1;
|
|
}
|
|
state->__value.__wchb[2] = ch;
|
|
if (state->__count == 2)
|
|
state->__count = 3;
|
|
else if (n < (size_t)-1)
|
|
++n;
|
|
if (state->__count == 3 && sizeof(wchar_t) == 2)
|
|
{
|
|
/* On systems which have wchar_t being UTF-16 values, the value
|
|
doesn't fit into a single wchar_t in this case. So what we
|
|
do here is to store the state with a special value of __count
|
|
and return the first half of a surrogate pair. The first
|
|
three bytes of a UTF-8 sequence are enough to generate the
|
|
first half of a UTF-16 surrogate pair. As return value we
|
|
choose to return the number of bytes actually read up to
|
|
here.
|
|
The second half of the surrogate pair is returned in case we
|
|
recognize the special __count value of four, and the next
|
|
byte is actually a valid value. See below. */
|
|
tmp = (wint_t)((state->__value.__wchb[0] & 0x07) << 18)
|
|
| (wint_t)((state->__value.__wchb[1] & 0x3f) << 12)
|
|
| (wint_t)((state->__value.__wchb[2] & 0x3f) << 6);
|
|
state->__count = 4;
|
|
*pwc = 0xd800 | ((tmp - 0x10000) >> 10);
|
|
return i;
|
|
}
|
|
if (n < 4)
|
|
return -2;
|
|
ch = t[i++];
|
|
if (ch < 0x80 || ch > 0xbf)
|
|
{
|
|
r->_errno = EILSEQ;
|
|
return -1;
|
|
}
|
|
tmp = (wint_t)((state->__value.__wchb[0] & 0x07) << 18)
|
|
| (wint_t)((state->__value.__wchb[1] & 0x3f) << 12)
|
|
| (wint_t)((state->__value.__wchb[2] & 0x3f) << 6)
|
|
| (wint_t)(ch & 0x3f);
|
|
if (state->__count == 4 && sizeof(wchar_t) == 2)
|
|
/* Create the second half of the surrogate pair for systems with
|
|
wchar_t == UTF-16 . */
|
|
*pwc = 0xdc00 | (tmp & 0x3ff);
|
|
else
|
|
*pwc = tmp;
|
|
state->__count = 0;
|
|
return i;
|
|
}
|
|
|
|
r->_errno = EILSEQ;
|
|
return -1;
|
|
}
|
|
|
|
/* Cygwin defines its own doublebyte charset conversion functions
|
|
because the underlying OS requires wchar_t == UTF-16. */
|
|
#ifndef __CYGWIN__
|
|
int
|
|
_DEFUN (__sjis_mbtowc, (r, pwc, s, n, state),
|
|
struct _reent *r _AND
|
|
wchar_t *pwc _AND
|
|
const char *s _AND
|
|
size_t n _AND
|
|
mbstate_t *state)
|
|
{
|
|
wchar_t dummy;
|
|
unsigned char *t = (unsigned char *)s;
|
|
int ch;
|
|
int i = 0;
|
|
|
|
if (pwc == NULL)
|
|
pwc = &dummy;
|
|
|
|
if (s == NULL)
|
|
return 0; /* not state-dependent */
|
|
|
|
if (n == 0)
|
|
return -2;
|
|
|
|
ch = t[i++];
|
|
if (state->__count == 0)
|
|
{
|
|
if (_issjis1 (ch))
|
|
{
|
|
state->__value.__wchb[0] = ch;
|
|
state->__count = 1;
|
|
if (n <= 1)
|
|
return -2;
|
|
ch = t[i++];
|
|
}
|
|
}
|
|
if (state->__count == 1)
|
|
{
|
|
if (_issjis2 (ch))
|
|
{
|
|
*pwc = (((wchar_t)state->__value.__wchb[0]) << 8) + (wchar_t)ch;
|
|
state->__count = 0;
|
|
return i;
|
|
}
|
|
else
|
|
{
|
|
r->_errno = EILSEQ;
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
*pwc = (wchar_t)*t;
|
|
|
|
if (*t == '\0')
|
|
return 0;
|
|
|
|
return 1;
|
|
}
|
|
|
|
int
|
|
_DEFUN (__eucjp_mbtowc, (r, pwc, s, n, state),
|
|
struct _reent *r _AND
|
|
wchar_t *pwc _AND
|
|
const char *s _AND
|
|
size_t n _AND
|
|
mbstate_t *state)
|
|
{
|
|
wchar_t dummy;
|
|
unsigned char *t = (unsigned char *)s;
|
|
int ch;
|
|
int i = 0;
|
|
|
|
if (pwc == NULL)
|
|
pwc = &dummy;
|
|
|
|
if (s == NULL)
|
|
return 0;
|
|
|
|
if (n == 0)
|
|
return -2;
|
|
|
|
ch = t[i++];
|
|
if (state->__count == 0)
|
|
{
|
|
if (_iseucjp1 (ch))
|
|
{
|
|
state->__value.__wchb[0] = ch;
|
|
state->__count = 1;
|
|
if (n <= 1)
|
|
return -2;
|
|
ch = t[i++];
|
|
}
|
|
}
|
|
if (state->__count == 1)
|
|
{
|
|
if (_iseucjp2 (ch))
|
|
{
|
|
if (state->__value.__wchb[0] == 0x8f)
|
|
{
|
|
state->__value.__wchb[1] = ch;
|
|
state->__count = 2;
|
|
if (n <= i)
|
|
return -2;
|
|
ch = t[i++];
|
|
}
|
|
else
|
|
{
|
|
*pwc = (((wchar_t)state->__value.__wchb[0]) << 8) + (wchar_t)ch;
|
|
state->__count = 0;
|
|
return i;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
r->_errno = EILSEQ;
|
|
return -1;
|
|
}
|
|
}
|
|
if (state->__count == 2)
|
|
{
|
|
if (_iseucjp2 (ch))
|
|
{
|
|
*pwc = (((wchar_t)state->__value.__wchb[1]) << 8)
|
|
+ (wchar_t)(ch & 0x7f);
|
|
state->__count = 0;
|
|
return i;
|
|
}
|
|
else
|
|
{
|
|
r->_errno = EILSEQ;
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
*pwc = (wchar_t)*t;
|
|
|
|
if (*t == '\0')
|
|
return 0;
|
|
|
|
return 1;
|
|
}
|
|
|
|
int
|
|
_DEFUN (__jis_mbtowc, (r, pwc, s, n, state),
|
|
struct _reent *r _AND
|
|
wchar_t *pwc _AND
|
|
const char *s _AND
|
|
size_t n _AND
|
|
mbstate_t *state)
|
|
{
|
|
wchar_t dummy;
|
|
unsigned char *t = (unsigned char *)s;
|
|
JIS_STATE curr_state;
|
|
JIS_ACTION action;
|
|
JIS_CHAR_TYPE ch;
|
|
unsigned char *ptr;
|
|
unsigned int i;
|
|
int curr_ch;
|
|
|
|
if (pwc == NULL)
|
|
pwc = &dummy;
|
|
|
|
if (s == NULL)
|
|
{
|
|
state->__state = ASCII;
|
|
return 1; /* state-dependent */
|
|
}
|
|
|
|
if (n == 0)
|
|
return -2;
|
|
|
|
curr_state = state->__state;
|
|
ptr = t;
|
|
|
|
for (i = 0; i < n; ++i)
|
|
{
|
|
curr_ch = t[i];
|
|
switch (curr_ch)
|
|
{
|
|
case ESC_CHAR:
|
|
ch = ESCAPE;
|
|
break;
|
|
case '$':
|
|
ch = DOLLAR;
|
|
break;
|
|
case '@':
|
|
ch = AT;
|
|
break;
|
|
case '(':
|
|
ch = BRACKET;
|
|
break;
|
|
case 'B':
|
|
ch = B;
|
|
break;
|
|
case 'J':
|
|
ch = J;
|
|
break;
|
|
case '\0':
|
|
ch = NUL;
|
|
break;
|
|
default:
|
|
if (_isjis (curr_ch))
|
|
ch = JIS_CHAR;
|
|
else
|
|
ch = OTHER;
|
|
}
|
|
|
|
action = JIS_action_table[curr_state][ch];
|
|
curr_state = JIS_state_table[curr_state][ch];
|
|
|
|
switch (action)
|
|
{
|
|
case NOOP:
|
|
break;
|
|
case EMPTY:
|
|
state->__state = ASCII;
|
|
*pwc = (wchar_t)0;
|
|
return 0;
|
|
case COPY_A:
|
|
state->__state = ASCII;
|
|
*pwc = (wchar_t)*ptr;
|
|
return (i + 1);
|
|
case COPY_J1:
|
|
state->__value.__wchb[0] = t[i];
|
|
break;
|
|
case COPY_J2:
|
|
state->__state = JIS;
|
|
*pwc = (((wchar_t)state->__value.__wchb[0]) << 8) + (wchar_t)(t[i]);
|
|
return (i + 1);
|
|
case MAKE_A:
|
|
ptr = (unsigned char *)(t + i + 1);
|
|
break;
|
|
case ERROR:
|
|
default:
|
|
r->_errno = EILSEQ;
|
|
return -1;
|
|
}
|
|
|
|
}
|
|
|
|
state->__state = curr_state;
|
|
return -2; /* n < bytes needed */
|
|
}
|
|
#endif /* !__CYGWIN__*/
|
|
#endif /* _MB_CAPABLE */
|