#include #include #include #include #include #include "mbctype.h" #include "local.h" int (*__wctomb) (struct _reent *, char *, wchar_t, const char *charset, mbstate_t *) #ifdef __CYGWIN__ = __utf8_wctomb; #else = __ascii_wctomb; #endif int _DEFUN (_wctomb_r, (r, s, wchar, state), struct _reent *r _AND char *s _AND wchar_t _wchar _AND mbstate_t *state) { return __wctomb (r, s, _wchar, __locale_charset (), state); } int _DEFUN (__ascii_wctomb, (r, s, wchar, charset, state), struct _reent *r _AND char *s _AND wchar_t _wchar _AND const char *charset _AND mbstate_t *state) { /* Avoids compiler warnings about comparisons that are always false due to limited range when sizeof(wchar_t) is 2 but sizeof(wint_t) is 4, as is the case on cygwin. */ wint_t wchar = _wchar; if (s == NULL) return 0; if ((size_t)wchar >= 0x100) { r->_errno = EILSEQ; return -1; } *s = (char) wchar; return 1; } #ifdef _MB_CAPABLE /* for some conversions, we use the __count field as a place to store a state value */ #define __state __count int _DEFUN (__utf8_wctomb, (r, s, wchar, charset, state), struct _reent *r _AND char *s _AND wchar_t _wchar _AND const char *charset _AND mbstate_t *state) { wint_t wchar = _wchar; if (s == NULL) return 0; /* UTF-8 encoding is not state-dependent */ if (state->__count == -4 && (wchar < 0xdc00 || wchar >= 0xdfff)) { /* At this point only the second half of a surrogate pair is valid. */ r->_errno = EILSEQ; return -1; } if (wchar <= 0x7f) { *s = wchar; return 1; } if (wchar >= 0x80 && wchar <= 0x7ff) { *s++ = 0xc0 | ((wchar & 0x7c0) >> 6); *s = 0x80 | (wchar & 0x3f); return 2; } if (wchar >= 0x800 && wchar <= 0xffff) { if (wchar >= 0xd800 && wchar <= 0xdfff) { wint_t tmp; /* UTF-16 surrogates -- must not occur in normal UCS-4 data */ if (sizeof (wchar_t) != 2) { r->_errno = EILSEQ; return -1; } if (wchar >= 0xdc00) { /* Second half of a surrogate pair. It's not valid if we don't have already read a first half of a surrogate before. */ if (state->__count != -4) { r->_errno = EILSEQ; return -1; } /* If it's valid, reconstruct the full Unicode value and return the trailing three bytes of the UTF-8 char. */ tmp = (state->__value.__wchb[0] << 16) | (state->__value.__wchb[1] << 8) | (wchar & 0x3ff); state->__count = 0; *s++ = 0x80 | ((tmp & 0x3f000) >> 12); *s++ = 0x80 | ((tmp & 0xfc0) >> 6); *s = 0x80 | (tmp & 0x3f); return 3; } /* First half of a surrogate pair. Store the state and return the first byte of the UTF-8 char. */ tmp = ((wchar & 0x3ff) << 10) + 0x10000; state->__value.__wchb[0] = (tmp >> 16) & 0xff; state->__value.__wchb[1] = (tmp >> 8) & 0xff; state->__count = -4; *s = (0xf0 | ((tmp & 0x1c0000) >> 18)); return 1; } *s++ = 0xe0 | ((wchar & 0xf000) >> 12); *s++ = 0x80 | ((wchar & 0xfc0) >> 6); *s = 0x80 | (wchar & 0x3f); return 3; } if (wchar >= 0x10000 && wchar <= 0x10ffff) { *s++ = 0xf0 | ((wchar & 0x1c0000) >> 18); *s++ = 0x80 | ((wchar & 0x3f000) >> 12); *s++ = 0x80 | ((wchar & 0xfc0) >> 6); *s = 0x80 | (wchar & 0x3f); return 4; } r->_errno = EILSEQ; return -1; } /* Cygwin defines its own doublebyte charset conversion functions because the underlying OS requires wchar_t == UTF-16. */ #ifndef __CYGWIN__ int _DEFUN (__sjis_wctomb, (r, s, wchar, charset, state), struct _reent *r _AND char *s _AND wchar_t _wchar _AND const char *charset _AND mbstate_t *state) { wint_t wchar = _wchar; unsigned char char2 = (unsigned char)wchar; unsigned char char1 = (unsigned char)(wchar >> 8); if (s == NULL) return 0; /* not state-dependent */ if (char1 != 0x00) { /* first byte is non-zero..validate multi-byte char */ if (_issjis1(char1) && _issjis2(char2)) { *s++ = (char)char1; *s = (char)char2; return 2; } else { r->_errno = EILSEQ; return -1; } } *s = (char) wchar; return 1; } int _DEFUN (__eucjp_wctomb, (r, s, wchar, charset, state), struct _reent *r _AND char *s _AND wchar_t _wchar _AND const char *charset _AND mbstate_t *state) { wint_t wchar = _wchar; unsigned char char2 = (unsigned char)wchar; unsigned char char1 = (unsigned char)(wchar >> 8); if (s == NULL) return 0; /* not state-dependent */ if (char1 != 0x00) { /* first byte is non-zero..validate multi-byte char */ if (_iseucjp1 (char1) && _iseucjp2 (char2)) { *s++ = (char)char1; *s = (char)char2; return 2; } else if (_iseucjp2 (char1) && _iseucjp2 (char2 | 0x80)) { *s++ = (char)0x8f; *s++ = (char)char1; *s = (char)(char2 | 0x80); return 3; } else { r->_errno = EILSEQ; return -1; } } *s = (char) wchar; return 1; } int _DEFUN (__jis_wctomb, (r, s, wchar, charset, state), struct _reent *r _AND char *s _AND wchar_t _wchar _AND const char *charset _AND mbstate_t *state) { wint_t wchar = _wchar; int cnt = 0; unsigned char char2 = (unsigned char)wchar; unsigned char char1 = (unsigned char)(wchar >> 8); if (s == NULL) return 1; /* state-dependent */ if (char1 != 0x00) { /* first byte is non-zero..validate multi-byte char */ if (_isjis (char1) && _isjis (char2)) { if (state->__state == 0) { /* must switch from ASCII to JIS state */ state->__state = 1; *s++ = ESC_CHAR; *s++ = '$'; *s++ = 'B'; cnt = 3; } *s++ = (char)char1; *s = (char)char2; return cnt + 2; } r->_errno = EILSEQ; return -1; } if (state->__state != 0) { /* must switch from JIS to ASCII state */ state->__state = 0; *s++ = ESC_CHAR; *s++ = '('; *s++ = 'B'; cnt = 3; } *s = (char)char2; return cnt + 1; } #endif /* !__CYGWIN__ */ #ifdef _MB_EXTENDED_CHARSETS_ISO int _DEFUN (__iso_wctomb, (r, s, wchar, charset, state), struct _reent *r _AND char *s _AND wchar_t _wchar _AND const char *charset _AND mbstate_t *state) { wint_t wchar = _wchar; if (s == NULL) return 0; /* wchars <= 0x9f translate to all ISO charsets directly. */ if (wchar >= 0xa0) { int iso_idx = __iso_8859_index (charset + 9); if (iso_idx >= 0) { unsigned char mb; if (s == NULL) return 0; for (mb = 0; mb < 0x60; ++mb) if (__iso_8859_conv[iso_idx][mb] == wchar) { *s = (char) (mb + 0xa0); return 1; } r->_errno = EILSEQ; return -1; } } if ((size_t)wchar >= 0x100) { r->_errno = EILSEQ; return -1; } *s = (char) wchar; return 1; } #endif /* _MB_EXTENDED_CHARSETS_ISO */ #ifdef _MB_EXTENDED_CHARSETS_WINDOWS int _DEFUN (__cp_wctomb, (r, s, wchar, charset, state), struct _reent *r _AND char *s _AND wchar_t _wchar _AND const char *charset _AND mbstate_t *state) { wint_t wchar = _wchar; if (s == NULL) return 0; if (wchar >= 0x80) { int cp_idx = __cp_index (charset + 2); if (cp_idx >= 0) { unsigned char mb; if (s == NULL) return 0; for (mb = 0; mb < 0x80; ++mb) if (__cp_conv[cp_idx][mb] == wchar) { *s = (char) (mb + 0x80); return 1; } r->_errno = EILSEQ; return -1; } } if ((size_t)wchar >= 0x100) { r->_errno = EILSEQ; return -1; } *s = (char) wchar; return 1; } #endif /* _MB_EXTENDED_CHARSETS_WINDOWS */ #endif /* _MB_CAPABLE */