Cygwin: nlsfuncs.cc: introduce collating elements and helper functions

lc_collelem.h: autogenerated table of collating elements, taken
               from glibc

is_unicode_coll_elem: Check if a UTF-32 string is a collating element
next_unicode_char: return the length of the prefix of a string that
                   constitutes a complete character in the current
                   locale, taking collating elements into account.
This commit is contained in:
Corinna Vinschen 2023-02-20 22:38:41 +01:00
parent ffba9604d1
commit 1eadb23887
3 changed files with 3019 additions and 0 deletions

View File

@ -17,6 +17,10 @@ extern int __wcollate_range_cmp (wint_t, wint_t);
int is_unicode_equiv (wint_t, wint_t);
/* Return non-zero if the given UTF-32 string has length 1 or is found in
   the collating element table for the current LC_COLLATE locale, zero
   otherwise. */
int is_unicode_coll_elem (const wint_t *);
/* Return the number of UTF-32 chars making up the next full character at
   the start of the given string, taking collating elements valid in the
   current LC_COLLATE locale into account.  Returns 1 if no collating
   element matches. */
size_t next_unicode_char (wint_t *);
#ifdef __cplusplus
};
#endif

File diff suppressed because it is too large Load Diff

View File

@ -17,8 +17,10 @@ details. */
#include "dtable.h"
#include "cygheap.h"
#include "tls_pbuf.h"
#include "collate.h"
#include "lc_msg.h"
#include "lc_era.h"
#include "lc_collelem.h"
/* Expands to two comma-separated expressions for the name X: the address
   of lc_X_ptr, followed by the difference lc_X_end - lc_X_ptr. */
#define _LC(x) &lc_##x##_ptr,lc_##x##_end-lc_##x##_ptr
@ -1241,6 +1243,83 @@ is_unicode_equiv (wint_t test, wint_t eqv)
return 1;
}
static int
comp_coll_elem (const void *key, const void *array_member)
{
collating_element_t *ckey = (collating_element_t *) key;
collating_element_t *carray_member = (collating_element_t *) array_member;
int ret = wcicmp ((const wint_t *) ckey->element,
(const wint_t *) carray_member->element);
/* The locale in the collating_element array never has a codeset
attached. So the length of the collating_element locale is
always <= length of the key locale, and that's all we need to
check. Also, if the collating_element locale is empty, we're
all set. */
if (ret == 0 && carray_member->locale[0])
ret = strncmp (ckey->locale, carray_member->locale,
strlen (carray_member->locale));
return ret;
}
extern "C" int
is_unicode_coll_elem (const wint_t *test)
{
collating_element_t ct = {
(const char32_t *) test,
__get_current_locale ()->categories[LC_COLLATE]
};
collating_element_t *cmatch;
if (wcilen (test) == 1)
return 1;
cmatch = (collating_element_t *)
bsearch (&ct, collating_element, ce_size, ce_e_size, comp_coll_elem);
return !!cmatch;
}
static int
comp_coll_elem_n (const void *key, const void *array_member)
{
collating_element_t *ckey = (collating_element_t *) key;
collating_element_t *carray_member = (collating_element_t *) array_member;
int ret = wcincmp ((const wint_t *) ckey->element,
(const wint_t *) carray_member->element,
wcilen ((const wint_t *) carray_member->element));
/* The locale in the collating_element array never has a codeset
attached. So the length of the collating_element locale is
always <= length of the key locale, and that's all we need to
check. Also, if the collating_element locale is empty, we're
all set. */
if (ret == 0 && carray_member->locale[0])
ret = strncmp (ckey->locale, carray_member->locale,
strlen (carray_member->locale));
return ret;
}
/* Return the number of UTF-32 chars making up the next full character in
inp, taking valid collation elements in the current locale into account. */
extern "C" size_t
next_unicode_char (wint_t *inp)
{
collating_element_t ct = {
(const char32_t *) inp,
__get_current_locale ()->categories[LC_COLLATE]
};
collating_element_t *cmatch;
if (wcilen (inp) > 1)
{
cmatch = (collating_element_t *)
bsearch (&ct, collating_element, ce_size, ce_e_size,
comp_coll_elem_n);
if (cmatch)
return wcilen ((const wint_t *) cmatch->element);
}
return 1;
}
extern "C" size_t
wcsxfrm_l (wchar_t *__restrict ws1, const wchar_t *__restrict ws2, size_t wsn,
struct __locale_t *locale)