Fix #7: toml_utf8_to_ucs() returns incorrect results
This commit is contained in:
parent
624013252b
commit
56c42b7aed
16
toml.c
16
toml.c
|
@ -71,9 +71,11 @@ int toml_utf8_to_ucs(const char* orig, int len, int64_t* ret)
|
|||
if (0x6 == (i >> 5)) {
|
||||
if (len < 2) return -1;
|
||||
v = i & 0x1f;
|
||||
i = *(++buf);
|
||||
if (0x2 != (i >> 6)) return -1;
|
||||
v = (v << 6) | (i & 0x3f);
|
||||
for (int j = 0; j < 1; j++) {
|
||||
i = *buf++;
|
||||
if (0x2 != (i >> 6)) return -1;
|
||||
v = (v << 6) | (i & 0x3f);
|
||||
}
|
||||
return *ret = v, (const char*) buf - orig;
|
||||
}
|
||||
|
||||
|
@ -84,7 +86,7 @@ int toml_utf8_to_ucs(const char* orig, int len, int64_t* ret)
|
|||
if (len < 3) return -1;
|
||||
v = i & 0x0F;
|
||||
for (int j = 0; j < 2; j++) {
|
||||
i = *(++buf);
|
||||
i = *buf++;
|
||||
if (0x2 != (i >> 6)) return -1;
|
||||
v = (v << 6) | (i & 0x3f);
|
||||
}
|
||||
|
@ -98,7 +100,7 @@ int toml_utf8_to_ucs(const char* orig, int len, int64_t* ret)
|
|||
if (len < 4) return -1;
|
||||
v = i & 0x07;
|
||||
for (int j = 0; j < 3; j++) {
|
||||
i = *(++buf);
|
||||
i = *buf++;
|
||||
if (0x2 != (i >> 6)) return -1;
|
||||
v = (v << 6) | (i & 0x3f);
|
||||
}
|
||||
|
@ -112,7 +114,7 @@ int toml_utf8_to_ucs(const char* orig, int len, int64_t* ret)
|
|||
if (len < 5) return -1;
|
||||
v = i & 0x03;
|
||||
for (int j = 0; j < 4; j++) {
|
||||
i = *(++buf);
|
||||
i = *buf++;
|
||||
if (0x2 != (i >> 6)) return -1;
|
||||
v = (v << 6) | (i & 0x3f);
|
||||
}
|
||||
|
@ -126,7 +128,7 @@ int toml_utf8_to_ucs(const char* orig, int len, int64_t* ret)
|
|||
if (len < 6) return -1;
|
||||
v = i & 0x01;
|
||||
for (int j = 0; j < 5; j++) {
|
||||
i = *(++buf);
|
||||
i = *buf++;
|
||||
if (0x2 != (i >> 6)) return -1;
|
||||
v = (v << 6) | (i & 0x3f);
|
||||
}
|
||||
|
|
|
@ -0,0 +1,11 @@
|
|||
# Build and clean the unit tests for the parent directory's toml.c.
# -I.. is kept from the original flags; the test itself includes "../toml.h".
CFLAGS = -g -I..

# Test executables produced by `make all`.
TESTS = t1

# Neither target produces a file of its own name, so mark them phony to keep
# a stray file called "all" or "clean" from masking them.
.PHONY: all clean

all: $(TESTS)

# Explicit recipe: do not rely on a built-in rule to link ../toml.c
# (GNU make's %: %.c rule uses $^, but POSIX make's .c rule uses only $<).
# ../toml.h is listed so the test is rebuilt when the header changes.
t1: t1.c ../toml.c ../toml.h
	$(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) -o $@ t1.c ../toml.c $(LDLIBS)

clean:
	rm -f $(TESTS)
|
||||
|
|
@ -0,0 +1,57 @@
|
|||
#include <stdio.h>
|
||||
#include <assert.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include "../toml.h"
|
||||
|
||||
|
||||
|
||||
/*
 * Round-trip test for toml_ucs_to_utf8() and toml_utf8_to_ucs().
 *
 * Each table row pairs a code point with the UTF-8 byte sequence expected for
 * it; the rows sit on the first and last value of every sequence length from
 * 2 to 6 bytes (0xfffd is used as the 3-byte upper case).
 *
 * For every row:
 *   1. encode the code point and require the returned byte count and the
 *      produced bytes to match the table;
 *   2. decode those bytes again and require the returned byte count and the
 *      decoded code point to match the table.
 *
 * The checks are ordinary `if` statements rather than assert() so that they
 * are still executed when the file is compiled with -DNDEBUG.
 *
 * Returns 0 when every row passes, 1 on the first failure (after printing the
 * failing row to stderr).
 */
int main(int argc, const char* argv[])
{
    static const struct {
        int64_t     code;   /* code point to encode */
        int         size;   /* expected length of the encoding, in bytes */
        const char* utf8;   /* expected encoding */
    } cases[] = {
        { 0x80,       2, "\xc2\x80" },
        { 0x7ff,      2, "\xdf\xbf" },
        { 0x800,      3, "\xe0\xa0\x80" },
        { 0xfffd,     3, "\xef\xbf\xbd" },
        { 0x10000,    4, "\xf0\x90\x80\x80" },
        { 0x1fffff,   4, "\xf7\xbf\xbf\xbf" },
        { 0x200000,   5, "\xf8\x88\x80\x80\x80" },
        { 0x3ffffff,  5, "\xfb\xbf\xbf\xbf\xbf" },
        { 0x4000000,  6, "\xfc\x84\x80\x80\x80\x80" },
        { 0x7fffffff, 6, "\xfd\xbf\xbf\xbf\xbf\xbf" },
    };
    const size_t ncases = sizeof cases / sizeof cases[0];

    /* The command line is not used. */
    (void) argc;
    (void) argv;

    for (size_t k = 0; k < ncases; k++) {
        char buf[6];        /* longest encoding exercised here is 6 bytes */
        int64_t code = -1;  /* sentinel: never equal to a table entry */
        int n;

        /* code point -> UTF-8 */
        n = toml_ucs_to_utf8(cases[k].code, buf);
        if (n != cases[k].size
            || 0 != memcmp(buf, cases[k].utf8, (size_t) cases[k].size)) {
            fprintf(stderr,
                    "t1: case %zu (0x%llx): toml_ucs_to_utf8 returned %d, expected %d bytes\n",
                    k, (unsigned long long) cases[k].code, n, cases[k].size);
            return 1;
        }

        /* UTF-8 -> code point, using the bytes produced above */
        n = toml_utf8_to_ucs(buf, cases[k].size, &code);
        if (n != cases[k].size || code != cases[k].code) {
            fprintf(stderr,
                    "t1: case %zu (0x%llx): toml_utf8_to_ucs returned %d with code 0x%llx\n",
                    k, (unsigned long long) cases[k].code, n,
                    (unsigned long long) code);
            return 1;
        }
    }

    return 0;
}
|
Loading…
Reference in New Issue