/*
 * unicode.c - UTF-8 helpers: code point counting and UTF-8 -> UTF-32 copy.
 *
 * Introduced by commit d3fddd7d28f314a6738f2a0ba4d9f58024b01984
 * (Oxore, 2018-07-12, "Introduce testing with munit, refactor"), which
 * split the unicode routines out of the text module.
 */

#include <stddef.h>
#include <wchar.h>

/*
 * Pull in our own prototypes when the header is reachable.  The guard only
 * exists so this translation unit can also be compiled stand-alone; compilers
 * without __has_include fall back to the unconditional include.
 */
#if defined(__has_include)
#  if __has_include("unicode.h")
#    include "unicode.h"
#  endif
#else
#  include "unicode.h"
#endif

/*
 * Length in bytes of the UTF-8 sequence introduced by lead byte `c`.
 *
 * Returns 1..4 for a valid lead byte and 0 for anything that cannot start a
 * sequence: NUL, continuation bytes (0x80..0xBF), the always-overlong leads
 * 0xC0/0xC1, and 0xF5..0xFF.
 */
static inline unsigned int utf8_char_len(unsigned char c)
{
    if (c == 0x00) {
        return 0;
    }
    if (c < 0x80) {
        return 1;
    }
    if (c >= 0xC2 && c < 0xE0) {
        return 2;
    }
    if (c >= 0xE0 && c < 0xF0) {
        return 3;
    }
    if (c >= 0xF0 && c < 0xF5) {
        return 4;
    }
    return 0;
}

/* True when `c` has the 10xxxxxx bit pattern of a UTF-8 continuation byte. */
static inline int utf8_is_continuation(unsigned char c)
{
    return (c & 0xC0) == 0x80;
}

/*
 * Decode one UTF-8 sequence starting at `s` into `*cp`.
 *
 * Returns the number of bytes consumed (1..4), or 0 when `s` does not start
 * a sequence with a valid lead byte followed by the required continuation
 * bytes.  Bytes are inspected strictly left to right and decoding stops at
 * the first non-continuation byte, so a NUL terminator is never stepped over.
 * Overlong 3/4-byte forms and surrogate code points are not rejected.
 */
static unsigned int utf8_decode(const unsigned char *s, unsigned long *cp)
{
    /* Payload bits carried by the lead byte, indexed by sequence length. */
    static const unsigned char lead_mask[5] = { 0x00, 0x7f, 0x1f, 0x0f, 0x07 };
    unsigned int n = utf8_char_len(s[0]);
    unsigned long value;

    if (n == 0) {
        return 0;
    }

    value = (unsigned long)(s[0] & lead_mask[n]);
    for (unsigned int i = 1; i < n; ++i) {
        if (!utf8_is_continuation(s[i])) {
            return 0;
        }
        value = (value << 6) | (unsigned long)(s[i] & 0x3f);
    }

    *cp = value;
    return n;
}

/*
 * Count the characters in the NUL-terminated UTF-8 string `string`.
 *
 * For well-formed input the result is the number of code points.  Malformed
 * input is handled leniently: a byte that cannot start a sequence counts as
 * one character, and a multi-byte sequence cut short by the terminator is not
 * counted.  The scan never reads past the terminating NUL.
 *
 * Returns 0 when `string` is NULL.
 */
unsigned long utf8_strlen(void *string)
{
    unsigned long len = 0;
    unsigned int skip = 0; /* bytes of the current sequence still to pass over */

    if (string == NULL) {
        return 0;
    }

    for (const unsigned char *p = string; *p != 0; ++p) {
        if (skip == 0) {
            unsigned int n = utf8_char_len(*p);
            skip = (n != 0) ? n - 1 : 0;
        }
        /*
         * The final byte of a multi-byte sequence is reached with skip == 0
         * and is what bumps the count; single and invalid bytes count at once.
         */
        if (skip != 0) {
            --skip;
        } else {
            ++len;
        }
    }
    return len;
}

/*
 * Convert the NUL-terminated UTF-8 string `src` to code points in `dest`.
 *
 * `dest` is always NUL-terminated and must have room for
 * utf8_strlen(src) + 1 elements.  Conversion stops at the first malformed
 * or truncated sequence; everything decoded before it is kept.  Code points
 * above U+FFFF need a wchar_t wider than 16 bits to be stored intact.
 *
 * A NULL `dest` is ignored; a NULL `src` yields an empty string.
 */
void utf8to32_strcpy(wchar_t *dest, char *src)
{
    const unsigned char *in = (const unsigned char *)src;
    size_t out = 0;

    if (dest == NULL) {
        return;
    }

    if (in != NULL) {
        while (*in != 0) {
            unsigned long cp = 0;
            unsigned int used = utf8_decode(in, &cp);

            if (used == 0) {
                break; /* malformed or truncated: stop before the bad bytes */
            }
            dest[out++] = (wchar_t)cp;
            in += used;
        }
    }
    dest[out] = 0;
}