summary | refs | log | tree | commit | diff
path: root/src/unicode.c
diff options
context:
space:
mode:
author: Oxore <oxore@protonmail.com> 2018-07-12 11:51:00 +0300
committer: Oxore <oxore@protonmail.com> 2018-07-12 11:51:00 +0300
commit: d3fddd7d28f314a6738f2a0ba4d9f58024b01984 (patch)
tree: 7345455bdc6f051289ffa9854fcad2001a55a223 /src/unicode.c
parent: 9a9711945c2add826e5887aabe2330bee9042b4b (diff)
Introduce testing with munit, refactor
Split unicode routines from text. Testing: add munit submodule, move translation units with main functions to separate folder, make corresponding changes in Makefile. Make simple test for unicode handling routine. Remove _vimrc_local. test.c: optimize includes. Add test run to .travis.yml.
Diffstat (limited to 'src/unicode.c')
-rw-r--r-- src/unicode.c 52
1 files changed, 52 insertions, 0 deletions
diff --git a/src/unicode.c b/src/unicode.c
new file mode 100644
index 0000000..dea541a
--- /dev/null
+++ b/src/unicode.c
@@ -0,0 +1,52 @@
+#include <stdlib.h>
+#include <string.h>
+
+#include "unicode.h"
+
+/*
+ * Classify a byte as the first byte of a UTF-8 sequence.
+ *
+ * Returns the number of bytes in the sequence that starts with `c`:
+ *   0x01..0xBF -> 1 (this range includes 0x80..0xBF, so a continuation
+ *                    byte seen on its own is reported as a 1-byte unit)
+ *   0xC2..0xDF -> 2
+ *   0xE0..0xEF -> 3
+ *   0xF0..0xF4 -> 4
+ * Returns 0 for the NUL byte and for bytes that cannot start a sequence
+ * here (0xC0, 0xC1 and 0xF5..0xFF).
+ */
+static inline unsigned int utf8_char_len(unsigned char c)
+{
+    /* The guards are checked in ascending order, so each one only needs
+     * the upper bound of its range. */
+    if (c == 0x00)
+        return 0;
+    if (c <= 0xBF)
+        return 1;
+    if (c <= 0xC1)
+        return 0;
+    if (c <= 0xDF)
+        return 2;
+    if (c <= 0xEF)
+        return 3;
+    if (c <= 0xF4)
+        return 4;
+    return 0;
+}
+
+/*
+ * Count the characters (not bytes) in the NUL-terminated UTF-8 string
+ * `string`.
+ *
+ * The string is walked one byte at a time. When a lead byte announces a
+ * multi-byte sequence, the following bytes are skipped without being
+ * counted, and the character is counted on its last byte. A byte that
+ * utf8_char_len() rejects (returns 0) is counted as one character.
+ * A sequence cut short by the terminating NUL is not counted.
+ */
+unsigned long utf8_strlen(void *string)
+{
+    const unsigned char *p = string;
+    unsigned long count = 0;
+    unsigned long pending = 0; /* bytes of the current sequence still to skip */
+
+    while (*p) {
+        if (pending == 0) {
+            unsigned int n = utf8_char_len(*p);
+            if (n != 0)
+                pending = n - 1;
+        }
+
+        /* A byte either consumes one pending slot or completes a character. */
+        if (pending != 0)
+            --pending;
+        else
+            ++count;
+
+        ++p;
+    }
+
+    return count;
+}
+
+/*
+ * Decode the NUL-terminated UTF-8 string `src` into `dest`, storing one
+ * code point per wchar_t element, and terminate `dest` with 0.
+ *
+ * dest: output buffer; one element is written per decoded character plus
+ *       one for the terminator, so the caller must provide that much room.
+ * src:  input bytes, read up to the terminating NUL.
+ *
+ * Decoding stops early, leaving `dest` terminated after the characters
+ * decoded so far, when:
+ *   - utf8_char_len() rejects the lead byte (returns 0), or
+ *   - a byte after the lead byte is not a continuation byte (10xxxxxx).
+ * The second check also covers a sequence cut short by the terminating
+ * NUL, so the function never reads past the end of `src`.
+ */
+void utf8to32_strcpy(wchar_t *dest, char *src)
+{
+    /* Payload mask of the lead byte, indexed by sequence length (1..4). */
+    static const unsigned char lead_mask[] = { 0x00, 0x7f, 0x1f, 0x0f, 0x07 };
+
+    /* Use unsigned bytes so masks and shifts never act on a negative char. */
+    const unsigned char *in = (const unsigned char *)src;
+    unsigned long len = 0;
+
+    while (*in) {
+        unsigned int clen = utf8_char_len(*in);
+        if (clen == 0)
+            break;
+
+        unsigned long cp = in[0] & lead_mask[clen];
+        unsigned int i;
+        for (i = 1; i < clen; ++i) {
+            /* Stop at the first bad byte: nothing beyond it is read, so a
+             * NUL inside a sequence cannot be stepped over. */
+            if ((in[i] & 0xC0) != 0x80)
+                break;
+            cp = (cp << 6) | (in[i] & 0x3f);
+        }
+        if (i < clen)
+            break;
+
+        dest[len] = (wchar_t)cp;
+        ++len;
+        in += clen;
+    }
+
+    dest[len] = 0;
+}