blob: dea541abec1bbbeb26b3e1e4c721cd36ff3f9f97 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
|
#include <stdlib.h>
#include <string.h>
#include "unicode.h"
/*
 * Classify a byte by the length of the UTF-8 sequence it would introduce.
 *
 *   0x01..0xBF -> 1   (note: this range also covers 0x80..0xBF)
 *   0xC2..0xDF -> 2
 *   0xE0..0xEF -> 3
 *   0xF0..0xF4 -> 4
 *
 * Every other value (0x00, 0xC0, 0xC1, 0xF5..0xFF) yields 0.
 */
static inline unsigned int utf8_char_len(unsigned char c)
{
    /* The ranges are tested in ascending order, so each guard only needs
     * an upper bound; the lower bound is implied by the guards above it. */
    if (c == 0x00)
        return 0;
    if (c <= 0xBF)
        return 1;
    if (c <= 0xC1)
        return 0;
    if (c <= 0xDF)
        return 2;
    if (c <= 0xEF)
        return 3;
    if (c <= 0xF4)
        return 4;
    return 0;
}
/*
 * Count the characters in the NUL-terminated byte string `string`,
 * treating it as UTF-8.
 *
 * The string is walked one byte at a time. Whenever no skip is pending, the
 * current byte is classified with utf8_char_len(); a result n > 1 schedules
 * n - 1 bytes (the current one included) to be passed over without counting.
 * Every byte that is not passed over adds one to the result, so a
 * well-formed n-byte sequence is counted once, on its final byte.
 *
 * Skipped bytes are not inspected, and a byte for which utf8_char_len()
 * returns 0 is counted as one character.
 */
unsigned long utf8_strlen(void *string)
{
    const unsigned char *cursor = string;
    unsigned long count = 0;
    unsigned long pending = 0;

    while (*cursor) {
        if (pending == 0) {
            unsigned int seq_len = utf8_char_len(*cursor);

            /* A length of 0 or 1 leaves nothing to skip. */
            if (seq_len != 0)
                pending = seq_len - 1;
        }

        if (pending != 0)
            --pending;
        else
            ++count;

        ++cursor;
    }

    return count;
}
/*
 * Decode the NUL-terminated UTF-8 string `src` into `dest`, storing one
 * decoded value per wchar_t element, and NUL-terminate the result.
 *
 * Decoding stops early, leaving the already-decoded prefix terminated in
 * `dest`, when either
 *   - utf8_char_len() returns 0 for the byte at the current position, or
 *   - a multi-byte sequence has a trailing byte that is not of the form
 *     10xxxxxx. This includes a sequence cut short by the terminating NUL.
 *
 * Trailing bytes are checked in order and the scan stops at the first bad
 * one, so no byte beyond the terminator of `src` is ever read.
 *
 * `dest` is not bounds-checked: the caller must provide room for every
 * decoded element plus the terminator.
 */
void utf8to32_strcpy(wchar_t *dest, char *src)
{
    /* Payload bits carried by the first byte, indexed by sequence length. */
    static const unsigned char lead_mask[5] = { 0x00, 0x7f, 0x1f, 0x0f, 0x07 };

    /* Work on unsigned bytes so masking never depends on char signedness. */
    const unsigned char *in = (const unsigned char *)src;
    unsigned long len = 0;

    while (*in) {
        unsigned int clen = utf8_char_len(*in);
        unsigned long cp;
        unsigned int i;

        if (clen == 0)
            break;

        cp = (unsigned long)(*in & lead_mask[clen]);

        /* Fold in six payload bits per trailing byte, verifying each byte
         * before it is used. A NUL fails the check, so a truncated sequence
         * cannot carry the cursor past the end of the string. */
        for (i = 1; i < clen; ++i) {
            if ((in[i] & 0xC0) != 0x80)
                break;
            cp = (cp << 6) | (unsigned long)(in[i] & 0x3f);
        }
        if (i < clen)
            break;

        dest[len] = (wchar_t)cp;
        ++len;
        in += clen;
    }

    dest[len] = 0;
}
|