-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathstring.c
138 lines (104 loc) · 3.02 KB
/
string.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
#pragma clang diagnostic ignored "-Wgnu-binary-literal"
#include "string.h"
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// Reports whether the two NUL-terminated strings `str` and `other` hold
// exactly the same bytes. Both pointers must be non-NULL.
bool str_equals(const char* const str, const char* const other)
{
    const int order = strcmp(str, other);
    return !order;
}
// Reports whether `str` and `other` agree on their first `n` bytes.
// Comparison follows strncmp: it stops early at a NUL terminator, so two
// strings shorter than `n` are equal when they are identical.
bool str_equals_n(const char* const str, const char* const other, const unsigned int n)
{
    const int order = strncmp(str, other, n);
    return !order;
}
// Reports whether `str` begins with `prefix`. An empty prefix matches every
// string. Both pointers must be non-NULL, NUL-terminated strings.
bool str_starts_with(const char* const str, const char* const prefix)
{
    // Hand the size_t length straight to strncmp: routing it through an
    // `unsigned int` parameter would silently truncate very long prefixes
    // and compare fewer bytes than intended.
    const size_t prefix_len = strlen(prefix);
    return strncmp(str, prefix, prefix_len) == 0;
}
// Returns a newly allocated copy of the NUL-terminated string `str`.
//
// The caller owns the result and must release it with free().
// Returns NULL if the allocation fails. `str` must be non-NULL.
char* copy_str(const char* const str)
{
    // Include the terminator in the size so a single memcpy copies it too.
    const size_t size = strlen(str) + 1;

    char* const dst = malloc(size);
    if (dst == NULL) {
        return NULL;
    }

    memcpy(dst, str, size);
    return dst;
}
// Formats the printf-style `format` and its arguments into a newly
// allocated, NUL-terminated string.
//
// The caller owns the result and must release it with free().
// Returns NULL if formatting or allocation fails.
char* format_str(const char* const format, ...)
{
    va_list args;
    va_start(args, format);

    // First pass only measures the output. It consumes a copy of the
    // argument list so `args` is still intact for the real formatting pass.
    va_list measure_args;
    va_copy(measure_args, args);
    const int len = vsnprintf(NULL, 0, format, measure_args);
    va_end(measure_args);

    char* data = NULL;
    if (len >= 0) {
        const size_t size = (size_t)len + 1; // room for the terminator
        data = malloc(size);
        if (data != NULL && vsnprintf(data, size, format, args) < 0) {
            free(data);
            data = NULL;
        }
    }

    va_end(args);
    return data;
}
// Encodes the code point `code` as UTF-8 into `dst`.
//
// Writes between 1 and 4 bytes and does NOT append a NUL terminator, so
// `dst` must have room for at least 4 bytes. Returns a descriptor holding
// the original `code` and the number of bytes written in `len`.
//
// No validation is performed: surrogates are encoded like any other value,
// and bits above the 21 that fit in a four-byte sequence are discarded.
struct char_descriptor encode_char_utf8(char* const dst, const uint32_t code)
{
    struct char_descriptor desc = { .code = code };

    // 0xxxxxxx (7 payload bits)
    if (code <= 0x7f) {
        dst[0] = (char)(0b01111111 & code);
        desc.len = 1;
        return desc;
    }

    // 110xxxxx 10xxxxxx (11 payload bits)
    if (code <= 0x7ff) {
        dst[0] = (char)(0b11000000 | (0b00011111 & (code >> 6)));
        dst[1] = (char)(0b10000000 | (0b00111111 & code));
        desc.len = 2;
        return desc;
    }

    // 1110xxxx 10xxxxxx 10xxxxxx (16 payload bits, so everything up to
    // U+FFFF belongs here; a lower bound would push U+8000..U+FFFF into an
    // over-long four-byte form).
    if (code <= 0xffff) {
        dst[0] = (char)(0b11100000 | (0b00001111 & (code >> 12)));
        dst[1] = (char)(0b10000000 | (0b00111111 & (code >> 6)));
        dst[2] = (char)(0b10000000 | (0b00111111 & code));
        desc.len = 3;
        return desc;
    }

    // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx (21 payload bits)
    dst[0] = (char)(0b11110000 | (0b00000111 & (code >> 18)));
    dst[1] = (char)(0b10000000 | (0b00111111 & (code >> 12)));
    dst[2] = (char)(0b10000000 | (0b00111111 & (code >> 6)));
    dst[3] = (char)(0b10000000 | (0b00111111 & code));
    desc.len = 4;
    return desc;
}
// Decodes the UTF-8 sequence that starts at `src`.
//
// Returns a descriptor with the decoded code point in `code` and the number
// of bytes consumed in `len`. `len` is always at least 1, so a caller that
// advances by `len` is guaranteed to make progress.
//
// Malformed input (a byte that cannot start a sequence, or a sequence cut
// short by a non-continuation byte such as the NUL terminator) yields
// U+FFFD with `len` set to the bytes consumed so far. The offending byte
// itself is never consumed, and nothing past it is read. Over-long forms
// and surrogate values are not rejected.
struct char_descriptor decode_char_utf8(const char* const src)
{
    enum { REPLACEMENT_CHAR = 0xfffd };

    struct char_descriptor desc = { 0 };
    const unsigned char* const c = (const unsigned char*)src;

    // 0xxxxxxx: plain ASCII, including the NUL terminator itself.
    if (*c <= 0x7f) {
        desc.code = 0b01111111 & *c;
        desc.len = 1;
        return desc;
    }

    // 10xxxxxx is a continuation byte and 11111xxx is never used by UTF-8;
    // neither can begin a sequence.
    if (*c < 0b11000000 || *c >= 0b11111000) {
        desc.code = REPLACEMENT_CHAR;
        desc.len = 1;
        return desc;
    }

    // The lead byte determines the sequence length and holds the high bits.
    int expected = 2;
    if (*c >= 0b11110000) {
        expected = 4;
        desc.code = 0b00000111 & *c;
    } else if (*c >= 0b11100000) {
        expected = 3;
        desc.code = 0b00001111 & *c;
    } else {
        desc.code = 0b00011111 & *c;
    }

    for (int i = 1; i < expected; i++) {
        // Every trailing byte must look like 10xxxxxx. Checking before use
        // also stops at the NUL terminator instead of reading past it.
        if ((c[i] & 0b11000000) != 0b10000000) {
            desc.code = REPLACEMENT_CHAR;
            desc.len = i;
            return desc;
        }
        desc.code = (desc.code << 6) | (0b00111111 & c[i]);
    }

    desc.len = expected;
    return desc;
}
// Decodes the NUL-terminated UTF-8 string `src` into `descs`, one descriptor
// per decoded character, in order.
//
// No terminating descriptor is written and no count is returned. `descs`
// must have room for one entry per character; one entry per byte of `src`
// is always enough.
//
// Decoding stops early if a descriptor reports a non-positive length, since
// such a descriptor gives no way to advance through the input.
void decode_str_utf8(struct char_descriptor* const descs, const char* const src)
{
    size_t count = 0;
    const char* cursor = src;

    while (*cursor != '\0') {
        const struct char_descriptor desc = decode_char_utf8(cursor);
        if (desc.len <= 0) {
            // Without this the loop would spin forever, writing to `descs`
            // without bound.
            break;
        }

        descs[count] = desc;
        count++;

        // Step byte by byte so that a sequence claiming more bytes than
        // remain can never carry the cursor over the terminator.
        for (int step = 0; step < desc.len && *cursor != '\0'; step++) {
            cursor++;
        }
    }
}