1 /*
2 * Copyright 2006 The Android Open Source Project
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8
9 #include "SkUtils.h"
10
// Stores `value` into the first `count` 16-bit slots of `buffer`.
// A zero or negative `count` writes nothing.
void sk_memset16(uint16_t buffer[], uint16_t value, int count) {
    uint16_t* dst = buffer;
    for (int remaining = count; remaining > 0; --remaining) {
        *dst++ = value;
    }
}
// Stores `value` into the first `count` 32-bit slots of `buffer`.
// A zero or negative `count` writes nothing.
void sk_memset32(uint32_t buffer[], uint32_t value, int count) {
    uint32_t* dst = buffer;
    for (int remaining = count; remaining > 0; --remaining) {
        *dst++ = value;
    }
}
// Stores `value` into the first `count` 64-bit slots of `buffer`.
// A zero or negative `count` writes nothing.
void sk_memset64(uint64_t buffer[], uint64_t value, int count) {
    uint64_t* dst = buffer;
    for (int remaining = count; remaining > 0; --remaining) {
        *dst++ = value;
    }
}
26
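/*  UTF-8 byte patterns and the total length of the sequence each one starts:

        0xxxxxxx    1 byte total (ASCII)
        10xxxxxx    continuation byte; never a leading byte
        110xxxxx    2 bytes total
        1110xxxx    3 bytes total
        11110xxx    4 bytes total

    The shift-based length lookup used below packs (length - 1) for the four
    multi-byte lead nibbles 0xF, 0xE, 0xD, 0xC into the 2-bit fields of a
    single byte:

        11 10 01 01  ==  0xE5

    That byte is placed at the top of a 32-bit word (0xE5 << 24, i.e.
    0xE5000000) and shifted right by twice the high nibble of the lead byte,
    which leaves the wanted field in the low two bits. For nibbles 0x0-0xB
    only zero bits reach the low two positions, giving a length of 1.
*/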
37
// True when `c` is a byte value that may appear somewhere in UTF-8 text.
// Rejects 0xC0 and 0xC1 as well as everything from 0xF5 through 0xFF.
static bool utf8_byte_is_valid(uint8_t c) {
    if (c == 0xC0 || c == 0xC1) {
        return false;
    }
    return c <= 0xF4;
}
// True when `c` has the bit pattern 10xxxxxx, i.e. it is a UTF-8
// continuation (trailing) byte.
static bool utf8_byte_is_continuation(uint8_t c) {
    const unsigned topTwoBits = c >> 6;
    return topTwoBits == 0x2;
}
utf8_byte_is_leading_byte(uint8_t c)44 static bool utf8_byte_is_leading_byte(uint8_t c) {
45 return utf8_byte_is_valid(c) && !utf8_byte_is_continuation(c);
46 }
47
#ifdef SK_DEBUG
    // Debug-only check that `c` is an acceptable UTF-8 leading byte. The
    // value is narrowed with SkToU8 before it is classified.
    static void assert_utf8_leadingbyte(unsigned c) {
        SkASSERT(utf8_byte_is_leading_byte(SkToU8(c)));
    }

    // Returns the total byte length (1..4) of the UTF-8 sequence that starts
    // with leading byte `c`. Only this checked, out-of-line version is
    // defined here, and only when SK_DEBUG is set.
    // NOTE(review): presumably the header supplies an unchecked equivalent
    // for non-debug builds -- confirm.
    int SkUTF8_LeadByteToCount(unsigned c) {
        assert_utf8_leadingbyte(c);
        // 0xE5 == 11 10 01 01b holds (length - 1) for lead nibbles F, E, D, C.
        // The constant is unsigned so that moving it into the top byte cannot
        // overflow a signed int and the right shift is a plain logical shift;
        // the low two bits extracted are the same as before.
        return (int)(((0xE5u << 24) >> (c >> 4 << 1)) & 3u) + 1;
    }
#else
    // In non-debug builds the leading-byte assertion compiles away to nothing.
    #define assert_utf8_leadingbyte(c)
#endif
60
/**
 *  Classifies a single byte of UTF-8 text.
 *
 *  @returns -1 iff invalid UTF8 byte (0xC0, 0xC1, 0xF5..0xFF),
 *            0 iff UTF8 continuation byte,
 *            1 iff ASCII byte,
 *            2 iff leading byte of 2-byte sequence,
 *            3 iff leading byte of 3-byte sequence, and
 *            4 iff leading byte of 4-byte sequence.
 *
 *  I.e.: if return value > 0, then gives length of sequence.
 *
 *  The lengths are derived from plain range comparisons instead of shifting
 *  a constant into the sign bit of an int, so no signed-shift overflow is
 *  involved.
 */
static int utf8_byte_type(uint8_t c) {
    if (c < 0x80) {
        return 1;   // 0xxxxxxx
    }
    if (c < 0xC0) {
        return 0;   // 10xxxxxx
    }
    if (c >= 0xF5 || (c & 0xFE) == 0xC0) {
        return -1;  // "octet values C0, C1, F5 to FF never appear"
    }
    if (c < 0xE0) {
        return 2;   // 110xxxxx
    }
    if (c < 0xF0) {
        return 3;   // 1110xxxx
    }
    return 4;       // 11110xxx (0xF0..0xF4)
}
// True when `type` (a value produced by utf8_byte_type) is a positive
// sequence length, i.e. the classified byte may start a sequence.
static bool utf8_type_is_valid_leading_byte(int type) {
    return type >= 1;
}
83
SkUTF8_CountUnichars(const char utf8[])84 int SkUTF8_CountUnichars(const char utf8[]) {
85 SkASSERT(utf8);
86
87 int count = 0;
88
89 for (;;) {
90 int c = *(const uint8_t*)utf8;
91 if (c == 0) {
92 break;
93 }
94 utf8 += SkUTF8_LeadByteToCount(c);
95 count += 1;
96 }
97 return count;
98 }
99
100 // SAFE: returns -1 if invalid UTF-8
SkUTF8_CountUnicharsWithError(const char utf8[],size_t byteLength)101 int SkUTF8_CountUnicharsWithError(const char utf8[], size_t byteLength) {
102 SkASSERT(utf8 || 0 == byteLength);
103
104 int count = 0;
105 const char* stop = utf8 + byteLength;
106
107 while (utf8 < stop) {
108 int type = utf8_byte_type(*(const uint8_t*)utf8);
109 SkASSERT(type >= -1 && type <= 4);
110 if (!utf8_type_is_valid_leading_byte(type) ||
111 utf8 + type > stop) { // Sequence extends beyond end.
112 return -1;
113 }
114 while(type-- > 1) {
115 ++utf8;
116 if (!utf8_byte_is_continuation(*(const uint8_t*)utf8)) {
117 return -1;
118 }
119 }
120 ++utf8;
121 ++count;
122 }
123 return count;
124 }
125
SkUTF8_ToUnichar(const char utf8[])126 SkUnichar SkUTF8_ToUnichar(const char utf8[]) {
127 SkASSERT(utf8);
128
129 const uint8_t* p = (const uint8_t*)utf8;
130 int c = *p;
131 int hic = c << 24;
132
133 assert_utf8_leadingbyte(c);
134
135 if (hic < 0) {
136 uint32_t mask = (uint32_t)~0x3F;
137 hic = SkLeftShift(hic, 1);
138 do {
139 c = (c << 6) | (*++p & 0x3F);
140 mask <<= 5;
141 } while ((hic = SkLeftShift(hic, 1)) < 0);
142 c &= ~mask;
143 }
144 return c;
145 }
146
147 // SAFE: returns -1 on invalid UTF-8 sequence.
SkUTF8_NextUnicharWithError(const char ** ptr,const char * end)148 SkUnichar SkUTF8_NextUnicharWithError(const char** ptr, const char* end) {
149 SkASSERT(ptr && *ptr);
150 SkASSERT(*ptr < end);
151 const uint8_t* p = (const uint8_t*)*ptr;
152 int c = *p;
153 int hic = c << 24;
154
155 if (!utf8_byte_is_leading_byte(c)) {
156 return -1;
157 }
158 if (hic < 0) {
159 uint32_t mask = (uint32_t)~0x3F;
160 hic = SkLeftShift(hic, 1);
161 do {
162 ++p;
163 if (p >= (const uint8_t*)end) {
164 return -1;
165 }
166 // check before reading off end of array.
167 uint8_t nextByte = *p;
168 if (!utf8_byte_is_continuation(nextByte)) {
169 return -1;
170 }
171 c = (c << 6) | (nextByte & 0x3F);
172 mask <<= 5;
173 } while ((hic = SkLeftShift(hic, 1)) < 0);
174 c &= ~mask;
175 }
176 *ptr = (char*)p + 1;
177 return c;
178 }
179
SkUTF8_NextUnichar(const char ** ptr)180 SkUnichar SkUTF8_NextUnichar(const char** ptr) {
181 SkASSERT(ptr && *ptr);
182
183 const uint8_t* p = (const uint8_t*)*ptr;
184 int c = *p;
185 int hic = c << 24;
186
187 assert_utf8_leadingbyte(c);
188
189 if (hic < 0) {
190 uint32_t mask = (uint32_t)~0x3F;
191 hic = SkLeftShift(hic, 1);
192 do {
193 c = (c << 6) | (*++p & 0x3F);
194 mask <<= 5;
195 } while ((hic = SkLeftShift(hic, 1)) < 0);
196 c &= ~mask;
197 }
198 *ptr = (char*)p + 1;
199 return c;
200 }
201
SkUTF8_PrevUnichar(const char ** ptr)202 SkUnichar SkUTF8_PrevUnichar(const char** ptr) {
203 SkASSERT(ptr && *ptr);
204
205 const char* p = *ptr;
206
207 if (*--p & 0x80) {
208 while (*--p & 0x40) {
209 ;
210 }
211 }
212
213 *ptr = (char*)p;
214 return SkUTF8_NextUnichar(&p);
215 }
216
SkUTF8_FromUnichar(SkUnichar uni,char utf8[])217 size_t SkUTF8_FromUnichar(SkUnichar uni, char utf8[]) {
218 if ((uint32_t)uni > 0x10FFFF) {
219 SkDEBUGFAIL("bad unichar");
220 return 0;
221 }
222
223 if (uni <= 127) {
224 if (utf8) {
225 *utf8 = (char)uni;
226 }
227 return 1;
228 }
229
230 char tmp[4];
231 char* p = tmp;
232 size_t count = 1;
233
234 SkDEBUGCODE(SkUnichar orig = uni;)
235
236 while (uni > 0x7F >> count) {
237 *p++ = (char)(0x80 | (uni & 0x3F));
238 uni >>= 6;
239 count += 1;
240 }
241
242 if (utf8) {
243 p = tmp;
244 utf8 += count;
245 while (p < tmp + count - 1) {
246 *--utf8 = *p++;
247 }
248 *--utf8 = (char)(~(0xFF >> count) | uni);
249 }
250
251 SkASSERT(utf8 == nullptr || orig == SkUTF8_ToUnichar(utf8));
252 return count;
253 }
254
255 ///////////////////////////////////////////////////////////////////////////////
256
SkUTF16_CountUnichars(const uint16_t src[])257 int SkUTF16_CountUnichars(const uint16_t src[]) {
258 SkASSERT(src);
259
260 int count = 0;
261 unsigned c;
262 while ((c = *src++) != 0) {
263 SkASSERT(!SkUTF16_IsLowSurrogate(c));
264 if (SkUTF16_IsHighSurrogate(c)) {
265 c = *src++;
266 SkASSERT(SkUTF16_IsLowSurrogate(c));
267 }
268 count += 1;
269 }
270 return count;
271 }
272
SkUTF16_CountUnichars(const uint16_t src[],int numberOf16BitValues)273 int SkUTF16_CountUnichars(const uint16_t src[], int numberOf16BitValues) {
274 SkASSERT(src);
275
276 const uint16_t* stop = src + numberOf16BitValues;
277 int count = 0;
278 while (src < stop) {
279 unsigned c = *src++;
280 SkASSERT(!SkUTF16_IsLowSurrogate(c));
281 if (SkUTF16_IsHighSurrogate(c)) {
282 SkASSERT(src < stop);
283 c = *src++;
284 SkASSERT(SkUTF16_IsLowSurrogate(c));
285 }
286 count += 1;
287 }
288 return count;
289 }
290
SkUTF16_NextUnichar(const uint16_t ** srcPtr)291 SkUnichar SkUTF16_NextUnichar(const uint16_t** srcPtr) {
292 SkASSERT(srcPtr && *srcPtr);
293
294 const uint16_t* src = *srcPtr;
295 SkUnichar c = *src++;
296
297 SkASSERT(!SkUTF16_IsLowSurrogate(c));
298 if (SkUTF16_IsHighSurrogate(c)) {
299 unsigned c2 = *src++;
300 SkASSERT(SkUTF16_IsLowSurrogate(c2));
301
302 // c = ((c & 0x3FF) << 10) + (c2 & 0x3FF) + 0x10000
303 // c = (((c & 0x3FF) + 64) << 10) + (c2 & 0x3FF)
304 c = (c << 10) + c2 + (0x10000 - (0xD800 << 10) - 0xDC00);
305 }
306 *srcPtr = src;
307 return c;
308 }
309
SkUTF16_PrevUnichar(const uint16_t ** srcPtr)310 SkUnichar SkUTF16_PrevUnichar(const uint16_t** srcPtr) {
311 SkASSERT(srcPtr && *srcPtr);
312
313 const uint16_t* src = *srcPtr;
314 SkUnichar c = *--src;
315
316 SkASSERT(!SkUTF16_IsHighSurrogate(c));
317 if (SkUTF16_IsLowSurrogate(c)) {
318 unsigned c2 = *--src;
319 SkASSERT(SkUTF16_IsHighSurrogate(c2));
320 c = (c2 << 10) + c + (0x10000 - (0xD800 << 10) - 0xDC00);
321 }
322 *srcPtr = src;
323 return c;
324 }
325
SkUTF16_FromUnichar(SkUnichar uni,uint16_t dst[])326 size_t SkUTF16_FromUnichar(SkUnichar uni, uint16_t dst[]) {
327 SkASSERT((unsigned)uni <= 0x10FFFF);
328
329 int extra = (uni > 0xFFFF);
330
331 if (dst) {
332 if (extra) {
333 // dst[0] = SkToU16(0xD800 | ((uni - 0x10000) >> 10));
334 // dst[0] = SkToU16(0xD800 | ((uni >> 10) - 64));
335 dst[0] = SkToU16((0xD800 - 64) + (uni >> 10));
336 dst[1] = SkToU16(0xDC00 | (uni & 0x3FF));
337
338 SkASSERT(SkUTF16_IsHighSurrogate(dst[0]));
339 SkASSERT(SkUTF16_IsLowSurrogate(dst[1]));
340 } else {
341 dst[0] = SkToU16(uni);
342 SkASSERT(!SkUTF16_IsHighSurrogate(dst[0]));
343 SkASSERT(!SkUTF16_IsLowSurrogate(dst[0]));
344 }
345 }
346 return 1 + extra;
347 }
348
SkUTF16_ToUTF8(const uint16_t utf16[],int numberOf16BitValues,char utf8[])349 size_t SkUTF16_ToUTF8(const uint16_t utf16[], int numberOf16BitValues,
350 char utf8[]) {
351 SkASSERT(numberOf16BitValues >= 0);
352 if (numberOf16BitValues <= 0) {
353 return 0;
354 }
355
356 SkASSERT(utf16 != nullptr);
357
358 const uint16_t* stop = utf16 + numberOf16BitValues;
359 size_t size = 0;
360
361 if (utf8 == nullptr) { // just count
362 while (utf16 < stop) {
363 size += SkUTF8_FromUnichar(SkUTF16_NextUnichar(&utf16), nullptr);
364 }
365 } else {
366 char* start = utf8;
367 while (utf16 < stop) {
368 utf8 += SkUTF8_FromUnichar(SkUTF16_NextUnichar(&utf16), utf8);
369 }
370 size = utf8 - start;
371 }
372 return size;
373 }
374