1 /*
2  * Copyright 2006 The Android Open Source Project
3  *
4  * Use of this source code is governed by a BSD-style license that can be
5  * found in the LICENSE file.
6  */
7 
8 
9 #include "SkUtils.h"
10 
/**
 * Stores `value` into the first `count` 16-bit elements of `buffer`.
 * Nothing is written when `count` is zero or negative.
 */
void sk_memset16(uint16_t buffer[], uint16_t value, int count) {
    uint16_t* dst = buffer;
    for (int remaining = count; remaining > 0; --remaining) {
        *dst++ = value;
    }
}
/**
 * Stores `value` into the first `count` 32-bit elements of `buffer`.
 * Nothing is written when `count` is zero or negative.
 */
void sk_memset32(uint32_t buffer[], uint32_t value, int count) {
    uint32_t* dst = buffer;
    for (int remaining = count; remaining > 0; --remaining) {
        *dst++ = value;
    }
}
/**
 * Stores `value` into the first `count` 64-bit elements of `buffer`.
 * Nothing is written when `count` is zero or negative.
 */
void sk_memset64(uint64_t buffer[], uint64_t value, int count) {
    uint64_t* dst = buffer;
    for (int remaining = count; remaining > 0; --remaining) {
        *dst++ = value;
    }
}
26 
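/*  UTF-8 lead byte patterns and the total length of the sequence they start:

    0xxxxxxx    1 total
    10xxxxxx    // never a leading byte (continuation byte)
    110xxxxx    2 total
    1110xxxx    3 total
    11110xxx    4 total

    (total - 1) for every possible high nibble of the lead byte is packed, two
    bits per nibble, into one 32-bit constant. Nibble 0xF occupies the top two
    bits, nibble 0xE the next two, and so on; "xx" marks the entries for the
    continuation-byte nibbles (0x8..0xB), which are never looked up for a real
    lead byte:

    11 10 01 01 xx xx xx xx 0...
    0xE5XX0000
    0xE5 << 24

    Hence ((0xE5 << 24) >> ((c >> 4) * 2)) & 3 is (total - 1) for lead byte c.
*/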
37 
/**
 * Returns true unless `c` is one of the byte values that can never occur in
 * UTF-8: 0xC0, 0xC1, or anything from 0xF5 through 0xFF.
 */
static bool utf8_byte_is_valid(uint8_t c) {
    if (c >= 0xF5) {
        return false;
    }
    return c != 0xC0 && c != 0xC1;
}
/**
 * Returns true when `c` has the bit pattern 10xxxxxx, i.e. lies in 0x80..0xBF.
 */
static bool utf8_byte_is_continuation(uint8_t c) {
    return c >= 0x80 && c <= 0xBF;
}
utf8_byte_is_leading_byte(uint8_t c)44 static bool utf8_byte_is_leading_byte(uint8_t c) {
45     return utf8_byte_is_valid(c) && !utf8_byte_is_continuation(c);
46 }
47 
#ifdef SK_DEBUG
    /**
     * Debug-only check: asserts that `c`, converted with SkToU8, is a byte
     * that may start a UTF-8 sequence.
     */
    static void assert_utf8_leadingbyte(unsigned c) {
        SkASSERT(utf8_byte_is_leading_byte(SkToU8(c)));
    }

    /**
     * Debug-build definition of SkUTF8_LeadByteToCount: asserts that `c` is a
     * lead byte, then returns the total number of bytes (1..4) in the sequence
     * it starts.
     *
     * NOTE(review): this block does not define the function when SK_DEBUG is
     * off; presumably the header supplies it for that configuration - confirm.
     */
    int SkUTF8_LeadByteToCount(unsigned c) {
        assert_utf8_leadingbyte(c);
        // 0xE5 << 24 is a packed table of (count - 1), two bits per high
        // nibble of the lead byte. It is built as an unsigned value because
        // the same expression on a signed int overflows into the sign bit and
        // then right-shifts a negative number.
        return (int)(((0xE5u << 24) >> (c >> 4 << 1)) & 3) + 1;
    }
#else
    // Without SK_DEBUG the lead-byte assertion compiles to nothing.
    #define assert_utf8_leadingbyte(c)
#endif
60 
/**
 * Classifies a single byte of UTF-8 text.
 *
 * @param c  The byte to classify.
 * @returns -1  iff invalid UTF8 byte,
 *           0  iff UTF8 continuation byte,
 *           1  iff ASCII byte,
 *           2  iff leading byte of 2-byte sequence,
 *           3  iff leading byte of 3-byte sequence, and
 *           4  iff leading byte of 4-byte sequence.
 *
 * I.e.: if return value > 0, then gives length of sequence.
*/
static int utf8_byte_type(uint8_t c) {
    if (c < 0x80) {
        return 1;
    } else if (c < 0xC0) {
        return 0;
    } else if (c < 0xF5 && (c & 0xFE) != 0xC0) { // "octet values C0, C1, F5 to FF never appear"
        // 0xE5 << 24 is a packed table of (length - 1), two bits per high
        // nibble of the lead byte. Build it as an unsigned value: on a signed
        // int the shift overflows into the sign bit and the lookup then
        // right-shifts a negative number.
        return (int)(((0xE5u << 24) >> ((unsigned)c >> 4 << 1)) & 3) + 1;
    } else {
        return -1;
    }
}
/**
 * Returns true when `type` is strictly positive. For a value produced by
 * utf8_byte_type() that means the byte starts a sequence of `type` bytes.
 */
static bool utf8_type_is_valid_leading_byte(int type) {
    return type >= 1;
}
83 
SkUTF8_CountUnichars(const char utf8[])84 int SkUTF8_CountUnichars(const char utf8[]) {
85     SkASSERT(utf8);
86 
87     int count = 0;
88 
89     for (;;) {
90         int c = *(const uint8_t*)utf8;
91         if (c == 0) {
92             break;
93         }
94         utf8 += SkUTF8_LeadByteToCount(c);
95         count += 1;
96     }
97     return count;
98 }
99 
100 // SAFE: returns -1 if invalid UTF-8
SkUTF8_CountUnicharsWithError(const char utf8[],size_t byteLength)101 int SkUTF8_CountUnicharsWithError(const char utf8[], size_t byteLength) {
102     SkASSERT(utf8 || 0 == byteLength);
103 
104     int         count = 0;
105     const char* stop = utf8 + byteLength;
106 
107     while (utf8 < stop) {
108         int type = utf8_byte_type(*(const uint8_t*)utf8);
109         SkASSERT(type >= -1 && type <= 4);
110         if (!utf8_type_is_valid_leading_byte(type) ||
111             utf8 + type > stop) {  // Sequence extends beyond end.
112             return -1;
113         }
114         while(type-- > 1) {
115             ++utf8;
116             if (!utf8_byte_is_continuation(*(const uint8_t*)utf8)) {
117                 return -1;
118             }
119         }
120         ++utf8;
121         ++count;
122     }
123     return count;
124 }
125 
SkUTF8_ToUnichar(const char utf8[])126 SkUnichar SkUTF8_ToUnichar(const char utf8[]) {
127     SkASSERT(utf8);
128 
129     const uint8_t*  p = (const uint8_t*)utf8;
130     int             c = *p;
131     int             hic = c << 24;
132 
133     assert_utf8_leadingbyte(c);
134 
135     if (hic < 0) {
136         uint32_t mask = (uint32_t)~0x3F;
137         hic = SkLeftShift(hic, 1);
138         do {
139             c = (c << 6) | (*++p & 0x3F);
140             mask <<= 5;
141         } while ((hic = SkLeftShift(hic, 1)) < 0);
142         c &= ~mask;
143     }
144     return c;
145 }
146 
147 // SAFE: returns -1 on invalid UTF-8 sequence.
SkUTF8_NextUnicharWithError(const char ** ptr,const char * end)148 SkUnichar SkUTF8_NextUnicharWithError(const char** ptr, const char* end) {
149     SkASSERT(ptr && *ptr);
150     SkASSERT(*ptr < end);
151     const uint8_t*  p = (const uint8_t*)*ptr;
152     int             c = *p;
153     int             hic = c << 24;
154 
155     if (!utf8_byte_is_leading_byte(c)) {
156         return -1;
157     }
158     if (hic < 0) {
159         uint32_t mask = (uint32_t)~0x3F;
160         hic = SkLeftShift(hic, 1);
161         do {
162             ++p;
163             if (p >= (const uint8_t*)end) {
164                 return -1;
165             }
166             // check before reading off end of array.
167             uint8_t nextByte = *p;
168             if (!utf8_byte_is_continuation(nextByte)) {
169                 return -1;
170             }
171             c = (c << 6) | (nextByte & 0x3F);
172             mask <<= 5;
173         } while ((hic = SkLeftShift(hic, 1)) < 0);
174         c &= ~mask;
175     }
176     *ptr = (char*)p + 1;
177     return c;
178 }
179 
SkUTF8_NextUnichar(const char ** ptr)180 SkUnichar SkUTF8_NextUnichar(const char** ptr) {
181     SkASSERT(ptr && *ptr);
182 
183     const uint8_t*  p = (const uint8_t*)*ptr;
184     int             c = *p;
185     int             hic = c << 24;
186 
187     assert_utf8_leadingbyte(c);
188 
189     if (hic < 0) {
190         uint32_t mask = (uint32_t)~0x3F;
191         hic = SkLeftShift(hic, 1);
192         do {
193             c = (c << 6) | (*++p & 0x3F);
194             mask <<= 5;
195         } while ((hic = SkLeftShift(hic, 1)) < 0);
196         c &= ~mask;
197     }
198     *ptr = (char*)p + 1;
199     return c;
200 }
201 
SkUTF8_PrevUnichar(const char ** ptr)202 SkUnichar SkUTF8_PrevUnichar(const char** ptr) {
203     SkASSERT(ptr && *ptr);
204 
205     const char* p = *ptr;
206 
207     if (*--p & 0x80) {
208         while (*--p & 0x40) {
209             ;
210         }
211     }
212 
213     *ptr = (char*)p;
214     return SkUTF8_NextUnichar(&p);
215 }
216 
SkUTF8_FromUnichar(SkUnichar uni,char utf8[])217 size_t SkUTF8_FromUnichar(SkUnichar uni, char utf8[]) {
218     if ((uint32_t)uni > 0x10FFFF) {
219         SkDEBUGFAIL("bad unichar");
220         return 0;
221     }
222 
223     if (uni <= 127) {
224         if (utf8) {
225             *utf8 = (char)uni;
226         }
227         return 1;
228     }
229 
230     char    tmp[4];
231     char*   p = tmp;
232     size_t  count = 1;
233 
234     SkDEBUGCODE(SkUnichar orig = uni;)
235 
236     while (uni > 0x7F >> count) {
237         *p++ = (char)(0x80 | (uni & 0x3F));
238         uni >>= 6;
239         count += 1;
240     }
241 
242     if (utf8) {
243         p = tmp;
244         utf8 += count;
245         while (p < tmp + count - 1) {
246             *--utf8 = *p++;
247         }
248         *--utf8 = (char)(~(0xFF >> count) | uni);
249     }
250 
251     SkASSERT(utf8 == nullptr || orig == SkUTF8_ToUnichar(utf8));
252     return count;
253 }
254 
255 ///////////////////////////////////////////////////////////////////////////////
256 
SkUTF16_CountUnichars(const uint16_t src[])257 int SkUTF16_CountUnichars(const uint16_t src[]) {
258     SkASSERT(src);
259 
260     int count = 0;
261     unsigned c;
262     while ((c = *src++) != 0) {
263         SkASSERT(!SkUTF16_IsLowSurrogate(c));
264         if (SkUTF16_IsHighSurrogate(c)) {
265             c = *src++;
266             SkASSERT(SkUTF16_IsLowSurrogate(c));
267         }
268         count += 1;
269     }
270     return count;
271 }
272 
SkUTF16_CountUnichars(const uint16_t src[],int numberOf16BitValues)273 int SkUTF16_CountUnichars(const uint16_t src[], int numberOf16BitValues) {
274     SkASSERT(src);
275 
276     const uint16_t* stop = src + numberOf16BitValues;
277     int count = 0;
278     while (src < stop) {
279         unsigned c = *src++;
280         SkASSERT(!SkUTF16_IsLowSurrogate(c));
281         if (SkUTF16_IsHighSurrogate(c)) {
282             SkASSERT(src < stop);
283             c = *src++;
284             SkASSERT(SkUTF16_IsLowSurrogate(c));
285         }
286         count += 1;
287     }
288     return count;
289 }
290 
SkUTF16_NextUnichar(const uint16_t ** srcPtr)291 SkUnichar SkUTF16_NextUnichar(const uint16_t** srcPtr) {
292     SkASSERT(srcPtr && *srcPtr);
293 
294     const uint16_t* src = *srcPtr;
295     SkUnichar       c = *src++;
296 
297     SkASSERT(!SkUTF16_IsLowSurrogate(c));
298     if (SkUTF16_IsHighSurrogate(c)) {
299         unsigned c2 = *src++;
300         SkASSERT(SkUTF16_IsLowSurrogate(c2));
301 
302         // c = ((c & 0x3FF) << 10) + (c2 & 0x3FF) + 0x10000
303         // c = (((c & 0x3FF) + 64) << 10) + (c2 & 0x3FF)
304         c = (c << 10) + c2 + (0x10000 - (0xD800 << 10) - 0xDC00);
305     }
306     *srcPtr = src;
307     return c;
308 }
309 
SkUTF16_PrevUnichar(const uint16_t ** srcPtr)310 SkUnichar SkUTF16_PrevUnichar(const uint16_t** srcPtr) {
311     SkASSERT(srcPtr && *srcPtr);
312 
313     const uint16_t* src = *srcPtr;
314     SkUnichar       c = *--src;
315 
316     SkASSERT(!SkUTF16_IsHighSurrogate(c));
317     if (SkUTF16_IsLowSurrogate(c)) {
318         unsigned c2 = *--src;
319         SkASSERT(SkUTF16_IsHighSurrogate(c2));
320         c = (c2 << 10) + c + (0x10000 - (0xD800 << 10) - 0xDC00);
321     }
322     *srcPtr = src;
323     return c;
324 }
325 
SkUTF16_FromUnichar(SkUnichar uni,uint16_t dst[])326 size_t SkUTF16_FromUnichar(SkUnichar uni, uint16_t dst[]) {
327     SkASSERT((unsigned)uni <= 0x10FFFF);
328 
329     int extra = (uni > 0xFFFF);
330 
331     if (dst) {
332         if (extra) {
333             // dst[0] = SkToU16(0xD800 | ((uni - 0x10000) >> 10));
334             // dst[0] = SkToU16(0xD800 | ((uni >> 10) - 64));
335             dst[0] = SkToU16((0xD800 - 64) + (uni >> 10));
336             dst[1] = SkToU16(0xDC00 | (uni & 0x3FF));
337 
338             SkASSERT(SkUTF16_IsHighSurrogate(dst[0]));
339             SkASSERT(SkUTF16_IsLowSurrogate(dst[1]));
340         } else {
341             dst[0] = SkToU16(uni);
342             SkASSERT(!SkUTF16_IsHighSurrogate(dst[0]));
343             SkASSERT(!SkUTF16_IsLowSurrogate(dst[0]));
344         }
345     }
346     return 1 + extra;
347 }
348 
SkUTF16_ToUTF8(const uint16_t utf16[],int numberOf16BitValues,char utf8[])349 size_t SkUTF16_ToUTF8(const uint16_t utf16[], int numberOf16BitValues,
350                       char utf8[]) {
351     SkASSERT(numberOf16BitValues >= 0);
352     if (numberOf16BitValues <= 0) {
353         return 0;
354     }
355 
356     SkASSERT(utf16 != nullptr);
357 
358     const uint16_t* stop = utf16 + numberOf16BitValues;
359     size_t          size = 0;
360 
361     if (utf8 == nullptr) {    // just count
362         while (utf16 < stop) {
363             size += SkUTF8_FromUnichar(SkUTF16_NextUnichar(&utf16), nullptr);
364         }
365     } else {
366         char* start = utf8;
367         while (utf16 < stop) {
368             utf8 += SkUTF8_FromUnichar(SkUTF16_NextUnichar(&utf16), utf8);
369         }
370         size = utf8 - start;
371     }
372     return size;
373 }
374