1 
2 /*
3  * Copyright 2006 The Android Open Source Project
4  *
5  * Use of this source code is governed by a BSD-style license that can be
6  * found in the LICENSE file.
7  */
8 
9 
10 #include "SkUtils.h"
11 
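/*  UTF-8 lead-byte patterns and the total length of the sequence each starts:

    0xxxxxxx    1 total
    10xxxxxx    // never a leading byte
    110xxxxx    2 total
    1110xxxx    3 total
    11110xxx    4 total

    SkUTF8_LeadByteToCount() stores (length - 1) as a 2-bit field per value of
    the lead byte's high nibble, packed into a 32-bit word. Only nibbles
    0xC..0xF need a non-zero field, and those land in the top byte:

    11 10 01 01 xx xx xx xx 0...
    0xE5XX0000
    0xE5 << 24

    Shifting that word right by 2 * (lead >> 4) and masking with 3 yields
    (length - 1) for the lead byte.
*/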
22 
#ifdef SK_DEBUG
    /**
     *  Checks (via SkASSERT) that c can be the first byte of a UTF-8 sequence:
     *  it must not announce more than 4 bytes and must not be a continuation
     *  byte.
     */
    static void assert_utf8_leadingbyte(unsigned c) {
        SkASSERT(c <= 0xF7);    // otherwise leading byte is too big (more than 4 bytes)
        SkASSERT((c & 0xC0) != 0x80);   // can't begin with a middle char
    }

    /**
     *  Returns the total number of bytes (1..4) in the UTF-8 sequence whose
     *  lead byte is c.
     *
     *  The answer comes from a packed table: for each value of the lead
     *  byte's high nibble, a 2-bit field holds (length - 1). Only nibbles
     *  0xC..0xF have a non-zero field, so the whole table is 0xE5 in the top
     *  byte of a 32-bit word.
     */
    int SkUTF8_LeadByteToCount(unsigned c) {
        assert_utf8_leadingbyte(c);

        // The table is unsigned on purpose: its top bit is data, not a sign.
        // Writing it as (0xE5 << 24) shifts a 1 into the sign bit of an int
        // and then right-shifts a negative value, neither of which has a
        // portably defined result before C++20.
        const uint32_t kExtraBytesTable = 0xE5000000u;

        // Two table bits per high-nibble value.
        const unsigned fieldShift = (c >> 4) << 1;

        return (int)((kExtraBytesTable >> fieldShift) & 3) + 1;
    }
#else
    // Without SK_DEBUG the lead-byte check expands to nothing.
    // NOTE(review): SkUTF8_LeadByteToCount is not defined in this branch; it is
    // presumably supplied by a header - confirm.
    #define assert_utf8_leadingbyte(c)
#endif
36 
SkUTF8_CountUnichars(const char utf8[])37 int SkUTF8_CountUnichars(const char utf8[]) {
38     SkASSERT(utf8);
39 
40     int count = 0;
41 
42     for (;;) {
43         int c = *(const uint8_t*)utf8;
44         if (c == 0) {
45             break;
46         }
47         utf8 += SkUTF8_LeadByteToCount(c);
48         count += 1;
49     }
50     return count;
51 }
52 
SkUTF8_CountUnichars(const char utf8[],size_t byteLength)53 int SkUTF8_CountUnichars(const char utf8[], size_t byteLength) {
54     SkASSERT(utf8 || 0 == byteLength);
55 
56     int         count = 0;
57     const char* stop = utf8 + byteLength;
58 
59     while (utf8 < stop) {
60         utf8 += SkUTF8_LeadByteToCount(*(const uint8_t*)utf8);
61         count += 1;
62     }
63     return count;
64 }
65 
SkUTF8_ToUnichar(const char utf8[])66 SkUnichar SkUTF8_ToUnichar(const char utf8[]) {
67     SkASSERT(utf8);
68 
69     const uint8_t*  p = (const uint8_t*)utf8;
70     int             c = *p;
71     int             hic = c << 24;
72 
73     assert_utf8_leadingbyte(c);
74 
75     if (hic < 0) {
76         uint32_t mask = (uint32_t)~0x3F;
77         hic = SkLeftShift(hic, 1);
78         do {
79             c = (c << 6) | (*++p & 0x3F);
80             mask <<= 5;
81         } while ((hic = SkLeftShift(hic, 1)) < 0);
82         c &= ~mask;
83     }
84     return c;
85 }
86 
SkUTF8_NextUnichar(const char ** ptr)87 SkUnichar SkUTF8_NextUnichar(const char** ptr) {
88     SkASSERT(ptr && *ptr);
89 
90     const uint8_t*  p = (const uint8_t*)*ptr;
91     int             c = *p;
92     int             hic = c << 24;
93 
94     assert_utf8_leadingbyte(c);
95 
96     if (hic < 0) {
97         uint32_t mask = (uint32_t)~0x3F;
98         hic = SkLeftShift(hic, 1);
99         do {
100             c = (c << 6) | (*++p & 0x3F);
101             mask <<= 5;
102         } while ((hic = SkLeftShift(hic, 1)) < 0);
103         c &= ~mask;
104     }
105     *ptr = (char*)p + 1;
106     return c;
107 }
108 
SkUTF8_PrevUnichar(const char ** ptr)109 SkUnichar SkUTF8_PrevUnichar(const char** ptr) {
110     SkASSERT(ptr && *ptr);
111 
112     const char* p = *ptr;
113 
114     if (*--p & 0x80) {
115         while (*--p & 0x40) {
116             ;
117         }
118     }
119 
120     *ptr = (char*)p;
121     return SkUTF8_NextUnichar(&p);
122 }
123 
SkUTF8_FromUnichar(SkUnichar uni,char utf8[])124 size_t SkUTF8_FromUnichar(SkUnichar uni, char utf8[]) {
125     if ((uint32_t)uni > 0x10FFFF) {
126         SkDEBUGFAIL("bad unichar");
127         return 0;
128     }
129 
130     if (uni <= 127) {
131         if (utf8) {
132             *utf8 = (char)uni;
133         }
134         return 1;
135     }
136 
137     char    tmp[4];
138     char*   p = tmp;
139     size_t  count = 1;
140 
141     SkDEBUGCODE(SkUnichar orig = uni;)
142 
143     while (uni > 0x7F >> count) {
144         *p++ = (char)(0x80 | (uni & 0x3F));
145         uni >>= 6;
146         count += 1;
147     }
148 
149     if (utf8) {
150         p = tmp;
151         utf8 += count;
152         while (p < tmp + count - 1) {
153             *--utf8 = *p++;
154         }
155         *--utf8 = (char)(~(0xFF >> count) | uni);
156     }
157 
158     SkASSERT(utf8 == nullptr || orig == SkUTF8_ToUnichar(utf8));
159     return count;
160 }
161 
162 ///////////////////////////////////////////////////////////////////////////////
163 
SkUTF16_CountUnichars(const uint16_t src[])164 int SkUTF16_CountUnichars(const uint16_t src[]) {
165     SkASSERT(src);
166 
167     int count = 0;
168     unsigned c;
169     while ((c = *src++) != 0) {
170         SkASSERT(!SkUTF16_IsLowSurrogate(c));
171         if (SkUTF16_IsHighSurrogate(c)) {
172             c = *src++;
173             SkASSERT(SkUTF16_IsLowSurrogate(c));
174         }
175         count += 1;
176     }
177     return count;
178 }
179 
SkUTF16_CountUnichars(const uint16_t src[],int numberOf16BitValues)180 int SkUTF16_CountUnichars(const uint16_t src[], int numberOf16BitValues) {
181     SkASSERT(src);
182 
183     const uint16_t* stop = src + numberOf16BitValues;
184     int count = 0;
185     while (src < stop) {
186         unsigned c = *src++;
187         SkASSERT(!SkUTF16_IsLowSurrogate(c));
188         if (SkUTF16_IsHighSurrogate(c)) {
189             SkASSERT(src < stop);
190             c = *src++;
191             SkASSERT(SkUTF16_IsLowSurrogate(c));
192         }
193         count += 1;
194     }
195     return count;
196 }
197 
SkUTF16_NextUnichar(const uint16_t ** srcPtr)198 SkUnichar SkUTF16_NextUnichar(const uint16_t** srcPtr) {
199     SkASSERT(srcPtr && *srcPtr);
200 
201     const uint16_t* src = *srcPtr;
202     SkUnichar       c = *src++;
203 
204     SkASSERT(!SkUTF16_IsLowSurrogate(c));
205     if (SkUTF16_IsHighSurrogate(c)) {
206         unsigned c2 = *src++;
207         SkASSERT(SkUTF16_IsLowSurrogate(c2));
208 
209         // c = ((c & 0x3FF) << 10) + (c2 & 0x3FF) + 0x10000
210         // c = (((c & 0x3FF) + 64) << 10) + (c2 & 0x3FF)
211         c = (c << 10) + c2 + (0x10000 - (0xD800 << 10) - 0xDC00);
212     }
213     *srcPtr = src;
214     return c;
215 }
216 
SkUTF16_PrevUnichar(const uint16_t ** srcPtr)217 SkUnichar SkUTF16_PrevUnichar(const uint16_t** srcPtr) {
218     SkASSERT(srcPtr && *srcPtr);
219 
220     const uint16_t* src = *srcPtr;
221     SkUnichar       c = *--src;
222 
223     SkASSERT(!SkUTF16_IsHighSurrogate(c));
224     if (SkUTF16_IsLowSurrogate(c)) {
225         unsigned c2 = *--src;
226         SkASSERT(SkUTF16_IsHighSurrogate(c2));
227         c = (c2 << 10) + c + (0x10000 - (0xD800 << 10) - 0xDC00);
228     }
229     *srcPtr = src;
230     return c;
231 }
232 
SkUTF16_FromUnichar(SkUnichar uni,uint16_t dst[])233 size_t SkUTF16_FromUnichar(SkUnichar uni, uint16_t dst[]) {
234     SkASSERT((unsigned)uni <= 0x10FFFF);
235 
236     int extra = (uni > 0xFFFF);
237 
238     if (dst) {
239         if (extra) {
240             // dst[0] = SkToU16(0xD800 | ((uni - 0x10000) >> 10));
241             // dst[0] = SkToU16(0xD800 | ((uni >> 10) - 64));
242             dst[0] = SkToU16((0xD800 - 64) + (uni >> 10));
243             dst[1] = SkToU16(0xDC00 | (uni & 0x3FF));
244 
245             SkASSERT(SkUTF16_IsHighSurrogate(dst[0]));
246             SkASSERT(SkUTF16_IsLowSurrogate(dst[1]));
247         } else {
248             dst[0] = SkToU16(uni);
249             SkASSERT(!SkUTF16_IsHighSurrogate(dst[0]));
250             SkASSERT(!SkUTF16_IsLowSurrogate(dst[0]));
251         }
252     }
253     return 1 + extra;
254 }
255 
SkUTF16_ToUTF8(const uint16_t utf16[],int numberOf16BitValues,char utf8[])256 size_t SkUTF16_ToUTF8(const uint16_t utf16[], int numberOf16BitValues,
257                       char utf8[]) {
258     SkASSERT(numberOf16BitValues >= 0);
259     if (numberOf16BitValues <= 0) {
260         return 0;
261     }
262 
263     SkASSERT(utf16 != nullptr);
264 
265     const uint16_t* stop = utf16 + numberOf16BitValues;
266     size_t          size = 0;
267 
268     if (utf8 == nullptr) {    // just count
269         while (utf16 < stop) {
270             size += SkUTF8_FromUnichar(SkUTF16_NextUnichar(&utf16), nullptr);
271         }
272     } else {
273         char* start = utf8;
274         while (utf16 < stop) {
275             utf8 += SkUTF8_FromUnichar(SkUTF16_NextUnichar(&utf16), utf8);
276         }
277         size = utf8 - start;
278     }
279     return size;
280 }
281