• Home
  • History
  • Annotate
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1  /*
2   * Copyright 2006 The Android Open Source Project
3   *
4   * Use of this source code is governed by a BSD-style license that can be
5   * found in the LICENSE file.
6   */
7  
8  
9  #include "SkUtils.h"
10  
/**
 * Stores `value` into each of the first `count` 16-bit elements of `buffer`.
 * A zero or negative `count` writes nothing.
 */
void sk_memset16(uint16_t buffer[], uint16_t value, int count) {
    uint16_t* dst = buffer;
    for (int remaining = count; remaining > 0; --remaining) {
        *dst++ = value;
    }
}
/**
 * Stores `value` into each of the first `count` 32-bit elements of `buffer`.
 * A zero or negative `count` writes nothing.
 */
void sk_memset32(uint32_t buffer[], uint32_t value, int count) {
    uint32_t* dst = buffer;
    for (int remaining = count; remaining > 0; --remaining) {
        *dst++ = value;
    }
}
/**
 * Stores `value` into each of the first `count` 64-bit elements of `buffer`.
 * A zero or negative `count` writes nothing.
 */
void sk_memset64(uint64_t buffer[], uint64_t value, int count) {
    uint64_t* dst = buffer;
    for (int remaining = count; remaining > 0; --remaining) {
        *dst++ = value;
    }
}
26  
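/*  UTF-8 lead-byte bit patterns and the total length of the sequence each one
    starts:

    0xxxxxxx    1 total
    10xxxxxx    // never a leading byte
    110xxxxx    2 total
    1110xxxx    3 total
    11110xxx    4 total

    The length lookup used below is packed into one 32-bit constant. The top
    nibble of the byte selects a 2-bit field (shift right by 2 * nibble) that
    holds (length - 1); reading the constant from its most significant bits
    down, the fields for nibbles F, E, D, C are 11, 10, 01, 01:

    11 10 01 01 xx xx xx xx 0...
    0xE5XX0000
    0xE5 << 24
*/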
37  
/**
 * Returns true if `c` is a byte value that may appear somewhere in a UTF-8
 * stream, i.e. anything except 0xC0, 0xC1 and 0xF5..0xFF.
 */
static bool utf8_byte_is_valid(uint8_t c) {
    if (c >= 0xF5) {
        return false;
    }
    return c != 0xC0 && c != 0xC1;
}
/** Returns true if `c` has the bit pattern 10xxxxxx (0x80..0xBF). */
static bool utf8_byte_is_continuation(uint8_t c) {
    const unsigned topTwoBits = c >> 6;
    return topTwoBits == 0x2;
}
utf8_byte_is_leading_byte(uint8_t c)44  static bool utf8_byte_is_leading_byte(uint8_t c) {
45      return utf8_byte_is_valid(c) && !utf8_byte_is_continuation(c);
46  }
47  
#ifdef SK_DEBUG
    /** Debug-only: asserts that SkToU8(c) is a legal UTF-8 leading byte. */
    static void assert_utf8_leadingbyte(unsigned c) {
        SkASSERT(utf8_byte_is_leading_byte(SkToU8(c)));
    }

    /**
     * Returns the total number of bytes (1..4) in the UTF-8 sequence that
     * starts with leading byte `c`.
     *
     * NOTE(review): only the SK_DEBUG definition is visible here; the
     * non-debug definition presumably lives in the header -- confirm.
     */
    int SkUTF8_LeadByteToCount(unsigned c) {
        assert_utf8_leadingbyte(c);
        // 0xE5000000 holds (length - 1) in 2-bit fields selected by the top
        // nibble of c. The constant is unsigned because (0xE5 << 24) does not
        // fit in a signed int.
        return (int)((0xE5000000u >> (c >> 4 << 1)) & 3) + 1;
    }
#else
    #define assert_utf8_leadingbyte(c)
#endif
60  
/**
 * Classifies a single byte of a UTF-8 stream.
 *
 * @returns -1  iff invalid UTF8 byte,
 *           0  iff UTF8 continuation byte,
 *           1  iff ASCII byte,
 *           2  iff leading byte of 2-byte sequence,
 *           3  iff leading byte of 3-byte sequence, and
 *           4  iff leading byte of 4-byte sequence.
 *
 * I.e.: if return value > 0, then gives length of sequence.
*/
static int utf8_byte_type(uint8_t c) {
    if (c < 0x80) {
        return 1;
    } else if (c < 0xC0) {
        return 0;
    } else if (c < 0xF5 && (c & 0xFE) != 0xC0) { // "octet values C0, C1, F5 to FF never appear"
        // 0xE5000000 holds (length - 1) in 2-bit fields selected by the top
        // nibble of c. The constant is unsigned because (0xE5 << 24) does not
        // fit in a signed int.
        return (int)((0xE5000000u >> ((unsigned)c >> 4 << 1)) & 3) + 1;
    } else {
        return -1;
    }
}
/**
 * Returns true if `type` (a utf8_byte_type() result) denotes a byte that
 * starts a sequence, i.e. it is a positive sequence length.
 */
static bool utf8_type_is_valid_leading_byte(int type) {
    return type >= 1;
}
83  
SkUTF8_CountUnichars(const char utf8[])84  int SkUTF8_CountUnichars(const char utf8[]) {
85      SkASSERT(utf8);
86  
87      int count = 0;
88  
89      for (;;) {
90          int c = *(const uint8_t*)utf8;
91          if (c == 0) {
92              break;
93          }
94          utf8 += SkUTF8_LeadByteToCount(c);
95          count += 1;
96      }
97      return count;
98  }
99  
100  // SAFE: returns -1 if invalid UTF-8
SkUTF8_CountUnicharsWithError(const char utf8[],size_t byteLength)101  int SkUTF8_CountUnicharsWithError(const char utf8[], size_t byteLength) {
102      SkASSERT(utf8 || 0 == byteLength);
103  
104      int         count = 0;
105      const char* stop = utf8 + byteLength;
106  
107      while (utf8 < stop) {
108          int type = utf8_byte_type(*(const uint8_t*)utf8);
109          SkASSERT(type >= -1 && type <= 4);
110          if (!utf8_type_is_valid_leading_byte(type) ||
111              utf8 + type > stop) {  // Sequence extends beyond end.
112              return -1;
113          }
114          while(type-- > 1) {
115              ++utf8;
116              if (!utf8_byte_is_continuation(*(const uint8_t*)utf8)) {
117                  return -1;
118              }
119          }
120          ++utf8;
121          ++count;
122      }
123      return count;
124  }
125  
SkUTF8_ToUnichar(const char utf8[])126  SkUnichar SkUTF8_ToUnichar(const char utf8[]) {
127      SkASSERT(utf8);
128  
129      const uint8_t*  p = (const uint8_t*)utf8;
130      int             c = *p;
131      int             hic = c << 24;
132  
133      assert_utf8_leadingbyte(c);
134  
135      if (hic < 0) {
136          uint32_t mask = (uint32_t)~0x3F;
137          hic = SkLeftShift(hic, 1);
138          do {
139              c = (c << 6) | (*++p & 0x3F);
140              mask <<= 5;
141          } while ((hic = SkLeftShift(hic, 1)) < 0);
142          c &= ~mask;
143      }
144      return c;
145  }
146  
147  // SAFE: returns -1 on invalid UTF-8 sequence.
SkUTF8_NextUnicharWithError(const char ** ptr,const char * end)148  SkUnichar SkUTF8_NextUnicharWithError(const char** ptr, const char* end) {
149      SkASSERT(ptr && *ptr);
150      SkASSERT(*ptr < end);
151      const uint8_t*  p = (const uint8_t*)*ptr;
152      int             c = *p;
153      int             hic = c << 24;
154  
155      if (!utf8_byte_is_leading_byte(c)) {
156          return -1;
157      }
158      if (hic < 0) {
159          uint32_t mask = (uint32_t)~0x3F;
160          hic = SkLeftShift(hic, 1);
161          do {
162              ++p;
163              if (p >= (const uint8_t*)end) {
164                  return -1;
165              }
166              // check before reading off end of array.
167              uint8_t nextByte = *p;
168              if (!utf8_byte_is_continuation(nextByte)) {
169                  return -1;
170              }
171              c = (c << 6) | (nextByte & 0x3F);
172              mask <<= 5;
173          } while ((hic = SkLeftShift(hic, 1)) < 0);
174          c &= ~mask;
175      }
176      *ptr = (char*)p + 1;
177      return c;
178  }
179  
SkUTF8_NextUnichar(const char ** ptr)180  SkUnichar SkUTF8_NextUnichar(const char** ptr) {
181      SkASSERT(ptr && *ptr);
182  
183      const uint8_t*  p = (const uint8_t*)*ptr;
184      int             c = *p;
185      int             hic = c << 24;
186  
187      assert_utf8_leadingbyte(c);
188  
189      if (hic < 0) {
190          uint32_t mask = (uint32_t)~0x3F;
191          hic = SkLeftShift(hic, 1);
192          do {
193              c = (c << 6) | (*++p & 0x3F);
194              mask <<= 5;
195          } while ((hic = SkLeftShift(hic, 1)) < 0);
196          c &= ~mask;
197      }
198      *ptr = (char*)p + 1;
199      return c;
200  }
201  
SkUTF8_PrevUnichar(const char ** ptr)202  SkUnichar SkUTF8_PrevUnichar(const char** ptr) {
203      SkASSERT(ptr && *ptr);
204  
205      const char* p = *ptr;
206  
207      if (*--p & 0x80) {
208          while (*--p & 0x40) {
209              ;
210          }
211      }
212  
213      *ptr = (char*)p;
214      return SkUTF8_NextUnichar(&p);
215  }
216  
SkUTF8_FromUnichar(SkUnichar uni,char utf8[])217  size_t SkUTF8_FromUnichar(SkUnichar uni, char utf8[]) {
218      if ((uint32_t)uni > 0x10FFFF) {
219          SkDEBUGFAIL("bad unichar");
220          return 0;
221      }
222  
223      if (uni <= 127) {
224          if (utf8) {
225              *utf8 = (char)uni;
226          }
227          return 1;
228      }
229  
230      char    tmp[4];
231      char*   p = tmp;
232      size_t  count = 1;
233  
234      SkDEBUGCODE(SkUnichar orig = uni;)
235  
236      while (uni > 0x7F >> count) {
237          *p++ = (char)(0x80 | (uni & 0x3F));
238          uni >>= 6;
239          count += 1;
240      }
241  
242      if (utf8) {
243          p = tmp;
244          utf8 += count;
245          while (p < tmp + count - 1) {
246              *--utf8 = *p++;
247          }
248          *--utf8 = (char)(~(0xFF >> count) | uni);
249      }
250  
251      SkASSERT(utf8 == nullptr || orig == SkUTF8_ToUnichar(utf8));
252      return count;
253  }
254  
255  ///////////////////////////////////////////////////////////////////////////////
256  
SkUTF16_CountUnichars(const uint16_t src[])257  int SkUTF16_CountUnichars(const uint16_t src[]) {
258      SkASSERT(src);
259  
260      int count = 0;
261      unsigned c;
262      while ((c = *src++) != 0) {
263          SkASSERT(!SkUTF16_IsLowSurrogate(c));
264          if (SkUTF16_IsHighSurrogate(c)) {
265              c = *src++;
266              SkASSERT(SkUTF16_IsLowSurrogate(c));
267          }
268          count += 1;
269      }
270      return count;
271  }
272  
SkUTF16_CountUnichars(const uint16_t src[],int numberOf16BitValues)273  int SkUTF16_CountUnichars(const uint16_t src[], int numberOf16BitValues) {
274      SkASSERT(src);
275  
276      const uint16_t* stop = src + numberOf16BitValues;
277      int count = 0;
278      while (src < stop) {
279          unsigned c = *src++;
280          SkASSERT(!SkUTF16_IsLowSurrogate(c));
281          if (SkUTF16_IsHighSurrogate(c)) {
282              SkASSERT(src < stop);
283              c = *src++;
284              SkASSERT(SkUTF16_IsLowSurrogate(c));
285          }
286          count += 1;
287      }
288      return count;
289  }
290  
SkUTF16_NextUnichar(const uint16_t ** srcPtr)291  SkUnichar SkUTF16_NextUnichar(const uint16_t** srcPtr) {
292      SkASSERT(srcPtr && *srcPtr);
293  
294      const uint16_t* src = *srcPtr;
295      SkUnichar       c = *src++;
296  
297      SkASSERT(!SkUTF16_IsLowSurrogate(c));
298      if (SkUTF16_IsHighSurrogate(c)) {
299          unsigned c2 = *src++;
300          SkASSERT(SkUTF16_IsLowSurrogate(c2));
301  
302          // c = ((c & 0x3FF) << 10) + (c2 & 0x3FF) + 0x10000
303          // c = (((c & 0x3FF) + 64) << 10) + (c2 & 0x3FF)
304          c = (c << 10) + c2 + (0x10000 - (0xD800 << 10) - 0xDC00);
305      }
306      *srcPtr = src;
307      return c;
308  }
309  
SkUTF16_PrevUnichar(const uint16_t ** srcPtr)310  SkUnichar SkUTF16_PrevUnichar(const uint16_t** srcPtr) {
311      SkASSERT(srcPtr && *srcPtr);
312  
313      const uint16_t* src = *srcPtr;
314      SkUnichar       c = *--src;
315  
316      SkASSERT(!SkUTF16_IsHighSurrogate(c));
317      if (SkUTF16_IsLowSurrogate(c)) {
318          unsigned c2 = *--src;
319          SkASSERT(SkUTF16_IsHighSurrogate(c2));
320          c = (c2 << 10) + c + (0x10000 - (0xD800 << 10) - 0xDC00);
321      }
322      *srcPtr = src;
323      return c;
324  }
325  
SkUTF16_FromUnichar(SkUnichar uni,uint16_t dst[])326  size_t SkUTF16_FromUnichar(SkUnichar uni, uint16_t dst[]) {
327      SkASSERT((unsigned)uni <= 0x10FFFF);
328  
329      int extra = (uni > 0xFFFF);
330  
331      if (dst) {
332          if (extra) {
333              // dst[0] = SkToU16(0xD800 | ((uni - 0x10000) >> 10));
334              // dst[0] = SkToU16(0xD800 | ((uni >> 10) - 64));
335              dst[0] = SkToU16((0xD800 - 64) + (uni >> 10));
336              dst[1] = SkToU16(0xDC00 | (uni & 0x3FF));
337  
338              SkASSERT(SkUTF16_IsHighSurrogate(dst[0]));
339              SkASSERT(SkUTF16_IsLowSurrogate(dst[1]));
340          } else {
341              dst[0] = SkToU16(uni);
342              SkASSERT(!SkUTF16_IsHighSurrogate(dst[0]));
343              SkASSERT(!SkUTF16_IsLowSurrogate(dst[0]));
344          }
345      }
346      return 1 + extra;
347  }
348  
SkUTF16_ToUTF8(const uint16_t utf16[],int numberOf16BitValues,char utf8[])349  size_t SkUTF16_ToUTF8(const uint16_t utf16[], int numberOf16BitValues,
350                        char utf8[]) {
351      SkASSERT(numberOf16BitValues >= 0);
352      if (numberOf16BitValues <= 0) {
353          return 0;
354      }
355  
356      SkASSERT(utf16 != nullptr);
357  
358      const uint16_t* stop = utf16 + numberOf16BitValues;
359      size_t          size = 0;
360  
361      if (utf8 == nullptr) {    // just count
362          while (utf16 < stop) {
363              size += SkUTF8_FromUnichar(SkUTF16_NextUnichar(&utf16), nullptr);
364          }
365      } else {
366          char* start = utf8;
367          while (utf16 < stop) {
368              utf8 += SkUTF8_FromUnichar(SkUTF16_NextUnichar(&utf16), utf8);
369          }
370          size = utf8 - start;
371      }
372      return size;
373  }
374