1 /* 2 * Copyright 2006 The Android Open Source Project 3 * 4 * Use of this source code is governed by a BSD-style license that can be 5 * found in the LICENSE file. 6 */ 7 8 9 #include "SkUtils.h" 10 sk_memset16(uint16_t buffer[],uint16_t value,int count)11 void sk_memset16(uint16_t buffer[], uint16_t value, int count) { 12 for (int i = 0; i < count; i++) { 13 buffer[i] = value; 14 } 15 } sk_memset32(uint32_t buffer[],uint32_t value,int count)16 void sk_memset32(uint32_t buffer[], uint32_t value, int count) { 17 for (int i = 0; i < count; i++) { 18 buffer[i] = value; 19 } 20 } sk_memset64(uint64_t buffer[],uint64_t value,int count)21 void sk_memset64(uint64_t buffer[], uint64_t value, int count) { 22 for (int i = 0; i < count; i++) { 23 buffer[i] = value; 24 } 25 } 26 27 /* 0xxxxxxx 1 total 28 10xxxxxx // never a leading byte 29 110xxxxx 2 total 30 1110xxxx 3 total 31 11110xxx 4 total 32 33 11 10 01 01 xx xx xx xx 0... 34 0xE5XX0000 35 0xE5 << 24 36 */ 37 utf8_byte_is_valid(uint8_t c)38 static bool utf8_byte_is_valid(uint8_t c) { 39 return c < 0xF5 && (c & 0xFE) != 0xC0; 40 } utf8_byte_is_continuation(uint8_t c)41 static bool utf8_byte_is_continuation(uint8_t c) { 42 return (c & 0xC0) == 0x80; 43 } utf8_byte_is_leading_byte(uint8_t c)44 static bool utf8_byte_is_leading_byte(uint8_t c) { 45 return utf8_byte_is_valid(c) && !utf8_byte_is_continuation(c); 46 } 47 48 #ifdef SK_DEBUG assert_utf8_leadingbyte(unsigned c)49 static void assert_utf8_leadingbyte(unsigned c) { 50 SkASSERT(utf8_byte_is_leading_byte(SkToU8(c))); 51 } 52 SkUTF8_LeadByteToCount(unsigned c)53 int SkUTF8_LeadByteToCount(unsigned c) { 54 assert_utf8_leadingbyte(c); 55 return (((0xE5 << 24) >> (c >> 4 << 1)) & 3) + 1; 56 } 57 #else 58 #define assert_utf8_leadingbyte(c) 59 #endif 60 61 /** 62 * @returns -1 iff invalid UTF8 byte, 63 * 0 iff UTF8 continuation byte, 64 * 1 iff ASCII byte, 65 * 2 iff leading byte of 2-byte sequence, 66 * 3 iff leading byte of 3-byte sequence, and 67 * 4 iff leading byte of 4-byte sequence. 68 * 69 * I.e.: if return value > 0, then gives length of sequence. 70 */ utf8_byte_type(uint8_t c)71 static int utf8_byte_type(uint8_t c) { 72 if (c < 0x80) { 73 return 1; 74 } else if (c < 0xC0) { 75 return 0; 76 } else if (c < 0xF5 && (c & 0xFE) != 0xC0) { // "octet values C0, C1, F5 to FF never appear" 77 return (((0xE5 << 24) >> ((unsigned)c >> 4 << 1)) & 3) + 1; 78 } else { 79 return -1; 80 } 81 } utf8_type_is_valid_leading_byte(int type)82 static bool utf8_type_is_valid_leading_byte(int type) { return type > 0; } 83 SkUTF8_CountUnichars(const char utf8[])84 int SkUTF8_CountUnichars(const char utf8[]) { 85 SkASSERT(utf8); 86 87 int count = 0; 88 89 for (;;) { 90 int c = *(const uint8_t*)utf8; 91 if (c == 0) { 92 break; 93 } 94 utf8 += SkUTF8_LeadByteToCount(c); 95 count += 1; 96 } 97 return count; 98 } 99 100 // SAFE: returns -1 if invalid UTF-8 SkUTF8_CountUnicharsWithError(const char utf8[],size_t byteLength)101 int SkUTF8_CountUnicharsWithError(const char utf8[], size_t byteLength) { 102 SkASSERT(utf8 || 0 == byteLength); 103 104 int count = 0; 105 const char* stop = utf8 + byteLength; 106 107 while (utf8 < stop) { 108 int type = utf8_byte_type(*(const uint8_t*)utf8); 109 SkASSERT(type >= -1 && type <= 4); 110 if (!utf8_type_is_valid_leading_byte(type) || 111 utf8 + type > stop) { // Sequence extends beyond end. 112 return -1; 113 } 114 while(type-- > 1) { 115 ++utf8; 116 if (!utf8_byte_is_continuation(*(const uint8_t*)utf8)) { 117 return -1; 118 } 119 } 120 ++utf8; 121 ++count; 122 } 123 return count; 124 } 125 SkUTF8_ToUnichar(const char utf8[])126 SkUnichar SkUTF8_ToUnichar(const char utf8[]) { 127 SkASSERT(utf8); 128 129 const uint8_t* p = (const uint8_t*)utf8; 130 int c = *p; 131 int hic = c << 24; 132 133 assert_utf8_leadingbyte(c); 134 135 if (hic < 0) { 136 uint32_t mask = (uint32_t)~0x3F; 137 hic = SkLeftShift(hic, 1); 138 do { 139 c = (c << 6) | (*++p & 0x3F); 140 mask <<= 5; 141 } while ((hic = SkLeftShift(hic, 1)) < 0); 142 c &= ~mask; 143 } 144 return c; 145 } 146 147 // SAFE: returns -1 on invalid UTF-8 sequence. SkUTF8_NextUnicharWithError(const char ** ptr,const char * end)148 SkUnichar SkUTF8_NextUnicharWithError(const char** ptr, const char* end) { 149 SkASSERT(ptr && *ptr); 150 SkASSERT(*ptr < end); 151 const uint8_t* p = (const uint8_t*)*ptr; 152 int c = *p; 153 int hic = c << 24; 154 155 if (!utf8_byte_is_leading_byte(c)) { 156 return -1; 157 } 158 if (hic < 0) { 159 uint32_t mask = (uint32_t)~0x3F; 160 hic = SkLeftShift(hic, 1); 161 do { 162 ++p; 163 if (p >= (const uint8_t*)end) { 164 return -1; 165 } 166 // check before reading off end of array. 167 uint8_t nextByte = *p; 168 if (!utf8_byte_is_continuation(nextByte)) { 169 return -1; 170 } 171 c = (c << 6) | (nextByte & 0x3F); 172 mask <<= 5; 173 } while ((hic = SkLeftShift(hic, 1)) < 0); 174 c &= ~mask; 175 } 176 *ptr = (char*)p + 1; 177 return c; 178 } 179 SkUTF8_NextUnichar(const char ** ptr)180 SkUnichar SkUTF8_NextUnichar(const char** ptr) { 181 SkASSERT(ptr && *ptr); 182 183 const uint8_t* p = (const uint8_t*)*ptr; 184 int c = *p; 185 int hic = c << 24; 186 187 assert_utf8_leadingbyte(c); 188 189 if (hic < 0) { 190 uint32_t mask = (uint32_t)~0x3F; 191 hic = SkLeftShift(hic, 1); 192 do { 193 c = (c << 6) | (*++p & 0x3F); 194 mask <<= 5; 195 } while ((hic = SkLeftShift(hic, 1)) < 0); 196 c &= ~mask; 197 } 198 *ptr = (char*)p + 1; 199 return c; 200 } 201 SkUTF8_PrevUnichar(const char ** ptr)202 SkUnichar SkUTF8_PrevUnichar(const char** ptr) { 203 SkASSERT(ptr && *ptr); 204 205 const char* p = *ptr; 206 207 if (*--p & 0x80) { 208 while (*--p & 0x40) { 209 ; 210 } 211 } 212 213 *ptr = (char*)p; 214 return SkUTF8_NextUnichar(&p); 215 } 216 SkUTF8_FromUnichar(SkUnichar uni,char utf8[])217 size_t SkUTF8_FromUnichar(SkUnichar uni, char utf8[]) { 218 if ((uint32_t)uni > 0x10FFFF) { 219 SkDEBUGFAIL("bad unichar"); 220 return 0; 221 } 222 223 if (uni <= 127) { 224 if (utf8) { 225 *utf8 = (char)uni; 226 } 227 return 1; 228 } 229 230 char tmp[4]; 231 char* p = tmp; 232 size_t count = 1; 233 234 SkDEBUGCODE(SkUnichar orig = uni;) 235 236 while (uni > 0x7F >> count) { 237 *p++ = (char)(0x80 | (uni & 0x3F)); 238 uni >>= 6; 239 count += 1; 240 } 241 242 if (utf8) { 243 p = tmp; 244 utf8 += count; 245 while (p < tmp + count - 1) { 246 *--utf8 = *p++; 247 } 248 *--utf8 = (char)(~(0xFF >> count) | uni); 249 } 250 251 SkASSERT(utf8 == nullptr || orig == SkUTF8_ToUnichar(utf8)); 252 return count; 253 } 254 255 /////////////////////////////////////////////////////////////////////////////// 256 SkUTF16_CountUnichars(const uint16_t src[])257 int SkUTF16_CountUnichars(const uint16_t src[]) { 258 SkASSERT(src); 259 260 int count = 0; 261 unsigned c; 262 while ((c = *src++) != 0) { 263 SkASSERT(!SkUTF16_IsLowSurrogate(c)); 264 if (SkUTF16_IsHighSurrogate(c)) { 265 c = *src++; 266 SkASSERT(SkUTF16_IsLowSurrogate(c)); 267 } 268 count += 1; 269 } 270 return count; 271 } 272 SkUTF16_CountUnichars(const uint16_t src[],int numberOf16BitValues)273 int SkUTF16_CountUnichars(const uint16_t src[], int numberOf16BitValues) { 274 SkASSERT(src); 275 276 const uint16_t* stop = src + numberOf16BitValues; 277 int count = 0; 278 while (src < stop) { 279 unsigned c = *src++; 280 SkASSERT(!SkUTF16_IsLowSurrogate(c)); 281 if (SkUTF16_IsHighSurrogate(c)) { 282 SkASSERT(src < stop); 283 c = *src++; 284 SkASSERT(SkUTF16_IsLowSurrogate(c)); 285 } 286 count += 1; 287 } 288 return count; 289 } 290 SkUTF16_NextUnichar(const uint16_t ** srcPtr)291 SkUnichar SkUTF16_NextUnichar(const uint16_t** srcPtr) { 292 SkASSERT(srcPtr && *srcPtr); 293 294 const uint16_t* src = *srcPtr; 295 SkUnichar c = *src++; 296 297 SkASSERT(!SkUTF16_IsLowSurrogate(c)); 298 if (SkUTF16_IsHighSurrogate(c)) { 299 unsigned c2 = *src++; 300 SkASSERT(SkUTF16_IsLowSurrogate(c2)); 301 302 // c = ((c & 0x3FF) << 10) + (c2 & 0x3FF) + 0x10000 303 // c = (((c & 0x3FF) + 64) << 10) + (c2 & 0x3FF) 304 c = (c << 10) + c2 + (0x10000 - (0xD800 << 10) - 0xDC00); 305 } 306 *srcPtr = src; 307 return c; 308 } 309 SkUTF16_PrevUnichar(const uint16_t ** srcPtr)310 SkUnichar SkUTF16_PrevUnichar(const uint16_t** srcPtr) { 311 SkASSERT(srcPtr && *srcPtr); 312 313 const uint16_t* src = *srcPtr; 314 SkUnichar c = *--src; 315 316 SkASSERT(!SkUTF16_IsHighSurrogate(c)); 317 if (SkUTF16_IsLowSurrogate(c)) { 318 unsigned c2 = *--src; 319 SkASSERT(SkUTF16_IsHighSurrogate(c2)); 320 c = (c2 << 10) + c + (0x10000 - (0xD800 << 10) - 0xDC00); 321 } 322 *srcPtr = src; 323 return c; 324 } 325 SkUTF16_FromUnichar(SkUnichar uni,uint16_t dst[])326 size_t SkUTF16_FromUnichar(SkUnichar uni, uint16_t dst[]) { 327 SkASSERT((unsigned)uni <= 0x10FFFF); 328 329 int extra = (uni > 0xFFFF); 330 331 if (dst) { 332 if (extra) { 333 // dst[0] = SkToU16(0xD800 | ((uni - 0x10000) >> 10)); 334 // dst[0] = SkToU16(0xD800 | ((uni >> 10) - 64)); 335 dst[0] = SkToU16((0xD800 - 64) + (uni >> 10)); 336 dst[1] = SkToU16(0xDC00 | (uni & 0x3FF)); 337 338 SkASSERT(SkUTF16_IsHighSurrogate(dst[0])); 339 SkASSERT(SkUTF16_IsLowSurrogate(dst[1])); 340 } else { 341 dst[0] = SkToU16(uni); 342 SkASSERT(!SkUTF16_IsHighSurrogate(dst[0])); 343 SkASSERT(!SkUTF16_IsLowSurrogate(dst[0])); 344 } 345 } 346 return 1 + extra; 347 } 348 SkUTF16_ToUTF8(const uint16_t utf16[],int numberOf16BitValues,char utf8[])349 size_t SkUTF16_ToUTF8(const uint16_t utf16[], int numberOf16BitValues, 350 char utf8[]) { 351 SkASSERT(numberOf16BitValues >= 0); 352 if (numberOf16BitValues <= 0) { 353 return 0; 354 } 355 356 SkASSERT(utf16 != nullptr); 357 358 const uint16_t* stop = utf16 + numberOf16BitValues; 359 size_t size = 0; 360 361 if (utf8 == nullptr) { // just count 362 while (utf16 < stop) { 363 size += SkUTF8_FromUnichar(SkUTF16_NextUnichar(&utf16), nullptr); 364 } 365 } else { 366 char* start = utf8; 367 while (utf16 < stop) { 368 utf8 += SkUTF8_FromUnichar(SkUTF16_NextUnichar(&utf16), utf8); 369 } 370 size = utf8 - start; 371 } 372 return size; 373 } 374