1 /* 2 * Copyright 2013 Google Inc. 3 * 4 * Use of this source code is governed by a BSD-style license that can be 5 * found in the LICENSE file. 6 */ 7 8 #include "SkOTTable_name.h" 9 10 #include "SkEndian.h" 11 #include "SkStringUtils.h" 12 #include "SkTSearch.h" 13 #include "SkTemplates.h" 14 #include "SkUtils.h" 15 16 static SkUnichar next_unichar_UTF16BE(const uint8_t** srcPtr, size_t* length) { 17 SkASSERT(srcPtr && *srcPtr && length); 18 SkASSERT(*length > 0); 19 20 uint16_t leading; 21 if (*length < sizeof(leading)) { 22 *length = 0; 23 return 0xFFFD; 24 } 25 memcpy(&leading, *srcPtr, sizeof(leading)); 26 *srcPtr += sizeof(leading); 27 *length -= sizeof(leading); 28 SkUnichar c = SkEndian_SwapBE16(leading); 29 30 if (SkUTF16_IsTrailingSurrogate(c)) { 31 return 0xFFFD; 32 } 33 if (SkUTF16_IsLeadingSurrogate(c)) { 34 uint16_t trailing; 35 if (*length < sizeof(trailing)) { 36 *length = 0; 37 return 0xFFFD; 38 } 39 memcpy(&trailing, *srcPtr, sizeof(trailing)); 40 SkUnichar c2 = SkEndian_SwapBE16(trailing); 41 if (!SkUTF16_IsTrailingSurrogate(c2)) { 42 return 0xFFFD; 43 } 44 *srcPtr += sizeof(trailing); 45 *length -= sizeof(trailing); 46 47 c = (c << 10) + c2 + (0x10000 - (0xD800 << 10) - 0xDC00); 48 } 49 return c; 50 } 51 52 static void SkString_from_UTF16BE(const uint8_t* utf16be, size_t length, SkString& utf8) { 53 // Note that utf16be may not be 2-byte aligned. 54 SkASSERT(utf16be != nullptr); 55 56 utf8.reset(); 57 while (length) { 58 utf8.appendUnichar(next_unichar_UTF16BE(&utf16be, &length)); 59 } 60 } 61 62 /** UnicodeFromMacRoman[macRomanPoint - 0x80] -> unicodeCodePoint. 63 * Derived from http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/ROMAN.TXT . 64 * In MacRoman the first 128 code points match ASCII code points. 65 * This maps the second 128 MacRoman code points to unicode code points. 66 */ 67 static const uint16_t UnicodeFromMacRoman[0x80] = { 68 0x00C4, 0x00C5, 0x00C7, 0x00C9, 0x00D1, 0x00D6, 0x00DC, 0x00E1, 69 0x00E0, 0x00E2, 0x00E4, 0x00E3, 0x00E5, 0x00E7, 0x00E9, 0x00E8, 70 0x00EA, 0x00EB, 0x00ED, 0x00EC, 0x00EE, 0x00EF, 0x00F1, 0x00F3, 71 0x00F2, 0x00F4, 0x00F6, 0x00F5, 0x00FA, 0x00F9, 0x00FB, 0x00FC, 72 0x2020, 0x00B0, 0x00A2, 0x00A3, 0x00A7, 0x2022, 0x00B6, 0x00DF, 73 0x00AE, 0x00A9, 0x2122, 0x00B4, 0x00A8, 0x2260, 0x00C6, 0x00D8, 74 0x221E, 0x00B1, 0x2264, 0x2265, 0x00A5, 0x00B5, 0x2202, 0x2211, 75 0x220F, 0x03C0, 0x222B, 0x00AA, 0x00BA, 0x03A9, 0x00E6, 0x00F8, 76 0x00BF, 0x00A1, 0x00AC, 0x221A, 0x0192, 0x2248, 0x2206, 0x00AB, 77 0x00BB, 0x2026, 0x00A0, 0x00C0, 0x00C3, 0x00D5, 0x0152, 0x0153, 78 0x2013, 0x2014, 0x201C, 0x201D, 0x2018, 0x2019, 0x00F7, 0x25CA, 79 0x00FF, 0x0178, 0x2044, 0x20AC, 0x2039, 0x203A, 0xFB01, 0xFB02, 80 0x2021, 0x00B7, 0x201A, 0x201E, 0x2030, 0x00C2, 0x00CA, 0x00C1, 81 0x00CB, 0x00C8, 0x00CD, 0x00CE, 0x00CF, 0x00CC, 0x00D3, 0x00D4, 82 0xF8FF, 0x00D2, 0x00DA, 0x00DB, 0x00D9, 0x0131, 0x02C6, 0x02DC, 83 0x00AF, 0x02D8, 0x02D9, 0x02DA, 0x00B8, 0x02DD, 0x02DB, 0x02C7, 84 }; 85 86 static void SkStringFromMacRoman(const uint8_t* macRoman, size_t length, SkString& utf8) { 87 utf8.reset(); 88 for (size_t i = 0; i < length; ++i) { 89 utf8.appendUnichar(macRoman[i] < 0x80 ? macRoman[i] 90 : UnicodeFromMacRoman[macRoman[i] - 0x80]); 91 } 92 } 93 94 static const struct BCP47FromLanguageId { 95 uint16_t languageID; 96 const char* bcp47; 97 } 98 /** The Mac and Windows values do not conflict, so this is currently one single table. */ 99 BCP47FromLanguageID[] = { 100 /** A mapping from Mac Language Designators to BCP 47 codes. 101 * The following list was constructed more or less manually. 102 * Apple now uses BCP 47 (post OSX10.4), so there will be no new entries. 103 */ 104 {0, "en"}, //English 105 {1, "fr"}, //French 106 {2, "de"}, //German 107 {3, "it"}, //Italian 108 {4, "nl"}, //Dutch 109 {5, "sv"}, //Swedish 110 {6, "es"}, //Spanish 111 {7, "da"}, //Danish 112 {8, "pt"}, //Portuguese 113 {9, "nb"}, //Norwegian 114 {10, "he"}, //Hebrew 115 {11, "ja"}, //Japanese 116 {12, "ar"}, //Arabic 117 {13, "fi"}, //Finnish 118 {14, "el"}, //Greek 119 {15, "is"}, //Icelandic 120 {16, "mt"}, //Maltese 121 {17, "tr"}, //Turkish 122 {18, "hr"}, //Croatian 123 {19, "zh-Hant"}, //Chinese (Traditional) 124 {20, "ur"}, //Urdu 125 {21, "hi"}, //Hindi 126 {22, "th"}, //Thai 127 {23, "ko"}, //Korean 128 {24, "lt"}, //Lithuanian 129 {25, "pl"}, //Polish 130 {26, "hu"}, //Hungarian 131 {27, "et"}, //Estonian 132 {28, "lv"}, //Latvian 133 {29, "se"}, //Sami 134 {30, "fo"}, //Faroese 135 {31, "fa"}, //Farsi (Persian) 136 {32, "ru"}, //Russian 137 {33, "zh-Hans"}, //Chinese (Simplified) 138 {34, "nl"}, //Dutch 139 {35, "ga"}, //Irish(Gaelic) 140 {36, "sq"}, //Albanian 141 {37, "ro"}, //Romanian 142 {38, "cs"}, //Czech 143 {39, "sk"}, //Slovak 144 {40, "sl"}, //Slovenian 145 {41, "yi"}, //Yiddish 146 {42, "sr"}, //Serbian 147 {43, "mk"}, //Macedonian 148 {44, "bg"}, //Bulgarian 149 {45, "uk"}, //Ukrainian 150 {46, "be"}, //Byelorussian 151 {47, "uz"}, //Uzbek 152 {48, "kk"}, //Kazakh 153 {49, "az-Cyrl"}, //Azerbaijani (Cyrillic) 154 {50, "az-Arab"}, //Azerbaijani (Arabic) 155 {51, "hy"}, //Armenian 156 {52, "ka"}, //Georgian 157 {53, "mo"}, //Moldavian 158 {54, "ky"}, //Kirghiz 159 {55, "tg"}, //Tajiki 160 {56, "tk"}, //Turkmen 161 {57, "mn-Mong"}, //Mongolian (Traditional) 162 {58, "mn-Cyrl"}, //Mongolian (Cyrillic) 163 {59, "ps"}, //Pashto 164 {60, "ku"}, //Kurdish 165 {61, "ks"}, //Kashmiri 166 {62, "sd"}, //Sindhi 167 {63, "bo"}, //Tibetan 168 {64, "ne"}, //Nepali 169 {65, "sa"}, //Sanskrit 170 {66, "mr"}, //Marathi 171 {67, "bn"}, //Bengali 172 {68, "as"}, //Assamese 173 {69, "gu"}, //Gujarati 174 {70, "pa"}, //Punjabi 175 {71, "or"}, //Oriya 176 {72, "ml"}, //Malayalam 177 {73, "kn"}, //Kannada 178 {74, "ta"}, //Tamil 179 {75, "te"}, //Telugu 180 {76, "si"}, //Sinhalese 181 {77, "my"}, //Burmese 182 {78, "km"}, //Khmer 183 {79, "lo"}, //Lao 184 {80, "vi"}, //Vietnamese 185 {81, "id"}, //Indonesian 186 {82, "tl"}, //Tagalog 187 {83, "ms-Latn"}, //Malay (Roman) 188 {84, "ms-Arab"}, //Malay (Arabic) 189 {85, "am"}, //Amharic 190 {86, "ti"}, //Tigrinya 191 {87, "om"}, //Oromo 192 {88, "so"}, //Somali 193 {89, "sw"}, //Swahili 194 {90, "rw"}, //Kinyarwanda/Ruanda 195 {91, "rn"}, //Rundi 196 {92, "ny"}, //Nyanja/Chewa 197 {93, "mg"}, //Malagasy 198 {94, "eo"}, //Esperanto 199 {128, "cy"}, //Welsh 200 {129, "eu"}, //Basque 201 {130, "ca"}, //Catalan 202 {131, "la"}, //Latin 203 {132, "qu"}, //Quechua 204 {133, "gn"}, //Guarani 205 {134, "ay"}, //Aymara 206 {135, "tt"}, //Tatar 207 {136, "ug"}, //Uighur 208 {137, "dz"}, //Dzongkha 209 {138, "jv-Latn"}, //Javanese (Roman) 210 {139, "su-Latn"}, //Sundanese (Roman) 211 {140, "gl"}, //Galician 212 {141, "af"}, //Afrikaans 213 {142, "br"}, //Breton 214 {143, "iu"}, //Inuktitut 215 {144, "gd"}, //Scottish (Gaelic) 216 {145, "gv"}, //Manx (Gaelic) 217 {146, "ga"}, //Irish (Gaelic with Lenition) 218 {147, "to"}, //Tongan 219 {148, "el"}, //Greek (Polytonic) Note: ISO 15924 does not have an equivalent script name. 220 {149, "kl"}, //Greenlandic 221 {150, "az-Latn"}, //Azerbaijani (Roman) 222 {151, "nn"}, //Nynorsk 223 224 /** A mapping from Windows LCID to BCP 47 codes. 225 * This list is the sorted, curated output of tools/win_lcid.cpp. 226 * Note that these are sorted by value for quick binary lookup, and not logically by lsb. 227 * The 'bare' language ids (e.g. 0x0001 for Arabic) are ommitted 228 * as they do not appear as valid language ids in the OpenType specification. 229 */ 230 { 0x0401, "ar-SA" }, //Arabic 231 { 0x0402, "bg-BG" }, //Bulgarian 232 { 0x0403, "ca-ES" }, //Catalan 233 { 0x0404, "zh-TW" }, //Chinese (Traditional) 234 { 0x0405, "cs-CZ" }, //Czech 235 { 0x0406, "da-DK" }, //Danish 236 { 0x0407, "de-DE" }, //German 237 { 0x0408, "el-GR" }, //Greek 238 { 0x0409, "en-US" }, //English 239 { 0x040a, "es-ES_tradnl" }, //Spanish 240 { 0x040b, "fi-FI" }, //Finnish 241 { 0x040c, "fr-FR" }, //French 242 { 0x040d, "he-IL" }, //Hebrew 243 { 0x040d, "he" }, //Hebrew 244 { 0x040e, "hu-HU" }, //Hungarian 245 { 0x040e, "hu" }, //Hungarian 246 { 0x040f, "is-IS" }, //Icelandic 247 { 0x0410, "it-IT" }, //Italian 248 { 0x0411, "ja-JP" }, //Japanese 249 { 0x0412, "ko-KR" }, //Korean 250 { 0x0413, "nl-NL" }, //Dutch 251 { 0x0414, "nb-NO" }, //Norwegian (Bokmål) 252 { 0x0415, "pl-PL" }, //Polish 253 { 0x0416, "pt-BR" }, //Portuguese 254 { 0x0417, "rm-CH" }, //Romansh 255 { 0x0418, "ro-RO" }, //Romanian 256 { 0x0419, "ru-RU" }, //Russian 257 { 0x041a, "hr-HR" }, //Croatian 258 { 0x041b, "sk-SK" }, //Slovak 259 { 0x041c, "sq-AL" }, //Albanian 260 { 0x041d, "sv-SE" }, //Swedish 261 { 0x041e, "th-TH" }, //Thai 262 { 0x041f, "tr-TR" }, //Turkish 263 { 0x0420, "ur-PK" }, //Urdu 264 { 0x0421, "id-ID" }, //Indonesian 265 { 0x0422, "uk-UA" }, //Ukrainian 266 { 0x0423, "be-BY" }, //Belarusian 267 { 0x0424, "sl-SI" }, //Slovenian 268 { 0x0425, "et-EE" }, //Estonian 269 { 0x0426, "lv-LV" }, //Latvian 270 { 0x0427, "lt-LT" }, //Lithuanian 271 { 0x0428, "tg-Cyrl-TJ" }, //Tajik (Cyrillic) 272 { 0x0429, "fa-IR" }, //Persian 273 { 0x042a, "vi-VN" }, //Vietnamese 274 { 0x042b, "hy-AM" }, //Armenian 275 { 0x042c, "az-Latn-AZ" }, //Azeri (Latin) 276 { 0x042d, "eu-ES" }, //Basque 277 { 0x042e, "hsb-DE" }, //Upper Sorbian 278 { 0x042f, "mk-MK" }, //Macedonian (FYROM) 279 { 0x0432, "tn-ZA" }, //Setswana 280 { 0x0434, "xh-ZA" }, //isiXhosa 281 { 0x0435, "zu-ZA" }, //isiZulu 282 { 0x0436, "af-ZA" }, //Afrikaans 283 { 0x0437, "ka-GE" }, //Georgian 284 { 0x0438, "fo-FO" }, //Faroese 285 { 0x0439, "hi-IN" }, //Hindi 286 { 0x043a, "mt-MT" }, //Maltese 287 { 0x043b, "se-NO" }, //Sami (Northern) 288 { 0x043e, "ms-MY" }, //Malay 289 { 0x043f, "kk-KZ" }, //Kazakh 290 { 0x0440, "ky-KG" }, //Kyrgyz 291 { 0x0441, "sw-KE" }, //Kiswahili 292 { 0x0442, "tk-TM" }, //Turkmen 293 { 0x0443, "uz-Latn-UZ" }, //Uzbek (Latin) 294 { 0x0443, "uz" }, //Uzbek 295 { 0x0444, "tt-RU" }, //Tatar 296 { 0x0445, "bn-IN" }, //Bengali 297 { 0x0446, "pa-IN" }, //Punjabi 298 { 0x0447, "gu-IN" }, //Gujarati 299 { 0x0448, "or-IN" }, //Oriya 300 { 0x0449, "ta-IN" }, //Tamil 301 { 0x044a, "te-IN" }, //Telugu 302 { 0x044b, "kn-IN" }, //Kannada 303 { 0x044c, "ml-IN" }, //Malayalam 304 { 0x044d, "as-IN" }, //Assamese 305 { 0x044e, "mr-IN" }, //Marathi 306 { 0x044f, "sa-IN" }, //Sanskrit 307 { 0x0450, "mn-Cyrl" }, //Mongolian (Cyrillic) 308 { 0x0451, "bo-CN" }, //Tibetan 309 { 0x0452, "cy-GB" }, //Welsh 310 { 0x0453, "km-KH" }, //Khmer 311 { 0x0454, "lo-LA" }, //Lao 312 { 0x0456, "gl-ES" }, //Galician 313 { 0x0457, "kok-IN" }, //Konkani 314 { 0x045a, "syr-SY" }, //Syriac 315 { 0x045b, "si-LK" }, //Sinhala 316 { 0x045d, "iu-Cans-CA" }, //Inuktitut (Syllabics) 317 { 0x045e, "am-ET" }, //Amharic 318 { 0x0461, "ne-NP" }, //Nepali 319 { 0x0462, "fy-NL" }, //Frisian 320 { 0x0463, "ps-AF" }, //Pashto 321 { 0x0464, "fil-PH" }, //Filipino 322 { 0x0465, "dv-MV" }, //Divehi 323 { 0x0468, "ha-Latn-NG" }, //Hausa (Latin) 324 { 0x046a, "yo-NG" }, //Yoruba 325 { 0x046b, "quz-BO" }, //Quechua 326 { 0x046c, "nso-ZA" }, //Sesotho sa Leboa 327 { 0x046d, "ba-RU" }, //Bashkir 328 { 0x046e, "lb-LU" }, //Luxembourgish 329 { 0x046f, "kl-GL" }, //Greenlandic 330 { 0x0470, "ig-NG" }, //Igbo 331 { 0x0478, "ii-CN" }, //Yi 332 { 0x047a, "arn-CL" }, //Mapudungun 333 { 0x047c, "moh-CA" }, //Mohawk 334 { 0x047e, "br-FR" }, //Breton 335 { 0x0480, "ug-CN" }, //Uyghur 336 { 0x0481, "mi-NZ" }, //Maori 337 { 0x0482, "oc-FR" }, //Occitan 338 { 0x0483, "co-FR" }, //Corsican 339 { 0x0484, "gsw-FR" }, //Alsatian 340 { 0x0485, "sah-RU" }, //Yakut 341 { 0x0486, "qut-GT" }, //K'iche 342 { 0x0487, "rw-RW" }, //Kinyarwanda 343 { 0x0488, "wo-SN" }, //Wolof 344 { 0x048c, "prs-AF" }, //Dari 345 { 0x0491, "gd-GB" }, //Scottish Gaelic 346 { 0x0801, "ar-IQ" }, //Arabic 347 { 0x0804, "zh-Hans" }, //Chinese (Simplified) 348 { 0x0807, "de-CH" }, //German 349 { 0x0809, "en-GB" }, //English 350 { 0x080a, "es-MX" }, //Spanish 351 { 0x080c, "fr-BE" }, //French 352 { 0x0810, "it-CH" }, //Italian 353 { 0x0813, "nl-BE" }, //Dutch 354 { 0x0814, "nn-NO" }, //Norwegian (Nynorsk) 355 { 0x0816, "pt-PT" }, //Portuguese 356 { 0x081a, "sr-Latn-CS" }, //Serbian (Latin) 357 { 0x081d, "sv-FI" }, //Swedish 358 { 0x082c, "az-Cyrl-AZ" }, //Azeri (Cyrillic) 359 { 0x082e, "dsb-DE" }, //Lower Sorbian 360 { 0x082e, "dsb" }, //Lower Sorbian 361 { 0x083b, "se-SE" }, //Sami (Northern) 362 { 0x083c, "ga-IE" }, //Irish 363 { 0x083e, "ms-BN" }, //Malay 364 { 0x0843, "uz-Cyrl-UZ" }, //Uzbek (Cyrillic) 365 { 0x0845, "bn-BD" }, //Bengali 366 { 0x0850, "mn-Mong-CN" }, //Mongolian (Traditional Mongolian) 367 { 0x085d, "iu-Latn-CA" }, //Inuktitut (Latin) 368 { 0x085f, "tzm-Latn-DZ" }, //Tamazight (Latin) 369 { 0x086b, "quz-EC" }, //Quechua 370 { 0x0c01, "ar-EG" }, //Arabic 371 { 0x0c04, "zh-Hant" }, //Chinese (Traditional) 372 { 0x0c07, "de-AT" }, //German 373 { 0x0c09, "en-AU" }, //English 374 { 0x0c0a, "es-ES" }, //Spanish 375 { 0x0c0c, "fr-CA" }, //French 376 { 0x0c1a, "sr-Cyrl-CS" }, //Serbian (Cyrillic) 377 { 0x0c3b, "se-FI" }, //Sami (Northern) 378 { 0x0c6b, "quz-PE" }, //Quechua 379 { 0x1001, "ar-LY" }, //Arabic 380 { 0x1004, "zh-SG" }, //Chinese (Simplified) 381 { 0x1007, "de-LU" }, //German 382 { 0x1009, "en-CA" }, //English 383 { 0x100a, "es-GT" }, //Spanish 384 { 0x100c, "fr-CH" }, //French 385 { 0x101a, "hr-BA" }, //Croatian (Latin) 386 { 0x103b, "smj-NO" }, //Sami (Lule) 387 { 0x1401, "ar-DZ" }, //Arabic 388 { 0x1404, "zh-MO" }, //Chinese (Traditional) 389 { 0x1407, "de-LI" }, //German 390 { 0x1409, "en-NZ" }, //English 391 { 0x140a, "es-CR" }, //Spanish 392 { 0x140c, "fr-LU" }, //French 393 { 0x141a, "bs-Latn-BA" }, //Bosnian (Latin) 394 { 0x141a, "bs" }, //Bosnian 395 { 0x143b, "smj-SE" }, //Sami (Lule) 396 { 0x143b, "smj" }, //Sami (Lule) 397 { 0x1801, "ar-MA" }, //Arabic 398 { 0x1809, "en-IE" }, //English 399 { 0x180a, "es-PA" }, //Spanish 400 { 0x180c, "fr-MC" }, //French 401 { 0x181a, "sr-Latn-BA" }, //Serbian (Latin) 402 { 0x183b, "sma-NO" }, //Sami (Southern) 403 { 0x1c01, "ar-TN" }, //Arabic 404 { 0x1c09, "en-ZA" }, //English 405 { 0x1c0a, "es-DO" }, //Spanish 406 { 0x1c1a, "sr-Cyrl-BA" }, //Serbian (Cyrillic) 407 { 0x1c3b, "sma-SE" }, //Sami (Southern) 408 { 0x1c3b, "sma" }, //Sami (Southern) 409 { 0x2001, "ar-OM" }, //Arabic 410 { 0x2009, "en-JM" }, //English 411 { 0x200a, "es-VE" }, //Spanish 412 { 0x201a, "bs-Cyrl-BA" }, //Bosnian (Cyrillic) 413 { 0x201a, "bs-Cyrl" }, //Bosnian (Cyrillic) 414 { 0x203b, "sms-FI" }, //Sami (Skolt) 415 { 0x203b, "sms" }, //Sami (Skolt) 416 { 0x2401, "ar-YE" }, //Arabic 417 { 0x2409, "en-029" }, //English 418 { 0x240a, "es-CO" }, //Spanish 419 { 0x241a, "sr-Latn-RS" }, //Serbian (Latin) 420 { 0x243b, "smn-FI" }, //Sami (Inari) 421 { 0x2801, "ar-SY" }, //Arabic 422 { 0x2809, "en-BZ" }, //English 423 { 0x280a, "es-PE" }, //Spanish 424 { 0x281a, "sr-Cyrl-RS" }, //Serbian (Cyrillic) 425 { 0x2c01, "ar-JO" }, //Arabic 426 { 0x2c09, "en-TT" }, //English 427 { 0x2c0a, "es-AR" }, //Spanish 428 { 0x2c1a, "sr-Latn-ME" }, //Serbian (Latin) 429 { 0x3001, "ar-LB" }, //Arabic 430 { 0x3009, "en-ZW" }, //English 431 { 0x300a, "es-EC" }, //Spanish 432 { 0x301a, "sr-Cyrl-ME" }, //Serbian (Cyrillic) 433 { 0x3401, "ar-KW" }, //Arabic 434 { 0x3409, "en-PH" }, //English 435 { 0x340a, "es-CL" }, //Spanish 436 { 0x3801, "ar-AE" }, //Arabic 437 { 0x380a, "es-UY" }, //Spanish 438 { 0x3c01, "ar-BH" }, //Arabic 439 { 0x3c0a, "es-PY" }, //Spanish 440 { 0x4001, "ar-QA" }, //Arabic 441 { 0x4009, "en-IN" }, //English 442 { 0x400a, "es-BO" }, //Spanish 443 { 0x4409, "en-MY" }, //English 444 { 0x440a, "es-SV" }, //Spanish 445 { 0x4809, "en-SG" }, //English 446 { 0x480a, "es-HN" }, //Spanish 447 { 0x4c0a, "es-NI" }, //Spanish 448 { 0x500a, "es-PR" }, //Spanish 449 { 0x540a, "es-US" }, //Spanish 450 }; 451 452 namespace { 453 bool BCP47FromLanguageIdLess(const BCP47FromLanguageId& a, const BCP47FromLanguageId& b) { 454 return a.languageID < b.languageID; 455 } 456 } 457 458 bool SkOTTableName::Iterator::next(SkOTTableName::Iterator::Record& record) { 459 SkOTTableName nameTable; 460 if (fNameTableSize < sizeof(nameTable)) { 461 return false; 462 } 463 memcpy(&nameTable, fNameTable, sizeof(nameTable)); 464 465 const uint8_t* nameRecords = fNameTable + sizeof(nameTable); 466 const size_t nameRecordsSize = fNameTableSize - sizeof(nameTable); 467 468 const size_t stringTableOffset = SkEndian_SwapBE16(nameTable.stringOffset); 469 if (fNameTableSize < stringTableOffset) { 470 return false; 471 } 472 const uint8_t* stringTable = fNameTable + stringTableOffset; 473 const size_t stringTableSize = fNameTableSize - stringTableOffset; 474 475 // Find the next record which matches the requested type. 476 SkOTTableName::Record nameRecord; 477 const size_t nameRecordsCount = SkEndian_SwapBE16(nameTable.count); 478 const size_t nameRecordsMax = SkTMin(nameRecordsCount, nameRecordsSize / sizeof(nameRecord)); 479 do { 480 if (fIndex >= nameRecordsMax) { 481 return false; 482 } 483 484 memcpy(&nameRecord, nameRecords + sizeof(nameRecord)*fIndex, sizeof(nameRecord)); 485 ++fIndex; 486 } while (fType != -1 && nameRecord.nameID.fontSpecific != fType); 487 488 record.type = nameRecord.nameID.fontSpecific; 489 490 // Decode the name into UTF-8. 491 const size_t nameOffset = SkEndian_SwapBE16(nameRecord.offset); 492 const size_t nameLength = SkEndian_SwapBE16(nameRecord.length); 493 if (stringTableSize < nameOffset + nameLength) { 494 return false; // continue? 495 } 496 const uint8_t* nameString = stringTable + nameOffset; 497 switch (nameRecord.platformID.value) { 498 case SkOTTableName::Record::PlatformID::Windows: 499 if (SkOTTableName::Record::EncodingID::Windows::UnicodeBMPUCS2 500 != nameRecord.encodingID.windows.value 501 && SkOTTableName::Record::EncodingID::Windows::UnicodeUCS4 502 != nameRecord.encodingID.windows.value 503 && SkOTTableName::Record::EncodingID::Windows::Symbol 504 != nameRecord.encodingID.windows.value) 505 { 506 record.name.reset(); 507 break; // continue? 508 } 509 case SkOTTableName::Record::PlatformID::Unicode: 510 case SkOTTableName::Record::PlatformID::ISO: 511 SkString_from_UTF16BE(nameString, nameLength, record.name); 512 break; 513 514 case SkOTTableName::Record::PlatformID::Macintosh: 515 // TODO: need better decoding, especially on Mac. 516 if (SkOTTableName::Record::EncodingID::Macintosh::Roman 517 != nameRecord.encodingID.macintosh.value) 518 { 519 record.name.reset(); 520 break; // continue? 521 } 522 SkStringFromMacRoman(nameString, nameLength, record.name); 523 break; 524 525 case SkOTTableName::Record::PlatformID::Custom: 526 // These should never appear in a 'name' table. 527 default: 528 SkASSERT(false); 529 record.name.reset(); 530 break; // continue? 531 } 532 533 // Determine the language. 534 const uint16_t languageID = SkEndian_SwapBE16(nameRecord.languageID.languageTagID); 535 536 // Handle format 1 languages. 537 if (SkOTTableName::format_1 == nameTable.format && languageID >= 0x8000) { 538 const uint16_t languageTagRecordIndex = languageID - 0x8000; 539 540 if (nameRecordsSize < sizeof(nameRecord)*nameRecordsCount) { 541 return false; //"und" or break? 542 } 543 const uint8_t* format1extData = nameRecords + sizeof(nameRecord)*nameRecordsCount; 544 size_t format1extSize = nameRecordsSize - sizeof(nameRecord)*nameRecordsCount; 545 SkOTTableName::Format1Ext format1ext; 546 if (format1extSize < sizeof(format1ext)) { 547 return false; // "und" or break? 548 } 549 memcpy(&format1ext, format1extData, sizeof(format1ext)); 550 551 const uint8_t* languageTagRecords = format1extData + sizeof(format1ext); 552 size_t languageTagRecordsSize = format1extSize - sizeof(format1ext); 553 if (languageTagRecordIndex < SkEndian_SwapBE16(format1ext.langTagCount)) { 554 SkOTTableName::Format1Ext::LangTagRecord languageTagRecord; 555 if (languageTagRecordsSize < sizeof(languageTagRecord)*(languageTagRecordIndex+1)) { 556 return false; // "und"? 557 } 558 const uint8_t* languageTagData = languageTagRecords 559 + sizeof(languageTagRecord)*languageTagRecordIndex; 560 memcpy(&languageTagRecord, languageTagData, sizeof(languageTagRecord)); 561 562 uint16_t languageOffset = SkEndian_SwapBE16(languageTagRecord.offset); 563 uint16_t languageLength = SkEndian_SwapBE16(languageTagRecord.length); 564 565 if (fNameTableSize < stringTableOffset + languageOffset + languageLength) { 566 return false; // "und"? 567 } 568 const uint8_t* languageString = stringTable + languageOffset; 569 SkString_from_UTF16BE(languageString, languageLength, record.language); 570 return true; 571 } 572 } 573 574 // Handle format 0 languages, translating them into BCP 47. 575 const BCP47FromLanguageId target = { languageID, "" }; 576 int languageIndex = SkTSearch<BCP47FromLanguageId, BCP47FromLanguageIdLess>( 577 BCP47FromLanguageID, SK_ARRAY_COUNT(BCP47FromLanguageID), target, sizeof(target)); 578 if (languageIndex >= 0) { 579 record.language = BCP47FromLanguageID[languageIndex].bcp47; 580 return true; 581 } 582 583 // Unknown language, return the BCP 47 code 'und' for 'undetermined'. 584 record.language = "und"; 585 return true; 586 } 587