1 /* 2 * Copyright © 2014 Google, Inc. 3 * 4 * This is part of HarfBuzz, a text shaping library. 5 * 6 * Permission is hereby granted, without written agreement and without 7 * license or royalty fees, to use, copy, modify, and distribute this 8 * software and its documentation for any purpose, provided that the 9 * above copyright notice and the following two paragraphs appear in 10 * all copies of this software. 11 * 12 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR 13 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES 14 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN 15 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH 16 * DAMAGE. 17 * 18 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, 19 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 20 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS 21 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO 22 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. 23 * 24 * Google Author(s): Behdad Esfahbod 25 */ 26 27 #ifndef HB_OT_CMAP_TABLE_HH 28 #define HB_OT_CMAP_TABLE_HH 29 30 #include "hb-open-type-private.hh" 31 32 33 namespace OT { 34 35 36 /* 37 * cmap -- Character To Glyph Index Mapping Table 38 */ 39 40 #define HB_OT_TAG_cmap HB_TAG('c','m','a','p') 41 42 43 struct CmapSubtableFormat0 44 { get_glyphOT::CmapSubtableFormat045 inline bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const 46 { 47 hb_codepoint_t gid = codepoint < 256 ? glyphIdArray[codepoint] : 0; 48 if (!gid) 49 return false; 50 *glyph = gid; 51 return true; 52 } 53 sanitizeOT::CmapSubtableFormat054 inline bool sanitize (hb_sanitize_context_t *c) const 55 { 56 TRACE_SANITIZE (this); 57 return_trace (c->check_struct (this)); 58 } 59 60 protected: 61 USHORT format; /* Format number is set to 0. */ 62 USHORT lengthZ; /* Byte length of this subtable. */ 63 USHORT languageZ; /* Ignore. */ 64 BYTE glyphIdArray[256];/* An array that maps character 65 * code to glyph index values. */ 66 public: 67 DEFINE_SIZE_STATIC (6 + 256); 68 }; 69 70 struct CmapSubtableFormat4 71 { 72 struct accelerator_t 73 { initOT::CmapSubtableFormat4::accelerator_t74 inline void init (const CmapSubtableFormat4 *subtable) 75 { 76 segCount = subtable->segCountX2 / 2; 77 endCount = subtable->values; 78 startCount = endCount + segCount + 1; 79 idDelta = startCount + segCount; 80 idRangeOffset = idDelta + segCount; 81 glyphIdArray = idRangeOffset + segCount; 82 glyphIdArrayLength = (subtable->length - 16 - 8 * segCount) / 2; 83 } 84 get_glyph_funcOT::CmapSubtableFormat4::accelerator_t85 static inline bool get_glyph_func (const void *obj, hb_codepoint_t codepoint, hb_codepoint_t *glyph) 86 { 87 const accelerator_t *thiz = (const accelerator_t *) obj; 88 89 /* Custom two-array bsearch. */ 90 int min = 0, max = (int) thiz->segCount - 1; 91 const USHORT *startCount = thiz->startCount; 92 const USHORT *endCount = thiz->endCount; 93 unsigned int i; 94 while (min <= max) 95 { 96 int mid = (min + max) / 2; 97 if (codepoint < startCount[mid]) 98 max = mid - 1; 99 else if (codepoint > endCount[mid]) 100 min = mid + 1; 101 else 102 { 103 i = mid; 104 goto found; 105 } 106 } 107 return false; 108 109 found: 110 hb_codepoint_t gid; 111 unsigned int rangeOffset = thiz->idRangeOffset[i]; 112 if (rangeOffset == 0) 113 gid = codepoint + thiz->idDelta[i]; 114 else 115 { 116 /* Somebody has been smoking... */ 117 unsigned int index = rangeOffset / 2 + (codepoint - thiz->startCount[i]) + i - thiz->segCount; 118 if (unlikely (index >= thiz->glyphIdArrayLength)) 119 return false; 120 gid = thiz->glyphIdArray[index]; 121 if (unlikely (!gid)) 122 return false; 123 gid += thiz->idDelta[i]; 124 } 125 126 *glyph = gid & 0xFFFFu; 127 return true; 128 } 129 130 const USHORT *endCount; 131 const USHORT *startCount; 132 const USHORT *idDelta; 133 const USHORT *idRangeOffset; 134 const USHORT *glyphIdArray; 135 unsigned int segCount; 136 unsigned int glyphIdArrayLength; 137 }; 138 get_glyphOT::CmapSubtableFormat4139 inline bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const 140 { 141 accelerator_t accel; 142 accel.init (this); 143 return accel.get_glyph_func (&accel, codepoint, glyph); 144 } 145 sanitizeOT::CmapSubtableFormat4146 inline bool sanitize (hb_sanitize_context_t *c) const 147 { 148 TRACE_SANITIZE (this); 149 if (unlikely (!c->check_struct (this))) 150 return_trace (false); 151 152 if (unlikely (!c->check_range (this, length))) 153 { 154 /* Some broken fonts have too long of a "length" value. 155 * If that is the case, just change the value to truncate 156 * the subtable at the end of the blob. */ 157 uint16_t new_length = (uint16_t) MIN ((uintptr_t) 65535, 158 (uintptr_t) (c->end - 159 (char *) this)); 160 if (!c->try_set (&length, new_length)) 161 return_trace (false); 162 } 163 164 return_trace (16 + 4 * (unsigned int) segCountX2 <= length); 165 } 166 167 protected: 168 USHORT format; /* Format number is set to 4. */ 169 USHORT length; /* This is the length in bytes of the 170 * subtable. */ 171 USHORT languageZ; /* Ignore. */ 172 USHORT segCountX2; /* 2 x segCount. */ 173 USHORT searchRangeZ; /* 2 * (2**floor(log2(segCount))) */ 174 USHORT entrySelectorZ; /* log2(searchRange/2) */ 175 USHORT rangeShiftZ; /* 2 x segCount - searchRange */ 176 177 USHORT values[VAR]; 178 #if 0 179 USHORT endCount[segCount]; /* End characterCode for each segment, 180 * last=0xFFFFu. */ 181 USHORT reservedPad; /* Set to 0. */ 182 USHORT startCount[segCount]; /* Start character code for each segment. */ 183 SHORT idDelta[segCount]; /* Delta for all character codes in segment. */ 184 USHORT idRangeOffset[segCount];/* Offsets into glyphIdArray or 0 */ 185 USHORT glyphIdArray[VAR]; /* Glyph index array (arbitrary length) */ 186 #endif 187 188 public: 189 DEFINE_SIZE_ARRAY (14, values); 190 }; 191 192 struct CmapSubtableLongGroup 193 { 194 friend struct CmapSubtableFormat12; 195 friend struct CmapSubtableFormat13; 196 cmpOT::CmapSubtableLongGroup197 int cmp (hb_codepoint_t codepoint) const 198 { 199 if (codepoint < startCharCode) return -1; 200 if (codepoint > endCharCode) return +1; 201 return 0; 202 } 203 sanitizeOT::CmapSubtableLongGroup204 inline bool sanitize (hb_sanitize_context_t *c) const 205 { 206 TRACE_SANITIZE (this); 207 return_trace (c->check_struct (this)); 208 } 209 210 private: 211 ULONG startCharCode; /* First character code in this group. */ 212 ULONG endCharCode; /* Last character code in this group. */ 213 ULONG glyphID; /* Glyph index; interpretation depends on 214 * subtable format. */ 215 public: 216 DEFINE_SIZE_STATIC (12); 217 }; 218 219 template <typename UINT> 220 struct CmapSubtableTrimmed 221 { get_glyphOT::CmapSubtableTrimmed222 inline bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const 223 { 224 /* Rely on our implicit array bound-checking. */ 225 hb_codepoint_t gid = glyphIdArray[codepoint - startCharCode]; 226 if (!gid) 227 return false; 228 *glyph = gid; 229 return true; 230 } 231 sanitizeOT::CmapSubtableTrimmed232 inline bool sanitize (hb_sanitize_context_t *c) const 233 { 234 TRACE_SANITIZE (this); 235 return_trace (c->check_struct (this) && glyphIdArray.sanitize (c)); 236 } 237 238 protected: 239 UINT formatReserved; /* Subtable format and (maybe) padding. */ 240 UINT lengthZ; /* Byte length of this subtable. */ 241 UINT languageZ; /* Ignore. */ 242 UINT startCharCode; /* First character code covered. */ 243 ArrayOf<GlyphID, UINT> 244 glyphIdArray; /* Array of glyph index values for character 245 * codes in the range. */ 246 public: 247 DEFINE_SIZE_ARRAY (5 * sizeof (UINT), glyphIdArray); 248 }; 249 250 struct CmapSubtableFormat6 : CmapSubtableTrimmed<USHORT> {}; 251 struct CmapSubtableFormat10 : CmapSubtableTrimmed<ULONG > {}; 252 253 template <typename T> 254 struct CmapSubtableLongSegmented 255 { get_glyphOT::CmapSubtableLongSegmented256 inline bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const 257 { 258 int i = groups.bsearch (codepoint); 259 if (i == -1) 260 return false; 261 *glyph = T::group_get_glyph (groups[i], codepoint); 262 return true; 263 } 264 sanitizeOT::CmapSubtableLongSegmented265 inline bool sanitize (hb_sanitize_context_t *c) const 266 { 267 TRACE_SANITIZE (this); 268 return_trace (c->check_struct (this) && groups.sanitize (c)); 269 } 270 271 protected: 272 USHORT format; /* Subtable format; set to 12. */ 273 USHORT reservedZ; /* Reserved; set to 0. */ 274 ULONG lengthZ; /* Byte length of this subtable. */ 275 ULONG languageZ; /* Ignore. */ 276 SortedArrayOf<CmapSubtableLongGroup, ULONG> 277 groups; /* Groupings. */ 278 public: 279 DEFINE_SIZE_ARRAY (16, groups); 280 }; 281 282 struct CmapSubtableFormat12 : CmapSubtableLongSegmented<CmapSubtableFormat12> 283 { group_get_glyphOT::CmapSubtableFormat12284 static inline hb_codepoint_t group_get_glyph (const CmapSubtableLongGroup &group, 285 hb_codepoint_t u) 286 { return group.glyphID + (u - group.startCharCode); } 287 }; 288 289 struct CmapSubtableFormat13 : CmapSubtableLongSegmented<CmapSubtableFormat13> 290 { group_get_glyphOT::CmapSubtableFormat13291 static inline hb_codepoint_t group_get_glyph (const CmapSubtableLongGroup &group, 292 hb_codepoint_t u HB_UNUSED) 293 { return group.glyphID; } 294 }; 295 296 typedef enum 297 { 298 GLYPH_VARIANT_NOT_FOUND = 0, 299 GLYPH_VARIANT_FOUND = 1, 300 GLYPH_VARIANT_USE_DEFAULT = 2 301 } glyph_variant_t; 302 303 struct UnicodeValueRange 304 { cmpOT::UnicodeValueRange305 inline int cmp (const hb_codepoint_t &codepoint) const 306 { 307 if (codepoint < startUnicodeValue) return -1; 308 if (codepoint > startUnicodeValue + additionalCount) return +1; 309 return 0; 310 } 311 sanitizeOT::UnicodeValueRange312 inline bool sanitize (hb_sanitize_context_t *c) const 313 { 314 TRACE_SANITIZE (this); 315 return_trace (c->check_struct (this)); 316 } 317 318 UINT24 startUnicodeValue; /* First value in this range. */ 319 BYTE additionalCount; /* Number of additional values in this 320 * range. */ 321 public: 322 DEFINE_SIZE_STATIC (4); 323 }; 324 325 typedef SortedArrayOf<UnicodeValueRange, ULONG> DefaultUVS; 326 327 struct UVSMapping 328 { cmpOT::UVSMapping329 inline int cmp (const hb_codepoint_t &codepoint) const 330 { 331 return unicodeValue.cmp (codepoint); 332 } 333 sanitizeOT::UVSMapping334 inline bool sanitize (hb_sanitize_context_t *c) const 335 { 336 TRACE_SANITIZE (this); 337 return_trace (c->check_struct (this)); 338 } 339 340 UINT24 unicodeValue; /* Base Unicode value of the UVS */ 341 GlyphID glyphID; /* Glyph ID of the UVS */ 342 public: 343 DEFINE_SIZE_STATIC (5); 344 }; 345 346 typedef SortedArrayOf<UVSMapping, ULONG> NonDefaultUVS; 347 348 struct VariationSelectorRecord 349 { get_glyphOT::VariationSelectorRecord350 inline glyph_variant_t get_glyph (hb_codepoint_t codepoint, 351 hb_codepoint_t *glyph, 352 const void *base) const 353 { 354 int i; 355 const DefaultUVS &defaults = base+defaultUVS; 356 i = defaults.bsearch (codepoint); 357 if (i != -1) 358 return GLYPH_VARIANT_USE_DEFAULT; 359 const NonDefaultUVS &nonDefaults = base+nonDefaultUVS; 360 i = nonDefaults.bsearch (codepoint); 361 if (i != -1) 362 { 363 *glyph = nonDefaults[i].glyphID; 364 return GLYPH_VARIANT_FOUND; 365 } 366 return GLYPH_VARIANT_NOT_FOUND; 367 } 368 cmpOT::VariationSelectorRecord369 inline int cmp (const hb_codepoint_t &variation_selector) const 370 { 371 return varSelector.cmp (variation_selector); 372 } 373 sanitizeOT::VariationSelectorRecord374 inline bool sanitize (hb_sanitize_context_t *c, const void *base) const 375 { 376 TRACE_SANITIZE (this); 377 return_trace (c->check_struct (this) && 378 defaultUVS.sanitize (c, base) && 379 nonDefaultUVS.sanitize (c, base)); 380 } 381 382 UINT24 varSelector; /* Variation selector. */ 383 OffsetTo<DefaultUVS, ULONG> 384 defaultUVS; /* Offset to Default UVS Table. May be 0. */ 385 OffsetTo<NonDefaultUVS, ULONG> 386 nonDefaultUVS; /* Offset to Non-Default UVS Table. May be 0. */ 387 public: 388 DEFINE_SIZE_STATIC (11); 389 }; 390 391 struct CmapSubtableFormat14 392 { get_glyph_variantOT::CmapSubtableFormat14393 inline glyph_variant_t get_glyph_variant (hb_codepoint_t codepoint, 394 hb_codepoint_t variation_selector, 395 hb_codepoint_t *glyph) const 396 { 397 return record[record.bsearch(variation_selector)].get_glyph (codepoint, glyph, this); 398 } 399 sanitizeOT::CmapSubtableFormat14400 inline bool sanitize (hb_sanitize_context_t *c) const 401 { 402 TRACE_SANITIZE (this); 403 return_trace (c->check_struct (this) && 404 record.sanitize (c, this)); 405 } 406 407 protected: 408 USHORT format; /* Format number is set to 14. */ 409 ULONG lengthZ; /* Byte length of this subtable. */ 410 SortedArrayOf<VariationSelectorRecord, ULONG> 411 record; /* Variation selector records; sorted 412 * in increasing order of `varSelector'. */ 413 public: 414 DEFINE_SIZE_ARRAY (10, record); 415 }; 416 417 struct CmapSubtable 418 { 419 /* Note: We intentionally do NOT implement subtable formats 2 and 8. */ 420 get_glyphOT::CmapSubtable421 inline bool get_glyph (hb_codepoint_t codepoint, 422 hb_codepoint_t *glyph) const 423 { 424 switch (u.format) { 425 case 0: return u.format0 .get_glyph(codepoint, glyph); 426 case 4: return u.format4 .get_glyph(codepoint, glyph); 427 case 6: return u.format6 .get_glyph(codepoint, glyph); 428 case 10: return u.format10.get_glyph(codepoint, glyph); 429 case 12: return u.format12.get_glyph(codepoint, glyph); 430 case 13: return u.format13.get_glyph(codepoint, glyph); 431 case 14: 432 default: return false; 433 } 434 } 435 sanitizeOT::CmapSubtable436 inline bool sanitize (hb_sanitize_context_t *c) const 437 { 438 TRACE_SANITIZE (this); 439 if (!u.format.sanitize (c)) return_trace (false); 440 switch (u.format) { 441 case 0: return_trace (u.format0 .sanitize (c)); 442 case 4: return_trace (u.format4 .sanitize (c)); 443 case 6: return_trace (u.format6 .sanitize (c)); 444 case 10: return_trace (u.format10.sanitize (c)); 445 case 12: return_trace (u.format12.sanitize (c)); 446 case 13: return_trace (u.format13.sanitize (c)); 447 case 14: return_trace (u.format14.sanitize (c)); 448 default:return_trace (true); 449 } 450 } 451 452 public: 453 union { 454 USHORT format; /* Format identifier */ 455 CmapSubtableFormat0 format0; 456 CmapSubtableFormat4 format4; 457 CmapSubtableFormat6 format6; 458 CmapSubtableFormat10 format10; 459 CmapSubtableFormat12 format12; 460 CmapSubtableFormat13 format13; 461 CmapSubtableFormat14 format14; 462 } u; 463 public: 464 DEFINE_SIZE_UNION (2, format); 465 }; 466 467 468 struct EncodingRecord 469 { cmpOT::EncodingRecord470 inline int cmp (const EncodingRecord &other) const 471 { 472 int ret; 473 ret = platformID.cmp (other.platformID); 474 if (ret) return ret; 475 ret = encodingID.cmp (other.encodingID); 476 if (ret) return ret; 477 return 0; 478 } 479 sanitizeOT::EncodingRecord480 inline bool sanitize (hb_sanitize_context_t *c, const void *base) const 481 { 482 TRACE_SANITIZE (this); 483 return_trace (c->check_struct (this) && 484 subtable.sanitize (c, base)); 485 } 486 487 USHORT platformID; /* Platform ID. */ 488 USHORT encodingID; /* Platform-specific encoding ID. */ 489 OffsetTo<CmapSubtable, ULONG> 490 subtable; /* Byte offset from beginning of table to the subtable for this encoding. */ 491 public: 492 DEFINE_SIZE_STATIC (8); 493 }; 494 495 struct cmap 496 { 497 static const hb_tag_t tableTag = HB_OT_TAG_cmap; 498 find_subtableOT::cmap499 inline const CmapSubtable *find_subtable (unsigned int platform_id, 500 unsigned int encoding_id) const 501 { 502 EncodingRecord key; 503 key.platformID.set (platform_id); 504 key.encodingID.set (encoding_id); 505 506 /* Note: We can use bsearch, but since it has no performance 507 * implications, we use lsearch and as such accept fonts with 508 * unsorted subtable list. */ 509 int result = encodingRecord./*bsearch*/lsearch (key); 510 if (result == -1 || !encodingRecord[result].subtable) 511 return NULL; 512 513 return &(this+encodingRecord[result].subtable); 514 } 515 sanitizeOT::cmap516 inline bool sanitize (hb_sanitize_context_t *c) const 517 { 518 TRACE_SANITIZE (this); 519 return_trace (c->check_struct (this) && 520 likely (version == 0) && 521 encodingRecord.sanitize (c, this)); 522 } 523 524 USHORT version; /* Table version number (0). */ 525 SortedArrayOf<EncodingRecord> 526 encodingRecord; /* Encoding tables. */ 527 public: 528 DEFINE_SIZE_ARRAY (4, encodingRecord); 529 }; 530 531 532 } /* namespace OT */ 533 534 535 #endif /* HB_OT_CMAP_TABLE_HH */ 536