1 /*
2  * Copyright © 2014  Google, Inc.
3  *
4  *  This is part of HarfBuzz, a text shaping library.
5  *
6  * Permission is hereby granted, without written agreement and without
7  * license or royalty fees, to use, copy, modify, and distribute this
8  * software and its documentation for any purpose, provided that the
9  * above copyright notice and the following two paragraphs appear in
10  * all copies of this software.
11  *
12  * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
13  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
14  * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
15  * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
16  * DAMAGE.
17  *
18  * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
19  * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
20  * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
21  * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
22  * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
23  *
24  * Google Author(s): Behdad Esfahbod
25  */
26 
27 #ifndef HB_OT_CMAP_TABLE_HH
28 #define HB_OT_CMAP_TABLE_HH
29 
30 #include "hb-open-type-private.hh"
31 
32 
33 namespace OT {
34 
35 
36 /*
37  * cmap -- Character To Glyph Index Mapping Table
38  */
39 
40 #define HB_OT_TAG_cmap HB_TAG('c','m','a','p')
41 
42 
43 struct CmapSubtableFormat0
44 {
get_glyphOT::CmapSubtableFormat045   inline bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const
46   {
47     hb_codepoint_t gid = codepoint < 256 ? glyphIdArray[codepoint] : 0;
48     if (!gid)
49       return false;
50     *glyph = gid;
51     return true;
52   }
53 
sanitizeOT::CmapSubtableFormat054   inline bool sanitize (hb_sanitize_context_t *c) const
55   {
56     TRACE_SANITIZE (this);
57     return_trace (c->check_struct (this));
58   }
59 
60   protected:
61   USHORT	format;		/* Format number is set to 0. */
62   USHORT	lengthZ;	/* Byte length of this subtable. */
63   USHORT	languageZ;	/* Ignore. */
64   BYTE		glyphIdArray[256];/* An array that maps character
65 				 * code to glyph index values. */
66   public:
67   DEFINE_SIZE_STATIC (6 + 256);
68 };
69 
70 struct CmapSubtableFormat4
71 {
72   struct accelerator_t
73   {
initOT::CmapSubtableFormat4::accelerator_t74     inline void init (const CmapSubtableFormat4 *subtable)
75     {
76       segCount = subtable->segCountX2 / 2;
77       endCount = subtable->values;
78       startCount = endCount + segCount + 1;
79       idDelta = startCount + segCount;
80       idRangeOffset = idDelta + segCount;
81       glyphIdArray = idRangeOffset + segCount;
82       glyphIdArrayLength = (subtable->length - 16 - 8 * segCount) / 2;
83     }
84 
get_glyph_funcOT::CmapSubtableFormat4::accelerator_t85     static inline bool get_glyph_func (const void *obj, hb_codepoint_t codepoint, hb_codepoint_t *glyph)
86     {
87       const accelerator_t *thiz = (const accelerator_t *) obj;
88 
89       /* Custom two-array bsearch. */
90       int min = 0, max = (int) thiz->segCount - 1;
91       const USHORT *startCount = thiz->startCount;
92       const USHORT *endCount = thiz->endCount;
93       unsigned int i;
94       while (min <= max)
95       {
96 	int mid = (min + max) / 2;
97 	if (codepoint < startCount[mid])
98 	  max = mid - 1;
99 	else if (codepoint > endCount[mid])
100 	  min = mid + 1;
101 	else
102 	{
103 	  i = mid;
104 	  goto found;
105 	}
106       }
107       return false;
108 
109     found:
110       hb_codepoint_t gid;
111       unsigned int rangeOffset = thiz->idRangeOffset[i];
112       if (rangeOffset == 0)
113 	gid = codepoint + thiz->idDelta[i];
114       else
115       {
116 	/* Somebody has been smoking... */
117 	unsigned int index = rangeOffset / 2 + (codepoint - thiz->startCount[i]) + i - thiz->segCount;
118 	if (unlikely (index >= thiz->glyphIdArrayLength))
119 	  return false;
120 	gid = thiz->glyphIdArray[index];
121 	if (unlikely (!gid))
122 	  return false;
123 	gid += thiz->idDelta[i];
124       }
125 
126       *glyph = gid & 0xFFFFu;
127       return true;
128     }
129 
130     const USHORT *endCount;
131     const USHORT *startCount;
132     const USHORT *idDelta;
133     const USHORT *idRangeOffset;
134     const USHORT *glyphIdArray;
135     unsigned int segCount;
136     unsigned int glyphIdArrayLength;
137   };
138 
get_glyphOT::CmapSubtableFormat4139   inline bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const
140   {
141     accelerator_t accel;
142     accel.init (this);
143     return accel.get_glyph_func (&accel, codepoint, glyph);
144   }
145 
sanitizeOT::CmapSubtableFormat4146   inline bool sanitize (hb_sanitize_context_t *c) const
147   {
148     TRACE_SANITIZE (this);
149     if (unlikely (!c->check_struct (this)))
150       return_trace (false);
151 
152     if (unlikely (!c->check_range (this, length)))
153     {
154       /* Some broken fonts have too long of a "length" value.
155        * If that is the case, just change the value to truncate
156        * the subtable at the end of the blob. */
157       uint16_t new_length = (uint16_t) MIN ((uintptr_t) 65535,
158 					    (uintptr_t) (c->end -
159 							 (char *) this));
160       if (!c->try_set (&length, new_length))
161 	return_trace (false);
162     }
163 
164     return_trace (16 + 4 * (unsigned int) segCountX2 <= length);
165   }
166 
167   protected:
168   USHORT	format;		/* Format number is set to 4. */
169   USHORT	length;		/* This is the length in bytes of the
170 				 * subtable. */
171   USHORT	languageZ;	/* Ignore. */
172   USHORT	segCountX2;	/* 2 x segCount. */
173   USHORT	searchRangeZ;	/* 2 * (2**floor(log2(segCount))) */
174   USHORT	entrySelectorZ;	/* log2(searchRange/2) */
175   USHORT	rangeShiftZ;	/* 2 x segCount - searchRange */
176 
177   USHORT	values[VAR];
178 #if 0
179   USHORT	endCount[segCount];	/* End characterCode for each segment,
180 					 * last=0xFFFFu. */
181   USHORT	reservedPad;		/* Set to 0. */
182   USHORT	startCount[segCount];	/* Start character code for each segment. */
183   SHORT		idDelta[segCount];	/* Delta for all character codes in segment. */
184   USHORT	idRangeOffset[segCount];/* Offsets into glyphIdArray or 0 */
185   USHORT	glyphIdArray[VAR];	/* Glyph index array (arbitrary length) */
186 #endif
187 
188   public:
189   DEFINE_SIZE_ARRAY (14, values);
190 };
191 
192 struct CmapSubtableLongGroup
193 {
194   friend struct CmapSubtableFormat12;
195   friend struct CmapSubtableFormat13;
196 
cmpOT::CmapSubtableLongGroup197   int cmp (hb_codepoint_t codepoint) const
198   {
199     if (codepoint < startCharCode) return -1;
200     if (codepoint > endCharCode)   return +1;
201     return 0;
202   }
203 
sanitizeOT::CmapSubtableLongGroup204   inline bool sanitize (hb_sanitize_context_t *c) const
205   {
206     TRACE_SANITIZE (this);
207     return_trace (c->check_struct (this));
208   }
209 
210   private:
211   ULONG		startCharCode;	/* First character code in this group. */
212   ULONG		endCharCode;	/* Last character code in this group. */
213   ULONG		glyphID;	/* Glyph index; interpretation depends on
214 				 * subtable format. */
215   public:
216   DEFINE_SIZE_STATIC (12);
217 };
218 
219 template <typename UINT>
220 struct CmapSubtableTrimmed
221 {
get_glyphOT::CmapSubtableTrimmed222   inline bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const
223   {
224     /* Rely on our implicit array bound-checking. */
225     hb_codepoint_t gid = glyphIdArray[codepoint - startCharCode];
226     if (!gid)
227       return false;
228     *glyph = gid;
229     return true;
230   }
231 
sanitizeOT::CmapSubtableTrimmed232   inline bool sanitize (hb_sanitize_context_t *c) const
233   {
234     TRACE_SANITIZE (this);
235     return_trace (c->check_struct (this) && glyphIdArray.sanitize (c));
236   }
237 
238   protected:
239   UINT		formatReserved;	/* Subtable format and (maybe) padding. */
240   UINT		lengthZ;	/* Byte length of this subtable. */
241   UINT		languageZ;	/* Ignore. */
242   UINT		startCharCode;	/* First character code covered. */
243   ArrayOf<GlyphID, UINT>
244 		glyphIdArray;	/* Array of glyph index values for character
245 				 * codes in the range. */
246   public:
247   DEFINE_SIZE_ARRAY (5 * sizeof (UINT), glyphIdArray);
248 };
249 
250 struct CmapSubtableFormat6  : CmapSubtableTrimmed<USHORT> {};
251 struct CmapSubtableFormat10 : CmapSubtableTrimmed<ULONG > {};
252 
253 template <typename T>
254 struct CmapSubtableLongSegmented
255 {
get_glyphOT::CmapSubtableLongSegmented256   inline bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const
257   {
258     int i = groups.bsearch (codepoint);
259     if (i == -1)
260       return false;
261     *glyph = T::group_get_glyph (groups[i], codepoint);
262     return true;
263   }
264 
sanitizeOT::CmapSubtableLongSegmented265   inline bool sanitize (hb_sanitize_context_t *c) const
266   {
267     TRACE_SANITIZE (this);
268     return_trace (c->check_struct (this) && groups.sanitize (c));
269   }
270 
271   protected:
272   USHORT	format;		/* Subtable format; set to 12. */
273   USHORT	reservedZ;	/* Reserved; set to 0. */
274   ULONG		lengthZ;	/* Byte length of this subtable. */
275   ULONG		languageZ;	/* Ignore. */
276   SortedArrayOf<CmapSubtableLongGroup, ULONG>
277 		groups;		/* Groupings. */
278   public:
279   DEFINE_SIZE_ARRAY (16, groups);
280 };
281 
282 struct CmapSubtableFormat12 : CmapSubtableLongSegmented<CmapSubtableFormat12>
283 {
group_get_glyphOT::CmapSubtableFormat12284   static inline hb_codepoint_t group_get_glyph (const CmapSubtableLongGroup &group,
285 						hb_codepoint_t u)
286   { return group.glyphID + (u - group.startCharCode); }
287 };
288 
289 struct CmapSubtableFormat13 : CmapSubtableLongSegmented<CmapSubtableFormat13>
290 {
group_get_glyphOT::CmapSubtableFormat13291   static inline hb_codepoint_t group_get_glyph (const CmapSubtableLongGroup &group,
292 						hb_codepoint_t u HB_UNUSED)
293   { return group.glyphID; }
294 };
295 
/* Outcome of a variation-sequence lookup. */
enum glyph_variant_t
{
  GLYPH_VARIANT_NOT_FOUND   = 0, /* No entry matched the code point. */
  GLYPH_VARIANT_FOUND       = 1, /* A specific glyph was stored for the caller. */
  GLYPH_VARIANT_USE_DEFAULT = 2  /* Matched a default range; no glyph stored. */
};
302 
303 struct UnicodeValueRange
304 {
cmpOT::UnicodeValueRange305   inline int cmp (const hb_codepoint_t &codepoint) const
306   {
307     if (codepoint < startUnicodeValue) return -1;
308     if (codepoint > startUnicodeValue + additionalCount) return +1;
309     return 0;
310   }
311 
sanitizeOT::UnicodeValueRange312   inline bool sanitize (hb_sanitize_context_t *c) const
313   {
314     TRACE_SANITIZE (this);
315     return_trace (c->check_struct (this));
316   }
317 
318   UINT24	startUnicodeValue;	/* First value in this range. */
319   BYTE		additionalCount;	/* Number of additional values in this
320 					 * range. */
321   public:
322   DEFINE_SIZE_STATIC (4);
323 };
324 
325 typedef SortedArrayOf<UnicodeValueRange, ULONG> DefaultUVS;
326 
327 struct UVSMapping
328 {
cmpOT::UVSMapping329   inline int cmp (const hb_codepoint_t &codepoint) const
330   {
331     return unicodeValue.cmp (codepoint);
332   }
333 
sanitizeOT::UVSMapping334   inline bool sanitize (hb_sanitize_context_t *c) const
335   {
336     TRACE_SANITIZE (this);
337     return_trace (c->check_struct (this));
338   }
339 
340   UINT24	unicodeValue;	/* Base Unicode value of the UVS */
341   GlyphID	glyphID;	/* Glyph ID of the UVS */
342   public:
343   DEFINE_SIZE_STATIC (5);
344 };
345 
346 typedef SortedArrayOf<UVSMapping, ULONG> NonDefaultUVS;
347 
348 struct VariationSelectorRecord
349 {
get_glyphOT::VariationSelectorRecord350   inline glyph_variant_t get_glyph (hb_codepoint_t codepoint,
351 				    hb_codepoint_t *glyph,
352 				    const void *base) const
353   {
354     int i;
355     const DefaultUVS &defaults = base+defaultUVS;
356     i = defaults.bsearch (codepoint);
357     if (i != -1)
358       return GLYPH_VARIANT_USE_DEFAULT;
359     const NonDefaultUVS &nonDefaults = base+nonDefaultUVS;
360     i = nonDefaults.bsearch (codepoint);
361     if (i != -1)
362     {
363       *glyph = nonDefaults[i].glyphID;
364        return GLYPH_VARIANT_FOUND;
365     }
366     return GLYPH_VARIANT_NOT_FOUND;
367   }
368 
cmpOT::VariationSelectorRecord369   inline int cmp (const hb_codepoint_t &variation_selector) const
370   {
371     return varSelector.cmp (variation_selector);
372   }
373 
sanitizeOT::VariationSelectorRecord374   inline bool sanitize (hb_sanitize_context_t *c, const void *base) const
375   {
376     TRACE_SANITIZE (this);
377     return_trace (c->check_struct (this) &&
378 		  defaultUVS.sanitize (c, base) &&
379 		  nonDefaultUVS.sanitize (c, base));
380   }
381 
382   UINT24	varSelector;	/* Variation selector. */
383   OffsetTo<DefaultUVS, ULONG>
384 		defaultUVS;	/* Offset to Default UVS Table. May be 0. */
385   OffsetTo<NonDefaultUVS, ULONG>
386 		nonDefaultUVS;	/* Offset to Non-Default UVS Table. May be 0. */
387   public:
388   DEFINE_SIZE_STATIC (11);
389 };
390 
391 struct CmapSubtableFormat14
392 {
get_glyph_variantOT::CmapSubtableFormat14393   inline glyph_variant_t get_glyph_variant (hb_codepoint_t codepoint,
394 					    hb_codepoint_t variation_selector,
395 					    hb_codepoint_t *glyph) const
396   {
397     return record[record.bsearch(variation_selector)].get_glyph (codepoint, glyph, this);
398   }
399 
sanitizeOT::CmapSubtableFormat14400   inline bool sanitize (hb_sanitize_context_t *c) const
401   {
402     TRACE_SANITIZE (this);
403     return_trace (c->check_struct (this) &&
404 		  record.sanitize (c, this));
405   }
406 
407   protected:
408   USHORT	format;		/* Format number is set to 14. */
409   ULONG		lengthZ;	/* Byte length of this subtable. */
410   SortedArrayOf<VariationSelectorRecord, ULONG>
411 		record;		/* Variation selector records; sorted
412 				 * in increasing order of `varSelector'. */
413   public:
414   DEFINE_SIZE_ARRAY (10, record);
415 };
416 
417 struct CmapSubtable
418 {
419   /* Note: We intentionally do NOT implement subtable formats 2 and 8. */
420 
get_glyphOT::CmapSubtable421   inline bool get_glyph (hb_codepoint_t codepoint,
422 			 hb_codepoint_t *glyph) const
423   {
424     switch (u.format) {
425     case  0: return u.format0 .get_glyph(codepoint, glyph);
426     case  4: return u.format4 .get_glyph(codepoint, glyph);
427     case  6: return u.format6 .get_glyph(codepoint, glyph);
428     case 10: return u.format10.get_glyph(codepoint, glyph);
429     case 12: return u.format12.get_glyph(codepoint, glyph);
430     case 13: return u.format13.get_glyph(codepoint, glyph);
431     case 14:
432     default: return false;
433     }
434   }
435 
sanitizeOT::CmapSubtable436   inline bool sanitize (hb_sanitize_context_t *c) const
437   {
438     TRACE_SANITIZE (this);
439     if (!u.format.sanitize (c)) return_trace (false);
440     switch (u.format) {
441     case  0: return_trace (u.format0 .sanitize (c));
442     case  4: return_trace (u.format4 .sanitize (c));
443     case  6: return_trace (u.format6 .sanitize (c));
444     case 10: return_trace (u.format10.sanitize (c));
445     case 12: return_trace (u.format12.sanitize (c));
446     case 13: return_trace (u.format13.sanitize (c));
447     case 14: return_trace (u.format14.sanitize (c));
448     default:return_trace (true);
449     }
450   }
451 
452   public:
453   union {
454   USHORT		format;		/* Format identifier */
455   CmapSubtableFormat0	format0;
456   CmapSubtableFormat4	format4;
457   CmapSubtableFormat6	format6;
458   CmapSubtableFormat10	format10;
459   CmapSubtableFormat12	format12;
460   CmapSubtableFormat13	format13;
461   CmapSubtableFormat14	format14;
462   } u;
463   public:
464   DEFINE_SIZE_UNION (2, format);
465 };
466 
467 
468 struct EncodingRecord
469 {
cmpOT::EncodingRecord470   inline int cmp (const EncodingRecord &other) const
471   {
472     int ret;
473     ret = platformID.cmp (other.platformID);
474     if (ret) return ret;
475     ret = encodingID.cmp (other.encodingID);
476     if (ret) return ret;
477     return 0;
478   }
479 
sanitizeOT::EncodingRecord480   inline bool sanitize (hb_sanitize_context_t *c, const void *base) const
481   {
482     TRACE_SANITIZE (this);
483     return_trace (c->check_struct (this) &&
484 		  subtable.sanitize (c, base));
485   }
486 
487   USHORT	platformID;	/* Platform ID. */
488   USHORT	encodingID;	/* Platform-specific encoding ID. */
489   OffsetTo<CmapSubtable, ULONG>
490 		subtable;	/* Byte offset from beginning of table to the subtable for this encoding. */
491   public:
492   DEFINE_SIZE_STATIC (8);
493 };
494 
495 struct cmap
496 {
497   static const hb_tag_t tableTag	= HB_OT_TAG_cmap;
498 
find_subtableOT::cmap499   inline const CmapSubtable *find_subtable (unsigned int platform_id,
500 					    unsigned int encoding_id) const
501   {
502     EncodingRecord key;
503     key.platformID.set (platform_id);
504     key.encodingID.set (encoding_id);
505 
506     /* Note: We can use bsearch, but since it has no performance
507      * implications, we use lsearch and as such accept fonts with
508      * unsorted subtable list. */
509     int result = encodingRecord./*bsearch*/lsearch (key);
510     if (result == -1 || !encodingRecord[result].subtable)
511       return NULL;
512 
513     return &(this+encodingRecord[result].subtable);
514   }
515 
sanitizeOT::cmap516   inline bool sanitize (hb_sanitize_context_t *c) const
517   {
518     TRACE_SANITIZE (this);
519     return_trace (c->check_struct (this) &&
520 		  likely (version == 0) &&
521 		  encodingRecord.sanitize (c, this));
522   }
523 
524   USHORT		version;	/* Table version number (0). */
525   SortedArrayOf<EncodingRecord>
526 			encodingRecord;	/* Encoding tables. */
527   public:
528   DEFINE_SIZE_ARRAY (4, encodingRecord);
529 };
530 
531 
532 } /* namespace OT */
533 
534 
535 #endif /* HB_OT_CMAP_TABLE_HH */
536