1 /*
2  * Copyright © 2010,2011,2012  Google, Inc.
3  *
4  *  This is part of HarfBuzz, a text shaping library.
5  *
6  * Permission is hereby granted, without written agreement and without
7  * license or royalty fees, to use, copy, modify, and distribute this
8  * software and its documentation for any purpose, provided that the
9  * above copyright notice and the following two paragraphs appear in
10  * all copies of this software.
11  *
12  * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
13  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
14  * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
15  * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
16  * DAMAGE.
17  *
18  * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
19  * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
20  * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
21  * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
22  * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
23  *
24  * Google Author(s): Behdad Esfahbod
25  */
26 
27 #ifndef HB_OT_SHAPE_COMPLEX_PRIVATE_HH
28 #define HB_OT_SHAPE_COMPLEX_PRIVATE_HH
29 
30 #include "hb-private.hh"
31 
32 #include "hb-ot-shape-private.hh"
33 #include "hb-ot-shape-normalize-private.hh"
34 
35 
36 
/* Buffer var allocations reserved for complex shapers.
 * Each macro expands to a member access on `var2`, so it is written after an
 * object expression, e.g. `obj.complex_var_u8_0()`, and yields an lvalue for
 * byte 2 or byte 3 of that object's var2.u8 array. */
#define complex_var_u8_0() var2.u8[2]
#define complex_var_u8_1() var2.u8[3]
40 
41 
/* Zero-width-marks strategy a complex shaper can request; stored in
 * hb_ot_complex_shaper_t::zero_width_marks.
 *
 * NOTE(review): judging by the enumerator names only, BY_UNICODE / BY_GDEF
 * name the source of the mark classification and EARLY / LATE name when the
 * zeroing is applied -- confirm against the code that consumes this enum.
 *
 * A BY_UNICODE_EARLY enumerator is not currently part of the enum; if it is
 * ever added between existing entries, the numeric values after it shift. */
enum hb_ot_shape_zero_width_marks_type_t
{
  HB_OT_SHAPE_ZERO_WIDTH_MARKS_NONE,
  /* HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_UNICODE_EARLY -- disabled */
  HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_UNICODE_LATE,
  HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_EARLY,
  HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_LATE,

  /* Alias, not a distinct value. */
  HB_OT_SHAPE_ZERO_WIDTH_MARKS_DEFAULT = HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_UNICODE_LATE
};
51 
52 
/* Master OT shaper list, written as an X-macro.
 * Define HB_COMPLEX_SHAPER_IMPLEMENT(name) before expanding this list; it is
 * then invoked once per shaper, in the order given below. */
#define HB_COMPLEX_SHAPERS_IMPLEMENT_SHAPERS \
  HB_COMPLEX_SHAPER_IMPLEMENT (default)     /* should be first */ \
  HB_COMPLEX_SHAPER_IMPLEMENT (arabic)      \
  HB_COMPLEX_SHAPER_IMPLEMENT (hangul)      \
  HB_COMPLEX_SHAPER_IMPLEMENT (hebrew)      \
  HB_COMPLEX_SHAPER_IMPLEMENT (myanmar_old) \
  HB_COMPLEX_SHAPER_IMPLEMENT (indic)       \
  HB_COMPLEX_SHAPER_IMPLEMENT (myanmar)     \
  HB_COMPLEX_SHAPER_IMPLEMENT (sea)         \
  HB_COMPLEX_SHAPER_IMPLEMENT (thai)        \
  HB_COMPLEX_SHAPER_IMPLEMENT (tibetan)     \
  /* ^--- Add new shapers here (each entry line ends in a backslash) */
66 
67 
68 struct hb_ot_complex_shaper_t
69 {
70   char name[8];
71 
72   /* collect_features()
73    * Called during shape_plan().
74    * Shapers should use plan->map to add their features and callbacks.
75    * May be NULL.
76    */
77   void (*collect_features) (hb_ot_shape_planner_t *plan);
78 
79   /* override_features()
80    * Called during shape_plan().
81    * Shapers should use plan->map to override features and add callbacks after
82    * common features are added.
83    * May be NULL.
84    */
85   void (*override_features) (hb_ot_shape_planner_t *plan);
86 
87 
88   /* data_create()
89    * Called at the end of shape_plan().
90    * Whatever shapers return will be accessible through plan->data later.
91    * If NULL is returned, means a plan failure.
92    */
93   void *(*data_create) (const hb_ot_shape_plan_t *plan);
94 
95   /* data_destroy()
96    * Called when the shape_plan is being destroyed.
97    * plan->data is passed here for destruction.
98    * If NULL is returned, means a plan failure.
99    * May be NULL.
100    */
101   void (*data_destroy) (void *data);
102 
103 
104   /* preprocess_text()
105    * Called during shape().
106    * Shapers can use to modify text before shaping starts.
107    * May be NULL.
108    */
109   void (*preprocess_text) (const hb_ot_shape_plan_t *plan,
110 			   hb_buffer_t              *buffer,
111 			   hb_font_t                *font);
112 
113 
114   hb_ot_shape_normalization_mode_t normalization_preference;
115 
116   /* decompose()
117    * Called during shape()'s normalization.
118    * May be NULL.
119    */
120   bool (*decompose) (const hb_ot_shape_normalize_context_t *c,
121 		     hb_codepoint_t  ab,
122 		     hb_codepoint_t *a,
123 		     hb_codepoint_t *b);
124 
125   /* compose()
126    * Called during shape()'s normalization.
127    * May be NULL.
128    */
129   bool (*compose) (const hb_ot_shape_normalize_context_t *c,
130 		   hb_codepoint_t  a,
131 		   hb_codepoint_t  b,
132 		   hb_codepoint_t *ab);
133 
134   /* setup_masks()
135    * Called during shape().
136    * Shapers should use map to get feature masks and set on buffer.
137    * Shapers may NOT modify characters.
138    * May be NULL.
139    */
140   void (*setup_masks) (const hb_ot_shape_plan_t *plan,
141 		       hb_buffer_t              *buffer,
142 		       hb_font_t                *font);
143 
144   hb_ot_shape_zero_width_marks_type_t zero_width_marks;
145 
146   bool fallback_position;
147 };
148 
149 #define HB_COMPLEX_SHAPER_IMPLEMENT(name) extern HB_INTERNAL const hb_ot_complex_shaper_t _hb_ot_complex_shaper_##name;
150 HB_COMPLEX_SHAPERS_IMPLEMENT_SHAPERS
151 #undef HB_COMPLEX_SHAPER_IMPLEMENT
152 
153 
154 static inline const hb_ot_complex_shaper_t *
hb_ot_shape_complex_categorize(const hb_ot_shape_planner_t * planner)155 hb_ot_shape_complex_categorize (const hb_ot_shape_planner_t *planner)
156 {
157   switch ((hb_tag_t) planner->props.script)
158   {
159     default:
160       return &_hb_ot_complex_shaper_default;
161 
162 
163     /* Unicode-1.1 additions */
164     case HB_SCRIPT_ARABIC:
165 
166     /* Unicode-3.0 additions */
167     case HB_SCRIPT_MONGOLIAN:
168     case HB_SCRIPT_SYRIAC:
169 
170     /* Unicode-5.0 additions */
171     case HB_SCRIPT_NKO:
172     case HB_SCRIPT_PHAGS_PA:
173 
174     /* Unicode-6.0 additions */
175     case HB_SCRIPT_MANDAIC:
176 
177     /* Unicode-7.0 additions */
178     case HB_SCRIPT_MANICHAEAN:
179     case HB_SCRIPT_PSALTER_PAHLAVI:
180 
181       /* For Arabic script, use the Arabic shaper even if no OT script tag was found.
182        * This is because we do fallback shaping for Arabic script (and not others). */
183       if (planner->map.chosen_script[0] != HB_OT_TAG_DEFAULT_SCRIPT ||
184 	  planner->props.script == HB_SCRIPT_ARABIC)
185 	return &_hb_ot_complex_shaper_arabic;
186       else
187 	return &_hb_ot_complex_shaper_default;
188 
189 
190     /* Unicode-1.1 additions */
191     case HB_SCRIPT_THAI:
192     case HB_SCRIPT_LAO:
193 
194       return &_hb_ot_complex_shaper_thai;
195 
196 
197     /* Unicode-1.1 additions */
198     case HB_SCRIPT_HANGUL:
199 
200       return &_hb_ot_complex_shaper_hangul;
201 
202 
203     /* Unicode-2.0 additions */
204     case HB_SCRIPT_TIBETAN:
205 
206       return &_hb_ot_complex_shaper_tibetan;
207 
208 
209     /* Unicode-1.1 additions */
210     case HB_SCRIPT_HEBREW:
211 
212       return &_hb_ot_complex_shaper_hebrew;
213 
214 
215     /* ^--- Add new shapers here */
216 
217 
218 #if 0
219     /* Note:
220      *
221      * These disabled scripts are listed in ucd/IndicSyllabicCategory.txt, but according
222      * to Martin Hosken and Jonathan Kew do not require complex shaping.
223      *
224      * TODO We should automate figuring out which scripts do not need complex shaping
225      *
226      * TODO We currently keep data for these scripts in our indic table.  Need to fix the
227      * generator to not do that.
228      */
229 
230 
231     /* Simple? */
232 
233     /* Unicode-3.2 additions */
234     case HB_SCRIPT_BUHID:
235     case HB_SCRIPT_HANUNOO:
236 
237     /* Unicode-5.1 additions */
238     case HB_SCRIPT_SAURASHTRA:
239 
240     /* Unicode-6.0 additions */
241     case HB_SCRIPT_BATAK:
242     case HB_SCRIPT_BRAHMI:
243 
244 
245     /* Simple */
246 
247     /* Unicode-1.1 additions */
248     /* These have their own shaper now. */
249     case HB_SCRIPT_LAO:
250     case HB_SCRIPT_THAI:
251 
252     /* Unicode-3.2 additions */
253     case HB_SCRIPT_TAGALOG:
254     case HB_SCRIPT_TAGBANWA:
255 
256     /* Unicode-4.0 additions */
257     case HB_SCRIPT_LIMBU:
258     case HB_SCRIPT_TAI_LE:
259 
260     /* Unicode-4.1 additions */
261     case HB_SCRIPT_KHAROSHTHI:
262     case HB_SCRIPT_NEW_TAI_LUE:
263     case HB_SCRIPT_SYLOTI_NAGRI:
264 
265     /* Unicode-5.1 additions */
266     case HB_SCRIPT_KAYAH_LI:
267 
268     /* Unicode-5.2 additions */
269     case HB_SCRIPT_TAI_VIET:
270 
271 
272 #endif
273 
274     /* Unicode-1.1 additions */
275     case HB_SCRIPT_BENGALI:
276     case HB_SCRIPT_DEVANAGARI:
277     case HB_SCRIPT_GUJARATI:
278     case HB_SCRIPT_GURMUKHI:
279     case HB_SCRIPT_KANNADA:
280     case HB_SCRIPT_MALAYALAM:
281     case HB_SCRIPT_ORIYA:
282     case HB_SCRIPT_TAMIL:
283     case HB_SCRIPT_TELUGU:
284 
285     /* Unicode-3.0 additions */
286     case HB_SCRIPT_SINHALA:
287 
288     /* Unicode-5.0 additions */
289     case HB_SCRIPT_BALINESE:
290 
291     /* Unicode-5.1 additions */
292     case HB_SCRIPT_LEPCHA:
293     case HB_SCRIPT_REJANG:
294     case HB_SCRIPT_SUNDANESE:
295 
296     /* Unicode-5.2 additions */
297     case HB_SCRIPT_JAVANESE:
298     case HB_SCRIPT_KAITHI:
299     case HB_SCRIPT_MEETEI_MAYEK:
300 
301     /* Unicode-6.0 additions */
302 
303     /* Unicode-6.1 additions */
304     case HB_SCRIPT_CHAKMA:
305     case HB_SCRIPT_SHARADA:
306     case HB_SCRIPT_TAKRI:
307 
308       /* If the designer designed the font for the 'DFLT' script,
309        * use the default shaper.  Otherwise, use the Indic shaper.
310        * Note that for some simple scripts, there may not be *any*
311        * GSUB/GPOS needed, so there may be no scripts found! */
312       if (planner->map.chosen_script[0] == HB_TAG ('D','F','L','T'))
313 	return &_hb_ot_complex_shaper_default;
314       else
315 	return &_hb_ot_complex_shaper_indic;
316 
317     case HB_SCRIPT_KHMER:
318       /* A number of Khmer fonts in the wild don't have a 'pref' feature,
319        * and as such won't shape properly via the Indic shaper;
320        * however, they typically have 'liga' / 'clig' features that implement
321        * the necessary "reordering" by means of ligature substitutions.
322        * So we send such pref-less fonts through the generic shaper instead. */
323       if (planner->map.found_script[0] &&
324 	  hb_ot_layout_language_find_feature (planner->face, HB_OT_TAG_GSUB,
325 					      planner->map.script_index[0],
326 					      planner->map.language_index[0],
327 					      HB_TAG ('p','r','e','f'),
328 					      NULL))
329 	return &_hb_ot_complex_shaper_indic;
330       else
331 	return &_hb_ot_complex_shaper_default;
332 
333     case HB_SCRIPT_MYANMAR:
334       if (planner->map.chosen_script[0] == HB_TAG ('m','y','m','2'))
335 	return &_hb_ot_complex_shaper_myanmar;
336       else if (planner->map.chosen_script[0] == HB_TAG ('m','y','m','r'))
337 	return &_hb_ot_complex_shaper_myanmar_old;
338       else
339 	return &_hb_ot_complex_shaper_default;
340 
341     /* Unicode-4.1 additions */
342     case HB_SCRIPT_BUGINESE:
343 
344     /* Unicode-5.1 additions */
345     case HB_SCRIPT_CHAM:
346 
347     /* Unicode-5.2 additions */
348     case HB_SCRIPT_TAI_THAM:
349 
350       /* If the designer designed the font for the 'DFLT' script,
351        * use the default shaper.  Otherwise, use the Indic shaper.
352        * Note that for some simple scripts, there may not be *any*
353        * GSUB/GPOS needed, so there may be no scripts found! */
354       if (planner->map.chosen_script[0] == HB_TAG ('D','F','L','T'))
355 	return &_hb_ot_complex_shaper_default;
356       else
357 	return &_hb_ot_complex_shaper_sea;
358   }
359 }
360 
361 
362 #endif /* HB_OT_SHAPE_COMPLEX_PRIVATE_HH */
363