1 /*
2  * Copyright © 2011,2012,2013  Google, Inc.
3  *
4  *  This is part of HarfBuzz, a text shaping library.
5  *
6  * Permission is hereby granted, without written agreement and without
7  * license or royalty fees, to use, copy, modify, and distribute this
8  * software and its documentation for any purpose, provided that the
9  * above copyright notice and the following two paragraphs appear in
10  * all copies of this software.
11  *
12  * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
13  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
14  * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
15  * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
16  * DAMAGE.
17  *
18  * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
19  * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
20  * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
21  * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
22  * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
23  *
24  * Google Author(s): Behdad Esfahbod
25  */
26 
27 #include "hb-ot-shape-complex-myanmar.hh"
28 
29 
30 /*
31  * Myanmar shaper.
32  */
33 
34 static const hb_tag_t
35 basic_features[] =
36 {
37   /*
38    * Basic features.
39    * These features are applied in order, one at a time, after reordering.
40    */
41   HB_TAG('r','p','h','f'),
42   HB_TAG('p','r','e','f'),
43   HB_TAG('b','l','w','f'),
44   HB_TAG('p','s','t','f'),
45 };
46 static const hb_tag_t
47 other_features[] =
48 {
49   /*
50    * Other features.
51    * These features are applied all at once, after clearing syllables.
52    */
53   HB_TAG('p','r','e','s'),
54   HB_TAG('a','b','v','s'),
55   HB_TAG('b','l','w','s'),
56   HB_TAG('p','s','t','s'),
57 };
58 static const hb_tag_t
59 positioning_features[] =
60 {
61   /*
62    * Positioning features.
63    * We don't care about the types.
64    */
65   HB_TAG('d','i','s','t'),
66   /* Pre-release version of Windows 8 Myanmar font had abvm,blwm
67    * features.  The released Windows 8 version of the font (as well
68    * as the released spec) used 'mark' instead.  The Windows 8
69    * shaper however didn't apply 'mark' but did apply 'mkmk'.
70    * Perhaps it applied abvm/blwm.  This was fixed in a Windows 8
71    * update, so now it applies mark/mkmk.  We are guessing that
72    * it still applies abvm/blwm too.
73    */
74   HB_TAG('a','b','v','m'),
75   HB_TAG('b','l','w','m'),
76 };
77 
78 static void
79 setup_syllables (const hb_ot_shape_plan_t *plan,
80 		 hb_font_t *font,
81 		 hb_buffer_t *buffer);
82 static void
83 reorder (const hb_ot_shape_plan_t *plan,
84 	 hb_font_t *font,
85 	 hb_buffer_t *buffer);
86 static void
87 clear_syllables (const hb_ot_shape_plan_t *plan,
88 		 hb_font_t *font,
89 		 hb_buffer_t *buffer);
90 
91 static void
collect_features_myanmar(hb_ot_shape_planner_t * plan)92 collect_features_myanmar (hb_ot_shape_planner_t *plan)
93 {
94   hb_ot_map_builder_t *map = &plan->map;
95 
96   /* Do this before any lookups have been applied. */
97   map->add_gsub_pause (setup_syllables);
98 
99   map->enable_feature (HB_TAG('l','o','c','l'));
100   /* The Indic specs do not require ccmp, but we apply it here since if
101    * there is a use of it, it's typically at the beginning. */
102   map->enable_feature (HB_TAG('c','c','m','p'));
103 
104 
105   map->add_gsub_pause (reorder);
106 
107   for (unsigned int i = 0; i < ARRAY_LENGTH (basic_features); i++)
108   {
109     map->enable_feature (basic_features[i], F_MANUAL_ZWJ);
110     map->add_gsub_pause (nullptr);
111   }
112 
113   map->add_gsub_pause (clear_syllables);
114 
115   for (unsigned int i = 0; i < ARRAY_LENGTH (other_features); i++)
116     map->enable_feature (other_features[i], F_MANUAL_ZWJ);
117 
118   for (unsigned int i = 0; i < ARRAY_LENGTH (positioning_features); i++)
119     map->enable_feature (positioning_features[i]);
120 }
121 
122 static void
override_features_myanmar(hb_ot_shape_planner_t * plan)123 override_features_myanmar (hb_ot_shape_planner_t *plan)
124 {
125   plan->map.disable_feature (HB_TAG('l','i','g','a'));
126 }
127 
128 
/*
 * Kinds of syllable.  Code in this file reads the kind back from the
 * low four bits of a glyph's syllable() value.
 */
enum syllable_type_t {
  consonant_syllable  = 0, /* Reordered by initial_reordering_syllable(). */
  punctuation_cluster = 1, /* Left untouched by reordering. */
  broken_cluster      = 2, /* Gets a dotted circle, then reordered like a consonant syllable. */
  non_myanmar_cluster = 3, /* Left untouched by reordering. */
};
135 
136 #include "hb-ot-shape-complex-myanmar-machine.hh"
137 
138 
139 static void
setup_masks_myanmar(const hb_ot_shape_plan_t * plan HB_UNUSED,hb_buffer_t * buffer,hb_font_t * font HB_UNUSED)140 setup_masks_myanmar (const hb_ot_shape_plan_t *plan HB_UNUSED,
141 		   hb_buffer_t              *buffer,
142 		   hb_font_t                *font HB_UNUSED)
143 {
144   HB_BUFFER_ALLOCATE_VAR (buffer, myanmar_category);
145   HB_BUFFER_ALLOCATE_VAR (buffer, myanmar_position);
146 
147   /* We cannot setup masks here.  We save information about characters
148    * and setup masks later on in a pause-callback. */
149 
150   unsigned int count = buffer->len;
151   hb_glyph_info_t *info = buffer->info;
152   for (unsigned int i = 0; i < count; i++)
153     set_myanmar_properties (info[i]);
154 }
155 
156 static void
setup_syllables(const hb_ot_shape_plan_t * plan HB_UNUSED,hb_font_t * font HB_UNUSED,hb_buffer_t * buffer)157 setup_syllables (const hb_ot_shape_plan_t *plan HB_UNUSED,
158 		 hb_font_t *font HB_UNUSED,
159 		 hb_buffer_t *buffer)
160 {
161   find_syllables (buffer);
162   foreach_syllable (buffer, start, end)
163     buffer->unsafe_to_break (start, end);
164 }
165 
166 static int
compare_myanmar_order(const hb_glyph_info_t * pa,const hb_glyph_info_t * pb)167 compare_myanmar_order (const hb_glyph_info_t *pa, const hb_glyph_info_t *pb)
168 {
169   int a = pa->myanmar_position();
170   int b = pb->myanmar_position();
171 
172   return a < b ? -1 : a == b ? 0 : +1;
173 }
174 
175 
176 /* Rules from:
177  * https://docs.microsoft.com/en-us/typography/script-development/myanmar */
178 
179 static void
initial_reordering_consonant_syllable(hb_buffer_t * buffer,unsigned int start,unsigned int end)180 initial_reordering_consonant_syllable (hb_buffer_t *buffer,
181 				       unsigned int start, unsigned int end)
182 {
183   hb_glyph_info_t *info = buffer->info;
184 
185   unsigned int base = end;
186   bool has_reph = false;
187 
188   {
189     unsigned int limit = start;
190     if (start + 3 <= end &&
191 	info[start  ].myanmar_category() == OT_Ra &&
192 	info[start+1].myanmar_category() == OT_As &&
193 	info[start+2].myanmar_category() == OT_H)
194     {
195       limit += 3;
196       base = start;
197       has_reph = true;
198     }
199 
200     {
201       if (!has_reph)
202 	base = limit;
203 
204       for (unsigned int i = limit; i < end; i++)
205 	if (is_consonant (info[i]))
206 	{
207 	  base = i;
208 	  break;
209 	}
210     }
211   }
212 
213   /* Reorder! */
214   {
215     unsigned int i = start;
216     for (; i < start + (has_reph ? 3 : 0); i++)
217       info[i].myanmar_position() = POS_AFTER_MAIN;
218     for (; i < base; i++)
219       info[i].myanmar_position() = POS_PRE_C;
220     if (i < end)
221     {
222       info[i].myanmar_position() = POS_BASE_C;
223       i++;
224     }
225     indic_position_t pos = POS_AFTER_MAIN;
226     /* The following loop may be ugly, but it implements all of
227      * Myanmar reordering! */
228     for (; i < end; i++)
229     {
230       if (info[i].myanmar_category() == OT_MR) /* Pre-base reordering */
231       {
232 	info[i].myanmar_position() = POS_PRE_C;
233 	continue;
234       }
235       if (info[i].myanmar_position() < POS_BASE_C) /* Left matra */
236       {
237 	continue;
238       }
239       if (info[i].myanmar_category() == OT_VS)
240       {
241 	info[i].myanmar_position() = info[i - 1].myanmar_position();
242 	continue;
243       }
244 
245       if (pos == POS_AFTER_MAIN && info[i].myanmar_category() == OT_VBlw)
246       {
247 	pos = POS_BELOW_C;
248 	info[i].myanmar_position() = pos;
249 	continue;
250       }
251 
252       if (pos == POS_BELOW_C && info[i].myanmar_category() == OT_A)
253       {
254 	info[i].myanmar_position() = POS_BEFORE_SUB;
255 	continue;
256       }
257       if (pos == POS_BELOW_C && info[i].myanmar_category() == OT_VBlw)
258       {
259 	info[i].myanmar_position() = pos;
260 	continue;
261       }
262       if (pos == POS_BELOW_C && info[i].myanmar_category() != OT_A)
263       {
264         pos = POS_AFTER_SUB;
265 	info[i].myanmar_position() = pos;
266 	continue;
267       }
268       info[i].myanmar_position() = pos;
269     }
270   }
271 
272   /* Sit tight, rock 'n roll! */
273   buffer->sort (start, end, compare_myanmar_order);
274 }
275 
276 static void
initial_reordering_syllable(const hb_ot_shape_plan_t * plan HB_UNUSED,hb_face_t * face HB_UNUSED,hb_buffer_t * buffer,unsigned int start,unsigned int end)277 initial_reordering_syllable (const hb_ot_shape_plan_t *plan HB_UNUSED,
278 			     hb_face_t *face HB_UNUSED,
279 			     hb_buffer_t *buffer,
280 			     unsigned int start, unsigned int end)
281 {
282   syllable_type_t syllable_type = (syllable_type_t) (buffer->info[start].syllable() & 0x0F);
283   switch (syllable_type) {
284 
285     case broken_cluster: /* We already inserted dotted-circles, so just call the consonant_syllable. */
286     case consonant_syllable:
287       initial_reordering_consonant_syllable  (buffer, start, end);
288       break;
289 
290     case punctuation_cluster:
291     case non_myanmar_cluster:
292       break;
293   }
294 }
295 
296 static inline void
insert_dotted_circles(const hb_ot_shape_plan_t * plan HB_UNUSED,hb_font_t * font,hb_buffer_t * buffer)297 insert_dotted_circles (const hb_ot_shape_plan_t *plan HB_UNUSED,
298 		       hb_font_t *font,
299 		       hb_buffer_t *buffer)
300 {
301   /* Note: This loop is extra overhead, but should not be measurable. */
302   bool has_broken_syllables = false;
303   unsigned int count = buffer->len;
304   hb_glyph_info_t *info = buffer->info;
305   for (unsigned int i = 0; i < count; i++)
306     if ((info[i].syllable() & 0x0F) == broken_cluster)
307     {
308       has_broken_syllables = true;
309       break;
310     }
311   if (likely (!has_broken_syllables))
312     return;
313 
314 
315   hb_codepoint_t dottedcircle_glyph;
316   if (!font->get_nominal_glyph (0x25CCu, &dottedcircle_glyph))
317     return;
318 
319   hb_glyph_info_t dottedcircle = {0};
320   dottedcircle.codepoint = 0x25CCu;
321   set_myanmar_properties (dottedcircle);
322   dottedcircle.codepoint = dottedcircle_glyph;
323 
324   buffer->clear_output ();
325 
326   buffer->idx = 0;
327   unsigned int last_syllable = 0;
328   while (buffer->idx < buffer->len && buffer->successful)
329   {
330     unsigned int syllable = buffer->cur().syllable();
331     syllable_type_t syllable_type = (syllable_type_t) (syllable & 0x0F);
332     if (unlikely (last_syllable != syllable && syllable_type == broken_cluster))
333     {
334       last_syllable = syllable;
335 
336       hb_glyph_info_t ginfo = dottedcircle;
337       ginfo.cluster = buffer->cur().cluster;
338       ginfo.mask = buffer->cur().mask;
339       ginfo.syllable() = buffer->cur().syllable();
340 
341       buffer->output_info (ginfo);
342     }
343     else
344       buffer->next_glyph ();
345   }
346   buffer->swap_buffers ();
347 }
348 
349 static void
reorder(const hb_ot_shape_plan_t * plan,hb_font_t * font,hb_buffer_t * buffer)350 reorder (const hb_ot_shape_plan_t *plan,
351 	 hb_font_t *font,
352 	 hb_buffer_t *buffer)
353 {
354   insert_dotted_circles (plan, font, buffer);
355 
356   foreach_syllable (buffer, start, end)
357     initial_reordering_syllable (plan, font->face, buffer, start, end);
358 
359   HB_BUFFER_DEALLOCATE_VAR (buffer, myanmar_category);
360   HB_BUFFER_DEALLOCATE_VAR (buffer, myanmar_position);
361 }
362 
363 static void
clear_syllables(const hb_ot_shape_plan_t * plan HB_UNUSED,hb_font_t * font HB_UNUSED,hb_buffer_t * buffer)364 clear_syllables (const hb_ot_shape_plan_t *plan HB_UNUSED,
365 		 hb_font_t *font HB_UNUSED,
366 		 hb_buffer_t *buffer)
367 {
368   hb_glyph_info_t *info = buffer->info;
369   unsigned int count = buffer->len;
370   for (unsigned int i = 0; i < count; i++)
371     info[i].syllable() = 0;
372 }
373 
374 
375 const hb_ot_complex_shaper_t _hb_ot_complex_shaper_myanmar =
376 {
377   collect_features_myanmar,
378   override_features_myanmar,
379   nullptr, /* data_create */
380   nullptr, /* data_destroy */
381   nullptr, /* preprocess_text */
382   nullptr, /* postprocess_glyphs */
383   HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT,
384   nullptr, /* decompose */
385   nullptr, /* compose */
386   setup_masks_myanmar,
387   HB_TAG_NONE, /* gpos_tag */
388   nullptr, /* reorder_marks */
389   HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_EARLY,
390   false, /* fallback_position */
391 };
392 
393 
394 /* Ugly Zawgyi encoding.
395  * Disable all auto processing.
396  * https://github.com/harfbuzz/harfbuzz/issues/1162 */
397 const hb_ot_complex_shaper_t _hb_ot_complex_shaper_myanmar_zawgyi =
398 {
399   nullptr, /* collect_features */
400   nullptr, /* override_features */
401   nullptr, /* data_create */
402   nullptr, /* data_destroy */
403   nullptr, /* preprocess_text */
404   nullptr, /* postprocess_glyphs */
405   HB_OT_SHAPE_NORMALIZATION_MODE_NONE,
406   nullptr, /* decompose */
407   nullptr, /* compose */
408   nullptr, /* setup_masks */
409   HB_TAG_NONE, /* gpos_tag */
410   nullptr, /* reorder_marks */
411   HB_OT_SHAPE_ZERO_WIDTH_MARKS_NONE,
412   false, /* fallback_position */
413 };
414