1 /*
 * HangulLayoutEngine.cpp: OpenType processing for Hangul fonts.
3  *
4  * (C) Copyright IBM Corp. 1998-2013 - All Rights Reserved.
5  */
6 
7 #include "LETypes.h"
8 #include "LEScripts.h"
9 #include "LELanguages.h"
10 
11 #include "LayoutEngine.h"
12 #include "OpenTypeLayoutEngine.h"
13 #include "HangulLayoutEngine.h"
14 #include "ScriptAndLanguageTags.h"
15 #include "LEGlyphStorage.h"
16 #include "OpenTypeTables.h"
17 
18 U_NAMESPACE_BEGIN
19 
20 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(HangulOpenTypeLayoutEngine)
21 
22 
// Builds a {tag, mask} initializer from a feature name by pasting the
// "FeatureTag" and "FeatureMask" suffixes onto it.
#define FEATURE_MAP(name) {name ## FeatureTag, name ## FeatureMask}

// Leading consonant (L) jamo. LJMO_FIRST..LJMO_LAST is the range classified
// as CC_L; only the first LJMO_COUNT code points of that range can take part
// in syllable composition. LJMO_FILL is the default lead reported for
// characters that supply no lead of their own.
#define LJMO_FIRST 0x1100
#define LJMO_LAST  0x1159
#define LJMO_FILL  0x115F
#define LJMO_COUNT 19

// Vowel (V) jamo. VJMO_FIRST..VJMO_LAST is the range classified as CC_V;
// only the first VJMO_COUNT code points can take part in composition.
// VJMO_FILL is the default vowel reported for characters that supply none.
#define VJMO_FIRST 0x1161
#define VJMO_LAST  0x11A2
#define VJMO_FILL  0x1160
#define VJMO_COUNT 21

// Trailing consonant (T) jamo. TJMO_FIRST itself is not classified as a
// trail (the range test is exclusive); it is the "no trail" value, index 0
// in the composition arithmetic. TJMO_COUNT includes that empty slot.
#define TJMO_FIRST 0x11A7
#define TJMO_LAST  0x11F9
#define TJMO_COUNT 28

// Precomposed syllables occupy HSYL_FIRST .. HSYL_FIRST + HSYL_COUNT - 1.
// HSYL_LVCNT is the number of syllables that share one leading consonant.
#define HSYL_FIRST 0xAC00
#define HSYL_COUNT 11172
#define HSYL_LVCNT (VJMO_COUNT * TJMO_COUNT)
42 
// Character classes returned by getCharClass(). The enumerator values are
// used directly as column indices into stateTable, so their order must match
// the column order of that table.
enum
{
    CC_L = 0,   // leading consonant jamo
    CC_V,       // vowel jamo
    CC_T,       // trailing consonant jamo
    CC_LV,      // precomposed syllable without a trail
    CC_LVT,     // precomposed syllable with a trail
    CC_X,       // anything else
    CC_COUNT    // number of classes (column count of stateTable)
};
54 
// Action flags: one bit per jamo slot that should be written to the output
// for the current input character.
#define AF_L 1
#define AF_V 2
#define AF_T 4

// Actions: the combinations of action flags that appear in stateTable.
// a_N emits nothing.
#define a_N   0
#define a_L   (AF_L)
#define a_V   (AF_V)
#define a_T   (AF_T)
#define a_VT  (AF_V | AF_T)
#define a_LV  (AF_L | AF_V)
#define a_LVT (AF_L | AF_V | AF_T)
68 
// One cell of the syllable state machine.
struct StateTransition
{
    int32_t newState;     // state to enter next; a negative value means stop
    int32_t actionFlags;  // AF_* bits naming the jamo slots to emit
};
74 
/*
 * Syllable state machine. Rows are indexed by the current state, columns by
 * the character class of the current input character (in CC_* enum order).
 * Each cell gives the next state and the jamo slots to emit for that
 * character. A next state of -1 ends the current syllable; the character
 * that triggered it is not consumed and is looked at again from state 0.
 *
 * For class X only the AF_T bit of the action is examined by
 * characterProcessing(), so the a_V in the X column of row 1 emits nothing.
 */
static const StateTransition stateTable[][CC_COUNT] =
{
//       L          V          T          LV         LVT           X
    { {1, a_L},  {2, a_LV}, {3, a_LVT}, {2, a_LV}, {3, a_LVT},  {4, a_T}}, // 0 - start
    { {1, a_L},  {2, a_V},  {3, a_VT},  {2, a_LV}, {3, a_LVT}, {-1, a_V}}, // 1 - L+
    {{-1, a_N},  {2, a_V},  {3, a_T},  {-1, a_N}, {-1, a_N},   {-1, a_N}}, // 2 - L+V+
    {{-1, a_N}, {-1, a_N},  {3, a_T},  {-1, a_N}, {-1, a_N},   {-1, a_N}}, // 3 - L+V+T*
    {{-1, a_N}, {-1, a_N}, {-1, a_N},  {-1, a_N}, {-1, a_N},    {4, a_T}}  // 4 - X+
};
84 
85 
// Local aliases for the OpenType feature tags used by this engine.
#define ccmpFeatureTag LE_CCMP_FEATURE_TAG
#define ljmoFeatureTag LE_LJMO_FEATURE_TAG
#define vjmoFeatureTag LE_VJMO_FEATURE_TAG
#define tjmoFeatureTag LE_TJMO_FEATURE_TAG

// One distinct bit per feature, allocated from the top of a 32-bit word.
// These bits are combined into the per-character aux data values below.
#define ccmpFeatureMask 0x80000000UL
#define ljmoFeatureMask 0x40000000UL
#define vjmoFeatureMask 0x20000000UL
#define tjmoFeatureMask 0x10000000UL
95 
// Pairs each feature tag with its mask bit. Both constructors hand this
// table to the engine through fFeatureMap / fFeatureMapCount.
// NOTE(review): the constructors also set fFeatureOrder = TRUE, so the entry
// order here is presumably significant - confirm against the base class.
static const FeatureMap featureMap[] =
{
    {ccmpFeatureTag, ccmpFeatureMask},
    {ljmoFeatureTag, ljmoFeatureMask},
    {vjmoFeatureTag, vjmoFeatureMask},
    {tjmoFeatureTag, tjmoFeatureMask}
};

// Number of entries in featureMap.
static const le_int32 featureMapCount = LE_ARRAY_SIZE(featureMap);
105 
// Aux data values stored per output character by characterProcessing():
// nullFeatures for class X characters, composed syllables and DEL markers;
// the others for emitted lead, vowel and trail jamo respectively.
// Note that vjmoFeatures and tjmoFeatures OR together the same four masks
// and therefore have the same value; ljmoFeatures enables only ccmp and ljmo.
#define nullFeatures 0
#define ljmoFeatures (ccmpFeatureMask | ljmoFeatureMask)
#define vjmoFeatures (ccmpFeatureMask | vjmoFeatureMask | ljmoFeatureMask | tjmoFeatureMask)
#define tjmoFeatures (ccmpFeatureMask | tjmoFeatureMask | ljmoFeatureMask | vjmoFeatureMask)
110 
compose(LEUnicode lead,LEUnicode vowel,LEUnicode trail,LEUnicode & syllable)111 static le_int32 compose(LEUnicode lead, LEUnicode vowel, LEUnicode trail, LEUnicode &syllable)
112 {
113     le_int32 lIndex = lead  - LJMO_FIRST;
114     le_int32 vIndex = vowel - VJMO_FIRST;
115     le_int32 tIndex = trail - TJMO_FIRST;
116     le_int32 result = 3;
117 
118     if ((lIndex < 0 || lIndex >= LJMO_COUNT ) || (vIndex < 0 || vIndex >= VJMO_COUNT)) {
119         return 0;
120     }
121 
122     if (tIndex <= 0 || tIndex >= TJMO_COUNT) {
123         tIndex = 0;
124         result = 2;
125     }
126 
127     syllable = (LEUnicode) ((lIndex * VJMO_COUNT + vIndex) * TJMO_COUNT + tIndex + HSYL_FIRST);
128 
129     return result;
130 }
131 
decompose(LEUnicode syllable,LEUnicode & lead,LEUnicode & vowel,LEUnicode & trail)132 static le_int32 decompose(LEUnicode syllable, LEUnicode &lead, LEUnicode &vowel, LEUnicode &trail)
133 {
134     le_int32 sIndex = syllable - HSYL_FIRST;
135 
136     if (sIndex < 0 || sIndex >= HSYL_COUNT) {
137         return 0;
138     }
139 
140     lead  = LJMO_FIRST + (sIndex / HSYL_LVCNT);
141     vowel = VJMO_FIRST + (sIndex % HSYL_LVCNT) / TJMO_COUNT;
142     trail = TJMO_FIRST + (sIndex % TJMO_COUNT);
143 
144     if (trail == TJMO_FIRST) {
145         return 2;
146     }
147 
148     return 3;
149 }
150 
getCharClass(LEUnicode ch,LEUnicode & lead,LEUnicode & vowel,LEUnicode & trail)151 static le_int32 getCharClass(LEUnicode ch, LEUnicode &lead, LEUnicode &vowel, LEUnicode &trail)
152 {
153     lead  = LJMO_FILL;
154     vowel = VJMO_FILL;
155     trail = TJMO_FIRST;
156 
157     if (ch >= LJMO_FIRST && ch <= LJMO_LAST) {
158         lead  = ch;
159         return CC_L;
160     }
161 
162     if (ch >= VJMO_FIRST && ch <= VJMO_LAST) {
163         vowel = ch;
164         return CC_V;
165     }
166 
167     if (ch > TJMO_FIRST && ch <= TJMO_LAST) {
168         trail = ch;
169         return CC_T;
170     }
171 
172     le_int32 c = decompose(ch, lead, vowel, trail);
173 
174     if (c == 2) {
175         return CC_LV;
176     }
177 
178     if (c == 3) {
179         return CC_LVT;
180     }
181 
182     trail = ch;
183     return CC_X;
184 }
185 
HangulOpenTypeLayoutEngine(const LEFontInstance * fontInstance,le_int32 scriptCode,le_int32,le_int32 typoFlags,const LEReferenceTo<GlyphSubstitutionTableHeader> & gsubTable,LEErrorCode & success)186 HangulOpenTypeLayoutEngine::HangulOpenTypeLayoutEngine(const LEFontInstance *fontInstance, le_int32 scriptCode, le_int32 /*languageCode*/,
187                                                        le_int32 typoFlags, const LEReferenceTo<GlyphSubstitutionTableHeader> &gsubTable, LEErrorCode &success)
188     : OpenTypeLayoutEngine(fontInstance, scriptCode, korLanguageCode, typoFlags, gsubTable, success)
189 {
190     fFeatureMap = featureMap;
191     fFeatureMapCount = featureMapCount;
192     fFeatureOrder = TRUE;
193 }
194 
HangulOpenTypeLayoutEngine(const LEFontInstance * fontInstance,le_int32 scriptCode,le_int32,le_int32 typoFlags,LEErrorCode & success)195 HangulOpenTypeLayoutEngine::HangulOpenTypeLayoutEngine(const LEFontInstance *fontInstance, le_int32 scriptCode, le_int32 /*languageCode*/,
196 			                                   le_int32 typoFlags, LEErrorCode &success)
197     : OpenTypeLayoutEngine(fontInstance, scriptCode, korLanguageCode, typoFlags, success)
198 {
199     fFeatureMap = featureMap;
200     fFeatureMapCount = featureMapCount;
201     fFeatureOrder = TRUE;
202 }
203 
~HangulOpenTypeLayoutEngine()204 HangulOpenTypeLayoutEngine::~HangulOpenTypeLayoutEngine()
205 {
206     // nothing to do
207 }
208 
characterProcessing(const LEUnicode chars[],le_int32 offset,le_int32 count,le_int32 max,le_bool rightToLeft,LEUnicode * & outChars,LEGlyphStorage & glyphStorage,LEErrorCode & success)209 le_int32 HangulOpenTypeLayoutEngine::characterProcessing(const LEUnicode chars[], le_int32 offset, le_int32 count, le_int32 max, le_bool rightToLeft,
210         LEUnicode *&outChars, LEGlyphStorage &glyphStorage, LEErrorCode &success)
211 {
212     if (LE_FAILURE(success)) {
213         return 0;
214     }
215 
216     if (chars == NULL || offset < 0 || count < 0 || max < 0 || offset >= max || offset + count > max) {
217         success = LE_ILLEGAL_ARGUMENT_ERROR;
218         return 0;
219     }
220 
221     le_int32 worstCase = count * 3;
222 
223     outChars = LE_NEW_ARRAY(LEUnicode, worstCase);
224 
225     if (outChars == NULL) {
226         success = LE_MEMORY_ALLOCATION_ERROR;
227         return 0;
228     }
229 
230     glyphStorage.allocateGlyphArray(worstCase, rightToLeft, success);
231     glyphStorage.allocateAuxData(success);
232 
233     if (LE_FAILURE(success)) {
234         LE_DELETE_ARRAY(outChars);
235         return 0;
236     }
237 
238     le_int32 outCharCount = 0;
239     le_int32 limit = offset + count;
240     le_int32 i = offset;
241 
242     while (i < limit) {
243         le_int32 state    = 0;
244         le_int32 inStart  = i;
245         le_int32 outStart = outCharCount;
246 
247         while( i < limit) {
248             LEUnicode lead  = 0;
249             LEUnicode vowel = 0;
250             LEUnicode trail = 0;
251             int32_t chClass = getCharClass(chars[i], lead, vowel, trail);
252             const StateTransition transition = stateTable[state][chClass];
253 
254             if (chClass == CC_X) {
255                 /* Any character of type X will be stored as a trail jamo */
256                 if ((transition.actionFlags & AF_T) != 0) {
257                     outChars[outCharCount] = trail;
258                     glyphStorage.setCharIndex(outCharCount, i-offset, success);
259                     glyphStorage.setAuxData(outCharCount++, nullFeatures, success);
260                 }
261             } else {
262                 /* Any Hangul will be fully decomposed. Output the decomposed characters. */
263                 if ((transition.actionFlags & AF_L) != 0) {
264                     outChars[outCharCount] = lead;
265                     glyphStorage.setCharIndex(outCharCount, i-offset, success);
266                     glyphStorage.setAuxData(outCharCount++, ljmoFeatures, success);
267                 }
268 
269                 if ((transition.actionFlags & AF_V) != 0) {
270                     outChars[outCharCount] = vowel;
271                     glyphStorage.setCharIndex(outCharCount, i-offset, success);
272                     glyphStorage.setAuxData(outCharCount++, vjmoFeatures, success);
273                 }
274 
275                 if ((transition.actionFlags & AF_T) != 0) {
276                     outChars[outCharCount] = trail;
277                     glyphStorage.setCharIndex(outCharCount, i-offset, success);
278                     glyphStorage.setAuxData(outCharCount++, tjmoFeatures, success);
279                 }
280             }
281 
282             state = transition.newState;
283 
284             /* Negative next state means stop. */
285             if (state < 0) {
286                 break;
287             }
288 
289             i += 1;
290         }
291 
292         le_int32 inLength  = i - inStart;
293         le_int32 outLength = outCharCount - outStart;
294 
295         /*
296          * See if the syllable can be composed into a single character. There are 5
297          * possible cases:
298          *
299          *   Input     Decomposed to    Compose to
300          *   LV        L, V             LV
301          *   LVT       L, V, T          LVT
302          *   L, V      L, V             LV, DEL
303          *   LV, T     L, V, T          LVT, DEL
304          *   L, V, T   L, V, T          LVT, DEL, DEL
305          */
306         if ((inLength >= 1 && inLength <= 3) && (outLength == 2 || outLength == 3)) {
307             LEUnicode syllable = 0x0000;
308             LEUnicode lead  = outChars[outStart];
309             LEUnicode vowel = outChars[outStart + 1];
310             LEUnicode trail = outLength == 3? outChars[outStart + 2] : TJMO_FIRST;
311 
312             /*
313              * If the composition consumes the whole decomposed syllable,
314              * we can use it.
315              */
316             if (compose(lead, vowel, trail, syllable) == outLength) {
317                 outCharCount = outStart;
318                 outChars[outCharCount] = syllable;
319                 glyphStorage.setCharIndex(outCharCount, inStart-offset, success);
320                 glyphStorage.setAuxData(outCharCount++, nullFeatures, success);
321 
322                 /*
323                  * Replace the rest of the input characters with DEL.
324                  */
325                 for(le_int32 d = inStart + 1; d < i; d += 1) {
326                     outChars[outCharCount] = 0xFFFF;
327                     glyphStorage.setCharIndex(outCharCount, d - offset, success);
328                     glyphStorage.setAuxData(outCharCount++, nullFeatures, success);
329                 }
330             }
331         }
332     }
333 
334     glyphStorage.adoptGlyphCount(outCharCount);
335     return outCharCount;
336 }
337 
338 U_NAMESPACE_END
339