1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4  * COPYRIGHT:
5  * Copyright (c) 1997-2016, International Business Machines Corporation and
6  * others. All Rights Reserved.
7  ********************************************************************
8 ************************************************************************
9 *   Date        Name        Description
10 *   02/28/2001  aliu        Creation
11 *   03/01/2001  George      port to HP/UX
12 ************************************************************************/
13 
14 #include "unicode/utypes.h"
15 
16 #if !UCONFIG_NO_TRANSLITERATION
17 
18 #include "jamotest.h"
19 #include "unicode/utypes.h"
20 #include "unicode/translit.h"
21 #include "cmemory.h"
22 #include "cpdtrans.h"
23 
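// SEP is the disambiguation separator used by the Latin-Jamo and Jamo-Latin
// transliterators.  In this file it is referenced only by a commented-out
// row of the TestJamo case table.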
25 #define SEP "-"
26 
JamoTest()27 JamoTest::JamoTest()
28 {
29     UParseError parseError;
30     UErrorCode status = U_ZERO_ERROR;
31     NAME_JAMO = Transliterator::createFromRules("Name-Jamo",
32                                             UnicodeString(JAMO_NAMES_RULES, -1, US_INV),
33                                             UTRANS_FORWARD, parseError, status);
34 
35     if (U_FAILURE(status)) {
36         delete NAME_JAMO;
37         NAME_JAMO = NULL;
38     }
39     status = U_ZERO_ERROR;
40     JAMO_NAME = Transliterator::createFromRules("Jamo-Name",
41                                             UnicodeString(JAMO_NAMES_RULES, -1, US_INV),
42                                             UTRANS_REVERSE, parseError, status);
43     if (U_FAILURE(status)) {
44         delete JAMO_NAME;
45         JAMO_NAME = NULL;
46     }
47 }
48 
~JamoTest()49 JamoTest::~JamoTest()
50 {
51     delete NAME_JAMO;
52     delete JAMO_NAME;
53 }
54 
55 void
runIndexedTest(int32_t index,UBool exec,const char * & name,char *)56 JamoTest::runIndexedTest(int32_t index, UBool exec,
57                          const char* &name, char* /*par*/) {
58     switch (index) {
59         TESTCASE(0,TestJamo);
60         TESTCASE(1,TestRealText);
61         TESTCASE(2,TestPiecemeal);
62         default: name = ""; break;
63     }
64 }
65 
66 void
TestJamo()67 JamoTest::TestJamo() {
68     UParseError parseError;
69     UErrorCode status = U_ZERO_ERROR;
70     Transliterator* latinJamo = Transliterator::createInstance("Latin-Jamo", UTRANS_FORWARD, parseError, status);
71 
72     if (latinJamo == 0 || U_FAILURE(status)) {
73         dataerrln("FAIL: createInstance() returned 0 - %s", u_errorName(status));
74         return;
75     }
76 
77     Transliterator* jamoLatin = latinJamo->createInverse(status);
78 
79     if (jamoLatin == 0) {
80         delete latinJamo;
81         errln("FAIL: createInverse() returned 0");
82         return;
83     }
84 
85     static const char* CASE[] = {
86         // Column 1 is the latin text L1 to be fed to Latin-Jamo
87         // to yield output J.
88 
89         // Column 2 is expected value of J.  J is fed to
90         // Jamo-Latin to yield output L2.
91 
92         // Column 3 is expected value of L2.  If the expected
93         // value of L2 is L1, then L2 is NULL.
94 
95                 // add tests for the update to fix problems where it didn't follow the standard
96                 // see also http://www.unicode.org/cldr/data/charts/transforms/Latin-Hangul.html
97                 "gach", "(Gi)(A)(Cf)", NULL,
98                 "geumhui", "(Gi)(EU)(Mf)(Hi)(YI)", NULL,
99                 "choe", "(Ci)(OE)", NULL,
100                 "wo", "(IEUNG)(WEO)", NULL,
101                 "Wonpil", "(IEUNG)(WEO)(Nf)(Pi)(I)(L)", "wonpil",
102                 "GIPPEUM", "(Gi)(I)(BB)(EU)(Mf)", "gippeum",
103                 "EUTTEUM", "(IEUNG)(EU)(DD)(EU)(Mf)", "eutteum",
104                 "KKOTNAE", "(GGi)(O)(Tf)(Ni)(AE)", "kkotnae",
105                 "gaga", "(Gi)(A)(Gi)(A)", NULL,
106                 "gag-a", "(Gi)(A)(Gf)(IEUNG)(A)", NULL,
107                 "gak-ka", "(Gi)(A)(Kf)(Ki)(A)", NULL,
108                 "gakka", "(Gi)(A)(GGi)(A)", NULL,
109                 "gakk-a", "(Gi)(A)(GGf)(IEUNG)(A)", NULL,
110                 "gakkka", "(Gi)(A)(GGf)(Ki)(A)", NULL,
111                 "gak-kka", "(Gi)(A)(Kf)(GGi)(A)", NULL,
112 
113         "bab", "(Bi)(A)(Bf)", NULL,
114         "babb", "(Bi)(A)(Bf)(Bi)(EU)", "babbeu",
115         "babbba", "(Bi)(A)(Bf)(Bi)(EU)(Bi)(A)", "babbeuba",
116         "bagg", "(Bi)(A)(Gf)(Gi)(EU)", "baggeu",
117         "baggga", "(Bi)(A)(Gf)(Gi)(EU)(Gi)(A)", "baggeuga",
118         //"bag" SEP "gga", "(Bi)(A)(Gf)" SEP "(Gi)(EU)(Gi)(A)", "bag" SEP "geuga",
119         "kabsa", "(Ki)(A)(Bf)(Si)(A)", NULL,
120         "kabska", "(Ki)(A)(BS)(Ki)(A)", NULL,
121         "gabsbka", "(Gi)(A)(BS)(Bi)(EU)(Ki)(A)", "gabsbeuka", // not (Kf)
122         "gga", "(Gi)(EU)(Gi)(A)", "geuga",
123         "bsa", "(Bi)(EU)(Si)(A)", "beusa",
124         "agg", "(IEUNG)(A)(Gf)(Gi)(EU)", "aggeu",
125         "agga", "(IEUNG)(A)(Gf)(Gi)(A)", NULL,
126         "la", "(R)(A)", NULL,
127         "bs", "(Bi)(EU)(Sf)", "beus",
128         "kalgga", "(Ki)(A)(L)(Gi)(EU)(Gi)(A)", "kalgeuga",
129 
130         // 'r' in a final position is treated like 'l'
131         "karka", "(Ki)(A)(L)(Ki)(A)", "kalka",
132     };
133 
134     enum { CASE_length = UPRV_LENGTHOF(CASE) };
135 
136     int32_t i;
137     for (i=0; i<CASE_length; i+=3) {
138         UnicodeString jamo = nameToJamo(CASE[i+1]);
139         if (CASE[i+2] == NULL) {
140             expect(*latinJamo, CASE[i], jamo, *jamoLatin);
141         } else {
142             // Handle case where round-trip is expected to fail
143             expect(*latinJamo, CASE[i], jamo);
144             expect(*jamoLatin, jamo, CASE[i+2]);
145         }
146     }
147 
148     delete latinJamo;
149     delete jamoLatin;
150 }
151 
152 /**
153  * Test various step-at-a-time transformation of hangul to jamo to
154  * latin and back.
155  */
TestPiecemeal(void)156 void JamoTest::TestPiecemeal(void) {
157     UnicodeString hangul; hangul.append((UChar)0xBC0F);
158     UnicodeString jamo = nameToJamo("(Mi)(I)(Cf)");
159     UnicodeString latin("mic");
160     UnicodeString latin2("mich");
161 
162     Transliterator *t = NULL;
163     UErrorCode status = U_ZERO_ERROR;
164 
165     t = Transliterator::createInstance("NFD", UTRANS_FORWARD, status); // was Hangul-Jamo
166     if (U_FAILURE(status) || t == 0) {
167         dataerrln("FAIL: createInstance failed");
168         return;
169     }
170     expect(*t, hangul, jamo);
171     delete t;
172 
173     t = Transliterator::createInstance("NFC", UTRANS_FORWARD, status); // was Jamo-Hangul
174     if (U_FAILURE(status) || t == 0) {
175         errln("FAIL: createInstance failed");
176         return;
177     }
178     expect(*t, jamo, hangul);
179     delete t;
180 
181     t = Transliterator::createInstance("Latin-Jamo", UTRANS_FORWARD, status);
182     if (U_FAILURE(status) || t == 0) {
183         dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
184         return;
185     }
186     expect(*t, latin, jamo);
187     delete t;
188 
189     t = Transliterator::createInstance("Jamo-Latin", UTRANS_FORWARD, status);
190     if (U_FAILURE(status) || t == 0) {
191         errln("FAIL: createInstance failed");
192         return;
193     }
194     expect(*t, jamo, latin2);
195     delete t;
196 
197     t = Transliterator::createInstance("Hangul-Latin", UTRANS_FORWARD, status);
198     if (U_FAILURE(status) || t == 0) {
199         errln("FAIL: createInstance failed");
200         return;
201     }
202     expect(*t, hangul, latin2);
203     delete t;
204 
205     t = Transliterator::createInstance("Latin-Hangul", UTRANS_FORWARD, status);
206     if (U_FAILURE(status) || t == 0) {
207         errln("FAIL: createInstance failed");
208         return;
209     }
210     expect(*t, latin, hangul);
211     delete t;
212 
213     t = Transliterator::createInstance("Hangul-Latin; Latin-Jamo", UTRANS_FORWARD, status);
214     if (U_FAILURE(status) || t == 0) {
215         errln("FAIL: createInstance failed");
216         return;
217     }
218     expect(*t, hangul, jamo);
219     delete t;
220 
221     t = Transliterator::createInstance("Jamo-Latin; Latin-Hangul", UTRANS_FORWARD, status);
222     if (U_FAILURE(status) || t == 0) {
223         errln("FAIL: createInstance failed");
224         return;
225     }
226     expect(*t, jamo, hangul);
227     delete t;
228 
229     t = Transliterator::createInstance("Hangul-Latin; Latin-Hangul", UTRANS_FORWARD, status);
230     if (U_FAILURE(status) || t == 0) {
231         errln("FAIL: createInstance failed");
232         return;
233     }
234     expect(*t, hangul, hangul);
235     delete t;
236 }
237 
238 void
TestRealText()239 JamoTest::TestRealText() {
240     // Test text taken from the Unicode web site
241      static const char* const WHAT_IS_UNICODE[] = {
242       "\\uc720\\ub2c8\\ucf54\\ub4dc\\uc5d0", "\\ub300\\ud574", "?",
243 
244       "\\uc5b4\\ub5a4", "\\ud50c\\ub7ab\\ud3fc,", "\\uc5b4\\ub5a4",
245       "\\ud504\\ub85c\\uadf8\\ub7a8,", "\\uc5b4\\ub5a4", "\\uc5b8\\uc5b4\\uc5d0\\ub3c4",
246       "\\uc0c1\\uad00\\uc5c6\\uc774", "\\uc720\\ub2c8\\ucf54\\ub4dc\\ub294", "\\ubaa8\\ub4e0",
247       "\\ubb38\\uc790\\uc5d0", "\\ub300\\ud574", "\\uace0\\uc720", "\\ubc88\\ud638\\ub97c",
248       "\\uc81c\\uacf5\\ud569\\ub2c8\\ub2e4.",
249 
250       "\\uae30\\ubcf8\\uc801\\uc73c\\ub85c", "\\ucef4\\ud4e8\\ud130\\ub294",
251       "\\uc22b\\uc790\\ub9cc", "\\ucc98\\ub9ac\\ud569\\ub2c8\\ub2e4.", "\\uae00\\uc790\\ub098",
252       "\\ub2e4\\ub978", "\\ubb38\\uc790\\uc5d0\\ub3c4", "\\uc22b\\uc790\\ub97c",
253       "\\uc9c0\\uc815\\ud558\\uc5ec",
254       "\\uc800\\uc7a5\\ud569\\ub2c8\\ub2e4.", "\\uc720\\ub2c8\\ucf54\\ub4dc\\uac00",
255       "\\uac1c\\ubc1c\\ub418\\uae30", "\\uc804\\uc5d0\\ub294", "\\uc774\\ub7ec\\ud55c",
256       "\\uc22b\\uc790\\ub97c", "\\uc9c0\\uc815\\ud558\\uae30", "\\uc704\\ud574", "\\uc218\\ubc31",
257       "\\uac00\\uc9c0\\uc758", "\\ub2e4\\ub978", "\\uae30\\ud638\\ud654",
258       "\\uc2dc\\uc2a4\\ud15c\\uc744",
259       "\\uc0ac\\uc6a9\\ud588\\uc2b5\\ub2c8\\ub2e4.", "\\ub2e8\\uc77c", "\\uae30\\ud638\\ud654",
260       "\\ubc29\\ubc95\\uc73c\\ub85c\\ub294", "\\ubaa8\\ub4e0", "\\ubb38\\uc790\\ub97c",
261       "\\ud3ec\\ud568\\ud560", "\\uc218", "\\uc5c6\\uc5c8\\uc2b5\\ub2c8\\ub2e4.", "\\uc608\\ub97c",
262       "\\ub4e4\\uc5b4", "\\uc720\\ub7fd", "\\uc5f0\\ud569\\uc5d0\\uc11c\\ub9cc",
263       "\\ubcf4\\ub354\\ub77c\\ub3c4", "\\ubaa8\\ub4e0", "\\uac01", "\\ub098\\ub77c\\ubcc4",
264       "\\uc5b8\\uc5b4\\ub97c", "\\ucc98\\ub9ac\\ud558\\ub824\\uba74", "\\uc5ec\\ub7ec",
265       "\\uac1c\\uc758", "\\ub2e4\\ub978", "\\uae30\\ud638\\ud654", "\\ubc29\\ubc95\\uc774",
266       "\\ud544\\uc694\\ud569\\ub2c8\\ub2e4.", "\\uc601\\uc5b4\\uc640", "\\uac19\\uc740",
267       "\\ub2e8\\uc77c", "\\uc5b8\\uc5b4\\uc758", "\\uacbd\\uc6b0\\ub3c4",
268       "\\uacf5\\ud1b5\\uc801\\uc73c\\ub85c", "\\uc0ac\\uc6a9\\ub418\\ub294", "\\ubaa8\\ub4e0",
269       "\\uae00\\uc790,", "\\ubb38\\uc7a5", "\\ubd80\\ud638", "\\ubc0f",
270       "\\ud14c\\ud06c\\ub2c8\\uceec", "\\uae30\\ud638\\uc5d0", "\\ub9de\\ub294", "\\ub2e8\\uc77c",
271       "\\uae30\\ud638\\ud654", "\\ubc29\\ubc95\\uc744", "\\uac16\\uace0", "\\uc788\\uc9c0",
272       "\\ubabb\\ud558\\uc600\\uc2b5\\ub2c8\\ub2e4.",
273 
274       "\\uc774\\ub7ec\\ud55c", "\\uae30\\ud638\\ud654", "\\uc2dc\\uc2a4\\ud15c\\uc740",
275       "\\ub610\\ud55c", "\\ub2e4\\ub978", "\\uae30\\ud638\\ud654", "\\uc2dc\\uc2a4\\ud15c\\uacfc",
276       "\\ucda9\\ub3cc\\ud569\\ub2c8\\ub2e4.", "\\uc989", "\\ub450", "\\uac00\\uc9c0",
277       "\\uae30\\ud638\\ud654", "\\ubc29\\ubc95\\uc774", "\\ub450", "\\uac1c\\uc758", "\\ub2e4\\ub978",
278       "\\ubb38\\uc790\\uc5d0", "\\ub300\\ud574", "\\uac19\\uc740", "\\ubc88\\ud638\\ub97c",
279       "\\uc0ac\\uc6a9\\ud558\\uac70\\ub098", "\\uac19\\uc740", "\\ubb38\\uc790\\uc5d0",
280       "\\ub300\\ud574", "\\ub2e4\\ub978", "\\ubc88\\ud638\\ub97c", "\\uc0ac\\uc6a9\\ud560", "\\uc218",
281       "\\uc788\\uc2b5\\ub2c8\\ub2e4.", "\\uc8fc\\uc5b4\\uc9c4", "\\ubaa8\\ub4e0",
282       "\\ucef4\\ud4e8\\ud130(\\ud2b9\\ud788", "\\uc11c\\ubc84)\\ub294", "\\uc11c\\ub85c",
283       "\\ub2e4\\ub978", "\\uc5ec\\ub7ec", "\\uac00\\uc9c0", "\\uae30\\ud638\\ud654",
284       "\\ubc29\\ubc95\\uc744", "\\uc9c0\\uc6d0\\ud574\\uc57c",
285       "\\ud569\\ub2c8\\ub2e4.", "\\uadf8\\ub7ec\\ub098,", "\\ub370\\uc774\\ud130\\ub97c",
286       "\\uc11c\\ub85c", "\\ub2e4\\ub978", "\\uae30\\ud638\\ud654", "\\ubc29\\ubc95\\uc774\\ub098",
287       "\\ud50c\\ub7ab\\ud3fc", "\\uac04\\uc5d0", "\\uc804\\ub2ec\\ud560", "\\ub54c\\ub9c8\\ub2e4",
288       "\\uadf8", "\\ub370\\uc774\\ud130\\ub294", "\\ud56d\\uc0c1", "\\uc190\\uc0c1\\uc758",
289       "\\uc704\\ud5d8\\uc744", "\\uacaa\\uac8c", "\\ub429\\ub2c8\\ub2e4.",
290 
291       "\\uc720\\ub2c8\\ucf54\\ub4dc\\ub85c", "\\ubaa8\\ub4e0", "\\uac83\\uc744",
292       "\\ud574\\uacb0\\ud560", "\\uc218", "\\uc788\\uc2b5\\ub2c8\\ub2e4!",
293       "\\uc720\\ub2c8\\ucf54\\ub4dc\\ub294", "\\uc0ac\\uc6a9", "\\uc911\\uc778",
294       "\\ud50c\\ub7ab\\ud3fc,", "\\ud504\\ub85c\\uadf8\\ub7a8,", "\\uc5b8\\uc5b4\\uc5d0",
295       "\\uad00\\uacc4\\uc5c6\\uc774", "\\ubb38\\uc790\\ub9c8\\ub2e4", "\\uace0\\uc720\\ud55c",
296       "\\uc22b\\uc790\\ub97c",
297       "\\uc81c\\uacf5\\ud569\\ub2c8\\ub2e4.", "\\uc720\\ub2c8\\ucf54\\ub4dc",
298       "\\ud45c\\uc900\\uc740", // "Apple, HP, IBM, JustSystem, Microsoft, Oracle, SAP, "
299       // "Sun, Sybase, Unisys "
300       "\\ubc0f", "\\uae30\\ud0c0", "\\uc5ec\\ub7ec",
301       "\\ud68c\\uc0ac\\uc640", "\\uac19\\uc740", "\\uc5c5\\uacc4",
302       "\\uc120\\ub450\\uc8fc\\uc790\\uc5d0", "\\uc758\\ud574",
303       "\\ucc44\\ud0dd\\ub418\\uc5c8\\uc2b5\\ub2c8\\ub2e4.", "\\uc720\\ub2c8\\ucf54\\ub4dc\\ub294",
304       // "XML, Java, ECMAScript(JavaScript), LDAP, CORBA 3.0, WML "
305       "\\ub4f1\\uacfc",
306       "\\uac19\\uc774", "\\ud604\\uc7ac", "\\ub110\\ub9ac", "\\uc0ac\\uc6a9\\ub418\\ub294",
307       "\\ud45c\\uc900\\uc5d0\\uc11c", "\\ud544\\uc694\\ud558\\uba70", "\\uc774\\ub294", // "ISO/IEC",
308       "10646\\uc744", "\\uad6c\\ud604\\ud558\\ub294", "\\uacf5\\uc2dd\\uc801\\uc778",
309       "\\ubc29\\ubc95\\uc785\\ub2c8\\ub2e4.", "\\uc774\\ub294", "\\ub9ce\\uc740", "\\uc6b4\\uc601",
310       "\\uccb4\\uc81c,", "\\uc694\\uc998", "\\uc0ac\\uc6a9\\ub418\\ub294", "\\ubaa8\\ub4e0",
311       "\\ube0c\\ub77c\\uc6b0\\uc800", "\\ubc0f", "\\uae30\\ud0c0", "\\ub9ce\\uc740",
312       "\\uc81c\\ud488\\uc5d0\\uc11c",
313       "\\uc9c0\\uc6d0\\ub429\\ub2c8\\ub2e4.", "\\uc720\\ub2c8\\ucf54\\ub4dc",
314       "\\ud45c\\uc900\\uc758", "\\ubd80\\uc0c1\\uacfc", "\\uc774\\ub97c",
315       "\\uc9c0\\uc6d0\\ud558\\ub294", "\\ub3c4\\uad6c\\uc758", "\\uac00\\uc6a9\\uc131\\uc740",
316       "\\ucd5c\\uadfc", "\\uc804", "\\uc138\\uacc4\\uc5d0", "\\ubd88\\uace0", "\\uc788\\ub294",
317       "\\uae30\\uc220", "\\uacbd\\ud5a5\\uc5d0\\uc11c", "\\uac00\\uc7a5", "\\uc911\\uc694\\ud55c",
318       "\\ubd80\\ubd84\\uc744", "\\ucc28\\uc9c0\\ud558\\uace0", "\\uc788\\uc2b5\\ub2c8\\ub2e4.",
319 
320       "\\uc720\\ub2c8\\ucf54\\ub4dc\\ub97c",
321       // Replaced a hyphen with a space to make the test case work with CLDR1.5
322       //"\\ud074\\ub77c\\uc774\\uc5b8\\ud2b8-\\uc11c\\ubc84", "\\ub610\\ub294",
323       "\\ud074\\ub77c\\uc774\\uc5b8\\ud2b8 \\uc11c\\ubc84", "\\ub610\\ub294",
324       // Replaced a hyphen with a space.
325       //"\\ub2e4\\uc911-\\uc5f0\\uacb0", "\\uc751\\uc6a9", "\\ud504\\ub85c\\uadf8\\ub7a8\\uacfc",
326       "\\ub2e4\\uc911 \\uc5f0\\uacb0", "\\uc751\\uc6a9", "\\ud504\\ub85c\\uadf8\\ub7a8\\uacfc",
327       "\\uc6f9", "\\uc0ac\\uc774\\ud2b8\\uc5d0", "\\ud1b5\\ud569\\ud558\\uba74",
328       "\\ub808\\uac70\\uc2dc", "\\ubb38\\uc790", "\\uc138\\ud2b8", "\\uc0ac\\uc6a9\\uc5d0",
329       "\\uc788\\uc5b4\\uc11c", "\\uc0c1\\ub2f9\\ud55c", "\\ube44\\uc6a9", "\\uc808\\uac10",
330       "\\ud6a8\\uacfc\\uac00",
331       "\\ub098\\ud0c0\\ub0a9\\ub2c8\\ub2e4.", "\\uc720\\ub2c8\\ucf54\\ub4dc\\ub97c",
332       "\\ud1b5\\ud574", "\\ub9ac\\uc5d4\\uc9c0\\ub2c8\\uc5b4\\ub9c1", "\\uc5c6\\uc774",
333       "\\ub2e4\\uc911", "\\ud50c\\ub7ab\\ud3fc,", "\\uc5b8\\uc5b4", "\\ubc0f", "\\uad6d\\uac00",
334       "\\uac04\\uc5d0", "\\ub2e8\\uc77c", "\\uc18c\\ud504\\ud2b8\\uc6e8\\uc5b4",
335       "\\ud50c\\ub7ab\\ud3fc", "\\ub610\\ub294", "\\ub2e8\\uc77c", "\\uc6f9",
336       "\\uc0ac\\uc774\\ud2b8\\ub97c", "\\ubaa9\\ud45c\\ub85c", "\\uc0bc\\uc744", "\\uc218",
337       "\\uc788\\uc2b5\\ub2c8\\ub2e4.", "\\uc774\\ub97c", "\\uc0ac\\uc6a9\\ud558\\uba74",
338       "\\ub370\\uc774\\ud130\\ub97c", "\\uc190\\uc0c1", "\\uc5c6\\uc774", "\\uc5ec\\ub7ec",
339       "\\uc2dc\\uc2a4\\ud15c\\uc744", "\\ud1b5\\ud574", "\\uc804\\uc1a1\\ud560", "\\uc218",
340       "\\uc788\\uc2b5\\ub2c8\\ub2e4.",
341 
342       "\\uc720\\ub2c8\\ucf54\\ub4dc", "\\ucf58\\uc18c\\uc2dc\\uc5c4\\uc5d0", "\\ub300\\ud574",
343       "\\uc720\\ub2c8\\ucf54\\ub4dc", "\\ucf58\\uc18c\\uc2dc\\uc5c4\\uc740",
344       "\\ube44\\uc601\\ub9ac", "\\uc870\\uc9c1\\uc73c\\ub85c\\uc11c", "\\ud604\\ub300",
345       "\\uc18c\\ud504\\ud2b8\\uc6e8\\uc5b4", "\\uc81c\\ud488\\uacfc",
346       "\\ud45c\\uc900\\uc5d0\\uc11c", "\\ud14d\\uc2a4\\ud2b8\\uc758", "\\ud45c\\ud604\\uc744",
347       "\\uc9c0\\uc815\\ud558\\ub294", "\\uc720\\ub2c8\\ucf54\\ub4dc", "\\ud45c\\uc900\\uc758",
348       "\\uc0ac\\uc6a9\\uc744", "\\uac1c\\ubc1c\\ud558\\uace0", "\\ud655\\uc7a5\\ud558\\uba70",
349       "\\uc7a5\\ub824\\ud558\\uae30", "\\uc704\\ud574",
350       "\\uc138\\uc6cc\\uc84c\\uc2b5\\ub2c8\\ub2e4.", "\\ucf58\\uc18c\\uc2dc\\uc5c4",
351       "\\uba64\\ubc84\\uc27d\\uc740", "\\ucef4\\ud4e8\\ud130\\uc640", "\\uc815\\ubcf4",
352       "\\ucc98\\ub9ac", "\\uc0b0\\uc5c5\\uc5d0", "\\uc885\\uc0ac\\ud558\\uace0", "\\uc788\\ub294",
353       "\\uad11\\ubc94\\uc704\\ud55c", "\\ud68c\\uc0ac", "\\ubc0f", "\\uc870\\uc9c1\\uc758",
354       "\\ubc94\\uc704\\ub97c",
355       "\\ub098\\ud0c0\\ub0c5\\ub2c8\\ub2e4.", "\\ucf58\\uc18c\\uc2dc\\uc5c4\\uc758",
356       "\\uc7ac\\uc815\\uc740", "\\uc804\\uc801\\uc73c\\ub85c", "\\ud68c\\ube44\\uc5d0",
357       "\\uc758\\ud574", "\\ucda9\\ub2f9\\ub429\\ub2c8\\ub2e4.", "\\uc720\\ub2c8\\ucf54\\ub4dc",
358       "\\ucee8\\uc18c\\uc2dc\\uc5c4\\uc5d0\\uc11c\\uc758", "\\uba64\\ubc84\\uc27d\\uc740",
359       "\\uc804", "\\uc138\\uacc4", "\\uc5b4\\ub290", "\\uacf3\\uc5d0\\uc11c\\ub098",
360       "\\uc720\\ub2c8\\ucf54\\ub4dc", "\\ud45c\\uc900\\uc744", "\\uc9c0\\uc6d0\\ud558\\uace0",
361       "\\uadf8", "\\ud655\\uc7a5\\uacfc", "\\uad6c\\ud604\\uc744",
362       "\\uc9c0\\uc6d0\\ud558\\uace0\\uc790\\ud558\\ub294", "\\uc870\\uc9c1\\uacfc",
363       "\\uac1c\\uc778\\uc5d0\\uac8c", "\\uac1c\\ubc29\\ub418\\uc5b4",
364       "\\uc788\\uc2b5\\ub2c8\\ub2e4.",
365 
366       "\\ub354", "\\uc790\\uc138\\ud55c", "\\ub0b4\\uc6a9\\uc740", "\\uc6a9\\uc5b4\\uc9d1,",
367       "\\uc608\\uc81c", "\\uc720\\ub2c8\\ucf54\\ub4dc", "\\uc0ac\\uc6a9", "\\uac00\\ub2a5",
368       "\\uc81c\\ud488,", "\\uae30\\uc220", "\\uc815\\ubcf4", "\\ubc0f", "\\uae30\\ud0c0",
369       "\\uc720\\uc6a9\\ud55c", "\\uc815\\ubcf4\\ub97c",
370       "\\ucc38\\uc870\\ud558\\uc2ed\\uc2dc\\uc624."
371     };
372 
373     enum { WHAT_IS_UNICODE_length = UPRV_LENGTHOF(WHAT_IS_UNICODE) };
374 
375     UParseError parseError;
376     UErrorCode status = U_ZERO_ERROR;
377     Transliterator* latinJamo = Transliterator::createInstance("Latin-Jamo", UTRANS_FORWARD, parseError, status);
378     Transliterator* jamoHangul = Transliterator::createInstance("NFC(NFD)", UTRANS_FORWARD, parseError, status);
379     if (latinJamo == 0 || jamoHangul == 0 || U_FAILURE(status)) {
380         delete latinJamo;
381         delete jamoHangul;
382         dataerrln("FAIL: createInstance returned NULL - %s", u_errorName(status));
383         return;
384     }
385     Transliterator* jamoLatin = latinJamo->createInverse(status);
386     Transliterator* hangulJamo = jamoHangul->createInverse(status);
387     if (jamoLatin == 0 || hangulJamo == 0) {
388         errln("FAIL: createInverse returned NULL");
389         delete latinJamo;
390         delete jamoLatin;
391         delete jamoHangul;
392         delete hangulJamo;
393         return;
394     }
395 
396     Transliterator* tarray[4] =
397         { hangulJamo, jamoLatin, latinJamo, jamoHangul };
398     CompoundTransliterator rt(tarray, 4);
399 
400     UnicodeString buf;
401     int32_t total = 0;
402     int32_t errors = 0;
403     int32_t i;
404     for (i=0; i < WHAT_IS_UNICODE_length; ++i) {
405         ++total;
406         UnicodeString hangul = UnicodeString(WHAT_IS_UNICODE[i], -1, US_INV);
407         hangul = hangul.unescape(); // Parse backslash-u escapes
408         UnicodeString hangulX = hangul;
409         rt.transliterate(hangulX);
410         if (hangul != hangulX) {
411             ++errors;
412             UnicodeString jamo = hangul; hangulJamo->transliterate(jamo);
413             UnicodeString latin = jamo; jamoLatin->transliterate(latin);
414             UnicodeString jamo2 = latin; latinJamo->transliterate(jamo2);
415             UnicodeString hangul2 = jamo2; jamoHangul->transliterate(hangul2);
416 
417             buf.remove(0);
418             buf.append("FAIL: ");
419             if (hangul2 != hangulX) {
420                 buf.append((UnicodeString)"(Weird: " + hangulX + " != " + hangul2 + ")");
421             }
422             // The Hangul-Jamo conversion is not usually the
423             // bug here, so we hide it from display.
424             // Uncomment lines to see the Hangul.
425             buf.append(//hangul + " => " +
426                        jamoToName(jamo) + " => " +
427                        latin + " => " + jamoToName(jamo2)
428                        //+ " => " + hangul2
429                        );
430             errln(prettify(buf));
431         }
432     }
433     if (errors != 0) {
434         errln((UnicodeString)"Test word failures: " + errors + " out of " + total);
435     } else {
436         logln((UnicodeString)"All " + total + " test words passed");
437     }
438 
439     delete latinJamo;
440     delete jamoLatin;
441     delete jamoHangul;
442     delete hangulJamo;
443 }
444 
445 // Override TransliteratorTest
446 void
expectAux(const UnicodeString & tag,const UnicodeString & summary,UBool pass,const UnicodeString & expectedResult)447 JamoTest::expectAux(const UnicodeString& tag,
448                     const UnicodeString& summary, UBool pass,
449                     const UnicodeString& expectedResult) {
450     UnicodeString jsum = jamoToName(summary);
451     UnicodeString jexp = jamoToName(expectedResult);
452     TransliteratorTest::expectAux(tag, jsum, pass, jexp);
453 }
454 
455 const char* JamoTest::JAMO_NAMES_RULES =
456         "'(Gi)' <> \\u1100;"
457         "'(GGi)' <> \\u1101;"
458         "'(Ni)' <> \\u1102;"
459         "'(Di)' <> \\u1103;"
460         "'(DD)' <> \\u1104;"
461         "'(R)' <> \\u1105;"
462         "'(Mi)' <> \\u1106;"
463         "'(Bi)' <> \\u1107;"
464         "'(BB)' <> \\u1108;"
465         "'(Si)' <> \\u1109;"
466         "'(SSi)' <> \\u110A;"
467         "'(IEUNG)' <> \\u110B;"
468         "'(Ji)' <> \\u110C;"
469         "'(JJ)' <> \\u110D;"
470         "'(Ci)' <> \\u110E;"
471         "'(Ki)' <> \\u110F;"
472         "'(Ti)' <> \\u1110;"
473         "'(Pi)' <> \\u1111;"
474         "'(Hi)' <> \\u1112;"
475 
476         "'(A)' <> \\u1161;"
477         "'(AE)' <> \\u1162;"
478         "'(YA)' <> \\u1163;"
479         "'(YAE)' <> \\u1164;"
480         "'(EO)' <> \\u1165;"
481         "'(E)' <> \\u1166;"
482         "'(YEO)' <> \\u1167;"
483         "'(YE)' <> \\u1168;"
484         "'(O)' <> \\u1169;"
485         "'(WA)' <> \\u116A;"
486         "'(WAE)' <> \\u116B;"
487         "'(OE)' <> \\u116C;"
488         "'(YO)' <> \\u116D;"
489         "'(U)' <> \\u116E;"
490         "'(WEO)' <> \\u116F;"
491         "'(WE)' <> \\u1170;"
492         "'(WI)' <> \\u1171;"
493         "'(YU)' <> \\u1172;"
494         "'(EU)' <> \\u1173;"
495         "'(YI)' <> \\u1174;"
496         "'(I)' <> \\u1175;"
497 
498         "'(Gf)' <> \\u11A8;"
499         "'(GGf)' <> \\u11A9;"
500         "'(GS)' <> \\u11AA;"
501         "'(Nf)' <> \\u11AB;"
502         "'(NJ)' <> \\u11AC;"
503         "'(NH)' <> \\u11AD;"
504         "'(Df)' <> \\u11AE;"
505         "'(L)' <> \\u11AF;"
506         "'(LG)' <> \\u11B0;"
507         "'(LM)' <> \\u11B1;"
508         "'(LB)' <> \\u11B2;"
509         "'(LS)' <> \\u11B3;"
510         "'(LT)' <> \\u11B4;"
511         "'(LP)' <> \\u11B5;"
512         "'(LH)' <> \\u11B6;"
513         "'(Mf)' <> \\u11B7;"
514         "'(Bf)' <> \\u11B8;"
515         "'(BS)' <> \\u11B9;"
516         "'(Sf)' <> \\u11BA;"
517         "'(SSf)' <> \\u11BB;"
518         "'(NG)' <> \\u11BC;"
519         "'(Jf)' <> \\u11BD;"
520         "'(Cf)' <> \\u11BE;"
521         "'(Kf)' <> \\u11BF;"
522         "'(Tf)' <> \\u11C0;"
523         "'(Pf)' <> \\u11C1;"
524         "'(Hf)' <> \\u11C2;";
525 
526 /**
527  * Convert short names to actual jamo.  E.g., "x(LG)y" returns
528  * "x\u11B0y".  See JAMO_NAMES for table of names.
529  */
530 UnicodeString
nameToJamo(const UnicodeString & input)531 JamoTest::nameToJamo(const UnicodeString& input) {
532     if (NAME_JAMO == 0) {
533         errln("Failed to create NAME_JAMO");
534         return input;   /* failure! */
535     }
536     UnicodeString result(input);
537     NAME_JAMO->transliterate(result);
538     return result;
539 }
540 
541 /**
542  * Convert jamo to short names.  E.g., "x\u11B0y" returns
543  * "x(LG)y".  See JAMO_NAMES for table of names.
544  */
545 UnicodeString
jamoToName(const UnicodeString & input)546 JamoTest::jamoToName(const UnicodeString& input) {
547     if (NAME_JAMO == 0) {
548         errln("Failed to create NAME_JAMO");
549         return input;   /* failure! */
550     }
551     UnicodeString result(input);
552     JAMO_NAME->transliterate(result);
553     return result;
554 }
555 
556 #endif /* #if !UCONFIG_NO_TRANSLITERATION */
557