1 /********************************************************************
2  * COPYRIGHT:
3  * Copyright (c) 1997-2010, International Business Machines Corporation and
4  * others. All Rights Reserved.
5  ********************************************************************
6 ************************************************************************
7 *   Date        Name        Description
8 *   02/28/2001  aliu        Creation
9 *   03/01/2001  George      port to HP/UX
10 ************************************************************************/
11 
12 #include "unicode/utypes.h"
13 
14 #if !UCONFIG_NO_TRANSLITERATION
15 
16 #include "jamotest.h"
17 #include "unicode/utypes.h"
18 #include "unicode/translit.h"
19 #include "cpdtrans.h"
20 
21 // SEP is the disambiguation separator used by Latin-Jamo and Jamo-Latin
22 #define SEP "-"
23 
JamoTest()24 JamoTest::JamoTest()
25 {
26     UParseError parseError;
27     UErrorCode status = U_ZERO_ERROR;
28     NAME_JAMO = Transliterator::createFromRules("Name-Jamo",
29                                             UnicodeString(JAMO_NAMES_RULES, -1, US_INV),
30                                             UTRANS_FORWARD, parseError, status);
31 
32     if (U_FAILURE(status)) {
33         delete NAME_JAMO;
34         NAME_JAMO = NULL;
35     }
36     status = U_ZERO_ERROR;
37     JAMO_NAME = Transliterator::createFromRules("Jamo-Name",
38                                             UnicodeString(JAMO_NAMES_RULES, -1, US_INV),
39                                             UTRANS_REVERSE, parseError, status);
40     if (U_FAILURE(status)) {
41         delete JAMO_NAME;
42         JAMO_NAME = NULL;
43     }
44 }
45 
~JamoTest()46 JamoTest::~JamoTest()
47 {
48     delete NAME_JAMO;
49     delete JAMO_NAME;
50 }
51 
52 void
runIndexedTest(int32_t index,UBool exec,const char * & name,char *)53 JamoTest::runIndexedTest(int32_t index, UBool exec,
54                          const char* &name, char* /*par*/) {
55     switch (index) {
56         TESTCASE(0,TestJamo);
57         TESTCASE(1,TestRealText);
58         TESTCASE(2,TestPiecemeal);
59         default: name = ""; break;
60     }
61 }
62 
63 void
TestJamo()64 JamoTest::TestJamo() {
65     UParseError parseError;
66     UErrorCode status = U_ZERO_ERROR;
67     Transliterator* latinJamo = Transliterator::createInstance("Latin-Jamo", UTRANS_FORWARD, parseError, status);
68 
69     if (latinJamo == 0 || U_FAILURE(status)) {
70         dataerrln("FAIL: createInstance() returned 0 - %s", u_errorName(status));
71         return;
72     }
73 
74     Transliterator* jamoLatin = latinJamo->createInverse(status);
75 
76     if (jamoLatin == 0) {
77         delete latinJamo;
78         errln("FAIL: createInverse() returned 0");
79         return;
80     }
81 
82     static const char* CASE[] = {
83         // Column 1 is the latin text L1 to be fed to Latin-Jamo
84         // to yield output J.
85 
86         // Column 2 is expected value of J.  J is fed to
87         // Jamo-Latin to yield output L2.
88 
89         // Column 3 is expected value of L2.  If the expected
90         // value of L2 is L1, then L2 is NULL.
91 
92                 // add tests for the update to fix problems where it didn't follow the standard
93                 // see also http://www.unicode.org/cldr/data/charts/transforms/Latin-Hangul.html
94                 "gach", "(Gi)(A)(Cf)", NULL,
95                 "geumhui", "(Gi)(EU)(Mf)(Hi)(YI)", NULL,
96                 "choe", "(Ci)(OE)", NULL,
97                 "wo", "(IEUNG)(WEO)", NULL,
98                 "Wonpil", "(IEUNG)(WEO)(Nf)(Pi)(I)(L)", "wonpil",
99                 "GIPPEUM", "(Gi)(I)(BB)(EU)(Mf)", "gippeum",
100                 "EUTTEUM", "(IEUNG)(EU)(DD)(EU)(Mf)", "eutteum",
101                 "KKOTNAE", "(GGi)(O)(Tf)(Ni)(AE)", "kkotnae",
102                 "gaga", "(Gi)(A)(Gi)(A)", NULL,
103                 "gag-a", "(Gi)(A)(Gf)(IEUNG)(A)", NULL,
104                 "gak-ka", "(Gi)(A)(Kf)(Ki)(A)", NULL,
105                 "gakka", "(Gi)(A)(GGi)(A)", NULL,
106                 "gakk-a", "(Gi)(A)(GGf)(IEUNG)(A)", NULL,
107                 "gakkka", "(Gi)(A)(GGf)(Ki)(A)", NULL,
108                 "gak-kka", "(Gi)(A)(Kf)(GGi)(A)", NULL,
109 
110         "bab", "(Bi)(A)(Bf)", NULL,
111         "babb", "(Bi)(A)(Bf)(Bi)(EU)", "babbeu",
112         "babbba", "(Bi)(A)(Bf)(Bi)(EU)(Bi)(A)", "babbeuba",
113         "bagg", "(Bi)(A)(Gf)(Gi)(EU)", "baggeu",
114         "baggga", "(Bi)(A)(Gf)(Gi)(EU)(Gi)(A)", "baggeuga",
115         //"bag" SEP "gga", "(Bi)(A)(Gf)" SEP "(Gi)(EU)(Gi)(A)", "bag" SEP "geuga",
116         "kabsa", "(Ki)(A)(Bf)(Si)(A)", NULL,
117         "kabska", "(Ki)(A)(BS)(Ki)(A)", NULL,
118         "gabsbka", "(Gi)(A)(BS)(Bi)(EU)(Ki)(A)", "gabsbeuka", // not (Kf)
119         "gga", "(Gi)(EU)(Gi)(A)", "geuga",
120         "bsa", "(Bi)(EU)(Si)(A)", "beusa",
121         "agg", "(IEUNG)(A)(Gf)(Gi)(EU)", "aggeu",
122         "agga", "(IEUNG)(A)(Gf)(Gi)(A)", NULL,
123         "la", "(R)(A)", NULL,
124         "bs", "(Bi)(EU)(Sf)", "beus",
125         "kalgga", "(Ki)(A)(L)(Gi)(EU)(Gi)(A)", "kalgeuga",
126 
127         // 'r' in a final position is treated like 'l'
128         "karka", "(Ki)(A)(L)(Ki)(A)", "kalka",
129     };
130 
131     enum { CASE_length = sizeof(CASE) / sizeof(CASE[0]) };
132 
133     int32_t i;
134     for (i=0; i<CASE_length; i+=3) {
135         UnicodeString jamo = nameToJamo(CASE[i+1]);
136         if (CASE[i+2] == NULL) {
137             expect(*latinJamo, CASE[i], jamo, *jamoLatin);
138         } else {
139             // Handle case where round-trip is expected to fail
140             expect(*latinJamo, CASE[i], jamo);
141             expect(*jamoLatin, jamo, CASE[i+2]);
142         }
143     }
144 
145     delete latinJamo;
146     delete jamoLatin;
147 }
148 
149 /**
150  * Test various step-at-a-time transformation of hangul to jamo to
151  * latin and back.
152  */
TestPiecemeal(void)153 void JamoTest::TestPiecemeal(void) {
154     UnicodeString hangul; hangul.append((UChar)0xBC0F);
155     UnicodeString jamo = nameToJamo("(Mi)(I)(Cf)");
156     UnicodeString latin("mic");
157     UnicodeString latin2("mich");
158 
159     Transliterator *t = NULL;
160     UErrorCode status = U_ZERO_ERROR;
161 
162     t = Transliterator::createInstance("NFD", UTRANS_FORWARD, status); // was Hangul-Jamo
163     if (U_FAILURE(status) || t == 0) {
164         dataerrln("FAIL: createInstance failed");
165         return;
166     }
167     expect(*t, hangul, jamo);
168     delete t;
169 
170     t = Transliterator::createInstance("NFC", UTRANS_FORWARD, status); // was Jamo-Hangul
171     if (U_FAILURE(status) || t == 0) {
172         errln("FAIL: createInstance failed");
173         return;
174     }
175     expect(*t, jamo, hangul);
176     delete t;
177 
178     t = Transliterator::createInstance("Latin-Jamo", UTRANS_FORWARD, status);
179     if (U_FAILURE(status) || t == 0) {
180         dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
181         return;
182     }
183     expect(*t, latin, jamo);
184     delete t;
185 
186     t = Transliterator::createInstance("Jamo-Latin", UTRANS_FORWARD, status);
187     if (U_FAILURE(status) || t == 0) {
188         errln("FAIL: createInstance failed");
189         return;
190     }
191     expect(*t, jamo, latin2);
192     delete t;
193 
194     t = Transliterator::createInstance("Hangul-Latin", UTRANS_FORWARD, status);
195     if (U_FAILURE(status) || t == 0) {
196         errln("FAIL: createInstance failed");
197         return;
198     }
199     expect(*t, hangul, latin2);
200     delete t;
201 
202     t = Transliterator::createInstance("Latin-Hangul", UTRANS_FORWARD, status);
203     if (U_FAILURE(status) || t == 0) {
204         errln("FAIL: createInstance failed");
205         return;
206     }
207     expect(*t, latin, hangul);
208     delete t;
209 
210     t = Transliterator::createInstance("Hangul-Latin; Latin-Jamo", UTRANS_FORWARD, status);
211     if (U_FAILURE(status) || t == 0) {
212         errln("FAIL: createInstance failed");
213         return;
214     }
215     expect(*t, hangul, jamo);
216     delete t;
217 
218     t = Transliterator::createInstance("Jamo-Latin; Latin-Hangul", UTRANS_FORWARD, status);
219     if (U_FAILURE(status) || t == 0) {
220         errln("FAIL: createInstance failed");
221         return;
222     }
223     expect(*t, jamo, hangul);
224     delete t;
225 
226     t = Transliterator::createInstance("Hangul-Latin; Latin-Hangul", UTRANS_FORWARD, status);
227     if (U_FAILURE(status) || t == 0) {
228         errln("FAIL: createInstance failed");
229         return;
230     }
231     expect(*t, hangul, hangul);
232     delete t;
233 }
234 
235 void
TestRealText()236 JamoTest::TestRealText() {
237     // Test text taken from the Unicode web site
238      static const char* const WHAT_IS_UNICODE[] = {
239       "\\uc720\\ub2c8\\ucf54\\ub4dc\\uc5d0", "\\ub300\\ud574", "?",
240 
241       "\\uc5b4\\ub5a4", "\\ud50c\\ub7ab\\ud3fc,", "\\uc5b4\\ub5a4",
242       "\\ud504\\ub85c\\uadf8\\ub7a8,", "\\uc5b4\\ub5a4", "\\uc5b8\\uc5b4\\uc5d0\\ub3c4",
243       "\\uc0c1\\uad00\\uc5c6\\uc774", "\\uc720\\ub2c8\\ucf54\\ub4dc\\ub294", "\\ubaa8\\ub4e0",
244       "\\ubb38\\uc790\\uc5d0", "\\ub300\\ud574", "\\uace0\\uc720", "\\ubc88\\ud638\\ub97c",
245       "\\uc81c\\uacf5\\ud569\\ub2c8\\ub2e4.",
246 
247       "\\uae30\\ubcf8\\uc801\\uc73c\\ub85c", "\\ucef4\\ud4e8\\ud130\\ub294",
248       "\\uc22b\\uc790\\ub9cc", "\\ucc98\\ub9ac\\ud569\\ub2c8\\ub2e4.", "\\uae00\\uc790\\ub098",
249       "\\ub2e4\\ub978", "\\ubb38\\uc790\\uc5d0\\ub3c4", "\\uc22b\\uc790\\ub97c",
250       "\\uc9c0\\uc815\\ud558\\uc5ec",
251       "\\uc800\\uc7a5\\ud569\\ub2c8\\ub2e4.", "\\uc720\\ub2c8\\ucf54\\ub4dc\\uac00",
252       "\\uac1c\\ubc1c\\ub418\\uae30", "\\uc804\\uc5d0\\ub294", "\\uc774\\ub7ec\\ud55c",
253       "\\uc22b\\uc790\\ub97c", "\\uc9c0\\uc815\\ud558\\uae30", "\\uc704\\ud574", "\\uc218\\ubc31",
254       "\\uac00\\uc9c0\\uc758", "\\ub2e4\\ub978", "\\uae30\\ud638\\ud654",
255       "\\uc2dc\\uc2a4\\ud15c\\uc744",
256       "\\uc0ac\\uc6a9\\ud588\\uc2b5\\ub2c8\\ub2e4.", "\\ub2e8\\uc77c", "\\uae30\\ud638\\ud654",
257       "\\ubc29\\ubc95\\uc73c\\ub85c\\ub294", "\\ubaa8\\ub4e0", "\\ubb38\\uc790\\ub97c",
258       "\\ud3ec\\ud568\\ud560", "\\uc218", "\\uc5c6\\uc5c8\\uc2b5\\ub2c8\\ub2e4.", "\\uc608\\ub97c",
259       "\\ub4e4\\uc5b4", "\\uc720\\ub7fd", "\\uc5f0\\ud569\\uc5d0\\uc11c\\ub9cc",
260       "\\ubcf4\\ub354\\ub77c\\ub3c4", "\\ubaa8\\ub4e0", "\\uac01", "\\ub098\\ub77c\\ubcc4",
261       "\\uc5b8\\uc5b4\\ub97c", "\\ucc98\\ub9ac\\ud558\\ub824\\uba74", "\\uc5ec\\ub7ec",
262       "\\uac1c\\uc758", "\\ub2e4\\ub978", "\\uae30\\ud638\\ud654", "\\ubc29\\ubc95\\uc774",
263       "\\ud544\\uc694\\ud569\\ub2c8\\ub2e4.", "\\uc601\\uc5b4\\uc640", "\\uac19\\uc740",
264       "\\ub2e8\\uc77c", "\\uc5b8\\uc5b4\\uc758", "\\uacbd\\uc6b0\\ub3c4",
265       "\\uacf5\\ud1b5\\uc801\\uc73c\\ub85c", "\\uc0ac\\uc6a9\\ub418\\ub294", "\\ubaa8\\ub4e0",
266       "\\uae00\\uc790,", "\\ubb38\\uc7a5", "\\ubd80\\ud638", "\\ubc0f",
267       "\\ud14c\\ud06c\\ub2c8\\uceec", "\\uae30\\ud638\\uc5d0", "\\ub9de\\ub294", "\\ub2e8\\uc77c",
268       "\\uae30\\ud638\\ud654", "\\ubc29\\ubc95\\uc744", "\\uac16\\uace0", "\\uc788\\uc9c0",
269       "\\ubabb\\ud558\\uc600\\uc2b5\\ub2c8\\ub2e4.",
270 
271       "\\uc774\\ub7ec\\ud55c", "\\uae30\\ud638\\ud654", "\\uc2dc\\uc2a4\\ud15c\\uc740",
272       "\\ub610\\ud55c", "\\ub2e4\\ub978", "\\uae30\\ud638\\ud654", "\\uc2dc\\uc2a4\\ud15c\\uacfc",
273       "\\ucda9\\ub3cc\\ud569\\ub2c8\\ub2e4.", "\\uc989", "\\ub450", "\\uac00\\uc9c0",
274       "\\uae30\\ud638\\ud654", "\\ubc29\\ubc95\\uc774", "\\ub450", "\\uac1c\\uc758", "\\ub2e4\\ub978",
275       "\\ubb38\\uc790\\uc5d0", "\\ub300\\ud574", "\\uac19\\uc740", "\\ubc88\\ud638\\ub97c",
276       "\\uc0ac\\uc6a9\\ud558\\uac70\\ub098", "\\uac19\\uc740", "\\ubb38\\uc790\\uc5d0",
277       "\\ub300\\ud574", "\\ub2e4\\ub978", "\\ubc88\\ud638\\ub97c", "\\uc0ac\\uc6a9\\ud560", "\\uc218",
278       "\\uc788\\uc2b5\\ub2c8\\ub2e4.", "\\uc8fc\\uc5b4\\uc9c4", "\\ubaa8\\ub4e0",
279       "\\ucef4\\ud4e8\\ud130(\\ud2b9\\ud788", "\\uc11c\\ubc84)\\ub294", "\\uc11c\\ub85c",
280       "\\ub2e4\\ub978", "\\uc5ec\\ub7ec", "\\uac00\\uc9c0", "\\uae30\\ud638\\ud654",
281       "\\ubc29\\ubc95\\uc744", "\\uc9c0\\uc6d0\\ud574\\uc57c",
282       "\\ud569\\ub2c8\\ub2e4.", "\\uadf8\\ub7ec\\ub098,", "\\ub370\\uc774\\ud130\\ub97c",
283       "\\uc11c\\ub85c", "\\ub2e4\\ub978", "\\uae30\\ud638\\ud654", "\\ubc29\\ubc95\\uc774\\ub098",
284       "\\ud50c\\ub7ab\\ud3fc", "\\uac04\\uc5d0", "\\uc804\\ub2ec\\ud560", "\\ub54c\\ub9c8\\ub2e4",
285       "\\uadf8", "\\ub370\\uc774\\ud130\\ub294", "\\ud56d\\uc0c1", "\\uc190\\uc0c1\\uc758",
286       "\\uc704\\ud5d8\\uc744", "\\uacaa\\uac8c", "\\ub429\\ub2c8\\ub2e4.",
287 
288       "\\uc720\\ub2c8\\ucf54\\ub4dc\\ub85c", "\\ubaa8\\ub4e0", "\\uac83\\uc744",
289       "\\ud574\\uacb0\\ud560", "\\uc218", "\\uc788\\uc2b5\\ub2c8\\ub2e4!",
290       "\\uc720\\ub2c8\\ucf54\\ub4dc\\ub294", "\\uc0ac\\uc6a9", "\\uc911\\uc778",
291       "\\ud50c\\ub7ab\\ud3fc,", "\\ud504\\ub85c\\uadf8\\ub7a8,", "\\uc5b8\\uc5b4\\uc5d0",
292       "\\uad00\\uacc4\\uc5c6\\uc774", "\\ubb38\\uc790\\ub9c8\\ub2e4", "\\uace0\\uc720\\ud55c",
293       "\\uc22b\\uc790\\ub97c",
294       "\\uc81c\\uacf5\\ud569\\ub2c8\\ub2e4.", "\\uc720\\ub2c8\\ucf54\\ub4dc",
295       "\\ud45c\\uc900\\uc740", // "Apple, HP, IBM, JustSystem, Microsoft, Oracle, SAP, "
296       // "Sun, Sybase, Unisys "
297       "\\ubc0f", "\\uae30\\ud0c0", "\\uc5ec\\ub7ec",
298       "\\ud68c\\uc0ac\\uc640", "\\uac19\\uc740", "\\uc5c5\\uacc4",
299       "\\uc120\\ub450\\uc8fc\\uc790\\uc5d0", "\\uc758\\ud574",
300       "\\ucc44\\ud0dd\\ub418\\uc5c8\\uc2b5\\ub2c8\\ub2e4.", "\\uc720\\ub2c8\\ucf54\\ub4dc\\ub294",
301       // "XML, Java, ECMAScript(JavaScript), LDAP, CORBA 3.0, WML "
302       "\\ub4f1\\uacfc",
303       "\\uac19\\uc774", "\\ud604\\uc7ac", "\\ub110\\ub9ac", "\\uc0ac\\uc6a9\\ub418\\ub294",
304       "\\ud45c\\uc900\\uc5d0\\uc11c", "\\ud544\\uc694\\ud558\\uba70", "\\uc774\\ub294", // "ISO/IEC",
305       "10646\\uc744", "\\uad6c\\ud604\\ud558\\ub294", "\\uacf5\\uc2dd\\uc801\\uc778",
306       "\\ubc29\\ubc95\\uc785\\ub2c8\\ub2e4.", "\\uc774\\ub294", "\\ub9ce\\uc740", "\\uc6b4\\uc601",
307       "\\uccb4\\uc81c,", "\\uc694\\uc998", "\\uc0ac\\uc6a9\\ub418\\ub294", "\\ubaa8\\ub4e0",
308       "\\ube0c\\ub77c\\uc6b0\\uc800", "\\ubc0f", "\\uae30\\ud0c0", "\\ub9ce\\uc740",
309       "\\uc81c\\ud488\\uc5d0\\uc11c",
310       "\\uc9c0\\uc6d0\\ub429\\ub2c8\\ub2e4.", "\\uc720\\ub2c8\\ucf54\\ub4dc",
311       "\\ud45c\\uc900\\uc758", "\\ubd80\\uc0c1\\uacfc", "\\uc774\\ub97c",
312       "\\uc9c0\\uc6d0\\ud558\\ub294", "\\ub3c4\\uad6c\\uc758", "\\uac00\\uc6a9\\uc131\\uc740",
313       "\\ucd5c\\uadfc", "\\uc804", "\\uc138\\uacc4\\uc5d0", "\\ubd88\\uace0", "\\uc788\\ub294",
314       "\\uae30\\uc220", "\\uacbd\\ud5a5\\uc5d0\\uc11c", "\\uac00\\uc7a5", "\\uc911\\uc694\\ud55c",
315       "\\ubd80\\ubd84\\uc744", "\\ucc28\\uc9c0\\ud558\\uace0", "\\uc788\\uc2b5\\ub2c8\\ub2e4.",
316 
317       "\\uc720\\ub2c8\\ucf54\\ub4dc\\ub97c",
318       // Replaced a hyphen with a space to make the test case work with CLDR1.5
319       //"\\ud074\\ub77c\\uc774\\uc5b8\\ud2b8-\\uc11c\\ubc84", "\\ub610\\ub294",
320       "\\ud074\\ub77c\\uc774\\uc5b8\\ud2b8 \\uc11c\\ubc84", "\\ub610\\ub294",
321       // Replaced a hyphen with a space.
322       //"\\ub2e4\\uc911-\\uc5f0\\uacb0", "\\uc751\\uc6a9", "\\ud504\\ub85c\\uadf8\\ub7a8\\uacfc",
323       "\\ub2e4\\uc911 \\uc5f0\\uacb0", "\\uc751\\uc6a9", "\\ud504\\ub85c\\uadf8\\ub7a8\\uacfc",
324       "\\uc6f9", "\\uc0ac\\uc774\\ud2b8\\uc5d0", "\\ud1b5\\ud569\\ud558\\uba74",
325       "\\ub808\\uac70\\uc2dc", "\\ubb38\\uc790", "\\uc138\\ud2b8", "\\uc0ac\\uc6a9\\uc5d0",
326       "\\uc788\\uc5b4\\uc11c", "\\uc0c1\\ub2f9\\ud55c", "\\ube44\\uc6a9", "\\uc808\\uac10",
327       "\\ud6a8\\uacfc\\uac00",
328       "\\ub098\\ud0c0\\ub0a9\\ub2c8\\ub2e4.", "\\uc720\\ub2c8\\ucf54\\ub4dc\\ub97c",
329       "\\ud1b5\\ud574", "\\ub9ac\\uc5d4\\uc9c0\\ub2c8\\uc5b4\\ub9c1", "\\uc5c6\\uc774",
330       "\\ub2e4\\uc911", "\\ud50c\\ub7ab\\ud3fc,", "\\uc5b8\\uc5b4", "\\ubc0f", "\\uad6d\\uac00",
331       "\\uac04\\uc5d0", "\\ub2e8\\uc77c", "\\uc18c\\ud504\\ud2b8\\uc6e8\\uc5b4",
332       "\\ud50c\\ub7ab\\ud3fc", "\\ub610\\ub294", "\\ub2e8\\uc77c", "\\uc6f9",
333       "\\uc0ac\\uc774\\ud2b8\\ub97c", "\\ubaa9\\ud45c\\ub85c", "\\uc0bc\\uc744", "\\uc218",
334       "\\uc788\\uc2b5\\ub2c8\\ub2e4.", "\\uc774\\ub97c", "\\uc0ac\\uc6a9\\ud558\\uba74",
335       "\\ub370\\uc774\\ud130\\ub97c", "\\uc190\\uc0c1", "\\uc5c6\\uc774", "\\uc5ec\\ub7ec",
336       "\\uc2dc\\uc2a4\\ud15c\\uc744", "\\ud1b5\\ud574", "\\uc804\\uc1a1\\ud560", "\\uc218",
337       "\\uc788\\uc2b5\\ub2c8\\ub2e4.",
338 
339       "\\uc720\\ub2c8\\ucf54\\ub4dc", "\\ucf58\\uc18c\\uc2dc\\uc5c4\\uc5d0", "\\ub300\\ud574",
340       "\\uc720\\ub2c8\\ucf54\\ub4dc", "\\ucf58\\uc18c\\uc2dc\\uc5c4\\uc740",
341       "\\ube44\\uc601\\ub9ac", "\\uc870\\uc9c1\\uc73c\\ub85c\\uc11c", "\\ud604\\ub300",
342       "\\uc18c\\ud504\\ud2b8\\uc6e8\\uc5b4", "\\uc81c\\ud488\\uacfc",
343       "\\ud45c\\uc900\\uc5d0\\uc11c", "\\ud14d\\uc2a4\\ud2b8\\uc758", "\\ud45c\\ud604\\uc744",
344       "\\uc9c0\\uc815\\ud558\\ub294", "\\uc720\\ub2c8\\ucf54\\ub4dc", "\\ud45c\\uc900\\uc758",
345       "\\uc0ac\\uc6a9\\uc744", "\\uac1c\\ubc1c\\ud558\\uace0", "\\ud655\\uc7a5\\ud558\\uba70",
346       "\\uc7a5\\ub824\\ud558\\uae30", "\\uc704\\ud574",
347       "\\uc138\\uc6cc\\uc84c\\uc2b5\\ub2c8\\ub2e4.", "\\ucf58\\uc18c\\uc2dc\\uc5c4",
348       "\\uba64\\ubc84\\uc27d\\uc740", "\\ucef4\\ud4e8\\ud130\\uc640", "\\uc815\\ubcf4",
349       "\\ucc98\\ub9ac", "\\uc0b0\\uc5c5\\uc5d0", "\\uc885\\uc0ac\\ud558\\uace0", "\\uc788\\ub294",
350       "\\uad11\\ubc94\\uc704\\ud55c", "\\ud68c\\uc0ac", "\\ubc0f", "\\uc870\\uc9c1\\uc758",
351       "\\ubc94\\uc704\\ub97c",
352       "\\ub098\\ud0c0\\ub0c5\\ub2c8\\ub2e4.", "\\ucf58\\uc18c\\uc2dc\\uc5c4\\uc758",
353       "\\uc7ac\\uc815\\uc740", "\\uc804\\uc801\\uc73c\\ub85c", "\\ud68c\\ube44\\uc5d0",
354       "\\uc758\\ud574", "\\ucda9\\ub2f9\\ub429\\ub2c8\\ub2e4.", "\\uc720\\ub2c8\\ucf54\\ub4dc",
355       "\\ucee8\\uc18c\\uc2dc\\uc5c4\\uc5d0\\uc11c\\uc758", "\\uba64\\ubc84\\uc27d\\uc740",
356       "\\uc804", "\\uc138\\uacc4", "\\uc5b4\\ub290", "\\uacf3\\uc5d0\\uc11c\\ub098",
357       "\\uc720\\ub2c8\\ucf54\\ub4dc", "\\ud45c\\uc900\\uc744", "\\uc9c0\\uc6d0\\ud558\\uace0",
358       "\\uadf8", "\\ud655\\uc7a5\\uacfc", "\\uad6c\\ud604\\uc744",
359       "\\uc9c0\\uc6d0\\ud558\\uace0\\uc790\\ud558\\ub294", "\\uc870\\uc9c1\\uacfc",
360       "\\uac1c\\uc778\\uc5d0\\uac8c", "\\uac1c\\ubc29\\ub418\\uc5b4",
361       "\\uc788\\uc2b5\\ub2c8\\ub2e4.",
362 
363       "\\ub354", "\\uc790\\uc138\\ud55c", "\\ub0b4\\uc6a9\\uc740", "\\uc6a9\\uc5b4\\uc9d1,",
364       "\\uc608\\uc81c", "\\uc720\\ub2c8\\ucf54\\ub4dc", "\\uc0ac\\uc6a9", "\\uac00\\ub2a5",
365       "\\uc81c\\ud488,", "\\uae30\\uc220", "\\uc815\\ubcf4", "\\ubc0f", "\\uae30\\ud0c0",
366       "\\uc720\\uc6a9\\ud55c", "\\uc815\\ubcf4\\ub97c",
367       "\\ucc38\\uc870\\ud558\\uc2ed\\uc2dc\\uc624."
368     };
369 
370     enum { WHAT_IS_UNICODE_length = sizeof(WHAT_IS_UNICODE) / sizeof(WHAT_IS_UNICODE[0]) };
371 
372     UParseError parseError;
373     UErrorCode status = U_ZERO_ERROR;
374     Transliterator* latinJamo = Transliterator::createInstance("Latin-Jamo", UTRANS_FORWARD, parseError, status);
375     Transliterator* jamoHangul = Transliterator::createInstance("NFC(NFD)", UTRANS_FORWARD, parseError, status);
376     if (latinJamo == 0 || jamoHangul == 0 || U_FAILURE(status)) {
377         delete latinJamo;
378         delete jamoHangul;
379         dataerrln("FAIL: createInstance returned NULL - %s", u_errorName(status));
380         return;
381     }
382     Transliterator* jamoLatin = latinJamo->createInverse(status);
383     Transliterator* hangulJamo = jamoHangul->createInverse(status);
384     if (jamoLatin == 0 || hangulJamo == 0) {
385         errln("FAIL: createInverse returned NULL");
386         delete latinJamo;
387         delete jamoLatin;
388         delete jamoHangul;
389         delete hangulJamo;
390         return;
391     }
392 
393     Transliterator* tarray[4] =
394         { hangulJamo, jamoLatin, latinJamo, jamoHangul };
395     CompoundTransliterator rt(tarray, 4);
396 
397     UnicodeString buf;
398     int32_t total = 0;
399     int32_t errors = 0;
400     int32_t i;
401     for (i=0; i < WHAT_IS_UNICODE_length; ++i) {
402         ++total;
403         UnicodeString hangul = UnicodeString(WHAT_IS_UNICODE[i], -1, US_INV);
404         hangul = hangul.unescape(); // Parse backslash-u escapes
405         UnicodeString hangulX = hangul;
406         rt.transliterate(hangulX);
407         if (hangul != hangulX) {
408             ++errors;
409             UnicodeString jamo = hangul; hangulJamo->transliterate(jamo);
410             UnicodeString latin = jamo; jamoLatin->transliterate(latin);
411             UnicodeString jamo2 = latin; latinJamo->transliterate(jamo2);
412             UnicodeString hangul2 = jamo2; jamoHangul->transliterate(hangul2);
413 
414             buf.remove(0);
415             buf.append("FAIL: ");
416             if (hangul2 != hangulX) {
417                 buf.append((UnicodeString)"(Weird: " + hangulX + " != " + hangul2 + ")");
418             }
419             // The Hangul-Jamo conversion is not usually the
420             // bug here, so we hide it from display.
421             // Uncomment lines to see the Hangul.
422             buf.append(//hangul + " => " +
423                        jamoToName(jamo) + " => " +
424                        latin + " => " + jamoToName(jamo2)
425                        //+ " => " + hangul2
426                        );
427             errln(prettify(buf));
428         }
429     }
430     if (errors != 0) {
431         errln((UnicodeString)"Test word failures: " + errors + " out of " + total);
432     } else {
433         logln((UnicodeString)"All " + total + " test words passed");
434     }
435 
436     delete latinJamo;
437     delete jamoLatin;
438     delete jamoHangul;
439     delete hangulJamo;
440 }
441 
442 // Override TransliteratorTest
443 void
expectAux(const UnicodeString & tag,const UnicodeString & summary,UBool pass,const UnicodeString & expectedResult)444 JamoTest::expectAux(const UnicodeString& tag,
445                     const UnicodeString& summary, UBool pass,
446                     const UnicodeString& expectedResult) {
447     UnicodeString jsum = jamoToName(summary);
448     UnicodeString jexp = jamoToName(expectedResult);
449     TransliteratorTest::expectAux(tag, jsum, pass, jexp);
450 }
451 
452 const char* JamoTest::JAMO_NAMES_RULES =
453         "'(Gi)' <> \\u1100;"
454         "'(GGi)' <> \\u1101;"
455         "'(Ni)' <> \\u1102;"
456         "'(Di)' <> \\u1103;"
457         "'(DD)' <> \\u1104;"
458         "'(R)' <> \\u1105;"
459         "'(Mi)' <> \\u1106;"
460         "'(Bi)' <> \\u1107;"
461         "'(BB)' <> \\u1108;"
462         "'(Si)' <> \\u1109;"
463         "'(SSi)' <> \\u110A;"
464         "'(IEUNG)' <> \\u110B;"
465         "'(Ji)' <> \\u110C;"
466         "'(JJ)' <> \\u110D;"
467         "'(Ci)' <> \\u110E;"
468         "'(Ki)' <> \\u110F;"
469         "'(Ti)' <> \\u1110;"
470         "'(Pi)' <> \\u1111;"
471         "'(Hi)' <> \\u1112;"
472 
473         "'(A)' <> \\u1161;"
474         "'(AE)' <> \\u1162;"
475         "'(YA)' <> \\u1163;"
476         "'(YAE)' <> \\u1164;"
477         "'(EO)' <> \\u1165;"
478         "'(E)' <> \\u1166;"
479         "'(YEO)' <> \\u1167;"
480         "'(YE)' <> \\u1168;"
481         "'(O)' <> \\u1169;"
482         "'(WA)' <> \\u116A;"
483         "'(WAE)' <> \\u116B;"
484         "'(OE)' <> \\u116C;"
485         "'(YO)' <> \\u116D;"
486         "'(U)' <> \\u116E;"
487         "'(WEO)' <> \\u116F;"
488         "'(WE)' <> \\u1170;"
489         "'(WI)' <> \\u1171;"
490         "'(YU)' <> \\u1172;"
491         "'(EU)' <> \\u1173;"
492         "'(YI)' <> \\u1174;"
493         "'(I)' <> \\u1175;"
494 
495         "'(Gf)' <> \\u11A8;"
496         "'(GGf)' <> \\u11A9;"
497         "'(GS)' <> \\u11AA;"
498         "'(Nf)' <> \\u11AB;"
499         "'(NJ)' <> \\u11AC;"
500         "'(NH)' <> \\u11AD;"
501         "'(Df)' <> \\u11AE;"
502         "'(L)' <> \\u11AF;"
503         "'(LG)' <> \\u11B0;"
504         "'(LM)' <> \\u11B1;"
505         "'(LB)' <> \\u11B2;"
506         "'(LS)' <> \\u11B3;"
507         "'(LT)' <> \\u11B4;"
508         "'(LP)' <> \\u11B5;"
509         "'(LH)' <> \\u11B6;"
510         "'(Mf)' <> \\u11B7;"
511         "'(Bf)' <> \\u11B8;"
512         "'(BS)' <> \\u11B9;"
513         "'(Sf)' <> \\u11BA;"
514         "'(SSf)' <> \\u11BB;"
515         "'(NG)' <> \\u11BC;"
516         "'(Jf)' <> \\u11BD;"
517         "'(Cf)' <> \\u11BE;"
518         "'(Kf)' <> \\u11BF;"
519         "'(Tf)' <> \\u11C0;"
520         "'(Pf)' <> \\u11C1;"
521         "'(Hf)' <> \\u11C2;";
522 
523 /**
524  * Convert short names to actual jamo.  E.g., "x(LG)y" returns
525  * "x\u11B0y".  See JAMO_NAMES for table of names.
526  */
527 UnicodeString
nameToJamo(const UnicodeString & input)528 JamoTest::nameToJamo(const UnicodeString& input) {
529     if (NAME_JAMO == 0) {
530         errln("Failed to create NAME_JAMO");
531         return input;   /* failure! */
532     }
533     UnicodeString result(input);
534     NAME_JAMO->transliterate(result);
535     return result;
536 }
537 
538 /**
539  * Convert jamo to short names.  E.g., "x\u11B0y" returns
540  * "x(LG)y".  See JAMO_NAMES for table of names.
541  */
542 UnicodeString
jamoToName(const UnicodeString & input)543 JamoTest::jamoToName(const UnicodeString& input) {
544     if (NAME_JAMO == 0) {
545         errln("Failed to create NAME_JAMO");
546         return input;   /* failure! */
547     }
548     UnicodeString result(input);
549     JAMO_NAME->transliterate(result);
550     return result;
551 }
552 
553 #endif /* #if !UCONFIG_NO_TRANSLITERATION */
554