1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4 * COPYRIGHT:
5 * Copyright (c) 1997-2016, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 ********************************************************************
8 ************************************************************************
9 * Date Name Description
10 * 02/28/2001 aliu Creation
11 * 03/01/2001 George port to HP/UX
12 ************************************************************************/
13
14 #include "unicode/utypes.h"
15
16 #if !UCONFIG_NO_TRANSLITERATION
17
18 #include "jamotest.h"
19 #include "unicode/utypes.h"
20 #include "unicode/translit.h"
21 #include "cmemory.h"
22 #include "cpdtrans.h"
23
// SEP is the disambiguation separator used by Latin-Jamo and Jamo-Latin.
// Within this file it is referenced only by a commented-out entry of the
// CASE table in TestJamo.
#define SEP "-"
26
JamoTest()27 JamoTest::JamoTest()
28 {
29 UParseError parseError;
30 UErrorCode status = U_ZERO_ERROR;
31 NAME_JAMO = Transliterator::createFromRules("Name-Jamo",
32 UnicodeString(JAMO_NAMES_RULES, -1, US_INV),
33 UTRANS_FORWARD, parseError, status);
34
35 if (U_FAILURE(status)) {
36 delete NAME_JAMO;
37 NAME_JAMO = NULL;
38 }
39 status = U_ZERO_ERROR;
40 JAMO_NAME = Transliterator::createFromRules("Jamo-Name",
41 UnicodeString(JAMO_NAMES_RULES, -1, US_INV),
42 UTRANS_REVERSE, parseError, status);
43 if (U_FAILURE(status)) {
44 delete JAMO_NAME;
45 JAMO_NAME = NULL;
46 }
47 }
48
~JamoTest()49 JamoTest::~JamoTest()
50 {
51 delete NAME_JAMO;
52 delete JAMO_NAME;
53 }
54
/**
 * Test dispatcher: selects one of this class's test methods by index.
 *
 * Each TESTCASE(n, method) entry pairs index n with a test method. The macro
 * is defined outside this file, so its exact use of exec and name is not
 * visible here (presumably it stores the method's name in name and runs the
 * method only when exec is true -- confirm against the macro definition).
 * Any index without an entry sets name to the empty string.
 *
 * @param index zero-based test index
 * @param exec  forwarded to TESTCASE
 * @param name  output: set to "" for an unknown index
 */
void
JamoTest::runIndexedTest(int32_t index, UBool exec,
                         const char* &name, char* /*par*/) {
    switch (index) {
        TESTCASE(0,TestJamo);
        TESTCASE(1,TestRealText);
        TESTCASE(2,TestPiecemeal);
        default: name = ""; break;
    }
}
65
66 void
TestJamo()67 JamoTest::TestJamo() {
68 UParseError parseError;
69 UErrorCode status = U_ZERO_ERROR;
70 Transliterator* latinJamo = Transliterator::createInstance("Latin-Jamo", UTRANS_FORWARD, parseError, status);
71
72 if (latinJamo == 0 || U_FAILURE(status)) {
73 dataerrln("FAIL: createInstance() returned 0 - %s", u_errorName(status));
74 return;
75 }
76
77 Transliterator* jamoLatin = latinJamo->createInverse(status);
78
79 if (jamoLatin == 0) {
80 delete latinJamo;
81 errln("FAIL: createInverse() returned 0");
82 return;
83 }
84
85 static const char* CASE[] = {
86 // Column 1 is the latin text L1 to be fed to Latin-Jamo
87 // to yield output J.
88
89 // Column 2 is expected value of J. J is fed to
90 // Jamo-Latin to yield output L2.
91
92 // Column 3 is expected value of L2. If the expected
93 // value of L2 is L1, then L2 is NULL.
94
95 // add tests for the update to fix problems where it didn't follow the standard
96 // see also http://www.unicode.org/cldr/data/charts/transforms/Latin-Hangul.html
97 "gach", "(Gi)(A)(Cf)", NULL,
98 "geumhui", "(Gi)(EU)(Mf)(Hi)(YI)", NULL,
99 "choe", "(Ci)(OE)", NULL,
100 "wo", "(IEUNG)(WEO)", NULL,
101 "Wonpil", "(IEUNG)(WEO)(Nf)(Pi)(I)(L)", "wonpil",
102 "GIPPEUM", "(Gi)(I)(BB)(EU)(Mf)", "gippeum",
103 "EUTTEUM", "(IEUNG)(EU)(DD)(EU)(Mf)", "eutteum",
104 "KKOTNAE", "(GGi)(O)(Tf)(Ni)(AE)", "kkotnae",
105 "gaga", "(Gi)(A)(Gi)(A)", NULL,
106 "gag-a", "(Gi)(A)(Gf)(IEUNG)(A)", NULL,
107 "gak-ka", "(Gi)(A)(Kf)(Ki)(A)", NULL,
108 "gakka", "(Gi)(A)(GGi)(A)", NULL,
109 "gakk-a", "(Gi)(A)(GGf)(IEUNG)(A)", NULL,
110 "gakkka", "(Gi)(A)(GGf)(Ki)(A)", NULL,
111 "gak-kka", "(Gi)(A)(Kf)(GGi)(A)", NULL,
112
113 "bab", "(Bi)(A)(Bf)", NULL,
114 "babb", "(Bi)(A)(Bf)(Bi)(EU)", "babbeu",
115 "babbba", "(Bi)(A)(Bf)(Bi)(EU)(Bi)(A)", "babbeuba",
116 "bagg", "(Bi)(A)(Gf)(Gi)(EU)", "baggeu",
117 "baggga", "(Bi)(A)(Gf)(Gi)(EU)(Gi)(A)", "baggeuga",
118 //"bag" SEP "gga", "(Bi)(A)(Gf)" SEP "(Gi)(EU)(Gi)(A)", "bag" SEP "geuga",
119 "kabsa", "(Ki)(A)(Bf)(Si)(A)", NULL,
120 "kabska", "(Ki)(A)(BS)(Ki)(A)", NULL,
121 "gabsbka", "(Gi)(A)(BS)(Bi)(EU)(Ki)(A)", "gabsbeuka", // not (Kf)
122 "gga", "(Gi)(EU)(Gi)(A)", "geuga",
123 "bsa", "(Bi)(EU)(Si)(A)", "beusa",
124 "agg", "(IEUNG)(A)(Gf)(Gi)(EU)", "aggeu",
125 "agga", "(IEUNG)(A)(Gf)(Gi)(A)", NULL,
126 "la", "(R)(A)", NULL,
127 "bs", "(Bi)(EU)(Sf)", "beus",
128 "kalgga", "(Ki)(A)(L)(Gi)(EU)(Gi)(A)", "kalgeuga",
129
130 // 'r' in a final position is treated like 'l'
131 "karka", "(Ki)(A)(L)(Ki)(A)", "kalka",
132 };
133
134 enum { CASE_length = UPRV_LENGTHOF(CASE) };
135
136 int32_t i;
137 for (i=0; i<CASE_length; i+=3) {
138 UnicodeString jamo = nameToJamo(CASE[i+1]);
139 if (CASE[i+2] == NULL) {
140 expect(*latinJamo, CASE[i], jamo, *jamoLatin);
141 } else {
142 // Handle case where round-trip is expected to fail
143 expect(*latinJamo, CASE[i], jamo);
144 expect(*jamoLatin, jamo, CASE[i+2]);
145 }
146 }
147
148 delete latinJamo;
149 delete jamoLatin;
150 }
151
152 /**
153 * Test various step-at-a-time transformation of hangul to jamo to
154 * latin and back.
155 */
TestPiecemeal(void)156 void JamoTest::TestPiecemeal(void) {
157 UnicodeString hangul; hangul.append((UChar)0xBC0F);
158 UnicodeString jamo = nameToJamo("(Mi)(I)(Cf)");
159 UnicodeString latin("mic");
160 UnicodeString latin2("mich");
161
162 Transliterator *t = NULL;
163 UErrorCode status = U_ZERO_ERROR;
164
165 t = Transliterator::createInstance("NFD", UTRANS_FORWARD, status); // was Hangul-Jamo
166 if (U_FAILURE(status) || t == 0) {
167 dataerrln("FAIL: createInstance failed");
168 return;
169 }
170 expect(*t, hangul, jamo);
171 delete t;
172
173 t = Transliterator::createInstance("NFC", UTRANS_FORWARD, status); // was Jamo-Hangul
174 if (U_FAILURE(status) || t == 0) {
175 errln("FAIL: createInstance failed");
176 return;
177 }
178 expect(*t, jamo, hangul);
179 delete t;
180
181 t = Transliterator::createInstance("Latin-Jamo", UTRANS_FORWARD, status);
182 if (U_FAILURE(status) || t == 0) {
183 dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
184 return;
185 }
186 expect(*t, latin, jamo);
187 delete t;
188
189 t = Transliterator::createInstance("Jamo-Latin", UTRANS_FORWARD, status);
190 if (U_FAILURE(status) || t == 0) {
191 errln("FAIL: createInstance failed");
192 return;
193 }
194 expect(*t, jamo, latin2);
195 delete t;
196
197 t = Transliterator::createInstance("Hangul-Latin", UTRANS_FORWARD, status);
198 if (U_FAILURE(status) || t == 0) {
199 errln("FAIL: createInstance failed");
200 return;
201 }
202 expect(*t, hangul, latin2);
203 delete t;
204
205 t = Transliterator::createInstance("Latin-Hangul", UTRANS_FORWARD, status);
206 if (U_FAILURE(status) || t == 0) {
207 errln("FAIL: createInstance failed");
208 return;
209 }
210 expect(*t, latin, hangul);
211 delete t;
212
213 t = Transliterator::createInstance("Hangul-Latin; Latin-Jamo", UTRANS_FORWARD, status);
214 if (U_FAILURE(status) || t == 0) {
215 errln("FAIL: createInstance failed");
216 return;
217 }
218 expect(*t, hangul, jamo);
219 delete t;
220
221 t = Transliterator::createInstance("Jamo-Latin; Latin-Hangul", UTRANS_FORWARD, status);
222 if (U_FAILURE(status) || t == 0) {
223 errln("FAIL: createInstance failed");
224 return;
225 }
226 expect(*t, jamo, hangul);
227 delete t;
228
229 t = Transliterator::createInstance("Hangul-Latin; Latin-Hangul", UTRANS_FORWARD, status);
230 if (U_FAILURE(status) || t == 0) {
231 errln("FAIL: createInstance failed");
232 return;
233 }
234 expect(*t, hangul, hangul);
235 delete t;
236 }
237
238 void
TestRealText()239 JamoTest::TestRealText() {
240 // Test text taken from the Unicode web site
241 static const char* const WHAT_IS_UNICODE[] = {
242 "\\uc720\\ub2c8\\ucf54\\ub4dc\\uc5d0", "\\ub300\\ud574", "?",
243
244 "\\uc5b4\\ub5a4", "\\ud50c\\ub7ab\\ud3fc,", "\\uc5b4\\ub5a4",
245 "\\ud504\\ub85c\\uadf8\\ub7a8,", "\\uc5b4\\ub5a4", "\\uc5b8\\uc5b4\\uc5d0\\ub3c4",
246 "\\uc0c1\\uad00\\uc5c6\\uc774", "\\uc720\\ub2c8\\ucf54\\ub4dc\\ub294", "\\ubaa8\\ub4e0",
247 "\\ubb38\\uc790\\uc5d0", "\\ub300\\ud574", "\\uace0\\uc720", "\\ubc88\\ud638\\ub97c",
248 "\\uc81c\\uacf5\\ud569\\ub2c8\\ub2e4.",
249
250 "\\uae30\\ubcf8\\uc801\\uc73c\\ub85c", "\\ucef4\\ud4e8\\ud130\\ub294",
251 "\\uc22b\\uc790\\ub9cc", "\\ucc98\\ub9ac\\ud569\\ub2c8\\ub2e4.", "\\uae00\\uc790\\ub098",
252 "\\ub2e4\\ub978", "\\ubb38\\uc790\\uc5d0\\ub3c4", "\\uc22b\\uc790\\ub97c",
253 "\\uc9c0\\uc815\\ud558\\uc5ec",
254 "\\uc800\\uc7a5\\ud569\\ub2c8\\ub2e4.", "\\uc720\\ub2c8\\ucf54\\ub4dc\\uac00",
255 "\\uac1c\\ubc1c\\ub418\\uae30", "\\uc804\\uc5d0\\ub294", "\\uc774\\ub7ec\\ud55c",
256 "\\uc22b\\uc790\\ub97c", "\\uc9c0\\uc815\\ud558\\uae30", "\\uc704\\ud574", "\\uc218\\ubc31",
257 "\\uac00\\uc9c0\\uc758", "\\ub2e4\\ub978", "\\uae30\\ud638\\ud654",
258 "\\uc2dc\\uc2a4\\ud15c\\uc744",
259 "\\uc0ac\\uc6a9\\ud588\\uc2b5\\ub2c8\\ub2e4.", "\\ub2e8\\uc77c", "\\uae30\\ud638\\ud654",
260 "\\ubc29\\ubc95\\uc73c\\ub85c\\ub294", "\\ubaa8\\ub4e0", "\\ubb38\\uc790\\ub97c",
261 "\\ud3ec\\ud568\\ud560", "\\uc218", "\\uc5c6\\uc5c8\\uc2b5\\ub2c8\\ub2e4.", "\\uc608\\ub97c",
262 "\\ub4e4\\uc5b4", "\\uc720\\ub7fd", "\\uc5f0\\ud569\\uc5d0\\uc11c\\ub9cc",
263 "\\ubcf4\\ub354\\ub77c\\ub3c4", "\\ubaa8\\ub4e0", "\\uac01", "\\ub098\\ub77c\\ubcc4",
264 "\\uc5b8\\uc5b4\\ub97c", "\\ucc98\\ub9ac\\ud558\\ub824\\uba74", "\\uc5ec\\ub7ec",
265 "\\uac1c\\uc758", "\\ub2e4\\ub978", "\\uae30\\ud638\\ud654", "\\ubc29\\ubc95\\uc774",
266 "\\ud544\\uc694\\ud569\\ub2c8\\ub2e4.", "\\uc601\\uc5b4\\uc640", "\\uac19\\uc740",
267 "\\ub2e8\\uc77c", "\\uc5b8\\uc5b4\\uc758", "\\uacbd\\uc6b0\\ub3c4",
268 "\\uacf5\\ud1b5\\uc801\\uc73c\\ub85c", "\\uc0ac\\uc6a9\\ub418\\ub294", "\\ubaa8\\ub4e0",
269 "\\uae00\\uc790,", "\\ubb38\\uc7a5", "\\ubd80\\ud638", "\\ubc0f",
270 "\\ud14c\\ud06c\\ub2c8\\uceec", "\\uae30\\ud638\\uc5d0", "\\ub9de\\ub294", "\\ub2e8\\uc77c",
271 "\\uae30\\ud638\\ud654", "\\ubc29\\ubc95\\uc744", "\\uac16\\uace0", "\\uc788\\uc9c0",
272 "\\ubabb\\ud558\\uc600\\uc2b5\\ub2c8\\ub2e4.",
273
274 "\\uc774\\ub7ec\\ud55c", "\\uae30\\ud638\\ud654", "\\uc2dc\\uc2a4\\ud15c\\uc740",
275 "\\ub610\\ud55c", "\\ub2e4\\ub978", "\\uae30\\ud638\\ud654", "\\uc2dc\\uc2a4\\ud15c\\uacfc",
276 "\\ucda9\\ub3cc\\ud569\\ub2c8\\ub2e4.", "\\uc989", "\\ub450", "\\uac00\\uc9c0",
277 "\\uae30\\ud638\\ud654", "\\ubc29\\ubc95\\uc774", "\\ub450", "\\uac1c\\uc758", "\\ub2e4\\ub978",
278 "\\ubb38\\uc790\\uc5d0", "\\ub300\\ud574", "\\uac19\\uc740", "\\ubc88\\ud638\\ub97c",
279 "\\uc0ac\\uc6a9\\ud558\\uac70\\ub098", "\\uac19\\uc740", "\\ubb38\\uc790\\uc5d0",
280 "\\ub300\\ud574", "\\ub2e4\\ub978", "\\ubc88\\ud638\\ub97c", "\\uc0ac\\uc6a9\\ud560", "\\uc218",
281 "\\uc788\\uc2b5\\ub2c8\\ub2e4.", "\\uc8fc\\uc5b4\\uc9c4", "\\ubaa8\\ub4e0",
282 "\\ucef4\\ud4e8\\ud130(\\ud2b9\\ud788", "\\uc11c\\ubc84)\\ub294", "\\uc11c\\ub85c",
283 "\\ub2e4\\ub978", "\\uc5ec\\ub7ec", "\\uac00\\uc9c0", "\\uae30\\ud638\\ud654",
284 "\\ubc29\\ubc95\\uc744", "\\uc9c0\\uc6d0\\ud574\\uc57c",
285 "\\ud569\\ub2c8\\ub2e4.", "\\uadf8\\ub7ec\\ub098,", "\\ub370\\uc774\\ud130\\ub97c",
286 "\\uc11c\\ub85c", "\\ub2e4\\ub978", "\\uae30\\ud638\\ud654", "\\ubc29\\ubc95\\uc774\\ub098",
287 "\\ud50c\\ub7ab\\ud3fc", "\\uac04\\uc5d0", "\\uc804\\ub2ec\\ud560", "\\ub54c\\ub9c8\\ub2e4",
288 "\\uadf8", "\\ub370\\uc774\\ud130\\ub294", "\\ud56d\\uc0c1", "\\uc190\\uc0c1\\uc758",
289 "\\uc704\\ud5d8\\uc744", "\\uacaa\\uac8c", "\\ub429\\ub2c8\\ub2e4.",
290
291 "\\uc720\\ub2c8\\ucf54\\ub4dc\\ub85c", "\\ubaa8\\ub4e0", "\\uac83\\uc744",
292 "\\ud574\\uacb0\\ud560", "\\uc218", "\\uc788\\uc2b5\\ub2c8\\ub2e4!",
293 "\\uc720\\ub2c8\\ucf54\\ub4dc\\ub294", "\\uc0ac\\uc6a9", "\\uc911\\uc778",
294 "\\ud50c\\ub7ab\\ud3fc,", "\\ud504\\ub85c\\uadf8\\ub7a8,", "\\uc5b8\\uc5b4\\uc5d0",
295 "\\uad00\\uacc4\\uc5c6\\uc774", "\\ubb38\\uc790\\ub9c8\\ub2e4", "\\uace0\\uc720\\ud55c",
296 "\\uc22b\\uc790\\ub97c",
297 "\\uc81c\\uacf5\\ud569\\ub2c8\\ub2e4.", "\\uc720\\ub2c8\\ucf54\\ub4dc",
298 "\\ud45c\\uc900\\uc740", // "Apple, HP, IBM, JustSystem, Microsoft, Oracle, SAP, "
299 // "Sun, Sybase, Unisys "
300 "\\ubc0f", "\\uae30\\ud0c0", "\\uc5ec\\ub7ec",
301 "\\ud68c\\uc0ac\\uc640", "\\uac19\\uc740", "\\uc5c5\\uacc4",
302 "\\uc120\\ub450\\uc8fc\\uc790\\uc5d0", "\\uc758\\ud574",
303 "\\ucc44\\ud0dd\\ub418\\uc5c8\\uc2b5\\ub2c8\\ub2e4.", "\\uc720\\ub2c8\\ucf54\\ub4dc\\ub294",
304 // "XML, Java, ECMAScript(JavaScript), LDAP, CORBA 3.0, WML "
305 "\\ub4f1\\uacfc",
306 "\\uac19\\uc774", "\\ud604\\uc7ac", "\\ub110\\ub9ac", "\\uc0ac\\uc6a9\\ub418\\ub294",
307 "\\ud45c\\uc900\\uc5d0\\uc11c", "\\ud544\\uc694\\ud558\\uba70", "\\uc774\\ub294", // "ISO/IEC",
308 "10646\\uc744", "\\uad6c\\ud604\\ud558\\ub294", "\\uacf5\\uc2dd\\uc801\\uc778",
309 "\\ubc29\\ubc95\\uc785\\ub2c8\\ub2e4.", "\\uc774\\ub294", "\\ub9ce\\uc740", "\\uc6b4\\uc601",
310 "\\uccb4\\uc81c,", "\\uc694\\uc998", "\\uc0ac\\uc6a9\\ub418\\ub294", "\\ubaa8\\ub4e0",
311 "\\ube0c\\ub77c\\uc6b0\\uc800", "\\ubc0f", "\\uae30\\ud0c0", "\\ub9ce\\uc740",
312 "\\uc81c\\ud488\\uc5d0\\uc11c",
313 "\\uc9c0\\uc6d0\\ub429\\ub2c8\\ub2e4.", "\\uc720\\ub2c8\\ucf54\\ub4dc",
314 "\\ud45c\\uc900\\uc758", "\\ubd80\\uc0c1\\uacfc", "\\uc774\\ub97c",
315 "\\uc9c0\\uc6d0\\ud558\\ub294", "\\ub3c4\\uad6c\\uc758", "\\uac00\\uc6a9\\uc131\\uc740",
316 "\\ucd5c\\uadfc", "\\uc804", "\\uc138\\uacc4\\uc5d0", "\\ubd88\\uace0", "\\uc788\\ub294",
317 "\\uae30\\uc220", "\\uacbd\\ud5a5\\uc5d0\\uc11c", "\\uac00\\uc7a5", "\\uc911\\uc694\\ud55c",
318 "\\ubd80\\ubd84\\uc744", "\\ucc28\\uc9c0\\ud558\\uace0", "\\uc788\\uc2b5\\ub2c8\\ub2e4.",
319
320 "\\uc720\\ub2c8\\ucf54\\ub4dc\\ub97c",
321 // Replaced a hyphen with a space to make the test case work with CLDR1.5
322 //"\\ud074\\ub77c\\uc774\\uc5b8\\ud2b8-\\uc11c\\ubc84", "\\ub610\\ub294",
323 "\\ud074\\ub77c\\uc774\\uc5b8\\ud2b8 \\uc11c\\ubc84", "\\ub610\\ub294",
324 // Replaced a hyphen with a space.
325 //"\\ub2e4\\uc911-\\uc5f0\\uacb0", "\\uc751\\uc6a9", "\\ud504\\ub85c\\uadf8\\ub7a8\\uacfc",
326 "\\ub2e4\\uc911 \\uc5f0\\uacb0", "\\uc751\\uc6a9", "\\ud504\\ub85c\\uadf8\\ub7a8\\uacfc",
327 "\\uc6f9", "\\uc0ac\\uc774\\ud2b8\\uc5d0", "\\ud1b5\\ud569\\ud558\\uba74",
328 "\\ub808\\uac70\\uc2dc", "\\ubb38\\uc790", "\\uc138\\ud2b8", "\\uc0ac\\uc6a9\\uc5d0",
329 "\\uc788\\uc5b4\\uc11c", "\\uc0c1\\ub2f9\\ud55c", "\\ube44\\uc6a9", "\\uc808\\uac10",
330 "\\ud6a8\\uacfc\\uac00",
331 "\\ub098\\ud0c0\\ub0a9\\ub2c8\\ub2e4.", "\\uc720\\ub2c8\\ucf54\\ub4dc\\ub97c",
332 "\\ud1b5\\ud574", "\\ub9ac\\uc5d4\\uc9c0\\ub2c8\\uc5b4\\ub9c1", "\\uc5c6\\uc774",
333 "\\ub2e4\\uc911", "\\ud50c\\ub7ab\\ud3fc,", "\\uc5b8\\uc5b4", "\\ubc0f", "\\uad6d\\uac00",
334 "\\uac04\\uc5d0", "\\ub2e8\\uc77c", "\\uc18c\\ud504\\ud2b8\\uc6e8\\uc5b4",
335 "\\ud50c\\ub7ab\\ud3fc", "\\ub610\\ub294", "\\ub2e8\\uc77c", "\\uc6f9",
336 "\\uc0ac\\uc774\\ud2b8\\ub97c", "\\ubaa9\\ud45c\\ub85c", "\\uc0bc\\uc744", "\\uc218",
337 "\\uc788\\uc2b5\\ub2c8\\ub2e4.", "\\uc774\\ub97c", "\\uc0ac\\uc6a9\\ud558\\uba74",
338 "\\ub370\\uc774\\ud130\\ub97c", "\\uc190\\uc0c1", "\\uc5c6\\uc774", "\\uc5ec\\ub7ec",
339 "\\uc2dc\\uc2a4\\ud15c\\uc744", "\\ud1b5\\ud574", "\\uc804\\uc1a1\\ud560", "\\uc218",
340 "\\uc788\\uc2b5\\ub2c8\\ub2e4.",
341
342 "\\uc720\\ub2c8\\ucf54\\ub4dc", "\\ucf58\\uc18c\\uc2dc\\uc5c4\\uc5d0", "\\ub300\\ud574",
343 "\\uc720\\ub2c8\\ucf54\\ub4dc", "\\ucf58\\uc18c\\uc2dc\\uc5c4\\uc740",
344 "\\ube44\\uc601\\ub9ac", "\\uc870\\uc9c1\\uc73c\\ub85c\\uc11c", "\\ud604\\ub300",
345 "\\uc18c\\ud504\\ud2b8\\uc6e8\\uc5b4", "\\uc81c\\ud488\\uacfc",
346 "\\ud45c\\uc900\\uc5d0\\uc11c", "\\ud14d\\uc2a4\\ud2b8\\uc758", "\\ud45c\\ud604\\uc744",
347 "\\uc9c0\\uc815\\ud558\\ub294", "\\uc720\\ub2c8\\ucf54\\ub4dc", "\\ud45c\\uc900\\uc758",
348 "\\uc0ac\\uc6a9\\uc744", "\\uac1c\\ubc1c\\ud558\\uace0", "\\ud655\\uc7a5\\ud558\\uba70",
349 "\\uc7a5\\ub824\\ud558\\uae30", "\\uc704\\ud574",
350 "\\uc138\\uc6cc\\uc84c\\uc2b5\\ub2c8\\ub2e4.", "\\ucf58\\uc18c\\uc2dc\\uc5c4",
351 "\\uba64\\ubc84\\uc27d\\uc740", "\\ucef4\\ud4e8\\ud130\\uc640", "\\uc815\\ubcf4",
352 "\\ucc98\\ub9ac", "\\uc0b0\\uc5c5\\uc5d0", "\\uc885\\uc0ac\\ud558\\uace0", "\\uc788\\ub294",
353 "\\uad11\\ubc94\\uc704\\ud55c", "\\ud68c\\uc0ac", "\\ubc0f", "\\uc870\\uc9c1\\uc758",
354 "\\ubc94\\uc704\\ub97c",
355 "\\ub098\\ud0c0\\ub0c5\\ub2c8\\ub2e4.", "\\ucf58\\uc18c\\uc2dc\\uc5c4\\uc758",
356 "\\uc7ac\\uc815\\uc740", "\\uc804\\uc801\\uc73c\\ub85c", "\\ud68c\\ube44\\uc5d0",
357 "\\uc758\\ud574", "\\ucda9\\ub2f9\\ub429\\ub2c8\\ub2e4.", "\\uc720\\ub2c8\\ucf54\\ub4dc",
358 "\\ucee8\\uc18c\\uc2dc\\uc5c4\\uc5d0\\uc11c\\uc758", "\\uba64\\ubc84\\uc27d\\uc740",
359 "\\uc804", "\\uc138\\uacc4", "\\uc5b4\\ub290", "\\uacf3\\uc5d0\\uc11c\\ub098",
360 "\\uc720\\ub2c8\\ucf54\\ub4dc", "\\ud45c\\uc900\\uc744", "\\uc9c0\\uc6d0\\ud558\\uace0",
361 "\\uadf8", "\\ud655\\uc7a5\\uacfc", "\\uad6c\\ud604\\uc744",
362 "\\uc9c0\\uc6d0\\ud558\\uace0\\uc790\\ud558\\ub294", "\\uc870\\uc9c1\\uacfc",
363 "\\uac1c\\uc778\\uc5d0\\uac8c", "\\uac1c\\ubc29\\ub418\\uc5b4",
364 "\\uc788\\uc2b5\\ub2c8\\ub2e4.",
365
366 "\\ub354", "\\uc790\\uc138\\ud55c", "\\ub0b4\\uc6a9\\uc740", "\\uc6a9\\uc5b4\\uc9d1,",
367 "\\uc608\\uc81c", "\\uc720\\ub2c8\\ucf54\\ub4dc", "\\uc0ac\\uc6a9", "\\uac00\\ub2a5",
368 "\\uc81c\\ud488,", "\\uae30\\uc220", "\\uc815\\ubcf4", "\\ubc0f", "\\uae30\\ud0c0",
369 "\\uc720\\uc6a9\\ud55c", "\\uc815\\ubcf4\\ub97c",
370 "\\ucc38\\uc870\\ud558\\uc2ed\\uc2dc\\uc624."
371 };
372
373 enum { WHAT_IS_UNICODE_length = UPRV_LENGTHOF(WHAT_IS_UNICODE) };
374
375 UParseError parseError;
376 UErrorCode status = U_ZERO_ERROR;
377 Transliterator* latinJamo = Transliterator::createInstance("Latin-Jamo", UTRANS_FORWARD, parseError, status);
378 Transliterator* jamoHangul = Transliterator::createInstance("NFC(NFD)", UTRANS_FORWARD, parseError, status);
379 if (latinJamo == 0 || jamoHangul == 0 || U_FAILURE(status)) {
380 delete latinJamo;
381 delete jamoHangul;
382 dataerrln("FAIL: createInstance returned NULL - %s", u_errorName(status));
383 return;
384 }
385 Transliterator* jamoLatin = latinJamo->createInverse(status);
386 Transliterator* hangulJamo = jamoHangul->createInverse(status);
387 if (jamoLatin == 0 || hangulJamo == 0) {
388 errln("FAIL: createInverse returned NULL");
389 delete latinJamo;
390 delete jamoLatin;
391 delete jamoHangul;
392 delete hangulJamo;
393 return;
394 }
395
396 Transliterator* tarray[4] =
397 { hangulJamo, jamoLatin, latinJamo, jamoHangul };
398 CompoundTransliterator rt(tarray, 4);
399
400 UnicodeString buf;
401 int32_t total = 0;
402 int32_t errors = 0;
403 int32_t i;
404 for (i=0; i < WHAT_IS_UNICODE_length; ++i) {
405 ++total;
406 UnicodeString hangul = UnicodeString(WHAT_IS_UNICODE[i], -1, US_INV);
407 hangul = hangul.unescape(); // Parse backslash-u escapes
408 UnicodeString hangulX = hangul;
409 rt.transliterate(hangulX);
410 if (hangul != hangulX) {
411 ++errors;
412 UnicodeString jamo = hangul; hangulJamo->transliterate(jamo);
413 UnicodeString latin = jamo; jamoLatin->transliterate(latin);
414 UnicodeString jamo2 = latin; latinJamo->transliterate(jamo2);
415 UnicodeString hangul2 = jamo2; jamoHangul->transliterate(hangul2);
416
417 buf.remove(0);
418 buf.append("FAIL: ");
419 if (hangul2 != hangulX) {
420 buf.append((UnicodeString)"(Weird: " + hangulX + " != " + hangul2 + ")");
421 }
422 // The Hangul-Jamo conversion is not usually the
423 // bug here, so we hide it from display.
424 // Uncomment lines to see the Hangul.
425 buf.append(//hangul + " => " +
426 jamoToName(jamo) + " => " +
427 latin + " => " + jamoToName(jamo2)
428 //+ " => " + hangul2
429 );
430 errln(prettify(buf));
431 }
432 }
433 if (errors != 0) {
434 errln((UnicodeString)"Test word failures: " + errors + " out of " + total);
435 } else {
436 logln((UnicodeString)"All " + total + " test words passed");
437 }
438
439 delete latinJamo;
440 delete jamoLatin;
441 delete jamoHangul;
442 delete hangulJamo;
443 }
444
445 // Override TransliteratorTest
446 void
expectAux(const UnicodeString & tag,const UnicodeString & summary,UBool pass,const UnicodeString & expectedResult)447 JamoTest::expectAux(const UnicodeString& tag,
448 const UnicodeString& summary, UBool pass,
449 const UnicodeString& expectedResult) {
450 UnicodeString jsum = jamoToName(summary);
451 UnicodeString jexp = jamoToName(expectedResult);
452 TransliteratorTest::expectAux(tag, jsum, pass, jexp);
453 }
454
/**
 * Transliterator rules relating parenthesized short names to single
 * conjoining jamo code points. Every rule is bidirectional ("<>"), so the
 * same text serves both directions: the constructor compiles it forward as
 * "Name-Jamo" and in reverse as "Jamo-Name".
 *
 * The rules fall into three code point ranges, separated by blank lines.
 */
const char* JamoTest::JAMO_NAMES_RULES =
    // U+1100..U+1112: leading (initial) consonants
    "'(Gi)' <> \\u1100;"
    "'(GGi)' <> \\u1101;"
    "'(Ni)' <> \\u1102;"
    "'(Di)' <> \\u1103;"
    "'(DD)' <> \\u1104;"
    "'(R)' <> \\u1105;"
    "'(Mi)' <> \\u1106;"
    "'(Bi)' <> \\u1107;"
    "'(BB)' <> \\u1108;"
    "'(Si)' <> \\u1109;"
    "'(SSi)' <> \\u110A;"
    "'(IEUNG)' <> \\u110B;"
    "'(Ji)' <> \\u110C;"
    "'(JJ)' <> \\u110D;"
    "'(Ci)' <> \\u110E;"
    "'(Ki)' <> \\u110F;"
    "'(Ti)' <> \\u1110;"
    "'(Pi)' <> \\u1111;"
    "'(Hi)' <> \\u1112;"

    // U+1161..U+1175: vowels
    "'(A)' <> \\u1161;"
    "'(AE)' <> \\u1162;"
    "'(YA)' <> \\u1163;"
    "'(YAE)' <> \\u1164;"
    "'(EO)' <> \\u1165;"
    "'(E)' <> \\u1166;"
    "'(YEO)' <> \\u1167;"
    "'(YE)' <> \\u1168;"
    "'(O)' <> \\u1169;"
    "'(WA)' <> \\u116A;"
    "'(WAE)' <> \\u116B;"
    "'(OE)' <> \\u116C;"
    "'(YO)' <> \\u116D;"
    "'(U)' <> \\u116E;"
    "'(WEO)' <> \\u116F;"
    "'(WE)' <> \\u1170;"
    "'(WI)' <> \\u1171;"
    "'(YU)' <> \\u1172;"
    "'(EU)' <> \\u1173;"
    "'(YI)' <> \\u1174;"
    "'(I)' <> \\u1175;"

    // U+11A8..U+11C2: trailing (final) consonants
    "'(Gf)' <> \\u11A8;"
    "'(GGf)' <> \\u11A9;"
    "'(GS)' <> \\u11AA;"
    "'(Nf)' <> \\u11AB;"
    "'(NJ)' <> \\u11AC;"
    "'(NH)' <> \\u11AD;"
    "'(Df)' <> \\u11AE;"
    "'(L)' <> \\u11AF;"
    "'(LG)' <> \\u11B0;"
    "'(LM)' <> \\u11B1;"
    "'(LB)' <> \\u11B2;"
    "'(LS)' <> \\u11B3;"
    "'(LT)' <> \\u11B4;"
    "'(LP)' <> \\u11B5;"
    "'(LH)' <> \\u11B6;"
    "'(Mf)' <> \\u11B7;"
    "'(Bf)' <> \\u11B8;"
    "'(BS)' <> \\u11B9;"
    "'(Sf)' <> \\u11BA;"
    "'(SSf)' <> \\u11BB;"
    "'(NG)' <> \\u11BC;"
    "'(Jf)' <> \\u11BD;"
    "'(Cf)' <> \\u11BE;"
    "'(Kf)' <> \\u11BF;"
    "'(Tf)' <> \\u11C0;"
    "'(Pf)' <> \\u11C1;"
    "'(Hf)' <> \\u11C2;";
525
526 /**
527 * Convert short names to actual jamo. E.g., "x(LG)y" returns
528 * "x\u11B0y". See JAMO_NAMES for table of names.
529 */
530 UnicodeString
nameToJamo(const UnicodeString & input)531 JamoTest::nameToJamo(const UnicodeString& input) {
532 if (NAME_JAMO == 0) {
533 errln("Failed to create NAME_JAMO");
534 return input; /* failure! */
535 }
536 UnicodeString result(input);
537 NAME_JAMO->transliterate(result);
538 return result;
539 }
540
541 /**
542 * Convert jamo to short names. E.g., "x\u11B0y" returns
543 * "x(LG)y". See JAMO_NAMES for table of names.
544 */
545 UnicodeString
jamoToName(const UnicodeString & input)546 JamoTest::jamoToName(const UnicodeString& input) {
547 if (NAME_JAMO == 0) {
548 errln("Failed to create NAME_JAMO");
549 return input; /* failure! */
550 }
551 UnicodeString result(input);
552 JAMO_NAME->transliterate(result);
553 return result;
554 }
555
556 #endif /* #if !UCONFIG_NO_TRANSLITERATION */
557