1 // © 2018 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3
4 #include <memory>
5
6 #include "cmemory.h"
7 #include "cstring.h"
8 #include "localebuildertest.h"
9 #include "unicode/localebuilder.h"
10 #include "unicode/strenum.h"
11
LocaleBuilderTest()12 LocaleBuilderTest::LocaleBuilderTest()
13 {
14 }
15
~LocaleBuilderTest()16 LocaleBuilderTest::~LocaleBuilderTest()
17 {
18 }
19
// Test-framework entry point that registers every test method of this class.
//
// The body consists solely of TESTCASE_AUTO* macros, which are defined outside
// this file. NOTE(review): presumably they consume `index`, `exec` and `name`
// and assign each TESTCASE_AUTO entry the next index in listing order --
// confirm against the macro definitions before reordering entries.
//
// The last parameter (par) is unused.
void LocaleBuilderTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ )
{
    TESTCASE_AUTO_BEGIN;
    TESTCASE_AUTO(TestAddRemoveUnicodeLocaleAttribute);
    TESTCASE_AUTO(TestAddRemoveUnicodeLocaleAttributeWellFormed);
    TESTCASE_AUTO(TestAddUnicodeLocaleAttributeIllFormed);
    TESTCASE_AUTO(TestLocaleBuilder);
    TESTCASE_AUTO(TestLocaleBuilderBasic);
    TESTCASE_AUTO(TestPosixCases);
    TESTCASE_AUTO(TestSetExtensionOthers);
    TESTCASE_AUTO(TestSetExtensionPU);
    TESTCASE_AUTO(TestSetExtensionT);
    TESTCASE_AUTO(TestSetExtensionU);
    TESTCASE_AUTO(TestSetExtensionValidateOthersIllFormed);
    TESTCASE_AUTO(TestSetExtensionValidateOthersWellFormed);
    TESTCASE_AUTO(TestSetExtensionValidatePUIllFormed);
    TESTCASE_AUTO(TestSetExtensionValidatePUWellFormed);
    TESTCASE_AUTO(TestSetExtensionValidateTIllFormed);
    TESTCASE_AUTO(TestSetExtensionValidateTWellFormed);
    TESTCASE_AUTO(TestSetExtensionValidateUIllFormed);
    TESTCASE_AUTO(TestSetExtensionValidateUWellFormed);
    TESTCASE_AUTO(TestSetLanguageIllFormed);
    TESTCASE_AUTO(TestSetLanguageWellFormed);
    TESTCASE_AUTO(TestSetLocale);
    TESTCASE_AUTO(TestSetRegionIllFormed);
    TESTCASE_AUTO(TestSetRegionWellFormed);
    TESTCASE_AUTO(TestSetScriptIllFormed);
    TESTCASE_AUTO(TestSetScriptWellFormed);
    TESTCASE_AUTO(TestSetUnicodeLocaleKeywordIllFormedKey);
    TESTCASE_AUTO(TestSetUnicodeLocaleKeywordIllFormedValue);
    TESTCASE_AUTO(TestSetUnicodeLocaleKeywordWellFormed);
    TESTCASE_AUTO(TestSetVariantIllFormed);
    TESTCASE_AUTO(TestSetVariantWellFormed);
    TESTCASE_AUTO_END;
}
55
Verify(LocaleBuilder & bld,const char * expected,const char * msg)56 void LocaleBuilderTest::Verify(LocaleBuilder& bld, const char* expected, const char* msg) {
57 UErrorCode status = U_ZERO_ERROR;
58 UErrorCode copyStatus = U_ZERO_ERROR;
59 UErrorCode errorStatus = U_ILLEGAL_ARGUMENT_ERROR;
60 if (bld.copyErrorTo(copyStatus)) {
61 errln(msg, u_errorName(copyStatus));
62 }
63 if (!bld.copyErrorTo(errorStatus) || errorStatus != U_ILLEGAL_ARGUMENT_ERROR) {
64 errln("Should always get the previous error and return FALSE");
65 }
66 Locale loc = bld.build(status);
67 if (U_FAILURE(status)) {
68 errln(msg, u_errorName(status));
69 }
70 if (status != copyStatus) {
71 errln(msg, u_errorName(status));
72 }
73 std::string tag = loc.toLanguageTag<std::string>(status);
74 if (U_FAILURE(status)) {
75 errln("loc.toLanguageTag() got Error: %s\n",
76 u_errorName(status));
77 }
78 if (tag != expected) {
79 errln("should get \"%s\", but got \"%s\"\n", expected, tag.c_str());
80 }
81 }
82
TestLocaleBuilder()83 void LocaleBuilderTest::TestLocaleBuilder() {
84 // The following test data are copy from
85 // icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/LocaleBuilderTest.java
86 // "L": +1 = language
87 // "S": +1 = script
88 // "R": +1 = region
89 // "V": +1 = variant
90 // "K": +1 = Unicode locale key / +2 = Unicode locale type
91 // "A": +1 = Unicode locale attribute
92 // "E": +1 = extension letter / +2 = extension value
93 // "P": +1 = private use
94 // "U": +1 = ULocale
95 // "B": +1 = BCP47 language tag
96 // "C": Clear all
97 // "N": Clear extensions
98 // "D": +1 = Unicode locale attribute to be removed
99 // "X": indicates an exception must be thrown
100 // "T": +1 = expected language tag / +2 = expected locale string
101 const char* TESTCASES[][14] = {
102 {"L", "en", "R", "us", "T", "en-US", "en_US"},
103 {"L", "en", "R", "CA", "L", nullptr, "T", "und-CA", "_CA"},
104 {"L", "en", "R", "CA", "L", "", "T", "und-CA", "_CA"},
105 {"L", "en", "R", "FR", "L", "fr", "T", "fr-FR", "fr_FR"},
106 {"L", "123", "X"},
107 {"R", "us", "T", "und-US", "_US"},
108 {"R", "usa", "X"},
109 {"R", "123", "L", "it", "R", nullptr, "T", "it", "it"},
110 {"R", "123", "L", "it", "R", "", "T", "it", "it"},
111 {"R", "123", "L", "en", "T", "en-123", "en_123"},
112 {"S", "LATN", "L", "DE", "T", "de-Latn", "de_Latn"},
113 {"L", "De", "S", "latn", "R", "de", "S", "", "T", "de-DE", "de_DE"},
114 {"L", "De", "S", "Arab", "R", "de", "S", nullptr, "T", "de-DE", "de_DE"},
115 {"S", "latin", "X"},
116 {"V", "1234", "L", "en", "T", "en-1234", "en__1234"},
117 {"V", "1234", "L", "en", "V", "5678", "T", "en-5678", "en__5678"},
118 {"V", "1234", "L", "en", "V", nullptr, "T", "en", "en"},
119 {"V", "1234", "L", "en", "V", "", "T", "en", "en"},
120 {"V", "123", "X"},
121 {"U", "en_US", "T", "en-US", "en_US"},
122 {"U", "en_US_WIN", "X"},
123 {"B", "fr-FR-1606nict-u-ca-gregory-x-test", "T",
124 "fr-FR-1606nict-u-ca-gregory-x-test",
125 "fr_FR_1606NICT@calendar=gregorian;x=test"},
126 {"B", "ab-cde-fghij", "T", "cde-fghij", "cde__FGHIJ"},
127 {"B", "und-CA", "T", "und-CA", "_CA"},
128 // Blocked by ICU-20327
129 // {"B", "en-US-x-test-lvariant-var", "T", "en-US-x-test-lvariant-var",
130 // "en_US_VAR@x=test"},
131 {"B", "en-US-VAR", "X"},
132 {"U", "ja_JP@calendar=japanese;currency=JPY", "L", "ko", "T",
133 "ko-JP-u-ca-japanese-cu-jpy", "ko_JP@calendar=japanese;currency=JPY"},
134 {"U", "ja_JP@calendar=japanese;currency=JPY", "K", "ca", nullptr, "T",
135 "ja-JP-u-cu-jpy", "ja_JP@currency=JPY"},
136 {"U", "ja_JP@calendar=japanese;currency=JPY", "E", "u",
137 "attr1-ca-gregory", "T", "ja-JP-u-attr1-ca-gregory",
138 "ja_JP@attribute=attr1;calendar=gregorian"},
139 {"U", "en@colnumeric=yes", "K", "kn", "true", "T", "en-u-kn",
140 "en@colnumeric=yes"},
141 {"L", "th", "R", "th", "K", "nu", "thai", "T", "th-TH-u-nu-thai",
142 "th_TH@numbers=thai"},
143 {"U", "zh_Hans", "R", "sg", "K", "ca", "badcalendar", "X"},
144 {"U", "zh_Hans", "R", "sg", "K", "cal", "gregory", "X"},
145 {"E", "z", "ExtZ", "L", "en", "T", "en-z-extz", "en@z=extz"},
146 {"E", "z", "ExtZ", "L", "en", "E", "z", "", "T", "en", "en"},
147 {"E", "z", "ExtZ", "L", "en", "E", "z", nullptr, "T", "en", "en"},
148 {"E", "a", "x", "X"},
149 {"E", "a", "abc_def", "T", "und-a-abc-def", "@a=abc-def"},
150 // Design limitation - typeless u extension keyword 0a below is interpreted as a boolean value true/yes.
151 // With the legacy keyword syntax, "yes" is used for such boolean value instead of "true".
152 // However, once the legacy keyword is translated back to BCP 47 u extension, key "0a" is unknown,
153 // so "yes" is preserved - not mapped to "true". We could change the code to automatically transform
154 // key = alphanum alpha
155 {"L", "en", "E", "u", "bbb-aaa-0a", "T", "en-u-aaa-bbb-0a",
156 "en@0a=yes;attribute=aaa-bbb"},
157 {"L", "fr", "R", "FR", "P", "Yoshito-ICU", "T", "fr-FR-x-yoshito-icu",
158 "fr_FR@x=yoshito-icu"},
159 {"L", "ja", "R", "jp", "K", "ca", "japanese", "T", "ja-JP-u-ca-japanese",
160 "ja_JP@calendar=japanese"},
161 {"K", "co", "PHONEBK", "K", "ca", "gregory", "L", "De", "T",
162 "de-u-ca-gregory-co-phonebk", "de@calendar=gregorian;collation=phonebook"},
163 {"E", "o", "OPQR", "E", "a", "aBcD", "T", "und-a-abcd-o-opqr", "@a=abcd;o=opqr"},
164 {"E", "u", "nu-thai-ca-gregory", "L", "TH", "T", "th-u-ca-gregory-nu-thai",
165 "th@calendar=gregorian;numbers=thai"},
166 {"L", "en", "K", "tz", "usnyc", "R", "US", "T", "en-US-u-tz-usnyc",
167 "en_US@timezone=America/New_York"},
168 {"L", "de", "K", "co", "phonebk", "K", "ks", "level1", "K", "kk",
169 "true", "T", "de-u-co-phonebk-kk-ks-level1",
170 "de@collation=phonebook;colnormalization=yes;colstrength=primary"},
171 {"L", "en", "R", "US", "K", "ca", "gregory", "T", "en-US-u-ca-gregory",
172 "en_US@calendar=gregorian"},
173 {"L", "en", "R", "US", "K", "cal", "gregory", "X"},
174 {"L", "en", "R", "US", "K", "ca", "gregorian", "X"},
175 {"L", "en", "R", "US", "K", "kn", "true", "T", "en-US-u-kn",
176 "en_US@colnumeric=yes"},
177 {"B", "de-DE-u-co-phonebk", "C", "L", "pt", "T", "pt", "pt"},
178 {"B", "ja-jp-u-ca-japanese", "N", "T", "ja-JP", "ja_JP"},
179 {"B", "es-u-def-abc-co-trad", "A", "hij", "D", "def", "T",
180 "es-u-abc-hij-co-trad", "es@attribute=abc-hij;collation=traditional"},
181 {"B", "es-u-def-abc-co-trad", "A", "hij", "D", "def", "D", "def", "T",
182 "es-u-abc-hij-co-trad", "es@attribute=abc-hij;collation=traditional"},
183 {"L", "en", "A", "aa", "X"},
184 {"B", "fr-u-attr1-cu-eur", "D", "attribute1", "X"},
185 };
186 UErrorCode status = U_ZERO_ERROR;
187 LocaleBuilder bld;
188 for (int tidx = 0; tidx < UPRV_LENGTHOF(TESTCASES); tidx++) {
189 const char* (&testCase)[14] = TESTCASES[tidx];
190 std::string actions;
191 for (int p = 0; p < UPRV_LENGTHOF(testCase); p++) {
192 if (testCase[p] == nullptr) {
193 actions += " (nullptr)";
194 break;
195 }
196 if (p > 0) actions += " ";
197 actions += testCase[p];
198 }
199 int i = 0;
200 const char* method;
201 status = U_ZERO_ERROR;
202 bld.clear();
203 while (true) {
204 status = U_ZERO_ERROR;
205 UErrorCode copyStatus = U_ZERO_ERROR;
206 method = testCase[i++];
207 if (strcmp("L", method) == 0) {
208 bld.setLanguage(testCase[i++]);
209 bld.copyErrorTo(copyStatus);
210 bld.build(status);
211 } else if (strcmp("S", method) == 0) {
212 bld.setScript(testCase[i++]);
213 bld.copyErrorTo(copyStatus);
214 bld.build(status);
215 } else if (strcmp("R", method) == 0) {
216 bld.setRegion(testCase[i++]);
217 bld.copyErrorTo(copyStatus);
218 bld.build(status);
219 } else if (strcmp("V", method) == 0) {
220 bld.setVariant(testCase[i++]);
221 bld.copyErrorTo(copyStatus);
222 bld.build(status);
223 } else if (strcmp("K", method) == 0) {
224 const char* key = testCase[i++];
225 const char* type = testCase[i++];
226 bld.setUnicodeLocaleKeyword(key, type);
227 bld.copyErrorTo(copyStatus);
228 bld.build(status);
229 } else if (strcmp("A", method) == 0) {
230 bld.addUnicodeLocaleAttribute(testCase[i++]);
231 bld.copyErrorTo(copyStatus);
232 bld.build(status);
233 } else if (strcmp("E", method) == 0) {
234 const char* key = testCase[i++];
235 const char* value = testCase[i++];
236 bld.setExtension(key[0], value);
237 bld.copyErrorTo(copyStatus);
238 bld.build(status);
239 } else if (strcmp("P", method) == 0) {
240 bld.setExtension('x', testCase[i++]);
241 bld.copyErrorTo(copyStatus);
242 bld.build(status);
243 } else if (strcmp("U", method) == 0) {
244 bld.setLocale(Locale(testCase[i++]));
245 bld.copyErrorTo(copyStatus);
246 bld.build(status);
247 } else if (strcmp("B", method) == 0) {
248 bld.setLanguageTag(testCase[i++]);
249 bld.copyErrorTo(copyStatus);
250 bld.build(status);
251 }
252 // clear / remove
253 else if (strcmp("C", method) == 0) {
254 bld.clear();
255 bld.copyErrorTo(copyStatus);
256 bld.build(status);
257 } else if (strcmp("N", method) == 0) {
258 bld.clearExtensions();
259 bld.copyErrorTo(copyStatus);
260 bld.build(status);
261 } else if (strcmp("D", method) == 0) {
262 bld.removeUnicodeLocaleAttribute(testCase[i++]);
263 bld.copyErrorTo(copyStatus);
264 bld.build(status);
265 }
266 // result
267 else if (strcmp("X", method) == 0) {
268 if (U_SUCCESS(status)) {
269 errln("FAIL: No error return - test case: %s", actions.c_str());
270 }
271 } else if (strcmp("T", method) == 0) {
272 status = U_ZERO_ERROR;
273 Locale loc = bld.build(status);
274 if (status != copyStatus) {
275 errln("copyErrorTo not matching");
276 }
277 if (U_FAILURE(status) ||
278 strcmp(loc.getName(), testCase[i + 1]) != 0) {
279 errln("FAIL: Wrong locale ID - %s %s %s", loc.getName(),
280 " for test case: ", actions.c_str());
281 }
282 std::string langtag = loc.toLanguageTag<std::string>(status);
283 if (U_FAILURE(status) || langtag != testCase[i]) {
284 errln("FAIL: Wrong language tag - %s %s %s", langtag.c_str(),
285 " for test case: ", actions.c_str());
286 }
287 break;
288 } else {
289 // Unknow test method
290 errln("Unknown test case method: There is an error in the test case data.");
291 break;
292 }
293 if (status != copyStatus) {
294 errln("copyErrorTo not matching");
295 }
296 if (U_FAILURE(status)) {
297 if (strcmp("X", testCase[i]) == 0) {
298 // This failure is expected
299 break;
300 } else {
301 errln("FAIL: U_ILLEGAL_ARGUMENT_ERROR at offset %d %s %s", i,
302 " in test case: ", actions.c_str());
303 break;
304 }
305 }
306 if (strcmp("T", method) == 0) {
307 break;
308 }
309 } // while(true)
310 } // for TESTCASES
311 }
312
TestLocaleBuilderBasic()313 void LocaleBuilderTest::TestLocaleBuilderBasic() {
314 LocaleBuilder bld;
315 bld.setLanguage("zh");
316 Verify(bld, "zh", "setLanguage('zh') got Error: %s\n");
317
318 bld.setScript("Hant");
319 Verify(bld, "zh-Hant", "setScript('Hant') got Error: %s\n");
320
321 bld.setRegion("SG");
322 Verify(bld, "zh-Hant-SG", "setRegion('SG') got Error: %s\n");
323
324 bld.setRegion("HK");
325 bld.setScript("Hans");
326 Verify(bld, "zh-Hans-HK",
327 "setRegion('HK') and setScript('Hans') got Error: %s\n");
328
329 bld.setVariant("revised");
330 Verify(bld, "zh-Hans-HK-revised",
331 "setVariant('revised') got Error: %s\n");
332
333 bld.setUnicodeLocaleKeyword("nu", "thai");
334 Verify(bld, "zh-Hans-HK-revised-u-nu-thai",
335 "setUnicodeLocaleKeyword('nu', 'thai'') got Error: %s\n");
336
337 bld.setUnicodeLocaleKeyword("co", "pinyin");
338 Verify(bld, "zh-Hans-HK-revised-u-co-pinyin-nu-thai",
339 "setUnicodeLocaleKeyword('co', 'pinyin'') got Error: %s\n");
340
341 bld.setUnicodeLocaleKeyword("nu", "latn");
342 Verify(bld, "zh-Hans-HK-revised-u-co-pinyin-nu-latn",
343 "setUnicodeLocaleKeyword('nu', 'latn'') got Error: %s\n");
344
345 bld.setUnicodeLocaleKeyword("nu", nullptr);
346 Verify(bld, "zh-Hans-HK-revised-u-co-pinyin",
347 "setUnicodeLocaleKeyword('nu', ''') got Error: %s\n");
348
349 bld.setUnicodeLocaleKeyword("co", nullptr);
350 Verify(bld, "zh-Hans-HK-revised",
351 "setUnicodeLocaleKeyword('nu', nullptr) got Error: %s\n");
352
353 bld.setScript("");
354 Verify(bld, "zh-HK-revised",
355 "setScript('') got Error: %s\n");
356
357 bld.setVariant("");
358 Verify(bld, "zh-HK",
359 "setVariant('') got Error: %s\n");
360
361 bld.setRegion("");
362 Verify(bld, "zh",
363 "setRegion('') got Error: %s\n");
364 }
365
TestSetLanguageWellFormed()366 void LocaleBuilderTest::TestSetLanguageWellFormed() {
367 // http://www.unicode.org/reports/tr35/tr35.html#unicode_language_subtag
368 // unicode_language_subtag = alpha{2,3} | alpha{5,8};
369 // ICUTC decided also support alpha{4}
370 static const char* wellFormedLanguages[] = {
371 "",
372
373 // alpha{2}
374 "en",
375 "NE",
376 "eN",
377 "Ne",
378
379 // alpha{3}
380 "aNe",
381 "zzz",
382 "AAA",
383
384 // alpha{4}
385 "ABCD",
386 "abcd",
387
388 // alpha{5}
389 "efgij",
390 "AbCAD",
391 "ZAASD",
392
393 // alpha{6}
394 "efgijk",
395 "AADGFE",
396 "AkDfFz",
397
398 // alpha{7}
399 "asdfads",
400 "ADSFADF",
401 "piSFkDk",
402
403 // alpha{8}
404 "oieradfz",
405 "IADSFJKR",
406 "kkDSFJkR",
407 };
408 for (const char* lang : wellFormedLanguages) {
409 UErrorCode status = U_ZERO_ERROR;
410 LocaleBuilder bld;
411 bld.setLanguage(lang);
412 Locale loc = bld.build(status);
413 if (U_FAILURE(status)) {
414 errln("setLanguage(\"%s\") got Error: %s\n",
415 lang, u_errorName(status));
416 }
417 }
418 }
419
TestSetLanguageIllFormed()420 void LocaleBuilderTest::TestSetLanguageIllFormed() {
421 static const char* illFormed[] = {
422 "a",
423 "z",
424 "A",
425 "F",
426 "2",
427 "0",
428 "9"
429 "{",
430 ".",
431 "[",
432 "]",
433 "\\",
434
435 "e1",
436 "N2",
437 "3N",
438 "4e",
439 "e:",
440 "43",
441 "a9",
442
443 "aN0",
444 "z1z",
445 "2zz",
446 "3A3",
447 "456",
448 "af)",
449
450 // Per 2019-01-23 ICUTC, we still accept 4alpha as tlang. see ICU-20321.
451 // "latn",
452 // "Arab",
453 // "LATN",
454
455 "e)gij",
456 "Ab3AD",
457 "ZAAS8",
458
459 "efgi[]",
460 "AA9GFE",
461 "7kD3Fz",
462 "as8fads",
463 "0DSFADF",
464 "'iSFkDk",
465
466 "oieradf+",
467 "IADSFJK-",
468 "kkDSFJk0",
469
470 // alpha{9}
471 "oieradfab",
472 "IADSFJKDE",
473 "kkDSFJkzf",
474 };
475 for (const char* ill : illFormed) {
476 UErrorCode status = U_ZERO_ERROR;
477 LocaleBuilder bld;
478 bld.setLanguage(ill);
479 Locale loc = bld.build(status);
480 if (status != U_ILLEGAL_ARGUMENT_ERROR) {
481 errln("setLanguage(\"%s\") should fail but has no Error\n", ill);
482 }
483 }
484 }
485
TestSetScriptWellFormed()486 void LocaleBuilderTest::TestSetScriptWellFormed() {
487 // http://www.unicode.org/reports/tr35/tr35.html#unicode_script_subtag
488 // unicode_script_subtag = alpha{4} ;
489 static const char* wellFormedScripts[] = {
490 "",
491
492 "Latn",
493 "latn",
494 "lATN",
495 "laTN",
496 "arBN",
497 "ARbn",
498 "adsf",
499 "aADF",
500 "BSVS",
501 "LATn",
502 };
503 for (const char* script : wellFormedScripts) {
504 UErrorCode status = U_ZERO_ERROR;
505 LocaleBuilder bld;
506 bld.setScript(script);
507 Locale loc = bld.build(status);
508 if (U_FAILURE(status)) {
509 errln("setScript(\"%s\") got Error: %s\n",
510 script, u_errorName(status));
511 }
512 }
513 }
514
TestSetScriptIllFormed()515 void LocaleBuilderTest::TestSetScriptIllFormed() {
516 static const char* illFormed[] = {
517 "a",
518 "z",
519 "A",
520 "F",
521 "2",
522 "0",
523 "9"
524 "{",
525 ".",
526 "[",
527 "]",
528 "\\",
529
530 "e1",
531 "N2",
532 "3N",
533 "4e",
534 "e:",
535 "43",
536 "a9",
537
538 "aN0",
539 "z1z",
540 "2zz",
541 "3A3",
542 "456",
543 "af)",
544
545 "0atn",
546 "l1tn",
547 "lA2N",
548 "la4N",
549 "arB5",
550 "1234",
551
552 "e)gij",
553 "Ab3AD",
554 "ZAAS8",
555
556 "efgi[]",
557 "AA9GFE",
558 "7kD3Fz",
559
560 "as8fads",
561 "0DSFADF",
562 "'iSFkDk",
563
564 "oieradf+",
565 "IADSFJK-",
566 "kkDSFJk0",
567
568 // alpha{9}
569 "oieradfab",
570 "IADSFJKDE",
571 "kkDSFJkzf",
572 };
573 for (const char* ill : illFormed) {
574 UErrorCode status = U_ZERO_ERROR;
575 LocaleBuilder bld;
576 bld.setScript(ill);
577 Locale loc = bld.build(status);
578 if (status != U_ILLEGAL_ARGUMENT_ERROR) {
579 errln("setScript(\"%s\") should fail but has no Error\n", ill);
580 }
581 }
582 }
583
TestSetRegionWellFormed()584 void LocaleBuilderTest::TestSetRegionWellFormed() {
585 // http://www.unicode.org/reports/tr35/tr35.html#unicode_region_subtag
586 // unicode_region_subtag = (alpha{2} | digit{3})
587 static const char* wellFormedRegions[] = {
588 "",
589
590 // alpha{2}
591 "en",
592 "NE",
593 "eN",
594 "Ne",
595
596 // digit{3}
597 "000",
598 "999",
599 "123",
600 "987"
601 };
602 for (const char* region : wellFormedRegions) {
603 UErrorCode status = U_ZERO_ERROR;
604 LocaleBuilder bld;
605 bld.setRegion(region);
606 Locale loc = bld.build(status);
607 if (U_FAILURE(status)) {
608 errln("setRegion(\"%s\") got Error: %s\n",
609 region, u_errorName(status));
610 }
611 }
612 }
613
TestSetRegionIllFormed()614 void LocaleBuilderTest::TestSetRegionIllFormed() {
615 static const char* illFormed[] = {
616 "a",
617 "z",
618 "A",
619 "F",
620 "2",
621 "0",
622 "9"
623 "{",
624 ".",
625 "[",
626 "]",
627 "\\",
628
629 "e1",
630 "N2",
631 "3N",
632 "4e",
633 "e:",
634 "43",
635 "a9",
636
637 "aN0",
638 "z1z",
639 "2zz",
640 "3A3",
641 "4.6",
642 "af)",
643
644 "0atn",
645 "l1tn",
646 "lA2N",
647 "la4N",
648 "arB5",
649 "1234",
650
651 "e)gij",
652 "Ab3AD",
653 "ZAAS8",
654
655 "efgi[]",
656 "AA9GFE",
657 "7kD3Fz",
658
659 "as8fads",
660 "0DSFADF",
661 "'iSFkDk",
662
663 "oieradf+",
664 "IADSFJK-",
665 "kkDSFJk0",
666
667 // alpha{9}
668 "oieradfab",
669 "IADSFJKDE",
670 "kkDSFJkzf",
671 };
672 for (const char* ill : illFormed) {
673 UErrorCode status = U_ZERO_ERROR;
674 LocaleBuilder bld;
675 bld.setRegion(ill);
676 Locale loc = bld.build(status);
677 if (status != U_ILLEGAL_ARGUMENT_ERROR) {
678 errln("setRegion(\"%s\") should fail but has no Error\n", ill);
679 }
680 }
681 }
682
TestSetVariantWellFormed()683 void LocaleBuilderTest::TestSetVariantWellFormed() {
684 // http://www.unicode.org/reports/tr35/tr35.html#unicode_variant_subtag
685 // (sep unicode_variant_subtag)*
686 // unicode_variant_subtag = (alphanum{5,8} | digit alphanum{3}) ;
687 static const char* wellFormedVariants[] = {
688 "",
689
690 // alphanum{5}
691 "efgij",
692 "AbCAD",
693 "ZAASD",
694 "0AASD",
695 "A1CAD",
696 "ef2ij",
697 "ads3X",
698 "owqF4",
699
700 // alphanum{6}
701 "efgijk",
702 "AADGFE",
703 "AkDfFz",
704 "0ADGFE",
705 "A9DfFz",
706 "AADG7E",
707
708 // alphanum{7}
709 "asdfads",
710 "ADSFADF",
711 "piSFkDk",
712 "a0dfads",
713 "ADSF3DF",
714 "piSFkD9",
715
716 // alphanum{8}
717 "oieradfz",
718 "IADSFJKR",
719 "kkDSFJkR",
720 "0ADSFJKR",
721 "12345679",
722
723 // digit alphanum{3}
724 "0123",
725 "1abc",
726 "20EF",
727 "30EF",
728 "8A03",
729 "3Ax3",
730 "9Axy",
731
732 // (sep unicode_variant_subtag)*
733 "0123-4567",
734 "0ab3-ABCDE",
735 "9ax3-xByD9",
736 "9ax3-xByD9-adfk934a",
737
738 "0123_4567",
739 "0ab3_ABCDE",
740 "9ax3_xByD9",
741 "9ax3_xByD9_adfk934a",
742
743 "9ax3-xByD9_adfk934a",
744 "9ax3_xByD9-adfk934a",
745 };
746 for (const char* variant : wellFormedVariants) {
747 UErrorCode status = U_ZERO_ERROR;
748 LocaleBuilder bld;
749 bld.setVariant(variant);
750 Locale loc = bld.build(status);
751 if (U_FAILURE(status)) {
752 errln("setVariant(\"%s\") got Error: %s\n",
753 variant, u_errorName(status));
754 }
755 }
756 }
757
TestSetVariantIllFormed()758 void LocaleBuilderTest::TestSetVariantIllFormed() {
759 static const char* illFormed[] = {
760 "a",
761 "z",
762 "A",
763 "F",
764 "2",
765 "0",
766 "9"
767 "{",
768 ".",
769 "[",
770 "]",
771 "\\",
772
773 "e1",
774 "N2",
775 "3N",
776 "4e",
777 "e:",
778 "43",
779 "a9",
780 "en",
781 "NE",
782 "eN",
783 "Ne",
784
785 "aNe",
786 "zzz",
787 "AAA",
788 "aN0",
789 "z1z",
790 "2zz",
791 "3A3",
792 "4.6",
793 "af)",
794 "345",
795 "923",
796
797 "Latn",
798 "latn",
799 "lATN",
800 "laTN",
801 "arBN",
802 "ARbn",
803 "adsf",
804 "aADF",
805 "BSVS",
806 "LATn",
807 "l1tn",
808 "lA2N",
809 "la4N",
810 "arB5",
811 "abc3",
812 "A3BC",
813
814 "e)gij",
815 "A+3AD",
816 "ZAA=8",
817
818 "efgi[]",
819 "AA9]FE",
820 "7k[3Fz",
821
822 "as8f/ds",
823 "0DSFAD{",
824 "'iSFkDk",
825
826 "oieradf+",
827 "IADSFJK-",
828 "k}DSFJk0",
829
830 // alpha{9}
831 "oieradfab",
832 "IADSFJKDE",
833 "kkDSFJkzf",
834 "123456789",
835
836 "-0123",
837 "-0123-4567",
838 "0123-4567-",
839 "-123-4567",
840 "_0123",
841 "_0123_4567",
842 "0123_4567_",
843 "_123_4567",
844
845 "-abcde-figjk",
846 "abcde-figjk-",
847 "-abcde-figjk-",
848 "_abcde_figjk",
849 "abcde_figjk_",
850 "_abcde_figjk_",
851 };
852 for (const char* ill : illFormed) {
853 UErrorCode status = U_ZERO_ERROR;
854 LocaleBuilder bld;
855 bld.setVariant(ill);
856 Locale loc = bld.build(status);
857 if (status != U_ILLEGAL_ARGUMENT_ERROR) {
858 errln("setVariant(\"%s\") should fail but has no Error\n", ill);
859 }
860 }
861 }
862
TestSetUnicodeLocaleKeywordWellFormed()863 void LocaleBuilderTest::TestSetUnicodeLocaleKeywordWellFormed() {
864 // http://www.unicode.org/reports/tr35/tr35.html#unicode_locale_extensions
865 // keyword = key (sep type)? ;
866 // key = alphanum alpha ;
867 // type = alphanum{3,8} (sep alphanum{3,8})* ;
868 static const char* wellFormed_key_value[] = {
869 "aa", "123",
870 "3b", "zyzbcdef",
871 "0Z", "1ZB30zk9-abc",
872 "cZ", "2ck30zfZ-adsf023-234kcZ",
873 "ZZ", "Lant",
874 "ko", "",
875 };
876 for (int i = 0; i < UPRV_LENGTHOF(wellFormed_key_value); i += 2) {
877 UErrorCode status = U_ZERO_ERROR;
878 LocaleBuilder bld;
879 bld.setUnicodeLocaleKeyword(wellFormed_key_value[i],
880 wellFormed_key_value[i + 1]);
881 Locale loc = bld.build(status);
882 if (U_FAILURE(status)) {
883 errln("setUnicodeLocaleKeyword(\"%s\", \"%s\") got Error: %s\n",
884 wellFormed_key_value[i],
885 wellFormed_key_value[i + 1],
886 u_errorName(status));
887 }
888 }
889 }
890
TestSetUnicodeLocaleKeywordIllFormedKey()891 void LocaleBuilderTest::TestSetUnicodeLocaleKeywordIllFormedKey() {
892 static const char* illFormed[] = {
893 "34",
894 "ab-cde",
895 "123",
896 "b3",
897 "zyzabcdef",
898 "Z0",
899 };
900 for (const char* ill : illFormed) {
901 UErrorCode status = U_ZERO_ERROR;
902 LocaleBuilder bld;
903 bld.setUnicodeLocaleKeyword(ill, "abc");
904 Locale loc = bld.build(status);
905 if (status != U_ILLEGAL_ARGUMENT_ERROR) {
906 errln("setUnicodeLocaleKeyword(\"%s\", \"abc\") should fail but has no Error\n",
907 ill);
908 }
909 }
910 }
911
TestSetUnicodeLocaleKeywordIllFormedValue()912 void LocaleBuilderTest::TestSetUnicodeLocaleKeywordIllFormedValue() {
913 static const char* illFormed[] = {
914 "34",
915 "ab-",
916 "-cd",
917 "-ef-",
918 "zyzabcdef",
919 "ab-abc",
920 "1ZB30zfk9-abc",
921 "2ck30zfk9-adsf023-234kcZ",
922 };
923 for (const char* ill : illFormed) {
924 UErrorCode status = U_ZERO_ERROR;
925 LocaleBuilder bld;
926 bld.setUnicodeLocaleKeyword("ab", ill);
927 Locale loc = bld.build(status);
928 if (status != U_ILLEGAL_ARGUMENT_ERROR) {
929 errln("setUnicodeLocaleKeyword(\"ab\", \"%s\") should fail but has no Error\n",
930 ill);
931 }
932 }
933 }
934
TestAddRemoveUnicodeLocaleAttribute()935 void LocaleBuilderTest::TestAddRemoveUnicodeLocaleAttribute() {
936 LocaleBuilder bld;
937 UErrorCode status = U_ZERO_ERROR;
938 Locale loc = bld.setLanguage("fr")
939 .addUnicodeLocaleAttribute("abc")
940 .addUnicodeLocaleAttribute("aBc")
941 .addUnicodeLocaleAttribute("EFG")
942 .addUnicodeLocaleAttribute("efghi")
943 .addUnicodeLocaleAttribute("efgh")
944 .addUnicodeLocaleAttribute("efGhi")
945 .addUnicodeLocaleAttribute("EFg")
946 .addUnicodeLocaleAttribute("hijk")
947 .addUnicodeLocaleAttribute("EFG")
948 .addUnicodeLocaleAttribute("HiJK")
949 .addUnicodeLocaleAttribute("aBc")
950 .build(status);
951 if (U_FAILURE(status)) {
952 errln("addUnicodeLocaleAttribute() got Error: %s\n",
953 u_errorName(status));
954 }
955 std::string expected("fr-u-abc-efg-efgh-efghi-hijk");
956 std::string actual = loc.toLanguageTag<std::string>(status);
957 if (U_FAILURE(status) || expected != actual) {
958 errln("Should get \"%s\" but get \"%s\"\n", expected.c_str(), actual.c_str());
959 }
960
961 // remove "efgh" in the middle with different casing.
962 loc = bld.removeUnicodeLocaleAttribute("eFgH").build(status);
963 if (U_FAILURE(status)) {
964 errln("removeUnicodeLocaleAttribute() got Error: %s\n",
965 u_errorName(status));
966 }
967 expected = "fr-u-abc-efg-efghi-hijk";
968 actual = loc.toLanguageTag<std::string>(status);
969 if (U_FAILURE(status) || expected != actual) {
970 errln("Should get \"%s\" but get \"%s\"\n", expected.c_str(), actual.c_str());
971 }
972
973 // remove non-existing attributes.
974 loc = bld.removeUnicodeLocaleAttribute("efgh").build(status);
975 if (U_FAILURE(status)) {
976 errln("removeUnicodeLocaleAttribute() got Error: %s\n",
977 u_errorName(status));
978 }
979 actual = loc.toLanguageTag<std::string>(status);
980 if (U_FAILURE(status) || expected != actual) {
981 errln("Should get \"%s\" but get \"%s\"\n", expected.c_str(), actual.c_str());
982 }
983
984 // remove "abc" in the beginning with different casing.
985 loc = bld.removeUnicodeLocaleAttribute("ABC").build(status);
986 if (U_FAILURE(status)) {
987 errln("removeUnicodeLocaleAttribute() got Error: %s\n",
988 u_errorName(status));
989 }
990 expected = "fr-u-efg-efghi-hijk";
991 actual = loc.toLanguageTag<std::string>(status);
992 if (U_FAILURE(status) || expected != actual) {
993 errln("Should get \"%s\" but get \"%s\"\n", expected.c_str(), actual.c_str());
994 }
995
996 // remove non-existing substring in the end.
997 loc = bld.removeUnicodeLocaleAttribute("hij").build(status);
998 if (U_FAILURE(status)) {
999 errln("removeUnicodeLocaleAttribute() got Error: %s\n",
1000 u_errorName(status));
1001 }
1002 actual = loc.toLanguageTag<std::string>(status);
1003 if (U_FAILURE(status) || expected != actual) {
1004 errln("Should get \"%s\" but get \"%s\"\n", expected.c_str(), actual.c_str());
1005 }
1006
1007 // remove "hijk" in the end with different casing.
1008 loc = bld.removeUnicodeLocaleAttribute("hIJK").build(status);
1009 if (U_FAILURE(status)) {
1010 errln("removeUnicodeLocaleAttribute() got Error: %s\n",
1011 u_errorName(status));
1012 }
1013 expected = "fr-u-efg-efghi";
1014 actual = loc.toLanguageTag<std::string>(status);
1015 if (U_FAILURE(status) || expected != actual) {
1016 errln("Should get \"%s\" but get \"%s\"\n", expected.c_str(), actual.c_str());
1017 }
1018
1019 // remove "efghi" in the end with different casing.
1020 loc = bld.removeUnicodeLocaleAttribute("EFGhi").build(status);
1021 if (U_FAILURE(status)) {
1022 errln("removeUnicodeLocaleAttribute() got Error: %s\n",
1023 u_errorName(status));
1024 }
1025 expected = "fr-u-efg";
1026 actual = loc.toLanguageTag<std::string>(status);
1027 if (U_FAILURE(status) || expected != actual) {
1028 errln("Should get \"%s\" but get \"%s\"\n", expected.c_str(), actual.c_str());
1029 }
1030
1031 // remove "efg" in as the only one, with different casing.
1032 loc = bld.removeUnicodeLocaleAttribute("EFG").build(status);
1033 if (U_FAILURE(status)) {
1034 errln("removeUnicodeLocaleAttribute() got Error: %s\n",
1035 u_errorName(status));
1036 }
1037 expected = "fr";
1038 actual = loc.toLanguageTag<std::string>(status);
1039 if (U_FAILURE(status) || expected != actual) {
1040 errln("Should get \"%s\" but get \"%s\"\n", expected.c_str(), actual.c_str());
1041 }
1042
1043 }
1044
TestAddRemoveUnicodeLocaleAttributeWellFormed()1045 void LocaleBuilderTest::TestAddRemoveUnicodeLocaleAttributeWellFormed() {
1046 // http://www.unicode.org/reports/tr35/tr35.html#unicode_locale_extensions
1047 // attribute = alphanum{3,8} ;
1048 static const char* wellFormedAttributes[] = {
1049 // alphanum{3}
1050 "AbC",
1051 "ZAA",
1052 "0AA",
1053 "x3A",
1054 "xa8",
1055
1056 // alphanum{4}
1057 "AbCA",
1058 "ZASD",
1059 "0ASD",
1060 "A3a4",
1061 "zK90",
1062
1063 // alphanum{5}
1064 "efgij",
1065 "AbCAD",
1066 "ZAASD",
1067 "0AASD",
1068 "A1CAD",
1069 "ef2ij",
1070 "ads3X",
1071 "owqF4",
1072
1073 // alphanum{6}
1074 "efgijk",
1075 "AADGFE",
1076 "AkDfFz",
1077 "0ADGFE",
1078 "A9DfFz",
1079 "AADG7E",
1080
1081 // alphanum{7}
1082 "asdfads",
1083 "ADSFADF",
1084 "piSFkDk",
1085 "a0dfads",
1086 "ADSF3DF",
1087 "piSFkD9",
1088
1089 // alphanum{8}
1090 "oieradfz",
1091 "IADSFJKR",
1092 "kkDSFJkR",
1093 };
1094 LocaleBuilder bld;
1095 for (int i = 0; i < UPRV_LENGTHOF(wellFormedAttributes); i++) {
1096 if (i % 5 == 0) {
1097 bld.clear();
1098 }
1099 UErrorCode status = U_ZERO_ERROR;
1100 bld.addUnicodeLocaleAttribute(wellFormedAttributes[i]);
1101 Locale loc = bld.build(status);
1102 if (U_FAILURE(status)) {
1103 errln("addUnicodeLocaleAttribute(\"%s\") got Error: %s\n",
1104 wellFormedAttributes[i], u_errorName(status));
1105 }
1106 if (i > 2) {
1107 bld.removeUnicodeLocaleAttribute(wellFormedAttributes[i - 1]);
1108 loc = bld.build(status);
1109 if (U_FAILURE(status)) {
1110 errln("removeUnicodeLocaleAttribute(\"%s\") got Error: %s\n",
1111 wellFormedAttributes[i - 1], u_errorName(status));
1112 }
1113 bld.removeUnicodeLocaleAttribute(wellFormedAttributes[i - 3]);
1114 loc = bld.build(status);
1115 if (U_FAILURE(status)) {
1116 errln("removeUnicodeLocaleAttribute(\"%s\") got Error: %s\n",
1117 wellFormedAttributes[i - 3], u_errorName(status));
1118 }
1119 }
1120 }
1121 }
1122
TestAddUnicodeLocaleAttributeIllFormed()1123 void LocaleBuilderTest::TestAddUnicodeLocaleAttributeIllFormed() {
1124 static const char* illFormed[] = {
1125 "aa",
1126 "34",
1127 "ab-",
1128 "-cd",
1129 "-ef-",
1130 "zyzabcdef",
1131 "123456789",
1132 "ab-abc",
1133 "1ZB30zfk9-abc",
1134 "2ck30zfk9-adsf023-234kcZ",
1135 };
1136 for (const char* ill : illFormed) {
1137 UErrorCode status = U_ZERO_ERROR;
1138 LocaleBuilder bld;
1139 bld.addUnicodeLocaleAttribute(ill);
1140 Locale loc = bld.build(status);
1141 if (status != U_ILLEGAL_ARGUMENT_ERROR) {
1142 errln("addUnicodeLocaleAttribute(\"%s\") should fail but has no Error\n",
1143 ill);
1144 }
1145 }
1146 }
1147
TestSetExtensionU()1148 void LocaleBuilderTest::TestSetExtensionU() {
1149 LocaleBuilder bld;
1150 bld.setLanguage("zh");
1151 Verify(bld, "zh",
1152 "setLanguage(\"zh\") got Error: %s\n");
1153
1154 bld.setExtension('u', "co-stroke");
1155 Verify(bld, "zh-u-co-stroke",
1156 "setExtension('u', \"co-stroke\") got Error: %s\n");
1157
1158 bld.setExtension('U', "ca-islamic");
1159 Verify(bld, "zh-u-ca-islamic",
1160 "setExtension('U', \"zh-u-ca-islamic\") got Error: %s\n");
1161
1162 bld.setExtension('u', "ca-chinese");
1163 Verify(bld, "zh-u-ca-chinese",
1164 "setExtension('u', \"ca-chinese\") got Error: %s\n");
1165
1166 bld.setExtension('U', "co-pinyin");
1167 Verify(bld, "zh-u-co-pinyin",
1168 "setExtension('U', \"co-pinyin\") got Error: %s\n");
1169
1170 bld.setRegion("TW");
1171 Verify(bld, "zh-TW-u-co-pinyin",
1172 "setRegion(\"TW\") got Error: %s\n");
1173
1174 bld.setExtension('U', "");
1175 Verify(bld, "zh-TW",
1176 "setExtension('U', \"\") got Error: %s\n");
1177
1178 bld.setExtension('u', "abc-defg-kr-face");
1179 Verify(bld, "zh-TW-u-abc-defg-kr-face",
1180 "setExtension('u', \"abc-defg-kr-face\") got Error: %s\n");
1181
1182 bld.setExtension('U', "ca-japanese");
1183 Verify(bld, "zh-TW-u-ca-japanese",
1184 "setExtension('U', \"ca-japanese\") got Error: %s\n");
1185
1186 }
1187
TestSetExtensionValidateUWellFormed()1188 void LocaleBuilderTest::TestSetExtensionValidateUWellFormed() {
1189 static const char* wellFormedExtensions[] = {
1190 // keyword
1191 // keyword = key (sep type)? ;
1192 // key = alphanum alpha ;
1193 // type = alphanum{3,8} (sep alphanum{3,8})* ;
1194 "3A",
1195 "ZA",
1196 "az-abc",
1197 "zz-123",
1198 "7z-12345678",
1199 "kb-A234567Z",
1200 // (sep keyword)+
1201 "1z-ZZ",
1202 "2z-ZZ-123",
1203 "3z-ZZ-123-cd",
1204 "0z-ZZ-123-cd-efghijkl",
1205 // attribute
1206 "abc",
1207 "456",
1208 "87654321",
1209 "ZABADFSD",
1210 // (sep attribute)+
1211 "abc-ZABADFSD",
1212 "123-ZABADFSD",
1213 "K2K-12345678",
1214 "K2K-12345678-zzz",
1215 // (sep attribute)+ (sep keyword)*
1216 "K2K-12345678-zz",
1217 "K2K-12345678-zz-0z",
1218 "K2K-12345678-9z-AZ-abc",
1219 "K2K-12345678-zz-9A-234",
1220 "K2K-12345678-zk0-abc-efg-zz-9k-234",
1221 };
1222 for (const char* extension : wellFormedExtensions) {
1223 UErrorCode status = U_ZERO_ERROR;
1224 LocaleBuilder bld;
1225 bld.setExtension('u', extension);
1226 Locale loc = bld.build(status);
1227 if (U_FAILURE(status)) {
1228 errln("setExtension('u', \"%s\") got Error: %s\n",
1229 extension, u_errorName(status));
1230 }
1231 }
1232 }
1233
TestSetExtensionValidateUIllFormed()1234 void LocaleBuilderTest::TestSetExtensionValidateUIllFormed() {
1235 static const char* illFormed[] = {
1236 // bad key
1237 "-",
1238 "-ab",
1239 "ab-",
1240 "abc-",
1241 "-abc",
1242 "0",
1243 "a",
1244 "A0",
1245 "z9",
1246 "09",
1247 "90",
1248 // bad keyword
1249 "AB-A0",
1250 "AB-efg-A0",
1251 "xy-123456789",
1252 "AB-Aa-",
1253 "AB-Aac-",
1254 // bad attribute
1255 "abcdefghi",
1256 "abcdefgh-",
1257 "abcdefgh-abcdefghi",
1258 "abcdefgh-1",
1259 "abcdefgh-a",
1260 "abcdefgh-a2345678z",
1261 };
1262 for (const char* ill : illFormed) {
1263 UErrorCode status = U_ZERO_ERROR;
1264 LocaleBuilder bld;
1265 bld.setExtension('u', ill);
1266 Locale loc = bld.build(status);
1267 if (status != U_ILLEGAL_ARGUMENT_ERROR) {
1268 errln("setExtension('u', \"%s\") should fail but has no Error\n",
1269 ill);
1270 }
1271 }
1272 }
1273
TestSetExtensionT()1274 void LocaleBuilderTest::TestSetExtensionT() {
1275 LocaleBuilder bld;
1276 bld.setLanguage("fr");
1277 Verify(bld, "fr",
1278 "setLanguage(\"fr\") got Error: %s\n");
1279
1280 bld.setExtension('T', "zh");
1281 Verify(bld, "fr-t-zh",
1282 "setExtension('T', \"zh\") got Error: %s\n");
1283
1284 bld.setExtension('t', "zh-Hant-TW-1234-A9-123-456ABCDE");
1285 Verify(bld, "fr-t-zh-hant-tw-1234-a9-123-456abcde",
1286 "setExtension('t', \"zh-Hant-TW-1234-A9-123-456ABCDE\") got Error: %s\n");
1287
1288 bld.setExtension('T', "a9-123");
1289 Verify(bld, "fr-t-a9-123",
1290 "setExtension('T', \"a9-123\") got Error: %s\n");
1291
1292 bld.setRegion("MX");
1293 Verify(bld, "fr-MX-t-a9-123",
1294 "setRegion(\"MX\") got Error: %s\n");
1295
1296 bld.setScript("Hans");
1297 Verify(bld, "fr-Hans-MX-t-a9-123",
1298 "setScript(\"Hans\") got Error: %s\n");
1299
1300 bld.setVariant("9abc-abcde");
1301 Verify(bld, "fr-Hans-MX-9abc-abcde-t-a9-123",
1302 "setVariant(\"9abc-abcde\") got Error: %s\n");
1303
1304 bld.setExtension('T', "");
1305 Verify(bld, "fr-Hans-MX-9abc-abcde",
1306 "bld.setExtension('T', \"\") got Error: %s\n");
1307 }
1308
TestSetExtensionValidateTWellFormed()1309 void LocaleBuilderTest::TestSetExtensionValidateTWellFormed() {
1310 // ((sep tlang (sep tfield)*) | (sep tfield)+)
1311 static const char* wellFormedExtensions[] = {
1312 // tlang
1313 // tlang = unicode_language_subtag (sep unicode_script_subtag)?
1314 // (sep unicode_region_subtag)? (sep unicode_variant_subtag)* ;
1315 // unicode_language_subtag
1316 "en",
1317 "abc",
1318 "abcde",
1319 "ABCDEFGH",
1320 // unicode_language_subtag sep unicode_script_subtag
1321 "en-latn",
1322 "abc-arab",
1323 "ABCDEFGH-Thai",
1324 // unicode_language_subtag sep unicode_script_subtag sep unicode_region_subtag
1325 "en-latn-ME",
1326 "abc-arab-RU",
1327 "ABCDEFGH-Thai-TH",
1328 "en-latn-409",
1329 "abc-arab-123",
1330 "ABCDEFGH-Thai-456",
1331 // unicode_language_subtag sep unicode_region_subtag
1332 "en-ME",
1333 "abc-RU",
1334 "ABCDEFGH-TH",
1335 "en-409",
1336 "abc-123",
1337 "ABCDEFGH-456",
1338 // unicode_language_subtag sep unicode_script_subtag sep unicode_region_subtag
1339 // sep (sep unicode_variant_subtag)*
1340 "en-latn-ME-abcde",
1341 "abc-arab-RU-3abc-abcdef",
1342 "ABCDEFGH-Thai-TH-ADSFS-9xyz-abcdef",
1343 "en-latn-409-xafsa",
1344 "abc-arab-123-ADASDF",
1345 "ABCDEFGH-Thai-456-9sdf-ADASFAS",
1346 // (sep tfield)+
1347 "A0-abcde",
1348 "z9-abcde123",
1349 "z9-abcde123-a1-abcde",
1350 // tlang (sep tfield)*
1351 "fr-A0-abcde",
1352 "fr-FR-A0-abcde",
1353 "fr-123-z9-abcde123-a1-abcde",
1354 "fr-Latn-FR-z9-abcde123-a1-abcde",
1355 "gab-Thai-TH-abcde-z9-abcde123-a1-abcde",
1356 "gab-Thai-TH-0bde-z9-abcde123-a1-abcde",
1357 };
1358 for (const char* extension : wellFormedExtensions) {
1359 UErrorCode status = U_ZERO_ERROR;
1360 LocaleBuilder bld;
1361 bld.setExtension('t', extension);
1362 Locale loc = bld.build(status);
1363 if (U_FAILURE(status)) {
1364 errln("setExtension('t', \"%s\") got Error: %s\n",
1365 extension, u_errorName(status));
1366 }
1367 }
1368 }
1369
TestSetExtensionValidateTIllFormed()1370 void LocaleBuilderTest::TestSetExtensionValidateTIllFormed() {
1371 static const char* illFormed[] = {
1372 "a",
1373 "a-",
1374 "0",
1375 "9-",
1376 "-9",
1377 "-z",
1378 // "Latn", // Per 2019-01-23 ICUTC, still accept 4alpha. See ICU-20321
1379 "Latn-",
1380 "en-",
1381 "nob-",
1382 "-z9",
1383 "a3",
1384 "a3-",
1385 "3a",
1386 "0z-",
1387 "en-123-a1",
1388 "en-TH-a1",
1389 "gab-TH-a1",
1390 "gab-Thai-a1",
1391 "gab-Thai-TH-a1",
1392 "gab-Thai-TH-0bde-a1",
1393 "gab-Thai-TH-0bde-3b",
1394 "gab-Thai-TH-0bde-z9-a1",
1395 "gab-Thai-TH-0bde-z9-3b",
1396 "gab-Thai-TH-0bde-z9-abcde123-3b",
1397 "gab-Thai-TH-0bde-z9-abcde123-ab",
1398 "gab-Thai-TH-0bde-z9-abcde123-ab",
1399 "gab-Thai-TH-0bde-z9-abcde123-a1",
1400 "gab-Thai-TH-0bde-z9-abcde123-a1-",
1401 "gab-Thai-TH-0bde-z9-abcde123-a1-a",
1402 "gab-Thai-TH-0bde-z9-abcde123-a1-ab",
1403 };
1404 for (const char* ill : illFormed) {
1405 UErrorCode status = U_ZERO_ERROR;
1406 LocaleBuilder bld;
1407 bld.setExtension('t', ill);
1408 Locale loc = bld.build(status);
1409 if (status != U_ILLEGAL_ARGUMENT_ERROR) {
1410 errln("setExtension('t', \"%s\") should fail but has no Error\n",
1411 ill);
1412 }
1413 }
1414 }
1415
TestSetExtensionPU()1416 void LocaleBuilderTest::TestSetExtensionPU() {
1417 LocaleBuilder bld;
1418 bld.setLanguage("ar");
1419 Verify(bld, "ar",
1420 "setLanguage(\"ar\") got Error: %s\n");
1421
1422 bld.setExtension('X', "a-b-c-d-e");
1423 Verify(bld, "ar-x-a-b-c-d-e",
1424 "setExtension('X', \"a-b-c-d-e\") got Error: %s\n");
1425
1426 bld.setExtension('x', "0-1-2-3");
1427 Verify(bld, "ar-x-0-1-2-3",
1428 "setExtension('x', \"0-1-2-3\") got Error: %s\n");
1429
1430 bld.setExtension('X', "0-12345678-x-x");
1431 Verify(bld, "ar-x-0-12345678-x-x",
1432 "setExtension('x', \"ar-x-0-12345678-x-x\") got Error: %s\n");
1433
1434 bld.setRegion("TH");
1435 Verify(bld, "ar-TH-x-0-12345678-x-x",
1436 "setRegion(\"TH\") got Error: %s\n");
1437
1438 bld.setExtension('X', "");
1439 Verify(bld, "ar-TH",
1440 "setExtension(\"X\") got Error: %s\n");
1441 }
1442
TestSetExtensionValidatePUWellFormed()1443 void LocaleBuilderTest::TestSetExtensionValidatePUWellFormed() {
1444 // ((sep tlang (sep tfield)*) | (sep tfield)+)
1445 static const char* wellFormedExtensions[] = {
1446 "a", // Short subtag
1447 "z", // Short subtag
1448 "0", // Short subtag, digit
1449 "9", // Short subtag, digit
1450 "a-0", // Two short subtag, alpha and digit
1451 "9-z", // Two short subtag, digit and alpha
1452 "ab",
1453 "abc",
1454 "abcefghi", // Long subtag
1455 "87654321",
1456 "01",
1457 "234",
1458 "0a-ab-87654321", // Three subtags
1459 "87654321-ab-00-3A", // Four subtabs
1460 "a-9-87654321", // Three subtags with short and long subtags
1461 "87654321-ab-0-3A",
1462 };
1463 for (const char* extension : wellFormedExtensions) {
1464 UErrorCode status = U_ZERO_ERROR;
1465 LocaleBuilder bld;
1466 bld.setExtension('x', extension);
1467 Locale loc = bld.build(status);
1468 if (U_FAILURE(status)) {
1469 errln("setExtension('x', \"%s\") got Error: %s\n",
1470 extension, u_errorName(status));
1471 }
1472 }
1473 }
1474
TestSetExtensionValidatePUIllFormed()1475 void LocaleBuilderTest::TestSetExtensionValidatePUIllFormed() {
1476 static const char* illFormed[] = {
1477 "123456789", // Too long
1478 "abcdefghi", // Too long
1479 "ab-123456789", // Second subtag too long
1480 "abcdefghi-12", // First subtag too long
1481 "a-ab-987654321", // Third subtag too long
1482 "987654321-a-0-3", // First subtag too long
1483 };
1484 for (const char* ill : illFormed) {
1485 UErrorCode status = U_ZERO_ERROR;
1486 LocaleBuilder bld;
1487 bld.setExtension('x', ill);
1488 Locale loc = bld.build(status);
1489 if (status != U_ILLEGAL_ARGUMENT_ERROR) {
1490 errln("setExtension('x', \"%s\") should fail but has no Error\n",
1491 ill);
1492 }
1493 }
1494 }
1495
TestSetExtensionOthers()1496 void LocaleBuilderTest::TestSetExtensionOthers() {
1497 LocaleBuilder bld;
1498 bld.setLanguage("fr");
1499 Verify(bld, "fr",
1500 "setLanguage(\"fr\") got Error: %s\n");
1501
1502 bld.setExtension('Z', "ab");
1503 Verify(bld, "fr-z-ab",
1504 "setExtension('Z', \"ab\") got Error: %s\n");
1505
1506 bld.setExtension('0', "xyz12345-abcdefg");
1507 Verify(bld, "fr-0-xyz12345-abcdefg-z-ab",
1508 "setExtension('0', \"xyz12345-abcdefg\") got Error: %s\n");
1509
1510 bld.setExtension('a', "01-12345678-ABcdef");
1511 Verify(bld, "fr-0-xyz12345-abcdefg-a-01-12345678-abcdef-z-ab",
1512 "setExtension('a', \"01-12345678-ABcdef\") got Error: %s\n");
1513
1514 bld.setRegion("TH");
1515 Verify(bld, "fr-TH-0-xyz12345-abcdefg-a-01-12345678-abcdef-z-ab",
1516 "setRegion(\"TH\") got Error: %s\n");
1517
1518 bld.setScript("Arab");
1519 Verify(bld, "fr-Arab-TH-0-xyz12345-abcdefg-a-01-12345678-abcdef-z-ab",
1520 "setRegion(\"Arab\") got Error: %s\n");
1521
1522 bld.setExtension('A', "97");
1523 Verify(bld, "fr-Arab-TH-0-xyz12345-abcdefg-a-97-z-ab",
1524 "setExtension('a', \"97\") got Error: %s\n");
1525
1526 bld.setExtension('a', "");
1527 Verify(bld, "fr-Arab-TH-0-xyz12345-abcdefg-z-ab",
1528 "setExtension('a', \"\") got Error: %s\n");
1529
1530 bld.setExtension('0', "");
1531 Verify(bld, "fr-Arab-TH-z-ab",
1532 "setExtension('0', \"\") got Error: %s\n");
1533 }
1534
TestSetExtensionValidateOthersWellFormed()1535 void LocaleBuilderTest::TestSetExtensionValidateOthersWellFormed() {
1536 static const char* wellFormedExtensions[] = {
1537 "ab",
1538 "abc",
1539 "abcefghi",
1540 "01",
1541 "234",
1542 "87654321",
1543 "0a-ab-87654321",
1544 "87654321-ab-00-3A",
1545 };
1546
1547 const char * aToZ = "abcdefghijklmnopqrstuvwxyz";
1548 const int32_t aToZLen = static_cast<int32_t>(uprv_strlen(aToZ));
1549 int32_t i = 0;
1550 for (const char* extension : wellFormedExtensions) {
1551 char ch = aToZ[i];
1552 i = (i + 1) % aToZLen;
1553 UErrorCode status = U_ZERO_ERROR;
1554 LocaleBuilder bld;
1555 bld.setExtension(ch, extension);
1556 Locale loc = bld.build(status);
1557 if (U_FAILURE(status)) {
1558 errln("setExtension('%c', \"%s\") got Error: %s\n",
1559 ch, extension, u_errorName(status));
1560 }
1561 }
1562
1563 const char* someChars =
1564 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789`~!@#$%^&*()-_=+;:,.<>?";
1565 const int32_t someCharsLen = static_cast<int32_t>(uprv_strlen(someChars));
1566 for (int32_t i = 0; i < someCharsLen; i++) {
1567 char ch = someChars[i];
1568 UErrorCode status = U_ZERO_ERROR;
1569 LocaleBuilder bld;
1570 bld.setExtension(ch, wellFormedExtensions[ch % UPRV_LENGTHOF(wellFormedExtensions)]);
1571 Locale loc = bld.build(status);
1572 if (uprv_isASCIILetter(ch) || ('0' <= ch && ch <= '9')) {
1573 if (ch != 't' && ch != 'T' && ch != 'u' && ch != 'U' && ch != 'x' && ch != 'X') {
1574 if (U_FAILURE(status)) {
1575 errln("setExtension('%c', \"%s\") got Error: %s\n",
1576 ch, wellFormedExtensions[ch % UPRV_LENGTHOF(wellFormedExtensions)], u_errorName(status));
1577 }
1578 }
1579 } else {
1580 if (status != U_ILLEGAL_ARGUMENT_ERROR) {
1581 errln("setExtension('%c', \"%s\") should fail but has no Error\n",
1582 ch, wellFormedExtensions[ch % UPRV_LENGTHOF(wellFormedExtensions)]);
1583 }
1584 }
1585
1586 }
1587 }
1588
TestSetExtensionValidateOthersIllFormed()1589 void LocaleBuilderTest::TestSetExtensionValidateOthersIllFormed() {
1590 static const char* illFormed[] = {
1591 "0", // Too short
1592 "a", // Too short
1593 "123456789", // Too long
1594 "abcdefghi", // Too long
1595 "ab-123456789", // Second subtag too long
1596 "abcdefghi-12", // First subtag too long
1597 "a-ab-87654321", // Third subtag too long
1598 "87654321-a-0-3", // First subtag too long
1599 };
1600 const char * aToZ = "abcdefghijklmnopqrstuvwxyz";
1601 const int32_t aToZLen = static_cast<int32_t>(uprv_strlen(aToZ));
1602 int32_t i = 0;
1603 for (const char* ill : illFormed) {
1604 char ch = aToZ[i];
1605 i = (i + 1) % aToZLen;
1606 UErrorCode status = U_ZERO_ERROR;
1607 LocaleBuilder bld;
1608 bld.setExtension(ch, ill);
1609 Locale loc = bld.build(status);
1610 if (status != U_ILLEGAL_ARGUMENT_ERROR) {
1611 errln("setExtension('%c', \"%s\") should fail but has no Error\n",
1612 ch, ill);
1613 }
1614 }
1615 }
1616
TestSetLocale()1617 void LocaleBuilderTest::TestSetLocale() {
1618 LocaleBuilder bld1, bld2;
1619 UErrorCode status = U_ZERO_ERROR;
1620 Locale l1 = bld1.setLanguage("en")
1621 .setScript("Latn")
1622 .setRegion("MX")
1623 .setVariant("3456-abcde")
1624 .addUnicodeLocaleAttribute("456")
1625 .addUnicodeLocaleAttribute("123")
1626 .setUnicodeLocaleKeyword("nu", "thai")
1627 .setUnicodeLocaleKeyword("co", "stroke")
1628 .setUnicodeLocaleKeyword("ca", "chinese")
1629 .build(status);
1630 if (U_FAILURE(status) || l1.isBogus()) {
1631 errln("build got Error: %s\n", u_errorName(status));
1632 }
1633 status = U_ZERO_ERROR;
1634 Locale l2 = bld1.setLocale(l1).build(status);
1635 if (U_FAILURE(status) || l2.isBogus()) {
1636 errln("build got Error: %s\n", u_errorName(status));
1637 }
1638
1639 if (l1 != l2) {
1640 errln("Two locales should be the same, but one is '%s' and the other is '%s'",
1641 l1.getName(), l2.getName());
1642 }
1643 }
1644
TestPosixCases()1645 void LocaleBuilderTest::TestPosixCases() {
1646 UErrorCode status = U_ZERO_ERROR;
1647 Locale l1 = Locale::forLanguageTag("en-US-u-va-posix", status);
1648 if (U_FAILURE(status) || l1.isBogus()) {
1649 errln("build got Error: %s\n", u_errorName(status));
1650 }
1651 LocaleBuilder bld;
1652 bld.setLanguage("en")
1653 .setRegion("MX")
1654 .setScript("Arab")
1655 .setUnicodeLocaleKeyword("nu", "Thai")
1656 .setExtension('x', "1");
1657 // All of above should be cleared by the setLocale call.
1658 Locale l2 = bld.setLocale(l1).build(status);
1659 if (U_FAILURE(status) || l2.isBogus()) {
1660 errln("build got Error: %s\n", u_errorName(status));
1661 }
1662 if (l1 != l2) {
1663 errln("The result locale should be the set as the setLocale %s but got %s\n",
1664 l1.toLanguageTag<std::string>(status).c_str(),
1665 l2.toLanguageTag<std::string>(status).c_str());
1666 }
1667 Locale posix("en-US-POSIX");
1668 if (posix != l2) {
1669 errln("The result locale should be the set as %s but got %s\n",
1670 posix.getName(), l2.getName());
1671 }
1672 }
1673