1 // © 2018 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 
4 #include <memory>
5 
6 #include "cmemory.h"
7 #include "cstring.h"
8 #include "localebuildertest.h"
9 #include "unicode/localebuilder.h"
10 #include "unicode/strenum.h"
11 
LocaleBuilderTest()12 LocaleBuilderTest::LocaleBuilderTest()
13 {
14 }
15 
~LocaleBuilderTest()16 LocaleBuilderTest::~LocaleBuilderTest()
17 {
18 }
19 
runIndexedTest(int32_t index,UBool exec,const char * & name,char *)20 void LocaleBuilderTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ )
21 {
22     TESTCASE_AUTO_BEGIN;
23     TESTCASE_AUTO(TestAddRemoveUnicodeLocaleAttribute);
24     TESTCASE_AUTO(TestAddRemoveUnicodeLocaleAttributeWellFormed);
25     TESTCASE_AUTO(TestAddUnicodeLocaleAttributeIllFormed);
26     TESTCASE_AUTO(TestLocaleBuilder);
27     TESTCASE_AUTO(TestLocaleBuilderBasic);
28     TESTCASE_AUTO(TestPosixCases);
29     TESTCASE_AUTO(TestSetExtensionOthers);
30     TESTCASE_AUTO(TestSetExtensionPU);
31     TESTCASE_AUTO(TestSetExtensionT);
32     TESTCASE_AUTO(TestSetExtensionU);
33     TESTCASE_AUTO(TestSetExtensionValidateOthersIllFormed);
34     TESTCASE_AUTO(TestSetExtensionValidateOthersWellFormed);
35     TESTCASE_AUTO(TestSetExtensionValidatePUIllFormed);
36     TESTCASE_AUTO(TestSetExtensionValidatePUWellFormed);
37     TESTCASE_AUTO(TestSetExtensionValidateTIllFormed);
38     TESTCASE_AUTO(TestSetExtensionValidateTWellFormed);
39     TESTCASE_AUTO(TestSetExtensionValidateUIllFormed);
40     TESTCASE_AUTO(TestSetExtensionValidateUWellFormed);
41     TESTCASE_AUTO(TestSetLanguageIllFormed);
42     TESTCASE_AUTO(TestSetLanguageWellFormed);
43     TESTCASE_AUTO(TestSetLocale);
44     TESTCASE_AUTO(TestSetRegionIllFormed);
45     TESTCASE_AUTO(TestSetRegionWellFormed);
46     TESTCASE_AUTO(TestSetScriptIllFormed);
47     TESTCASE_AUTO(TestSetScriptWellFormed);
48     TESTCASE_AUTO(TestSetUnicodeLocaleKeywordIllFormedKey);
49     TESTCASE_AUTO(TestSetUnicodeLocaleKeywordIllFormedValue);
50     TESTCASE_AUTO(TestSetUnicodeLocaleKeywordWellFormed);
51     TESTCASE_AUTO(TestSetVariantIllFormed);
52     TESTCASE_AUTO(TestSetVariantWellFormed);
53     TESTCASE_AUTO_END;
54 }
55 
Verify(LocaleBuilder & bld,const char * expected,const char * msg)56 void LocaleBuilderTest::Verify(LocaleBuilder& bld, const char* expected, const char* msg) {
57     UErrorCode status = U_ZERO_ERROR;
58     UErrorCode copyStatus = U_ZERO_ERROR;
59     UErrorCode errorStatus = U_ILLEGAL_ARGUMENT_ERROR;
60     if (bld.copyErrorTo(copyStatus)) {
61         errln(msg, u_errorName(copyStatus));
62     }
63     if (!bld.copyErrorTo(errorStatus) || errorStatus != U_ILLEGAL_ARGUMENT_ERROR) {
64         errln("Should always get the previous error and return FALSE");
65     }
66     Locale loc = bld.build(status);
67     if (U_FAILURE(status)) {
68         errln(msg, u_errorName(status));
69     }
70     if (status != copyStatus) {
71         errln(msg, u_errorName(status));
72     }
73     std::string tag = loc.toLanguageTag<std::string>(status);
74     if (U_FAILURE(status)) {
75         errln("loc.toLanguageTag() got Error: %s\n",
76               u_errorName(status));
77     }
78     if (tag != expected) {
79         errln("should get \"%s\", but got \"%s\"\n", expected, tag.c_str());
80     }
81 }
82 
TestLocaleBuilder()83 void LocaleBuilderTest::TestLocaleBuilder() {
84     // The following test data are copy from
85     // icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/LocaleBuilderTest.java
86     // "L": +1 = language
87     // "S": +1 = script
88     // "R": +1 = region
89     // "V": +1 = variant
90     // "K": +1 = Unicode locale key / +2 = Unicode locale type
91     // "A": +1 = Unicode locale attribute
92     // "E": +1 = extension letter / +2 = extension value
93     // "P": +1 = private use
94     // "U": +1 = ULocale
95     // "B": +1 = BCP47 language tag
96     // "C": Clear all
97     // "N": Clear extensions
98     // "D": +1 = Unicode locale attribute to be removed
99     // "X": indicates an exception must be thrown
100     // "T": +1 = expected language tag / +2 = expected locale string
101     const char* TESTCASES[][14] = {
102         {"L", "en", "R", "us", "T", "en-US", "en_US"},
103         {"L", "en", "R", "CA", "L", nullptr, "T", "und-CA", "_CA"},
104         {"L", "en", "R", "CA", "L", "", "T", "und-CA", "_CA"},
105         {"L", "en", "R", "FR", "L", "fr", "T", "fr-FR", "fr_FR"},
106         {"L", "123", "X"},
107         {"R", "us", "T", "und-US", "_US"},
108         {"R", "usa", "X"},
109         {"R", "123", "L", "it", "R", nullptr, "T", "it", "it"},
110         {"R", "123", "L", "it", "R", "", "T", "it", "it"},
111         {"R", "123", "L", "en", "T", "en-123", "en_123"},
112         {"S", "LATN", "L", "DE", "T", "de-Latn", "de_Latn"},
113         {"L", "De", "S", "latn", "R", "de", "S", "", "T", "de-DE", "de_DE"},
114         {"L", "De", "S", "Arab", "R", "de", "S", nullptr, "T", "de-DE", "de_DE"},
115         {"S", "latin", "X"},
116         {"V", "1234", "L", "en", "T", "en-1234", "en__1234"},
117         {"V", "1234", "L", "en", "V", "5678", "T", "en-5678", "en__5678"},
118         {"V", "1234", "L", "en", "V", nullptr, "T", "en", "en"},
119         {"V", "1234", "L", "en", "V", "", "T", "en", "en"},
120         {"V", "123", "X"},
121         {"U", "en_US", "T", "en-US", "en_US"},
122         {"U", "en_US_WIN", "X"},
123         {"B", "fr-FR-1606nict-u-ca-gregory-x-test", "T",
124           "fr-FR-1606nict-u-ca-gregory-x-test",
125           "fr_FR_1606NICT@calendar=gregorian;x=test"},
126         {"B", "ab-cde-fghij", "T", "cde-fghij", "cde__FGHIJ"},
127         {"B", "und-CA", "T", "und-CA", "_CA"},
128         // Blocked by ICU-20327
129         // {"B", "en-US-x-test-lvariant-var", "T", "en-US-x-test-lvariant-var",
130         // "en_US_VAR@x=test"},
131         {"B", "en-US-VAR", "X"},
132         {"U", "ja_JP@calendar=japanese;currency=JPY", "L", "ko", "T",
133           "ko-JP-u-ca-japanese-cu-jpy", "ko_JP@calendar=japanese;currency=JPY"},
134         {"U", "ja_JP@calendar=japanese;currency=JPY", "K", "ca", nullptr, "T",
135           "ja-JP-u-cu-jpy", "ja_JP@currency=JPY"},
136         {"U", "ja_JP@calendar=japanese;currency=JPY", "E", "u",
137           "attr1-ca-gregory", "T", "ja-JP-u-attr1-ca-gregory",
138           "ja_JP@attribute=attr1;calendar=gregorian"},
139         {"U", "en@colnumeric=yes", "K", "kn", "true", "T", "en-u-kn",
140           "en@colnumeric=yes"},
141         {"L", "th", "R", "th", "K", "nu", "thai", "T", "th-TH-u-nu-thai",
142           "th_TH@numbers=thai"},
143         {"U", "zh_Hans", "R", "sg", "K", "ca", "badcalendar", "X"},
144         {"U", "zh_Hans", "R", "sg", "K", "cal", "gregory", "X"},
145         {"E", "z", "ExtZ", "L", "en", "T", "en-z-extz", "en@z=extz"},
146         {"E", "z", "ExtZ", "L", "en", "E", "z", "", "T", "en", "en"},
147         {"E", "z", "ExtZ", "L", "en", "E", "z", nullptr, "T", "en", "en"},
148         {"E", "a", "x", "X"},
149         {"E", "a", "abc_def", "T", "und-a-abc-def", "@a=abc-def"},
150         // Design limitation - typeless u extension keyword 0a below is interpreted as a boolean value true/yes.
151         // With the legacy keyword syntax, "yes" is used for such boolean value instead of "true".
152         // However, once the legacy keyword is translated back to BCP 47 u extension, key "0a" is unknown,
153         // so "yes" is preserved - not mapped to "true". We could change the code to automatically transform
154         // key = alphanum alpha
155         {"L", "en", "E", "u", "bbb-aaa-0a", "T", "en-u-aaa-bbb-0a",
156          "en@0a=yes;attribute=aaa-bbb"},
157         {"L", "fr", "R", "FR", "P", "Yoshito-ICU", "T", "fr-FR-x-yoshito-icu",
158           "fr_FR@x=yoshito-icu"},
159         {"L", "ja", "R", "jp", "K", "ca", "japanese", "T", "ja-JP-u-ca-japanese",
160           "ja_JP@calendar=japanese"},
161         {"K", "co", "PHONEBK", "K", "ca", "gregory", "L", "De", "T",
162           "de-u-ca-gregory-co-phonebk", "de@calendar=gregorian;collation=phonebook"},
163         {"E", "o", "OPQR", "E", "a", "aBcD", "T", "und-a-abcd-o-opqr", "@a=abcd;o=opqr"},
164         {"E", "u", "nu-thai-ca-gregory", "L", "TH", "T", "th-u-ca-gregory-nu-thai",
165           "th@calendar=gregorian;numbers=thai"},
166         {"L", "en", "K", "tz", "usnyc", "R", "US", "T", "en-US-u-tz-usnyc",
167           "en_US@timezone=America/New_York"},
168         {"L", "de", "K", "co", "phonebk", "K", "ks", "level1", "K", "kk",
169           "true", "T", "de-u-co-phonebk-kk-ks-level1",
170           "de@collation=phonebook;colnormalization=yes;colstrength=primary"},
171         {"L", "en", "R", "US", "K", "ca", "gregory", "T", "en-US-u-ca-gregory",
172           "en_US@calendar=gregorian"},
173         {"L", "en", "R", "US", "K", "cal", "gregory", "X"},
174         {"L", "en", "R", "US", "K", "ca", "gregorian", "X"},
175         {"L", "en", "R", "US", "K", "kn", "true", "T", "en-US-u-kn",
176           "en_US@colnumeric=yes"},
177         {"B", "de-DE-u-co-phonebk", "C", "L", "pt", "T", "pt", "pt"},
178         {"B", "ja-jp-u-ca-japanese", "N", "T", "ja-JP", "ja_JP"},
179         {"B", "es-u-def-abc-co-trad", "A", "hij", "D", "def", "T",
180           "es-u-abc-hij-co-trad", "es@attribute=abc-hij;collation=traditional"},
181         {"B", "es-u-def-abc-co-trad", "A", "hij", "D", "def", "D", "def", "T",
182           "es-u-abc-hij-co-trad", "es@attribute=abc-hij;collation=traditional"},
183         {"L", "en", "A", "aa", "X"},
184         {"B", "fr-u-attr1-cu-eur", "D", "attribute1", "X"},
185     };
186     UErrorCode status = U_ZERO_ERROR;
187     LocaleBuilder bld;
188     for (int tidx = 0; tidx < UPRV_LENGTHOF(TESTCASES); tidx++) {
189         const char* (&testCase)[14] = TESTCASES[tidx];
190         std::string actions;
191         for (int p = 0; p < UPRV_LENGTHOF(testCase); p++) {
192              if (testCase[p] == nullptr) {
193                  actions += " (nullptr)";
194                  break;
195              }
196              if (p > 0) actions += " ";
197              actions += testCase[p];
198         }
199         int i = 0;
200         const char* method;
201         status = U_ZERO_ERROR;
202         bld.clear();
203         while (true) {
204             status = U_ZERO_ERROR;
205             UErrorCode copyStatus = U_ZERO_ERROR;
206             method = testCase[i++];
207             if (strcmp("L", method) == 0) {
208                 bld.setLanguage(testCase[i++]);
209                 bld.copyErrorTo(copyStatus);
210                 bld.build(status);
211             } else if (strcmp("S", method) == 0) {
212                 bld.setScript(testCase[i++]);
213                 bld.copyErrorTo(copyStatus);
214                 bld.build(status);
215             } else if (strcmp("R", method) == 0) {
216                 bld.setRegion(testCase[i++]);
217                 bld.copyErrorTo(copyStatus);
218                 bld.build(status);
219             } else if (strcmp("V", method) == 0) {
220                 bld.setVariant(testCase[i++]);
221                 bld.copyErrorTo(copyStatus);
222                 bld.build(status);
223             } else if (strcmp("K", method) == 0) {
224                 const char* key = testCase[i++];
225                 const char* type = testCase[i++];
226                 bld.setUnicodeLocaleKeyword(key, type);
227                 bld.copyErrorTo(copyStatus);
228                 bld.build(status);
229             } else if (strcmp("A", method) == 0) {
230                 bld.addUnicodeLocaleAttribute(testCase[i++]);
231                 bld.copyErrorTo(copyStatus);
232                 bld.build(status);
233             } else if (strcmp("E", method) == 0) {
234                 const char* key = testCase[i++];
235                 const char* value = testCase[i++];
236                 bld.setExtension(key[0], value);
237                 bld.copyErrorTo(copyStatus);
238                 bld.build(status);
239             } else if (strcmp("P", method) == 0) {
240                 bld.setExtension('x', testCase[i++]);
241                 bld.copyErrorTo(copyStatus);
242                 bld.build(status);
243             } else if (strcmp("U", method) == 0) {
244                 bld.setLocale(Locale(testCase[i++]));
245                 bld.copyErrorTo(copyStatus);
246                 bld.build(status);
247             } else if (strcmp("B", method) == 0) {
248                 bld.setLanguageTag(testCase[i++]);
249                 bld.copyErrorTo(copyStatus);
250                 bld.build(status);
251             }
252             // clear / remove
253             else if (strcmp("C", method) == 0) {
254                 bld.clear();
255                 bld.copyErrorTo(copyStatus);
256                 bld.build(status);
257             } else if (strcmp("N", method) == 0) {
258                 bld.clearExtensions();
259                 bld.copyErrorTo(copyStatus);
260                 bld.build(status);
261             } else if (strcmp("D", method) == 0) {
262                 bld.removeUnicodeLocaleAttribute(testCase[i++]);
263                 bld.copyErrorTo(copyStatus);
264                 bld.build(status);
265             }
266             // result
267             else if (strcmp("X", method) == 0) {
268                 if (U_SUCCESS(status)) {
269                     errln("FAIL: No error return - test case: %s", actions.c_str());
270                 }
271             } else if (strcmp("T", method) == 0) {
272                 status = U_ZERO_ERROR;
273                 Locale loc = bld.build(status);
274                 if (status != copyStatus) {
275                     errln("copyErrorTo not matching");
276                 }
277                 if (U_FAILURE(status) ||
278                     strcmp(loc.getName(), testCase[i + 1]) != 0) {
279                     errln("FAIL: Wrong locale ID - %s %s %s", loc.getName(),
280                             " for test case: ", actions.c_str());
281                 }
282                 std::string langtag = loc.toLanguageTag<std::string>(status);
283                 if (U_FAILURE(status) || langtag != testCase[i]) {
284                     errln("FAIL: Wrong language tag - %s %s %s", langtag.c_str(),
285                             " for test case: ", actions.c_str());
286                 }
287                 break;
288             } else {
289                 // Unknow test method
290                 errln("Unknown test case method: There is an error in the test case data.");
291                 break;
292             }
293             if (status != copyStatus) {
294                 errln("copyErrorTo not matching");
295             }
296             if (U_FAILURE(status)) {
297                 if (strcmp("X", testCase[i]) == 0) {
298                     // This failure is expected
299                     break;
300                 } else {
301                     errln("FAIL: U_ILLEGAL_ARGUMENT_ERROR at offset %d %s %s", i,
302                           " in test case: ", actions.c_str());
303                     break;
304                 }
305             }
306             if (strcmp("T", method) == 0) {
307                 break;
308             }
309         }  // while(true)
310     }  // for TESTCASES
311 }
312 
TestLocaleBuilderBasic()313 void LocaleBuilderTest::TestLocaleBuilderBasic() {
314     LocaleBuilder bld;
315     bld.setLanguage("zh");
316     Verify(bld, "zh", "setLanguage('zh') got Error: %s\n");
317 
318     bld.setScript("Hant");
319     Verify(bld, "zh-Hant", "setScript('Hant') got Error: %s\n");
320 
321     bld.setRegion("SG");
322     Verify(bld, "zh-Hant-SG", "setRegion('SG') got Error: %s\n");
323 
324     bld.setRegion("HK");
325     bld.setScript("Hans");
326     Verify(bld, "zh-Hans-HK",
327            "setRegion('HK') and setScript('Hans') got Error: %s\n");
328 
329     bld.setVariant("revised");
330     Verify(bld, "zh-Hans-HK-revised",
331            "setVariant('revised') got Error: %s\n");
332 
333     bld.setUnicodeLocaleKeyword("nu", "thai");
334     Verify(bld, "zh-Hans-HK-revised-u-nu-thai",
335            "setUnicodeLocaleKeyword('nu', 'thai'') got Error: %s\n");
336 
337     bld.setUnicodeLocaleKeyword("co", "pinyin");
338     Verify(bld, "zh-Hans-HK-revised-u-co-pinyin-nu-thai",
339            "setUnicodeLocaleKeyword('co', 'pinyin'') got Error: %s\n");
340 
341     bld.setUnicodeLocaleKeyword("nu", "latn");
342     Verify(bld, "zh-Hans-HK-revised-u-co-pinyin-nu-latn",
343            "setUnicodeLocaleKeyword('nu', 'latn'') got Error: %s\n");
344 
345     bld.setUnicodeLocaleKeyword("nu", nullptr);
346     Verify(bld, "zh-Hans-HK-revised-u-co-pinyin",
347            "setUnicodeLocaleKeyword('nu', ''') got Error: %s\n");
348 
349     bld.setUnicodeLocaleKeyword("co", nullptr);
350     Verify(bld, "zh-Hans-HK-revised",
351            "setUnicodeLocaleKeyword('nu', nullptr) got Error: %s\n");
352 
353     bld.setScript("");
354     Verify(bld, "zh-HK-revised",
355            "setScript('') got Error: %s\n");
356 
357     bld.setVariant("");
358     Verify(bld, "zh-HK",
359            "setVariant('') got Error: %s\n");
360 
361     bld.setRegion("");
362     Verify(bld, "zh",
363            "setRegion('') got Error: %s\n");
364 }
365 
TestSetLanguageWellFormed()366 void LocaleBuilderTest::TestSetLanguageWellFormed() {
367     // http://www.unicode.org/reports/tr35/tr35.html#unicode_language_subtag
368     // unicode_language_subtag = alpha{2,3} | alpha{5,8};
369     // ICUTC decided also support alpha{4}
370     static const char* wellFormedLanguages[] = {
371         "",
372 
373         // alpha{2}
374         "en",
375         "NE",
376         "eN",
377         "Ne",
378 
379         // alpha{3}
380         "aNe",
381         "zzz",
382         "AAA",
383 
384         // alpha{4}
385         "ABCD",
386         "abcd",
387 
388         // alpha{5}
389         "efgij",
390         "AbCAD",
391         "ZAASD",
392 
393         // alpha{6}
394         "efgijk",
395         "AADGFE",
396         "AkDfFz",
397 
398         // alpha{7}
399         "asdfads",
400         "ADSFADF",
401         "piSFkDk",
402 
403         // alpha{8}
404         "oieradfz",
405         "IADSFJKR",
406         "kkDSFJkR",
407     };
408     for (const char* lang : wellFormedLanguages) {
409         UErrorCode status = U_ZERO_ERROR;
410         LocaleBuilder bld;
411         bld.setLanguage(lang);
412         Locale loc = bld.build(status);
413         if (U_FAILURE(status)) {
414             errln("setLanguage(\"%s\") got Error: %s\n",
415                   lang, u_errorName(status));
416         }
417     }
418 }
419 
TestSetLanguageIllFormed()420 void LocaleBuilderTest::TestSetLanguageIllFormed() {
421     static const char* illFormed[] = {
422         "a",
423         "z",
424         "A",
425         "F",
426         "2",
427         "0",
428         "9"
429         "{",
430         ".",
431         "[",
432         "]",
433         "\\",
434 
435         "e1",
436         "N2",
437         "3N",
438         "4e",
439         "e:",
440         "43",
441         "a9",
442 
443         "aN0",
444         "z1z",
445         "2zz",
446         "3A3",
447         "456",
448         "af)",
449 
450         // Per 2019-01-23 ICUTC, we still accept 4alpha as tlang. see ICU-20321.
451         // "latn",
452         // "Arab",
453         // "LATN",
454 
455         "e)gij",
456         "Ab3AD",
457         "ZAAS8",
458 
459         "efgi[]",
460         "AA9GFE",
461         "7kD3Fz",
462         "as8fads",
463         "0DSFADF",
464         "'iSFkDk",
465 
466         "oieradf+",
467         "IADSFJK-",
468         "kkDSFJk0",
469 
470         // alpha{9}
471         "oieradfab",
472         "IADSFJKDE",
473         "kkDSFJkzf",
474     };
475     for (const char* ill : illFormed) {
476         UErrorCode status = U_ZERO_ERROR;
477         LocaleBuilder bld;
478         bld.setLanguage(ill);
479         Locale loc = bld.build(status);
480         if (status != U_ILLEGAL_ARGUMENT_ERROR) {
481             errln("setLanguage(\"%s\") should fail but has no Error\n", ill);
482         }
483     }
484 }
485 
TestSetScriptWellFormed()486 void LocaleBuilderTest::TestSetScriptWellFormed() {
487     // http://www.unicode.org/reports/tr35/tr35.html#unicode_script_subtag
488     // unicode_script_subtag = alpha{4} ;
489     static const char* wellFormedScripts[] = {
490         "",
491 
492         "Latn",
493         "latn",
494         "lATN",
495         "laTN",
496         "arBN",
497         "ARbn",
498         "adsf",
499         "aADF",
500         "BSVS",
501         "LATn",
502     };
503     for (const char* script : wellFormedScripts) {
504         UErrorCode status = U_ZERO_ERROR;
505         LocaleBuilder bld;
506         bld.setScript(script);
507         Locale loc = bld.build(status);
508         if (U_FAILURE(status)) {
509             errln("setScript(\"%s\") got Error: %s\n",
510                   script, u_errorName(status));
511         }
512     }
513 }
514 
TestSetScriptIllFormed()515 void LocaleBuilderTest::TestSetScriptIllFormed() {
516     static const char* illFormed[] = {
517         "a",
518         "z",
519         "A",
520         "F",
521         "2",
522         "0",
523         "9"
524         "{",
525         ".",
526         "[",
527         "]",
528         "\\",
529 
530         "e1",
531         "N2",
532         "3N",
533         "4e",
534         "e:",
535         "43",
536         "a9",
537 
538         "aN0",
539         "z1z",
540         "2zz",
541         "3A3",
542         "456",
543         "af)",
544 
545         "0atn",
546         "l1tn",
547         "lA2N",
548         "la4N",
549         "arB5",
550         "1234",
551 
552         "e)gij",
553         "Ab3AD",
554         "ZAAS8",
555 
556         "efgi[]",
557         "AA9GFE",
558         "7kD3Fz",
559 
560         "as8fads",
561         "0DSFADF",
562         "'iSFkDk",
563 
564         "oieradf+",
565         "IADSFJK-",
566         "kkDSFJk0",
567 
568         // alpha{9}
569         "oieradfab",
570         "IADSFJKDE",
571         "kkDSFJkzf",
572     };
573     for (const char* ill : illFormed) {
574         UErrorCode status = U_ZERO_ERROR;
575         LocaleBuilder bld;
576         bld.setScript(ill);
577         Locale loc = bld.build(status);
578         if (status != U_ILLEGAL_ARGUMENT_ERROR) {
579             errln("setScript(\"%s\") should fail but has no Error\n", ill);
580         }
581     }
582 }
583 
TestSetRegionWellFormed()584 void LocaleBuilderTest::TestSetRegionWellFormed() {
585     // http://www.unicode.org/reports/tr35/tr35.html#unicode_region_subtag
586     // unicode_region_subtag = (alpha{2} | digit{3})
587     static const char* wellFormedRegions[] = {
588         "",
589 
590         // alpha{2}
591         "en",
592         "NE",
593         "eN",
594         "Ne",
595 
596         // digit{3}
597         "000",
598         "999",
599         "123",
600         "987"
601     };
602     for (const char* region : wellFormedRegions) {
603         UErrorCode status = U_ZERO_ERROR;
604         LocaleBuilder bld;
605         bld.setRegion(region);
606         Locale loc = bld.build(status);
607         if (U_FAILURE(status)) {
608             errln("setRegion(\"%s\") got Error: %s\n",
609                   region, u_errorName(status));
610         }
611     }
612 }
613 
TestSetRegionIllFormed()614 void LocaleBuilderTest::TestSetRegionIllFormed() {
615     static const char* illFormed[] = {
616         "a",
617         "z",
618         "A",
619         "F",
620         "2",
621         "0",
622         "9"
623         "{",
624         ".",
625         "[",
626         "]",
627         "\\",
628 
629         "e1",
630         "N2",
631         "3N",
632         "4e",
633         "e:",
634         "43",
635         "a9",
636 
637         "aN0",
638         "z1z",
639         "2zz",
640         "3A3",
641         "4.6",
642         "af)",
643 
644         "0atn",
645         "l1tn",
646         "lA2N",
647         "la4N",
648         "arB5",
649         "1234",
650 
651         "e)gij",
652         "Ab3AD",
653         "ZAAS8",
654 
655         "efgi[]",
656         "AA9GFE",
657         "7kD3Fz",
658 
659         "as8fads",
660         "0DSFADF",
661         "'iSFkDk",
662 
663         "oieradf+",
664         "IADSFJK-",
665         "kkDSFJk0",
666 
667         // alpha{9}
668         "oieradfab",
669         "IADSFJKDE",
670         "kkDSFJkzf",
671     };
672     for (const char* ill : illFormed) {
673         UErrorCode status = U_ZERO_ERROR;
674         LocaleBuilder bld;
675         bld.setRegion(ill);
676         Locale loc = bld.build(status);
677         if (status != U_ILLEGAL_ARGUMENT_ERROR) {
678             errln("setRegion(\"%s\") should fail but has no Error\n", ill);
679         }
680     }
681 }
682 
TestSetVariantWellFormed()683 void LocaleBuilderTest::TestSetVariantWellFormed() {
684     // http://www.unicode.org/reports/tr35/tr35.html#unicode_variant_subtag
685     // (sep unicode_variant_subtag)*
686     // unicode_variant_subtag = (alphanum{5,8} | digit alphanum{3}) ;
687     static const char* wellFormedVariants[] = {
688         "",
689 
690         // alphanum{5}
691         "efgij",
692         "AbCAD",
693         "ZAASD",
694         "0AASD",
695         "A1CAD",
696         "ef2ij",
697         "ads3X",
698         "owqF4",
699 
700         // alphanum{6}
701         "efgijk",
702         "AADGFE",
703         "AkDfFz",
704         "0ADGFE",
705         "A9DfFz",
706         "AADG7E",
707 
708         // alphanum{7}
709         "asdfads",
710         "ADSFADF",
711         "piSFkDk",
712         "a0dfads",
713         "ADSF3DF",
714         "piSFkD9",
715 
716         // alphanum{8}
717         "oieradfz",
718         "IADSFJKR",
719         "kkDSFJkR",
720         "0ADSFJKR",
721         "12345679",
722 
723         // digit alphanum{3}
724         "0123",
725         "1abc",
726         "20EF",
727         "30EF",
728         "8A03",
729         "3Ax3",
730         "9Axy",
731 
732         // (sep unicode_variant_subtag)*
733         "0123-4567",
734         "0ab3-ABCDE",
735         "9ax3-xByD9",
736         "9ax3-xByD9-adfk934a",
737 
738         "0123_4567",
739         "0ab3_ABCDE",
740         "9ax3_xByD9",
741         "9ax3_xByD9_adfk934a",
742 
743         "9ax3-xByD9_adfk934a",
744         "9ax3_xByD9-adfk934a",
745     };
746     for (const char* variant : wellFormedVariants) {
747         UErrorCode status = U_ZERO_ERROR;
748         LocaleBuilder bld;
749         bld.setVariant(variant);
750         Locale loc = bld.build(status);
751         if (U_FAILURE(status)) {
752             errln("setVariant(\"%s\") got Error: %s\n",
753                   variant, u_errorName(status));
754         }
755     }
756 }
757 
TestSetVariantIllFormed()758 void LocaleBuilderTest::TestSetVariantIllFormed() {
759     static const char* illFormed[] = {
760         "a",
761         "z",
762         "A",
763         "F",
764         "2",
765         "0",
766         "9"
767         "{",
768         ".",
769         "[",
770         "]",
771         "\\",
772 
773         "e1",
774         "N2",
775         "3N",
776         "4e",
777         "e:",
778         "43",
779         "a9",
780         "en",
781         "NE",
782         "eN",
783         "Ne",
784 
785         "aNe",
786         "zzz",
787         "AAA",
788         "aN0",
789         "z1z",
790         "2zz",
791         "3A3",
792         "4.6",
793         "af)",
794         "345",
795         "923",
796 
797         "Latn",
798         "latn",
799         "lATN",
800         "laTN",
801         "arBN",
802         "ARbn",
803         "adsf",
804         "aADF",
805         "BSVS",
806         "LATn",
807         "l1tn",
808         "lA2N",
809         "la4N",
810         "arB5",
811         "abc3",
812         "A3BC",
813 
814         "e)gij",
815         "A+3AD",
816         "ZAA=8",
817 
818         "efgi[]",
819         "AA9]FE",
820         "7k[3Fz",
821 
822         "as8f/ds",
823         "0DSFAD{",
824         "'iSFkDk",
825 
826         "oieradf+",
827         "IADSFJK-",
828         "k}DSFJk0",
829 
830         // alpha{9}
831         "oieradfab",
832         "IADSFJKDE",
833         "kkDSFJkzf",
834         "123456789",
835 
836         "-0123",
837         "-0123-4567",
838         "0123-4567-",
839         "-123-4567",
840         "_0123",
841         "_0123_4567",
842         "0123_4567_",
843         "_123_4567",
844 
845         "-abcde-figjk",
846         "abcde-figjk-",
847         "-abcde-figjk-",
848         "_abcde_figjk",
849         "abcde_figjk_",
850         "_abcde_figjk_",
851     };
852     for (const char* ill : illFormed) {
853         UErrorCode status = U_ZERO_ERROR;
854         LocaleBuilder bld;
855         bld.setVariant(ill);
856         Locale loc = bld.build(status);
857         if (status != U_ILLEGAL_ARGUMENT_ERROR) {
858             errln("setVariant(\"%s\") should fail but has no Error\n", ill);
859         }
860     }
861 }
862 
TestSetUnicodeLocaleKeywordWellFormed()863 void LocaleBuilderTest::TestSetUnicodeLocaleKeywordWellFormed() {
864     // http://www.unicode.org/reports/tr35/tr35.html#unicode_locale_extensions
865     // keyword = key (sep type)? ;
866     // key = alphanum alpha ;
867     // type = alphanum{3,8} (sep alphanum{3,8})* ;
868     static const char* wellFormed_key_value[] = {
869         "aa", "123",
870         "3b", "zyzbcdef",
871         "0Z", "1ZB30zk9-abc",
872         "cZ", "2ck30zfZ-adsf023-234kcZ",
873         "ZZ", "Lant",
874         "ko", "",
875     };
876     for (int i = 0; i < UPRV_LENGTHOF(wellFormed_key_value); i += 2) {
877         UErrorCode status = U_ZERO_ERROR;
878         LocaleBuilder bld;
879         bld.setUnicodeLocaleKeyword(wellFormed_key_value[i],
880                                     wellFormed_key_value[i + 1]);
881         Locale loc = bld.build(status);
882         if (U_FAILURE(status)) {
883             errln("setUnicodeLocaleKeyword(\"%s\", \"%s\") got Error: %s\n",
884                   wellFormed_key_value[i],
885                   wellFormed_key_value[i + 1],
886                   u_errorName(status));
887         }
888     }
889 }
890 
TestSetUnicodeLocaleKeywordIllFormedKey()891 void LocaleBuilderTest::TestSetUnicodeLocaleKeywordIllFormedKey() {
892     static const char* illFormed[] = {
893         "34",
894         "ab-cde",
895         "123",
896         "b3",
897         "zyzabcdef",
898         "Z0",
899     };
900     for (const char* ill : illFormed) {
901         UErrorCode status = U_ZERO_ERROR;
902         LocaleBuilder bld;
903         bld.setUnicodeLocaleKeyword(ill, "abc");
904         Locale loc = bld.build(status);
905         if (status != U_ILLEGAL_ARGUMENT_ERROR) {
906             errln("setUnicodeLocaleKeyword(\"%s\", \"abc\") should fail but has no Error\n",
907                   ill);
908         }
909     }
910 }
911 
TestSetUnicodeLocaleKeywordIllFormedValue()912 void LocaleBuilderTest::TestSetUnicodeLocaleKeywordIllFormedValue() {
913     static const char* illFormed[] = {
914         "34",
915         "ab-",
916         "-cd",
917         "-ef-",
918         "zyzabcdef",
919         "ab-abc",
920         "1ZB30zfk9-abc",
921         "2ck30zfk9-adsf023-234kcZ",
922     };
923     for (const char* ill : illFormed) {
924         UErrorCode status = U_ZERO_ERROR;
925         LocaleBuilder bld;
926         bld.setUnicodeLocaleKeyword("ab", ill);
927         Locale loc = bld.build(status);
928         if (status != U_ILLEGAL_ARGUMENT_ERROR) {
929             errln("setUnicodeLocaleKeyword(\"ab\", \"%s\") should fail but has no Error\n",
930                   ill);
931         }
932     }
933 }
934 
TestAddRemoveUnicodeLocaleAttribute()935 void LocaleBuilderTest::TestAddRemoveUnicodeLocaleAttribute() {
936     LocaleBuilder bld;
937     UErrorCode status = U_ZERO_ERROR;
938     Locale loc = bld.setLanguage("fr")
939                     .addUnicodeLocaleAttribute("abc")
940                     .addUnicodeLocaleAttribute("aBc")
941                     .addUnicodeLocaleAttribute("EFG")
942                     .addUnicodeLocaleAttribute("efghi")
943                     .addUnicodeLocaleAttribute("efgh")
944                     .addUnicodeLocaleAttribute("efGhi")
945                     .addUnicodeLocaleAttribute("EFg")
946                     .addUnicodeLocaleAttribute("hijk")
947                     .addUnicodeLocaleAttribute("EFG")
948                     .addUnicodeLocaleAttribute("HiJK")
949                     .addUnicodeLocaleAttribute("aBc")
950                     .build(status);
951     if (U_FAILURE(status)) {
952         errln("addUnicodeLocaleAttribute() got Error: %s\n",
953               u_errorName(status));
954     }
955     std::string expected("fr-u-abc-efg-efgh-efghi-hijk");
956     std::string actual = loc.toLanguageTag<std::string>(status);
957     if (U_FAILURE(status) || expected != actual) {
958         errln("Should get \"%s\" but get \"%s\"\n", expected.c_str(), actual.c_str());
959     }
960 
961     // remove "efgh" in the middle with different casing.
962     loc = bld.removeUnicodeLocaleAttribute("eFgH").build(status);
963     if (U_FAILURE(status)) {
964         errln("removeUnicodeLocaleAttribute() got Error: %s\n",
965               u_errorName(status));
966     }
967     expected = "fr-u-abc-efg-efghi-hijk";
968     actual = loc.toLanguageTag<std::string>(status);
969     if (U_FAILURE(status) || expected != actual) {
970         errln("Should get \"%s\" but get \"%s\"\n", expected.c_str(), actual.c_str());
971     }
972 
973     // remove non-existing attributes.
974     loc = bld.removeUnicodeLocaleAttribute("efgh").build(status);
975     if (U_FAILURE(status)) {
976         errln("removeUnicodeLocaleAttribute() got Error: %s\n",
977               u_errorName(status));
978     }
979     actual = loc.toLanguageTag<std::string>(status);
980     if (U_FAILURE(status) || expected != actual) {
981         errln("Should get \"%s\" but get \"%s\"\n", expected.c_str(), actual.c_str());
982     }
983 
984     // remove "abc" in the beginning with different casing.
985     loc = bld.removeUnicodeLocaleAttribute("ABC").build(status);
986     if (U_FAILURE(status)) {
987         errln("removeUnicodeLocaleAttribute() got Error: %s\n",
988               u_errorName(status));
989     }
990     expected = "fr-u-efg-efghi-hijk";
991     actual = loc.toLanguageTag<std::string>(status);
992     if (U_FAILURE(status) || expected != actual) {
993         errln("Should get \"%s\" but get \"%s\"\n", expected.c_str(), actual.c_str());
994     }
995 
996     // remove non-existing substring in the end.
997     loc = bld.removeUnicodeLocaleAttribute("hij").build(status);
998     if (U_FAILURE(status)) {
999         errln("removeUnicodeLocaleAttribute() got Error: %s\n",
1000               u_errorName(status));
1001     }
1002     actual = loc.toLanguageTag<std::string>(status);
1003     if (U_FAILURE(status) || expected != actual) {
1004         errln("Should get \"%s\" but get \"%s\"\n", expected.c_str(), actual.c_str());
1005     }
1006 
1007     // remove "hijk" in the end with different casing.
1008     loc = bld.removeUnicodeLocaleAttribute("hIJK").build(status);
1009     if (U_FAILURE(status)) {
1010         errln("removeUnicodeLocaleAttribute() got Error: %s\n",
1011               u_errorName(status));
1012     }
1013     expected = "fr-u-efg-efghi";
1014     actual = loc.toLanguageTag<std::string>(status);
1015     if (U_FAILURE(status) || expected != actual) {
1016         errln("Should get \"%s\" but get \"%s\"\n", expected.c_str(), actual.c_str());
1017     }
1018 
1019     // remove "efghi" in the end with different casing.
1020     loc = bld.removeUnicodeLocaleAttribute("EFGhi").build(status);
1021     if (U_FAILURE(status)) {
1022         errln("removeUnicodeLocaleAttribute() got Error: %s\n",
1023               u_errorName(status));
1024     }
1025     expected = "fr-u-efg";
1026     actual = loc.toLanguageTag<std::string>(status);
1027     if (U_FAILURE(status) || expected != actual) {
1028         errln("Should get \"%s\" but get \"%s\"\n", expected.c_str(), actual.c_str());
1029     }
1030 
1031     // remove "efg" in as the only one, with different casing.
1032     loc = bld.removeUnicodeLocaleAttribute("EFG").build(status);
1033     if (U_FAILURE(status)) {
1034         errln("removeUnicodeLocaleAttribute() got Error: %s\n",
1035               u_errorName(status));
1036     }
1037     expected = "fr";
1038     actual = loc.toLanguageTag<std::string>(status);
1039     if (U_FAILURE(status) || expected != actual) {
1040         errln("Should get \"%s\" but get \"%s\"\n", expected.c_str(), actual.c_str());
1041     }
1042 
1043 }
1044 
TestAddRemoveUnicodeLocaleAttributeWellFormed()1045 void LocaleBuilderTest::TestAddRemoveUnicodeLocaleAttributeWellFormed() {
1046     // http://www.unicode.org/reports/tr35/tr35.html#unicode_locale_extensions
1047     // attribute = alphanum{3,8} ;
1048     static const char* wellFormedAttributes[] = {
1049         // alphanum{3}
1050         "AbC",
1051         "ZAA",
1052         "0AA",
1053         "x3A",
1054         "xa8",
1055 
1056         // alphanum{4}
1057         "AbCA",
1058         "ZASD",
1059         "0ASD",
1060         "A3a4",
1061         "zK90",
1062 
1063         // alphanum{5}
1064         "efgij",
1065         "AbCAD",
1066         "ZAASD",
1067         "0AASD",
1068         "A1CAD",
1069         "ef2ij",
1070         "ads3X",
1071         "owqF4",
1072 
1073         // alphanum{6}
1074         "efgijk",
1075         "AADGFE",
1076         "AkDfFz",
1077         "0ADGFE",
1078         "A9DfFz",
1079         "AADG7E",
1080 
1081         // alphanum{7}
1082         "asdfads",
1083         "ADSFADF",
1084         "piSFkDk",
1085         "a0dfads",
1086         "ADSF3DF",
1087         "piSFkD9",
1088 
1089         // alphanum{8}
1090         "oieradfz",
1091         "IADSFJKR",
1092         "kkDSFJkR",
1093     };
1094     LocaleBuilder bld;
1095     for (int i = 0; i < UPRV_LENGTHOF(wellFormedAttributes); i++) {
1096         if (i % 5 == 0) {
1097             bld.clear();
1098         }
1099         UErrorCode status = U_ZERO_ERROR;
1100         bld.addUnicodeLocaleAttribute(wellFormedAttributes[i]);
1101         Locale loc = bld.build(status);
1102         if (U_FAILURE(status)) {
1103             errln("addUnicodeLocaleAttribute(\"%s\") got Error: %s\n",
1104                   wellFormedAttributes[i], u_errorName(status));
1105         }
1106         if (i > 2) {
1107             bld.removeUnicodeLocaleAttribute(wellFormedAttributes[i - 1]);
1108             loc = bld.build(status);
1109             if (U_FAILURE(status)) {
1110                 errln("removeUnicodeLocaleAttribute(\"%s\") got Error: %s\n",
1111                       wellFormedAttributes[i - 1], u_errorName(status));
1112             }
1113             bld.removeUnicodeLocaleAttribute(wellFormedAttributes[i - 3]);
1114             loc = bld.build(status);
1115             if (U_FAILURE(status)) {
1116                 errln("removeUnicodeLocaleAttribute(\"%s\") got Error: %s\n",
1117                       wellFormedAttributes[i - 3], u_errorName(status));
1118             }
1119         }
1120     }
1121 }
1122 
TestAddUnicodeLocaleAttributeIllFormed()1123 void LocaleBuilderTest::TestAddUnicodeLocaleAttributeIllFormed() {
1124     static const char* illFormed[] = {
1125         "aa",
1126         "34",
1127         "ab-",
1128         "-cd",
1129         "-ef-",
1130         "zyzabcdef",
1131         "123456789",
1132         "ab-abc",
1133         "1ZB30zfk9-abc",
1134         "2ck30zfk9-adsf023-234kcZ",
1135     };
1136     for (const char* ill : illFormed) {
1137         UErrorCode status = U_ZERO_ERROR;
1138         LocaleBuilder bld;
1139         bld.addUnicodeLocaleAttribute(ill);
1140         Locale loc = bld.build(status);
1141         if (status != U_ILLEGAL_ARGUMENT_ERROR) {
1142             errln("addUnicodeLocaleAttribute(\"%s\") should fail but has no Error\n",
1143                   ill);
1144         }
1145     }
1146 }
1147 
TestSetExtensionU()1148 void LocaleBuilderTest::TestSetExtensionU() {
1149     LocaleBuilder bld;
1150     bld.setLanguage("zh");
1151     Verify(bld, "zh",
1152            "setLanguage(\"zh\") got Error: %s\n");
1153 
1154     bld.setExtension('u', "co-stroke");
1155     Verify(bld, "zh-u-co-stroke",
1156            "setExtension('u', \"co-stroke\") got Error: %s\n");
1157 
1158     bld.setExtension('U', "ca-islamic");
1159     Verify(bld, "zh-u-ca-islamic",
1160            "setExtension('U', \"zh-u-ca-islamic\") got Error: %s\n");
1161 
1162     bld.setExtension('u', "ca-chinese");
1163     Verify(bld, "zh-u-ca-chinese",
1164            "setExtension('u', \"ca-chinese\") got Error: %s\n");
1165 
1166     bld.setExtension('U', "co-pinyin");
1167     Verify(bld, "zh-u-co-pinyin",
1168            "setExtension('U', \"co-pinyin\") got Error: %s\n");
1169 
1170     bld.setRegion("TW");
1171     Verify(bld, "zh-TW-u-co-pinyin",
1172            "setRegion(\"TW\") got Error: %s\n");
1173 
1174     bld.setExtension('U', "");
1175     Verify(bld, "zh-TW",
1176            "setExtension('U', \"\") got Error: %s\n");
1177 
1178     bld.setExtension('u', "abc-defg-kr-face");
1179     Verify(bld, "zh-TW-u-abc-defg-kr-face",
1180            "setExtension('u', \"abc-defg-kr-face\") got Error: %s\n");
1181 
1182     bld.setExtension('U', "ca-japanese");
1183     Verify(bld, "zh-TW-u-ca-japanese",
1184            "setExtension('U', \"ca-japanese\") got Error: %s\n");
1185 
1186 }
1187 
TestSetExtensionValidateUWellFormed()1188 void LocaleBuilderTest::TestSetExtensionValidateUWellFormed() {
1189     static const char* wellFormedExtensions[] = {
1190         // keyword
1191         //   keyword = key (sep type)? ;
1192         //   key = alphanum alpha ;
1193         //   type = alphanum{3,8} (sep alphanum{3,8})* ;
1194         "3A",
1195         "ZA",
1196         "az-abc",
1197         "zz-123",
1198         "7z-12345678",
1199         "kb-A234567Z",
1200         // (sep keyword)+
1201         "1z-ZZ",
1202         "2z-ZZ-123",
1203         "3z-ZZ-123-cd",
1204         "0z-ZZ-123-cd-efghijkl",
1205         // attribute
1206         "abc",
1207         "456",
1208         "87654321",
1209         "ZABADFSD",
1210         // (sep attribute)+
1211         "abc-ZABADFSD",
1212         "123-ZABADFSD",
1213         "K2K-12345678",
1214         "K2K-12345678-zzz",
1215         // (sep attribute)+ (sep keyword)*
1216         "K2K-12345678-zz",
1217         "K2K-12345678-zz-0z",
1218         "K2K-12345678-9z-AZ-abc",
1219         "K2K-12345678-zz-9A-234",
1220         "K2K-12345678-zk0-abc-efg-zz-9k-234",
1221     };
1222     for (const char* extension : wellFormedExtensions) {
1223         UErrorCode status = U_ZERO_ERROR;
1224         LocaleBuilder bld;
1225         bld.setExtension('u', extension);
1226         Locale loc = bld.build(status);
1227         if (U_FAILURE(status)) {
1228             errln("setExtension('u', \"%s\") got Error: %s\n",
1229                   extension, u_errorName(status));
1230         }
1231     }
1232 }
1233 
TestSetExtensionValidateUIllFormed()1234 void LocaleBuilderTest::TestSetExtensionValidateUIllFormed() {
1235     static const char* illFormed[] = {
1236         // bad key
1237         "-",
1238         "-ab",
1239         "ab-",
1240         "abc-",
1241         "-abc",
1242         "0",
1243         "a",
1244         "A0",
1245         "z9",
1246         "09",
1247         "90",
1248         // bad keyword
1249         "AB-A0",
1250         "AB-efg-A0",
1251         "xy-123456789",
1252         "AB-Aa-",
1253         "AB-Aac-",
1254         // bad attribute
1255         "abcdefghi",
1256         "abcdefgh-",
1257         "abcdefgh-abcdefghi",
1258         "abcdefgh-1",
1259         "abcdefgh-a",
1260         "abcdefgh-a2345678z",
1261     };
1262     for (const char* ill : illFormed) {
1263         UErrorCode status = U_ZERO_ERROR;
1264         LocaleBuilder bld;
1265         bld.setExtension('u', ill);
1266         Locale loc = bld.build(status);
1267         if (status != U_ILLEGAL_ARGUMENT_ERROR) {
1268             errln("setExtension('u', \"%s\") should fail but has no Error\n",
1269                   ill);
1270         }
1271     }
1272 }
1273 
TestSetExtensionT()1274 void LocaleBuilderTest::TestSetExtensionT() {
1275     LocaleBuilder bld;
1276     bld.setLanguage("fr");
1277     Verify(bld, "fr",
1278            "setLanguage(\"fr\") got Error: %s\n");
1279 
1280     bld.setExtension('T', "zh");
1281     Verify(bld, "fr-t-zh",
1282            "setExtension('T', \"zh\") got Error: %s\n");
1283 
1284     bld.setExtension('t', "zh-Hant-TW-1234-A9-123-456ABCDE");
1285     Verify(bld, "fr-t-zh-hant-tw-1234-a9-123-456abcde",
1286            "setExtension('t', \"zh-Hant-TW-1234-A9-123-456ABCDE\") got Error: %s\n");
1287 
1288     bld.setExtension('T', "a9-123");
1289     Verify(bld, "fr-t-a9-123",
1290            "setExtension('T', \"a9-123\") got Error: %s\n");
1291 
1292     bld.setRegion("MX");
1293     Verify(bld, "fr-MX-t-a9-123",
1294            "setRegion(\"MX\") got Error: %s\n");
1295 
1296     bld.setScript("Hans");
1297     Verify(bld, "fr-Hans-MX-t-a9-123",
1298            "setScript(\"Hans\") got Error: %s\n");
1299 
1300     bld.setVariant("9abc-abcde");
1301     Verify(bld, "fr-Hans-MX-9abc-abcde-t-a9-123",
1302            "setVariant(\"9abc-abcde\") got Error: %s\n");
1303 
1304     bld.setExtension('T', "");
1305     Verify(bld, "fr-Hans-MX-9abc-abcde",
1306            "bld.setExtension('T', \"\") got Error: %s\n");
1307 }
1308 
TestSetExtensionValidateTWellFormed()1309 void LocaleBuilderTest::TestSetExtensionValidateTWellFormed() {
1310     // ((sep tlang (sep tfield)*) | (sep tfield)+)
1311     static const char* wellFormedExtensions[] = {
1312         // tlang
1313         //  tlang = unicode_language_subtag (sep unicode_script_subtag)?
1314         //          (sep unicode_region_subtag)?  (sep unicode_variant_subtag)* ;
1315         // unicode_language_subtag
1316         "en",
1317         "abc",
1318         "abcde",
1319         "ABCDEFGH",
1320         // unicode_language_subtag sep unicode_script_subtag
1321         "en-latn",
1322         "abc-arab",
1323         "ABCDEFGH-Thai",
1324         // unicode_language_subtag sep unicode_script_subtag sep unicode_region_subtag
1325         "en-latn-ME",
1326         "abc-arab-RU",
1327         "ABCDEFGH-Thai-TH",
1328         "en-latn-409",
1329         "abc-arab-123",
1330         "ABCDEFGH-Thai-456",
1331         // unicode_language_subtag sep unicode_region_subtag
1332         "en-ME",
1333         "abc-RU",
1334         "ABCDEFGH-TH",
1335         "en-409",
1336         "abc-123",
1337         "ABCDEFGH-456",
1338         // unicode_language_subtag sep unicode_script_subtag sep unicode_region_subtag
1339         // sep (sep unicode_variant_subtag)*
1340         "en-latn-ME-abcde",
1341         "abc-arab-RU-3abc-abcdef",
1342         "ABCDEFGH-Thai-TH-ADSFS-9xyz-abcdef",
1343         "en-latn-409-xafsa",
1344         "abc-arab-123-ADASDF",
1345         "ABCDEFGH-Thai-456-9sdf-ADASFAS",
1346         // (sep tfield)+
1347         "A0-abcde",
1348         "z9-abcde123",
1349         "z9-abcde123-a1-abcde",
1350         // tlang (sep tfield)*
1351         "fr-A0-abcde",
1352         "fr-FR-A0-abcde",
1353         "fr-123-z9-abcde123-a1-abcde",
1354         "fr-Latn-FR-z9-abcde123-a1-abcde",
1355         "gab-Thai-TH-abcde-z9-abcde123-a1-abcde",
1356         "gab-Thai-TH-0bde-z9-abcde123-a1-abcde",
1357     };
1358     for (const char* extension : wellFormedExtensions) {
1359         UErrorCode status = U_ZERO_ERROR;
1360         LocaleBuilder bld;
1361         bld.setExtension('t', extension);
1362         Locale loc = bld.build(status);
1363         if (U_FAILURE(status)) {
1364             errln("setExtension('t', \"%s\") got Error: %s\n",
1365                   extension, u_errorName(status));
1366         }
1367     }
1368 }
1369 
TestSetExtensionValidateTIllFormed()1370 void LocaleBuilderTest::TestSetExtensionValidateTIllFormed() {
1371     static const char* illFormed[] = {
1372         "a",
1373         "a-",
1374         "0",
1375         "9-",
1376         "-9",
1377         "-z",
1378         // "Latn", // Per 2019-01-23 ICUTC, still accept 4alpha. See ICU-20321
1379         "Latn-",
1380         "en-",
1381         "nob-",
1382         "-z9",
1383         "a3",
1384         "a3-",
1385         "3a",
1386         "0z-",
1387         "en-123-a1",
1388         "en-TH-a1",
1389         "gab-TH-a1",
1390         "gab-Thai-a1",
1391         "gab-Thai-TH-a1",
1392         "gab-Thai-TH-0bde-a1",
1393         "gab-Thai-TH-0bde-3b",
1394         "gab-Thai-TH-0bde-z9-a1",
1395         "gab-Thai-TH-0bde-z9-3b",
1396         "gab-Thai-TH-0bde-z9-abcde123-3b",
1397         "gab-Thai-TH-0bde-z9-abcde123-ab",
1398         "gab-Thai-TH-0bde-z9-abcde123-ab",
1399         "gab-Thai-TH-0bde-z9-abcde123-a1",
1400         "gab-Thai-TH-0bde-z9-abcde123-a1-",
1401         "gab-Thai-TH-0bde-z9-abcde123-a1-a",
1402         "gab-Thai-TH-0bde-z9-abcde123-a1-ab",
1403     };
1404     for (const char* ill : illFormed) {
1405         UErrorCode status = U_ZERO_ERROR;
1406         LocaleBuilder bld;
1407         bld.setExtension('t', ill);
1408         Locale loc = bld.build(status);
1409         if (status != U_ILLEGAL_ARGUMENT_ERROR) {
1410             errln("setExtension('t', \"%s\") should fail but has no Error\n",
1411                   ill);
1412         }
1413     }
1414 }
1415 
TestSetExtensionPU()1416 void LocaleBuilderTest::TestSetExtensionPU() {
1417     LocaleBuilder bld;
1418     bld.setLanguage("ar");
1419     Verify(bld, "ar",
1420            "setLanguage(\"ar\") got Error: %s\n");
1421 
1422     bld.setExtension('X', "a-b-c-d-e");
1423     Verify(bld, "ar-x-a-b-c-d-e",
1424            "setExtension('X', \"a-b-c-d-e\") got Error: %s\n");
1425 
1426     bld.setExtension('x', "0-1-2-3");
1427     Verify(bld, "ar-x-0-1-2-3",
1428            "setExtension('x', \"0-1-2-3\") got Error: %s\n");
1429 
1430     bld.setExtension('X', "0-12345678-x-x");
1431     Verify(bld, "ar-x-0-12345678-x-x",
1432            "setExtension('x', \"ar-x-0-12345678-x-x\") got Error: %s\n");
1433 
1434     bld.setRegion("TH");
1435     Verify(bld, "ar-TH-x-0-12345678-x-x",
1436            "setRegion(\"TH\") got Error: %s\n");
1437 
1438     bld.setExtension('X', "");
1439     Verify(bld, "ar-TH",
1440            "setExtension(\"X\") got Error: %s\n");
1441 }
1442 
TestSetExtensionValidatePUWellFormed()1443 void LocaleBuilderTest::TestSetExtensionValidatePUWellFormed() {
1444     // ((sep tlang (sep tfield)*) | (sep tfield)+)
1445     static const char* wellFormedExtensions[] = {
1446         "a",  // Short subtag
1447         "z",  // Short subtag
1448         "0",  // Short subtag, digit
1449         "9",  // Short subtag, digit
1450         "a-0",  // Two short subtag, alpha and digit
1451         "9-z",  // Two short subtag, digit and alpha
1452         "ab",
1453         "abc",
1454         "abcefghi",  // Long subtag
1455         "87654321",
1456         "01",
1457         "234",
1458         "0a-ab-87654321",  // Three subtags
1459         "87654321-ab-00-3A",  // Four subtabs
1460         "a-9-87654321",  // Three subtags with short and long subtags
1461         "87654321-ab-0-3A",
1462     };
1463     for (const char* extension : wellFormedExtensions) {
1464         UErrorCode status = U_ZERO_ERROR;
1465         LocaleBuilder bld;
1466         bld.setExtension('x', extension);
1467         Locale loc = bld.build(status);
1468         if (U_FAILURE(status)) {
1469             errln("setExtension('x', \"%s\") got Error: %s\n",
1470                   extension, u_errorName(status));
1471         }
1472     }
1473 }
1474 
TestSetExtensionValidatePUIllFormed()1475 void LocaleBuilderTest::TestSetExtensionValidatePUIllFormed() {
1476     static const char* illFormed[] = {
1477         "123456789",  // Too long
1478         "abcdefghi",  // Too long
1479         "ab-123456789",  // Second subtag too long
1480         "abcdefghi-12",  // First subtag too long
1481         "a-ab-987654321",  // Third subtag too long
1482         "987654321-a-0-3",  // First subtag too long
1483     };
1484     for (const char* ill : illFormed) {
1485         UErrorCode status = U_ZERO_ERROR;
1486         LocaleBuilder bld;
1487         bld.setExtension('x', ill);
1488         Locale loc = bld.build(status);
1489         if (status != U_ILLEGAL_ARGUMENT_ERROR) {
1490             errln("setExtension('x', \"%s\") should fail but has no Error\n",
1491                   ill);
1492         }
1493     }
1494 }
1495 
TestSetExtensionOthers()1496 void LocaleBuilderTest::TestSetExtensionOthers() {
1497     LocaleBuilder bld;
1498     bld.setLanguage("fr");
1499     Verify(bld, "fr",
1500            "setLanguage(\"fr\") got Error: %s\n");
1501 
1502     bld.setExtension('Z', "ab");
1503     Verify(bld, "fr-z-ab",
1504            "setExtension('Z', \"ab\") got Error: %s\n");
1505 
1506     bld.setExtension('0', "xyz12345-abcdefg");
1507     Verify(bld, "fr-0-xyz12345-abcdefg-z-ab",
1508            "setExtension('0', \"xyz12345-abcdefg\") got Error: %s\n");
1509 
1510     bld.setExtension('a', "01-12345678-ABcdef");
1511     Verify(bld, "fr-0-xyz12345-abcdefg-a-01-12345678-abcdef-z-ab",
1512            "setExtension('a', \"01-12345678-ABcdef\") got Error: %s\n");
1513 
1514     bld.setRegion("TH");
1515     Verify(bld, "fr-TH-0-xyz12345-abcdefg-a-01-12345678-abcdef-z-ab",
1516            "setRegion(\"TH\") got Error: %s\n");
1517 
1518     bld.setScript("Arab");
1519     Verify(bld, "fr-Arab-TH-0-xyz12345-abcdefg-a-01-12345678-abcdef-z-ab",
1520            "setRegion(\"Arab\") got Error: %s\n");
1521 
1522     bld.setExtension('A', "97");
1523     Verify(bld, "fr-Arab-TH-0-xyz12345-abcdefg-a-97-z-ab",
1524            "setExtension('a', \"97\") got Error: %s\n");
1525 
1526     bld.setExtension('a', "");
1527     Verify(bld, "fr-Arab-TH-0-xyz12345-abcdefg-z-ab",
1528            "setExtension('a', \"\") got Error: %s\n");
1529 
1530     bld.setExtension('0', "");
1531     Verify(bld, "fr-Arab-TH-z-ab",
1532            "setExtension('0', \"\") got Error: %s\n");
1533 }
1534 
TestSetExtensionValidateOthersWellFormed()1535 void LocaleBuilderTest::TestSetExtensionValidateOthersWellFormed() {
1536     static const char* wellFormedExtensions[] = {
1537         "ab",
1538         "abc",
1539         "abcefghi",
1540         "01",
1541         "234",
1542         "87654321",
1543         "0a-ab-87654321",
1544         "87654321-ab-00-3A",
1545     };
1546 
1547     const char * aToZ = "abcdefghijklmnopqrstuvwxyz";
1548     const int32_t aToZLen = static_cast<int32_t>(uprv_strlen(aToZ));
1549     int32_t i = 0;
1550     for (const char* extension : wellFormedExtensions) {
1551         char ch = aToZ[i];
1552         i = (i + 1) % aToZLen;
1553         UErrorCode status = U_ZERO_ERROR;
1554         LocaleBuilder bld;
1555         bld.setExtension(ch, extension);
1556         Locale loc = bld.build(status);
1557         if (U_FAILURE(status)) {
1558             errln("setExtension('%c', \"%s\") got Error: %s\n",
1559                   ch, extension, u_errorName(status));
1560         }
1561     }
1562 
1563     const char* someChars =
1564         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789`~!@#$%^&*()-_=+;:,.<>?";
1565     const int32_t someCharsLen = static_cast<int32_t>(uprv_strlen(someChars));
1566     for (int32_t i = 0; i < someCharsLen; i++) {
1567         char ch = someChars[i];
1568         UErrorCode status = U_ZERO_ERROR;
1569         LocaleBuilder bld;
1570         bld.setExtension(ch, wellFormedExtensions[ch % UPRV_LENGTHOF(wellFormedExtensions)]);
1571         Locale loc = bld.build(status);
1572         if (uprv_isASCIILetter(ch) || ('0' <= ch && ch <= '9')) {
1573             if (ch != 't' && ch != 'T' && ch != 'u' && ch != 'U' && ch != 'x' && ch != 'X') {
1574                 if (U_FAILURE(status)) {
1575                     errln("setExtension('%c', \"%s\") got Error: %s\n",
1576                           ch, wellFormedExtensions[ch % UPRV_LENGTHOF(wellFormedExtensions)], u_errorName(status));
1577                 }
1578             }
1579         } else {
1580             if (status != U_ILLEGAL_ARGUMENT_ERROR) {
1581                 errln("setExtension('%c', \"%s\") should fail but has no Error\n",
1582                       ch, wellFormedExtensions[ch % UPRV_LENGTHOF(wellFormedExtensions)]);
1583             }
1584         }
1585 
1586     }
1587 }
1588 
TestSetExtensionValidateOthersIllFormed()1589 void LocaleBuilderTest::TestSetExtensionValidateOthersIllFormed() {
1590     static const char* illFormed[] = {
1591         "0",  // Too short
1592         "a",  // Too short
1593         "123456789",  // Too long
1594         "abcdefghi",  // Too long
1595         "ab-123456789",  // Second subtag too long
1596         "abcdefghi-12",  // First subtag too long
1597         "a-ab-87654321",  // Third subtag too long
1598         "87654321-a-0-3",  // First subtag too long
1599     };
1600     const char * aToZ = "abcdefghijklmnopqrstuvwxyz";
1601     const int32_t aToZLen = static_cast<int32_t>(uprv_strlen(aToZ));
1602     int32_t i = 0;
1603     for (const char* ill : illFormed) {
1604         char ch = aToZ[i];
1605         i = (i + 1) % aToZLen;
1606         UErrorCode status = U_ZERO_ERROR;
1607         LocaleBuilder bld;
1608         bld.setExtension(ch, ill);
1609         Locale loc = bld.build(status);
1610         if (status != U_ILLEGAL_ARGUMENT_ERROR) {
1611             errln("setExtension('%c', \"%s\") should fail but has no Error\n",
1612                   ch, ill);
1613         }
1614     }
1615 }
1616 
TestSetLocale()1617 void LocaleBuilderTest::TestSetLocale() {
1618     LocaleBuilder bld1, bld2;
1619     UErrorCode status = U_ZERO_ERROR;
1620     Locale l1 = bld1.setLanguage("en")
1621         .setScript("Latn")
1622         .setRegion("MX")
1623         .setVariant("3456-abcde")
1624         .addUnicodeLocaleAttribute("456")
1625         .addUnicodeLocaleAttribute("123")
1626         .setUnicodeLocaleKeyword("nu", "thai")
1627         .setUnicodeLocaleKeyword("co", "stroke")
1628         .setUnicodeLocaleKeyword("ca", "chinese")
1629         .build(status);
1630     if (U_FAILURE(status) || l1.isBogus()) {
1631         errln("build got Error: %s\n", u_errorName(status));
1632     }
1633     status = U_ZERO_ERROR;
1634     Locale l2 = bld1.setLocale(l1).build(status);
1635     if (U_FAILURE(status) || l2.isBogus()) {
1636         errln("build got Error: %s\n", u_errorName(status));
1637     }
1638 
1639     if (l1 != l2) {
1640         errln("Two locales should be the same, but one is '%s' and the other is '%s'",
1641               l1.getName(), l2.getName());
1642     }
1643 }
1644 
TestPosixCases()1645 void LocaleBuilderTest::TestPosixCases() {
1646     UErrorCode status = U_ZERO_ERROR;
1647     Locale l1 = Locale::forLanguageTag("en-US-u-va-posix", status);
1648     if (U_FAILURE(status) || l1.isBogus()) {
1649         errln("build got Error: %s\n", u_errorName(status));
1650     }
1651     LocaleBuilder bld;
1652     bld.setLanguage("en")
1653         .setRegion("MX")
1654         .setScript("Arab")
1655         .setUnicodeLocaleKeyword("nu", "Thai")
1656         .setExtension('x', "1");
1657     // All of above should be cleared by the setLocale call.
1658     Locale l2 = bld.setLocale(l1).build(status);
1659     if (U_FAILURE(status) || l2.isBogus()) {
1660         errln("build got Error: %s\n", u_errorName(status));
1661     }
1662     if (l1 != l2) {
1663         errln("The result locale should be the set as the setLocale %s but got %s\n",
1664               l1.toLanguageTag<std::string>(status).c_str(),
1665               l2.toLanguageTag<std::string>(status).c_str());
1666     }
1667     Locale posix("en-US-POSIX");
1668     if (posix != l2) {
1669         errln("The result locale should be the set as %s but got %s\n",
1670               posix.getName(), l2.getName());
1671     }
1672 }
1673