1 /*
2  *******************************************************************************
3  * Copyright (C) 2009-2010, International Business Machines Corporation and    *
4  * others. All Rights Reserved.                                                *
5  *******************************************************************************
6  */
7 package com.ibm.icu.impl.locale;
8 
9 import java.util.ArrayList;
10 import java.util.HashMap;
11 import java.util.HashSet;
12 import java.util.List;
13 import java.util.Set;
14 
15 public final class InternalLocaleBuilder {
16 
    // Compile-time switch guarding the JDK backward-compatibility special
    // cases in setLocale(); it is false here, so that code is inactive.
    private static final boolean JDKIMPL = false;

    // Base locale fields. The empty string means "not set".
    private String _language = "";
    private String _script = "";
    private String _region = "";
    private String _variant = "";

    // Key under which the private use value is stored in _extensions,
    // built from the first character of LanguageTag.PRIVATEUSE.
    private static final CaseInsensitiveChar PRIVUSE_KEY = new CaseInsensitiveChar(LanguageTag.PRIVATEUSE.charAt(0));

    // Extension values keyed by singleton character. The Unicode locale
    // extension is not kept here; it is tracked by the two collections below.
    // All three collections are created lazily and may be null.
    private HashMap<CaseInsensitiveChar, String> _extensions;
    // Unicode locale extension attributes.
    private HashSet<CaseInsensitiveString> _uattributes;
    // Unicode locale extension keywords (key -> type).
    private HashMap<CaseInsensitiveString, String> _ukeywords;
29 
30 
InternalLocaleBuilder()31     public InternalLocaleBuilder() {
32     }
33 
setLanguage(String language)34     public InternalLocaleBuilder setLanguage(String language) throws LocaleSyntaxException {
35         if (language == null || language.length() == 0) {
36             _language = "";
37         } else {
38             if (!LanguageTag.isLanguage(language)) {
39                 throw new LocaleSyntaxException("Ill-formed language: " + language, 0);
40             }
41             _language = language;
42         }
43         return this;
44     }
45 
setScript(String script)46     public InternalLocaleBuilder setScript(String script) throws LocaleSyntaxException {
47         if (script == null || script.length() == 0) {
48             _script = "";
49         } else {
50             if (!LanguageTag.isScript(script)) {
51                 throw new LocaleSyntaxException("Ill-formed script: " + script, 0);
52             }
53             _script = script;
54         }
55         return this;
56     }
57 
setRegion(String region)58     public InternalLocaleBuilder setRegion(String region) throws LocaleSyntaxException {
59         if (region == null || region.length() == 0) {
60             _region = "";
61         } else {
62             if (!LanguageTag.isRegion(region)) {
63                 throw new LocaleSyntaxException("Ill-formed region: " + region, 0);
64             }
65             _region = region;
66         }
67         return this;
68     }
69 
setVariant(String variant)70     public InternalLocaleBuilder setVariant(String variant) throws LocaleSyntaxException {
71         if (variant == null || variant.length() == 0) {
72             _variant = "";
73         } else {
74             // normalize separators to "_"
75             String var = variant.replaceAll(LanguageTag.SEP, BaseLocale.SEP);
76             int errIdx = checkVariants(var, BaseLocale.SEP);
77             if (errIdx != -1) {
78                 throw new LocaleSyntaxException("Ill-formed variant: " + variant, errIdx);
79             }
80             _variant = var;
81         }
82         return this;
83     }
84 
addUnicodeLocaleAttribute(String attribute)85     public InternalLocaleBuilder addUnicodeLocaleAttribute(String attribute) throws LocaleSyntaxException {
86         if (attribute == null || !UnicodeLocaleExtension.isAttribute(attribute)) {
87             throw new LocaleSyntaxException("Ill-formed Unicode locale attribute: " + attribute);
88         }
89         // Use case insensitive string to prevent duplication
90         if (_uattributes == null) {
91             _uattributes = new HashSet<CaseInsensitiveString>(4);
92         }
93         _uattributes.add(new CaseInsensitiveString(attribute));
94         return this;
95     }
96 
removeUnicodeLocaleAttribute(String attribute)97     public InternalLocaleBuilder removeUnicodeLocaleAttribute(String attribute) throws LocaleSyntaxException {
98         if (attribute == null || !UnicodeLocaleExtension.isAttribute(attribute)) {
99             throw new LocaleSyntaxException("Ill-formed Unicode locale attribute: " + attribute);
100         }
101         if (_uattributes != null) {
102             _uattributes.remove(new CaseInsensitiveString(attribute));
103         }
104         return this;
105     }
106 
setUnicodeLocaleKeyword(String key, String type)107     public InternalLocaleBuilder setUnicodeLocaleKeyword(String key, String type) throws LocaleSyntaxException {
108         if (!UnicodeLocaleExtension.isKey(key)) {
109             throw new LocaleSyntaxException("Ill-formed Unicode locale keyword key: " + key);
110         }
111 
112         CaseInsensitiveString cikey = new CaseInsensitiveString(key);
113         if (type == null) {
114             if (_ukeywords != null) {
115                 // null type is used for remove the key
116                 _ukeywords.remove(cikey);
117             }
118         } else {
119             if (type.length() != 0) {
120                 // normalize separator to "-"
121                 String tp = type.replaceAll(BaseLocale.SEP, LanguageTag.SEP);
122                 // validate
123                 StringTokenIterator itr = new StringTokenIterator(tp, LanguageTag.SEP);
124                 while (!itr.isDone()) {
125                     String s = itr.current();
126                     if (!UnicodeLocaleExtension.isTypeSubtag(s)) {
127                         throw new LocaleSyntaxException("Ill-formed Unicode locale keyword type: " + type, itr.currentStart());
128                     }
129                     itr.next();
130                 }
131             }
132             if (_ukeywords == null) {
133                 _ukeywords = new HashMap<CaseInsensitiveString, String>(4);
134             }
135             _ukeywords.put(cikey, type);
136         }
137         return this;
138     }
139 
setExtension(char singleton, String value)140     public InternalLocaleBuilder setExtension(char singleton, String value) throws LocaleSyntaxException {
141         // validate key
142         boolean isBcpPrivateuse = LanguageTag.isPrivateusePrefixChar(singleton);
143         if (!isBcpPrivateuse && !LanguageTag.isExtensionSingletonChar(singleton)) {
144             throw new LocaleSyntaxException("Ill-formed extension key: " + singleton);
145         }
146 
147         boolean remove = (value == null || value.length() == 0);
148         CaseInsensitiveChar key = new CaseInsensitiveChar(singleton);
149 
150         if (remove) {
151             if (UnicodeLocaleExtension.isSingletonChar(key.value())) {
152                 // clear entire Unicode locale extension
153                 if (_uattributes != null) {
154                     _uattributes.clear();
155                 }
156                 if (_ukeywords != null) {
157                     _ukeywords.clear();
158                 }
159             } else {
160                 if (_extensions != null && _extensions.containsKey(key)) {
161                     _extensions.remove(key);
162                 }
163             }
164         } else {
165             // validate value
166             String val = value.replaceAll(BaseLocale.SEP, LanguageTag.SEP);
167             StringTokenIterator itr = new StringTokenIterator(val, LanguageTag.SEP);
168             while (!itr.isDone()) {
169                 String s = itr.current();
170                 boolean validSubtag;
171                 if (isBcpPrivateuse) {
172                     validSubtag = LanguageTag.isPrivateuseSubtag(s);
173                 } else {
174                     validSubtag = LanguageTag.isExtensionSubtag(s);
175                 }
176                 if (!validSubtag) {
177                     throw new LocaleSyntaxException("Ill-formed extension value: " + s, itr.currentStart());
178                 }
179                 itr.next();
180             }
181 
182             if (UnicodeLocaleExtension.isSingletonChar(key.value())) {
183                 setUnicodeLocaleExtension(val);
184             } else {
185                 if (_extensions == null) {
186                     _extensions = new HashMap<CaseInsensitiveChar, String>(4);
187                 }
188                 _extensions.put(key, val);
189             }
190         }
191         return this;
192     }
193 
194     /*
195      * Set extension/private subtags in a single string representation
196      */
setExtensions(String subtags)197     public InternalLocaleBuilder setExtensions(String subtags) throws LocaleSyntaxException {
198         if (subtags == null || subtags.length() == 0) {
199             clearExtensions();
200             return this;
201         }
202         subtags = subtags.replaceAll(BaseLocale.SEP, LanguageTag.SEP);
203         StringTokenIterator itr = new StringTokenIterator(subtags, LanguageTag.SEP);
204 
205         List<String> extensions = null;
206         String privateuse = null;
207 
208         int parsed = 0;
209         int start;
210 
211         // Make a list of extension subtags
212         while (!itr.isDone()) {
213             String s = itr.current();
214             if (LanguageTag.isExtensionSingleton(s)) {
215                 start = itr.currentStart();
216                 String singleton = s;
217                 StringBuilder sb = new StringBuilder(singleton);
218 
219                 itr.next();
220                 while (!itr.isDone()) {
221                     s = itr.current();
222                     if (LanguageTag.isExtensionSubtag(s)) {
223                         sb.append(LanguageTag.SEP).append(s);
224                         parsed = itr.currentEnd();
225                     } else {
226                         break;
227                     }
228                     itr.next();
229                 }
230 
231                 if (parsed < start) {
232                     throw new LocaleSyntaxException("Incomplete extension '" + singleton + "'", start);
233                 }
234 
235                 if (extensions == null) {
236                     extensions = new ArrayList<String>(4);
237                 }
238                 extensions.add(sb.toString());
239             } else {
240                 break;
241             }
242         }
243         if (!itr.isDone()) {
244             String s = itr.current();
245             if (LanguageTag.isPrivateusePrefix(s)) {
246                 start = itr.currentStart();
247                 StringBuilder sb = new StringBuilder(s);
248 
249                 itr.next();
250                 while (!itr.isDone()) {
251                     s = itr.current();
252                     if (!LanguageTag.isPrivateuseSubtag(s)) {
253                         break;
254                     }
255                     sb.append(LanguageTag.SEP).append(s);
256                     parsed = itr.currentEnd();
257 
258                     itr.next();
259                 }
260                 if (parsed <= start) {
261                     throw new LocaleSyntaxException("Incomplete privateuse:" + subtags.substring(start), start);
262                 } else {
263                     privateuse = sb.toString();
264                 }
265             }
266         }
267 
268         if (!itr.isDone()) {
269             throw new LocaleSyntaxException("Ill-formed extension subtags:" + subtags.substring(itr.currentStart()), itr.currentStart());
270         }
271 
272         return setExtensions(extensions, privateuse);
273     }
274 
275     /*
276      * Set a list of BCP47 extensions and private use subtags
277      * BCP47 extensions are already validated and well-formed, but may contain duplicates
278      */
setExtensions(List<String> bcpExtensions, String privateuse)279     private InternalLocaleBuilder setExtensions(List<String> bcpExtensions, String privateuse) {
280         clearExtensions();
281 
282         if (bcpExtensions != null && bcpExtensions.size() > 0) {
283             HashSet<CaseInsensitiveChar> processedExtensions = new HashSet<CaseInsensitiveChar>(bcpExtensions.size());
284             for (String bcpExt : bcpExtensions) {
285                 CaseInsensitiveChar key = new CaseInsensitiveChar(bcpExt.charAt(0));
286                 // ignore duplicates
287                 if (!processedExtensions.contains(key)) {
288                     // each extension string contains singleton, e.g. "a-abc-def"
289                     if (UnicodeLocaleExtension.isSingletonChar(key.value())) {
290                         setUnicodeLocaleExtension(bcpExt.substring(2));
291                     } else {
292                         if (_extensions == null) {
293                             _extensions = new HashMap<CaseInsensitiveChar, String>(4);
294                         }
295                         _extensions.put(key, bcpExt.substring(2));
296                     }
297                 }
298             }
299         }
300         if (privateuse != null && privateuse.length() > 0) {
301             // privateuse string contains prefix, e.g. "x-abc-def"
302             if (_extensions == null) {
303                 _extensions = new HashMap<CaseInsensitiveChar, String>(1);
304             }
305             _extensions.put(new CaseInsensitiveChar(privateuse.charAt(0)), privateuse.substring(2));
306         }
307 
308         return this;
309     }
310 
311     /*
312      * Reset Builder's internal state with the given language tag
313      */
setLanguageTag(LanguageTag langtag)314     public InternalLocaleBuilder setLanguageTag(LanguageTag langtag) {
315         clear();
316         if (langtag.getExtlangs().size() > 0) {
317             _language = langtag.getExtlangs().get(0);
318         } else {
319             String language = langtag.getLanguage();
320             if (!language.equals(LanguageTag.UNDETERMINED)) {
321                 _language = language;
322             }
323         }
324         _script = langtag.getScript();
325         _region = langtag.getRegion();
326 
327         List<String> bcpVariants = langtag.getVariants();
328         if (bcpVariants.size() > 0) {
329             StringBuilder var = new StringBuilder(bcpVariants.get(0));
330             for (int i = 1; i < bcpVariants.size(); i++) {
331                 var.append(BaseLocale.SEP).append(bcpVariants.get(i));
332             }
333             _variant = var.toString();
334         }
335 
336         setExtensions(langtag.getExtensions(), langtag.getPrivateuse());
337 
338         return this;
339     }
340 
setLocale(BaseLocale base, LocaleExtensions extensions)341     public InternalLocaleBuilder setLocale(BaseLocale base, LocaleExtensions extensions) throws LocaleSyntaxException {
342         String language = base.getLanguage();
343         String script = base.getScript();
344         String region = base.getRegion();
345         String variant = base.getVariant();
346 
347         if (JDKIMPL) {
348             // Special backward compatibility support
349 
350             // Exception 1 - ja_JP_JP
351             if (language.equals("ja") && region.equals("JP") && variant.equals("JP")) {
352                 // When locale ja_JP_JP is created, ca-japanese is always there.
353                 // The builder ignores the variant "JP"
354                 assert("japanese".equals(extensions.getUnicodeLocaleType("ca")));
355                 variant = "";
356             }
357             // Exception 2 - th_TH_TH
358             else if (language.equals("th") && region.equals("TH") && variant.equals("TH")) {
359                 // When locale th_TH_TH is created, nu-thai is always there.
360                 // The builder ignores the variant "TH"
361                 assert("thai".equals(extensions.getUnicodeLocaleType("nu")));
362                 variant = "";
363             }
364             // Exception 3 - no_NO_NY
365             else if (language.equals("no") && region.equals("NO") && variant.equals("NY")) {
366                 // no_NO_NY is a valid locale and used by Java 6 or older versions.
367                 // The build ignores the variant "NY" and change the language to "nn".
368                 language = "nn";
369                 variant = "";
370             }
371         }
372 
373         // Validate base locale fields before updating internal state.
374         // LocaleExtensions always store validated/canonicalized values,
375         // so no checks are necessary.
376         if (language.length() > 0 && !LanguageTag.isLanguage(language)) {
377             throw new LocaleSyntaxException("Ill-formed language: " + language);
378         }
379 
380         if (script.length() > 0 && !LanguageTag.isScript(script)) {
381             throw new LocaleSyntaxException("Ill-formed script: " + script);
382         }
383 
384         if (region.length() > 0 && !LanguageTag.isRegion(region)) {
385             throw new LocaleSyntaxException("Ill-formed region: " + region);
386         }
387 
388         if (variant.length() > 0) {
389             int errIdx = checkVariants(variant, BaseLocale.SEP);
390             if (errIdx != -1) {
391                 throw new LocaleSyntaxException("Ill-formed variant: " + variant, errIdx);
392             }
393         }
394 
395         // The input locale is validated at this point.
396         // Now, updating builder's internal fields.
397         _language = language;
398         _script = script;
399         _region = region;
400         _variant = variant;
401         clearExtensions();
402 
403         Set<Character> extKeys = (extensions == null) ? null : extensions.getKeys();
404         if (extKeys != null) {
405             // map extensions back to builder's internal format
406             for (Character key : extKeys) {
407                 Extension e = extensions.getExtension(key);
408                 if (e instanceof UnicodeLocaleExtension) {
409                     UnicodeLocaleExtension ue = (UnicodeLocaleExtension)e;
410                     for (String uatr : ue.getUnicodeLocaleAttributes()) {
411                         if (_uattributes == null) {
412                             _uattributes = new HashSet<CaseInsensitiveString>(4);
413                         }
414                         _uattributes.add(new CaseInsensitiveString(uatr));
415                     }
416                     for (String ukey : ue.getUnicodeLocaleKeys()) {
417                         if (_ukeywords == null) {
418                             _ukeywords = new HashMap<CaseInsensitiveString, String>(4);
419                         }
420                         _ukeywords.put(new CaseInsensitiveString(ukey), ue.getUnicodeLocaleType(ukey));
421                     }
422                 } else {
423                     if (_extensions == null) {
424                         _extensions = new HashMap<CaseInsensitiveChar, String>(4);
425                     }
426                     _extensions.put(new CaseInsensitiveChar(key.charValue()), e.getValue());
427                 }
428             }
429         }
430         return this;
431     }
432 
clear()433     public InternalLocaleBuilder clear() {
434         _language = "";
435         _script = "";
436         _region = "";
437         _variant = "";
438         clearExtensions();
439         return this;
440     }
441 
clearExtensions()442     public InternalLocaleBuilder clearExtensions() {
443         if (_extensions != null) {
444             _extensions.clear();
445         }
446         if (_uattributes != null) {
447             _uattributes.clear();
448         }
449         if (_ukeywords != null) {
450             _ukeywords.clear();
451         }
452         return this;
453     }
454 
getBaseLocale()455     public BaseLocale getBaseLocale() {
456         String language = _language;
457         String script = _script;
458         String region = _region;
459         String variant = _variant;
460 
461         // Special private use subtag sequence identified by "lvariant" will be
462         // interpreted as Java variant.
463         if (_extensions != null) {
464             String privuse = _extensions.get(PRIVUSE_KEY);
465             if (privuse != null) {
466                 StringTokenIterator itr = new StringTokenIterator(privuse, LanguageTag.SEP);
467                 boolean sawPrefix = false;
468                 int privVarStart = -1;
469                 while (!itr.isDone()) {
470                     if (sawPrefix) {
471                         privVarStart = itr.currentStart();
472                         break;
473                     }
474                     if (AsciiUtil.caseIgnoreMatch(itr.current(), LanguageTag.PRIVUSE_VARIANT_PREFIX)) {
475                         sawPrefix = true;
476                     }
477                     itr.next();
478                 }
479                 if (privVarStart != -1) {
480                     StringBuilder sb = new StringBuilder(variant);
481                     if (sb.length() != 0) {
482                         sb.append(BaseLocale.SEP);
483                     }
484                     sb.append(privuse.substring(privVarStart).replaceAll(LanguageTag.SEP, BaseLocale.SEP));
485                     variant = sb.toString();
486                 }
487             }
488         }
489 
490         return BaseLocale.getInstance(language, script, region, variant);
491     }
492 
getLocaleExtensions()493     public LocaleExtensions getLocaleExtensions() {
494         if ((_extensions == null || _extensions.size() == 0)
495                 && (_uattributes == null || _uattributes.size() == 0)
496                 && (_ukeywords == null || _ukeywords.size() == 0)) {
497             return LocaleExtensions.EMPTY_EXTENSIONS;
498         }
499 
500         return new LocaleExtensions(_extensions, _uattributes, _ukeywords);
501     }
502 
503     /*
504      * Remove special private use subtag sequence identified by "lvariant"
505      * and return the rest. Only used by LocaleExtensions
506      */
removePrivateuseVariant(String privuseVal)507     static String removePrivateuseVariant(String privuseVal) {
508         StringTokenIterator itr = new StringTokenIterator(privuseVal, LanguageTag.SEP);
509 
510         // Note: privateuse value "abc-lvariant" is unchanged
511         // because no subtags after "lvariant".
512 
513         int prefixStart = -1;
514         boolean sawPrivuseVar = false;
515         while (!itr.isDone()) {
516             if (prefixStart != -1) {
517                 // Note: privateuse value "abc-lvariant" is unchanged
518                 // because no subtags after "lvariant".
519                 sawPrivuseVar = true;
520                 break;
521             }
522             if (AsciiUtil.caseIgnoreMatch(itr.current(), LanguageTag.PRIVUSE_VARIANT_PREFIX)) {
523                 prefixStart = itr.currentStart();
524             }
525             itr.next();
526         }
527         if (!sawPrivuseVar) {
528             return privuseVal;
529         }
530 
531         assert(prefixStart == 0 || prefixStart > 1);
532         return (prefixStart == 0) ? null : privuseVal.substring(0, prefixStart -1);
533     }
534 
535     /*
536      * Check if the given variant subtags separated by the given
537      * separator(s) are valid
538      */
checkVariants(String variants, String sep)539     private int checkVariants(String variants, String sep) {
540         StringTokenIterator itr = new StringTokenIterator(variants, sep);
541         while (!itr.isDone()) {
542             String s = itr.current();
543             if (!LanguageTag.isVariant(s)) {
544                 return itr.currentStart();
545             }
546             itr.next();
547         }
548         return -1;
549     }
550 
551     /*
552      * Private methods parsing Unicode Locale Extension subtags.
553      * Duplicated attributes/keywords will be ignored.
554      * The input must be a valid extension subtags (excluding singleton).
555      */
setUnicodeLocaleExtension(String subtags)556     private void setUnicodeLocaleExtension(String subtags) {
557         // wipe out existing attributes/keywords
558         if (_uattributes != null) {
559             _uattributes.clear();
560         }
561         if (_ukeywords != null) {
562             _ukeywords.clear();
563         }
564 
565         StringTokenIterator itr = new StringTokenIterator(subtags, LanguageTag.SEP);
566 
567         // parse attributes
568         while (!itr.isDone()) {
569             if (!UnicodeLocaleExtension.isAttribute(itr.current())) {
570                 break;
571             }
572             if (_uattributes == null) {
573                 _uattributes = new HashSet<CaseInsensitiveString>(4);
574             }
575             _uattributes.add(new CaseInsensitiveString(itr.current()));
576             itr.next();
577         }
578 
579         // parse keywords
580         CaseInsensitiveString key = null;
581         String type;
582         int typeStart = -1;
583         int typeEnd = -1;
584         while (!itr.isDone()) {
585             if (key != null) {
586                 if (UnicodeLocaleExtension.isKey(itr.current())) {
587                     // next keyword - emit previous one
588                     assert(typeStart == -1 || typeEnd != -1);
589                     type = (typeStart == -1) ? "" : subtags.substring(typeStart, typeEnd);
590                     if (_ukeywords == null) {
591                         _ukeywords = new HashMap<CaseInsensitiveString, String>(4);
592                     }
593                     _ukeywords.put(key, type);
594 
595                     // reset keyword info
596                     CaseInsensitiveString tmpKey = new CaseInsensitiveString(itr.current());
597                     key = _ukeywords.containsKey(tmpKey) ? null : tmpKey;
598                     typeStart = typeEnd = -1;
599                 } else {
600                     if (typeStart == -1) {
601                         typeStart = itr.currentStart();
602                     }
603                     typeEnd = itr.currentEnd();
604                 }
605             } else if (UnicodeLocaleExtension.isKey(itr.current())) {
606                 // 1. first keyword or
607                 // 2. next keyword, but previous one was duplicate
608                 key = new CaseInsensitiveString(itr.current());
609                 if (_ukeywords != null && _ukeywords.containsKey(key)) {
610                     // duplicate
611                     key = null;
612                 }
613             }
614 
615             if (!itr.hasNext()) {
616                 if (key != null) {
617                     // last keyword
618                     assert(typeStart == -1 || typeEnd != -1);
619                     type = (typeStart == -1) ? "" : subtags.substring(typeStart, typeEnd);
620                     if (_ukeywords == null) {
621                         _ukeywords = new HashMap<CaseInsensitiveString, String>(4);
622                     }
623                     _ukeywords.put(key, type);
624                 }
625                 break;
626             }
627 
628             itr.next();
629         }
630     }
631 
632     static class CaseInsensitiveString {
633         private String _s;
634 
CaseInsensitiveString(String s)635         CaseInsensitiveString(String s) {
636             _s = s;
637         }
638 
value()639         public String value() {
640             return _s;
641         }
642 
hashCode()643         public int hashCode() {
644             return AsciiUtil.toLowerString(_s).hashCode();
645         }
646 
equals(Object obj)647         public boolean equals(Object obj) {
648             if (this == obj) {
649                 return true;
650             }
651             if (!(obj instanceof CaseInsensitiveString)) {
652                 return false;
653             }
654             return AsciiUtil.caseIgnoreMatch(_s, ((CaseInsensitiveString)obj).value());
655         }
656     }
657 
658     static class CaseInsensitiveChar {
659         private char _c;
660 
CaseInsensitiveChar(char c)661         CaseInsensitiveChar(char c) {
662             _c = c;
663         }
664 
value()665         public char value() {
666             return _c;
667         }
668 
hashCode()669         public int hashCode() {
670             return AsciiUtil.toLower(_c);
671         }
672 
equals(Object obj)673         public boolean equals(Object obj) {
674             if (this == obj) {
675                 return true;
676             }
677             if (!(obj instanceof CaseInsensitiveChar)) {
678                 return false;
679             }
680             return _c ==  AsciiUtil.toLower(((CaseInsensitiveChar)obj).value());
681         }
682 
683     }
684 }
685