1 /*
2  **********************************************************************
3  * Copyright (c) 2002-2011, International Business Machines
4  * Corporation and others.  All Rights Reserved.
5  **********************************************************************
6  * Author: Mark Davis
7  **********************************************************************
8  */
9 package org.unicode.cldr.util;
10 
11 import java.util.ArrayList;
12 import java.util.Collection;
13 import java.util.EnumSet;
14 import java.util.Iterator;
15 import java.util.List;
16 import java.util.Set;
17 import java.util.TreeSet;
18 
19 import com.ibm.icu.impl.Utility;
20 import com.ibm.icu.text.UnicodeSet;
21 
22 public class LocaleIDParser {
23     /**
24      * @return Returns the language.
25      */
getLanguage()26     public String getLanguage() {
27         return language;
28     }
29 
30     /**
31      * @return Returns the language.
32      */
getLanguageScript()33     public String getLanguageScript() {
34         if (script.length() != 0) return language + "_" + script;
35         return language;
36     }
37 
getLanguageScript(Collection<String> in)38     public static Set<String> getLanguageScript(Collection<String> in) {
39         return getLanguageScript(in, null);
40     }
41 
getLanguageScript(Collection<String> in, Set<String> output)42     public static Set<String> getLanguageScript(Collection<String> in, Set<String> output) {
43         if (output == null) output = new TreeSet<String>();
44         LocaleIDParser lparser = new LocaleIDParser();
45         for (Iterator<String> it = in.iterator(); it.hasNext();) {
46             output.add(lparser.set(it.next()).getLanguageScript());
47         }
48         return output;
49     }
50 
51     /**
52      * @return Returns the region.
53      */
getRegion()54     public String getRegion() {
55         return region;
56     }
57 
58     /**
59      * @return Returns the script.
60      */
getScript()61     public String getScript() {
62         return script;
63     }
64 
65     /**
66      * @return Returns the variants.
67      */
getVariants()68     public String[] getVariants() {
69         return (String[]) variants.clone();
70     }
71 
72     // TODO, update to RFC3066
73     // http://www.inter-locale.com/ID/draft-phillips-langtags-08.html
74     private String language;
75     private String script;
76     private String region;
77     private String[] variants;
78 
79     static final UnicodeSet letters = new UnicodeSet("[a-zA-Z]");
80     static final UnicodeSet digits = new UnicodeSet("[0-9]");
81 
set(String localeID)82     public LocaleIDParser set(String localeID) {
83         region = script = "";
84         variants = new String[0];
85 
86         String[] pieces = new String[100]; // fix limitation later
87         Utility.split(localeID, '_', pieces);
88         int i = 0;
89         language = pieces[i++];
90         if (i >= pieces.length) return this;
91         if (pieces[i].length() == 4) {
92             script = pieces[i++];
93             if (i >= pieces.length) return this;
94         }
95         if (pieces[i].length() == 2 && letters.containsAll(pieces[i])
96             || pieces[i].length() == 3 && digits.containsAll(pieces[i])) {
97             region = pieces[i++];
98             if (i >= pieces.length) return this;
99         }
100         List<String> al = new ArrayList<String>();
101         while (i < pieces.length && pieces[i].length() > 0) {
102             al.add(pieces[i++]);
103         }
104         variants = new String[al.size()];
105         al.toArray(variants);
106         return this;
107     }
108 
109     /**
110      * Utility to get the parent of a locale. If the input is "root", then the output is null. Only works on canonical locale names (right casing, etc.)!
111      */
getParent(String localeName)112     public static String getParent(String localeName) {
113         SupplementalDataInfo sdi = SupplementalDataInfo.getInstance();
114         int pos = localeName.lastIndexOf('_');
115         if (pos >= 0) {
116             String explicitParent = sdi.getExplicitParentLocale(localeName);
117             if (explicitParent != null) {
118                 return explicitParent;
119             }
120             String truncated = localeName.substring(0, pos);
121             // if the final item is a script, and it is not the default content, then go directly to root
122             int pos2 = getScriptPosition(localeName);
123             if (pos2 > 0) {
124                 String script = localeName.substring(pos + 1);
125                 String defaultScript = sdi.getDefaultScript(truncated);
126                 if (!script.equals(defaultScript)) {
127                     return "root";
128                 }
129             }
130             return truncated;
131         }
132         if (localeName.equals("root") || localeName.equals(CLDRFile.SUPPLEMENTAL_NAME)) return null;
133         return "root";
134     }
135 
136     /**
137      * If the locale consists of baseLanguage+script, return the position of the separator, otherwise -1.
138      * @param s
139      */
getScriptPosition(String locale)140     public static int getScriptPosition(String locale) {
141         int pos = locale.indexOf('_');
142         if (pos >= 0 && pos + 5 == locale.length()) {
143             int pos2 = locale.indexOf('_', pos + 1);
144             if (pos2 < 0) {
145                 return pos;
146             }
147         }
148         return -1;
149     }
150 
151     /**
152      * Utility to get the simple parent of a locale. If the input is "root", then the output is null.
153      * This method is similar to the getParent() method above, except that it does NOT pay any attention
154      * to the explicit parent locales information. Thus, getParent("zh_Hant") will return "root",
155      * but getSimpleParent("zh_Hant") would return "zh".
156      */
getSimpleParent(String localeName)157     public static String getSimpleParent(String localeName) {
158         int pos = localeName.lastIndexOf('_');
159         if (pos >= 0) {
160             return localeName.substring(0, pos);
161         }
162         if (localeName.equals("root") || localeName.equals(CLDRFile.SUPPLEMENTAL_NAME)) return null;
163         return "root";
164     }
165 
setLanguage(String language)166     public LocaleIDParser setLanguage(String language) {
167         this.language = language;
168         return this;
169     }
170 
setRegion(String region)171     public LocaleIDParser setRegion(String region) {
172         this.region = region;
173         return this;
174     }
175 
setScript(String script)176     public LocaleIDParser setScript(String script) {
177         this.script = script;
178         return this;
179     }
180 
setVariants(String[] variants)181     public LocaleIDParser setVariants(String[] variants) {
182         this.variants = (String[]) variants.clone();
183         return this;
184     }
185 
186     public enum Level {
187         Language, Script, Region, Variants, Other
188     }
189 
190     /**
191      * Returns an int mask indicating the level
192      *
193      * @return (2 if script is present) + (4 if region is present) + (8 if region is present)
194      */
getLevels()195     public Set<Level> getLevels() {
196         EnumSet<Level> result = EnumSet.of(Level.Language);
197         if (getScript().length() != 0) result.add(Level.Script);
198         if (getRegion().length() != 0) result.add(Level.Region);
199         if (getVariants().length != 0) result.add(Level.Variants);
200         return result;
201     }
202 
getSiblings(Set<String> set)203     public Set<String> getSiblings(Set<String> set) {
204         Set<Level> myLevel = getLevels();
205         String localeID = toString();
206         String parentID = getParent(localeID);
207 
208         String prefix = parentID.equals("root") ? "" : parentID + "_";
209         Set<String> siblings = new TreeSet<String>();
210         for (String id : set) {
211             if (id.startsWith(prefix) && set(id).getLevels().equals(myLevel)) {
212                 siblings.add(id);
213             }
214         }
215         set(localeID); // leave in known state
216         return siblings;
217     }
218 
toString()219     public String toString() {
220         StringBuffer result = new StringBuffer(language);
221         if (script.length() != 0) result.append('_').append(script);
222         if (region.length() != 0) result.append('_').append(region);
223         if (variants != null) {
224             for (int i = 0; i < variants.length; ++i) {
225                 result.append('_').append(variants[i]);
226             }
227         }
228         return result.toString();
229     }
230 }