/*
 **********************************************************************
 * Copyright (c) 2002-2011, International Business Machines
 * Corporation and others.  All Rights Reserved.
 **********************************************************************
 * Author: Mark Davis
 **********************************************************************
 */
package org.unicode.cldr.util;

import java.util.ArrayList;
import java.util.Collection;
import java.util.EnumSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.TreeSet;

import com.ibm.icu.impl.Utility;
import com.ibm.icu.text.UnicodeSet;

/**
 * Splits an underscore-separated locale ID (for example {@code zh_Hant_TW_VARIANT}) into
 * language, script, region and variants, and offers static helpers for walking from a
 * locale name to its parent.
 *
 * <p>An instance is mutable: {@link #set(String)} and the individual setters overwrite its
 * state and return {@code this} so calls can be chained. A newly created instance has an
 * empty language, script and region, and no variants.
 */
public class LocaleIDParser {
    // TODO, update to RFC3066
    // http://www.inter-locale.com/ID/draft-phillips-langtags-08.html
    // Initialised to the same "empty" values that set() resets to, so that a parser that
    // is populated only through the setters never exposes a null component.
    private String language = "";
    private String script = "";
    private String region = "";
    private String[] variants = new String[0];

    static final UnicodeSet letters = new UnicodeSet("[a-zA-Z]");
    static final UnicodeSet digits = new UnicodeSet("[0-9]");

    /** The kinds of component a locale ID can carry; see {@link #getLevels()}. */
    public enum Level {
        Language, Script, Region, Variants, Other
    }

    /**
     * @return Returns the language.
     */
    public String getLanguage() {
        return language;
    }

    /**
     * @return Returns the language, followed by {@code "_"} and the script when a script
     *         is present.
     */
    public String getLanguageScript() {
        if (script.length() != 0) {
            return language + "_" + script;
        }
        return language;
    }

    /**
     * Reduces each locale ID in {@code in} to its language (plus script, if any).
     *
     * @param in locale IDs to parse
     * @return a new sorted set holding the language/script part of every input ID
     */
    public static Set<String> getLanguageScript(Collection<String> in) {
        return getLanguageScript(in, null);
    }

    /**
     * Reduces each locale ID in {@code in} to its language (plus script, if any) and adds
     * the results to {@code output}.
     *
     * @param in locale IDs to parse
     * @param output set to add to; when {@code null} a new {@link TreeSet} is created
     * @return {@code output}, or the newly created set
     */
    public static Set<String> getLanguageScript(Collection<String> in, Set<String> output) {
        if (output == null) {
            output = new TreeSet<String>();
        }
        LocaleIDParser lparser = new LocaleIDParser();
        for (String localeID : in) {
            output.add(lparser.set(localeID).getLanguageScript());
        }
        return output;
    }

    /**
     * @return Returns the region.
     */
    public String getRegion() {
        return region;
    }

    /**
     * @return Returns the script.
     */
    public String getScript() {
        return script;
    }

    /**
     * @return Returns a copy of the variants; modifying it does not affect this parser.
     */
    public String[] getVariants() {
        return variants.clone();
    }

    /**
     * Parses {@code localeID}, replacing whatever this parser held before.
     *
     * <p>The ID is split on {@code '_'}. The first piece is the language. The next piece is
     * taken as the script if it is exactly four characters long, and the piece after that
     * as the region if it is two ASCII letters or three ASCII digits. The remaining pieces,
     * up to but not including the first empty one, become the variants.
     *
     * @param localeID the locale ID to parse
     * @return this parser
     */
    public LocaleIDParser set(String localeID) {
        region = script = "";
        variants = new String[0];

        // A negative limit keeps trailing empty pieces, so the pieces are exactly the
        // substrings between underscores, with no upper bound on how many there are.
        String[] pieces = localeID.split("_", -1);
        int i = 0;
        language = pieces[i++];
        if (i >= pieces.length) {
            return this;
        }
        // NOTE(review): only the length is checked here, so a four-character non-letter
        // piece (e.g. "1901") directly after the language is stored as the script.
        if (pieces[i].length() == 4) {
            script = pieces[i++];
            if (i >= pieces.length) {
                return this;
            }
        }
        if (pieces[i].length() == 2 && letters.containsAll(pieces[i])
                || pieces[i].length() == 3 && digits.containsAll(pieces[i])) {
            region = pieces[i++];
            if (i >= pieces.length) {
                return this;
            }
        }
        List<String> al = new ArrayList<String>();
        while (i < pieces.length && pieces[i].length() > 0) {
            al.add(pieces[i++]);
        }
        variants = al.toArray(new String[al.size()]);
        return this;
    }

    /**
     * Utility to get the parent of a locale. If the input is "root", then the output is null.
     * Only works on canonical locale names (right casing, etc.)!
     *
     * <p>For a name containing an underscore, an explicit parent reported by
     * {@link SupplementalDataInfo} wins. Otherwise the last subtag is dropped, except that a
     * language_script name whose script is not the default script reported for that
     * language goes directly to "root".
     *
     * @param localeName canonical locale name
     * @return the parent locale name, or {@code null} for "root" and for
     *         {@code CLDRFile.SUPPLEMENTAL_NAME}
     */
    public static String getParent(String localeName) {
        SupplementalDataInfo sdi = SupplementalDataInfo.getInstance();
        int pos = localeName.lastIndexOf('_');
        if (pos >= 0) {
            String explicitParent = sdi.getExplicitParentLocale(localeName);
            if (explicitParent != null) {
                return explicitParent;
            }
            String truncated = localeName.substring(0, pos);
            // if the final item is a script, and it is not the default content, then go directly to root
            int pos2 = getScriptPosition(localeName);
            if (pos2 > 0) {
                String script = localeName.substring(pos + 1);
                String defaultScript = sdi.getDefaultScript(truncated);
                if (!script.equals(defaultScript)) {
                    return "root";
                }
            }
            return truncated;
        }
        if (localeName.equals("root") || localeName.equals(CLDRFile.SUPPLEMENTAL_NAME)) {
            return null;
        }
        return "root";
    }

    /**
     * If the locale consists of baseLanguage+script, return the position of the separator, otherwise -1.
     *
     * <p>"Script" here means that exactly four characters follow the first underscore and
     * that there is no second underscore.
     *
     * @param locale the locale name to inspect
     * @return index of the underscore, or -1
     */
    public static int getScriptPosition(String locale) {
        int pos = locale.indexOf('_');
        if (pos >= 0 && pos + 5 == locale.length()) {
            int pos2 = locale.indexOf('_', pos + 1);
            if (pos2 < 0) {
                return pos;
            }
        }
        return -1;
    }

    /**
     * Utility to get the simple parent of a locale. If the input is "root", then the output is null.
     * This method is similar to the getParent() method above, except that it does NOT pay any attention
     * to the explicit parent locales information. Thus, getParent("zh_Hant") will return "root",
     * but getSimpleParent("zh_Hant") would return "zh".
     *
     * @param localeName locale name
     * @return the name with its last subtag removed; "root" when there is no underscore;
     *         {@code null} for "root" and for {@code CLDRFile.SUPPLEMENTAL_NAME}
     */
    public static String getSimpleParent(String localeName) {
        int pos = localeName.lastIndexOf('_');
        if (pos >= 0) {
            return localeName.substring(0, pos);
        }
        if (localeName.equals("root") || localeName.equals(CLDRFile.SUPPLEMENTAL_NAME)) {
            return null;
        }
        return "root";
    }

    /**
     * @param language the language to store (not validated)
     * @return this parser
     */
    public LocaleIDParser setLanguage(String language) {
        this.language = language;
        return this;
    }

    /**
     * @param region the region to store (not validated)
     * @return this parser
     */
    public LocaleIDParser setRegion(String region) {
        this.region = region;
        return this;
    }

    /**
     * @param script the script to store (not validated)
     * @return this parser
     */
    public LocaleIDParser setScript(String script) {
        this.script = script;
        return this;
    }

    /**
     * @param variants the variants to store; the array is copied, and {@code null} is
     *        treated as "no variants"
     * @return this parser
     */
    public LocaleIDParser setVariants(String[] variants) {
        this.variants = variants == null ? new String[0] : variants.clone();
        return this;
    }

    /**
     * Returns the set of components present in this locale.
     *
     * @return a set that always contains {@link Level#Language}, plus {@link Level#Script},
     *         {@link Level#Region} and {@link Level#Variants} for each of those that is
     *         non-empty
     */
    public Set<Level> getLevels() {
        EnumSet<Level> result = EnumSet.of(Level.Language);
        if (getScript().length() != 0) {
            result.add(Level.Script);
        }
        if (getRegion().length() != 0) {
            result.add(Level.Region);
        }
        if (getVariants().length != 0) {
            result.add(Level.Variants);
        }
        return result;
    }

    /**
     * Collects the IDs in {@code set} that start with this locale's parent (as given by
     * {@link #getParent(String)}) and have the same {@link #getLevels() levels} as this
     * locale. When the parent is "root" every ID passes the prefix test.
     *
     * <p>This parser is used as scratch space while scanning and is re-set to its own
     * locale ID before returning, even if the scan fails part-way.
     *
     * @param set candidate locale IDs
     * @return a new sorted set of matching IDs; empty when this locale has no parent
     */
    public Set<String> getSiblings(Set<String> set) {
        Set<String> siblings = new TreeSet<String>();
        String localeID = toString();
        String parentID = getParent(localeID);
        if (parentID == null) {
            // "root" (or the supplemental pseudo-locale): there is no parent to share.
            return siblings;
        }
        Set<Level> myLevel = getLevels();
        String prefix = parentID.equals("root") ? "" : parentID + "_";
        try {
            for (String id : set) {
                if (id.startsWith(prefix) && set(id).getLevels().equals(myLevel)) {
                    siblings.add(id);
                }
            }
        } finally {
            set(localeID); // leave in known state
        }
        return siblings;
    }

    /**
     * @return the non-empty components joined with {@code '_'} in the order language,
     *         script, region, variants
     */
    @Override
    public String toString() {
        StringBuilder result = new StringBuilder(language);
        if (script.length() != 0) {
            result.append('_').append(script);
        }
        if (region.length() != 0) {
            result.append('_').append(region);
        }
        for (int i = 0; i < variants.length; ++i) {
            result.append('_').append(variants[i]);
        }
        return result.toString();
    }
}