/*
 **********************************************************************
 * Copyright (c) 2002-2011, International Business Machines
 * Corporation and others.  All Rights Reserved.
 **********************************************************************
 * Author: Mark Davis
 **********************************************************************
 */
package org.unicode.cldr.util;

import java.util.ArrayList;
import java.util.Collection;
import java.util.EnumSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.TreeSet;

import com.ibm.icu.impl.Utility;
import com.ibm.icu.text.UnicodeSet;

/**
 * Mutable parser/builder for underscore-separated locale identifiers of the form
 * {@code language[_Script][_REGION][_VARIANT...]}, e.g. {@code sr_Latn_RS} or
 * {@code en_US_POSIX}.
 *
 * <p>An instance holds the most recently parsed (or explicitly set) fields. Instances are
 * reused by {@link #set(String)} and are therefore not safe for concurrent use.
 */
public class LocaleIDParser {
    private static final String[] NO_VARIANTS = new String[0];

    /**
     * @return Returns the language.
     */
    public String getLanguage() {
        return language;
    }

    /**
     * @return Returns the language followed by {@code "_" + script} when a script is present,
     *         otherwise just the language.
     */
    public String getLanguageScript() {
        if (script.length() != 0) {
            return language + "_" + script;
        }
        return language;
    }

    /**
     * Maps each locale ID in {@code in} to its language(+script) part.
     *
     * @param in locale IDs to parse
     * @return a new sorted set of the language(+script) prefixes
     */
    public static Set<String> getLanguageScript(Collection<String> in) {
        return getLanguageScript(in, null);
    }

    /**
     * Maps each locale ID in {@code in} to its language(+script) part and adds it to
     * {@code output}.
     *
     * @param in locale IDs to parse
     * @param output set to add the results to; if null, a new {@link TreeSet} is created
     * @return {@code output} (or the newly created set)
     */
    public static Set<String> getLanguageScript(Collection<String> in, Set<String> output) {
        if (output == null) {
            output = new TreeSet<>();
        }
        LocaleIDParser lparser = new LocaleIDParser();
        for (String localeID : in) {
            output.add(lparser.set(localeID).getLanguageScript());
        }
        return output;
    }

    /**
     * @return Returns the region.
     */
    public String getRegion() {
        return region;
    }

    /**
     * @return Returns the script.
     */
    public String getScript() {
        return script;
    }

    /**
     * @return Returns a copy of the variants (never null; empty if there are none).
     */
    public String[] getVariants() {
        return variants.clone();
    }

    // TODO, update to RFC3066
    // http://www.inter-locale.com/ID/draft-phillips-langtags-08.html
    private String language;
    // Script, region and variants start out empty (the same state set() resets them to), so
    // that a parser configured only through the setters can be queried without a
    // NullPointerException.
    private String script = "";
    private String region = "";
    private String[] variants = NO_VARIANTS;

    static final UnicodeSet letters = new UnicodeSet("[a-zA-Z]");
    static final UnicodeSet digits = new UnicodeSet("[0-9]");

    /**
     * Parses {@code localeID} and replaces all fields of this parser with the result.
     *
     * <p>The first piece is always the language. The next piece is taken as the script if it
     * is four ASCII letters, and the piece after that as the region if it is two ASCII letters
     * or three ASCII digits. All following pieces, up to (not including) the first empty
     * piece, are variants.
     *
     * @param localeID underscore-separated locale ID; must not be null
     * @return this parser, for chaining
     */
    public LocaleIDParser set(String localeID) {
        region = script = "";
        variants = NO_VARIANTS;

        String[] pieces = splitOnUnderscore(localeID);
        int i = 0;
        language = pieces[i++];
        if (i < pieces.length && isScript(pieces[i])) {
            script = pieces[i++];
        }
        if (i < pieces.length && isRegion(pieces[i])) {
            region = pieces[i++];
        }
        List<String> al = new ArrayList<>();
        // An empty piece (e.g. from "__") terminates the variant list.
        while (i < pieces.length && pieces[i].length() > 0) {
            al.add(pieces[i++]);
        }
        variants = al.toArray(new String[0]);
        return this;
    }

    /** Splits on '_' into exactly as many pieces as the ID contains (no fixed upper bound). */
    private static String[] splitOnUnderscore(String localeID) {
        int count = 1;
        for (int i = 0; i < localeID.length(); ++i) {
            if (localeID.charAt(i) == '_') {
                ++count;
            }
        }
        String[] pieces = new String[count];
        Utility.split(localeID, '_', pieces);
        return pieces;
    }

    /** A script is exactly four ASCII letters; four-character variants such as "1996" are not. */
    private static boolean isScript(String piece) {
        return piece.length() == 4 && letters.containsAll(piece);
    }

    /** A region is either two ASCII letters or three ASCII digits. */
    private static boolean isRegion(String piece) {
        return (piece.length() == 2 && letters.containsAll(piece))
            || (piece.length() == 3 && digits.containsAll(piece));
    }

    /**
     * Get the parent of a locale. If the input is "root", then return null.
     * For example, if localeName is "fr_CA", return "fr".
     *
     * Only works on canonical locale names (right casing, etc.)!
     *
     * Formerly this function returned an empty string when localeName was "_VETTING".
     * Now it returns "root" where it would have returned an empty string.
     * TODO: explain "__VETTING", somehow related to SUMMARY_LOCALE. Note that
     * CLDRLocale.process() changes "__" to "_" before this function is called.
     * Reference: https://unicode-org.atlassian.net/browse/CLDR-13133
     *
     * @param localeName canonical locale name; must not be null
     * @return the explicit parent if the supplemental data defines one, otherwise the
     *         truncated name or "root"; null only for "root" itself
     */
    public static String getParent(String localeName) {
        int pos = localeName.lastIndexOf('_');
        if (pos >= 0) {
            SupplementalDataInfo sdi = SupplementalDataInfo.getInstance();
            String explicitParent = sdi.getExplicitParentLocale(localeName);
            if (explicitParent != null) {
                return explicitParent;
            }
            String truncated = localeName.substring(0, pos);
            // if the final item is a script, and it is not the default content, then go directly to root
            int pos2 = getScriptPosition(localeName);
            if (pos2 > 0) {
                String script = localeName.substring(pos + 1);
                String defaultScript = sdi.getDefaultScript(truncated);
                if (!script.equals(defaultScript)) {
                    return "root";
                }
            }
            if (truncated.length() == 0) {
                return "root";
            }
            return truncated;
        }
        if (localeName.equals("root")) {
            return null;
        }
        return "root";
    }

    /**
     * If the locale consists of baseLanguage+script, return the position of the separator, otherwise -1.
     * The test is purely positional: exactly one '_' followed by exactly four characters.
     *
     * @param locale the locale ID to inspect
     * @return index of the '_' before the script, or -1
     */
    public static int getScriptPosition(String locale) {
        int pos = locale.indexOf('_');
        if (pos >= 0 && pos + 5 == locale.length()) {
            int pos2 = locale.indexOf('_', pos + 1);
            if (pos2 < 0) {
                return pos;
            }
        }
        return -1;
    }

    /**
     * Utility to get the simple parent of a locale. If the input is "root", then the output is null.
     * This method is similar to the getParent() method above, except that it does NOT pay any attention
     * to the explicit parent locales information. Thus, getParent("zh_Hant") will return "root",
     * but getSimpleParent("zh_Hant") would return "zh".
     *
     * @param localeName locale name; must not be null
     * @return the name truncated at its last '_'; "root" for a name without '_'; null for
     *         "root" and for {@code CLDRFile.SUPPLEMENTAL_NAME}
     */
    public static String getSimpleParent(String localeName) {
        int pos = localeName.lastIndexOf('_');
        if (pos >= 0) {
            return localeName.substring(0, pos);
        }
        if (localeName.equals("root") || localeName.equals(CLDRFile.SUPPLEMENTAL_NAME)) {
            return null;
        }
        return "root";
    }

    public LocaleIDParser setLanguage(String language) {
        this.language = language;
        return this;
    }

    public LocaleIDParser setRegion(String region) {
        this.region = region;
        return this;
    }

    public LocaleIDParser setScript(String script) {
        this.script = script;
        return this;
    }

    /**
     * Sets the variants to a copy of the given array.
     *
     * @param variants the new variants; null is treated as "no variants"
     * @return this parser, for chaining
     */
    public LocaleIDParser setVariants(String[] variants) {
        this.variants = variants == null ? NO_VARIANTS : variants.clone();
        return this;
    }

    public enum Level {
        Language, Script, Region, Variants, Other
    }

    /**
     * Returns the set of levels present in this locale ID.
     *
     * @return a set that always contains {@link Level#Language}, plus {@link Level#Script},
     *         {@link Level#Region} and {@link Level#Variants} for each of those fields that
     *         is non-empty
     */
    public Set<Level> getLevels() {
        EnumSet<Level> result = EnumSet.of(Level.Language);
        if (script.length() != 0) {
            result.add(Level.Script);
        }
        if (region.length() != 0) {
            result.add(Level.Region);
        }
        if (variants.length != 0) {
            result.add(Level.Variants);
        }
        return result;
    }

    /**
     * Returns the IDs in {@code set} that start with this locale's parent prefix and have
     * exactly the same levels as this locale.
     *
     * <p>This parser is reused to parse each candidate and is re-parsed from its own string
     * form before returning.
     *
     * @param set candidate locale IDs
     * @return a new sorted set of the matching IDs
     */
    public Set<String> getSiblings(Set<String> set) {
        Set<Level> myLevel = getLevels();
        String localeID = toString();
        String parentID = getParent(localeID);

        String prefix = (parentID == null || "root".equals(parentID)) ? "" : parentID + "_";
        Set<String> siblings = new TreeSet<>();
        for (String id : set) {
            if (id.startsWith(prefix) && set(id).getLevels().equals(myLevel)) {
                siblings.add(id);
            }
        }
        set(localeID); // leave in known state
        return siblings;
    }

    /**
     * @return the locale ID rebuilt from the current fields, joined with '_'; empty script
     *         and region are omitted
     */
    @Override
    public String toString() {
        StringBuilder result = new StringBuilder(language);
        if (script.length() != 0) {
            result.append('_').append(script);
        }
        if (region.length() != 0) {
            result.append('_').append(region);
        }
        for (String variant : variants) {
            result.append('_').append(variant);
        }
        return result.toString();
    }
}