1 package org.unicode.cldr.util; 2 3 import java.util.ArrayList; 4 import java.util.Collections; 5 import java.util.List; 6 import java.util.Locale; 7 import java.util.Set; 8 import java.util.SortedMap; 9 import java.util.TreeMap; 10 import java.util.TreeSet; 11 import java.util.regex.Matcher; 12 import java.util.regex.Pattern; 13 14 import com.ibm.icu.impl.Relation; 15 16 public class UExtension { 17 static SupplementalDataInfo data = SupplementalDataInfo.getInstance(CLDRPaths.SUPPLEMENTAL_DIRECTORY); 18 19 static Pattern SEP = PatternCache.get("[-_]"); 20 static Pattern SPACE = PatternCache.get("\\s"); 21 static Pattern ALPHANUM = PatternCache.get("[0-9A-Za-z]{2,8}"); 22 static Pattern CODEPOINTS = PatternCache.get("(10|[0-9A-Fa-f])?[0-9A-Fa-f]{4}(\\s(10|[0-9A-Fa-f])?[0-9A-Fa-f]{4})*"); 23 static Relation<String, String> validKeyTypes = data.getBcp47Keys(); 24 25 private boolean validating = false; 26 private SortedMap<String, List<String>> keyTypes = new TreeMap<String, List<String>>(); 27 private Set<String> attributes = new TreeSet<String>(); 28 getKeys()29 public Set<String> getKeys() { 30 return keyTypes.keySet(); 31 } 32 getTypes(String key)33 public List<String> getTypes(String key) { 34 return keyTypes.get(key); 35 } 36 getAttributes()37 public Set<String> getAttributes() { 38 return attributes; 39 } 40 isValidating()41 public boolean isValidating() { 42 return validating; 43 } 44 setValidating(boolean validating)45 public UExtension setValidating(boolean validating) { 46 this.validating = validating; 47 return this; 48 } 49 50 /** 51 * Parses the subtags after the -u- 52 * 53 * @param source 54 * @return 55 */ parse(String source)56 public UExtension parse(String source) { 57 // the subtags that are up to the first two letter are attributes 58 String key = null; 59 List<String> list = null; 60 Set<String> validSubtypes = null; 61 Matcher alphanum = ALPHANUM.matcher(""); 62 63 for (String subtag : SEP.split(source)) { 64 if (!alphanum.reset(subtag).matches()) { 65 throw new IllegalArgumentException("Invalid subtag contents, must be [0-9 A-Z a-z]{2,8}: " + subtag); 66 } 67 subtag = subtag.toLowerCase(Locale.ENGLISH); // normalize 68 if (subtag.length() == 2) { // key 69 if (list != null) { // check size of previous list 70 if (list.size() == 0 || !key.equals("vt") && list.size() > 1) { 71 throw new IllegalArgumentException("Illegal number of subtypes for: " + key + "\t" + list); 72 } 73 } 74 key = subtag; 75 if (validating) { 76 validSubtypes = validKeyTypes.getAll(key); 77 if (validSubtypes == null) { 78 throw new IllegalArgumentException("Invalid key: " + key); 79 } 80 } 81 list = keyTypes.get(key); 82 if (list != null) { 83 throw new IllegalArgumentException("Multiple keys with same value: " + subtag); 84 } 85 list = new ArrayList<String>(); 86 keyTypes.put(key, list); 87 } else { // add subtype 88 if (key == null) { 89 if (validating) { 90 throw new IllegalArgumentException("No attributes currently valid: " + subtag); 91 } 92 attributes.add(subtag); 93 break; 94 } 95 if (validating) { 96 if (key.equals("vt")) { 97 if (!CODEPOINTS.matcher(subtag).matches()) { 98 throw new IllegalArgumentException("Illegal subtypes: " + key + "-" + subtag); 99 } 100 } else if (!validSubtypes.contains(subtag)) { 101 throw new IllegalArgumentException("Illegal subtypes: " + key + "-" + subtag); 102 } 103 } 104 list.add(subtag); 105 } 106 } 107 // protect 108 attributes = Collections.unmodifiableSet(attributes); 109 for (String key2 : keyTypes.keySet()) { 110 list = keyTypes.get(key2); 111 keyTypes.put(key2, Collections.unmodifiableList(list)); 112 } 113 keyTypes = Collections.unmodifiableSortedMap(keyTypes); 114 return this; 115 } 116 toString()117 public String toString() { 118 return "{attributes=" + attributes + ", keyTypes=" + keyTypes + "}"; 119 } 120 } 121