/*
 *******************************************************************************
 * Copyright (C) 1996-2012, Google, International Business Machines Corporation and *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 */
package org.unicode.cldr.util.props;

import java.util.Comparator;
import java.util.HashMap;
import java.util.List;

import org.unicode.cldr.util.props.UnicodeProperty.PatternMatcher;

import com.ibm.icu.impl.UnicodeRegex;
import com.ibm.icu.text.UTF16;
import com.ibm.icu.text.UnicodeSet;

/**
 * Allows for overriding the parsing of UnicodeSet property patterns.
 * <p>
 * WARNING: If this UnicodePropertySymbolTable is used with {@code UnicodeSet.setDefaultXSymbolTable}, and the
 * Unassigned characters (gc=Cn) are different than in ICU, you MUST call
 * {@code UnicodeProperty.ResetCacheProperties} afterwards. If you then call {@code UnicodeSet.setDefaultXSymbolTable}
 * with null to clear the value, you MUST also call {@code UnicodeProperty.ResetCacheProperties}.
 *
 * @author markdavis
 */
public class UnicodePropertySymbolTable extends UnicodeSet.XSymbolTable {
    /** Regex transformer whose symbol table is this instance; used for {@code /regex/} property values. */
    UnicodeRegex unicodeRegex;

    /** Source of the {@link UnicodeProperty} objects that property names are resolved against. */
    final UnicodeProperty.Factory factory;

    /**
     * Creates a symbol table that resolves property names through {@code factory}.
     *
     * @param factory the property factory queried by {@link #applyPropertyAlias0}
     */
    public UnicodePropertySymbolTable(UnicodeProperty.Factory factory) {
        unicodeRegex = new UnicodeRegex().setSymbolTable(this);
        this.factory = factory;
    }

    /**
     * Resolves a property name/value pair into {@code result}.
     * <p>
     * The name may carry an embedded operator: it is split at the first {@code ':'} or U+2260 (NOT EQUAL TO),
     * the text after the operator becomes the value (prefixed to any supplied value with {@code "="}), and a
     * U+2260 operator inverts the result. A name ending in {@code "!"} (after that split) toggles the inversion.
     * <p>
     * When the (trimmed) value is empty, the name is tried, in order, as a value of the {@code "gc"} property,
     * as a value of the {@code "sc"} property, as a property with value {@code "Yes"}, and finally as a property
     * with an empty value. Failures of the first three attempts are ignored; a failure of the last one propagates.
     *
     * @param propertyName  property name, optionally containing an operator and value as described above
     * @param propertyValue property value; may be empty
     * @param result        set that receives the matching code points (complemented when inverted)
     * @return {@code true} if the property was applied to {@code result}
     * @throws IllegalArgumentException if the property or value is not recognized
     */
    @Override
    public boolean applyPropertyAlias(String propertyName,
            String propertyValue, UnicodeSet result) {
        boolean invert = false;
        int posNotEqual = propertyName.indexOf('\u2260');
        int posColon = propertyName.indexOf(':');
        if (posNotEqual >= 0 || posColon >= 0) {
            // Treat a missing operator as "past the end" so the earlier of the two wins.
            if (posNotEqual < 0) {
                posNotEqual = propertyName.length();
            }
            if (posColon < 0) {
                posColon = propertyName.length();
            }
            int opPos = Math.min(posNotEqual, posColon);
            String embeddedValue = propertyName.substring(opPos + 1);
            propertyValue = propertyValue.length() == 0
                    ? embeddedValue
                    : embeddedValue + "=" + propertyValue;
            propertyName = propertyName.substring(0, opPos);
            if (posNotEqual < posColon) {
                invert = true;
            }
        }
        if (propertyName.endsWith("!")) {
            propertyName = propertyName.substring(0, propertyName.length() - 1);
            invert = !invert;
        }
        propertyValue = propertyValue.trim();

        boolean status;
        if (propertyValue.length() != 0) {
            status = applyPropertyAlias0(propertyName, propertyValue, result);
        } else {
            // Short-circuit evaluation keeps the original order of attempts; only the last may throw.
            status = tryApplyPropertyAlias0("gc", propertyName, result)
                    || tryApplyPropertyAlias0("sc", propertyName, result)
                    || tryApplyPropertyAlias0(propertyName, "Yes", result)
                    || applyPropertyAlias0(propertyName, "", result);
        }
        if (status && invert) {
            result.complement();
        }
        return status;
    }

    /**
     * Best-effort wrapper around {@link #applyPropertyAlias0}: any exception is treated as "did not apply".
     */
    private boolean tryApplyPropertyAlias0(String propertyName, String propertyValue, UnicodeSet result) {
        try {
            return applyPropertyAlias0(propertyName, propertyValue, result);
        } catch (Exception ignored) {
            // Deliberately ignored: this interpretation failed, so the caller moves on to the next one.
            return false;
        }
    }

    /**
     * General_Category group names (short and long forms, as matched against the result of
     * {@code UnicodeProperty.toSkeleton}) mapped to the two-letter values they expand to.
     */
    static final HashMap<String, String[]> GC_REMAP = new HashMap<String, String[]>();

    // Populated once per class load; the map is static, so it must not be (re)filled per instance.
    static {
        GC_REMAP.put("c", "Cc Cf Cn Co Cs".split(" "));
        GC_REMAP.put("other", GC_REMAP.get("c"));

        GC_REMAP.put("l", "Ll Lm Lo Lt Lu".split(" "));
        GC_REMAP.put("letter", GC_REMAP.get("l"));

        GC_REMAP.put("lc", "Ll Lt Lu".split(" "));
        GC_REMAP.put("casedletter", GC_REMAP.get("lc"));

        GC_REMAP.put("m", "Mc Me Mn".split(" "));
        GC_REMAP.put("mark", GC_REMAP.get("m"));

        GC_REMAP.put("n", "Nd Nl No".split(" "));
        GC_REMAP.put("number", GC_REMAP.get("n"));

        GC_REMAP.put("p", "Pc Pd Pe Pf Pi Po Ps".split(" "));
        GC_REMAP.put("punctuation", GC_REMAP.get("p"));
        GC_REMAP.put("punct", GC_REMAP.get("p"));

        GC_REMAP.put("s", "Sc Sk Sm So".split(" "));
        GC_REMAP.put("symbol", GC_REMAP.get("s"));

        GC_REMAP.put("z", "Zl Zp Zs".split(" "));
        GC_REMAP.put("separator", GC_REMAP.get("z"));
    }

    /**
     * Clears {@code result} and fills it with the code points selected by one property/value pair.
     * <p>
     * Supported value forms, checked in this order:
     * <ul>
     * <li>for the property whose name is {@code General_Category}: a group name found in {@link #GC_REMAP},
     * expanded to the union of its member values;</li>
     * <li>{@code @cp@} (case-insensitive): code points {@code i} for which
     * {@code UnicodeProperty.equals(i, prop.getValue(i))} holds;</li>
     * <li>{@code @otherProperty@}: code points whose value for this property equals their value for the
     * other property;</li>
     * <li>a plain value: validated with {@code isValidValue}; for the Age property it is matched with
     * {@link Relation#geq} via {@link ComparisonMatcher}, otherwise exactly;</li>
     * <li>{@code /regex/}: values matching the regex; for the Age property every available value that
     * sorts at or before a matching value is included as well.</li>
     * </ul>
     *
     * @param propertyName  name or alias of the property to look up in the factory
     * @param propertyValue value expression in one of the forms above
     * @param result        set to clear and fill
     * @return always {@code true} when no exception is thrown
     * @throws IllegalArgumentException if the property is unknown or a plain value is not valid for it
     */
    public boolean applyPropertyAlias0(String propertyName,
            String propertyValue, UnicodeSet result) {
        result.clear();
        UnicodeProperty prop = factory.getProperty(propertyName);
        if (prop == null) {
            // Must be checked before prop is dereferenced, otherwise an unknown name surfaces as an NPE.
            throw new IllegalArgumentException("Illegal property: " + propertyName);
        }
        String canonicalName = prop.getName();
        boolean isAge = UnicodeProperty.equalNames("Age", canonicalName);

        // Hack for special GC values
        if (canonicalName.equals("General_Category")) {
            String[] parts = GC_REMAP.get(UnicodeProperty.toSkeleton(propertyValue));
            if (parts != null) {
                for (String part : parts) {
                    prop.getSet(part, result);
                }
                return true;
            }
        }

        PatternMatcher patternMatcher = null;
        if (propertyValue.length() > 1 && propertyValue.startsWith("/") && propertyValue.endsWith("/")) {
            String fixedRegex = unicodeRegex.transform(propertyValue.substring(1, propertyValue.length() - 1));
            patternMatcher = new UnicodeProperty.RegexMatcher().set(fixedRegex);
        }

        UnicodeProperty otherProperty = null;
        boolean testCp = false;
        if (propertyValue.length() > 1 && propertyValue.startsWith("@") && propertyValue.endsWith("@")) {
            String otherPropName = propertyValue.substring(1, propertyValue.length() - 1).trim();
            if ("cp".equalsIgnoreCase(otherPropName)) {
                testCp = true;
            } else {
                // May be null for an unknown name; the value then falls through to the plain-value branch.
                otherProperty = factory.getProperty(otherPropName);
            }
        }

        UnicodeSet set;
        if (testCp) {
            set = new UnicodeSet();
            for (int i = 0; i <= 0x10FFFF; ++i) {
                if (UnicodeProperty.equals(i, prop.getValue(i))) {
                    set.add(i);
                }
            }
        } else if (otherProperty != null) {
            set = new UnicodeSet();
            for (int i = 0; i <= 0x10FFFF; ++i) {
                String v1 = prop.getValue(i);
                String v2 = otherProperty.getValue(i);
                if (UnicodeProperty.equals(v1, v2)) {
                    set.add(i);
                }
            }
        } else if (patternMatcher == null) {
            if (!isValid(prop, propertyValue)) {
                throw new IllegalArgumentException("The value '" + propertyValue + "' is illegal. Values for "
                        + propertyName
                        + " must be in "
                        + prop.getAvailableValues() + " or in " + prop.getValueAliases());
            }
            if (isAge) {
                set = prop.getSet(new ComparisonMatcher(propertyValue, Relation.geq));
            } else {
                set = prop.getSet(propertyValue);
            }
        } else if (isAge) {
            set = new UnicodeSet();
            List<String> values = prop.getAvailableValues();
            for (String value : values) {
                if (patternMatcher.matches(value)) {
                    for (String other : values) {
                        // NOTE(review): String.compareTo is lexicographic, so multi-digit versions
                        // (e.g. "10.0" vs "2.0") may not order numerically - confirm against the Age values.
                        if (other.compareTo(value) <= 0) {
                            set.addAll(prop.getSet(other));
                        }
                    }
                }
            }
        } else {
            set = prop.getSet(patternMatcher);
        }
        result.addAll(set);
        return true;
    }

    /**
     * Returns whether {@code propertyValue} is accepted by {@code prop.isValidValue}.
     */
    private boolean isValid(UnicodeProperty prop, String propertyValue) {
        return prop.isValidValue(propertyValue);
    }

    /** Comparison operators understood by {@link ComparisonMatcher}. */
    public enum Relation {less, leq, equal, geq, greater}

    /**
     * Matcher that compares its pattern against candidate values with {@link #comparator} and accepts
     * those for which the configured {@link Relation} holds, read as {@code pattern <relation> value}.
     */
    public static class ComparisonMatcher implements PatternMatcher {
        /** Relation that must hold between the pattern and a candidate value. */
        Relation relation;

        /** Ordering used for every comparison. Raw type kept so the package-visible field is unchanged. */
        @SuppressWarnings("rawtypes")
        static Comparator comparator = new UTF16.StringComparator(true, false,0);

        /** Left-hand operand of the comparison. */
        String pattern;

        /**
         * @param pattern    left-hand operand compared against each candidate value
         * @param comparator relation that must hold for a value to match
         */
        public ComparisonMatcher(String pattern, Relation comparator) {
            this.relation = comparator;
            this.pattern = pattern;
        }

        /**
         * Compares the pattern with {@code value.toString()}.
         *
         * @param value candidate value; must not be null
         * @return whether {@code pattern <relation> value} holds under {@link #comparator}
         */
        @SuppressWarnings("unchecked") // comparator is a raw Comparator that is handed two Strings
        public boolean matches(Object value) {
            int comp = comparator.compare(pattern, value.toString());
            switch (relation) {
            case less:
                return comp < 0;
            case leq:
                return comp <= 0;
            case geq:
                return comp >= 0;
            case greater:
                return comp > 0;
            case equal:
            default:
                return comp == 0;
            }
        }

        /**
         * Replaces the pattern.
         *
         * @param pattern new left-hand operand
         * @return this matcher
         */
        public PatternMatcher set(String pattern) {
            this.pattern = pattern;
            return this;
        }
    }
}