1 /*
2  *******************************************************************************
3  * Copyright (C) 1996-2012, Google, International Business Machines Corporation and    *
4  * others. All Rights Reserved.                                                *
5  *******************************************************************************
6  */
7 package org.unicode.cldr.util.props;
8 
9 import java.util.Comparator;
10 import java.util.HashMap;
11 import java.util.List;
12 
13 import org.unicode.cldr.util.props.UnicodeProperty.PatternMatcher;
14 
15 import com.ibm.icu.impl.UnicodeRegex;
16 import com.ibm.icu.text.UTF16;
17 import com.ibm.icu.text.UnicodeSet;
18 
19 /**
20  * Allows for overriding the parsing of UnicodeSet property patterns.
21  * <p>
22  * WARNING: If this UnicodePropertySymbolTable is used with {@code UnicodeSet.setDefaultXSymbolTable}, and the
 * Unassigned characters (gc=Cn) differ from those in ICU, you MUST call
24  * {@code UnicodeProperty.ResetCacheProperties} afterwards. If you then call {@code UnicodeSet.setDefaultXSymbolTable}
25  * with null to clear the value, you MUST also call {@code UnicodeProperty.ResetCacheProperties}.
26  *
27  * @author markdavis
28  */
29 public class UnicodePropertySymbolTable extends UnicodeSet.XSymbolTable {
30     UnicodeRegex unicodeRegex;
31     final UnicodeProperty.Factory factory;
32 
UnicodePropertySymbolTable(UnicodeProperty.Factory factory)33     public UnicodePropertySymbolTable(UnicodeProperty.Factory factory) {
34       unicodeRegex = new UnicodeRegex().setSymbolTable(this);
35       this.factory = factory;
36     }
37 
38 
39     //    public boolean applyPropertyAlias0(String propertyName,
40     //            String propertyValue, UnicodeSet result) {
41     //      if (!propertyName.contains("*")) {
42     //        return applyPropertyAlias(propertyName, propertyValue, result);
43     //      }
44     //      String[] propertyNames = propertyName.split("[*]");
45     //      for (int i = propertyNames.length - 1; i >= 0; ++i) {
46     //        String pname = propertyNames[i];
47     //
48     //      }
49     //      return null;
50     //    }
51 
applyPropertyAlias(String propertyName, String propertyValue, UnicodeSet result)52     public boolean applyPropertyAlias(String propertyName,
53             String propertyValue, UnicodeSet result) {
54       boolean status = false;
55       boolean invert = false;
56       int posNotEqual = propertyName.indexOf('\u2260');
57       int posColon = propertyName.indexOf(':');
58       if (posNotEqual >= 0 || posColon >= 0) {
59           if (posNotEqual < 0) posNotEqual = propertyName.length();
60           if (posColon < 0) posColon = propertyName.length();
61           int opPos = posNotEqual < posColon ? posNotEqual : posColon;
62           propertyValue = propertyValue.length() == 0 ? propertyName.substring(opPos+1)
63                   : propertyName.substring(opPos+1) + "=" + propertyValue;
64           propertyName = propertyName.substring(0,opPos);
65           if (posNotEqual < posColon) {
66               invert = true;
67           }
68       }
69       if (propertyName.endsWith("!")) {
70         propertyName = propertyName.substring(0, propertyName.length() - 1);
71         invert = !invert;
72       }
73       propertyValue = propertyValue.trim();
74       if (propertyValue.length() != 0) {
75         status = applyPropertyAlias0(propertyName, propertyValue, result);
76       } else {
77         try {
78           status = applyPropertyAlias0("gc", propertyName, result);
79         } catch (Exception e) {};
80         if (!status) {
81           try {
82             status = applyPropertyAlias0("sc", propertyName, result);
83           } catch (Exception e) {};
84           if (!status) {
85             try {
86               status = applyPropertyAlias0(propertyName, "Yes", result);
87             } catch (Exception e) {};
88             if (!status) {
89               status = applyPropertyAlias0(propertyName, "", result);
90             }
91           }
92         }
93       }
94       if (status && invert) {
95         result.complement();
96       }
97       return status;
98     }
99 
100     static final HashMap<String,String[]> GC_REMAP = new HashMap();
101     {
102         GC_REMAP.put("c", "Cc Cf Cn Co Cs".split(" "));
103         GC_REMAP.put("other", GC_REMAP.get("c"));
104 
105         GC_REMAP.put("l", "Ll Lm Lo Lt Lu".split(" "));
106         GC_REMAP.put("letter", GC_REMAP.get("l"));
107 
108         GC_REMAP.put("lc", "Ll Lt Lu".split(" "));
109         GC_REMAP.put("casedletter", GC_REMAP.get("lc"));
110 
111         GC_REMAP.put("m", "Mc Me Mn".split(" "));
112         GC_REMAP.put("mark", GC_REMAP.get("m"));
113 
114         GC_REMAP.put("n", "Nd Nl No".split(" "));
115         GC_REMAP.put("number", GC_REMAP.get("n"));
116 
117         GC_REMAP.put("p", "Pc Pd Pe Pf Pi Po Ps".split(" "));
118         GC_REMAP.put("punctuation", GC_REMAP.get("p"));
119         GC_REMAP.put("punct", GC_REMAP.get("p"));
120 
121         GC_REMAP.put("s", "Sc Sk Sm So".split(" "));
122         GC_REMAP.put("symbol", GC_REMAP.get("s"));
123 
124         GC_REMAP.put("z", "Zl Zp Zs".split(" "));
125         GC_REMAP.put("separator", GC_REMAP.get("z"));
126     }
127 
128     public boolean applyPropertyAlias0(String propertyName,
129             String propertyValue, UnicodeSet result) {
130       result.clear();
131       UnicodeProperty prop = factory.getProperty(propertyName);
132       String canonicalName = prop.getName();
133       boolean isAge = UnicodeProperty.equalNames("Age", canonicalName);
134 
135       // Hack for special GC values
136       if (canonicalName.equals("General_Category")) {
137           String[] parts = GC_REMAP.get(UnicodeProperty.toSkeleton(propertyValue));
138           if (parts != null) {
139               for (String part : parts) {
140                   prop.getSet(part, result);
141               }
142               return true;
143           }
144       }
145 
146       PatternMatcher patternMatcher = null;
147       if (propertyValue.length() > 1 && propertyValue.startsWith("/") && propertyValue.endsWith("/")) {
148         String fixedRegex = unicodeRegex.transform(propertyValue.substring(1, propertyValue.length() - 1));
149         patternMatcher = new UnicodeProperty.RegexMatcher().set(fixedRegex);
150       }
151       UnicodeProperty otherProperty = null;
152       boolean testCp = false;
153       if (propertyValue.length() > 1 && propertyValue.startsWith("@") && propertyValue.endsWith("@")) {
154         String otherPropName = propertyValue.substring(1, propertyValue.length() - 1).trim();
155         if ("cp".equalsIgnoreCase(otherPropName)) {
156           testCp = true;
157         } else {
158           otherProperty = factory.getProperty(otherPropName);
159         }
160       }
161       if (prop != null) {
162         UnicodeSet set;
163         if (testCp) {
164           set = new UnicodeSet();
165           for (int i = 0; i <= 0x10FFFF; ++i) {
166             if (UnicodeProperty.equals(i, prop.getValue(i))) {
167               set.add(i);
168             }
169           }
170         } else if (otherProperty != null) {
171           set = new UnicodeSet();
172           for (int i = 0; i <= 0x10FFFF; ++i) {
173             String v1 = prop.getValue(i);
174             String v2 = otherProperty.getValue(i);
175             if (UnicodeProperty.equals(v1, v2)) {
176               set.add(i);
177             }
178           }
179         } else if (patternMatcher == null) {
180           if (!isValid(prop, propertyValue)) {
181             throw new IllegalArgumentException("The value '" + propertyValue + "' is illegal. Values for " + propertyName
182                     + " must be in "
183                     + prop.getAvailableValues() + " or in " + prop.getValueAliases());
184           }
185           if (isAge) {
186             set = prop.getSet(new ComparisonMatcher(propertyValue, Relation.geq));
187           } else {
188             set = prop.getSet(propertyValue);
189           }
190         } else if (isAge) {
191           set = new UnicodeSet();
192           List<String> values = prop.getAvailableValues();
193           for (String value : values) {
194             if (patternMatcher.matches(value)) {
195               for (String other : values) {
196                 if (other.compareTo(value) <= 0) {
197                   set.addAll(prop.getSet(other));
198                 }
199               }
200             }
201           }
202         } else {
203           set = prop.getSet(patternMatcher);
204         }
205         result.addAll(set);
206         return true;
207       }
208       throw new IllegalArgumentException("Illegal property: " + propertyName);
209     }
210 
211 
212 
isValid(UnicodeProperty prop, String propertyValue)213     private boolean isValid(UnicodeProperty prop, String propertyValue) {
214 //      if (prop.getName().equals("General_Category")) {
215 //        if (propertyValue)
216 //      }
217       return prop.isValidValue(propertyValue);
218     }
219 
220     public enum Relation {less, leq, equal, geq, greater}
221 
222     public static class ComparisonMatcher implements PatternMatcher {
223         Relation relation;
224         static Comparator comparator = new UTF16.StringComparator(true, false,0);
225 
226         String pattern;
227 
ComparisonMatcher(String pattern, Relation comparator)228         public ComparisonMatcher(String pattern, Relation comparator) {
229           this.relation = comparator;
230           this.pattern = pattern;
231         }
232 
matches(Object value)233         public boolean matches(Object value) {
234           int comp = comparator.compare(pattern, value.toString());
235           switch (relation) {
236           case less: return comp < 0;
237           case leq: return comp <= 0;
238           default: return comp == 0;
239           case geq: return comp >= 0;
240           case greater: return comp > 0;
241           }
242         }
243 
set(String pattern)244         public PatternMatcher set(String pattern) {
245           this.pattern = pattern;
246           return this;
247         }
248       }
249   }
250