1 package org.unicode.cldr.test;
2 
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.EnumMap;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.regex.Pattern;

import org.unicode.cldr.test.CheckCLDR.CheckStatus.Subtype;
import org.unicode.cldr.util.CLDRConfig;
import org.unicode.cldr.util.CLDRFile;
import org.unicode.cldr.util.CldrUtility;
import org.unicode.cldr.util.DtdData;
import org.unicode.cldr.util.DtdData.Attribute;
import org.unicode.cldr.util.DtdData.Element;
import org.unicode.cldr.util.DtdType;
import org.unicode.cldr.util.Factory;
import org.unicode.cldr.util.LocaleIDParser;
import org.unicode.cldr.util.PatternCache;
import org.unicode.cldr.util.SupplementalDataInfo;
import org.unicode.cldr.util.SupplementalDataInfo.AttributeValidityInfo;
import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo;
import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo.Count;
import org.unicode.cldr.util.SupplementalDataInfo.PluralType;
import org.unicode.cldr.util.XPathParts;

import com.ibm.icu.dev.util.CollectionUtilities.ObjectMatcher;
import com.ibm.icu.impl.Relation;
import com.ibm.icu.impl.Row;
import com.ibm.icu.impl.Row.R2;
import com.ibm.icu.text.UnicodeSet;
41 
42 public class CheckAttributeValues extends FactoryCheckCLDR {
43 
44     private static final ObjectMatcher<String> NOT_DONE_YET = new RegexMatcher().set(".*", Pattern.COMMENTS);
45     private static final boolean FIND_MISSING = CldrUtility.getProperty("FIND_MISSING_ATTRIBUTE_TESTS", false); // turn on to show <attributeValues> that are missing.
46     private static final boolean SHOW_UNNECESSARY = false; // turn on to show <attributeValues> we should delete.
47 
48     static LinkedHashSet<String> elementOrder = new LinkedHashSet<String>();
49     static LinkedHashSet<String> attributeOrder = new LinkedHashSet<String>();
50     static LinkedHashSet<String> serialElements = new LinkedHashSet<String>();
51     static Map<String, Map<String, MatcherPattern>> element_attribute_validity = new HashMap<String, Map<String, MatcherPattern>>();
52     static Map<String, MatcherPattern> common_attribute_validity = new HashMap<String, MatcherPattern>();
53     static Map<String, MatcherPattern> variables = new HashMap<String, MatcherPattern>();
54     // static VariableReplacer variableReplacer = new VariableReplacer(); // note: this can be coalesced with the above
55     // -- to do later.
56     static boolean initialized = false;
57     static LocaleMatcher localeMatcher;
58     static Map<String, Map<String, String>> code_type_replacement = new TreeMap<String, Map<String, String>>();
59     static final SupplementalDataInfo supplementalData = CLDRConfig.getInstance().getSupplementalDataInfo();
60     static DtdData ldmlDtdData = DtdData.getInstance(DtdType.ldml);
61 
62     boolean isEnglish;
63     PluralInfo pluralInfo;
64     Relation<String, String> missingTests = Relation.of(new TreeMap(), TreeSet.class);
65 
66     XPathParts parts = new XPathParts(null, null);
67     static final UnicodeSet DIGITS = new UnicodeSet("[0-9]").freeze();
68 
CheckAttributeValues(Factory factory)69     public CheckAttributeValues(Factory factory) {
70         super(factory);
71     }
72 
handleFinish()73     public void handleFinish() {
74         for (Entry<String, Set<String>> entry : missingTests.keyValuesSet()) {
75             System.out.println("Missing element: " + entry.getKey() + ", attributes: " + entry.getValue());
76         }
77     }
78 
handleCheck(String path, String fullPath, String value, Options options, List<CheckStatus> result)79     public CheckCLDR handleCheck(String path, String fullPath, String value, Options options,
80         List<CheckStatus> result) {
81         if (fullPath == null) return this; // skip paths that we don't have
82         if (fullPath.indexOf('[') < 0) return this; // skip paths with no attributes
83         String locale = getCldrFileToCheck().getSourceLocaleID(path, null);
84 
85         // skip paths that are not in the immediate locale
86         if (!getCldrFileToCheck().getLocaleID().equals(locale)) {
87             return this;
88         }
89         parts.set(fullPath);
90         for (int i = 0; i < parts.size(); ++i) {
91             if (parts.getAttributeCount(i) == 0) continue;
92             Map<String, String> attributes = parts.getAttributes(i);
93             String element = parts.getElement(i);
94             Element elementInfo = ldmlDtdData.getElementFromName().get(element);
95 
96             Map<String, MatcherPattern> attribute_validity = element_attribute_validity.get(element);
97             for (String attribute : attributes.keySet()) {
98                 Attribute attributeInfo = elementInfo.getAttributeNamed(attribute);
99                 if (!attributeInfo.values.isEmpty()) {
100                     // we don't need to check, since the DTD will enforce values
101                     continue;
102                 }
103                 String attributeValue = attributes.get(attribute);
104 
105                 // special hack for         // <type key="calendar" type="chinese">Chinese Calendar</type>
106                 if (element.equals("type") && attribute.equals("type")) {
107                     Set<String> typeValues = BCP47_KEY_VALUES.get(attributes.get("key"));
108                     if (!typeValues.contains(attributeValue)) {
109                         result.add(new CheckStatus()
110                             .setCause(this).setMainType(CheckStatus.errorType).setSubtype(Subtype.unexpectedAttributeValue)
111                             .setMessage("Unexpected Attribute Value {0}={1}: expected: {2}",
112                                 new Object[] { attribute, attributeValue, typeValues }));
113                     }
114                     continue;
115                 }
116                 // check the common attributes first
117                 boolean haveTest = check(common_attribute_validity, attribute, attributeValue, result);
118                 // then for the specific element
119                 haveTest = haveTest || check(attribute_validity, attribute, attributeValue, result);
120                 if (!haveTest && FIND_MISSING) {
121                     missingTests.put(element, attribute);
122                 }
123 
124                 // now for plurals
125 
126                 if (attribute.equals("count")) {
127                     if (DIGITS.containsAll(attributeValue)) {
128                         // ok, keep going
129                     } else {
130                         final Count countValue = PluralInfo.Count.valueOf(attributeValue);
131                         if (!pluralInfo.getCounts().contains(countValue)
132                             && !isPluralException(countValue, locale)) {
133                             result.add(new CheckStatus()
134                                 .setCause(this).setMainType(CheckStatus.errorType).setSubtype(Subtype.illegalPlural)
135                                 .setMessage("Illegal plural value {0}; must be one of: {1}",
136                                     new Object[] { countValue, pluralInfo.getCounts() }));
137                         }
138                     }
139                 }
140 
141                 // TODO check other variable elements, like dayPeriods
142             }
143         }
144         return this;
145     }
146 
147     static final Relation<PluralInfo.Count, String> PLURAL_EXCEPTIONS = Relation.of(
148         new EnumMap<PluralInfo.Count, Set<String>>(PluralInfo.Count.class), HashSet.class);
149 
150     static {
PLURAL_EXCEPTIONS.put(PluralInfo.Count.many, "hr")151         PLURAL_EXCEPTIONS.put(PluralInfo.Count.many, "hr");
PLURAL_EXCEPTIONS.put(PluralInfo.Count.many, "sr")152         PLURAL_EXCEPTIONS.put(PluralInfo.Count.many, "sr");
PLURAL_EXCEPTIONS.put(PluralInfo.Count.many, "sh")153         PLURAL_EXCEPTIONS.put(PluralInfo.Count.many, "sh");
PLURAL_EXCEPTIONS.put(PluralInfo.Count.many, "bs")154         PLURAL_EXCEPTIONS.put(PluralInfo.Count.many, "bs");
PLURAL_EXCEPTIONS.put(PluralInfo.Count.few, "ru")155         PLURAL_EXCEPTIONS.put(PluralInfo.Count.few, "ru");
156     }
157 
isPluralException(Count countValue, String locale)158     static boolean isPluralException(Count countValue, String locale) {
159         Set<String> exceptions = PLURAL_EXCEPTIONS.get(countValue);
160         if (exceptions == null) {
161             return false;
162         }
163         if (exceptions.contains(locale)) {
164             return true;
165         }
166         int bar = locale.indexOf('_'); // catch bs_Cyrl, etc.
167         if (bar > 0) {
168             String base = locale.substring(0, bar);
169             if (exceptions.contains(base)) {
170                 return true;
171             }
172         }
173         return false;
174     }
175 
176     /**
177      * return true if we performed a test
178      * @param attribute_validity
179      * @param attribute
180      * @param attributeValue
181      * @param result
182      * @return
183      */
check(Map<String, MatcherPattern> attribute_validity, String attribute, String attributeValue, List<CheckStatus> result)184     private boolean check(Map<String, MatcherPattern> attribute_validity, String attribute, String attributeValue,
185         List<CheckStatus> result) {
186         if (attribute_validity == null) {
187             return false; // no test
188         }
189         MatcherPattern matcherPattern = attribute_validity.get(attribute);
190         if (matcherPattern == null) {
191             return false; // no test
192         }
193         if (matcherPattern.matcher.matches(attributeValue)) {
194             return true;
195         }
196         // special check for deprecated codes
197         String replacement = getReplacement(matcherPattern.value, attributeValue);
198         if (replacement != null) {
199             if (isEnglish) {
200                 return true; // don't flag English
201             }
202             if (replacement.length() == 0) {
203                 result.add(new CheckStatus()
204                     .setCause(this).setMainType(CheckStatus.warningType).setSubtype(Subtype.deprecatedAttribute)
205                     .setMessage("Deprecated Attribute Value {0}={1}. Consider removing.",
206                         new Object[] { attribute, attributeValue }));
207             } else {
208                 result
209                     .add(new CheckStatus()
210                         .setCause(this)
211                         .setMainType(CheckStatus.warningType)
212                         .setSubtype(Subtype.deprecatedAttributeWithReplacement)
213                         .setMessage(
214                             "Deprecated Attribute Value {0}={1}. Consider removing, and possibly modifying the related value for {2}.",
215                             new Object[] { attribute, attributeValue, replacement }));
216             }
217         } else {
218             result.add(new CheckStatus()
219                 .setCause(this).setMainType(CheckStatus.errorType).setSubtype(Subtype.unexpectedAttributeValue)
220                 .setMessage("Unexpected Attribute Value {0}={1}: expected: {2}",
221                     new Object[] { attribute, attributeValue, matcherPattern.pattern }));
222         }
223         return true;
224     }
225 
226     /**
227      * Returns replacement, or null if there is none. "" if the code is deprecated, but without a replacement.
228      * Input is of the form $language
229      *
230      * @return
231      */
getReplacement(String value, String attributeValue)232     String getReplacement(String value, String attributeValue) {
233         Map<String, String> type_replacement = code_type_replacement.get(value);
234         if (type_replacement == null) {
235             return null;
236         }
237         return type_replacement.get(attributeValue);
238     }
239 
240     LocaleIDParser localeIDParser = new LocaleIDParser();
241 
242     @Override
setCldrFileToCheck(CLDRFile cldrFileToCheck, Options options, List<CheckStatus> possibleErrors)243     public CheckCLDR setCldrFileToCheck(CLDRFile cldrFileToCheck, Options options,
244         List<CheckStatus> possibleErrors) {
245         if (cldrFileToCheck == null) return this;
246         if (Phase.FINAL_TESTING == getPhase() || Phase.BUILD == getPhase()) {
247             setSkipTest(false); // ok
248         } else {
249             setSkipTest(true);
250             return this;
251         }
252 
253         pluralInfo = supplementalData.getPlurals(PluralType.cardinal, cldrFileToCheck.getLocaleID());
254         super.setCldrFileToCheck(cldrFileToCheck, options, possibleErrors);
255         isEnglish = "en".equals(localeIDParser.set(cldrFileToCheck.getLocaleID()).getLanguage());
256         synchronized (elementOrder) {
257             if (!initialized) {
258                 getMetadata();
259                 initialized = true;
260                 localeMatcher = LocaleMatcher.make();
261             }
262         }
263         if (!localeMatcher.matches(cldrFileToCheck.getLocaleID())) {
264             possibleErrors.add(new CheckStatus()
265                 .setCause(this).setMainType(CheckStatus.errorType).setSubtype(Subtype.invalidLocale)
266                 .setMessage("Invalid Locale {0}",
267                     new Object[] { cldrFileToCheck.getLocaleID() }));
268 
269         }
270         return this;
271     }
272 
getMetadata()273     private void getMetadata() {
274 
275         // sorting is expensive, but we need it here.
276 
277         Map<String, R2<String, String>> rawVariables = supplementalData.getValidityInfo();
278         for (Entry<String, R2<String, String>> item : rawVariables.entrySet()) {
279             String id = item.getKey();
280             String type = item.getValue().get0();
281             String value = item.getValue().get1();
282             MatcherPattern mp = getMatcherPattern2(type, value);
283             if (mp != null) {
284                 variables.put(id, mp);
285                 // variableReplacer.add(id, value);
286             }
287         }
288         //System.out.println("Variables: " + variables.keySet());
289 
290         Map<AttributeValidityInfo, String> rawAttributeValueInfo = supplementalData.getAttributeValidity();
291 
292         for (Entry<AttributeValidityInfo, String> entry : rawAttributeValueInfo.entrySet()) {
293             AttributeValidityInfo item = entry.getKey();
294             String value = entry.getValue();
295             MatcherPattern mp = getMatcherPattern2(item.getType(), value);
296             if (mp == null) {
297                 System.out.println("Failed to make matcher for: " + item);
298                 continue;
299             }
300             if (FIND_MISSING && mp.matcher == NOT_DONE_YET) {
301                 missingTests.put(item.getElements().toString(), item.getAttributes().toString());
302             }
303 
304             Set<DtdType> dtds = item.getDtds();
305             // TODO handle other DTDs
306             if (!dtds.contains(DtdType.ldml)) {
307                 continue;
308             }
309             Set<String> attributeList = item.getAttributes();
310             Set<String> elementList = item.getElements();
311             if (elementList.size() == 0) {
312                 addAttributes(attributeList, common_attribute_validity, mp);
313             } else {
314                 for (String element : elementList) {
315                     // check if unnecessary
316                     Element elementInfo = ldmlDtdData.getElementFromName().get(element);
317                     if (elementInfo == null) {
318                         System.out.println("Illegal <attributeValues>, element not valid: element: " + element);
319                     } else {
320                         for (String attribute : attributeList) {
321                             Attribute attributeInfo = elementInfo.getAttributeNamed(attribute);
322                             if (attributeInfo == null) {
323                                 System.out.println("Illegal <attributeValues>, attribute not valid: element: " + element + ", attribute: " + attribute);
324                             } else if (!attributeInfo.values.isEmpty()) {
325                                 if (SHOW_UNNECESSARY) {
326                                     System.out.println("Unnecessary <attributeValues …>, the DTD has specific list: element: " + element + ", attribute: "
327                                         + attribute + ", " + attributeInfo.values);
328                                 }
329                             }
330                         }
331                     }
332                     // System.out.println("\t" + element);
333                     Map<String, MatcherPattern> attribute_validity = element_attribute_validity.get(element);
334                     if (attribute_validity == null) {
335                         element_attribute_validity.put(element, attribute_validity = new TreeMap<String, MatcherPattern>());
336                     }
337                     addAttributes(attributeList, attribute_validity, mp);
338                 }
339             }
340         }
341     }
342 
343     final static Map<String, Set<String>> BCP47_KEY_VALUES;
344     static {
345         Map<String, Set<String>> temp = new HashMap<>();
346         Relation<R2<String, String>, String> bcp47Aliases = supplementalData.getBcp47Aliases();
347         for (Entry<String, Set<String>> keyValues : supplementalData.getBcp47Keys().keyValuesSet()) {
348             Set<String> fullValues = new TreeSet<>();
349             String key = keyValues.getKey();
350             Set<String> rawValues = keyValues.getValue();
351             for (String value : rawValues) {
352                 if (key.equals("cu")) { // Currency codes are in upper case.
value.toUpperCase()353                     fullValues.add(value.toUpperCase());
354                 } else {
355                     fullValues.add(value);
356                 }
357                 R2<String, String> keyValue = R2.of(key, value);
358                 Set<String> aliases = bcp47Aliases.getAll(keyValue);
359                 if (aliases != null) {
360                     fullValues.addAll(aliases);
361                 }
362             }
363             // Special case exception for generic calendar, since we don't want to expose it in bcp47
364             if (key.equals("ca")) {
365                 fullValues.add("generic");
366             }
367             fullValues = Collections.unmodifiableSet(fullValues);
temp.put(key, fullValues)368             temp.put(key, fullValues);
369             // add aliased keys
370             Set<String> aliases = supplementalData.getBcp47Aliases().getAll(Row.of(key, ""));
371             if (aliases != null) {
372                 for (String aliasKey : aliases) {
temp.put(aliasKey, fullValues)373                     temp.put(aliasKey, fullValues);
374                 }
375             }
376             temp.put("x", Collections.EMPTY_SET); // Hack for 'x', private use.
377         }
378         BCP47_KEY_VALUES = Collections.unmodifiableMap(temp);
379     }
380 
getBcp47MatcherPattern(String key)381     private MatcherPattern getBcp47MatcherPattern(String key) {
382         // <key type="calendar">Calendar</key>
383         // <type key="calendar" type="chinese">Chinese Calendar</type>
384 
385         //<attributeValues elements="key" attributes="type" type="bcp47">key</attributeValues>
386         //<attributeValues elements="type" attributes="key" type="bcp47">key</attributeValues>
387         //<attributeValues elements="type" attributes="type" type="bcp47">use-key</attributeValues>
388 
389         MatcherPattern m = new MatcherPattern();
390         Set<String> values;
391         if (key.equals("key")) {
392             values = BCP47_KEY_VALUES.keySet();
393         } else {
394             values = BCP47_KEY_VALUES.get(key);
395         }
396         m.value = key;
397         m.pattern = values.toString();
398         m.matcher = new CollectionMatcher().set(values);
399         return m;
400     }
401 
getMatcherPattern2(String type, String value)402     private MatcherPattern getMatcherPattern2(String type, String value) {
403         String typeAttribute = type;
404         MatcherPattern result = variables.get(value);
405         if (result != null) {
406             MatcherPattern temp = new MatcherPattern();
407             temp.pattern = result.pattern;
408             temp.matcher = result.matcher;
409             temp.value = value;
410             result = temp;
411             if ("list".equals(typeAttribute)) {
412                 temp.matcher = new ListMatcher().set(result.matcher);
413             }
414             return result;
415         }
416 
417         result = new MatcherPattern();
418         result.pattern = value;
419         result.value = value;
420         if ("choice".equals(typeAttribute)) {
421             result.matcher = new CollectionMatcher()
422                 .set(new HashSet<String>(Arrays.asList(value.trim().split("\\s+"))));
423         } else if ("bcp47".equals(typeAttribute)) {
424             result = getBcp47MatcherPattern(value);
425         } else if ("regex".equals(typeAttribute)) {
426             result.matcher = new RegexMatcher().set(value, Pattern.COMMENTS); // Pattern.COMMENTS to get whitespace
427         } else if ("locale".equals(typeAttribute)) {
428             result.matcher = LocaleMatcher.make();
429         } else if ("notDoneYet".equals(typeAttribute) || "notDoneYet".equals(value)) {
430             result.matcher = NOT_DONE_YET;
431         } else {
432             System.out.println("unknown type; value: <" + value + ">,\t" + typeAttribute);
433             return null;
434         }
435         return result;
436     }
437 
addAttributes(Set<String> attributes, Map<String, MatcherPattern> attribute_validity, MatcherPattern mp)438     private void addAttributes(Set<String> attributes, Map<String, MatcherPattern> attribute_validity, MatcherPattern mp) {
439         for (String attribute : attributes) {
440             MatcherPattern old = attribute_validity.get(attribute);
441             if (old != null) {
442                 mp.matcher = new OrMatcher().set(old.matcher, mp.matcher);
443                 mp.pattern = old.pattern + " OR " + mp.pattern;
444             }
445             attribute_validity.put(attribute, mp);
446         }
447     }
448 
449     private static class MatcherPattern {
450         public String value;
451         ObjectMatcher<String> matcher;
452         String pattern;
453 
toString()454         public String toString() {
455             return matcher.getClass().getName() + "\t" + pattern;
456         }
457     }
458 
459     public static class RegexMatcher implements ObjectMatcher<String> {
460         private java.util.regex.Matcher matcher;
461 
set(String pattern)462         public ObjectMatcher<String> set(String pattern) {
463             matcher = PatternCache.get(pattern).matcher("");
464             return this;
465         }
466 
set(String pattern, int flags)467         public ObjectMatcher<String> set(String pattern, int flags) {
468             matcher = Pattern.compile(pattern, flags).matcher("");
469             return this;
470         }
471 
matches(String value)472         public boolean matches(String value) {
473             matcher.reset(value.toString());
474             return matcher.matches();
475         }
476     }
477 
478     public static class CollectionMatcher implements ObjectMatcher<String> {
479         private Collection<String> collection;
480 
set(Collection<String> collection)481         public ObjectMatcher<String> set(Collection<String> collection) {
482             this.collection = collection;
483             return this;
484         }
485 
matches(String value)486         public boolean matches(String value) {
487             return collection.contains(value);
488         }
489     }
490 
491     public static class OrMatcher implements ObjectMatcher<String> {
492         private ObjectMatcher<String> a;
493         private ObjectMatcher<String> b;
494 
set(ObjectMatcher<String> a, ObjectMatcher<String> b)495         public ObjectMatcher<String> set(ObjectMatcher<String> a, ObjectMatcher<String> b) {
496             this.a = a;
497             this.b = b;
498             return this;
499         }
500 
matches(String value)501         public boolean matches(String value) {
502             return a.matches(value) || b.matches(value);
503         }
504     }
505 
506     public static class ListMatcher implements ObjectMatcher<String> {
507         private ObjectMatcher<String> other;
508 
set(ObjectMatcher<String> other)509         public ObjectMatcher<String> set(ObjectMatcher<String> other) {
510             this.other = other;
511             return this;
512         }
513 
matches(String value)514         public boolean matches(String value) {
515             String[] values = value.trim().split("\\s+");
516             if (values.length == 1 && values[0].length() == 0) return true;
517             for (int i = 0; i < values.length; ++i) {
518                 if (!other.matches(values[i])) {
519                     return false;
520                 }
521             }
522             return true;
523         }
524     }
525 
526     public static class LocaleMatcher implements ObjectMatcher<String> {
527         ObjectMatcher<String> grandfathered = variables.get("$grandfathered").matcher;
528         ObjectMatcher<String> language = variables.get("$language").matcher;
529         ObjectMatcher<String> script = variables.get("$script").matcher;
530         ObjectMatcher<String> territory = variables.get("$territory").matcher;
531         ObjectMatcher<String> variant = variables.get("$variant").matcher;
532         LocaleIDParser lip = new LocaleIDParser();
533         static LocaleMatcher singleton = null;
534         static Object sync = new Object();
535 
LocaleMatcher(boolean b)536         private LocaleMatcher(boolean b) {
537         }
538 
make()539         public static LocaleMatcher make() {
540             synchronized (sync) {
541                 if (singleton == null) {
542                     singleton = new LocaleMatcher(true);
543                 }
544             }
545             return singleton;
546         }
547 
matches(String value)548         public boolean matches(String value) {
549             if (grandfathered.matches(value)) return true;
550             lip.set((String) value);
551             String field = lip.getLanguage();
552             if (!language.matches(field)) return false;
553             field = lip.getScript();
554             if (field.length() != 0 && !script.matches(field)) return false;
555             field = lip.getRegion();
556             if (field.length() != 0 && !territory.matches(field)) return false;
557             String[] fields = lip.getVariants();
558             for (int i = 0; i < fields.length; ++i) {
559                 if (!variant.matches(fields[i])) return false;
560             }
561             return true;
562         }
563     }
564 
565 }