1 package org.unicode.cldr.test; 2 3 import java.util.Arrays; 4 import java.util.Collection; 5 import java.util.Collections; 6 import java.util.EnumMap; 7 import java.util.HashMap; 8 import java.util.HashSet; 9 import java.util.LinkedHashSet; 10 import java.util.List; 11 import java.util.Map; 12 import java.util.Map.Entry; 13 import java.util.Set; 14 import java.util.TreeMap; 15 import java.util.TreeSet; 16 import java.util.regex.Pattern; 17 18 import org.unicode.cldr.test.CheckCLDR.CheckStatus.Subtype; 19 import org.unicode.cldr.util.CLDRConfig; 20 import org.unicode.cldr.util.CLDRFile; 21 import org.unicode.cldr.util.CldrUtility; 22 import org.unicode.cldr.util.DtdData; 23 import org.unicode.cldr.util.DtdData.Attribute; 24 import org.unicode.cldr.util.DtdData.Element; 25 import org.unicode.cldr.util.DtdType; 26 import org.unicode.cldr.util.Factory; 27 import org.unicode.cldr.util.LocaleIDParser; 28 import org.unicode.cldr.util.PatternCache; 29 import org.unicode.cldr.util.SupplementalDataInfo; 30 import org.unicode.cldr.util.SupplementalDataInfo.AttributeValidityInfo; 31 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo; 32 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo.Count; 33 import org.unicode.cldr.util.SupplementalDataInfo.PluralType; 34 import org.unicode.cldr.util.XPathParts; 35 36 import com.ibm.icu.dev.util.CollectionUtilities.ObjectMatcher; 37 import com.ibm.icu.impl.Relation; 38 import com.ibm.icu.impl.Row; 39 import com.ibm.icu.impl.Row.R2; 40 import com.ibm.icu.text.UnicodeSet; 41 42 public class CheckAttributeValues extends FactoryCheckCLDR { 43 44 private static final ObjectMatcher<String> NOT_DONE_YET = new RegexMatcher().set(".*", Pattern.COMMENTS); 45 private static final boolean FIND_MISSING = CldrUtility.getProperty("FIND_MISSING_ATTRIBUTE_TESTS", false); // turn on to show <attributeValues> that are missing. 46 private static final boolean SHOW_UNNECESSARY = false; // turn on to show <attributeValues> we should delete. 47 48 static LinkedHashSet<String> elementOrder = new LinkedHashSet<String>(); 49 static LinkedHashSet<String> attributeOrder = new LinkedHashSet<String>(); 50 static LinkedHashSet<String> serialElements = new LinkedHashSet<String>(); 51 static Map<String, Map<String, MatcherPattern>> element_attribute_validity = new HashMap<String, Map<String, MatcherPattern>>(); 52 static Map<String, MatcherPattern> common_attribute_validity = new HashMap<String, MatcherPattern>(); 53 static Map<String, MatcherPattern> variables = new HashMap<String, MatcherPattern>(); 54 // static VariableReplacer variableReplacer = new VariableReplacer(); // note: this can be coalesced with the above 55 // -- to do later. 56 static boolean initialized = false; 57 static LocaleMatcher localeMatcher; 58 static Map<String, Map<String, String>> code_type_replacement = new TreeMap<String, Map<String, String>>(); 59 static final SupplementalDataInfo supplementalData = CLDRConfig.getInstance().getSupplementalDataInfo(); 60 static DtdData ldmlDtdData = DtdData.getInstance(DtdType.ldml); 61 62 boolean isEnglish; 63 PluralInfo pluralInfo; 64 Relation<String, String> missingTests = Relation.of(new TreeMap(), TreeSet.class); 65 66 XPathParts parts = new XPathParts(null, null); 67 static final UnicodeSet DIGITS = new UnicodeSet("[0-9]").freeze(); 68 CheckAttributeValues(Factory factory)69 public CheckAttributeValues(Factory factory) { 70 super(factory); 71 } 72 handleFinish()73 public void handleFinish() { 74 for (Entry<String, Set<String>> entry : missingTests.keyValuesSet()) { 75 System.out.println("Missing element: " + entry.getKey() + ", attributes: " + entry.getValue()); 76 } 77 } 78 handleCheck(String path, String fullPath, String value, Options options, List<CheckStatus> result)79 public CheckCLDR handleCheck(String path, String fullPath, String value, Options options, 80 List<CheckStatus> result) { 81 if (fullPath == null) return this; // skip paths that we don't have 82 if (fullPath.indexOf('[') < 0) return this; // skip paths with no attributes 83 String locale = getCldrFileToCheck().getSourceLocaleID(path, null); 84 85 // skip paths that are not in the immediate locale 86 if (!getCldrFileToCheck().getLocaleID().equals(locale)) { 87 return this; 88 } 89 parts.set(fullPath); 90 for (int i = 0; i < parts.size(); ++i) { 91 if (parts.getAttributeCount(i) == 0) continue; 92 Map<String, String> attributes = parts.getAttributes(i); 93 String element = parts.getElement(i); 94 Element elementInfo = ldmlDtdData.getElementFromName().get(element); 95 96 Map<String, MatcherPattern> attribute_validity = element_attribute_validity.get(element); 97 for (String attribute : attributes.keySet()) { 98 Attribute attributeInfo = elementInfo.getAttributeNamed(attribute); 99 if (!attributeInfo.values.isEmpty()) { 100 // we don't need to check, since the DTD will enforce values 101 continue; 102 } 103 String attributeValue = attributes.get(attribute); 104 105 // special hack for // <type key="calendar" type="chinese">Chinese Calendar</type> 106 if (element.equals("type") && attribute.equals("type")) { 107 Set<String> typeValues = BCP47_KEY_VALUES.get(attributes.get("key")); 108 if (!typeValues.contains(attributeValue)) { 109 result.add(new CheckStatus() 110 .setCause(this).setMainType(CheckStatus.errorType).setSubtype(Subtype.unexpectedAttributeValue) 111 .setMessage("Unexpected Attribute Value {0}={1}: expected: {2}", 112 new Object[] { attribute, attributeValue, typeValues })); 113 } 114 continue; 115 } 116 // check the common attributes first 117 boolean haveTest = check(common_attribute_validity, attribute, attributeValue, result); 118 // then for the specific element 119 haveTest = haveTest || check(attribute_validity, attribute, attributeValue, result); 120 if (!haveTest && FIND_MISSING) { 121 missingTests.put(element, attribute); 122 } 123 124 // now for plurals 125 126 if (attribute.equals("count")) { 127 if (DIGITS.containsAll(attributeValue)) { 128 // ok, keep going 129 } else { 130 final Count countValue = PluralInfo.Count.valueOf(attributeValue); 131 if (!pluralInfo.getCounts().contains(countValue) 132 && !isPluralException(countValue, locale)) { 133 result.add(new CheckStatus() 134 .setCause(this).setMainType(CheckStatus.errorType).setSubtype(Subtype.illegalPlural) 135 .setMessage("Illegal plural value {0}; must be one of: {1}", 136 new Object[] { countValue, pluralInfo.getCounts() })); 137 } 138 } 139 } 140 141 // TODO check other variable elements, like dayPeriods 142 } 143 } 144 return this; 145 } 146 147 static final Relation<PluralInfo.Count, String> PLURAL_EXCEPTIONS = Relation.of( 148 new EnumMap<PluralInfo.Count, Set<String>>(PluralInfo.Count.class), HashSet.class); 149 150 static { PLURAL_EXCEPTIONS.put(PluralInfo.Count.many, "hr")151 PLURAL_EXCEPTIONS.put(PluralInfo.Count.many, "hr"); PLURAL_EXCEPTIONS.put(PluralInfo.Count.many, "sr")152 PLURAL_EXCEPTIONS.put(PluralInfo.Count.many, "sr"); PLURAL_EXCEPTIONS.put(PluralInfo.Count.many, "sh")153 PLURAL_EXCEPTIONS.put(PluralInfo.Count.many, "sh"); PLURAL_EXCEPTIONS.put(PluralInfo.Count.many, "bs")154 PLURAL_EXCEPTIONS.put(PluralInfo.Count.many, "bs"); PLURAL_EXCEPTIONS.put(PluralInfo.Count.few, "ru")155 PLURAL_EXCEPTIONS.put(PluralInfo.Count.few, "ru"); 156 } 157 isPluralException(Count countValue, String locale)158 static boolean isPluralException(Count countValue, String locale) { 159 Set<String> exceptions = PLURAL_EXCEPTIONS.get(countValue); 160 if (exceptions == null) { 161 return false; 162 } 163 if (exceptions.contains(locale)) { 164 return true; 165 } 166 int bar = locale.indexOf('_'); // catch bs_Cyrl, etc. 167 if (bar > 0) { 168 String base = locale.substring(0, bar); 169 if (exceptions.contains(base)) { 170 return true; 171 } 172 } 173 return false; 174 } 175 176 /** 177 * return true if we performed a test 178 * @param attribute_validity 179 * @param attribute 180 * @param attributeValue 181 * @param result 182 * @return 183 */ check(Map<String, MatcherPattern> attribute_validity, String attribute, String attributeValue, List<CheckStatus> result)184 private boolean check(Map<String, MatcherPattern> attribute_validity, String attribute, String attributeValue, 185 List<CheckStatus> result) { 186 if (attribute_validity == null) { 187 return false; // no test 188 } 189 MatcherPattern matcherPattern = attribute_validity.get(attribute); 190 if (matcherPattern == null) { 191 return false; // no test 192 } 193 if (matcherPattern.matcher.matches(attributeValue)) { 194 return true; 195 } 196 // special check for deprecated codes 197 String replacement = getReplacement(matcherPattern.value, attributeValue); 198 if (replacement != null) { 199 if (isEnglish) { 200 return true; // don't flag English 201 } 202 if (replacement.length() == 0) { 203 result.add(new CheckStatus() 204 .setCause(this).setMainType(CheckStatus.warningType).setSubtype(Subtype.deprecatedAttribute) 205 .setMessage("Deprecated Attribute Value {0}={1}. Consider removing.", 206 new Object[] { attribute, attributeValue })); 207 } else { 208 result 209 .add(new CheckStatus() 210 .setCause(this) 211 .setMainType(CheckStatus.warningType) 212 .setSubtype(Subtype.deprecatedAttributeWithReplacement) 213 .setMessage( 214 "Deprecated Attribute Value {0}={1}. Consider removing, and possibly modifying the related value for {2}.", 215 new Object[] { attribute, attributeValue, replacement })); 216 } 217 } else { 218 result.add(new CheckStatus() 219 .setCause(this).setMainType(CheckStatus.errorType).setSubtype(Subtype.unexpectedAttributeValue) 220 .setMessage("Unexpected Attribute Value {0}={1}: expected: {2}", 221 new Object[] { attribute, attributeValue, matcherPattern.pattern })); 222 } 223 return true; 224 } 225 226 /** 227 * Returns replacement, or null if there is none. "" if the code is deprecated, but without a replacement. 228 * Input is of the form $language 229 * 230 * @return 231 */ getReplacement(String value, String attributeValue)232 String getReplacement(String value, String attributeValue) { 233 Map<String, String> type_replacement = code_type_replacement.get(value); 234 if (type_replacement == null) { 235 return null; 236 } 237 return type_replacement.get(attributeValue); 238 } 239 240 LocaleIDParser localeIDParser = new LocaleIDParser(); 241 242 @Override setCldrFileToCheck(CLDRFile cldrFileToCheck, Options options, List<CheckStatus> possibleErrors)243 public CheckCLDR setCldrFileToCheck(CLDRFile cldrFileToCheck, Options options, 244 List<CheckStatus> possibleErrors) { 245 if (cldrFileToCheck == null) return this; 246 if (Phase.FINAL_TESTING == getPhase() || Phase.BUILD == getPhase()) { 247 setSkipTest(false); // ok 248 } else { 249 setSkipTest(true); 250 return this; 251 } 252 253 pluralInfo = supplementalData.getPlurals(PluralType.cardinal, cldrFileToCheck.getLocaleID()); 254 super.setCldrFileToCheck(cldrFileToCheck, options, possibleErrors); 255 isEnglish = "en".equals(localeIDParser.set(cldrFileToCheck.getLocaleID()).getLanguage()); 256 synchronized (elementOrder) { 257 if (!initialized) { 258 getMetadata(); 259 initialized = true; 260 localeMatcher = LocaleMatcher.make(); 261 } 262 } 263 if (!localeMatcher.matches(cldrFileToCheck.getLocaleID())) { 264 possibleErrors.add(new CheckStatus() 265 .setCause(this).setMainType(CheckStatus.errorType).setSubtype(Subtype.invalidLocale) 266 .setMessage("Invalid Locale {0}", 267 new Object[] { cldrFileToCheck.getLocaleID() })); 268 269 } 270 return this; 271 } 272 getMetadata()273 private void getMetadata() { 274 275 // sorting is expensive, but we need it here. 276 277 Map<String, R2<String, String>> rawVariables = supplementalData.getValidityInfo(); 278 for (Entry<String, R2<String, String>> item : rawVariables.entrySet()) { 279 String id = item.getKey(); 280 String type = item.getValue().get0(); 281 String value = item.getValue().get1(); 282 MatcherPattern mp = getMatcherPattern2(type, value); 283 if (mp != null) { 284 variables.put(id, mp); 285 // variableReplacer.add(id, value); 286 } 287 } 288 //System.out.println("Variables: " + variables.keySet()); 289 290 Map<AttributeValidityInfo, String> rawAttributeValueInfo = supplementalData.getAttributeValidity(); 291 292 for (Entry<AttributeValidityInfo, String> entry : rawAttributeValueInfo.entrySet()) { 293 AttributeValidityInfo item = entry.getKey(); 294 String value = entry.getValue(); 295 MatcherPattern mp = getMatcherPattern2(item.getType(), value); 296 if (mp == null) { 297 System.out.println("Failed to make matcher for: " + item); 298 continue; 299 } 300 if (FIND_MISSING && mp.matcher == NOT_DONE_YET) { 301 missingTests.put(item.getElements().toString(), item.getAttributes().toString()); 302 } 303 304 Set<DtdType> dtds = item.getDtds(); 305 // TODO handle other DTDs 306 if (!dtds.contains(DtdType.ldml)) { 307 continue; 308 } 309 Set<String> attributeList = item.getAttributes(); 310 Set<String> elementList = item.getElements(); 311 if (elementList.size() == 0) { 312 addAttributes(attributeList, common_attribute_validity, mp); 313 } else { 314 for (String element : elementList) { 315 // check if unnecessary 316 Element elementInfo = ldmlDtdData.getElementFromName().get(element); 317 if (elementInfo == null) { 318 System.out.println("Illegal <attributeValues>, element not valid: element: " + element); 319 } else { 320 for (String attribute : attributeList) { 321 Attribute attributeInfo = elementInfo.getAttributeNamed(attribute); 322 if (attributeInfo == null) { 323 System.out.println("Illegal <attributeValues>, attribute not valid: element: " + element + ", attribute: " + attribute); 324 } else if (!attributeInfo.values.isEmpty()) { 325 if (SHOW_UNNECESSARY) { 326 System.out.println("Unnecessary <attributeValues …>, the DTD has specific list: element: " + element + ", attribute: " 327 + attribute + ", " + attributeInfo.values); 328 } 329 } 330 } 331 } 332 // System.out.println("\t" + element); 333 Map<String, MatcherPattern> attribute_validity = element_attribute_validity.get(element); 334 if (attribute_validity == null) { 335 element_attribute_validity.put(element, attribute_validity = new TreeMap<String, MatcherPattern>()); 336 } 337 addAttributes(attributeList, attribute_validity, mp); 338 } 339 } 340 } 341 } 342 343 final static Map<String, Set<String>> BCP47_KEY_VALUES; 344 static { 345 Map<String, Set<String>> temp = new HashMap<>(); 346 Relation<R2<String, String>, String> bcp47Aliases = supplementalData.getBcp47Aliases(); 347 for (Entry<String, Set<String>> keyValues : supplementalData.getBcp47Keys().keyValuesSet()) { 348 Set<String> fullValues = new TreeSet<>(); 349 String key = keyValues.getKey(); 350 Set<String> rawValues = keyValues.getValue(); 351 for (String value : rawValues) { 352 if (key.equals("cu")) { // Currency codes are in upper case. value.toUpperCase()353 fullValues.add(value.toUpperCase()); 354 } else { 355 fullValues.add(value); 356 } 357 R2<String, String> keyValue = R2.of(key, value); 358 Set<String> aliases = bcp47Aliases.getAll(keyValue); 359 if (aliases != null) { 360 fullValues.addAll(aliases); 361 } 362 } 363 // Special case exception for generic calendar, since we don't want to expose it in bcp47 364 if (key.equals("ca")) { 365 fullValues.add("generic"); 366 } 367 fullValues = Collections.unmodifiableSet(fullValues); temp.put(key, fullValues)368 temp.put(key, fullValues); 369 // add aliased keys 370 Set<String> aliases = supplementalData.getBcp47Aliases().getAll(Row.of(key, "")); 371 if (aliases != null) { 372 for (String aliasKey : aliases) { temp.put(aliasKey, fullValues)373 temp.put(aliasKey, fullValues); 374 } 375 } 376 temp.put("x", Collections.EMPTY_SET); // Hack for 'x', private use. 377 } 378 BCP47_KEY_VALUES = Collections.unmodifiableMap(temp); 379 } 380 getBcp47MatcherPattern(String key)381 private MatcherPattern getBcp47MatcherPattern(String key) { 382 // <key type="calendar">Calendar</key> 383 // <type key="calendar" type="chinese">Chinese Calendar</type> 384 385 //<attributeValues elements="key" attributes="type" type="bcp47">key</attributeValues> 386 //<attributeValues elements="type" attributes="key" type="bcp47">key</attributeValues> 387 //<attributeValues elements="type" attributes="type" type="bcp47">use-key</attributeValues> 388 389 MatcherPattern m = new MatcherPattern(); 390 Set<String> values; 391 if (key.equals("key")) { 392 values = BCP47_KEY_VALUES.keySet(); 393 } else { 394 values = BCP47_KEY_VALUES.get(key); 395 } 396 m.value = key; 397 m.pattern = values.toString(); 398 m.matcher = new CollectionMatcher().set(values); 399 return m; 400 } 401 getMatcherPattern2(String type, String value)402 private MatcherPattern getMatcherPattern2(String type, String value) { 403 String typeAttribute = type; 404 MatcherPattern result = variables.get(value); 405 if (result != null) { 406 MatcherPattern temp = new MatcherPattern(); 407 temp.pattern = result.pattern; 408 temp.matcher = result.matcher; 409 temp.value = value; 410 result = temp; 411 if ("list".equals(typeAttribute)) { 412 temp.matcher = new ListMatcher().set(result.matcher); 413 } 414 return result; 415 } 416 417 result = new MatcherPattern(); 418 result.pattern = value; 419 result.value = value; 420 if ("choice".equals(typeAttribute)) { 421 result.matcher = new CollectionMatcher() 422 .set(new HashSet<String>(Arrays.asList(value.trim().split("\\s+")))); 423 } else if ("bcp47".equals(typeAttribute)) { 424 result = getBcp47MatcherPattern(value); 425 } else if ("regex".equals(typeAttribute)) { 426 result.matcher = new RegexMatcher().set(value, Pattern.COMMENTS); // Pattern.COMMENTS to get whitespace 427 } else if ("locale".equals(typeAttribute)) { 428 result.matcher = LocaleMatcher.make(); 429 } else if ("notDoneYet".equals(typeAttribute) || "notDoneYet".equals(value)) { 430 result.matcher = NOT_DONE_YET; 431 } else { 432 System.out.println("unknown type; value: <" + value + ">,\t" + typeAttribute); 433 return null; 434 } 435 return result; 436 } 437 addAttributes(Set<String> attributes, Map<String, MatcherPattern> attribute_validity, MatcherPattern mp)438 private void addAttributes(Set<String> attributes, Map<String, MatcherPattern> attribute_validity, MatcherPattern mp) { 439 for (String attribute : attributes) { 440 MatcherPattern old = attribute_validity.get(attribute); 441 if (old != null) { 442 mp.matcher = new OrMatcher().set(old.matcher, mp.matcher); 443 mp.pattern = old.pattern + " OR " + mp.pattern; 444 } 445 attribute_validity.put(attribute, mp); 446 } 447 } 448 449 private static class MatcherPattern { 450 public String value; 451 ObjectMatcher<String> matcher; 452 String pattern; 453 toString()454 public String toString() { 455 return matcher.getClass().getName() + "\t" + pattern; 456 } 457 } 458 459 public static class RegexMatcher implements ObjectMatcher<String> { 460 private java.util.regex.Matcher matcher; 461 set(String pattern)462 public ObjectMatcher<String> set(String pattern) { 463 matcher = PatternCache.get(pattern).matcher(""); 464 return this; 465 } 466 set(String pattern, int flags)467 public ObjectMatcher<String> set(String pattern, int flags) { 468 matcher = Pattern.compile(pattern, flags).matcher(""); 469 return this; 470 } 471 matches(String value)472 public boolean matches(String value) { 473 matcher.reset(value.toString()); 474 return matcher.matches(); 475 } 476 } 477 478 public static class CollectionMatcher implements ObjectMatcher<String> { 479 private Collection<String> collection; 480 set(Collection<String> collection)481 public ObjectMatcher<String> set(Collection<String> collection) { 482 this.collection = collection; 483 return this; 484 } 485 matches(String value)486 public boolean matches(String value) { 487 return collection.contains(value); 488 } 489 } 490 491 public static class OrMatcher implements ObjectMatcher<String> { 492 private ObjectMatcher<String> a; 493 private ObjectMatcher<String> b; 494 set(ObjectMatcher<String> a, ObjectMatcher<String> b)495 public ObjectMatcher<String> set(ObjectMatcher<String> a, ObjectMatcher<String> b) { 496 this.a = a; 497 this.b = b; 498 return this; 499 } 500 matches(String value)501 public boolean matches(String value) { 502 return a.matches(value) || b.matches(value); 503 } 504 } 505 506 public static class ListMatcher implements ObjectMatcher<String> { 507 private ObjectMatcher<String> other; 508 set(ObjectMatcher<String> other)509 public ObjectMatcher<String> set(ObjectMatcher<String> other) { 510 this.other = other; 511 return this; 512 } 513 matches(String value)514 public boolean matches(String value) { 515 String[] values = value.trim().split("\\s+"); 516 if (values.length == 1 && values[0].length() == 0) return true; 517 for (int i = 0; i < values.length; ++i) { 518 if (!other.matches(values[i])) { 519 return false; 520 } 521 } 522 return true; 523 } 524 } 525 526 public static class LocaleMatcher implements ObjectMatcher<String> { 527 ObjectMatcher<String> grandfathered = variables.get("$grandfathered").matcher; 528 ObjectMatcher<String> language = variables.get("$language").matcher; 529 ObjectMatcher<String> script = variables.get("$script").matcher; 530 ObjectMatcher<String> territory = variables.get("$territory").matcher; 531 ObjectMatcher<String> variant = variables.get("$variant").matcher; 532 LocaleIDParser lip = new LocaleIDParser(); 533 static LocaleMatcher singleton = null; 534 static Object sync = new Object(); 535 LocaleMatcher(boolean b)536 private LocaleMatcher(boolean b) { 537 } 538 make()539 public static LocaleMatcher make() { 540 synchronized (sync) { 541 if (singleton == null) { 542 singleton = new LocaleMatcher(true); 543 } 544 } 545 return singleton; 546 } 547 matches(String value)548 public boolean matches(String value) { 549 if (grandfathered.matches(value)) return true; 550 lip.set((String) value); 551 String field = lip.getLanguage(); 552 if (!language.matches(field)) return false; 553 field = lip.getScript(); 554 if (field.length() != 0 && !script.matches(field)) return false; 555 field = lip.getRegion(); 556 if (field.length() != 0 && !territory.matches(field)) return false; 557 String[] fields = lip.getVariants(); 558 for (int i = 0; i < fields.length; ++i) { 559 if (!variant.matches(fields[i])) return false; 560 } 561 return true; 562 } 563 } 564 565 }