1 package org.unicode.cldr.unittest;
2 
3 import java.io.File;
4 import java.io.IOException;
5 import java.io.InputStream;
6 import java.util.ArrayList;
7 import java.util.Arrays;
8 import java.util.Collection;
9 import java.util.Collections;
10 import java.util.Comparator;
11 import java.util.EnumSet;
12 import java.util.HashSet;
13 import java.util.Iterator;
14 import java.util.LinkedHashSet;
15 import java.util.List;
16 import java.util.Map;
17 import java.util.Map.Entry;
18 import java.util.Set;
19 import java.util.TreeMap;
20 import java.util.TreeSet;
21 
22 import org.unicode.cldr.test.DisplayAndInputProcessor;
23 import org.unicode.cldr.tool.GenerateBirth.Versions;
24 import org.unicode.cldr.tool.LikelySubtags;
25 import org.unicode.cldr.util.Builder;
26 import org.unicode.cldr.util.CLDRConfig;
27 import org.unicode.cldr.util.CLDRFile;
28 import org.unicode.cldr.util.CLDRFile.DraftStatus;
29 import org.unicode.cldr.util.CLDRFile.Status;
30 import org.unicode.cldr.util.CLDRFile.WinningChoice;
31 import org.unicode.cldr.util.CLDRPaths;
32 import org.unicode.cldr.util.ChainedMap;
33 import org.unicode.cldr.util.ChainedMap.M4;
34 import org.unicode.cldr.util.CharacterFallbacks;
35 import org.unicode.cldr.util.CldrUtility;
36 import org.unicode.cldr.util.Counter;
37 import org.unicode.cldr.util.DiscreteComparator;
38 import org.unicode.cldr.util.DiscreteComparator.Ordering;
39 import org.unicode.cldr.util.DtdData;
40 import org.unicode.cldr.util.DtdData.Attribute;
41 import org.unicode.cldr.util.DtdData.Element;
42 import org.unicode.cldr.util.DtdData.ElementType;
43 import org.unicode.cldr.util.DtdType;
44 import org.unicode.cldr.util.ElementAttributeInfo;
45 import org.unicode.cldr.util.Factory;
46 import org.unicode.cldr.util.InputStreamFactory;
47 import org.unicode.cldr.util.LanguageTagParser;
48 import org.unicode.cldr.util.Level;
49 import org.unicode.cldr.util.LocaleIDParser;
50 import org.unicode.cldr.util.Pair;
51 import org.unicode.cldr.util.PathHeader;
52 import org.unicode.cldr.util.SupplementalDataInfo;
53 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo;
54 import org.unicode.cldr.util.SupplementalDataInfo.PluralType;
55 import org.unicode.cldr.util.XMLFileReader;
56 import org.unicode.cldr.util.XPathParts;
57 import org.xml.sax.ErrorHandler;
58 import org.xml.sax.InputSource;
59 import org.xml.sax.SAXException;
60 import org.xml.sax.SAXParseException;
61 import org.xml.sax.XMLReader;
62 
63 import com.google.common.base.Objects;
64 import com.google.common.collect.ImmutableMultimap;
65 import com.google.common.collect.ImmutableSet;
66 import com.google.common.collect.Multimap;
67 import com.google.common.collect.TreeMultimap;
68 import com.ibm.icu.dev.util.CollectionUtilities;
69 import com.ibm.icu.impl.Relation;
70 import com.ibm.icu.impl.Row;
71 import com.ibm.icu.impl.Row.R2;
72 import com.ibm.icu.impl.Row.R3;
73 import com.ibm.icu.impl.Utility;
74 import com.ibm.icu.lang.UCharacter;
75 import com.ibm.icu.text.Collator;
76 import com.ibm.icu.text.DecimalFormat;
77 import com.ibm.icu.text.Normalizer;
78 import com.ibm.icu.text.NumberFormat;
79 import com.ibm.icu.text.UTF16;
80 import com.ibm.icu.text.UnicodeSet;
81 import com.ibm.icu.text.UnicodeSetIterator;
82 import com.ibm.icu.util.Currency;
83 import com.ibm.icu.util.ULocale;
84 
85 public class TestBasic extends TestFmwkPlus {
86 
    /** Debugging switch (compile-time constant). */
    private static final boolean DEBUG = false;

    /** Shared CLDR test configuration; the tests in this class obtain factories, files and standard codes from it. */
    static CLDRConfig testInfo = CLDRConfig.getInstance();

    /** Supplemental data obtained from {@link #testInfo}; used for deprecation and coverage-level lookups. */
    private static final SupplementalDataInfo SUPPLEMENTAL_DATA_INFO = testInfo
        .getSupplementalDataInfo();

    // The three sets below list name pairs that are treated as known exceptions.
    // NOTE(review): presumably each pair is (DTD root or parent element, element/attribute/child name);
    // confirm against the code that reads these sets.
    private static final ImmutableSet<Pair<String, String>> knownElementExceptions = ImmutableSet.of(
        Pair.of("ldml", "usesMetazone"),
        Pair.of("ldmlICU", "usesMetazone"));

    private static final ImmutableSet<Pair<String, String>> knownAttributeExceptions = ImmutableSet.of(
        Pair.of("ldml", "version"),
        Pair.of("supplementalData", "version"),
        Pair.of("ldmlICU", "version"),
        Pair.of("layout", "standard"));

    private static final ImmutableSet<Pair<String, String>> knownChildExceptions = ImmutableSet.of(
        Pair.of("abbreviationFallback", "special"),
        Pair.of("inList", "special"),
        Pair.of("preferenceOrdering", "special"));
108 
109     /**
110      * Simple test that loads each file in the cldr directory, thus verifying
111      * that the DTD works, and also checks that the PrettyPaths work.
112      *
113      * @author markdavis
114      */
115 
main(String[] args)116     public static void main(String[] args) {
117         new TestBasic().run(args);
118     }
119 
    /** Attribute names that TestPaths ignores when classifying attributes as distinguishing or not. */
    private static final ImmutableSet<String> skipAttributes = ImmutableSet.of(
        "alt", "draft", "references");

    /** Reduced locale list used by TestAbstractPaths and TestPaths when getInclusion() is 5 or lower. */
    private final ImmutableSet<String> eightPointLocales = ImmutableSet.of(
        "ar", "ca", "cs", "da", "de", "el", "es", "fi", "fr", "he", "hi", "hr", "hu", "id",
        "it", "ja", "ko", "lt", "lv", "nb", "nl", "pl", "pt", "pt_PT", "ro", "ru", "sk", "sl", "sr", "sv",
        "th", "tr", "uk", "vi", "zh", "zh_Hant");

    // private final boolean showForceZoom = Utility.getProperty("forcezoom",
    // false);

    /** Value of the "resolved" property (default false); passed as the second argument to testInfo.getCLDRFile. */
    private final boolean resolved = CldrUtility.getProperty("resolved", false);

    /** One-element holder handed to DisplayAndInputProcessor.processInput, which reports an internal error through slot 0. */
    private final Exception[] internalException = new Exception[1];
134 
TestDtds()135     public void TestDtds() throws IOException {
136         Relation<Row.R2<DtdType, String>, String> foundAttributes = Relation
137             .of(new TreeMap<Row.R2<DtdType, String>, Set<String>>(),
138                 TreeSet.class);
139         final CLDRConfig config = CLDRConfig.getInstance();
140         final File basedir = config.getCldrBaseDirectory();
141         List<TimingInfo> data = new ArrayList<>();
142 
143         for (String subdir : config.getCLDRDataDirectories()) {
144             checkDtds(new File(basedir, subdir), 0, foundAttributes, data);
145         }
146         if (foundAttributes.size() > 0) {
147             showFoundElements(foundAttributes);
148         }
149         if (isVerbose()) {
150             long totalBytes = 0;
151             long totalNanos = 0;
152             for (TimingInfo i : data) {
153                 long length = i.file.length();
154                 totalBytes += length;
155                 totalNanos += i.nanos;
156                 logln(i.nanos + "\t" + length + "\t" + i.file);
157             }
158             logln(totalNanos + "\t" + totalBytes);
159         }
160     }
161 
checkDtds(File directoryFile, int level, Relation<R2<DtdType, String>, String> foundAttributes, List<TimingInfo> data)162     private void checkDtds(File directoryFile, int level,
163         Relation<R2<DtdType, String>, String> foundAttributes,
164         List<TimingInfo> data) throws IOException {
165         boolean deepCheck = getInclusion() >= 10;
166         File[] listFiles = directoryFile.listFiles();
167         String canonicalPath = directoryFile.getCanonicalPath();
168         String indent = Utility.repeat("\t", level);
169         if (listFiles == null) {
170             throw new IllegalArgumentException(indent + "Empty directory: "
171                 + canonicalPath);
172         }
173         logln("Checking files for DTD errors in: " + indent + canonicalPath);
174         for (File fileName : listFiles) {
175             String name = fileName.getName();
176             if (CLDRConfig.isJunkFile(name)) {
177                 continue;
178             } else if (fileName.isDirectory()) {
179                 checkDtds(fileName, level + 1, foundAttributes, data);
180             } else if (name.endsWith(".xml")) {
181                 data.add(check(fileName));
182                 if (deepCheck // takes too long to do all the time
183                 ) {
184                     CLDRFile cldrfile = CLDRFile.loadFromFile(fileName, "temp",
185                         DraftStatus.unconfirmed);
186                     for (String xpath : cldrfile) {
187                         String fullPath = cldrfile.getFullXPath(xpath);
188                         if (fullPath == null) {
189                             fullPath = cldrfile.getFullXPath(xpath);
190                             assertNotNull("", fullPath);
191                             continue;
192                         }
193                         XPathParts parts = XPathParts
194                             .getFrozenInstance(fullPath);
195                         DtdType type = parts.getDtdData().dtdType;
196                         for (int i = 0; i < parts.size(); ++i) {
197                             String element = parts.getElement(i);
198                             R2<DtdType, String> typeElement = Row.of(type,
199                                 element);
200                             if (parts.getAttributeCount(i) == 0) {
201                                 foundAttributes.put(typeElement, "NONE");
202                             } else {
203                                 for (String attribute : parts
204                                     .getAttributeKeys(i)) {
205                                     foundAttributes.put(typeElement, attribute);
206                                 }
207                             }
208                         }
209                     }
210                 }
211             }
212         }
213     }
214 
showFoundElements( Relation<Row.R2<DtdType, String>, String> foundAttributes)215     public void showFoundElements(
216         Relation<Row.R2<DtdType, String>, String> foundAttributes) {
217         Relation<Row.R2<DtdType, String>, String> theoryAttributes = Relation
218             .of(new TreeMap<Row.R2<DtdType, String>, Set<String>>(),
219                 TreeSet.class);
220         for (DtdType type : DtdType.values()) {
221             DtdData dtdData = DtdData.getInstance(type);
222             for (Element element : dtdData.getElementFromName().values()) {
223                 String name = element.getName();
224                 Set<Attribute> attributes = element.getAttributes().keySet();
225                 R2<DtdType, String> typeElement = Row.of(type, name);
226                 if (attributes.isEmpty()) {
227                     theoryAttributes.put(typeElement, "NONE");
228                 } else {
229                     for (Attribute attribute : attributes) {
230                         theoryAttributes.put(typeElement, attribute.name);
231                     }
232                 }
233             }
234         }
235         Relation<String, R3<Boolean, DtdType, String>> attributesToTypeElementUsed = Relation
236             .of(new TreeMap<String, Set<R3<Boolean, DtdType, String>>>(),
237                 LinkedHashSet.class);
238 
239         for (Entry<R2<DtdType, String>, Set<String>> s : theoryAttributes
240             .keyValuesSet()) {
241             R2<DtdType, String> typeElement = s.getKey();
242             Set<String> theoryAttributeSet = s.getValue();
243             DtdType type = typeElement.get0();
244             String element = typeElement.get1();
245             if (element.equals("ANY") || element.equals("#PCDATA")) {
246                 continue;
247             }
248             boolean deprecatedElement = SUPPLEMENTAL_DATA_INFO.isDeprecated(
249                 type, element, "*", "*");
250             String header = type + "\t" + element + "\t"
251                 + (deprecatedElement ? "X" : "") + "\t";
252             Set<String> usedAttributes = foundAttributes.get(typeElement);
253             Set<String> unusedAttributes = new LinkedHashSet<String>(
254                 theoryAttributeSet);
255             if (usedAttributes == null) {
256                 logln(header
257                     + "<NOT-FOUND>\t\t"
258                     + siftDeprecated(type, element, unusedAttributes,
259                         attributesToTypeElementUsed, false));
260                 continue;
261             }
262             unusedAttributes.removeAll(usedAttributes);
263             logln(header
264                 + siftDeprecated(type, element, usedAttributes,
265                     attributesToTypeElementUsed, true)
266                 + "\t"
267                 + siftDeprecated(type, element, unusedAttributes,
268                     attributesToTypeElementUsed, false));
269         }
270 
271         logln("Undeprecated Attributes\t");
272         for (Entry<String, R3<Boolean, DtdType, String>> s : attributesToTypeElementUsed
273             .keyValueSet()) {
274             R3<Boolean, DtdType, String> typeElementUsed = s.getValue();
275             logln(s.getKey() + "\t" + typeElementUsed.get0()
276                 + "\t" + typeElementUsed.get1() + "\t"
277                 + typeElementUsed.get2());
278         }
279     }
280 
siftDeprecated( DtdType type, String element, Set<String> attributeSet, Relation<String, R3<Boolean, DtdType, String>> attributesToTypeElementUsed, boolean used)281     private String siftDeprecated(
282         DtdType type,
283         String element,
284         Set<String> attributeSet,
285         Relation<String, R3<Boolean, DtdType, String>> attributesToTypeElementUsed,
286         boolean used) {
287         StringBuilder b = new StringBuilder();
288         StringBuilder bdep = new StringBuilder();
289         for (String attribute : attributeSet) {
290             String attributeName = "«"
291                 + attribute
292                 + (!"NONE".equals(attribute) && CLDRFile.isDistinguishing(type, element, attribute) ? "*"
293                     : "")
294                 + "»";
295             if (!"NONE".equals(attribute) && SUPPLEMENTAL_DATA_INFO.isDeprecated(type, element, attribute,
296                 "*")) {
297                 if (bdep.length() != 0) {
298                     bdep.append(" ");
299                 }
300                 bdep.append(attributeName);
301             } else {
302                 if (b.length() != 0) {
303                     b.append(" ");
304                 }
305                 b.append(attributeName);
306                 if (!"NONE".equals(attribute)) {
307                     attributesToTypeElementUsed.put(attribute,
308                         Row.of(used, type, element));
309                 }
310             }
311         }
312         return b.toString() + "\t" + bdep.toString();
313     }
314 
315     class MyErrorHandler implements ErrorHandler {
error(SAXParseException exception)316         public void error(SAXParseException exception) throws SAXException {
317             errln("error: " + XMLFileReader.showSAX(exception));
318             throw exception;
319         }
320 
fatalError(SAXParseException exception)321         public void fatalError(SAXParseException exception) throws SAXException {
322             errln("fatalError: " + XMLFileReader.showSAX(exception));
323             throw exception;
324         }
325 
warning(SAXParseException exception)326         public void warning(SAXParseException exception) throws SAXException {
327             errln("warning: " + XMLFileReader.showSAX(exception));
328             throw exception;
329         }
330     }
331 
332     private class TimingInfo {
333         File file;
334         long nanos;
335     }
336 
check(File systemID)337     public TimingInfo check(File systemID) {
338         long start = System.nanoTime();
339         try (InputStream fis = InputStreamFactory.createInputStream(systemID)) {
340             // FileInputStream fis = new FileInputStream(systemID);
341             XMLReader xmlReader = XMLFileReader.createXMLReader(true);
342             xmlReader.setErrorHandler(new MyErrorHandler());
343             InputSource is = new InputSource(fis);
344             is.setSystemId(systemID.toString());
345             xmlReader.parse(is);
346             // fis.close();
347         } catch (SAXException | IOException e) {
348             errln("\t" + "Can't read " + systemID + "\t" + e.getClass() + "\t"
349                 + e.getMessage());
350         }
351         // catch (SAXParseException e) {
352         // errln("\t" + "Can't read " + systemID + "\t" + e.getClass() + "\t" +
353         // e.getMessage());
354         // } catch (IOException e) {
355         // errln("\t" + "Can't read " + systemID + "\t" + e.getClass() + "\t" +
356         // e.getMessage());
357         // }
358         TimingInfo timingInfo = new TimingInfo();
359         timingInfo.nanos = System.nanoTime() - start;
360         timingInfo.file = systemID;
361         return timingInfo;
362     }
363 
TestCurrencyFallback()364     public void TestCurrencyFallback() {
365         XPathParts parts = new XPathParts();
366         Factory cldrFactory = testInfo.getCldrFactory();
367         Set<String> currencies = testInfo.getStandardCodes().getAvailableCodes(
368             "currency");
369 
370         final UnicodeSet CHARACTERS_THAT_SHOULD_HAVE_FALLBACKS = (UnicodeSet) new UnicodeSet(
371             "[[:sc:]-[\\u0000-\\u00FF]]").freeze();
372 
373         CharacterFallbacks fallbacks = CharacterFallbacks.make();
374 
375         for (String locale : cldrFactory.getAvailable()) {
376             CLDRFile file = testInfo.getCLDRFile(locale, false);
377             if (file.isNonInheriting())
378                 continue;
379 
380             final UnicodeSet OK_CURRENCY_FALLBACK = (UnicodeSet) new UnicodeSet(
381                 "[\\u0000-\\u00FF]").addAll(safeExemplars(file, ""))
382                     .addAll(safeExemplars(file, "auxiliary"))
383 //                .addAll(safeExemplars(file, "currencySymbol"))
384                     .freeze();
385             UnicodeSet badSoFar = new UnicodeSet();
386 
387             for (Iterator<String> it = file.iterator(); it.hasNext();) {
388                 String path = it.next();
389                 if (path.endsWith("/alias")) {
390                     continue;
391                 }
392                 String value = file.getStringValue(path);
393 
394                 // check for special characters
395 
396                 if (CHARACTERS_THAT_SHOULD_HAVE_FALLBACKS.containsSome(value)) {
397 
398                     parts.set(path);
399                     if (!parts.getElement(-1).equals("symbol")) {
400                         continue;
401                     }
402                     // We don't care about fallbacks for narrow currency symbols
403                     if ("narrow".equals(parts.getAttributeValue(-1, "alt"))) {
404                         continue;
405                     }
406                     String currencyType = parts.getAttributeValue(-2, "type");
407 
408                     UnicodeSet fishy = new UnicodeSet().addAll(value)
409                         .retainAll(CHARACTERS_THAT_SHOULD_HAVE_FALLBACKS)
410                         .removeAll(badSoFar);
411                     for (UnicodeSetIterator it2 = new UnicodeSetIterator(fishy); it2
412                         .next();) {
413                         final int fishyCodepoint = it2.codepoint;
414                         List<String> fallbackList = fallbacks
415                             .getSubstitutes(fishyCodepoint);
416 
417                         String nfkc = Normalizer.normalize(fishyCodepoint,
418                             Normalizer.NFKC);
419                         if (!nfkc.equals(UTF16.valueOf(fishyCodepoint))) {
420                             if (fallbackList == null) {
421                                 fallbackList = new ArrayList<String>();
422                             } else {
423                                 fallbackList = new ArrayList<String>(
424                                     fallbackList); // writable
425                             }
426                             fallbackList.add(nfkc);
427                         }
428                         // later test for all Latin-1
429                         if (fallbackList == null) {
430                             errln("Locale:\t" + locale
431                                 + ";\tCharacter with no fallback:\t"
432                                 + it2.getString() + "\t"
433                                 + UCharacter.getName(fishyCodepoint));
434                             badSoFar.add(fishyCodepoint);
435                         } else {
436                             String fallback = null;
437                             for (String fb : fallbackList) {
438                                 if (OK_CURRENCY_FALLBACK.containsAll(fb)) {
439                                     if (!fb.equals(currencyType)
440                                         && currencies.contains(fb)) {
441                                         errln("Locale:\t"
442                                             + locale
443                                             + ";\tCurrency:\t"
444                                             + currencyType
445                                             + ";\tFallback converts to different code!:\t"
446                                             + fb
447                                             + "\t"
448                                             + it2.getString()
449                                             + "\t"
450                                             + UCharacter
451                                                 .getName(fishyCodepoint));
452                                     }
453                                     if (fallback == null) {
454                                         fallback = fb;
455                                     }
456                                 }
457                             }
458                             if (fallback == null) {
459                                 errln("Locale:\t"
460                                     + locale
461                                     + ";\tCharacter with no good fallback (exemplars+Latin1):\t"
462                                     + it2.getString() + "\t"
463                                     + UCharacter.getName(fishyCodepoint));
464                                 badSoFar.add(fishyCodepoint);
465                             } else {
466                                 logln("Locale:\t" + locale
467                                     + ";\tCharacter with good fallback:\t"
468                                     + it2.getString() + " "
469                                     + UCharacter.getName(fishyCodepoint)
470                                     + " => " + fallback);
471                                 // badSoFar.add(fishyCodepoint);
472                             }
473                         }
474                     }
475                 }
476             }
477         }
478     }
479 
TestAbstractPaths()480     public void TestAbstractPaths() {
481         Factory cldrFactory = testInfo.getCldrFactory();
482         CLDRFile english = testInfo.getEnglish();
483         Map<String, Counter<Level>> abstactPaths = new TreeMap<String, Counter<Level>>();
484         RegexTransform abstractPathTransform = new RegexTransform(
485             RegexTransform.Processing.ONE_PASS).add("//ldml/", "")
486                 .add("\\[@alt=\"[^\"]*\"\\]", "").add("=\"[^\"]*\"", "=\"*\"")
487                 .add("([^]])\\[", "$1\t[").add("([^]])/", "$1\t/")
488                 .add("/", "\t");
489 
490         for (String locale : getInclusion() <= 5 ? eightPointLocales : cldrFactory.getAvailable()) {
491             CLDRFile file = testInfo.getCLDRFile(locale, resolved);
492             if (file.isNonInheriting())
493                 continue;
494             logln(locale + "\t-\t" + english.getName(locale));
495 
496             for (Iterator<String> it = file.iterator(); it.hasNext();) {
497                 String path = it.next();
498                 if (path.endsWith("/alias")) {
499                     continue;
500                 }
501                 // collect abstracted paths
502                 String abstractPath = abstractPathTransform.transform(path);
503                 Level level = SUPPLEMENTAL_DATA_INFO.getCoverageLevel(path,
504                     locale);
505                 if (level == Level.OPTIONAL) {
506                     level = Level.COMPREHENSIVE;
507                 }
508                 Counter<Level> row = abstactPaths.get(abstractPath);
509                 if (row == null) {
510                     abstactPaths.put(abstractPath, row = new Counter<Level>());
511                 }
512                 row.add(level, 1);
513             }
514         }
515         logln(CldrUtility.LINE_SEPARATOR + "Abstract Paths");
516         for (Entry<String, Counter<Level>> pathInfo : abstactPaths.entrySet()) {
517             String path = pathInfo.getKey();
518             Counter<Level> counter = pathInfo.getValue();
519             logln(counter.getTotal() + "\t" + getCoverage(counter) + "\t"
520                 + path);
521         }
522     }
523 
getCoverage(Counter<Level> counter)524     private CharSequence getCoverage(Counter<Level> counter) {
525         StringBuilder result = new StringBuilder();
526         boolean first = true;
527         for (Level level : counter.getKeysetSortedByKey()) {
528             if (first) {
529                 first = false;
530             } else {
531                 result.append(' ');
532             }
533             result.append("L").append(level.ordinal()).append("=")
534                 .append(counter.get(level));
535         }
536         return result;
537     }
538 
539     // public void TestCLDRFileCache() {
540     // long start = System.nanoTime();
541     // Factory cldrFactory = testInfo.getCldrFactory();
542     // String unusualLocale = "hi";
543     // CLDRFile file = cldrFactory.make(unusualLocale, true);
544     // long afterOne = System.nanoTime();
545     // logln("First: " + (afterOne-start));
546     // CLDRFile file2 = cldrFactory.make(unusualLocale, true);
547     // long afterTwo = System.nanoTime();
548     // logln("Second: " + (afterTwo-afterOne));
549     // }
550     //
TestPaths()551     public void TestPaths() {
552         Relation<String, String> distinguishing = Relation.of(
553             new TreeMap<String, Set<String>>(), TreeSet.class);
554         Relation<String, String> nonDistinguishing = Relation.of(
555             new TreeMap<String, Set<String>>(), TreeSet.class);
556         XPathParts parts = new XPathParts();
557         Factory cldrFactory = testInfo.getCldrFactory();
558         CLDRFile english = testInfo.getEnglish();
559 
560         Relation<String, String> pathToLocale = Relation.of(
561             new TreeMap<String, Set<String>>(CLDRFile
562                 .getComparator(DtdType.ldml)),
563             TreeSet.class, null);
564         Set<String> localesToTest = getInclusion() <= 5 ? eightPointLocales : cldrFactory.getAvailable();
565         for (String locale : localesToTest) {
566             CLDRFile file = testInfo.getCLDRFile(locale, resolved);
567             DtdType dtdType = null;
568             if (file.isNonInheriting())
569                 continue;
570             DisplayAndInputProcessor displayAndInputProcessor = new DisplayAndInputProcessor(
571                 file, false);
572 
573             logln(locale + "\t-\t" + english.getName(locale));
574 
575             for (Iterator<String> it = file.iterator(); it.hasNext();) {
576                 String path = it.next();
577                 if (dtdType == null) {
578                     dtdType = DtdType.fromPath(path);
579                 }
580 
581                 if (path.endsWith("/alias")) {
582                     continue;
583                 }
584                 String value = file.getStringValue(path);
585                 if (value == null) {
586                     throw new IllegalArgumentException(locale
587                         + "\tError: in null value at " + path);
588                 }
589 
590                 String displayValue = displayAndInputProcessor
591                     .processForDisplay(path, value);
592                 if (!displayValue.equals(value)) {
593                     logln("\t"
594                         + locale
595                         + "\tdisplayAndInputProcessor changes display value <"
596                         + value + ">\t=>\t<" + displayValue + ">\t\t"
597                         + path);
598                 }
599                 String inputValue = displayAndInputProcessor.processInput(path,
600                     value, internalException);
601                 if (internalException[0] != null) {
602                     errln("\t" + locale
603                         + "\tdisplayAndInputProcessor internal error <"
604                         + value + ">\t=>\t<" + inputValue + ">\t\t" + path);
605                     internalException[0].printStackTrace(System.out);
606                 }
607                 if (isVerbose() && !inputValue.equals(value)) {
608                     displayAndInputProcessor.processInput(path, value,
609                         internalException); // for
610                     // debugging
611                     logln("\t"
612                         + locale
613                         + "\tdisplayAndInputProcessor changes input value <"
614                         + value + ">\t=>\t<" + inputValue + ">\t\t" + path);
615                 }
616 
617                 pathToLocale.put(path, locale);
618 
619                 // also check for non-distinguishing attributes
620                 if (path.contains("/identity"))
621                     continue;
622 
623                 String fullPath = file.getFullXPath(path);
624                 parts.set(fullPath);
625                 for (int i = 0; i < parts.size(); ++i) {
626                     if (parts.getAttributeCount(i) == 0)
627                         continue;
628                     String element = parts.getElement(i);
629                     for (String attribute : parts.getAttributeKeys(i)) {
630                         if (skipAttributes.contains(attribute))
631                             continue;
632                         if (CLDRFile.isDistinguishing(dtdType, element, attribute)) {
633                             distinguishing.put(element, attribute);
634                         } else {
635                             nonDistinguishing.put(element, attribute);
636                         }
637                     }
638                 }
639             }
640         }
641 
642         if (isVerbose()) {
643             System.out.format("Distinguishing Elements: %s"
644                 + CldrUtility.LINE_SEPARATOR, distinguishing);
645             System.out.format("Nondistinguishing Elements: %s"
646                 + CldrUtility.LINE_SEPARATOR, nonDistinguishing);
647             System.out.format("Skipped %s" + CldrUtility.LINE_SEPARATOR,
648                 skipAttributes);
649         }
650     }
651 
652     /**
653      * The verbose output shows the results of 1..3 \u00a4 signs.
654      */
checkCurrency()655     public void checkCurrency() {
656         Map<String, Set<R2<String, Integer>>> results = new TreeMap<String, Set<R2<String, Integer>>>(
657             Collator.getInstance(ULocale.ENGLISH));
658         for (ULocale locale : ULocale.getAvailableLocales()) {
659             if (locale.getCountry().length() != 0) {
660                 continue;
661             }
662             for (int i = 1; i < 4; ++i) {
663                 NumberFormat format = getCurrencyInstance(locale, i);
664                 for (Currency c : new Currency[] { Currency.getInstance("USD"),
665                     Currency.getInstance("EUR"),
666                     Currency.getInstance("INR") }) {
667                     format.setCurrency(c);
668                     final String formatted = format.format(12345.67);
669                     Set<R2<String, Integer>> set = results.get(formatted);
670                     if (set == null) {
671                         results.put(formatted,
672                             set = new TreeSet<R2<String, Integer>>());
673                     }
674                     set.add(Row.of(locale.toString(), Integer.valueOf(i)));
675                 }
676             }
677         }
678         for (String formatted : results.keySet()) {
679             logln(formatted + "\t" + results.get(formatted));
680         }
681     }
682 
getCurrencyInstance(ULocale locale, int type)683     private static NumberFormat getCurrencyInstance(ULocale locale, int type) {
684         NumberFormat format = NumberFormat.getCurrencyInstance(locale);
685         if (type > 1) {
686             DecimalFormat format2 = (DecimalFormat) format;
687             String pattern = format2.toPattern();
688             String replacement = "\u00a4\u00a4";
689             for (int i = 2; i < type; ++i) {
690                 replacement += "\u00a4";
691             }
692             pattern = pattern.replace("\u00a4", replacement);
693             format2.applyPattern(pattern);
694         }
695         return format;
696     }
697 
safeExemplars(CLDRFile file, String string)698     private UnicodeSet safeExemplars(CLDRFile file, String string) {
699         final UnicodeSet result = file.getExemplarSet(string,
700             WinningChoice.NORMAL);
701         return result != null ? result : new UnicodeSet();
702     }
703 
TestAPath()704     public void TestAPath() {
705         // <month type="1">1</month>
706         String path = "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/months/monthContext[@type=\"format\"]/monthWidth[@type=\"abbreviated\"]/month[@type=\"1\"]";
707         CLDRFile root = testInfo.getRoot();
708         logln("path: " + path);
709         String fullpath = root.getFullXPath(path);
710         logln("fullpath: " + fullpath);
711         String value = root.getStringValue(path);
712         logln("value: " + value);
713         Status status = new Status();
714         String source = root.getSourceLocaleID(path, status);
715         logln("locale: " + source);
716         logln("status: " + status);
717     }
718 
TestDefaultContents()719     public void TestDefaultContents() {
720         Set<String> defaultContents = Inheritance.defaultContents;
721         Multimap<String, String> parentToChildren = Inheritance.parentToChildren;
722 
723         if (DEBUG) {
724             Inheritance.showChain("", "", "root");
725         }
726 
727         for (String locale : defaultContents) {
728             CLDRFile cldrFile;
729             try {
730                 cldrFile = testInfo.getCLDRFile(locale, false);
731             } catch (RuntimeException e) {
732                 logln("Can't open default content file:\t" + locale);
733                 continue;
734             }
735             // we check that the default content locale is always empty
736             for (Iterator<String> it = cldrFile.iterator(); it.hasNext();) {
737                 String path = it.next();
738                 if (path.contains("/identity")) {
739                     continue;
740                 }
741                 errln("Default content file not empty:\t" + locale);
742                 showDifferences(locale);
743                 break;
744             }
745         }
746 
747         // check that if a locale has any children, that exactly one of them is
748         // the default content. Ignore locales with variants
749 
750         for (Entry<String, Collection<String>> localeAndKids : parentToChildren.asMap().entrySet()) {
751             String locale = localeAndKids.getKey();
752             if (locale.equals("root")) {
753                 continue;
754             }
755 
756             Collection<String> rawChildren = localeAndKids.getValue();
757 
758             // remove variant children
759             Set<String> children = new LinkedHashSet<>();
760             for (String child : rawChildren) {
761                 if (new LocaleIDParser().set(child).getVariants().length == 0) {
762                     children.add(child);
763                 }
764             }
765             if (children.isEmpty()) {
766                 continue;
767             }
768 
769             Set<String> defaultContentChildren = new LinkedHashSet<String>(children);
770             defaultContentChildren.retainAll(defaultContents);
771             if (defaultContentChildren.size() == 1) {
772                 continue;
773             // If we're already down to the region level then it's OK not to have
774             // default contents.
775             } else if (! new LocaleIDParser().set(locale).getRegion().isEmpty()) {
776                 continue;
777             } else if (defaultContentChildren.isEmpty()) {
778                     Object possible = highestShared(locale, children);
779                     errln("Locale has children but is missing default contents locale: "
780                         + locale + ", children: " + children + "; possible fixes for children:\n" + possible);
781             } else {
782                 errln("Locale has too many defaultContent locales!!: "
783                     + locale + ", defaultContents: "
784                     + defaultContentChildren);
785             }
786         }
787 
788         // check that each default content locale is likely-subtag equivalent to
789         // its parent.
790 
791         for (String locale : defaultContents) {
792             String maxLocale = LikelySubtags.maximize(locale, likelyData);
793             String localeParent = LocaleIDParser.getParent(locale);
794             String maxLocaleParent = LikelySubtags.maximize(localeParent,
795                 likelyData);
796             if (locale.equals("ar_001")) {
797                 logln("Known exception to likelyMax(locale=" + locale + ")"
798                     + " == " + "likelyMax(defaultContent=" + localeParent
799                     + ")");
800                 continue;
801             }
802             assertEquals("likelyMax(locale=" + locale + ")" + " == "
803                 + "likelyMax(defaultContent=" + localeParent + ")",
804                 maxLocaleParent, maxLocale);
805         }
806 
807     }
808 
highestShared(String parent, Set<String> children)809     private String highestShared(String parent, Set<String> children) {
810         M4<PathHeader, String, String, Boolean> data = ChainedMap.of(new TreeMap<PathHeader, Object>(), new TreeMap<String, Object>(),
811             new TreeMap<String, Object>(), Boolean.class);
812         CLDRFile parentFile = testInfo.getCLDRFile(parent, true);
813         PathHeader.Factory phf = PathHeader.getFactory(testInfo.getEnglish());
814         for (String child : children) {
815             CLDRFile cldrFile = testInfo.getCLDRFile(child, false);
816             for (String path : cldrFile) {
817                 if (path.contains("/identity")) {
818                     continue;
819                 }
820                 if (path.contains("provisional") || path.contains("unconfirmed")) {
821                     continue;
822                 }
823                 String value = cldrFile.getStringValue(path);
824                 // double-check
825                 String parentValue = parentFile.getStringValue(path);
826                 if (value.equals(parentValue)) {
827                     continue;
828                 }
829                 PathHeader ph = phf.fromPath(path);
830                 data.put(ph, value, child, Boolean.TRUE);
831                 data.put(ph, parentValue == null ? "∅∅∅" : parentValue, child, Boolean.TRUE);
832             }
833         }
834         StringBuilder result = new StringBuilder();
835         for (Entry<PathHeader, Map<String, Map<String, Boolean>>> entry : data) {
836             for (Entry<String, Map<String, Boolean>> item : entry.getValue().entrySet()) {
837                 result.append("\n")
838                     .append(entry.getKey())
839                     .append("\t")
840                     .append(item.getKey() + "\t" + item.getValue().keySet());
841             }
842         }
843         return result.toString();
844     }
845 
846     public static class Inheritance {
847         public static final Set<String> defaultContents = SUPPLEMENTAL_DATA_INFO
848             .getDefaultContentLocales();
849         public static final Multimap<String, String> parentToChildren;
850 
851         static {
852             Multimap<String, String> _parentToChildren = TreeMultimap.create();
853             for (String child : testInfo.getCldrFactory().getAvailable()) {
854                 if (child.equals("root")) {
855                     continue;
856                 }
857                 String localeParent = LocaleIDParser.getParent(child);
_parentToChildren.put(localeParent, child)858                 _parentToChildren.put(localeParent, child);
859             }
860             parentToChildren = ImmutableMultimap.copyOf(_parentToChildren);
861         }
862 
showChain(String prefix, String gparent, String current)863         public static void showChain(String prefix, String gparent, String current) {
864             Collection<String> children = parentToChildren.get(current);
865             if (children == null) {
866                 throw new IllegalArgumentException();
867             }
868             prefix += current + (defaultContents.contains(current) ? "*" : "")
869                 + (isLikelyEquivalent(gparent, current) ? "~" : "") + "\t";
870 
871             // find leaves
872             Set<String> parents = new LinkedHashSet<>(children);
873             parents.retainAll(parentToChildren.keySet());
874             Set<String> leaves = new LinkedHashSet<>(children);
875             leaves.removeAll(parentToChildren.keySet());
876             if (!leaves.isEmpty()) {
877                 List<String> presentation = new ArrayList<>();
878                 boolean gotDc = false;
879                 for (String s : leaves) {
880                     String shown = s;
881                     if (isLikelyEquivalent(current, s)) {
882                         shown += "~";
883                     }
884                     if (defaultContents.contains(s)) {
885                         gotDc = true;
886                         shown += "*";
887                     }
888                     if (!shown.equals(s)) {
889                         presentation.add(0, shown);
890                     } else {
891                         presentation.add(shown);
892                     }
893                 }
894                 if (!gotDc) {
895                     int debug = 0;
896                 }
897                 if (leaves.size() == 1) {
898                     System.out.println(prefix + CollectionUtilities.join(presentation, " "));
899                 } else {
900                     System.out.println(prefix + "{" + CollectionUtilities.join(presentation, " ") + "}");
901                 }
902             }
903             for (String parent : parents) {
904                 showChain(prefix, current, parent);
905             }
906         }
907 
isLikelyEquivalent(String locale1, String locale2)908         static boolean isLikelyEquivalent(String locale1, String locale2) {
909             if (locale1.equals(locale2)) {
910                 return true;
911             }
912             try {
913                 String maxLocale1 = LikelySubtags.maximize(locale1, likelyData);
914                 String maxLocale2 = LikelySubtags.maximize(locale2, likelyData);
915                 return maxLocale1 != null && Objects.equal(maxLocale1, maxLocale2);
916             } catch (Exception e) {
917                 return false;
918             }
919         }
920     }
921 
    /**
     * Likely-subtags data taken from the supplemental data; passed as the
     * second argument to {@code LikelySubtags.maximize} by the tests in this
     * class.
     */
    static final Map<String, String> likelyData = SUPPLEMENTAL_DATA_INFO
        .getLikelySubtags();
924 
TestLikelySubtagsComplete()925     public void TestLikelySubtagsComplete() {
926         LanguageTagParser ltp = new LanguageTagParser();
927         for (String locale : testInfo.getCldrFactory().getAvailable()) {
928             if (locale.equals("root")) {
929                 continue;
930             }
931             String maxLocale = LikelySubtags.maximize(locale, likelyData);
932             if (maxLocale == null) {
933                 errln("Locale missing likely subtag: " + locale);
934                 continue;
935             }
936             ltp.set(maxLocale);
937             if (ltp.getLanguage().isEmpty() || ltp.getScript().isEmpty()
938                 || ltp.getRegion().isEmpty()) {
939                 errln("Locale has defective likely subtag: " + locale + " => "
940                     + maxLocale);
941             }
942         }
943     }
944 
showDifferences(String locale)945     private void showDifferences(String locale) {
946         CLDRFile cldrFile = testInfo.getCLDRFile(locale, false);
947         final String localeParent = LocaleIDParser.getParent(locale);
948         CLDRFile parentFile = testInfo.getCLDRFile(localeParent, true);
949         int funnyCount = 0;
950         for (Iterator<String> it = cldrFile.iterator("",
951             cldrFile.getComparator()); it.hasNext();) {
952             String path = it.next();
953             if (path.contains("/identity")) {
954                 continue;
955             }
956             final String fullXPath = cldrFile.getFullXPath(path);
957             if (fullXPath.contains("[@draft=\"unconfirmed\"]")
958                 || fullXPath.contains("[@draft=\"provisional\"]")) {
959                 funnyCount++;
960                 continue;
961             }
962             logln("\tpath:\t" + path);
963             logln("\t\t" + locale + " value:\t<"
964                 + cldrFile.getStringValue(path) + ">");
965             final String parentFullPath = parentFile.getFullXPath(path);
966             logln("\t\t" + localeParent + " value:\t<"
967                 + parentFile.getStringValue(path) + ">");
968             logln("\t\t" + locale + " fullpath:\t" + fullXPath);
969             logln("\t\t" + localeParent + " fullpath:\t" + parentFullPath);
970         }
971         logln("\tCount of non-approved:\t" + funnyCount);
972     }
973 
    /**
     * Kinds of core locale data that {@code TestCoreData} records as missing
     * for a locale (in its per-locale error and warning sets).
     */
    enum MissingType {
        plurals, main_exemplars, no_main, collation, index_exemplars, punct_exemplars
    }
977 
TestCoreData()978     public void TestCoreData() {
979         Set<String> availableLanguages = testInfo.getCldrFactory()
980             .getAvailableLanguages();
981         PluralInfo rootRules = SUPPLEMENTAL_DATA_INFO.getPlurals(
982             PluralType.cardinal, "root");
983         EnumSet<MissingType> errors = EnumSet.of(MissingType.collation);
984         EnumSet<MissingType> warnings = EnumSet.of(MissingType.collation,
985             MissingType.index_exemplars, MissingType.punct_exemplars);
986 
987         Set<String> collations = new HashSet<String>();
988 
989         // collect collation info
990         Factory collationFactory = Factory.make(CLDRPaths.COLLATION_DIRECTORY,
991             ".*", DraftStatus.contributed);
992         for (String localeID : collationFactory.getAvailable()) {
993             // if (localeID.equals("root")) {
994             // CLDRFile cldrFile = collationFactory.make(localeID, false,
995             // DraftStatus.contributed);
996             // for (String path : cldrFile) {
997             // if (path.startsWith("//ldml/collations")) {
998             // String fullPath = cldrFile.getFullXPath(path);
999             // String valid = parts.set(fullPath).getAttributeValue(1,
1000             // "validSubLocales");
1001             // for (String validSub : valid.trim().split("\\s+")) {
1002             // if (isTopLevel(validSub)) {
1003             // collations.add(validSub);
1004             // }
1005             // }
1006             // break; // done with root
1007             // }
1008             // }
1009             // } else
1010             if (isTopLevel(localeID)) {
1011                 collations.add(localeID);
1012             }
1013         }
1014         logln(collations.toString());
1015 
1016         Set<String> allLanguages = Builder.with(new TreeSet<String>())
1017             .addAll(collations).addAll(availableLanguages).freeze();
1018 
1019         for (String localeID : allLanguages) {
1020             if (localeID.equals("root")) {
1021                 continue; // skip script locales
1022             }
1023             if (!isTopLevel(localeID)) {
1024                 continue;
1025             }
1026 
1027             errors.clear();
1028             warnings.clear();
1029 
1030             String name = "Locale:" + localeID + " ("
1031                 + testInfo.getEnglish().getName(localeID) + ")";
1032 
1033             if (!collations.contains(localeID)) {
1034                 warnings.add(MissingType.collation);
1035                 logln(name + " is missing " + MissingType.collation.toString());
1036             }
1037 
1038             try {
1039                 CLDRFile cldrFile = testInfo.getCldrFactory().make(localeID,
1040                     false, DraftStatus.contributed);
1041 
1042                 String wholeFileAlias = cldrFile.getStringValue("//ldml/alias");
1043                 if (wholeFileAlias != null) {
1044                     logln("Whole-file alias:" + name);
1045                     continue;
1046                 }
1047 
1048                 PluralInfo pluralInfo = SUPPLEMENTAL_DATA_INFO.getPlurals(
1049                     PluralType.cardinal, localeID);
1050                 if (pluralInfo == rootRules) {
1051                     logln(name + " is missing "
1052                         + MissingType.plurals.toString());
1053                     warnings.add(MissingType.plurals);
1054                 }
1055                 UnicodeSet main = cldrFile.getExemplarSet("",
1056                     WinningChoice.WINNING);
1057                 if (main == null || main.isEmpty()) {
1058                     errln("  " + name + " is missing "
1059                         + MissingType.main_exemplars.toString());
1060                     errors.add(MissingType.main_exemplars);
1061                 }
1062                 UnicodeSet index = cldrFile.getExemplarSet("index",
1063                     WinningChoice.WINNING);
1064                 if (index == null || index.isEmpty()) {
1065                     logln(name + " is missing "
1066                         + MissingType.index_exemplars.toString());
1067                     warnings.add(MissingType.index_exemplars);
1068                 }
1069                 UnicodeSet punctuation = cldrFile.getExemplarSet("punctuation",
1070                     WinningChoice.WINNING);
1071                 if (punctuation == null || punctuation.isEmpty()) {
1072                     logln(name + " is missing "
1073                         + MissingType.punct_exemplars.toString());
1074                     warnings.add(MissingType.punct_exemplars);
1075                 }
1076             } catch (Exception e) {
1077                 errln("  " + name + " is missing main locale data.");
1078                 errors.add(MissingType.no_main);
1079             }
1080 
1081             // report errors
1082 
1083             if (errors.isEmpty() && warnings.isEmpty()) {
1084                 logln(name + ": No problems...");
1085             }
1086         }
1087     }
1088 
isTopLevel(String localeID)1089     private boolean isTopLevel(String localeID) {
1090         return "root".equals(LocaleIDParser.getParent(localeID));
1091     }
1092 
1093     /**
1094      * Tests that every dtd item is connected from root
1095      */
TestDtdCompleteness()1096     public void TestDtdCompleteness() {
1097         for (DtdType type : DtdType.values()) {
1098             DtdData dtdData = DtdData.getInstance(type);
1099             Set<Element> descendents = new LinkedHashSet<Element>();
1100             dtdData.getDescendents(dtdData.ROOT, descendents);
1101             Set<Element> elements = dtdData.getElements();
1102             if (!elements.equals(descendents)) {
1103                 for (Element e : elements) {
1104                     if (!descendents.contains(e) && !e.equals(dtdData.PCDATA)
1105                         && !e.equals(dtdData.ANY)) {
1106                         errln(type + ": Element " + e
1107                             + " not contained in descendents of ROOT.");
1108                     }
1109                 }
1110                 for (Element e : descendents) {
1111                     if (!elements.contains(e)) {
1112                         errln(type + ": Element " + e
1113                             + ", descendent of ROOT, not in elements.");
1114                     }
1115                 }
1116             }
1117             LinkedHashSet<Element> all = new LinkedHashSet<Element>(descendents);
1118             all.addAll(elements);
1119             Set<Attribute> attributes = dtdData.getAttributes();
1120             for (Attribute a : attributes) {
1121                 if (!elements.contains(a.element)) {
1122                     errln(type + ": Attribute " + a + " isn't for any element.");
1123                 }
1124             }
1125         }
1126     }
1127 
TestBasicDTDCompatibility()1128     public void TestBasicDTDCompatibility() {
1129 
1130         // Only run the rest in exhaustive mode, since it requires CLDR_ARCHIVE_DIRECTORY
1131         if (getInclusion() <= 5) {
1132             return;
1133         }
1134 
1135         final String oldCommon = CLDRPaths.ARCHIVE_DIRECTORY + "/cldr-" + Versions.v22_1.toString() + "/common";
1136 
1137         // set up exceptions
1138         Set<String> changedToEmpty = new HashSet<String>(
1139             Arrays.asList(new String[] { "version", "languageCoverage",
1140                 "scriptCoverage", "territoryCoverage",
1141                 "currencyCoverage", "timezoneCoverage",
1142                 "skipDefaultLocale" }));
1143         Set<String> PCDATA = new HashSet<String>();
1144         PCDATA.add("PCDATA");
1145         Set<String> EMPTY = new HashSet<String>();
1146         EMPTY.add("EMPTY");
1147         Set<String> VERSION = new HashSet<String>();
1148         VERSION.add("version");
1149 
1150         // test all DTDs
1151         for (DtdType dtd : DtdType.values()) {
1152             try {
1153                 ElementAttributeInfo oldDtd = ElementAttributeInfo.getInstance(
1154                     oldCommon, dtd);
1155                 ElementAttributeInfo newDtd = ElementAttributeInfo
1156                     .getInstance(dtd);
1157 
1158                 if (oldDtd == newDtd) {
1159                     continue;
1160                 }
1161                 Relation<String, String> oldElement2Children = oldDtd
1162                     .getElement2Children();
1163                 Relation<String, String> newElement2Children = newDtd
1164                     .getElement2Children();
1165 
1166                 Relation<String, String> oldElement2Attributes = oldDtd
1167                     .getElement2Attributes();
1168                 Relation<String, String> newElement2Attributes = newDtd
1169                     .getElement2Attributes();
1170 
1171                 for (String element : oldElement2Children.keySet()) {
1172                     Set<String> oldChildren = oldElement2Children
1173                         .getAll(element);
1174                     Set<String> newChildren = newElement2Children
1175                         .getAll(element);
1176                     if (newChildren == null) {
1177                         if (!knownElementExceptions.contains(Pair.of(dtd.toString(), element))) {
1178                             errln("Old " + dtd + " contains element not in new: <"
1179                                 + element + ">");
1180                         }
1181                         continue;
1182                     }
1183                     Set<String> funny = containsInOrder(newChildren,
1184                         oldChildren);
1185                     if (funny != null) {
1186                         if (changedToEmpty.contains(element)
1187                             && oldChildren.equals(PCDATA)
1188                             && newChildren.equals(EMPTY)) {
1189                             // ok, skip
1190                         } else {
1191                             errln("Old " + dtd + " element <" + element
1192                                 + "> has children Missing/Misordered:\t"
1193                                 + funny + "\n\t\tOld:\t" + oldChildren
1194                                 + "\n\t\tNew:\t" + newChildren);
1195                         }
1196                     }
1197 
1198                     Set<String> oldAttributes = oldElement2Attributes
1199                         .getAll(element);
1200                     if (oldAttributes == null) {
1201                         oldAttributes = Collections.emptySet();
1202                     }
1203                     Set<String> newAttributes = newElement2Attributes
1204                         .getAll(element);
1205                     if (newAttributes == null) {
1206                         newAttributes = Collections.emptySet();
1207                     }
1208                     if (!newAttributes.containsAll(oldAttributes)) {
1209                         LinkedHashSet<String> missing = new LinkedHashSet<String>(
1210                             oldAttributes);
1211                         missing.removeAll(newAttributes);
1212                         if (element.equals(dtd.toString())
1213                             && missing.equals(VERSION)) {
1214                             // ok, skip
1215                         } else {
1216                             errln("Old " + dtd + " element <" + element
1217                                 + "> has attributes Missing:\t" + missing
1218                                 + "\n\t\tOld:\t" + oldAttributes
1219                                 + "\n\t\tNew:\t" + newAttributes);
1220                         }
1221                     }
1222                 }
1223             } catch (Exception e) {
1224                 e.printStackTrace();
1225                 errln("Failure with " + dtd);
1226             }
1227         }
1228     }
1229 
containsInOrder(Set<T> superset, Set<T> subset)1230     private <T> Set<T> containsInOrder(Set<T> superset, Set<T> subset) {
1231         if (!superset.containsAll(subset)) {
1232             LinkedHashSet<T> missing = new LinkedHashSet<T>(subset);
1233             missing.removeAll(superset);
1234             return missing;
1235         }
1236         // ok, we know that they are subsets, try order
1237         Set<T> result = null;
1238         DiscreteComparator<T> comp = new DiscreteComparator.Builder<T>(
1239             Ordering.ARBITRARY).add(superset).get();
1240         T last = null;
1241         for (T item : subset) {
1242             if (last != null) {
1243                 int order = comp.compare(last, item);
1244                 if (order != -1) {
1245                     if (result == null) {
1246                         result = new HashSet<T>();
1247                         result.add(last);
1248                         result.add(item);
1249                     }
1250                 }
1251             }
1252             last = item;
1253         }
1254         return result;
1255     }
1256 
TestDtdCompatibility()1257     public void TestDtdCompatibility() {
1258 
1259         for (DtdType type : DtdType.values()) {
1260             DtdData dtdData = DtdData.getInstance(type);
1261             Map<String, Element> currentElementFromName = dtdData
1262                 .getElementFromName();
1263 
1264             // current has no orphan
1265             Set<Element> orphans = new LinkedHashSet<Element>(dtdData
1266                 .getElementFromName().values());
1267             orphans.remove(dtdData.ROOT);
1268             orphans.remove(dtdData.PCDATA);
1269             orphans.remove(dtdData.ANY);
1270             Set<String> elementsWithoutAlt = new TreeSet<String>();
1271             Set<String> elementsWithoutDraft = new TreeSet<String>();
1272             Set<String> elementsWithoutAlias = new TreeSet<String>();
1273             Set<String> elementsWithoutSpecial = new TreeSet<String>();
1274 
1275             for (Element element : dtdData.getElementFromName().values()) {
1276                 Set<Element> children = element.getChildren().keySet();
1277                 orphans.removeAll(children);
1278                 if (type == DtdType.ldml
1279                     && !SUPPLEMENTAL_DATA_INFO.isDeprecated(type,
1280                         element.name, "*", "*")) {
1281                     if (element.getType() == ElementType.PCDATA) {
1282                         if (element.getAttributeNamed("alt") == null) {
1283                             elementsWithoutAlt.add(element.name);
1284                         }
1285                         if (element.getAttributeNamed("draft") == null) {
1286                             elementsWithoutDraft.add(element.name);
1287                         }
1288                     } else {
1289                         if (children.size() != 0 && !"alias".equals(element.name)) {
1290                             if (element.getChildNamed("alias") == null) {
1291                                 elementsWithoutAlias.add(element.name);
1292                             }
1293                             if (element.getChildNamed("special") == null) {
1294                                 elementsWithoutSpecial.add(element.name);
1295                             }
1296                         }
1297                     }
1298                 }
1299             }
1300             assertEquals(type + " DTD Must not have orphan elements",
1301                 Collections.EMPTY_SET, orphans);
1302             assertEquals(type
1303                 + " DTD elements with PCDATA must have 'alt' attributes",
1304                 Collections.EMPTY_SET, elementsWithoutAlt);
1305             assertEquals(type
1306                 + " DTD elements with PCDATA must have 'draft' attributes",
1307                 Collections.EMPTY_SET, elementsWithoutDraft);
1308             assertEquals(type
1309                 + " DTD elements with children must have 'alias' elements",
1310                 Collections.EMPTY_SET, elementsWithoutAlias);
1311             assertEquals(
1312                 type
1313                     + " DTD elements with children must have 'special' elements",
1314                 Collections.EMPTY_SET, elementsWithoutSpecial);
1315 
1316             // Only run the rest in exhaustive mode, since it requires CLDR_ARCHIVE_DIRECTORY
1317             if (getInclusion() <= 5) {
1318                 return;
1319             }
1320 
1321             for (Versions version : Versions.values()) {
1322                 if (version == Versions.trunk) {
1323                     continue;
1324                 } else if (version == Versions.v1_1_1) {
1325                     break;
1326                 }
1327                 DtdData dtdDataOld;
1328                 try {
1329                     dtdDataOld = DtdData.getInstance(type, version.toString());
1330                 } catch (IllegalArgumentException e) {
1331                     boolean tooOld = false;
1332                     switch (type) {
1333                     case ldmlBCP47:
1334                     case ldmlICU:
1335                         tooOld = version.compareTo(Versions.v1_7_2) >= 0;
1336                         break;
1337                     case keyboard:
1338                     case platform:
1339                         tooOld = version.compareTo(Versions.v22_1) >= 0;
1340                         break;
1341                     default:
1342                         break;
1343                     }
1344                     if (tooOld) {
1345                         continue;
1346                     } else {
1347                         throw e;
1348                     }
1349                 }
1350                 // verify that if E is in dtdDataOld, then it is in dtdData, and
1351                 // has at least the same children and attributes
1352                 for (Entry<String, Element> entry : dtdDataOld
1353                     .getElementFromName().entrySet()) {
1354                     Element oldElement = entry.getValue();
1355                     Element newElement = currentElementFromName.get(entry
1356                         .getKey());
1357                     if (knownElementExceptions.contains(Pair.of(type.toString(), oldElement.getName()))) {
1358                         continue;
1359                     }
1360                     if (assertNotNull(type
1361                         + " DTD for trunk must be superset of v" + version
1362                         + ", and must contain «" + oldElement.getName()
1363                         + "»", newElement)) {
1364                         // TODO Check order also
1365                         for (Element oldChild : oldElement.getChildren()
1366                             .keySet()) {
1367                             if (oldChild == null) {
1368                                 continue;
1369                             }
1370                             Element newChild = newElement
1371                                 .getChildNamed(oldChild.getName());
1372 
1373                             if (knownChildExceptions.contains(Pair.of(newElement.getName(), oldChild.getName()))) {
1374                                 continue;
1375                             }
1376                             assertNotNull(
1377                                 type + " DTD - Children of «"
1378                                     + newElement.getName()
1379                                     + "» must be superset of v"
1380                                     + version + ", and must contain «"
1381                                     + oldChild.getName() + "»",
1382                                 newChild);
1383                         }
1384                         for (Attribute oldAttribute : oldElement
1385                             .getAttributes().keySet()) {
1386                             Attribute newAttribute = newElement
1387                                 .getAttributeNamed(oldAttribute.getName());
1388 
1389                             if (knownAttributeExceptions.contains(Pair.of(newElement.getName(), oldAttribute.getName()))) {
1390                                 continue;
1391                             }
1392                             assertNotNull(
1393                                 type + " DTD - Attributes of «"
1394                                     + newElement.getName()
1395                                     + "» must be superset of v"
1396                                     + version + ", and must contain «"
1397                                     + oldAttribute.getName() + "»",
1398                                 newAttribute);
1399 
1400                         }
1401                     }
1402                 }
1403             }
1404         }
1405     }
1406 
1407     /**
1408      * Compare each path to each other path for every single file in CLDR
1409      */
TestDtdComparison()1410     public void TestDtdComparison() {
1411         // try some simple paths for regression
1412 
1413         sortPaths(
1414             DtdData.getInstance(DtdType.ldml).getDtdComparator(null),
1415             "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/dateTimeFormatLength[@type=\"full\"]/dateTimeFormat[@type=\"standard\"]/pattern[@type=\"standard\"]",
1416             "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats");
1417 
1418         sortPaths(
1419             DtdData.getInstance(DtdType.supplementalData).getDtdComparator(
1420                 null),
1421             "//supplementalData/territoryContainment/group[@type=\"419\"][@contains=\"013 029 005\"][@grouping=\"true\"]",
1422             "//supplementalData/territoryContainment/group[@type=\"003\"][@contains=\"021 013 029\"][@grouping=\"true\"]");
1423 
1424         //checkDtdComparatorForResource("TestBasic_ja.xml", DtdType.ldmlICU);
1425     }
1426 
TestDtdComparisonsAll()1427     public void TestDtdComparisonsAll() {
1428         if (getInclusion() <= 5) { // Only run this test in exhaustive mode.
1429             return;
1430         }
1431         for (File file : CLDRConfig.getInstance().getAllCLDRFilesEndingWith(".xml")) {
1432             checkDtdComparatorFor(file, null);
1433         }
1434     }
1435 
checkDtdComparatorForResource(String fileToRead, DtdType overrideDtdType)1436     public void checkDtdComparatorForResource(String fileToRead,
1437         DtdType overrideDtdType) {
1438         MyHandler myHandler = new MyHandler(overrideDtdType);
1439         XMLFileReader xfr = new XMLFileReader().setHandler(myHandler);
1440         try {
1441             myHandler.fileName = fileToRead;
1442             xfr.read(myHandler.fileName, TestBasic.class, -1, true);
1443             logln(myHandler.fileName);
1444         } catch (Exception e) {
1445             Throwable t = e;
1446             StringBuilder b = new StringBuilder();
1447             String indent = "";
1448             while (t != null) {
1449                 b.append(indent).append(t.getMessage());
1450                 indent = indent.isEmpty() ? "\n\t\t" : indent + "\t";
1451                 t = t.getCause();
1452             }
1453             errln(b.toString());
1454             return;
1455         }
1456         DtdData dtdData = DtdData.getInstance(myHandler.dtdType);
1457         sortPaths(dtdData.getDtdComparator(null), myHandler.data);
1458     }
1459 
checkDtdComparatorFor(File fileToRead, DtdType overrideDtdType)1460     public void checkDtdComparatorFor(File fileToRead, DtdType overrideDtdType) {
1461         MyHandler myHandler = new MyHandler(overrideDtdType);
1462         XMLFileReader xfr = new XMLFileReader().setHandler(myHandler);
1463         try {
1464             myHandler.fileName = fileToRead.getCanonicalPath();
1465             xfr.read(myHandler.fileName, -1, true);
1466             logln(myHandler.fileName);
1467         } catch (Exception e) {
1468             Throwable t = e;
1469             StringBuilder b = new StringBuilder();
1470             String indent = "";
1471             while (t != null) {
1472                 b.append(indent).append(t.getMessage());
1473                 indent = indent.isEmpty() ? "\n\t\t" : indent + "\t";
1474                 t = t.getCause();
1475             }
1476             errln(b.toString());
1477             return;
1478         }
1479         DtdData dtdData = DtdData.getInstance(myHandler.dtdType);
1480         sortPaths(dtdData.getDtdComparator(null), myHandler.data);
1481     }
1482 
1483     static class MyHandler extends XMLFileReader.SimpleHandler {
1484         private String fileName;
1485         private DtdType dtdType;
1486         private final Set<String> data = new LinkedHashSet<>();
1487 
MyHandler(DtdType overrideDtdType)1488         public MyHandler(DtdType overrideDtdType) {
1489             dtdType = overrideDtdType;
1490         }
1491 
handlePathValue(String path, String value)1492         public void handlePathValue(String path, String value) {
1493             if (dtdType == null) {
1494                 try {
1495                     dtdType = DtdType.fromPath(path);
1496                 } catch (Exception e) {
1497                     throw new IllegalArgumentException(
1498                         "Can't read " + fileName, e);
1499                 }
1500             }
1501             data.add(path);
1502         }
1503     }
1504 
sortPaths(Comparator<String> dc, Collection<String> paths)1505     public void sortPaths(Comparator<String> dc, Collection<String> paths) {
1506         String[] array = paths.toArray(new String[paths.size()]);
1507         sortPaths(dc, array);
1508     }
1509 
sortPaths(Comparator<String> dc, String... array)1510     public void sortPaths(Comparator<String> dc, String... array) {
1511         Arrays.sort(array, 0, array.length, dc);
1512     }
1513     // public void TestNewDtdData() moved to TestDtdData
1514 }
1515