1 package org.unicode.cldr.util;
2 
3 import java.io.File;
4 import java.io.StringReader;
5 import java.util.ArrayList;
6 import java.util.Arrays;
7 import java.util.Collection;
8 import java.util.Collections;
9 import java.util.Comparator;
10 import java.util.EnumMap;
11 import java.util.HashMap;
12 import java.util.HashSet;
13 import java.util.Iterator;
14 import java.util.LinkedHashMap;
15 import java.util.LinkedHashSet;
16 import java.util.List;
17 import java.util.Locale;
18 import java.util.Map;
19 import java.util.Map.Entry;
20 import java.util.Set;
21 import java.util.TreeMap;
22 import java.util.regex.Pattern;
23 
24 import com.google.common.base.CharMatcher;
25 import com.google.common.base.Splitter;
26 import com.google.common.collect.ImmutableSet;
27 import com.google.common.collect.ImmutableSet.Builder;
28 import com.google.common.collect.Multimap;
29 import com.ibm.icu.dev.util.CollectionUtilities;
30 import com.ibm.icu.impl.Relation;
31 import com.ibm.icu.text.Transform;
32 
33 /**
34  * An immutable object that contains the structure of a DTD.
35  * @author markdavis
36  */
37 public class DtdData extends XMLFileReader.SimpleHandler {
38     private static final String COMMENT_PREFIX = System.lineSeparator() + "    ";
39     private static final boolean SHOW_ALL = CldrUtility.getProperty("show_all", false);
40     private static final boolean USE_SYNTHESIZED = false;
41 
42     private static final boolean DEBUG = false;
43     private static final Pattern FILLER = PatternCache.get("[^-a-zA-Z0-9#_:]");
44 
45     private final Relation<String, Attribute> nameToAttributes = Relation.of(new TreeMap<String, Set<Attribute>>(), LinkedHashSet.class);
46     private Map<String, Element> nameToElement = new HashMap<String, Element>();
47     private MapComparator<String> elementComparator;
48     private MapComparator<String> attributeComparator;
49 
50     public final Element ROOT;
51     public final Element PCDATA = elementFrom("#PCDATA");
52     public final Element ANY = elementFrom("ANY");
53     public final DtdType dtdType;
54     public final String version;
55     private Element lastElement;
56     private Attribute lastAttribute;
57     private Set<String> preCommentCache;
58     private DtdComparator dtdComparator;
59 
60     public enum AttributeStatus {
61         distinguished, value, metadata
62     }
63 
64     public enum Mode {
65         REQUIRED("#REQUIRED"), OPTIONAL("#IMPLIED"), FIXED("#FIXED"), NULL("null");
66 
67         public final String source;
68 
Mode(String s)69         Mode(String s) {
70             source = s;
71         }
72 
forString(String mode)73         public static Mode forString(String mode) {
74             for (Mode value : Mode.values()) {
75                 if (value.source.equals(mode)) {
76                     return value;
77                 }
78             }
79             if (mode == null) {
80                 return NULL;
81             }
82             throw new IllegalArgumentException(mode);
83         }
84     }
85 
86     public enum AttributeType {
87         CDATA, ID, IDREF, IDREFS, ENTITY, ENTITIES, NMTOKEN, NMTOKENS, ENUMERATED_TYPE
88     }
89 
90     public static class Attribute implements Named {
91         public final String name;
92         public final Element element;
93         public final Mode mode;
94         public final String defaultValue;
95         public final AttributeType type;
96         public final Map<String, Integer> values;
97         private final Set<String> commentsPre;
98         private Set<String> commentsPost;
99         private boolean isDeprecatedAttribute;
100         private AttributeStatus attributeStatus = AttributeStatus.distinguished; // default unless reset by annotations
101         private Set<String> deprecatedValues = Collections.emptySet();
102         private final Comparator<String> attributeValueComparator;
103 
Attribute(DtdType dtdType, Element element2, String aName, Mode mode2, String[] split, String value2, Set<String> firstComment)104         private Attribute(DtdType dtdType, Element element2, String aName, Mode mode2, String[] split, String value2, Set<String> firstComment) {
105             commentsPre = firstComment;
106             element = element2;
107             name = aName.intern();
108             if (name.equals("draft") // normally never permitted on elements with children, but special cases...
109                 && !element.getName().equals("collation")
110                 && !element.getName().equals("transform")) {
111                 int elementChildrenCount = element.getChildren().size();
112                 if (elementChildrenCount > 1
113                     || elementChildrenCount == 1 && !element.getChildren().keySet().iterator().next().getName().equals("cp")) {
114                     isDeprecatedAttribute = true;
115                     if (DEBUG) {
116                         System.out.println(element.getName() + ":" + element.getChildren());
117                     }
118                 }
119             }
120             mode = mode2;
121             defaultValue = value2 == null ? null
122                 : value2.intern();
123             AttributeType _type = AttributeType.ENUMERATED_TYPE;
124             Map<String, Integer> _values = Collections.emptyMap();
125             if (split.length == 1) {
126                 try {
127                     _type = AttributeType.valueOf(split[0]);
128                 } catch (Exception e) {
129                 }
130             }
131             type = _type;
132 
133             if (_type == AttributeType.ENUMERATED_TYPE) {
134                 LinkedHashMap<String, Integer> temp = new LinkedHashMap<String, Integer>();
135                 for (String part : split) {
136                     if (part.length() != 0) {
137                         temp.put(part.intern(), temp.size());
138                     }
139                 }
140                 _values = Collections.unmodifiableMap(temp);
141             }
142             values = _values;
143             attributeValueComparator = getAttributeValueComparator(dtdType, element.name, name);
144         }
145 
146         @Override
toString()147         public String toString() {
148             return element.name + ":" + name;
149         }
150 
appendDtdString(StringBuilder b)151         public StringBuilder appendDtdString(StringBuilder b) {
152             Attribute a = this;
153             b.append("<!ATTLIST " + element.name + " " + a.name);
154             boolean first;
155             if (a.type == AttributeType.ENUMERATED_TYPE) {
156                 b.append(" (");
157                 first = true;
158                 for (String s : a.values.keySet()) {
159                     if (deprecatedValues.contains(s)) {
160                         continue;
161                     }
162                     if (first) {
163                         first = false;
164                     } else {
165                         b.append(" | ");
166                     }
167                     b.append(s);
168                 }
169                 b.append(")");
170             } else {
171                 b.append(' ').append(a.type);
172             }
173             if (a.mode != Mode.NULL) {
174                 b.append(" ").append(a.mode.source);
175             }
176             if (a.defaultValue != null) {
177                 b.append(" \"").append(a.defaultValue).append('"');
178             }
179             b.append(" >");
180             return b;
181         }
182 
features()183         public String features() {
184             return (type == AttributeType.ENUMERATED_TYPE ? values.keySet().toString() : type.toString())
185                 + (mode == Mode.NULL ? "" : ", mode=" + mode)
186                 + (defaultValue == null ? "" : ", default=" + defaultValue);
187         }
188 
189         @Override
getName()190         public String getName() {
191             return name;
192         }
193 
194         private static Splitter COMMA = Splitter.on(',').trimResults();
195 
addComment(String commentIn)196         public void addComment(String commentIn) {
197             if (commentIn.startsWith("@")) {
198                 // there are exactly 2 cases: deprecated and ordered
199                 switch (commentIn) {
200                 case "@METADATA":
201                     attributeStatus = AttributeStatus.metadata;
202                     break;
203                 case "@VALUE":
204                     attributeStatus = AttributeStatus.value;
205                     break;
206                 case "@DEPRECATED":
207                     isDeprecatedAttribute = true;
208                     break;
209                 default:
210                     if (commentIn.startsWith("@DEPRECATED:")) {
211                         deprecatedValues = Collections.unmodifiableSet(new HashSet<>(COMMA.splitToList(commentIn.substring("@DEPRECATED:".length()))));
212                         break;
213                     }
214                     throw new IllegalArgumentException("Unrecognized annotation: " + commentIn);
215                 }
216                 return;
217             }
218             commentsPost = addUnmodifiable(commentsPost, commentIn.trim());
219         }
220 
221         /**
222          * Special version of identity; only considers name and name of element
223          */
224         @Override
equals(Object obj)225         public boolean equals(Object obj) {
226             if (!(obj instanceof Attribute)) {
227                 return false;
228             }
229             Attribute that = (Attribute) obj;
230             return name.equals(that.name)
231                 && element.name.equals(that.element.name) // don't use plain element: circularity
232             // not relevant to identity
233             //                && Objects.equals(comment, that.comment)
234             //                && mode.equals(that.mode)
235             //                && Objects.equals(defaultValue, that.defaultValue)
236             //                && type.equals(that.type)
237             //                && values.equals(that.values)
238             ;
239         }
240 
241         /**
242          * Special version of identity; only considers name and name of element
243          */
244         @Override
hashCode()245         public int hashCode() {
246             return name.hashCode() * 37
247                 + element.name.hashCode() // don't use plain element: circularity
248             // not relevant to identity
249             //                ) * 37 + Objects.hashCode(comment)) * 37
250             //                + mode.hashCode()) * 37
251             //                + Objects.hashCode(defaultValue)) * 37
252             //                + type.hashCode()) * 37
253             //                + values.hashCode()
254             ;
255         }
256 
isDeprecated()257         public boolean isDeprecated() {
258             return isDeprecatedAttribute;
259         }
260 
isDeprecatedValue(String value)261         public boolean isDeprecatedValue(String value) {
262             return deprecatedValues.contains(value);
263         }
264 
getStatus()265         public AttributeStatus getStatus() {
266             return attributeStatus;
267         }
268 
269     }
270 
DtdData(DtdType type, String version)271     private DtdData(DtdType type, String version) {
272         this.dtdType = type;
273         this.ROOT = elementFrom(type.rootType.toString());
274         this.version = version;
275     }
276 
addAttribute(String eName, String aName, String type, String mode, String value)277     private void addAttribute(String eName, String aName, String type, String mode, String value) {
278         Attribute a = new Attribute(dtdType, nameToElement.get(eName), aName, Mode.forString(mode), FILLER.split(type), value, preCommentCache);
279         preCommentCache = null;
280         getAttributesFromName().put(aName, a);
281         CldrUtility.putNew(a.element.attributes, a, a.element.attributes.size());
282         lastElement = null;
283         lastAttribute = a;
284     }
285 
286     public enum ElementType {
287         EMPTY, ANY, PCDATA("(#PCDATA)"), CHILDREN;
288         public final String source;
289 
ElementType(String s)290         private ElementType(String s) {
291             source = s;
292         }
293 
ElementType()294         private ElementType() {
295             source = name();
296         }
297     }
298 
299     interface Named {
getName()300         String getName();
301     }
302 
303     public enum ElementStatus {
304         regular, metadata
305     }
306 
307     public static class Element implements Named {
308         public final String name;
309         private String rawModel;
310         private ElementType type;
311         private final Map<Element, Integer> children = new LinkedHashMap<Element, Integer>();
312         private final Map<Attribute, Integer> attributes = new LinkedHashMap<Attribute, Integer>();
313         private Set<String> commentsPre;
314         private Set<String> commentsPost;
315         private String model;
316         private boolean isOrderedElement;
317         private boolean isDeprecatedElement;
318         private ElementStatus elementStatus = ElementStatus.regular;
319 
Element(String name2)320         private Element(String name2) {
321             name = name2.intern();
322         }
323 
setChildren(DtdData dtdData, String model, Set<String> precomments)324         private void setChildren(DtdData dtdData, String model, Set<String> precomments) {
325             this.commentsPre = precomments;
326             rawModel = model;
327             this.model = clean(model);
328             if (model.equals("EMPTY")) {
329                 type = ElementType.EMPTY;
330                 return;
331             }
332             type = ElementType.CHILDREN;
333             for (String part : FILLER.split(model)) {
334                 if (part.length() != 0) {
335                     if (part.equals("#PCDATA")) {
336                         type = ElementType.PCDATA;
337                     } else if (part.equals("ANY")) {
338                         type = ElementType.ANY;
339                     } else {
340                         CldrUtility.putNew(children, dtdData.elementFrom(part), children.size());
341                     }
342                 }
343             }
344             if ((type == ElementType.CHILDREN) == (children.size() == 0)
345                 && !model.startsWith("(#PCDATA|cp")) {
346                 throw new IllegalArgumentException("CLDR does not permit Mixed content. " + name + ":" + model);
347             }
348         }
349 
350         static final Pattern CLEANER1 = PatternCache.get("([,|(])(?=\\S)");
351         static final Pattern CLEANER2 = PatternCache.get("(?=\\S)([|)])");
352 
clean(String model2)353         private String clean(String model2) {
354             // (x) -> ( x );
355             // x,y -> x, y
356             // x|y -> x | y
357             String result = CLEANER1.matcher(model2).replaceAll("$1 ");
358             result = CLEANER2.matcher(result).replaceAll(" $1");
359             return result.equals(model2)
360                 ? model2
361                 : result; // for debugging
362         }
363 
containsAttribute(String string)364         public boolean containsAttribute(String string) {
365             for (Attribute a : attributes.keySet()) {
366                 if (a.name.equals(string)) {
367                     return true;
368                 }
369             }
370             return false;
371         }
372 
373         @Override
toString()374         public String toString() {
375             return name;
376         }
377 
toDtdString()378         public String toDtdString() {
379             return "<!ELEMENT " + name + " " + getRawModel() + " >";
380         }
381 
getType()382         public ElementType getType() {
383             return type;
384         }
385 
getChildren()386         public Map<Element, Integer> getChildren() {
387             return Collections.unmodifiableMap(children);
388         }
389 
getAttributes()390         public Map<Attribute, Integer> getAttributes() {
391             return Collections.unmodifiableMap(attributes);
392         }
393 
394         @Override
getName()395         public String getName() {
396             return name;
397         }
398 
getChildNamed(String string)399         public Element getChildNamed(String string) {
400             for (Element e : children.keySet()) {
401                 if (e.name.equals(string)) {
402                     return e;
403                 }
404             }
405             return null;
406         }
407 
getAttributeNamed(String string)408         public Attribute getAttributeNamed(String string) {
409             for (Attribute a : attributes.keySet()) {
410                 if (a.name.equals(string)) {
411                     return a;
412                 }
413             }
414             return null;
415         }
416 
addComment(String addition)417         public void addComment(String addition) {
418             if (addition.startsWith("@")) {
419                 // there are exactly 3 cases: deprecated, ordered, and metadata
420                 switch (addition) {
421                 case "@ORDERED":
422                     isOrderedElement = true;
423                     break;
424                 case "@DEPRECATED":
425                     isDeprecatedElement = true;
426                     break;
427                 case "@METADATA":
428                     elementStatus = ElementStatus.metadata;
429                     break;
430                 default:
431                     throw new IllegalArgumentException("Unrecognized annotation: " + addition);
432                 }
433                 return;
434             }
435             commentsPost = addUnmodifiable(commentsPost, addition.trim());
436         }
437 
438         /**
439          * Special version of equals. Only the name is considered in the identity.
440          */
441         @Override
equals(Object obj)442         public boolean equals(Object obj) {
443             if (!(obj instanceof Element)) {
444                 return false;
445             }
446             Element that = (Element) obj;
447             return name.equals(that.name)
448             // not relevant to the identity of the object
449             //                && Objects.equals(comment, that.comment)
450             //                && type == that.type
451             //                && attributes.equals(that.attributes)
452             //                && children.equals(that.children)
453             ;
454         }
455 
456         /**
457          * Special version of hashcode. Only the name is considered in the identity.
458          */
459         @Override
hashCode()460         public int hashCode() {
461             return name.hashCode()
462             // not relevant to the identity of the object
463             // * 37 + Objects.hashCode(comment)
464             //) * 37 + Objects.hashCode(type)
465             //                ) * 37 + attributes.hashCode()
466             //                ) * 37 + children.hashCode()
467             ;
468         }
469 
isDeprecated()470         public boolean isDeprecated() {
471             return isDeprecatedElement;
472         }
473 
getElementStatus()474         public ElementStatus getElementStatus() {
475             return elementStatus;
476         }
477 
478         /**
479          * @return the rawModel
480          */
getRawModel()481         public String getRawModel() {
482             return rawModel;
483         }
484     }
485 
elementFrom(String name)486     private Element elementFrom(String name) {
487         Element result = nameToElement.get(name);
488         if (result == null) {
489             nameToElement.put(name, result = new Element(name));
490         }
491         return result;
492     }
493 
addElement(String name2, String model)494     private void addElement(String name2, String model) {
495         Element element = elementFrom(name2);
496         element.setChildren(this, model, preCommentCache);
497         preCommentCache = null;
498         lastElement = element;
499         lastAttribute = null;
500     }
501 
addComment(String comment)502     private void addComment(String comment) {
503         comment = comment.trim();
504         if (preCommentCache != null || comment.startsWith("#")) { // the precomments are "sticky"
505             if (comment.startsWith("@")) {
506                 throw new IllegalArgumentException("@ annotation comment must follow element or attribute, without intervening # comment");
507             }
508             preCommentCache = addUnmodifiable(preCommentCache, comment);
509         } else if (lastElement != null) {
510             lastElement.addComment(comment);
511         } else if (lastAttribute != null) {
512             lastAttribute.addComment(comment);
513         } else {
514             if (comment.startsWith("@")) {
515                 throw new IllegalArgumentException("@ annotation comment must follow element or attribute, without intervening # comment");
516             }
517             preCommentCache = addUnmodifiable(preCommentCache, comment);
518         }
519     }
520 
521     // TODO hide this
522     /**
523      * @deprecated
524      */
525     @Override
handleElementDecl(String name, String model)526     public void handleElementDecl(String name, String model) {
527         if (SHOW_ALL) {
528             // <!ELEMENT ldml (identity, (alias | (fallback*, localeDisplayNames?, layout?, contextTransforms?, characters?, delimiters?, measurement?, dates?, numbers?, units?, listPatterns?, collations?, posix?, segmentations?, rbnf?, annotations?, metadata?, references?, special*))) >
529             System.out.println(System.lineSeparator() + "<!ELEMENT " + name + " " + model + " >");
530         }
531         addElement(name, model);
532     }
533 
534     // TODO hide this
535     /**
536      * @deprecated
537      */
538     @Override
handleStartDtd(String name, String publicId, String systemId)539     public void handleStartDtd(String name, String publicId, String systemId) {
540         DtdType explicitDtdType = DtdType.valueOf(name);
541         if (explicitDtdType != dtdType && explicitDtdType != dtdType.rootType) {
542             throw new IllegalArgumentException("Mismatch in dtdTypes");
543         }
544     };
545 
546     /**
547      * @deprecated
548      */
549     @Override
handleAttributeDecl(String eName, String aName, String type, String mode, String value)550     public void handleAttributeDecl(String eName, String aName, String type, String mode, String value) {
551         if (SHOW_ALL) {
552             // <!ATTLIST ldml draft ( approved | contributed | provisional | unconfirmed | true | false ) #IMPLIED >
553             // <!ATTLIST version number CDATA #REQUIRED >
554             // <!ATTLIST version cldrVersion CDATA #FIXED "27" >
555 
556             System.out.println("<!ATTLIST " + eName
557                 + " " + aName
558                 + " " + type
559                 + " " + mode
560                 + (value == null ? "" : " \"" + value + "\"")
561                 + " >");
562         }
563         // HACK for 1.1.1
564         if (eName.equals("draft")) {
565             eName = "week";
566         }
567         addAttribute(eName, aName, type, mode, value);
568     }
569 
570     /**
571      * @deprecated
572      */
573     @Override
handleComment(String path, String comment)574     public void handleComment(String path, String comment) {
575         if (SHOW_ALL) {
576             // <!-- true and false are deprecated. -->
577             System.out.println("<!-- " + comment.trim() + " -->");
578         }
579         addComment(comment);
580     }
581 
582     // TODO hide this
583     /**
584      * @deprecated
585      */
586     @Override
handleEndDtd()587     public void handleEndDtd() {
588         throw new XMLFileReader.AbortException();
589     }
590 
591     //    static final Map<CLDRFile.DtdType, String> DTD_TYPE_TO_FILE;
592     //    static {
593     //        EnumMap<CLDRFile.DtdType, String> temp = new EnumMap<CLDRFile.DtdType, String>(CLDRFile.DtdType.class);
594     //        temp.put(CLDRFile.DtdType.ldml, CldrUtility.BASE_DIRECTORY + "common/dtd/ldml.dtd");
595     //        temp.put(CLDRFile.DtdType.supplementalData, CldrUtility.BASE_DIRECTORY + "common/dtd/ldmlSupplemental.dtd");
596     //        temp.put(CLDRFile.DtdType.ldmlBCP47, CldrUtility.BASE_DIRECTORY + "common/dtd/ldmlBCP47.dtd");
597     //        temp.put(CLDRFile.DtdType.keyboard, CldrUtility.BASE_DIRECTORY + "keyboards/dtd/ldmlKeyboard.dtd");
598     //        temp.put(CLDRFile.DtdType.platform, CldrUtility.BASE_DIRECTORY + "keyboards/dtd/ldmlPlatform.dtd");
599     //        DTD_TYPE_TO_FILE = Collections.unmodifiableMap(temp);
600     //    }
601 
602     /**
603      * Normal version of DtdData
604      * Note that it always gets the trunk version
605      */
getInstance(DtdType type)606     public static DtdData getInstance(DtdType type) {
607         return CACHE.get(type);
608     }
609 
610     /**
611      * Special form using version, used only by tests, etc.
612      */
getInstance(DtdType type, String version)613     public static DtdData getInstance(DtdType type, String version) {
614         DtdData simpleHandler = new DtdData(type, version);
615         XMLFileReader xfr = new XMLFileReader().setHandler(simpleHandler);
616         File directory = version == null ? CLDRConfig.getInstance().getCldrBaseDirectory()
617             : new File(CLDRPaths.ARCHIVE_DIRECTORY + "/cldr-" + version);
618 
619         if (type != type.rootType) {
620             // read the real first, then add onto it.
621             readFile(type.rootType, xfr, directory);
622         }
623         readFile(type, xfr, directory);
624         // HACK
625         if (type == DtdType.ldmlICU) {
626             Element special = simpleHandler.nameToElement.get("special");
627             for (String extraElementName : Arrays.asList(
628                 "icu:breakIteratorData",
629                 "icu:UCARules",
630                 "icu:scripts",
631                 "icu:transforms",
632                 "icu:ruleBasedNumberFormats",
633                 "icu:isLeapMonth",
634                 "icu:version",
635                 "icu:breakDictionaryData",
636                 "icu:depends")) {
637                 Element extraElement = simpleHandler.nameToElement.get(extraElementName);
638                 special.children.put(extraElement, special.children.size());
639             }
640         }
641         if (simpleHandler.ROOT.children.size() == 0) {
642             throw new IllegalArgumentException(); // should never happen
643         }
644         simpleHandler.finish();
645         simpleHandler.freeze();
646         return simpleHandler;
647     }
648 
finish()649     private void finish() {
650         dtdComparator = new DtdComparator();
651     }
652 
readFile(DtdType type, XMLFileReader xfr, File directory)653     public static void readFile(DtdType type, XMLFileReader xfr, File directory) {
654         File file = new File(directory, type.dtdPath);
655         StringReader s = new StringReader("<?xml version='1.0' encoding='UTF-8' ?>"
656             + "<!DOCTYPE " + type
657             + " SYSTEM '" + file.getAbsolutePath() + "'>");
658         xfr.read(type.toString(), s, -1, true); //  DTD_TYPE_TO_FILE.get(type)
659     }
660 
freeze()661     private void freeze() {
662         if (version == null) { // only generate for new versions
663             MergeLists<String> elementMergeList = new MergeLists<String>();
664             elementMergeList.add(dtdType.toString());
665             MergeLists<String> attributeMergeList = new MergeLists<String>();
666             attributeMergeList.add("_q");
667 
668             for (Element element : nameToElement.values()) {
669                 if (element.children.size() > 0) {
670                     Collection<String> names = getNames(element.children.keySet());
671                     elementMergeList.add(names);
672                     if (DEBUG) {
673                         System.out.println(element.getName() + "\t→\t" + names);
674                     }
675                 }
676                 if (element.attributes.size() > 0) {
677                     Collection<String> names = getNames(element.attributes.keySet());
678                     attributeMergeList.add(names);
679                     if (DEBUG) {
680                         System.out.println(element.getName() + "\t→\t@" + names);
681                     }
682                 }
683             }
684             List<String> elementList = elementMergeList.merge();
685             List<String> attributeList = attributeMergeList.merge();
686             if (DEBUG) {
687                 System.out.println("Element Ordering:\t" + elementList);
688                 System.out.println("Attribute Ordering:\t" + attributeList);
689             }
690             // double-check
691             //        for (Element element : elements) {
692             //            if (!MergeLists.hasConsistentOrder(elementList, element.children.keySet())) {
693             //                throw new IllegalArgumentException("Failed to find good element order: " + element.children.keySet());
694             //            }
695             //            if (!MergeLists.hasConsistentOrder(attributeList, element.attributes.keySet())) {
696             //                throw new IllegalArgumentException("Failed to find good attribute order: " + element.attributes.keySet());
697             //            }
698             //        }
699             elementComparator = new MapComparator<String>(elementList).setErrorOnMissing(true).freeze();
700             attributeComparator = new MapComparator<String>(attributeList).setErrorOnMissing(true).freeze();
701         }
702         nameToAttributes.freeze();
703         nameToElement = Collections.unmodifiableMap(nameToElement);
704     }
705 
getNames(Collection<? extends Named> keySet)706     private Collection<String> getNames(Collection<? extends Named> keySet) {
707         List<String> result = new ArrayList<String>();
708         for (Named e : keySet) {
709             result.add(e.getName());
710         }
711         return result;
712     }
713 
714     public enum DtdItem {
715         ELEMENT, ATTRIBUTE, ATTRIBUTE_VALUE
716     }
717 
718     public interface AttributeValueComparator {
compare(String element, String attribute, String value1, String value2)719         public int compare(String element, String attribute, String value1, String value2);
720     }
721 
getDtdComparator(AttributeValueComparator avc)722     public Comparator<String> getDtdComparator(AttributeValueComparator avc) {
723         return dtdComparator;
724     }
725 
    /**
     * Orders two XPath strings according to the structure of this DTD.
     * <p>
     * Both paths must begin with this DTD's root element, otherwise an
     * {@link IllegalArgumentException} is thrown. The paths are then walked in parallel, level by
     * level: the first level at which the elements differ is decided by the elements' values in the
     * parent's {@code children} map; if the elements are equal, the first attribute whose values
     * differ decides. If all shared levels are equal, the shorter path sorts first.
     */
    private class DtdComparator implements Comparator<String> {
        @Override
        public int compare(String path1, String path2) {
            XPathParts a = XPathParts.getFrozenInstance(path1);
            XPathParts b = XPathParts.getFrozenInstance(path2);
            // there must always be at least one element
            String baseA = a.getElement(0);
            String baseB = b.getElement(0);
            // Both paths have to be rooted at this DTD's root element.
            if (!ROOT.name.equals(baseA) || !ROOT.name.equals(baseB)) {
                throw new IllegalArgumentException("Comparing different DTDs: " + ROOT.name + ", " + baseA + ", " + baseB);
            }
            int min = Math.min(a.size(), b.size());
            Element parent = ROOT;
            Element elementA;
            // Level 0 (the root) was checked above; after each fully-equal level, descend into elementA.
            for (int i = 1; i < min; ++i, parent = elementA) {
                // add extra test for "fake" elements, used in diffing. they always start with _
                String elementRawA = a.getElement(i);
                String elementRawB = b.getElement(i);
                // A fake element sorts before any real element; two fake elements compare by name.
                if (elementRawA.startsWith("_")) {
                    return elementRawB.startsWith("_") ? elementRawA.compareTo(elementRawB) : -1;
                } else if (elementRawB.startsWith("_")) {
                    return 1;
                }
                //
                elementA = nameToElement.get(elementRawA);
                Element elementB = nameToElement.get(elementRawB);
                if (elementA != elementB) {
                    // Different elements: order by the values stored for them in the parent's children map.
                    // NOTE(review): if children.get(...) can return null (element not a child of parent),
                    // the unboxing below would throw NullPointerException - confirm inputs are always valid.
                    int aa = parent.children.get(elementA);
                    int bb = parent.children.get(elementB);
                    return aa - bb;
                }
                int countA = a.getAttributeCount(i);
                int countB = b.getAttributeCount(i);
                if (countA == 0 && countB == 0) {
                    continue;
                }
                // we have two ways to compare the attributes. One based on the dtd,
                // and one based on explicit comparators

                // at this point the elements are the same and correspond to elementA
                // in the dtd

                // Handle the special added elements
                // "_q" is compared numerically and is not part of elementA.attributes, so it is
                // handled (and counted off) before the DTD attributes.
                // NOTE(review): if only path a has "_q", Integer.parseInt(bqValue) receives null and
                // throws NumberFormatException; if only path b has it, the counts never reach zero and
                // the IllegalArgumentException after the attribute loop is thrown.
                String aqValue = a.getAttributeValue(i, "_q");
                if (aqValue != null) {
                    String bqValue = b.getAttributeValue(i, "_q");
                    if (!aqValue.equals(bqValue)) {
                        int aValue = Integer.parseInt(aqValue);
                        int bValue = Integer.parseInt(bqValue);
                        return aValue - bValue;
                    }
                    --countA;
                    --countB;
                }

                // Walk the attributes in the iteration order of elementA.attributes; the first
                // attribute that differs between the two paths decides the result.
                attributes: for (Entry<Attribute, Integer> attr : elementA.attributes.entrySet()) {
                    Attribute main = attr.getKey();
                    String valueA = a.getAttributeValue(i, main.name);
                    String valueB = b.getAttributeValue(i, main.name);
                    if (valueA == null) {
                        // A path lacking the attribute sorts before a path that has it.
                        if (valueB != null) {
                            return -1;
                        }
                    } else if (valueB == null) {
                        return 1;
                    } else if (valueA.equals(valueB)) {
                        // Same value on both sides: count it off and stop early once every
                        // attribute present on both paths has been matched.
                        --countA;
                        --countB;
                        if (countA == 0 && countB == 0) {
                            break attributes;
                        }
                        continue; // TODO
                    } else if (main.attributeValueComparator != null) {
                        // Values differ: prefer the attribute's own comparator if it has one...
                        return main.attributeValueComparator.compare(valueA, valueB);
                    } else if (main.values.size() != 0) {
                        // ...otherwise order by the values recorded for the attribute in main.values...
                        int aa = main.values.get(valueA);
                        int bb = main.values.get(valueB);
                        return aa - bb;
                    } else {
                        // ...and fall back to plain string comparison.
                        return valueA.compareTo(valueB);
                    }
                }
                // Any attribute left uncounted was not matched by an entry of elementA.attributes.
                if (countA != 0 || countB != 0) {
                    throw new IllegalArgumentException();
                }
            }
            // All shared levels are equal: the path with fewer elements sorts first.
            return a.size() - b.size();
        }
    }
815 
getAttributeComparator()816     public MapComparator<String> getAttributeComparator() {
817         return attributeComparator;
818     }
819 
getElementComparator()820     public MapComparator<String> getElementComparator() {
821         return elementComparator;
822     }
823 
getAttributesFromName()824     public Relation<String, Attribute> getAttributesFromName() {
825         return nameToAttributes;
826     }
827 
getElementFromName()828     public Map<String, Element> getElementFromName() {
829         return nameToElement;
830     }
831 
832     //    private static class XPathIterator implements SimpleIterator<Node> {
833     //        private String path;
834     //        private int position; // at the start of the next element, or at the end of the string
835     //        private Node node = new Node();
836     //
837     //        public void set(String path) {
838     //            if (!path.startsWith("//")) {
839     //                throw new IllegalArgumentException();
840     //            }
841     //            this.path = path;
842     //            this.position = 2;
843     //        }
844     //
845     //        @Override
846     //        public Node next() {
847     //            // starts with /...[@...="...."]...
848     //            if (position >= path.length()) {
849     //                return null;
850     //            }
851     //            node.elementName = "";
852     //            node.attributes.clear();
853     //            int start = position;
854     //            // collect the element
855     //            while (true) {
856     //                if (position >= path.length()) {
857     //                    return node;
858     //                }
859     //                char ch = path.charAt(position++);
860     //                switch (ch) {
861     //                case '/':
862     //                    return node;
863     //                case '[':
864     //                    node.elementName = path.substring(start, position);
865     //                    break;
866     //                }
867     //            }
868     //            // done with element, we hit a [, collect the attributes
869     //
870     //            if (path.charAt(position++) != '@') {
871     //                throw new IllegalArgumentException();
872     //            }
873     //            while (true) {
874     //                if (position >= path.length()) {
875     //                    return node;
876     //                }
877     //                char ch = path.charAt(position++);
878     //                switch (ch) {
879     //                case '/':
880     //                    return node;
881     //                case '[':
882     //                    node.elementName = path.substring(start, position);
883     //                    break;
884     //                }
885     //            }
886     //        }
887     //    }
888 
toString()889     public String toString() {
890         StringBuilder b = new StringBuilder();
891         // <!ELEMENT ldml (identity, (alias | (fallback*, localeDisplayNames?, layout?, contextTransforms?, characters?, delimiters?, measurement?, dates?, numbers?, units?, listPatterns?, collations?, posix?, segmentations?, rbnf?, metadata?, references?, special*))) >
892         // <!ATTLIST ldml draft ( approved | contributed | provisional | unconfirmed | true | false ) #IMPLIED > <!-- true and false are deprecated. -->
893 //        if (firstComment != null) {
894 //            b.append("\n<!--").append(firstComment).append("-->");
895 //        }
896         Seen seen = new Seen(dtdType);
897         seen.seenElements.add(ANY);
898         seen.seenElements.add(PCDATA);
899         toString(ROOT, b, seen);
900 
901         // Hack for ldmlIcu: catch the items that are not mentioned in the original
902         int currentEnd = b.length();
903         for (Element e : nameToElement.values()) {
904             toString(e, b, seen);
905         }
906         if (currentEnd != b.length()) {
907             b.insert(currentEnd,
908                 System.lineSeparator() + System.lineSeparator()
909                     + "<!-- Elements not reachable from root! -->"
910                     + System.lineSeparator());
911         }
912         return b.toString();
913     }
914 
915     static final class Seen {
916         Set<Element> seenElements = new HashSet<Element>();
917         Set<Attribute> seenAttributes = new HashSet<Attribute>();
918 
Seen(DtdType dtdType)919         public Seen(DtdType dtdType) {
920             if (dtdType.rootType == dtdType) {
921                 return;
922             }
923             DtdData otherData = DtdData.getInstance(dtdType.rootType);
924             walk(otherData, otherData.ROOT);
925             seenElements.remove(otherData.nameToElement.get("special"));
926         }
927 
walk(DtdData otherData, Element current)928         private void walk(DtdData otherData, Element current) {
929             seenElements.add(current);
930             seenAttributes.addAll(current.attributes.keySet());
931             for (Element e : current.children.keySet()) {
932                 walk(otherData, e);
933             }
934         }
935     }
936 
getDescendents(Element start, Set<Element> toAddTo)937     public Set<Element> getDescendents(Element start, Set<Element> toAddTo) {
938         if (!toAddTo.contains(start)) {
939             toAddTo.add(start);
940             for (Element e : start.children.keySet()) {
941                 getDescendents(e, toAddTo);
942             }
943         }
944         return toAddTo;
945     }
946 
947     //static final SupplementalDataInfo supplementalDataInfo = CLDRConfig.getInstance().getSupplementalDataInfo();
948 
    /**
     * Appends the declaration of {@code current} to {@code b}: its ELEMENT line, its ATTLIST
     * lines, the stored comments, and the {@code <!--@...-->} annotation comments, then does the
     * same recursively for each child. Elements and attributes already recorded in {@code seen}
     * are skipped, and everything printed here is recorded in {@code seen}.
     *
     * @param current the element to print
     * @param b the buffer that receives the output
     * @param seen the elements and attributes already handled; updated by this method
     */
    private void toString(Element current, StringBuilder b, Seen seen) {
//        if ("calendar".equals(current.name) || current.commentsPost != null && current.commentsPost.contains("use of fields")) {
//            int debug = 0;
//        }
        boolean first = true;
        if (seen.seenElements.contains(current)) {
            return;
        }
        seen.seenElements.add(current);
        boolean elementDeprecated = isDeprecated(current.name, "*", "*");

        showComments(b, current.commentsPre, true);
        b.append("\n\n<!ELEMENT " + current.name + " " + current.model + " >");
        // USE_SYNTHESIZED is a compile-time false constant, so this branch is currently inactive.
        // When enabled it appends a content model rebuilt from current.children.
        if (USE_SYNTHESIZED) {
            Element aliasElement = getElementFromName().get("alias");
            //b.append(current.rawChildren);
            if (!current.children.isEmpty()) {
                LinkedHashSet<Element> elements = new LinkedHashSet<Element>(current.children.keySet());
                boolean hasAlias = aliasElement != null && elements.remove(aliasElement);
                //boolean hasSpecial = specialElement != null && elements.remove(specialElement);
                if (hasAlias) {
                    b.append("(alias |");
                }
                b.append("(");
                // <!ELEMENT transformNames ( alias | (transformName | special)* ) >
                // <!ELEMENT layout ( alias | (orientation*, inList*, inText*, special*) ) >

                for (Element e : elements) {
                    if (first) {
                        first = false;
                    } else {
                        b.append(", ");
                    }
                    b.append(e.name);
                    if (e.type != ElementType.PCDATA) {
                        b.append("*");
                    }
                }
                if (hasAlias) {
                    b.append(")");
                }
                b.append(")");
            } else {
                b.append(current.type == null ? "???" : current.type.source);
            }
            b.append(">");
        }
        showComments(b, current.commentsPost, false);
        // Annotation comments for the element: ordering, non-regular status, deprecation.
        if (isOrdered(current.name)) {
            b.append(COMMENT_PREFIX + "<!--@ORDERED-->");
        }
        if (current.getElementStatus() != ElementStatus.regular) {
            b.append(COMMENT_PREFIX + "<!--@"
                + current.getElementStatus().toString().toUpperCase(Locale.ROOT)
                + "-->");
        }
        if (elementDeprecated) {
            b.append(COMMENT_PREFIX + "<!--@DEPRECATED-->");
        }

        // Reused for each attribute; collects enumerated values that are individually deprecated.
        LinkedHashSet<String> deprecatedValues = new LinkedHashSet<>();

        for (Attribute a : current.attributes.keySet()) {
            if (seen.seenAttributes.contains(a)) {
                continue;
            }
            seen.seenAttributes.add(a);
            // An attribute of a deprecated element is treated as deprecated as well.
            boolean attributeDeprecated = elementDeprecated || isDeprecated(current.name, a.name, "*");

            deprecatedValues.clear();

            showComments(b, a.commentsPre, true);
            b.append("\n<!ATTLIST " + current.name + " " + a.name);
            if (a.type == AttributeType.ENUMERATED_TYPE) {
                // Enumerated attribute: list the allowed values separated by " | ".
                b.append(" (");
                first = true;
                for (String s : a.values.keySet()) {
                    if (first) {
                        first = false;
                    } else {
                        b.append(" | ");
                    }
                    b.append(s);
                    // Per-value deprecation is only tracked when the attribute as a whole is not deprecated.
                    if (!attributeDeprecated && isDeprecated(current.name, a.name, s)) {
                        deprecatedValues.add(s);
                    }
                }
                b.append(")");
            } else {
                b.append(' ').append(a.type);
            }
            if (a.mode != Mode.NULL) {
                b.append(" ").append(a.mode.source);
            }
            if (a.defaultValue != null) {
                b.append(" \"").append(a.defaultValue).append('"');
            }
            b.append(" >");
            showComments(b, a.commentsPost, false);
//            if (attributeDeprecated != deprecatedComment) {
//                System.out.println("*** BAD DEPRECATION ***" + a);
//            }
            // Annotation comments for the attribute: metadata takes precedence over value;
            // a distinguishing attribute gets neither.
            if (METADATA.contains(a.name) || a.attributeStatus == AttributeStatus.metadata) {
                b.append(COMMENT_PREFIX + "<!--@METADATA-->");
            } else if (!isDistinguishing(current.name, a.name)) {
                b.append(COMMENT_PREFIX + "<!--@VALUE-->");
            }
            if (attributeDeprecated) {
                b.append(COMMENT_PREFIX + "<!--@DEPRECATED-->");
            } else if (!deprecatedValues.isEmpty()) {
                b.append(COMMENT_PREFIX + "<!--@DEPRECATED:" + CollectionUtilities.join(deprecatedValues, ", ") + "-->");
            }
        }
        // Recurse into the children; elements already printed return immediately.
        if (current.children.size() > 0) {
            for (Element e : current.children.keySet()) {
                toString(e, b, seen);
            }
        }
    }
1068 
showComments(StringBuilder b, Set<String> comments, boolean separate)1069     private void showComments(StringBuilder b, Set<String> comments, boolean separate) {
1070         if (comments == null) {
1071             return;
1072         }
1073         if (separate && b.length() != 0) {
1074             b.append(System.lineSeparator());
1075         }
1076         for (String c : comments) {
1077             boolean deprecatedComment = false; // the following served its purpose... c.toLowerCase(Locale.ENGLISH).contains("deprecat");
1078             if (!deprecatedComment) {
1079                 if (separate) {
1080                     // special handling for very first comment
1081                     if (b.length() == 0) {
1082                         b.append("<!--")
1083                             .append(System.lineSeparator())
1084                             .append(c)
1085                             .append(System.lineSeparator())
1086                             .append("-->");
1087                         continue;
1088                     }
1089                     b.append(System.lineSeparator());
1090                 } else {
1091                     b.append(COMMENT_PREFIX);
1092                 }
1093                 b.append("<!-- ").append(c).append(" -->");
1094             }
1095         }
1096     }
1097 
removeFirst(Collection<T> elements, Transform<T, Boolean> matcher)1098     public static <T> T removeFirst(Collection<T> elements, Transform<T, Boolean> matcher) {
1099         for (Iterator<T> it = elements.iterator(); it.hasNext();) {
1100             T item = it.next();
1101             if (matcher.transform(item) == Boolean.TRUE) {
1102                 it.remove();
1103                 return item;
1104             }
1105         }
1106         return null;
1107     }
1108 
getElements()1109     public Set<Element> getElements() {
1110         return new LinkedHashSet<Element>(nameToElement.values());
1111     }
1112 
getAttributes()1113     public Set<Attribute> getAttributes() {
1114         return new LinkedHashSet<Attribute>(nameToAttributes.values());
1115     }
1116 
isDistinguishing(String elementName, String attribute)1117     public boolean isDistinguishing(String elementName, String attribute) {
1118         return getAttributeStatus(elementName, attribute) == AttributeStatus.distinguished;
1119     }
1120 
1121     static final Set<String> METADATA = new HashSet<>(Arrays.asList("references", "standard", "draft"));
1122 
addUnmodifiable(Set<String> comment, String addition)1123     static final Set<String> addUnmodifiable(Set<String> comment, String addition) {
1124         if (comment == null) {
1125             return Collections.singleton(addition);
1126         } else {
1127             comment = new LinkedHashSet<>(comment);
1128             comment.add(addition);
1129             return Collections.unmodifiableSet(comment);
1130         }
1131     }
1132 
1133     public class IllegalByDtdException extends RuntimeException {
1134         private static final long serialVersionUID = 1L;
1135         public final String elementName;
1136         public final String attributeName;
1137         public final String attributeValue;
1138 
IllegalByDtdException(String elementName, String attributeName, String attributeValue)1139         public IllegalByDtdException(String elementName, String attributeName, String attributeValue) {
1140             this.elementName = elementName;
1141             this.attributeName = attributeName;
1142             this.attributeValue = attributeValue;
1143         }
1144 
1145         @Override
getMessage()1146         public String getMessage() {
1147             return "Dtd " + dtdType
1148                 + " doesn’t allow "
1149                 + "element=" + elementName
1150                 + (attributeName == null ? "" : ", attribute: " + attributeName)
1151                 + (attributeValue == null ? "" : ", attributeValue: " + attributeValue);
1152         }
1153     }
1154 
1155     //@SuppressWarnings("unused")
isDeprecated(String elementName, String attributeName, String attributeValue)1156     public boolean isDeprecated(String elementName, String attributeName, String attributeValue) {
1157         Element element = nameToElement.get(elementName);
1158         if (element == null) {
1159             throw new IllegalByDtdException(elementName, attributeName, attributeValue);
1160         } else if (element.isDeprecatedElement) {
1161             return true;
1162         }
1163         if ("*".equals(attributeName) || "_q".equals(attributeName)) {
1164             return false;
1165         }
1166         Attribute attribute = element.getAttributeNamed(attributeName);
1167         if (attribute == null) {
1168             throw new IllegalByDtdException(elementName, attributeName, attributeValue);
1169         } else if (attribute.isDeprecatedAttribute) {
1170             return true;
1171         }
1172         return attribute.deprecatedValues.contains(attributeValue); // don't need special test for "*"
1173     }
1174 
isOrdered(String elementName)1175     public boolean isOrdered(String elementName) {
1176         Element element = nameToElement.get(elementName);
1177         if (element == null) {
1178             if (elementName.startsWith("icu:")) {
1179                 return false;
1180             }
1181             throw new IllegalByDtdException(elementName, null, null);
1182         }
1183         return element.isOrderedElement;
1184     }
1185 
getAttributeStatus(String elementName, String attributeName)1186     public AttributeStatus getAttributeStatus(String elementName, String attributeName) {
1187         if ("_q".equals(attributeName)) {
1188             return AttributeStatus.distinguished; // special case
1189         }
1190         if ("#PCDATA".equals(elementName)) {
1191             int debug = 1;
1192         }
1193         Element element = nameToElement.get(elementName);
1194         if (element == null) {
1195             if (elementName.startsWith("icu:")) {
1196                 return AttributeStatus.distinguished;
1197             }
1198             throw new IllegalByDtdException(elementName, attributeName, null);
1199         }
1200         Attribute attribute = element.getAttributeNamed(attributeName);
1201         if (attribute == null) {
1202             if (elementName.startsWith("icu:")) {
1203                 return AttributeStatus.distinguished;
1204             }
1205             throw new IllegalByDtdException(elementName, attributeName, null);
1206         }
1207         return attribute.attributeStatus;
1208     }
1209 
    // Fixed orderings for attribute values. getAttributeValueComparator(DtdType, String, String)
    // picks one of these based on the element and attribute names.

    // The default is a map comparator, which compares numbers as numbers, and strings with UCA
    private static MapComparator<String> valueOrdering = new MapComparator<String>().setErrorOnMissing(false).freeze();

    // Weekday order; used for the "day" attribute and for "type" on the "day" element.
    static MapComparator<String> dayValueOrder = new MapComparator<String>().add(
        "sun", "mon", "tue", "wed", "thu", "fri", "sat").freeze();
    // Order for "type" on the "dayPeriod" element.
    static MapComparator<String> dayPeriodOrder = new MapComparator<String>().add(
        "midnight", "am", "noon", "pm",
        "morning1", "morning2", "afternoon1", "afternoon2", "evening1", "evening2", "night1", "night2",
        // The ones on the following line are no longer used actively. Can be removed later?
        "earlyMorning", "morning", "midDay", "afternoon", "evening", "night", "weeHours").freeze();
    // Order for "type" on the "listPatternPart" element.
    static MapComparator<String> listPatternOrder = new MapComparator<String>().add(
        "start", "middle", "end", "2", "3").freeze();
    // Order for "type" on elements whose name ends with "Width".
    static MapComparator<String> widthOrder = new MapComparator<String>().add(
        "abbreviated", "narrow", "short", "wide", "all").freeze();
    // Order for "type" on elements whose name ends with "FormatLength".
    static MapComparator<String> lengthOrder = new MapComparator<String>().add(
        "full", "long", "medium", "short").freeze();
    // Order for "type" on the "field" element.
    static MapComparator<String> dateFieldOrder = new MapComparator<String>().add(
        "era", "era-short", "era-narrow",
        "year", "year-short", "year-narrow",
        "quarter", "quarter-short", "quarter-narrow",
        "month", "month-short", "month-narrow",
        "week", "week-short", "week-narrow",
        "weekOfMonth", "weekOfMonth-short", "weekOfMonth-narrow",
        "day", "day-short", "day-narrow",
        "dayOfYear", "dayOfYear-short", "dayOfYear-narrow",
        "weekday", "weekday-short", "weekday-narrow",
        "weekdayOfMonth", "weekdayOfMonth-short", "weekdayOfMonth-narrow",
        "sun", "sun-short", "sun-narrow",
        "mon", "mon-short", "mon-narrow",
        "tue", "tue-short", "tue-narrow",
        "wed", "wed-short", "wed-narrow",
        "thu", "thu-short", "thu-narrow",
        "fri", "fri-short", "fri-narrow",
        "sat", "sat-short", "sat-narrow",
        // NOTE(review): unlike every other row, "-short" comes before the base name here - confirm intended.
        "dayperiod-short", "dayperiod", "dayperiod-narrow",
        "hour", "hour-short", "hour-narrow",
        "minute", "minute-short", "minute-narrow",
        "second", "second-short", "second-narrow",
        "zone", "zone-short", "zone-narrow").freeze();
    // Order for "type" on the "unit" element; entries are grouped by their category prefix.
    static MapComparator<String> unitOrder = new MapComparator<String>().add(
        "acceleration-g-force", "acceleration-meter-per-second-squared",
        "angle-revolution", "angle-radian", "angle-degree", "angle-arc-minute", "angle-arc-second",
        "area-square-kilometer", "area-hectare", "area-square-meter", "area-square-centimeter",
        "area-square-mile", "area-acre", "area-square-yard", "area-square-foot", "area-square-inch",
        "concentr-karat",
        "concentr-milligram-per-deciliter", "concentr-millimole-per-liter",
        "concentr-part-per-million", "concentr-percent", "concentr-permille",
        "consumption-liter-per-kilometer", "consumption-liter-per-100kilometers",
        "consumption-mile-per-gallon", "consumption-mile-per-gallon-imperial",
        "digital-petabyte", "digital-terabyte", "digital-terabit", "digital-gigabyte", "digital-gigabit",
        "digital-megabyte", "digital-megabit", "digital-kilobyte", "digital-kilobit",
        "digital-byte", "digital-bit",
        "duration-century",
        "duration-year", "duration-year-person",
        "duration-month", "duration-month-person",
        "duration-week", "duration-week-person",
        "duration-day", "duration-day-person",
        "duration-hour", "duration-minute", "duration-second",
        "duration-millisecond", "duration-microsecond", "duration-nanosecond",
        "electric-ampere", "electric-milliampere", "electric-ohm", "electric-volt",
        "energy-kilocalorie", "energy-calorie", "energy-foodcalorie", "energy-kilojoule", "energy-joule", "energy-kilowatt-hour",
        "frequency-gigahertz", "frequency-megahertz", "frequency-kilohertz", "frequency-hertz",
        "length-kilometer", "length-meter", "length-decimeter", "length-centimeter",
        "length-millimeter", "length-micrometer", "length-nanometer", "length-picometer",
        "length-mile", "length-yard", "length-foot", "length-inch",
        "length-parsec", "length-light-year", "length-astronomical-unit",
        "length-furlong", "length-fathom",
        "length-nautical-mile", "length-mile-scandinavian",
        "length-point",
        "light-lux",
        "mass-metric-ton", "mass-kilogram", "mass-gram", "mass-milligram", "mass-microgram",
        "mass-ton", "mass-stone", "mass-pound", "mass-ounce",
        "mass-ounce-troy", "mass-carat",
        "power-gigawatt", "power-megawatt", "power-kilowatt", "power-watt", "power-milliwatt",
        "power-horsepower",
        "pressure-hectopascal", "pressure-millimeter-of-mercury",
        "pressure-pound-per-square-inch", "pressure-inch-hg", "pressure-millibar", "pressure-atmosphere",
        "speed-kilometer-per-hour", "speed-meter-per-second", "speed-mile-per-hour", "speed-knot",
        "temperature-generic", "temperature-celsius", "temperature-fahrenheit", "temperature-kelvin",
        "volume-cubic-kilometer", "volume-cubic-meter", "volume-cubic-centimeter",
        "volume-cubic-mile", "volume-cubic-yard", "volume-cubic-foot", "volume-cubic-inch",
        "volume-megaliter", "volume-hectoliter", "volume-liter", "volume-deciliter", "volume-centiliter", "volume-milliliter",
        "volume-pint-metric", "volume-cup-metric",
        "volume-acre-foot",
        "volume-bushel", "volume-gallon", "volume-gallon-imperial", "volume-quart", "volume-pint", "volume-cup",
        "volume-fluid-ounce", "volume-tablespoon", "volume-teaspoon").freeze();

    // Order for the "count" attribute (on every element except "minDays").
    static MapComparator<String> countValueOrder = new MapComparator<String>().add(
        "0", "1", "zero", "one", "two", "few", "many", "other").freeze();
    // Order for "type" on the "unitLength" element.
    static MapComparator<String> unitLengthOrder = new MapComparator<String>().add(
        "long", "short", "narrow").freeze();
    // Order for "type" on the "currencyFormat" element.
    static MapComparator<String> currencyFormatOrder = new MapComparator<String>().add(
        "standard", "accounting").freeze();
    // Order for "type" on the "zone" element, supplied by StandardCodes.
    static Comparator<String> zoneOrder = StandardCodes.make().getTZIDComparator();

    // The collator from CLDRConfig, viewed as a string comparator. The cast goes through the raw
    // Comparator type, so the compiler reports it as an unchecked conversion.
    static final Comparator<String> COMP = (Comparator) CLDRConfig.getInstance().getCollator();
1306 
1307     // Hack for US
1308     static final Comparator<String> UNICODE_SET_COMPARATOR = new Comparator<String>() {
1309         @Override
1310         public int compare(String o1, String o2) {
1311             if (o1.contains("{")) {
1312                 o1 = o1.replace("{", "");
1313             }
1314             if (o2.contains("{")) {
1315                 o2 = o2.replace("{", "");
1316             }
1317             return COMP.compare(o1, o2);
1318         }
1319 
1320     };
1321 
getAttributeValueComparator(String element, String attribute)1322     public static Comparator<String> getAttributeValueComparator(String element, String attribute) {
1323         return getAttributeValueComparator(DtdType.ldml, element, attribute);
1324     }
1325 
getAttributeValueComparator(DtdType type, String element, String attribute)1326     static Comparator<String> getAttributeValueComparator(DtdType type, String element, String attribute) {
1327         // The default is a map comparator, which compares numbers as numbers, and strings with UCA
1328         Comparator<String> comp = valueOrdering;
1329         if (type != DtdType.ldml && type != DtdType.ldmlICU) {
1330             return comp;
1331         }
1332         if (attribute.equals("day")) { // && (element.startsWith("weekend")
1333             comp = dayValueOrder;
1334         } else if (attribute.equals("type")) {
1335             if (element.endsWith("FormatLength")) {
1336                 comp = lengthOrder;
1337             } else if (element.endsWith("Width")) {
1338                 comp = widthOrder;
1339             } else if (element.equals("day")) {
1340                 comp = dayValueOrder;
1341             } else if (element.equals("field")) {
1342                 comp = dateFieldOrder;
1343             } else if (element.equals("zone")) {
1344                 comp = zoneOrder;
1345             } else if (element.equals("listPatternPart")) {
1346                 comp = listPatternOrder;
1347             } else if (element.equals("currencyFormat")) {
1348                 comp = currencyFormatOrder;
1349             } else if (element.equals("unitLength")) {
1350                 comp = unitLengthOrder;
1351             } else if (element.equals("unit")) {
1352                 comp = unitOrder;
1353             } else if (element.equals("dayPeriod")) {
1354                 comp = dayPeriodOrder;
1355             }
1356         } else if (attribute.equals("count") && !element.equals("minDays")) {
1357             comp = countValueOrder;
1358         } else if (attribute.equals("cp") && element.equals("annotation")) {
1359             comp = UNICODE_SET_COMPARATOR;
1360         }
1361         return comp;
1362     }
1363 
1364     /**
1365      * Comparator for attributes in CLDR files
1366      */
1367     private static AttributeValueComparator ldmlAvc = new AttributeValueComparator() {
1368         @Override
1369         public int compare(String element, String attribute, String value1, String value2) {
1370             Comparator<String> comp = getAttributeValueComparator(element, attribute);
1371             return comp.compare(value1, value2);
1372         }
1373     };
1374 
hasValue(String elementName)1375     public boolean hasValue(String elementName) {
1376         return nameToElement.get(elementName).type == ElementType.PCDATA;
1377     }
1378 
isMetadata(XPathParts pathPlain)1379     public boolean isMetadata(XPathParts pathPlain) {
1380         for (String s : pathPlain.getElements()) {
1381             Element e = getElementFromName().get(s);
1382             if (e.elementStatus == ElementStatus.metadata) {
1383                 return true;
1384             }
1385         }
1386         return false;
1387     }
1388 
isMetadataOld(DtdType dtdType2, XPathParts pathPlain)1389     public static boolean isMetadataOld(DtdType dtdType2, XPathParts pathPlain) {
1390         // TODO Don't use hard-coded list; instead add to DTD annotations
1391         final String element1 = pathPlain.getElement(1);
1392         final String element2 = pathPlain.getElement(2);
1393         final String elementN = pathPlain.getElement(-1);
1394         switch (dtdType2) {
1395         case ldml:
1396             switch (element1) {
1397             case "generation":
1398             case "metadata":
1399                 return true;
1400             }
1401             break;
1402         case ldmlBCP47:
1403             switch (element1) {
1404             case "generation":
1405             case "version":
1406                 return true;
1407             }
1408             break;
1409         ////supplementalData/transforms/transform[@source="am"][@target="am_FONIPA"][@direction="forward"]/comment
1410         case supplementalData:
1411             // these are NOT under /metadata/ but are actually metadata
1412             switch (element1) {
1413             case "generation":
1414             case "version":
1415             case "validity":
1416             case "references":
1417             case "coverageLevels":
1418                 return true;
1419             case "transforms":
1420                 return elementN.equals("comment");
1421             case "metadata":
1422                 // these ARE under /metadata/, but many others under /metadata/ are NOT actually metadata.
1423                 switch (element2) {
1424                 case "validity":
1425                 case "serialElements":
1426                 case "suppress":
1427                 case "distinguishing":
1428                 case "blocking":
1429                 case "casingData":
1430                     return true;
1431                 }
1432                 break;
1433             }
1434             break;
1435         default:
1436         }
1437         return false;
1438     }
1439 
isDeprecated(XPathParts pathPlain)1440     public boolean isDeprecated(XPathParts pathPlain) {
1441         for (int i = 0; i < pathPlain.size(); ++i) {
1442             String elementName = pathPlain.getElement(i);
1443             if (isDeprecated(elementName, "*", null)) {
1444                 return true;
1445             }
1446             for (String attribute : pathPlain.getAttributeKeys(i)) {
1447                 String attributeValue = pathPlain.getAttributeValue(i, attribute);
1448                 if (isDeprecated(elementName, attribute, attributeValue)) {
1449                     return true;
1450                 }
1451             }
1452         }
1453         return false;
1454     }
1455 
1456     public final static Splitter SPACE_SPLITTER = Splitter.on(CharMatcher.whitespace()).trimResults().omitEmptyStrings();
1457     public final static Splitter BAR_SPLITTER = Splitter.on('|').trimResults().omitEmptyStrings();
1458     public final static Splitter CR_SPLITTER = Splitter.on(CharMatcher.anyOf("\n\r")).trimResults().omitEmptyStrings();
1459 
1460     private static class XPathPartsSet {
1461         private final Set<XPathParts> list = new LinkedHashSet<>();
1462 
addElement(String element)1463         private void addElement(String element) {
1464             if (list.isEmpty()) {
1465                 list.add(new XPathParts().addElement(element));
1466             } else {
1467                 for (XPathParts item : list) {
1468                     item.addElement(element);
1469                 }
1470             }
1471         }
1472 
addAttribute(String attribute, String attributeValue)1473         private void addAttribute(String attribute, String attributeValue) {
1474             for (XPathParts item : list) {
1475                 item.addAttribute(attribute, attributeValue);
1476             }
1477         }
1478 
setElement(int i, String string)1479         private void setElement(int i, String string) {
1480             for (XPathParts item : list) {
1481                 item.setElement(i, string);
1482             }
1483         }
1484 
1485 //        private int size() {
1486 //            return list.iterator().next().size();
1487 //        }
1488 //
1489 //        private void removeElement(int i) {
1490 //            for (XPathParts item : list) {
1491 //                item.removeElement(i);
1492 //            }
1493 //        }
1494 
addAttributes(String attribute, List<String> attributeValues)1495         private void addAttributes(String attribute, List<String> attributeValues) {
1496             if (attributeValues.size() == 1) {
1497                 addAttribute(attribute, attributeValues.iterator().next());
1498             } else {
1499                 // duplicate all the items in the list with the given values
1500                 Set<XPathParts> newList = new LinkedHashSet<>();
1501                 for (XPathParts item : list) {
1502                     for (String attributeValue : attributeValues) {
1503                         XPathParts newItem = item.cloneAsThawed();
1504                         newItem.addAttribute(attribute, attributeValue);
1505                         newList.add(newItem);
1506                     }
1507                 }
1508                 list.clear();
1509                 list.addAll(newList);
1510             }
1511         }
1512 
toStrings()1513         private ImmutableSet<String> toStrings() {
1514             Builder<String> result = new ImmutableSet.Builder<>();
1515 
1516             for (XPathParts item : list) {
1517                 result.add(item.toString());
1518             }
1519             return result.build();
1520         }
1521 
1522         @Override
toString()1523         public String toString() {
1524             return list.toString();
1525         }
1526     }
1527 
getRegularizedPaths(XPathParts pathPlain, Multimap<String, String> extras)1528     public Set<String> getRegularizedPaths(XPathParts pathPlain, Multimap<String, String> extras) {
1529         extras.clear();
1530         Map<String, String> valueAttributes = new HashMap<>();
1531         XPathPartsSet pathResult = new XPathPartsSet();
1532         String element = null;
1533         for (int i = 0; i < pathPlain.size(); ++i) {
1534             element = pathPlain.getElement(i);
1535             pathResult.addElement(element);
1536             valueAttributes.clear();
1537             for (String attribute : pathPlain.getAttributeKeys(i)) {
1538                 AttributeStatus status = getAttributeStatus(element, attribute);
1539                 final String attributeValue = pathPlain.getAttributeValue(i, attribute);
1540                 switch (status) {
1541                 case distinguished:
1542                     AttributeType attrType = getAttributeType(element, attribute);
1543                     if (attrType == AttributeType.NMTOKENS) {
1544                         pathResult.addAttributes(attribute, SPACE_SPLITTER.splitToList(attributeValue));
1545                     } else {
1546                         pathResult.addAttribute(attribute, attributeValue);
1547                     }
1548                     break;
1549                 case value:
1550                     valueAttributes.put(attribute, attributeValue);
1551                     break;
1552                 case metadata:
1553                     break;
1554                 }
1555             }
1556             if (!valueAttributes.isEmpty()) {
1557                 boolean hasValue = hasValue(element);
1558                 // if it doesn't have a value, we construct new child elements, with _ prefix
1559                 // if it does have a value, we have to play a further trick, since
1560                 // we can't have a value and child elements at the same level.
1561                 // So we use a _ suffix on the element.
1562                 if (hasValue) {
1563                     pathResult.setElement(i, element + "_");
1564                 } else {
1565                     int debug = 0;
1566                 }
1567                 for (Entry<String, String> attributeAndValue : valueAttributes.entrySet()) {
1568                     final String attribute = attributeAndValue.getKey();
1569                     final String attributeValue = attributeAndValue.getValue();
1570 
1571                     Set<String> pathsShort = pathResult.toStrings();
1572                     AttributeType attrType = getAttributeType(element, attribute);
1573                     for (String pathShort : pathsShort) {
1574                         pathShort += "/_" + attribute;
1575                         if (attrType == AttributeType.NMTOKENS) {
1576                             for (String valuePart : SPACE_SPLITTER.split(attributeValue)) {
1577                                 extras.put(pathShort, valuePart);
1578                             }
1579                         } else {
1580                             extras.put(pathShort, attributeValue);
1581                         }
1582                     }
1583                 }
1584                 if (hasValue) {
1585                     pathResult.setElement(i, element); // restore
1586                 }
1587             }
1588         }
1589         // Only add the path if it could have a value, looking at the last element
1590         if (!hasValue(element)) {
1591             return null;
1592         }
1593         return pathResult.toStrings();
1594     }
1595 
getAttributeType(String elementName, String attributeName)1596     public AttributeType getAttributeType(String elementName, String attributeName) {
1597         Element element = nameToElement.get(elementName);
1598         if (element == null) {
1599             return null;
1600         }
1601         Attribute attr = element.getAttributeNamed(attributeName);
1602         if (attr == null) {
1603             return null;
1604         }
1605         return attr.type;
1606     }
1607 
1608     // TODO: add support for following to DTD annotations, and rework API
1609 
1610     static final Set<String> SPACED_VALUES = ImmutableSet.of(
1611         "idValidity",
1612         "languageGroup");
1613 
getValueSplitter(XPathParts pathPlain)1614     public static Splitter getValueSplitter(XPathParts pathPlain) {
1615         if (!Collections.disjoint(pathPlain.getElements(), SPACED_VALUES)) {
1616             return SPACE_SPLITTER;
1617         } else if (pathPlain.getElement(-1).equals("annotation")
1618             && !pathPlain.getAttributeKeys(-1).contains("tts")) {
1619             return BAR_SPLITTER;
1620         }
1621         return CR_SPLITTER;
1622     }
1623 
isComment(XPathParts pathPlain, String line)1624     public static boolean isComment(XPathParts pathPlain, String line) {
1625         if (pathPlain.contains("transform")) {
1626             if (line.startsWith("#")) {
1627                 return true;
1628             }
1629         }
1630         return false;
1631     }
1632 
isExtraSplit(String extraPath)1633     public static boolean isExtraSplit(String extraPath) {
1634         if (extraPath.endsWith("/_type") && extraPath.startsWith("//supplementalData/metaZones/mapTimezones")) {
1635             return true;
1636         }
1637         return false;
1638     }
1639 
1640     // ALWAYS KEEP AT END, FOR STATIC INIT ORDER
1641     private static final Map<DtdType, DtdData> CACHE;
1642     static {
1643         EnumMap<DtdType, DtdData> temp = new EnumMap<DtdType, DtdData>(DtdType.class);
1644         for (DtdType type : DtdType.values()) {
temp.put(type, getInstance(type, null))1645             temp.put(type, getInstance(type, null));
1646         }
1647         CACHE = Collections.unmodifiableMap(temp);
1648     }
1649     // ALWAYS KEEP AT END, FOR STATIC INIT ORDER
1650 }
1651