1 package org.unicode.cldr.util;
2 
3 import java.io.File;
4 import java.io.StringReader;
5 import java.util.ArrayList;
6 import java.util.Arrays;
7 import java.util.Collection;
8 import java.util.Collections;
9 import java.util.Comparator;
10 import java.util.HashMap;
11 import java.util.HashSet;
12 import java.util.Iterator;
13 import java.util.LinkedHashMap;
14 import java.util.LinkedHashSet;
15 import java.util.List;
16 import java.util.Locale;
17 import java.util.Map;
18 import java.util.Map.Entry;
19 import java.util.Set;
20 import java.util.TreeMap;
21 import java.util.concurrent.ConcurrentHashMap;
22 import java.util.concurrent.ConcurrentMap;
23 import java.util.regex.Pattern;
24 
25 import com.google.common.base.CharMatcher;
26 import com.google.common.base.Joiner;
27 import com.google.common.base.Splitter;
28 import com.google.common.collect.ImmutableSet;
29 import com.google.common.collect.ImmutableSet.Builder;
30 import com.google.common.collect.ImmutableSetMultimap;
31 import com.google.common.collect.Multimap;
32 import com.google.common.collect.TreeMultimap;
33 import com.ibm.icu.impl.Relation;
34 import com.ibm.icu.text.Transform;
35 
36 /**
37  * An immutable object that contains the structure of a DTD.
38  * @author markdavis
39  */
40 public class DtdData extends XMLFileReader.SimpleHandler {
    // Line separator followed by four spaces of indentation.
    private static final String COMMENT_PREFIX = System.lineSeparator() + "    ";
    // When the "show_all" property is set, the handle* callbacks echo every declaration and comment to stdout.
    private static final boolean SHOW_ALL = CldrUtility.getProperty("show_all", false);
    private static final boolean USE_SYNTHESIZED = false;

    // Enables extra diagnostic output on stdout.
    private static final boolean DEBUG = false;
    // Matches any character that cannot be part of a name (anything other than letters, digits, '-', '#', '_', ':').
    // Used to split content models and attribute type lists into their individual names.
    private static final Pattern FILLER = PatternCache.get("[^-a-zA-Z0-9#_:]");

    // Attribute name -> all attributes with that name, sorted by name; frozen by freeze().
    private final Relation<String, Attribute> nameToAttributes = Relation.of(new TreeMap<String, Set<Attribute>>(), LinkedHashSet.class);
    // Element name -> element. Must be initialized before PCDATA and ANY below, whose initializers
    // call elementFrom(), which reads and writes this map. Replaced by an unmodifiable view in freeze().
    private Map<String, Element> nameToElement = new HashMap<>();
    // Orderings of element and attribute names; only built by freeze() when version == null.
    private MapComparator<String> elementComparator;
    private MapComparator<String> attributeComparator;

    // Root element, created in the constructor from the root type of the DtdType.
    public final Element ROOT;
    // Elements registered under the names "#PCDATA" and "ANY".
    public final Element PCDATA = elementFrom("#PCDATA");
    public final Element ANY = elementFrom("ANY");
    public final DtdType dtdType;
    // Version passed to the constructor; null when loaded through getInstance(DtdType, File).
    public final String version;
    // Most recently declared element or attribute (at most one of the two is non-null);
    // a comment that follows a declaration is routed to whichever is set.
    private Element lastElement;
    private Attribute lastAttribute;
    // Comments collected ahead of the next declaration; handed to that declaration and then reset to null.
    private Set<String> preCommentCache;
    // Created by finish(), after all declarations have been read.
    private DtdComparator dtdComparator;
62 
63     public enum AttributeStatus {
64         distinguished ("§d"),
65         value ("§v"),
66         metadata ("§m︎");
67         public final String shortName;
AttributeStatus(String shortName)68         AttributeStatus(String shortName) {
69             this.shortName = shortName;
70         }
getShortName(AttributeStatus status)71         public static String getShortName(AttributeStatus status) {
72             return status == null ? "" : status.shortName;
73         }
74     }
75 
76     public enum Mode {
77         REQUIRED("#REQUIRED"), OPTIONAL("#IMPLIED"), FIXED("#FIXED"), NULL("null");
78 
79         public final String source;
80 
Mode(String s)81         Mode(String s) {
82             source = s;
83         }
84 
forString(String mode)85         public static Mode forString(String mode) {
86             for (Mode value : Mode.values()) {
87                 if (value.source.equals(mode)) {
88                     return value;
89                 }
90             }
91             if (mode == null) {
92                 return NULL;
93             }
94             throw new IllegalArgumentException(mode);
95         }
96     }
97 
98     public enum AttributeType {
99         CDATA, ID, IDREF, IDREFS, ENTITY, ENTITIES, NMTOKEN, NMTOKENS, ENUMERATED_TYPE
100     }
101 
    /**
     * Names of elements for which a "draft" attribute is accepted even though the element has children;
     * on other elements with children the Attribute constructor marks "draft" as deprecated.
     */
    static final Set<String> DRAFT_ON_NON_LEAF_ALLOWED = ImmutableSet.of("collation", "transform", "unitPreferenceData", "rulesetGrouping");
103 
104     public static class Attribute implements Named {
105         private static final Joiner JOINER_COMMA_SPACE = Joiner.on(", ");
106         public static final String AUG_TRAIL = "⟫";
107         public static final String AUG_LEAD = "⟪";
108         public static final String ENUM_TRAIL = "⟩";
109         public static final String ENUM_LEAD = "⟨";
110         public static final Pattern LEAD_TRAIL = Pattern.compile("(.*[" + AUG_LEAD + ENUM_LEAD + "])(.*)([" + AUG_TRAIL + ENUM_TRAIL + "].*)");
111         public final String name;
112         public final Element element;
113         public final Mode mode;
114         public final String defaultValue;
115         public final AttributeType type;
116         public final Map<String, Integer> values;
117         private final Set<String> commentsPre;
118         private Set<String> commentsPost;
119         private boolean isDeprecatedAttribute;
120         public AttributeStatus attributeStatus = AttributeStatus.distinguished; // default unless reset by annotations
121         private Set<String> deprecatedValues = Collections.emptySet();
122         public MatchValue matchValue;
123         private final Comparator<String> attributeValueComparator;
124 
Attribute(DtdType dtdType, Element element2, String aName, Mode mode2, String[] split, String value2, Set<String> firstComment)125         private Attribute(DtdType dtdType, Element element2, String aName, Mode mode2, String[] split, String value2, Set<String> firstComment) {
126             commentsPre = firstComment;
127             element = element2;
128             name = aName.intern();
129             if (name.equals("draft") // normally never permitted on elements with children, but special cases...
130                 && !DRAFT_ON_NON_LEAF_ALLOWED.contains(element.getName())) {
131                 int elementChildrenCount = element.getChildren().size();
132                 if (elementChildrenCount > 1
133                     || elementChildrenCount == 1 && !element.getChildren().keySet().iterator().next().getName().equals("cp")) {
134                     isDeprecatedAttribute = true;
135                     if (DEBUG) {
136                         System.out.println(element.getName() + ":" + element.getChildren());
137                     }
138                 }
139             }
140             mode = mode2;
141             defaultValue = value2 == null ? null
142                 : value2.intern();
143             AttributeType _type = AttributeType.ENUMERATED_TYPE;
144             Map<String, Integer> _values = Collections.emptyMap();
145             if (split.length == 1) {
146                 try {
147                     _type = AttributeType.valueOf(split[0]);
148                 } catch (Exception e) {
149                 }
150             }
151             type = _type;
152 
153             if (_type == AttributeType.ENUMERATED_TYPE) {
154                 LinkedHashMap<String, Integer> temp = new LinkedHashMap<>();
155                 for (String part : split) {
156                     if (part.length() != 0) {
157                         temp.put(part.intern(), temp.size());
158                     }
159                 }
160                 _values = Collections.unmodifiableMap(temp);
161             }
162             values = _values;
163             attributeValueComparator = getAttributeValueComparator(dtdType, element.name, name);
164         }
165 
166         @Override
toString()167         public String toString() {
168             return element.name + ":" + name;
169         }
170 
getSampleValue()171         public String getSampleValue() {
172             return type == AttributeType.ENUMERATED_TYPE  ? (values.containsKey("year") ? "year" : values.keySet().iterator().next())
173                 : matchValue != null ? matchValue.getSample()
174                     : MatchValue.DEFAULT_SAMPLE;
175         }
176 
appendDtdString(StringBuilder b)177         public StringBuilder appendDtdString(StringBuilder b) {
178             Attribute a = this;
179             b.append("<!ATTLIST " + element.name + " " + a.name);
180             boolean first;
181             if (a.type == AttributeType.ENUMERATED_TYPE) {
182                 b.append(" (");
183                 first = true;
184                 for (String s : a.values.keySet()) {
185                     if (deprecatedValues.contains(s)) {
186                         continue;
187                     }
188                     if (first) {
189                         first = false;
190                     } else {
191                         b.append(" | ");
192                     }
193                     b.append(s);
194                 }
195                 b.append(")");
196             } else {
197                 b.append(' ').append(a.type);
198             }
199             if (a.mode != Mode.NULL) {
200                 b.append(" ").append(a.mode.source);
201             }
202             if (a.defaultValue != null) {
203                 b.append(" \"").append(a.defaultValue).append('"');
204             }
205             b.append(" >");
206             return b;
207         }
208 
features()209         public String features() {
210             return (type == AttributeType.ENUMERATED_TYPE ? values.keySet().toString() : type.toString())
211                 + (mode == Mode.NULL ? "" : ", mode=" + mode)
212                 + (defaultValue == null ? "" : ", default=" + defaultValue);
213         }
214 
215         @Override
getName()216         public String getName() {
217             return name;
218         }
219 
220         private static Splitter COMMA = Splitter.on(',').trimResults();
221 
addComment(String commentIn)222         public void addComment(String commentIn) {
223             if (commentIn.startsWith("@")) {
224                 // there are exactly 2 cases: deprecated and ordered
225                 switch (commentIn) {
226                 case "@METADATA":
227                     attributeStatus = AttributeStatus.metadata;
228                     break;
229                 case "@VALUE":
230                     attributeStatus = AttributeStatus.value;
231                     break;
232                 case "@DEPRECATED":
233                     isDeprecatedAttribute = true;
234                     break;
235                 default:
236                     int colonPos = commentIn.indexOf(':');
237                     if (colonPos < 0) {
238                         throw new IllegalArgumentException("Unrecognized annotation: " + commentIn);
239                     }
240                     String command = commentIn.substring(0, colonPos);
241                     String argument = commentIn.substring(colonPos + 1);
242                     switch(command) {
243                     case "@DEPRECATED":
244                         deprecatedValues = Collections.unmodifiableSet(new HashSet<>(COMMA.splitToList(argument)));
245                         break;
246                     case "@MATCH":
247                         if (matchValue != null) {
248                             throw new IllegalArgumentException("Conflicting @MATCH: " + matchValue.getName() + " & " + argument);
249                         }
250                         matchValue = MatchValue.of(argument);
251                         break;
252                     default:
253                         throw new IllegalArgumentException("Unrecognized annotation: " + commentIn);
254                     }
255                 }
256                 return;
257             }
258             commentsPost = addUnmodifiable(commentsPost, commentIn.trim());
259         }
260 
261         /**
262          * Special version of identity; only considers name and name of element
263          */
264         @Override
equals(Object obj)265         public boolean equals(Object obj) {
266             if (!(obj instanceof Attribute)) {
267                 return false;
268             }
269             Attribute that = (Attribute) obj;
270             return name.equals(that.name)
271                 && element.name.equals(that.element.name) // don't use plain element: circularity
272                 // not relevant to identity
273                 //                && Objects.equals(comment, that.comment)
274                 //                && mode.equals(that.mode)
275                 //                && Objects.equals(defaultValue, that.defaultValue)
276                 //                && type.equals(that.type)
277                 //                && values.equals(that.values)
278                 ;
279         }
280 
281         /**
282          * Special version of identity; only considers name and name of element
283          */
284         @Override
hashCode()285         public int hashCode() {
286             return name.hashCode() * 37
287                 + element.name.hashCode() // don't use plain element: circularity
288                 // not relevant to identity
289                 //                ) * 37 + Objects.hashCode(comment)) * 37
290                 //                + mode.hashCode()) * 37
291                 //                + Objects.hashCode(defaultValue)) * 37
292                 //                + type.hashCode()) * 37
293                 //                + values.hashCode()
294                 ;
295         }
296 
isDeprecated()297         public boolean isDeprecated() {
298             return isDeprecatedAttribute;
299         }
300 
isDeprecatedValue(String value)301         public boolean isDeprecatedValue(String value) {
302             return deprecatedValues.contains(value);
303         }
304 
getStatus()305         public AttributeStatus getStatus() {
306             return attributeStatus;
307         }
308 
getValueStatus(String value)309         public ValueStatus getValueStatus(String value) {
310             return deprecatedValues.contains(value) ? ValueStatus.invalid
311                 : type == AttributeType.ENUMERATED_TYPE  ? (values.containsKey(value) ? ValueStatus.valid  : ValueStatus.invalid)
312                     : matchValue == null ? ValueStatus.unknown
313                         : matchValue.is(value) ? ValueStatus.valid
314                             : ValueStatus.invalid;
315         }
316 
getMatchString()317         public String getMatchString() {
318             return type == AttributeType.ENUMERATED_TYPE ? ENUM_LEAD + JOINER_COMMA_SPACE.join(values.keySet()) + ENUM_TRAIL
319                 : matchValue != null ? AUG_LEAD + matchValue.toString() + AUG_TRAIL
320                     : "";
321         }
322 
getMatchingName(Map<Attribute, Integer> attributes)323         public Attribute getMatchingName(Map<Attribute, Integer> attributes) {
324             for (Attribute attribute : attributes.keySet()) {
325                 if (name.equals(attribute.getName())) {
326                     return attribute;
327                 }
328             }
329             return null;
330         }
331 
332     }
333 
334     public enum ValueStatus {invalid, unknown, valid}
335 
DtdData(DtdType type, String version)336     private DtdData(DtdType type, String version) {
337         this.dtdType = type;
338         this.ROOT = elementFrom(type.rootType.toString());
339         this.version = version;
340     }
341 
addAttribute(String eName, String aName, String type, String mode, String value)342     private void addAttribute(String eName, String aName, String type, String mode, String value) {
343         Attribute a = new Attribute(dtdType, nameToElement.get(eName), aName, Mode.forString(mode), FILLER.split(type), value, preCommentCache);
344         preCommentCache = null;
345         getAttributesFromName().put(aName, a);
346         CldrUtility.putNew(a.element.attributes, a, a.element.attributes.size());
347         lastElement = null;
348         lastAttribute = a;
349     }
350 
351     public enum ElementType {
352         EMPTY, ANY, PCDATA("(#PCDATA)"), CHILDREN;
353         public final String source;
354 
ElementType(String s)355         private ElementType(String s) {
356             source = s;
357         }
358 
ElementType()359         private ElementType() {
360             source = name();
361         }
362     }
363 
    /**
     * Something that has a name; implemented by {@link Element} and {@link Attribute} so that
     * their names can be collected uniformly.
     */
    interface Named {
        /** @return the name of this item */
        String getName();
    }
367 
368     public enum ElementStatus {
369         regular, metadata
370     }
371 
372     public static class Element implements Named {
373         public final String name;
374         private String rawModel;
375         private ElementType type;
376         private final Map<Element, Integer> children = new LinkedHashMap<>();
377         private final Map<Attribute, Integer> attributes = new LinkedHashMap<>();
378         private Set<String> commentsPre;
379         private Set<String> commentsPost;
380         private String model;
381         private boolean isOrderedElement;
382         private boolean isDeprecatedElement;
383         private ElementStatus elementStatus = ElementStatus.regular;
384 
Element(String name2)385         private Element(String name2) {
386             name = name2.intern();
387         }
388 
setChildren(DtdData dtdData, String model, Set<String> precomments)389         private void setChildren(DtdData dtdData, String model, Set<String> precomments) {
390             this.commentsPre = precomments;
391             rawModel = model;
392             this.model = clean(model);
393             if (model.equals("EMPTY")) {
394                 type = ElementType.EMPTY;
395                 return;
396             }
397             type = ElementType.CHILDREN;
398             for (String part : FILLER.split(model)) {
399                 if (part.length() != 0) {
400                     if (part.equals("#PCDATA")) {
401                         type = ElementType.PCDATA;
402                     } else if (part.equals("ANY")) {
403                         type = ElementType.ANY;
404                     } else {
405                         CldrUtility.putNew(children, dtdData.elementFrom(part), children.size());
406                     }
407                 }
408             }
409             if ((type == ElementType.CHILDREN) == (children.size() == 0)
410                 && !model.startsWith("(#PCDATA|cp")) {
411                 throw new IllegalArgumentException("CLDR does not permit Mixed content. " + name + ":" + model);
412             }
413         }
414 
415         static final Pattern CLEANER1 = PatternCache.get("([,|(])(?=\\S)");
416         static final Pattern CLEANER2 = PatternCache.get("(?=\\S)([|)])");
417 
clean(String model2)418         private String clean(String model2) {
419             // (x) -> ( x );
420             // x,y -> x, y
421             // x|y -> x | y
422             String result = CLEANER1.matcher(model2).replaceAll("$1 ");
423             result = CLEANER2.matcher(result).replaceAll(" $1");
424             return result.equals(model2)
425                 ? model2
426                     : result; // for debugging
427         }
428 
containsAttribute(String string)429         public boolean containsAttribute(String string) {
430             for (Attribute a : attributes.keySet()) {
431                 if (a.name.equals(string)) {
432                     return true;
433                 }
434             }
435             return false;
436         }
437 
438         @Override
toString()439         public String toString() {
440             return name;
441         }
442 
toDtdString()443         public String toDtdString() {
444             return "<!ELEMENT " + name + " " + getRawModel() + " >";
445         }
446 
getType()447         public ElementType getType() {
448             return type;
449         }
450 
getChildren()451         public Map<Element, Integer> getChildren() {
452             return Collections.unmodifiableMap(children);
453         }
454 
getAttributes()455         public Map<Attribute, Integer> getAttributes() {
456             return Collections.unmodifiableMap(attributes);
457         }
458 
459         @Override
getName()460         public String getName() {
461             return name;
462         }
463 
getChildNamed(String string)464         public Element getChildNamed(String string) {
465             for (Element e : children.keySet()) {
466                 if (e.name.equals(string)) {
467                     return e;
468                 }
469             }
470             return null;
471         }
472 
getAttributeNamed(String string)473         public Attribute getAttributeNamed(String string) {
474             for (Attribute a : attributes.keySet()) {
475                 if (a.name.equals(string)) {
476                     return a;
477                 }
478             }
479             return null;
480         }
481 
addComment(String addition)482         public void addComment(String addition) {
483             if (addition.startsWith("@")) {
484                 // there are exactly 3 cases: deprecated, ordered, and metadata
485                 switch (addition) {
486                 case "@ORDERED":
487                     isOrderedElement = true;
488                     break;
489                 case "@DEPRECATED":
490                     isDeprecatedElement = true;
491                     break;
492                 case "@METADATA":
493                     elementStatus = ElementStatus.metadata;
494                     break;
495                 default:
496                     throw new IllegalArgumentException("Unrecognized annotation: " + addition);
497                 }
498                 return;
499             }
500             commentsPost = addUnmodifiable(commentsPost, addition.trim());
501         }
502 
503         /**
504          * Special version of equals. Only the name is considered in the identity.
505          */
506         @Override
equals(Object obj)507         public boolean equals(Object obj) {
508             if (!(obj instanceof Element)) {
509                 return false;
510             }
511             Element that = (Element) obj;
512             return name.equals(that.name)
513                 // not relevant to the identity of the object
514                 //                && Objects.equals(comment, that.comment)
515                 //                && type == that.type
516                 //                && attributes.equals(that.attributes)
517                 //                && children.equals(that.children)
518                 ;
519         }
520 
521         /**
522          * Special version of hashcode. Only the name is considered in the identity.
523          */
524         @Override
hashCode()525         public int hashCode() {
526             return name.hashCode()
527                 // not relevant to the identity of the object
528                 // * 37 + Objects.hashCode(comment)
529                 //) * 37 + Objects.hashCode(type)
530                 //                ) * 37 + attributes.hashCode()
531                 //                ) * 37 + children.hashCode()
532                 ;
533         }
534 
isDeprecated()535         public boolean isDeprecated() {
536             return isDeprecatedElement;
537         }
538 
isOrdered()539         public boolean isOrdered() {
540             return isOrderedElement;
541         }
542 
getElementStatus()543         public ElementStatus getElementStatus() {
544             return elementStatus;
545         }
546 
547         /**
548          * @return the rawModel
549          */
getRawModel()550         public String getRawModel() {
551             return rawModel;
552         }
553     }
554 
elementFrom(String name)555     private Element elementFrom(String name) {
556         Element result = nameToElement.get(name);
557         if (result == null) {
558             nameToElement.put(name, result = new Element(name));
559         }
560         return result;
561     }
562 
addElement(String name2, String model)563     private void addElement(String name2, String model) {
564         Element element = elementFrom(name2);
565         element.setChildren(this, model, preCommentCache);
566         preCommentCache = null;
567         lastElement = element;
568         lastAttribute = null;
569     }
570 
addComment(String comment)571     private void addComment(String comment) {
572         comment = comment.trim();
573         if (preCommentCache != null || comment.startsWith("#")) { // the precomments are "sticky"
574             if (comment.startsWith("@")) {
575                 throw new IllegalArgumentException("@ annotation comment must follow element or attribute, without intervening # comment");
576             }
577             preCommentCache = addUnmodifiable(preCommentCache, comment);
578         } else if (lastElement != null) {
579             lastElement.addComment(comment);
580         } else if (lastAttribute != null) {
581             lastAttribute.addComment(comment);
582         } else {
583             if (comment.startsWith("@")) {
584                 throw new IllegalArgumentException("@ annotation comment must follow element or attribute, without intervening # comment");
585             }
586             preCommentCache = addUnmodifiable(preCommentCache, comment);
587         }
588     }
589 
590     // TODO hide this
591     /**
592      * @deprecated
593      */
594     @Deprecated
595     @Override
handleElementDecl(String name, String model)596     public void handleElementDecl(String name, String model) {
597         if (SHOW_ALL) {
598             // <!ELEMENT ldml (identity, (alias | (fallback*, localeDisplayNames?, layout?, contextTransforms?, characters?, delimiters?, measurement?, dates?, numbers?, units?, listPatterns?, collations?, posix?, segmentations?, rbnf?, annotations?, metadata?, references?, special*))) >
599             System.out.println(System.lineSeparator() + "<!ELEMENT " + name + " " + model + " >");
600         }
601         addElement(name, model);
602     }
603 
604     // TODO hide this
605     /**
606      * @deprecated
607      */
608     @Deprecated
609     @Override
handleStartDtd(String name, String publicId, String systemId)610     public void handleStartDtd(String name, String publicId, String systemId) {
611         DtdType explicitDtdType = DtdType.valueOf(name);
612         if (explicitDtdType != dtdType && explicitDtdType != dtdType.rootType) {
613             throw new IllegalArgumentException("Mismatch in dtdTypes");
614         }
615     }
616 
617     /**
618      * @deprecated
619      */
620     @Deprecated
621     @Override
handleAttributeDecl(String eName, String aName, String type, String mode, String value)622     public void handleAttributeDecl(String eName, String aName, String type, String mode, String value) {
623         if (SHOW_ALL) {
624             // <!ATTLIST ldml draft ( approved | contributed | provisional | unconfirmed | true | false ) #IMPLIED >
625             // <!ATTLIST version number CDATA #REQUIRED >
626             // <!ATTLIST version cldrVersion CDATA #FIXED "27" >
627 
628             System.out.println("<!ATTLIST " + eName
629                 + " " + aName
630                 + " " + type
631                 + " " + mode
632                 + (value == null ? "" : " \"" + value + "\"")
633                 + " >");
634         }
635         // HACK for 1.1.1
636         if (eName.equals("draft")) {
637             eName = "week";
638         }
639         addAttribute(eName, aName, type, mode, value);
640     }
641 
642     /**
643      * @deprecated
644      */
645     @Deprecated
646     @Override
handleComment(String path, String comment)647     public void handleComment(String path, String comment) {
648         if (SHOW_ALL) {
649             // <!-- true and false are deprecated. -->
650             System.out.println("<!-- " + comment.trim() + " -->");
651         }
652         addComment(comment);
653     }
654 
    // TODO hide this
    /**
     * Handler callback for the end of the DTD. Always throws {@link XMLFileReader.AbortException};
     * presumably this stops the reader once the DTD has been processed — confirm against XMLFileReader.
     *
     * @deprecated handler callback; see the TODO above about hiding it
     */
    @Deprecated
    @Override
    public void handleEndDtd() {
        throw new XMLFileReader.AbortException();
    }
664 
665     /**
666      * Note that it always gets the trunk version
667      * @deprecated depends on static config, use {@link DtdData#getInstance(DtdType, File)} instead
668      */
669     @Deprecated
getInstance(DtdType type)670     public static DtdData getInstance(DtdType type) {
671         return getInstance(type, CLDRConfig.getInstance().getCldrBaseDirectory());
672     }
673 
674     /**
675      * Special form using version, used only by tests, etc.
676      */
getInstance(DtdType type, String version)677     public static DtdData getInstance(DtdType type, String version) {
678         File directory = version == null ? CLDRConfig.getInstance().getCldrBaseDirectory()
679             : new File(CLDRPaths.ARCHIVE_DIRECTORY + "/cldr-" + version);
680 
681         return getInstance(type, version, directory);
682     }
683 
    // Instances already built by getInstance(DtdType, File), keyed by (type, CLDR root directory).
    private static final ConcurrentMap<Pair<DtdType, File>, DtdData> CACHE = new ConcurrentHashMap<>();
685 
686     /**
687      * Normal version of DtdData
688      * Get a DtdData, given the CLDR root directory.
689      * @param type which DtdType to return
690      * @param directory the CLDR Root directory, which contains the "common" directory.
691      * @return
692      */
getInstance(DtdType type, File directory)693     public static DtdData getInstance(DtdType type, File directory) {
694         Pair<DtdType, File> key = new Pair<>(type, directory);
695         DtdData data = CACHE.computeIfAbsent(key, k -> getInstance(type, null, directory));
696         return data;
697     }
698 
getInstance(DtdType type, String version, File directory)699     private static DtdData getInstance(DtdType type, String version, File directory) {
700         DtdData simpleHandler = new DtdData(type, version);
701         XMLFileReader xfr = new XMLFileReader().setHandler(simpleHandler);
702         if (type != type.rootType) {
703             // read the real first, then add onto it.
704             readFile(type.rootType, xfr, directory);
705         }
706         readFile(type, xfr, directory);
707         // HACK
708         if (type == DtdType.ldmlICU) {
709             Element special = simpleHandler.nameToElement.get("special");
710             for (String extraElementName : Arrays.asList(
711                 "icu:breakIteratorData",
712                 "icu:UCARules",
713                 "icu:scripts",
714                 "icu:transforms",
715                 "icu:ruleBasedNumberFormats",
716                 "icu:isLeapMonth",
717                 "icu:version",
718                 "icu:breakDictionaryData",
719                 "icu:depends")) {
720                 Element extraElement = simpleHandler.nameToElement.get(extraElementName);
721                 special.children.put(extraElement, special.children.size());
722             }
723         }
724         if (simpleHandler.ROOT.children.size() == 0) {
725             throw new IllegalArgumentException(); // should never happen
726         }
727         simpleHandler.finish();
728         simpleHandler.freeze();
729         return simpleHandler;
730     }
731 
    /**
     * Creates the DtdComparator for this instance. Called by getInstance after all DTD files
     * have been read and before freeze().
     */
    private void finish() {
        dtdComparator = new DtdComparator();
    }
735 
readFile(DtdType type, XMLFileReader xfr, File directory)736     public static void readFile(DtdType type, XMLFileReader xfr, File directory) {
737         File file = new File(directory, type.dtdPath);
738         StringReader s = new StringReader("<?xml version='1.0' encoding='UTF-8' ?>"
739             + "<!DOCTYPE " + type
740             + " SYSTEM '" + file.getAbsolutePath() + "'>");
741         xfr.read(type.toString(), s, -1, true); //  DTD_TYPE_TO_FILE.get(type)
742     }
743 
freeze()744     private void freeze() {
745         if (version == null) { // only generate for new versions
746             MergeLists<String> elementMergeList = new MergeLists<>();
747             elementMergeList.add(dtdType.toString());
748             MergeLists<String> attributeMergeList = new MergeLists<>();
749             attributeMergeList.add("_q");
750 
751             for (Element element : nameToElement.values()) {
752                 if (element.children.size() > 0) {
753                     Collection<String> names = getNames(element.children.keySet());
754                     elementMergeList.add(names);
755                     if (DEBUG) {
756                         System.out.println(element.getName() + "\t→\t" + names);
757                     }
758                 }
759                 if (element.attributes.size() > 0) {
760                     Collection<String> names = getNames(element.attributes.keySet());
761                     attributeMergeList.add(names);
762                     if (DEBUG) {
763                         System.out.println(element.getName() + "\t→\t@" + names);
764                     }
765                 }
766             }
767             List<String> elementList = elementMergeList.merge();
768             List<String> attributeList = attributeMergeList.merge();
769             if (DEBUG) {
770                 System.out.println("Element Ordering:\t" + elementList);
771                 System.out.println("Attribute Ordering:\t" + attributeList);
772             }
773             elementComparator = new MapComparator<>(elementList).setErrorOnMissing(true).freeze();
774             attributeComparator = new MapComparator<>(attributeList).setErrorOnMissing(true).freeze();
775         }
776         nameToAttributes.freeze();
777         nameToElement = Collections.unmodifiableMap(nameToElement);
778     }
779 
getNames(Collection<? extends Named> keySet)780     private Collection<String> getNames(Collection<? extends Named> keySet) {
781         List<String> result = new ArrayList<>();
782         for (Named e : keySet) {
783             result.add(e.getName());
784         }
785         return result;
786     }
787 
    /**
     * The three kinds of item named here: an element, an attribute, or an attribute value.
     * NOTE(review): not referenced in this part of the file; confirm usage with callers.
     */
    public enum DtdItem {
        ELEMENT, ATTRIBUTE, ATTRIBUTE_VALUE
    }
791 
    /**
     * Callback that orders two values of one attribute, given the element and attribute names.
     */
    public interface AttributeValueComparator {
        /**
         * Compares {@code value1} with {@code value2} for {@code attribute} on {@code element}.
         *
         * @param element   name of the element carrying the attribute
         * @param attribute name of the attribute whose values are compared
         * @param value1    first attribute value
         * @param value2    second attribute value
         * @return presumably negative, zero or positive as in {@link Comparator#compare}
         *         (NOTE(review): confirm against implementations)
         */
        public int compare(String element, String attribute, String value1, String value2);
    }
795 
getDtdComparator(AttributeValueComparator avc)796     public Comparator<String> getDtdComparator(AttributeValueComparator avc) {
797         return dtdComparator;
798     }
799 
getDtdComparator()800     public DtdComparator getDtdComparator() {
801         return dtdComparator;
802     }
803 
804     public class DtdComparator implements Comparator<String> {
805         @Override
compare(String path1, String path2)806         public int compare(String path1, String path2) {
807             XPathParts a = XPathParts.getFrozenInstance(path1);
808             XPathParts b = XPathParts.getFrozenInstance(path2);
809             return xpathComparator(a, b);
810         }
811 
xpathComparator(XPathParts a, XPathParts b)812         public int xpathComparator(XPathParts a, XPathParts b) {
813             // there must always be at least one element
814             String baseA = a.getElement(0);
815             String baseB = b.getElement(0);
816             if (!ROOT.name.equals(baseA) || !ROOT.name.equals(baseB)) {
817                 throw new IllegalArgumentException("Comparing different DTDs: " + ROOT.name + ", " + baseA + ", " + baseB);
818             }
819             int min = Math.min(a.size(), b.size());
820             Element parent = ROOT;
821             Element elementA;
822             for (int i = 1; i < min; ++i, parent = elementA) {
823                 // add extra test for "fake" elements, used in diffing. they always start with _
824                 String elementRawA = a.getElement(i);
825                 String elementRawB = b.getElement(i);
826                 if (elementRawA.startsWith("_")) {
827                     return elementRawB.startsWith("_") ? elementRawA.compareTo(elementRawB) : -1;
828                 } else if (elementRawB.startsWith("_")) {
829                     return 1;
830                 }
831                 //
832                 elementA = nameToElement.get(elementRawA);
833                 Element elementB = nameToElement.get(elementRawB);
834                 if (elementA != elementB) {
835                     int aa = parent.children.get(elementA);
836                     int bb = parent.children.get(elementB);
837                     return aa - bb;
838                 }
839                 int countA = a.getAttributeCount(i);
840                 int countB = b.getAttributeCount(i);
841                 if (countA == 0 && countB == 0) {
842                     continue;
843                 }
844                 // we have two ways to compare the attributes. One based on the dtd,
845                 // and one based on explicit comparators
846 
847                 // at this point the elements are the same and correspond to elementA
848                 // in the dtd
849 
850                 // Handle the special added elements
851                 String aqValue = a.getAttributeValue(i, "_q");
852                 if (aqValue != null) {
853                     String bqValue = b.getAttributeValue(i, "_q");
854                     if (!aqValue.equals(bqValue)) {
855                         int aValue = Integer.parseInt(aqValue);
856                         int bValue = Integer.parseInt(bqValue);
857                         return aValue - bValue;
858                     }
859                     --countA;
860                     --countB;
861                 }
862 
863                 attributes: for (Entry<Attribute, Integer> attr : elementA.attributes.entrySet()) {
864                     Attribute main = attr.getKey();
865                     String valueA = a.getAttributeValue(i, main.name);
866                     String valueB = b.getAttributeValue(i, main.name);
867                     if (valueA == null) {
868                         if (valueB != null) {
869                             return -1;
870                         }
871                     } else if (valueB == null) {
872                         return 1;
873                     } else if (valueA.equals(valueB)) {
874                         --countA;
875                         --countB;
876                         if (countA == 0 && countB == 0) {
877                             break attributes;
878                         }
879                         continue; // TODO
880                     } else if (main.attributeValueComparator != null) {
881                         return main.attributeValueComparator.compare(valueA, valueB);
882                     } else if (main.values.size() != 0) {
883                         int aa = main.values.get(valueA);
884                         int bb = main.values.get(valueB);
885                         return aa - bb;
886                     } else {
887                         return valueA.compareTo(valueB);
888                     }
889                 }
890                 if (countA != 0 || countB != 0) {
891                     throw new IllegalArgumentException();
892                 }
893             }
894             return a.size() - b.size();
895         }
896     }
897 
getAttributeComparator()898     public MapComparator<String> getAttributeComparator() {
899         return attributeComparator;
900     }
901 
902 
getElementComparator()903     public MapComparator<String> getElementComparator() {
904         return elementComparator;
905     }
906 
getAttributesFromName()907     public Relation<String, Attribute> getAttributesFromName() {
908         return nameToAttributes;
909     }
910 
getElementFromName()911     public Map<String, Element> getElementFromName() {
912         return nameToElement;
913     }
914 
915     @Override
toString()916     public String toString() {
917         StringBuilder b = new StringBuilder();
918         // <!ELEMENT ldml (identity, (alias | (fallback*, localeDisplayNames?, layout?, contextTransforms?, characters?, delimiters?, measurement?, dates?, numbers?, units?, listPatterns?, collations?, posix?, segmentations?, rbnf?, metadata?, references?, special*))) >
919         // <!ATTLIST ldml draft ( approved | contributed | provisional | unconfirmed | true | false ) #IMPLIED > <!-- true and false are deprecated. -->
920         Seen seen = new Seen(dtdType);
921         seen.seenElements.add(ANY);
922         seen.seenElements.add(PCDATA);
923         toString(ROOT, b, seen);
924 
925         // Hack for ldmlIcu: catch the items that are not mentioned in the original
926         int currentEnd = b.length();
927         for (Element e : nameToElement.values()) {
928             toString(e, b, seen);
929         }
930         if (currentEnd != b.length()) {
931             b.insert(currentEnd,
932                 System.lineSeparator() + System.lineSeparator()
933                 + "<!-- Elements not reachable from root! -->"
934                 + System.lineSeparator());
935         }
936         return b.toString();
937     }
938 
939     static final class Seen {
940         Set<Element> seenElements = new HashSet<>();
941         Set<Attribute> seenAttributes = new HashSet<>();
942 
Seen(DtdType dtdType)943         public Seen(DtdType dtdType) {
944             if (dtdType.rootType == dtdType) {
945                 return;
946             }
947             DtdData otherData = DtdData.getInstance(dtdType.rootType);
948             walk(otherData, otherData.ROOT);
949             seenElements.remove(otherData.nameToElement.get("special"));
950         }
951 
walk(DtdData otherData, Element current)952         private void walk(DtdData otherData, Element current) {
953             seenElements.add(current);
954             seenAttributes.addAll(current.attributes.keySet());
955             for (Element e : current.children.keySet()) {
956                 walk(otherData, e);
957             }
958         }
959     }
960 
getDescendents(Element start, Set<Element> toAddTo)961     public Set<Element> getDescendents(Element start, Set<Element> toAddTo) {
962         if (!toAddTo.contains(start)) {
963             toAddTo.add(start);
964             for (Element e : start.children.keySet()) {
965                 getDescendents(e, toAddTo);
966             }
967         }
968         return toAddTo;
969     }
970 
toString(Element current, StringBuilder b, Seen seen)971     private void toString(Element current, StringBuilder b, Seen seen) {
972         boolean first = true;
973         if (seen.seenElements.contains(current)) {
974             return;
975         }
976         seen.seenElements.add(current);
977         boolean elementDeprecated = isDeprecated(current.name, "*", "*");
978 
979         showComments(b, current.commentsPre, true);
980         b.append("\n\n<!ELEMENT " + current.name + " " + current.model + " >");
981         if (USE_SYNTHESIZED) {
982             Element aliasElement = getElementFromName().get("alias");
983             //b.append(current.rawChildren);
984             if (!current.children.isEmpty()) {
985                 LinkedHashSet<Element> elements = new LinkedHashSet<>(current.children.keySet());
986                 boolean hasAlias = aliasElement != null && elements.remove(aliasElement);
987                 //boolean hasSpecial = specialElement != null && elements.remove(specialElement);
988                 if (hasAlias) {
989                     b.append("(alias |");
990                 }
991                 b.append("(");
992                 // <!ELEMENT transformNames ( alias | (transformName | special)* ) >
993                 // <!ELEMENT layout ( alias | (orientation*, inList*, inText*, special*) ) >
994 
995                 for (Element e : elements) {
996                     if (first) {
997                         first = false;
998                     } else {
999                         b.append(", ");
1000                     }
1001                     b.append(e.name);
1002                     if (e.type != ElementType.PCDATA) {
1003                         b.append("*");
1004                     }
1005                 }
1006                 if (hasAlias) {
1007                     b.append(")");
1008                 }
1009                 b.append(")");
1010             } else {
1011                 b.append(current.type == null ? "???" : current.type.source);
1012             }
1013             b.append(">");
1014         }
1015         showComments(b, current.commentsPost, false);
1016         if (isOrdered(current.name)) {
1017             b.append(COMMENT_PREFIX + "<!--@ORDERED-->");
1018         }
1019         if (current.getElementStatus() != ElementStatus.regular) {
1020             b.append(COMMENT_PREFIX + "<!--@"
1021                 + current.getElementStatus().toString().toUpperCase(Locale.ROOT)
1022                 + "-->");
1023         }
1024         if (elementDeprecated) {
1025             b.append(COMMENT_PREFIX + "<!--@DEPRECATED-->");
1026         }
1027 
1028         LinkedHashSet<String> deprecatedValues = new LinkedHashSet<>();
1029 
1030         for (Attribute a : current.attributes.keySet()) {
1031             if (seen.seenAttributes.contains(a)) {
1032                 continue;
1033             }
1034             seen.seenAttributes.add(a);
1035             boolean attributeDeprecated = elementDeprecated || isDeprecated(current.name, a.name, "*");
1036 
1037             deprecatedValues.clear();
1038 
1039             showComments(b, a.commentsPre, true);
1040             b.append("\n<!ATTLIST " + current.name + " " + a.name);
1041             if (a.type == AttributeType.ENUMERATED_TYPE) {
1042                 b.append(" (");
1043                 first = true;
1044                 for (String s : a.values.keySet()) {
1045                     if (first) {
1046                         first = false;
1047                     } else {
1048                         b.append(" | ");
1049                     }
1050                     b.append(s);
1051                     if (!attributeDeprecated && isDeprecated(current.name, a.name, s)) {
1052                         deprecatedValues.add(s);
1053                     }
1054                 }
1055                 b.append(")");
1056             } else {
1057                 b.append(' ').append(a.type);
1058             }
1059             if (a.mode != Mode.NULL) {
1060                 b.append(" ").append(a.mode.source);
1061             }
1062             if (a.defaultValue != null) {
1063                 b.append(" \"").append(a.defaultValue).append('"');
1064             }
1065             b.append(" >");
1066             showComments(b, a.commentsPost, false);
1067 //            if (attributeDeprecated != deprecatedComment) {
1068 //                System.out.println("*** BAD DEPRECATION ***" + a);
1069 //            }
1070             if (a.matchValue != null) {
1071                 b.append(COMMENT_PREFIX + "<!--@MATCH:" + a.matchValue.getName() + "-->");
1072             }
1073             if (METADATA.contains(a.name) || a.attributeStatus == AttributeStatus.metadata) {
1074                 b.append(COMMENT_PREFIX + "<!--@METADATA-->");
1075             } else if (!isDistinguishing(current.name, a.name)) {
1076                 b.append(COMMENT_PREFIX + "<!--@VALUE-->");
1077             }
1078             if (attributeDeprecated) {
1079                 b.append(COMMENT_PREFIX + "<!--@DEPRECATED-->");
1080             } else if (!deprecatedValues.isEmpty()) {
1081                 b.append(COMMENT_PREFIX + "<!--@DEPRECATED:" + Joiner.on(", ")
1082                     .join(deprecatedValues) + "-->");
1083             }
1084         }
1085         if (current.children.size() > 0) {
1086             for (Element e : current.children.keySet()) {
1087                 toString(e, b, seen);
1088             }
1089         }
1090     }
1091 
showComments(StringBuilder b, Set<String> comments, boolean separate)1092     private void showComments(StringBuilder b, Set<String> comments, boolean separate) {
1093         if (comments == null) {
1094             return;
1095         }
1096         if (separate && b.length() != 0) {
1097             b.append(System.lineSeparator());
1098         }
1099         for (String c : comments) {
1100             boolean deprecatedComment = false; // the following served its purpose... c.toLowerCase(Locale.ENGLISH).contains("deprecat");
1101             if (!deprecatedComment) {
1102                 if (separate) {
1103                     // special handling for very first comment
1104                     if (b.length() == 0) {
1105                         b.append("<!--")
1106                         .append(System.lineSeparator())
1107                         .append(c)
1108                         .append(System.lineSeparator())
1109                         .append("-->");
1110                         continue;
1111                     }
1112                     b.append(System.lineSeparator());
1113                 } else {
1114                     b.append(COMMENT_PREFIX);
1115                 }
1116                 b.append("<!-- ").append(c).append(" -->");
1117             }
1118         }
1119     }
1120 
removeFirst(Collection<T> elements, Transform<T, Boolean> matcher)1121     public static <T> T removeFirst(Collection<T> elements, Transform<T, Boolean> matcher) {
1122         for (Iterator<T> it = elements.iterator(); it.hasNext();) {
1123             T item = it.next();
1124             if (matcher.transform(item) == Boolean.TRUE) {
1125                 it.remove();
1126                 return item;
1127             }
1128         }
1129         return null;
1130     }
1131 
getElements()1132     public Set<Element> getElements() {
1133         return new LinkedHashSet<>(nameToElement.values());
1134     }
1135 
getAttributes()1136     public Set<Attribute> getAttributes() {
1137         return new LinkedHashSet<>(nameToAttributes.values());
1138     }
1139 
isDistinguishing(String elementName, String attribute)1140     public boolean isDistinguishing(String elementName, String attribute) {
1141         return getAttributeStatus(elementName, attribute) == AttributeStatus.distinguished;
1142     }
1143 
    /**
     * Attribute names that the DTD rendering in this class always marks with
     * {@code <!--@METADATA-->}, whatever the attribute's own status is.
     */
    static final Set<String> METADATA = new HashSet<>(Arrays.asList("references", "standard", "draft"));
1145 
addUnmodifiable(Set<String> comment, String addition)1146     static final Set<String> addUnmodifiable(Set<String> comment, String addition) {
1147         if (comment == null) {
1148             return Collections.singleton(addition);
1149         } else {
1150             comment = new LinkedHashSet<>(comment);
1151             comment.add(addition);
1152             return Collections.unmodifiableSet(comment);
1153         }
1154     }
1155 
1156     public class IllegalByDtdException extends RuntimeException {
1157         private static final long serialVersionUID = 1L;
1158         public final String elementName;
1159         public final String attributeName;
1160         public final String attributeValue;
1161 
IllegalByDtdException(String elementName, String attributeName, String attributeValue)1162         public IllegalByDtdException(String elementName, String attributeName, String attributeValue) {
1163             this.elementName = elementName;
1164             this.attributeName = attributeName;
1165             this.attributeValue = attributeValue;
1166         }
1167 
1168         @Override
getMessage()1169         public String getMessage() {
1170             return "Dtd " + dtdType
1171                 + " doesn’t allow "
1172                 + "element=" + elementName
1173                 + (attributeName == null ? "" : ", attribute: " + attributeName)
1174                 + (attributeValue == null ? "" : ", attributeValue: " + attributeValue);
1175         }
1176     }
1177 
1178     //@SuppressWarnings("unused")
isDeprecated(String elementName, String attributeName, String attributeValue)1179     public boolean isDeprecated(String elementName, String attributeName, String attributeValue) {
1180         Element element = nameToElement.get(elementName);
1181         if (element == null) {
1182             throw new IllegalByDtdException(elementName, attributeName, attributeValue);
1183         } else if (element.isDeprecatedElement) {
1184             return true;
1185         }
1186         if ("*".equals(attributeName) || "_q".equals(attributeName)) {
1187             return false;
1188         }
1189         Attribute attribute = element.getAttributeNamed(attributeName);
1190         if (attribute == null) {
1191             throw new IllegalByDtdException(elementName, attributeName, attributeValue);
1192         } else if (attribute.isDeprecatedAttribute) {
1193             return true;
1194         }
1195         return attribute.deprecatedValues.contains(attributeValue); // don't need special test for "*"
1196     }
1197 
isOrdered(String elementName)1198     public boolean isOrdered(String elementName) {
1199         Element element = nameToElement.get(elementName);
1200         if (element == null) {
1201             if (elementName.startsWith("icu:")) {
1202                 return false;
1203             }
1204             throw new IllegalByDtdException(elementName, null, null);
1205         }
1206         return element.isOrderedElement;
1207     }
1208 
getAttributeStatus(String elementName, String attributeName)1209     public AttributeStatus getAttributeStatus(String elementName, String attributeName) {
1210         if ("_q".equals(attributeName)) {
1211             return AttributeStatus.distinguished; // special case
1212         }
1213         Element element = nameToElement.get(elementName);
1214         if (element == null) {
1215             if (elementName.startsWith("icu:")) {
1216                 return AttributeStatus.distinguished;
1217             }
1218             throw new IllegalByDtdException(elementName, attributeName, null);
1219         }
1220         Attribute attribute = element.getAttributeNamed(attributeName);
1221         if (attribute == null) {
1222             if (elementName.startsWith("icu:")) {
1223                 return AttributeStatus.distinguished;
1224             }
1225             throw new IllegalByDtdException(elementName, attributeName, null);
1226         }
1227         return attribute.attributeStatus;
1228     }
1229 
    /**
     * Fallback attribute-value ordering, returned by {@code getAttributeValueComparator}
     * when no more specific ordering applies.
     */
    // The default is a map comparator, which compares numbers as numbers, and strings with UCA
    private static MapComparator<String> valueOrdering = new MapComparator<String>().setErrorOnMissing(false).freeze();
1232 
    // Fixed value orderings selected by getAttributeValueComparator for ldml and ldmlICU.
    // NOTE(review): each presumably sorts values by their position in the list; confirm in MapComparator.

    /** Used for the "day" attribute, and for the "type" attribute of the "day" element. */
    static MapComparator<String> dayValueOrder = new MapComparator<String>().add(
        "sun", "mon", "tue", "wed", "thu", "fri", "sat").freeze();
    /** Used for the "type" attribute of the "dayPeriod" element. */
    static MapComparator<String> dayPeriodOrder = new MapComparator<String>().add(
        "midnight", "am", "noon", "pm",
        "morning1", "morning2", "afternoon1", "afternoon2", "evening1", "evening2", "night1", "night2",
        // The ones on the following line are no longer used actively. Can be removed later?
        "earlyMorning", "morning", "midDay", "afternoon", "evening", "night", "weeHours").freeze();
    /** Used for the "type" attribute of the "listPatternPart" element. */
    static MapComparator<String> listPatternOrder = new MapComparator<String>().add(
        "start", "middle", "end", "2", "3").freeze();
    /** Used for the "type" attribute of elements whose name ends in "Width". */
    static MapComparator<String> widthOrder = new MapComparator<String>().add(
        "abbreviated", "narrow", "short", "wide", "all").freeze();
    /** Used for the "type" attribute of elements whose name ends in "FormatLength". */
    static MapComparator<String> lengthOrder = new MapComparator<String>().add(
        "full", "long", "medium", "short").freeze();
    /** Used for the "type" attribute of the "field" element. */
    static MapComparator<String> dateFieldOrder = new MapComparator<String>().add(
        "era", "era-short", "era-narrow",
        "year", "year-short", "year-narrow",
        "quarter", "quarter-short", "quarter-narrow",
        "month", "month-short", "month-narrow",
        "week", "week-short", "week-narrow",
        "weekOfMonth", "weekOfMonth-short", "weekOfMonth-narrow",
        "day", "day-short", "day-narrow",
        "dayOfYear", "dayOfYear-short", "dayOfYear-narrow",
        "weekday", "weekday-short", "weekday-narrow",
        "weekdayOfMonth", "weekdayOfMonth-short", "weekdayOfMonth-narrow",
        "sun", "sun-short", "sun-narrow",
        "mon", "mon-short", "mon-narrow",
        "tue", "tue-short", "tue-narrow",
        "wed", "wed-short", "wed-narrow",
        "thu", "thu-short", "thu-narrow",
        "fri", "fri-short", "fri-narrow",
        "sat", "sat-short", "sat-narrow",
        "dayperiod-short", "dayperiod", "dayperiod-narrow",
        "hour", "hour-short", "hour-narrow",
        "minute", "minute-short", "minute-narrow",
        "second", "second-short", "second-narrow",
        "zone", "zone-short", "zone-narrow").freeze();
1269 
    /* TODO: change this to be data-file driven. Can do with new Unit preferences info; also put them in a more meaningful order (metric vs other; size) */

    /**
     * Hard-coded list of unit identifiers, each prefixed by its category (for example
     * {@code "length-meter"}). Used by {@code getAttributeValueComparator} for the "type"
     * attribute of the "unit" element.
     * NOTE(review): presumably sorts units by their position in this list; confirm in MapComparator.
     */
    public static final MapComparator<String> unitOrder = new MapComparator<String>().add(
        "acceleration-g-force", "acceleration-meter-per-square-second",
        "angle-revolution", "angle-radian", "angle-degree", "angle-arc-minute", "angle-arc-second",
        "area-square-kilometer", "area-hectare", "area-square-meter", "area-square-centimeter",
        "area-square-mile", "area-acre", "area-square-yard", "area-square-foot", "area-square-inch",
        "area-dunam",
        "concentr-karat",
        "concentr-milligram-per-deciliter", "concentr-millimole-per-liter",
        "concentr-item",
        "concentr-portion",
        "concentr-permillion", "concentr-percent", "concentr-permille", "concentr-permyriad",
        "concentr-mole",
        "consumption-liter-per-kilometer", "consumption-liter-per-100-kilometer",
        "consumption-mile-per-gallon", "consumption-mile-per-gallon-imperial",
        "digital-petabyte", "digital-terabyte", "digital-terabit", "digital-gigabyte", "digital-gigabit",
        "digital-megabyte", "digital-megabit", "digital-kilobyte", "digital-kilobit",
        "digital-byte", "digital-bit",
        "duration-century", "duration-decade",
        "duration-year", "duration-year-person",
        "duration-month", "duration-month-person",
        "duration-week", "duration-week-person",
        "duration-day", "duration-day-person",
        "duration-hour", "duration-minute", "duration-second",
        "duration-millisecond", "duration-microsecond", "duration-nanosecond",
        "electric-ampere", "electric-milliampere", "electric-ohm", "electric-volt",
        "energy-kilocalorie", "energy-calorie", "energy-foodcalorie", "energy-kilojoule", "energy-joule", "energy-kilowatt-hour",
        "energy-electronvolt",
        "energy-british-thermal-unit",
        "energy-therm-us",
        "force-pound-force",
        "force-newton",
        "frequency-gigahertz", "frequency-megahertz", "frequency-kilohertz", "frequency-hertz",
        "graphics-em", "graphics-pixel", "graphics-megapixel",
        "graphics-pixel-per-centimeter", "graphics-pixel-per-inch",
        "graphics-dot-per-centimeter", "graphics-dot-per-inch",
        "graphics-dot",
        "length-earth-radius",
        "length-100-kilometer",
        "length-kilometer", "length-meter", "length-decimeter", "length-centimeter",
        "length-millimeter", "length-micrometer", "length-nanometer", "length-picometer",
        "length-mile", "length-yard", "length-foot", "length-inch",
        "length-parsec", "length-light-year", "length-astronomical-unit",
        "length-furlong", "length-fathom",
        "length-nautical-mile", "length-mile-scandinavian",
        "length-point",
        "length-solar-radius",
        "light-lux",
        "light-candela",
        "light-lumen",
        "light-solar-luminosity",
        "mass-metric-ton", "mass-kilogram", "mass-gram", "mass-milligram", "mass-microgram",
        "mass-ton", "mass-stone", "mass-pound", "mass-ounce",
        "mass-ounce-troy", "mass-carat",
        "mass-dalton",
        "mass-earth-mass",
        "mass-solar-mass",

        "mass-grain",

        "power-gigawatt", "power-megawatt", "power-kilowatt", "power-watt", "power-milliwatt",
        "power-horsepower",
        "pressure-millimeter-ofhg",
         "pressure-ofhg",
        "pressure-pound-force-per-square-inch", "pressure-inch-ofhg", "pressure-bar", "pressure-millibar", "pressure-atmosphere",
        "pressure-pascal",
        "pressure-hectopascal",
        "pressure-kilopascal",
        "pressure-megapascal",
        "speed-kilometer-per-hour", "speed-meter-per-second", "speed-mile-per-hour", "speed-knot",
        "temperature-generic", "temperature-celsius", "temperature-fahrenheit", "temperature-kelvin",
        "torque-pound-force-foot",
        "torque-newton-meter",
        "volume-cubic-kilometer", "volume-cubic-meter", "volume-cubic-centimeter",
        "volume-cubic-mile", "volume-cubic-yard", "volume-cubic-foot", "volume-cubic-inch",
        "volume-megaliter", "volume-hectoliter", "volume-liter", "volume-deciliter", "volume-centiliter", "volume-milliliter",
        "volume-pint-metric", "volume-cup-metric",
        "volume-acre-foot",
        "volume-bushel", "volume-gallon", "volume-gallon-imperial", "volume-quart", "volume-pint", "volume-cup",
        "volume-fluid-ounce", "volume-fluid-ounce-imperial", "volume-tablespoon", "volume-teaspoon",
        "volume-barrel",

        "volume-dessert-spoon",
        "volume-dessert-spoon-imperial",
        "volume-drop",
        "volume-dram",
        "volume-jigger",
        "volume-pinch",
        "volume-quart-imperial"
       // "volume-pint-imperial"
        ).freeze();
1362 
    /** Used for the "count" attribute on every element except "minDays". */
    static MapComparator<String> countValueOrder = new MapComparator<String>().add(
        "0", "1", "zero", "one", "two", "few", "many", "other").freeze();
    /** Used for the "type" attribute of the "unitLength" element. */
    static MapComparator<String> unitLengthOrder = new MapComparator<String>().add(
        "long", "short", "narrow").freeze();
    /** Used for the "type" attribute of the "currencyFormat" element. */
    static MapComparator<String> currencyFormatOrder = new MapComparator<String>().add(
        "standard", "accounting").freeze();
    /** Used for the "type" attribute of the "zone" element; obtained from {@code StandardCodes}. */
    static Comparator<String> zoneOrder = StandardCodes.make().getTZIDComparator();

    /**
     * The collator from {@code CLDRConfig}, viewed as a string comparator.
     * NOTE(review): the raw {@code (Comparator)} cast is unchecked; it relies on the collator
     * accepting String arguments.
     */
    static final Comparator<String> COMP = (Comparator) CLDRConfig.getInstance().getCollator();
1372 
1373     // Hack for US
1374     static final Comparator<String> UNICODE_SET_COMPARATOR = new Comparator<String>() {
1375         @Override
1376         public int compare(String o1, String o2) {
1377             if (o1.contains("{")) {
1378                 o1 = o1.replace("{", "");
1379             }
1380             if (o2.contains("{")) {
1381                 o2 = o2.replace("{", "");
1382             }
1383             return COMP.compare(o1, o2);
1384         }
1385 
1386     };
1387 
getAttributeValueComparator(String element, String attribute)1388     public static Comparator<String> getAttributeValueComparator(String element, String attribute) {
1389         return getAttributeValueComparator(DtdType.ldml, element, attribute);
1390     }
1391 
getAttributeValueComparator(DtdType type, String element, String attribute)1392     static Comparator<String> getAttributeValueComparator(DtdType type, String element, String attribute) {
1393         // The default is a map comparator, which compares numbers as numbers, and strings with UCA
1394         Comparator<String> comp = valueOrdering;
1395         if (type != DtdType.ldml && type != DtdType.ldmlICU) {
1396             return comp;
1397         }
1398         if (attribute.equals("day")) { // && (element.startsWith("weekend")
1399             comp = dayValueOrder;
1400         } else if (attribute.equals("type")) {
1401             if (element.endsWith("FormatLength")) {
1402                 comp = lengthOrder;
1403             } else if (element.endsWith("Width")) {
1404                 comp = widthOrder;
1405             } else if (element.equals("day")) {
1406                 comp = dayValueOrder;
1407             } else if (element.equals("field")) {
1408                 comp = dateFieldOrder;
1409             } else if (element.equals("zone")) {
1410                 comp = zoneOrder;
1411             } else if (element.equals("listPatternPart")) {
1412                 comp = listPatternOrder;
1413             } else if (element.equals("currencyFormat")) {
1414                 comp = currencyFormatOrder;
1415             } else if (element.equals("unitLength")) {
1416                 comp = unitLengthOrder;
1417             } else if (element.equals("unit")) {
1418                 comp = unitOrder;
1419             } else if (element.equals("dayPeriod")) {
1420                 comp = dayPeriodOrder;
1421             }
1422         } else if (attribute.equals("count") && !element.equals("minDays")) {
1423             comp = countValueOrder;
1424         } else if (attribute.equals("cp") && element.equals("annotation")) {
1425             comp = UNICODE_SET_COMPARATOR;
1426         }
1427         return comp;
1428     }
1429 
1430     /**
1431      * Comparator for attributes in CLDR files
1432      */
1433     private static AttributeValueComparator ldmlAvc = new AttributeValueComparator() {
1434         @Override
1435         public int compare(String element, String attribute, String value1, String value2) {
1436             Comparator<String> comp = getAttributeValueComparator(element, attribute);
1437             return comp.compare(value1, value2);
1438         }
1439     };
1440 
hasValue(String elementName)1441     public boolean hasValue(String elementName) {
1442         return nameToElement.get(elementName).type == ElementType.PCDATA;
1443     }
1444 
isMetadata(XPathParts pathPlain)1445     public boolean isMetadata(XPathParts pathPlain) {
1446         for (String s : pathPlain.getElements()) {
1447             Element e = getElementFromName().get(s);
1448             if (e.elementStatus == ElementStatus.metadata) {
1449                 return true;
1450             }
1451         }
1452         return false;
1453     }
1454 
isMetadataOld(DtdType dtdType2, XPathParts pathPlain)1455     public static boolean isMetadataOld(DtdType dtdType2, XPathParts pathPlain) {
1456         // TODO Don't use hard-coded list; instead add to DTD annotations
1457         final String element1 = pathPlain.getElement(1);
1458         final String element2 = pathPlain.getElement(2);
1459         final String elementN = pathPlain.getElement(-1);
1460         switch (dtdType2) {
1461         case ldml:
1462             switch (element1) {
1463             case "generation":
1464             case "metadata":
1465                 return true;
1466             }
1467             break;
1468         case ldmlBCP47:
1469             switch (element1) {
1470             case "generation":
1471             case "version":
1472                 return true;
1473             }
1474             break;
1475             ////supplementalData/transforms/transform[@source="am"][@target="am_FONIPA"][@direction="forward"]/comment
1476         case supplementalData:
1477             // these are NOT under /metadata/ but are actually metadata
1478             switch (element1) {
1479             case "generation":
1480             case "version":
1481             case "validity":
1482             case "references":
1483             case "coverageLevels":
1484                 return true;
1485             case "transforms":
1486                 return elementN.equals("comment");
1487             case "metadata":
1488                 // these ARE under /metadata/, but many others under /metadata/ are NOT actually metadata.
1489                 switch (element2) {
1490                 case "validity":
1491                 case "serialElements":
1492                 case "suppress":
1493                 case "distinguishing":
1494                 case "blocking":
1495                 case "casingData":
1496                     return true;
1497                 }
1498                 break;
1499             }
1500             break;
1501         default:
1502         }
1503         return false;
1504     }
1505 
isDeprecated(XPathParts pathPlain)1506     public boolean isDeprecated(XPathParts pathPlain) {
1507         for (int i = 0; i < pathPlain.size(); ++i) {
1508             String elementName = pathPlain.getElement(i);
1509             if (isDeprecated(elementName, "*", null)) {
1510                 return true;
1511             }
1512             for (String attribute : pathPlain.getAttributeKeys(i)) {
1513                 String attributeValue = pathPlain.getAttributeValue(i, attribute);
1514                 if (isDeprecated(elementName, attribute, attributeValue)) {
1515                     return true;
1516                 }
1517             }
1518         }
1519         return false;
1520     }
1521 
1522     public final static Splitter SPACE_SPLITTER = Splitter.on(CharMatcher.whitespace()).trimResults().omitEmptyStrings();
1523     public final static Splitter BAR_SPLITTER = Splitter.on('|').trimResults().omitEmptyStrings();
1524     public final static Splitter CR_SPLITTER = Splitter.on(CharMatcher.anyOf("\n\r")).trimResults().omitEmptyStrings();
1525 
1526     private static class XPathPartsSet {
1527         private final Set<XPathParts> list = new LinkedHashSet<>();
1528 
addElement(String element)1529         private void addElement(String element) {
1530             if (list.isEmpty()) {
1531                 list.add(new XPathParts().addElement(element));
1532             } else {
1533                 for (XPathParts item : list) {
1534                     item.addElement(element);
1535                 }
1536             }
1537         }
1538 
addAttribute(String attribute, String attributeValue)1539         private void addAttribute(String attribute, String attributeValue) {
1540             for (XPathParts item : list) {
1541                 item.addAttribute(attribute, attributeValue);
1542             }
1543         }
1544 
setElement(int i, String string)1545         private void setElement(int i, String string) {
1546             for (XPathParts item : list) {
1547                 item.setElement(i, string);
1548             }
1549         }
1550 
addAttributes(String attribute, List<String> attributeValues)1551         private void addAttributes(String attribute, List<String> attributeValues) {
1552             if (attributeValues.size() == 1) {
1553                 addAttribute(attribute, attributeValues.iterator().next());
1554             } else {
1555                 // duplicate all the items in the list with the given values
1556                 Set<XPathParts> newList = new LinkedHashSet<>();
1557                 for (XPathParts item : list) {
1558                     for (String attributeValue : attributeValues) {
1559                         XPathParts newItem = item.cloneAsThawed();
1560                         newItem.addAttribute(attribute, attributeValue);
1561                         newList.add(newItem);
1562                     }
1563                 }
1564                 list.clear();
1565                 list.addAll(newList);
1566             }
1567         }
1568 
toStrings()1569         private ImmutableSet<String> toStrings() {
1570             Builder<String> result = new ImmutableSet.Builder<>();
1571 
1572             for (XPathParts item : list) {
1573                 result.add(item.toString());
1574             }
1575             return result.build();
1576         }
1577 
1578         @Override
toString()1579         public String toString() {
1580             return list.toString();
1581         }
1582     }
1583 
getRegularizedPaths(XPathParts pathPlain, Multimap<String, String> extras)1584     public Set<String> getRegularizedPaths(XPathParts pathPlain, Multimap<String, String> extras) {
1585         extras.clear();
1586         Map<String, String> valueAttributes = new HashMap<>();
1587         XPathPartsSet pathResult = new XPathPartsSet();
1588         String element = null;
1589         for (int i = 0; i < pathPlain.size(); ++i) {
1590             element = pathPlain.getElement(i);
1591             pathResult.addElement(element);
1592             valueAttributes.clear();
1593             for (String attribute : pathPlain.getAttributeKeys(i)) {
1594                 AttributeStatus status = getAttributeStatus(element, attribute);
1595                 final String attributeValue = pathPlain.getAttributeValue(i, attribute);
1596                 switch (status) {
1597                 case distinguished:
1598                     AttributeType attrType = getAttributeType(element, attribute);
1599                     if (attrType == AttributeType.NMTOKENS) {
1600                         pathResult.addAttributes(attribute, SPACE_SPLITTER.splitToList(attributeValue));
1601                     } else {
1602                         pathResult.addAttribute(attribute, attributeValue);
1603                     }
1604                     break;
1605                 case value:
1606                     valueAttributes.put(attribute, attributeValue);
1607                     break;
1608                 case metadata:
1609                     break;
1610                 }
1611             }
1612             if (!valueAttributes.isEmpty()) {
1613                 boolean hasValue = hasValue(element);
1614                 // if it doesn't have a value, we construct new child elements, with _ prefix
1615                 // if it does have a value, we have to play a further trick, since
1616                 // we can't have a value and child elements at the same level.
1617                 // So we use a _ suffix on the element.
1618                 if (hasValue) {
1619                     pathResult.setElement(i, element + "_");
1620                 } else {
1621                     int debug = 0;
1622                 }
1623                 for (Entry<String, String> attributeAndValue : valueAttributes.entrySet()) {
1624                     final String attribute = attributeAndValue.getKey();
1625                     final String attributeValue = attributeAndValue.getValue();
1626 
1627                     Set<String> pathsShort = pathResult.toStrings();
1628                     AttributeType attrType = getAttributeType(element, attribute);
1629                     for (String pathShort : pathsShort) {
1630                         pathShort += "/_" + attribute;
1631                         if (attrType == AttributeType.NMTOKENS) {
1632                             for (String valuePart : SPACE_SPLITTER.split(attributeValue)) {
1633                                 extras.put(pathShort, valuePart);
1634                             }
1635                         } else {
1636                             extras.put(pathShort, attributeValue);
1637                         }
1638                     }
1639                 }
1640                 if (hasValue) {
1641                     pathResult.setElement(i, element); // restore
1642                 }
1643             }
1644         }
1645         // Only add the path if it could have a value, looking at the last element
1646         if (!hasValue(element)) {
1647             return null;
1648         }
1649         return pathResult.toStrings();
1650     }
1651 
getAttributeType(String elementName, String attributeName)1652     public AttributeType getAttributeType(String elementName, String attributeName) {
1653         Attribute attr = getAttribute(elementName, attributeName);
1654         return (attr != null) ? attr.type : null;
1655     }
1656 
getAttribute(String elementName, String attributeName)1657     public Attribute getAttribute(String elementName, String attributeName) {
1658         Element element = nameToElement.get(elementName);
1659         return (element != null) ? element.getAttributeNamed(attributeName) : null;
1660     }
1661 
1662     // TODO: add support for following to DTD annotations, and rework API
1663 
    /**
     * Element names that make {@code getValueSplitter} return {@code SPACE_SPLITTER}
     * when any of them occurs in the path.
     */
    static final Set<String> SPACED_VALUES = ImmutableSet.of(
        "idValidity",
        "languageGroup");
1667 
getValueSplitter(XPathParts pathPlain)1668     public static Splitter getValueSplitter(XPathParts pathPlain) {
1669         if (!Collections.disjoint(pathPlain.getElements(), SPACED_VALUES)) {
1670             return SPACE_SPLITTER;
1671         } else if (pathPlain.getElement(-1).equals("annotation")
1672             && !pathPlain.getAttributeKeys(-1).contains("tts")) {
1673             return BAR_SPLITTER;
1674         }
1675         return CR_SPLITTER;
1676     }
1677 
isComment(XPathParts pathPlain, String line)1678     public static boolean isComment(XPathParts pathPlain, String line) {
1679         if (pathPlain.contains("transform")) {
1680             if (line.startsWith("#")) {
1681                 return true;
1682             }
1683         }
1684         return false;
1685     }
1686 
isExtraSplit(String extraPath)1687     public static boolean isExtraSplit(String extraPath) {
1688         if (extraPath.endsWith("/_type") && extraPath.startsWith("//supplementalData/metaZones/mapTimezones")) {
1689             return true;
1690         }
1691         return false;
1692     }
1693 
1694     /**
1695      * Return the value status for an EAV
1696      */
getValueStatus(String elementName, String attributeName, String value)1697     public ValueStatus getValueStatus(String elementName, String attributeName, String value) {
1698         Element element = nameToElement.get(elementName);
1699         if (element == null) {
1700             return ValueStatus.invalid;
1701         }
1702         Attribute attr = element.getAttributeNamed(attributeName);
1703         if (attr == null) {
1704             return ValueStatus.invalid;
1705         }
1706         return attr.getValueStatus(value);
1707     }
1708 
1709     /**
1710      * Return element-attribute pairs with non-enumerated values, for quick checks.
1711      */
getNonEnumerated(Map<String,String> matchValues)1712     public Multimap<String, String> getNonEnumerated(Map<String,String> matchValues) {
1713         Multimap<String,String> nonEnumeratedElementToAttribute = TreeMultimap.create(); // make tree for ease of debugging
1714         for (Entry<String, Element> entry : nameToElement.entrySet()) {
1715             Element element = entry.getValue();
1716             for (Attribute attribute : element.attributes.keySet()) {
1717                 if (attribute.type != AttributeType.ENUMERATED_TYPE) {
1718                     String elementName = element.getName();
1719                     String attrName = attribute.getName();
1720                     nonEnumeratedElementToAttribute.put(elementName, attrName);
1721                     if (attribute.matchValue != null) {
1722                         matchValues.put(elementName + "\t" + attrName, attribute.matchValue.getName());
1723                     }
1724                 }
1725             }
1726         }
1727         return ImmutableSetMultimap.copyOf(nonEnumeratedElementToAttribute);
1728     }
1729 }
1730