1 package org.unicode.cldr.api;
2 
3 import static com.google.common.base.Preconditions.checkNotNull;
4 
5 import java.util.ArrayList;
6 import java.util.Comparator;
7 import java.util.Iterator;
8 import java.util.LinkedHashMap;
9 import java.util.List;
10 import java.util.Map;
11 import java.util.regex.Pattern;
12 
13 import org.unicode.cldr.util.CLDRFile;
14 import org.unicode.cldr.util.CldrUtility;
15 import org.unicode.cldr.util.XPathParts;
16 
17 import com.google.common.collect.Lists;
18 
19 /**
20  * Serializes a CLDRFile as a sequence of {@link CldrValue CldrValues}.
21  */
22 final class CldrFileDataSource implements CldrData {
23     private static final Pattern CAPTURE_SORT_INDEX = Pattern.compile("#([0-9]+)");
24 
25     private final CLDRFile source;
26 
CldrFileDataSource(CLDRFile source)27     CldrFileDataSource(CLDRFile source) {
28         this.source = checkNotNull(source);
29     }
30 
31     @Override
accept(PathOrder order, ValueVisitor visitor)32     public void accept(PathOrder order, ValueVisitor visitor) {
33         Iterator<String> paths;
34         switch (order) {
35         case ARBITRARY:
36             paths = source.iterator();
37             break;
38 
39         case NESTED_GROUPING:
40             // Distinguishing paths when sorted by string order should yield "nested grouping".
41             // This is because lexicographical order is determined by the earliest character
42             // difference, which either occurs in the element name or the attribute declaration.
43             // Either way, the string before the first difference will agree on zero or more
44             // complete path elements and order is always decided by a change to the lowest path
45             // element. This should therefore result in common parent prefixes always being visited
46             // consecutively. It also (like DTD ordering) greatly improves the performance when
47             // parsing paths because consecutive paths share common parent elements.
48             paths = source.iterator(null, Comparator.naturalOrder());
49             break;
50 
51         case DTD:
52             paths = source.iterator(null, source.getComparator());
53             break;
54 
55         default:
56             throw new AssertionError("Unknown path ordering: " + order);
57         }
58         read(paths, source, visitor);
59     }
60 
61     @Override
62     /* @Nullable */
get(CldrPath cldrPath)63     public CldrValue get(CldrPath cldrPath) {
64         String dPath = getInternalPathString(cldrPath);
65         String fullXPath = source.getFullXPath(dPath);
66         if (fullXPath == null) {
67             return null;
68         }
69         XPathParts pathPaths = XPathParts.getFrozenInstance(fullXPath);
70         int length = pathPaths.size();
71         Map<AttributeKey, String> attributes = new LinkedHashMap<>();
72         for (int n = 0; n < length; n++) {
73             CldrPaths.processPathAttributes(
74                 pathPaths.getElement(n),
75                 pathPaths.getAttributes(n),
76                 cldrPath.getDataType(),
77                 e -> {},
78                 attributes::put);
79         }
80         // This is MUCH faster if you pass the distinguishing path in. If the CLDRFile is
81         // "unresolved" then we can get the special "inheritance marker" returned, which
82         // should just be treated as if there was no value present.
83         String value = source.getStringValue(dPath);
84         if (value == null || value.equals(CldrUtility.INHERITANCE_MARKER)) {
85             return null;
86         }
87         return CldrValue.create(value, attributes, cldrPath);
88     }
89 
getInternalPathString(CldrPath p)90     private static String getInternalPathString(CldrPath p) {
91         // This is the distinguishing xpath, but possibly with a sort index present (e.g.
92         // foo#42[@bar="x"]). So to get the internal path as used by CLDRFile, we must convert '#N'
93         // into '[@_q="N"]'
94         String dpath = p.toString();
95         if (dpath.indexOf('#') != -1) {
96             dpath = CAPTURE_SORT_INDEX.matcher(dpath).replaceAll("[@_q=\"$1\"]");
97         }
98         return dpath;
99     }
100 
read(Iterator<String> paths, CLDRFile src, ValueVisitor visitor)101     private void read(Iterator<String> paths, CLDRFile src, ValueVisitor visitor) {
102         Map<AttributeKey, String> valueAttributes = new LinkedHashMap<>();
103 
104         // This is a bit fiddly since we add path elements in reverse order to the 'stack' but want
105         // to access them using the path element index. E.g. if we add the path a->b->c->d to the
106         // stack we get "(d,c,b,a)" in the array, but really want "(a,b,c,d)" to avoid having to
107         // use recursion or other tricks to reverse the order of addition, we can just create a
108         // reversed _view_ onto the list and pass that around. We could just insert the elements at
109         // the front of the array (rather than adding them at the end) but that means repeated
110         // copying of existing elements to make room, so it's slower.
111         //
112         // This has the path elements pushed into it in reverse order.
113         List<CldrPath> previousElementStack = new ArrayList<>();
114         // This views the path elements in forward order.
115         List<CldrPath> previousElements = Lists.reverse(previousElementStack);
116 
117         while (paths.hasNext()) {
118             String dPath = paths.next();
119             // This is MUCH faster if you pass the distinguishing path in. If the CLDRFile is
120             // "unresolved" then we can get the special "inheritance marker" returned, which
121             // should just be treated as if there was no value present.
122             String value = src.getStringValue(dPath);
123             if (value == null || value.equals(CldrUtility.INHERITANCE_MARKER)) {
124                 continue;
125             }
126             // There's a cache behind XPathParts which probably makes it faster to lookup these
127             // instances rather than parse them each time (it all depends on whether this is the
128             // first time the full paths are used).
129             CldrPath cldrPath = CldrPaths.processXPath(
130                 src.getFullXPath(dPath), previousElements, valueAttributes::put);
131 
132             if (CldrPaths.isLeafPath(cldrPath) && CldrPaths.shouldEmit(cldrPath)) {
133                 visitor.visit(CldrValue.create(value, valueAttributes, cldrPath));
134             }
135 
136             // Prepare the element stack for next time by pushing the current path onto it.
137             pushPathElements(cldrPath, previousElementStack);
138             valueAttributes.clear();
139         }
140     }
141 
142     /**
143      * Pushes the elements of the given path into the list. This is efficient but results in the
144      * list order being reversed (e.g. path "a->b->c->d" results in "(d,c,b,a)". A reversed view
145      * of this stack is used to present the path elements in "forward order".
146      */
pushPathElements(CldrPath cldrPath, List<CldrPath> stack)147     private static void pushPathElements(CldrPath cldrPath, List<CldrPath> stack) {
148         stack.clear();
149         for (CldrPath p = cldrPath; p != null; p = p.getParent()) {
150             stack.add(p);
151         }
152     }
153 }
154