package org.unicode.cldr.api;

import static com.google.common.base.Preconditions.checkNotNull;

import java.util.ArrayList;
import java.util.Comparator;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;

import org.unicode.cldr.util.CLDRFile;
import org.unicode.cldr.util.CldrUtility;
import org.unicode.cldr.util.XPathParts;

import com.google.common.collect.Lists;

/**
 * A {@link CldrData} implementation backed by a single {@code CLDRFile}, exposing the file's
 * contents as a sequence of {@link CldrValue CldrValues}.
 */
final class CldrFileDataSource implements CldrData {
    /** Matches a sort index suffix of the form {@code #N} and captures the digits {@code N}. */
    private static final Pattern CAPTURE_SORT_INDEX = Pattern.compile("#([0-9]+)");

    private final CLDRFile source;

    /**
     * Creates a data source over the given file.
     *
     * @param source the file to read values from; must not be null.
     */
    CldrFileDataSource(CLDRFile source) {
        this.source = checkNotNull(source);
    }

    /**
     * Visits the values of the underlying file in the requested order.
     *
     * @param order the order in which paths are iterated.
     * @param visitor the visitor that receives each emitted value.
     * @throws AssertionError if the given order is not one of the handled constants.
     */
    @Override
    public void accept(PathOrder order, ValueVisitor visitor) {
        read(pathsInOrder(order), source, visitor);
    }

    /** Returns an iterator over the distinguishing paths of the source in the given order. */
    private Iterator<String> pathsInOrder(PathOrder order) {
        switch (order) {
        case ARBITRARY:
            return source.iterator();

        case NESTED_GROUPING:
            // Plain string ordering of distinguishing paths is expected to give "nested
            // grouping". Lexicographical order is decided by the first differing character,
            // which lies either in an element name or in an attribute declaration. In both
            // cases everything before that point agrees on zero or more complete path
            // elements, so the order is decided by the lowest path element that changed and
            // paths sharing a parent prefix should end up being visited consecutively. As
            // with DTD ordering, this also helps path parsing performance a lot, because
            // consecutive paths share their parent elements.
            return source.iterator(null, Comparator.naturalOrder());

        case DTD:
            return source.iterator(null, source.getComparator());

        default:
            throw new AssertionError("Unknown path ordering: " + order);
        }
    }

    /**
     * Looks up the value for a single path.
     *
     * @param cldrPath the path to look up.
     * @return the value for the path, or {@code null} if the file has no full path for it, has
     *     no value for it, or only holds the inheritance marker for it.
     */
    @Override
    /* @Nullable */
    public CldrValue get(CldrPath cldrPath) {
        String distinguishingPath = getInternalPathString(cldrPath);
        String fullPath = source.getFullXPath(distinguishingPath);
        if (fullPath == null) {
            return null;
        }

        XPathParts parts = XPathParts.getFrozenInstance(fullPath);
        Map<AttributeKey, String> attributes = new LinkedHashMap<>();
        for (int i = 0, elementCount = parts.size(); i < elementCount; i++) {
            CldrPaths.processPathAttributes(
                parts.getElement(i),
                parts.getAttributes(i),
                cldrPath.getDataType(),
                ignored -> {},
                attributes::put);
        }

        // Looking the value up via the distinguishing path is MUCH faster than using the full
        // path.
        String value = source.getStringValue(distinguishingPath);
        return isMissing(value) ? null : CldrValue.create(value, attributes, cldrPath);
    }

    /**
     * Returns whether a string obtained from the file counts as "no value". An "unresolved"
     * CLDRFile can return the special inheritance marker, which is treated exactly as if no
     * value were present.
     */
    private static boolean isMissing(String value) {
        return value == null || value.equals(CldrUtility.INHERITANCE_MARKER);
    }

    /**
     * Converts a path into the distinguishing path string used internally by CLDRFile. The
     * string form of a path may carry a sort index (e.g. {@code foo#42[@bar="x"]}); each
     * {@code #N} is rewritten to {@code [@_q="N"]}.
     */
    private static String getInternalPathString(CldrPath p) {
        String pathString = p.toString();
        if (pathString.indexOf('#') < 0) {
            // No sort index present, so the string can be used as it is.
            return pathString;
        }
        return CAPTURE_SORT_INDEX.matcher(pathString).replaceAll("[@_q=\"$1\"]");
    }

    /**
     * Walks the given distinguishing paths, and passes a value to the visitor for every path
     * that has a value and is an emittable leaf path.
     */
    private void read(Iterator<String> paths, CLDRFile src, ValueVisitor visitor) {
        Map<AttributeKey, String> attributes = new LinkedHashMap<>();

        // The elements of the previously processed path are kept so they can be handed to
        // the path processing for the next path. They are pushed leaf-first (path a->b->c->d
        // is stored as "(d,c,b,a)"), but must be read by path element index, i.e. as
        // "(a,b,c,d)". Rather than using recursion or similar tricks to reverse the order of
        // insertion, a reversed _view_ of the list is created once and passed around.
        // Inserting at the front of the list would also work, but it repeatedly copies the
        // existing elements to make room, so it is slower.
        //
        // Backing list: holds the path elements in reverse order.
        List<CldrPath> reversedElements = new ArrayList<>();
        // Live view of the backing list: presents the path elements in forward order.
        List<CldrPath> forwardElements = Lists.reverse(reversedElements);

        while (paths.hasNext()) {
            String distinguishingPath = paths.next();
            // Looking the value up via the distinguishing path is MUCH faster than using the
            // full path.
            String value = src.getStringValue(distinguishingPath);
            if (isMissing(value)) {
                continue;
            }

            // XPathParts has a cache behind it, so looking instances up is probably faster
            // than parsing the paths every time (this depends on whether the full paths have
            // been used before).
            String fullPath = src.getFullXPath(distinguishingPath);
            CldrPath path = CldrPaths.processXPath(fullPath, forwardElements, attributes::put);

            boolean emit = CldrPaths.isLeafPath(path) && CldrPaths.shouldEmit(path);
            if (emit) {
                visitor.visit(CldrValue.create(value, attributes, path));
            }

            // Remember the elements of this path for the next iteration and reset the
            // attribute map so it can be reused.
            pushPathElements(path, reversedElements);
            attributes.clear();
        }
    }

    /**
     * Replaces the contents of the list with the given path followed by each of its ancestors.
     * Appending is efficient, but leaves the list in reverse order (e.g. path "a->b->c->d"
     * results in "(d,c,b,a)"). A reversed view of the list is used to present the path
     * elements in "forward order".
     */
    private static void pushPathElements(CldrPath cldrPath, List<CldrPath> stack) {
        stack.clear();
        CldrPath element = cldrPath;
        while (element != null) {
            stack.add(element);
            element = element.getParent();
        }
    }
}