1 // © 2017 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 package org.unicode.icu.tool.cldrtoicu.localedistance;
4 
5 import static com.google.common.base.Preconditions.checkArgument;
6 import static com.google.common.collect.ImmutableSetMultimap.toImmutableSetMultimap;
7 
8 import java.util.Map.Entry;
9 import java.util.Set;
10 import java.util.regex.Pattern;
11 
12 import org.unicode.cldr.api.AttributeKey;
13 import org.unicode.cldr.api.CldrData;
14 import org.unicode.cldr.api.CldrPath;
15 import org.unicode.cldr.api.PathMatcher;
16 
17 import com.google.common.base.Splitter;
18 import com.google.common.collect.ImmutableSet;
19 import com.google.common.collect.ImmutableSetMultimap;
20 import com.google.common.collect.SetMultimap;
21 import com.google.common.collect.Sets;
22 import com.google.common.collect.SortedSetMultimap;
23 import com.google.common.collect.TreeMultimap;
24 
25 /**
26  * Territory containment graph. This is built from CLDR supplemental data and
27  * represents all territories and their containment, including macro regions
28  * such as {@code "016"}. The root node of the graph is {@code "001"}.
29  */
30 final class TerritoryContainment {
31     // CLDR paths for containment data.
32     private static final PathMatcher CONTAINMENT_PATH =
33         PathMatcher.of("//supplementalData/territoryContainment/group[@type=*]");
34     private static final AttributeKey TYPE = AttributeKey.keyOf("group", "type");
35     private static final AttributeKey CONTAINS = AttributeKey.keyOf("group", "contains");
36 
37     // Standard CLDR list values are split by space.
38     // NOTE: You must omit empty strings, since otherwise " foo " becomes ("", "foo", "").
39     private static final Splitter LIST_SPLITTER =
40             Splitter.on(' ').trimResults().omitEmptyStrings();
41     // The world region must be the only root in the graph.
42     private static final String WORLD = "001";
43     private static final Pattern REGION = Pattern.compile("[A-Z]{2}|[0-9]{3}");
44 
45     /**
46      * Returns the territory containment information described by the given CLDR
47      * supplemental data.
48      */
getContainment(CldrData supplementalData)49     public static TerritoryContainment getContainment(CldrData supplementalData) {
50         // Directed, acyclic containment graph. Maps each territory to its direct contents.
51         // Note that since things like deprecated regions are included here, this allows
52         // sub-regions to have more than one parent.
53         SortedSetMultimap<String, String> graph = TreeMultimap.create();
54         supplementalData.accept(CldrData.PathOrder.DTD, v -> {
55             CldrPath path = v.getPath();
56             if (CONTAINMENT_PATH.matches(path)) {
57                 graph.putAll(v.get(TYPE), LIST_SPLITTER.split(v.get(CONTAINS)));
58             }
59         });
60         return new TerritoryContainment(ImmutableSetMultimap.copyOf(graph));
61     }
62 
63     /** Maps each macro-region to all its leaf contents (direct and indirect). */
64     private final ImmutableSetMultimap<String, String> macroToLeafRegions;
65 
TerritoryContainment(ImmutableSetMultimap<String, String> graph)66     private TerritoryContainment(ImmutableSetMultimap<String, String> graph) {
67         // Do some double checking of the CLDR data.
68         graph.values().forEach(
69                 r -> checkArgument(REGION.matcher(r).matches(), "bad region '%s' in: %s", r, graph));
70         checkArgument(graph.containsKey(WORLD), "missing world region '%s'", WORLD);
71         // There should be only one "root" in the graph, so every other region should be
72         // contained by something.
73         Set<String> allContained = ImmutableSet.copyOf(graph.values());
74         Set<String> roots = ImmutableSet.copyOf(Sets.difference(graph.keySet(), allContained));
75         checkArgument(roots.equals(ImmutableSet.of(WORLD)),
76             "world region '%s' must be the only containment graph root (was %s)", WORLD, roots);
77 
78         // Start with a copy of the direct containment graph (but still pass in the direct
79         // graph to avoid issues with concurrent modification).
80         // If the graph is cyclic, this step will never terminate and run out of memory
81         // (and since this is a build-time tool, that's probably fine).
82         SortedSetMultimap<String, String> resolved = TreeMultimap.create(graph);
83         resolve(WORLD, graph, resolved);
84         // For leaf regions (direct or indirect) just retain any sub-regions which don't
85         // have child regions from the resolved graph.
86         this.macroToLeafRegions = resolved.entries().stream()
87             // Only keep macro regions (leaf regions don't have child regions by definition).
88             .filter(e -> !graph.get(e.getKey()).isEmpty())
89             // Only keep the single-region e.getValue() if it is a leaf region.
90             .filter(e -> graph.get(e.getValue()).isEmpty())
91             .collect(toImmutableSetMultimap(Entry::getKey, Entry::getValue));
92     }
93 
94     // Recursively resolve the region and its child regions.
resolve( String region, SetMultimap<String, String> graph, SetMultimap<String, String> resolved)95     private static Set<String> resolve(
96         String region, SetMultimap<String, String> graph, SetMultimap<String, String> resolved) {
97         graph.get(region).forEach(sub -> resolved.putAll(region, resolve(sub, graph, resolved)));
98         return resolved.get(region);
99     }
100 
101     /**
102      * Returns the leaf regions contained in the given region (if the given region is a
103      * leaf region, then the empty set is returned).
104      */
getLeafRegionsOf(String region)105     public ImmutableSet<String> getLeafRegionsOf(String region) {
106         return macroToLeafRegions.get(region);
107     }
108 
109     /** Returns all leaf regions. */
getLeafRegions()110     public ImmutableSet<String> getLeafRegions() {
111         return macroToLeafRegions.get(WORLD);
112     }
113 
114     /** Returns all macro regions. */
getMacroRegions()115     public ImmutableSet<String> getMacroRegions() {
116         return macroToLeafRegions.keySet();
117     }
118 }
119