1 // © 2020 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 package org.unicode.icu.tool.cldrtoicu.localedistance;
4 
import static com.google.common.base.Preconditions.checkArgument;
import static java.util.Arrays.asList;

import java.util.List;
import java.util.Locale;

import org.unicode.cldr.api.CldrData;
import org.unicode.cldr.api.CldrDataSupplier;
import org.unicode.cldr.api.CldrValue;

import com.google.common.base.Ascii;
import com.google.common.base.Function;
import com.google.common.base.Splitter;
import com.google.common.collect.ImmutableMap;
import com.ibm.icu.impl.locale.LSR;
import com.ibm.icu.util.BytesTrie;
20 
21 /**
22  * Utilities for easily generating test data for the LocaleDistanceMapper tests.
23  */
24 final class TestData {
25     /**
26      * Returns an LSR from a locale ID pattern (e.g. "und", "zh-Hant", "en-*-GB").
27      * This is definitely not a general locale parser!
28      */
lsr(String s)29     static LSR lsr(String s) {
30         List<String> parts = Splitter.on('-').splitToList(s);
31         checkArgument(parts.size() <= 3);
32         return new LSR(
33                 parts.get(0),
34                 parts.size() > 1 ? parts.get(1) : "",
35                 parts.size() > 2 ? parts.get(2) : "",
36                 LSR.DONT_CARE_FLAGS);
37     }
38 
39     enum AliasType { LANGUAGE, TERRITORY }
40 
41     enum AliasReason { DEPRECATED, OVERLONG, LEGACY, MACRO }
42 
43     /** Returns CLDR data for the given values. */
cldrData(CldrValue... values)44     static CldrData cldrData(CldrValue... values) {
45         return CldrDataSupplier.forValues(asList(values));
46     }
47 
48     /** Returns a CldrValue for a {@code <paradigmLocales>} element. */
paradigms(String... values)49     static CldrValue paradigms(String... values) {
50         return supplemental(
51                 "languageMatching/languageMatches[@type=\"written_new\"]/"
52                         + "paradigmLocales[@locales=\"%s\"]",
53                 String.join(" ", values));
54     }
55 
56     /** Returns a CldrValue for a {@code <matchVariable>} element. */
matchVariable(String id, String value)57     static CldrValue matchVariable(String id, String value) {
58         return supplemental(
59                 "languageMatching/languageMatches[@type=\"written_new\"]/"
60                         + "matchVariable[@id=\"%s\"][@value=\"%s\"]",
61                 id, value);
62     }
63 
64     /** Returns a CldrValue for a {@code <languageMatch>} element. */
languageMatch( String desired, String supported, int distance, boolean oneway, int sort)65     static CldrValue languageMatch(
66             String desired, String supported, int distance, boolean oneway, int sort) {
67         return supplemental(
68                 "languageMatching/languageMatches[@type=\"written_new\"]/"
69                         + "languageMatch[@_q=\"%d\"][@desired=\"%s\"][@supported=\"%s\"][@distance=\"%d\"]%s",
70                 sort, desired, supported, distance, oneway ? "[@oneway=\"true\"]" : "");
71     }
72 
73     /** Returns a CldrValue for either a {@code <languageAlias>} or {@code <territoryAlias>} element. */
alias(AliasType type, AliasReason reason, String value, String... replacement)74     static CldrValue alias(AliasType type, AliasReason reason, String value, String... replacement) {
75         return supplemental(
76                 "metadata/alias/%sAlias[@type=\"%s\"][@replacement=\"%s\"][@reason=\"%s\"]",
77                 lower(type), value, String.join(" ", replacement), lower(reason));
78     }
79 
80     /** Returns a CldrValue for either a {@code <likelySubtags>} element. */
likelySubtag(String from, String to)81     static CldrValue likelySubtag(String from, String to) {
82         return supplemental(
83                 "likelySubtags/likelySubtag[@from=\"%s\"][@to=\"%s\"]",
84                 from, to);
85     }
86 
87     /** Returns a CldrValue for a {@code <territoryContainment>} group element. */
territoryGroup(String region, String... subregions)88     static CldrValue territoryGroup(String region, String... subregions) {
89         return supplemental(
90                 "territoryContainment/group[@type=\"%s\"][@contains=\"%s\"]",
91                 region, String.join(" ", subregions));
92     }
93 
94     /**
95      * Returns a CldrValue for a {@code <territoryContainment>} group element where
96      * {@code @status="group"}.
97      */
territoryGrouping(String region, String... subregions)98     static CldrValue territoryGrouping(String region, String... subregions) {
99         return supplemental(
100                 "territoryContainment/group[@type=\"%s\"][@contains=\"%s\"][@status=\"group\"]",
101                 region, String.join(" ", subregions));
102     }
103 
104     /**
105      * Returns a CldrValue for a {@code <territoryContainment>} group element where
106      * {@code @status="deprecated"}.
107      */
deprecatedTerritory(String region, String... subregions)108     static CldrValue deprecatedTerritory(String region, String... subregions) {
109         return supplemental(
110                 "territoryContainment/group[@type=\"%s\"][@contains=\"%s\"][@status=\"deprecated\"]",
111                 region, String.join(" ", subregions));
112     }
113 
114     /**
115      * Returns a map from expanded Trie keys to mapped value. This is useful in allowing
116      * tests to use human readable data when testing Tries.
117      *
118      * @param star a string representing the Trie wildcard in the output keys, which for
119      *             readability differs between use cases (e.g. "*" for subtags and "*-*"
120      *             for match rules).
121      * @param fn a function to map the actual Trie value to a more readable value for
122      *           testing.
123      */
getTrieTable(BytesTrie trie, String star, Function<Integer, T> fn)124     static <T> ImmutableMap<String, T> getTrieTable(BytesTrie trie, String star, Function<Integer, T> fn) {
125         // Mostly copied from LocaleDistance (since the necessary constructor is private).
126         // Main change is the this no longer uses a TreeMap, since we want to test order.
127         ImmutableMap.Builder<String, T> map = ImmutableMap.builder();
128         StringBuilder sb = new StringBuilder();
129         for (BytesTrie.Entry entry : trie) {
130             sb.setLength(0);
131             int length = entry.bytesLength();
132             for (int i = 0; i < length; ++i) {
133                 byte b = entry.byteAt(i);
134                 if (b == '*') {
135                     sb.append(star).append('-');
136                 } else if (b >= 0) {
137                     sb.append((char) b);
138                 } else {  // end of subtag (high bit set)
139                     sb.append((char) (b & 0x7f)).append('-');
140                 }
141             }
142             assert sb.length() > 0 && sb.charAt(sb.length() - 1) == '-';
143             sb.setLength(sb.length() - 1);
144             map.put(sb.toString(), fn.apply(entry.value));
145         }
146         return map.build();
147     }
148 
supplemental(String path, Object... args)149     private static CldrValue supplemental(String path, Object... args) {
150         return CldrValue.parseValue(String.format("//supplementalData/" + path, args), "");
151     }
152 
lower(Enum<?> value)153     private static String lower(Enum<?> value) {
154         return Ascii.toLowerCase(value.name());
155     }
156 
TestData()157     private TestData() {}
158 }
159