1 package org.unicode.cldr.tool;
2 
3 import java.io.IOException;
4 import java.io.PrintWriter;
5 import java.util.ArrayList;
6 import java.util.Arrays;
7 import java.util.HashSet;
8 import java.util.LinkedHashMap;
9 import java.util.LinkedHashSet;
10 import java.util.Map.Entry;
11 import java.util.Set;
12 import java.util.TreeMap;
13 import java.util.TreeSet;
14 
15 import org.unicode.cldr.draft.FileUtilities;
16 import org.unicode.cldr.tool.Option.Options;
17 import org.unicode.cldr.util.CLDRConfig;
18 import org.unicode.cldr.util.CLDRFile;
19 import org.unicode.cldr.util.CLDRFile.DraftStatus;
20 import org.unicode.cldr.util.CLDRLocale;
21 import org.unicode.cldr.util.CLDRPaths;
22 import org.unicode.cldr.util.ChainedMap;
23 import org.unicode.cldr.util.ChainedMap.M3;
24 import org.unicode.cldr.util.ChainedMap.M4;
25 import org.unicode.cldr.util.Counter;
26 import org.unicode.cldr.util.Factory;
27 import org.unicode.cldr.util.LanguageTagParser;
28 import org.unicode.cldr.util.PathHeader;
29 import org.unicode.cldr.util.PathHeader.SectionId;
30 import org.unicode.cldr.util.SupplementalDataInfo;
31 
32 import com.google.common.base.Objects;
33 import com.ibm.icu.impl.Relation;
34 import com.ibm.icu.text.UnicodeSet;
35 
36 public class ShowRegionalVariants {
    // Directory that receives the generated report files; assigned in main from the targetDir option.
    private static String MY_DIR;

    // When true, main ignores currency-symbol paths and any difference where either side is "∅∅∅".
    private static final boolean SKIP_SUPPRESSED_PATHS = true;

    // Shared CLDR handles. CONFIG must be declared first: the following initializers read from it.
    private static final CLDRConfig CONFIG = CLDRConfig.getInstance();
    private static final SupplementalDataInfo SUPPLEMENTAL_DATA_INFO = CONFIG.getSupplementalDataInfo();
    private static final Factory FACTORY = CONFIG.getCldrFactory();
    private static final CLDRFile ENGLISH = CONFIG.getEnglish();
    // Used to detect that a locale's (non-default-content) parent is root.
    private static final CLDRLocale ROOT = CLDRLocale.getInstance("root");
    //private static final CLDRLocale en_US_POSIX = CLDRLocale.getInstance("en_US_POSIX");
    // de_CH gets special treatment in main: values that differ from the parent only by "ß" vs "ss" are not reported.
    private static final CLDRLocale SWISS_HIGH_GERMAN = CLDRLocale.getInstance("de_CH");

    // Command-line option registry; each MyOptions constant registers itself here from its constructor.
    final static Options myOptions = new Options();
50 
51     enum MyOptions {
52         targetDir(".*", CLDRPaths.GEN_DIRECTORY + "/regional/", "target output file."),;
53         // boilderplate
54         final Option option;
55 
MyOptions(String argumentPattern, String defaultArgument, String helpText)56         MyOptions(String argumentPattern, String defaultArgument, String helpText) {
57             option = myOptions.add(this, argumentPattern, defaultArgument, helpText);
58         }
59     }
60 
main(String[] args)61     public static void main(String[] args) throws IOException {
62         myOptions.parse(MyOptions.targetDir, args, true);
63 
64         MY_DIR = MyOptions.targetDir.option.getValue();
65 
66         Set<String> coverageLocales = CONFIG.getStandardCodes().getLocaleCoverageLocales("cldr");
67         Set<String> dc = new HashSet<>(SUPPLEMENTAL_DATA_INFO.getDefaultContentLocales());
68         Set<String> skipLocales = new HashSet<>(Arrays.asList("root", "en_US_POSIX", "sr_Latn"));
69 
70         Relation<CLDRLocale, CLDRLocale> parentToChildren = Relation.of(new TreeMap<CLDRLocale, Set<CLDRLocale>>(), TreeSet.class);
71         // first, collect all locales for lookup by parents.
72 
73         for (String locale : FACTORY.getAvailable()) {
74             if (skipLocales.contains(locale.toString())
75                 || dc.contains(locale.toString())) {
76                 continue;
77             }
78             CLDRLocale loc = CLDRLocale.getInstance(locale);
79 
80             if (!coverageLocales.contains(loc.getLanguage())) {
81                 continue;
82             }
83             CLDRLocale parent = null;
84             for (CLDRLocale current = loc;; current = parent) {
85                 parent = current.getParent();
86                 if (!dc.contains(parent.toString())) { // skip over default content
87                     break;
88                 }
89             }
90             if (ROOT.equals(parent)) {
91                 continue;
92             } else if ("root".equals(parent.toString())) {
93                 throw new IllegalArgumentException("CLDRLocale failure");
94             }
95             parentToChildren.put(parent, loc);
96         }
97 
98         // show inheritance
99         System.out.println("Locale Name\tCode\tRegion\tInherits from\tCode");
100         showInheritance(parentToChildren);
101 
102         // next find out the unique items in children
103         Relation<String, String> valueToAncestors = Relation.of(new LinkedHashMap<String, Set<String>>(), LinkedHashSet.class);
104 
105         int count = 0;
106 
107         try (
108             PrintWriter grandSummary = FileUtilities.openUTF8Writer(MY_DIR, "GrandSummary.txt");
109             PrintWriter summary = FileUtilities.openUTF8Writer(MY_DIR, "Summary.txt");
110             PrintWriter detailFile = FileUtilities.openUTF8Writer(MY_DIR, "details.txt");) {
111             grandSummary.println("Parent\tName\tTotal Diff Count\tChildren");
112             summary.println("Parent\tName\tDiff Count\tChild\tChild Name");
113             detailFile
114                 .println(
115                     "№\tBase\tParent Locales I\tParent Locales II\tChild Locales\tEnglish value\tParent value I\tParent value II\tChild value\tCorrected Child value\tComments\tFix Parent value?\tSection\tPage\tHeader\tCode");
116             PathHeader.Factory phf = PathHeader.getFactory(ENGLISH);
117             String lastBase = "";
118             for (Entry<CLDRLocale, Set<CLDRLocale>> item : parentToChildren.keyValuesSet()) {
119                 CLDRLocale parent = item.getKey();
120                 String base = parent.getLanguage();
121 
122                 CLDRFile parentFile = FACTORY.make(parent.toString(), true, DraftStatus.contributed);
123                 M4<PathHeader, String, CLDRLocale, Boolean> pathToValuesToLocales = ChainedMap.of(
124                     new TreeMap<PathHeader, Object>(),
125                     new TreeMap<String, Object>(),
126                     new TreeMap<CLDRLocale, Object>(),
127                     Boolean.class);
128 
129                 Counter<CLDRLocale> childDiffs = new Counter<>();
130 
131                 for (CLDRLocale child : item.getValue()) {
132                     //childDiffs.add(child, 0); // make sure it shows up
133                     String childString = child.toString();
134                     CLDRFile childFile = FACTORY.make(childString, false, DraftStatus.contributed);
135                     for (String path : childFile) {
136                         if (SKIP_SUPPRESSED_PATHS) {
137                             if (path.contains("/currency") && path.contains("/symbol")) {
138                                 continue;
139                             }
140                         }
141                         String childValue = childFile.getStringValue(path);
142                         if (childValue == null) {
143                             continue;
144                         }
145                         String parentValue = parentFile.getStringValue(path);
146                         if (parentValue == null) {
147                             parentValue = "∅∅∅";
148                         }
149                         if (!Objects.equal(childValue, parentValue)) {
150                             if (SKIP_SUPPRESSED_PATHS) {
151                                 if ("∅∅∅".equals(childValue) || "∅∅∅".equals(parentValue)) {
152                                     continue; // skip suppressed paths
153                                 }
154                             }
155                             if (parentValue != null) {
156                                 if (child.equals(SWISS_HIGH_GERMAN)) {
157                                     String norm = parentValue.replace("ß", "ss");
158                                     if (childValue.equals(norm)) {
159                                         continue;
160                                     }
161                                 } else if (base.equals("en")) {
162                                     if (sameExceptEnd(childValue, "re", parentValue, "er")
163                                         || sameExceptEnd(childValue, "res", parentValue, "ers")) {
164                                         continue;
165                                     }
166                                 }
167                             }
168                             PathHeader pheader = phf.fromPath(path);
169                             if (SectionId.Special == pheader.getSectionId()) {
170                                 continue;
171                             }
172                             pathToValuesToLocales.put(pheader, childValue, child, Boolean.TRUE);
173                             childDiffs.add(child, 1);
174                         }
175                     }
176                 }
177 
178                 long totalChildDiffs = childDiffs.getTotal();
179                 if (totalChildDiffs == 0) {
180                     continue;
181                 }
182 
183                 if (!base.equals(lastBase)) {
184                     detailFile.println();
185 //                    if (detailFile != null) {
186 //                        detailFile.close();
187 //                    }
188 //                    detailFile = FileUtilities.openUTF8Writer(MY_DIR, "detail-" + base + ".txt");
189 //                    detailFile.println("Section\tPage\tHeader\tCode\tLocales\tvalue\tParent Locales\tvalue\tParent Locales\tvalue");
190 //                    lastBase = base;
191                 }
192 
193                 grandSummary.println(parent + "\t" + ENGLISH.getName(parent.toString()) + "\t" + totalChildDiffs + "\t" + item.getValue());
194                 for (CLDRLocale s : childDiffs.getKeysetSortedByKey()) {
195                     long childDiffValue = childDiffs.get(s);
196                     if (childDiffValue == 0) {
197                         continue;
198                     }
199                     summary.println(parent + "\t" + ENGLISH.getName(parent.toString()) + "\t" + childDiffValue + "\t" + s + "\t"
200                         + ENGLISH.getName(s.toString()));
201                 }
202 
203                 ArrayList<CLDRFile> parentChain = new ArrayList<>();
204                 for (CLDRLocale current = parent;;) {
205                     parentChain.add(FACTORY.make(current.toString(), true));
206                     CLDRLocale grand = current.getParent();
207                     if (ROOT.equals(grand)) {
208                         break;
209                     }
210                     current = grand;
211                 }
212 
213                 for (PathHeader ph : pathToValuesToLocales.keySet()) {
214                     M3<String, CLDRLocale, Boolean> values = pathToValuesToLocales.get(ph);
215                     valueToAncestors.clear();
216                     for (String value : values.keySet()) {
217                         Set<CLDRLocale> childLocales = values.get(value).keySet();
218                         String englishValue = ENGLISH.getStringValue(ph.getOriginalPath());
219                         String originalPath = ph.getOriginalPath();
220                         for (CLDRFile grand : parentChain) {
221                             valueToAncestors.put(quote(grand.getStringValue(originalPath)), grand.getLocaleID());
222                         }
223                         Set<Entry<String, Set<String>>> keyValuesSet = valueToAncestors.keyValuesSet();
224                         final int countParents = keyValuesSet.size();
225                         if (countParents < 1 || countParents > 2) {
226                             throw new IllegalArgumentException("Too few/many parents");
227                         }
228 
229                         // // №  Base    Parent Locales I    Parent Locales II   Child Locales   English value   Parent value I  Parent value II Child value
230                         // Corrected Child value   Comments    Fix Parent value?   Section Page    Header  Code
231 
232                         detailFile.print(
233                             ++count
234                                 + "\t" + base);
235 
236                         for (Entry<String, Set<String>> entry : keyValuesSet) {
237                             detailFile.print("\t" + entry.getValue());
238                         }
239                         if (countParents == 1) {
240                             detailFile.print("\t");
241                         }
242                         detailFile.print(""
243                             + "\t" + childLocales
244                             + "\t" + quote(englishValue));
245                         for (Entry<String, Set<String>> entry : keyValuesSet) {
246                             detailFile.print("\t" + entry.getKey());
247                         }
248                         if (countParents == 1) {
249                             detailFile.print("\t");
250                         }
251                         detailFile.print(""
252                             + "\t" + quote(value)
253                             + "\t" + ""
254                             + "\t" + ""
255                             + "\t" + ""
256                             + "\t" + ph);
257                         detailFile.println();
258                     }
259                 }
260 
261             }
262         }
263         System.out.println("DONE");
264 //        if (detailFile != null) {
265 //            detailFile.close();
266 //        }
267     }
268 
showInheritance(Relation<CLDRLocale, CLDRLocale> parentToChildren)269     private static void showInheritance(Relation<CLDRLocale, CLDRLocale> parentToChildren) {
270         Set<CLDRLocale> values = parentToChildren.values();
271         Set<CLDRLocale> topParents = new TreeSet<>(parentToChildren.keySet());
272         topParents.removeAll(values);
273         showInheritance(topParents, "", parentToChildren);
274     }
275 
showInheritance(Set<CLDRLocale> topParents, String prefix, Relation<CLDRLocale, CLDRLocale> parentToChildren)276     private static void showInheritance(Set<CLDRLocale> topParents, String prefix, Relation<CLDRLocale, CLDRLocale> parentToChildren) {
277         for (CLDRLocale locale : topParents) {
278             String current = nameForLocale(locale) + "\t" + prefix;
279             System.out.println(current);
280             Set<CLDRLocale> newChildren = parentToChildren.get(locale);
281             if (newChildren == null) {
282                 continue;
283             }
284             showInheritance(newChildren, current, parentToChildren);
285         }
286     }
287 
288     static final LikelySubtags LS = new LikelySubtags();
289 
nameForLocale(CLDRLocale key)290     private static String nameForLocale(CLDRLocale key) {
291         String country = key.getCountry();
292         if (country.isEmpty()) {
293             String max = LS.maximize(key.toString());
294             LanguageTagParser ltp = new LanguageTagParser().set(max);
295             country = "(" + ltp.getRegion() + ")";
296         }
297         return ENGLISH.getName(key.toString(), false, CLDRFile.SHORT_ALTS) + "\t" + key + "\t" + country;
298     }
299 
sameExceptEnd(String childValue, String childEnding, String parentValue, String parentEnding)300     private static boolean sameExceptEnd(String childValue, String childEnding, String parentValue, String parentEnding) {
301         if (childValue.endsWith(childEnding)
302             && parentValue.endsWith(parentEnding)
303             && childValue.substring(0, childValue.length() - childEnding.length()).equals(
304                 parentValue.substring(0, parentValue.length() - parentEnding.length()))) {
305             return true;
306         }
307         return false;
308     }
309 
310     static final UnicodeSet SPREAD_SHEET_SENSITIVE = new UnicodeSet().add('=').add('+').add('0', '9');
311 
quote(String value)312     private static String quote(String value) {
313         if (value == null || value.isEmpty()) {
314             return "∅∅∅";
315         }
316         int first = value.codePointAt(0);
317         return SPREAD_SHEET_SENSITIVE.contains(first) ? "'" + value : value;
318     }
319 }
320