1 package org.unicode.cldr.tool;
2 
3 import java.io.BufferedReader;
4 import java.io.File;
5 import java.io.IOException;
6 import java.io.PrintWriter;
7 import java.util.Arrays;
8 import java.util.BitSet;
9 import java.util.Collection;
10 import java.util.Comparator;
11 import java.util.HashMap;
12 import java.util.HashSet;
13 import java.util.LinkedHashSet;
14 import java.util.List;
15 import java.util.Map;
16 import java.util.Map.Entry;
17 import java.util.Set;
18 import java.util.TreeMap;
19 import java.util.TreeSet;
20 
21 import org.unicode.cldr.draft.FileUtilities;
22 import org.unicode.cldr.draft.ScriptMetadata;
23 import org.unicode.cldr.draft.ScriptMetadata.Info;
24 import org.unicode.cldr.util.Builder;
25 import org.unicode.cldr.util.CLDRConfig;
26 import org.unicode.cldr.util.CLDRFile;
27 import org.unicode.cldr.util.CLDRLocale;
28 import org.unicode.cldr.util.CLDRPaths;
29 import org.unicode.cldr.util.CldrUtility;
30 import org.unicode.cldr.util.Containment;
31 import org.unicode.cldr.util.Counter;
32 import org.unicode.cldr.util.Factory;
33 import org.unicode.cldr.util.Iso639Data;
34 import org.unicode.cldr.util.Iso639Data.Scope;
35 import org.unicode.cldr.util.LanguageTagParser;
36 import org.unicode.cldr.util.LocaleIDParser;
37 import org.unicode.cldr.util.Log;
38 import org.unicode.cldr.util.Organization;
39 import org.unicode.cldr.util.PatternCache;
40 import org.unicode.cldr.util.SimpleFactory;
41 import org.unicode.cldr.util.StandardCodes;
42 import org.unicode.cldr.util.SupplementalDataInfo;
43 import org.unicode.cldr.util.SupplementalDataInfo.BasicLanguageData;
44 import org.unicode.cldr.util.SupplementalDataInfo.BasicLanguageData.Type;
45 import org.unicode.cldr.util.SupplementalDataInfo.OfficialStatus;
46 import org.unicode.cldr.util.SupplementalDataInfo.PopulationData;
47 
48 import com.google.common.base.Joiner;
49 import com.google.common.collect.ImmutableMap;
50 import com.google.common.collect.ImmutableSet;
51 import com.ibm.icu.impl.Relation;
52 import com.ibm.icu.impl.Row;
53 import com.ibm.icu.impl.Row.R2;
54 import com.ibm.icu.impl.Row.R3;
55 import com.ibm.icu.impl.Row.R4;
56 import com.ibm.icu.lang.UScript;
57 import com.ibm.icu.text.Collator;
58 import com.ibm.icu.text.NumberFormat;
59 import com.ibm.icu.text.UTF16;
60 import com.ibm.icu.text.UnicodeSet;
61 import com.ibm.icu.text.UnicodeSetIterator;
62 import com.ibm.icu.util.ULocale;
63 
64 /**
65  * Problems:
66  * "und_Hani", "zh_Hani"
67  * "und_Sinh", "si_Sinh"
68  *
69  * @author markdavis
70  *
71  */
72 public class GenerateMaximalLocales {
73 
    /**
     * Placeholder region code. In {@code main}, map keys containing it are dropped and
     * values containing it have it replaced by {@code UNKNOWN_REGION}.
     */
    private static final String TEMP_UNKNOWN_REGION = "XZ";

    // NOTE(review): not referenced in the visible part of this file; by name, a key to trace while debugging additions.
    private static final String DEBUG_ADD_KEY = "und_Latn_ZA";

    // Read from the "GenerateMaximalLocalesDebug" property (false is passed as the fallback argument).
    // When true, main prints the explanatory maximize/minimize comment block.
    private static final boolean SHOW_ADD = CldrUtility.getProperty("GenerateMaximalLocalesDebug", false);
    // Read from the "GenerateMaximalLocalesSuppress" property (false is passed as the fallback argument).
    // NOTE(review): its use is outside the visible part of this file.
    private static final boolean SUPPRESS_CHANGES = CldrUtility.getProperty("GenerateMaximalLocalesSuppress", false);
    // Compile-time switch, currently off. NOTE(review): its use is outside the visible part of this file.
    private static final boolean SHOW_CONTAINERS = false;
81 
82     enum OutputStyle {
83         PLAINTEXT, C, C_ALT, XML
84     }
85 
86     private static OutputStyle OUTPUT_STYLE = OutputStyle.valueOf(CldrUtility.getProperty("OutputStyle", "XML", "XML")
87         .toUpperCase());
88 
89     // set based on above
90     private static final String SEPARATOR = OUTPUT_STYLE == OutputStyle.C || OUTPUT_STYLE == OutputStyle.C_ALT ? CldrUtility.LINE_SEPARATOR
91         : "\t";
92     private static final String TAG_SEPARATOR = OUTPUT_STYLE == OutputStyle.C_ALT ? "-" : "_";
93     // private static final boolean FAVOR_REGION = true; // OUTPUT_STYLE == OutputStyle.C_ALT;
94 
    // NOTE(review): not read in the visible part of this file; main calls tryDifferentAlgorithm unconditionally.
    private static final boolean tryDifferent = true;

    // Directories handed to SimpleFactory below: main, seed and exemplars.
    private static final File list[] = {
        new File(CLDRPaths.MAIN_DIRECTORY),
        new File(CLDRPaths.SEED_DIRECTORY),
        new File(CLDRPaths.EXEMPLARS_DIRECTORY) };

    // Factory over all three directories above, matching every file (".*").
    private static Factory factory = SimpleFactory.make(list, ".*");
    // Factory obtained from CLDRConfig. NOTE(review): its use is outside the visible part of this file.
    private static Factory mainFactory = CLDRConfig.getInstance().getCldrFactory();
    // Supplemental data loaded from CLDRPaths.SUPPLEMENTAL_DIRECTORY; source of the population data used below.
    private static SupplementalDataInfo supplementalData = SupplementalDataInfo
        .getInstance(CLDRPaths.SUPPLEMENTAL_DIRECTORY);
    // Source of language codes and their registry data (e.g. "Suppress-Script").
    private static StandardCodes standardCodes = StandardCodes.make();
    // The "en" file from the combined factory; used to look up English display names.
    private static CLDRFile english = factory.make("en", false);
108     static Relation<String, String> cldrContainerToLanguages = Relation.of(new HashMap<String, Set<String>>(), HashSet.class);
109     static {
110         for (CLDRLocale locale : ToolConfig.getToolInstance().getCldrFactory().getAvailableCLDRLocales()) {
111             String region = locale.getCountry();
112             if (region == null || region.isEmpty() || Containment.isLeaf(region)) {
113                 continue;
114             }
cldrContainerToLanguages.put(region, locale.getLanguage())115             cldrContainerToLanguages.put(region, locale.getLanguage());
116         }
cldrContainerToLanguages.freeze()117         cldrContainerToLanguages.freeze();
118         System.out.println("Keep containers " + cldrContainerToLanguages);
119     }
120 
    // NOTE(review): read outside the visible part of this file; by name, targets that must be retained — confirm.
    private static final List<String> KEEP_TARGETS = Arrays.asList("und_Arab_PK", "und_Latn_ET");
    // NOTE(review): read outside the visible part of this file; by name, deprecated ISO codes absent from the
    // language subtag registry — confirm.
    private static final ImmutableSet<String> deprecatedISONotInLST = ImmutableSet.of("scc", "scr");
123 
    /**
     * This is the simplest way to override, by supplying the max value.
     * It gets a very low weight, so doesn't override any stronger value.
     * <p>
     * Each entry is a full language_script_region tag. The language and script of every
     * entry are also used to build {@code FALLBACK_SCRIPTS}; when a language appears more
     * than once (e.g. "rhg"), the later entry's script is the one kept there.
     */
    private static final String[] MAX_ADDITIONS = new String[] {
        "bss_Latn_CM",
        "gez_Ethi_ET",
        "ken_Latn_CM",
        "und_Arab_PK",
        "wa_Latn_BE",

        "fub_Arab_CM",
        "fuf_Latn_GN",
        "kby_Arab_NE",
        "kdh_Arab_TG",
        "apd_Arab_TG",
        "zlm_Latn_TG",

        "cr_Cans_CA",
        "hif_Latn_FJ",
        "gon_Telu_IN",
        "lzz_Latn_TR",
        "lif_Deva_NP",
        "unx_Beng_IN",
        "unr_Beng_IN",
        "ttt_Latn_AZ",
        "pnt_Grek_GR",
        "tly_Latn_AZ",
        "tkr_Latn_AZ",
        "bsq_Bass_LR",
        "ccp_Cakm_BD",
        "blt_Tavt_VN",
        "rhg_Arab_MM",
        "rhg_Rohg_MM",
    };
159 
    /**
     * Hard overrides, mapping a (partial) locale tag to its full language_script_region form.
     * <p>
     * The following overrides MASH the final values, so they may not result in consistent results.
     * Safer is to add to MAX_ADDITIONS.
     * However, if you add here, add both the language and the language+script mappings.
     */
    // Many of the overrides below can be removed once the language/pop/country data is updated.
    private static final Map<String, String> LANGUAGE_OVERRIDES = CldrUtility.asMap(new String[][] {
        { "cic", "cic_Latn_US" },
        { "cic_Latn", "cic_Latn_US" },
        { "eo", "eo_Latn_001" },
        { "eo_Latn", "eo_Latn_001" },
        { "es", "es_Latn_ES" },
        { "es_Latn", "es_Latn_ES" },
        { "ff_BF", "ff_Latn_BF" },
        { "ff_GM", "ff_Latn_GM" },
        { "ff_GH", "ff_Latn_GH" },
        { "ff_GW", "ff_Latn_GW" },
        { "ff_LR", "ff_Latn_LR" },
        { "ff_NE", "ff_Latn_NE" },
        { "ff_NG", "ff_Latn_NG" },
        { "ff_SL", "ff_Latn_SL" },
        { "ff_Adlm", "ff_Adlm_GN" },
        { "ia", "ia_Latn_001" },
        { "ia_Latn", "ia_Latn_001" },
        { "io", "io_Latn_001" },
        { "io_Latn", "io_Latn_001" },
        { "jbo", "jbo_Latn_001" },
        { "jbo_Latn", "jbo_Latn_001" },
        { "ku_Arab", "ku_Arab_IQ" },
        { "lrc", "lrc_Arab_IR" },
        { "lrc_Arab", "lrc_Arab_IR" },
        { "man", "man_Latn_GM" },
        { "man_Latn", "man_Latn_GM" },
        { "mas", "mas_Latn_KE" },
        { "mas_Latn", "mas_Latn_KE" },
        { "mn", "mn_Cyrl_MN" },
        { "mn_Cyrl", "mn_Cyrl_MN" },
        { "mro", "mro_Mroo_BD" },
        { "mro_BD", "mro_Mroo_BD" },
        { "ms_Arab", "ms_Arab_MY" },
        { "pap", "pap_Latn_AW" },
        { "pap_Latn", "pap_Latn_AW" },
        { "prg", "prg_Latn_001" },
        { "prg_Latn", "prg_Latn_001" },
        { "rif", "rif_Tfng_MA" },
        { "rif_Latn", "rif_Latn_MA" },
        { "rif_Tfng", "rif_Tfng_MA" },
        { "rif_MA", "rif_Tfng_MA" },
        { "shi", "shi_Tfng_MA" },
        { "shi_Tfng", "shi_Tfng_MA" },
        { "shi_MA", "shi_Tfng_MA" },
        { "sr_Latn", "sr_Latn_RS" },
        { "ss", "ss_Latn_ZA" },
        { "ss_Latn", "ss_Latn_ZA" },
        { "swc", "swc_Latn_CD" },
        { "ti", "ti_Ethi_ET" },
        { "ti_Ethi", "ti_Ethi_ET" },
        { "und", "en_Latn_US" },
        { "und_Adlm", "ff_Adlm_GN" },
        { "und_Adlm_GN", "ff_Adlm_GN" },
        { "und_Arab", "ar_Arab_EG" },
        { "und_Arab_PK", "ur_Arab_PK" },
        { "und_Bopo", "zh_Bopo_TW" },
        { "und_Deva_FJ", "hif_Deva_FJ" },
        { "und_EZ", "de_Latn_EZ" },
        { "und_Hani", "zh_Hani_CN" },
        { "und_Hani_CN", "zh_Hani_CN" },
        { "und_Kana", "ja_Kana_JP" },
        { "und_Kana_JP", "ja_Kana_JP" },
        { "und_Latn", "en_Latn_US" },
        { "und_Latn_ET", "en_Latn_ET" },
        { "und_Latn_NE", "ha_Latn_NE" },
        { "und_Latn_PH", "fil_Latn_PH" },
        { "und_ML", "bm_Latn_ML" },
        { "und_Latn_ML", "bm_Latn_ML" },
        { "und_MU", "mfe_Latn_MU" },
        { "und_NE", "ha_Latn_NE" },
        { "und_PH", "fil_Latn_PH" },
        { "und_PK", "ur_Arab_PK" },
        { "und_SO", "so_Latn_SO" },
        { "und_SS", "en_Latn_SS" },
        { "und_TK", "tkl_Latn_TK" },
        { "und_UN", "en_Latn_UN" },
        { "und_005", "pt_Latn_BR" },
        { "vo", "vo_Latn_001" },
        { "vo_Latn", "vo_Latn_001" },
        { "yi", "yi_Hebr_001" },
        { "yi_Hebr", "yi_Hebr_001" },
        { "yue", "yue_Hant_HK" },
        { "yue_Hant", "yue_Hant_HK" },
        { "yue_Hans", "yue_Hans_CN" },
        { "yue_CN", "yue_Hans_CN" },
        { "zh_Hani", "zh_Hani_CN" },

        { "zh_Bopo", "zh_Bopo_TW" },
        { "ccp", "ccp_Cakm_BD" },
        { "ccp_Cakm", "ccp_Cakm_BD" },
        { "und_Cakm", "ccp_Cakm_BD" },
        { "cu_Glag", "cu_Glag_BG" },
        { "sd_Khoj", "sd_Khoj_IN" },
        { "lif_Limb", "lif_Limb_IN" },
        { "grc_Linb", "grc_Linb_GR" },
        { "arc_Nbat", "arc_Nbat_JO" },
        { "arc_Palm", "arc_Palm_SY" },
        { "pal_Phlp", "pal_Phlp_CN" },
        { "en_Shaw", "en_Shaw_GB" },
        { "sd_Sind", "sd_Sind_IN" },
        { "und_Brai", "fr_Brai_FR" }, // hack
        { "und_Hanb", "zh_Hanb_TW" }, // Special script code
        { "zh_Hanb", "zh_Hanb_TW" }, // Special script code
        { "und_Jamo", "ko_Jamo_KR" }, // Special script code

        //{"und_Cyrl_PL", "be_Cyrl_PL"},

        // Disabled language-only overrides; the same maximal tags are listed in MAX_ADDITIONS.
//        {"cr", "cr_Cans_CA"},
//        {"hif", "hif_Latn_FJ"},
//        {"gon", "gon_Telu_IN"},
//        {"lzz", "lzz_Latn_TR"},
//        {"lif", "lif_Deva_NP"},
//        {"unx", "unx_Beng_IN"},
//        {"unr", "unr_Beng_IN"},
//        {"ttt", "ttt_Latn_AZ"},
//        {"pnt", "pnt_Grek_GR"},
//        {"tly", "tly_Latn_AZ"},
//        {"tkr", "tkr_Latn_AZ"},
//        {"bsq", "bsq_Bass_LR"},
//        {"ccp", "ccp_Cakm_BD"},
//        {"blt", "blt_Tavt_VN"},
        { "mis_Medf", "mis_Medf_NG" },

        { "ku_Yezi", "ku_Yezi_GE" },
        { "und_EU", "en_Latn_IE" },
    });
292 
    /**
     * The following supplements the suppress-script. It overrides info from exemplars and the locale info.
     * Each row is { locale, script }; the rows are written into {@code localeToScriptCache} after the
     * Suppress-Script values, so they replace any Suppress-Script entry for the same key.
     */
    private static String[][] SpecialScripts = {
        { "zh", "Hans" }, // Hans (not Hani)
        { "yue", "Hant" }, // Hant (not Hani)
        { "chk", "Latn" }, // Chuukese (Micronesia)
        { "fil", "Latn" }, // Filipino (Philippines)
        { "ko", "Kore" }, // Korean (North Korea)
        { "ko_KR", "Kore" }, // Korean (South Korea)
        { "pap", "Latn" }, // Papiamento (Netherlands Antilles)
        { "pau", "Latn" }, // Palauan (Palau)
        { "su", "Latn" }, // Sundanese (Indonesia)
        { "tet", "Latn" }, // Tetum (East Timor)
        { "tk", "Latn" }, // Turkmen (Turkmenistan)
        { "ty", "Latn" }, // Tahitian (French Polynesia)
        { "ja", "Jpan" }, // Special script for Japan
        { "und", "Latn" }, // Ultimate fallback
    };
312 
313     private static Map<String, String> localeToScriptCache = new TreeMap<>();
314     static {
315         for (String language : standardCodes.getAvailableCodes("language")) {
316             Map<String, String> info = standardCodes.getLangData("language", language);
317             String script = info.get("Suppress-Script");
318             if (script != null) {
localeToScriptCache.put(language, script)319                 localeToScriptCache.put(language, script);
320             }
321         }
322         for (String[] pair : SpecialScripts) {
localeToScriptCache.put(pair[0], pair[1])323             localeToScriptCache.put(pair[0], pair[1]);
324         }
325     }
326 
327     private static Map<String, String> FALLBACK_SCRIPTS;
328     static {
329         LanguageTagParser additionLtp = new LanguageTagParser();
330         Map<String, String> _FALLBACK_SCRIPTS = new TreeMap<>();
331         for (String addition : MAX_ADDITIONS) {
332             additionLtp.set(addition);
333             String lan = additionLtp.getLanguage();
_FALLBACK_SCRIPTS.put(lan, additionLtp.getScript())334             _FALLBACK_SCRIPTS.put(lan, additionLtp.getScript());
335         }
336         FALLBACK_SCRIPTS = ImmutableMap.copyOf(_FALLBACK_SCRIPTS);
337     }
338 
    // Running error tally; main prints it at the very end as "ERRORS:". It is incremented elsewhere in the file.
    private static int errorCount;
340 
main(String[] args)341     public static void main(String[] args) throws IOException {
342 
343         printDefaultLanguagesAndScripts();
344 
345         Map<String, String> toMaximized = new TreeMap<>();
346 
347         tryDifferentAlgorithm(toMaximized);
348 
349         minimize(toMaximized);
350 
351         // HACK TEMP_UNKNOWN_REGION
352         // this is to get around the removal of items with ZZ in minimize.
353         // probably cleaner way to do it, but this provides control over just those we want to retain.
354         Set<String> toRemove = new TreeSet<>();
355         Map<String, String> toFix = new TreeMap<>();
356         for (Entry<String, String> entry : toMaximized.entrySet()) {
357             String key = entry.getKey();
358             String value = entry.getValue();
359             if (key.contains(TEMP_UNKNOWN_REGION)) {
360                 toRemove.add(key);
361             } else if (value.contains(TEMP_UNKNOWN_REGION)) {
362                 toFix.put(key, value.replace(TEMP_UNKNOWN_REGION, UNKNOWN_REGION));
363             }
364         }
365         for (String key : toRemove) {
366             toMaximized.remove(key);
367         }
368         toMaximized.putAll(toFix);
369 
370         Map<String, String> oldLikely = SupplementalDataInfo.getInstance().getLikelySubtags();
371         Set<String> changes = compareMapsAndFixNew("*WARNING* Likely Subtags: ", oldLikely, toMaximized, "ms_Arab",
372             "ms_Arab_ID");
373         System.out.println(Joiner.on("\n").join(changes));
374 
375         if (OUTPUT_STYLE == OutputStyle.C_ALT) {
376             doAlt(toMaximized);
377         }
378 
379         if (SHOW_ADD)
380             System.out
381                 .println("/*"
382                     + CldrUtility.LINE_SEPARATOR
383                     + " To Maximize:"
384                     +
385                     CldrUtility.LINE_SEPARATOR
386                     + " If using raw strings, make sure the input language/locale uses the right separator, and has the right casing."
387                     +
388                     CldrUtility.LINE_SEPARATOR
389                     + " Remove the script Zzzz and the region ZZ if they occur; change an empty language subtag to 'und'."
390                     +
391                     CldrUtility.LINE_SEPARATOR
392                     + " Get the language, region, and script from the cleaned-up tag, plus any variants/extensions"
393                     +
394                     CldrUtility.LINE_SEPARATOR
395                     + " Try each of the following in order (where the field exists)"
396                     +
397                     CldrUtility.LINE_SEPARATOR
398                     + "   Lookup language-script-region. If in the table, return the result + variants"
399                     +
400                     CldrUtility.LINE_SEPARATOR
401                     + "   Lookup language-script. If in the table, return the result (substituting the original region if it exists) + variants"
402                     +
403                     CldrUtility.LINE_SEPARATOR
404                     + "   Lookup language-region. If in the table, return the result (substituting the original script if it exists) + variants"
405                     +
406                     CldrUtility.LINE_SEPARATOR
407                     + "   Lookup language. If in the table, return the result (substituting the original region and script if either or both exist) + variants"
408                     +
409                     CldrUtility.LINE_SEPARATOR
410                     +
411                     CldrUtility.LINE_SEPARATOR
412                     + " Example: Input is zh-ZZZZ-SG."
413                     +
414                     CldrUtility.LINE_SEPARATOR
415                     + " Normalize to zh-SG. Lookup in table. No match."
416                     +
417                     CldrUtility.LINE_SEPARATOR
418                     + " Remove SG, but remember it. Lookup zh, and get the match (zh-Hans-CN). Substitute SG, and return zh-Hans-SG."
419                     +
420                     CldrUtility.LINE_SEPARATOR
421                     +
422                     CldrUtility.LINE_SEPARATOR
423                     + " To Minimize:"
424                     +
425                     CldrUtility.LINE_SEPARATOR
426                     + " First get max = maximize(input)."
427                     +
428                     CldrUtility.LINE_SEPARATOR
429                     + " Then for trial in {language, language-region, language-script}"
430                     +
431                     CldrUtility.LINE_SEPARATOR
432                     + "     If maximize(trial) == max, then return trial."
433                     +
434                     CldrUtility.LINE_SEPARATOR
435                     + " If you don't get a match, return max."
436                     +
437                     CldrUtility.LINE_SEPARATOR
438                     +
439                     CldrUtility.LINE_SEPARATOR
440                     + " Example: Input is zh-Hant. Maximize to get zh-Hant-TW."
441                     +
442                     CldrUtility.LINE_SEPARATOR
443                     + " zh => zh-Hans-CN. No match, so continue."
444                     +
445                     CldrUtility.LINE_SEPARATOR
446                     + " zh-TW => zh-Hans-TW. Match, so return zh-TW."
447                     +
448                     CldrUtility.LINE_SEPARATOR
449                     +
450                     CldrUtility.LINE_SEPARATOR
451                     + " (A variant of this uses {language, language-script, language-region}): that is, tries script before language."
452                     +
453                     CldrUtility.LINE_SEPARATOR + " toMaximal size:\t" + toMaximized.size() +
454                     CldrUtility.LINE_SEPARATOR + "*/");
455 
456         printLikelySubtags(toMaximized);
457 
458         // if (OUTPUT_STYLE != OutputStyle.XML) {
459         // printMap("const MapToMinimalSubtags default_subtags[]", toMinimized, null);
460         // }
461 
462         printDefaultContent(toMaximized);
463 
464         System.out.println(CldrUtility.LINE_SEPARATOR + "ERRORS:\t" + errorCount + CldrUtility.LINE_SEPARATOR);
465 
466     }
467 
468     static class RowData implements Comparable<RowData> {
469         OfficialStatus os;
470         String name;
471         Long pop;
472 
RowData(OfficialStatus os, String name, Long pop)473         public RowData(OfficialStatus os, String name, Long pop) {
474             this.os = os;
475             this.name = name;
476             this.pop = pop;
477         }
478 
getStatus()479         public OfficialStatus getStatus() {
480             // TODO Auto-generated method stub
481             return os;
482         }
483 
getName()484         public CharSequence getName() {
485             // TODO Auto-generated method stub
486             return name;
487         }
488 
getLiteratePopulation()489         public Long getLiteratePopulation() {
490             // TODO Auto-generated method stub
491             return pop;
492         }
493 
494         @Override
compareTo(RowData o)495         public int compareTo(RowData o) {
496             // TODO Auto-generated method stub
497             int result = os.compareTo(o.os);
498             if (result != 0) return -result;
499             long result2 = pop - o.pop;
500             if (result2 != 0) return result2 < 0 ? 1 : -1;
501             return name.compareTo(o.name);
502         }
503 
504         @Override
equals(Object o)505         public boolean equals(Object o) {
506             return 0 == compareTo((RowData) o);
507         }
508 
509         @Override
hashCode()510         public int hashCode() {
511             throw new UnsupportedOperationException();
512         }
513     }
514 
printDefaultLanguagesAndScripts()515     private static void printDefaultLanguagesAndScripts() {
516 
517         final int minTotalPopulation = 10000000;
518         final int minTerritoryPopulation = 1000000;
519         final double minTerritoryPercent = 1.0 / 3;
520         Map<String, Set<RowData>> languageToReason = new TreeMap<>();
521         Counter<String> languageToLiteratePopulation = new Counter<>();
522         NumberFormat nf = NumberFormat.getIntegerInstance(ULocale.ENGLISH);
523         nf.setGroupingUsed(true);
524         LanguageTagParser ltp = new LanguageTagParser();
525         LikelySubtags likelySubtags = new LikelySubtags();
526         /*
527          * A. X is a qualified language**, and at least one of the following is true:
528          *
529          * 1. X is has official status* in any country
530          * 2. X exceeds a threshold population† of literate users worldwide: 1M
531          * 3. X exceeds a threshold population† in some country Z: 100K and 20% of Z's population†.
532          *
533          * B. X is an exception explicitly approved by the committee or X has minimal
534          * language coverage‡ in CLDR itself.
535          * C. The language is in the CLDR-target locales
536          */
537         OfficialStatus minimalStatus = OfficialStatus.official_regional; // OfficialStatus.de_facto_official;
538         Map<String, String> languages = new TreeMap<>();
539         for (String language : standardCodes.getAvailableCodes("language")) {
540             String path = CLDRFile.getKey(CLDRFile.LANGUAGE_NAME, language);
541             String result = english.getStringValue(path);
542             if (result != null) {
543                 languages.put(language, result);
544             }
545         }
546         for (String language : languages.keySet()) {
547             System.out.println(language + "\t" + languages.get(language));
548         }
549 
550         // also CLDR-target locales
551         final Set<String> CLDRMainLanguages = new TreeSet<>(StandardCodes.make().getLocaleCoverageLocales(Organization.cldr));
552 
553         for (String territory : supplementalData.getTerritoriesWithPopulationData()) {
554             PopulationData territoryPop = supplementalData.getPopulationDataForTerritory(territory);
555             double territoryPopulation = territoryPop.getLiteratePopulation();
556             for (String languageScript : supplementalData.getLanguagesForTerritoryWithPopulationData(territory)) {
557                 PopulationData popData = supplementalData.getLanguageAndTerritoryPopulationData(languageScript,
558                     territory);
559                 ltp.set(languageScript);
560                 String language = ltp.getLanguage();
561 //                if (ltp.getScript().isEmpty()) {
562 //                    String max = likelySubtags.maximize(languageScript);
563 //                    if (max != null) {
564 //                        ltp.set(max).setRegion("");
565 //                        languageScript = ltp.toString();
566 //                    }
567 //                }
568                 boolean add = false;
569                 // #1
570                 OfficialStatus status = popData.getOfficialStatus();
571                 if (status.compareTo(minimalStatus) >= 0) {
572                     add = true;
573                 }
574                 long literatePopulation = getWritingPopulation(popData);
575                 // #2
576                 languageToLiteratePopulation.add(language, literatePopulation);
577                 // #3
578                 if (literatePopulation > minTerritoryPopulation
579                     && literatePopulation > minTerritoryPercent * territoryPopulation) {
580                     add = true;
581                 }
582                 if (add == false && CLDRMainLanguages.contains(language)) {
583                     add = true;
584                 }
585                 if (add) {
586                     add(languageToReason, language, territory, status, literatePopulation);
587                     // Add the containing regions
588                     for (String container : Containment.leafToContainer(territory)) {
589                         add(languageToReason, language, container, OfficialStatus.unknown, literatePopulation);
590                     }
591                 }
592             }
593         }
594         // #2, now that we have the data
595         for (String language : languageToLiteratePopulation.keySet()) {
596             long totalPop = languageToLiteratePopulation.getCount(language);
597             if (totalPop > minTotalPopulation) {
598                 add(languageToReason, language, "001", OfficialStatus.unknown, totalPop);
599             }
600         }
601 
602         // Specials
603         add(languageToReason, "und", "001", OfficialStatus.unknown, 0);
604 
605         // for (String language : Iso639Data.getAvailable()) {
606         // Scope scope = Iso639Data.getScope(language);
607         // Type type = Iso639Data.getType(language);
608         // if (scope == Scope.Special) {
609         // add(languageToReason, language, "001", OfficialStatus.unknown, -1);
610         // }
611         // }
612         // print them
613 
614         System.out.println("Detailed - Including:\t" + languageToReason.size());
615 
616         for (String language : languageToReason.keySet()) {
617             Set<RowData> reasons = languageToReason.get(language);
618 
619             RowData lastReason = reasons.iterator().next();
620 
621             System.out.append(language)
622                 .append("\t")
623                 .append(english.getName(language))
624                 .append("\t")
625                 .append(lastReason.getStatus().toShortString())
626                 .append("\t")
627                 .append(nf.format(languageToLiteratePopulation.getCount(language)));
628             for (RowData reason : reasons) {
629                 String status = reason.getStatus().toShortString();
630                 System.out.append("\t")
631                     .append(status)
632                     .append("-")
633                     .append(reason.getName())
634                     .append("-")
635                     .append(nf.format(reason.getLiteratePopulation()));
636             }
637             System.out.append("\n");
638         }
639 
640         // now list them
641 
642         Set<String> others = new TreeSet<>();
643         others.addAll(standardCodes.getGoodAvailableCodes("language"));
644         others.removeAll(languageToReason.keySet());
645         System.out.println("\nIncluded Languages:\t" + languageToReason.keySet().size());
646         showLanguages(languageToReason.keySet(), languageToReason);
647         System.out.println("\nExcluded Languages:\t" + others.size());
648         showLanguages(others, languageToReason);
649     }
650 
getWritingPopulation(PopulationData popData)651     private static long getWritingPopulation(PopulationData popData) {
652         final double writingPopulation = popData.getWritingPopulation();
653         if (!Double.isNaN(writingPopulation)) {
654             return (long) writingPopulation;
655         }
656         return (long) popData.getLiteratePopulation();
657     }
658 
showLanguages(Set<String> others, Map<String, Set<RowData>> languageToReason)659     private static void showLanguages(Set<String> others, Map<String, Set<RowData>> languageToReason) {
660         Set<String> sorted = new TreeSet<>(Collator.getInstance(ULocale.ENGLISH));
661         for (String language : others) {
662             sorted.add(getLanguageName(language, languageToReason));
663         }
664         char last = 0;
665         for (String language : sorted) {
666             final char curr = language.charAt(0);
667             if (last != curr) {
668                 System.out.println();
669             } else if (last != '\u0000') {
670                 System.out.print(", ");
671             }
672             System.out.print(language);
673             last = curr;
674         }
675         System.out.println();
676     }
677 
getLanguageName(String language, Map<String, Set<RowData>> languageToReason)678     private static String getLanguageName(String language,
679         Map<String, Set<RowData>> languageToReason) {
680         OfficialStatus best = OfficialStatus.unknown;
681         Set<RowData> reasons = languageToReason.get(language);
682         if (reasons != null) {
683             for (RowData reason : reasons) {
684                 final OfficialStatus currentStatus = reason.getStatus();
685                 if (best.compareTo(currentStatus) < 0) {
686                     best = currentStatus;
687                 }
688             }
689         }
690         String status = best.toShortString();
691         Scope scope = Iso639Data.getScope(language);
692         if (scope == Scope.Special) {
693             status = "S";
694         }
695         String languageFormatted = english.getName(language) + " [" + language + "]-" + status;
696         return languageFormatted;
697     }
698 
add(Map<String, Set<RowData>> languageToReason, String language, String territoryRaw, OfficialStatus status, long population)699     private static void add(Map<String, Set<RowData>> languageToReason, String language,
700         String territoryRaw, OfficialStatus status, long population) {
701         String territory = english.getName("territory", territoryRaw) + " [" + territoryRaw + "]";
702         Set<RowData> set = languageToReason.get(language);
703         if (set == null) {
704             languageToReason.put(language, set = new TreeSet<>());
705         }
706         set.add(new RowData(status, territory, population));
707     }
708 
printDefaultContent(Map<String, String> toMaximized)709     private static void printDefaultContent(Map<String, String> toMaximized) throws IOException {
710 
711         Set<String> defaultLocaleContent = new TreeSet<>();
712 
713         // go through all the cldr locales, and add default contents
714         // now computed from toMaximized
715         Set<String> available = factory.getAvailable();
716         Relation<String, String> toChildren = Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class);
717         LanguageTagParser ltp = new LanguageTagParser();
718 
719         // System.out.println(maximize("az_Latn_AZ", toMaximized));
720         Set<String> hasScript = new TreeSet<>();
721 
722         // first get a mapping to children
723         for (String locale : available) {
724             if (locale.equals("root")) {
725                 continue;
726             }
727             if (ltp.set(locale).getVariants().size() != 0) {
728                 continue;
729             }
730             String parent = LocaleIDParser.getSimpleParent(locale);
731             if (ltp.getScript().length() != 0) {
732                 hasScript.add(parent);
733             }
734             if (parent.equals("root")) {
735                 continue;
736             }
737             toChildren.put(parent, locale);
738         }
739 
740         // Suppress script for locales for which we only have one locale in common/main. See ticket #7834.
741         Set<String> suppressScriptLocales = new HashSet<>(Arrays.asList(
742             "bm_ML", "en_US", "ha_NG", "iu_CA", "ms_MY", "mn_MN",
743             "byn_ER", "ff_SN", "dyo_SN", "kk_KZ", "ku_TR", "ky_KG", "ml_IN", "so_SO", "sw_TZ", "wo_SN", "yo_NG", "dje_NE",
744             "blt_VN",
745             "hi_IN",
746             "nv_US",
747             "doi_IN"
748             ));
749 
750         // if any have a script, then throw out any that don't have a script (unless they're specifically included.)
751         Set<String> toRemove = new TreeSet<>();
752         for (String locale : hasScript) {
753             toRemove.clear();
754             Set<String> children = toChildren.getAll(locale);
755             for (String child : children) {
756                 if (ltp.set(child).getScript().length() == 0 && !suppressScriptLocales.contains(child)) {
757                     toRemove.add(child);
758                 }
759             }
760             if (toRemove.size() != 0) {
761                 System.out.println("\tRemoving:\t" + locale + "\t" + toRemove + "\tfrom\t" + children);
762                 toChildren.removeAll(locale, toRemove);
763             }
764         }
765 
766         // we add a child as a default locale if it has the same maximization
767         main: for (String locale : toChildren.keySet()) {
768             String maximized = maximize(locale, toMaximized);
769             if (maximized == null) {
770                 if (SHOW_ADD) System.out.println("Missing maximized:\t" + locale);
771                 continue;
772             }
773             Set<String> children = toChildren.getAll(locale);
774             Map<String, String> debugStuff = new TreeMap<>();
775             for (String child : children) {
776                 String maximizedChild = maximize(child, toMaximized);
777                 if (maximized.equals(maximizedChild)) {
778                     defaultLocaleContent.add(child);
779                     continue main;
780                 }
781                 debugStuff.put(child, maximizedChild);
782             }
783             if (SHOW_ADD) System.out.println("Can't find maximized: " + locale + "=" + maximized
784                 + "\tin\t" + debugStuff);
785         }
786 
787         defaultLocaleContent.remove("und_ZZ"); // und_ZZ isn't ever a real locale.
788 
789         showDefaultContentDifferencesAndFix(defaultLocaleContent);
790 
791         Log.setLogNoBOM(CLDRPaths.GEN_DIRECTORY + "/supplemental", "supplementalMetadata.xml");
792         BufferedReader oldFile = FileUtilities.openUTF8Reader(CLDRPaths.SUPPLEMENTAL_DIRECTORY, "supplementalMetadata.xml");
793         CldrUtility.copyUpTo(oldFile, PatternCache.get("\\s*<defaultContent locales=\"\\s*"), Log.getLog(), false);
794 
795         String sep = CldrUtility.LINE_SEPARATOR + "\t\t\t";
796         String broken = CldrUtility.breakLines(CldrUtility.join(defaultLocaleContent, " "), sep,
797             PatternCache.get("(\\S)\\S*").matcher(""), 80);
798 
799         Log.println("\t\t<defaultContent locales=\"" + broken + "\"");
800         Log.println("\t\t/>");
801 
802         // Log.println("</supplementalData>");
803         CldrUtility.copyUpTo(oldFile, PatternCache.get("\\s*/>\\s*(<!--.*)?"), null, true); // skip to matching >
804         CldrUtility.copyUpTo(oldFile, null, Log.getLog(), true); // copy the rest
805 
806         Log.close();
807         oldFile.close();
808     }
809 
810     // private static void oldAlgorithm(Map<String,String> toMaximized) {
811     // Set<String> defaultContentLocales = supplementalData.getDefaultContentLocales();
812     // LanguageTagParser parser = new LanguageTagParser();
813     // for (String locale : defaultContentLocales) {
814     // String parent = parser.getParent(locale);
815     // toMaximized.put(parent, locale);
816     // if (SHOW_ADD) System.out.println("Adding:\t" + parent + "\t=>\t" + locale + "\t\tDefaultContent");
817     // }
818     //
819     // for (String[] specialCase : SpecialCases) {
820     // toMaximized.put(specialCase[0], specialCase[1]);
821     // if (SHOW_ADD) System.out.println("Adding:\t" + specialCase[0] + "\t=>\t" + specialCase[1] + "\t\tSpecial");
822     // }
823     //
824     // // recurse and close
825     // closeMapping(toMaximized);
826     //
827     // addScript(toMaximized, parser);
828     //
829     // closeMapping(toMaximized);
830     //
831     // addLanguageScript(toMaximized, parser);
832     //
833     // closeMapping(toMaximized);
834     //
835     // addLanguageCountry(toMaximized, parser);
836     //
837     // closeMapping(toMaximized);
838     //
839     // addCountries(toMaximized);
840     // addScript(toMaximized, parser);
841     // closeMapping(toMaximized);
842     // closeUnd(toMaximized);
843     //
844     // addDeprecated(toMaximized);
845     //
846     // closeMapping(toMaximized);
847     //
848     // checkConsistency(toMaximized);
849     // }
850 
851     private static class MaxData {
852         Relation<String, Row.R3<Double, String, String>> languages = Relation.of(new TreeMap<String, Set<Row.R3<Double, String, String>>>(), TreeSet.class);
853         Map<String, Counter<String>> languagesToScripts = new TreeMap<>();
854         Map<String, Counter<String>> languagesToRegions = new TreeMap<>();
855 
856         Relation<String, Row.R3<Double, String, String>> scripts = Relation.of(new TreeMap<String, Set<Row.R3<Double, String, String>>>(), TreeSet.class);
857         Map<String, Counter<String>> scriptsToLanguages = new TreeMap<>();
858         Map<String, Counter<String>> scriptsToRegions = new TreeMap<>();
859 
860         Relation<String, Row.R3<Double, String, String>> regions = Relation.of(new TreeMap<String, Set<Row.R3<Double, String, String>>>(), TreeSet.class);
861         Map<String, Counter<String>> regionsToLanguages = new TreeMap<>();
862         Map<String, Counter<String>> regionsToScripts = new TreeMap<>();
863 
864         Map<String, Counter<Row.R2<String, String>>> containersToLanguage = new TreeMap<>();
865         Relation<String, Row.R4<Double, String, String, String>> containersToLangRegion = Relation.of(
866             new TreeMap<String, Set<Row.R4<Double, String, String, String>>>(), TreeSet.class);
867 
868         Relation<Row.R2<String, String>, Row.R2<Double, String>> languageScripts = Relation.of(
869             new TreeMap<Row.R2<String, String>, Set<Row.R2<Double, String>>>(),
870             TreeSet.class);
871         Relation<Row.R2<String, String>, Row.R2<Double, String>> scriptRegions = Relation.of(
872             new TreeMap<Row.R2<String, String>, Set<Row.R2<Double, String>>>(),
873             TreeSet.class);
874         Relation<Row.R2<String, String>, Row.R2<Double, String>> languageRegions = Relation.of(
875             new TreeMap<Row.R2<String, String>, Set<Row.R2<Double, String>>>(),
876             TreeSet.class);
877 
878         /**
879          * Add population information. "order" is the negative of the population (makes the first be the highest).
880          * @param language
881          * @param script
882          * @param region
883          * @param order
884          */
add(String language, String script, String region, Double order)885         void add(String language, String script, String region, Double order) {
886             if (language.equals("cpp")) {
887                 System.out.println(language + "\t" + script + "\t" + region + "\t" + -order);
888             }
889             languages.put(language, Row.of(order, script, region));
890             // addCounter(languagesToScripts, language, script, order);
891             // addCounter(languagesToRegions, language, region, order);
892 
893             scripts.put(script, Row.of(order, language, region));
894             // addCounter(scriptsToLanguages, script, language, order);
895             // addCounter(scriptsToRegions, script, region, order);
896 
897             regions.put(region, Row.of(order, language, script));
898             // addCounter(regionsToLanguages, region, language, order);
899             // addCounter(regionsToScripts, region, script, order);
900 
901             languageScripts.put(Row.of(language, script), Row.of(order, region));
902             scriptRegions.put(Row.of(script, region), Row.of(order, language));
903             languageRegions.put(Row.of(language, region), Row.of(order, script));
904 
905             Set<String> containerSet = Containment.leafToContainer(region);
906             if (containerSet != null) {
907                 for (String container : containerSet) {
908 
909                     containersToLangRegion.put(container, Row.of(order, language, script, region));
910                     Counter<R2<String, String>> data = containersToLanguage.get(container);
911                     if (data == null) {
912                         containersToLanguage.put(container, data = new Counter<>());
913                     }
914                     data.add(Row.of(language, script), (long) (double) order);
915 
916                 }
917             }
918 
919             if (SHOW_ADD) System.out.println("Data:\t" + language + "\t" + script + "\t" + region + "\t" + order);
920         }
921         // private void addCounter(Map<String, Counter<String>> map, String key, String key2, Double count) {
922         // Counter<String> counter = map.get(key);
923         // if (counter == null) {
924         // map.put(key, counter = new Counter<String>());
925         // }
926         // counter.add(key2, count.longValue());
927         // }
928     }
929 
930     private static final double MIN_UNOFFICIAL_LANGUAGE_SIZE = 10000000;
931     private static final double MIN_UNOFFICIAL_LANGUAGE_PROPORTION = 0.20;
932     private static final double MIN_UNOFFICIAL_CLDR_LANGUAGE_SIZE = 100000;
933     private static final double UNOFFICIAL_SCALE_DOWN = 0.2;
934 
935     private static NumberFormat percent = NumberFormat.getPercentInstance();
936     private static NumberFormat number = NumberFormat.getIntegerInstance();
937 
tryDifferentAlgorithm(Map<String, String> toMaximized)938     private static void tryDifferentAlgorithm(Map<String, String> toMaximized) {
939         // we are going to try a different approach.
940         // first gather counts for maximized values
941         // Set<Row.R3<String,String,String>,Double> rowsToCounts = new TreeMap();
942         MaxData maxData = new MaxData();
943         Set<String> cldrLocales = factory.getAvailable();
944         Set<String> otherTerritories = new TreeSet<>(standardCodes.getGoodAvailableCodes("territory"));
945 
946         // process all the information to get the top values for each triple.
947         // each of the combinations of 1 or 2 components gets to be a key.
948         for (String region : supplementalData.getTerritoriesWithPopulationData()) {
949             otherTerritories.remove(region);
950             PopulationData regionData = supplementalData.getPopulationDataForTerritory(region);
951             final double literateTerritoryPopulation = regionData.getLiteratePopulation();
952             // we need any unofficial language to meet a certain absolute size requirement and proportion size
953             // requirement.
954             // so the bar is x percent of the population, reset up to y absolute size.
955             double minimalLiteratePopulation = literateTerritoryPopulation * MIN_UNOFFICIAL_LANGUAGE_PROPORTION;
956             if (minimalLiteratePopulation < MIN_UNOFFICIAL_LANGUAGE_SIZE) {
957                 minimalLiteratePopulation = MIN_UNOFFICIAL_LANGUAGE_SIZE;
958             }
959 
960             for (String writtenLanguage : supplementalData.getLanguagesForTerritoryWithPopulationData(region)) {
961                 PopulationData data = supplementalData.getLanguageAndTerritoryPopulationData(writtenLanguage, region);
962                 final double literatePopulation = getWritingPopulation(data); //data.getLiteratePopulation();
963                 double order = -literatePopulation; // negative so we get the inverse order
964 
965                 if (data.getOfficialStatus() == OfficialStatus.unknown) {
966                     final String locale = writtenLanguage + "_" + region;
967                     if (literatePopulation >= minimalLiteratePopulation) {
968                         // ok, skip
969                     } else if (literatePopulation >= MIN_UNOFFICIAL_CLDR_LANGUAGE_SIZE && cldrLocales.contains(locale)) {
970                         // ok, skip
971                     } else {
972                         // if (SHOW_ADD)
973                         // System.out.println("Skipping:\t" + writtenLanguage + "\t" + region + "\t"
974                         // + english.getName(locale)
975                         // + "\t-- too small:\t" + number.format(literatePopulation));
976                         // continue;
977                     }
978                     order *= UNOFFICIAL_SCALE_DOWN;
979                     if (SHOW_ADD)
980                         System.out.println("Retaining\t" + writtenLanguage + "\t" + region + "\t"
981                             + english.getName(locale)
982                             + "\t" + number.format(literatePopulation)
983                             + "\t" + percent.format(literatePopulation / literateTerritoryPopulation)
984                             + (cldrLocales.contains(locale) ? "\tin-CLDR" : ""));
985                 }
986                 String script;
987                 String language = writtenLanguage;
988                 final int pos = writtenLanguage.indexOf('_');
989                 if (pos > 0) {
990                     language = writtenLanguage.substring(0, pos);
991                     script = writtenLanguage.substring(pos + 1);
992                 } else {
993                     script = getScriptForLocale2(language);
994                 }
995                 maxData.add(language, script, region, order);
996             }
997         }
998 
999         LanguageTagParser additionLtp = new LanguageTagParser();
1000 
1001         for (String addition : MAX_ADDITIONS) {
1002             additionLtp.set(addition);
1003             String lan = additionLtp.getLanguage();
1004             Set<R3<Double, String, String>> key = maxData.languages.get(lan);
1005             if (key == null) {
1006                 maxData.add(lan, additionLtp.getScript(), additionLtp.getRegion(), 1.0);
1007             } else {
1008                 int debug = 0;
1009             }
1010         }
1011 
1012         for (Entry<String, Collection<String>> entry : DeriveScripts.getLanguageToScript().asMap().entrySet()) {
1013             String language = entry.getKey();
1014             final Collection<String> values = entry.getValue();
1015             if (values.size() != 1) {
1016                 continue; // skip, no either way
1017             }
1018             Set<R3<Double, String, String>> old = maxData.languages.get(language);
1019             if (!maxData.languages.containsKey(language)) {
1020                 maxData.add(language, values.iterator().next(), TEMP_UNKNOWN_REGION, 1.0);
1021             }
1022         }
1023 
1024         // add others, with English default
1025         for (String region : otherTerritories) {
1026             if (region.length() == 3) continue; // FIX ONCE WE ADD REGIONS
1027             maxData.add("en", "Latn", region, 1.0);
1028         }
1029 
1030         // get a reverse mapping, so that we can add the aliases
1031 
1032         Map<String, R2<List<String>, String>> languageAliases = SupplementalDataInfo.getInstance().getLocaleAliasInfo()
1033             .get("language");
1034         for (Entry<String, R2<List<String>, String>> str : languageAliases.entrySet()) {
1035             String reason = str.getValue().get1();
1036             if ("overlong".equals(reason) || "bibliographic".equals(reason) || "macrolanguage".equals(reason)) {
1037                 continue;
1038             }
1039             List<String> replacements = str.getValue().get0();
1040             if (replacements == null) {
1041                 continue;
1042             }
1043             String goodLanguage = replacements.get(0);
1044 
1045             String badLanguage = str.getKey();
1046             if (badLanguage.contains("_")) {
1047                 continue;
1048             }
1049             if (deprecatedISONotInLST.contains(badLanguage)) {
1050                 continue;
1051             }
1052             Set<R3<Double, String, String>> goodLanguageData = maxData.languages.getAll(goodLanguage);
1053             if (goodLanguageData == null) {
1054                 continue;
1055             }
1056             R3<Double, String, String> value = goodLanguageData.iterator().next();
1057             final String script = value.get1();
1058             final String region = value.get2();
1059             maxData.add(badLanguage, script, region, 1.0);
1060             System.out.println("Adding aliases: " + badLanguage + ", " + script + ", " + region + ", " + reason);
1061         }
1062 
1063         // now, get the best for each one
1064         for (String language : maxData.languages.keySet()) {
1065             R3<Double, String, String> value = maxData.languages.getAll(language).iterator().next();
1066             final Comparable<String> script = value.get1();
1067             final Comparable<String> region = value.get2();
1068             add(language, language + "_" + script + "_" + region, toMaximized, "L->SR", LocaleOverride.REPLACE_EXISTING,
1069                 SHOW_ADD);
1070         }
1071         for (String language : maxData.languagesToScripts.keySet()) {
1072             String script = maxData.languagesToScripts.get(language).getKeysetSortedByCount(true).iterator().next();
1073             add(language, language + "_" + script, toMaximized, "L->S", LocaleOverride.REPLACE_EXISTING, SHOW_ADD);
1074         }
1075         for (String language : maxData.languagesToRegions.keySet()) {
1076             String region = maxData.languagesToRegions.get(language).getKeysetSortedByCount(true).iterator().next();
1077             add(language, language + "_" + region, toMaximized, "L->R", LocaleOverride.REPLACE_EXISTING, SHOW_ADD);
1078         }
1079 
1080         for (String script : maxData.scripts.keySet()) {
1081             R3<Double, String, String> value = maxData.scripts.getAll(script).iterator().next();
1082             final Comparable<String> language = value.get1();
1083             final Comparable<String> region = value.get2();
1084             add("und_" + script, language + "_" + script + "_" + region, toMaximized, "S->LR",
1085                 LocaleOverride.REPLACE_EXISTING, SHOW_ADD);
1086         }
1087         for (String script : maxData.scriptsToLanguages.keySet()) {
1088             String language = maxData.scriptsToLanguages.get(script).getKeysetSortedByCount(true).iterator().next();
1089             add("und_" + script, language + "_" + script, toMaximized, "S->L", LocaleOverride.REPLACE_EXISTING, SHOW_ADD);
1090         }
1091         for (String script : maxData.scriptsToRegions.keySet()) {
1092             String region = maxData.scriptsToRegions.get(script).getKeysetSortedByCount(true).iterator().next();
1093             add("und_" + script, "und_" + script + "_" + region, toMaximized, "S->R", LocaleOverride.REPLACE_EXISTING,
1094                 SHOW_ADD);
1095         }
1096 
1097         for (String region : maxData.regions.keySet()) {
1098             R3<Double, String, String> value = maxData.regions.getAll(region).iterator().next();
1099             final Comparable<String> language = value.get1();
1100             final Comparable<String> script = value.get2();
1101             add("und_" + region, language + "_" + script + "_" + region, toMaximized, "R->LS",
1102                 LocaleOverride.REPLACE_EXISTING, SHOW_ADD);
1103         }
1104         for (String region : maxData.regionsToLanguages.keySet()) {
1105             String language = maxData.regionsToLanguages.get(region).getKeysetSortedByCount(true).iterator().next();
1106             add("und_" + region, language + "_" + region, toMaximized, "R->L", LocaleOverride.REPLACE_EXISTING, SHOW_ADD);
1107         }
1108         for (String region : maxData.regionsToScripts.keySet()) {
1109             String script = maxData.regionsToScripts.get(region).getKeysetSortedByCount(true).iterator().next();
1110             add("und_" + region, "und_" + script + "_" + region, toMaximized, "R->S", LocaleOverride.REPLACE_EXISTING,
1111                 SHOW_ADD);
1112         }
1113 
1114         for (Entry<String, Counter<R2<String, String>>> containerAndInfo : maxData.containersToLanguage.entrySet()) {
1115             String region = containerAndInfo.getKey();
1116             if (region.equals("001")) {
1117                 continue;
1118             }
1119             Counter<R2<String, String>> data = containerAndInfo.getValue();
1120             Set<R2<String, String>> keysetSortedByCount = data.getKeysetSortedByCount(true);
1121             if (SHOW_CONTAINERS) { // debug
1122                 System.out.println("Container2L:\t" + region + "\t" + shorten(data.getEntrySetSortedByCount(true, null)));
1123                 System.out.println("Container2LR:\t" + region + "\t" + maxData.containersToLangRegion.get(region));
1124             }
1125             R2<String, String> value = keysetSortedByCount.iterator().next(); // will get most negative
1126             final Comparable<String> language = value.get0();
1127             final Comparable<String> script = value.get1();
1128 
1129             // fix special cases like es-419, where a locale exists.
1130             // for those cases, what we add as output is the container. Otherwise the region.
1131             Set<String> skipLanguages = cldrContainerToLanguages.get(region);
1132             if (skipLanguages != null
1133                 && skipLanguages.contains(language)) {
1134                 add("und_" + region, language + "_" + script + "_" + region, toMaximized, "R*->LS",
1135                     LocaleOverride.REPLACE_EXISTING, SHOW_ADD);
1136                 continue;
1137             }
1138 
1139             // we now have the best language and script. Find the best region for that
1140             for (R4<Double, String, String, String> e : maxData.containersToLangRegion.get(region)) {
1141                 final Comparable<String> language2 = e.get1();
1142                 final Comparable<String> script2 = e.get2();
1143                 if (language2.equals(language) && script2.equals(script)) {
1144                     add("und_" + region, language + "_" + script + "_" + e.get3(), toMaximized, "R*->LS",
1145                         LocaleOverride.REPLACE_EXISTING, SHOW_ADD);
1146                     break;
1147                 }
1148             }
1149         }
1150 
1151         for (R2<String, String> languageScript : maxData.languageScripts.keySet()) {
1152             R2<Double, String> value = maxData.languageScripts.getAll(languageScript).iterator().next();
1153             final Comparable<String> language = languageScript.get0();
1154             final Comparable<String> script = languageScript.get1();
1155             final Comparable<String> region = value.get1();
1156             add(language + "_" + script, language + "_" + script + "_" + region, toMaximized, "LS->R",
1157                 LocaleOverride.REPLACE_EXISTING, SHOW_ADD);
1158         }
1159 
1160         for (R2<String, String> scriptRegion : maxData.scriptRegions.keySet()) {
1161             R2<Double, String> value = maxData.scriptRegions.getAll(scriptRegion).iterator().next();
1162             final Comparable<String> script = scriptRegion.get0();
1163             final Comparable<String> region = scriptRegion.get1();
1164             final Comparable<String> language = value.get1();
1165             add("und_" + script + "_" + region, language + "_" + script + "_" + region, toMaximized, "SR->L",
1166                 LocaleOverride.REPLACE_EXISTING, SHOW_ADD);
1167         }
1168 
1169         for (R2<String, String> languageRegion : maxData.languageRegions.keySet()) {
1170             R2<Double, String> value = maxData.languageRegions.getAll(languageRegion).iterator().next();
1171             final Comparable<String> language = languageRegion.get0();
1172             final Comparable<String> region = languageRegion.get1();
1173             final Comparable<String> script = value.get1();
1174             add(language + "_" + region, language + "_" + script + "_" + region, toMaximized, "LR->S",
1175                 LocaleOverride.REPLACE_EXISTING, SHOW_ADD);
1176         }
1177 
1178         // get the script info from metadata as fallback
1179 
1180         TreeSet<String> sorted = new TreeSet<>(ScriptMetadata.getScripts());
1181         for (String script : sorted) {
1182             Info i = ScriptMetadata.getInfo(script);
1183             String likelyLanguage = i.likelyLanguage;
1184             String originCountry = i.originCountry;
1185             final String result = likelyLanguage + "_" + script + "_" + originCountry;
1186             add("und_" + script, result, toMaximized, "S->LR•",
1187                 LocaleOverride.KEEP_EXISTING, SHOW_ADD);
1188             add(likelyLanguage, result, toMaximized, "L->SR•",
1189                 LocaleOverride.KEEP_EXISTING, SHOW_ADD);
1190         }
1191 
1192         // add overrides
1193         for (String key : LANGUAGE_OVERRIDES.keySet()) {
1194             add(key, LANGUAGE_OVERRIDES.get(key), toMaximized, "OVERRIDE", LocaleOverride.REPLACE_EXISTING, true);
1195         }
1196     }
1197 
shorten(Object data)1198     public static String shorten(Object data) {
1199         String info = data.toString();
1200         if (info.length() > 255) {
1201             info = info.substring(0, 127) + "…";
1202         }
1203         return info;
1204     }
1205 
doAlt(Map<String, String> toMaximized)1206     private static void doAlt(Map<String, String> toMaximized) {
1207         // TODO Auto-generated method stub
1208         Map<String, String> temp = new TreeMap<>();
1209         for (String locale : toMaximized.keySet()) {
1210             String target = toMaximized.get(locale);
1211             temp.put(toAlt(locale, true), toAlt(target, true));
1212         }
1213         toMaximized.clear();
1214         toMaximized.putAll(temp);
1215     }
1216 
maximize(String languageTag, Map<String, String> toMaximized)1217     public static String maximize(String languageTag, Map<String, String> toMaximized) {
1218         LanguageTagParser ltp = new LanguageTagParser();
1219 
1220         // clean up the input by removing Zzzz, ZZ, and changing "" into und.
1221         ltp.set(languageTag);
1222         String language = ltp.getLanguage();
1223         String region = ltp.getRegion();
1224         String script = ltp.getScript();
1225         boolean changed = false;
1226         if (language.equals("")) {
1227             ltp.setLanguage(language = "und");
1228             changed = true;
1229         }
1230         if (region.equals(UNKNOWN_SCRIPT)) {
1231             ltp.setScript(script = "");
1232             changed = true;
1233         }
1234         if (ltp.getRegion().equals(UNKNOWN_REGION)) {
1235             ltp.setRegion(region = "");
1236             changed = true;
1237         }
1238         if (changed) {
1239             languageTag = ltp.toString();
1240         }
1241         // check whole
1242         String result = toMaximized.get(languageTag);
1243         if (result != null) {
1244             return result;
1245         }
1246         // try empty region
1247         if (region.length() != 0) {
1248             result = toMaximized.get(ltp.setRegion("").toString());
1249             if (result != null) {
1250                 return ltp.set(result).setRegion(region).toString();
1251             }
1252             ltp.setRegion(region); // restore
1253         }
1254         // try empty script
1255         if (script.length() != 0) {
1256             result = toMaximized.get(ltp.setScript("").toString());
1257             if (result != null) {
1258                 return ltp.set(result).setScript(script).toString();
1259             }
1260             // try empty script and region
1261             if (region.length() != 0) {
1262                 result = toMaximized.get(ltp.setRegion("").toString());
1263                 if (result != null) {
1264                     return ltp.set(result).setScript(script).setRegion(region).toString();
1265                 }
1266             }
1267         }
1268         if (!language.equals("und") && script.length() != 0 && region.length() != 0) {
1269             return languageTag; // it was ok, and we couldn't do anything with it
1270         }
1271         return null; // couldn't maximize
1272     }
1273 
minimize(String input, Map<String, String> toMaximized, boolean favorRegion)1274     public static String minimize(String input, Map<String, String> toMaximized, boolean favorRegion) {
1275         if (input.equals("nb_Latn_SJ")) {
1276             System.out.print(""); // debug
1277         }
1278         String maximized = maximize(input, toMaximized);
1279         if (maximized == null) {
1280             return null; // failed
1281         }
1282         LanguageTagParser ltp = new LanguageTagParser().set(maximized);
1283         String language = ltp.getLanguage();
1284         String region = ltp.getRegion();
1285         String script = ltp.getScript();
1286         // try building up from shorter to longer, and find the first that matches
1287         // could be more optimized, but for this code we want simplest
1288         String[] trials = { language,
1289             language + TAG_SEPARATOR + (favorRegion ? region : script),
1290             language + TAG_SEPARATOR + (!favorRegion ? region : script) };
1291         for (String trial : trials) {
1292             String newMaximized = maximize(trial, toMaximized);
1293             if (maximized.equals(newMaximized)) {
1294                 return trial;
1295             }
1296         }
1297         return maximized;
1298     }
1299 
1300     // /**
1301     // * Verify that we can map from each language, script, and country to something.
1302     // * @param toMaximized
1303     // */
1304     // private static void checkConsistency(Map<String, String> toMaximized) {
1305     // Map<String,String> needMappings = new TreeMap();
1306     // LanguageTagParser parser = new LanguageTagParser();
1307     // for (String maximized : new TreeSet<String>(toMaximized.values())) {
1308     // parser.set(maximized);
1309     // final String language = parser.getLanguage();
1310     // final String script = parser.getScript();
1311     // final String region = parser.getRegion();
1312     // if (language.length() == 0 || script.length() == 0 || region.length() == 0) {
1313     // failure("   { \"" + maximized + "\", \"" + maximized + "\" },   //     " + english.getName(maximized) +
1314     // "\t\tFailed-Consistency");
1315     // continue;
1316     // }
1317     // addIfNotIn(language, maximized, needMappings, toMaximized, "Consistency");
1318     // addIfNotIn(language + "_" + script, maximized, needMappings, toMaximized, "Consistency");
1319     // addIfNotIn(language + "_" + region, maximized, needMappings, toMaximized, "Consistency");
1320     // addIfNotIn("und_" + script, maximized, needMappings, toMaximized, "Consistency");
1321     // addIfNotIn("und_" + script + "_" + region, maximized, needMappings, toMaximized, "Consistency");
1322     // addIfNotIn("und_" + region, maximized, needMappings, toMaximized, "Consistency");
1323     // }
1324     // toMaximized.putAll(needMappings);
1325     // }
1326 
1327     // private static void failure(String string) {
1328     // System.out.println(string);
1329     // errorCount++;
1330     // }
1331 
1332     // private static void addIfNotIn(String key, String value, Map<String, String> toAdd, Map<String, String>
1333     // otherToCheck, String kind) {
1334     // addIfNotIn(key, value, toAdd, otherToCheck == null ? null : otherToCheck.keySet(), null, kind);
1335     // }
1336 
1337     // private static void addIfNotIn(String key, String value, Map<String, String> toAdd, Set<String> skipKey,
1338     // Set<String> skipValue, String kind) {
1339     // if (!key.equals(value)
1340     // && !toAdd.containsKey(key)
1341     // && (skipKey == null || !skipKey.contains(key))
1342     // && (skipValue == null || !skipValue.contains(value))) {
1343     // add(key, value, toAdd, kind);
1344     // }
1345     // }
1346 
1347     enum LocaleOverride {
1348         KEEP_EXISTING, REPLACE_EXISTING
1349     }
1350 
add(String key, String value, Map<String, String> toAdd, String kind, LocaleOverride override, boolean showAction)1351     private static void add(String key, String value, Map<String, String> toAdd, String kind, LocaleOverride override,
1352         boolean showAction) {
1353         if (key.equals(DEBUG_ADD_KEY)) {
1354             System.out.println("*debug*");
1355         }
1356         String oldValue = toAdd.get(key);
1357         if (oldValue == null) {
1358             if (showAction) {
1359                 System.out.println("\tAdding:\t\t" + getName(key) + "\t=>\t" + getName(value) + "\t\t\t\t" + kind);
1360             }
1361         } else if (override == LocaleOverride.KEEP_EXISTING || value.equals(oldValue)) {
1362             // if (showAction) {
1363             // System.out.println("Skipping:\t" + key + "\t=>\t" + value + "\t\t\t\t" + kind);
1364             // }
1365             return;
1366         } else {
1367             if (showAction) {
1368                 System.out.println("\tReplacing:\t" + getName(key) + "\t=>\t" + getName(value) + "\t, was\t" + getName(oldValue) + "\t\t" + kind);
1369             }
1370         }
1371         toAdd.put(key, value);
1372     }
1373 
getName(String value)1374     private static String getName(String value) {
1375         return ConvertLanguageData.getLanguageCodeAndName(value);
1376     }
1377 
1378     // private static void addCountries(Map<String, String> toMaximized) {
1379     // Map <String, Map<String, Double>> scriptToLanguageToSize = new TreeMap();
1380     //
1381     // for (String territory : supplementalData.getTerritoriesWithPopulationData()) {
1382     // Set<String> languages = supplementalData.getLanguagesForTerritoryWithPopulationData(territory);
1383     // String biggestOfficial = null;
1384     // double biggest = -1;
1385     // for (String language : languages) {
1386     // PopulationData info = supplementalData.getLanguageAndTerritoryPopulationData(language, territory);
1387     // // add to info about script
1388     //
1389     // String script = getScriptForLocale(language);
1390     // if (script != null) {
1391     // Map<String, Double> languageInfo = scriptToLanguageToSize.get(script);
1392     // if (languageInfo == null) scriptToLanguageToSize.put(script, languageInfo = new TreeMap());
1393     // String baseLanguage = language;
1394     // int pos = baseLanguage.indexOf('_');
1395     // if (pos >= 0) {
1396     // baseLanguage = baseLanguage.substring(0,pos);
1397     // }
1398     // Double size = languageInfo.get(baseLanguage);
1399     // languageInfo.put(baseLanguage, (size == null ? 0 : size) + info.getLiteratePopulation());
1400     // }
1401     //
1402     //
1403     // final OfficialStatus officialStatus = info.getOfficialStatus();
1404     // if (officialStatus == OfficialStatus.de_facto_official || officialStatus == OfficialStatus.official) {
1405     // double size2 = info.getLiteratePopulation();
1406     // if (biggest < size2) {
1407     // biggest = size2;
1408     // biggestOfficial = language;
1409     // }
1410     // }
1411     // }
1412     // if (biggestOfficial != null) {
1413     // final String replacementTag = "und_" + territory;
1414     // String maximized = biggestOfficial + "_" + territory;
1415     // toMaximized.put(replacementTag, maximized);
1416     // if (SHOW_ADD) System.out.println("Adding:\t" + replacementTag + "\t=>\t" + maximized + "\t\tLanguage-Territory");
1417     // }
1418     // }
1419     //
1420     // for (String script : scriptToLanguageToSize.keySet()) {
1421     // String biggestOfficial = null;
1422     // double biggest = -1;
1423     //
1424     // final Map<String, Double> languageToSize = scriptToLanguageToSize.get(script);
1425     // for (String language : languageToSize.keySet()) {
1426     // double size = languageToSize.get(language);
1427     // if (biggest < size) {
1428     // biggest = size;
1429     // biggestOfficial = language;
1430     // }
1431     // }
1432     // if (biggestOfficial != null) {
1433     // final String replacementTag = "und_" + script;
1434     // String maximized = biggestOfficial + "_" + script;
1435     // toMaximized.put(replacementTag, maximized);
1436     // if (SHOW_ADD) System.out.println("Adding:\t" + replacementTag + "\t=>\t" + maximized + "\t\tUnd-Script");
1437     // }
1438     // }
1439     // }
1440 
1441     // private static void closeUnd(Map<String, String> toMaximized) {
1442     // Map<String,String> toAdd = new TreeMap<String,String>();
1443     // for (String oldSource : toMaximized.keySet()) {
1444     // String maximized = toMaximized.get(oldSource);
1445     // if (!maximized.startsWith("und")) {
1446     // int pos = maximized.indexOf("_");
1447     // if (pos >= 0) {
1448     // addIfNotIn( "und" + maximized.substring(pos), maximized, toAdd, toMaximized, "CloseUnd");
1449     // }
1450     // }
1451     // }
1452     // toMaximized.putAll(toAdd);
1453     // }
1454 
1455     /**
1456      * Generate tags where the deprecated values map to the expanded values
1457      *
1458      * @param toMaximized
1459      */
1460     // private static void addDeprecated(Map<String, String> toMaximized) {
1461     // Map<String, Map<String, List<String>>> typeToTagToReplacement = supplementalData.getLocaleAliasInfo();
1462     // LanguageTagParser temp = new LanguageTagParser();
1463     // LanguageTagParser tagParsed = new LanguageTagParser();
1464     // LanguageTagParser replacementParsed = new LanguageTagParser();
1465     // Map<String,String> toAdd = new TreeMap<String,String>();
1466     // while (true) {
1467     // toAdd.clear();
1468     // for (String type : typeToTagToReplacement.keySet()) {
1469     // if (type.equals("variant") || type.equals("zone")) continue;
1470     // boolean addUnd = !type.equals("language");
1471     //
1472     // Map<String, List<String>> tagToReplacement = typeToTagToReplacement.get(type);
1473     // System.out.println("*" + type + " = " + tagToReplacement);
1474     //
1475     // for (String tag: tagToReplacement.keySet()) {
1476     //
1477     // final List<String> list = tagToReplacement.get(tag);
1478     // if (list == null) continue; // we don't have any information
1479     // String replacement = list.get(0);
1480     //
1481     // // only do multiples
1482     // if (tag.contains("_") || !replacement.contains("_")) {
1483     // continue;
1484     // }
1485     //
1486     // // we now have a tag and a replacement value
1487     // // make parsers that we can use
1488     // try {
1489     // tagParsed.set(addUnd ? "und-" + tag : tag);
1490     // replacementParsed.set(addUnd ? "und-" + replacement : replacement);
1491     // } catch (RuntimeException e) {
1492     // continue;
1493     // }
1494     // addIfNotIn(tag, replacement, toAdd, toMaximized,"Deprecated");
1495     //
1496     // for (String locale : toMaximized.keySet()) {
1497     // String maximized = toMaximized.get(locale);
1498     // addIfMatches(temp.set(locale), maximized, replacementParsed, tagParsed, toAdd, toMaximized);
1499     // addIfMatches(temp.set(maximized), maximized, replacementParsed, tagParsed, toAdd, toMaximized);
1500     // }
1501     // }
1502     // }
1503     // if (toAdd.size() == 0) {
1504     // break;
1505     // }
1506     // toMaximized.putAll(toAdd);
1507     // }
1508     // }
1509 
1510     // private static void addIfMatches(LanguageTagParser locale, String maximized, LanguageTagParser tagParsed,
1511     // LanguageTagParser replacementParsed, Map<String, String> toAdd, Map<String, String> toMaximized) {
1512     // if (!tagParsed.getLanguage().equals(locale.getLanguage()) && !tagParsed.getLanguage().equals("und")) {
1513     // return;
1514     // }
1515     // if (!tagParsed.getScript().equals(locale.getScript()) && !tagParsed.getScript().equals("")) {
1516     // return;
1517     // }
1518     // if (!tagParsed.getRegion().equals(locale.getRegion()) && !tagParsed.getRegion().equals("")) {
1519     // return;
1520     // }
1521     // if (!replacementParsed.getLanguage().equals("und")) {
1522     // locale.setLanguage(replacementParsed.getLanguage());
1523     // }
1524     // if (!replacementParsed.getScript().equals("")) {
1525     // locale.setScript(replacementParsed.getScript());
1526     // }
1527     // if (!replacementParsed.getRegion().equals("")) {
1528     // locale.setRegion(replacementParsed.getRegion());
1529     // }
1530     // addIfNotIn(locale.toString(), maximized, toAdd, toMaximized,"Deprecated");
1531     // }
1532 
1533     // private static int getSubtagPosition(String locale, String subtags) {
1534     // int pos = -1;
1535     // while (true) {
1536     // pos = locale.indexOf(subtags, pos + 1);
1537     // if (pos < 0) return -1;
1538     // // make sure boundaries are ok
1539     // if (pos != 0) {
1540     // char charBefore = locale.charAt(pos-1);
1541     // if (charBefore != '_' && charBefore != '_') return -1;
1542     // }
1543     // int limit = pos + subtags.length();
1544     // if (limit != locale.length()) {
1545     // char charAfter = locale.charAt(limit);
1546     // if (charAfter != '_' && charAfter != '_') return -1;
1547     // }
1548     // return pos;
1549     // }
1550     // }
1551 
1552     /*
1553      * Format
1554      * const DefaultSubtags default_subtags[] = {
1555      * {
1556      * // Afar => Afar (Latin, Ethiopia)
1557      * "aa",
1558      * "aa_Latn_ET"
1559      * },{
1560      * // Afrikaans => Afrikaans (Latin, South Africa)
1561      * "af",
1562      * "af_Latn_ZA"
1563      * },{
1564      */
1565 
printLikelySubtags(Map<String, String> fluffup)1566     private static void printLikelySubtags(Map<String, String> fluffup) throws IOException {
1567 
1568         PrintWriter out = FileUtilities.openUTF8Writer(CLDRPaths.GEN_DIRECTORY,
1569             "/supplemental/likelySubtags" + (OUTPUT_STYLE == OutputStyle.XML ? ".xml" : ".txt"));
1570         String spacing = OUTPUT_STYLE == OutputStyle.PLAINTEXT ? "\t" : " ";
1571         String header = OUTPUT_STYLE != OutputStyle.XML ? "const MapToMaximalSubtags default_subtags[] = {"
1572             : "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>" + CldrUtility.LINE_SEPARATOR
1573                 + "<!DOCTYPE supplementalData SYSTEM \"../../common/dtd/ldmlSupplemental.dtd\">"
1574                 + CldrUtility.LINE_SEPARATOR
1575                 + "<!--"
1576                 + CldrUtility.LINE_SEPARATOR
1577                 + CldrUtility.getCopyrightString()
1578                 + CldrUtility.LINE_SEPARATOR
1579                 + "-->"
1580                 + CldrUtility.LINE_SEPARATOR
1581                 + "<!--"
1582                 + CldrUtility.LINE_SEPARATOR
1583                 + "Likely subtags data is generated programatically from CLDR's language/territory/population" + CldrUtility.LINE_SEPARATOR
1584                 + "data using the GenerateMaximalLocales tool. Under normal circumstances, this file should" + CldrUtility.LINE_SEPARATOR
1585                 + "not be patched by hand, as any changes made in that fashion may be lost."
1586                 + CldrUtility.LINE_SEPARATOR
1587                 + "-->"
1588                 + CldrUtility.LINE_SEPARATOR
1589                 + "<supplementalData>" + CldrUtility.LINE_SEPARATOR
1590                 + "    <version number=\"$" +
1591                 "Revision$\"/>" + CldrUtility.LINE_SEPARATOR
1592                 + "    <likelySubtags>";
1593         String footer = OUTPUT_STYLE != OutputStyle.XML ? SEPARATOR + "};"
1594             : "    </likelySubtags>" + CldrUtility.LINE_SEPARATOR
1595                 + "</supplementalData>";
1596         out.println(header);
1597         boolean first = true;
1598         Set<String> keys = new TreeSet<>(new LocaleStringComparator());
1599         keys.addAll(fluffup.keySet());
1600         for (String printingLocale : keys) {
1601             String printingTarget = fluffup.get(printingLocale);
1602             String comment = printingName(printingLocale, spacing) + spacing + "=>" + spacing
1603                 + printingName(printingTarget, spacing);
1604 
1605             if (OUTPUT_STYLE == OutputStyle.XML) {
1606                 out.println("\t\t<likelySubtag from=\"" + printingLocale +
1607                     "\" to=\"" + printingTarget + "\"" +
1608                     "/>" + CldrUtility.LINE_SEPARATOR + "\t\t" + "<!--" + comment + "-->");
1609             } else {
1610                 if (first) {
1611                     first = false;
1612                 } else {
1613                     out.print(",");
1614                 }
1615                 if (comment.length() > 70 && SEPARATOR.equals(CldrUtility.LINE_SEPARATOR)) {
1616                     comment = printingName(printingLocale, spacing) + SEPARATOR + "    // " + spacing + "=>" + spacing
1617                         + printingName(printingTarget, spacing);
1618                 }
1619                 out.print(
1620                     "  {"
1621                         + SEPARATOR + "    // " + comment
1622                         + SEPARATOR + "    \"" + printingLocale + "\","
1623                         + SEPARATOR + "    \"" + printingTarget + "\""
1624                         + CldrUtility.LINE_SEPARATOR + "  }");
1625             }
1626         }
1627         out.println(footer);
1628         out.close();
1629     }
1630 
printingName(String locale, String spacing)1631     public static String printingName(String locale, String spacing) {
1632         if (locale == null) {
1633             return null;
1634         }
1635         LanguageTagParser parser = new LanguageTagParser().set(locale);
1636         String lang = parser.getLanguage();
1637         String script = parser.getScript();
1638         String region = parser.getRegion();
1639         return "{" + spacing +
1640             (lang.equals("und") ? "?" : english.getName(CLDRFile.LANGUAGE_NAME, lang)) + ";" + spacing +
1641             (script == null || script.equals("") ? "?" : english.getName(CLDRFile.SCRIPT_NAME, script)) + ";" + spacing
1642             +
1643             (region == null || region.equals("") ? "?" : english.getName(CLDRFile.TERRITORY_NAME, region)) + spacing
1644             + "}";
1645     }
1646 
1647     private static final String[][] ALT_REVERSAL = {
1648         { "nb", "no" },
1649         { "no", "nb" },
1650         { "he", "iw" },
1651         { "iw", "he" },
1652     };
1653 
toAlt(String locale, boolean change)1654     public static String toAlt(String locale, boolean change) {
1655         if (!change || locale == null) {
1656             return locale;
1657         }
1658         String firstTag = getFirstTag(locale);
1659         for (String[] pair : ALT_REVERSAL) {
1660             if (firstTag.equals(pair[0])) {
1661                 locale = pair[1] + locale.substring(pair[1].length());
1662                 break;
1663             }
1664         }
1665         locale = locale.replace("_", "-");
1666         return locale;
1667     }
1668 
getFirstTag(String locale)1669     private static String getFirstTag(String locale) {
1670         int pos = locale.indexOf('_');
1671         return pos < 0 ? locale : locale.substring(0, pos);
1672     }
1673 
1674     // private static Map<String, String> getBackMapping(Map<String, String> fluffup) {
1675     // Relation<String,String> backMap = new Relation(new TreeMap(), TreeSet.class, BEST_LANGUAGE_COMPARATOR);
1676     // for (String source : fluffup.keySet()) {
1677     // if (source.startsWith("und")) {
1678     // continue;
1679     // }
1680     // String maximized = fluffup.get(source);
1681     // backMap.put(maximized, source); // put in right order
1682     // }
1683     // Map<String,String> returnBackMap = new TreeMap();
1684     // for (String maximized : backMap.keySet()) {
1685     // final Set<String> all = backMap.getAll(maximized);
1686     // final String minimized = all.iterator().next();
1687     // returnBackMap.put(maximized, minimized);
1688     // }
1689     // return returnBackMap;
1690     // }
1691 
1692     /**
1693      * Language tags are presumed to share the first language, except possibly "und". Best is least
1694      */
1695     // private static Comparator BEST_LANGUAGE_COMPARATOR = new Comparator<String>() {
1696     // LanguageTagParser p1 = new LanguageTagParser();
1697     // LanguageTagParser p2 = new LanguageTagParser();
1698     // public int compare(String o1, String o2) {
1699     // if (o1.equals(o2)) return 0;
1700     // p1.set(o1);
1701     // p2.set(o2);
1702     // String lang1 = p1.getLanguage();
1703     // String lang2 = p2.getLanguage();
1704     //
1705     // // compare languages first
1706     // // put und at the end
1707     // int result = lang1.compareTo(lang2);
1708     // if (result != 0) {
1709     // if (lang1.equals("und")) return 1;
1710     // if (lang2.equals("und")) return -1;
1711     // return result;
1712     // }
1713     //
1714     // // now scripts and regions.
1715     // // if they have different numbers of fields, the shorter wins.
1716     // // If there are two fields, region is lowest.
1717     // // The simplest way is to just compare scripts first
1718     // // so zh-TW < zh-Hant, because we first compare "" to Hant
1719     // String script1 = p1.getScript();
1720     // String script2 = p2.getScript();
1721     // int scriptOrder = script1.compareTo(script2);
1722     // if (scriptOrder != 0) return scriptOrder;
1723     //
1724     // String region1 = p1.getRegion();
1725     // String region2 = p2.getRegion();
1726     // int regionOrder = region1.compareTo(region2);
1727     // if (regionOrder != 0) return regionOrder;
1728     //
1729     // return o1.compareTo(o2);
1730     // }
1731     //
1732     // };
1733 
minimize(Map<String, String> fluffup)1734     public static void minimize(Map<String, String> fluffup) {
1735         LanguageTagParser parser = new LanguageTagParser();
1736         LanguageTagParser targetParser = new LanguageTagParser();
1737         Set<String> removals = new TreeSet<>();
1738         while (true) {
1739             removals.clear();
1740             for (String locale : fluffup.keySet()) {
1741                 String target = fluffup.get(locale);
1742                 if (targetParser.set(target).getRegion().equals(UNKNOWN_REGION)) {
1743                     removals.add(locale);
1744                     if (SHOW_ADD)
1745                         System.out.println("Removing:\t" + getName(locale) + "\t=>\t" + getName(target)
1746                             + "\t\t - Unknown Region in target");
1747                     continue;
1748                 }
1749                 if (targetParser.getScript().equals(UNKNOWN_SCRIPT)) {
1750                     removals.add(locale);
1751                     if (SHOW_ADD)
1752                         System.out.println("Removing:\t" + getName(locale) + "\t=>\t" + getName(target)
1753                             + "\t\t - Unknown Script in target");
1754                     continue;
1755                 }
1756 
1757                 String region = parser.set(locale).getRegion();
1758                 if (region.length() != 0) {
1759                     if (region.equals(UNKNOWN_REGION)) {
1760                         removals.add(locale);
1761                         if (SHOW_ADD)
1762                             System.out.println("Removing:\t" + getName(locale) + "\t=>\t" + getName(target)
1763                                 + "\t\t - Unknown Region in source");
1764                         continue;
1765                     }
1766                     parser.setRegion("");
1767                     String newLocale = parser.toString();
1768                     String newTarget = fluffup.get(newLocale);
1769                     if (newTarget != null) {
1770                         newTarget = targetParser.set(newTarget).setRegion(region).toString();
1771                         if (target.equals(newTarget) && !KEEP_TARGETS.contains(locale)) {
1772                             removals.add(locale);
1773                             if (SHOW_ADD)
1774                                 System.out.println("Removing:\t" + locale + "\t=>\t" + target + "\t\tRedundant with "
1775                                     + newLocale);
1776                             continue;
1777                         }
1778                     }
1779                 }
1780                 String script = parser.set(locale).getScript();
1781                 if (locale.equals(DEBUG_ADD_KEY)) {
1782                     System.out.println("*debug*");
1783                 }
1784                 if (script.length() != 0) {
1785                     if (script.equals(UNKNOWN_SCRIPT)) {
1786                         removals.add(locale);
1787                         if (SHOW_ADD)
1788                             System.out.println("Removing:\t" + locale + "\t=>\t" + target + "\t\t - Unknown Script");
1789                         continue;
1790                     }
1791                     parser.setScript("");
1792                     String newLocale = parser.toString();
1793                     String newTarget = fluffup.get(newLocale);
1794                     if (newTarget != null) {
1795                         newTarget = targetParser.set(newTarget).setScript(script).toString();
1796                         if (target.equals(newTarget) && !KEEP_TARGETS.contains(locale)) {
1797                             removals.add(locale);
1798                             if (SHOW_ADD)
1799                                 System.out.println("Removing:\t" + locale + "\t=>\t" + target + "\t\tRedundant with "
1800                                     + newLocale);
1801                             continue;
1802                         }
1803                     }
1804                 }
1805             }
1806             if (removals.size() == 0) {
1807                 break;
1808             }
1809             for (String locale : removals) {
1810                 fluffup.remove(locale);
1811             }
1812         }
1813     }
1814 
1815     // private static void addLanguageScript(Map<String, String> fluffup, LanguageTagParser parser) {
1816     // // add script
1817     // Map<String, String> temp = new TreeMap<String, String>();
1818     // while (true) {
1819     // temp.clear();
1820     // for (String target : new TreeSet<String>(fluffup.values())) {
1821     // parser.set(target);
1822     // final String territory = parser.getRegion();
1823     // if (territory.length() == 0) {
1824     // continue;
1825     // }
1826     // parser.setRegion("");
1827     // String possibleSource = parser.toString();
1828     // if (fluffup.containsKey(possibleSource)) {
1829     // continue;
1830     // }
1831     // String other = temp.get(possibleSource);
1832     // if (other != null) {
1833     // if (!target.equals(other)) {
1834     // System.out.println("**Failure with multiple sources in addLanguageScript: "
1835     // + possibleSource + "\t=>\t" + target + ", " + other);
1836     // }
1837     // continue;
1838     // }
1839     // temp.put(possibleSource, target);
1840     // if (SHOW_ADD) System.out.println("Adding:\t" + possibleSource + "\t=>\t" + target + "\t\tLanguage-Script");
1841     // }
1842     // if (temp.size() == 0) {
1843     // break;
1844     // }
1845     // fluffup.putAll(temp);
1846     // }
1847     //
1848     // }
1849 
1850     // private static void addLanguageCountry(Map<String, String> fluffup, LanguageTagParser parser) {
1851     // // add script
1852     // Map<String, String> temp = new TreeMap<String, String>();
1853     // while (true) {
1854     // temp.clear();
1855     // for (String target : new TreeSet<String>(fluffup.values())) {
1856     // parser.set(target);
1857     // String script = parser.getScript();
1858     // if (script.length() == 0) {
1859     // continue;
1860     // }
1861     // parser.setScript("");
1862     // String possibleSource = parser.toString();
1863     // if (fluffup.containsKey(possibleSource)) {
1864     // continue;
1865     // }
1866     // String other = temp.get(possibleSource);
1867     //
1868     // if (other != null) {
1869     // if (!target.equals(other)) {
1870     // script = getScriptForLocale(possibleSource);
1871     // if (script == null) {
1872     // System.out.println("**Failure with multiple sources in addLanguageCountry: "
1873     // + possibleSource + "\t=>\t" + target + ", " + other);
1874     // continue; // error message in routine
1875     // }
1876     // parser.setScript(script);
1877     // target = parser.toString();
1878     // }
1879     // }
1880     //
1881     // temp.put(possibleSource, target);
1882     // if (SHOW_ADD) System.out.println("Adding:\t" + possibleSource + "\t=>\t" + target + "\t\tLanguageCountry");
1883     // }
1884     // if (temp.size() == 0) {
1885     // break;
1886     // }
1887     // fluffup.putAll(temp);
1888     // }
1889     //
1890     // }
1891 
1892     // private static void addScript(Map<String, String> fluffup, LanguageTagParser parser) {
1893     // // add script
1894     // Map<String, String> temp = new TreeMap<String, String>();
1895     // while (true) {
1896     // temp.clear();
1897     // Set skipTarget = fluffup.keySet();
1898     // for (String locale : fluffup.keySet()) {
1899     // String target = fluffup.get(locale);
1900     // parser.set(target);
1901     // if (parser.getScript().length() != 0) {
1902     // continue;
1903     // }
1904     // String script = getScriptForLocale(target);
1905     //
1906     // if (script == null) {
1907     // continue; // error message in routine
1908     // }
1909     // parser.setScript(script);
1910     // String furtherTarget = parser.toString();
1911     // addIfNotIn(target, furtherTarget, temp, fluffup, "Script");
1912     // }
1913     // if (temp.size() == 0) {
1914     // break;
1915     // }
1916     // fluffup.putAll(temp);
1917     // }
1918     // }
1919 
1920     // private static String getScriptForLocale(String locale) {
1921     // String result = getScriptForLocale2(locale);
1922     // if (result != null) return result;
1923     // int pos = locale.indexOf('_');
1924     // if (pos >= 0) {
1925     // result = getScriptForLocale2(locale.substring(0,pos));
1926     // }
1927     // return result;
1928     // }
1929 
1930     private static String UNKNOWN_SCRIPT = "Zzzz";
1931     private static String UNKNOWN_REGION = "ZZ";
1932 
getScriptForLocale2(String locale)1933     private static String getScriptForLocale2(String locale) {
1934         String result = localeToScriptCache.get(locale);
1935         if (result != null) {
1936             return result;
1937         }
1938         if (locale.equals("ky")) {
1939             int debug = 0;
1940         }
1941         try {
1942             Map<Type, BasicLanguageData> data = supplementalData.getBasicLanguageDataMap(locale);
1943             if (data != null) {
1944                 for (BasicLanguageData datum : data.values()) {
1945                     final Set<String> scripts = datum.getScripts();
1946                     boolean isPrimary = datum.getType() == BasicLanguageData.Type.primary;
1947                     if (scripts.size() != 1) {
1948                         if (scripts.size() > 1 && isPrimary) {
1949                             break;
1950                         }
1951                         continue;
1952                     }
1953                     String script = scripts.iterator().next();
1954                     if (isPrimary) {
1955                         return result = script;
1956                     } else if (result == null) {
1957                         result = script;
1958                     }
1959                 }
1960                 if (result != null) {
1961                     return result;
1962                 }
1963             }
1964             CLDRFile cldrFile;
1965             try {
1966                 cldrFile = factory.make(locale, true);
1967             } catch (RuntimeException e) {
1968                 result = FALLBACK_SCRIPTS.get(locale);
1969                 if (result == null) {
1970                     System.out.println("***Failed to find script for: " + locale + "\t" + english.getName(locale));
1971                     return result = UNKNOWN_SCRIPT;
1972                 } else {
1973                     return result;
1974                 }
1975             }
1976             UnicodeSet exemplars = getExemplarSet(cldrFile, "");
1977             Set<String> CLDRScripts = getScriptsFromUnicodeSet(exemplars);
1978             CLDRScripts.remove(UNKNOWN_SCRIPT);
1979             if (CLDRScripts.size() == 1) {
1980                 return result = CLDRScripts.iterator().next();
1981             } else if (CLDRScripts.size() == 0) {
1982                 System.out.println("**Failed to get script for:\t" + locale);
1983                 return result = UNKNOWN_SCRIPT;
1984             } else {
1985                 System.out.println("**Failed, too many scripts for:\t" + locale + ", " + CLDRScripts);
1986                 return result = UNKNOWN_SCRIPT;
1987             }
1988         } finally {
1989             if (result.equals(UNKNOWN_SCRIPT)) {
1990                 String temp = LANGUAGE_OVERRIDES.get(locale);
1991                 if (temp != null) {
1992                     result = new LanguageTagParser().set(temp).getScript();
1993                     System.out.println("Getting script from LANGUAGE_OVERRIDES for " + locale + " => " + result);
1994                 }
1995             }
1996             localeToScriptCache.put(locale, result);
1997             if (SHOW_ADD)
1998                 System.out.println("Script:\t" + locale + "\t" + english.getName(locale) + "\t=>\t" + result + "\t"
1999                     + english.getName(CLDRFile.SCRIPT_NAME, result));
2000         }
2001     }
2002 
2003     // private static Map<String, String> closeMapping(Map<String, String> fluffup) {
2004     // if (SHOW_ADD) System.out.flush();
2005     // Map<String,String> temp = new TreeMap<String,String>();
2006     // while (true) {
2007     // temp.clear();
2008     // for (String locale : fluffup.keySet()) {
2009     // String target = fluffup.get(locale);
2010     // if (target.equals("si_Sinh") || target.equals("zh-Hani")) {
2011     // System.out.println("????");
2012     // }
2013     // String furtherTarget = fluffup.get(target);
2014     // if (furtherTarget == null) {
2015     // continue;
2016     // }
2017     // addIfNotIn(locale, furtherTarget, temp, null, "Close");
2018     // }
2019     // if (temp.size() == 0) {
2020     // break;
2021     // }
2022     // fluffup.putAll(temp);
2023     // }
2024     // if (SHOW_ADD) System.out.flush();
2025     // return temp;
2026     // }
2027 
getScriptsFromUnicodeSet(UnicodeSet exemplars)2028     public static Set<String> getScriptsFromUnicodeSet(UnicodeSet exemplars) {
2029         // use bits first, since that's faster
2030         BitSet scriptBits = new BitSet();
2031         boolean show = false;
2032         for (UnicodeSetIterator it = new UnicodeSetIterator(exemplars); it.next();) {
2033             if (show)
2034                 System.out.println(Integer.toHexString(it.codepoint));
2035             if (it.codepoint != UnicodeSetIterator.IS_STRING) {
2036                 scriptBits.set(UScript.getScript(it.codepoint));
2037             } else {
2038                 int cp;
2039                 for (int i = 0; i < it.string.length(); i += UTF16.getCharCount(cp)) {
2040                     scriptBits.set(UScript.getScript(cp = UTF16.charAt(it.string, i)));
2041                 }
2042             }
2043         }
2044         scriptBits.clear(UScript.COMMON);
2045         scriptBits.clear(UScript.INHERITED);
2046         Set<String> scripts = new TreeSet<>();
2047         for (int j = 0; j < scriptBits.size(); ++j) {
2048             if (scriptBits.get(j)) {
2049                 scripts.add(UScript.getShortName(j));
2050             }
2051         }
2052         return scripts;
2053     }
2054 
getExemplarSet(CLDRFile cldrfile, String type)2055     public static UnicodeSet getExemplarSet(CLDRFile cldrfile, String type) {
2056         if (type.length() != 0)
2057             type = "[@type=\"" + type + "\"]";
2058         String v = cldrfile.getStringValue("//ldml/characters/exemplarCharacters"
2059             + type);
2060         if (v == null)
2061             return new UnicodeSet();
2062         return new UnicodeSet(v);
2063     }
2064 
2065     // private static String[][] SpecialCases = {
2066     // { "zh_Hani", "zh_Hans_CN"},
2067     // { "si_Sinh", "si_Sinh_LK"},
2068     // { "ii", "ii_CN"}, // Sichuan Yi (Yi)
2069     // { "iu", "iu_CA"}, // Inuktitut (Unified Canadian Aboriginal Syllabics)
2070     // { "und", "en"}, // English default
2071     // };
2072 
showDefaultContentDifferencesAndFix(Set<String> defaultLocaleContent)2073     static void showDefaultContentDifferencesAndFix(Set<String> defaultLocaleContent) {
2074         Set<String> errors = new LinkedHashSet<>();
2075         Map<String, String> oldDefaultContent = SupplementalDataInfo.makeLocaleToDefaultContents(
2076             ConvertLanguageData.supplementalData.getDefaultContentLocales(), new TreeMap<String, String>(), errors);
2077         if (!errors.isEmpty()) {
2078             System.out.println(Joiner.on("\n").join(errors));
2079             errors.clear();
2080         }
2081         Map<String, String> newDefaultContent = SupplementalDataInfo.makeLocaleToDefaultContents(defaultLocaleContent,
2082             new TreeMap<String, String>(), errors);
2083         if (!errors.isEmpty()) {
2084             System.out.println("Default Content errors: " + Joiner.on("\n").join(errors));
2085             errors.clear();
2086         }
2087         Set<String> changes = compareMapsAndFixNew("*WARNING* Default Content: ", oldDefaultContent, newDefaultContent,
2088             "ar", "ar_001");
2089         System.out.println(Joiner.on("\n").join(changes));
2090         defaultLocaleContent.clear();
2091         defaultLocaleContent.addAll(newDefaultContent.values());
2092         newDefaultContent = SupplementalDataInfo.makeLocaleToDefaultContents(defaultLocaleContent,
2093             new TreeMap<String, String>(), errors);
2094         if (!errors.isEmpty()) {
2095             System.out.println("***New Errors: " + Joiner.on("\n").join(errors));
2096         }
2097     }
2098 
compareMapsAndFixNew(String title, Map<String, String> oldContent, Map<String, String> newContent, String... allowedOverrideValues)2099     private static Set<String> compareMapsAndFixNew(String title,
2100         Map<String, String> oldContent,
2101         Map<String, String> newContent, String... allowedOverrideValues) {
2102         Map<String, String> allowedOverrideValuesTest = new HashMap<>();
2103         for (int i = 0; i < allowedOverrideValues.length; i += 2) {
2104             allowedOverrideValuesTest.put(allowedOverrideValues[i], allowedOverrideValues[i + 1]);
2105         }
2106         Set<String> changes = new TreeSet<>();
2107         for (String parent : Builder.with(new TreeSet<String>()).addAll(newContent.keySet())
2108             .addAll(oldContent.keySet()).get()) {
2109             String oldValue = oldContent.get(parent);
2110             String newValue = newContent.get(parent);
2111             String overrideValue = allowedOverrideValuesTest.get(parent);
2112             if (overrideValue != null) {
2113                 newContent.put(parent, overrideValue);
2114                 newValue = overrideValue;
2115             }
2116             if (CldrUtility.equals(oldValue, newValue)) {
2117                 continue;
2118             }
2119             String message;
2120             if (oldValue == null) {
2121                 message = "Adding " + ConvertLanguageData.getLanguageCodeAndName(parent) + " => "
2122                     + ConvertLanguageData.getLanguageCodeAndName(newValue);
2123                 newContent.put(parent, newValue);
2124             } else if (newValue == null) {
2125                 if (SUPPRESS_CHANGES) {
2126                     message = "Suppressing removal of "
2127                         + ConvertLanguageData.getLanguageCodeAndName(parent) + " => "
2128                         + ConvertLanguageData.getLanguageCodeAndName(oldValue);
2129                     newContent.put(parent, oldValue);
2130                 } else {
2131                     message = "Removing "
2132                         + ConvertLanguageData.getLanguageCodeAndName(parent) + " => "
2133                         + ConvertLanguageData.getLanguageCodeAndName(oldValue);
2134                     newContent.remove(oldValue);
2135                 }
2136             } else {
2137                 if (SUPPRESS_CHANGES) {
2138                     message = "Suppressing change of "
2139                         + ConvertLanguageData.getLanguageCodeAndName(parent) + " => "
2140                         + ConvertLanguageData.getLanguageCodeAndName(oldValue) + " to "
2141                         + ConvertLanguageData.getLanguageCodeAndName(newValue);
2142                     newContent.remove(newValue);
2143                     newContent.put(parent, oldValue);
2144                 } else {
2145                     message = "Changing "
2146                         + ConvertLanguageData.getLanguageCodeAndName(parent) + " => "
2147                         + ConvertLanguageData.getLanguageCodeAndName(oldValue) + " to "
2148                         + ConvertLanguageData.getLanguageCodeAndName(newValue);
2149                     newContent.remove(oldValue);
2150                     newContent.put(parent, newValue);
2151                 }
2152             }
2153             changes.add(title + message);
2154         }
2155         return changes;
2156     }
2157 
2158     public static class LocaleStringComparator implements Comparator<String> {
2159         LanguageTagParser ltp0 = new LanguageTagParser();
2160         LanguageTagParser ltp1 = new LanguageTagParser();
2161 
2162         @Override
compare(String arg0, String arg1)2163         public int compare(String arg0, String arg1) {
2164             ltp0.set(arg0);
2165             ltp1.set(arg1);
2166             String s0 = ltp0.getLanguage();
2167             String s1 = ltp1.getLanguage();
2168             int result = s0.compareTo(s1);
2169             if (result != 0) {
2170                 return s0.equals("und") ? 1
2171                     : s1.equals("und") ? -1
2172                         : result;
2173             }
2174             s0 = ltp0.getScript();
2175             s1 = ltp1.getScript();
2176             result = s0.compareTo(s1);
2177             if (result != 0) {
2178                 return result;
2179             }
2180             s0 = ltp0.getRegion();
2181             s1 = ltp1.getRegion();
2182             result = s0.compareTo(s1);
2183             if (result != 0) {
2184                 return result;
2185             }
2186             return arg0.compareTo(arg1); // just in case
2187         }
2188 
2189     }
2190 }
2191