1 package org.unicode.cldr.tool;
2 
3 import java.util.List;
4 import java.util.Map;
5 import java.util.Map.Entry;
6 import java.util.Set;
7 
8 import org.unicode.cldr.util.CLDRConfig;
9 import org.unicode.cldr.util.CLDRFile;
10 import org.unicode.cldr.util.StandardCodes;
11 import org.unicode.cldr.util.StandardCodes.LstrField;
12 import org.unicode.cldr.util.StandardCodes.LstrType;
13 import org.unicode.cldr.util.SupplementalDataInfo;
14 
15 import com.google.common.collect.HashMultimap;
16 import com.google.common.collect.Multimap;
17 import com.google.common.collect.TreeMultimap;
18 import com.ibm.icu.impl.Row.R4;
19 import com.ibm.icu.text.Transform;
20 
21 public class GenerateLanguageMatches {
22     private static final CLDRFile ENGLISH = CLDRConfig.getInstance().getEnglish();
23     private static final SupplementalDataInfo SDI = CLDRConfig.getInstance().getSupplementalDataInfo();
24 
main(String[] args)25     public static void main(String[] args) {
26 
27         Map<String, Map<LstrField, String>> lstr = StandardCodes.getLstregEnumRaw().get(LstrType.language);
28 
29         // we will limit to locales that are in CLDR.
30 
31         Set<String> locales = CLDRConfig.getInstance().getFullCldrFactory().getAvailableLanguages();
32 
33         // Get the current languageMatch data
34 
35         List<R4<String, String, Integer, Boolean>> matchData = SDI.getLanguageMatcherData("written_new");
36         Multimap<String,String> desiredToSupported = HashMultimap.create();
37         for (R4<String, String, Integer, Boolean> item : matchData) {
38             desiredToSupported.put(item.get0(), item.get1());
39             if (!item.get3()) { // if not oneway
40                 desiredToSupported.put(item.get1(), item.get0());
41             }
42         }
43 
44         // get the language aliases, since we can suppress those.
45         Set<String> languageAliases = SDI.getLocaleAliasInfo().get("language").keySet();
46 
47         // filter all of the encompassed languages (only use macro languages that are in CLDR).
48 
49         Multimap<String,String> macroToEncompassed = TreeMultimap.create();
50         for (Entry<String, Map<LstrField, String>> localeInfo : lstr.entrySet()) {
51             String locale = localeInfo.getKey();
52             if (locale.contains("_")) {
53                 continue;
54             }
55             // filter out ones with aliases
56             if (languageAliases.contains(locale)) {
57                 continue;
58             }
59 
60             // we filter to only encompassed languages (those with macro languages)
61 
62             Map<LstrField, String> data = localeInfo.getValue();
63             String macroLanguage = data.get(LstrField.Macrolanguage);
64             if (macroLanguage == null || !locales.contains(macroLanguage)) {
65                 continue;
66             }
67 
68             // Filter out what is in LanguageInfo already
69 
70             if (desiredToSupported.containsEntry(locale, macroLanguage)) {
71                 continue;
72             }
73 
74             macroToEncompassed.put(macroLanguage, locale);
75         }
76 
77         // now print sorted items
78 
79         String last = "";
80         System.out.println("<!-- START generated by GenerateLanguageMatches.java: don't manually change -->");
81         for ( Entry<String, String> entry : macroToEncompassed.entries()) {
82             String macroLanguage = entry.getKey();
83             if (macroLanguage.equals("ku")) {
84                 continue; // these are problematic, since they shift scripts; also, might be better to fall back to ar/fa
85             }
86             if (!last.contentEquals(macroLanguage)) {
87                 System.out.println("<!-- Encompassed by " + getName(macroLanguage) + " -->");
88             }
89             String encompassed = entry.getValue();
90             System.out.println("\t\t\t<languageMatch desired=\"" + encompassed
91                 + "\" supported=\"" + macroLanguage
92                 + "\" distance=\"10\" oneway=\"true\"/>\t"
93                 + "<!-- " + getName(encompassed) + " -->");
94             last = macroLanguage;
95         }
96         System.out.println("<!-- END generated by GenerateLanguageMatches.java -->");
97     }
98 
99     static final Transform<String, String> MENU = new Transform<String, String>() {
100         @Override
101         public String transform(@SuppressWarnings("unused") String source) {
102             return "menu";
103         }
104     };
105 
getName(String lang)106     private static String getName(String lang) {
107         return ENGLISH.getName(CLDRFile.LANGUAGE_NAME, lang, MENU);
108     }
109 }
110