1 package org.unicode.cldr.tool; 2 3 import java.util.List; 4 import java.util.Map; 5 import java.util.Map.Entry; 6 import java.util.Set; 7 8 import org.unicode.cldr.util.CLDRConfig; 9 import org.unicode.cldr.util.CLDRFile; 10 import org.unicode.cldr.util.StandardCodes; 11 import org.unicode.cldr.util.StandardCodes.LstrField; 12 import org.unicode.cldr.util.StandardCodes.LstrType; 13 import org.unicode.cldr.util.SupplementalDataInfo; 14 15 import com.google.common.collect.HashMultimap; 16 import com.google.common.collect.Multimap; 17 import com.google.common.collect.TreeMultimap; 18 import com.ibm.icu.impl.Row.R4; 19 import com.ibm.icu.text.Transform; 20 21 public class GenerateLanguageMatches { 22 private static final CLDRFile ENGLISH = CLDRConfig.getInstance().getEnglish(); 23 private static final SupplementalDataInfo SDI = CLDRConfig.getInstance().getSupplementalDataInfo(); 24 main(String[] args)25 public static void main(String[] args) { 26 27 Map<String, Map<LstrField, String>> lstr = StandardCodes.getLstregEnumRaw().get(LstrType.language); 28 29 // we will limit to locales that are in CLDR. 30 31 Set<String> locales = CLDRConfig.getInstance().getFullCldrFactory().getAvailableLanguages(); 32 33 // Get the current languageMatch data 34 35 List<R4<String, String, Integer, Boolean>> matchData = SDI.getLanguageMatcherData("written_new"); 36 Multimap<String,String> desiredToSupported = HashMultimap.create(); 37 for (R4<String, String, Integer, Boolean> item : matchData) { 38 desiredToSupported.put(item.get0(), item.get1()); 39 if (!item.get3()) { // if not oneway 40 desiredToSupported.put(item.get1(), item.get0()); 41 } 42 } 43 44 // get the language aliases, since we can suppress those. 45 Set<String> languageAliases = SDI.getLocaleAliasInfo().get("language").keySet(); 46 47 // filter all of the encompassed languages (only use macro languages that are in CLDR). 48 49 Multimap<String,String> macroToEncompassed = TreeMultimap.create(); 50 for (Entry<String, Map<LstrField, String>> localeInfo : lstr.entrySet()) { 51 String locale = localeInfo.getKey(); 52 if (locale.contains("_")) { 53 continue; 54 } 55 // filter out ones with aliases 56 if (languageAliases.contains(locale)) { 57 continue; 58 } 59 60 // we filter to only encompassed languages (those with macro languages) 61 62 Map<LstrField, String> data = localeInfo.getValue(); 63 String macroLanguage = data.get(LstrField.Macrolanguage); 64 if (macroLanguage == null || !locales.contains(macroLanguage)) { 65 continue; 66 } 67 68 // Filter out what is in LanguageInfo already 69 70 if (desiredToSupported.containsEntry(locale, macroLanguage)) { 71 continue; 72 } 73 74 macroToEncompassed.put(macroLanguage, locale); 75 } 76 77 // now print sorted items 78 79 String last = ""; 80 System.out.println("<!-- START generated by GenerateLanguageMatches.java: don't manually change -->"); 81 for ( Entry<String, String> entry : macroToEncompassed.entries()) { 82 String macroLanguage = entry.getKey(); 83 if (macroLanguage.equals("ku")) { 84 continue; // these are problematic, since they shift scripts; also, might be better to fall back to ar/fa 85 } 86 if (!last.contentEquals(macroLanguage)) { 87 System.out.println("<!-- Encompassed by " + getName(macroLanguage) + " -->"); 88 } 89 String encompassed = entry.getValue(); 90 System.out.println("\t\t\t<languageMatch desired=\"" + encompassed 91 + "\" supported=\"" + macroLanguage 92 + "\" distance=\"10\" oneway=\"true\"/>\t" 93 + "<!-- " + getName(encompassed) + " -->"); 94 last = macroLanguage; 95 } 96 System.out.println("<!-- END generated by GenerateLanguageMatches.java -->"); 97 } 98 99 static final Transform<String, String> MENU = new Transform<String, String>() { 100 @Override 101 public String transform(@SuppressWarnings("unused") String source) { 102 return "menu"; 103 } 104 }; 105 getName(String lang)106 private static String getName(String lang) { 107 return ENGLISH.getName(CLDRFile.LANGUAGE_NAME, lang, MENU); 108 } 109 } 110