1 package org.unicode.cldr.util;
2 
3 import java.util.Collections;
4 import java.util.Comparator;
5 import java.util.LinkedHashMap;
6 import java.util.Map;
7 import java.util.Set;
8 import java.util.TreeMap;
9 
10 import org.unicode.cldr.util.ChainedMap.M3;
11 
12 import com.ibm.icu.dev.util.CollectionUtilities;
13 import com.ibm.icu.util.ULocale;
14 
15 public enum LanguageGroup {
16     root("und"), germanic("gem"), celtic("cel"), romance("roa"), slavic("sla"), baltic("bat"), indic("inc"), other_indo("ine_001"), dravidian("dra"), uralic(
17         "urj"), cjk("und_Hani"), sino_tibetan("sit"), tai("tai"), austronesian("map"), turkic("trk"), afroasiatic(
18             "afa"), austroasiatic("aav"), niger_congo("nic"), east_sudanic("sdv"), songhay("son"), american("und_019"), art("art"), other("und_001");
19 
20     public final String iso;
21 
LanguageGroup(String iso)22     LanguageGroup(String iso) {
23         this.iso = iso;
24     }
25 
26     static final Map<ULocale, LanguageGroup> LANGUAGE_GROUP;
27     static final M3<LanguageGroup, ULocale, Integer> GROUP_LANGUAGE = ChainedMap.of(new TreeMap<LanguageGroup, Object>(), new LinkedHashMap<ULocale, Object>(),
28         Integer.class);
29 
add(Map<ULocale, LanguageGroup> map, LanguageGroup group, String... baseLanguages)30     private static void add(Map<ULocale, LanguageGroup> map, LanguageGroup group, String... baseLanguages) {
31         Map<ULocale, Integer> soFar = GROUP_LANGUAGE.get(group);
32         int count = soFar == null ? 0 : soFar.size();
33         for (String s : baseLanguages) {
34             ULocale loc = new ULocale(s);
35             if (map.put(loc, group) != null) {
36                 throw new IllegalArgumentException("duplicate: " + s + ", " + group);
37             }
38             ;
39             GROUP_LANGUAGE.put(group, loc, count);
40             ++count;
41         }
42     }
43 
44     static {
45         LinkedHashMap<ULocale, LanguageGroup> temp = new LinkedHashMap<>();
46         LANGUAGE_GROUP = Collections.unmodifiableMap(temp);
add(temp, root, "root")47         add(temp, root, "root");
add(temp, germanic, "en", "fy", "nl", "af", "de", "gsw", "wae", "ksh", "lb", "sv", "da", "nb", "nn", "fo", "is", "yi")48         add(temp, germanic, "en", "fy", "nl", "af", "de", "gsw", "wae", "ksh", "lb", "sv", "da", "nb", "nn", "fo", "is", "yi");
add(temp, celtic, "ga", "gd", "cy", "gv", "kw", "br")49         add(temp, celtic, "ga", "gd", "cy", "gv", "kw", "br");
add(temp, romance, "fr", "pt", "gl", "es", "ca", "ast", "it", "rm", "ro")50         add(temp, romance, "fr", "pt", "gl", "es", "ca", "ast", "it", "rm", "ro");
add(temp, slavic, "pl", "cs", "sk", "sl", "hr", "bs", "mk", "sr", "bg", "ru", "be", "uk")51         add(temp, slavic, "pl", "cs", "sk", "sl", "hr", "bs", "mk", "sr", "bg", "ru", "be", "uk");
add(temp, baltic, "lt", "lv")52         add(temp, baltic, "lt", "lv");
add(temp, other_indo, "el", "hy", "sq", "fa", "ps", "os")53         add(temp, other_indo, "el", "hy", "sq", "fa", "ps", "os");
add(temp, indic, "ur", "hi", "gu", "sd", "bn", "as", "ccp", "or", "mr", "ne", "pa", "si")54         add(temp, indic, "ur", "hi", "gu", "sd", "bn", "as", "ccp", "or", "mr", "ne", "pa", "si");
add(temp, dravidian, "ta", "te", "ml", "kn")55         add(temp, dravidian, "ta", "te", "ml", "kn");
add(temp, cjk, "zh", "yue", "ja", "ko")56         add(temp, cjk, "zh", "yue", "ja", "ko");
add(temp, turkic, "tr", "az", "tk", "kk", "ky", "uz", "ug")57         add(temp, turkic, "tr", "az", "tk", "kk", "ky", "uz", "ug");
add(temp, uralic, "hu", "fi", "et", "se", "smn")58         add(temp, uralic, "hu", "fi", "et", "se", "smn");
add(temp, afroasiatic, "ar", "mt", "he", "om", "so", "ha", "am", "tzm", "zgh")59         add(temp, afroasiatic, "ar", "mt", "he", "om", "so", "ha", "am", "tzm", "zgh");
add(temp, tai, "th", "lo")60         add(temp, tai, "th", "lo");
add(temp, austronesian, "id", "ms", "jv", "fil", "haw")61         add(temp, austronesian, "id", "ms", "jv", "fil", "haw");
add(temp, austroasiatic, "vi", "km")62         add(temp, austroasiatic, "vi", "km");
add(temp, niger_congo, "sw", "swc", "yo", "ig", "ff", "sn", "zu")63         add(temp, niger_congo, "sw", "swc", "yo", "ig", "ff", "sn", "zu");
add(temp, other, "ka", "eu", "mn", "naq")64         add(temp, other, "ka", "eu", "mn", "naq");
add(temp, sino_tibetan, "my")65         add(temp, sino_tibetan, "my");
add(temp, afroasiatic, "aa", "kab", "shi", "ssy", "ti")66         add(temp, afroasiatic, "aa", "kab", "shi", "ssy", "ti");
add(temp, american, "chr", "kl", "lkt", "qu")67         add(temp, american, "chr", "kl", "lkt", "qu");
add(temp, art, "eo", "vo", "ia")68         add(temp, art, "eo", "vo", "ia");
add(temp, austronesian, "mg", "to")69         add(temp, austronesian, "mg", "to");
add(temp, east_sudanic, "luo", "mas", "nus", "saq", "teo", "kln")70         add(temp, east_sudanic, "luo", "mas", "nus", "saq", "teo", "kln");
add(temp, indic, "kok", "ks")71         add(temp, indic, "kok", "ks");
add(temp, niger_congo, "agq", "ak", "asa", "bas", "bem", "bez", "bm", "cgg", "dua", "dyo", "ebu", "ee", "ewo", "guz", "jgo", "kam", "ki", "kkj", "ksb", "ksf", "lag", "lg", "ln", "lu", "luy", "mua", "nd", "nnh", "nr", "nyn", "rn", "rof", "rw", "sbp", "sg", "ss", "tn", "ts", "vai", "ve", "dav", "jmc", "kde", "mer", "mgh", "mgo", "nmg", "nso", "rwk", "seh", "vun", "xog", "yav")72         add(temp, niger_congo, "agq", "ak", "asa", "bas", "bem", "bez", "bm", "cgg", "dua", "dyo", "ebu", "ee", "ewo", "guz", "jgo", "kam", "ki", "kkj", "ksb",
73             "ksf", "lag", "lg", "ln", "lu", "luy", "mua", "nd", "nnh", "nr", "nyn", "rn", "rof", "rw", "sbp", "sg", "ss", "tn", "ts", "vai", "ve", "dav",
74             "jmc", "kde", "mer", "mgh", "mgo", "nmg", "nso", "rwk", "seh", "vun", "xog", "yav");
add(temp, romance, "fur", "kea", "mfe")75         add(temp, romance, "fur", "kea", "mfe");
add(temp, sino_tibetan, "bo", "brx", "dz", "ii")76         add(temp, sino_tibetan, "bo", "brx", "dz", "ii");
add(temp, slavic, "dsb", "hsb")77         add(temp, slavic, "dsb", "hsb");
add(temp, songhay, "dje", "khq", "ses", "twq")78         add(temp, songhay, "dje", "khq", "ses", "twq");
add(temp, turkic, "sah")79         add(temp, turkic, "sah");
80         //GROUP_LANGUAGE.freeze();
81     }
82 
get(ULocale locale)83     public static LanguageGroup get(ULocale locale) {
84         return CldrUtility.ifNull(LANGUAGE_GROUP.get(new ULocale(locale.getLanguage())), LanguageGroup.other);
85     }
86 
getExplicit()87     public static Set<ULocale> getExplicit() {
88         return Collections.unmodifiableSet(LANGUAGE_GROUP.keySet());
89     }
90 
getLocales(LanguageGroup group)91     public static Set<ULocale> getLocales(LanguageGroup group) {
92         return Collections.unmodifiableSet(GROUP_LANGUAGE.get(group).keySet());
93     }
94 
95     /**
96      * return position in group, or -1 if in no group
97      * @param locale
98      * @return
99      */
rankInGroup(ULocale locale)100     public static int rankInGroup(ULocale locale) {
101         locale = new ULocale(locale.getLanguage());
102         LanguageGroup group = LANGUAGE_GROUP.get(locale);
103         if (group == null) {
104             return Integer.MAX_VALUE;
105         }
106         return GROUP_LANGUAGE.get(group).get(locale);
107     }
108 
109     public static Comparator<ULocale> COMPARATOR = new Comparator<ULocale>() {
110         @Override
111         public int compare(ULocale o1, ULocale o2) {
112             LanguageGroup group1 = get(o1);
113             LanguageGroup group2 = get(o2);
114             int diff = group1.ordinal() - group2.ordinal();
115             if (diff != 0) return diff;
116             int r1 = rankInGroup(o1);
117             int r2 = rankInGroup(o2);
118             diff = r1 - r2;
119             return diff != 0 ? diff : o1.compareTo(o2);
120         }
121     };
122 
main(String[] args)123     public static void main(String[] args) {
124         CLDRFile english = CLDRConfig.getInstance().getEnglish();
125         System.out.print("<supplementalData>\n"
126             + "\t<version number=\"$Revision:$\"/>\n"
127             + "\t<languageGroups>\n");
128         for (LanguageGroup languageGroup : LanguageGroup.values()) {
129             Set<ULocale> locales = LanguageGroup.getLocales(languageGroup);
130             String englishName = languageGroup.getName(english);
131             System.out.print("\t\t<languageGroup id=\"" + languageGroup.iso
132                 + "\" code=\"" + CollectionUtilities.join(locales, ", ")
133                 + "\"/>\t<!-- " + englishName + " -->\n");
134         }
135         System.out.print("\t</languageGroups>"
136             + "\n<supplementalData>\n");
137     }
138 
getName(CLDRFile cldrFile)139     public String getName(CLDRFile cldrFile) {
140         String prefix = "";
141         LanguageTagParser ltp = new LanguageTagParser().set(iso);
142         switch (ltp.getRegion()) {
143         case "001":
144             if (ltp.getLanguage().equals("und")) {
145                 return "Other";
146             }
147             prefix = "Other ";
148             break;
149         case "":
150             break;
151         default:
152             return cldrFile.getName(CLDRFile.TERRITORY_NAME, ltp.getRegion());
153         }
154         switch (ltp.getScript()) {
155         case "Hani":
156             return "CJK";
157         case "":
158             break;
159         default:
160             throw new IllegalArgumentException("Need to fix code: " + ltp.getScript());
161         }
162         return prefix + cldrFile.getName(ltp.getLanguage()).replace(" [Other]", "").replace(" languages", "");
163     }
164 
165     @Override
toString()166     public String toString() {
167         return getName(CLDRConfig.getInstance().getEnglish());
168     }
169 }