1 package org.unicode.cldr.tool;
2 
3 import java.util.Set;
4 
5 import org.unicode.cldr.util.CLDRFile;
6 import org.unicode.cldr.util.CLDRPaths;
7 import org.unicode.cldr.util.Counter;
8 import org.unicode.cldr.util.Factory;
9 import org.unicode.cldr.util.Pair;
10 import org.unicode.cldr.util.SupplementalDataInfo;
11 import org.unicode.cldr.util.SupplementalDataInfo.OfficialStatus;
12 import org.unicode.cldr.util.SupplementalDataInfo.PopulationData;
13 
14 public class GetLanguageData {
15     SupplementalDataInfo sdata = SupplementalDataInfo
16         .getInstance(CLDRPaths.SUPPLEMENTAL_DIRECTORY);
17     Factory cldrFactory = Factory
18         .make(CLDRPaths.MAIN_DIRECTORY, ".*");
19     CLDRFile english = cldrFactory.make("en", true);
20     Set<String> euCountries = sdata.getContained("EU");
21     Counter<String> languageToGdp = new Counter<String>();
22     Counter<String> languageToPop = new Counter<String>();
23 
main(String[] args)24     public static void main(String[] args) {
25         new GetLanguageData().run();
26     }
27 
run()28     private void run() {
29         findSuspectData();
30         System.out.println("Code\tLang\tLpop\tApprox. Gdp");
31         for (String language : sdata.getLanguages()) {
32             final long pop = languageToPop.getCount(language);
33             System.out.print(language + "\t" + english.getName(language));
34             if (pop > 0) {
35                 Pair<OfficialStatus, String> status = isOfficialLanguageOfEUCountry(language);
36                 System.out.print("\t" + pop //
37                     + "\t" + languageToGdp.getCount(language) //
38                     + "\t" + (status.getFirst().isOfficial() ? status.getFirst() : "") //
39                     + "\t" + status.getSecond() //
40                 );
41             }
42             System.out.println();
43         }
44     }
45 
findSuspectData()46     private void findSuspectData() {
47         Set<String> territories = sdata.getTerritoriesWithPopulationData();
48         for (String territory : territories) {
49             double scale = 1.0;
50             final PopulationData populationDataForTerritory = sdata
51                 .getPopulationDataForTerritory(territory);
52             final double gdp = populationDataForTerritory.getGdp();
53             double territoryPop = populationDataForTerritory.getPopulation();
54             double langPop = 0;
55             double officialLangPop = 0;
56             Set<String> languages = sdata.getLanguagesForTerritoryWithPopulationData(territory);
57             for (String language : languages) {
58                 if (language.equals("tl")) continue;
59                 PopulationData pop2 = sdata.getLanguageAndTerritoryPopulationData(language, territory);
60                 langPop += pop2.getPopulation();
61                 if (pop2.getOfficialStatus().isOfficial()) {
62                     officialLangPop += pop2.getPopulation();
63                 }
64             }
65             final double missing = 0.75 * territoryPop - langPop;
66             if (missing > 0) {
67                 System.out.println(territory //
68                     + "\t" + english.getName("territory", territory) //
69                     + "\t" + territoryPop //
70                     + "\t" + langPop //
71                     + "\t" + gdp //
72                 );
73                 scale = 1 + missing / officialLangPop;
74                 // scale up the official so that
75                 // official + non-official = 70% of total
76                 langPop = territoryPop * 0.75;
77                 System.out.println("\tScaling " + territory + "\t" + scale * 100 + "%");
78             }
79             long langUnknown = (long) territoryPop;
80             for (String language : languages) {
81                 if (language.equals("tl")) continue;
82                 PopulationData pop2 = sdata.getLanguageAndTerritoryPopulationData(language, territory);
83                 double langPop2 = pop2.getPopulation();
84                 if (pop2.getOfficialStatus().isOfficial()) {
85                     langPop2 *= scale;
86                 }
87                 languageToGdp.add(language, (long) (gdp * langPop2 / territoryPop));
88                 languageToPop.add(language, (long) (langPop2));
89                 langUnknown -= langPop2;
90             }
91             if (langUnknown > 0) {
92                 languageToGdp.add("und", (long) (gdp * langUnknown / territoryPop));
93                 languageToPop.add("und", (long) (langUnknown));
94             }
95         }
96     }
97 
isOfficialLanguageOfEUCountry(String language)98     private Pair<OfficialStatus, String> isOfficialLanguageOfEUCountry(String language) {
99         OfficialStatus bestStatus = OfficialStatus.unknown;
100         String eu = "";
101         double bestEuPop = 0;
102         Set<String> territories = sdata.getTerritoriesForPopulationData(language);
103         for (String territory : territories) {
104             PopulationData pop = sdata.getLanguageAndTerritoryPopulationData(language, territory);
105             OfficialStatus status = pop.getOfficialStatus();
106             if (bestStatus.compareTo(status) < 0) {
107                 bestStatus = status;
108             }
109             if (status.isMajor() && euCountries.contains(territory)) {
110                 if (pop.getLiteratePopulation() > bestEuPop) {
111                     bestEuPop = pop.getLiteratePopulation();
112                     eu = territory;
113                 }
114             }
115         }
116         return Pair.of(bestStatus, eu);
117     }
118 }
119