• Home
  • History
  • Annotate
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 package org.unicode.cldr.tool;
2 
3 import static com.google.common.collect.Comparators.lexicographical;
4 
5 import java.lang.invoke.MethodHandles;
6 import java.util.ArrayList;
7 import java.util.Comparator;
8 import java.util.EnumSet;
9 import java.util.LinkedHashSet;
10 import java.util.List;
11 import java.util.Map;
12 import java.util.Map.Entry;
13 import java.util.Set;
14 import java.util.TreeMap;
15 import java.util.TreeSet;
16 
17 import org.unicode.cldr.util.CLDRConfig;
18 import org.unicode.cldr.util.CLDRFile;
19 import org.unicode.cldr.util.CLDRPaths;
20 import org.unicode.cldr.util.DtdType;
21 import org.unicode.cldr.util.Factory;
22 import org.unicode.cldr.util.ICUServiceBuilder;
23 import org.unicode.cldr.util.LanguageTagParser;
24 import org.unicode.cldr.util.Level;
25 import org.unicode.cldr.util.Organization;
26 import org.unicode.cldr.util.PluralRanges;
27 import org.unicode.cldr.util.StandardCodes;
28 import org.unicode.cldr.util.SupplementalDataInfo;
29 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo;
30 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo.Count;
31 import org.unicode.cldr.util.TempPrintWriter;
32 
33 import com.google.common.base.Joiner;
34 import com.ibm.icu.impl.Relation;
35 import com.ibm.icu.text.DecimalFormat;
36 import com.ibm.icu.text.MessageFormat;
37 import com.ibm.icu.text.PluralRules;
38 import com.ibm.icu.text.PluralRules.FixedDecimal;
39 import com.ibm.icu.util.Output;
40 import com.ibm.icu.util.ULocale;
41 
42 public class GeneratePluralRanges {
GeneratePluralRanges(SupplementalDataInfo supplementalDataInfo)43     public GeneratePluralRanges(SupplementalDataInfo supplementalDataInfo) {
44         SUPPLEMENTAL = supplementalDataInfo;
45         prf = PluralRulesFactory.getInstance(SUPPLEMENTAL);
46     }
47 
    // Gates the first two passes of minimize(): when true, minimize() first asks the range
    // matrix (endSame / startSame) for entries that can be written with only an end or only
    // a start attribute, before listing the remaining start/end pairs individually.
    private static final boolean MINIMAL = true;
49 
main(String[] args)50     public static void main(String[] args) {
51         CLDRConfig testInfo = ToolConfig.getToolInstance();
52         GeneratePluralRanges me = new GeneratePluralRanges(testInfo.getSupplementalDataInfo());
53         me.reformatPluralRanges();
54         //me.generateSamples(testInfo.getEnglish(), testInfo.getCldrFactory());
55     }
56 
generateSamples(CLDRFile english, Factory factory)57     private void generateSamples(CLDRFile english, Factory factory) {
58         //Map<ULocale, PluralRulesFactory.SamplePatterns> samples = PluralRulesFactory.getLocaleToSamplePatterns();
59         // add all the items with plural ranges
60         Set<String> sorted = new TreeSet<>(SUPPLEMENTAL.getPluralRangesLocales());
61         // add the core locales
62 //        sorted.addAll(StandardCodes.make().getLocaleCoverageLocales("google", EnumSet.of(Level.MODERN)));
63         sorted.addAll(StandardCodes.make().getLocaleCoverageLocales(Organization.cldr, EnumSet.of(Level.MODERN)));
64         // add any variant plural forms
65         LanguageTagParser ltp = new LanguageTagParser();
66         for (String locale : SUPPLEMENTAL.getPluralLocales()) {
67             if (locale.contains("_")) {
68                 if (sorted.contains(ltp.set(locale).getLanguage())) {
69                     sorted.add(locale);
70                 }
71             }
72         }
73         //sorted.add("fil");
74         System.out.println("Co.\tLocale Name\tStart\tEnd\tResult\tStart Sample\tEnd Sample\tStart Example\tEnd Example\tCombined Example");
75         for (String locale : sorted) {
76             PluralInfo pluralInfo3 = SUPPLEMENTAL.getPlurals(locale);
77             if (locale.contains("_")) {
78                 PluralInfo pluralInfo2 = SUPPLEMENTAL.getPlurals(ltp.set(locale).getLanguage());
79                 if (pluralInfo2.equals(pluralInfo3)) {
80                     continue;
81                 }
82             }
83 
84             Set<Count> counts3 = pluralInfo3.getCounts();
85             if (counts3.size() == 1) {
86                 continue; // skip japanese, etc.
87             }
88 
89             List<RangeSample> list = getRangeInfo(factory.make(locale, true));
90             if (list == null) {
91                 System.out.println("Failure with " + locale);
92                 continue;
93             }
94             for (RangeSample rangeSample : list) {
95                 System.out.println(locale + "\t" + english.getName(locale)
96                 + "\t" + rangeSample.start
97                 + "\t" + rangeSample.end
98                 + "\t" + (rangeSample.result == null ? "missing" : rangeSample.result)
99                 + "\t" + rangeSample.min
100                 + "\t" + rangeSample.max
101                 + "\t" + rangeSample.startExample
102                 + "\t" + rangeSample.endExample
103                 + "\t" + rangeSample.resultExample);
104             }
105         }
106     }
107 
getRangeInfo(CLDRFile cldrFile)108     public List<RangeSample> getRangeInfo(CLDRFile cldrFile) {
109         String locale = cldrFile.getLocaleID();
110         if (locale.equals("iw")) {
111             locale = "he";
112         }
113         //Map<ULocale, PluralRulesFactory.SamplePatterns> samples = PluralRulesFactory.getLocaleToSamplePatterns();
114         List<RangeSample> list = new ArrayList<>();
115         PluralInfo pluralInfo = SUPPLEMENTAL.getPlurals(locale);
116         Set<Count> counts = pluralInfo.getCounts();
117         PluralRanges pluralRanges = SUPPLEMENTAL.getPluralRanges(locale);
118         if (pluralRanges == null && locale.contains("_")) {
119             String locale2 = new ULocale(locale).getLanguage();
120             pluralRanges = SUPPLEMENTAL.getPluralRanges(locale2);
121         }
122         if (pluralRanges == null) {
123             return null;
124         }
125         ULocale ulocale = new ULocale(locale);
126         PluralMinimalPairs samplePatterns = PluralMinimalPairs.getInstance(ulocale.toString()); // CldrUtility.get(samples, ulocale);
127 //        if (samplePatterns == null && locale.contains("_")) {
128 //            ulocale = new ULocale(ulocale.getLanguage());
129 //            samplePatterns = CldrUtility.get(samples, ulocale);
130 //            if (samplePatterns == null) {
131 //                return null;
132 //            }
133 //        }
134 
135         Output<FixedDecimal> maxSample = new Output<>();
136         Output<FixedDecimal> minSample = new Output<>();
137 
138         ICUServiceBuilder icusb = new ICUServiceBuilder();
139         icusb.setCldrFile(cldrFile);
140         DecimalFormat nf = icusb.getNumberFormat(1);
141         //String decimal = cldrFile.getWinningValue("//ldml/numbers/symbols[@numberSystem=\"latn\"]/decimal");
142         String defaultNumberingSystem = cldrFile.getWinningValue("//ldml/numbers/defaultNumberingSystem");
143         String range = cldrFile.getWinningValue("//ldml/numbers/miscPatterns[@numberSystem=\""
144             + defaultNumberingSystem
145             + "\"]/pattern[@type=\"range\"]");
146 
147         //            if (decimal == null) {
148         //                throw new IllegalArgumentException();
149         //            }
150         for (Count s : counts) {
151             for (Count e : counts) {
152                 if (!pluralInfo.rangeExists(s, e, minSample, maxSample)) {
153                     continue;
154                 }
155                 Count r = pluralRanges.getExplicit(s, e);
156                 String minFormatted = format(nf, minSample.value);
157                 String maxFormatted = format(nf, maxSample.value);
158                 String rangeFormatted = MessageFormat.format(range, minFormatted, maxFormatted);
159 
160                 list.add(new RangeSample(
161                     s, e, r,
162                     minSample.value,
163                     maxSample.value,
164                     getExample(locale, samplePatterns, s, minFormatted), getExample(locale, samplePatterns, e, maxFormatted),
165                     getExample(locale, samplePatterns, r, rangeFormatted)));
166             }
167         }
168         return list;
169     }
170 
171     public static class RangeSample {
172         // Category Examples    Minimal Pairs   Rules
RangeSample(Count start, Count end, Count result, FixedDecimal min, FixedDecimal max, String startExample, String endExample, String resultExample)173         public RangeSample(Count start, Count end, Count result,
174             FixedDecimal min, FixedDecimal max,
175             String startExample, String endExample, String resultExample) {
176             this.start = start;
177             this.end = end;
178             this.result = result;
179             this.min = min;
180             this.max = max;
181             this.startExample = startExample;
182             this.endExample = endExample;
183             this.resultExample = resultExample;
184         }
185 
186         final Count start;
187         final Count end;
188         final Count result;
189         final FixedDecimal min;
190         final FixedDecimal max;
191         final String startExample;
192         final String endExample;
193         final String resultExample;
194     }
195 
format(DecimalFormat nf, FixedDecimal minSample)196     public static String format(DecimalFormat nf, FixedDecimal minSample) {
197         nf.setMinimumFractionDigits(minSample.getVisibleDecimalDigitCount());
198         nf.setMaximumFractionDigits(minSample.getVisibleDecimalDigitCount());
199         return nf.format(minSample);
200     }
201 
202     //    private String format(String decimal, Output<FixedDecimal> minSample) {
203     //        return minSample.toString().replace(".", decimal);
204     //    }
205 
getExample(String locale, PluralMinimalPairs samplePatterns, Count r, String numString)206     public static String getExample(String locale, PluralMinimalPairs samplePatterns, Count r, String numString) {
207         if (r == null) {
208             return "«missing»";
209         }
210         String samplePattern;
211         try {
212             samplePattern = samplePatterns.get(PluralRules.PluralType.CARDINAL, r); // CldrUtility.get(samplePatterns.keywordToPattern, r);
213         } catch (Exception e) {
214             throw new IllegalArgumentException("Locale: " + locale + "; Count: " + r, e);
215         }
216         return samplePattern
217             .replace('\u00A0', '\u0020')
218             .replace("{0}", numString);
219     }
220 
221     private final SupplementalDataInfo SUPPLEMENTAL;
222     private final PluralRulesFactory prf;
223 
224     // Ordering by size-of-set first, and then lexicographically, with a final tie-break on the
225     // string representation.
226     private static final Comparator<Set<String>> STRING_SET_COMPARATOR =
227         Comparator.<Set<String>, Integer>comparing(Set::size)
228             .thenComparing(lexicographical(Comparator.<String>naturalOrder()));
229     private static final Comparator<Set<Count>> COUNT_SET_COMPARATOR =
230         Comparator.<Set<Count>, Integer>comparing(Set::size)
231             .thenComparing(lexicographical(Comparator.<Count>naturalOrder()));
232 
reformatPluralRanges()233     public void reformatPluralRanges() {
234         Map<Set<Count>, Relation<Set<String>, String>> seen = new TreeMap<>(COUNT_SET_COMPARATOR);
235         try (TempPrintWriter out = TempPrintWriter.openUTF8Writer(CLDRPaths.SUPPLEMENTAL_DIRECTORY,"pluralRanges.xml")) {
236             out.println(DtdType.supplementalData.header(MethodHandles.lookup().lookupClass()) +
237                 "\t<version number=\"$Revision$\" />\n" +
238                 "\t<plurals>"
239                 );
240             for (String locale : SUPPLEMENTAL.getPluralRangesLocales()) {
241 
242 
243                 PluralRanges pluralRanges = SUPPLEMENTAL.getPluralRanges(locale);
244                 PluralInfo pluralInfo = SUPPLEMENTAL.getPlurals(locale);
245                 Set<Count> counts = pluralInfo.getCounts();
246 
247                 Set<String> s;
248                 if (false) {
249                     out.println("Minimized, but not ready for prime-time");
250                     s = minimize(pluralRanges, pluralInfo);
251                 } else {
252                     s = reformat(pluralRanges, counts);
253                 }
254                 Relation<Set<String>, String> item = seen.get(counts);
255                 if (item == null) {
256                     seen.put(counts,
257                         item = Relation.of(new TreeMap<Set<String>, Set<String>>(STRING_SET_COMPARATOR), TreeSet.class));
258                 }
259                 item.put(s, locale);
260             }
261             for (Entry<Set<Count>, Relation<Set<String>, String>> entry0 : seen.entrySet()) {
262                 out.println("\n<!-- " + Joiner.on(", ").join(entry0.getKey()) + " -->");
263                 for (Entry<Set<String>, Set<String>> entry : entry0.getValue().keyValuesSet()) {
264                     out.println("\t\t<pluralRanges locales=\"" + Joiner.on(" ")
265                         .join(entry.getValue()) + "\">");
266                     for (String line : entry.getKey()) {
267                         out.println("\t\t\t" + line);
268                     }
269                     out.println("\t\t</pluralRanges>");
270                 }
271             }
272             out.println("\t</plurals>\n" +
273                 "</supplementalData>");
274         }
275     }
276 
277     enum RangeStrategy {
278         other, end, start, mixed
279     }
280 
reformat(PluralRanges pluralRanges, Set<Count> counts)281     public Set<String> reformat(PluralRanges pluralRanges, Set<Count> counts) {
282         Set<String> s;
283         s = new LinkedHashSet<>();
284         // first determine the general principle
285 
286         //        EnumSet<RangeStrategy> strategy = EnumSet.allOf(RangeStrategy.class);
287         //        Count firstResult = null;
288         //        for (Count start : counts) {
289         //            for (Count end : counts) {
290         //                Count result = pluralRanges.getExplicit(start, end);
291         //                if (result == null) {
292         //                    continue;
293         //                } else if (firstResult == null) {
294         //                    firstResult = result;
295         //                }
296         //                if (result != start) {
297         //                    strategy.remove(RangeStrategy.start);
298         //                }
299         //                if (result != end) {
300         //                    strategy.remove(RangeStrategy.end);
301         //                }
302         //                if (result != Count.other) {
303         //                    strategy.remove(RangeStrategy.other);
304         //                }
305         //           }
306         //        }
307         //        s.add("<!-- Range Principle: " + strategy.iterator().next() + " -->");
308         for (Count start : counts) {
309             for (Count end : counts) {
310                 Count result = pluralRanges.getExplicit(start, end);
311                 if (result == null) {
312                     continue;
313                 }
314                 String line = PluralRanges.showRange(start, end, result);
315                 s.add(line);
316             }
317         }
318         return s;
319     }
320 
minimize(PluralRanges pluralRanges, PluralInfo pluralInfo)321     Set<String> minimize(PluralRanges pluralRanges, PluralInfo pluralInfo) {
322         Set<String> result = new LinkedHashSet<>();
323         // make it easier to manage
324         PluralRanges.Matrix matrix = new PluralRanges.Matrix();
325         Output<FixedDecimal> maxSample = new Output<>();
326         Output<FixedDecimal> minSample = new Output<>();
327         for (Count s : Count.VALUES) {
328             for (Count e : Count.VALUES) {
329                 if (!pluralInfo.rangeExists(s, e, minSample, maxSample)) {
330                     continue;
331                 }
332                 Count r = pluralRanges.getExplicit(s, e);
333                 matrix.set(s, e, r);
334             }
335         }
336         // if everything is 'other', we are done
337         //        if (allOther == true) {
338         //            return result;
339         //        }
340         EnumSet<Count> endDone = EnumSet.noneOf(Count.class);
341         EnumSet<Count> startDone = EnumSet.noneOf(Count.class);
342         if (MINIMAL) {
343             for (Count end : pluralInfo.getCounts()) {
344                 Count r = matrix.endSame(end);
345                 if (r != null
346                     //&& r != Count.other
347                     ) {
348                     result.add("<pluralRange" +
349                         "              \t\tend=\"" + end
350                         + "\"\tresult=\"" + r + "\"/>");
351                     endDone.add(end);
352                 }
353             }
354             Output<Boolean> emit = new Output<>();
355             for (Count start : pluralInfo.getCounts()) {
356                 Count r = matrix.startSame(start, endDone, emit);
357                 if (r != null
358                     // && r != Count.other
359                     ) {
360                     if (emit.value) {
361                         result.add("<pluralRange" +
362                             "\tstart=\"" + start
363                             + "\"          \t\tresult=\"" + r + "\"/>");
364                     }
365                     startDone.add(start);
366                 }
367             }
368         }
369         //Set<String> skip = new LinkedHashSet<String>();
370         for (Count end : pluralInfo.getCounts()) {
371             if (endDone.contains(end)) {
372                 continue;
373             }
374             for (Count start : pluralInfo.getCounts()) {
375                 if (startDone.contains(start)) {
376                     continue;
377                 }
378                 Count r = matrix.get(start, end);
379                 if (r != null
380                     //&& !(MINIMAL && r == Count.other)
381                     ) {
382                     result.add(PluralRanges.showRange(start, end, r));
383                 } else {
384                     result.add("<!-- <pluralRange" +
385                         "\tstart=\"" + start
386                         + "\" \tend=\"" + end
387                         + "\" \tresult=\"" + r + "\"/> -->");
388 
389                 }
390 
391             }
392         }
393         return result;
394     }
395 
396 }
397