1 package org.unicode.cldr.tool; 2 3 import static com.google.common.collect.Comparators.lexicographical; 4 5 import java.lang.invoke.MethodHandles; 6 import java.util.ArrayList; 7 import java.util.Comparator; 8 import java.util.EnumSet; 9 import java.util.LinkedHashSet; 10 import java.util.List; 11 import java.util.Map; 12 import java.util.Map.Entry; 13 import java.util.Set; 14 import java.util.TreeMap; 15 import java.util.TreeSet; 16 17 import org.unicode.cldr.util.CLDRConfig; 18 import org.unicode.cldr.util.CLDRFile; 19 import org.unicode.cldr.util.CLDRPaths; 20 import org.unicode.cldr.util.DtdType; 21 import org.unicode.cldr.util.Factory; 22 import org.unicode.cldr.util.ICUServiceBuilder; 23 import org.unicode.cldr.util.LanguageTagParser; 24 import org.unicode.cldr.util.Level; 25 import org.unicode.cldr.util.Organization; 26 import org.unicode.cldr.util.PluralRanges; 27 import org.unicode.cldr.util.StandardCodes; 28 import org.unicode.cldr.util.SupplementalDataInfo; 29 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo; 30 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo.Count; 31 import org.unicode.cldr.util.TempPrintWriter; 32 33 import com.google.common.base.Joiner; 34 import com.ibm.icu.impl.Relation; 35 import com.ibm.icu.text.DecimalFormat; 36 import com.ibm.icu.text.MessageFormat; 37 import com.ibm.icu.text.PluralRules; 38 import com.ibm.icu.text.PluralRules.FixedDecimal; 39 import com.ibm.icu.util.Output; 40 import com.ibm.icu.util.ULocale; 41 42 public class GeneratePluralRanges { GeneratePluralRanges(SupplementalDataInfo supplementalDataInfo)43 public GeneratePluralRanges(SupplementalDataInfo supplementalDataInfo) { 44 SUPPLEMENTAL = supplementalDataInfo; 45 prf = PluralRulesFactory.getInstance(SUPPLEMENTAL); 46 } 47 48 private static final boolean MINIMAL = true; 49 main(String[] args)50 public static void main(String[] args) { 51 CLDRConfig testInfo = ToolConfig.getToolInstance(); 52 GeneratePluralRanges me = new GeneratePluralRanges(testInfo.getSupplementalDataInfo()); 53 me.reformatPluralRanges(); 54 //me.generateSamples(testInfo.getEnglish(), testInfo.getCldrFactory()); 55 } 56 generateSamples(CLDRFile english, Factory factory)57 private void generateSamples(CLDRFile english, Factory factory) { 58 //Map<ULocale, PluralRulesFactory.SamplePatterns> samples = PluralRulesFactory.getLocaleToSamplePatterns(); 59 // add all the items with plural ranges 60 Set<String> sorted = new TreeSet<>(SUPPLEMENTAL.getPluralRangesLocales()); 61 // add the core locales 62 // sorted.addAll(StandardCodes.make().getLocaleCoverageLocales("google", EnumSet.of(Level.MODERN))); 63 sorted.addAll(StandardCodes.make().getLocaleCoverageLocales(Organization.cldr, EnumSet.of(Level.MODERN))); 64 // add any variant plural forms 65 LanguageTagParser ltp = new LanguageTagParser(); 66 for (String locale : SUPPLEMENTAL.getPluralLocales()) { 67 if (locale.contains("_")) { 68 if (sorted.contains(ltp.set(locale).getLanguage())) { 69 sorted.add(locale); 70 } 71 } 72 } 73 //sorted.add("fil"); 74 System.out.println("Co.\tLocale Name\tStart\tEnd\tResult\tStart Sample\tEnd Sample\tStart Example\tEnd Example\tCombined Example"); 75 for (String locale : sorted) { 76 PluralInfo pluralInfo3 = SUPPLEMENTAL.getPlurals(locale); 77 if (locale.contains("_")) { 78 PluralInfo pluralInfo2 = SUPPLEMENTAL.getPlurals(ltp.set(locale).getLanguage()); 79 if (pluralInfo2.equals(pluralInfo3)) { 80 continue; 81 } 82 } 83 84 Set<Count> counts3 = pluralInfo3.getCounts(); 85 if (counts3.size() == 1) { 86 continue; // skip japanese, etc. 87 } 88 89 List<RangeSample> list = getRangeInfo(factory.make(locale, true)); 90 if (list == null) { 91 System.out.println("Failure with " + locale); 92 continue; 93 } 94 for (RangeSample rangeSample : list) { 95 System.out.println(locale + "\t" + english.getName(locale) 96 + "\t" + rangeSample.start 97 + "\t" + rangeSample.end 98 + "\t" + (rangeSample.result == null ? "missing" : rangeSample.result) 99 + "\t" + rangeSample.min 100 + "\t" + rangeSample.max 101 + "\t" + rangeSample.startExample 102 + "\t" + rangeSample.endExample 103 + "\t" + rangeSample.resultExample); 104 } 105 } 106 } 107 getRangeInfo(CLDRFile cldrFile)108 public List<RangeSample> getRangeInfo(CLDRFile cldrFile) { 109 String locale = cldrFile.getLocaleID(); 110 if (locale.equals("iw")) { 111 locale = "he"; 112 } 113 //Map<ULocale, PluralRulesFactory.SamplePatterns> samples = PluralRulesFactory.getLocaleToSamplePatterns(); 114 List<RangeSample> list = new ArrayList<>(); 115 PluralInfo pluralInfo = SUPPLEMENTAL.getPlurals(locale); 116 Set<Count> counts = pluralInfo.getCounts(); 117 PluralRanges pluralRanges = SUPPLEMENTAL.getPluralRanges(locale); 118 if (pluralRanges == null && locale.contains("_")) { 119 String locale2 = new ULocale(locale).getLanguage(); 120 pluralRanges = SUPPLEMENTAL.getPluralRanges(locale2); 121 } 122 if (pluralRanges == null) { 123 return null; 124 } 125 ULocale ulocale = new ULocale(locale); 126 PluralMinimalPairs samplePatterns = PluralMinimalPairs.getInstance(ulocale.toString()); // CldrUtility.get(samples, ulocale); 127 // if (samplePatterns == null && locale.contains("_")) { 128 // ulocale = new ULocale(ulocale.getLanguage()); 129 // samplePatterns = CldrUtility.get(samples, ulocale); 130 // if (samplePatterns == null) { 131 // return null; 132 // } 133 // } 134 135 Output<FixedDecimal> maxSample = new Output<>(); 136 Output<FixedDecimal> minSample = new Output<>(); 137 138 ICUServiceBuilder icusb = new ICUServiceBuilder(); 139 icusb.setCldrFile(cldrFile); 140 DecimalFormat nf = icusb.getNumberFormat(1); 141 //String decimal = cldrFile.getWinningValue("//ldml/numbers/symbols[@numberSystem=\"latn\"]/decimal"); 142 String defaultNumberingSystem = cldrFile.getWinningValue("//ldml/numbers/defaultNumberingSystem"); 143 String range = cldrFile.getWinningValue("//ldml/numbers/miscPatterns[@numberSystem=\"" 144 + defaultNumberingSystem 145 + "\"]/pattern[@type=\"range\"]"); 146 147 // if (decimal == null) { 148 // throw new IllegalArgumentException(); 149 // } 150 for (Count s : counts) { 151 for (Count e : counts) { 152 if (!pluralInfo.rangeExists(s, e, minSample, maxSample)) { 153 continue; 154 } 155 Count r = pluralRanges.getExplicit(s, e); 156 String minFormatted = format(nf, minSample.value); 157 String maxFormatted = format(nf, maxSample.value); 158 String rangeFormatted = MessageFormat.format(range, minFormatted, maxFormatted); 159 160 list.add(new RangeSample( 161 s, e, r, 162 minSample.value, 163 maxSample.value, 164 getExample(locale, samplePatterns, s, minFormatted), getExample(locale, samplePatterns, e, maxFormatted), 165 getExample(locale, samplePatterns, r, rangeFormatted))); 166 } 167 } 168 return list; 169 } 170 171 public static class RangeSample { 172 // Category Examples Minimal Pairs Rules RangeSample(Count start, Count end, Count result, FixedDecimal min, FixedDecimal max, String startExample, String endExample, String resultExample)173 public RangeSample(Count start, Count end, Count result, 174 FixedDecimal min, FixedDecimal max, 175 String startExample, String endExample, String resultExample) { 176 this.start = start; 177 this.end = end; 178 this.result = result; 179 this.min = min; 180 this.max = max; 181 this.startExample = startExample; 182 this.endExample = endExample; 183 this.resultExample = resultExample; 184 } 185 186 final Count start; 187 final Count end; 188 final Count result; 189 final FixedDecimal min; 190 final FixedDecimal max; 191 final String startExample; 192 final String endExample; 193 final String resultExample; 194 } 195 format(DecimalFormat nf, FixedDecimal minSample)196 public static String format(DecimalFormat nf, FixedDecimal minSample) { 197 nf.setMinimumFractionDigits(minSample.getVisibleDecimalDigitCount()); 198 nf.setMaximumFractionDigits(minSample.getVisibleDecimalDigitCount()); 199 return nf.format(minSample); 200 } 201 202 // private String format(String decimal, Output<FixedDecimal> minSample) { 203 // return minSample.toString().replace(".", decimal); 204 // } 205 getExample(String locale, PluralMinimalPairs samplePatterns, Count r, String numString)206 public static String getExample(String locale, PluralMinimalPairs samplePatterns, Count r, String numString) { 207 if (r == null) { 208 return "«missing»"; 209 } 210 String samplePattern; 211 try { 212 samplePattern = samplePatterns.get(PluralRules.PluralType.CARDINAL, r); // CldrUtility.get(samplePatterns.keywordToPattern, r); 213 } catch (Exception e) { 214 throw new IllegalArgumentException("Locale: " + locale + "; Count: " + r, e); 215 } 216 return samplePattern 217 .replace('\u00A0', '\u0020') 218 .replace("{0}", numString); 219 } 220 221 private final SupplementalDataInfo SUPPLEMENTAL; 222 private final PluralRulesFactory prf; 223 224 // Ordering by size-of-set first, and then lexicographically, with a final tie-break on the 225 // string representation. 226 private static final Comparator<Set<String>> STRING_SET_COMPARATOR = 227 Comparator.<Set<String>, Integer>comparing(Set::size) 228 .thenComparing(lexicographical(Comparator.<String>naturalOrder())); 229 private static final Comparator<Set<Count>> COUNT_SET_COMPARATOR = 230 Comparator.<Set<Count>, Integer>comparing(Set::size) 231 .thenComparing(lexicographical(Comparator.<Count>naturalOrder())); 232 reformatPluralRanges()233 public void reformatPluralRanges() { 234 Map<Set<Count>, Relation<Set<String>, String>> seen = new TreeMap<>(COUNT_SET_COMPARATOR); 235 try (TempPrintWriter out = TempPrintWriter.openUTF8Writer(CLDRPaths.SUPPLEMENTAL_DIRECTORY,"pluralRanges.xml")) { 236 out.println(DtdType.supplementalData.header(MethodHandles.lookup().lookupClass()) + 237 "\t<version number=\"$Revision$\" />\n" + 238 "\t<plurals>" 239 ); 240 for (String locale : SUPPLEMENTAL.getPluralRangesLocales()) { 241 242 243 PluralRanges pluralRanges = SUPPLEMENTAL.getPluralRanges(locale); 244 PluralInfo pluralInfo = SUPPLEMENTAL.getPlurals(locale); 245 Set<Count> counts = pluralInfo.getCounts(); 246 247 Set<String> s; 248 if (false) { 249 out.println("Minimized, but not ready for prime-time"); 250 s = minimize(pluralRanges, pluralInfo); 251 } else { 252 s = reformat(pluralRanges, counts); 253 } 254 Relation<Set<String>, String> item = seen.get(counts); 255 if (item == null) { 256 seen.put(counts, 257 item = Relation.of(new TreeMap<Set<String>, Set<String>>(STRING_SET_COMPARATOR), TreeSet.class)); 258 } 259 item.put(s, locale); 260 } 261 for (Entry<Set<Count>, Relation<Set<String>, String>> entry0 : seen.entrySet()) { 262 out.println("\n<!-- " + Joiner.on(", ").join(entry0.getKey()) + " -->"); 263 for (Entry<Set<String>, Set<String>> entry : entry0.getValue().keyValuesSet()) { 264 out.println("\t\t<pluralRanges locales=\"" + Joiner.on(" ") 265 .join(entry.getValue()) + "\">"); 266 for (String line : entry.getKey()) { 267 out.println("\t\t\t" + line); 268 } 269 out.println("\t\t</pluralRanges>"); 270 } 271 } 272 out.println("\t</plurals>\n" + 273 "</supplementalData>"); 274 } 275 } 276 277 enum RangeStrategy { 278 other, end, start, mixed 279 } 280 reformat(PluralRanges pluralRanges, Set<Count> counts)281 public Set<String> reformat(PluralRanges pluralRanges, Set<Count> counts) { 282 Set<String> s; 283 s = new LinkedHashSet<>(); 284 // first determine the general principle 285 286 // EnumSet<RangeStrategy> strategy = EnumSet.allOf(RangeStrategy.class); 287 // Count firstResult = null; 288 // for (Count start : counts) { 289 // for (Count end : counts) { 290 // Count result = pluralRanges.getExplicit(start, end); 291 // if (result == null) { 292 // continue; 293 // } else if (firstResult == null) { 294 // firstResult = result; 295 // } 296 // if (result != start) { 297 // strategy.remove(RangeStrategy.start); 298 // } 299 // if (result != end) { 300 // strategy.remove(RangeStrategy.end); 301 // } 302 // if (result != Count.other) { 303 // strategy.remove(RangeStrategy.other); 304 // } 305 // } 306 // } 307 // s.add("<!-- Range Principle: " + strategy.iterator().next() + " -->"); 308 for (Count start : counts) { 309 for (Count end : counts) { 310 Count result = pluralRanges.getExplicit(start, end); 311 if (result == null) { 312 continue; 313 } 314 String line = PluralRanges.showRange(start, end, result); 315 s.add(line); 316 } 317 } 318 return s; 319 } 320 minimize(PluralRanges pluralRanges, PluralInfo pluralInfo)321 Set<String> minimize(PluralRanges pluralRanges, PluralInfo pluralInfo) { 322 Set<String> result = new LinkedHashSet<>(); 323 // make it easier to manage 324 PluralRanges.Matrix matrix = new PluralRanges.Matrix(); 325 Output<FixedDecimal> maxSample = new Output<>(); 326 Output<FixedDecimal> minSample = new Output<>(); 327 for (Count s : Count.VALUES) { 328 for (Count e : Count.VALUES) { 329 if (!pluralInfo.rangeExists(s, e, minSample, maxSample)) { 330 continue; 331 } 332 Count r = pluralRanges.getExplicit(s, e); 333 matrix.set(s, e, r); 334 } 335 } 336 // if everything is 'other', we are done 337 // if (allOther == true) { 338 // return result; 339 // } 340 EnumSet<Count> endDone = EnumSet.noneOf(Count.class); 341 EnumSet<Count> startDone = EnumSet.noneOf(Count.class); 342 if (MINIMAL) { 343 for (Count end : pluralInfo.getCounts()) { 344 Count r = matrix.endSame(end); 345 if (r != null 346 //&& r != Count.other 347 ) { 348 result.add("<pluralRange" + 349 " \t\tend=\"" + end 350 + "\"\tresult=\"" + r + "\"/>"); 351 endDone.add(end); 352 } 353 } 354 Output<Boolean> emit = new Output<>(); 355 for (Count start : pluralInfo.getCounts()) { 356 Count r = matrix.startSame(start, endDone, emit); 357 if (r != null 358 // && r != Count.other 359 ) { 360 if (emit.value) { 361 result.add("<pluralRange" + 362 "\tstart=\"" + start 363 + "\" \t\tresult=\"" + r + "\"/>"); 364 } 365 startDone.add(start); 366 } 367 } 368 } 369 //Set<String> skip = new LinkedHashSet<String>(); 370 for (Count end : pluralInfo.getCounts()) { 371 if (endDone.contains(end)) { 372 continue; 373 } 374 for (Count start : pluralInfo.getCounts()) { 375 if (startDone.contains(start)) { 376 continue; 377 } 378 Count r = matrix.get(start, end); 379 if (r != null 380 //&& !(MINIMAL && r == Count.other) 381 ) { 382 result.add(PluralRanges.showRange(start, end, r)); 383 } else { 384 result.add("<!-- <pluralRange" + 385 "\tstart=\"" + start 386 + "\" \tend=\"" + end 387 + "\" \tresult=\"" + r + "\"/> -->"); 388 389 } 390 391 } 392 } 393 return result; 394 } 395 396 } 397