package org.unicode.cldr.tool;

import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.Comparator;
import java.util.HashSet;
import java.util.Set;
import java.util.TreeSet;

import org.unicode.cldr.draft.FileUtilities;
import org.unicode.cldr.util.CLDRFile;
import org.unicode.cldr.util.CLDRFile.Status;
import org.unicode.cldr.util.CLDRPaths;
import org.unicode.cldr.util.CldrUtility;
import org.unicode.cldr.util.Counter;
import org.unicode.cldr.util.EscapingUtilities;
import org.unicode.cldr.util.Factory;
import org.unicode.cldr.util.PathUtilities;
import org.unicode.cldr.util.PrettyPath;
import org.unicode.cldr.util.SimpleFactory;
import org.unicode.cldr.util.Timer;

import com.ibm.icu.impl.Row;
import com.ibm.icu.impl.Row.R2;
import com.ibm.icu.text.Collator;
import com.ibm.icu.text.NumberFormat;
import com.ibm.icu.text.UTF16;

/**
 * Command-line tool that compares two sets of locale files (an "old" and a "new" directory)
 * and writes one HTML change chart per locale plus an {@code index.html} summary into the
 * changes directory.
 *
 * <p>Inputs are read through {@link CldrUtility#getProperty(String, String)}:
 * {@code oldDirectory}, {@code newDirectory}, {@code changesDirectory}, {@code localeFilter}
 * and {@code showAliased}.
 */
public class GenerateComparison {

    /** Created in {@link #main(String[])}; turns cleaned xpaths into "pretty" sortable paths. */
    private static PrettyPath prettyPathMaker;

    private static final Collator collator = Collator.getInstance();

    /**
     * Orders (name, value) rows by the first element, then by the second. Each element is
     * compared with the default collator first, and with a code point comparison as the
     * tie-breaker, so that rows the collator considers equal still get a stable order.
     */
    static class EnglishRowComparator implements Comparator<R2<String, String>> {
        private static final Comparator<String> unicode = new UTF16.StringComparator(true, false, 0);

        @Override
        public int compare(R2<String, String> arg0, R2<String, String> arg1) {
            int result = collator.compare(arg0.get0(), arg1.get0());
            if (result != 0) {
                return result;
            }
            result = unicode.compare(arg0.get0(), arg1.get0());
            if (result != 0) {
                return result;
            }
            result = collator.compare(arg0.get1(), arg1.get1());
            if (result != 0) {
                return result;
            }
            return unicode.compare(arg0.get1(), arg1.get1());
        }
    }

    static EnglishRowComparator ENG = new EnglishRowComparator();

    static final String warningMessage = "<p><b>Warning: this chart is still under development. For how to use it, see <a href=\"http://unicode.org/cldr/data/docs/survey/vetting.html\">Help: How to Vet</a>.</b></p>";

    /** Help link shared by the per-locale pages and the index page. */
    private static final String HELP_LINK = "<a href=\"http://unicode.org/cldr/data/docs/survey/vetting.html\"><b style=\"background-color: yellow;\"><i>Help: How to Vet</i></b></a>";

    /** Number formatter used for all counts; initialized at the start of {@link #main(String[])}. */
    private static NumberFormat format;

    /**
     * Runs the comparison and writes the HTML charts.
     *
     * @param args unused; configuration comes from properties (see class comment)
     * @throws IOException if one of the output files cannot be opened
     */
    public static void main(String[] args) throws IOException {

        // Setup
        Timer timer = new Timer();
        Timer totalTimer = new Timer();
        long totalPaths = 0;
        format = NumberFormat.getNumberInstance();
        format.setGroupingUsed(true);

        Counter<String> totalCounter = new Counter<>();

        // Get the args
        String oldDirectory = CldrUtility.getProperty("oldDirectory",
            PathUtilities.getNormalizedPathString(new File(CLDRPaths.BASE_DIRECTORY, "common/main")) + "/");
        String newDirectory = CldrUtility.getProperty("newDirectory",
            PathUtilities.getNormalizedPathString(
                new File(CLDRPaths.BASE_DIRECTORY, "../cldr-release-1-7/common/main")) + "/");
        String changesDirectory = CldrUtility.getProperty("changesDirectory",
            PathUtilities.getNormalizedPathString(CLDRPaths.CHART_DIRECTORY + "/changes/") + "/");

        String filter = CldrUtility.getProperty("localeFilter", ".*");
        boolean showAliased = CldrUtility.getProperty("showAliased", "false").toLowerCase().startsWith("t");

        // Create the factories
        Factory oldFactory = Factory.make(oldDirectory, filter);
        Factory newFactory = Factory.make(newDirectory, filter);
        CLDRFile english = newFactory.make("en", true);
        CLDRFile newRoot = newFactory.make("root", true);

        // Get the union of all the language locales, sorted by English name
        Set<String> oldList = oldFactory.getAvailableLanguages();
        Set<String> newList = newFactory.getAvailableLanguages();
        Set<String> unifiedList = new HashSet<>(oldList);
        unifiedList.addAll(newList);
        Set<R2<String, String>> pairs = new TreeSet<>();
        for (String code : unifiedList) {
            pairs.add(Row.of(english.getName(code), code));
        }

        prettyPathMaker = new PrettyPath();
        int totalDifferences = 0;

        Set<R2<String, String>> indexInfo = new TreeSet<>(ENG);

        // iterate through those
        for (R2<String, String> pair : pairs) {
            timer.start();
            final String locale = pair.get1();
            final String localeName = pair.get0();
            System.out.println(locale);
            int differences = 0;
            System.out.println();

            // Create CLDR files for both sides; a locale missing on one side gets an empty file.
            CLDRFile oldFile;
            if (oldList.contains(locale)) {
                try {
                    oldFile = oldFactory.make(locale, true, true);
                } catch (Exception e) {
                    reportLocaleFailure(locale, localeName, e);
                    addToIndex(indexInfo, "ERROR1.6 ", locale, localeName);
                    continue;
                }
            } else {
                oldFile = SimpleFactory.makeFile(locale); // make empty file
            }
            CLDRFile newFile;
            if (newList.contains(locale)) {
                try {
                    newFile = newFactory.make(locale, true, true);
                } catch (Exception e) {
                    reportLocaleFailure(locale, localeName, e);
                    addToIndex(indexInfo, "ERROR1.7 ", locale, localeName);
                    continue;
                }
            } else {
                newFile = SimpleFactory.makeFile(locale); // make empty file
            }

            // Defensive null checks, in case a factory hands back null instead of throwing.
            if (oldFile == null) {
                addToIndex(indexInfo, "NEW ", locale, localeName);
                continue;
            } else if (newFile == null) {
                addToIndex(indexInfo, "DELETED ", locale, localeName);
                continue;
            }
            System.out.println("*** " + localeName + "\t" + locale);
            System.out.println();

            // exclude aliased locales
            if (newFile.isAliasedAtTopLevel()) {
                continue;
            }

            // Get the union of all the paths
            Set<String> paths;
            try {
                paths = new HashSet<>();
                oldFile.forEach(paths::add);
                if (oldList.contains(locale)) {
                    paths.addAll(oldFile.getExtraPaths());
                }
                newFile.forEach(paths::add);
                if (newList.contains(locale)) {
                    paths.addAll(newFile.getExtraPaths());
                }
            } catch (Exception e) {
                reportLocaleFailure(locale, localeName, e);
                addToIndex(indexInfo, "ERROR ", locale, localeName);
                continue;
            }

            // We now have the full set of all the paths for old and new files
            // TODO Sort by the pretty form

            final String localeDisplayName = english.getName(locale);
            TablePrinter table = new TablePrinter()
                .setCaption("Changes in " + localeDisplayName + " (" + locale + ")")
                .addColumn("PRETTY_SORT1").setSortPriority(1).setHidden(true).setRepeatHeader(true)
                .addColumn("PRETTY_SORT2").setSortPriority(2).setHidden(true)
                .addColumn("PRETTY_SORT3").setSortPriority(3).setHidden(true)
                .addColumn("ESCAPED_PATH").setHidden(true)
                .addColumn("Inh.").setCellAttributes("class=\"{0}\"").setSortPriority(0).setSpanRows(true)
                .setRepeatHeader(true)
                .addColumn("Section").setSpanRows(true).setCellAttributes("class='section'")
                .addColumn("Subsection").setSpanRows(true).setCellAttributes("class='subsection'")
                .addColumn("Item").setSpanRows(true).setCellPattern("<a href=\"{4}\">{0}</a>")
                .setCellAttributes("class='item'")
                .addColumn("English").setCellAttributes("class='english'")
                .addColumn("Status").setSortPriority(4).setCellAttributes("class=\"{0}\"")
                .addColumn("Old" + localeDisplayName).setCellAttributes("class='old'")
                .addColumn("New" + localeDisplayName).setCellAttributes("class='new'");
            Counter<String> fileCounter = new Counter<>();

            for (String path : paths) {
                if (path.contains("/alias") || path.contains("/identity")) {
                    continue;
                }
                String cleanedPath = CLDRFile.getNondraftNonaltXPath(path);

                String oldValue = oldFile.getStringValue(cleanedPath);
                String newValue = newFile.getStringValue(path);
                String englishValue = english.getStringValue(cleanedPath);

                if (equals(newValue, oldValue)) {
                    continue;
                }

                // get the actual place the data is stored
                // AND adjust if the same as root!
                Status newStatus = new Status();
                String newFoundLocale = getStatus(newFile, newRoot, path, newValue, newStatus);

                // At this point, we have two unequal values
                // TODO check for non-distinguishing attribute value differences

                // Mark as aliased (inherited) if the value comes from a different path or locale.
                boolean isAliased = !newStatus.pathWhereFound.equals(path) || !newFoundLocale.equals(locale);
                if (isAliased && !showAliased) {
                    continue;
                }

                // We definitely have a difference worth recording, so do so
                String coreStatus = classifyChange(newFile.getFullXPath(path), oldValue, newValue);
                String status = isAliased ? "I+" + coreStatus : coreStatus;
                fileCounter.increment(status);
                totalCounter.increment(status);

                String prettySort = prettyPathMaker.getPrettyPath(cleanedPath);
                String[] prettyPartsSort = splitPrettyPath(prettySort, cleanedPath);

                String pretty = prettyPathMaker.getOutputForm(prettySort);
                String escapedPath = "http://unicode.org/cldr/apps/survey?_=" + locale + "&xpath="
                    + EscapingUtilities.urlEscape(cleanedPath);
                String[] prettyParts = splitPrettyPath(pretty, cleanedPath);

                // Example link:
                // http://kwanyin.unicode.org/cldr-apps/survey?_=kw_GB&xpath=%2F%2Fldml%2FlocaleDisplayNames%2Flanguages%2Flanguage%5B%40type%3D%22mt%22%5D

                table.addRow()
                    .addCell(prettyPartsSort[0])
                    .addCell(prettyPartsSort[1])
                    .addCell(prettyPartsSort[2])
                    .addCell(escapedPath)
                    .addCell(isAliased ? "I" : "")
                    .addCell(prettyParts[0])
                    .addCell(prettyParts[1])
                    .addCell(prettyParts[2])
                    .addCell(englishValue == null ? "-" : englishValue)
                    .addCell(coreStatus)
                    .addCell(oldValue == null ? "-" : oldValue)
                    .addCell(newValue == null ? "-" : newValue)
                    .finishRow();

                totalDifferences++;
                differences++;
            }

            addToIndex(indexInfo, "", locale, localeName, fileCounter);

            TablePrinter table2 = new TablePrinter()
                .setCaption("Totals")
                .addColumn("Inh.").setSortPriority(0)
                .addColumn("Status").setSortPriority(1)
                .addColumn("Total");

            for (String key : fileCounter.getKeysetSortedByKey()) {
                boolean inherited = key.startsWith("I+");
                table2.addRow()
                    .addCell(inherited ? "I" : "")
                    .addCell(inherited ? key.substring(2) : key)
                    .addCell(format.format(fileCounter.getCount(key)))
                    .finishRow();
            }

            // try-with-resources so the file is closed even if rendering a table throws
            try (PrintWriter out = FileUtilities.openUTF8Writer(changesDirectory, locale + ".html")) {
                out.println(pageHeader("Changes in " + localeDisplayName)
                    + "<a href='index.html'>Index</a> | " + HELP_LINK
                    + warningMessage);
                out.println(table2);
                out.println("<br>");
                out.println(table);
                out.println(ShowData.dateFooter());
                out.println(CldrUtility.ANALYTICS);
                out.println("</body></html>");
            }

            // show status on console
            System.out.println(locale + "\tDifferences:\t" + format.format(differences)
                + "\tPaths:\t" + format.format(paths.size())
                + "\tTime:\t" + timer);

            totalPaths += paths.size();
        }

        writeIndex(changesDirectory, indexInfo);

        System.out.println();

        for (String key : totalCounter.getKeysetSortedByKey()) {
            System.out.println(key + "\t" + totalCounter.getCount(key));
        }

        System.out.println("Total Differences:\t" + format.format(totalDifferences)
            + "\tPaths:\t" + format.format(totalPaths)
            + "\tTotal Time:\t" + format.format(totalTimer.getDuration()) + "ms");
    }

    /** Reports a per-locale failure on stderr so that it is not silently swallowed. */
    private static void reportLocaleFailure(String locale, String localeName, Exception e) {
        System.err.println("Locale: " + locale + ", " + localeName);
        e.printStackTrace();
    }

    /**
     * Returns the status label for a changed value: {@code NOT-ACC} when the new full path
     * carries a {@code @draft} attribute other than {@code contributed}, otherwise
     * {@code deleted}, {@code added} or {@code changed} depending on which value is null.
     */
    private static String classifyChange(String newFullPath, String oldValue, String newValue) {
        final boolean reject = newFullPath != null && newFullPath.contains("@draft")
            && !newFullPath.contains("@draft=\"contributed\"");
        if (reject) {
            return "NOT-ACC";
        } else if (newValue == null) {
            return "deleted";
        } else if (oldValue == null) {
            return "added";
        }
        return "changed";
    }

    /**
     * Splits a pretty path on {@code |} into exactly three parts. A path that does not have
     * three parts is logged; missing parts are returned as empty strings (and extra parts are
     * dropped) so that callers can index the result safely. Note that {@link String#split}
     * drops trailing empty segments, so a path with an empty last part arrives here short.
     */
    private static String[] splitPrettyPath(String pretty, String cleanedPath) {
        String[] parts = pretty.split("[|]");
        if (parts.length == 3) {
            return parts;
        }
        System.out.println("Bad pretty path: " + pretty + ", original: " + cleanedPath);
        String[] fixed = { "", "", "" };
        System.arraycopy(parts, 0, fixed, 0, Math.min(parts.length, fixed.length));
        return fixed;
    }

    /** Builds the common HTML prologue (head, stylesheet link, and {@code <h1>}) for a page. */
    private static String pageHeader(String title) {
        return "<html>"
            + "<head><meta http-equiv='Content-Type' content='text/html; charset=utf-8'>"
            + CldrUtility.LINE_SEPARATOR
            + "<title>" + title + "</title>"
            + CldrUtility.LINE_SEPARATOR
            + "<link rel='stylesheet' href='index.css' type='text/css'>"
            + CldrUtility.LINE_SEPARATOR
            + "<base target='_blank'>"
            + CldrUtility.LINE_SEPARATOR
            + "</head><body>"
            + CldrUtility.LINE_SEPARATOR
            + "<h1>" + title + "</h1>"
            + CldrUtility.LINE_SEPARATOR;
    }

    /**
     * Writes {@code index.html}: one table row per first character of the locale name, with
     * the entries for that character separated by {@code " | "}.
     *
     * @throws IOException if the index file cannot be opened
     */
    private static void writeIndex(String changesDirectory, Set<R2<String, String>> indexInfo)
        throws IOException {
        try (PrintWriter indexFile = FileUtilities.openUTF8Writer(changesDirectory, "index.html")) {
            indexFile.println(pageHeader("Change Summary")
                + HELP_LINK
                + warningMessage
                + "<table><tr>");

            String separator = "";
            int last = 0; // 0 means "no row started yet"
            for (R2<String, String> indexPair : indexInfo) {
                String name = indexPair.get0();
                // Guard against an empty name; codePointAt(0) would throw.
                int firstChar = name.isEmpty() ? '?' : name.codePointAt(0);
                if (firstChar == last) {
                    indexFile.append(separator);
                } else {
                    // Character.toChars keeps supplementary code points intact.
                    indexFile.append(last == 0 ? "" : "</td></tr>\n<tr>")
                        .append("<th>")
                        .append(String.valueOf(Character.toChars(firstChar)))
                        .append("</th><td>");
                }
                indexFile.append(indexPair.get1());
                separator = " | ";
                last = firstChar;
            }
            // Close the last open cell, if any row was written.
            indexFile.println((last == 0 ? "" : "</td>") + "</tr></table>");
            indexFile.println(ShowData.dateFooter());
            indexFile.println(CldrUtility.ANALYTICS);
            indexFile.println("</body></html>");
        }
    }

    /** Adds an index entry without per-status counts. */
    private static void addToIndex(Set<R2<String, String>> indexInfo, String title, final String locale,
        final String localeName) {
        addToIndex(indexInfo, title, locale, localeName, null);
    }

    /**
     * Adds an entry for a locale to the index set, keyed by locale name.
     *
     * <p>Titles starting with {@code ERROR} produce a plain-text entry with no link. All other
     * entries link to {@code <locale>.html} and, when {@code fileCounter} is non-null and
     * non-empty, append a bracketed summary made of the first character of each counter key
     * and its formatted count.
     *
     * @param fileCounter per-status counts for the locale, or null for none
     */
    private static void addToIndex(Set<R2<String, String>> indexInfo, String title, final String locale,
        final String localeName, Counter<String> fileCounter) {
        if (title.startsWith("ERROR")) {
            indexInfo.add(Row.of(localeName,
                title + " " + localeName + " (" + locale + ")"));
            return;
        }
        StringBuilder counterString = new StringBuilder();
        if (fileCounter != null) {
            for (String s : fileCounter) {
                if (counterString.length() != 0) {
                    counterString.append("; ");
                }
                counterString.append(s.charAt(0)).append(':').append(format.format(fileCounter.getCount(s)));
            }
        }
        indexInfo.add(Row.of(localeName,
            "<a href='" + locale + ".html'>" + title + localeName + " (" + locale + ")</a>"
                + (counterString.length() == 0 ? "" : " [" + counterString + "]")));
    }

    /**
     * Returns the ID of the locale in which the value for {@code path} is found, filling in
     * {@code oldStatus} through {@link CLDRFile#getSourceLocaleID}. If the value equals root's
     * value at the path where it was found, {@code "root"} is returned instead.
     */
    private static String getStatus(CLDRFile oldFile, CLDRFile oldRoot, String path,
        String oldString, Status oldStatus) {
        String oldLocale = oldFile.getSourceLocaleID(path, oldStatus);
        if (!oldLocale.equals("root")) {
            String oldRootValue = oldRoot.getStringValue(oldStatus.pathWhereFound);
            if (equals(oldString, oldRootValue)) {
                oldLocale = "root";
            }
        }
        return oldLocale;
    }

    /**
     * Prints a header line (locale, title, size) followed by the second element of each row.
     * Prints nothing when {@code rejected} is empty.
     */
    private static void showSet(PrintWriter out, Set<R2<String, String>> rejected, final String locale,
        String title) {
        if (rejected.size() != 0) {
            out.println();
            out.println(locale + "\t" + title + "\t" + rejected.size());
            for (R2<String, String> prettyAndline : rejected) {
                out.println(prettyAndline.get1());
            }
        }
    }

    /** Null-safe string equality: two nulls are equal, and null never equals a non-null string. */
    private static boolean equals(String newString, String oldString) {
        if (newString == null) {
            return oldString == null;
        }
        return newString.equals(oldString);
    }

}