1 package org.unicode.cldr.tool;
2 
3 import java.io.File;
4 import java.io.IOException;
5 import java.io.PrintWriter;
6 import java.util.Comparator;
7 import java.util.HashSet;
8 import java.util.Set;
9 import java.util.TreeSet;
10 
11 import org.unicode.cldr.draft.FileUtilities;
12 import org.unicode.cldr.util.CLDRFile;
13 import org.unicode.cldr.util.CLDRFile.Status;
14 import org.unicode.cldr.util.CLDRPaths;
15 import org.unicode.cldr.util.CldrUtility;
16 import org.unicode.cldr.util.Counter;
17 import org.unicode.cldr.util.EscapingUtilities;
18 import org.unicode.cldr.util.Factory;
19 import org.unicode.cldr.util.PrettyPath;
20 import org.unicode.cldr.util.SimpleFactory;
21 import org.unicode.cldr.util.Timer;
22 
23 import com.ibm.icu.dev.util.CollectionUtilities;
24 import com.ibm.icu.impl.Row;
25 import com.ibm.icu.impl.Row.R2;
26 import com.ibm.icu.text.Collator;
27 import com.ibm.icu.text.NumberFormat;
28 import com.ibm.icu.text.UTF16;
29 
30 public class GenerateComparison {
31 
32     private static PrettyPath prettyPathMaker;
33 
34     private static Collator collator = Collator.getInstance();
35 
36     static class EnglishRowComparator implements Comparator<R2<String, String>> {
37         private static Comparator<String> unicode = new UTF16.StringComparator(true, false, 0);
38 
compare(R2<String, String> arg0, R2<String, String> arg1)39         public int compare(R2<String, String> arg0, R2<String, String> arg1) {
40             int result = collator.compare(arg0.get0(), arg1.get0());
41             if (result != 0) return result;
42             result = unicode.compare(arg0.get0(), arg1.get0());
43             if (result != 0) return result;
44             result = collator.compare(arg0.get1(), arg1.get1());
45             if (result != 0) return result;
46             result = unicode.compare(arg0.get1(), arg1.get1());
47             return result;
48         }
49     }
50 
51     static EnglishRowComparator ENG = new EnglishRowComparator();
52 
53     static final String warningMessage = "<p><b>Warning: this chart is still under development. For how to use it, see <a href=\"http://unicode.org/cldr/data/docs/survey/vetting.html\">Help: How to Vet</a>.</b></p>";
54 
main(String[] args)55     public static void main(String[] args) throws IOException {
56 
57         // Setup
58         Timer timer = new Timer();
59         Timer totalTimer = new Timer();
60         long totalPaths = 0;
61         format = NumberFormat.getNumberInstance();
62         format.setGroupingUsed(true);
63 
64         Counter<String> totalCounter = new Counter<String>();
65 
66         // Get the args
67 
68         String oldDirectory = CldrUtility.getProperty("oldDirectory", new File(CLDRPaths.BASE_DIRECTORY,
69             "common/main").getCanonicalPath() + "/");
70         String newDirectory = CldrUtility.getProperty("newDirectory", new File(CLDRPaths.BASE_DIRECTORY,
71             "../cldr-release-1-7/common/main").getCanonicalPath() + "/");
72         String changesDirectory = CldrUtility.getProperty("changesDirectory", new File(CLDRPaths.CHART_DIRECTORY
73             + "/changes/").getCanonicalPath()
74             + "/");
75 
76         String filter = CldrUtility.getProperty("localeFilter", ".*");
77         boolean SHOW_ALIASED = CldrUtility.getProperty("showAliased", "false").toLowerCase().startsWith("t");
78 
79         // Create the factories
80 
81         Factory oldFactory = Factory.make(oldDirectory, filter);
82         Factory newFactory = Factory.make(newDirectory, filter);
83         CLDRFile english = newFactory.make("en", true);
84         CLDRFile newRoot = newFactory.make("root", true);
85 
86         // Get the union of all the language locales, sorted by English name
87 
88         Set<String> oldList = oldFactory.getAvailableLanguages();
89         Set<String> newList = newFactory.getAvailableLanguages();
90         Set<String> unifiedList = new HashSet<String>(oldList);
91         unifiedList.addAll(newList);
92         Set<R2<String, String>> pairs = new TreeSet<R2<String, String>>();
93         for (String code : unifiedList) {
94             pairs.add(Row.of(english.getName(code), code));
95         }
96 
97         prettyPathMaker = new PrettyPath();
98         int totalDifferences = 0;
99         int differences = 0;
100 
101         Set<R2<String, String>> indexInfo = new TreeSet<R2<String, String>>(ENG);
102 
103         // iterate through those
104         for (R2<String, String> pair : pairs) {
105             timer.start();
106             final String locale = pair.get1();
107             final String localeName = pair.get0();
108             System.out.println(locale);
109             differences = 0;
110             System.out.println();
111 
112             // Create CLDR files for both; null if can't open
113 
114             CLDRFile oldFile = null;
115             if (oldList.contains(locale)) {
116                 try {
117                     oldFile = oldFactory.make(locale, true, true);
118                 } catch (Exception e) {
119                     addToIndex(indexInfo, "ERROR1.6 ", locale, localeName);
120                     continue;
121                 }
122             } else {
123                 oldFile = SimpleFactory.makeFile(locale); // make empty file
124             }
125             CLDRFile newFile = null;
126             if (newList.contains(locale)) {
127                 try {
128                     newFile = newFactory.make(locale, true, true);
129                 } catch (Exception e) {
130                     addToIndex(indexInfo, "ERROR1.7 ", locale, localeName);
131                     continue;
132                 }
133             } else {
134                 newFile = SimpleFactory.makeFile(locale); // make empty file
135             }
136 
137             // for(String str : newFile) {
138             // String xo = newFile.getFullXPath(str);
139             // String v = newFile.getStringValue(str);
140             //
141             // System.out.println(xo+"\t"+v+"\n");
142             //
143             // }
144             // Check for null cases
145 
146             if (oldFile == null) {
147                 addToIndex(indexInfo, "NEW ", locale, localeName);
148                 continue;
149             } else if (newFile == null) {
150                 addToIndex(indexInfo, "DELETED ", locale, localeName);
151                 continue;
152             }
153             System.out.println("*** " + localeName + "\t" + locale);
154             System.out.println();
155 
156             // exclude aliased locales
157             if (newFile.isAliasedAtTopLevel()) {
158                 continue;
159             }
160 
161             // Get the union of all the paths
162 
163             Set<String> paths;
164             try {
165                 paths = new HashSet<String>();
166                 CollectionUtilities.addAll(oldFile.iterator(), paths);
167                 if (oldList.contains(locale)) {
168                     paths.addAll(oldFile.getExtraPaths());
169                 }
170                 CollectionUtilities.addAll(newFile.iterator(), paths);
171                 if (newList.contains(locale)) {
172                     paths.addAll(newFile.getExtraPaths());
173                 }
174             } catch (Exception e) {
175                 System.err.println("Locale: " + locale + ", " + localeName);
176                 e.printStackTrace();
177                 addToIndex(indexInfo, "ERROR ", locale, localeName);
178                 continue;
179             }
180 
181             // We now have the full set of all the paths for old and new files
182             // TODO Sort by the pretty form
183             // Set<R2<String,String>> pathPairs = new TreeSet();
184             // for (String code : unifiedList) {
185             // pairs.add(Row.make(code, english.getName(code)));
186             // }
187 
188             // Initialize sets
189             // .addColumn("Code", "class='source'", "<a name=\"{0}\" href='likely_subtags.html#und_{0}'>{0}</a>",
190             // "class='source'", true)
191 
192             final String localeDisplayName = english.getName(locale);
193             TablePrinter table = new TablePrinter()
194                 .setCaption("Changes in " + localeDisplayName + " (" + locale + ")")
195                 .addColumn("PRETTY_SORT1").setSortPriority(1).setHidden(true).setRepeatHeader(true)
196                 .addColumn("PRETTY_SORT2").setSortPriority(2).setHidden(true)
197                 .addColumn("PRETTY_SORT3").setSortPriority(3).setHidden(true)
198                 .addColumn("ESCAPED_PATH").setHidden(true)
199                 .addColumn("Inh.").setCellAttributes("class=\"{0}\"").setSortPriority(0).setSpanRows(true)
200                 .setRepeatHeader(true)
201                 .addColumn("Section").setSpanRows(true).setCellAttributes("class='section'")
202                 .addColumn("Subsection").setSpanRows(true).setCellAttributes("class='subsection'")
203                 .addColumn("Item").setSpanRows(true).setCellPattern("<a href=\"{4}\">{0}</a>")
204                 .setCellAttributes("class='item'")
205                 .addColumn("English").setCellAttributes("class='english'")
206                 .addColumn("Status").setSortPriority(4).setCellAttributes("class=\"{0}\"")
207                 .addColumn("Old" + localeDisplayName).setCellAttributes("class='old'")
208                 .addColumn("New" + localeDisplayName).setCellAttributes("class='new'");
209             Counter<String> fileCounter = new Counter<String>();
210 
211             for (String path : paths) {
212                 if (path.contains("/alias") || path.contains("/identity")) {
213                     continue;
214                 }
215                 String cleanedPath = CLDRFile.getNondraftNonaltXPath(path);
216 
217                 String oldValue = oldFile.getStringValue(cleanedPath);
218                 String newValue = newFile.getStringValue(path);
219                 String englishValue = english.getStringValue(cleanedPath);
220 
221                 // for debugging
222                 if (oldValue != null && oldValue.contains("{1} {0}")) {
223                     System.out.print("");
224                 }
225 
226                 if (equals(newValue, oldValue)) {
227                     continue;
228                 }
229 
230                 // get the actual place the data is stored
231                 // AND adjust if the same as root!
232 
233                 Status newStatus = new Status();
234                 String newFoundLocale = getStatus(newFile, newRoot, path, newValue, newStatus);
235 
236                 // At this point, we have two unequal values
237                 // TODO check for non-distinguishing attribute value differences
238 
239                 boolean isAliased = false;
240 
241                 // Skip deletions of alt-proposed
242 
243                 // if (newValue == null) {
244                 // if (path.contains("@alt=\"proposed")) {
245                 // continue;
246                 // }
247                 // }
248 
249                 // Skip if both inherited from the same locale, since we should catch it
250                 // in that locale.
251 
252                 // Mark as aliased if new locale or path is different
253                 if (!newStatus.pathWhereFound.equals(path)) {
254                     isAliased = true;
255                     // continue;
256                 }
257 
258                 if (!newFoundLocale.equals(locale)) {
259                     isAliased = true;
260                     // continue;
261                 }
262 
263                 // // skip if old locale or path is aliased
264                 // if (!oldFoundLocale.equals(locale)) {
265                 // //isAliased=true;
266                 // continue;
267                 // }
268                 //
269                 // // Skip if either found path is are different
270                 // if (!oldStatus.pathWhereFound.equals(cleanedPath)) {
271                 // //isAliased=true;
272                 // continue;
273                 // }
274 
275                 // Now check other aliases
276 
277                 // final boolean newIsAlias = !newStatus.pathWhereFound.equals(path);
278                 // if (newIsAlias) { // new is alias
279                 // // filter out cases of a new string that is found via alias
280                 // if (oldValue == null) {
281                 // continue;
282                 // }
283                 //
284                 // }
285 
286                 if (isAliased && !SHOW_ALIASED) {
287                     continue;
288                 }
289 
290                 // We definitely have a difference worth recording, so do so
291 
292                 String newFullPath = newFile.getFullXPath(path);
293                 final boolean reject = newFullPath != null && newFullPath.contains("@draft")
294                     && !newFullPath.contains("@draft=\"contributed\"");
295                 String status;
296                 if (reject) {
297                     status = "NOT-ACC";
298                 } else if (newValue == null) {
299                     status = "deleted";
300                 } else if (oldValue == null) {
301                     status = "added";
302                 } else {
303                     status = "changed";
304                 }
305                 String coreStatus = status;
306                 if (isAliased) {
307                     status = "I+" + status;
308                 }
309                 fileCounter.increment(status);
310                 totalCounter.increment(status);
311 
312                 String pretty_sort = prettyPathMaker.getPrettyPath(cleanedPath);
313                 String[] prettyPartsSort = pretty_sort.split("[|]");
314                 if (prettyPartsSort.length != 3) {
315                     System.out.println("Bad pretty path: " + pretty_sort + ", original: " + cleanedPath);
316                 }
317                 String prettySort1 = prettyPartsSort[0];
318                 String prettySort2 = prettyPartsSort[1];
319                 String prettySort3 = prettyPartsSort[2];
320 
321                 String pretty = prettyPathMaker.getOutputForm(pretty_sort);
322                 String escapedPath = "http://unicode.org/cldr/apps/survey?_=" + locale + "&xpath="
323                     + EscapingUtilities.urlEscape(cleanedPath);
324                 String[] prettyParts = pretty.split("[|]");
325                 if (prettyParts.length != 3) {
326                     System.out.println("Bad pretty path: " + pretty + ", original: " + cleanedPath);
327                 }
328                 String pretty1 = prettyParts[0];
329                 String pretty2 = prettyParts[1];
330                 String pretty3 = prettyParts[2];
331 
332                 // http://kwanyin.unicode.org/cldr-apps/survey?_=kw_GB&xpath=%2F%2Fldml%2FlocaleDisplayNames%2Flanguages%2Flanguage%5B%40type%3D%22mt%22%5D
333 
334                 table.addRow()
335                     .addCell(prettySort1)
336                     .addCell(prettySort2)
337                     .addCell(prettySort3)
338                     .addCell(escapedPath)
339                     .addCell(isAliased ? "I" : "")
340                     .addCell(pretty1)
341                     .addCell(pretty2)
342                     .addCell(pretty3)
343                     .addCell(englishValue == null ? "-" : englishValue)
344                     .addCell(coreStatus)
345                     .addCell(oldValue == null ? "-" : oldValue)
346                     .addCell(newValue == null ? "-" : newValue)
347                     .finishRow();
348 
349                 totalDifferences++;
350                 differences++;
351             }
352 
353             addToIndex(indexInfo, "", locale, localeName, fileCounter);
354             PrintWriter out = FileUtilities.openUTF8Writer(changesDirectory, locale + ".html");
355             String title = "Changes in " + localeDisplayName;
356             out.println("<html>"
357                 +
358                 "<head><meta http-equiv='Content-Type' content='text/html; charset=utf-8'>"
359                 + CldrUtility.LINE_SEPARATOR
360                 +
361                 "<title>"
362                 + title
363                 + "</title>"
364                 + CldrUtility.LINE_SEPARATOR
365                 +
366                 "<link rel='stylesheet' href='index.css' type='text/css'>"
367                 + CldrUtility.LINE_SEPARATOR
368                 +
369                 "<base target='_blank'>"
370                 + CldrUtility.LINE_SEPARATOR
371                 +
372                 "</head><body>"
373                 + CldrUtility.LINE_SEPARATOR
374                 +
375                 "<h1>"
376                 + title
377                 + "</h1>"
378                 + CldrUtility.LINE_SEPARATOR
379                 + "<a href='index.html'>Index</a> | <a href=\"http://unicode.org/cldr/data/docs/survey/vetting.html\"><b style=\"background-color: yellow;\"><i>Help: How to Vet</i></b></a>"
380                 + warningMessage);
381 
382             TablePrinter table2 = new TablePrinter()
383                 .setCaption("Totals")
384                 .addColumn("Inh.").setSortPriority(0)
385                 .addColumn("Status").setSortPriority(1)
386                 .addColumn("Total");
387 
388             for (String key : fileCounter.getKeysetSortedByKey()) {
389                 boolean inherited = key.startsWith("I+");
390                 table2.addRow()
391                     .addCell(inherited ? "I" : "")
392                     .addCell(inherited ? key.substring(2) : key)
393                     .addCell(format.format(fileCounter.getCount(key)))
394                     .finishRow();
395             }
396             out.println(table2);
397             out.println("<br>");
398             out.println(table);
399 
400             // show status on console
401 
402             System.out.println(locale + "\tDifferences:\t" + format.format(differences)
403                 + "\tPaths:\t" + format.format(paths.size())
404                 + "\tTime:\t" + timer);
405 
406             totalPaths += paths.size();
407             out.println(ShowData.dateFooter());
408             out.println(CldrUtility.ANALYTICS);
409             out.println("</body></html>");
410             out.close();
411         }
412         PrintWriter indexFile = FileUtilities.openUTF8Writer(changesDirectory, "index.html");
413         indexFile
414             .println("<html>"
415                 +
416                 "<head><meta http-equiv='Content-Type' content='text/html; charset=utf-8'>"
417                 + CldrUtility.LINE_SEPARATOR
418                 +
419                 "<title>"
420                 + "Change Summary"
421                 + "</title>"
422                 + CldrUtility.LINE_SEPARATOR
423                 +
424                 "<link rel='stylesheet' href='index.css' type='text/css'>"
425                 + CldrUtility.LINE_SEPARATOR
426                 +
427                 "<base target='_blank'>"
428                 + CldrUtility.LINE_SEPARATOR
429                 +
430                 "</head><body>"
431                 + CldrUtility.LINE_SEPARATOR
432                 +
433                 "<h1>"
434                 + "Change Summary"
435                 + "</h1>"
436                 + CldrUtility.LINE_SEPARATOR
437                 + "<a href=\"http://unicode.org/cldr/data/docs/survey/vetting.html\"><b style=\"background-color: yellow;\"><i>Help: How to Vet</i></b></a>"
438                 + warningMessage
439                 + "<table><tr>");
440 
441         String separator = "";
442         int last = 0;
443         for (R2<String, String> indexPair : indexInfo) {
444             int firstChar = indexPair.get0().codePointAt(0);
445             indexFile.append(firstChar == last ? separator
446                 : (last == 0 ? "" : "</td></tr>\n<tr>") + "<th>" + String.valueOf((char) firstChar) + "</th><td>")
447                 .append(indexPair.get1());
448             separator = " | ";
449             last = indexPair.get0().codePointAt(0);
450         }
451         indexFile.println("</tr></table>");
452         indexFile.println(ShowData.dateFooter());
453         indexFile.println(CldrUtility.ANALYTICS);
454         indexFile.println("</body></html>");
455         indexFile.close();
456 
457         System.out.println();
458 
459         for (String key : totalCounter.getKeysetSortedByKey()) {
460             System.out.println(key + "\t" + totalCounter.getCount(key));
461         }
462 
463         System.out.println("Total Differences:\t" + format.format(totalDifferences)
464             + "\tPaths:\t" + format.format(totalPaths)
465             + "\tTotal Time:\t" + format.format(totalTimer.getDuration()) + "ms");
466     }
467 
468     // static Transliterator urlHex = Transliterator.createFromRules("foo",
469     // "([^!(-*,-\\:A-Z_a-z~]) > &hex($1) ;" +
470     // ":: null;" +
471     // "'\\u00' > '%' ;"
472 // , Transliterator.FORWARD);
473 
474     private static NumberFormat format;
475 
addToIndex(Set<R2<String, String>> indexInfo, String title, final String locale, final String localeName)476     private static void addToIndex(Set<R2<String, String>> indexInfo, String title, final String locale,
477         final String localeName) {
478         addToIndex(indexInfo, title, locale, localeName, null);
479     }
480 
addToIndex(Set<R2<String, String>> indexInfo, String title, final String locale, final String localeName, Counter<String> fileCounter)481     private static void addToIndex(Set<R2<String, String>> indexInfo, String title, final String locale,
482         final String localeName, Counter<String> fileCounter) {
483         if (title.startsWith("ERROR")) {
484             indexInfo.add(R2.of(localeName,
485                 title + " " + localeName + " (" + locale + ")"));
486             return;
487         }
488         String counterString = "";
489         if (fileCounter != null) {
490             for (String s : fileCounter) {
491                 if (counterString.length() != 0) {
492                     counterString += "; ";
493                 }
494                 counterString += s.charAt(0) + ":" + format.format(fileCounter.getCount(s));
495             }
496         }
497         indexInfo.add(R2.of(localeName,
498             "<a href='" + locale + ".html'>" + title + localeName + " (" + locale + ")</a>"
499                 + (counterString.length() == 0 ? "" : " [" + counterString + "]")));
500     }
501 
502     // private static int accumulate(Set<R2<String,String>> rejected, int totalRejected,
503     // final String locale, String indicator, String oldValue, String newValue, String path) {
504     // String pretty = prettyPathMaker.getPrettyPath(path, false);
505     // String line = locale + "\t" + indicator +"\t\u200E[" + oldValue + "]\u200E\t\u200E[" + newValue + "]\u200E\t" +
506     // pretty;
507     // String pretty2 = prettyPathMaker.getOutputForm(pretty);
508     // rejected.add(Row.make(pretty2, line));
509     // totalRejected++;
510     // return totalRejected;
511     // }
512 
getStatus(CLDRFile oldFile, CLDRFile oldRoot, String path, String oldString, Status oldStatus)513     private static String getStatus(CLDRFile oldFile, CLDRFile oldRoot, String path,
514         String oldString, Status oldStatus) {
515         String oldLocale = oldFile.getSourceLocaleID(path, oldStatus);
516         if (!oldLocale.equals("root")) {
517             String oldRootValue = oldRoot.getStringValue(oldStatus.pathWhereFound);
518             if (equals(oldString, oldRootValue)) {
519                 oldLocale = "root";
520             }
521         }
522         return oldLocale;
523     }
524 
showSet(PrintWriter out, Set<R2<String, String>> rejected, final String locale, String title)525     private static void showSet(PrintWriter out, Set<R2<String, String>> rejected, final String locale, String title) {
526         if (rejected.size() != 0) {
527             out.println();
528             out.println(locale + "\t" + title + "\t" + rejected.size());
529             for (R2<String, String> prettyAndline : rejected) {
530                 out.println(prettyAndline.get1());
531             }
532         }
533     }
534 
equals(String newString, String oldString)535     private static boolean equals(String newString, String oldString) {
536         if (newString == null) {
537             return oldString == null;
538         }
539         return newString.equals(oldString);
540     }
541 
542 }
543