1 package org.unicode.cldr.tool;
2 
3 import java.io.File;
4 import java.io.IOException;
5 import java.io.PrintWriter;
6 import java.util.Comparator;
7 import java.util.HashSet;
8 import java.util.Set;
9 import java.util.TreeSet;
10 
11 import org.unicode.cldr.draft.FileUtilities;
12 import org.unicode.cldr.util.CLDRFile;
13 import org.unicode.cldr.util.CLDRFile.Status;
14 import org.unicode.cldr.util.CLDRPaths;
15 import org.unicode.cldr.util.CldrUtility;
16 import org.unicode.cldr.util.Counter;
17 import org.unicode.cldr.util.EscapingUtilities;
18 import org.unicode.cldr.util.Factory;
19 import org.unicode.cldr.util.PathUtilities;
20 import org.unicode.cldr.util.PrettyPath;
21 import org.unicode.cldr.util.SimpleFactory;
22 import org.unicode.cldr.util.Timer;
23 
24 import com.ibm.icu.impl.Row;
25 import com.ibm.icu.impl.Row.R2;
26 import com.ibm.icu.text.Collator;
27 import com.ibm.icu.text.NumberFormat;
28 import com.ibm.icu.text.UTF16;
29 
30 public class GenerateComparison {
31 
32     private static PrettyPath prettyPathMaker;
33 
34     private static Collator collator = Collator.getInstance();
35 
36     static class EnglishRowComparator implements Comparator<R2<String, String>> {
37         private static Comparator<String> unicode = new UTF16.StringComparator(true, false, 0);
38 
39         @Override
compare(R2<String, String> arg0, R2<String, String> arg1)40         public int compare(R2<String, String> arg0, R2<String, String> arg1) {
41             int result = collator.compare(arg0.get0(), arg1.get0());
42             if (result != 0) return result;
43             result = unicode.compare(arg0.get0(), arg1.get0());
44             if (result != 0) return result;
45             result = collator.compare(arg0.get1(), arg1.get1());
46             if (result != 0) return result;
47             result = unicode.compare(arg0.get1(), arg1.get1());
48             return result;
49         }
50     }
51 
52     static EnglishRowComparator ENG = new EnglishRowComparator();
53 
54     static final String warningMessage = "<p><b>Warning: this chart is still under development. For how to use it, see <a href=\"http://unicode.org/cldr/data/docs/survey/vetting.html\">Help: How to Vet</a>.</b></p>";
55 
main(String[] args)56     public static void main(String[] args) throws IOException {
57 
58         // Setup
59         Timer timer = new Timer();
60         Timer totalTimer = new Timer();
61         long totalPaths = 0;
62         format = NumberFormat.getNumberInstance();
63         format.setGroupingUsed(true);
64 
65         Counter<String> totalCounter = new Counter<>();
66 
67         // Get the args
68 
69         String oldDirectory = CldrUtility.getProperty("oldDirectory", PathUtilities.getNormalizedPathString(new File(
70                 CLDRPaths.BASE_DIRECTORY,
71             "common/main")) + "/");
72         String newDirectory = CldrUtility.getProperty("newDirectory", PathUtilities.getNormalizedPathString(new File(
73                 CLDRPaths.BASE_DIRECTORY,
74             "../cldr-release-1-7/common/main")) + "/");
75         String changesDirectory = CldrUtility.getProperty("changesDirectory", PathUtilities
76                 .getNormalizedPathString(CLDRPaths.CHART_DIRECTORY
77             + "/changes/")
78             + "/");
79 
80         String filter = CldrUtility.getProperty("localeFilter", ".*");
81         boolean SHOW_ALIASED = CldrUtility.getProperty("showAliased", "false").toLowerCase().startsWith("t");
82 
83         // Create the factories
84 
85         Factory oldFactory = Factory.make(oldDirectory, filter);
86         Factory newFactory = Factory.make(newDirectory, filter);
87         CLDRFile english = newFactory.make("en", true);
88         CLDRFile newRoot = newFactory.make("root", true);
89 
90         // Get the union of all the language locales, sorted by English name
91 
92         Set<String> oldList = oldFactory.getAvailableLanguages();
93         Set<String> newList = newFactory.getAvailableLanguages();
94         Set<String> unifiedList = new HashSet<>(oldList);
95         unifiedList.addAll(newList);
96         Set<R2<String, String>> pairs = new TreeSet<>();
97         for (String code : unifiedList) {
98             pairs.add(Row.of(english.getName(code), code));
99         }
100 
101         prettyPathMaker = new PrettyPath();
102         int totalDifferences = 0;
103         int differences = 0;
104 
105         Set<R2<String, String>> indexInfo = new TreeSet<>(ENG);
106 
107         // iterate through those
108         for (R2<String, String> pair : pairs) {
109             timer.start();
110             final String locale = pair.get1();
111             final String localeName = pair.get0();
112             System.out.println(locale);
113             differences = 0;
114             System.out.println();
115 
116             // Create CLDR files for both; null if can't open
117 
118             CLDRFile oldFile = null;
119             if (oldList.contains(locale)) {
120                 try {
121                     oldFile = oldFactory.make(locale, true, true);
122                 } catch (Exception e) {
123                     addToIndex(indexInfo, "ERROR1.6 ", locale, localeName);
124                     continue;
125                 }
126             } else {
127                 oldFile = SimpleFactory.makeFile(locale); // make empty file
128             }
129             CLDRFile newFile = null;
130             if (newList.contains(locale)) {
131                 try {
132                     newFile = newFactory.make(locale, true, true);
133                 } catch (Exception e) {
134                     addToIndex(indexInfo, "ERROR1.7 ", locale, localeName);
135                     continue;
136                 }
137             } else {
138                 newFile = SimpleFactory.makeFile(locale); // make empty file
139             }
140 
141             // for(String str : newFile) {
142             // String xo = newFile.getFullXPath(str);
143             // String v = newFile.getStringValue(str);
144             //
145             // System.out.println(xo+"\t"+v+"\n");
146             //
147             // }
148             // Check for null cases
149 
150             if (oldFile == null) {
151                 addToIndex(indexInfo, "NEW ", locale, localeName);
152                 continue;
153             } else if (newFile == null) {
154                 addToIndex(indexInfo, "DELETED ", locale, localeName);
155                 continue;
156             }
157             System.out.println("*** " + localeName + "\t" + locale);
158             System.out.println();
159 
160             // exclude aliased locales
161             if (newFile.isAliasedAtTopLevel()) {
162                 continue;
163             }
164 
165             // Get the union of all the paths
166 
167             Set<String> paths;
168             try {
169                 paths = new HashSet<>();
170                 oldFile.forEach(paths::add);
171                 if (oldList.contains(locale)) {
172                     paths.addAll(oldFile.getExtraPaths());
173                 }
174                 newFile.forEach(paths::add);
175                 if (newList.contains(locale)) {
176                     paths.addAll(newFile.getExtraPaths());
177                 }
178             } catch (Exception e) {
179                 System.err.println("Locale: " + locale + ", " + localeName);
180                 e.printStackTrace();
181                 addToIndex(indexInfo, "ERROR ", locale, localeName);
182                 continue;
183             }
184 
185             // We now have the full set of all the paths for old and new files
186             // TODO Sort by the pretty form
187             // Set<R2<String,String>> pathPairs = new TreeSet();
188             // for (String code : unifiedList) {
189             // pairs.add(Row.make(code, english.getName(code)));
190             // }
191 
192             // Initialize sets
193             // .addColumn("Code", "class='source'", "<a name=\"{0}\" href='likely_subtags.html#und_{0}'>{0}</a>",
194             // "class='source'", true)
195 
196             final String localeDisplayName = english.getName(locale);
197             TablePrinter table = new TablePrinter()
198                 .setCaption("Changes in " + localeDisplayName + " (" + locale + ")")
199                 .addColumn("PRETTY_SORT1").setSortPriority(1).setHidden(true).setRepeatHeader(true)
200                 .addColumn("PRETTY_SORT2").setSortPriority(2).setHidden(true)
201                 .addColumn("PRETTY_SORT3").setSortPriority(3).setHidden(true)
202                 .addColumn("ESCAPED_PATH").setHidden(true)
203                 .addColumn("Inh.").setCellAttributes("class=\"{0}\"").setSortPriority(0).setSpanRows(true)
204                 .setRepeatHeader(true)
205                 .addColumn("Section").setSpanRows(true).setCellAttributes("class='section'")
206                 .addColumn("Subsection").setSpanRows(true).setCellAttributes("class='subsection'")
207                 .addColumn("Item").setSpanRows(true).setCellPattern("<a href=\"{4}\">{0}</a>")
208                 .setCellAttributes("class='item'")
209                 .addColumn("English").setCellAttributes("class='english'")
210                 .addColumn("Status").setSortPriority(4).setCellAttributes("class=\"{0}\"")
211                 .addColumn("Old" + localeDisplayName).setCellAttributes("class='old'")
212                 .addColumn("New" + localeDisplayName).setCellAttributes("class='new'");
213             Counter<String> fileCounter = new Counter<>();
214 
215             for (String path : paths) {
216                 if (path.contains("/alias") || path.contains("/identity")) {
217                     continue;
218                 }
219                 String cleanedPath = CLDRFile.getNondraftNonaltXPath(path);
220 
221                 String oldValue = oldFile.getStringValue(cleanedPath);
222                 String newValue = newFile.getStringValue(path);
223                 String englishValue = english.getStringValue(cleanedPath);
224 
225                 // for debugging
226                 if (oldValue != null && oldValue.contains("{1} {0}")) {
227                     System.out.print("");
228                 }
229 
230                 if (equals(newValue, oldValue)) {
231                     continue;
232                 }
233 
234                 // get the actual place the data is stored
235                 // AND adjust if the same as root!
236 
237                 Status newStatus = new Status();
238                 String newFoundLocale = getStatus(newFile, newRoot, path, newValue, newStatus);
239 
240                 // At this point, we have two unequal values
241                 // TODO check for non-distinguishing attribute value differences
242 
243                 boolean isAliased = false;
244 
245                 // Skip deletions of alt-proposed
246 
247                 // if (newValue == null) {
248                 // if (path.contains("@alt=\"proposed")) {
249                 // continue;
250                 // }
251                 // }
252 
253                 // Skip if both inherited from the same locale, since we should catch it
254                 // in that locale.
255 
256                 // Mark as aliased if new locale or path is different
257                 if (!newStatus.pathWhereFound.equals(path)) {
258                     isAliased = true;
259                     // continue;
260                 }
261 
262                 if (!newFoundLocale.equals(locale)) {
263                     isAliased = true;
264                     // continue;
265                 }
266 
267                 // // skip if old locale or path is aliased
268                 // if (!oldFoundLocale.equals(locale)) {
269                 // //isAliased=true;
270                 // continue;
271                 // }
272                 //
273                 // // Skip if either found path is are different
274                 // if (!oldStatus.pathWhereFound.equals(cleanedPath)) {
275                 // //isAliased=true;
276                 // continue;
277                 // }
278 
279                 // Now check other aliases
280 
281                 // final boolean newIsAlias = !newStatus.pathWhereFound.equals(path);
282                 // if (newIsAlias) { // new is alias
283                 // // filter out cases of a new string that is found via alias
284                 // if (oldValue == null) {
285                 // continue;
286                 // }
287                 //
288                 // }
289 
290                 if (isAliased && !SHOW_ALIASED) {
291                     continue;
292                 }
293 
294                 // We definitely have a difference worth recording, so do so
295 
296                 String newFullPath = newFile.getFullXPath(path);
297                 final boolean reject = newFullPath != null && newFullPath.contains("@draft")
298                     && !newFullPath.contains("@draft=\"contributed\"");
299                 String status;
300                 if (reject) {
301                     status = "NOT-ACC";
302                 } else if (newValue == null) {
303                     status = "deleted";
304                 } else if (oldValue == null) {
305                     status = "added";
306                 } else {
307                     status = "changed";
308                 }
309                 String coreStatus = status;
310                 if (isAliased) {
311                     status = "I+" + status;
312                 }
313                 fileCounter.increment(status);
314                 totalCounter.increment(status);
315 
316                 String pretty_sort = prettyPathMaker.getPrettyPath(cleanedPath);
317                 String[] prettyPartsSort = pretty_sort.split("[|]");
318                 if (prettyPartsSort.length != 3) {
319                     System.out.println("Bad pretty path: " + pretty_sort + ", original: " + cleanedPath);
320                 }
321                 String prettySort1 = prettyPartsSort[0];
322                 String prettySort2 = prettyPartsSort[1];
323                 String prettySort3 = prettyPartsSort[2];
324 
325                 String pretty = prettyPathMaker.getOutputForm(pretty_sort);
326                 String escapedPath = "http://unicode.org/cldr/apps/survey?_=" + locale + "&xpath="
327                     + EscapingUtilities.urlEscape(cleanedPath);
328                 String[] prettyParts = pretty.split("[|]");
329                 if (prettyParts.length != 3) {
330                     System.out.println("Bad pretty path: " + pretty + ", original: " + cleanedPath);
331                 }
332                 String pretty1 = prettyParts[0];
333                 String pretty2 = prettyParts[1];
334                 String pretty3 = prettyParts[2];
335 
336                 // http://kwanyin.unicode.org/cldr-apps/survey?_=kw_GB&xpath=%2F%2Fldml%2FlocaleDisplayNames%2Flanguages%2Flanguage%5B%40type%3D%22mt%22%5D
337 
338                 table.addRow()
339                     .addCell(prettySort1)
340                     .addCell(prettySort2)
341                     .addCell(prettySort3)
342                     .addCell(escapedPath)
343                     .addCell(isAliased ? "I" : "")
344                     .addCell(pretty1)
345                     .addCell(pretty2)
346                     .addCell(pretty3)
347                     .addCell(englishValue == null ? "-" : englishValue)
348                     .addCell(coreStatus)
349                     .addCell(oldValue == null ? "-" : oldValue)
350                     .addCell(newValue == null ? "-" : newValue)
351                     .finishRow();
352 
353                 totalDifferences++;
354                 differences++;
355             }
356 
357             addToIndex(indexInfo, "", locale, localeName, fileCounter);
358             PrintWriter out = FileUtilities.openUTF8Writer(changesDirectory, locale + ".html");
359             String title = "Changes in " + localeDisplayName;
360             out.println("<html>"
361                 +
362                 "<head><meta http-equiv='Content-Type' content='text/html; charset=utf-8'>"
363                 + CldrUtility.LINE_SEPARATOR
364                 +
365                 "<title>"
366                 + title
367                 + "</title>"
368                 + CldrUtility.LINE_SEPARATOR
369                 +
370                 "<link rel='stylesheet' href='index.css' type='text/css'>"
371                 + CldrUtility.LINE_SEPARATOR
372                 +
373                 "<base target='_blank'>"
374                 + CldrUtility.LINE_SEPARATOR
375                 +
376                 "</head><body>"
377                 + CldrUtility.LINE_SEPARATOR
378                 +
379                 "<h1>"
380                 + title
381                 + "</h1>"
382                 + CldrUtility.LINE_SEPARATOR
383                 + "<a href='index.html'>Index</a> | <a href=\"http://unicode.org/cldr/data/docs/survey/vetting.html\"><b style=\"background-color: yellow;\"><i>Help: How to Vet</i></b></a>"
384                 + warningMessage);
385 
386             TablePrinter table2 = new TablePrinter()
387                 .setCaption("Totals")
388                 .addColumn("Inh.").setSortPriority(0)
389                 .addColumn("Status").setSortPriority(1)
390                 .addColumn("Total");
391 
392             for (String key : fileCounter.getKeysetSortedByKey()) {
393                 boolean inherited = key.startsWith("I+");
394                 table2.addRow()
395                     .addCell(inherited ? "I" : "")
396                     .addCell(inherited ? key.substring(2) : key)
397                     .addCell(format.format(fileCounter.getCount(key)))
398                     .finishRow();
399             }
400             out.println(table2);
401             out.println("<br>");
402             out.println(table);
403 
404             // show status on console
405 
406             System.out.println(locale + "\tDifferences:\t" + format.format(differences)
407                 + "\tPaths:\t" + format.format(paths.size())
408                 + "\tTime:\t" + timer);
409 
410             totalPaths += paths.size();
411             out.println(ShowData.dateFooter());
412             out.println(CldrUtility.ANALYTICS);
413             out.println("</body></html>");
414             out.close();
415         }
416         PrintWriter indexFile = FileUtilities.openUTF8Writer(changesDirectory, "index.html");
417         indexFile
418             .println("<html>"
419                 +
420                 "<head><meta http-equiv='Content-Type' content='text/html; charset=utf-8'>"
421                 + CldrUtility.LINE_SEPARATOR
422                 +
423                 "<title>"
424                 + "Change Summary"
425                 + "</title>"
426                 + CldrUtility.LINE_SEPARATOR
427                 +
428                 "<link rel='stylesheet' href='index.css' type='text/css'>"
429                 + CldrUtility.LINE_SEPARATOR
430                 +
431                 "<base target='_blank'>"
432                 + CldrUtility.LINE_SEPARATOR
433                 +
434                 "</head><body>"
435                 + CldrUtility.LINE_SEPARATOR
436                 +
437                 "<h1>"
438                 + "Change Summary"
439                 + "</h1>"
440                 + CldrUtility.LINE_SEPARATOR
441                 + "<a href=\"http://unicode.org/cldr/data/docs/survey/vetting.html\"><b style=\"background-color: yellow;\"><i>Help: How to Vet</i></b></a>"
442                 + warningMessage
443                 + "<table><tr>");
444 
445         String separator = "";
446         int last = 0;
447         for (R2<String, String> indexPair : indexInfo) {
448             int firstChar = indexPair.get0().codePointAt(0);
449             indexFile.append(firstChar == last ? separator
450                 : (last == 0 ? "" : "</td></tr>\n<tr>") + "<th>" + String.valueOf((char) firstChar) + "</th><td>")
451                 .append(indexPair.get1());
452             separator = " | ";
453             last = indexPair.get0().codePointAt(0);
454         }
455         indexFile.println("</tr></table>");
456         indexFile.println(ShowData.dateFooter());
457         indexFile.println(CldrUtility.ANALYTICS);
458         indexFile.println("</body></html>");
459         indexFile.close();
460 
461         System.out.println();
462 
463         for (String key : totalCounter.getKeysetSortedByKey()) {
464             System.out.println(key + "\t" + totalCounter.getCount(key));
465         }
466 
467         System.out.println("Total Differences:\t" + format.format(totalDifferences)
468             + "\tPaths:\t" + format.format(totalPaths)
469             + "\tTotal Time:\t" + format.format(totalTimer.getDuration()) + "ms");
470     }
471 
472     // static Transliterator urlHex = Transliterator.createFromRules("foo",
473     // "([^!(-*,-\\:A-Z_a-z~]) > &hex($1) ;" +
474     // ":: null;" +
475     // "'\\u00' > '%' ;"
476 // , Transliterator.FORWARD);
477 
478     private static NumberFormat format;
479 
addToIndex(Set<R2<String, String>> indexInfo, String title, final String locale, final String localeName)480     private static void addToIndex(Set<R2<String, String>> indexInfo, String title, final String locale,
481         final String localeName) {
482         addToIndex(indexInfo, title, locale, localeName, null);
483     }
484 
addToIndex(Set<R2<String, String>> indexInfo, String title, final String locale, final String localeName, Counter<String> fileCounter)485     private static void addToIndex(Set<R2<String, String>> indexInfo, String title, final String locale,
486         final String localeName, Counter<String> fileCounter) {
487         if (title.startsWith("ERROR")) {
488             indexInfo.add(R2.of(localeName,
489                 title + " " + localeName + " (" + locale + ")"));
490             return;
491         }
492         String counterString = "";
493         if (fileCounter != null) {
494             for (String s : fileCounter) {
495                 if (counterString.length() != 0) {
496                     counterString += "; ";
497                 }
498                 counterString += s.charAt(0) + ":" + format.format(fileCounter.getCount(s));
499             }
500         }
501         indexInfo.add(R2.of(localeName,
502             "<a href='" + locale + ".html'>" + title + localeName + " (" + locale + ")</a>"
503                 + (counterString.length() == 0 ? "" : " [" + counterString + "]")));
504     }
505 
506     // private static int accumulate(Set<R2<String,String>> rejected, int totalRejected,
507     // final String locale, String indicator, String oldValue, String newValue, String path) {
508     // String pretty = prettyPathMaker.getPrettyPath(path, false);
509     // String line = locale + "\t" + indicator +"\t\u200E[" + oldValue + "]\u200E\t\u200E[" + newValue + "]\u200E\t" +
510     // pretty;
511     // String pretty2 = prettyPathMaker.getOutputForm(pretty);
512     // rejected.add(Row.make(pretty2, line));
513     // totalRejected++;
514     // return totalRejected;
515     // }
516 
getStatus(CLDRFile oldFile, CLDRFile oldRoot, String path, String oldString, Status oldStatus)517     private static String getStatus(CLDRFile oldFile, CLDRFile oldRoot, String path,
518         String oldString, Status oldStatus) {
519         String oldLocale = oldFile.getSourceLocaleID(path, oldStatus);
520         if (!oldLocale.equals("root")) {
521             String oldRootValue = oldRoot.getStringValue(oldStatus.pathWhereFound);
522             if (equals(oldString, oldRootValue)) {
523                 oldLocale = "root";
524             }
525         }
526         return oldLocale;
527     }
528 
showSet(PrintWriter out, Set<R2<String, String>> rejected, final String locale, String title)529     private static void showSet(PrintWriter out, Set<R2<String, String>> rejected, final String locale, String title) {
530         if (rejected.size() != 0) {
531             out.println();
532             out.println(locale + "\t" + title + "\t" + rejected.size());
533             for (R2<String, String> prettyAndline : rejected) {
534                 out.println(prettyAndline.get1());
535             }
536         }
537     }
538 
equals(String newString, String oldString)539     private static boolean equals(String newString, String oldString) {
540         if (newString == null) {
541             return oldString == null;
542         }
543         return newString.equals(oldString);
544     }
545 
546 }
547