1 /*
2  **********************************************************************
3  * Copyright (c) 2002-2004, International Business Machines
4  * Corporation and others.  All Rights Reserved.
5  **********************************************************************
6  * Author: Mark Davis
7  **********************************************************************
8  */
9 package org.unicode.cldr.tool;
10 
11 import java.io.File;
12 import java.io.IOException;
13 import java.io.PrintWriter;
14 import java.util.Arrays;
15 import java.util.Comparator;
16 import java.util.Date;
17 import java.util.EnumSet;
18 import java.util.HashMap;
19 import java.util.HashSet;
20 import java.util.Iterator;
21 import java.util.Locale;
22 import java.util.Map;
23 import java.util.Map.Entry;
24 import java.util.Set;
25 import java.util.TreeMap;
26 import java.util.TreeSet;
27 import java.util.regex.Matcher;
28 
29 import org.unicode.cldr.draft.FileUtilities;
30 import org.unicode.cldr.tool.ShowData.DataShower;
31 import org.unicode.cldr.util.CLDRFile;
32 import org.unicode.cldr.util.CLDRFile.Status;
33 import org.unicode.cldr.util.CLDRPaths;
34 import org.unicode.cldr.util.CldrUtility;
35 import org.unicode.cldr.util.Factory;
36 import org.unicode.cldr.util.FileCopier;
37 import org.unicode.cldr.util.LanguageTagParser;
38 import org.unicode.cldr.util.LanguageTagParser.Fields;
39 import org.unicode.cldr.util.LocaleIDParser;
40 import org.unicode.cldr.util.PathHeader;
41 import org.unicode.cldr.util.PathHeader.PageId;
42 import org.unicode.cldr.util.PathHeader.SurveyToolStatus;
43 import org.unicode.cldr.util.PatternCache;
44 import org.unicode.cldr.util.SimpleFactory;
45 import org.unicode.cldr.util.StringId;
46 import org.unicode.cldr.util.TransliteratorUtilities;
47 import org.unicode.cldr.util.XPathParts;
48 import org.xml.sax.SAXException;
49 
50 import com.google.common.collect.ImmutableMap;
51 import com.ibm.icu.dev.tool.UOption;
52 import com.ibm.icu.dev.util.UnicodeMap;
53 import com.ibm.icu.impl.Relation;
54 import com.ibm.icu.impl.Utility;
55 import com.ibm.icu.lang.UCharacter;
56 import com.ibm.icu.lang.UScript;
57 import com.ibm.icu.text.BreakIterator;
58 import com.ibm.icu.text.Collator;
59 import com.ibm.icu.text.Normalizer;
60 import com.ibm.icu.text.RuleBasedCollator;
61 import com.ibm.icu.text.RuleBasedNumberFormat;
62 import com.ibm.icu.text.Transliterator;
63 import com.ibm.icu.text.UTF16;
64 import com.ibm.icu.text.UnicodeSet;
65 import com.ibm.icu.text.UnicodeSetIterator;
66 import com.ibm.icu.util.Output;
67 import com.ibm.icu.util.ULocale;
68 
69 /**
70  * This is a simple class that walks through the CLDR hierarchy.
71  * It gathers together all the items from all the locales that share the
72  * same element chain, and thus presents a "sideways" view of the data, in files called
 * by_type/X.html, where X is a type. X may be the concatenation of more than
 * one element, where the file would otherwise be too large.
75  *
76  * @author medavis
77  */
78 /*
79  * Notes:
80  * http://xml.apache.org/xerces2-j/faq-grammars.html#faq-3
81  * http://developers.sun.com/dev/coolstuff/xml/readme.html
82  * http://lists.xml.org/archives/xml-dev/200007/msg00284.html
83  * http://java.sun.com/j2se/1.4.2/docs/api/org/xml/sax/DTDHandler.html
84  */
85 public class GenerateSidewaysView {
86     private static final String DIR_NAME = "by_type";
87     // debug flags
88     static final boolean DEBUG = false;
89     static final boolean DEBUG2 = false;
90     static final boolean DEBUG_SHOW_ADD = false;
91     static final boolean DEBUG_ELEMENT = false;
92     static final boolean DEBUG_SHOW_BAT = false;
93 
94     static final boolean FIX_ZONE_ALIASES = true;
95 
96     private static final int HELP1 = 0,
97         HELP2 = 1,
98         SOURCEDIR = 2,
99         DESTDIR = 3,
100         MATCH = 4,
101         SKIP = 5,
102         TZADIR = 6,
103         NONVALIDATING = 7,
104         SHOW_DTD = 8,
105         TRANSLIT = 9,
106         PATH = 10;
107 
108     private static final UOption[] options = {
109         UOption.HELP_H(),
110         UOption.HELP_QUESTION_MARK(),
111         UOption.SOURCEDIR().setDefault(CLDRPaths.MAIN_DIRECTORY),
112         UOption.DESTDIR().setDefault(CLDRPaths.CHART_DIRECTORY + DIR_NAME + "/"), // C:/cvsdata/unicode/cldr/diff/by_type/
113         UOption.create("match", 'm', UOption.REQUIRES_ARG).setDefault(".*"),
114         UOption.create("skip", 'z', UOption.REQUIRES_ARG).setDefault("zh_(C|S|HK|M).*"),
115         UOption.create("tzadir", 't', UOption.REQUIRES_ARG).setDefault(
116             "C:\\ICU4J\\icu4j\\src\\com\\ibm\\icu\\dev\\tool\\cldr\\"),
117         UOption.create("nonvalidating", 'n', UOption.NO_ARG),
118         UOption.create("dtd", 'w', UOption.NO_ARG),
119         UOption.create("transliterate", 'y', UOption.NO_ARG),
120         UOption.create("path", 'p', UOption.REQUIRES_ARG),
121     };
122 
123     private static final Matcher altProposedMatcher = CLDRFile.ALT_PROPOSED_PATTERN.matcher("");
124     // private static final UnicodeSet ALL_CHARS = new UnicodeSet(0, 0x10FFFF);
125     protected static final UnicodeSet COMBINING = new UnicodeSet("[[:m:]]").freeze();
126 
getFirstScript(UnicodeSet exemplars)127     static int getFirstScript(UnicodeSet exemplars) {
128         for (UnicodeSetIterator it = new UnicodeSetIterator(exemplars); it.next();) {
129             int script = UScript.getScript(it.codepoint);
130             if (script == UScript.COMMON || script == UScript.INHERITED) {
131                 continue;
132             }
133             return script;
134         }
135         return UScript.COMMON;
136     }
137 
138     static Comparator<Object> UCA;
139     static {
140         RuleBasedCollator UCA2 = (RuleBasedCollator) Collator.getInstance(ULocale.ROOT);
141         UCA2.setNumericCollation(true);
142         UCA2.setStrength(Collator.IDENTICAL);
143         UCA = new org.unicode.cldr.util.MultiComparator(UCA2, new UTF16.StringComparator(true, false, 0));
144     }
145 
146     private static Map<PathHeader, Map<String, Set<String>>> path_value_locales = new TreeMap<PathHeader, Map<String, Set<String>>>();
147     private static XPathParts parts = new XPathParts(null, null);
148     private static long startTime = System.currentTimeMillis();
149 
150     static RuleBasedCollator standardCollation = (RuleBasedCollator) Collator.getInstance(ULocale.ENGLISH);
151     static {
152         standardCollation.setStrength(Collator.IDENTICAL);
153         standardCollation.setNumericCollation(true);
154     }
155 
156     private static CLDRFile english;
157     // private static DataShower dataShower = new DataShower();
158     private static Matcher pathMatcher;
159 
main(String[] args)160     public static void main(String[] args) throws SAXException, IOException {
161         startTime = System.currentTimeMillis();
162         ToolUtilities.registerExtraTransliterators();
163         UOption.parseArgs(args, options);
164 
165         pathMatcher = options[PATH].value == null ? null : PatternCache.get(options[PATH].value).matcher("");
166 
167         File[] paths = {
168             new File(CLDRPaths.MAIN_DIRECTORY),
169             new File(CLDRPaths.ANNOTATIONS_DIRECTORY),
170             new File(CLDRPaths.SUBDIVISIONS_DIRECTORY)
171         };
172         Factory cldrFactory = SimpleFactory.make(paths, options[MATCH].value);
173 
174         // Factory cldrFactory = Factory.make(options[SOURCEDIR].value, options[MATCH].value);
175         english = cldrFactory.make("en", true);
176         pathHeaderFactory = PathHeader.getFactory(english);
177 
178         FileCopier.ensureDirectoryExists(options[DESTDIR].value);
179         FileCopier.copy(GenerateSidewaysView.class, "bytype-index.css", options[DESTDIR].value, "index.css");
180 
181         // now get the info
182 
183         loadInformation(cldrFactory);
184         String oldMain = "";
185         PrintWriter out = null;
186 
187         System.out.println("Getting types " + path_value_locales.size());
188         // Set<String> types = new TreeSet<String>();
189         // for (PathHeader path : path_value_locales.keySet()) {
190         // String main = getFileName2(path);
191         // if (!main.equals(oldMain)) {
192         // oldMain = main;
193         // types.add(main);
194         // }
195         // }
196         String headerString = getHeader(path_value_locales.keySet());
197         FileCopier.copyAndReplace(GenerateSidewaysView.class, "bytype-index.html", options[DESTDIR].value, "index.html",
198             ImmutableMap.of(
199                 "%header%", headerString,
200                 "%version%", ToolConstants.CHART_DISPLAY_VERSION,
201                 "%index-title%", "Main Charts Index",
202                 "%date%", CldrUtility.isoFormatDateOnly(new Date())));
203 //        FileUtilities.copyFile(GenerateSidewaysView.class, "bytype-index.html", options[DESTDIR].value, "index.html",
204 //            new String[] { "%header%", headerString });
205 
206         System.out.println("Printing files in " + new File(options[DESTDIR].value).getAbsolutePath());
207         // Transliterator toLatin = Transliterator.getInstance("any-latin");
208         toHTML = TransliteratorUtilities.toHTML;
209         // UnicodeSet BIDI_R = new UnicodeSet("[[:Bidi_Class=R:][:Bidi_Class=AL:]]");
210 
211         String oldHeader = "";
212         Output<PrintWriter> tsvFile = new Output<>();
213 
214         for (PathHeader path : path_value_locales.keySet()) {
215             String main = getFileName2(path, null);
216             if (!main.equals(oldMain)) {
217                 oldMain = main;
218                 out = start(out, main, headerString, path.getSection() + ":" + path.getPage(), tsvFile);
219                 out.println("<table class='table'>");
220                 oldHeader = "";
221             }
222             String key = path.getCode();
223             String anchor = toHTML.transliterate(key);
224 
225             String originalPath = path.getOriginalPath(); // prettyPath.getOriginal(path);
226             String englishValue = english.getStringValue(originalPath);
227             if (englishValue != null) {
228                 englishValue = "English: ‹" + englishValue + "›";
229             } else {
230                 englishValue = "";
231             }
232 
233             String header = path.getHeader();
234             if (!header.equals(oldHeader) && !header.equals("null")) {
235                 out.println("<tr><th colSpan='2' class='pathHeader'>" + CldrUtility.getDoubleLinkedText(header)
236                     + "</th></tr>");
237                 oldHeader = header;
238             }
239             String anchorId = Long.toHexString(StringId.getId(path.getOriginalPath()));
240             out.println("<tr>" +
241                 "<th class='path'>" + CldrUtility.getDoubleLinkedText(anchorId, anchor) + "</th>" +
242                 "<th class='path'>" + toHTML.transliterate(englishValue) + "</th>" +
243                 "</tr>");
244             Map<String, Set<String>> value_locales = path_value_locales.get(path);
245             for (String value : value_locales.keySet()) {
246                 // String outValue = toHTML.transliterate(value);
247                 // String transValue = value;
248                 // try {
249                 // transValue = toLatin.transliterate(value);
250                 // } catch (RuntimeException e) {
251                 // }
252                 // if (!transValue.equals(value)) {
253                 // outValue = "<span title='" + toHTML.transliterate(transValue) + "'>" + outValue + "</span>";
254                 // }
255                 String valueClass = " class='value'";
256                 if (DataShower.getBidiStyle(value).length() != 0) {
257                     valueClass = " class='rtl_value'";
258                 }
259                 out.println("<tr><th" + valueClass + ">" + DataShower.getPrettyValue(value) + "</th><td class='td'>");
260                 tsvFile.value.print(
261                     path.getSection()
262                         + "\t" + path.getPage()
263                         + "\t" + path.getHeader()
264                         + "\t" + path.getCode()
265                         + "\t" + value
266                         + "\t");
267 
268                 Set<String> locales = value_locales.get(value);
269                 boolean first = true;
270                 boolean containsRoot = locales.contains("root");
271                 for (String locale : locales) {
272                     if (first)
273                         first = false;
274                     else
275                         out.print(" ");
276                     if (locale.endsWith("*")) {
277                         locale = locale.substring(0, locale.length() - 1);
278                         out.print("<i>\u00B7" + locale + "\u00B7</i>");
279                         tsvFile.value.print("\u00B7" + locale + "\u00B7");
280                     } else if (!containsRoot) {
281                         out.print("\u00B7" + locale + "\u00B7");
282                         tsvFile.value.print("\u00B7" + locale + "\u00B7");
283                     } else if (locale.contains("_")) {
284                         // not same as root, but need to test for parent
285                         // if the parent is not in the same list, then we include anyway.
286                         // Cf http://unicode.org/cldr/trac/ticket/7228
287                         String parent = LocaleIDParser.getParent(locale);
288                         if (!locales.contains(parent)) {
289                             out.print("<b>\u00B7" + locale + "\u00B7</b>");
290                             tsvFile.value.print("\u00B7" + locale + "\u00B7");
291                         }
292                     }
293                 }
294                 if (containsRoot) {
295                     out.print("<b>\u00B7all\u00B7others\u00B7</b>");
296                     tsvFile.value.print("\u00B7all-others\u00B7");
297                 }
298                 out.println("</td></tr>");
299                 tsvFile.value.println();
300             }
301         }
302         for (String[] pair : EXEMPLARS) {
303             showExemplars(out, headerString, pair[0], pair[1], pair[2], tsvFile);
304         }
305         finish(out, tsvFile.value);
306         finishAll(out, tsvFile.value);
307         System.out.println("Done in " + new RuleBasedNumberFormat(new ULocale("en"), RuleBasedNumberFormat.DURATION)
308             .format((System.currentTimeMillis() - startTime) / 1000.0));
309     }
310 
311     // static Comparator UCA;
312     // static {
313     // RuleBasedCollator UCA2 = (RuleBasedCollator) Collator.getInstance(ULocale.ROOT);
314     // UCA2.setNumericCollation(true);
315     // UCA2.setStrength(UCA2.IDENTICAL);
316     // UCA = new CollectionUtilities.MultiComparator(UCA2, new UTF16.StringComparator(true, false, 0) );
317     // }
318 
319     static final String[][] EXEMPLARS = {
320         { "//ldml/characters/exemplarCharacters", "main", "Main Exemplars" },
321         { "//ldml/characters/exemplarCharacters[@type=\"punctuation\"]", "punctuation", "Punctuation Exemplars" },
322         { "//ldml/characters/exemplarCharacters[@type=\"index\"]", "index", "Index Exemplars" },
323         // TODO look at numbers, auxiliary
324     };
325 
showExemplars(PrintWriter out, String headerString, String pathName, String variant, String title, Output<PrintWriter> tsvFile)326     private static PrintWriter showExemplars(PrintWriter out, String headerString, String pathName, String variant, String title,
327         Output<PrintWriter> tsvFile)
328         throws IOException {
329         PathHeader cleanPath = fixPath(pathName, null);
330         String filename = getFileName2(cleanPath, variant);
331         out = start(out, filename, headerString, title, tsvFile);
332         Map<String, Set<String>> value_locales = path_value_locales.get(cleanPath);
333 
334         // TODO change logic so that aux characters characters work well.
335 
336         Map<String, UnicodeMap<Set<String>>> script_UnicodeMap = new TreeMap<String, UnicodeMap<Set<String>>>();
337         // UnicodeMap mapping = new UnicodeMap();
338         UnicodeSet stuffToSkip = new UnicodeSet("[:Han:]");
339 
340         // get the locale information
341         UnicodeSet totalExemplars = new UnicodeSet();
342         for (String value : value_locales.keySet()) {
343             // flatten out UnicodeSet
344             UnicodeSet exemplars = new UnicodeSet(value);
345             if (variant.equals("main")) {
346                 UnicodeSet extras = new UnicodeSet();
347                 for (String item : exemplars) {
348                     extras.addAll(Normalizer.normalize(item, Normalizer.NFD));
349                 }
350                 exemplars.addAll(extras);
351             }
352             totalExemplars.addAll(exemplars);
353             exemplars.removeAll(stuffToSkip);
354 
355             Set<String> locales = value_locales.get(value);
356             //String script = UScript.getName(getFirstScript(exemplars));
357             for (String locale : locales) {
358                 checkTr(script_UnicodeMap);
359                 String key = locale.endsWith("*") ? locale.substring(0, locale.length() - 1) : locale;
360                 String script = LOCALE_TO_SCRIPT.get(key);
361                 // try a few variants until we get the script
362                 if (script == null && key.contains("_")) {
363                     String simpleParent = LanguageTagParser.getSimpleParent(key);
364                     script = LOCALE_TO_SCRIPT.get(simpleParent);
365                     if (script == null && simpleParent.contains("_")) {
366                         simpleParent = LanguageTagParser.getSimpleParent(simpleParent);
367                         script = LOCALE_TO_SCRIPT.get(simpleParent);
368                     }
369                 }
370                 if (script == null) {
371                     script = UScript.getName(UScript.UNKNOWN);
372                 }
373                 Set<String> temp = new HashSet<String>();
374                 temp.add(locale);
375                 checkTr(script_UnicodeMap);
376                 UnicodeMap<Set<String>> mapping = script_UnicodeMap.get(script);
377                 if (mapping == null) {
378                     script_UnicodeMap.put(script, mapping = new UnicodeMap<Set<String>>());
379                 }
380                 checkTr(script_UnicodeMap);
381                 mapping.composeWith(exemplars, temp, setComposer);
382                 checkTr(script_UnicodeMap);
383             }
384         }
385         System.out.println("@@@TOTAL:\t" + variant + "\t" + totalExemplars.toPattern(false));
386         for (String script : script_UnicodeMap.keySet()) {
387             UnicodeMap<Set<String>> mapping = script_UnicodeMap.get(script);
388             writeCharToLocaleMapping(out, script, mapping);
389         }
390         return out;
391     }
392 
checkTr(Map<String, UnicodeMap<Set<String>>> script_UnicodeMap)393     private static void checkTr(Map<String, UnicodeMap<Set<String>>> script_UnicodeMap) {
394         UnicodeMap<Set<String>> unicodeMap = script_UnicodeMap.get("Cyrillic");
395         if (unicodeMap == null) {
396             return;
397         }
398         Set<String> foo = unicodeMap.get(0x21);
399         if (foo == null) {
400             return;
401         }
402         if (foo.contains("tr")) {
403             System.out.println("huh?");
404         }
405     }
406 
writeCharToLocaleMapping(PrintWriter out, String script, UnicodeMap<Set<String>> mapping)407     private static void writeCharToLocaleMapping(PrintWriter out, String script, UnicodeMap<Set<String>> mapping) {
408         BreakIterator charBreaks = BreakIterator.getCharacterInstance(ULocale.ROOT); // TODO, make default language for
409         // script
410         System.out.println("@@Exemplars for\t" + script + "\t" + mapping.keySet());
411         if (script.equals("Hangul")) { //  || script.equals("Common")
412             return; // skip these
413         }
414         // find out all the locales and all the characters
415         Set<String> allLocales = new TreeSet<String>(UCA);
416         Set<String> allChars = new TreeSet<String>(UCA);
417         Set<String> allStrings = new TreeSet<String>(UCA);
418         for (Set<String> locales : mapping.getAvailableValues()) {
419             allLocales.addAll(locales);
420             UnicodeSet unicodeSet = mapping.keySet(locales);
421             for (String item : unicodeSet) {
422                 charBreaks.setText(item);
423                 int endFirst = charBreaks.next();
424                 if (endFirst == item.length()) {
425                     allChars.add(item);
426                 } else {
427                     allStrings.add(item);
428                 }
429             }
430         }
431         // get the columns, and show them
432         out.println("<table class='table' style='width:1%'>");
433         out.println("<caption>" + script + "</caption>");
434         exemplarHeader(out, allChars);
435 
436         for (String locale : allLocales) {
437             String headerHeader = "<th class='head'>" + cleanLocale(locale, false) + "</th><td class='head nowrap left'>"
438                 + cleanLocale(locale, true) + "</td>";
439             out.println("<tr>");
440             out.println(headerHeader);
441 
442             for (String item : allChars) {
443                 // String exemplarsWithoutBrackets = displayExemplars(item);
444                 if (mapping.get(item).contains(locale)) {
445                     out.println("<td class='cell'" +
446                         ">" + displayCharacter(item) + "</td>");
447                 } else {
448                     out.println("<td class='empty'>\u00a0</td>");
449                 }
450             }
451             // now strings, if any
452             StringBuilder strings = new StringBuilder();
453             int lastLineStart = 0;
454             for (String item : allStrings) {
455                 // String exemplarsWithoutBrackets = displayExemplars(item);
456                 if (mapping.get(item).contains(locale)) {
457                     int str_len = strings.length();
458                     if (str_len != 0) {
459                         if (str_len - lastLineStart > 20) {
460                             strings.append(System.lineSeparator());
461                             lastLineStart = str_len;
462                         } else {
463                             strings.append(' ');
464                         }
465                     }
466                     strings.append(displayCharacter(item));
467                 }
468             }
469             if (strings.length() == 0) {
470                 out.println("<td class='empty'>\u00a0</td>");
471             } else {
472                 out.println("<td class='cell nowrap'>" + displayCharacter(strings.toString()).replace(System.lineSeparator(), "<br>")
473                     + "</td>");
474             }
475 
476             out.println(headerHeader);
477             out.println("</tr>");
478         }
479         exemplarHeader(out, allChars);
480         out.println("</table>");
481         out.flush();
482     }
483 
characterTitle(String item)484     private static String characterTitle(String item) {
485         return ("title='U+" +
486             toHTML.transform(
487                 Utility.hex(item, 4, ", U+", true, new StringBuilder())
488                     + " " + UCharacter.getName(item, ", "))
489             + "'");
490     }
491 
exemplarHeader(PrintWriter out, Set<String> allChars)492     private static void exemplarHeader(PrintWriter out, Set<String> allChars) {
493         out.println("<tr>");
494         out.println("<th class='head nowrap' colSpan='2'>Locale \\\u00a0Chars</th>");
495         for (String item : allChars) {
496             out.println("<th class='head' " + characterTitle(item) + ">" + displayCharacter(item) + "</th>");
497         }
498         out.println("<th class='head'>Clusters</th>");
499         out.println("<th class='head nowrap' colSpan='2'>Locale \\\u00a0Chars</th>");
500         out.println("</tr>");
501     }
502 
503     static final UnicodeSet NONSPACING = new UnicodeSet("[[:Mn:][:Me:][:default_ignorable_code_point:]]").freeze();
504 
displayCharacter(String item)505     public static String displayCharacter(String item) {
506         if (item.length() == 0) return "<i>none</i>";
507         int ch = item.codePointAt(0);
508         if (NONSPACING.contains(ch)) {
509             item = "\u00a0" + item + "\u00a0";
510         }
511         String result = toHTML.transform(item);
512         return result;
513     }
514 
515     static LanguageTagParser cleanLocaleParser = new LanguageTagParser();
516     static Set<Fields> allButScripts = EnumSet.allOf(Fields.class);
517     static {
518         allButScripts.remove(Fields.SCRIPT);
519     }
520 
cleanLocale(String item, boolean name)521     private static String cleanLocale(String item, boolean name) {
522         if (item == null) {
523             return "<i>null</i>";
524         }
525         boolean draft = item.endsWith("*");
526         if (draft) {
527             item = item.substring(0, item.length() - 1);
528         }
529         cleanLocaleParser.set(item);
530         item = cleanLocaleParser.toString(allButScripts);
531         String core = item;
532         item = toHTML.transform(item);
533         if (name) {
534             item = english.getName(core);
535             item = item == null ? "<i>null</i>" : toHTML.transform(item);
536         }
537         if (draft) {
538             item = "<i>" + item + "</i>";
539         }
540         return item;
541     }
542 
543     // private static void showExemplarRow(PrintWriter out, Set<String> allLocales, UnicodeSet lastChars, Set locales) {
544     // String exemplarsWithoutBrackets = displayExemplars(lastChars);
545     // out.println("<tr><th class='head'>" + exemplarsWithoutBrackets + "</th>");
546     // for (String item : allLocales) {
547     // String cleanItem;
548     // if (locales.contains(item)) {
549     // cleanItem = "<th class='value'>" + cleanLocale(item, false) + "</th>";
550     // } else {
551     // cleanItem = "<td class='value'>\u00a0</td>";
552     // }
553     // out.println(cleanItem);
554     // }
555     // out.println("</tr>");
556     // }
557 
558     // private static final StringTransform MyTransform = new StringTransform() {
559     //
560     // public String transform(String source) {
561     // StringBuilder builder = new StringBuilder();
562     // int cp = 0;
563     // builder.append("<span title='");
564     // String prefix = "";
565     // for (int i = 0; i < source.length(); i += UTF16.getCharCount(cp)) {
566     // cp = UTF16.charAt(source, i);
567     // if (i == 0) {
568     // if (COMBINING.contains(cp)) {
569     // prefix = "\u25CC";
570     // }
571     // } else {
572     // builder.append(" + ");
573     // }
574     // builder.append("U+").append(com.ibm.icu.impl.Utility.hex(cp,4)).append(' ').append(UCharacter.getExtendedName(cp));
575     // }
576     // builder.append("'>").append(prefix).append(source).append("</span>");
577     // return builder.toString();
578     // }
579     //
580     // };
581 
582     // private static String displayExemplars(UnicodeSet lastChars) {
583     // String exemplarsWithoutBrackets = new PrettyPrinter()
584     // .setOrdering(UCA != null ? UCA : Collator.getInstance(ULocale.ROOT))
585     // .setSpaceComparator(UCA != null ? UCA : Collator.getInstance(ULocale.ROOT)
586     // .setStrength2(Collator.PRIMARY))
587     // .setCompressRanges(true)
588     // .setToQuote(ALL_CHARS)
589     // .setQuoter(MyTransform)
590     // .format(lastChars);
591     // exemplarsWithoutBrackets = exemplarsWithoutBrackets.substring(1, exemplarsWithoutBrackets.length() - 1);
592     // return exemplarsWithoutBrackets;
593     // }
594 
595     // private static boolean isNextCharacter(String last, String value) {
596     // if (UTF16.hasMoreCodePointsThan(last, 1)) return false;
597     // if (UTF16.hasMoreCodePointsThan(value, 1)) return false;
598     // int lastChar = UTF16.charAt(last,0);
599     // int valueChar = UTF16.charAt(value,0);
600     // return lastChar + 1 == valueChar;
601     // }
602 
603     static UnicodeMap.Composer<Set<String>> setComposer = new UnicodeMap.Composer<Set<String>>() {
604         public Set<String> compose(int codepoint, String string, Set<String> a, Set<String> b) {
605             if (a == null) {
606                 return b;
607             } else if (b == null) {
608                 return a;
609             } else {
610                 TreeSet<String> result = new TreeSet<String>(a);
611                 result.addAll(b);
612                 return result;
613             }
614         }
615     };
616 
617     static Map<String, String> LOCALE_TO_SCRIPT = new HashMap<String, String>();
618 
loadInformation(Factory cldrFactory)619     private static void loadInformation(Factory cldrFactory) {
620         Set<String> alllocales = cldrFactory.getAvailable();
621         String[] postFix = new String[] { "" };
622         // gather all information
623         // TODO tweek for value-laden attributes
624         for (String localeID : alllocales) {
625             System.out.println("Loading: " + localeID);
626             System.out.flush();
627 
628             CLDRFile cldrFile;
629             try {
630                 cldrFile = cldrFactory.make(localeID, localeID.equals("root"));
631             } catch (IllegalArgumentException e) {
632                 System.err.println("Couldn't open " + localeID);
633                 continue;
634             }
635             if (cldrFile.isNonInheriting()) continue;
636             for (String path : cldrFile) {
637                 if (pathMatcher != null && !pathMatcher.reset(path).matches()) {
638                     continue;
639                 }
640                 if (altProposedMatcher.reset(path).matches()) {
641                     continue;
642                 }
643                 if (path.indexOf("/alias") >= 0) continue;
644                 if (path.indexOf("/identity") >= 0) continue;
645                 if (path.indexOf("/references") >= 0) continue;
646                 PathHeader cleanPath = fixPath(path, postFix);
647                 final SurveyToolStatus surveyToolStatus = cleanPath.getSurveyToolStatus();
648                 if (surveyToolStatus == SurveyToolStatus.DEPRECATED || surveyToolStatus == SurveyToolStatus.HIDE) {
649                     // System.out.println("Skipping " + path);
650                     continue;
651                 }
652                 String fullPath = cldrFile.getFullXPath(path);
653                 String value = getValue(cldrFile, path, fullPath);
654                 if (value == null) {
655                     continue;
656                 }
657                 if (fullPath.indexOf("[@draft=\"unconfirmed\"]") >= 0
658                     || fullPath.indexOf("[@draft=\"provisional\"]") >= 0) {
659                     postFix[0] = "*";
660                 }
661                 if (path.equals("//ldml/characters/exemplarCharacters")) {
662                     UnicodeSet exemplars = new UnicodeSet(value);
663                     String script = UScript.getName(getFirstScript(exemplars));
664                     LOCALE_TO_SCRIPT.put(localeID, script);
665                 }
666                 Map<String, Set<String>> value_locales = path_value_locales.get(cleanPath);
667                 if (value_locales == null) {
668                     path_value_locales.put(cleanPath, value_locales = new TreeMap<String, Set<String>>(
669                         standardCollation));
670                 }
671                 Set<String> locales = value_locales.get(value);
672                 if (locales == null) {
673                     value_locales.put(value, locales = new TreeSet<String>());
674                 }
675                 locales.add(localeID + postFix[0]);
676             }
677         }
678         Relation<String, String> sorted = Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class);
679         for (Entry<String, String> s : LOCALE_TO_SCRIPT.entrySet()) {
680             sorted.put(s.getValue(), s.getKey());
681         }
682         for (Entry<String, Set<String>> s : sorted.keyValuesSet()) {
683             System.out.println(s);
684         }
685     }
686 
    /**
     * Factory that {@code fixPath} uses to turn an XPath into a {@link PathHeader}.
     * NOTE(review): only the declaration is visible here; it must be assigned before
     * {@code fixPath} is first called - confirm where that happens.
     */
    static PathHeader.Factory pathHeaderFactory;
688 
689     // static org.unicode.cldr.util.PrettyPath prettyPath = new org.unicode.cldr.util.PrettyPath();
690     /**
691      *
692      */
fixPath(String path, String[] localePrefix)693     private static PathHeader fixPath(String path, String[] localePrefix) {
694         if (localePrefix != null) localePrefix[0] = "";
695         //        if (path.indexOf("[@alt=") >= 0 || path.indexOf("[@draft=") >= 0) {
696         //            if (localePrefix != null) localePrefix[0] = "*";
697         //            path = removeAttributes(path, skipSet);
698         //        }
699         // if (usePrettyPath) path = prettyPath.getPrettyPath(path);
700         return pathHeaderFactory.fromPath(path);
701     }
702 
removeAttributes(String xpath, Set<String> skipAttributes)703     private static String removeAttributes(String xpath, Set<String> skipAttributes) {
704         XPathParts parts = new XPathParts(null, null).set(xpath);
705         removeAttributes(parts, skipAttributes);
706         return parts.toString();
707     }
708 
709     /**
710      *
711      */
removeAttributes(XPathParts parts, Set<String> skipAttributes)712     private static void removeAttributes(XPathParts parts, Set<String> skipAttributes) {
713         for (int i = 0; i < parts.size(); ++i) {
714             // String element = parts.getElement(i);
715             Map<String, String> attributes = parts.getAttributes(i);
716             for (Iterator<String> it = attributes.keySet().iterator(); it.hasNext();) {
717                 String attribute = it.next();
718                 if (skipAttributes.contains(attribute)) it.remove();
719             }
720         }
721     }
722 
    /** Attribute names ("draft" and "alt") that {@code getValue} strips from a path via {@code removeAttributes}. */
    static Set<String> skipSet = new HashSet<String>(Arrays.asList("draft", "alt"));

    /**
     * Shared scratch object that {@code getValue} passes to {@code CLDRFile.getSourceLocaleID}
     * and then reads {@code pathWhereFound} from. It is overwritten on every call.
     */
    static Status status = new Status();
726 
727     /**
728      *
729      */
getValue(CLDRFile cldrFile, String path, String fullPath)730     private static String getValue(CLDRFile cldrFile, String path, String fullPath) {
731         String value = cldrFile.getStringValue(path);
732         if (value == null) {
733             System.out.println("Null value for " + path);
734             return value;
735         }
736         cldrFile.getSourceLocaleID(path, status);
737         if (!path.equals(status.pathWhereFound)) {
738             // value = "[" + prettyPath.getPrettyPath(status.pathWhereFound, false) + "]";
739             value = null;
740             return value;
741         }
742         if (value.length() == 0) {
743             parts.set(fullPath);
744             removeAttributes(parts, skipSet);
745             int limit = parts.size();
746             value = parts.toString(limit - 1, limit);
747             return value;
748         }
749         return value;
750     }
751 
getFileName2(PathHeader header, String suffix)752     private static String getFileName2(PathHeader header, String suffix) {
753         String result = (header.getSection() + "." + header.getPage())
754             .replace(" ", "_")
755             .replace("/", "_")
756             .replace("(", "_")
757             .replace(")", "_");
758         if (suffix != null) {
759             result += "." + suffix;
760         }
761         return result.toLowerCase(Locale.ENGLISH);
762     }
763 
    /**
     * Two-slot buffer that {@code writeHeader} hands to {@code ShowData.getChartTemplate}.
     * Slot 0 is printed at the top of each page by {@code writeHeader}; slot 1 is printed at the
     * bottom by {@code finish}.
     */
    static String[] headerAndFooter = new String[2];
    // NOTE(review): not referenced in this part of the file (getHeader uses
    // TransliteratorUtilities.toHTML instead) - confirm whether this field is still needed.
    private static Transliterator toHTML;
766 
767     /**
768      * @param tsvFile TODO
769      * @param path2
770      *
771      */
start(PrintWriter out, String main, String headerString, String title, Output<PrintWriter> tsvFile)772     private static PrintWriter start(PrintWriter out, String main, String headerString, String title, Output<PrintWriter> tsvFile)
773         throws IOException {
774         finish(out, tsvFile.value);
775         out = writeHeader(main, title, tsvFile);
776         out.println(headerString);
777         return out;
778     }
779 
getHeader(Set<PathHeader> set)780     public static String getHeader(Set<PathHeader> set) {
781         StringBuffer out = new StringBuffer("<table class='simple'><tr>");
782         String lastMain = "";
783         String lastSub = "";
784         for (PathHeader pathHeader : set) {
785             String mainName = pathHeader.getSection();
786             String subName = TransliteratorUtilities.toHTML.transform(pathHeader.getPage());
787             if (!mainName.equals(lastMain)) {
788                 if (lastMain.length() != 0) {
789                     out.append("</tr>" + System.lineSeparator() + "<tr>");
790                 }
791                 out.append("<th align='right' nowrap style='vertical-align: top'><b>"
792                     + TransliteratorUtilities.toHTML.transform(mainName)
793                     + ":&nbsp;</b></th><td>");
794                 lastMain = mainName;
795                 lastSub = subName;
796             } else if (!subName.equals(lastSub)) {
797                 out.append(" | ");
798                 lastSub = subName;
799             } else {
800                 continue; // identical, skip
801             }
802             out.append("<a href='" + getFileName2(pathHeader, null) + ".html'>" + subName + "</a>");
803             if (pathHeader.getPageId() == PageId.Alphabetic_Information) {
804                 for (String[] pair : EXEMPLARS) {
805                     out.append(" | <a href='" + getFileName2(pathHeader, pair[1]) + ".html'>" + pair[2] + "</a>");
806                 }
807             }
808             continue;
809         }
810         return out.append("</td></tr>" + System.lineSeparator() + "</table>").toString();
811     }
812 
writeHeader(String main, String title, Output<PrintWriter> tsvFile)813     private static PrintWriter writeHeader(String main, String title, Output<PrintWriter> tsvFile) throws IOException {
814         PrintWriter out;
815         out = FileUtilities.openUTF8Writer(options[DESTDIR].value, main + ".html");
816         if (tsvFile.value == null) {
817             tsvFile.value = FileUtilities.openUTF8Writer(Chart.getTsvDir(options[DESTDIR].value, DIR_NAME), DIR_NAME + ".tsv");
818             tsvFile.value.println("# By-Type Data");
819             tsvFile.value.println("# Section\tPage\tHeader\tCode\tValue\tLocales");
820         }
821 
822         ShowData.getChartTemplate("By-Type Chart: " + title,
823             ToolConstants.CHART_DISPLAY_VERSION,
824             "",
825             // "<link rel='stylesheet' type='text/css' href='by_type.css'>" +
826             // "<style type='text/css'>" + Utility.LINE_SEPARATOR +
827             // "h1 {margin-bottom:1em}" + Utility.LINE_SEPARATOR +
828             // "</style>" + Utility.LINE_SEPARATOR,
829             headerAndFooter, null, false);
830         out.println(headerAndFooter[0]);
831         return out;
832     }
833 
834     /**
835      * @param tsvFile TODO
836      *
837      */
finish(PrintWriter out, PrintWriter tsvFile)838     private static void finish(PrintWriter out, PrintWriter tsvFile) {
839         if (out == null) return;
840         out.println("</table>");
841         out.println(headerAndFooter[1]);
842         out.close();
843     }
844 
finishAll(PrintWriter out, PrintWriter tsvFile)845     private static void finishAll(PrintWriter out, PrintWriter tsvFile) {
846         // TODO Auto-generated method stub
847         tsvFile.println("# EOF");
848         tsvFile.close();
849     }
850 }
851