1 package org.unicode.cldr.tool;
2 
3 import java.io.IOException;
4 import java.io.PrintWriter;
5 import java.util.BitSet;
6 import java.util.Collection;
7 import java.util.HashMap;
8 import java.util.HashSet;
9 import java.util.LinkedHashSet;
10 import java.util.List;
11 import java.util.Map;
12 import java.util.Map.Entry;
13 import java.util.Set;
14 import java.util.TreeMap;
15 import java.util.TreeSet;
16 import java.util.regex.Matcher;
17 
18 import org.unicode.cldr.draft.FileUtilities;
19 import org.unicode.cldr.draft.Keyboard;
20 import org.unicode.cldr.draft.Keyboard.Gesture;
21 import org.unicode.cldr.draft.Keyboard.Iso;
22 import org.unicode.cldr.draft.Keyboard.KeyMap;
23 import org.unicode.cldr.draft.Keyboard.Output;
24 import org.unicode.cldr.draft.Keyboard.TransformStatus;
25 import org.unicode.cldr.draft.Keyboard.TransformType;
26 import org.unicode.cldr.draft.Keyboard.Transforms;
27 import org.unicode.cldr.draft.KeyboardModifierSet;
28 import org.unicode.cldr.tool.Option.Options;
29 import org.unicode.cldr.util.CLDRConfig;
30 import org.unicode.cldr.util.CLDRFile;
31 import org.unicode.cldr.util.CLDRFile.WinningChoice;
32 import org.unicode.cldr.util.CLDRPaths;
33 import org.unicode.cldr.util.CLDRTool;
34 import org.unicode.cldr.util.CldrUtility;
35 import org.unicode.cldr.util.Counter;
36 import org.unicode.cldr.util.Factory;
37 import org.unicode.cldr.util.FileCopier;
38 import org.unicode.cldr.util.LanguageTagCanonicalizer;
39 import org.unicode.cldr.util.Log;
40 import org.unicode.cldr.util.PatternCache;
41 import org.unicode.cldr.util.SupplementalDataInfo;
42 import org.unicode.cldr.util.TransliteratorUtilities;
43 import org.unicode.cldr.util.UnicodeSetPrettyPrinter;
44 
45 import com.ibm.icu.dev.util.CollectionUtilities;
46 import com.ibm.icu.impl.Relation;
47 import com.ibm.icu.impl.Row;
48 import com.ibm.icu.impl.Row.R2;
49 import com.ibm.icu.impl.Row.R3;
50 import com.ibm.icu.impl.Utility;
51 import com.ibm.icu.lang.UCharacter;
52 import com.ibm.icu.lang.UProperty;
53 import com.ibm.icu.lang.UScript;
54 import com.ibm.icu.text.Collator;
55 import com.ibm.icu.text.Transliterator;
56 import com.ibm.icu.text.UnicodeSet;
57 import com.ibm.icu.util.ICUUncheckedIOException;
58 import com.ibm.icu.util.ULocale;
59 
60 @CLDRTool(alias = "showkeyboards", description = "Generate keyboard charts")
61 public class ShowKeyboards {
62     // TODO - fix ' > xxx
63     // TODO - check for bad locale ids
64 
    // HTML paragraph linking to the "About Keyboard Charts" page; printed right after the header of every generated chart.
    private static final String ABOUT_KEYBOARD_CHARTS = "<p>For more information, see <a target='ABOUT_KB' href='http://cldr.unicode.org/index/charts/keyboards'>About Keyboard Charts</a>.</p>";
    // Output directory for the charts; assigned in main() from the targetDirectory option.
    private static String keyboardChartDir;
    // Output directory for the per-locale layout pages; assigned in main() as keyboardChartDir + "/layouts/".
    private static String keyboardChartLayoutsDir;
    // Tool configuration; supplies the CLDR factory below and the English locale names used in the charts.
    static final CLDRConfig testInfo = ToolConfig.getToolInstance();
    // Factory used by showLocaleToCharacters() to make a CLDRFile for each written language.
    static final Factory factory = testInfo.getCldrFactory();

    // NOTE(review): not referenced in the portion of the file reviewed here — confirm it is still needed.
    static final boolean SHOW_BACKGROUND = false;

    // Option registry: each MyOptions constant adds itself to it, and main() uses it to parse the arguments.
    final static Options myOptions = new Options();
74 
75     enum MyOptions {
76         idFilter(".+", ".*", "Filter the information based on id, using a regex argument."), sourceDirectory(".+", CLDRPaths.BASE_DIRECTORY + "keyboards/",
77             "The source directory. CURRENTLY CAN’T BE CHANGED!!"), targetDirectory(".+", CLDRPaths.CHART_DIRECTORY + "keyboards/",
78                 "The target directory."), layouts(null, null,
79                     "Only create html files for keyboard layouts"), repertoire(null, null, "Only create html files for repertoire"),;
80         // boilerplate
81         final Option option;
82 
MyOptions(String argumentPattern, String defaultArgument, String helpText)83         MyOptions(String argumentPattern, String defaultArgument, String helpText) {
84             option = myOptions.add(this, argumentPattern, defaultArgument, helpText);
85         }
86     }
87 
    // Supplemental data singleton. NOTE(review): not referenced in the portion of the file reviewed here.
    static SupplementalDataInfo supplementalDataInfo = SupplementalDataInfo.getInstance();
89 
90     // *********************************************
91     // Temporary, for some simple testing
92     // *********************************************
main(String[] args)93     public static void main(String[] args) throws IOException {
94         myOptions.parse(MyOptions.idFilter, args, true);
95         String idPattern = MyOptions.idFilter.option.getValue();
96         keyboardChartDir = MyOptions.targetDirectory.option.getValue();
97         keyboardChartLayoutsDir = keyboardChartDir + "/layouts/";
98 
99         FileCopier.ensureDirectoryExists(keyboardChartDir);
100         FileCopier.copy(ShowKeyboards.class, "keyboards-index.html", keyboardChartDir, "index.html");
101 
102         Matcher idMatcher = PatternCache.get(idPattern).matcher("");
103         try {
104             Log.setLog(CLDRPaths.LOG_DIRECTORY + "keyboard-log.txt");
105         } catch (IOException e) {
106             throw new ICUUncheckedIOException(e);
107         }
108         boolean layoutsOnly = MyOptions.layouts.option.doesOccur();
109         boolean repertoireOnly = MyOptions.repertoire.option.doesOccur();
110 
111         if (!repertoireOnly) {
112             showHtml(idMatcher);
113         }
114         if (!layoutsOnly) {
115             showRepertoire(idMatcher);
116         }
117     }
118 
showRepertoire(Matcher idMatcher)119     public static void showRepertoire(Matcher idMatcher) {
120         Set<Exception> totalErrors = new LinkedHashSet<Exception>();
121         Set<Exception> errors = new LinkedHashSet<Exception>();
122         UnicodeSet controls = new UnicodeSet("[:Cc:]").freeze();
123         // check what the characters are, excluding controls.
124         Map<Id, UnicodeSet> id2unicodeset = new TreeMap<Id, UnicodeSet>();
125         Set<String> totalModifiers = new LinkedHashSet<String>();
126         Relation<String, Id> locale2ids = Relation.of(new TreeMap<String, Set<Id>>(), TreeSet.class);
127         LanguageTagCanonicalizer canonicalizer = new LanguageTagCanonicalizer();
128         IdInfo idInfo = new IdInfo();
129         for (String platformId : Keyboard.getPlatformIDs()) {
130             //Platform p = Keyboard.getPlatform(platformId);
131             // System.out.println(platformId + "\t" + p.getHardwareMap());
132             for (String keyboardId : Keyboard.getKeyboardIDs(platformId)) {
133                 if (!idMatcher.reset(keyboardId).matches()) {
134                     continue;
135                 }
136                 Keyboard keyboard = Keyboard.getKeyboard(platformId, keyboardId, errors);
137                 for (Exception error : errors) {
138                     totalErrors.add(new IllegalArgumentException(keyboardId, error));
139                 }
140                 UnicodeSet unicodeSet = keyboard.getPossibleResults().removeAll(controls);
141                 final Id id = new Id(keyboardId, keyboard.getPlatformVersion());
142                 idInfo.add(id, unicodeSet);
143                 String canonicalLocale = canonicalizer.transform(id.locale).replace('_', '-');
144                 if (!id.locale.equals(canonicalLocale)) {
145                     totalErrors.add(new IllegalArgumentException("Non-canonical id: " + id.locale + "\t=>\t" + canonicalLocale));
146                 }
147                 id2unicodeset.put(id, unicodeSet.freeze());
148                 locale2ids.put(id.locale, id);
149                 System.out.println(id.toString().replace('/', '\t') + "\t" + keyboard.getNames());
150                 for (KeyMap keymap : keyboard.getKeyMaps()) {
151                     totalModifiers.add(keymap.getModifiers().toString());
152                 }
153             }
154         }
155         if (totalErrors.size() != 0) {
156             System.out.println("Errors\t" + CollectionUtilities.join(totalErrors, System.lineSeparator() + "\t"));
157         }
158         for (String item : totalModifiers) {
159             System.out.println(item);
160         }
161         // logInfo.put(Row.of("k-cldr",common), keyboardId);
162         try {
163             FileCopier.copy(ShowKeyboards.class, "keyboards.css", keyboardChartDir, "index.css");
164             PrintWriter out = FileUtilities.openUTF8Writer(keyboardChartDir, "chars2keyboards.html");
165             String[] headerAndFooter = new String[2];
166 
167             ShowData.getChartTemplate(
168                 "Characters → Keyboards",
169                 ToolConstants.CHART_DISPLAY_VERSION,
170                 "",
171                 headerAndFooter, null, false);
172             out.println(headerAndFooter[0] + ABOUT_KEYBOARD_CHARTS);
173 
174             // printTop("Characters → Keyboards", out);
175             idInfo.print(out);
176             // printBottom(out);
177             out.println(headerAndFooter[1]);
178             out.close();
179 
180             out = FileUtilities.openUTF8Writer(keyboardChartDir, "keyboards2chars.html");
181             ShowData.getChartTemplate(
182                 "Keyboards → Characters",
183                 ToolConstants.CHART_DISPLAY_VERSION,
184                 "",
185                 headerAndFooter, null, false);
186             out.println(headerAndFooter[0]
187                 + ABOUT_KEYBOARD_CHARTS);
188             // printTop("Keyboards → Characters", out);
189             showLocaleToCharacters(out, id2unicodeset, locale2ids);
190             // printBottom(out);
191             out.println(headerAndFooter[1]);
192             out.close();
193         } catch (IOException e1) {
194             e1.printStackTrace();
195         }
196         for (Entry<R2<String, UnicodeSet>, Set<Id>> entry : logInfo.keyValuesSet()) {
197             IdSet idSet = new IdSet();
198             idSet.addAll(entry.getValue());
199             Log.logln(entry.getKey().get0() + "\t" + entry.getKey().get1().toPattern(false) + "\t"
200                 + idSet.toString(idInfo.allIds));
201         }
202         Log.close();
203     }
204 
showHtml(Matcher idMatcher)205     private static void showHtml(Matcher idMatcher) throws IOException {
206         Set<Exception> errors = new LinkedHashSet<Exception>();
207         Relation<String, Row.R3<String, String, String>> locale2keyboards = Relation.of(
208             new TreeMap<String, Set<Row.R3<String, String, String>>>(), TreeSet.class);
209         Map<String, String> localeIndex = new TreeMap<String, String>();
210 
211         for (String platformId : Keyboard.getPlatformIDs()) {
212             //Platform p = Keyboard.getPlatform(platformId);
213             // System.out.println(platformId + "\t" + p.getHardwareMap());
214             for (String keyboardId : Keyboard.getKeyboardIDs(platformId)) {
215                 if (!idMatcher.reset(keyboardId).matches()) {
216                     continue;
217                 }
218                 String baseLocale = keyboardId.substring(0, keyboardId.indexOf('-'));
219                 String locale = keyboardId.substring(0, keyboardId.indexOf("-t-"));
220                 locale2keyboards.put(baseLocale, Row.of(platformId, locale, keyboardId));
221 
222                 final String localeName = testInfo.getEnglish().getName(baseLocale, true);
223                 localeIndex.put(localeName, baseLocale);
224             }
225         }
226 
227         FileCopier.ensureDirectoryExists(keyboardChartLayoutsDir);
228         FileCopier.copy(ShowKeyboards.class, "keyboards.css", keyboardChartLayoutsDir, "index.css");
229         PrintWriter index = FileUtilities.openUTF8Writer(keyboardChartLayoutsDir, "index.html");
230         String[] headerAndFooter = new String[2];
231         ShowData.getChartTemplate(
232             "Keyboard Layout Index",
233             ToolConstants.CHART_DISPLAY_VERSION,
234             "",
235             headerAndFooter, "Keyboard Index", false);
236         index
237             .println(headerAndFooter[0] + ABOUT_KEYBOARD_CHARTS);
238         // printTop("Keyboard Layout Index", index);
239         index.println("<ol>");
240         for (Entry<String, String> entry : localeIndex.entrySet()) {
241             index.println("<li><a href='" + entry.getValue() + ".html'>"
242                 + entry.getKey() + "</a>"
243                 + " [" + entry.getValue() + "]" +
244                 "</li>");
245         }
246         index.println("</ol>");
247         index.println(headerAndFooter[1]);
248         // printBottom(index);
249         index.close();
250         // FileUtilities.copyFile(ShowKeyboards.class, "keyboards.css", keyboardChartLayoutsDir);
251 
252         for (Entry<String, Set<R3<String, String, String>>> localeKeyboards : locale2keyboards.keyValuesSet()) {
253             String locale = localeKeyboards.getKey();
254             final String localeName = testInfo.getEnglish().getName(locale);
255 
256             // String localeNameString = localeName.replace(' ', '_').toLowerCase(Locale.ENGLISH);
257             PrintWriter out = FileUtilities.openUTF8Writer(keyboardChartLayoutsDir, locale + ".html");
258             ShowData.getChartTemplate(
259                 "Layouts: " + localeName + " (" + locale + ")",
260                 ToolConstants.CHART_DISPLAY_VERSION,
261                 "",
262                 headerAndFooter, null, false);
263             out.println(headerAndFooter[0] + ABOUT_KEYBOARD_CHARTS);
264             // printTop("Layouts: " + localeName + " (" + locale + ")", out);
265             Set<R3<String, String, String>> keyboards = localeKeyboards.getValue();
266             for (R3<String, String, String> platformKeyboard : keyboards) {
267                 String platformId = platformKeyboard.get0();
268                 String keyboardId = platformKeyboard.get2();
269                 // System.out.println(platformId + "\t" + p.getHardwareMap());
270                 Keyboard keyboard = Keyboard.getKeyboard(platformId, keyboardId, errors);
271                 showErrors(errors);
272                 Set<String> names = keyboard.getNames();
273                 String platformFromKeyboardId = Keyboard.getPlatformId(keyboardId);
274                 String printId = platformId.equals(platformFromKeyboardId) ? keyboardId : keyboardId + "/und";
275                 out.println("<h2>" + CldrUtility.getDoubleLinkedText(printId, printId)
276                     + (names.size() == 0 ? "" : " " + names)
277                     + "</h2>");
278 
279                 Transforms transforms = keyboard.getTransforms().get(TransformType.SIMPLE);
280 
281                 out.println("<table class='keyboards'><tr>");
282                 for (KeyMap map : keyboard.getKeyMaps()) {
283                     KeyboardModifierSet mods = map.getModifiers();
284                     out.println("<td class='keyboardTD'><table class='keyboard'>");
285                     // KeyboardModifierSet modifiers = map.getModifiers();
286                     Map<Iso, Output> isoMap = map.getIso2Output();
287                     for (Keyboard.IsoRow row : Keyboard.IsoRow.values()) {
288                         out.println("<tr>");
289                         for (Iso isoValue : Iso.values()) {
290                             if (isoValue.isoRow != row) {
291                                 continue;
292                             }
293                             Output output = isoMap.get(isoValue);
294                             if (output == null) {
295                                 out.println("<td class='x'>&nbsp;</td>");
296                                 continue;
297                             }
298                             String chars = output.getOutput();
299                             TransformStatus transformStatus = output.getTransformStatus();
300                             StringBuilder hover = new StringBuilder();
301                             if (transformStatus == TransformStatus.DEFAULT && transforms != null) {
302                                 Map<String, String> map2 = transforms.getMatch(chars);
303                                 add(map2, hover);
304                             }
305                             Map<Gesture, List<String>> gestures = output.getGestures();
306                             if (!gestures.isEmpty()) {
307                                 add(gestures, hover);
308                             }
309                             final String longPress = hover.length() == 0 ? ""
310                                 : " title='" + hover + "'";
311                             out.println("<td class='" + (hover.length() == 0 ? 'm' : 'h') +
312                                 "'" + longPress + ">"
313                                 + toSafeHtml(chars) + "</td>");
314                         }
315                         out.println("</tr>");
316                     }
317                     String modsString = mods.getShortInput();
318                     if (modsString.isEmpty()) {
319                         modsString = "\u00A0";
320                     } else if (modsString.length() > 20) {
321                         modsString = modsString.substring(0, 20) + "…";
322                     }
323                     out.println("</table><span class='modifiers'>"
324                         + TransliteratorUtilities.toHTML.transform(modsString) +
325                         "</span></td>");
326                 }
327                 out.println("</tr></table>");
328             }
329             index.println(headerAndFooter[1]);
330             // printBottom(out);
331             out.close();
332         }
333         System.out.println("Failing Invisibles: " + FAILING_INVISIBLE.retainAll(INVISIBLE));
334     }
335 
showErrors(Set<Exception> errors)336     private static void showErrors(Set<Exception> errors) {
337         for (Exception error : errors) {
338             String title = error.getMessage().contains("No minimal data for") ? "Warning" : "Error";
339             System.out.println("\t*" + title + ":\t" + error);
340         }
341     }
342 
343     static Transliterator TO_SAFE_HTML;
344     static {
345         StringBuilder rules = new StringBuilder(TransliteratorUtilities.toHTML.toRules(false));
346         for (char i = 0; i < 0x20; ++i) {
String.valueOf(i)347             addRule(String.valueOf(i), "^" + String.valueOf((char) (i + 0x40)), rules);
348         }
349         String[][] map = {
350             // {"\u0020","sp"},
351             { "\u007F", "del" },
352             { "\u00A0", "nbsp" },
353             { "\u00AD", "shy" },
354             { "\u200B", "zwsp" },
355             { "\u200C", "zwnj" },
356             { "\u200D", "zwj" },
357             { "\u200E", "lrm" },
358             { "\u200F", "rlm" },
359             { "\u202F", "nnbs" },
360             { "\uFEFF", "bom" },
361             { "\u180B", "mvs1" },
362             { "\u180C", "mvs2" },
363             { "\u180D", "mvs3" },
364             { "\u180E", "mvs" },
365             // {"\uF8FF","appl"},
366         };
367         for (String[] items : map) {
368             final String fromItem = items[0];
369             final String toItem = items[1];
addRule(fromItem, toItem, rules)370             addRule(fromItem, toItem, rules);
371         }
372         TO_SAFE_HTML = Transliterator.createFromRules("none", rules.toString(), Transliterator.FORWARD);
373     }
374 
addRule(final String fromItem, final String toItem, StringBuilder rules)375     public static void addRule(final String fromItem, final String toItem, StringBuilder rules) {
376         rules.append("'"
377             + fromItem
378             + "'>"
379             + "'<span class=\"cc\">"
380             + toItem
381             + "</span>'"
382             + ";"
383             + System.lineSeparator());
384     }
385 
    // Characters treated as invisible: general categories C and Z, whitespace and default-ignorable
    // code points, with U+0020 SPACE excluded. Frozen, so it cannot be modified.
    static UnicodeSet INVISIBLE = new UnicodeSet("[[:C:][:Z:][:whitespace:][:Default_Ignorable_Code_Point:]-[\\u0020]]").freeze();
    // Filled by toSafeHtml() with the characters of every result that still contained an INVISIBLE
    // character; showHtml() intersects it with INVISIBLE and prints it at the end.
    static UnicodeSet FAILING_INVISIBLE = new UnicodeSet();
388 
toSafeHtml(Object hover)389     public static String toSafeHtml(Object hover) {
390         String result = TO_SAFE_HTML.transform(hover.toString());
391         if (INVISIBLE.containsSome(result)) {
392             FAILING_INVISIBLE.addAll(result);
393         }
394         return result;
395     }
396 
add(Map<K, V> map2, StringBuilder hover)397     private static <K, V> void add(Map<K, V> map2, StringBuilder hover) {
398         if (!map2.isEmpty()) {
399             for (Entry<K, V> entry : map2.entrySet()) {
400                 if (hover.length() != 0) {
401                     hover.append("; ");
402                 }
403                 final K key = entry.getKey();
404                 String keyString = key == Gesture.LONGPRESS ? "LP" : key.toString();
405                 final V value = entry.getValue();
406                 String valueString = value instanceof Collection
407                     ? CollectionUtilities.join((Collection) value, " ")
408                     : value.toString();
409                 hover.append(TransliteratorUtilities.toHTML.transform(keyString)).append("→")
410                     .append(TransliteratorUtilities.toHTML.transform(valueString));
411             }
412         }
413     }
414 
415     // public static void printTop(String title, PrintWriter out) {
416     // out.println(
417     // "<html>\n" +
418     // "<head>\n" +
419     // "<meta http-equiv='Content-Type' content='text/html; charset=UTF-8'/>\n" +
420     // "<link rel='stylesheet' type='text/css' href='keyboards.css'>" +
421     // "<title>" + title + "</title>\n" +
422     // "</head>\n" +
423     // "<body>\n" +
424     // "<h1>DRAFT " +
425     // title +
426     // "</h1>\n" +
427     // "<p>For more information, see <a href='http://cldr.unicode.org/index/charts/keyboards'>Keyboard Charts</a>.</p>"
428     // );
429     // }
430     //
431     // public static void printBottom(PrintWriter pw) {
432     // pw.println(
433     // "</body>\n" +
434     // "</html>"
435     // );
436     // }
437 
    /**
     * Prints an HTML table that lists, for every locale, the characters its keyboards can produce.
     *
     * <p>For each locale the table contains, in this order:
     * <ul>
     * <li>an "ALL / COMMON" row with the characters shared by all of the locale's keyboards
     * (only when the locale has more than one keyboard);</li>
     * <li>for each platform with more than one keyboard and a non-empty shared set, a
     * "platform / COMMON" row whose statistics cover the whole shared set but whose character
     * column omits what is already in the ALL / COMMON row;</li>
     * <li>one row per keyboard whose character column holds only what is in neither of the
     * COMMON sets above, while the statistics cover the keyboard's full set.</li>
     * </ul>
     *
     * @param out           writer that receives the finished table
     * @param id2unicodeset character repertoire of each keyboard id
     * @param locale2ids    keyboard ids grouped by locale
     */
    public static void showLocaleToCharacters(PrintWriter out, Map<Id, UnicodeSet> id2unicodeset,
        Relation<String, Id> locale2ids) {

        TablePrinter t = new TablePrinter()
            .addColumn("Name").setSpanRows(true).setBreakSpans(true).setSortPriority(0)
            .setCellAttributes("class='cell'")
            .addColumn("Locale").setSpanRows(true).setBreakSpans(true).setCellAttributes("class='cell'")
            .addColumn("Platform").setSpanRows(true).setCellAttributes("class='cell'")
            .addColumn("Variant").setCellAttributes("class='cell'")
            .addColumn("Script").setCellAttributes("class='cell'")
            .addColumn("Statistics").setCellAttributes("class='cell'")
            .addColumn("Characters").setSpanRows(true).setCellAttributes("class='cell'");

        // Working state that is reset for every locale:
        // commonSets  - per platform, the characters shared by that platform's keyboards
        // commonCount - per platform, how many keyboards the locale has
        // commonDone  - platforms whose COMMON row has already been handled
        Map<String, UnicodeSet> commonSets = new HashMap<String, UnicodeSet>();
        Counter<String> commonCount = new Counter<String>();
        Set<String> commonDone = new HashSet<String>();

        for (Entry<String, Set<Id>> localeAndIds : locale2ids.keyValuesSet()) {
            final String key = localeAndIds.getKey();
            final Set<Id> keyboardIds = localeAndIds.getValue();

            // System.out.println();
            final String localeName = testInfo.getEnglish().getName(key, true);
            final String linkedLocaleName = CldrUtility.getDoubleLinkedText(key, localeName);
            final ULocale uLocale = ULocale.forLanguageTag(key);
            String script = uLocale.getScript();
            // Language plus explicit script (if any) identifies the CLDR file to load.
            String writtenLanguage = uLocale.getLanguage() + (script.isEmpty() ? "" : "_" + script);
            CLDRFile cldrFile = null;
            try {
                cldrFile = factory.make(writtenLanguage, true);
            } catch (Exception e) {
                // Deliberately ignored: cldrFile stays null when no file can be made for this language.
                // NOTE(review): presumably getInfo() tolerates a null cldrFile — confirm.
            }

            // final String heading = uLocale.getDisplayName(ULocale.ENGLISH)
            // + "\t" + ULocale.addLikelySubtags(uLocale).getScript()
            // + "\t";
            // UnicodeSet.EMPTY serves as a "nothing collected yet" sentinel and is compared by identity below.
            UnicodeSet common = UnicodeSet.EMPTY;
            final String likelyScript = ULocale.addLikelySubtags(uLocale).getScript();
            commonCount.clear();
            for (String platform : Keyboard.getPlatformIDs()) {
                commonSets.put(platform, UnicodeSet.EMPTY);
            }
            if (keyboardIds.size() > 1) {
                common = UnicodeSet.EMPTY;
                for (Id keyboardId : keyboardIds) {
                    final UnicodeSet keyboardSet = id2unicodeset.get(keyboardId);
                    // The first keyboard seeds the intersection; later ones narrow it.
                    if (common == UnicodeSet.EMPTY) {
                        common = new UnicodeSet(keyboardSet);
                    } else {
                        common.retainAll(keyboardSet);
                    }
                    UnicodeSet platformCommon = commonSets.get(keyboardId.platform);
                    commonCount.add(keyboardId.platform, 1);
                    // Same seeding/narrowing, per platform.
                    if (platformCommon == UnicodeSet.EMPTY) {
                        commonSets.put(keyboardId.platform, new UnicodeSet(keyboardSet));
                    } else {
                        platformCommon.retainAll(keyboardSet);
                    }
                }
                common.freeze();
                t.addRow()
                    .addCell(linkedLocaleName) // name
                    .addCell(key) // locale
                    .addCell("ALL") // platform
                    .addCell("COMMON") // variant
                    .addCell(likelyScript) // script
                    .addCell(getInfo(null, common, cldrFile)) // stats
                    .addCell(safeUnicodeSet(common)) // characters
                    .finishRow();

                // System.out.println(
                // locale + "\tCOMMON\t\t-"
                // + "\t" + heading + getInfo(common, cldrFile)
                // + "\t" + common.toPattern(false));
            }
            commonDone.clear();
            for (Id keyboardId : keyboardIds) {
                UnicodeSet platformCommon = commonSets.get(keyboardId.platform);
                // The first keyboard of each platform decides whether a platform COMMON row is emitted.
                if (!commonDone.contains(keyboardId.platform)) {
                    commonDone.add(keyboardId.platform);
                    if (commonCount.get(keyboardId.platform) <= 1) {
                        // A single keyboard on the platform: no platform-level common set.
                        platformCommon = UnicodeSet.EMPTY;
                        commonSets.put(keyboardId.platform, platformCommon);
                    } else if (platformCommon.size() > 0) {
                        // get stats for all, but otherwise remove common.
                        final String stats = getInfo(null, platformCommon, cldrFile);
                        platformCommon.removeAll(common).freeze();
                        commonSets.put(keyboardId.platform, platformCommon);
                        t.addRow()
                            .addCell(linkedLocaleName) // name
                            .addCell(key) // locale
                            .addCell(keyboardId.platform) // platform
                            .addCell("COMMON") // variant
                            .addCell(likelyScript) // script
                            .addCell(stats) // stats
                            .addCell(safeUnicodeSet(platformCommon)) // characters
                            .finishRow();
                    }
                }
                // Row for the keyboard itself: only characters not already shown in a COMMON row.
                final UnicodeSet current2 = id2unicodeset.get(keyboardId);
                final UnicodeSet remainder = new UnicodeSet(current2)
                    .removeAll(common)
                    .removeAll(platformCommon);

                t.addRow()
                    .addCell(linkedLocaleName) // name
                    .addCell(key) // locale
                    .addCell(keyboardId.platform) // platform
                    .addCell(keyboardId.variant) // variant
                    .addCell(likelyScript) // script
                    .addCell(getInfo(keyboardId, current2, cldrFile)) // stats
                    .addCell(safeUnicodeSet(remainder)) // characters
                    .finishRow();
                // System.out.println(
                // keyboardId.toString().replace('/','\t')
                // + "\t" + keyboardId.platformVersion
                // + "\t" + heading + getInfo(current2, cldrFile)
                // + "\t" + remainder.toPattern(false));
            }
        }
        out.println(t.toTable());
    }
560 
    // Formatter used by safeUnicodeSet(): orders with the root collator and uses a
    // primary-strength root collator as its space comparator.
    static UnicodeSetPrettyPrinter prettyPrinter = new UnicodeSetPrettyPrinter()
        .setOrdering(Collator.getInstance(ULocale.ROOT))
        .setSpaceComparator(Collator.getInstance(ULocale.ROOT).setStrength2(Collator.PRIMARY));
564 
safeUnicodeSet(UnicodeSet unicodeSet)565     public static String safeUnicodeSet(UnicodeSet unicodeSet) {
566         return TransliteratorUtilities.toHTML.transform(prettyPrinter.format(unicodeSet));
567     }
568 
569     static class IdInfo {
        // English collator, switched to IDENTICAL strength in the initializer below; it orders the keys of every per-script map.
        final Collator collator = Collator.getInstance(ULocale.ENGLISH);
        // Scratch set that add() passes to getScriptExtensions() and then iterates.
        BitSet bitset = new BitSet();
        // Scratch set used inside getScriptExtensions() for the code points after the first.
        BitSet bitset2 = new BitSet();
        // Indexed by UScript code: string -> set of keyboard ids recorded for it by addToScript().
        @SuppressWarnings("unchecked")
        TreeMap<String, IdSet>[] charToKeyboards = new TreeMap[UScript.CODE_LIMIT];
        {
            collator.setStrength(Collator.IDENTICAL);
            // One map per script code, so addToScript() never meets a null slot.
            for (int i = 0; i < charToKeyboards.length; ++i) {
                charToKeyboards[i] = new TreeMap<String, IdSet>(collator);
            }
        }
        // Every keyboard id passed to add().
        IdSet allIds = new IdSet();
582 
add(Id id, UnicodeSet unicodeSet)583         public void add(Id id, UnicodeSet unicodeSet) {
584             allIds.add(id);
585             for (String s : unicodeSet) {
586                 int script = getScriptExtensions(s, bitset);
587                 if (script >= 0) {
588                     addToScript(script, id, s);
589                 } else {
590                     for (int script2 = bitset.nextSetBit(0); script2 >= 0; script2 = bitset.nextSetBit(script2 + 1)) {
591                         addToScript(script2, id, s);
592                     }
593                 }
594             }
595         }
596 
getScriptExtensions(String s, BitSet outputBitset)597         public int getScriptExtensions(String s, BitSet outputBitset) {
598             final int firstCodePoint = s.codePointAt(0);
599             int result = UScript.getScriptExtensions(firstCodePoint, outputBitset);
600             final int firstCodePointCount = Character.charCount(firstCodePoint);
601             if (s.length() == firstCodePointCount) {
602                 return result;
603             }
604             for (int i = firstCodePointCount; i < s.length();) {
605                 int ch = s.codePointAt(i);
606                 UScript.getScriptExtensions(ch, bitset2);
607                 outputBitset.or(bitset2);
608                 i += Character.charCount(ch);
609             }
610             // remove inherited, if there is anything else; then remove common if there is anything else
611             int cardinality = outputBitset.cardinality();
612             if (cardinality > 1) {
613                 if (outputBitset.get(UScript.INHERITED)) {
614                     outputBitset.clear(UScript.INHERITED);
615                     --cardinality;
616                 }
617                 if (cardinality > 1) {
618                     if (outputBitset.get(UScript.COMMON)) {
619                         outputBitset.clear(UScript.COMMON);
620                         --cardinality;
621                     }
622                 }
623             }
624             if (cardinality == 1) {
625                 return outputBitset.nextSetBit(0);
626             } else {
627                 return -cardinality;
628             }
629         }
630 
addToScript(int script, Id id, String s)631         public void addToScript(int script, Id id, String s) {
632             TreeMap<String, IdSet> charToKeyboard = charToKeyboards[script];
633             IdSet idSet = charToKeyboard.get(s);
634             if (idSet == null) {
635                 charToKeyboard.put(s, idSet = new IdSet());
636             }
637             idSet.add(id);
638         }
639 
print(PrintWriter pw)640         public void print(PrintWriter pw) {
641 
642             TablePrinter t = new TablePrinter()
643                 .addColumn("Script").setSpanRows(true).setCellAttributes("class='s'")
644                 .addColumn("Char").setCellAttributes("class='ch'")
645                 .addColumn("Code").setCellAttributes("class='c'")
646                 .addColumn("Name").setCellAttributes("class='n'")
647                 .addColumn("Keyboards").setSpanRows(true).setCellAttributes("class='k'");
648             Set<String> missingScripts = new TreeSet<String>();
649             UnicodeSet notNFKC = new UnicodeSet("[:nfkcqc=n:]");
650             UnicodeSet COMMONINHERITED = new UnicodeSet("[[:sc=common:][:sc=inherited:]]");
651 
652             for (int script = 0; script < charToKeyboards.length; ++script) {
653                 UnicodeSet inScript = new UnicodeSet().applyIntPropertyValue(UProperty.SCRIPT, script).removeAll(
654                     notNFKC);
655 
656                 // UnicodeSet fullScript = new UnicodeSet(inScript);
657                 // int fullScriptSize = inScript.size();
658                 if (inScript.size() == 0) {
659                     continue;
660                 }
661                 final TreeMap<String, IdSet> charToKeyboard = charToKeyboards[script];
662                 final String scriptName = UScript.getName(script);
663                 final String linkedScriptName = CldrUtility.getDoubleLinkedText(UScript.getShortName(script),
664                     scriptName);
665                 if (charToKeyboard.size() == 0) {
666                     missingScripts.add(scriptName);
667                     continue;
668                 }
669 
670                 // also check to see that at least one item is not all common
671                 check: if (script != UScript.COMMON && script != UScript.INHERITED) {
672                     for (String s : charToKeyboard.keySet()) {
673                         if (!COMMONINHERITED.containsAll(s)) {
674                             break check;
675                         }
676                     }
677                     missingScripts.add(scriptName);
678                     continue;
679                 }
680 
681                 String last = "";
682                 for (Entry<String, IdSet> entry : charToKeyboard.entrySet()) {
683                     String s = entry.getKey();
684                     IdSet value = entry.getValue();
685                     final String keyboardsString = value.toString(allIds);
686                     if (!s.equalsIgnoreCase(last)) {
687                         if (s.equals("\u094D\u200C")) { // Hack, because the browsers width is way off
688                             s = "\u094D";
689                         }
690                         String name = UCharacter.getName(s, " + ");
691                         if (name == null) {
692                             name = "[no name]";
693                         }
694                         String ch = s.equals("\u0F39") ? "\uFFFD" : s;
695                         t.addRow()
696                             .addCell(linkedScriptName)
697                             .addCell((SHOW_BACKGROUND ? "<span class='ybg'>" : "") +
698                                 TransliteratorUtilities.toHTML.transform(ch)
699                                 + (SHOW_BACKGROUND ? "</span>" : ""))
700                             .addCell(Utility.hex(s, 4, " + "))
701                             .addCell(name)
702                             .addCell(keyboardsString)
703                             .finishRow();
704                     }
705                     inScript.remove(s);
706                     last = s;
707                 }
708                 if (inScript.size() != 0 && script != UScript.UNKNOWN) {
709                     // String pattern;
710                     // if (inScript.size() < 255 || inScript.size()*4 < fullScriptSize) {
711                     // } else {
712                     // fullScript.removeAll(inScript);
713                     // inScript = new UnicodeSet("[[:sc=" + UScript.getShortName(script) + ":]-" +
714                     // fullScript.toPattern(false) + "]");
715                     // }
716                     t.addRow()
717                         .addCell(linkedScriptName)
718                         .addCell("")
719                         .addCell(String.valueOf(inScript.size()))
720                         .addCell("missing (NFKC)!")
721                         .addCell(safeUnicodeSet(inScript))
722                         .finishRow();
723                 }
724             }
725             t.addRow()
726                 .addCell("")
727                 .addCell("")
728                 .addCell(String.valueOf(missingScripts.size()))
729                 .addCell("missing scripts!")
730                 .addCell(missingScripts.toString())
731                 .finishRow();
732             pw.println(t.toTable());
733         }
734     }
735 
getInfo(Id keyboardId, UnicodeSet common, CLDRFile cldrFile)736     private static String getInfo(Id keyboardId, UnicodeSet common, CLDRFile cldrFile) {
737         Counter<String> results = new Counter<String>();
738         for (String s : common) {
739             int first = s.codePointAt(0); // first char is good enough
740             results.add(UScript.getShortName(UScript.getScript(first)), 1);
741         }
742         results.remove("Zyyy");
743         results.remove("Zinh");
744         results.remove("Zzzz");
745 
746         if (cldrFile != null) {
747             UnicodeSet exemplars = new UnicodeSet(cldrFile.getExemplarSet("", WinningChoice.WINNING));
748             UnicodeSet auxExemplars = cldrFile.getExemplarSet("auxiliary", WinningChoice.WINNING);
749             if (auxExemplars != null) {
750                 exemplars.addAll(auxExemplars);
751             }
752             UnicodeSet punctuationExemplars = cldrFile.getExemplarSet("punctuation", WinningChoice.WINNING);
753             if (punctuationExemplars != null) {
754                 exemplars.addAll(punctuationExemplars);
755             }
756             exemplars.addAll(getNumericExemplars(cldrFile));
757             exemplars.addAll(getQuotationMarks(cldrFile));
758             exemplars.add(" ");
759             addComparison(keyboardId, common, exemplars, results);
760         }
761         StringBuilder b = new StringBuilder();
762         for (String entry : results.keySet()) {
763             if (b.length() != 0) {
764                 b.append(", ");
765             }
766             b.append(entry).append(":").append(results.get(entry));
767         }
768         return b.toString();
769     }
770 
addComparison(Id keyboardId, UnicodeSet keyboard, UnicodeSet exemplars, Counter<String> results)771     private static void addComparison(Id keyboardId, UnicodeSet keyboard, UnicodeSet exemplars,
772         Counter<String> results) {
773         UnicodeSet common = new UnicodeSet(keyboard).retainAll(exemplars);
774         if (common.size() != 0) {
775             results.add("k∩cldr", common.size());
776         }
777         common = new UnicodeSet(keyboard).removeAll(exemplars);
778         if (common.size() != 0) {
779             results.add("k‑cldr", common.size());
780             if (keyboardId != null) {
781                 common.remove(0, 0x7F); // don't care much about ASCII.
782                 logInfo.put(Row.of("k-cldr\t" + keyboardId.getBaseLanguage(), common), keyboardId);
783                 // Log.logln(keyboardId + "\tk-cldr\t" + common.toPattern(false));
784             }
785         }
786         common = new UnicodeSet(exemplars).removeAll(keyboard).remove("ss");
787         if (common.size() != 0) {
788             results.add("cldr‑k", common.size());
789             if (keyboardId != null && SKIP_LOG.containsNone(common)) {
790                 logInfo.put(Row.of("cldr‑k\t" + keyboardId.getBaseLanguage(), common), keyboardId);
791                 // Log.logln(keyboardId + "\tcldr‑k\t" + common.toPattern(false));
792             }
793         }
794     }
795 
796     static final UnicodeSet SKIP_LOG = new UnicodeSet("[가一]").freeze();
797     static Relation<Row.R2<String, UnicodeSet>, Id> logInfo = Relation.of(new TreeMap<Row.R2<String, UnicodeSet>, Set<Id>>(), TreeSet.class);
798 
799     static class Id implements Comparable<Id> {
800         final String locale;
801         final String platform;
802         final String variant;
803         final String platformVersion;
804 
Id(String input, String platformVersion)805         Id(String input, String platformVersion) {
806             int pos = input.indexOf("-t-k0-");
807             String localeTemp = input.substring(0, pos);
808             locale = ULocale.minimizeSubtags(ULocale.forLanguageTag(localeTemp)).toLanguageTag();
809             pos += 6;
810             int pos2 = input.indexOf('-', pos);
811             if (pos2 > 0) {
812                 platform = input.substring(pos, pos2);
813                 variant = input.substring(pos2 + 1);
814             } else {
815                 platform = input.substring(pos);
816                 variant = "";
817             }
818             this.platformVersion = platformVersion;
819         }
820 
821         @Override
compareTo(Id other)822         public int compareTo(Id other) {
823             int result;
824             if (0 != (result = locale.compareTo(other.locale))) {
825                 return result;
826             }
827             if (0 != (result = platform.compareTo(other.platform))) {
828                 return result;
829             }
830             if (0 != (result = variant.compareTo(other.variant))) {
831                 return result;
832             }
833             return 0;
834         }
835 
836         @Override
toString()837         public String toString() {
838             return locale + "/" + platform + "/" + variant;
839         }
840 
getBaseLanguage()841         public String getBaseLanguage() {
842             int pos = locale.indexOf('-');
843             return pos < 0 ? locale : locale.substring(0, pos);
844         }
845     }
846 
847     static class IdSet {
848         Map<String, Relation<String, String>> data = new TreeMap<String, Relation<String, String>>();
849 
add(Id id)850         public void add(Id id) {
851             Relation<String, String> platform2variant = data.get(id.platform);
852             if (platform2variant == null) {
853                 data.put(id.platform, platform2variant = Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class));
854             }
855             platform2variant.put(id.locale, id.variant);
856         }
857 
addAll(Collection<Id> idSet)858         public void addAll(Collection<Id> idSet) {
859             for (Id id : idSet) {
860                 add(id);
861             }
862         }
863 
toString(IdSet allIds)864         public String toString(IdSet allIds) {
865             if (this.equals(allIds)) {
866                 return "*";
867             }
868             StringBuilder b = new StringBuilder();
869             final Set<Entry<String, Relation<String, String>>> entrySet = data.entrySet();
870             boolean first = true;
871             for (Entry<String, Relation<String, String>> entry : entrySet) {
872                 if (first) {
873                     first = false;
874                 } else {
875                     b.append(" ");
876                 }
877                 String key = entry.getKey();
878                 Set<Entry<String, Set<String>>> valueSet = entry.getValue().keyValuesSet();
879                 b.append(key).append(":");
880                 appendLocaleAndVariants(b, valueSet, allIds.data.get(key));
881             }
882             return b.toString();
883         }
884 
appendLocaleAndVariants(StringBuilder b, Set<Entry<String, Set<String>>> set, Relation<String, String> relation)885         private void appendLocaleAndVariants(StringBuilder b, Set<Entry<String, Set<String>>> set,
886             Relation<String, String> relation) {
887             if (set.equals(relation.keyValuesSet())) {
888                 b.append("*");
889                 return;
890             }
891             final int setSize = set.size();
892             if (setSize > 9) {
893                 b.append(setSize).append("/").append(relation.size());
894                 return;
895             }
896             final boolean isSingle = setSize == 1;
897             if (!isSingle) b.append("(");
898             boolean first = true;
899             for (Entry<String, Set<String>> item : set) {
900                 if (first) {
901                     first = false;
902                 } else {
903                     b.append("|");
904                 }
905                 final String key = item.getKey();
906                 b.append(key);
907                 final Set<String> variants = item.getValue();
908                 final int size = variants.size();
909                 if (size != 0) {
910                     if (size == 1) {
911                         String firstOne = variants.iterator().next();
912                         if (firstOne.isEmpty()) {
913                             continue; // fr-CA/∅ => fr-CA
914                         }
915                     }
916                     b.append("/");
917                     appendVariant(b, variants, relation.get(key));
918                 }
919             }
920             if (!isSingle) b.append(")");
921         }
922 
appendVariant(StringBuilder b, Set<String> set, Set<String> set2)923         private void appendVariant(StringBuilder b, Set<String> set, Set<String> set2) {
924             if (set.equals(set2)) {
925                 b.append("*");
926                 return;
927             }
928             final boolean isSingle = set.size() == 1;
929             if (!isSingle) b.append("(");
930             boolean first = true;
931             for (String item : set) {
932                 if (first) {
933                     first = false;
934                 } else {
935                     b.append("|");
936                 }
937                 b.append(item.isEmpty() ? "∅" : item);
938             }
939             if (!isSingle) b.append(")");
940         }
941 
isEquals(Object other)942         public boolean isEquals(Object other) {
943             return data.equals(((IdSet) other).data);
944         }
945 
hashCode()946         public int hashCode() {
947             return data.hashCode();
948         }
949     }
950 
951     // public static class Key {
952     // Iso iso;
953     // ModifierSet modifierSet;
954     // }
955     // /**
956     // * Return all possible results. Could be external utility. WARNING: doesn't account for transform='no' or
957     // failure='omit'.
958     // */
959     // public Map<String,List<Key>> getPossibleSource() {
960     // Map<String,List<Key>> results = new HashMap<String,List<Key>>();
961     // UnicodeSet results = new UnicodeSet();
962     // addOutput(getBaseMap().iso2output.values(), results);
963     // for (KeyMap keymap : getKeyMaps()) {
964     // addOutput(keymap.string2output.values(), results);
965     // }
966     // for (Transforms transforms : getTransforms().values()) {
967     // // loop, to catch empty case
968     // for (String result : transforms.string2string.values()) {
969     // if (!result.isEmpty()) {
970     // results.add(result);
971     // }
972     // }
973     // }
974     // return results;
975     // }
976 
getQuotationMarks(CLDRFile file)977     static UnicodeSet getQuotationMarks(CLDRFile file) {
978         UnicodeSet results = new UnicodeSet();
979         // TODO should have a test to make sure these are in exemplars.
980         results.add(file.getStringValue("//ldml/delimiters/quotationEnd"));
981         results.add(file.getStringValue("//ldml/delimiters/quotationStart"));
982         results.add(file.getStringValue("//ldml/delimiters/alternateQuotationEnd"));
983         results.add(file.getStringValue("//ldml/delimiters/alternateQuotationStart"));
984         return results;
985     }
986 
987     // TODO Add as utility to CLDRFile
getNumericExemplars(CLDRFile file)988     static UnicodeSet getNumericExemplars(CLDRFile file) {
989         UnicodeSet results = new UnicodeSet();
990         String defaultNumberingSystem = file.getStringValue("//ldml/numbers/defaultNumberingSystem");
991         String nativeNumberingSystem = file.getStringValue("//ldml/numbers/otherNumberingSystems/native");
992         // "//ldml/numbers/otherNumberingSystems/native"
993         addNumberingSystem(file, results, "latn");
994         if (!defaultNumberingSystem.equals("latn")) {
995             addNumberingSystem(file, results, defaultNumberingSystem);
996         }
997         if (!nativeNumberingSystem.equals("latn") && !nativeNumberingSystem.equals(defaultNumberingSystem)) {
998             addNumberingSystem(file, results, nativeNumberingSystem);
999         }
1000         return results;
1001     }
1002 
addNumberingSystem(CLDRFile file, UnicodeSet results, String numberingSystem)1003     public static void addNumberingSystem(CLDRFile file, UnicodeSet results, String numberingSystem) {
1004         String digits = supplementalDataInfo.getDigits(numberingSystem);
1005         results.addAll(digits);
1006         addSymbol(file, numberingSystem, "decimal", results);
1007         addSymbol(file, numberingSystem, "group", results);
1008         addSymbol(file, numberingSystem, "minusSign", results);
1009         addSymbol(file, numberingSystem, "percentSign", results);
1010         addSymbol(file, numberingSystem, "plusSign", results);
1011     }
1012 
addSymbol(CLDRFile file, String numberingSystem, String key, UnicodeSet results)1013     public static void addSymbol(CLDRFile file, String numberingSystem, String key, UnicodeSet results) {
1014         String symbol = file.getStringValue("//ldml/numbers/symbols[@numberSystem=\"" + numberingSystem + "\"]/" +
1015             key);
1016         results.add(symbol);
1017     }
1018 }
1019