1 package org.unicode.cldr.tool;
2 
3 import java.io.IOException;
4 import java.io.PrintWriter;
5 import java.util.BitSet;
6 import java.util.Collection;
7 import java.util.HashMap;
8 import java.util.HashSet;
9 import java.util.LinkedHashSet;
10 import java.util.List;
11 import java.util.Map;
12 import java.util.Map.Entry;
13 import java.util.Set;
14 import java.util.TreeMap;
15 import java.util.TreeSet;
16 import java.util.regex.Matcher;
17 
18 import org.unicode.cldr.draft.FileUtilities;
19 import org.unicode.cldr.draft.Keyboard;
20 import org.unicode.cldr.draft.Keyboard.Gesture;
21 import org.unicode.cldr.draft.Keyboard.Iso;
22 import org.unicode.cldr.draft.Keyboard.KeyMap;
23 import org.unicode.cldr.draft.Keyboard.Output;
24 import org.unicode.cldr.draft.Keyboard.TransformStatus;
25 import org.unicode.cldr.draft.Keyboard.TransformType;
26 import org.unicode.cldr.draft.Keyboard.Transforms;
27 import org.unicode.cldr.draft.KeyboardModifierSet;
28 import org.unicode.cldr.tool.Option.Options;
29 import org.unicode.cldr.util.CLDRConfig;
30 import org.unicode.cldr.util.CLDRFile;
31 import org.unicode.cldr.util.CLDRFile.WinningChoice;
32 import org.unicode.cldr.util.CLDRPaths;
33 import org.unicode.cldr.util.CLDRTool;
34 import org.unicode.cldr.util.CldrUtility;
35 import org.unicode.cldr.util.Counter;
36 import org.unicode.cldr.util.Factory;
37 import org.unicode.cldr.util.FileCopier;
38 import org.unicode.cldr.util.LanguageTagCanonicalizer;
39 import org.unicode.cldr.util.Log;
40 import org.unicode.cldr.util.PatternCache;
41 import org.unicode.cldr.util.SupplementalDataInfo;
42 import org.unicode.cldr.util.TransliteratorUtilities;
43 import org.unicode.cldr.util.UnicodeSetPrettyPrinter;
44 
45 import com.google.common.base.Joiner;
46 import com.ibm.icu.impl.Relation;
47 import com.ibm.icu.impl.Row;
48 import com.ibm.icu.impl.Row.R2;
49 import com.ibm.icu.impl.Row.R3;
50 import com.ibm.icu.impl.Utility;
51 import com.ibm.icu.lang.UCharacter;
52 import com.ibm.icu.lang.UProperty;
53 import com.ibm.icu.lang.UScript;
54 import com.ibm.icu.text.Collator;
55 import com.ibm.icu.text.Transliterator;
56 import com.ibm.icu.text.UnicodeSet;
57 import com.ibm.icu.util.ICUUncheckedIOException;
58 import com.ibm.icu.util.ULocale;
59 
60 @CLDRTool(alias = "showkeyboards", description = "Generate keyboard charts")
61 public class ShowKeyboards {
62     // TODO - fix ' > xxx
63     // TODO - check for bad locale ids
64 
65     private static final String ABOUT_KEYBOARD_CHARTS = "<p>For more information, see <a target='ABOUT_KB' href='http://cldr.unicode.org/index/charts/keyboards'>About Keyboard Charts</a>.</p>";
66     private static String keyboardChartDir;
67     private static String keyboardChartLayoutsDir;
68     static final CLDRConfig testInfo = ToolConfig.getToolInstance();
69     static final Factory factory = testInfo.getCldrFactory();
70 
71     static final boolean SHOW_BACKGROUND = false;
72 
73     final static Options myOptions = new Options();
74 
75     enum MyOptions {
76         idFilter(".+", ".*", "Filter the information based on id, using a regex argument."), sourceDirectory(".+", CLDRPaths.BASE_DIRECTORY + "keyboards/",
77             "The source directory. CURRENTLY CAN’T BE CHANGED!!"), targetDirectory(".+", CLDRPaths.CHART_DIRECTORY + "keyboards/",
78                 "The target directory."), layouts(null, null,
79                     "Only create html files for keyboard layouts"), repertoire(null, null, "Only create html files for repertoire"),;
80         // boilerplate
81         final Option option;
82 
MyOptions(String argumentPattern, String defaultArgument, String helpText)83         MyOptions(String argumentPattern, String defaultArgument, String helpText) {
84             option = myOptions.add(this, argumentPattern, defaultArgument, helpText);
85         }
86     }
87 
    /** Supplemental data, obtained through {@code SupplementalDataInfo.getInstance()} when this class is initialized. */
    static SupplementalDataInfo supplementalDataInfo = SupplementalDataInfo.getInstance();
89 
90     // *********************************************
91     // Temporary, for some simple testing
92     // *********************************************
main(String[] args)93     public static void main(String[] args) throws IOException {
94         myOptions.parse(MyOptions.idFilter, args, true);
95         String idPattern = MyOptions.idFilter.option.getValue();
96         keyboardChartDir = MyOptions.targetDirectory.option.getValue();
97         keyboardChartLayoutsDir = keyboardChartDir + "/layouts/";
98 
99         FileCopier.ensureDirectoryExists(keyboardChartDir);
100         FileCopier.copy(ShowKeyboards.class, "keyboards-index.html", keyboardChartDir, "index.html");
101 
102         Matcher idMatcher = PatternCache.get(idPattern).matcher("");
103         try {
104             Log.setLog(CLDRPaths.LOG_DIRECTORY + "keyboard-log.txt");
105         } catch (IOException e) {
106             throw new ICUUncheckedIOException(e);
107         }
108         boolean layoutsOnly = MyOptions.layouts.option.doesOccur();
109         boolean repertoireOnly = MyOptions.repertoire.option.doesOccur();
110 
111         if (!repertoireOnly) {
112             showHtml(idMatcher);
113         }
114         if (!layoutsOnly) {
115             showRepertoire(idMatcher);
116         }
117     }
118 
showRepertoire(Matcher idMatcher)119     public static void showRepertoire(Matcher idMatcher) {
120         Set<Exception> totalErrors = new LinkedHashSet<>();
121         Set<Exception> errors = new LinkedHashSet<>();
122         UnicodeSet controls = new UnicodeSet("[:Cc:]").freeze();
123         // check what the characters are, excluding controls.
124         Map<Id, UnicodeSet> id2unicodeset = new TreeMap<>();
125         Set<String> totalModifiers = new LinkedHashSet<>();
126         Relation<String, Id> locale2ids = Relation.of(new TreeMap<String, Set<Id>>(), TreeSet.class);
127         LanguageTagCanonicalizer canonicalizer = new LanguageTagCanonicalizer();
128         IdInfo idInfo = new IdInfo();
129         for (String platformId : Keyboard.getPlatformIDs()) {
130             //Platform p = Keyboard.getPlatform(platformId);
131             // System.out.println(platformId + "\t" + p.getHardwareMap());
132             for (String keyboardId : Keyboard.getKeyboardIDs(platformId)) {
133                 if (!idMatcher.reset(keyboardId).matches()) {
134                     continue;
135                 }
136                 Keyboard keyboard = Keyboard.getKeyboard(platformId, keyboardId, errors);
137                 for (Exception error : errors) {
138                     totalErrors.add(new IllegalArgumentException(keyboardId, error));
139                 }
140                 UnicodeSet unicodeSet = keyboard.getPossibleResults().removeAll(controls);
141                 final Id id = new Id(keyboardId, keyboard.getPlatformVersion());
142                 idInfo.add(id, unicodeSet);
143                 String canonicalLocale = canonicalizer.transform(id.locale).replace('_', '-');
144                 if (!id.locale.equals(canonicalLocale)) {
145                     totalErrors.add(new IllegalArgumentException("Non-canonical id: " + id.locale + "\t=>\t" + canonicalLocale));
146                 }
147                 id2unicodeset.put(id, unicodeSet.freeze());
148                 locale2ids.put(id.locale, id);
149                 System.out.println(id.toString().replace('/', '\t') + "\t" + keyboard.getNames());
150                 for (KeyMap keymap : keyboard.getKeyMaps()) {
151                     totalModifiers.add(keymap.getModifiers().toString());
152                 }
153             }
154         }
155         if (totalErrors.size() != 0) {
156             System.out.println("Errors\t" + Joiner.on(System.lineSeparator() + "\t")
157                 .join(totalErrors));
158         }
159         for (String item : totalModifiers) {
160             System.out.println(item);
161         }
162         // logInfo.put(Row.of("k-cldr",common), keyboardId);
163         try {
164             FileCopier.copy(ShowKeyboards.class, "keyboards.css", keyboardChartDir, "index.css");
165             FormattedFileWriter.copyIncludeHtmls(keyboardChartDir);
166 
167             PrintWriter out = FileUtilities.openUTF8Writer(keyboardChartDir, "chars2keyboards.html");
168             String[] headerAndFooter = new String[2];
169 
170             ShowData.getChartTemplate(
171                 "Characters → Keyboards",
172                 ToolConstants.CHART_DISPLAY_VERSION,
173                 "",
174                 headerAndFooter, null, false);
175             out.println(headerAndFooter[0] + ABOUT_KEYBOARD_CHARTS);
176 
177             // printTop("Characters → Keyboards", out);
178             idInfo.print(out);
179             // printBottom(out);
180             out.println(headerAndFooter[1]);
181             out.close();
182 
183             out = FileUtilities.openUTF8Writer(keyboardChartDir, "keyboards2chars.html");
184             ShowData.getChartTemplate(
185                 "Keyboards → Characters",
186                 ToolConstants.CHART_DISPLAY_VERSION,
187                 "",
188                 headerAndFooter, null, false);
189             out.println(headerAndFooter[0]
190                 + ABOUT_KEYBOARD_CHARTS);
191             // printTop("Keyboards → Characters", out);
192             showLocaleToCharacters(out, id2unicodeset, locale2ids);
193             // printBottom(out);
194             out.println(headerAndFooter[1]);
195             out.close();
196         } catch (IOException e1) {
197             e1.printStackTrace();
198         }
199         for (Entry<R2<String, UnicodeSet>, Set<Id>> entry : logInfo.keyValuesSet()) {
200             IdSet idSet = new IdSet();
201             idSet.addAll(entry.getValue());
202             Log.logln(entry.getKey().get0() + "\t" + entry.getKey().get1().toPattern(false) + "\t"
203                 + idSet.toString(idInfo.allIds));
204         }
205         Log.close();
206     }
207 
showHtml(Matcher idMatcher)208     private static void showHtml(Matcher idMatcher) throws IOException {
209         Set<Exception> errors = new LinkedHashSet<>();
210         Relation<String, Row.R3<String, String, String>> locale2keyboards = Relation.of(
211             new TreeMap<String, Set<Row.R3<String, String, String>>>(), TreeSet.class);
212         Map<String, String> localeIndex = new TreeMap<>();
213 
214         for (String platformId : Keyboard.getPlatformIDs()) {
215             //Platform p = Keyboard.getPlatform(platformId);
216             // System.out.println(platformId + "\t" + p.getHardwareMap());
217             for (String keyboardId : Keyboard.getKeyboardIDs(platformId)) {
218                 if (!idMatcher.reset(keyboardId).matches()) {
219                     continue;
220                 }
221                 String baseLocale = keyboardId.substring(0, keyboardId.indexOf('-'));
222                 String locale = keyboardId.substring(0, keyboardId.indexOf("-t-"));
223                 locale2keyboards.put(baseLocale, Row.of(platformId, locale, keyboardId));
224 
225                 final String localeName = testInfo.getEnglish().getName(baseLocale, true);
226                 localeIndex.put(localeName, baseLocale);
227             }
228         }
229 
230         FileCopier.ensureDirectoryExists(keyboardChartLayoutsDir);
231         FileCopier.copy(ShowKeyboards.class, "keyboards.css", keyboardChartLayoutsDir, "index.css");
232         FormattedFileWriter.copyIncludeHtmls(keyboardChartLayoutsDir);
233         PrintWriter index = FileUtilities.openUTF8Writer(keyboardChartLayoutsDir, "index.html");
234         String[] headerAndFooter = new String[2];
235         ShowData.getChartTemplate(
236             "Keyboard Layout Index",
237             ToolConstants.CHART_DISPLAY_VERSION,
238             "",
239             headerAndFooter, "Keyboard Index", false);
240         index
241             .println(headerAndFooter[0] + ABOUT_KEYBOARD_CHARTS);
242         // printTop("Keyboard Layout Index", index);
243         index.println("<ol>");
244         for (Entry<String, String> entry : localeIndex.entrySet()) {
245             index.println("<li><a href='" + entry.getValue() + ".html'>"
246                 + entry.getKey() + "</a>"
247                 + " [" + entry.getValue() + "]" +
248                 "</li>");
249         }
250         index.println("</ol>");
251         index.println(headerAndFooter[1]);
252         // printBottom(index);
253         index.close();
254         // FileUtilities.copyFile(ShowKeyboards.class, "keyboards.css", keyboardChartLayoutsDir);
255 
256         for (Entry<String, Set<R3<String, String, String>>> localeKeyboards : locale2keyboards.keyValuesSet()) {
257             String locale = localeKeyboards.getKey();
258             final String localeName = testInfo.getEnglish().getName(locale);
259 
260             // String localeNameString = localeName.replace(' ', '_').toLowerCase(Locale.ENGLISH);
261             PrintWriter out = FileUtilities.openUTF8Writer(keyboardChartLayoutsDir, locale + ".html");
262             ShowData.getChartTemplate(
263                 "Layouts: " + localeName + " (" + locale + ")",
264                 ToolConstants.CHART_DISPLAY_VERSION,
265                 "",
266                 headerAndFooter, null, false);
267             out.println(headerAndFooter[0] + ABOUT_KEYBOARD_CHARTS);
268             // printTop("Layouts: " + localeName + " (" + locale + ")", out);
269             Set<R3<String, String, String>> keyboards = localeKeyboards.getValue();
270             for (R3<String, String, String> platformKeyboard : keyboards) {
271                 String platformId = platformKeyboard.get0();
272                 String keyboardId = platformKeyboard.get2();
273                 // System.out.println(platformId + "\t" + p.getHardwareMap());
274                 Keyboard keyboard = Keyboard.getKeyboard(platformId, keyboardId, errors);
275                 showErrors(errors);
276                 Set<String> names = keyboard.getNames();
277                 String platformFromKeyboardId = Keyboard.getPlatformId(keyboardId);
278                 String printId = platformId.equals(platformFromKeyboardId) ? keyboardId : keyboardId + "/und";
279                 out.println("<h2>" + CldrUtility.getDoubleLinkedText(printId, printId)
280                     + (names.size() == 0 ? "" : " " + names)
281                     + "</h2>");
282 
283                 Transforms transforms = keyboard.getTransforms().get(TransformType.SIMPLE);
284 
285                 out.println("<table class='keyboards'><tr>");
286                 for (KeyMap map : keyboard.getKeyMaps()) {
287                     KeyboardModifierSet mods = map.getModifiers();
288                     out.println("<td class='keyboardTD'><table class='keyboard'>");
289                     // KeyboardModifierSet modifiers = map.getModifiers();
290                     Map<Iso, Output> isoMap = map.getIso2Output();
291                     for (Keyboard.IsoRow row : Keyboard.IsoRow.values()) {
292                         out.println("<tr>");
293                         for (Iso isoValue : Iso.values()) {
294                             if (isoValue.isoRow != row) {
295                                 continue;
296                             }
297                             Output output = isoMap.get(isoValue);
298                             if (output == null) {
299                                 out.println("<td class='x'>&nbsp;</td>");
300                                 continue;
301                             }
302                             String chars = output.getOutput();
303                             TransformStatus transformStatus = output.getTransformStatus();
304                             StringBuilder hover = new StringBuilder();
305                             if (transformStatus == TransformStatus.DEFAULT && transforms != null) {
306                                 Map<String, String> map2 = transforms.getMatch(chars);
307                                 add(map2, hover);
308                             }
309                             Map<Gesture, List<String>> gestures = output.getGestures();
310                             if (!gestures.isEmpty()) {
311                                 add(gestures, hover);
312                             }
313                             final String longPress = hover.length() == 0 ? ""
314                                 : " title='" + hover + "'";
315                             out.println("<td class='" + (hover.length() == 0 ? 'm' : 'h') +
316                                 "'" + longPress + ">"
317                                 + toSafeHtml(chars) + "</td>");
318                         }
319                         out.println("</tr>");
320                     }
321                     String modsString = mods.getShortInput();
322                     if (modsString.isEmpty()) {
323                         modsString = "\u00A0";
324                     } else if (modsString.length() > 20) {
325                         modsString = modsString.substring(0, 20) + "…";
326                     }
327                     out.println("</table><span class='modifiers'>"
328                         + TransliteratorUtilities.toHTML.transform(modsString) +
329                         "</span></td>");
330                 }
331                 out.println("</tr></table>");
332             }
333             index.println(headerAndFooter[1]);
334             // printBottom(out);
335             out.close();
336         }
337         System.out.println("Failing Invisibles: " + FAILING_INVISIBLE.retainAll(INVISIBLE));
338     }
339 
showErrors(Set<Exception> errors)340     private static void showErrors(Set<Exception> errors) {
341         for (Exception error : errors) {
342             String title = error.getMessage().contains("No minimal data for") ? "Warning" : "Error";
343             System.out.println("\t*" + title + ":\t" + error);
344         }
345     }
346 
347     static Transliterator TO_SAFE_HTML;
348     static {
349         StringBuilder rules = new StringBuilder(TransliteratorUtilities.toHTML.toRules(false));
350         for (char i = 0; i < 0x20; ++i) {
String.valueOf(i)351             addRule(String.valueOf(i), "^" + String.valueOf((char) (i + 0x40)), rules);
352         }
353         String[][] map = {
354             // {"\u0020","sp"},
355             { "\u007F", "del" },
356             { "\u00A0", "nbsp" },
357             { "\u00AD", "shy" },
358             { "\u200B", "zwsp" },
359             { "\u200C", "zwnj" },
360             { "\u200D", "zwj" },
361             { "\u200E", "lrm" },
362             { "\u200F", "rlm" },
363             { "\u202F", "nnbs" },
364             { "\uFEFF", "bom" },
365             { "\u180B", "mvs1" },
366             { "\u180C", "mvs2" },
367             { "\u180D", "mvs3" },
368             { "\u180E", "mvs" },
369             // {"\uF8FF","appl"},
370         };
371         for (String[] items : map) {
372             final String fromItem = items[0];
373             final String toItem = items[1];
addRule(fromItem, toItem, rules)374             addRule(fromItem, toItem, rules);
375         }
376         TO_SAFE_HTML = Transliterator.createFromRules("none", rules.toString(), Transliterator.FORWARD);
377     }
378 
addRule(final String fromItem, final String toItem, StringBuilder rules)379     public static void addRule(final String fromItem, final String toItem, StringBuilder rules) {
380         rules.append("'"
381             + fromItem
382             + "'>"
383             + "'<span class=\"cc\">"
384             + toItem
385             + "</span>'"
386             + ";"
387             + System.lineSeparator());
388     }
389 
390     static UnicodeSet INVISIBLE = new UnicodeSet("[[:C:][:Z:][:whitespace:][:Default_Ignorable_Code_Point:]-[\\u0020]]").freeze();
391     static UnicodeSet FAILING_INVISIBLE = new UnicodeSet();
392 
toSafeHtml(Object hover)393     public static String toSafeHtml(Object hover) {
394         String result = TO_SAFE_HTML.transform(hover.toString());
395         if (INVISIBLE.containsSome(result)) {
396             FAILING_INVISIBLE.addAll(result);
397         }
398         return result;
399     }
400 
add(Map<K, V> map2, StringBuilder hover)401     private static <K, V> void add(Map<K, V> map2, StringBuilder hover) {
402         if (!map2.isEmpty()) {
403             for (Entry<K, V> entry : map2.entrySet()) {
404                 if (hover.length() != 0) {
405                     hover.append("; ");
406                 }
407                 final K key = entry.getKey();
408                 String keyString = key == Gesture.LONGPRESS ? "LP" : key.toString();
409                 final V value = entry.getValue();
410                 String valueString = value instanceof Collection
411                     ? Joiner.on(" ").join((Collection) value)
412                     : value.toString();
413                 hover.append(TransliteratorUtilities.toHTML.transform(keyString)).append("→")
414                     .append(TransliteratorUtilities.toHTML.transform(valueString));
415             }
416         }
417     }
418 
419     // public static void printTop(String title, PrintWriter out) {
420     // out.println(
421     // "<html>\n" +
422     // "<head>\n" +
423     // "<meta http-equiv='Content-Type' content='text/html; charset=UTF-8'/>\n" +
424     // "<link rel='stylesheet' type='text/css' href='keyboards.css'>" +
425     // "<title>" + title + "</title>\n" +
426     // "</head>\n" +
427     // "<body>\n" +
428     // "<h1>DRAFT " +
429     // title +
430     // "</h1>\n" +
431     // "<p>For more information, see <a href='http://cldr.unicode.org/index/charts/keyboards'>Keyboard Charts</a>.</p>"
432     // );
433     // }
434     //
435     // public static void printBottom(PrintWriter pw) {
436     // pw.println(
437     // "</body>\n" +
438     // "</html>"
439     // );
440     // }
441 
showLocaleToCharacters(PrintWriter out, Map<Id, UnicodeSet> id2unicodeset, Relation<String, Id> locale2ids)442     public static void showLocaleToCharacters(PrintWriter out, Map<Id, UnicodeSet> id2unicodeset,
443         Relation<String, Id> locale2ids) {
444 
445         TablePrinter t = new TablePrinter()
446             .addColumn("Name").setSpanRows(true).setBreakSpans(true).setSortPriority(0)
447             .setCellAttributes("class='cell'")
448             .addColumn("Locale").setSpanRows(true).setBreakSpans(true).setCellAttributes("class='cell'")
449             .addColumn("Platform").setSpanRows(true).setCellAttributes("class='cell'")
450             .addColumn("Variant").setCellAttributes("class='cell'")
451             .addColumn("Script").setCellAttributes("class='cell'")
452             .addColumn("Statistics").setCellAttributes("class='cell'")
453             .addColumn("Characters").setSpanRows(true).setCellAttributes("class='cell'");
454 
455         Map<String, UnicodeSet> commonSets = new HashMap<>();
456         Counter<String> commonCount = new Counter<>();
457         Set<String> commonDone = new HashSet<>();
458 
459         for (Entry<String, Set<Id>> localeAndIds : locale2ids.keyValuesSet()) {
460             final String key = localeAndIds.getKey();
461             final Set<Id> keyboardIds = localeAndIds.getValue();
462 
463             // System.out.println();
464             final String localeName = testInfo.getEnglish().getName(key, true);
465             final String linkedLocaleName = CldrUtility.getDoubleLinkedText(key, localeName);
466             final ULocale uLocale = ULocale.forLanguageTag(key);
467             String script = uLocale.getScript();
468             String writtenLanguage = uLocale.getLanguage() + (script.isEmpty() ? "" : "_" + script);
469             CLDRFile cldrFile = null;
470             try {
471                 cldrFile = factory.make(writtenLanguage, true);
472             } catch (Exception e) {
473             }
474 
475             // final String heading = uLocale.getDisplayName(ULocale.ENGLISH)
476             // + "\t" + ULocale.addLikelySubtags(uLocale).getScript()
477             // + "\t";
478             UnicodeSet common = UnicodeSet.EMPTY;
479             final String likelyScript = ULocale.addLikelySubtags(uLocale).getScript();
480             commonCount.clear();
481             for (String platform : Keyboard.getPlatformIDs()) {
482                 commonSets.put(platform, UnicodeSet.EMPTY);
483             }
484             if (keyboardIds.size() > 1) {
485                 common = UnicodeSet.EMPTY;
486                 for (Id keyboardId : keyboardIds) {
487                     final UnicodeSet keyboardSet = id2unicodeset.get(keyboardId);
488                     if (common == UnicodeSet.EMPTY) {
489                         common = new UnicodeSet(keyboardSet);
490                     } else {
491                         common.retainAll(keyboardSet);
492                     }
493                     UnicodeSet platformCommon = commonSets.get(keyboardId.platform);
494                     commonCount.add(keyboardId.platform, 1);
495                     if (platformCommon == UnicodeSet.EMPTY) {
496                         commonSets.put(keyboardId.platform, new UnicodeSet(keyboardSet));
497                     } else {
498                         platformCommon.retainAll(keyboardSet);
499                     }
500                 }
501                 common.freeze();
502                 t.addRow()
503                     .addCell(linkedLocaleName) // name
504                     .addCell(key) // locale
505                     .addCell("ALL") // platform
506                     .addCell("COMMON") // variant
507                     .addCell(likelyScript) // script
508                     .addCell(getInfo(null, common, cldrFile)) // stats
509                     .addCell(safeUnicodeSet(common)) // characters
510                     .finishRow();
511 
512                 // System.out.println(
513                 // locale + "\tCOMMON\t\t-"
514                 // + "\t" + heading + getInfo(common, cldrFile)
515                 // + "\t" + common.toPattern(false));
516             }
517             commonDone.clear();
518             for (Id keyboardId : keyboardIds) {
519                 UnicodeSet platformCommon = commonSets.get(keyboardId.platform);
520                 if (!commonDone.contains(keyboardId.platform)) {
521                     commonDone.add(keyboardId.platform);
522                     if (commonCount.get(keyboardId.platform) <= 1) {
523                         platformCommon = UnicodeSet.EMPTY;
524                         commonSets.put(keyboardId.platform, platformCommon);
525                     } else if (platformCommon.size() > 0) {
526                         // get stats for all, but otherwise remove common.
527                         final String stats = getInfo(null, platformCommon, cldrFile);
528                         platformCommon.removeAll(common).freeze();
529                         commonSets.put(keyboardId.platform, platformCommon);
530                         t.addRow()
531                             .addCell(linkedLocaleName) // name
532                             .addCell(key) // locale
533                             .addCell(keyboardId.platform) // platform
534                             .addCell("COMMON") // variant
535                             .addCell(likelyScript) // script
536                             .addCell(stats) // stats
537                             .addCell(safeUnicodeSet(platformCommon)) // characters
538                             .finishRow();
539                     }
540                 }
541                 final UnicodeSet current2 = id2unicodeset.get(keyboardId);
542                 final UnicodeSet remainder = new UnicodeSet(current2)
543                     .removeAll(common)
544                     .removeAll(platformCommon);
545 
546                 t.addRow()
547                     .addCell(linkedLocaleName) // name
548                     .addCell(key) // locale
549                     .addCell(keyboardId.platform) // platform
550                     .addCell(keyboardId.variant) // variant
551                     .addCell(likelyScript) // script
552                     .addCell(getInfo(keyboardId, current2, cldrFile)) // stats
553                     .addCell(safeUnicodeSet(remainder)) // characters
554                     .finishRow();
555                 // System.out.println(
556                 // keyboardId.toString().replace('/','\t')
557                 // + "\t" + keyboardId.platformVersion
558                 // + "\t" + heading + getInfo(current2, cldrFile)
559                 // + "\t" + remainder.toPattern(false));
560             }
561         }
562         out.println(t.toTable());
563     }
564 
565     static UnicodeSetPrettyPrinter prettyPrinter = new UnicodeSetPrettyPrinter()
566         .setOrdering(Collator.getInstance(ULocale.ROOT))
567         .setSpaceComparator(Collator.getInstance(ULocale.ROOT).setStrength2(Collator.PRIMARY));
568 
safeUnicodeSet(UnicodeSet unicodeSet)569     public static String safeUnicodeSet(UnicodeSet unicodeSet) {
570         return TransliteratorUtilities.toHTML.transform(prettyPrinter.format(unicodeSet));
571     }
572 
573     static class IdInfo {
574         final Collator collator = Collator.getInstance(ULocale.ENGLISH);
575         BitSet bitset = new BitSet();
576         BitSet bitset2 = new BitSet();
577         @SuppressWarnings("unchecked")
578         TreeMap<String, IdSet>[] charToKeyboards = new TreeMap[UScript.CODE_LIMIT];
579         {
580             collator.setStrength(Collator.IDENTICAL);
581             for (int i = 0; i < charToKeyboards.length; ++i) {
582                 charToKeyboards[i] = new TreeMap<>(collator);
583             }
584         }
585         IdSet allIds = new IdSet();
586 
add(Id id, UnicodeSet unicodeSet)587         public void add(Id id, UnicodeSet unicodeSet) {
588             allIds.add(id);
589             for (String s : unicodeSet) {
590                 int script = getScriptExtensions(s, bitset);
591                 if (script >= 0) {
592                     addToScript(script, id, s);
593                 } else {
594                     for (int script2 = bitset.nextSetBit(0); script2 >= 0; script2 = bitset.nextSetBit(script2 + 1)) {
595                         addToScript(script2, id, s);
596                     }
597                 }
598             }
599         }
600 
getScriptExtensions(String s, BitSet outputBitset)601         public int getScriptExtensions(String s, BitSet outputBitset) {
602             final int firstCodePoint = s.codePointAt(0);
603             int result = UScript.getScriptExtensions(firstCodePoint, outputBitset);
604             final int firstCodePointCount = Character.charCount(firstCodePoint);
605             if (s.length() == firstCodePointCount) {
606                 return result;
607             }
608             for (int i = firstCodePointCount; i < s.length();) {
609                 int ch = s.codePointAt(i);
610                 UScript.getScriptExtensions(ch, bitset2);
611                 outputBitset.or(bitset2);
612                 i += Character.charCount(ch);
613             }
614             // remove inherited, if there is anything else; then remove common if there is anything else
615             int cardinality = outputBitset.cardinality();
616             if (cardinality > 1) {
617                 if (outputBitset.get(UScript.INHERITED)) {
618                     outputBitset.clear(UScript.INHERITED);
619                     --cardinality;
620                 }
621                 if (cardinality > 1) {
622                     if (outputBitset.get(UScript.COMMON)) {
623                         outputBitset.clear(UScript.COMMON);
624                         --cardinality;
625                     }
626                 }
627             }
628             if (cardinality == 1) {
629                 return outputBitset.nextSetBit(0);
630             } else {
631                 return -cardinality;
632             }
633         }
634 
addToScript(int script, Id id, String s)635         public void addToScript(int script, Id id, String s) {
636             TreeMap<String, IdSet> charToKeyboard = charToKeyboards[script];
637             IdSet idSet = charToKeyboard.get(s);
638             if (idSet == null) {
639                 charToKeyboard.put(s, idSet = new IdSet());
640             }
641             idSet.add(id);
642         }
643 
print(PrintWriter pw)644         public void print(PrintWriter pw) {
645 
646             TablePrinter t = new TablePrinter()
647                 .addColumn("Script").setSpanRows(true).setCellAttributes("class='s'")
648                 .addColumn("Char").setCellAttributes("class='ch'")
649                 .addColumn("Code").setCellAttributes("class='c'")
650                 .addColumn("Name").setCellAttributes("class='n'")
651                 .addColumn("Keyboards").setSpanRows(true).setCellAttributes("class='k'");
652             Set<String> missingScripts = new TreeSet<>();
653             UnicodeSet notNFKC = new UnicodeSet("[:nfkcqc=n:]");
654             UnicodeSet COMMONINHERITED = new UnicodeSet("[[:sc=common:][:sc=inherited:]]");
655 
656             for (int script = 0; script < charToKeyboards.length; ++script) {
657                 UnicodeSet inScript = new UnicodeSet().applyIntPropertyValue(UProperty.SCRIPT, script).removeAll(
658                     notNFKC);
659 
660                 // UnicodeSet fullScript = new UnicodeSet(inScript);
661                 // int fullScriptSize = inScript.size();
662                 if (inScript.size() == 0) {
663                     continue;
664                 }
665                 final TreeMap<String, IdSet> charToKeyboard = charToKeyboards[script];
666                 final String scriptName = UScript.getName(script);
667                 final String linkedScriptName = CldrUtility.getDoubleLinkedText(UScript.getShortName(script),
668                     scriptName);
669                 if (charToKeyboard.size() == 0) {
670                     missingScripts.add(scriptName);
671                     continue;
672                 }
673 
674                 // also check to see that at least one item is not all common
675                 check: if (script != UScript.COMMON && script != UScript.INHERITED) {
676                     for (String s : charToKeyboard.keySet()) {
677                         if (!COMMONINHERITED.containsAll(s)) {
678                             break check;
679                         }
680                     }
681                     missingScripts.add(scriptName);
682                     continue;
683                 }
684 
685                 String last = "";
686                 for (Entry<String, IdSet> entry : charToKeyboard.entrySet()) {
687                     String s = entry.getKey();
688                     IdSet value = entry.getValue();
689                     final String keyboardsString = value.toString(allIds);
690                     if (!s.equalsIgnoreCase(last)) {
691                         if (s.equals("\u094D\u200C")) { // Hack, because the browsers width is way off
692                             s = "\u094D";
693                         }
694                         String name = UCharacter.getName(s, " + ");
695                         if (name == null) {
696                             name = "[no name]";
697                         }
698                         String ch = s.equals("\u0F39") ? "\uFFFD" : s;
699                         t.addRow()
700                             .addCell(linkedScriptName)
701                             .addCell((SHOW_BACKGROUND ? "<span class='ybg'>" : "") +
702                                 TransliteratorUtilities.toHTML.transform(ch)
703                                 + (SHOW_BACKGROUND ? "</span>" : ""))
704                             .addCell(Utility.hex(s, 4, " + "))
705                             .addCell(name)
706                             .addCell(keyboardsString)
707                             .finishRow();
708                     }
709                     inScript.remove(s);
710                     last = s;
711                 }
712                 if (inScript.size() != 0 && script != UScript.UNKNOWN) {
713                     // String pattern;
714                     // if (inScript.size() < 255 || inScript.size()*4 < fullScriptSize) {
715                     // } else {
716                     // fullScript.removeAll(inScript);
717                     // inScript = new UnicodeSet("[[:sc=" + UScript.getShortName(script) + ":]-" +
718                     // fullScript.toPattern(false) + "]");
719                     // }
720                     t.addRow()
721                         .addCell(linkedScriptName)
722                         .addCell("")
723                         .addCell(String.valueOf(inScript.size()))
724                         .addCell("missing (NFKC)!")
725                         .addCell(safeUnicodeSet(inScript))
726                         .finishRow();
727                 }
728             }
729             t.addRow()
730                 .addCell("")
731                 .addCell("")
732                 .addCell(String.valueOf(missingScripts.size()))
733                 .addCell("missing scripts!")
734                 .addCell(missingScripts.toString())
735                 .finishRow();
736             pw.println(t.toTable());
737         }
738     }
739 
getInfo(Id keyboardId, UnicodeSet common, CLDRFile cldrFile)740     private static String getInfo(Id keyboardId, UnicodeSet common, CLDRFile cldrFile) {
741         Counter<String> results = new Counter<>();
742         for (String s : common) {
743             int first = s.codePointAt(0); // first char is good enough
744             results.add(UScript.getShortName(UScript.getScript(first)), 1);
745         }
746         results.remove("Zyyy");
747         results.remove("Zinh");
748         results.remove("Zzzz");
749 
750         if (cldrFile != null) {
751             UnicodeSet exemplars = new UnicodeSet(cldrFile.getExemplarSet("", WinningChoice.WINNING));
752             UnicodeSet auxExemplars = cldrFile.getExemplarSet("auxiliary", WinningChoice.WINNING);
753             if (auxExemplars != null) {
754                 exemplars.addAll(auxExemplars);
755             }
756             UnicodeSet punctuationExemplars = cldrFile.getExemplarSet("punctuation", WinningChoice.WINNING);
757             if (punctuationExemplars != null) {
758                 exemplars.addAll(punctuationExemplars);
759             }
760             exemplars.addAll(getNumericExemplars(cldrFile));
761             exemplars.addAll(getQuotationMarks(cldrFile));
762             exemplars.add(" ");
763             addComparison(keyboardId, common, exemplars, results);
764         }
765         StringBuilder b = new StringBuilder();
766         for (String entry : results.keySet()) {
767             if (b.length() != 0) {
768                 b.append(", ");
769             }
770             b.append(entry).append(":").append(results.get(entry));
771         }
772         return b.toString();
773     }
774 
addComparison(Id keyboardId, UnicodeSet keyboard, UnicodeSet exemplars, Counter<String> results)775     private static void addComparison(Id keyboardId, UnicodeSet keyboard, UnicodeSet exemplars,
776         Counter<String> results) {
777         UnicodeSet common = new UnicodeSet(keyboard).retainAll(exemplars);
778         if (common.size() != 0) {
779             results.add("k∩cldr", common.size());
780         }
781         common = new UnicodeSet(keyboard).removeAll(exemplars);
782         if (common.size() != 0) {
783             results.add("k‑cldr", common.size());
784             if (keyboardId != null) {
785                 common.remove(0, 0x7F); // don't care much about ASCII.
786                 logInfo.put(Row.of("k-cldr\t" + keyboardId.getBaseLanguage(), common), keyboardId);
787                 // Log.logln(keyboardId + "\tk-cldr\t" + common.toPattern(false));
788             }
789         }
790         common = new UnicodeSet(exemplars).removeAll(keyboard).remove("ss");
791         if (common.size() != 0) {
792             results.add("cldr‑k", common.size());
793             if (keyboardId != null && SKIP_LOG.containsNone(common)) {
794                 logInfo.put(Row.of("cldr‑k\t" + keyboardId.getBaseLanguage(), common), keyboardId);
795                 // Log.logln(keyboardId + "\tcldr‑k\t" + common.toPattern(false));
796             }
797         }
798     }
799 
    // Frozen set holding the two characters 가 and 一. addComparison() does not log an
    // exemplars-minus-keyboard difference that contains either of them.
    static final UnicodeSet SKIP_LOG = new UnicodeSet("[가一]").freeze();
    // Differences recorded by addComparison(): each key pairs a label ("k-cldr" or "cldr‑k", a tab,
    // then the keyboard's base language) with the differing set; the values are the keyboard Ids
    // that produced that key, kept in a TreeSet.
    static Relation<Row.R2<String, UnicodeSet>, Id> logInfo = Relation.of(new TreeMap<Row.R2<String, UnicodeSet>, Set<Id>>(), TreeSet.class);
802 
803     static class Id implements Comparable<Id> {
804         final String locale;
805         final String platform;
806         final String variant;
807         final String platformVersion;
808 
Id(String input, String platformVersion)809         Id(String input, String platformVersion) {
810             int pos = input.indexOf("-t-k0-");
811             String localeTemp = input.substring(0, pos);
812             locale = ULocale.minimizeSubtags(ULocale.forLanguageTag(localeTemp)).toLanguageTag();
813             pos += 6;
814             int pos2 = input.indexOf('-', pos);
815             if (pos2 > 0) {
816                 platform = input.substring(pos, pos2);
817                 variant = input.substring(pos2 + 1);
818             } else {
819                 platform = input.substring(pos);
820                 variant = "";
821             }
822             this.platformVersion = platformVersion;
823         }
824 
825         @Override
compareTo(Id other)826         public int compareTo(Id other) {
827             int result;
828             if (0 != (result = locale.compareTo(other.locale))) {
829                 return result;
830             }
831             if (0 != (result = platform.compareTo(other.platform))) {
832                 return result;
833             }
834             if (0 != (result = variant.compareTo(other.variant))) {
835                 return result;
836             }
837             return 0;
838         }
839 
840         @Override
toString()841         public String toString() {
842             return locale + "/" + platform + "/" + variant;
843         }
844 
getBaseLanguage()845         public String getBaseLanguage() {
846             int pos = locale.indexOf('-');
847             return pos < 0 ? locale : locale.substring(0, pos);
848         }
849     }
850 
851     static class IdSet {
852         Map<String, Relation<String, String>> data = new TreeMap<>();
853 
add(Id id)854         public void add(Id id) {
855             Relation<String, String> platform2variant = data.get(id.platform);
856             if (platform2variant == null) {
857                 data.put(id.platform, platform2variant = Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class));
858             }
859             platform2variant.put(id.locale, id.variant);
860         }
861 
addAll(Collection<Id> idSet)862         public void addAll(Collection<Id> idSet) {
863             for (Id id : idSet) {
864                 add(id);
865             }
866         }
867 
toString(IdSet allIds)868         public String toString(IdSet allIds) {
869             if (this.equals(allIds)) {
870                 return "*";
871             }
872             StringBuilder b = new StringBuilder();
873             final Set<Entry<String, Relation<String, String>>> entrySet = data.entrySet();
874             boolean first = true;
875             for (Entry<String, Relation<String, String>> entry : entrySet) {
876                 if (first) {
877                     first = false;
878                 } else {
879                     b.append(" ");
880                 }
881                 String key = entry.getKey();
882                 Set<Entry<String, Set<String>>> valueSet = entry.getValue().keyValuesSet();
883                 b.append(key).append(":");
884                 appendLocaleAndVariants(b, valueSet, allIds.data.get(key));
885             }
886             return b.toString();
887         }
888 
appendLocaleAndVariants(StringBuilder b, Set<Entry<String, Set<String>>> set, Relation<String, String> relation)889         private void appendLocaleAndVariants(StringBuilder b, Set<Entry<String, Set<String>>> set,
890             Relation<String, String> relation) {
891             if (set.equals(relation.keyValuesSet())) {
892                 b.append("*");
893                 return;
894             }
895             final int setSize = set.size();
896             if (setSize > 9) {
897                 b.append(setSize).append("/").append(relation.size());
898                 return;
899             }
900             final boolean isSingle = setSize == 1;
901             if (!isSingle) b.append("(");
902             boolean first = true;
903             for (Entry<String, Set<String>> item : set) {
904                 if (first) {
905                     first = false;
906                 } else {
907                     b.append("|");
908                 }
909                 final String key = item.getKey();
910                 b.append(key);
911                 final Set<String> variants = item.getValue();
912                 final int size = variants.size();
913                 if (size != 0) {
914                     if (size == 1) {
915                         String firstOne = variants.iterator().next();
916                         if (firstOne.isEmpty()) {
917                             continue; // fr-CA/∅ => fr-CA
918                         }
919                     }
920                     b.append("/");
921                     appendVariant(b, variants, relation.get(key));
922                 }
923             }
924             if (!isSingle) b.append(")");
925         }
926 
appendVariant(StringBuilder b, Set<String> set, Set<String> set2)927         private void appendVariant(StringBuilder b, Set<String> set, Set<String> set2) {
928             if (set.equals(set2)) {
929                 b.append("*");
930                 return;
931             }
932             final boolean isSingle = set.size() == 1;
933             if (!isSingle) b.append("(");
934             boolean first = true;
935             for (String item : set) {
936                 if (first) {
937                     first = false;
938                 } else {
939                     b.append("|");
940                 }
941                 b.append(item.isEmpty() ? "∅" : item);
942             }
943             if (!isSingle) b.append(")");
944         }
945 
isEquals(Object other)946         public boolean isEquals(Object other) {
947             return data.equals(((IdSet) other).data);
948         }
949 
950         @Override
hashCode()951         public int hashCode() {
952             return data.hashCode();
953         }
954     }
955 
956     // public static class Key {
957     // Iso iso;
958     // ModifierSet modifierSet;
959     // }
960     // /**
961     // * Return all possible results. Could be external utility. WARNING: doesn't account for transform='no' or
962     // failure='omit'.
963     // */
964     // public Map<String,List<Key>> getPossibleSource() {
965     // Map<String,List<Key>> results = new HashMap<String,List<Key>>();
966     // UnicodeSet results = new UnicodeSet();
967     // addOutput(getBaseMap().iso2output.values(), results);
968     // for (KeyMap keymap : getKeyMaps()) {
969     // addOutput(keymap.string2output.values(), results);
970     // }
971     // for (Transforms transforms : getTransforms().values()) {
972     // // loop, to catch empty case
973     // for (String result : transforms.string2string.values()) {
974     // if (!result.isEmpty()) {
975     // results.add(result);
976     // }
977     // }
978     // }
979     // return results;
980     // }
981 
getQuotationMarks(CLDRFile file)982     static UnicodeSet getQuotationMarks(CLDRFile file) {
983         UnicodeSet results = new UnicodeSet();
984         // TODO should have a test to make sure these are in exemplars.
985         results.add(file.getStringValue("//ldml/delimiters/quotationEnd"));
986         results.add(file.getStringValue("//ldml/delimiters/quotationStart"));
987         results.add(file.getStringValue("//ldml/delimiters/alternateQuotationEnd"));
988         results.add(file.getStringValue("//ldml/delimiters/alternateQuotationStart"));
989         return results;
990     }
991 
992     // TODO Add as utility to CLDRFile
getNumericExemplars(CLDRFile file)993     static UnicodeSet getNumericExemplars(CLDRFile file) {
994         UnicodeSet results = new UnicodeSet();
995         String defaultNumberingSystem = file.getStringValue("//ldml/numbers/defaultNumberingSystem");
996         String nativeNumberingSystem = file.getStringValue("//ldml/numbers/otherNumberingSystems/native");
997         // "//ldml/numbers/otherNumberingSystems/native"
998         addNumberingSystem(file, results, "latn");
999         if (!defaultNumberingSystem.equals("latn")) {
1000             addNumberingSystem(file, results, defaultNumberingSystem);
1001         }
1002         if (!nativeNumberingSystem.equals("latn") && !nativeNumberingSystem.equals(defaultNumberingSystem)) {
1003             addNumberingSystem(file, results, nativeNumberingSystem);
1004         }
1005         return results;
1006     }
1007 
addNumberingSystem(CLDRFile file, UnicodeSet results, String numberingSystem)1008     public static void addNumberingSystem(CLDRFile file, UnicodeSet results, String numberingSystem) {
1009         String digits = supplementalDataInfo.getDigits(numberingSystem);
1010         results.addAll(digits);
1011         addSymbol(file, numberingSystem, "decimal", results);
1012         addSymbol(file, numberingSystem, "group", results);
1013         addSymbol(file, numberingSystem, "minusSign", results);
1014         addSymbol(file, numberingSystem, "percentSign", results);
1015         addSymbol(file, numberingSystem, "plusSign", results);
1016     }
1017 
addSymbol(CLDRFile file, String numberingSystem, String key, UnicodeSet results)1018     public static void addSymbol(CLDRFile file, String numberingSystem, String key, UnicodeSet results) {
1019         String symbol = file.getStringValue("//ldml/numbers/symbols[@numberSystem=\"" + numberingSystem + "\"]/" +
1020             key);
1021         results.add(symbol);
1022     }
1023 }
1024