1 package org.unicode.cldr.tool;
2 
3 import java.io.IOException;
4 import java.util.Arrays;
5 import java.util.Collection;
6 import java.util.EnumMap;
7 import java.util.LinkedHashMap;
8 import java.util.Map;
9 import java.util.Map.Entry;
10 import java.util.Set;
11 import java.util.TreeSet;
12 
13 import org.unicode.cldr.draft.FileUtilities;
14 import org.unicode.cldr.tool.FormattedFileWriter.Anchors;
15 import org.unicode.cldr.util.Annotations;
16 import org.unicode.cldr.util.Annotations.AnnotationSet;
17 import org.unicode.cldr.util.CLDRFile;
18 import org.unicode.cldr.util.CLDRPaths;
19 import org.unicode.cldr.util.CldrUtility;
20 import org.unicode.cldr.util.Factory;
21 import org.unicode.cldr.util.FileCopier;
22 import org.unicode.cldr.util.LanguageGroup;
23 import org.unicode.cldr.util.LanguageTagParser;
24 import org.unicode.cldr.util.LocaleIDParser;
25 
26 import com.google.common.collect.Multimap;
27 import com.google.common.collect.TreeMultimap;
28 import com.ibm.icu.dev.util.CollectionUtilities;
29 import com.ibm.icu.impl.Relation;
30 import com.ibm.icu.impl.Row;
31 import com.ibm.icu.impl.Row.R3;
32 import com.ibm.icu.impl.Utility;
33 import com.ibm.icu.text.RuleBasedCollator;
34 import com.ibm.icu.text.UnicodeSet;
35 import com.ibm.icu.util.ULocale;
36 
37 public class ChartAnnotations extends Chart {
38 
    /** HTML link to the Annotations section of the LDML specification; embedded in the page explanations. */
    private static final String LDML_ANNOTATIONS = "<a href='http://unicode.org/repos/cldr/trunk/specs/ldml/tr35-general.html#Annotations'>LDML Annotations</a>";

    /**
     * Introductory HTML paragraph shared by the index page and every subchart explanation.
     * NOTE(review): the "latest-release annotations" and "beta annotations" links below point to the
     * same URL (tags/latest); one of them presumably should point elsewhere - confirm the intended targets.
     */
    private static final String MAIN_HEADER = "<p>Annotations provide names and keywords for Unicode characters, currently focusing on emoji. "
        + "If you see any problems, please <a target='_blank' href='http://unicode.org/cldr/trac/newticket'>file a ticket</a> with the corrected values for the locale. "
        + "For the XML data used for these charts, see "
        + "<a href='http://unicode.org/repos/cldr/tags/latest/common/annotations/'>latest-release annotations </a> "
        + "or <a href='http://unicode.org/repos/cldr/tags/latest/common/annotations/'>beta annotations</a>. "
        + "For more information, see " + LDML_ANNOTATIONS + ".</p>";
    /** When true, writeSubcharts prints an extra diagnostic line to System.out for every table cell. */
    private static final boolean DEBUG = false;
    /** Directory returned by getDirectory() for the index page and for every subchart; index.css is copied here. */
    private static final String DIR = CLDRPaths.CHART_DIRECTORY + "annotations/";
49 
main(String[] args)50     public static void main(String[] args) {
51         new ChartAnnotations().writeChart(null);
52     }
53 
54     @Override
getDirectory()55     public String getDirectory() {
56         return DIR;
57     }
58 
59     @Override
getTitle()60     public String getTitle() {
61         return "Annotation Charts";
62     }
63 
64     @Override
getFileName()65     public String getFileName() {
66         return "index";
67     }
68 
69     @Override
getExplanation()70     public String getExplanation() {
71         return MAIN_HEADER + "<p>The charts are presented in groups of related languages, for easier comparison.<p>";
72     }
73 
writeContents(FormattedFileWriter pw)74     public void writeContents(FormattedFileWriter pw) throws IOException {
75         FileCopier.ensureDirectoryExists(DIR);
76         FileCopier.copy(Chart.class, "index.css", DIR);
77 
78         FormattedFileWriter.Anchors anchors = new FormattedFileWriter.Anchors();
79         writeSubcharts(anchors);
80         pw.setIndex("Main Chart Index", "../index.html");
81         pw.write(anchors.toString());
82     }
83 
    /**
     * Frozen set of additional characters and sequences. writeSubcharts adds them to the key set
     * of the English annotations to form the chart rows, and the subchart explanation lists them
     * as items "included for comparison".
     */
    static final UnicodeSet EXTRAS = new UnicodeSet()
        .addAll(Arrays.asList(
            "����", "��", "#️⃣", "����", "��‍❤️‍��‍��", "��‍❤️‍��", "��‍��‍��", "����‍⚕️", "����‍♂️", "����‍♀️", "��‍❤️‍��‍��", "����‍♀️",
            "��", "��‍❤️‍��‍��", "��", "��‍❤️‍��", "��", "��‍��‍��",
            "����", "����", "��‍⚖", "����‍⚖", "��‍⚖", "����‍⚖", "��", "��‍♂️", "����‍♂️", "��‍♀️", "����‍♀️",
            "��", "����", "��‍♂️", "����‍♂️", "��‍♀️", "����‍♀️",
            "��������������",
            "#️⃣",
            "����",
            "⛹️‍♀️",
            "��‍⚕️",
            "��️‍��","��‍☠️",
            "��‍��",
            "����‍��",
            "��","��"
            ))
        .freeze();
101 
writeSubcharts(Anchors anchors)102     public void writeSubcharts(Anchors anchors) throws IOException {
103         Set<String> locales = Annotations.getAvailableLocales();
104 
105         AnnotationSet english = Annotations.getDataSet("en");
106         UnicodeSet s = new UnicodeSet(english.keySet()).addAll(EXTRAS).freeze();
107 
108         // set up right order for columns
109 
110         Map<String, String> nameToCode = new LinkedHashMap<String, String>();
111         Relation<LanguageGroup, R3<Integer, String, String>> groupToNameAndCodeSorted = Relation.of(
112             new EnumMap<LanguageGroup, Set<R3<Integer, String, String>>>(LanguageGroup.class),
113             TreeSet.class);
114 
115         Multimap<String, String> localeToSub = TreeMultimap.create();
116         LanguageTagParser ltp = new LanguageTagParser();
117 
118         for (String locale : locales) {
119             ltp.set(locale);
120             if (locale.equals("root")) {
121                 continue;
122             }
123             if (locale.equals("en")) { // make first
124                 continue;
125             }
126             String region = ltp.getRegion();
127             if (!region.isEmpty()) {
128                 localeToSub.put(ltp.getLanguageScript(), locale);
129                 continue;
130             }
131 
132             if (locale.startsWith("en")) {
133                 int debug = 0;
134             }
135             String name = ENGLISH.getName(locale, true);
136             int baseEnd = locale.indexOf('_');
137             ULocale loc = new ULocale(baseEnd < 0 ? locale : locale.substring(0, baseEnd));
138             LanguageGroup group = LanguageGroup.get(loc);
139             int rank = LanguageGroup.rankInGroup(loc);
140             groupToNameAndCodeSorted.put(group, Row.of(rank, name, locale));
141         }
142 
143         for (Entry<LanguageGroup, Set<R3<Integer, String, String>>> groupPairs : groupToNameAndCodeSorted.keyValuesSet()) {
144             LanguageGroup group = groupPairs.getKey();
145             String ename = ENGLISH.getName("en", true);
146             nameToCode.clear();
147             nameToCode.put(ename, "en"); // always have english first
148 
149             // add English variants if they exist
150 
151             for (R3<Integer, String, String> pair : groupPairs.getValue()) {
152                 String name = pair.get1();
153                 String locale = pair.get2();
154                 if (locale.startsWith("en_")) {
155                     nameToCode.put(name, locale);
156                 }
157             }
158 
159             for (R3<Integer, String, String> pair : groupPairs.getValue()) {
160                 String name = pair.get1();
161                 String locale = pair.get2();
162 
163                 nameToCode.put(name, locale);
164                 System.out.println(pair);
165             }
166             // now build table with right order for columns
167             double width = ((int) ((99.0 / (locales.size() + 1)) * 1000)) / 1000.0;
168             //String widthString = "class='source' width='"+ width + "%'";
169             String widthStringTarget = "class='target' width='" + width + "%'";
170 
171             TablePrinter tablePrinter = new TablePrinter()
172                 .addColumn("Char", "class='source' width='1%'", CldrUtility.getDoubleLinkMsg(), "class='source-image'", true)
173                 .addColumn("Hex", "class='source' width='1%'", null, "class='source'", true)
174             //.addColumn("Formal Name", "class='source' width='" + width + "%'", null, "class='source'", true)
175             ;
176 
177             for (Entry<String, String> entry : nameToCode.entrySet()) {
178                 String name = entry.getKey();
179                 tablePrinter.addColumn(name, widthStringTarget, null, "class='target'", true);
180             }
181             // sort the characters
182             Set<String> sorted = new TreeSet<>(RBC);
183             Multimap<String, String> valueToSub = TreeMultimap.create();
184 
185             for (String cp : s.addAllTo(sorted)) {
186                 tablePrinter
187                     .addRow()
188                     .addCell(cp)
189                     .addCell(Utility.hex(cp, 4, " "))
190                 //.addCell(getName(cp))
191                 ;
192                 for (Entry<String, String> nameAndLocale : nameToCode.entrySet()) {
193                     String name = nameAndLocale.getKey();
194                     String locale = nameAndLocale.getValue();
195 
196                     AnnotationSet annotations = Annotations.getDataSet(locale);
197                     AnnotationSet parentAnnotations = Annotations.getDataSet(LocaleIDParser.getParent(locale));
198                     String baseAnnotation = annotations.toString(cp, true, parentAnnotations);
199                     String baseAnnotationOriginal = baseAnnotation;
200 
201                     if (DEBUG) System.out.println(name + ":" + annotations.toString(cp, false, null));
202                     Collection<String> subs = localeToSub.get(locale);
203                     if (!subs.isEmpty()) {
204                         valueToSub.clear();
205                         for (String sub : subs) {
206                             AnnotationSet subAnnotations = Annotations.getDataSet(sub);
207                             AnnotationSet subParentAnnotations = Annotations.getDataSet(LocaleIDParser.getParent(locale));
208                             String baseAnnotation2 = subAnnotations.toString(cp, true, subParentAnnotations);
209                             if (!baseAnnotation2.equals(baseAnnotationOriginal)) {
210                                 valueToSub.put(baseAnnotation2, sub);
211                             }
212                         }
213                         for (Entry<String, Collection<String>> entry : valueToSub.asMap().entrySet()) {
214                             baseAnnotation += "<hr><i>" + CollectionUtilities.join(entry.getValue(), ", ") + "</i>: " + entry.getKey();
215                         }
216                     }
217                     tablePrinter.addCell(baseAnnotation);
218                 }
219                 tablePrinter.finishRow();
220             }
221             final String name = group.toString();
222             new Subchart(name + " Annotations", FileUtilities.anchorize(name), tablePrinter).writeChart(anchors);
223         }
224     }
225 
226     static final int FIRST_REGIONAL = 0x1F1E6;
227     static final int LAST_REGIONAL = 0x1F1FF;
228 
getRegionalIndicator(int firstCodepoint)229     public static int getRegionalIndicator(int firstCodepoint) {
230         return FIRST_REGIONAL <= firstCodepoint && firstCodepoint <= LAST_REGIONAL ? firstCodepoint - FIRST_REGIONAL + 'A' : -1;
231     }
232 
233 //    private String getName(String cp) {
234 //        int ri1 = getRegionalIndicator(cp.codePointAt(0));
235 //        if (ri1 >= 0) {
236 //            int ri2 = getRegionalIndicator(cp.codePointAt(2));
237 //            return ENGLISH.getName(CLDRFile.TERRITORY_NAME, String.valueOf((char) ri1) + String.valueOf((char) ri2));
238 //        }
239 //        String result = NAMES80.get(cp);
240 //        return result != null ? result : UCharacter.getName(cp, ", ");
241 //    }
242 //
243 //    private static UnicodeMap<String> NAMES80 = new UnicodeMap<>();
244 //    static {
245 //        String[][] data = {
246 //            { "��", "EMOJI MODIFIER FITZPATRICK TYPE-1-2" },
247 //            { "��", "EMOJI MODIFIER FITZPATRICK TYPE-3" },
248 //            { "��", "EMOJI MODIFIER FITZPATRICK TYPE-4" },
249 //            { "��", "EMOJI MODIFIER FITZPATRICK TYPE-5" },
250 //            { "��", "EMOJI MODIFIER FITZPATRICK TYPE-6" },
251 //            { "��", "ZIPPER-MOUTH FACE" },
252 //            { "��", "MONEY-MOUTH FACE" },
253 //            { "��", "FACE WITH THERMOMETER" },
254 //            { "��", "NERD FACE" },
255 //            { "��", "THINKING FACE" },
256 //            { "��", "FACE WITH ROLLING EYES" },
257 //            { "��", "UPSIDE-DOWN FACE" },
258 //            { "��", "FACE WITH HEAD-BANDAGE" },
259 //            { "��", "ROBOT FACE" },
260 //            { "��", "HUGGING FACE" },
261 //            { "��", "SIGN OF THE HORNS" },
262 //            { "��", "CRAB (also Cancer)" },
263 //            { "��", "SCORPION (also Scorpio)" },
264 //            { "��", "LION FACE (also Leo)" },
265 //            { "��", "BOW AND ARROW (also Sagittarius)" },
266 //            { "��", "AMPHORA (also Aquarius)" },
267 //            { "��", "PLACE OF WORSHIP" },
268 //            { "��", "KAABA" },
269 //            { "��", "MOSQUE" },
270 //            { "��", "SYNAGOGUE" },
271 //            { "��", "MENORAH WITH NINE BRANCHES" },
272 //            { "��", "PRAYER BEADS" },
273 //            { "��", "HOT DOG" },
274 //            { "��", "TACO" },
275 //            { "��", "BURRITO" },
276 //            { "��", "CHEESE WEDGE" },
277 //            { "��", "POPCORN" },
278 //            { "��", "BOTTLE WITH POPPING CORK" },
279 //            { "��", "TURKEY" },
280 //            { "��", "UNICORN FACE" },
281 //            { "��", "CRICKET BAT AND BALL" },
282 //            { "��", "VOLLEYBALL" },
283 //            { "��", "FIELD HOCKEY STICK AND BALL" },
284 //            { "��", "ICE HOCKEY STICK AND PUCK" },
285 //            { "��", "TABLE TENNIS PADDLE AND BALL" },
286 //            { "��", "BADMINTON RACQUET AND SHUTTLECOCK" } };
287 //        for (String[] pair : data) {
288 //            NAMES80.put(pair[0], pair[1]);
289 //        }
290 //        NAMES80.freeze();
291 //    }
292 
293     private class Subchart extends Chart {
294         String title;
295         String file;
296         private TablePrinter tablePrinter;
297 
298         @Override
getShowDate()299         public boolean getShowDate() {
300             return false;
301         }
302 
Subchart(String title, String file, TablePrinter tablePrinter)303         public Subchart(String title, String file, TablePrinter tablePrinter) {
304             super();
305             this.title = title;
306             this.file = file;
307             this.tablePrinter = tablePrinter;
308         }
309 
310         @Override
getDirectory()311         public String getDirectory() {
312             return DIR;
313         }
314 
315         @Override
getTitle()316         public String getTitle() {
317             return title;
318         }
319 
320         @Override
getFileName()321         public String getFileName() {
322             return file;
323         }
324 
325         @Override
getExplanation()326         public String getExplanation() {
327             return MAIN_HEADER
328                 + "<p>This table shows the annotations for a group of related languages (plus English) for easier comparison. "
329                 + "The first item is the <b>short name</b> (also the text-to-speech phrase). "
330                 + "It is bolded for clarity, and marked with a * for searching on this page. "
331                 + "The remaining phrases are <b>keywords</b> (labels), separated by “|”. "
332                 + "The keywords plus the words in the short name are typically used for search and predictive typing.<p>\n"
333                 + "<p>Most short names and keywords that can be constructed with the mechanism in " + LDML_ANNOTATIONS + " are omitted. "
334                 + "However, a few are included for comparison: "
335                 + CollectionUtilities.join(EXTRAS.addAllTo(new TreeSet<>()), ", ") + ". "
336                 + "In this chart, missing items are marked with “" + Annotations.MISSING_MARKER + "”, "
337                 + "‘fallback’ constructed items with “" + Annotations.BAD_MARKER + "”, "
338                 + "substituted English values with “" + Annotations.ENGLISH_MARKER + "”, and "
339                 + "values equal to their parent locale’s values are replaced with " + Annotations.EQUIVALENT + ".</p>\n";
340         }
341 
342         @Override
writeContents(FormattedFileWriter pw)343         public void writeContents(FormattedFileWriter pw) throws IOException {
344             pw.write(tablePrinter.toTable());
345         }
346     }
347 
348     public static RuleBasedCollator RBC;
349     static {
350         Factory cldrFactory = Factory.make(CLDRPaths.COMMON_DIRECTORY + "collation/", ".*");
351         CLDRFile root = cldrFactory.make("root", false);
352         String rules = root.getStringValue("//ldml/collations/collation[@type=\"emoji\"][@visibility=\"external\"]/cr");
353 
354 //        if (!rules.contains("'#⃣'")) {
355 //            rules = rules.replace("#⃣", "'#⃣'").replace("*⃣", "'*⃣'"); //hack for 8288
356 //        }
357 
358         try {
359             RBC = new RuleBasedCollator(rules);
360         } catch (Exception e) {
361             throw new IllegalArgumentException(e);
362         }
363     }
364 
365 //    static final Set<String> ENGLISH_LABELS = new LinkedHashSet<>(Arrays.asList(
366 //        "flag", "nature", "objects", "people", "places", "symbols", "travel", "animal",
367 //        "office", "sign", "word", "time", "food", "person", "weather", "activity",
368 //        "vehicle", "restaurant", "communication", "emotion", "geometric", "mark",
369 //        "education", "gesture", "japanese", "symbol", "congratulation", "body", "clothing"));
370 
371 //    static class Annotations {
372 //
373 //        final UnicodeRelation<String> values = new UnicodeRelation<>();
374 //
375 //        static Factory cldrFactory = Factory.make(CLDRPaths.COMMON_DIRECTORY + "annotations/", ".*");
376 //
377 //        static Set<String> getAvailableLocales() {
378 //            return cldrFactory.getAvailable();
379 //        }
380 //
381 //        static Map<String, Annotations> cache = new ConcurrentHashMap<>();
382 //
383 //        static synchronized Annotations make(String locale) {
384 //            Annotations result = cache.get(locale);
385 //            if (result == null) {
386 //                CLDRFile file = cldrFactory.make(locale, false); // for now, don't resolve
387 //                result = new Annotations();
388 //                LinkedHashSet<String> values = new LinkedHashSet<>();
389 //                XPathParts parts = new XPathParts();
390 //                Splitter sp = Splitter.on(';').omitEmptyStrings().trimResults();
391 //                for (String path : file) {
392 //                    if (path.startsWith("//ldml/identity")) {
393 //                        continue;
394 //                    }
395 //                    String value = file.getStringValue(path);
396 //                    String fullPath = file.getFullXPath(path);
397 //                    String cpString = parts.set(fullPath).getAttributeValue(-1, "cp");
398 //                    UnicodeSet cps = new UnicodeSet(cpString);
399 //                    String tts = parts.set(fullPath).getAttributeValue(-1, "tts");
400 //                    values.clear();
401 //                    if (tts != null) {
402 //                        values.add(tts.trim()); // always first value
403 //                    }
404 //                    values.addAll(sp.splitToList(value));
405 //                    result.values.addAll(cps, values);
406 //                }
407 //
408 //                // remove labels
409 //
410 //                if (locale.equals("en")) {
411 //                    for (Entry<String, Set<String>> item : result.values.keyValues()) {
412 //                        String key = item.getKey();
413 //                        Set<String> valueSet = new LinkedHashSet<>(item.getValue());
414 //                        for (String skip : ENGLISH_LABELS) {
415 //                            if (valueSet.contains(skip)) {
416 //                                result.values.remove(key, skip);
417 //                                if (result.values.get(key) == null) {
418 //                                    result.values.add(key, skip); // restore
419 //                                    break;
420 //                                }
421 //                            }
422 //                        }
423 //                        Set<String> newSet = result.values.get(key);
424 //                        if (!valueSet.equals(newSet)) {
425 //                            if (DEBUG) System.out.println("dropping labels from " + item.getKey() + ", old: " + valueSet + ", new: " + newSet);
426 //                        }
427 //                    }
428 //                }
429 //                result.values.freeze();
430 //                cache.put(locale, result);
431 //            }
432 //            return result;
433 //        }
434 //    }
435 }
436