1 package org.unicode.cldr.tool;
2 
3 import java.io.IOException;
4 import java.util.Arrays;
5 import java.util.Collection;
6 import java.util.EnumMap;
7 import java.util.LinkedHashMap;
8 import java.util.Map;
9 import java.util.Map.Entry;
10 import java.util.Set;
11 import java.util.TreeSet;
12 
13 import org.unicode.cldr.draft.FileUtilities;
14 import org.unicode.cldr.tool.FormattedFileWriter.Anchors;
15 import org.unicode.cldr.util.Annotations;
16 import org.unicode.cldr.util.Annotations.AnnotationSet;
17 import org.unicode.cldr.util.CLDRFile;
18 import org.unicode.cldr.util.CLDRPaths;
19 import org.unicode.cldr.util.CLDRURLS;
20 import org.unicode.cldr.util.CldrUtility;
21 import org.unicode.cldr.util.Factory;
22 import org.unicode.cldr.util.FileCopier;
23 import org.unicode.cldr.util.LanguageGroup;
24 import org.unicode.cldr.util.LanguageTagParser;
25 import org.unicode.cldr.util.LocaleIDParser;
26 
27 import com.google.common.base.Joiner;
28 import com.google.common.collect.Multimap;
29 import com.google.common.collect.TreeMultimap;
30 import com.ibm.icu.impl.Relation;
31 import com.ibm.icu.impl.Row;
32 import com.ibm.icu.impl.Row.R3;
33 import com.ibm.icu.impl.Utility;
34 import com.ibm.icu.text.RuleBasedCollator;
35 import com.ibm.icu.text.UnicodeSet;
36 import com.ibm.icu.util.ULocale;
37 
38 public class ChartAnnotations extends Chart {
39 
40     private static final String LDML_ANNOTATIONS = "<a href='http://unicode.org/repos/cldr/trunk/specs/ldml/tr35-general.html#Annotations'>LDML Annotations</a>";
41 
42     private static final String MAIN_HEADER = "<p>Annotations provide names and keywords for Unicode characters, currently focusing on emoji. "
43         + "If you see any problems, please <a target='_blank' href='"
44         + CLDRURLS.CLDR_NEWTICKET_URL
45         + "'>file a ticket</a> with the corrected values for the locale. "
46         + "For the XML data used for these charts, see "
47         + "<a href='http://unicode.org/repos/cldr/tags/latest/common/annotations/'>latest-release annotations </a> "
48         + "or <a href='http://unicode.org/repos/cldr/tags/latest/common/annotations/'>beta annotations</a>. "
49         + "For more information, see " + LDML_ANNOTATIONS + ".</p>";
50     private static final boolean DEBUG = false;
51     private static final String DIR = CLDRPaths.CHART_DIRECTORY + "annotations/";
52 
main(String[] args)53     public static void main(String[] args) {
54         new ChartAnnotations().writeChart(null);
55     }
56 
57     @Override
getDirectory()58     public String getDirectory() {
59         return DIR;
60     }
61 
62     @Override
getTitle()63     public String getTitle() {
64         return "Annotation Charts";
65     }
66 
67     @Override
getFileName()68     public String getFileName() {
69         return "index";
70     }
71 
72     @Override
getExplanation()73     public String getExplanation() {
74         return MAIN_HEADER + "<p>The charts are presented in groups of related languages, for easier comparison.<p>";
75     }
76 
77     @Override
writeContents(FormattedFileWriter pw)78     public void writeContents(FormattedFileWriter pw) throws IOException {
79         FileCopier.ensureDirectoryExists(DIR);
80         FileCopier.copy(Chart.class, "index.css", DIR);
81         FormattedFileWriter.copyIncludeHtmls(DIR);
82 
83         FormattedFileWriter.Anchors anchors = new FormattedFileWriter.Anchors();
84         writeSubcharts(anchors);
85         pw.setIndex("Main Chart Index", "../index.html");
86         pw.write(anchors.toString());
87     }
88 
89     static final UnicodeSet EXTRAS = new UnicodeSet()
90         .addAll(Arrays.asList(
91             "����", "��", "#️⃣", "����", "��‍❤️‍��‍��", "��‍❤️‍��", "��‍��‍��", "����‍⚕️", "����‍♂️", "����‍♀️", "��‍❤️‍��‍��", "����‍♀️",
92             "��", "��‍❤️‍��‍��", "��", "��‍❤️‍��", "��", "��‍��‍��",
93             "����", "����", "��‍⚖", "����‍⚖", "��‍⚖", "����‍⚖", "��", "��‍♂️", "����‍♂️", "��‍♀️", "����‍♀️",
94             "��", "����", "��‍♂️", "����‍♂️", "��‍♀️", "����‍♀️",
95             "��������������",
96             "#️⃣",
97             "����",
98             "⛹️‍♀️",
99             "��‍⚕️",
100             "��️‍��","��‍☠️",
101             "��‍��",
102             "����‍��",
103             "��","��"
104             ))
105         .freeze();
106 
writeSubcharts(Anchors anchors)107     public void writeSubcharts(Anchors anchors) throws IOException {
108         Set<String> locales = Annotations.getAvailableLocales();
109 
110         AnnotationSet english = Annotations.getDataSet("en");
111         UnicodeSet s = new UnicodeSet(english.keySet()).addAll(EXTRAS).freeze();
112 
113         // set up right order for columns
114 
115         Map<String, String> nameToCode = new LinkedHashMap<>();
116         Relation<LanguageGroup, R3<Integer, String, String>> groupToNameAndCodeSorted = Relation.of(
117             new EnumMap<LanguageGroup, Set<R3<Integer, String, String>>>(LanguageGroup.class),
118             TreeSet.class);
119 
120         Multimap<String, String> localeToSub = TreeMultimap.create();
121         LanguageTagParser ltp = new LanguageTagParser();
122 
123         for (String locale : locales) {
124             ltp.set(locale);
125             if (locale.equals("root")) {
126                 continue;
127             }
128             if (locale.equals("en")) { // make first
129                 continue;
130             }
131             String region = ltp.getRegion();
132             if (!region.isEmpty()) {
133                 localeToSub.put(ltp.getLanguageScript(), locale);
134                 continue;
135             }
136 
137             if (locale.startsWith("en")) {
138                 int debug = 0;
139             }
140             String name = ENGLISH.getName(locale, true);
141             int baseEnd = locale.indexOf('_');
142             ULocale loc = new ULocale(baseEnd < 0 ? locale : locale.substring(0, baseEnd));
143             LanguageGroup group = LanguageGroup.get(loc);
144             int rank = LanguageGroup.rankInGroup(loc);
145             groupToNameAndCodeSorted.put(group, Row.of(rank, name, locale));
146         }
147 
148         for (Entry<LanguageGroup, Set<R3<Integer, String, String>>> groupPairs : groupToNameAndCodeSorted.keyValuesSet()) {
149             LanguageGroup group = groupPairs.getKey();
150             String ename = ENGLISH.getName("en", true);
151             nameToCode.clear();
152             nameToCode.put(ename, "en"); // always have english first
153 
154             // add English variants if they exist
155 
156             for (R3<Integer, String, String> pair : groupPairs.getValue()) {
157                 String name = pair.get1();
158                 String locale = pair.get2();
159                 if (locale.startsWith("en_")) {
160                     nameToCode.put(name, locale);
161                 }
162             }
163 
164             for (R3<Integer, String, String> pair : groupPairs.getValue()) {
165                 String name = pair.get1();
166                 String locale = pair.get2();
167 
168                 nameToCode.put(name, locale);
169                 System.out.println(pair);
170             }
171             // now build table with right order for columns
172             double width = ((int) ((99.0 / (locales.size() + 1)) * 1000)) / 1000.0;
173             //String widthString = "class='source' width='"+ width + "%'";
174             String widthStringTarget = "class='target' width='" + width + "%'";
175 
176             TablePrinter tablePrinter = new TablePrinter()
177                 .addColumn("Char", "class='source' width='1%'", CldrUtility.getDoubleLinkMsg(), "class='source-image'", true)
178                 .addColumn("Hex", "class='source' width='1%'", null, "class='source'", true)
179             //.addColumn("Formal Name", "class='source' width='" + width + "%'", null, "class='source'", true)
180             ;
181 
182             for (Entry<String, String> entry : nameToCode.entrySet()) {
183                 String name = entry.getKey();
184                 tablePrinter.addColumn(name, widthStringTarget, null, "class='target'", true);
185             }
186             // sort the characters
187             Set<String> sorted = new TreeSet<>(RBC);
188             Multimap<String, String> valueToSub = TreeMultimap.create();
189 
190             for (String cp : s.addAllTo(sorted)) {
191                 tablePrinter
192                     .addRow()
193                     .addCell(cp)
194                     .addCell(Utility.hex(cp, 4, " "))
195                 //.addCell(getName(cp))
196                 ;
197                 for (Entry<String, String> nameAndLocale : nameToCode.entrySet()) {
198                     String name = nameAndLocale.getKey();
199                     String locale = nameAndLocale.getValue();
200 
201                     AnnotationSet annotations = Annotations.getDataSet(locale);
202                     AnnotationSet parentAnnotations = Annotations.getDataSet(LocaleIDParser.getParent(locale));
203                     String baseAnnotation = annotations.toString(cp, true, parentAnnotations);
204                     String baseAnnotationOriginal = baseAnnotation;
205 
206                     if (DEBUG) System.out.println(name + ":" + annotations.toString(cp, false, null));
207                     Collection<String> subs = localeToSub.get(locale);
208                     if (!subs.isEmpty()) {
209                         valueToSub.clear();
210                         for (String sub : subs) {
211                             AnnotationSet subAnnotations = Annotations.getDataSet(sub);
212                             AnnotationSet subParentAnnotations = Annotations.getDataSet(LocaleIDParser.getParent(locale));
213                             String baseAnnotation2 = subAnnotations.toString(cp, true, subParentAnnotations);
214                             if (!baseAnnotation2.equals(baseAnnotationOriginal)) {
215                                 valueToSub.put(baseAnnotation2, sub);
216                             }
217                         }
218                         for (Entry<String, Collection<String>> entry : valueToSub.asMap().entrySet()) {
219                             baseAnnotation += "<hr><i>" + Joiner.on(", ").join(entry.getValue()) + "</i>: " + entry.getKey();
220                         }
221                     }
222                     tablePrinter.addCell(baseAnnotation);
223                 }
224                 tablePrinter.finishRow();
225             }
226             final String name = group.toString();
227             new Subchart(name + " Annotations", FileUtilities.anchorize(name), tablePrinter).writeChart(anchors);
228         }
229     }
230 
231     static final int FIRST_REGIONAL = 0x1F1E6;
232     static final int LAST_REGIONAL = 0x1F1FF;
233 
getRegionalIndicator(int firstCodepoint)234     public static int getRegionalIndicator(int firstCodepoint) {
235         return FIRST_REGIONAL <= firstCodepoint && firstCodepoint <= LAST_REGIONAL ? firstCodepoint - FIRST_REGIONAL + 'A' : -1;
236     }
237 
238 //    private String getName(String cp) {
239 //        int ri1 = getRegionalIndicator(cp.codePointAt(0));
240 //        if (ri1 >= 0) {
241 //            int ri2 = getRegionalIndicator(cp.codePointAt(2));
242 //            return ENGLISH.getName(CLDRFile.TERRITORY_NAME, String.valueOf((char) ri1) + String.valueOf((char) ri2));
243 //        }
244 //        String result = NAMES80.get(cp);
245 //        return result != null ? result : UCharacter.getName(cp, ", ");
246 //    }
247 //
248 //    private static UnicodeMap<String> NAMES80 = new UnicodeMap<>();
249 //    static {
250 //        String[][] data = {
251 //            { "��", "EMOJI MODIFIER FITZPATRICK TYPE-1-2" },
252 //            { "��", "EMOJI MODIFIER FITZPATRICK TYPE-3" },
253 //            { "��", "EMOJI MODIFIER FITZPATRICK TYPE-4" },
254 //            { "��", "EMOJI MODIFIER FITZPATRICK TYPE-5" },
255 //            { "��", "EMOJI MODIFIER FITZPATRICK TYPE-6" },
256 //            { "��", "ZIPPER-MOUTH FACE" },
257 //            { "��", "MONEY-MOUTH FACE" },
258 //            { "��", "FACE WITH THERMOMETER" },
259 //            { "��", "NERD FACE" },
260 //            { "��", "THINKING FACE" },
261 //            { "��", "FACE WITH ROLLING EYES" },
262 //            { "��", "UPSIDE-DOWN FACE" },
263 //            { "��", "FACE WITH HEAD-BANDAGE" },
264 //            { "��", "ROBOT FACE" },
265 //            { "��", "HUGGING FACE" },
266 //            { "��", "SIGN OF THE HORNS" },
267 //            { "��", "CRAB (also Cancer)" },
268 //            { "��", "SCORPION (also Scorpio)" },
269 //            { "��", "LION FACE (also Leo)" },
270 //            { "��", "BOW AND ARROW (also Sagittarius)" },
271 //            { "��", "AMPHORA (also Aquarius)" },
272 //            { "��", "PLACE OF WORSHIP" },
273 //            { "��", "KAABA" },
274 //            { "��", "MOSQUE" },
275 //            { "��", "SYNAGOGUE" },
276 //            { "��", "MENORAH WITH NINE BRANCHES" },
277 //            { "��", "PRAYER BEADS" },
278 //            { "��", "HOT DOG" },
279 //            { "��", "TACO" },
280 //            { "��", "BURRITO" },
281 //            { "��", "CHEESE WEDGE" },
282 //            { "��", "POPCORN" },
283 //            { "��", "BOTTLE WITH POPPING CORK" },
284 //            { "��", "TURKEY" },
285 //            { "��", "UNICORN FACE" },
286 //            { "��", "CRICKET BAT AND BALL" },
287 //            { "��", "VOLLEYBALL" },
288 //            { "��", "FIELD HOCKEY STICK AND BALL" },
289 //            { "��", "ICE HOCKEY STICK AND PUCK" },
290 //            { "��", "TABLE TENNIS PADDLE AND BALL" },
291 //            { "��", "BADMINTON RACQUET AND SHUTTLECOCK" } };
292 //        for (String[] pair : data) {
293 //            NAMES80.put(pair[0], pair[1]);
294 //        }
295 //        NAMES80.freeze();
296 //    }
297 
298     private class Subchart extends Chart {
299         String title;
300         String file;
301         private TablePrinter tablePrinter;
302 
303         @Override
getShowDate()304         public boolean getShowDate() {
305             return false;
306         }
307 
Subchart(String title, String file, TablePrinter tablePrinter)308         public Subchart(String title, String file, TablePrinter tablePrinter) {
309             super();
310             this.title = title;
311             this.file = file;
312             this.tablePrinter = tablePrinter;
313         }
314 
315         @Override
getDirectory()316         public String getDirectory() {
317             return DIR;
318         }
319 
320         @Override
getTitle()321         public String getTitle() {
322             return title;
323         }
324 
325         @Override
getFileName()326         public String getFileName() {
327             return file;
328         }
329 
330         @Override
getExplanation()331         public String getExplanation() {
332             return MAIN_HEADER
333                 + "<p>This table shows the annotations for a group of related languages (plus English) for easier comparison. "
334                 + "The first item is the <b>short name</b> (also the text-to-speech phrase). "
335                 + "It is bolded for clarity, and marked with a * for searching on this page. "
336                 + "The remaining phrases are <b>keywords</b> (labels), separated by “|”. "
337                 + "The keywords plus the words in the short name are typically used for search and predictive typing.<p>\n"
338                 + "<p>Most short names and keywords that can be constructed with the mechanism in " + LDML_ANNOTATIONS + " are omitted. "
339                 + "However, a few are included for comparison: "
340                 + Joiner.on(", ").join(EXTRAS.addAllTo(new TreeSet<>())) + ". "
341                 + "In this chart, missing items are marked with “" + Annotations.MISSING_MARKER + "”, "
342                 + "‘fallback’ constructed items with “" + Annotations.BAD_MARKER + "”, "
343                 + "substituted English values with “" + Annotations.ENGLISH_MARKER + "”, and "
344                 + "values equal to their parent locale’s values are replaced with " + Annotations.EQUIVALENT + ".</p>\n";
345         }
346 
347         @Override
writeContents(FormattedFileWriter pw)348         public void writeContents(FormattedFileWriter pw) throws IOException {
349             pw.write(tablePrinter.toTable());
350         }
351     }
352 
353     public static RuleBasedCollator RBC;
354     static {
355         Factory cldrFactory = Factory.make(CLDRPaths.COMMON_DIRECTORY + "collation/", ".*");
356         CLDRFile root = cldrFactory.make("root", false);
357         String rules = root.getStringValue("//ldml/collations/collation[@type=\"emoji\"][@visibility=\"external\"]/cr");
358 
359 //        if (!rules.contains("'#⃣'")) {
360 //            rules = rules.replace("#⃣", "'#⃣'").replace("*⃣", "'*⃣'"); //hack for 8288
361 //        }
362 
363         try {
364             RBC = new RuleBasedCollator(rules);
365         } catch (Exception e) {
366             throw new IllegalArgumentException("Failure in rules for " + CLDRPaths.COMMON_DIRECTORY + "collation/" + "root", e);
367         }
368     }
369 }
370