package org.unicode.cldr.tool;

import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import java.util.EnumMap;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeSet;

import org.unicode.cldr.draft.FileUtilities;
import org.unicode.cldr.tool.FormattedFileWriter.Anchors;
import org.unicode.cldr.util.Annotations;
import org.unicode.cldr.util.Annotations.AnnotationSet;
import org.unicode.cldr.util.CLDRFile;
import org.unicode.cldr.util.CLDRPaths;
import org.unicode.cldr.util.CLDRURLS;
import org.unicode.cldr.util.CldrUtility;
import org.unicode.cldr.util.Factory;
import org.unicode.cldr.util.FileCopier;
import org.unicode.cldr.util.LanguageGroup;
import org.unicode.cldr.util.LanguageTagParser;
import org.unicode.cldr.util.LocaleIDParser;

import com.google.common.base.Joiner;
import com.google.common.collect.Multimap;
import com.google.common.collect.TreeMultimap;
import com.ibm.icu.impl.Relation;
import com.ibm.icu.impl.Row;
import com.ibm.icu.impl.Row.R3;
import com.ibm.icu.impl.Utility;
import com.ibm.icu.text.RuleBasedCollator;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.util.ULocale;

/**
 * Writes the "Annotation Charts": an index page plus one sub-chart per {@link LanguageGroup}.
 * Each sub-chart is a table with one row per annotated character and one column per locale
 * of the group, with English always in the first locale column.
 */
public class ChartAnnotations extends Chart {

    private static final String LDML_ANNOTATIONS = "<a href='http://unicode.org/repos/cldr/trunk/specs/ldml/tr35-general.html#Annotations'>LDML Annotations</a>";

    private static final String MAIN_HEADER = "<p>Annotations provide names and keywords for Unicode characters, currently focusing on emoji. "
        + "If you see any problems, please <a target='_blank' href='"
        + CLDRURLS.CLDR_NEWTICKET_URL
        + "'>file a ticket</a> with the corrected values for the locale. "
        + "For the XML data used for these charts, see "
        + "<a href='http://unicode.org/repos/cldr/tags/latest/common/annotations/'>latest-release annotations </a> "
        + "or <a href='http://unicode.org/repos/cldr/tags/latest/common/annotations/'>beta annotations</a>. "
        + "For more information, see " + LDML_ANNOTATIONS + ".</p>";
    private static final boolean DEBUG = false;
    private static final String DIR = CLDRPaths.CHART_DIRECTORY + "annotations/";

    /**
     * Command-line entry point: writes the index chart, which in turn writes every sub-chart.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        new ChartAnnotations().writeChart(null);
    }

    @Override
    public String getDirectory() {
        return DIR;
    }

    @Override
    public String getTitle() {
        return "Annotation Charts";
    }

    @Override
    public String getFileName() {
        return "index";
    }

    @Override
    public String getExplanation() {
        return MAIN_HEADER + "<p>The charts are presented in groups of related languages, for easier comparison.</p>";
    }

    /**
     * Prepares the output directory, writes all sub-charts, and then writes the collected
     * anchors as the body of the index page.
     *
     * @param pw writer for the index page
     * @throws IOException if a chart or a supporting file cannot be written
     */
    @Override
    public void writeContents(FormattedFileWriter pw) throws IOException {
        FileCopier.ensureDirectoryExists(DIR);
        FileCopier.copy(Chart.class, "index.css", DIR);
        FormattedFileWriter.copyIncludeHtmls(DIR);

        FormattedFileWriter.Anchors anchors = new FormattedFileWriter.Anchors();
        writeSubcharts(anchors);
        pw.setIndex("Main Chart Index", "../index.html");
        pw.write(anchors.toString());
    }

    /**
     * Extra strings that get a row in every chart in addition to the keys of the English data set.
     * NOTE(review): several literals below are empty (or hold only joiner/variation-selector
     * residue) in this copy of the file; they look like emoji sequences whose characters were
     * lost in transit. Confirm against the upstream source before relying on this list.
     */
    static final UnicodeSet EXTRAS = new UnicodeSet()
        .addAll(Arrays.asList(
            "", "", "#️⃣", "", "❤️", "❤️", "", "⚕️", "♂️", "♀️", "❤️", "♀️",
            "", "❤️", "", "❤️", "", "",
            "", "", "⚖", "⚖", "⚖", "⚖", "", "♂️", "♂️", "♀️", "♀️",
            "", "", "♂️", "♂️", "♀️", "♀️",
            "",
            "#️⃣",
            "",
            "⛹️♀️",
            "⚕️",
            "️","☠️",
            "",
            "",
            "",""
        ))
        .freeze();

    /**
     * Writes one sub-chart per language group.
     *
     * <p>Locales with a region are not given their own column; they are attached to their
     * language(+script) locale and reported inside that locale's cell when their value differs.
     * {@code root} and {@code en} are skipped when grouping; {@code en} is then added as the
     * first locale column of every chart.
     *
     * @param anchors collector handed to each sub-chart's {@code writeChart}
     * @throws IOException if a sub-chart cannot be written
     */
    public void writeSubcharts(Anchors anchors) throws IOException {
        Set<String> locales = Annotations.getAvailableLocales();

        AnnotationSet english = Annotations.getDataSet("en");
        UnicodeSet s = new UnicodeSet(english.keySet()).addAll(EXTRAS).freeze();

        // set up right order for columns

        Map<String, String> nameToCode = new LinkedHashMap<>();
        Relation<LanguageGroup, R3<Integer, String, String>> groupToNameAndCodeSorted = Relation.of(
            new EnumMap<LanguageGroup, Set<R3<Integer, String, String>>>(LanguageGroup.class),
            TreeSet.class);

        Multimap<String, String> localeToSub = TreeMultimap.create();
        LanguageTagParser ltp = new LanguageTagParser();

        for (String locale : locales) {
            ltp.set(locale);
            if (locale.equals("root")) {
                continue;
            }
            if (locale.equals("en")) { // make first
                continue;
            }
            String region = ltp.getRegion();
            if (!region.isEmpty()) {
                // regional variants are folded into the cell of their language(+script) locale
                localeToSub.put(ltp.getLanguageScript(), locale);
                continue;
            }

            String name = ENGLISH.getName(locale, true);
            int baseEnd = locale.indexOf('_');
            ULocale loc = new ULocale(baseEnd < 0 ? locale : locale.substring(0, baseEnd));
            LanguageGroup group = LanguageGroup.get(loc);
            int rank = LanguageGroup.rankInGroup(loc);
            groupToNameAndCodeSorted.put(group, Row.of(rank, name, locale));
        }

        // The values below do not depend on the group, so compute them once.
        String englishName = ENGLISH.getName("en", true);

        // sort the characters
        Set<String> sorted = new TreeSet<>(RBC);
        s.addAllTo(sorted);

        // NOTE(review): the width is derived from the total number of available locales, not from
        // the number of columns in a given chart — confirm that this is intended.
        double width = ((int) ((99.0 / (locales.size() + 1)) * 1000)) / 1000.0;
        String widthStringTarget = "class='target' width='" + width + "%'";

        for (Entry<LanguageGroup, Set<R3<Integer, String, String>>> groupPairs : groupToNameAndCodeSorted.keyValuesSet()) {
            LanguageGroup group = groupPairs.getKey();
            nameToCode.clear();
            nameToCode.put(englishName, "en"); // always have english first

            // add English variants if they exist

            for (R3<Integer, String, String> pair : groupPairs.getValue()) {
                String locale = pair.get2();
                if (locale.startsWith("en_")) {
                    nameToCode.put(pair.get1(), locale);
                }
            }

            // then everything in the group; names already present keep their earlier position
            for (R3<Integer, String, String> pair : groupPairs.getValue()) {
                nameToCode.put(pair.get1(), pair.get2());
                if (DEBUG) {
                    System.out.println(pair);
                }
            }

            // now build table with right order for columns
            TablePrinter tablePrinter = new TablePrinter()
                .addColumn("Char", "class='source' width='1%'", CldrUtility.getDoubleLinkMsg(), "class='source-image'", true)
                .addColumn("Hex", "class='source' width='1%'", null, "class='source'", true);

            for (String name : nameToCode.keySet()) {
                tablePrinter.addColumn(name, widthStringTarget, null, "class='target'", true);
            }

            for (String cp : sorted) {
                tablePrinter
                    .addRow()
                    .addCell(cp)
                    .addCell(Utility.hex(cp, 4, " "));
                for (Entry<String, String> nameAndLocale : nameToCode.entrySet()) {
                    String locale = nameAndLocale.getValue();
                    tablePrinter.addCell(annotationCell(cp, nameAndLocale.getKey(), locale, localeToSub.get(locale)));
                }
                tablePrinter.finishRow();
            }
            final String name = group.toString();
            new Subchart(name + " Annotations", FileUtilities.anchorize(name), tablePrinter).writeChart(anchors);
        }
    }

    /**
     * Builds the HTML for one table cell: the locale's annotation for {@code cp}, followed by one
     * {@code <hr>}-separated entry for each distinct value found in the locale's regional variants.
     *
     * @param cp the character (or sequence) of the current row
     * @param name display name of the locale; used only for debug output
     * @param locale locale code of the current column
     * @param subs regional sub-locales attached to {@code locale}; may be empty
     * @return the cell contents
     */
    private static String annotationCell(String cp, String name, String locale, Collection<String> subs) {
        AnnotationSet annotations = Annotations.getDataSet(locale);
        AnnotationSet parentAnnotations = Annotations.getDataSet(LocaleIDParser.getParent(locale));
        String baseAnnotation = annotations.toString(cp, true, parentAnnotations);

        if (DEBUG) {
            System.out.println(name + ":" + annotations.toString(cp, false, null));
        }
        if (subs.isEmpty()) {
            return baseAnnotation;
        }

        // group the sub-locales by the value they produce, keeping only values that differ
        Multimap<String, String> valueToSub = TreeMultimap.create();
        for (String sub : subs) {
            AnnotationSet subAnnotations = Annotations.getDataSet(sub);
            // NOTE(review): the sub-locale is compared against the parent of the column locale
            // (getParent(locale)), not getParent(sub) — kept as found; confirm the intent.
            String subAnnotation = subAnnotations.toString(cp, true, parentAnnotations);
            if (!subAnnotation.equals(baseAnnotation)) {
                valueToSub.put(subAnnotation, sub);
            }
        }

        StringBuilder cell = new StringBuilder(baseAnnotation);
        for (Entry<String, Collection<String>> entry : valueToSub.asMap().entrySet()) {
            cell.append("<hr><i>")
                .append(Joiner.on(", ").join(entry.getValue()))
                .append("</i>: ")
                .append(entry.getKey());
        }
        return cell.toString();
    }

    static final int FIRST_REGIONAL = 0x1F1E6;
    static final int LAST_REGIONAL = 0x1F1FF;

    /**
     * Maps a code point in the range {@link #FIRST_REGIONAL}..{@link #LAST_REGIONAL} to the
     * corresponding letter counted from {@code 'A'}.
     *
     * @param firstCodepoint the code point to test
     * @return the letter's code ({@code 'A'} for {@code FIRST_REGIONAL}, and so on),
     *         or -1 if the code point is outside the range
     */
    public static int getRegionalIndicator(int firstCodepoint) {
        return FIRST_REGIONAL <= firstCodepoint && firstCodepoint <= LAST_REGIONAL
            ? firstCodepoint - FIRST_REGIONAL + 'A'
            : -1;
    }

    /**
     * A single per-language-group chart page wrapping an already populated table.
     * Static because it only uses static members of the enclosing class.
     */
    private static class Subchart extends Chart {
        private final String title;
        private final String file;
        private final TablePrinter tablePrinter;

        @Override
        public boolean getShowDate() {
            return false;
        }

        /**
         * @param title page title
         * @param file file name for the page, as returned by {@link #getFileName()}
         * @param tablePrinter the populated table to render as the page body
         */
        public Subchart(String title, String file, TablePrinter tablePrinter) {
            super();
            this.title = title;
            this.file = file;
            this.tablePrinter = tablePrinter;
        }

        @Override
        public String getDirectory() {
            return DIR;
        }

        @Override
        public String getTitle() {
            return title;
        }

        @Override
        public String getFileName() {
            return file;
        }

        @Override
        public String getExplanation() {
            return MAIN_HEADER
                + "<p>This table shows the annotations for a group of related languages (plus English) for easier comparison. "
                + "The first item is the <b>short name</b> (also the text-to-speech phrase). "
                + "It is bolded for clarity, and marked with a * for searching on this page. "
                + "The remaining phrases are <b>keywords</b> (labels), separated by “|”. "
                + "The keywords plus the words in the short name are typically used for search and predictive typing.</p>\n"
                + "<p>Most short names and keywords that can be constructed with the mechanism in " + LDML_ANNOTATIONS + " are omitted. "
                + "However, a few are included for comparison: "
                + Joiner.on(", ").join(EXTRAS.addAllTo(new TreeSet<>())) + ". "
                + "In this chart, missing items are marked with “" + Annotations.MISSING_MARKER + "”, "
                + "‘fallback’ constructed items with “" + Annotations.BAD_MARKER + "”, "
                + "substituted English values with “" + Annotations.ENGLISH_MARKER + "”, and "
                + "values equal to their parent locale’s values are replaced with " + Annotations.EQUIVALENT + ".</p>\n";
        }

        @Override
        public void writeContents(FormattedFileWriter pw) throws IOException {
            pw.write(tablePrinter.toTable());
        }
    }

    /** Collator built from the root "emoji" collation rules; used to order the chart rows. */
    public static RuleBasedCollator RBC;
    static {
        Factory cldrFactory = Factory.make(CLDRPaths.COMMON_DIRECTORY + "collation/", ".*");
        CLDRFile root = cldrFactory.make("root", false);
        String rules = root.getStringValue("//ldml/collations/collation[@type=\"emoji\"][@visibility=\"external\"]/cr");

        try {
            RBC = new RuleBasedCollator(rules);
        } catch (Exception e) {
            // fail class initialization with a message that points at the rule source
            throw new IllegalArgumentException("Failure in rules for " + CLDRPaths.COMMON_DIRECTORY + "collation/" + "root", e);
        }
    }
}