1 package org.unicode.cldr.util; 2 3 import java.io.File; 4 import java.io.IOException; 5 import java.util.ArrayList; 6 import java.util.Arrays; 7 import java.util.Collection; 8 import java.util.Collections; 9 import java.util.LinkedHashSet; 10 import java.util.Locale; 11 import java.util.Map; 12 import java.util.Set; 13 import java.util.TreeSet; 14 import java.util.concurrent.ConcurrentHashMap; 15 import java.util.regex.Pattern; 16 17 import org.unicode.cldr.test.EmojiSubdivisionNames; 18 import org.unicode.cldr.tool.ChartAnnotations; 19 import org.unicode.cldr.util.XMLFileReader.SimpleHandler; 20 21 import com.google.common.base.Objects; 22 import com.google.common.base.Splitter; 23 import com.google.common.collect.ImmutableSet; 24 import com.google.common.collect.ImmutableSet.Builder; 25 import com.ibm.icu.dev.util.CollectionUtilities; 26 import com.ibm.icu.dev.util.UnicodeMap; 27 import com.ibm.icu.impl.Utility; 28 import com.ibm.icu.lang.CharSequences; 29 import com.ibm.icu.text.SimpleFormatter; 30 import com.ibm.icu.text.Transform; 31 import com.ibm.icu.text.UTF16; 32 import com.ibm.icu.text.UnicodeSet; 33 import com.ibm.icu.text.UnicodeSet.SpanCondition; 34 import com.ibm.icu.text.UnicodeSetSpanner; 35 import com.ibm.icu.util.ICUUncheckedIOException; 36 37 public class Annotations { 38 private static final boolean DEBUG = false; 39 40 public static final String BAD_MARKER = "⊗"; 41 public static final String MISSING_MARKER = "⊖"; 42 public static final String ENGLISH_MARKER = "⊕"; 43 public static final String EQUIVALENT = "≣"; 44 45 public static final Splitter splitter = Splitter.on(Pattern.compile("[|;]")).trimResults().omitEmptyStrings(); 46 static final Splitter dotSplitter = Splitter.on(".").trimResults(); 47 48 static final Map<String, Map<String, AnnotationSet>> cache = new ConcurrentHashMap<>(); 49 static final Set<String> LOCALES; 50 static final String DIR; 51 private static final AnnotationSet ENGLISH_DATA; 52 53 private final Set<String> annotations; 54 private final String tts; 55 56 static { 57 File directory = new File(CLDRPaths.COMMON_DIRECTORY, "annotations"); 58 try { 59 DIR = directory.getCanonicalPath(); 60 } catch (IOException e) { 61 throw new ICUUncheckedIOException(e); 62 } 63 if (DEBUG) { 64 System.out.println(DIR); 65 } 66 Builder<String> temp = ImmutableSet.builder(); 67 for (File file : directory.listFiles()) { 68 if (DEBUG) { 69 try { file.getCanonicalPath()70 System.out.println(file.getCanonicalPath()); 71 } catch (IOException e) { 72 } 73 } 74 String name = file.toString(); 75 String shortName = file.getName(); 76 if (!shortName.endsWith(".xml") || // skip non-XML 77 shortName.startsWith("#") || // skip other junk files 78 shortName.startsWith(".") 79 // || shortName.contains("001") // skip world english for now 80 ) continue; // skip dot files (backups, etc) 81 temp.add(dotSplitter.split(shortName).iterator().next()); 82 } 83 LOCALES = temp.build(); 84 ENGLISH_DATA = getDataSet("en"); 85 } 86 87 static class MyHandler extends SimpleHandler { 88 private final String locale; 89 private final UnicodeMap<Annotations> localeData = new UnicodeMap<>(); 90 private final AnnotationSet parentData; 91 private final Map<String, AnnotationSet> dirCache; 92 MyHandler(Map<String, AnnotationSet> dirCache, String locale, AnnotationSet parentData)93 public MyHandler(Map<String, AnnotationSet> dirCache, String locale, AnnotationSet parentData) { 94 this.locale = locale; 95 this.parentData = parentData; 96 this.dirCache = dirCache; 97 } 98 cleanup()99 public AnnotationSet cleanup() { 100 // add parent data (may be overridden) 101 UnicodeMap<Annotations> templocaleData = null; 102 if (parentData != null) { 103 templocaleData = new UnicodeMap<>(); 104 UnicodeSet keys = new UnicodeSet(parentData.baseData.keySet()).addAll(localeData.keySet()); 105 for (String key : keys) { 106 Annotations parentValue = parentData.baseData.get(key); 107 Annotations myValue = localeData.get(key); 108 if (parentValue == null) { 109 templocaleData.put(key, myValue); 110 } else if (myValue == null) { 111 templocaleData.put(key, parentValue); 112 } else { // need to combine 113 String tts = myValue.tts == null 114 ? parentValue.tts : myValue.tts; 115 Set<String> annotations = myValue.annotations == null || myValue.annotations.isEmpty() 116 ? parentValue.annotations : myValue.annotations; 117 templocaleData.put(key, new Annotations(annotations, tts)); 118 } 119 } 120 } 121 122 final AnnotationSet result = new AnnotationSet(locale, localeData, templocaleData); 123 dirCache.put(locale, result); 124 return result; 125 } 126 127 @Override handlePathValue(String path, String value)128 public void handlePathValue(String path, String value) { 129 XPathParts parts = XPathParts.getFrozenInstance(path); 130 String lastElement = parts.getElement(-1); 131 if (!lastElement.equals("annotation")) { 132 if (!"identity".equals(parts.getElement(1))) { 133 throw new IllegalArgumentException("Unexpected path"); 134 } 135 return; 136 } 137 String usString = parts.getAttributeValue(-1, "cp"); 138 UnicodeSet us1 = usString.startsWith("[") && usString.endsWith("]") ? new UnicodeSet(usString) : new UnicodeSet().add(usString); 139 UnicodeSet us = new UnicodeSet(); 140 for (String s : us1) { 141 us.add(s.replace(EmojiConstants.EMOJI_VARIANT_STRING, "")); 142 } 143 String tts = parts.getAttributeValue(-1, "tts"); 144 String type = parts.getAttributeValue(-1, "type"); 145 String alt = parts.getAttributeValue(-1, "alt"); 146 147 if (alt != null) { 148 // do nothing for now 149 } else if ("tts".equals(type)) { 150 addItems(localeData, us, Collections.<String> emptySet(), value); 151 } else { 152 Set<String> attributes = new TreeSet<>(splitter.splitToList(value)); 153 addItems(localeData, us, attributes, tts); 154 } 155 } 156 addItems(UnicodeMap<Annotations> unicodeMap, UnicodeSet us, Set<String> attributes, String tts)157 private void addItems(UnicodeMap<Annotations> unicodeMap, UnicodeSet us, Set<String> attributes, String tts) { 158 for (String entry : us) { 159 addItems(unicodeMap, entry, attributes, tts); 160 } 161 } 162 addItems(UnicodeMap<Annotations> unicodeMap, String entry, Set<String> attributes, String tts)163 private void addItems(UnicodeMap<Annotations> unicodeMap, String entry, Set<String> attributes, String tts) { 164 Annotations annotations = unicodeMap.get(entry); 165 if (annotations == null) { 166 unicodeMap.put(entry, new Annotations(attributes, tts)); 167 } else { 168 unicodeMap.put(entry, annotations.add(attributes, tts)); // creates new item 169 } 170 } 171 } 172 Annotations(Set<String> attributes, String tts2)173 public Annotations(Set<String> attributes, String tts2) { 174 annotations = attributes == null ? Collections.<String> emptySet() : ImmutableSet.copyOf(attributes); 175 tts = tts2; 176 } 177 add(Set<String> attributes, String tts2)178 public Annotations add(Set<String> attributes, String tts2) { 179 return new Annotations(getKeywords() == null ? attributes : attributes == null ? getKeywords() : union(attributes, getKeywords()), 180 getShortName() == null ? tts2 : tts2 == null ? getShortName() : throwDup()); 181 } 182 throwDup()183 private String throwDup() { 184 throw new IllegalArgumentException("Duplicate tts"); 185 } 186 union(Set<String> a, Set<String> b)187 private Set<String> union(Set<String> a, Set<String> b) { 188 TreeSet<String> result = new TreeSet<>(a); 189 result.addAll(b); 190 return result; 191 } 192 getAvailable()193 public static Set<String> getAvailable() { 194 return LOCALES; 195 } 196 getAvailableLocales()197 public static Set<String> getAvailableLocales() { 198 return LOCALES; 199 } 200 201 public static final class AnnotationSet { 202 203 private static final CLDRConfig CONFIG = CLDRConfig.getInstance(); 204 205 static final Factory factory = CONFIG.getCldrFactory(); 206 static final CLDRFile ENGLISH = CONFIG.getEnglish(); 207 static final CLDRFile ENGLISH_ANNOTATIONS = null; 208 static final Map<String,String> englishSubdivisionIdToName = EmojiSubdivisionNames.getSubdivisionIdToName("en"); 209 //CLDRConfig.getInstance().getAnnotationsFactory().make("en", false); 210 211 private final String locale; 212 private final UnicodeMap<Annotations> baseData; 213 private final UnicodeMap<Annotations> unresolvedData; 214 private final CLDRFile cldrFile; 215 private final Map<String, String> subdivisionIdToName; 216 private final SimpleFormatter initialPattern; 217 private final Pattern initialRegexPattern; 218 private final XListFormatter listPattern; 219 private final Set<String> flagLabelSet; 220 private final Set<String> keycapLabelSet; 221 private final String keycapLabel; 222 private final String flagLabel; 223 // private final String maleLabel; 224 // private final String femaleLabel; 225 private final Map<String, Annotations> localeCache = new ConcurrentHashMap<>(); 226 227 static UnicodeSetSpanner uss = new UnicodeSetSpanner(EmojiConstants.COMPONENTS); // must be sync'ed 228 AnnotationSet(String locale, UnicodeMap<Annotations> source, UnicodeMap<Annotations> resolvedSource)229 private AnnotationSet(String locale, UnicodeMap<Annotations> source, UnicodeMap<Annotations> resolvedSource) { 230 this.locale = locale; 231 unresolvedData = source.freeze(); 232 this.baseData = resolvedSource == null ? unresolvedData : resolvedSource.freeze(); 233 cldrFile = factory.make(locale, true); 234 subdivisionIdToName = EmojiSubdivisionNames.getSubdivisionIdToName(locale); 235 listPattern = new XListFormatter(cldrFile, EmojiConstants.COMPOSED_NAME_LIST); 236 final String initialPatternString = getStringValue("//ldml/characterLabels/characterLabelPattern[@type=\"category-list\"]"); 237 initialPattern = SimpleFormatter.compile(initialPatternString); 238 final String regexPattern = ("\\Q" + initialPatternString.replace("{0}", "\\E.*\\Q").replace("{1}", "\\E.*\\Q") + "\\E") 239 .replace("\\Q\\E", ""); // HACK to detect use of prefix pattern 240 initialRegexPattern = Pattern.compile(regexPattern); 241 flagLabelSet = getLabelSet("flag"); 242 flagLabel = flagLabelSet.isEmpty() ? null : flagLabelSet.iterator().next(); 243 keycapLabelSet = getLabelSet("keycap"); 244 keycapLabel = keycapLabelSet.isEmpty() ? null : keycapLabelSet.iterator().next(); 245 // maleLabel = getStringValue("//ldml/characterLabels/characterLabel[@type=\"male\"]"); 246 // femaleLabel = getStringValue("//ldml/characterLabels/characterLabel[@type=\"female\"]"); 247 } 248 249 /** 250 * @deprecated Use {@link #getLabelSet(String)} instead 251 */ getLabelSet()252 private Set<String> getLabelSet() { 253 return getLabelSet("flag"); 254 } 255 getLabelSet(String typeAttributeValue)256 private Set<String> getLabelSet(String typeAttributeValue) { 257 String label = getStringValue("//ldml/characterLabels/characterLabel[@type=\"" + typeAttributeValue + "\"]"); 258 return label == null ? Collections.<String> emptySet() : Collections.singleton(label); 259 } 260 getStringValue(String xpath)261 private String getStringValue(String xpath) { 262 return getStringValue(xpath, cldrFile, ENGLISH); 263 } 264 getStringValue(String xpath, CLDRFile cldrFile2, CLDRFile english)265 private String getStringValue(String xpath, CLDRFile cldrFile2, CLDRFile english) { 266 String result = cldrFile2.getStringValue(xpath); 267 if (result == null) { 268 return ENGLISH_MARKER + english.getStringValue(xpath); 269 } 270 String sourceLocale = cldrFile2.getSourceLocaleID(xpath, null); 271 if (sourceLocale.equals(XMLSource.CODE_FALLBACK_ID) || sourceLocale.equals(XMLSource.ROOT_ID)) { 272 return MISSING_MARKER + result; 273 } 274 return result; 275 } 276 getShortName(String code)277 public String getShortName(String code) { 278 return getShortName(code, null); 279 } 280 getShortName(String code, Transform<String, String> otherSource)281 public String getShortName(String code, Transform<String, String> otherSource) { 282 if (code.equals("♀️")) { 283 int debug = 0; 284 } 285 286 code = code.replace(EmojiConstants.EMOJI_VARIANT_STRING, ""); 287 Annotations stock = baseData.get(code); 288 if (stock != null && stock.tts != null) { 289 return stock.tts; 290 } 291 stock = localeCache.get(code); 292 if (stock != null) { 293 return stock.tts; 294 } 295 stock = synthesize(code, otherSource); 296 if (stock != null) { 297 localeCache.put(code, stock); 298 return stock.tts; 299 } 300 return null; 301 } 302 getKeywords(String code)303 public Set<String> getKeywords(String code) { 304 code = code.replace(EmojiConstants.EMOJI_VARIANT_STRING, ""); 305 Annotations stock = baseData.get(code); 306 if (stock != null && stock.annotations != null) { 307 return stock.annotations; 308 } 309 stock = localeCache.get(code); 310 if (stock != null) { 311 return stock.annotations; 312 } 313 stock = synthesize(code, null); 314 if (stock != null) { 315 localeCache.put(code, stock); 316 return stock.annotations; 317 } 318 return Collections.<String> emptySet(); 319 } 320 321 /** Returns the set of all keys for which annotations are available. WARNING: keys have the Emoji Presentation Selector removed! 322 */ keySet()323 public UnicodeSet keySet() { 324 return baseData.keySet(); 325 } 326 synthesize(String code, Transform<String, String> otherSource)327 private Annotations synthesize(String code, Transform<String, String> otherSource) { 328 if (code.equals("♂")) { 329 int debug = 0; 330 } 331 String shortName = null; 332 int len = code.codePointCount(0, code.length()); 333 boolean isKeycap10 = code.equals(""); 334 if (len == 1 && !isKeycap10) { 335 String tempName = null; 336 if (locale.equals("en")) { 337 if (otherSource != null) { 338 tempName = otherSource.transform(code); 339 } 340 if (tempName == null) { 341 return null; 342 } 343 return new Annotations(Collections.<String> emptySet(), tempName); 344 } else { // fall back to English if possible, but mark it. 345 tempName = getDataSet("en").getShortName(code); 346 if (tempName == null) { 347 return null; 348 } 349 return new Annotations(Collections.<String> emptySet(), ENGLISH_MARKER + tempName); 350 } 351 } else if (EmojiConstants.REGIONAL_INDICATORS.containsAll(code)) { 352 String countryCode = EmojiConstants.getFlagCode(code); 353 String path = CLDRFile.getKey(CLDRFile.TERRITORY_NAME, countryCode); 354 String regionName = getStringValue(path); 355 if (regionName == null) { 356 regionName = ENGLISH_MARKER + ENGLISH.getStringValue(path); 357 } 358 String flagName = flagLabel == null ? regionName : initialPattern.format(flagLabel, regionName); 359 return new Annotations(flagLabelSet, flagName); 360 } else if (code.startsWith(EmojiConstants.BLACK_FLAG) 361 && code.endsWith(EmojiConstants.TAG_TERM)) { 362 String subdivisionCode = EmojiConstants.getTagSpec(code); 363 String subdivisionName = subdivisionIdToName.get(subdivisionCode); 364 if (subdivisionName == null) { 365 subdivisionName = englishSubdivisionIdToName.get(subdivisionCode); 366 if (subdivisionName != null) { 367 subdivisionName = ENGLISH_MARKER + subdivisionCode; 368 } else { 369 subdivisionName = MISSING_MARKER + subdivisionCode; 370 } 371 } 372 String flagName = flagLabel == null ? subdivisionName : initialPattern.format(flagLabel, subdivisionName); 373 return new Annotations(flagLabelSet, flagName); 374 } else if (isKeycap10 || code.contains(EmojiConstants.KEYCAP_MARK_STRING)) { 375 final String rem = code.equals("") ? "10" : UTF16.valueOf(code.charAt(0)); 376 shortName = initialPattern.format(keycapLabel, rem); 377 return new Annotations(keycapLabelSet, shortName); 378 } 379 UnicodeSet skipSet = EmojiConstants.REM_SKIP_SET; 380 String rem = ""; 381 SimpleFormatter startPattern = initialPattern; 382 if (EmojiConstants.COMPONENTS.containsSome(code)) { 383 synchronized (uss) { 384 rem = uss.deleteFrom(code, SpanCondition.NOT_CONTAINED); 385 code = uss.deleteFrom(code, SpanCondition.CONTAINED); 386 } 387 } 388 if (code.contains(EmojiConstants.JOINER_STRING)) { 389 // if (code.endsWith(EmojiConstants.JOINER_MALE_SIGN)){ 390 // if (matchesInitialPattern(code)) { // "♂️","police officer: man, medium-light skin tone" 391 // rem = EmojiConstants.MAN + rem; 392 // code = code.substring(0,code.length()-EmojiConstants.JOINER_MALE_SIGN.length()); 393 // } // otherwise "♂️","man biking: dark skin tone" 394 // } else if (code.endsWith(EmojiConstants.JOINER_FEMALE_SIGN)){ 395 // if (matchesInitialPattern(code)) { // 396 // rem = EmojiConstants.WOMAN + rem; 397 // code = code.substring(0,code.length()-EmojiConstants.JOINER_FEMALE_SIGN.length()); 398 // } 399 // } else 400 if (code.contains(EmojiConstants.KISS)) { 401 rem = code + rem; 402 code = ""; 403 skipSet = EmojiConstants.REM_GROUP_SKIP_SET; 404 } else if (code.contains(EmojiConstants.HEART)) { 405 rem = code + rem; 406 code = ""; 407 skipSet = EmojiConstants.REM_GROUP_SKIP_SET; 408 } else if (code.contains(EmojiConstants.HANDSHAKE)) { 409 code = code.startsWith(EmojiConstants.MAN) ? "" 410 : code.endsWith(EmojiConstants.MAN) ? "" 411 : ""; 412 skipSet = EmojiConstants.REM_GROUP_SKIP_SET; 413 } else if (EmojiConstants.FAMILY_MARKERS.containsAll(code)) { 414 rem = code + rem; 415 code = ""; 416 skipSet = EmojiConstants.REM_GROUP_SKIP_SET; 417 // } else { 418 // startPattern = listPattern; 419 } 420 // left over is "⚖","judge: man, dark skin tone" 421 } 422 return getBasePlusRemainder(cldrFile, code, rem, skipSet, startPattern, otherSource); 423 } 424 matchesInitialPattern(String code)425 private boolean matchesInitialPattern(String code) { 426 Annotations baseAnnotation = baseData.get(code); 427 String baseName = baseAnnotation == null ? null : baseAnnotation.getShortName(); 428 return baseName != null && initialRegexPattern.matcher(baseName).matches(); 429 } 430 getBasePlusRemainder(CLDRFile cldrFile, String base, String rem, UnicodeSet ignore, SimpleFormatter pattern, Transform<String, String> otherSource)431 private Annotations getBasePlusRemainder(CLDRFile cldrFile, String base, String rem, UnicodeSet ignore, SimpleFormatter pattern, 432 Transform<String, String> otherSource) { 433 String shortName = null; 434 Set<String> annotations = new LinkedHashSet<>(); 435 boolean needMarker = true; 436 437 if (base != null) { 438 needMarker = false; 439 Annotations stock = baseData.get(base); 440 if (stock != null) { 441 shortName = stock.getShortName(); 442 annotations.addAll(stock.getKeywords()); 443 } else if (otherSource != null) { 444 shortName = otherSource.transform(base); 445 if (shortName == null) { 446 return null; 447 } 448 } else { 449 return null; 450 } 451 } 452 453 boolean hackBlond = EmojiConstants.HAIR_EXPLICIT.contains(base.codePointAt(0)); 454 Collection<String> arguments = new ArrayList<>(); 455 int lastSkin = -1; 456 457 for (int mod : CharSequences.codePoints(rem)) { 458 if (ignore.contains(mod)) { 459 continue; 460 } 461 if (EmojiConstants.MODIFIERS.contains(mod)) { 462 if (lastSkin == mod) { 463 continue; 464 } 465 lastSkin = mod; // collapse skin tones. TODO fix if we ever do multi-skin families 466 } 467 Annotations stock = baseData.get(mod); 468 String modName = null; 469 if (stock != null) { 470 modName = stock.getShortName(); 471 } else if (otherSource != null) { 472 modName = otherSource.transform(base); 473 } 474 if (modName == null) { 475 needMarker = true; 476 if (ENGLISH_DATA != null) { 477 Annotations engName = ENGLISH_DATA.baseData.get(mod); 478 if (engName != null) { 479 modName = engName.getShortName(); 480 } 481 } 482 if (modName == null) { 483 modName = Utility.hex(mod); // ultimate fallback 484 } 485 } 486 if (hackBlond && shortName != null) { 487 // HACK: make the blond names look like the other hair names 488 // Split the short name into pieces, if possible, and insert the modName first 489 String sep = initialPattern.format("", ""); 490 int splitPoint = shortName.indexOf(sep); 491 if (splitPoint >= 0) { 492 String modName0 = shortName.substring(splitPoint+sep.length()); 493 shortName = shortName.substring(0, splitPoint); 494 if (modName != null) { 495 arguments.add(modName); 496 annotations.add(modName); 497 } 498 modName = modName0; 499 } 500 hackBlond = false; 501 } 502 503 if (modName != null) { 504 arguments.add(modName); 505 annotations.add(modName); 506 } 507 } 508 if (!arguments.isEmpty()) { 509 shortName = pattern.format(shortName, listPattern.format(arguments)); 510 } 511 Annotations result = new Annotations(annotations, (needMarker ? ENGLISH_MARKER : "") + shortName); 512 return result; 513 } 514 515 /** 516 * @deprecated Use {@link #toString(String,boolean,AnnotationSet)} instead 517 */ toString(String code, boolean html)518 public String toString(String code, boolean html) { 519 return toString(code, html, null); 520 } 521 toString(String code, boolean html, AnnotationSet parentAnnotations)522 public String toString(String code, boolean html, AnnotationSet parentAnnotations) { 523 if (locale.equals("be") && code.equals("")) { 524 int debug = 0; 525 } 526 String shortName = getShortName(code); 527 if (shortName == null || shortName.startsWith(BAD_MARKER) || shortName.startsWith(ENGLISH_MARKER)) { 528 return MISSING_MARKER; 529 } 530 531 String parentShortName = parentAnnotations == null ? null : parentAnnotations.getShortName(code); 532 if (shortName != null && Objects.equal(shortName, parentShortName)) { 533 shortName = EQUIVALENT; 534 } 535 536 Set<String> keywords = getKeywordsMinus(code); 537 Set<String> parentKeywords = parentAnnotations == null ? null : parentAnnotations.getKeywordsMinus(code); 538 if (keywords != null && !keywords.isEmpty() && Objects.equal(keywords, parentKeywords)) { 539 keywords = Collections.singleton(EQUIVALENT); 540 } 541 542 String result = CollectionUtilities.join(keywords, " |\u00a0"); 543 if (shortName != null) { 544 String ttsString = (html ? "*<b>" : "*") + shortName + (html ? "</b>" : "*"); 545 if (result.isEmpty()) { 546 result = ttsString; 547 } else { 548 result = ttsString + (html ? "<br>|\u00a0" : " |\u00a0") + result; 549 } 550 } 551 return result; 552 } 553 getExplicitValues()554 public UnicodeMap<Annotations> getExplicitValues() { 555 return baseData; 556 } 557 getUnresolvedExplicitValues()558 public UnicodeMap<Annotations> getUnresolvedExplicitValues() { 559 return unresolvedData; 560 } 561 getKeywordsMinus(String code)562 public Set<String> getKeywordsMinus(String code) { 563 String shortName = getShortName(code); 564 Set<String> keywords = getKeywords(code); 565 if (shortName != null && keywords.contains(shortName)) { 566 keywords = new LinkedHashSet<String>(keywords); 567 keywords.remove(shortName); 568 } 569 return keywords; 570 } 571 } 572 getDataSet(String locale)573 public static AnnotationSet getDataSet(String locale) { 574 return getDataSet(DIR, locale); 575 } 576 getDataSet(String dir, String locale)577 public static AnnotationSet getDataSet(String dir, String locale) { 578 if (dir == null) { 579 dir = DIR; 580 } 581 Map<String, AnnotationSet> dirCache = cache.get(dir); 582 if (dirCache == null) { 583 cache.put(dir, dirCache = new ConcurrentHashMap<>()); 584 } 585 AnnotationSet result = dirCache.get(locale); 586 if (result != null) { 587 return result; 588 } 589 if (!LOCALES.contains(locale)) { 590 return null; 591 } 592 String parentString = LocaleIDParser.getSimpleParent(locale); 593 AnnotationSet parentData = null; 594 if (parentString != null && !parentString.equals("root")) { 595 parentData = getDataSet(dir, parentString); 596 } 597 MyHandler myHandler = new MyHandler(dirCache, locale, parentData); 598 XMLFileReader xfr = new XMLFileReader().setHandler(myHandler); 599 xfr.read(dir + "/" + locale + ".xml", -1, true); 600 return myHandler.cleanup(); 601 } 602 getData(String locale)603 public static UnicodeMap<Annotations> getData(String locale) { 604 return getData(DIR, locale); 605 } 606 getData(String dir, String locale)607 public static UnicodeMap<Annotations> getData(String dir, String locale) { 608 AnnotationSet result = getDataSet(dir, locale); 609 return result == null ? null : result.baseData; 610 } 611 612 @Override toString()613 public String toString() { 614 return toString(false); 615 } 616 toString(boolean html)617 public String toString(boolean html) { 618 Set<String> annotations2 = getKeywords(); 619 if (getShortName() != null && annotations2.contains(getShortName())) { 620 annotations2 = new LinkedHashSet<String>(getKeywords()); 621 annotations2.remove(getShortName()); 622 } 623 String result = CollectionUtilities.join(annotations2, " |\u00a0"); 624 if (getShortName() != null) { 625 String ttsString = (html ? "*<b>" : "*") + getShortName() + (html ? "</b>" : "*"); 626 if (result.isEmpty()) { 627 result = ttsString; 628 } else { 629 result = ttsString + (html ? "<br>|\u00a0" : " |\u00a0") + result; 630 } 631 } 632 return result; 633 } 634 635 /** 636 * @return the annotations 637 */ getKeywords()638 public Set<String> getKeywords() { 639 return annotations; 640 } 641 642 /** 643 * @return the tts 644 */ getShortName()645 public String getShortName() { 646 return tts; 647 } 648 main(String[] args)649 public static void main(String[] args) { 650 if (true) { 651 writeList(); 652 } else { 653 writeEnglish(); 654 } 655 } 656 writeList()657 private static void writeList() { 658 AnnotationSet eng = Annotations.getDataSet("en"); 659 Annotations an = eng.baseData.get("❤"); 660 final UnicodeMap<Annotations> map = eng.getUnresolvedExplicitValues(); 661 Set<String> keys = new TreeSet<>(ChartAnnotations.RBC); 662 map.keySet().addAllTo(keys); 663 // keys.add("⚖"); 664 for (String key : keys) { 665 System.out.println(Utility.hex(key, 4, "_").toLowerCase(Locale.ROOT) 666 + "\t" + key 667 + "\t" + map.get(key).getShortName() 668 + "\t" + CollectionUtilities.join(map.get(key).getKeywords(), " | ")); 669 } 670 for (String s : Arrays.asList( 671 "", "❤️", 672 "", "❤️", 673 "", "", 674 "", "", 675 "⚖", "⚖", "⚖", "⚖", 676 "", "♂️", "♂️", "♀️", "♀️", 677 "", "", "♂️", "♂️", "♀️", "♀️")) { 678 final String shortName = eng.getShortName(s); 679 final Set<String> keywords = eng.getKeywords(s); 680 System.out.println("{\"" + s + "\",\"" + shortName + "\",\"" + CollectionUtilities.join(keywords, "|") + "\"},"); 681 } 682 } 683 writeEnglish()684 private static void writeEnglish() { 685 AnnotationSet eng = Annotations.getDataSet("en"); 686 System.out.println(Annotations.getAvailable()); 687 AnnotationSet eng100 = Annotations.getDataSet("en_001"); 688 UnicodeMap<Annotations> map100 = eng100.getUnresolvedExplicitValues(); 689 final UnicodeMap<Annotations> map = eng.getUnresolvedExplicitValues(); 690 Set<String> keys = new TreeSet<>(ChartAnnotations.RBC); 691 map.keySet().addAllTo(keys); 692 for (String key : keys) { 693 Annotations value = map.get(key); 694 Annotations value100 = map100.get(key); 695 Set<String> keywords100 = (value100 == null ? null : value100.getKeywords()); 696 System.out.println(key + "\tname\t" 697 + "\t" + value.getShortName() 698 + "\t" + (value100 == null ? "" : value100.getShortName()) 699 + "\t" + CollectionUtilities.join(value.getKeywords(), " | ") 700 + "\t" + (keywords100 == null ? "" : CollectionUtilities.join(keywords100, " | "))); 701 } 702 } 703 } 704