1 package org.unicode.cldr.util; 2 3 import java.io.File; 4 import java.io.IOException; 5 import java.util.ArrayList; 6 import java.util.Arrays; 7 import java.util.Collection; 8 import java.util.Collections; 9 import java.util.LinkedHashSet; 10 import java.util.Locale; 11 import java.util.Map; 12 import java.util.Set; 13 import java.util.TreeSet; 14 import java.util.concurrent.ConcurrentHashMap; 15 import java.util.regex.Pattern; 16 17 import org.unicode.cldr.tool.ChartAnnotations; 18 import org.unicode.cldr.tool.SubdivisionNames; 19 import org.unicode.cldr.util.XMLFileReader.SimpleHandler; 20 21 import com.google.common.base.Joiner; 22 import com.google.common.base.Objects; 23 import com.google.common.base.Splitter; 24 import com.google.common.collect.ImmutableSet; 25 import com.google.common.collect.ImmutableSet.Builder; 26 import com.ibm.icu.dev.util.UnicodeMap; 27 import com.ibm.icu.impl.Utility; 28 import com.ibm.icu.lang.CharSequences; 29 import com.ibm.icu.text.SimpleFormatter; 30 import com.ibm.icu.text.Transform; 31 import com.ibm.icu.text.UTF16; 32 import com.ibm.icu.text.UnicodeSet; 33 import com.ibm.icu.text.UnicodeSet.SpanCondition; 34 import com.ibm.icu.text.UnicodeSetSpanner; 35 import com.ibm.icu.util.ICUUncheckedIOException; 36 37 public class Annotations { 38 private static final boolean DEBUG = false; 39 40 public static final String BAD_MARKER = "⊗"; 41 public static final String MISSING_MARKER = "⊖"; 42 public static final String ENGLISH_MARKER = "⊕"; 43 public static final String EQUIVALENT = "≣"; 44 public static final String NEUTRAL_HOLDING = ""; 45 46 public static final Splitter splitter = Splitter.on(Pattern.compile("[|;]")).trimResults().omitEmptyStrings(); 47 static final Splitter dotSplitter = Splitter.on(".").trimResults(); 48 49 static final Map<String, Map<String, AnnotationSet>> cache = new ConcurrentHashMap<>(); 50 static final Set<String> LOCALES; 51 static final String DIR; 52 private static final AnnotationSet ENGLISH_DATA; 53 54 private final Set<String> annotations; 55 private final String tts; 56 57 static { 58 File directory = new File(CLDRPaths.COMMON_DIRECTORY, "annotations"); 59 DIR = PathUtilities.getNormalizedPathString(directory); 60 if (DEBUG) { 61 System.out.println(DIR); 62 } 63 Builder<String> temp = ImmutableSet.builder(); 64 for (File file : directory.listFiles()) { 65 if (DEBUG) { PathUtilities.getNormalizedPathString(file)66 System.out.println(PathUtilities.getNormalizedPathString(file)); 67 } 68 String name = file.toString(); 69 String shortName = file.getName(); 70 if (!shortName.endsWith(".xml") || // skip non-XML 71 shortName.startsWith("#") || // skip other junk files 72 shortName.startsWith(".") 73 // || shortName.contains("001") // skip world english for now 74 ) continue; // skip dot files (backups, etc) 75 temp.add(dotSplitter.split(shortName).iterator().next()); 76 } 77 LOCALES = temp.build(); 78 ENGLISH_DATA = getDataSet("en"); 79 } 80 81 static class MyHandler extends SimpleHandler { 82 private final String locale; 83 private final UnicodeMap<Annotations> localeData = new UnicodeMap<>(); 84 private final AnnotationSet parentData; 85 private final Map<String, AnnotationSet> dirCache; 86 MyHandler(Map<String, AnnotationSet> dirCache, String locale, AnnotationSet parentData)87 public MyHandler(Map<String, AnnotationSet> dirCache, String locale, AnnotationSet parentData) { 88 this.locale = locale; 89 this.parentData = parentData; 90 this.dirCache = dirCache; 91 } 92 cleanup()93 public AnnotationSet cleanup() { 94 // add parent data (may be overridden) 95 UnicodeMap<Annotations> templocaleData = null; 96 if (parentData != null) { 97 templocaleData = new UnicodeMap<>(); 98 UnicodeSet keys = new UnicodeSet(parentData.baseData.keySet()).addAll(localeData.keySet()); 99 for (String key : keys) { 100 Annotations parentValue = parentData.baseData.get(key); 101 Annotations myValue = localeData.get(key); 102 if (parentValue == null) { 103 templocaleData.put(key, myValue); 104 } else if (myValue == null) { 105 templocaleData.put(key, parentValue); 106 } else { // need to combine 107 String tts = myValue.tts == null 108 ? parentValue.tts : myValue.tts; 109 Set<String> annotations = myValue.annotations == null || myValue.annotations.isEmpty() 110 ? parentValue.annotations : myValue.annotations; 111 templocaleData.put(key, new Annotations(annotations, tts)); 112 } 113 } 114 } 115 116 final AnnotationSet result = new AnnotationSet(locale, localeData, templocaleData); 117 dirCache.put(locale, result); 118 return result; 119 } 120 121 static final Pattern SPACES = Pattern.compile("\\s+"); 122 123 @Override handlePathValue(String path, String value)124 public void handlePathValue(String path, String value) { 125 if (value.contains(CldrUtility.INHERITANCE_MARKER)) { 126 return; // skip all ^^^ 127 } 128 XPathParts parts = XPathParts.getFrozenInstance(path); 129 String lastElement = parts.getElement(-1); 130 if (!lastElement.equals("annotation")) { 131 if (!"identity".equals(parts.getElement(1))) { 132 throw new IllegalArgumentException("Unexpected path"); 133 } 134 return; 135 } 136 String usString = parts.getAttributeValue(-1, "cp"); 137 UnicodeSet us1 = usString.startsWith("[") && usString.endsWith("]") ? new UnicodeSet(usString) : new UnicodeSet().add(usString); 138 UnicodeSet us = new UnicodeSet(); 139 for (String s : us1) { 140 us.add(s.replace(EmojiConstants.EMOJI_VARIANT_STRING, "")); 141 } 142 String tts = parts.getAttributeValue(-1, "tts"); 143 String type = parts.getAttributeValue(-1, "type"); 144 String alt = parts.getAttributeValue(-1, "alt"); 145 146 // clean up value 147 String value2 = SPACES.matcher(value).replaceAll(" ").trim(); 148 if (!value2.equals(value)) { 149 value = value2; 150 } 151 if (alt != null) { 152 // do nothing for now 153 } else if ("tts".equals(type)) { 154 addItems(localeData, us, Collections.<String> emptySet(), value); 155 } else { 156 Set<String> attributes = new TreeSet<>(splitter.splitToList(value)); 157 addItems(localeData, us, attributes, tts); 158 } 159 } 160 addItems(UnicodeMap<Annotations> unicodeMap, UnicodeSet us, Set<String> attributes, String tts)161 private void addItems(UnicodeMap<Annotations> unicodeMap, UnicodeSet us, Set<String> attributes, String tts) { 162 for (String entry : us) { 163 addItems(unicodeMap, entry, attributes, tts); 164 } 165 } 166 addItems(UnicodeMap<Annotations> unicodeMap, String entry, Set<String> attributes, String tts)167 private void addItems(UnicodeMap<Annotations> unicodeMap, String entry, Set<String> attributes, String tts) { 168 Annotations annotations = unicodeMap.get(entry); 169 if (annotations == null) { 170 unicodeMap.put(entry, new Annotations(attributes, tts)); 171 } else { 172 unicodeMap.put(entry, annotations.add(attributes, tts)); // creates new item 173 } 174 } 175 } 176 Annotations(Set<String> attributes, String tts2)177 public Annotations(Set<String> attributes, String tts2) { 178 annotations = attributes == null ? Collections.<String> emptySet() : ImmutableSet.copyOf(attributes); 179 for (String attr : annotations) { 180 if (attr.contains(CldrUtility.INHERITANCE_MARKER)) { 181 throw new IllegalArgumentException(CldrUtility.INHERITANCE_MARKER); 182 } 183 184 } 185 tts = tts2; 186 if (tts != null && tts.contains(CldrUtility.INHERITANCE_MARKER)) { 187 throw new IllegalArgumentException(CldrUtility.INHERITANCE_MARKER); 188 } 189 } 190 add(Set<String> attributes, String tts2)191 public Annotations add(Set<String> attributes, String tts2) { 192 return new Annotations(getKeywords() == null ? attributes : attributes == null ? getKeywords() : union(attributes, getKeywords()), 193 getShortName() == null ? tts2 : tts2 == null ? getShortName() : throwDup()); 194 } 195 throwDup()196 private String throwDup() { 197 throw new IllegalArgumentException("Duplicate tts"); 198 } 199 union(Set<String> a, Set<String> b)200 private Set<String> union(Set<String> a, Set<String> b) { 201 TreeSet<String> result = new TreeSet<>(a); 202 result.addAll(b); 203 return result; 204 } 205 getAvailable()206 public static Set<String> getAvailable() { 207 return LOCALES; 208 } 209 getAvailableLocales()210 public static Set<String> getAvailableLocales() { 211 return LOCALES; 212 } 213 214 public static final class AnnotationSet { 215 216 private static final CLDRConfig CONFIG = CLDRConfig.getInstance(); 217 218 static final Factory factory = CONFIG.getCldrFactory(); 219 static final CLDRFile ENGLISH = CONFIG.getEnglish(); 220 static final CLDRFile ENGLISH_ANNOTATIONS = null; 221 static final SubdivisionNames englishSubdivisionIdToName = new SubdivisionNames("en", "main"); 222 //CLDRConfig.getInstance().getAnnotationsFactory().make("en", false); 223 224 private final String locale; 225 private final UnicodeMap<Annotations> baseData; 226 private final UnicodeMap<Annotations> unresolvedData; 227 private final CLDRFile cldrFile; 228 private final SubdivisionNames subdivisionIdToName; 229 private final SimpleFormatter initialPattern; 230 private final Pattern initialRegexPattern; 231 private final XListFormatter listPattern; 232 private final Set<String> flagLabelSet; 233 private final Set<String> keycapLabelSet; 234 private final String keycapLabel; 235 private final String flagLabel; 236 // private final String maleLabel; 237 // private final String femaleLabel; 238 private final Map<String, Annotations> localeCache = new ConcurrentHashMap<>(); 239 240 static UnicodeSetSpanner uss = new UnicodeSetSpanner(EmojiConstants.COMPONENTS); // must be sync'ed 241 AnnotationSet(String locale, UnicodeMap<Annotations> source, UnicodeMap<Annotations> resolvedSource)242 private AnnotationSet(String locale, UnicodeMap<Annotations> source, UnicodeMap<Annotations> resolvedSource) { 243 this.locale = locale; 244 unresolvedData = source.freeze(); 245 this.baseData = resolvedSource == null ? unresolvedData : resolvedSource.freeze(); 246 cldrFile = factory.make(locale, true); 247 subdivisionIdToName = new SubdivisionNames(locale, "main", "subdivisions"); 248 // EmojiSubdivisionNames.getSubdivisionIdToName(locale); 249 listPattern = new XListFormatter(cldrFile, EmojiConstants.COMPOSED_NAME_LIST); 250 final String initialPatternString = getStringValue("//ldml/characterLabels/characterLabelPattern[@type=\"category-list\"]"); 251 initialPattern = SimpleFormatter.compile(initialPatternString); 252 final String regexPattern = ("\\Q" + initialPatternString.replace("{0}", "\\E.*\\Q").replace("{1}", "\\E.*\\Q") + "\\E") 253 .replace("\\Q\\E", ""); // HACK to detect use of prefix pattern 254 initialRegexPattern = Pattern.compile(regexPattern); 255 flagLabelSet = getLabelSet("flag"); 256 flagLabel = flagLabelSet.isEmpty() ? null : flagLabelSet.iterator().next(); 257 keycapLabelSet = getLabelSet("keycap"); 258 keycapLabel = keycapLabelSet.isEmpty() ? null : keycapLabelSet.iterator().next(); 259 // maleLabel = getStringValue("//ldml/characterLabels/characterLabel[@type=\"male\"]"); 260 // femaleLabel = getStringValue("//ldml/characterLabels/characterLabel[@type=\"female\"]"); 261 } 262 263 /** 264 * @deprecated Use {@link #getLabelSet(String)} instead 265 */ 266 @Deprecated getLabelSet()267 private Set<String> getLabelSet() { 268 return getLabelSet("flag"); 269 } 270 getLabelSet(String typeAttributeValue)271 private Set<String> getLabelSet(String typeAttributeValue) { 272 String label = getStringValue("//ldml/characterLabels/characterLabel[@type=\"" + typeAttributeValue + "\"]"); 273 return label == null ? Collections.<String> emptySet() : Collections.singleton(label); 274 } 275 getStringValue(String xpath)276 private String getStringValue(String xpath) { 277 return getStringValue(xpath, cldrFile, ENGLISH); 278 } 279 getStringValue(String xpath, CLDRFile cldrFile2, CLDRFile english)280 private String getStringValue(String xpath, CLDRFile cldrFile2, CLDRFile english) { 281 String result = cldrFile2.getStringValueWithBailey(xpath); 282 if (result == null) { 283 return ENGLISH_MARKER + english.getStringValueWithBailey(xpath); 284 } 285 String sourceLocale = cldrFile2.getSourceLocaleID(xpath, null); 286 if (sourceLocale.equals(XMLSource.CODE_FALLBACK_ID) || sourceLocale.equals(XMLSource.ROOT_ID)) { 287 return MISSING_MARKER + result; 288 } 289 return result; 290 } 291 getShortName(String code)292 public String getShortName(String code) { 293 return getShortName(code, null); 294 } 295 getShortName(String code, Transform<String, String> otherSource)296 public String getShortName(String code, Transform<String, String> otherSource) { 297 if (code.equals("♀️")) { 298 int debug = 0; 299 } 300 301 code = code.replace(EmojiConstants.EMOJI_VARIANT_STRING, ""); 302 Annotations stock = baseData.get(code); 303 if (stock != null && stock.tts != null) { 304 return stock.tts; 305 } 306 stock = localeCache.get(code); 307 if (stock != null) { 308 return stock.tts; 309 } 310 stock = synthesize(code, otherSource); 311 if (stock != null) { 312 localeCache.put(code, stock); 313 return stock.tts; 314 } 315 return null; 316 } 317 getKeywords(String code)318 public Set<String> getKeywords(String code) { 319 code = code.replace(EmojiConstants.EMOJI_VARIANT_STRING, ""); 320 Annotations stock = baseData.get(code); 321 if (stock != null && stock.annotations != null) { 322 return stock.annotations; 323 } 324 stock = localeCache.get(code); 325 if (stock != null) { 326 return stock.annotations; 327 } 328 stock = synthesize(code, null); 329 if (stock != null) { 330 localeCache.put(code, stock); 331 return stock.annotations; 332 } 333 return Collections.<String> emptySet(); 334 } 335 336 /** Returns the set of all keys for which annotations are available. WARNING: keys have the Emoji Presentation Selector removed! 337 */ keySet()338 public UnicodeSet keySet() { 339 return baseData.keySet(); 340 } 341 synthesize(String code, Transform<String, String> otherSource)342 private Annotations synthesize(String code, Transform<String, String> otherSource) { 343 if (code.equals("♂")) { 344 int debug = 0; 345 } 346 String shortName = null; 347 int len = code.codePointCount(0, code.length()); 348 boolean isKeycap10 = code.equals(""); 349 if (len == 1 && !isKeycap10) { 350 String tempName = null; 351 if (locale.equals("en")) { 352 if (otherSource != null) { 353 tempName = otherSource.transform(code); 354 } 355 if (tempName == null) { 356 return null; 357 } 358 return new Annotations(Collections.<String> emptySet(), tempName); 359 } else { // fall back to English if possible, but mark it. 360 tempName = getDataSet("en").getShortName(code); 361 if (tempName == null) { 362 return null; 363 } 364 return new Annotations(Collections.<String> emptySet(), ENGLISH_MARKER + tempName); 365 } 366 } else if (EmojiConstants.REGIONAL_INDICATORS.containsAll(code)) { 367 String countryCode = EmojiConstants.getFlagCode(code); 368 String path = CLDRFile.getKey(CLDRFile.TERRITORY_NAME, countryCode); 369 String regionName = getStringValue(path); 370 if (regionName == null) { 371 regionName = ENGLISH_MARKER + ENGLISH.getStringValueWithBailey(path); 372 } 373 String flagName = flagLabel == null ? regionName : initialPattern.format(flagLabel, regionName); 374 return new Annotations(flagLabelSet, flagName); 375 } else if (code.startsWith(EmojiConstants.BLACK_FLAG) 376 && code.endsWith(EmojiConstants.TAG_TERM)) { 377 String subdivisionCode = EmojiConstants.getTagSpec(code); 378 String subdivisionName = subdivisionIdToName.get(subdivisionCode); 379 if (subdivisionName == null) { 380 // subdivisionName = englishSubdivisionIdToName.get(subdivisionCode); 381 // if (subdivisionName != null) { 382 // subdivisionName = ENGLISH_MARKER + subdivisionCode; 383 // } else { 384 subdivisionName = MISSING_MARKER + subdivisionCode; 385 // } 386 } 387 String flagName = flagLabel == null ? subdivisionName : initialPattern.format(flagLabel, subdivisionName); 388 return new Annotations(flagLabelSet, flagName); 389 } else if (isKeycap10 || code.contains(EmojiConstants.KEYCAP_MARK_STRING)) { 390 final String rem = code.equals("") ? "10" : UTF16.valueOf(code.charAt(0)); 391 shortName = initialPattern.format(keycapLabel, rem); 392 return new Annotations(keycapLabelSet, shortName); 393 } 394 UnicodeSet skipSet = EmojiConstants.REM_SKIP_SET; 395 String rem = ""; 396 SimpleFormatter startPattern = initialPattern; 397 if (EmojiConstants.COMPONENTS.containsSome(code)) { 398 synchronized (uss) { 399 rem = uss.deleteFrom(code, SpanCondition.NOT_CONTAINED); 400 code = uss.deleteFrom(code, SpanCondition.CONTAINED); 401 } 402 } 403 if (code.contains(EmojiConstants.JOINER_STRING)) { 404 // if (code.endsWith(EmojiConstants.JOINER_MALE_SIGN)){ 405 // if (matchesInitialPattern(code)) { // "♂️","police officer: man, medium-light skin tone" 406 // rem = EmojiConstants.MAN + rem; 407 // code = code.substring(0,code.length()-EmojiConstants.JOINER_MALE_SIGN.length()); 408 // } // otherwise "♂️","man biking: dark skin tone" 409 // } else if (code.endsWith(EmojiConstants.JOINER_FEMALE_SIGN)){ 410 // if (matchesInitialPattern(code)) { // 411 // rem = EmojiConstants.WOMAN + rem; 412 // code = code.substring(0,code.length()-EmojiConstants.JOINER_FEMALE_SIGN.length()); 413 // } 414 // } else 415 if (code.contains(EmojiConstants.KISS)) { 416 rem = code + rem; 417 code = ""; 418 skipSet = EmojiConstants.REM_GROUP_SKIP_SET; 419 } else if (code.contains(EmojiConstants.HEART) && !code.startsWith(EmojiConstants.HEART)) { 420 rem = code + rem; 421 code = ""; 422 skipSet = EmojiConstants.REM_GROUP_SKIP_SET; 423 } else if (code.contains(EmojiConstants.HANDSHAKE)) { 424 code = code.startsWith(EmojiConstants.MAN) ? "" 425 : code.endsWith(EmojiConstants.MAN) ? "" 426 : code.startsWith(EmojiConstants.WOMAN) ? "" 427 : NEUTRAL_HOLDING; 428 skipSet = EmojiConstants.REM_GROUP_SKIP_SET; 429 } else if (EmojiConstants.FAMILY_MARKERS.containsAll(code)) { 430 rem = code + rem; 431 code = ""; 432 skipSet = EmojiConstants.REM_GROUP_SKIP_SET; 433 // } else { 434 // startPattern = listPattern; 435 } 436 // left over is "⚖","judge: man, dark skin tone" 437 } 438 return getBasePlusRemainder(cldrFile, code, rem, skipSet, startPattern, otherSource); 439 } 440 matchesInitialPattern(String code)441 private boolean matchesInitialPattern(String code) { 442 Annotations baseAnnotation = baseData.get(code); 443 String baseName = baseAnnotation == null ? null : baseAnnotation.getShortName(); 444 return baseName != null && initialRegexPattern.matcher(baseName).matches(); 445 } 446 getBasePlusRemainder(CLDRFile cldrFile, String base, String rem, UnicodeSet ignore, SimpleFormatter pattern, Transform<String, String> otherSource)447 private Annotations getBasePlusRemainder(CLDRFile cldrFile, String base, String rem, UnicodeSet ignore, SimpleFormatter pattern, 448 Transform<String, String> otherSource) { 449 String shortName = null; 450 Set<String> annotations = new LinkedHashSet<>(); 451 boolean needMarker = true; 452 453 if (base != null) { 454 needMarker = false; 455 Annotations stock = baseData.get(base); 456 if (stock != null) { 457 shortName = stock.getShortName(); 458 annotations.addAll(stock.getKeywords()); 459 } else if (otherSource != null) { 460 shortName = otherSource.transform(base); 461 } else { 462 return null; 463 } 464 if (shortName == null) { 465 return null; 466 } 467 } 468 469 boolean hackBlond = EmojiConstants.HAIR_EXPLICIT.contains(base.codePointAt(0)); 470 Collection<String> arguments = new ArrayList<>(); 471 int lastSkin = -1; 472 473 for (int mod : CharSequences.codePoints(rem)) { 474 if (ignore.contains(mod)) { 475 continue; 476 } 477 if (EmojiConstants.MODIFIERS.contains(mod)) { 478 if (lastSkin == mod) { 479 continue; 480 } 481 lastSkin = mod; // collapse skin tones. TODO fix if we ever do multi-skin families 482 } 483 Annotations stock = baseData.get(mod); 484 String modName = null; 485 if (stock != null) { 486 modName = stock.getShortName(); 487 } else if (otherSource != null) { 488 modName = otherSource.transform(base); 489 } 490 if (modName == null) { 491 needMarker = true; 492 if (ENGLISH_DATA != null) { 493 Annotations engName = ENGLISH_DATA.baseData.get(mod); 494 if (engName != null) { 495 modName = engName.getShortName(); 496 } 497 } 498 if (modName == null) { 499 modName = Utility.hex(mod); // ultimate fallback 500 } 501 } 502 if (hackBlond && shortName != null) { 503 // HACK: make the blond names look like the other hair names 504 // Split the short name into pieces, if possible, and insert the modName first 505 String sep = initialPattern.format("", ""); 506 int splitPoint = shortName.indexOf(sep); 507 if (splitPoint >= 0) { 508 String modName0 = shortName.substring(splitPoint+sep.length()); 509 shortName = shortName.substring(0, splitPoint); 510 if (modName != null) { 511 arguments.add(modName); 512 annotations.add(modName); 513 } 514 modName = modName0; 515 } 516 hackBlond = false; 517 } 518 519 if (modName != null) { 520 arguments.add(modName); 521 annotations.add(modName); 522 } 523 } 524 if (!arguments.isEmpty()) { 525 shortName = pattern.format(shortName, listPattern.format(arguments)); 526 } 527 Annotations result = new Annotations(annotations, (needMarker ? ENGLISH_MARKER : "") + shortName); 528 return result; 529 } 530 531 /** 532 * @deprecated Use {@link #toString(String,boolean,AnnotationSet)} instead 533 */ 534 @Deprecated toString(String code, boolean html)535 public String toString(String code, boolean html) { 536 return toString(code, html, null); 537 } 538 toString(String code, boolean html, AnnotationSet parentAnnotations)539 public String toString(String code, boolean html, AnnotationSet parentAnnotations) { 540 if (locale.equals("be") && code.equals("")) { 541 int debug = 0; 542 } 543 String shortName = getShortName(code); 544 if (shortName == null || shortName.startsWith(BAD_MARKER) || shortName.startsWith(ENGLISH_MARKER)) { 545 return MISSING_MARKER; 546 } 547 548 String parentShortName = parentAnnotations == null ? null : parentAnnotations.getShortName(code); 549 if (shortName != null && Objects.equal(shortName, parentShortName)) { 550 shortName = EQUIVALENT; 551 } 552 553 Set<String> keywords = getKeywordsMinus(code); 554 Set<String> parentKeywords = parentAnnotations == null ? null : parentAnnotations.getKeywordsMinus(code); 555 if (keywords != null && !keywords.isEmpty() && Objects.equal(keywords, parentKeywords)) { 556 keywords = Collections.singleton(EQUIVALENT); 557 } 558 559 String result = Joiner.on(" |\u00a0").join(keywords); 560 if (shortName != null) { 561 String ttsString = (html ? "*<b>" : "*") + shortName + (html ? "</b>" : "*"); 562 if (result.isEmpty()) { 563 result = ttsString; 564 } else { 565 result = ttsString + (html ? "<br>|\u00a0" : " |\u00a0") + result; 566 } 567 } 568 return result; 569 } 570 getExplicitValues()571 public UnicodeMap<Annotations> getExplicitValues() { 572 return baseData; 573 } 574 getUnresolvedExplicitValues()575 public UnicodeMap<Annotations> getUnresolvedExplicitValues() { 576 return unresolvedData; 577 } 578 getKeywordsMinus(String code)579 public Set<String> getKeywordsMinus(String code) { 580 String shortName = getShortName(code); 581 Set<String> keywords = getKeywords(code); 582 if (shortName != null && keywords.contains(shortName)) { 583 keywords = new LinkedHashSet<>(keywords); 584 keywords.remove(shortName); 585 } 586 return keywords; 587 } 588 } 589 getDataSet(String locale)590 public static AnnotationSet getDataSet(String locale) { 591 return getDataSet(DIR, locale); 592 } 593 getDataSet(String dir, String locale)594 public static AnnotationSet getDataSet(String dir, String locale) { 595 if (dir == null) { 596 dir = DIR; 597 } 598 Map<String, AnnotationSet> dirCache = cache.get(dir); 599 if (dirCache == null) { 600 cache.put(dir, dirCache = new ConcurrentHashMap<>()); 601 } 602 AnnotationSet result = dirCache.get(locale); 603 if (result != null) { 604 return result; 605 } 606 if (!LOCALES.contains(locale)) { 607 return null; 608 } 609 String parentString = LocaleIDParser.getParent(locale); 610 AnnotationSet parentData = null; 611 if (parentString != null && !parentString.equals("root")) { 612 parentData = getDataSet(dir, parentString); 613 } 614 MyHandler myHandler = new MyHandler(dirCache, locale, parentData); 615 XMLFileReader xfr = new XMLFileReader().setHandler(myHandler); 616 xfr.read(dir + "/" + locale + ".xml", -1, true); 617 return myHandler.cleanup(); 618 } 619 getData(String locale)620 public static UnicodeMap<Annotations> getData(String locale) { 621 return getData(DIR, locale); 622 } 623 getData(String dir, String locale)624 public static UnicodeMap<Annotations> getData(String dir, String locale) { 625 AnnotationSet result = getDataSet(dir, locale); 626 return result == null ? null : result.baseData; 627 } 628 629 @Override toString()630 public String toString() { 631 return toString(false); 632 } 633 toString(boolean html)634 public String toString(boolean html) { 635 Set<String> annotations2 = getKeywords(); 636 if (getShortName() != null && annotations2.contains(getShortName())) { 637 annotations2 = new LinkedHashSet<>(getKeywords()); 638 annotations2.remove(getShortName()); 639 } 640 String result = Joiner.on(" |\u00a0").join(annotations2); 641 if (getShortName() != null) { 642 String ttsString = (html ? "*<b>" : "*") + getShortName() + (html ? "</b>" : "*"); 643 if (result.isEmpty()) { 644 result = ttsString; 645 } else { 646 result = ttsString + (html ? "<br>|\u00a0" : " |\u00a0") + result; 647 } 648 } 649 return result; 650 } 651 652 /** 653 * @return the annotations 654 */ getKeywords()655 public Set<String> getKeywords() { 656 return annotations; 657 } 658 659 /** 660 * @return the tts 661 */ getShortName()662 public String getShortName() { 663 return tts; 664 } 665 main(String[] args)666 public static void main(String[] args) { 667 if (true) { 668 writeList(); 669 } else { 670 writeEnglish(); 671 } 672 } 673 writeList()674 private static void writeList() { 675 AnnotationSet eng = Annotations.getDataSet("en"); 676 Annotations an = eng.baseData.get("❤"); 677 final UnicodeMap<Annotations> map = eng.getUnresolvedExplicitValues(); 678 Set<String> keys = new TreeSet<>(ChartAnnotations.RBC); 679 map.keySet().addAllTo(keys); 680 // keys.add("⚖"); 681 for (String key : keys) { 682 System.out.println(Utility.hex(key, 4, "_").toLowerCase(Locale.ROOT) 683 + "\t" + key 684 + "\t" + map.get(key).getShortName() 685 + "\t" + Joiner.on(" | ").join(map.get(key).getKeywords())); 686 } 687 for (String s : Arrays.asList( 688 "", "❤️", 689 "", "❤️", 690 "", "", 691 "", "", 692 "⚖", "⚖", "⚖", "⚖", 693 "", "♂️", "♂️", "♀️", "♀️", 694 "", "", "♂️", "♂️", "♀️", "♀️")) { 695 final String shortName = eng.getShortName(s); 696 final Set<String> keywords = eng.getKeywords(s); 697 System.out.println("{\"" + s + "\",\"" + shortName + "\",\"" + Joiner.on("|") 698 .join(keywords) + "\"},"); 699 } 700 } 701 writeEnglish()702 private static void writeEnglish() { 703 AnnotationSet eng = Annotations.getDataSet("en"); 704 System.out.println(Annotations.getAvailable()); 705 AnnotationSet eng100 = Annotations.getDataSet("en_001"); 706 UnicodeMap<Annotations> map100 = eng100.getUnresolvedExplicitValues(); 707 final UnicodeMap<Annotations> map = eng.getUnresolvedExplicitValues(); 708 Set<String> keys = new TreeSet<>(ChartAnnotations.RBC); 709 map.keySet().addAllTo(keys); 710 for (String key : keys) { 711 Annotations value = map.get(key); 712 Annotations value100 = map100.get(key); 713 Set<String> keywords100 = (value100 == null ? null : value100.getKeywords()); 714 System.out.println(key + "\tname\t" 715 + "\t" + value.getShortName() 716 + "\t" + (value100 == null ? "" : value100.getShortName()) 717 + "\t" + Joiner.on(" | ").join(value.getKeywords()) 718 + "\t" + (keywords100 == null ? "" : Joiner.on(" | ").join(keywords100))); 719 } 720 } 721 } 722