1 package org.unicode.cldr.util;
2 
3 import java.io.File;
4 import java.io.IOException;
5 import java.util.ArrayList;
6 import java.util.Arrays;
7 import java.util.Collection;
8 import java.util.Collections;
9 import java.util.LinkedHashSet;
10 import java.util.Locale;
11 import java.util.Map;
12 import java.util.Set;
13 import java.util.TreeSet;
14 import java.util.concurrent.ConcurrentHashMap;
15 import java.util.regex.Pattern;
16 
17 import org.unicode.cldr.tool.ChartAnnotations;
18 import org.unicode.cldr.tool.SubdivisionNames;
19 import org.unicode.cldr.util.XMLFileReader.SimpleHandler;
20 
21 import com.google.common.base.Joiner;
22 import com.google.common.base.Objects;
23 import com.google.common.base.Splitter;
24 import com.google.common.collect.ImmutableSet;
25 import com.google.common.collect.ImmutableSet.Builder;
26 import com.ibm.icu.dev.util.UnicodeMap;
27 import com.ibm.icu.impl.Utility;
28 import com.ibm.icu.lang.CharSequences;
29 import com.ibm.icu.text.SimpleFormatter;
30 import com.ibm.icu.text.Transform;
31 import com.ibm.icu.text.UTF16;
32 import com.ibm.icu.text.UnicodeSet;
33 import com.ibm.icu.text.UnicodeSet.SpanCondition;
34 import com.ibm.icu.text.UnicodeSetSpanner;
35 import com.ibm.icu.util.ICUUncheckedIOException;
36 
37 public class Annotations {
    // When true, the static initializer prints the annotations directory and each file found in it.
    private static final boolean DEBUG = false;

    // Prefix checked by AnnotationSet.toString(...): a short name starting with it is reported as missing.
    public static final String BAD_MARKER = "⊗";
    // Prefix for values that come from root/code-fallback or have no localized subdivision name;
    // also the whole result of AnnotationSet.toString(...) when no usable short name exists.
    public static final String MISSING_MARKER = "⊖";
    // Prefix for values that fell back to English data.
    public static final String ENGLISH_MARKER = "⊕";
    // Placeholder emitted by AnnotationSet.toString(...) when a value equals the parent locale's value.
    public static final String EQUIVALENT = "≣";
    // Base sequence used by AnnotationSet.synthesize for handshake sequences that neither start with
    // MAN or WOMAN nor end with MAN.
    public static final String NEUTRAL_HOLDING = "��‍��‍��";

    // Splits a keyword list on '|' or ';', trimming each piece and dropping empty ones.
    public static final Splitter splitter = Splitter.on(Pattern.compile("[|;]")).trimResults().omitEmptyStrings();
    // Splits on '.'; used to take the locale ID from a file name such as "de.xml".
    static final Splitter dotSplitter = Splitter.on(".").trimResults();

    // Loaded data, keyed first by directory path and then by locale ID.
    static final Map<String, Map<String, AnnotationSet>> cache = new ConcurrentHashMap<>();
    // Locale IDs derived from the XML file names found in the default annotations directory.
    static final Set<String> LOCALES;
    // Normalized path string of the default annotations directory.
    static final String DIR;
    // The "en" data set from the default directory, loaded during class initialization.
    private static final AnnotationSet ENGLISH_DATA;

    // Keywords for one character or sequence; never null (see the constructor).
    private final Set<String> annotations;
    // Short name ("tts" value); may be null.
    private final String tts;
56 
57     static {
58         File directory = new File(CLDRPaths.COMMON_DIRECTORY, "annotations");
59         DIR = PathUtilities.getNormalizedPathString(directory);
60         if (DEBUG) {
61             System.out.println(DIR);
62         }
63         Builder<String> temp = ImmutableSet.builder();
64         for (File file : directory.listFiles()) {
65             if (DEBUG) {
PathUtilities.getNormalizedPathString(file)66                 System.out.println(PathUtilities.getNormalizedPathString(file));
67             }
68             String name = file.toString();
69             String shortName = file.getName();
70             if (!shortName.endsWith(".xml") || // skip non-XML
71                 shortName.startsWith("#") || // skip other junk files
72                 shortName.startsWith(".")
73 //                || shortName.contains("001") // skip world english for now
74                 ) continue; // skip dot files (backups, etc)
75             temp.add(dotSplitter.split(shortName).iterator().next());
76         }
77         LOCALES = temp.build();
78         ENGLISH_DATA = getDataSet("en");
79     }
80 
81     static class MyHandler extends SimpleHandler {
82         private final String locale;
83         private final UnicodeMap<Annotations> localeData = new UnicodeMap<>();
84         private final AnnotationSet parentData;
85         private final Map<String, AnnotationSet> dirCache;
86 
MyHandler(Map<String, AnnotationSet> dirCache, String locale, AnnotationSet parentData)87         public MyHandler(Map<String, AnnotationSet> dirCache, String locale, AnnotationSet parentData) {
88             this.locale = locale;
89             this.parentData = parentData;
90             this.dirCache = dirCache;
91         }
92 
cleanup()93         public AnnotationSet cleanup() {
94             // add parent data (may be overridden)
95             UnicodeMap<Annotations> templocaleData = null;
96             if (parentData != null) {
97                 templocaleData = new UnicodeMap<>();
98                 UnicodeSet keys = new UnicodeSet(parentData.baseData.keySet()).addAll(localeData.keySet());
99                 for (String key : keys) {
100                     Annotations parentValue = parentData.baseData.get(key);
101                     Annotations myValue = localeData.get(key);
102                     if (parentValue == null) {
103                         templocaleData.put(key, myValue);
104                     } else if (myValue == null) {
105                         templocaleData.put(key, parentValue);
106                     } else { // need to combine
107                         String tts = myValue.tts == null
108                             ? parentValue.tts : myValue.tts;
109                         Set<String> annotations = myValue.annotations == null || myValue.annotations.isEmpty()
110                             ? parentValue.annotations : myValue.annotations;
111                         templocaleData.put(key, new Annotations(annotations, tts));
112                     }
113                 }
114             }
115 
116             final AnnotationSet result = new AnnotationSet(locale, localeData, templocaleData);
117             dirCache.put(locale, result);
118             return result;
119         }
120 
        // Matches a run of one or more whitespace characters; handlePathValue collapses each run to one space.
        static final Pattern SPACES = Pattern.compile("\\s+");
122 
123         @Override
handlePathValue(String path, String value)124         public void handlePathValue(String path, String value) {
125             if (value.contains(CldrUtility.INHERITANCE_MARKER)) {
126                 return; // skip all ^^^
127             }
128             XPathParts parts = XPathParts.getFrozenInstance(path);
129             String lastElement = parts.getElement(-1);
130             if (!lastElement.equals("annotation")) {
131                 if (!"identity".equals(parts.getElement(1))) {
132                     throw new IllegalArgumentException("Unexpected path");
133                 }
134                 return;
135             }
136             String usString = parts.getAttributeValue(-1, "cp");
137             UnicodeSet us1 = usString.startsWith("[") && usString.endsWith("]") ? new UnicodeSet(usString) : new UnicodeSet().add(usString);
138             UnicodeSet us = new UnicodeSet();
139             for (String s : us1) {
140                 us.add(s.replace(EmojiConstants.EMOJI_VARIANT_STRING, ""));
141             }
142             String tts = parts.getAttributeValue(-1, "tts");
143             String type = parts.getAttributeValue(-1, "type");
144             String alt = parts.getAttributeValue(-1, "alt");
145 
146             // clean up value
147             String value2 = SPACES.matcher(value).replaceAll(" ").trim();
148             if (!value2.equals(value)) {
149                 value = value2;
150             }
151             if (alt != null) {
152                 // do nothing for now
153             } else if ("tts".equals(type)) {
154                 addItems(localeData, us, Collections.<String> emptySet(), value);
155             } else {
156                 Set<String> attributes = new TreeSet<>(splitter.splitToList(value));
157                 addItems(localeData, us, attributes, tts);
158             }
159         }
160 
addItems(UnicodeMap<Annotations> unicodeMap, UnicodeSet us, Set<String> attributes, String tts)161         private void addItems(UnicodeMap<Annotations> unicodeMap, UnicodeSet us, Set<String> attributes, String tts) {
162             for (String entry : us) {
163                 addItems(unicodeMap, entry, attributes, tts);
164             }
165         }
166 
addItems(UnicodeMap<Annotations> unicodeMap, String entry, Set<String> attributes, String tts)167         private void addItems(UnicodeMap<Annotations> unicodeMap, String entry, Set<String> attributes, String tts) {
168             Annotations annotations = unicodeMap.get(entry);
169             if (annotations == null) {
170                 unicodeMap.put(entry, new Annotations(attributes, tts));
171             } else {
172                 unicodeMap.put(entry, annotations.add(attributes, tts)); // creates new item
173             }
174         }
175     }
176 
Annotations(Set<String> attributes, String tts2)177     public Annotations(Set<String> attributes, String tts2) {
178         annotations = attributes == null ? Collections.<String> emptySet() : ImmutableSet.copyOf(attributes);
179         for (String attr : annotations) {
180             if (attr.contains(CldrUtility.INHERITANCE_MARKER)) {
181                 throw new IllegalArgumentException(CldrUtility.INHERITANCE_MARKER);
182             }
183 
184         }
185         tts = tts2;
186         if (tts != null && tts.contains(CldrUtility.INHERITANCE_MARKER)) {
187             throw new IllegalArgumentException(CldrUtility.INHERITANCE_MARKER);
188         }
189     }
190 
add(Set<String> attributes, String tts2)191     public Annotations add(Set<String> attributes, String tts2) {
192         return new Annotations(getKeywords() == null ? attributes : attributes == null ? getKeywords() : union(attributes, getKeywords()),
193             getShortName() == null ? tts2 : tts2 == null ? getShortName() : throwDup());
194     }
195 
throwDup()196     private String throwDup() {
197         throw new IllegalArgumentException("Duplicate tts");
198     }
199 
union(Set<String> a, Set<String> b)200     private Set<String> union(Set<String> a, Set<String> b) {
201         TreeSet<String> result = new TreeSet<>(a);
202         result.addAll(b);
203         return result;
204     }
205 
getAvailable()206     public static Set<String> getAvailable() {
207         return LOCALES;
208     }
209 
getAvailableLocales()210     public static Set<String> getAvailableLocales() {
211         return LOCALES;
212     }
213 
214     public static final class AnnotationSet {
215 
        private static final CLDRConfig CONFIG = CLDRConfig.getInstance();

        // Factory used to build the resolved CLDRFile of each locale (see the constructor).
        static final Factory factory = CONFIG.getCldrFactory();
        // English CLDR data, used as the fallback source in getStringValue and synthesize.
        static final CLDRFile ENGLISH = CONFIG.getEnglish();
        // Currently always null; the intended initializer is kept commented out below.
        static final CLDRFile ENGLISH_ANNOTATIONS = null;
        static final SubdivisionNames englishSubdivisionIdToName = new SubdivisionNames("en", "main");
        //CLDRConfig.getInstance().getAnnotationsFactory().make("en", false);

        // Locale ID of this data set.
        private final String locale;
        // Explicit data with parent data merged in; equals unresolvedData when nothing was merged.
        private final UnicodeMap<Annotations> baseData;
        // Explicit data exactly as read from this locale's own file.
        private final UnicodeMap<Annotations> unresolvedData;
        // CLDR data for the locale, created via factory.make(locale, true).
        private final CLDRFile cldrFile;
        // Localized subdivision names, used for subdivision flags.
        private final SubdivisionNames subdivisionIdToName;
        // Compiled "category-list" character label pattern; joins a base name with its modifiers.
        private final SimpleFormatter initialPattern;
        // Regex derived from the same pattern string; used by matchesInitialPattern.
        private final Pattern initialRegexPattern;
        // Formats the list of modifier names appended to a base name.
        private final XListFormatter listPattern;
        // Keyword sets (empty or one label) and first labels for synthesized flag and keycap names.
        private final Set<String> flagLabelSet;
        private final Set<String> keycapLabelSet;
        private final String keycapLabel;
        private final String flagLabel;
//        private final String maleLabel;
//        private final String femaleLabel;
        // Synthesized (non-explicit) results, keyed by code with the emoji variant string removed.
        private final Map<String, Annotations> localeCache = new ConcurrentHashMap<>();

        // Shared by all instances; every use must hold the lock on this object (see synthesize).
        static UnicodeSetSpanner uss = new UnicodeSetSpanner(EmojiConstants.COMPONENTS); // must be sync'ed
241 
AnnotationSet(String locale, UnicodeMap<Annotations> source, UnicodeMap<Annotations> resolvedSource)242         private AnnotationSet(String locale, UnicodeMap<Annotations> source, UnicodeMap<Annotations> resolvedSource) {
243             this.locale = locale;
244             unresolvedData = source.freeze();
245             this.baseData = resolvedSource == null ? unresolvedData : resolvedSource.freeze();
246             cldrFile = factory.make(locale, true);
247             subdivisionIdToName = new SubdivisionNames(locale, "main", "subdivisions");
248 // EmojiSubdivisionNames.getSubdivisionIdToName(locale);
249             listPattern = new XListFormatter(cldrFile, EmojiConstants.COMPOSED_NAME_LIST);
250             final String initialPatternString = getStringValue("//ldml/characterLabels/characterLabelPattern[@type=\"category-list\"]");
251             initialPattern = SimpleFormatter.compile(initialPatternString);
252             final String regexPattern = ("\\Q" + initialPatternString.replace("{0}", "\\E.*\\Q").replace("{1}", "\\E.*\\Q") + "\\E")
253                 .replace("\\Q\\E", ""); // HACK to detect use of prefix pattern
254             initialRegexPattern = Pattern.compile(regexPattern);
255             flagLabelSet = getLabelSet("flag");
256             flagLabel = flagLabelSet.isEmpty() ? null : flagLabelSet.iterator().next();
257             keycapLabelSet = getLabelSet("keycap");
258             keycapLabel = keycapLabelSet.isEmpty() ? null : keycapLabelSet.iterator().next();
259 //            maleLabel = getStringValue("//ldml/characterLabels/characterLabel[@type=\"male\"]");
260 //            femaleLabel = getStringValue("//ldml/characterLabels/characterLabel[@type=\"female\"]");
261         }
262 
263         /**
264          * @deprecated Use {@link #getLabelSet(String)} instead
265          */
266         @Deprecated
getLabelSet()267         private Set<String> getLabelSet() {
268             return getLabelSet("flag");
269         }
270 
getLabelSet(String typeAttributeValue)271         private Set<String> getLabelSet(String typeAttributeValue) {
272             String label = getStringValue("//ldml/characterLabels/characterLabel[@type=\"" + typeAttributeValue + "\"]");
273             return label == null ? Collections.<String> emptySet() : Collections.singleton(label);
274         }
275 
getStringValue(String xpath)276         private String getStringValue(String xpath) {
277             return getStringValue(xpath, cldrFile, ENGLISH);
278         }
279 
getStringValue(String xpath, CLDRFile cldrFile2, CLDRFile english)280         private String getStringValue(String xpath, CLDRFile cldrFile2, CLDRFile english) {
281             String result = cldrFile2.getStringValueWithBailey(xpath);
282             if (result == null) {
283                 return ENGLISH_MARKER + english.getStringValueWithBailey(xpath);
284             }
285             String sourceLocale = cldrFile2.getSourceLocaleID(xpath, null);
286             if (sourceLocale.equals(XMLSource.CODE_FALLBACK_ID) || sourceLocale.equals(XMLSource.ROOT_ID)) {
287                 return MISSING_MARKER + result;
288             }
289             return result;
290         }
291 
getShortName(String code)292         public String getShortName(String code) {
293             return getShortName(code, null);
294         }
295 
getShortName(String code, Transform<String, String> otherSource)296         public String getShortName(String code, Transform<String, String> otherSource) {
297             if (code.equals("��‍♀️")) {
298                 int debug = 0;
299             }
300 
301             code = code.replace(EmojiConstants.EMOJI_VARIANT_STRING, "");
302             Annotations stock = baseData.get(code);
303             if (stock != null && stock.tts != null) {
304                 return stock.tts;
305             }
306             stock = localeCache.get(code);
307             if (stock != null) {
308                 return stock.tts;
309             }
310             stock = synthesize(code, otherSource);
311             if (stock != null) {
312                 localeCache.put(code, stock);
313                 return stock.tts;
314             }
315             return null;
316         }
317 
getKeywords(String code)318         public Set<String> getKeywords(String code) {
319             code = code.replace(EmojiConstants.EMOJI_VARIANT_STRING, "");
320             Annotations stock = baseData.get(code);
321             if (stock != null && stock.annotations != null) {
322                 return stock.annotations;
323             }
324             stock = localeCache.get(code);
325             if (stock != null) {
326                 return stock.annotations;
327             }
328             stock = synthesize(code, null);
329             if (stock != null) {
330                 localeCache.put(code, stock);
331                 return stock.annotations;
332             }
333             return Collections.<String> emptySet();
334         }
335 
336         /** Returns the set of all keys for which annotations are available. WARNING: keys have the Emoji Presentation Selector removed!
337          */
keySet()338         public UnicodeSet keySet() {
339             return baseData.keySet();
340         }
341 
        /**
         * Builds a short name and keywords for a code that has no explicit entry.
         *
         * <p>Cases are tried in this order: a single code point (other than the keycap-10 literal),
         * a sequence consisting only of regional indicators (country flag), a BLACK_FLAG ... TAG_TERM
         * sequence (subdivision flag), a keycap, and finally a base plus modifiers handled by
         * {@link #getBasePlusRemainder}.
         *
         * @param code the code, already stripped of the emoji variant string by the callers in this class
         * @param otherSource optional alternate source of names; may be null
         * @return the synthesized value, or null when no name can be produced
         */
        private Annotations synthesize(String code, Transform<String, String> otherSource) {
            // Dead branch: the body has no effect (looks like a debugger breakpoint anchor).
            if (code.equals("����‍♂")) {
                int debug = 0;
            }
            String shortName = null;
            int len = code.codePointCount(0, code.length());
            boolean isKeycap10 = code.equals("��");
            if (len == 1 && !isKeycap10) {
                // Single code point: no composition is possible, so the name must come from elsewhere.
                String tempName = null;
                if (locale.equals("en")) {
                    // English has nothing to fall back to except the caller-supplied source.
                    if (otherSource != null) {
                        tempName = otherSource.transform(code);
                    }
                    if (tempName == null) {
                        return null;
                    }
                    return new Annotations(Collections.<String> emptySet(), tempName);
                } else { // fall back to English if possible, but mark it.
                    tempName = getDataSet("en").getShortName(code);
                    if (tempName == null) {
                        return null;
                    }
                    return new Annotations(Collections.<String> emptySet(), ENGLISH_MARKER + tempName);
                }
            } else if (EmojiConstants.REGIONAL_INDICATORS.containsAll(code)) {
                // Country flag: the name is the territory name, combined with the flag label if there is one.
                String countryCode = EmojiConstants.getFlagCode(code);
                String path = CLDRFile.getKey(CLDRFile.TERRITORY_NAME, countryCode);
                String regionName = getStringValue(path);
                if (regionName == null) {
                    regionName = ENGLISH_MARKER + ENGLISH.getStringValueWithBailey(path);
                }
                String flagName = flagLabel == null ? regionName : initialPattern.format(flagLabel, regionName);
                return new Annotations(flagLabelSet, flagName);
            } else if (code.startsWith(EmojiConstants.BLACK_FLAG)
                && code.endsWith(EmojiConstants.TAG_TERM)) {
                // Subdivision flag: the name is the subdivision name, or the raw code marked as missing.
                String subdivisionCode = EmojiConstants.getTagSpec(code);
                String subdivisionName = subdivisionIdToName.get(subdivisionCode);
                if (subdivisionName == null) {
//                    subdivisionName = englishSubdivisionIdToName.get(subdivisionCode);
//                    if (subdivisionName != null) {
//                        subdivisionName = ENGLISH_MARKER + subdivisionCode;
//                    } else {
                        subdivisionName = MISSING_MARKER + subdivisionCode;
//                    }
                }
                String flagName = flagLabel == null ? subdivisionName : initialPattern.format(flagLabel, subdivisionName);
                return new Annotations(flagLabelSet, flagName);
            } else if (isKeycap10 || code.contains(EmojiConstants.KEYCAP_MARK_STRING)) {
                // Keycap: label plus "10" for the keycap-10 literal, otherwise the first char of the code.
                // NOTE(review): unlike flagLabel, keycapLabel is not null-checked before formatting.
                final String rem = code.equals("��") ? "10" : UTF16.valueOf(code.charAt(0));
                shortName = initialPattern.format(keycapLabel, rem);
                return new Annotations(keycapLabelSet, shortName);
            }
            UnicodeSet skipSet = EmojiConstants.REM_SKIP_SET;
            String rem = "";
            SimpleFormatter startPattern = initialPattern;
            if (EmojiConstants.COMPONENTS.containsSome(code)) {
                // uss is shared by all instances, hence the lock.
                // Presumably rem keeps only the component characters and code keeps everything else
                // — TODO confirm against UnicodeSetSpanner.deleteFrom.
                synchronized (uss) {
                    rem = uss.deleteFrom(code, SpanCondition.NOT_CONTAINED);
                    code = uss.deleteFrom(code, SpanCondition.CONTAINED);
                }
            }
            if (code.contains(EmojiConstants.JOINER_STRING)) {
//                if (code.endsWith(EmojiConstants.JOINER_MALE_SIGN)){
//                    if (matchesInitialPattern(code)) { // "����‍♂️","police officer: man, medium-light skin tone"
//                        rem = EmojiConstants.MAN + rem;
//                        code = code.substring(0,code.length()-EmojiConstants.JOINER_MALE_SIGN.length());
//                    } // otherwise "����‍♂️","man biking: dark skin tone"
//                } else if (code.endsWith(EmojiConstants.JOINER_FEMALE_SIGN)){
//                    if (matchesInitialPattern(code)) { //
//                        rem = EmojiConstants.WOMAN + rem;
//                        code = code.substring(0,code.length()-EmojiConstants.JOINER_FEMALE_SIGN.length());
//                    }
//                } else
                // Group sequences: the whole sequence moves into rem (except for handshakes) and code is
                // replaced by a fixed base; REM_GROUP_SKIP_SET then decides which parts of rem are ignored.
                if (code.contains(EmojiConstants.KISS)) {
                    rem = code + rem;
                    code = "��";
                    skipSet = EmojiConstants.REM_GROUP_SKIP_SET;
                } else if (code.contains(EmojiConstants.HEART) && !code.startsWith(EmojiConstants.HEART)) {
                    rem = code + rem;
                    code = "��";
                    skipSet = EmojiConstants.REM_GROUP_SKIP_SET;
                } else if (code.contains(EmojiConstants.HANDSHAKE)) {
                    // The base is picked from the first/last person; rem is left unchanged here.
                    code = code.startsWith(EmojiConstants.MAN) ? "��"
                        : code.endsWith(EmojiConstants.MAN) ? "��"
                            : code.startsWith(EmojiConstants.WOMAN) ? "��"
                            : NEUTRAL_HOLDING;
                    skipSet = EmojiConstants.REM_GROUP_SKIP_SET;
                } else if (EmojiConstants.FAMILY_MARKERS.containsAll(code)) {
                    rem = code + rem;
                    code = "��";
                    skipSet = EmojiConstants.REM_GROUP_SKIP_SET;
//                } else {
//                    startPattern = listPattern;
                }
                // left over is "����‍⚖","judge: man, dark skin tone"
            }
            return getBasePlusRemainder(cldrFile, code, rem, skipSet, startPattern, otherSource);
        }
440 
matchesInitialPattern(String code)441         private boolean matchesInitialPattern(String code) {
442             Annotations baseAnnotation = baseData.get(code);
443             String baseName = baseAnnotation == null ? null : baseAnnotation.getShortName();
444             return baseName != null && initialRegexPattern.matcher(baseName).matches();
445         }
446 
getBasePlusRemainder(CLDRFile cldrFile, String base, String rem, UnicodeSet ignore, SimpleFormatter pattern, Transform<String, String> otherSource)447         private Annotations getBasePlusRemainder(CLDRFile cldrFile, String base, String rem, UnicodeSet ignore, SimpleFormatter pattern,
448             Transform<String, String> otherSource) {
449             String shortName = null;
450             Set<String> annotations = new LinkedHashSet<>();
451             boolean needMarker = true;
452 
453             if (base != null) {
454                 needMarker = false;
455                 Annotations stock = baseData.get(base);
456                 if (stock != null) {
457                     shortName = stock.getShortName();
458                     annotations.addAll(stock.getKeywords());
459                 } else if (otherSource != null) {
460                     shortName = otherSource.transform(base);
461                 } else {
462                     return null;
463                 }
464                 if (shortName == null) {
465                     return null;
466                 }
467             }
468 
469             boolean hackBlond = EmojiConstants.HAIR_EXPLICIT.contains(base.codePointAt(0));
470             Collection<String> arguments = new ArrayList<>();
471             int lastSkin = -1;
472 
473             for (int mod : CharSequences.codePoints(rem)) {
474                 if (ignore.contains(mod)) {
475                     continue;
476                 }
477                 if (EmojiConstants.MODIFIERS.contains(mod)) {
478                     if (lastSkin == mod) {
479                         continue;
480                     }
481                     lastSkin = mod; // collapse skin tones. TODO fix if we ever do multi-skin families
482                 }
483                 Annotations stock = baseData.get(mod);
484                 String modName = null;
485                 if (stock != null) {
486                     modName = stock.getShortName();
487                 } else if (otherSource != null) {
488                     modName = otherSource.transform(base);
489                 }
490                 if (modName == null) {
491                     needMarker = true;
492                     if (ENGLISH_DATA != null) {
493                         Annotations engName = ENGLISH_DATA.baseData.get(mod);
494                         if (engName != null) {
495                             modName = engName.getShortName();
496                         }
497                     }
498                     if (modName == null) {
499                         modName = Utility.hex(mod); // ultimate fallback
500                     }
501                 }
502                 if (hackBlond && shortName != null) {
503                     // HACK: make the blond names look like the other hair names
504                     // Split the short name into pieces, if possible, and insert the modName first
505                     String sep = initialPattern.format("", "");
506                     int splitPoint = shortName.indexOf(sep);
507                     if (splitPoint >= 0) {
508                         String modName0 = shortName.substring(splitPoint+sep.length());
509                         shortName = shortName.substring(0, splitPoint);
510                         if (modName != null) {
511                             arguments.add(modName);
512                             annotations.add(modName);
513                         }
514                         modName = modName0;
515                     }
516                     hackBlond = false;
517                 }
518 
519                 if (modName != null) {
520                     arguments.add(modName);
521                     annotations.add(modName);
522                 }
523             }
524             if (!arguments.isEmpty()) {
525                 shortName = pattern.format(shortName, listPattern.format(arguments));
526             }
527             Annotations result = new Annotations(annotations, (needMarker ? ENGLISH_MARKER : "") + shortName);
528             return result;
529         }
530 
531         /**
532          * @deprecated Use {@link #toString(String,boolean,AnnotationSet)} instead
533          */
534         @Deprecated
toString(String code, boolean html)535         public String toString(String code, boolean html) {
536             return toString(code, html, null);
537         }
538 
toString(String code, boolean html, AnnotationSet parentAnnotations)539         public String toString(String code, boolean html, AnnotationSet parentAnnotations) {
540             if (locale.equals("be") && code.equals("��")) {
541                 int debug = 0;
542             }
543             String shortName = getShortName(code);
544             if (shortName == null || shortName.startsWith(BAD_MARKER) || shortName.startsWith(ENGLISH_MARKER)) {
545                 return MISSING_MARKER;
546             }
547 
548             String parentShortName = parentAnnotations == null ? null : parentAnnotations.getShortName(code);
549             if (shortName != null && Objects.equal(shortName, parentShortName)) {
550                 shortName = EQUIVALENT;
551             }
552 
553             Set<String> keywords = getKeywordsMinus(code);
554             Set<String> parentKeywords = parentAnnotations == null ? null : parentAnnotations.getKeywordsMinus(code);
555             if (keywords != null && !keywords.isEmpty() && Objects.equal(keywords, parentKeywords)) {
556                 keywords = Collections.singleton(EQUIVALENT);
557             }
558 
559             String result = Joiner.on(" |\u00a0").join(keywords);
560             if (shortName != null) {
561                 String ttsString = (html ? "*<b>" : "*") + shortName + (html ? "</b>" : "*");
562                 if (result.isEmpty()) {
563                     result = ttsString;
564                 } else {
565                     result = ttsString + (html ? "<br>|\u00a0" : " |\u00a0") + result;
566                 }
567             }
568             return result;
569         }
570 
getExplicitValues()571         public UnicodeMap<Annotations> getExplicitValues() {
572             return baseData;
573         }
574 
getUnresolvedExplicitValues()575         public UnicodeMap<Annotations> getUnresolvedExplicitValues() {
576             return unresolvedData;
577         }
578 
getKeywordsMinus(String code)579         public Set<String> getKeywordsMinus(String code) {
580             String shortName = getShortName(code);
581             Set<String> keywords = getKeywords(code);
582             if (shortName != null && keywords.contains(shortName)) {
583                 keywords = new LinkedHashSet<>(keywords);
584                 keywords.remove(shortName);
585             }
586             return keywords;
587         }
588     }
589 
getDataSet(String locale)590     public static AnnotationSet getDataSet(String locale) {
591         return getDataSet(DIR, locale);
592     }
593 
getDataSet(String dir, String locale)594     public static AnnotationSet getDataSet(String dir, String locale) {
595         if (dir == null) {
596             dir = DIR;
597         }
598         Map<String, AnnotationSet> dirCache = cache.get(dir);
599         if (dirCache == null) {
600             cache.put(dir, dirCache = new ConcurrentHashMap<>());
601         }
602         AnnotationSet result = dirCache.get(locale);
603         if (result != null) {
604             return result;
605         }
606         if (!LOCALES.contains(locale)) {
607             return null;
608         }
609         String parentString = LocaleIDParser.getParent(locale);
610         AnnotationSet parentData = null;
611         if (parentString != null && !parentString.equals("root")) {
612             parentData = getDataSet(dir, parentString);
613         }
614         MyHandler myHandler = new MyHandler(dirCache, locale, parentData);
615         XMLFileReader xfr = new XMLFileReader().setHandler(myHandler);
616         xfr.read(dir + "/" + locale + ".xml", -1, true);
617         return myHandler.cleanup();
618     }
619 
getData(String locale)620     public static UnicodeMap<Annotations> getData(String locale) {
621         return getData(DIR, locale);
622     }
623 
getData(String dir, String locale)624     public static UnicodeMap<Annotations> getData(String dir, String locale) {
625         AnnotationSet result = getDataSet(dir, locale);
626         return result == null ? null : result.baseData;
627     }
628 
629     @Override
toString()630     public String toString() {
631         return toString(false);
632     }
633 
toString(boolean html)634     public String toString(boolean html) {
635         Set<String> annotations2 = getKeywords();
636         if (getShortName() != null && annotations2.contains(getShortName())) {
637             annotations2 = new LinkedHashSet<>(getKeywords());
638             annotations2.remove(getShortName());
639         }
640         String result = Joiner.on(" |\u00a0").join(annotations2);
641         if (getShortName() != null) {
642             String ttsString = (html ? "*<b>" : "*") + getShortName() + (html ? "</b>" : "*");
643             if (result.isEmpty()) {
644                 result = ttsString;
645             } else {
646                 result = ttsString + (html ? "<br>|\u00a0" : " |\u00a0") + result;
647             }
648         }
649         return result;
650     }
651 
652     /**
653      * @return the annotations
654      */
getKeywords()655     public Set<String> getKeywords() {
656         return annotations;
657     }
658 
659     /**
660      * @return the tts
661      */
getShortName()662     public String getShortName() {
663         return tts;
664     }
665 
main(String[] args)666     public static void main(String[] args) {
667         if (true) {
668             writeList();
669         } else {
670             writeEnglish();
671         }
672     }
673 
    /**
     * Debugging aid: prints every explicit English entry as tab-separated lowercase hex code, key,
     * short name and keywords, then prints the short name and keywords of a fixed list of sample
     * sequences in a {@code {"code","name","keywords"},} form.
     */
    private static void writeList() {
        AnnotationSet eng = Annotations.getDataSet("en");
        Annotations an = eng.baseData.get("❤"); // value is never used
        final UnicodeMap<Annotations> map = eng.getUnresolvedExplicitValues();
        // presumably ChartAnnotations.RBC is a Comparator that fixes the output order — verify
        Set<String> keys = new TreeSet<>(ChartAnnotations.RBC);
        map.keySet().addAllTo(keys);
//        keys.add("����‍⚖");
        for (String key : keys) {
            System.out.println(Utility.hex(key, 4, "_").toLowerCase(Locale.ROOT)
                + "\t" + key
                + "\t" + map.get(key).getShortName()
                + "\t" + Joiner.on(" | ").join(map.get(key).getKeywords()));
        }
        // Sample sequences; names and keywords come from the lookup methods of the English data set.
        for (String s : Arrays.asList(
            "��", "��‍❤️‍��‍��",
            "��", "��‍❤️‍��",
            "��", "��‍��‍��",
            "����", "����",
            "��‍⚖", "����‍⚖", "��‍⚖", "����‍⚖",
            "��", "��‍♂️", "����‍♂️", "��‍♀️", "����‍♀️",
            "��", "����", "��‍♂️", "����‍♂️", "��‍♀️", "����‍♀️")) {
            final String shortName = eng.getShortName(s);
            final Set<String> keywords = eng.getKeywords(s);
            System.out.println("{\"" + s + "\",\"" + shortName + "\",\"" + Joiner.on("|")
                .join(keywords) + "\"},");
        }
    }
701 
writeEnglish()702     private static void writeEnglish() {
703         AnnotationSet eng = Annotations.getDataSet("en");
704         System.out.println(Annotations.getAvailable());
705         AnnotationSet eng100 = Annotations.getDataSet("en_001");
706         UnicodeMap<Annotations> map100 = eng100.getUnresolvedExplicitValues();
707         final UnicodeMap<Annotations> map = eng.getUnresolvedExplicitValues();
708         Set<String> keys = new TreeSet<>(ChartAnnotations.RBC);
709         map.keySet().addAllTo(keys);
710         for (String key : keys) {
711             Annotations value = map.get(key);
712             Annotations value100 = map100.get(key);
713             Set<String> keywords100 = (value100 == null ? null : value100.getKeywords());
714             System.out.println(key + "\tname\t"
715                 + "\t" + value.getShortName()
716                 + "\t" + (value100 == null ? "" : value100.getShortName())
717                 + "\t" + Joiner.on(" | ").join(value.getKeywords())
718                 + "\t" + (keywords100 == null ? "" : Joiner.on(" | ").join(keywords100)));
719         }
720     }
721 }
722