1 package org.unicode.cldr.util;
2 
3 import java.io.File;
4 import java.io.IOException;
5 import java.util.ArrayList;
6 import java.util.Arrays;
7 import java.util.Collection;
8 import java.util.Collections;
9 import java.util.LinkedHashSet;
10 import java.util.Locale;
11 import java.util.Map;
12 import java.util.Set;
13 import java.util.TreeSet;
14 import java.util.concurrent.ConcurrentHashMap;
15 import java.util.regex.Pattern;
16 
17 import org.unicode.cldr.test.EmojiSubdivisionNames;
18 import org.unicode.cldr.tool.ChartAnnotations;
19 import org.unicode.cldr.util.XMLFileReader.SimpleHandler;
20 
21 import com.google.common.base.Objects;
22 import com.google.common.base.Splitter;
23 import com.google.common.collect.ImmutableSet;
24 import com.google.common.collect.ImmutableSet.Builder;
25 import com.ibm.icu.dev.util.CollectionUtilities;
26 import com.ibm.icu.dev.util.UnicodeMap;
27 import com.ibm.icu.impl.Utility;
28 import com.ibm.icu.lang.CharSequences;
29 import com.ibm.icu.text.SimpleFormatter;
30 import com.ibm.icu.text.Transform;
31 import com.ibm.icu.text.UTF16;
32 import com.ibm.icu.text.UnicodeSet;
33 import com.ibm.icu.text.UnicodeSet.SpanCondition;
34 import com.ibm.icu.text.UnicodeSetSpanner;
35 import com.ibm.icu.util.ICUUncheckedIOException;
36 
37 public class Annotations {
    /** When true, the static initializer prints the annotations directory and every file it scans. */
    private static final boolean DEBUG = false;

    // Marker strings used in generated names and in the output of AnnotationSet.toString(...):
    // BAD_MARKER: a short name starting with it is reported as missing.
    public static final String BAD_MARKER = "⊗";
    // MISSING_MARKER: returned for a missing name; also prefixed to values that only come from root/code fallback.
    public static final String MISSING_MARKER = "⊖";
    // ENGLISH_MARKER: prefixed to values that were taken from English instead of the requested locale.
    public static final String ENGLISH_MARKER = "⊕";
    // EQUIVALENT: substituted for a name or keyword set that equals the parent's.
    public static final String EQUIVALENT = "≣";

    /** Splits an annotation value on '|' or ';', trimming each piece and dropping empty pieces. */
    public static final Splitter splitter = Splitter.on(Pattern.compile("[|;]")).trimResults().omitEmptyStrings();
    /** Splits on '.', trimming results; the static initializer uses it to take the part of a file name before the first dot. */
    static final Splitter dotSplitter = Splitter.on(".").trimResults();

    /** Loaded annotation sets, keyed by directory path and then by locale ID. */
    static final Map<String, Map<String, AnnotationSet>> cache = new ConcurrentHashMap<>();
    /** Locale IDs derived from the names of the XML files found in the annotations directory. */
    static final Set<String> LOCALES;
    /** Canonical path of the "annotations" directory under CLDRPaths.COMMON_DIRECTORY. */
    static final String DIR;
    /** The "en" annotation set, loaded by the static initializer. */
    private static final AnnotationSet ENGLISH_DATA;

    /** Keywords of this entry; never null (the constructor substitutes an empty set). */
    private final Set<String> annotations;
    /** Short name ("tts") of this entry; may be null. */
    private final String tts;
55 
56     static {
57         File directory = new File(CLDRPaths.COMMON_DIRECTORY, "annotations");
58         try {
59             DIR = directory.getCanonicalPath();
60         } catch (IOException e) {
61             throw new ICUUncheckedIOException(e);
62         }
63         if (DEBUG) {
64             System.out.println(DIR);
65         }
66         Builder<String> temp = ImmutableSet.builder();
67         for (File file : directory.listFiles()) {
68             if (DEBUG) {
69                 try {
file.getCanonicalPath()70                     System.out.println(file.getCanonicalPath());
71                 } catch (IOException e) {
72                 }
73             }
74             String name = file.toString();
75             String shortName = file.getName();
76             if (!shortName.endsWith(".xml") || // skip non-XML
77                 shortName.startsWith("#") || // skip other junk files
78                 shortName.startsWith(".")
79 //                || shortName.contains("001") // skip world english for now
80                 ) continue; // skip dot files (backups, etc)
81             temp.add(dotSplitter.split(shortName).iterator().next());
82         }
83         LOCALES = temp.build();
84         ENGLISH_DATA = getDataSet("en");
85     }
86 
87     static class MyHandler extends SimpleHandler {
88         private final String locale;
89         private final UnicodeMap<Annotations> localeData = new UnicodeMap<>();
90         private final AnnotationSet parentData;
91         private final Map<String, AnnotationSet> dirCache;
92 
MyHandler(Map<String, AnnotationSet> dirCache, String locale, AnnotationSet parentData)93         public MyHandler(Map<String, AnnotationSet> dirCache, String locale, AnnotationSet parentData) {
94             this.locale = locale;
95             this.parentData = parentData;
96             this.dirCache = dirCache;
97         }
98 
cleanup()99         public AnnotationSet cleanup() {
100             // add parent data (may be overridden)
101             UnicodeMap<Annotations> templocaleData = null;
102             if (parentData != null) {
103                 templocaleData = new UnicodeMap<>();
104                 UnicodeSet keys = new UnicodeSet(parentData.baseData.keySet()).addAll(localeData.keySet());
105                 for (String key : keys) {
106                     Annotations parentValue = parentData.baseData.get(key);
107                     Annotations myValue = localeData.get(key);
108                     if (parentValue == null) {
109                         templocaleData.put(key, myValue);
110                     } else if (myValue == null) {
111                         templocaleData.put(key, parentValue);
112                     } else { // need to combine
113                         String tts = myValue.tts == null
114                             ? parentValue.tts : myValue.tts;
115                         Set<String> annotations = myValue.annotations == null || myValue.annotations.isEmpty()
116                             ? parentValue.annotations : myValue.annotations;
117                         templocaleData.put(key, new Annotations(annotations, tts));
118                     }
119                 }
120             }
121 
122             final AnnotationSet result = new AnnotationSet(locale, localeData, templocaleData);
123             dirCache.put(locale, result);
124             return result;
125         }
126 
127         @Override
handlePathValue(String path, String value)128         public void handlePathValue(String path, String value) {
129             XPathParts parts = XPathParts.getFrozenInstance(path);
130             String lastElement = parts.getElement(-1);
131             if (!lastElement.equals("annotation")) {
132                 if (!"identity".equals(parts.getElement(1))) {
133                     throw new IllegalArgumentException("Unexpected path");
134                 }
135                 return;
136             }
137             String usString = parts.getAttributeValue(-1, "cp");
138             UnicodeSet us1 = usString.startsWith("[") && usString.endsWith("]") ? new UnicodeSet(usString) : new UnicodeSet().add(usString);
139             UnicodeSet us = new UnicodeSet();
140             for (String s : us1) {
141                 us.add(s.replace(EmojiConstants.EMOJI_VARIANT_STRING, ""));
142             }
143             String tts = parts.getAttributeValue(-1, "tts");
144             String type = parts.getAttributeValue(-1, "type");
145             String alt = parts.getAttributeValue(-1, "alt");
146 
147             if (alt != null) {
148                 // do nothing for now
149             } else if ("tts".equals(type)) {
150                 addItems(localeData, us, Collections.<String> emptySet(), value);
151             } else {
152                 Set<String> attributes = new TreeSet<>(splitter.splitToList(value));
153                 addItems(localeData, us, attributes, tts);
154             }
155         }
156 
addItems(UnicodeMap<Annotations> unicodeMap, UnicodeSet us, Set<String> attributes, String tts)157         private void addItems(UnicodeMap<Annotations> unicodeMap, UnicodeSet us, Set<String> attributes, String tts) {
158             for (String entry : us) {
159                 addItems(unicodeMap, entry, attributes, tts);
160             }
161         }
162 
addItems(UnicodeMap<Annotations> unicodeMap, String entry, Set<String> attributes, String tts)163         private void addItems(UnicodeMap<Annotations> unicodeMap, String entry, Set<String> attributes, String tts) {
164             Annotations annotations = unicodeMap.get(entry);
165             if (annotations == null) {
166                 unicodeMap.put(entry, new Annotations(attributes, tts));
167             } else {
168                 unicodeMap.put(entry, annotations.add(attributes, tts)); // creates new item
169             }
170         }
171     }
172 
Annotations(Set<String> attributes, String tts2)173     public Annotations(Set<String> attributes, String tts2) {
174         annotations = attributes == null ? Collections.<String> emptySet() : ImmutableSet.copyOf(attributes);
175         tts = tts2;
176     }
177 
add(Set<String> attributes, String tts2)178     public Annotations add(Set<String> attributes, String tts2) {
179         return new Annotations(getKeywords() == null ? attributes : attributes == null ? getKeywords() : union(attributes, getKeywords()),
180             getShortName() == null ? tts2 : tts2 == null ? getShortName() : throwDup());
181     }
182 
throwDup()183     private String throwDup() {
184         throw new IllegalArgumentException("Duplicate tts");
185     }
186 
union(Set<String> a, Set<String> b)187     private Set<String> union(Set<String> a, Set<String> b) {
188         TreeSet<String> result = new TreeSet<>(a);
189         result.addAll(b);
190         return result;
191     }
192 
getAvailable()193     public static Set<String> getAvailable() {
194         return LOCALES;
195     }
196 
getAvailableLocales()197     public static Set<String> getAvailableLocales() {
198         return LOCALES;
199     }
200 
201     public static final class AnnotationSet {
202 
        // Shared CLDR configuration; source of the locale factory and the English file below.
        private static final CLDRConfig CONFIG = CLDRConfig.getInstance();

        // Factory used by the constructor to load each locale's CLDRFile.
        static final Factory factory = CONFIG.getCldrFactory();
        // English CLDR data, used as the fallback for missing values.
        static final CLDRFile ENGLISH = CONFIG.getEnglish();
        // Always null here; the commented-out initializer is kept on the line after the next field.
        static final CLDRFile ENGLISH_ANNOTATIONS = null;
        // English subdivision names, consulted when a locale has no name for a subdivision flag.
        static final Map<String,String> englishSubdivisionIdToName = EmojiSubdivisionNames.getSubdivisionIdToName("en");
        //CLDRConfig.getInstance().getAnnotationsFactory().make("en", false);

        // Locale ID this set was built for.
        private final String locale;
        // Data used for lookups: the parent-merged map when one was supplied, otherwise the same map as unresolvedData.
        private final UnicodeMap<Annotations> baseData;
        // Data read from this locale's own file only (frozen by the constructor).
        private final UnicodeMap<Annotations> unresolvedData;
        // CLDR data for this locale, loaded via factory.make(locale, true).
        private final CLDRFile cldrFile;
        // Subdivision names for this locale, from EmojiSubdivisionNames.
        private final Map<String, String> subdivisionIdToName;
        // Formatter compiled from the "category-list" characterLabelPattern.
        private final SimpleFormatter initialPattern;
        // Regex derived from the same pattern, with {0} and {1} replaced by ".*".
        private final Pattern initialRegexPattern;
        // Formats the list of modifier names in synthesized names.
        private final XListFormatter listPattern;
        // Keyword sets (zero or one element) holding the "flag" and "keycap" character labels.
        private final Set<String> flagLabelSet;
        private final Set<String> keycapLabelSet;
        // First element of the corresponding label set, or null if that set is empty.
        private final String keycapLabel;
        private final String flagLabel;
//        private final String maleLabel;
//        private final String femaleLabel;
        // Cache of synthesized entries, keyed by code with the emoji variation selector removed.
        private final Map<String, Annotations> localeCache = new ConcurrentHashMap<>();

        // Shared spanner over EmojiConstants.COMPONENTS; synthesize() synchronizes on it before use.
        static UnicodeSetSpanner uss = new UnicodeSetSpanner(EmojiConstants.COMPONENTS); // must be sync'ed
228 
AnnotationSet(String locale, UnicodeMap<Annotations> source, UnicodeMap<Annotations> resolvedSource)229         private AnnotationSet(String locale, UnicodeMap<Annotations> source, UnicodeMap<Annotations> resolvedSource) {
230             this.locale = locale;
231             unresolvedData = source.freeze();
232             this.baseData = resolvedSource == null ? unresolvedData : resolvedSource.freeze();
233             cldrFile = factory.make(locale, true);
234             subdivisionIdToName = EmojiSubdivisionNames.getSubdivisionIdToName(locale);
235             listPattern = new XListFormatter(cldrFile, EmojiConstants.COMPOSED_NAME_LIST);
236             final String initialPatternString = getStringValue("//ldml/characterLabels/characterLabelPattern[@type=\"category-list\"]");
237             initialPattern = SimpleFormatter.compile(initialPatternString);
238             final String regexPattern = ("\\Q" + initialPatternString.replace("{0}", "\\E.*\\Q").replace("{1}", "\\E.*\\Q") + "\\E")
239                 .replace("\\Q\\E", ""); // HACK to detect use of prefix pattern
240             initialRegexPattern = Pattern.compile(regexPattern);
241             flagLabelSet = getLabelSet("flag");
242             flagLabel = flagLabelSet.isEmpty() ? null : flagLabelSet.iterator().next();
243             keycapLabelSet = getLabelSet("keycap");
244             keycapLabel = keycapLabelSet.isEmpty() ? null : keycapLabelSet.iterator().next();
245 //            maleLabel = getStringValue("//ldml/characterLabels/characterLabel[@type=\"male\"]");
246 //            femaleLabel = getStringValue("//ldml/characterLabels/characterLabel[@type=\"female\"]");
247         }
248 
249         /**
250          * @deprecated Use {@link #getLabelSet(String)} instead
251          */
getLabelSet()252         private Set<String> getLabelSet() {
253             return getLabelSet("flag");
254         }
255 
getLabelSet(String typeAttributeValue)256         private Set<String> getLabelSet(String typeAttributeValue) {
257             String label = getStringValue("//ldml/characterLabels/characterLabel[@type=\"" + typeAttributeValue + "\"]");
258             return label == null ? Collections.<String> emptySet() : Collections.singleton(label);
259         }
260 
getStringValue(String xpath)261         private String getStringValue(String xpath) {
262             return getStringValue(xpath, cldrFile, ENGLISH);
263         }
264 
getStringValue(String xpath, CLDRFile cldrFile2, CLDRFile english)265         private String getStringValue(String xpath, CLDRFile cldrFile2, CLDRFile english) {
266             String result = cldrFile2.getStringValue(xpath);
267             if (result == null) {
268                 return ENGLISH_MARKER + english.getStringValue(xpath);
269             }
270             String sourceLocale = cldrFile2.getSourceLocaleID(xpath, null);
271             if (sourceLocale.equals(XMLSource.CODE_FALLBACK_ID) || sourceLocale.equals(XMLSource.ROOT_ID)) {
272                 return MISSING_MARKER + result;
273             }
274             return result;
275         }
276 
getShortName(String code)277         public String getShortName(String code) {
278             return getShortName(code, null);
279         }
280 
getShortName(String code, Transform<String, String> otherSource)281         public String getShortName(String code, Transform<String, String> otherSource) {
282             if (code.equals("��‍♀️")) {
283                 int debug = 0;
284             }
285 
286             code = code.replace(EmojiConstants.EMOJI_VARIANT_STRING, "");
287             Annotations stock = baseData.get(code);
288             if (stock != null && stock.tts != null) {
289                 return stock.tts;
290             }
291             stock = localeCache.get(code);
292             if (stock != null) {
293                 return stock.tts;
294             }
295             stock = synthesize(code, otherSource);
296             if (stock != null) {
297                 localeCache.put(code, stock);
298                 return stock.tts;
299             }
300             return null;
301         }
302 
getKeywords(String code)303         public Set<String> getKeywords(String code) {
304             code = code.replace(EmojiConstants.EMOJI_VARIANT_STRING, "");
305             Annotations stock = baseData.get(code);
306             if (stock != null && stock.annotations != null) {
307                 return stock.annotations;
308             }
309             stock = localeCache.get(code);
310             if (stock != null) {
311                 return stock.annotations;
312             }
313             stock = synthesize(code, null);
314             if (stock != null) {
315                 localeCache.put(code, stock);
316                 return stock.annotations;
317             }
318             return Collections.<String> emptySet();
319         }
320 
321         /** Returns the set of all keys for which annotations are available. WARNING: keys have the Emoji Presentation Selector removed!
322          */
keySet()323         public UnicodeSet keySet() {
324             return baseData.keySet();
325         }
326 
synthesize(String code, Transform<String, String> otherSource)327         private Annotations synthesize(String code, Transform<String, String> otherSource) {
328             if (code.equals("����‍♂")) {
329                 int debug = 0;
330             }
331             String shortName = null;
332             int len = code.codePointCount(0, code.length());
333             boolean isKeycap10 = code.equals("��");
334             if (len == 1 && !isKeycap10) {
335                 String tempName = null;
336                 if (locale.equals("en")) {
337                     if (otherSource != null) {
338                         tempName = otherSource.transform(code);
339                     }
340                     if (tempName == null) {
341                         return null;
342                     }
343                     return new Annotations(Collections.<String> emptySet(), tempName);
344                 } else { // fall back to English if possible, but mark it.
345                     tempName = getDataSet("en").getShortName(code);
346                     if (tempName == null) {
347                         return null;
348                     }
349                     return new Annotations(Collections.<String> emptySet(), ENGLISH_MARKER + tempName);
350                 }
351             } else if (EmojiConstants.REGIONAL_INDICATORS.containsAll(code)) {
352                 String countryCode = EmojiConstants.getFlagCode(code);
353                 String path = CLDRFile.getKey(CLDRFile.TERRITORY_NAME, countryCode);
354                 String regionName = getStringValue(path);
355                 if (regionName == null) {
356                     regionName = ENGLISH_MARKER + ENGLISH.getStringValue(path);
357                 }
358                 String flagName = flagLabel == null ? regionName : initialPattern.format(flagLabel, regionName);
359                 return new Annotations(flagLabelSet, flagName);
360             } else if (code.startsWith(EmojiConstants.BLACK_FLAG)
361                 && code.endsWith(EmojiConstants.TAG_TERM)) {
362                 String subdivisionCode = EmojiConstants.getTagSpec(code);
363                 String subdivisionName = subdivisionIdToName.get(subdivisionCode);
364                 if (subdivisionName == null) {
365                     subdivisionName = englishSubdivisionIdToName.get(subdivisionCode);
366                     if (subdivisionName != null) {
367                         subdivisionName = ENGLISH_MARKER + subdivisionCode;
368                     } else {
369                         subdivisionName = MISSING_MARKER + subdivisionCode;
370                     }
371                 }
372                 String flagName = flagLabel == null ? subdivisionName : initialPattern.format(flagLabel, subdivisionName);
373                 return new Annotations(flagLabelSet, flagName);
374             } else if (isKeycap10 || code.contains(EmojiConstants.KEYCAP_MARK_STRING)) {
375                 final String rem = code.equals("��") ? "10" : UTF16.valueOf(code.charAt(0));
376                 shortName = initialPattern.format(keycapLabel, rem);
377                 return new Annotations(keycapLabelSet, shortName);
378             }
379             UnicodeSet skipSet = EmojiConstants.REM_SKIP_SET;
380             String rem = "";
381             SimpleFormatter startPattern = initialPattern;
382             if (EmojiConstants.COMPONENTS.containsSome(code)) {
383                 synchronized (uss) {
384                     rem = uss.deleteFrom(code, SpanCondition.NOT_CONTAINED);
385                     code = uss.deleteFrom(code, SpanCondition.CONTAINED);
386                 }
387             }
388             if (code.contains(EmojiConstants.JOINER_STRING)) {
389 //                if (code.endsWith(EmojiConstants.JOINER_MALE_SIGN)){
390 //                    if (matchesInitialPattern(code)) { // "����‍♂️","police officer: man, medium-light skin tone"
391 //                        rem = EmojiConstants.MAN + rem;
392 //                        code = code.substring(0,code.length()-EmojiConstants.JOINER_MALE_SIGN.length());
393 //                    } // otherwise "����‍♂️","man biking: dark skin tone"
394 //                } else if (code.endsWith(EmojiConstants.JOINER_FEMALE_SIGN)){
395 //                    if (matchesInitialPattern(code)) { //
396 //                        rem = EmojiConstants.WOMAN + rem;
397 //                        code = code.substring(0,code.length()-EmojiConstants.JOINER_FEMALE_SIGN.length());
398 //                    }
399 //                } else
400                 if (code.contains(EmojiConstants.KISS)) {
401                     rem = code + rem;
402                     code = "��";
403                     skipSet = EmojiConstants.REM_GROUP_SKIP_SET;
404                 } else if (code.contains(EmojiConstants.HEART)) {
405                     rem = code + rem;
406                     code = "��";
407                     skipSet = EmojiConstants.REM_GROUP_SKIP_SET;
408                 } else if (code.contains(EmojiConstants.HANDSHAKE)) {
409                     code = code.startsWith(EmojiConstants.MAN) ? "��"
410                         : code.endsWith(EmojiConstants.MAN) ? "��"
411                             : "��";
412                     skipSet = EmojiConstants.REM_GROUP_SKIP_SET;
413                 } else if (EmojiConstants.FAMILY_MARKERS.containsAll(code)) {
414                     rem = code + rem;
415                     code = "��";
416                     skipSet = EmojiConstants.REM_GROUP_SKIP_SET;
417 //                } else {
418 //                    startPattern = listPattern;
419                 }
420                 // left over is "����‍⚖","judge: man, dark skin tone"
421             }
422             return getBasePlusRemainder(cldrFile, code, rem, skipSet, startPattern, otherSource);
423         }
424 
matchesInitialPattern(String code)425         private boolean matchesInitialPattern(String code) {
426             Annotations baseAnnotation = baseData.get(code);
427             String baseName = baseAnnotation == null ? null : baseAnnotation.getShortName();
428             return baseName != null && initialRegexPattern.matcher(baseName).matches();
429         }
430 
getBasePlusRemainder(CLDRFile cldrFile, String base, String rem, UnicodeSet ignore, SimpleFormatter pattern, Transform<String, String> otherSource)431         private Annotations getBasePlusRemainder(CLDRFile cldrFile, String base, String rem, UnicodeSet ignore, SimpleFormatter pattern,
432             Transform<String, String> otherSource) {
433             String shortName = null;
434             Set<String> annotations = new LinkedHashSet<>();
435             boolean needMarker = true;
436 
437             if (base != null) {
438                 needMarker = false;
439                 Annotations stock = baseData.get(base);
440                 if (stock != null) {
441                     shortName = stock.getShortName();
442                     annotations.addAll(stock.getKeywords());
443                 } else if (otherSource != null) {
444                     shortName = otherSource.transform(base);
445                     if (shortName == null) {
446                         return null;
447                     }
448                 } else {
449                     return null;
450                 }
451             }
452 
453             boolean hackBlond = EmojiConstants.HAIR_EXPLICIT.contains(base.codePointAt(0));
454             Collection<String> arguments = new ArrayList<>();
455             int lastSkin = -1;
456 
457             for (int mod : CharSequences.codePoints(rem)) {
458                 if (ignore.contains(mod)) {
459                     continue;
460                 }
461                 if (EmojiConstants.MODIFIERS.contains(mod)) {
462                     if (lastSkin == mod) {
463                         continue;
464                     }
465                     lastSkin = mod; // collapse skin tones. TODO fix if we ever do multi-skin families
466                 }
467                 Annotations stock = baseData.get(mod);
468                 String modName = null;
469                 if (stock != null) {
470                     modName = stock.getShortName();
471                 } else if (otherSource != null) {
472                     modName = otherSource.transform(base);
473                 }
474                 if (modName == null) {
475                     needMarker = true;
476                     if (ENGLISH_DATA != null) {
477                         Annotations engName = ENGLISH_DATA.baseData.get(mod);
478                         if (engName != null) {
479                             modName = engName.getShortName();
480                         }
481                     }
482                     if (modName == null) {
483                         modName = Utility.hex(mod); // ultimate fallback
484                     }
485                 }
486                 if (hackBlond && shortName != null) {
487                     // HACK: make the blond names look like the other hair names
488                     // Split the short name into pieces, if possible, and insert the modName first
489                     String sep = initialPattern.format("", "");
490                     int splitPoint = shortName.indexOf(sep);
491                     if (splitPoint >= 0) {
492                         String modName0 = shortName.substring(splitPoint+sep.length());
493                         shortName = shortName.substring(0, splitPoint);
494                         if (modName != null) {
495                             arguments.add(modName);
496                             annotations.add(modName);
497                         }
498                         modName = modName0;
499                     }
500                     hackBlond = false;
501                 }
502 
503                 if (modName != null) {
504                     arguments.add(modName);
505                     annotations.add(modName);
506                 }
507             }
508             if (!arguments.isEmpty()) {
509                 shortName = pattern.format(shortName, listPattern.format(arguments));
510             }
511             Annotations result = new Annotations(annotations, (needMarker ? ENGLISH_MARKER : "") + shortName);
512             return result;
513         }
514 
515         /**
516          * @deprecated Use {@link #toString(String,boolean,AnnotationSet)} instead
517          */
toString(String code, boolean html)518         public String toString(String code, boolean html) {
519             return toString(code, html, null);
520         }
521 
toString(String code, boolean html, AnnotationSet parentAnnotations)522         public String toString(String code, boolean html, AnnotationSet parentAnnotations) {
523             if (locale.equals("be") && code.equals("��")) {
524                 int debug = 0;
525             }
526             String shortName = getShortName(code);
527             if (shortName == null || shortName.startsWith(BAD_MARKER) || shortName.startsWith(ENGLISH_MARKER)) {
528                 return MISSING_MARKER;
529             }
530 
531             String parentShortName = parentAnnotations == null ? null : parentAnnotations.getShortName(code);
532             if (shortName != null && Objects.equal(shortName, parentShortName)) {
533                 shortName = EQUIVALENT;
534             }
535 
536             Set<String> keywords = getKeywordsMinus(code);
537             Set<String> parentKeywords = parentAnnotations == null ? null : parentAnnotations.getKeywordsMinus(code);
538             if (keywords != null && !keywords.isEmpty() && Objects.equal(keywords, parentKeywords)) {
539                 keywords = Collections.singleton(EQUIVALENT);
540             }
541 
542             String result = CollectionUtilities.join(keywords, " |\u00a0");
543             if (shortName != null) {
544                 String ttsString = (html ? "*<b>" : "*") + shortName + (html ? "</b>" : "*");
545                 if (result.isEmpty()) {
546                     result = ttsString;
547                 } else {
548                     result = ttsString + (html ? "<br>|\u00a0" : " |\u00a0") + result;
549                 }
550             }
551             return result;
552         }
553 
getExplicitValues()554         public UnicodeMap<Annotations> getExplicitValues() {
555             return baseData;
556         }
557 
getUnresolvedExplicitValues()558         public UnicodeMap<Annotations> getUnresolvedExplicitValues() {
559             return unresolvedData;
560         }
561 
getKeywordsMinus(String code)562         public Set<String> getKeywordsMinus(String code) {
563             String shortName = getShortName(code);
564             Set<String> keywords = getKeywords(code);
565             if (shortName != null && keywords.contains(shortName)) {
566                 keywords = new LinkedHashSet<String>(keywords);
567                 keywords.remove(shortName);
568             }
569             return keywords;
570         }
571     }
572 
getDataSet(String locale)573     public static AnnotationSet getDataSet(String locale) {
574         return getDataSet(DIR, locale);
575     }
576 
getDataSet(String dir, String locale)577     public static AnnotationSet getDataSet(String dir, String locale) {
578         if (dir == null) {
579             dir = DIR;
580         }
581         Map<String, AnnotationSet> dirCache = cache.get(dir);
582         if (dirCache == null) {
583             cache.put(dir, dirCache = new ConcurrentHashMap<>());
584         }
585         AnnotationSet result = dirCache.get(locale);
586         if (result != null) {
587             return result;
588         }
589         if (!LOCALES.contains(locale)) {
590             return null;
591         }
592         String parentString = LocaleIDParser.getSimpleParent(locale);
593         AnnotationSet parentData = null;
594         if (parentString != null && !parentString.equals("root")) {
595             parentData = getDataSet(dir, parentString);
596         }
597         MyHandler myHandler = new MyHandler(dirCache, locale, parentData);
598         XMLFileReader xfr = new XMLFileReader().setHandler(myHandler);
599         xfr.read(dir + "/" + locale + ".xml", -1, true);
600         return myHandler.cleanup();
601     }
602 
getData(String locale)603     public static UnicodeMap<Annotations> getData(String locale) {
604         return getData(DIR, locale);
605     }
606 
getData(String dir, String locale)607     public static UnicodeMap<Annotations> getData(String dir, String locale) {
608         AnnotationSet result = getDataSet(dir, locale);
609         return result == null ? null : result.baseData;
610     }
611 
612     @Override
toString()613     public String toString() {
614         return toString(false);
615     }
616 
toString(boolean html)617     public String toString(boolean html) {
618         Set<String> annotations2 = getKeywords();
619         if (getShortName() != null && annotations2.contains(getShortName())) {
620             annotations2 = new LinkedHashSet<String>(getKeywords());
621             annotations2.remove(getShortName());
622         }
623         String result = CollectionUtilities.join(annotations2, " |\u00a0");
624         if (getShortName() != null) {
625             String ttsString = (html ? "*<b>" : "*") + getShortName() + (html ? "</b>" : "*");
626             if (result.isEmpty()) {
627                 result = ttsString;
628             } else {
629                 result = ttsString + (html ? "<br>|\u00a0" : " |\u00a0") + result;
630             }
631         }
632         return result;
633     }
634 
635     /**
636      * @return the annotations
637      */
getKeywords()638     public Set<String> getKeywords() {
639         return annotations;
640     }
641 
642     /**
643      * @return the tts
644      */
getShortName()645     public String getShortName() {
646         return tts;
647     }
648 
main(String[] args)649     public static void main(String[] args) {
650         if (true) {
651             writeList();
652         } else {
653             writeEnglish();
654         }
655     }
656 
    /**
     * Debugging aid that prints English annotation data to standard output.
     * First prints one tab-separated line per key of the unresolved English data
     * (lowercase hex of the key, the key, its short name, its keywords), then
     * prints name and keywords for a fixed list of sample sequences in a
     * brace-delimited form.
     * NOTE(review): several emoji literals below appear mis-encoded in this
     * source; they are left untouched here and should be restored from the
     * original file.
     */
    private static void writeList() {
        AnnotationSet eng = Annotations.getDataSet("en");
        // NOTE(review): 'an' is never read afterwards
        Annotations an = eng.baseData.get("❤");
        final UnicodeMap<Annotations> map = eng.getUnresolvedExplicitValues();
        // keys are ordered by ChartAnnotations.RBC (presumably a comparator; confirm)
        Set<String> keys = new TreeSet<>(ChartAnnotations.RBC);
        map.keySet().addAllTo(keys);
//        keys.add("����‍⚖");
        for (String key : keys) {
            System.out.println(Utility.hex(key, 4, "_").toLowerCase(Locale.ROOT)
                + "\t" + key
                + "\t" + map.get(key).getShortName()
                + "\t" + CollectionUtilities.join(map.get(key).getKeywords(), " | "));
        }
        // sample codes whose names are looked up (and synthesized if necessary) via the English set
        for (String s : Arrays.asList(
            "��", "��‍❤️‍��‍��",
            "��", "��‍❤️‍��",
            "��", "��‍��‍��",
            "����", "����",
            "��‍⚖", "����‍⚖", "��‍⚖", "����‍⚖",
            "��", "��‍♂️", "����‍♂️", "��‍♀️", "����‍♀️",
            "��", "����", "��‍♂️", "����‍♂️", "��‍♀️", "����‍♀️")) {
            final String shortName = eng.getShortName(s);
            final Set<String> keywords = eng.getKeywords(s);
            System.out.println("{\"" + s + "\",\"" + shortName + "\",\"" + CollectionUtilities.join(keywords, "|") + "\"},");
        }
    }
683 
writeEnglish()684     private static void writeEnglish() {
685         AnnotationSet eng = Annotations.getDataSet("en");
686         System.out.println(Annotations.getAvailable());
687         AnnotationSet eng100 = Annotations.getDataSet("en_001");
688         UnicodeMap<Annotations> map100 = eng100.getUnresolvedExplicitValues();
689         final UnicodeMap<Annotations> map = eng.getUnresolvedExplicitValues();
690         Set<String> keys = new TreeSet<>(ChartAnnotations.RBC);
691         map.keySet().addAllTo(keys);
692         for (String key : keys) {
693             Annotations value = map.get(key);
694             Annotations value100 = map100.get(key);
695             Set<String> keywords100 = (value100 == null ? null : value100.getKeywords());
696             System.out.println(key + "\tname\t"
697                 + "\t" + value.getShortName()
698                 + "\t" + (value100 == null ? "" : value100.getShortName())
699                 + "\t" + CollectionUtilities.join(value.getKeywords(), " | ")
700                 + "\t" + (keywords100 == null ? "" : CollectionUtilities.join(keywords100, " | ")));
701         }
702     }
703 }
704