1 package org.unicode.cldr.util;
2 
3 import java.util.ArrayList;
4 import java.util.HashMap;
5 import java.util.Iterator;
6 import java.util.LinkedHashMap;
7 import java.util.LinkedHashSet;
8 import java.util.List;
9 import java.util.Map;
10 import java.util.Map.Entry;
11 import java.util.Set;
12 
13 import org.unicode.cldr.draft.FileUtilities;
14 
15 import com.google.common.base.CharMatcher;
16 import com.google.common.base.Splitter;
17 import com.google.common.collect.ImmutableMap;
18 import com.google.common.collect.ImmutableSet;
19 import com.ibm.icu.dev.util.UnicodeMap;
20 import com.ibm.icu.impl.Utility;
21 import com.ibm.icu.lang.CharSequences;
22 import com.ibm.icu.text.UnicodeSet;
23 import com.ibm.icu.util.ICUException;
24 
25 public class Emoji {
26     public static final String EMOJI_VARIANT = "\uFE0F";
27     public static final String COMBINING_ENCLOSING_KEYCAP = "\u20E3";
28     public static final String ZWJ = "\u200D";
29     public static final UnicodeSet REGIONAL_INDICATORS = new UnicodeSet(0x1F1E6, 0x1F1FF).freeze();
30     public static final UnicodeSet MODIFIERS = new UnicodeSet("[��-��]").freeze();
31     public static final UnicodeSet TAGS = new UnicodeSet(0xE0000, 0xE007F).freeze();
32     public static final UnicodeSet FAMILY = new UnicodeSet("[\u200D ��-�� �� ❤]").freeze();
33     public static final UnicodeSet GENDER = new UnicodeSet().add(0x2640).add(0x2642).freeze();
34     public static final UnicodeSet SPECIALS = new UnicodeSet("["
35         + "{��‍⬛}{��‍❄}{��‍��}{��‍��}{��‍��}{��‍��}{��‍��‍��}{��‍��} {��‍��} {��‍☠} {��‍��} {��‍��} {��‍��} {��‍��} {��‍��} {��‍��} {��‍��}"
36         + "{��‍⚧}{��‍⚕}{��‍⚖}{��‍✈}{��‍��}{��‍��}{��‍��}{��‍��}{��‍��}{��‍��}{��‍��}{��‍��}{��‍��}{��‍��}{��‍��}{��‍��}{��‍��}{��‍��}{��‍��}{��‍��}"
37         + "{❤‍��}, {❤‍��}, {��‍��}, {��‍��}" // #E13.1
38         + "]").freeze();
39     // May have to add from above, if there is a failure in testAnnotationPaths. Failure will be like:
40     // got java.util.TreeSet<[//ldml/annotations/annotation[@cp="��‍⚧"][@type="tts"], //ldml/annotations/annotation[@cp="��‍⚕"][@type="tts"], ...
41     // just extract the items in "...", and change into {...} for adding above.
42     // Example: //ldml/annotations/annotation[@cp="��‍⚕"] ==> {��‍⚕}
43     public static final UnicodeSet MAN_WOMAN = new UnicodeSet("[�� ��]").freeze();
44     public static final UnicodeSet OBJECT = new UnicodeSet("[�� �� �� �� �� �� �� �� ✈ �� �� �� �� �� �� ⚖ ⚕]").freeze();
45 
46     static final UnicodeMap<String> emojiToMajorCategory = new UnicodeMap<>();
47     static final UnicodeMap<String> emojiToMinorCategory = new UnicodeMap<>();
48     static final UnicodeMap<String> toName = new UnicodeMap<>();
49     static {
50         emojiToMajorCategory.setErrorOnReset(true);
51         emojiToMinorCategory.setErrorOnReset(true);
52         toName.setErrorOnReset(true);
53     }
54     /**
55      * A mapping from a majorCategory to a unique ordering number, based on the first time it is encountered.
56      */
57     static final Map<String, Long> majorToOrder = new HashMap<>();
58     /**
59      * A mapping from a minorCategory to a unique ordering number, based on the first time it is encountered.
60      */
61     static final Map<String, Long> minorToOrder = new HashMap<>();
62     static final Map<String, Long> emojiToOrder = new LinkedHashMap<>();
63     static final UnicodeSet nonConstructed = new UnicodeSet();
64     static final UnicodeSet allRgi = new UnicodeSet();
65     static final UnicodeSet allRgiNoES = new UnicodeSet();
66 
67     static {
68         /*
69             # group: Smileys & People
70             # subgroup: face-positive
71             1F600 ; fully-qualified     # �� grinning face
72          */
73         Splitter semi = Splitter.on(CharMatcher.anyOf(";#")).trimResults();
74         String majorCategory = null;
75         String minorCategory = null;
76         long majorOrder = 0;
77         long minorOrder = 0;
78         //Multimap<Pair<Integer,Integer>,String> majorPlusMinorToEmoji = TreeMultimap.create();
79         for (String line : FileUtilities.in(Emoji.class, "data/emoji/emoji-test.txt")) {
80             if (line.startsWith("#")) {
81                 line = line.substring(1).trim();
82                 if (line.startsWith("group:")) {
83                     majorCategory = line.substring("group:".length()).trim();
84                     Long oldMajorOrder = majorToOrder.get(majorCategory);
85                     if (oldMajorOrder == null) {
majorToOrder.put(majorCategory, majorOrder = majorToOrder.size())86                         majorToOrder.put(majorCategory, majorOrder = majorToOrder.size());
87                     } else {
88                         majorOrder = oldMajorOrder;
89                     }
90                 } else if (line.startsWith("subgroup:")) {
91                     minorCategory = line.substring("subgroup:".length()).trim();
92                     Long oldMinorOrder = minorToOrder.get(minorCategory);
93                     if (oldMinorOrder == null) {
minorToOrder.put(minorCategory, minorOrder = minorToOrder.size())94                         minorToOrder.put(minorCategory, minorOrder = minorToOrder.size());
95                     } else {
96                         minorOrder = oldMinorOrder;
97                     }
98                 }
99                 continue;
100             }
101             line = line.trim();
102             if (line.isEmpty()) {
103                 continue;
104             }
105             Iterator<String> it = semi.split(line).iterator();
106 
107             String emojiHex = it.next();
108             if (emojiHex.contains("1F48F")) {
109                 int debug = 0;
110             }
111 
112             String original = Utility.fromHex(emojiHex, 4, " ");
113             if (original.contains("��")) {
114                 if (false) {
115                     System.out.println(original + "\t" + Utility.hex(original));
116                 }
117             }
118 
119             String type = it.next();
120             if (type.startsWith("fully-qualified")) {
121                 allRgi.add(original);
original.replace(Emoji.EMOJI_VARIANT, "")122                 allRgiNoES.add(original.replace(Emoji.EMOJI_VARIANT, ""));
123             }
emojiToMajorCategory.put(original, majorCategory)124             emojiToMajorCategory.put(original, majorCategory);
emojiToMinorCategory.put(original, minorCategory)125             emojiToMinorCategory.put(original, minorCategory);
126             String comment = it.next();
127             // The comment is now of the form:  # �� E0.6 beaming face with smiling eyes
128             int spacePos = comment.indexOf(' ');
129             spacePos = comment.indexOf(' ', spacePos+1); // get second space
130             String name = comment.substring(spacePos+1).trim();
toName.put(original, name)131             toName.put(original, name);
132 
133             // add all the non-constructed values to a set for annotations
134 
135             String minimal = original.replace(EMOJI_VARIANT, "");
136 
137             // Add the order. If it is not minimal, add that also.
138             if (!emojiToOrder.containsKey(original)) {
putUnique(emojiToOrder, original, emojiToOrder.size()*100L)139                 putUnique(emojiToOrder, original, emojiToOrder.size()*100L);
140             }
141             if (!emojiToOrder.containsKey(minimal)) {
putUnique(emojiToOrder, minimal, emojiToOrder.size()*100L)142                 putUnique(emojiToOrder, minimal, emojiToOrder.size()*100L);
143             }
144             //
145             // majorPlusMinorToEmoji.put(Pair.of(majorOrder, minorOrder), minimal);
146 
147             boolean singleton = CharSequences.getSingleCodePoint(minimal) != Integer.MAX_VALUE;
148 //            if (!emojiToOrder.containsKey(minimal)) {
149 //                emojiToOrder.put(minimal, emojiToOrder.size());
150 //            }
151 
152             // skip constructed values
153             if (minimal.contains(COMBINING_ENCLOSING_KEYCAP)
154                 || REGIONAL_INDICATORS.containsSome(minimal)
155                 || TAGS.containsSome(minimal)
156                 || !singleton && MODIFIERS.containsSome(minimal)
157                 || !singleton && FAMILY.containsAll(minimal)) {
158                 // do nothing
159             } else if (minimal.contains(ZWJ)) { // only do certain ZWJ sequences
160                 if (SPECIALS.contains(minimal)
161                     || GENDER.containsSome(minimal)
162                     || MAN_WOMAN.contains(minimal.codePointAt(0)) && OBJECT.contains(minimal.codePointBefore(minimal.length()))) {
163                     nonConstructed.add(minimal);
164                 }
165             } else if (!minimal.contains("��")) {
166                 nonConstructed.add(minimal);
167             }
168         }
169 //        for (Entry<Pair<Integer,Integer>, String> entry : majorPlusMinorToEmoji.entries()) {
170 //            String minimal = entry.getValue();
171 //            emojiToOrder.put(minimal, emojiToOrder.size());
172 //        }
emojiToMajorCategory.freeze()173         emojiToMajorCategory.freeze();
emojiToMinorCategory.freeze()174         emojiToMinorCategory.freeze();
175         nonConstructed.add(MODIFIERS); // needed for names
nonConstructed.freeze()176         nonConstructed.freeze();
toName.freeze()177         toName.freeze();
allRgi.freeze()178         allRgi.freeze();
allRgiNoES.freeze()179         allRgiNoES.freeze();
180     }
181 
putUnique(Map<K, V> map, K key, V value)182     private static <K, V> void putUnique(Map<K, V> map, K key, V value) {
183         V oldValue = map.put(key, value);
184         if (oldValue != null) {
185             throw new ICUException("Attempt to change value of " + map
186                 + " for " + key
187                 + " from " + oldValue
188                 + " to " + value
189                 );
190         }
191     }
192 
getAllRgi()193     public static UnicodeSet getAllRgi() {
194         return allRgi;
195     }
196 
getAllRgiNoES()197     public static UnicodeSet getAllRgiNoES() {
198         return allRgiNoES;
199     }
200 
201     public static final UnicodeMap<String> EXTRA_SYMBOL_MINOR_CATEGORIES = new UnicodeMap<>();
202     public static final Map<String,Long> EXTRA_SYMBOL_ORDER;
203     private static final boolean DEBUG = false;
204     static {
205         String[][] data = {
206             {"arrow", "→ ↓ ↑ ← ↔ ↕ ⇆ ⇅"},
207             {"alphanum", "© ® ℗ ™ µ"},
208             {"geometric", "▼ ▶ ▲ ◀ ● ○ ◯ ◊"},
209             {"math", "× ÷ √ ∞ ∆ ∇ ⁻ ¹ ² ³ ≡ ∈ ⊂ ∩ ∪ ° + ± − = ≈ ≠ > < ≤ ≥ ¬ | ~"},
210             {"punctuation", "§ † ‡ \\u0020  , 、 ، ; : ؛ ! ¡ ? ¿ ؟ ¶ ※ / \\ & # % ‰ ′ ″ ‴ @ * ♪ ♭ ♯ ` ´ ^ ¨ ‐ ― _ - – — • · . … 。 ‧ ・ ‘ ’ ‚ ' “ ” „ » « ( ) [ ] { } 〔 〕 〈 〉 《 》 「 」 『 』 〖 〗 【 】"},
211             {"currency", "€ £ ¥ ₹ ₽ $ ¢ ฿ ₪ ₺ ₫ ₱ ₩ ₡ ₦ ₮ ৳ ₴ ₸ ₲ ₵ ៛ ₭ ֏ ₥ ₾ ₼ ₿ ؋"},
212             {"other-symbol", "‾‽‸⁂↚↛↮↙↜↝↞↟↠↡↢↣↤↥↦↧↨↫↬↭↯↰↱↲↳↴↵↶↷↸↹↺↻↼↽↾↿⇀⇁⇂⇃⇄⇇⇈⇉⇊⇋⇌⇐⇍⇑⇒⇏⇓⇔⇎⇖⇗⇘⇙⇚⇛⇜⇝⇞⇟⇠⇡⇢⇣⇤⇥⇦⇧⇨⇩⇪⇵∀∂∃∅∉∋∎∏∑≮≯∓∕⁄∗∘∙∝∟∠∣∥∧∫∬∮∴∵∶∷∼∽∾≃≅≌≒≖≣≦≧≪≫≬≳≺≻⊁⊃⊆⊇⊕⊖⊗⊘⊙⊚⊛⊞⊟⊥⊮⊰⊱⋭⊶⊹⊿⋁⋂⋃⋅⋆⋈⋒⋘⋙⋮⋯⋰⋱■□▢▣▤▥▦▧▨▩▬▭▮▰△▴▵▷▸▹►▻▽▾▿◁◂◃◄◅◆◇◈◉◌◍◎◐◑◒◓◔◕◖◗◘◙◜◝◞◟◠◡◢◣◤◥◦◳◷◻◽◿⨧⨯⨼⩣⩽⪍⪚⪺₢₣₤₰₳₶₷₨﷼"},
213         };
214         // get the maximum suborder for each subcategory
215         Map<String, Long> subcategoryToMaxSuborder = new HashMap<>();
216         for (String[] row : data) {
217             final String subcategory = row[0];
218             for (Entry<String, String> entry : emojiToMinorCategory.entrySet()) {
219                 if (entry.getValue().equals(subcategory)) {
220                     String emoji = entry.getKey();
221                     Long order = emojiToOrder.get(emoji);
222                     Long currentMax = subcategoryToMaxSuborder.get(subcategory);
223                     if (currentMax == null || currentMax < order) {
subcategoryToMaxSuborder.put(subcategory, order)224                         subcategoryToMaxSuborder.put(subcategory, order);
225                     }
226                 }
227             }
228         }
229         if (DEBUG) System.out.println(subcategoryToMaxSuborder);
230         Map<String,Long> _EXTRA_SYMBOL_ORDER = new LinkedHashMap<>();
231         for (String[] row : data) {
232             final String subcategory = row[0];
233             final String characters = row[1];
234 
235             List<String> items = new ArrayList<>();
236             for (int cp : With.codePointArray(characters)) {
237                 if (cp != ' ') {
With.fromCodePoint(cp)238                     items.add(With.fromCodePoint(cp));
239                 }
240             }
241             final UnicodeSet uset = new UnicodeSet().addAll(items);
242             if (uset.containsSome(EXTRA_SYMBOL_MINOR_CATEGORIES.keySet())) {
243                 throw new IllegalArgumentException("Duplicate values in " + EXTRA_SYMBOL_MINOR_CATEGORIES);
244             }
EXTRA_SYMBOL_MINOR_CATEGORIES.putAll(uset, subcategory)245             EXTRA_SYMBOL_MINOR_CATEGORIES.putAll(uset, subcategory);
246             long count = subcategoryToMaxSuborder.get(subcategory);
247             for (String s : items) {
248                 ++count;
_EXTRA_SYMBOL_ORDER.put(s, count)249                 _EXTRA_SYMBOL_ORDER.put(s, count);
250             }
subcategoryToMaxSuborder.put(subcategory, count)251             subcategoryToMaxSuborder.put(subcategory, count);
252         }
253         if (DEBUG) System.out.println(_EXTRA_SYMBOL_ORDER);
EXTRA_SYMBOL_MINOR_CATEGORIES.freeze()254         EXTRA_SYMBOL_MINOR_CATEGORIES.freeze();
255         EXTRA_SYMBOL_ORDER = ImmutableMap.copyOf(_EXTRA_SYMBOL_ORDER);
256     }
257 
getMinorCategory(String emoji)258     public static String getMinorCategory(String emoji) {
259         String minorCat = emojiToMinorCategory.get(emoji);
260         if (minorCat == null) {
261             minorCat = EXTRA_SYMBOL_MINOR_CATEGORIES.get(emoji);
262             if (minorCat == null) {
263                 throw new InternalCldrException("No minor category (aka subgroup) found for " + emoji
264                     + ". Update emoji-test.txt to latest, and adjust PathHeader.. functionMap.put(\"minor\", ...");
265             }
266         }
267         return minorCat;
268     }
269 
getName(String emoji)270     public static String getName(String emoji) {
271         return toName.get(emoji);
272     }
273 
getEmojiToOrder(String emoji)274     public static long getEmojiToOrder(String emoji) {
275         Long result = emojiToOrder.get(emoji);
276         if (result == null) {
277             result = EXTRA_SYMBOL_ORDER.get(emoji);
278             if (result == null) {
279                 throw new InternalCldrException("No Order found for " + emoji
280                     + ". Update emoji-test.txt to latest, and adjust PathHeader.. functionMap.put(\"minor\", ...");
281             }
282         }
283         return result;
284     }
285 
getEmojiMinorOrder(String minor)286     public static long getEmojiMinorOrder(String minor) {
287         Long result = minorToOrder.get(minor);
288         if (result == null) {
289             throw new InternalCldrException("No minor category (aka subgroup) found for " + minor
290                 + ". Update emoji-test.txt to latest, and adjust PathHeader.. functionMap.put(\"minor\", ...");
291         }
292         return result;
293     }
294 
getMajorCategory(String emoji)295     public static String getMajorCategory(String emoji) {
296         String majorCat = emojiToMajorCategory.get(emoji);
297         if (majorCat == null) {
298             if (EXTRA_SYMBOL_MINOR_CATEGORIES.containsKey(emoji)) {
299                 majorCat = "Symbols";
300             } else {
301                 throw new InternalCldrException("No minor category (aka subgroup) found for " + emoji
302                     + ". Update emoji-test.txt to latest, and adjust PathHeader.. functionMap.put(\"major\", ...");
303             }
304         }
305         return majorCat;
306     }
307 
getMajorCategories()308     public static Set<String> getMajorCategories() {
309         return emojiToMajorCategory.values();
310     }
311 
getMinorCategories()312     public static Set<String> getMinorCategories() {
313         return emojiToMinorCategory.values();
314     }
315 
getMinorCategoriesWithExtras()316     public static Set<String> getMinorCategoriesWithExtras() {
317         Set<String> result = new LinkedHashSet<>(emojiToMinorCategory.values());
318         result.addAll(EXTRA_SYMBOL_MINOR_CATEGORIES.getAvailableValues());
319         return ImmutableSet.copyOf(result);
320     }
321 
getEmojiInMinorCategoriesWithExtras(String minorCategory)322     public static UnicodeSet getEmojiInMinorCategoriesWithExtras(String minorCategory) {
323         return new UnicodeSet(emojiToMinorCategory.getSet(minorCategory))
324             .addAll(EXTRA_SYMBOL_MINOR_CATEGORIES.getSet(minorCategory))
325             .freeze();
326     }
327 
getNonConstructed()328     public static UnicodeSet getNonConstructed() {
329         return nonConstructed;
330     }
331 
332     private static Set<String> NAME_PATHS = null;
333     private static Set<String> KEYWORD_PATHS = null;
334     public static final String TYPE_TTS = "[@type=\"tts\"]";
335 
getNamePaths()336     public static synchronized Set<String> getNamePaths() {
337         return NAME_PATHS != null ? NAME_PATHS : (NAME_PATHS = buildPaths(TYPE_TTS));
338     }
339 
getKeywordPaths()340     public static synchronized Set<String> getKeywordPaths() {
341         return KEYWORD_PATHS != null ? KEYWORD_PATHS : (KEYWORD_PATHS = buildPaths(""));
342     }
343 
buildPaths(String suffix)344     private static ImmutableSet<String> buildPaths(String suffix) {
345         ImmutableSet.Builder<String> builder = ImmutableSet.builder();
346         for (String s : Emoji.getNonConstructed()) {
347             String base = "//ldml/annotations/annotation[@cp=\"" + s + "\"]" + suffix;
348             builder.add(base);
349         }
350         return builder.build();
351     }
352 }
353