1 package org.unicode.cldr.util;
2 
3 import java.util.HashMap;
4 import java.util.Iterator;
5 import java.util.Map;
6 import java.util.Set;
7 
8 import org.unicode.cldr.draft.FileUtilities;
9 
10 import com.google.common.base.Splitter;
11 import com.google.common.collect.ImmutableSet;
12 import com.ibm.icu.dev.util.UnicodeMap;
13 import com.ibm.icu.impl.Utility;
14 import com.ibm.icu.lang.CharSequences;
15 import com.ibm.icu.text.UnicodeSet;
16 
17 public class Emoji {
18     public static final String EMOJI_VARIANT = "\uFE0F";
19     public static final String COMBINING_ENCLOSING_KEYCAP = "\u20E3";
20     public static final String ZWJ = "\u200D";
21     public static final UnicodeSet REGIONAL_INDICATORS = new UnicodeSet(0x1F1E6, 0x1F1FF).freeze();
22     public static final UnicodeSet MODIFIERS = new UnicodeSet("[��-��]").freeze();
23     public static final UnicodeSet TAGS = new UnicodeSet(0xE0000, 0xE007F).freeze();
24     public static final UnicodeSet FAMILY = new UnicodeSet("[\u200D ��-�� �� ❤]").freeze();
25     public static final UnicodeSet GENDER = new UnicodeSet().add(0x2640).add(0x2642).freeze();
26     public static final UnicodeSet SPECIALS = new UnicodeSet("[{��‍��}{��‍��}{��‍☠}]").freeze();
27     public static final UnicodeSet MAN_WOMAN = new UnicodeSet("[�� ��]").freeze();
28     public static final UnicodeSet OBJECT = new UnicodeSet("[�� �� �� �� �� �� �� �� ✈ �� �� �� �� �� �� ⚖ ⚕]").freeze();
29 
30     static final UnicodeMap<String> emojiToMajorCategory = new UnicodeMap<>();
31     static final UnicodeMap<String> emojiToMinorCategory = new UnicodeMap<>();
32     static final Map<String, Integer> minorToOrder = new HashMap<>();
33     static final UnicodeSet nonConstructed = new UnicodeSet();
34     static final UnicodeSet allRgi = new UnicodeSet();
35     static final UnicodeSet allRgiNoES = new UnicodeSet();
36 
37     static {
38         /*
39             # group: Smileys & People
40             # subgroup: face-positive
41             1F600 ; fully-qualified     # �� grinning face
42          */
43         Splitter semi = Splitter.on(';').trimResults();
44         String majorCategory = null;
45         String minorCategory = null;
46         for (String line : FileUtilities.in(Emoji.class, "data/emoji/emoji-test.txt")) {
47             if (line.startsWith("#")) {
48                 line = line.substring(1).trim();
49                 if (line.startsWith("group:")) {
50                     majorCategory = line.substring("group:".length()).trim();
51                 } else if (line.startsWith("subgroup:")) {
52                     minorCategory = line.substring("subgroup:".length()).trim();
53                     if (!minorToOrder.containsKey(minorCategory)) {
minorToOrder.put(minorCategory, minorToOrder.size())54                         minorToOrder.put(minorCategory, minorToOrder.size());
55                     }
56                 }
57                 continue;
58             }
59             line = line.trim();
60             if (line.isEmpty()) {
61                 continue;
62             }
63             Iterator<String> it = semi.split(line).iterator();
64             String emojiHex = it.next();
65             String original = Utility.fromHex(emojiHex, 4, " ");
66             String type = it.next();
67             if (type.startsWith("fully-qualified")) {
68                 allRgi.add(original);
original.replace(Emoji.EMOJI_VARIANT, "")69                 allRgiNoES.add(original.replace(Emoji.EMOJI_VARIANT, ""));
70             }
emojiToMajorCategory.put(original, majorCategory)71             emojiToMajorCategory.put(original, majorCategory);
emojiToMinorCategory.put(original, minorCategory)72             emojiToMinorCategory.put(original, minorCategory);
73 
74             // add all the non-constructed values to a set for annotations
75 
76             String minimal = original.replace(EMOJI_VARIANT, "");
77             boolean singleton = CharSequences.getSingleCodePoint(minimal) != Integer.MAX_VALUE;
78 
79             // skip constructed values
80             if (minimal.contains(COMBINING_ENCLOSING_KEYCAP)
81                 || REGIONAL_INDICATORS.containsSome(minimal)
82                 || TAGS.containsSome(minimal)
83                 || !singleton && MODIFIERS.containsSome(minimal)
84                 || !singleton && FAMILY.containsAll(minimal)) {
85                 // do nothing
86             } else if (minimal.contains(ZWJ)) { // only do certain ZWJ sequences
87                 if (SPECIALS.contains(minimal)
88                     || GENDER.containsSome(minimal)
89                     || MAN_WOMAN.contains(minimal.codePointAt(0)) && OBJECT.contains(minimal.codePointBefore(minimal.length()))) {
90                     nonConstructed.add(minimal);
91                 }
92             } else if (!minimal.contains("��")) {
93                 nonConstructed.add(minimal);
94             }
95 
96         }
emojiToMajorCategory.freeze()97         emojiToMajorCategory.freeze();
emojiToMinorCategory.freeze()98         emojiToMinorCategory.freeze();
99         nonConstructed.add(MODIFIERS); // needed for names
nonConstructed.freeze()100         nonConstructed.freeze();
allRgi.freeze()101         allRgi.freeze();
allRgiNoES.freeze()102         allRgiNoES.freeze();
103     }
104 
getAllRgi()105     public static UnicodeSet getAllRgi() {
106         return allRgi;
107     }
108 
getAllRgiNoES()109     public static UnicodeSet getAllRgiNoES() {
110         return allRgiNoES;
111     }
112 
getMinorCategory(String emoji)113     public static String getMinorCategory(String emoji) {
114         String minorCat = emojiToMinorCategory.get(emoji);
115         if (minorCat == null) {
116             throw new InternalCldrException("No minor category (aka subgroup) found for " + emoji
117                 + ". Update emoji-test.txt to latest, and adjust PathHeader.. functionMap.put(\"minor\", ...");
118         }
119         return minorCat;
120     }
121 
getMinorToOrder(String minor)122     public static int getMinorToOrder(String minor) {
123         Integer result = minorToOrder.get(minor);
124         return result == null ? Integer.MAX_VALUE : result;
125     }
126 
getMajorCategory(String emoji)127     public static String getMajorCategory(String emoji) {
128         String majorCat = emojiToMajorCategory.get(emoji);
129         if (majorCat == null) {
130             throw new InternalCldrException("No minor category (aka subgroup) found for " + emoji
131                 + ". Update emoji-test.txt to latest, and adjust PathHeader.. functionMap.put(\"major\", ...");
132         }
133         return majorCat;
134     }
135 
getMajorCategories()136     public static Set<String> getMajorCategories() {
137         return emojiToMajorCategory.values();
138     }
139 
getMinorCategories()140     public static Set<String> getMinorCategories() {
141         return emojiToMinorCategory.values();
142     }
143 
getNonConstructed()144     public static UnicodeSet getNonConstructed() {
145         return nonConstructed;
146     }
147 
148     private static Set<String> NAME_PATHS = null;
149     private static Set<String> KEYWORD_PATHS = null;
150     public static final String TYPE_TTS = "[@type=\"tts\"]";
151 
getNamePaths()152     public static synchronized Set<String> getNamePaths() {
153         return NAME_PATHS != null ? NAME_PATHS : (NAME_PATHS = buildPaths(TYPE_TTS));
154     }
155 
getKeywordPaths()156     public static synchronized Set<String> getKeywordPaths() {
157         return KEYWORD_PATHS != null ? KEYWORD_PATHS : (KEYWORD_PATHS = buildPaths(""));
158     }
159 
buildPaths(String suffix)160     private static ImmutableSet<String> buildPaths(String suffix) {
161         ImmutableSet.Builder<String> builder = ImmutableSet.builder();
162         for (String s : Emoji.getNonConstructed()) {
163             String base = "//ldml/annotations/annotation[@cp=\"" + s + "\"]" + suffix;
164             builder.add(base);
165         }
166         return builder.build();
167     }
168 }
169