1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html#License
3 /*
4  *******************************************************************************
5  * Copyright (C) 1996-2016, International Business Machines Corporation and
6  * others. All Rights Reserved.
7  *******************************************************************************
8  */
9 
10 package com.ibm.icu.impl;
11 
12 import java.io.IOException;
13 import java.nio.ByteBuffer;
14 import java.util.Iterator;
15 import java.util.MissingResourceException;
16 
17 import com.ibm.icu.lang.UCharacter;
18 import com.ibm.icu.lang.UCharacter.HangulSyllableType;
19 import com.ibm.icu.lang.UCharacter.NumericType;
20 import com.ibm.icu.lang.UCharacterCategory;
21 import com.ibm.icu.lang.UProperty;
22 import com.ibm.icu.lang.UScript;
23 import com.ibm.icu.text.Normalizer2;
24 import com.ibm.icu.text.UTF16;
25 import com.ibm.icu.text.UnicodeSet;
26 import com.ibm.icu.util.CodePointMap;
27 import com.ibm.icu.util.CodePointTrie;
28 import com.ibm.icu.util.ICUException;
29 import com.ibm.icu.util.VersionInfo;
30 
31 /**
32 * <p>Internal class used for Unicode character property database.</p>
* <p>This class stores binary data read from uprops.icu.
34 * It does not have the capability to parse the data into more high-level
35 * information. It only returns bytes of information when required.</p>
36 * <p>Due to the form most commonly used for retrieval, array of char is used
37 * to store the binary data.</p>
38 * <p>UCharacterPropertyDB also contains information on accessing indexes to
39 * significant points in the binary data.</p>
* <p>Responsibility for molding the binary data into a more meaningful form lies with
41 * <a href=UCharacter.html>UCharacter</a>.</p>
42 * @author Syn Wee Quek
43 * @since release 2.1, february 1st 2002
44 */
45 
46 public final class UCharacterProperty
47 {
48     // public data members -----------------------------------------------
49 
50     /*
51      * public singleton instance
52      */
53     public static final UCharacterProperty INSTANCE;
54 
55     /**
56     * Trie data
57     */
58     public Trie2_16 m_trie_;
59     /**
60     * Unicode version
61     */
62     public VersionInfo m_unicodeVersion_;
63     /**
64     * Latin capital letter i with dot above
65     */
66     public static final char LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE_ = 0x130;
67     /**
    * Latin small letter dotless i
69     */
70     public static final char LATIN_SMALL_LETTER_DOTLESS_I_ = 0x131;
71     /**
72     * Latin lowercase i
73     */
74     public static final char LATIN_SMALL_LETTER_I_ = 0x69;
75     /**
76     * Character type mask
77     */
78     public static final int TYPE_MASK = 0x1F;
79 
80     // uprops.h enum UPropertySource --------------------------------------- ***
81 
82     /** No source, not a supported property. */
83     public static final int SRC_NONE=0;
84     /** From uchar.c/uprops.icu main trie */
85     public static final int SRC_CHAR=1;
86     /** From uchar.c/uprops.icu properties vectors trie */
87     public static final int SRC_PROPSVEC=2;
88     /** From unames.c/unames.icu */
89     public static final int SRC_NAMES=3;
90     /** From ucase.c/ucase.icu */
91     public static final int SRC_CASE=4;
92     /** From ubidi_props.c/ubidi.icu */
93     public static final int SRC_BIDI=5;
94     /** From uchar.c/uprops.icu main trie as well as properties vectors trie */
95     public static final int SRC_CHAR_AND_PROPSVEC=6;
96     /** From ucase.c/ucase.icu as well as unorm.cpp/unorm.icu */
97     public static final int SRC_CASE_AND_NORM=7;
98     /** From normalizer2impl.cpp/nfc.nrm */
99     public static final int SRC_NFC=8;
100     /** From normalizer2impl.cpp/nfkc.nrm */
101     public static final int SRC_NFKC=9;
102     /** From normalizer2impl.cpp/nfkc_cf.nrm */
103     public static final int SRC_NFKC_CF=10;
104     /** From normalizer2impl.cpp/nfc.nrm canonical iterator data */
105     public static final int SRC_NFC_CANON_ITER=11;
106     // Text layout properties.
107     public static final int SRC_INPC=12;
108     public static final int SRC_INSC=13;
109     public static final int SRC_VO=14;
110     /** One more than the highest UPropertySource (SRC_) constant. */
111     public static final int SRC_COUNT=15;
112 
113     // hardcoded text layout properties ----------------------------------
114     // TODO(ICU-20111): move to a data file and load on demand
115 
makeTrie(String data)116     private static final CodePointTrie makeTrie(String data) {
117         // One char == one byte.
118         // U+0000 and U+007A='z' are swapped because
119         // Java class String literals encode U+0000 and U+0080..U+07FF in two bytes.
120         byte[] bytes = new byte[data.length()];
121         for (int i = 0; i < bytes.length; ++i) {
122             char c = data.charAt(i);
123             if (c == 0) {
124                 c = 'z';
125             } else if (c == 'z') {
126                 c = 0;
127             }
128             assert 0 <= c && c <= 0xff;
129             bytes[i] = (byte)c;
130         }
131         return CodePointTrie.fromBinary(null, null, ByteBuffer.wrap(bytes));
132     }
133 
134     // Do not store the data in static String variables because
135     // those would not be garbage-collected.
136 
137     private static final class InPCTrie {
138         static final CodePointTrie INSTANCE = makeTrie(
139                 "\63\151\162\124\102z\375\2\162\13\2zzz\220z" +
140                 "zz\100zzzzzzzzzzzzz" +
141                 "zzzzzzzzzzzzzzzz" +
142                 "zzzzzzzzzzzzzzzz" +
143                 "zzzzzzzzzzzzzzzz" +
144                 "zzzzzzzz\200z\300z\377z\77\1" +
145                 "\176\1\276\1\176\1\376\1\76\2\176\2\274\2\374\2" +
146                 "\74\3\173\3\76\2\273\3\373\3\71\4\167\4\255\4" +
147                 "\341\4\41\5\61\5\161\5\231\5\331\5\31\6\126\6" +
148                 "\267\2\306\2\322\2\306\2\355\2zz\20z\40z" +
149                 "\60z\100z\120z\140z\160zzz\20z\40z" +
150                 "\60zzz\20z\40z\60zzz\20z\40z" +
151                 "\60zzz\20z\40z\60zzz\20z\40z" +
152                 "\60zzz\20z\40z\60zzz\20z\40z" +
153                 "\60zzz\20z\40z\60z\200z\220z\240z" +
154                 "\260z\300z\320z\340z\360z\377z\17\1\37\1" +
155                 "\57\1\77\1\117\1\137\1\157\1\176\1\216\1\236\1" +
156                 "\256\1\276\1\316\1\336\1\356\1\176\1\216\1\236\1" +
157                 "\256\1\376\1\16\2\36\2\56\2\76\2\116\2\136\2" +
158                 "\156\2\176\2\216\2\236\2\256\2\274\2\314\2\334\2" +
159                 "\354\2\374\2\14\3\34\3\54\3\74\3\114\3\134\3" +
160                 "\154\3\173\3\213\3\233\3\253\3\76\2\116\2\136\2" +
161                 "\156\2\273\3\313\3\333\3\353\3\373\3\13\4\33\4" +
162                 "\53\4\71\4\111\4\131\4\151\4\167\4\207\4\227\4" +
163                 "\247\4\255\4\275\4\315\4\335\4\341\4\361\4\1\5" +
164                 "\21\5\41\5\61\5\101\5\121\5\61\5\101\5\121\5" +
165                 "\141\5\161\5\201\5\221\5\241\5\231\5\251\5\271\5" +
166                 "\311\5\331\5\351\5\371\5\11\6\31\6\51\6\71\6" +
167                 "\111\6\126\6\146\6\166\6\206\6zzzz\213\6" +
168                 "\232\6zz\251\6\270\6\307\6\325\6\345\6zz" +
169                 "zzzzzzzzzzzzzzzz" +
170                 "zzzzzzzzzzzzzzzz" +
171                 "zzzzzzzzzz\363\6zz\363\6" +
172                 "zz\1\7zz\1\7zzzzzz\13\7" +
173                 "\33\7\51\7zzzzzzzzzzzz" +
174                 "zzzzzzzzzzzzzzzz" +
175                 "zzzzzzzz\71\7\111\7zzzz" +
176                 "zzzzzzzzzz\131\7\150\7zz" +
177                 "zzzz\162\7zzzzzz\176\7\215\7" +
178                 "\233\7zzzzzzzzzzzzzz" +
179                 "zz\253\7zzzz\267\7\307\7zz\314\7" +
180                 "\54\5\201zzz\334\7zzzzzz\352\7" +
181                 "\373\3zzzz\372\7\7\10zzzzzz" +
182                 "zzzzzzzzzzzz\27\10\47\10" +
183                 "\65\10zzzzzzzzzzzzzz" +
184                 "zzzzzzzzzzzzzzzz" +
185                 "\263\2\77\10zz\114\10zzzzzzzz" +
186                 "zz\1\1zzzz\130\10\144\10zz\164\10" +
187                 "\202\10zzzz\222\10zz\240\10\373\3zz" +
188                 "zz\200zzzzz\260\10\300\10zz\271\2" +
189                 "zzzz\307\10\326\10\343\10zzzz\361\10" +
190                 "zzzzzz\1\11\275\2zz\21\11\121\1" +
191                 "zzzzzzzzzzzzzzzz" +
192                 "zzzzzzzzzzzz\41\11zz" +
193                 "\60\11zzzz\100\11zzzzzzzz" +
194                 "zzzzzzzzzzzzzzzz" +
195                 "zzzzzzzzzzzzzzzz" +
196                 "zzzzzzzzzzzzzzzz" +
197                 "\120\11zzzz\130\11\146\11zzzzzz" +
198                 "\201zzzzz\166\11zzzzzzzz" +
199                 "\55\5zz\201\11\221\11\313\3zzzz\131\6" +
200                 "\201zzzzz\236\11\256\11zzzzzz" +
201                 "\273\11\313\11zzzzzzzzzzzz" +
202                 "zzzzzz\161z\333\11zz\377zzz" +
203                 "zz\346\11\366\11\117\1\4\12\53\5zzzz" +
204                 "zzzzzzzzzzzz\234\11\24\12" +
205                 "\157\1zzzzzzzzzz\44\12\63\12" +
206                 "zzzzzzzzzzzzzzzz" +
207                 "zzzzzzzzzz\353\2\103\12\343z" +
208                 "\24\2zzzzzz\123\12\276\2zzzz" +
209                 "zzzzzz\143\12\163\12zzzzzz" +
210                 "zzzz\173\12\213\12zzzzzzzz" +
211                 "zzzzzzzzzzzzzzzz" +
212                 "zz\227\12\246\12zzzzzzzzzz" +
213                 "zzzzzzzzzzzzzzzz" +
214                 "zzzzzzzzzzzzzzzz" +
215                 "zzzzzzzzzzzzzz\265\12" +
216                 "zzzz\302\12zz\321\12zzzz\335\12" +
217                 "\347\12zzzzzzzzzzzzzz" +
218                 "zzzzzzzzzzzzzzzz" +
219                 "zzzzzzzzzzzzzz\353\2" +
220                 "\367\12zzzzzzzzzz\7\13\17\13" +
221                 "\36\13zzzzzzzzzzzzzz" +
222                 "\55\13\74\13zzzzzz\104\13\124\13zz" +
223                 "zzzzzzzzzzzzzzzz" +
224                 "zzzzzzzzzzzz\141\13zz" +
225                 "zzzzzzzzzzzzzzzz" +
226                 "zzzzzzzzzzzzzz\105z" +
227                 "\115z\115z\115z\135z\175z\235z\275z\335z" +
228                 "\2z\2z\354z\12\1\51\1\111\1\2z\2z" +
229                 "\2z\2z\2z\2z\2z\2z\2z\2z" +
230                 "\2z\2z\2z\2z\2z\2z\2z\2z" +
231                 "\2z\2z\2z\2z\2z\2z\2z\2z" +
232                 "\2z\2z\2z\2z\2z\2z\151\1\210\1" +
233                 "\2z\2z\2z\2z\2z\2z\2z\2z" +
234                 "\2z\2z\250\1\2z\2z\310\1\346\1\3\2" +
235                 "\41\2\77\2\137\2\175\2\227\2zzzzzz" +
236                 "zzzzzzzzzzzzzzzz" +
237                 "zzzzzzzzzzzzzzzz" +
238                 "zzzzzzzzzzzzzzzz" +
239                 "zzzzzzzzzzzzzzzz" +
240                 "zzzzzzzzzzzzzzzz" +
241                 "zzzzzzzzzzzzzzzz" +
242                 "zzzzzzzzzzzzzzzz" +
243                 "zzzzzzzzzz\10\10\10\7zz" +
244                 "zzzzzzzzzzzzzzzz" +
245                 "zzzzzzzzzzzzzzzz" +
246                 "zzzzzzzzzzzzzzzz" +
247                 "zzzz\10\7\1z\7\4\7\1\1\1\1\10" +
248                 "\10\10\10\7\7\7\7\1\4\7z\10\1\10\10\10" +
249                 "\1\1zzzzzzzzzz\1\1zz" +
250                 "zzzzzzzzzzzzzzzz" +
251                 "zzzzzzzzzz\10\7\7zzz" +
252                 "zzzzzzzzzzzzzzzz" +
253                 "zzzzzzzzzzzzzzzz" +
254                 "zzzzzzzzzzzzzzzz" +
255                 "zzzzz\1z\7\4\7\1\1\1\1zz" +
256                 "\4\4zz\5\5\1zzzzzzzzz" +
257                 "\7zzzzzzzzzz\1\1zzz" +
258                 "zzzzzzzzzzzzzzzz" +
259                 "zzzzzzz\10z\10\10\7zzzz" +
260                 "zzzzzzzzzzzzzzzz" +
261                 "zzzzzzzzzzzzzzzz" +
262                 "zzzzzzzzzzzzzzzz" +
263                 "zzzz\1z\7\4\7\1\1zzzz\10" +
264                 "\10zz\10\10\1zzzzzzzzzz" +
265                 "zzzzzzzzzzzzzzzz" +
266                 "zzzzzzzz\10\10zzz\1zz" +
267                 "zzzzzzzz\7\1\1\1\1\10z\10" +
268                 "\10\15z\7\7\1zzzzzzzzzz" +
269                 "zzzzzzzzzz\1\1zzzz" +
270                 "zzzzzzzzzzzzzzzz" +
271                 "zz\10\10\10\10\10\10z\10\7\7zzzz" +
272                 "zzzzzzzzzzzzzzzz" +
273                 "zzzzzzzzzzzzzzzz" +
274                 "zzzzzzzzzzzzzzzz" +
275                 "zzzz\1z\7\10\7\1\1\1\1zz\4" +
276                 "\13zz\5\14\1zzzzzzzz\10\15" +
277                 "zzzzzzzzzz\1\1zzzz" +
278                 "zzzzzzzzzzzzzzzz" +
279                 "zzzzzzzz\10zzzzzzz" +
280                 "zzzzzzzzzzzzzzzz" +
281                 "zzzzzzzzzzzzzzzz" +
282                 "zzzzzzzzzzzzzzzz" +
283                 "zzzz\7\7\10\7\7zzz\4\4\4z" +
284                 "\5\5\5\10zzzzzzzzz\7zz" +
285                 "zzzzzzzzzzzzzzzz" +
286                 "zzzzzzzzzzzzzzzz" +
287                 "zzzzzz\10\7\7\7\10zzzzz" +
288                 "zzzzzzzzzzzzzzzz" +
289                 "zzzzzzzzzzzzzzzz" +
290                 "zzzzzzzzzzzzzzzz" +
291                 "zzzz\10\10\7\7\7\7z\10\10\11z\10" +
292                 "\10\10\10zzzzzzz\10\1zzzz" +
293                 "zzzzzzz\1\1zzzzzzz" +
294                 "zzzzzzzzzzzzzzzz" +
295                 "zzzzz\15\7\7\7\7z\10\15\15z\15" +
296                 "\15\10\10zzzzzzz\7\7zzzz" +
297                 "zzzzzzz\1\1zzzzzzz" +
298                 "zzzzzzzzzzzzzzzz" +
299                 "zzzzz\10\10\7\7zzzzzzz" +
300                 "zzzzzzzzzzzzzzzz" +
301                 "zzzzzzzzzzzzzzzz" +
302                 "zzzzzzzzzzzzzzzz" +
303                 "\10\10z\7\7\7\1\1z\4\4\4z\5\5\5" +
304                 "\10zzzzzzzzz\7zzzzz" +
305                 "zzzzz\1\1zzzzzzzzz" +
306                 "zzzzzzzzzzzzzzzz" +
307                 "zzz\7\7zzzzzzzzzzz" +
308                 "zzzzzzzzzzzzzzzz" +
309                 "zzzzzzzzzzzzzzzz" +
310                 "zzzzzzzzzzzzzzzz" +
311                 "z\10zzzz\7\7\7\10\10\1z\1z\7" +
312                 "\4\13\4\5\14\5\7zzzzzzzzz" +
313                 "zzzzzzzzz\7\7zzzzz" +
314                 "zzzzzzzzzzzzzzzz" +
315                 "zzzzzzzzzzzzzzzz" +
316                 "zzzzzzzzzzz\7\10\7\7\10" +
317                 "\10\10\10\1\1\1zzzzz\16\16\16\16\16" +
318                 "\7z\10\10\10\10\10\10\10\10zzzzzz" +
319                 "zzzzzzzzzzzzzzzz" +
320                 "zzzzzzzzzzzzzzzz" +
321                 "zzzzzzzzzzz\7\10\7\7\10" +
322                 "\10\10\10\1\1z\10\1zzz\16\16\16\16\16" +
323                 "zzz\10\10\10\10\10\10zzzzzzz" +
324                 "zzzzzzzzzzzzzzzz" +
325                 "zzzzzzzzzzzzzzzz" +
326                 "zzzzzzzzzzz\1\1zzz" +
327                 "zzzzzzzzzzzzzzzz" +
328                 "zzzzzzzz\1z\1z\10zzz" +
329                 "z\7\4zzzzzzzzzzzzz" +
330                 "zzzzzzzzzzzzzzzz" +
331                 "zzzzzzzzzzzzzzzz" +
332                 "zzzz\1\10\11\1\1\11\11\11\11\10\10\10" +
333                 "\10\10\7\10\11\10\10\1z\10\10zzzzz" +
334                 "\1\1\1\1\1\1\1\1\1\1\1z\1\1\1\1" +
335                 "\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1" +
336                 "\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1" +
337                 "zzzzzz\1zzzzzzzzz" +
338                 "zzzzzzzzzzzzzzzz" +
339                 "zzzzzzzzzzzzzzzz" +
340                 "zzzzzzzzzzzzzzzz" +
341                 "\7\7\10\10\1\4\10\10\10\10\10\1\7z\10\7" +
342                 "z\1\1zzzzzz\7\7\1\1zzz" +
343                 "z\1\1z\7\7\7zz\7\7\7\7\7\7\7" +
344                 "zz\10\10\10\10zzzzzzzzzz" +
345                 "z\1\7\4\10\10\7\7\7\7\7\7\1z\7z" +
346                 "zzzzzzzzz\7\7\7\10zz\10" +
347                 "\1\1zzzzzzzzzzz\10\1z" +
348                 "zzzzzzzzzzz\7\10\10\10\10" +
349                 "\1\1\1\13\14\5\4\4\4\5\5\10\7\7\10\10" +
350                 "\10\10\10\10\10z\10zzzzzzzzz" +
351                 "\10zz\10\10\1\7\7\15\15\10\10\7\7\7z" +
352                 "zzz\7\7\1\7\7\7\7\7\7\1\10\1z" +
353                 "zzz\7\7\7\7\7\16\16\16\7\7\16\7\7" +
354                 "\7\7\7zzzzzzz\7\7zzzz" +
355                 "zzz\10\1\4\7\10zzzzz\4\1\7" +
356                 "\10\10\10\1\1\1\1z\7\10\7\7\10\10\10\10" +
357                 "\1\1\10\1\7\4\4\4\10\10\10\10\10\10\10\10" +
358                 "\10\10zz\1\10\10\10\10\7zzzzzz" +
359                 "zzzzz\10\7\10\10\1\1\1\3\11\12\4" +
360                 "\4\5\5\10\15\7zzzzzzzzzz" +
361                 "z\10\1\10\10\10z\7\1\1\10\1\4\7\10\10" +
362                 "\7z\1\1zzzzzz\10\7\10\10\7\7" +
363                 "\7\10\7\10zzzz\7\7\7\4\4\13\7\7" +
364                 "\1\10\10\10\10\4\4\10\1zzzzzzz" +
365                 "z\10\10\10z\6\1\1\1\1\1\10\10\1\1\1" +
366                 "\1\10\7\6\6\6\6\6\6\6zzzz\1z" +
367                 "zzz\10zz\7zzzzzzzz\10" +
368                 "zzzz\10zzzz\7\7\1\10\7zz" +
369                 "zzzzzz\7\7\7\7\7\7\7\7\7\7" +
370                 "\7\7\1\10zzzzzzzzzz\10\10" +
371                 "\10\10\10\10\10\10\10\10\10\10\10\10\10\10zz" +
372                 "zzzzzzzzzzz\10zzzz" +
373                 "zzzzzzz\1\1\1zzzzzz" +
374                 "z\1\1\1\10\1\1\1\1\10zzz\10\7\7" +
375                 "\10\10\1\1\4\4\10\7\7\2\3zzzzz" +
376                 "zzzzzzzzzz\10\10\10\10\1\10" +
377                 "\4\10\1\7\4\1\1zzzzzzzzz" +
378                 "\10zzzzzzzz\10\7zzzzz" +
379                 "zzzzzz\7\10\7zz\10\7\10\10\1" +
380                 "\16\16\10\10\16\7\16\16\7\10\10zzzzz" +
381                 "zzzzzz\4\1\10\4\7zzz\7\7" +
382                 "\10\7\7\1\7\7z\7\1zz\6\1\1z\10" +
383                 "\6zzzzz\1\1\1\10zzzzzz" +
384                 "zz\10\1\1zzzzz\7\10\7zzz" +
385                 "zzzzzzzzzz\10\10\10\10\1\1" +
386                 "\1\1\10\10\10\10\10zzzzzzzzz" +
387                 "\7\4\7\1\1\10\10\7\7\1\1zzzzz" +
388                 "zz\10\10\10\1\1\4\10\11\11\10\1\1z\10" +
389                 "zzzzzzzzzzz\7\4\7\1\1" +
390                 "\1\1\1\1\10\10\10\15\7zzzzzzz" +
391                 "z\1z\10\1zzzzzzzzzzz" +
392                 "z\7\7\7\1\10\10\15\15\10\7\10\10zzz" +
393                 "zzz\10z\7\4\7\1\1\10\10\10\10\1\1" +
394                 "zzzzzzzzzzz\1\1z\7\7" +
395                 "\10\7\7\7\7zz\4\4zz\5\5\7zz" +
396                 "\7\7zz\10\10\10\10\10\10\10zzz\7\7" +
397                 "\1\10\10\7\1zzzzzzzzz\7\4" +
398                 "\7\1\1\1\1\1\1\4\10\13\5\7\5\10\7\1" +
399                 "\1zzzzzzzzzzzz\4\7\1" +
400                 "\1\1\1zz\4\13\5\14\10\10\7\1\7\7\7" +
401                 "\1\1\1\1\1\1\10\10\7\7\10\7\1zzz" +
402                 "zzzzzzzz\10\7\10\4\7\1\1\10" +
403                 "\10\10\10\7\1zzzzzzzzzzz" +
404                 "zz\1z\10\7\7\10\10\1\1\4\10\1\10\10" +
405                 "\10zzzzzzzzzzzz\7\4\7" +
406                 "\1\1\1\10\10\10\10\10\7\1\1zzzzz" +
407                 "\10\1\1\10\10\10\10\10\10\1zzzzz\1" +
408                 "\1\10\10\10\10\7z\1\1\1\1z\10\1\1\10" +
409                 "\10\10\7\7\1\1\1zzzzzzzzz" +
410                 "z\1\1\1\1\1\1\10\7\10zzzzzz" +
411                 "z\10\10\1\1\1\1\1z\10\10\10\10\10\10\7" +
412                 "\1zz\1\1\1\1\1\1\1\1\1\1\1\1\1" +
413                 "\1z\7\1\1\1\1\1\1\4\1\10\7\10\10z" +
414                 "zzzzzzzz\10\10\10\10\10\1zz" +
415                 "z\10z\10\10z\10\10\1\10\1zz\1zz" +
416                 "zzzzzzzz\7\7\7\7\7z\10\10" +
417                 "z\7\7\10\7zzzzzzzzz\10\1" +
418                 "\4\7zzzzzzzzzz");
419     }
420 
421     private static final class InSCTrie {
422         static final CodePointTrie INSTANCE = makeTrie(
423                 "\63\151\162\124\102z\102\3\170\17\4z\100z\220z" +
424                 "zz\100z\140z\224z\100z\100z\100z\100z" +
425                 "\100z\100z\100z\100z\100z\100z\100z\100z" +
426                 "\100z\100z\100z\100z\100z\100z\100z\100z" +
427                 "\100z\100z\100z\100z\100z\100z\100z\100z" +
428                 "\100z\100z\100z\100z\324z\22\1\122\1\220\1" +
429                 "\317\1\15\2\114\2\212\2\312\2\10\3\106\3\204\3" +
430                 "\304\3\2\4\101\4\177\4\277\4\375\4\75\5\175\5" +
431                 "\274\5\374\5\73\6\173\6\233\6\333\6\33\7\130\7" +
432                 "\370\2\13\3\27\3\13\3\62\3zz\20z\40z" +
433                 "\60z\100z\120z\140z\160z\140z\160z\200z" +
434                 "\220z\224z\244z\264z\304z\100z\120z\140z" +
435                 "\160z\100z\120z\140z\160z\100z\120z\140z" +
436                 "\160z\100z\120z\140z\160z\100z\120z\140z" +
437                 "\160z\100z\120z\140z\160z\100z\120z\140z" +
438                 "\160z\100z\120z\140z\160z\324z\344z\364z" +
439                 "\4\1\22\1\42\1\62\1\102\1\122\1\142\1\162\1" +
440                 "\202\1\220\1\240\1\260\1\300\1\317\1\337\1\357\1" +
441                 "\377\1\15\2\35\2\55\2\75\2\114\2\134\2\154\2" +
442                 "\174\2\212\2\232\2\252\2\272\2\312\2\332\2\352\2" +
443                 "\372\2\10\3\30\3\50\3\70\3\106\3\126\3\146\3" +
444                 "\166\3\204\3\224\3\244\3\264\3\304\3\324\3\344\3" +
445                 "\364\3\2\4\22\4\42\4\62\4\101\4\121\4\141\4" +
446                 "\161\4\177\4\217\4\237\4\257\4\277\4\317\4\337\4" +
447                 "\357\4\375\4\15\5\35\5\55\5\75\5\115\5\135\5" +
448                 "\155\5\175\5\215\5\235\5\255\5\274\5\314\5\334\5" +
449                 "\354\5\374\5\14\6\34\6\54\6\73\6\113\6\133\6" +
450                 "\153\6\173\6\213\6\233\6\253\6\233\6\253\6\273\6" +
451                 "\313\6\333\6\353\6\373\6\13\7\33\7\53\7\73\7" +
452                 "\113\7\130\7\150\7\170\7\210\7\351z\351z\230\7" +
453                 "\243\7\263\7\303\7\322\7\341\7\357\7\377\7\100z" +
454                 "\100z\100z\100z\100z\100z\100z\100z\100z" +
455                 "\100z\100z\100z\100z\100z\100z\100z\100z" +
456                 "\100z\100z\100z\100z\100z\17\10\35\10\346z" +
457                 "\35\10\346z\55\10\17\10\75\10\351z\351z\115\10" +
458                 "\131\10\143\10\162\10\60z\100z\100z\100z\100z" +
459                 "\100z\100z\100z\100z\100z\100z\100z\100z" +
460                 "\100z\100z\100z\100z\202\10\154\1\222\10\242\10" +
461                 "\55\2\351z\262\10\302\10\351z\351z\164\3\322\10" +
462                 "\341\10\60z\100z\100z\351z\361\10\351z\351z" +
463                 "\1\11\16\11\36\11\52\11\60z\60z\100z\100z" +
464                 "\100z\100z\100z\100z\72\11\346z\351z\112\11" +
465                 "\126\11\60z\100z\100z\146\11\351z\165\11\205\11" +
466                 "\351z\351z\225\11\245\11\351z\351z\265\11\302\11" +
467                 "\322\11\100z\100z\100z\100z\100z\100z\100z" +
468                 "\100z\342\11\360\11\376\11\100z\100z\100z\100z" +
469                 "\100z\100z\100z\100z\100z\100z\100z\100z" +
470                 "\100z\100z\100z\10\12\24\12\44\12\100z\100z" +
471                 "\100z\100z\100z\132\7\62\12\100z\100z\100z" +
472                 "\100z\100z\100z\100z\100z\100z\100z\100z" +
473                 "\100z\100z\100z\100z\100z\100z\100z\100z" +
474                 "\100z\100z\100z\100z\100z\100z\100z\100z" +
475                 "\100z\164z\100z\100z\100z\102\12\351z\117\12" +
476                 "\100z\351z\137\12\155\12\174\12\326z\347z\351z" +
477                 "\214\12\230\12\60z\250\12\266\12\306\12\351z\324\12" +
478                 "\351z\344\12\363\12\100z\100z\3\13\351z\351z" +
479                 "\22\13\227\2\60z\42\13\62\13\343z\351z\211\10" +
480                 "\102\13\122\13\60z\351z\141\13\351z\351z\351z" +
481                 "\161\13\201\13\100z\221\13\241\13\100z\100z\100z" +
482                 "\100z\100z\100z\100z\100z\100z\100z\100z" +
483                 "\100z\261\13\301\13\316\13\60z\336\13\356\13\351z" +
484                 "\370\13\61z\100z\100z\100z\100z\100z\100z" +
485                 "\100z\100z\100z\100z\100z\100z\100z\100z" +
486                 "\100z\100z\100z\100z\100z\100z\100z\100z" +
487                 "\100z\100z\100z\100z\100z\10\14\346z\351z" +
488                 "\212\10\30\14\46\14\60\14\100\14\120\14\351z\351z" +
489                 "\140\14\100z\100z\100z\100z\160\14\351z\213\10" +
490                 "\200\14\220\14\240\14\351z\255\14\325z\350z\351z" +
491                 "\275\14\315\14\60z\272\6\65z\341z\353\3\206\10" +
492                 "\335\14\100z\100z\100z\100z\355\14\155\1\374\14" +
493                 "\337z\351z\14\15\34\15\60z\54\15\142\1\162\1" +
494                 "\74\15\10\3\114\15\134\15\355\11\100z\100z\100z" +
495                 "\100z\100z\100z\100z\100z\333z\351z\351z" +
496                 "\154\15\0\15\212\15\100z\100z\231\15\351z\351z" +
497                 "\37\11\251\15\60z\100z\100z\100z\100z\100z" +
498                 "\100z\100z\100z\100z\100z\333z\351z\377z" +
499                 "\271\15\311\15\321\15\100z\100z\333z\351z\351z" +
500                 "\341\15\361\15\60z\100z\100z\337z\351z\1\16" +
501                 "\16\16\60z\100z\100z\100z\351z\36\16\56\16" +
502                 "\76\16\100z\100z\100z\100z\100z\100z\100z" +
503                 "\100z\100z\100z\100z\100z\337z\351z\206\10" +
504                 "\116\16\100z\100z\100z\100z\100z\100z\100z" +
505                 "\100z\100z\100z\100z\100z\100z\100z\100z" +
506                 "\100z\100z\100z\100z\100z\100z\100z\100z" +
507                 "\100z\100z\100z\100z\100z\136\16\351z\351z" +
508                 "\153\16\173\16\213\16\351z\351z\227\16\241\16\100z" +
509                 "\100z\100z\100z\100z\100z\100z\100z\100z" +
510                 "\100z\100z\100z\100z\100z\100z\100z\100z" +
511                 "\100z\100z\100z\100z\100z\261\16\351z\377z" +
512                 "\301\16\321\16\273\6\341\16\125\5\351z\357\16\53\7" +
513                 "\377\16\100z\100z\100z\100z\17\17\351z\351z" +
514                 "\36\17\56\17\60z\76\17\351z\112\17\127\17\60z" +
515                 "\100z\100z\100z\100z\100z\100z\100z\100z" +
516                 "\100z\100z\100z\100z\100z\100z\351z\147\17" +
517                 "\100z\100z\100z\100z\100z\100z\100z\100z" +
518                 "\100z\100z\100z\100z\100z\100z\100z\100z" +
519                 "\105z\125z\125z\125z\145z\205z\245z\305z" +
520                 "\345z\4z\4z\365z\24\1\64\1\124\1\4z" +
521                 "\164\1\4z\175\1\4z\4z\4z\4z\4z" +
522                 "\4z\4z\4z\4z\4z\4z\4z\4z" +
523                 "\4z\4z\4z\4z\4z\4z\4z\4z" +
524                 "\4z\4z\4z\4z\4z\4z\4z\4z" +
525                 "\4z\4z\4z\235\1\275\1\4z\4z\4z" +
526                 "\4z\4z\4z\4z\4z\4z\4z\335\1" +
527                 "\4z\4z\375\1\35\2\75\2\135\2\175\2\235\2" +
528                 "\275\2\330\2zzzzzzzzzzzz" +
529                 "zzzzzzzzzzzzzzzz" +
530                 "zzzzzzzzzzzzzzzz" +
531                 "z\14zz\30\30\30\30\30\30\30\30\30\30zz" +
532                 "zzzzzzzzzzzzzzzz" +
533                 "zzzzzzzzzzzzzzzz" +
534                 "zzzzzzzzzzzzzzzz" +
535                 "zzzzzzzzzzzzzzzz" +
536                 "zzzz\14zzzzzzzzzzz" +
537                 "zzzzzz\34\34zzzzzzzz" +
538                 "zzzzzzzzzzzzzzz\14" +
539                 "zzzzzzzzzzzzzzzz" +
540                 "zzzzzzzzzzzzzzzz" +
541                 "zzzzzzzz\2\2\2\40\43\43\43\43" +
542                 "\43\43\43\43\43\43\43\43\43\43\43\43\43\5\5\5" +
543                 "\5\5\5\5\5\5\5\5\5\5\5\5\5\5\5\5" +
544                 "\5\5\5\5\5\5\5\5\5\5\5\5\5\5\5\5" +
545                 "\5\5\42\42\27\1\42\42\42\42\42\42\42\42\42\42" +
546                 "\42\42\42\37\42\42z\4\4zz\42\42\42\5\5" +
547                 "\5\5\5\5\5\5\43\43\42\42zz\30\30\30\30" +
548                 "\30\30\30\30\30\30zz\43\43\43\43\43\43\5\5" +
549                 "\5\5\5\5\5\5\14\2\2\40z\43\43\43\43\43" +
550                 "\43\43\43zz\43\43zz\43\43\5\5\5\5\5" +
551                 "\5\5\5\5\5\5\5\5\5\5\5\5\5\5\5z" +
552                 "\5\5\5\5\5\5\5z\5zzz\5\5\5\5" +
553                 "zz\27\1\42\42\42\42\42zz\42\42zz\42" +
554                 "\42\37\6zzzzzzzz\42zzzz" +
555                 "\5\5z\5\43\43\42\42zz\30\30\30\30\30\30" +
556                 "\30\30\30\30\5\5zzzzzzzzzz" +
557                 "\2z\34z\2\2\40z\43\43\43\43\43\43zz" +
558                 "zz\43\43zz\43\43\5\5\5\5\5\5\5\5" +
559                 "\5\5\5\5\5\5\5\5\5\5\5\5z\5\5\5" +
560                 "\5\5\5\5z\5\5z\5\5z\5\5zz\27" +
561                 "z\42\42\42zzzz\42\42zz\42\42\37z" +
562                 "zz\4zzzzzzz\5\5\5\5z\5" +
563                 "zzzzzzz\30\30\30\30\30\30\30\30\30" +
564                 "\30\2\22\14\14z\13zzzzzzzzz" +
565                 "z\2\2\40z\43\43\43\43\43\43\43\43\43z\43" +
566                 "\43\43z\43\43\5\5\5\5\5\5\5\5\5\5\5" +
567                 "\5\5\5\5\5\5\5\5\5z\5\5\5\5\5\5" +
568                 "\5z\5\5z\5\5\5\5\5zz\27\1\42\42" +
569                 "\42\42\42\42z\42\42\42z\42\42\37zzzz" +
570                 "zzzzzzzzzzzzzz\43\43" +
571                 "\42\42zz\30\30\30\30\30\30\30\30\30\30zz" +
572                 "zzzzzzz\5\4\4\4\27\27\27z\2" +
573                 "\2\40z\43\43\43\43\43\43\43\43zz\43\43z" +
574                 "z\43\43\5\5\5\5\5\5\5\5\5\5\5\5\5" +
575                 "\5\5\5\5\5\5\5z\5\5\5\5\5\5\5z" +
576                 "\5\5z\5\5\5\5\5zz\27\1\42\42\42\42" +
577                 "\42zz\42\42zz\42\42\37zzzzzz" +
578                 "zz\42\42zzzz\5\5z\5\43\43\42\42" +
579                 "zz\30\30\30\30\30\30\30\30\30\30z\5zz" +
580                 "zzzzzzzzzzzz\2\25z\43" +
581                 "\43\43\43\43\43zzz\43\43\43z\43\43\43\5" +
582                 "zzz\5\5z\5z\5\5zzz\5\5z" +
583                 "zz\5\5\5zzz\5\5\5\5\5\5\5\5" +
584                 "\5\5\5\5zzzz\42\42\42zzz\42\42" +
585                 "\42z\42\42\42\37zzzzzzzzz\42" +
586                 "zzzzzzzzzzzzzz\30\30" +
587                 "\30\30\30\30\30\30\30\30zzzzzzzz" +
588                 "zzzzzzzz\2\2\2\40\2\43\43\43" +
589                 "\43\43\43\43\43z\43\43\43z\43\43\43\5\5\5" +
590                 "\5\5\5\5\5\5\5\5\5\5\5\5\5\5\5\5" +
591                 "\5z\5\5\5\5\5\5\5\5\5\5\5\5\5\5" +
592                 "\5\5zzz\1\42\42\42\42\42z\42\42\42z" +
593                 "\42\42\42\37zzzzzzz\42\42z\5\5" +
594                 "\5zzzzz\43\43\42\42zz\30\30\30\30" +
595                 "\30\30\30\30\30\30zzzzzzzzzz" +
596                 "zzzzzz\2\2\40z\43\43\43\43\43\43" +
597                 "\43\43z\43\43\43z\43\43\43\5\5\5\5\5\5" +
598                 "\5\5\5\5\5\5\5\5\5\5\5\5\5\5z\5" +
599                 "\5\5\5\5\5\5\5\5\5z\5\5\5\5\5z" +
600                 "z\27\1\42\42\42\42\42z\42\42\42z\42\42\42" +
601                 "\37zzzzzzz\42\42zzzzzz" +
602                 "z\5z\43\43\42\42zz\30\30\30\30\30\30\30" +
603                 "\30\30\30z\21\21zzzzzzzzzz" +
604                 "zzz\2\2\2\40z\43\43\43\43\43\43\43\43" +
605                 "z\43\43\43z\43\43\43\5\5\5\5\5\5\5\5" +
606                 "\5\5\5\5\5\5\5\5\5\5\5\5\5\5\5\5" +
607                 "\5\5\5\5\5\5\5\5\5\5\5\5\5\5\32\32" +
608                 "\1\42\42\42\42\42z\42\42\42z\42\42\42\37\15" +
609                 "zzzzz\6\6\6\42zzzzzzz" +
610                 "\43\43\43\42\42zz\30\30\30\30\30\30\30\30\30" +
611                 "\30zzzzzzzzzz\6\6\6\6\6" +
612                 "\6zz\2\40z\43\43\43\43\43\43\43\43\43\43" +
613                 "\43\43\43\43\43\43\43\43zzz\5\5\5\5\5" +
614                 "\5\5\5\5\5\5\5\5\5\5\5\5\5\5\5\5" +
615                 "\5\5\5z\5\5\5\5\5\5\5\5\5z\5z" +
616                 "z\5\5\5\5\5\5\5zzz\37zzzz" +
617                 "\42\42\42\42\42\42z\42z\42\42\42\42\42\42\42" +
618                 "\42zzzzzz\30\30\30\30\30\30\30\30\30" +
619                 "\30zz\42\42zzzzzzzzzzz" +
620                 "z\5\5\5\5\5\5\5\5\5\5\5\5\5\5\5" +
621                 "\5\5\5\5\5\5\5\5\5\5\5\5\5\5\5\5" +
622                 "\5\5\5\5\5\5\5\5\5\5\5\5\5\5\5z" +
623                 "\42\42\42\42\42\42\42\42\42\42\32zzzzz" +
624                 "\42\42\42\42\42\42z\42\36\36\36\36\12\2\32z" +
625                 "\30\30\30\30\30\30\30\30\30\30zzzzzz" +
626                 "zzzzzzzzzzzzzzzz" +
627                 "zzzzzzzzzzzzzzzz" +
628                 "\5\5z\5zz\5\5z\5zz\5zzz" +
629                 "zzz\5\5\5\5z\5\5\5\5\5\5\5z" +
630                 "\5\5\5z\5z\5zz\5\5z\5\5z\42" +
631                 "\42\42\42\42\42\42\42\42\42z\42\13\13zz\42" +
632                 "\42\42\42\42zzz\36\36\36\36z\2zz\30" +
633                 "\30\30\30\30\30\30\30\30\30zz\5\5\5\5z" +
634                 "zzzzzzzzzzzzzzzz" +
635                 "zzzzzzzzzzzzzzz\30" +
636                 "\30\30\30\30\30\30\30\30\30\30\30\30\30\30\30\30" +
637                 "\30\30\30z\34z\34z\27zzzzzz\5" +
638                 "\5\5\5\5\5\5\5z\5\5\5\5\5\5\5\5" +
639                 "\5\5\5\5\5\5\5\5\5\5\5\5\5\5\5\5" +
640                 "\5\5\5\5\5\5\5\5\5\5\5\5zzzz" +
641                 "\42\42\42\42\42\42\42\42\42\42\42\42\42\2\40\42" +
642                 "\42\2\2\32\1zz\10\10\10\10\10\17\17\17\17" +
643                 "\17\17\17\17\17\17\17z\17\17\17\17\17\17\17\17" +
644                 "\17\17\17\17\17\17\17\17\17\17\17\17\17\17\17\17" +
645                 "\17\17\17\17\17\17\17\17\17\17\17\17zzzz" +
646                 "zz\34zzzzzzzzzzzzz" +
647                 "zzzzzzzzzzzzzzzz" +
648                 "zzzzzzzzzzzzzzzz" +
649                 "zzzzzzzzzzzz\5\43\43\43" +
650                 "\43\43\43\43\43\43\43\42\42\42\42\42\42\2\36\40" +
651                 "\23\32\13\13\13\13\5\30\30\30\30\30\30\30\30\30" +
652                 "\30z\14zz\14z\5\5\43\43\43\43\42\42\42" +
653                 "\42\5\5\5\5\13\13\5\42\36\36\5\5\42\42\36" +
654                 "\36\36\36\36\5\5\42\42\42\42\5\5\5\5\5\5" +
655                 "\5\5\5\5\5\13\42\42\42\42\36\36\36\36\36\36" +
656                 "\36\5\36\30\30\30\30\30\30\30\30\30\30\36\36\42" +
657                 "\42zz\43\43\43\5\5\5\5\5\5\5\5\5\5" +
658                 "z\5\5\42\42\32zzzzzzzzzz" +
659                 "z\5\5\42\42zzzzzzzzzzz" +
660                 "z\5z\42\42zzzzzzzzzzz" +
661                 "z\5\5\5\43\43\43\43\43\43\43\43\43\43\43\43" +
662                 "\43zz\42\42\42\42\42\42\42\42\42\42\2\40\42" +
663                 "\33\33\34\20\12\34\34\32\23\34zzzzzz" +
664                 "zz\1\34zz\14\5\5\5\5\5\5\5\5\5" +
665                 "\5\5\5\5\5\5\42\42\42\42\42\42\42\42\42\17" +
666                 "\17\17zzzz\7\7\2\7\7\7\7\7\7\7" +
667                 "\42\34zzzz\5\5\5\41\41\41\41\41\41\41" +
668                 "\41\41\41\41zz\35\35\35\35\35zzzzz" +
669                 "zzzzzz\42\42\42\42\42\42\42\42\42\42" +
670                 "\42\42\42\42\42\42\7\7\7\7\7\7\7\36\36z" +
671                 "zzzzz\5\5\5\5\5\5\5\42\42\42\42" +
672                 "\42zzzz\5\5\5\5\5\5\5\5\5\5\5" +
673                 "\5\5\43\43\43\5\5\13\13\17\7\7\11\17\17\17" +
674                 "\17z\23\42\42\42\42\42\42\42\42\42\42\42\42\42" +
675                 "\42\42\2\36\36\36\36\36\32\34\34zz\34\2\2" +
676                 "\2\20\40\43\43\43\43\43\43\43\43\43\43\43\5\5" +
677                 "\5\5\27\42\42\42\42\42\42\42\42\42\42\42\37\5" +
678                 "\5\5\5\5\5\5zzzz\2\20\40\43\43\43" +
679                 "\43\43\43\43\5\5\5\5\5\5\17\17\17\42\42\42" +
680                 "\42\42\42\32\23\17\17\5\5\30\30\30\30\30\30\30" +
681                 "\30\30\30\1\5\5\5\7\7\5\5\5\5\43\43\27" +
682                 "\42\42\42\42\42\42\42\42\42\7\7\32\32zzz" +
683                 "zzzzzzzzz\5\5\5\5\17\17\42" +
684                 "\42\42\42\42\42\42\7\7\7\7\2\2\34\27zz" +
685                 "zzzzzz\30\30\30\30\30\30\30\30\30\30" +
686                 "zzz\5\5\5\4\4\4z\4\4\4\4\4\4" +
687                 "\4\4\4\4\4\4zzzzzzzzzz" +
688                 "zzzz\40\40\4\21\21\4\4\4zzzz" +
689                 "zzzzzzz\34zzzzzzzz" +
690                 "zzzz\26\24zz\14\14\14\14\14zzz" +
691                 "zzzzzzzz\34\34\34zzzzz" +
692                 "zzzzzz\43\43z\43\43\43\32\5\5\5" +
693                 "\5\2\5\5\5\5\42\42\42\42\42zzzzz" +
694                 "zzz\5\5\5\5\5\5\5\5\5\5\5\5\5" +
695                 "\5\41\41\5\5\5\5\41\17\17\5\5\5\5\5\5" +
696                 "\5\17\5\2zzzzzzzzzzzz" +
697                 "\5\5\5\5\13\42\42\42\42\42\42\42\42\42\42\42" +
698                 "\37\2zzzzzzzzzz\4\4\4\4" +
699                 "\4\4\4\4\4\4\4\4\4\4\4\4\2\2zz" +
700                 "zzzzzzzz\43\42\30\30\30\30\30\30" +
701                 "\30\30\30\30\5\5\5\5\5\5\41\41\41\41\41\41" +
702                 "\41\41\41\36\36\36zz\5\5\5\5\5\5\5\42" +
703                 "\42\42\42\42\42\42\42\7\7\7\32zzzzz" +
704                 "zzzzzzz\2\2\20\40\43\43\43\43\43" +
705                 "\5\5\5\43\43\43\5\5\5\27\42\42\42\42\42\42" +
706                 "\42\42\42\17\13\13\5\5\5\5\5\42z\5\5\5" +
707                 "\5\5\5\5\5\5\30\30\30\30\30\30\30\30\30\30" +
708                 "\5\5\5\5\5z\42\42\42\13\13\13\13zzz" +
709                 "zzzzzz\7\7\7\7\7\7\7\7\7\7" +
710                 "\7\7\7\7zz\5\5\5\14\14\14zzz\5" +
711                 "\36\36\36\5\5\42\42\42\42\42\42\42\42\42\42\42" +
712                 "\42\42\42\42\36\35\36\35zzzzzzzz" +
713                 "zzzzz\43\43\5\5\5\5\5\5\5\5\5" +
714                 "\42\42\42\42\42zzzzz\40\23zzzz" +
715                 "zzzzz\5\5\5\5\5\5\5\5\5\5\5" +
716                 "\5\5\5\43\43\5\43\5\5\5\5\5\5\5\5\5" +
717                 "\7\7\7\7\7\42\42\42\42\42\42\42\42z\36\32" +
718                 "zz\5\42\42\42z\42\42zzzzz\42\42" +
719                 "\2\40\5\5\5\5z\5\5\5z\5\5\5\5\5" +
720                 "\5\5zz\27\27\27zzzz\23\2\2\40\21" +
721                 "\21\43\43\43\43\43\43\43\43\43\43\43\42\42\42\42" +
722                 "\42\42\37zzzzzzzzz\3\3\3\3" +
723                 "\3\3\3\3\3\3\3\3\3\3\30\30\30\30\30\30" +
724                 "\30\30\30\30zzzzzzzzzzzz" +
725                 "zzz\31\2\2\40\43\43\43\43\43\43\43\43\43" +
726                 "\43\5\5\5\42\42\42\42\42\42\42\42\42\37\27z" +
727                 "zzzz\2\2\40\43\43\43\43\5\5\5\5\5" +
728                 "\5\5\5\5\42\42\42\23\32z\30\30\30\30\30\30" +
729                 "\30\30\30\30zzzz\5\42\42zzzzz" +
730                 "zzzz\41\41\41\41\41\5\5\5\5\5\5\5" +
731                 "\5\5\5\5\27zzzzzzzzzzz" +
732                 "z\5\5\5\42\42\42\42\42\42\42\42\42\42\42\42" +
733                 "\42\37\1\16\16zzzzz\34\27\42\42zz" +
734                 "z\42\42\42\42\2\37\27\22zzzzzz\4" +
735                 "z\43\43\43\43\5\5\5z\5z\5\5\5\5z" +
736                 "\5\5\5\5\5\5\5\5\5zzzzzzz" +
737                 "\5\5\5\5\5\5\5\5\5\5\5\5\5\5\5\2" +
738                 "\42\42\42\42\42\42\42\42\42\27\32zzzzz" +
739                 "\2\2\2\40z\43\43\43\43\43\43\43\43zz\43" +
740                 "\5z\5\5z\5\5\5\5\5z\27\27\1\42\42" +
741                 "zzzzzzz\42zzzzzz\2\2" +
742                 "\43\43\42\42zz\4\4\4\4\4\4\4zzz" +
743                 "\5\5\5\5\5\42\42\42\42\42\42\42\42\42\42\42" +
744                 "\37\2\2\40\27\1zzzzzzzz\30\30" +
745                 "\30\30\30\30\30\30\30\30zzzz\34z\43\43" +
746                 "\43\43\43\43\43\43\43\43\43\43\43\43\5\2\40\37" +
747                 "\27\1zzzzzzzzzzz\42\42\42" +
748                 "\42\42\42zz\42\42\42\42\2\2\40\37\27zz" +
749                 "zzzzzzzzzzzzz\43\43\43" +
750                 "\43\42\42zz\42\42\42\42\42\42\42\42\42\42\42" +
751                 "\42\42\2\40\37\42zzzzzzzzzz" +
752                 "zzzzz\5\5\5\5\5\5\5\5\5\5\5" +
753                 "\2\40\42\42\42\42\42\42\37\27zzzzzz" +
754                 "zz\5\5\5\5\5\5\5\5\5\5\5zz\13" +
755                 "\13\13\42\42\42\42\42\42\42\42\42\42\42\32zz" +
756                 "zz\30\30\30\30\30\30\30\30\30\30\30\30zz" +
757                 "zz\42\42\42\42\42\42\42\2\40\37\27zzz" +
758                 "zz\43\42\42\42\42\42\42\42\42\42\42\5\5\5" +
759                 "\5\5\34\32\2\2\2\2\40\16\13\13\13\13\14z" +
760                 "zzzz\14z\23zzzzzzzz\43" +
761                 "\42\42\42\42\42\42\42\42\42\42\42\5\5\5\5z" +
762                 "z\16\16\16\16\7\7\7\7\7\7\2\40\22\23z" +
763                 "zz\1zz\43\43\43\43\43\43\43\43\43z\43" +
764                 "\43\43\43\5\5\42\42\42\42\42\42\42z\42\42\42" +
765                 "\42\2\2\40\37\1zzzzzzzzzz" +
766                 "zzzzz\30\30\30\30\30\30\30\30\30\30\30" +
767                 "\30\30zzz\17\17\17\17\17\17\17\17\17\17\17" +
768                 "\17\17\17\42\42\42\42\42\2\2zzzzzz" +
769                 "zzz\43\43\43\43\43\43\43z\43\43z\43\5" +
770                 "\5\5\5\42\42\42\42\42\42zzz\42z\42\42" +
771                 "z\42\2\40\27\42\32\23\15\13zzzzzz" +
772                 "zz\43\43\43\43\43\43z\43\43z\43\43\5\5" +
773                 "\5\5\5\5\5\5\5\5\42\42\42\42\42z\42\42" +
774                 "\2\40\23zzzzzzzz\5\5\14\42\42" +
775                 "\42\42zzzzzzzzzz");
776     }
777 
    /**
     * Holder for a single {@link CodePointTrie}. {@code INSTANCE} is created
     * when this nested class is initialized, by passing the string literal
     * below to {@code makeTrie}.
     * <p>The literal is serialized trie data and must be kept exactly as is.
     * NOTE(review): "Vo" presumably stands for the Vertical_Orientation
     * property, and the literal looks machine-generated -- confirm against
     * the users of {@code VoTrie.INSTANCE} and the data generator.</p>
     */
    private static final class VoTrie {
        static final CodePointTrie INSTANCE = makeTrie(
                "\63\151\162\124\102z\114\4\74\3\14zzz\200\10" +
                "zz\100z\131z\230zzzzzzzzz" +
                "zzzzzz\320zzzzzzzzz" +
                "zzzzzzzzzzzzzzzz" +
                "zzzzzzzzzzzzzzzz" +
                "zzzzzzzzzzzzzzzz" +
                "zzzzzzzzzzzzzzzz" +
                "zzzzzzzzzzzzzzzz" +
                "zzzzzzzzzzzzzzzz" +
                "\73\3\125\3\143\3\171\3\231\3\267\3\322\3\354\3" +
                "\125\3\125\3\125\3\14\4\125\3\125\3\125\3\14\4" +
                "\54\4\54\4\54\4\54\4\54\4\54\4\54\4\54\4" +
                "\54\4\54\4\54\4\54\4\54\4\54\4\54\4\54\4" +
                "\54\4\54\4\54\4\54\4\54\4\54\4\54\4\54\4" +
                "\54\4\54\4\54\4\54\4\54\4\54\4\54\4\54\4" +
                "\54\4\54\4\54\4\54\4\54\4\54\4\54\4\54\4" +
                "\54\4\54\4\54\4\54\4\125\3\125\3\125\3\14\4" +
                "\125\3\125\3\125\3\14\4zz\20z\40z\60z" +
                "\100z\120z\140z\160z\131z\151z\171z\211z" +
                "\230z\250z\270z\310zzz\20z\40z\60z" +
                "zz\20z\40z\60zzz\20z\40z\60z" +
                "zz\20z\40z\60z\320z\340z\360zz\1" +
                "zz\20z\40z\60zzz\20z\40z\60z" +
                "zz\20z\40z\60zzz\20z\40z\60z" +
                "zz\20z\40z\60zzz\20z\40z\60z" +
                "zz\20z\40z\60zzz\20z\40z\60z" +
                "zzzzzzzzzzzzzzzz" +
                "zzzzzzzzzzzzzzzz" +
                "\20\1\20\1\20\1\20\1\20\1\20\1\20\1\20\1" +
                "\20\1\20\1\20\1\20\1\20\1\20\1\20\1\20\1" +
                "\17\1\20\1\20\1\20\1\20\1\20\1\20\1\20\1" +
                "\20\1\20\1\20\1\20\1\20\1\20\1\20\1\20\1" +
                "\20\1\20\1\20\1\20\1\20\1\20\1\20\1\20\1" +
                "\20\1\20\1\20\1\20\1\20\1\20\1\20\1\20\1" +
                "zzzzzzzzzzzzzzzz" +
                "zzzzzzzzzzzzzzzz" +
                "zzzzzzzzzzzzzzzz" +
                "\20\1\20\1\20\1\20\1\20\1zzzzzz" +
                "zzzzzzzzzzzzzzzz" +
                "zzzzzzzzzz\251z\226z\36\1" +
                "\54\1\256z\252zzzzzzzzzzz" +
                "zz\3\1\74\1zz\114\1\130\1\146\1\13\1" +
                "\165\1\20\1\20\1\20\1\204\1zzzzzz" +
                "zzzzzzzz\162zzz\366zzz" +
                "zzzzzzzzzzzzzzzz" +
                "zzzzzz\220\1\20\1\230\1zzzz" +
                "zzzz\3\1\20\1\25\1zz\354z\250\1" +
                "\266\1\16\1\20\1\20\1\306\1\20\1\20\1\20\1" +
                "\20\1\20\1\20\1\20\1\20\1\20\1\20\1\20\1" +
                "\20\1\20\1zzzzzzzzzzzz" +
                "zzzzzzzz\20\1\20\1\20\1\20\1" +
                "\20\1\20\1\26\1\20\1\20\1\20\1\20\1\20\1" +
                "\20\1\20\1\20\1\20\1\20\1\20\1\20\1\20\1" +
                "\20\1\20\1\20\1\20\1\20\1\20\1\20\1\30\1" +
                "\12\1\20\1\322\1zzzzzzzzzz" +
                "zzzzzzzzzzzzzzzz" +
                "zzzzzzzz\16\1\20\1zzzz" +
                "\26\1zzzzzzzzzz\10\1\20\1" +
                "\342\1\24\1\20\1zzzzzzzzzz" +
                "zzzzzz\20\1\20\1\20\1\20\1\20\1" +
                "\20\1\20\1\20\1\20\1\20\1\20\1\20\1\20\1" +
                "\20\1\20\1\20\1\20\1\20\1\20\1\20\1\20\1" +
                "\20\1\20\1\20\1\361\1\377\1\20\1\16\2\35\2" +
                "\20\1\52\2\20\1\67\2\106\2\126\2\20\1\52\2" +
                "\20\1\67\2\141\2\20\1\20\1\156\2\20\1\20\1" +
                "\20\1\20\1\20\1\20\1\20\1\20\1\20\1\20\1" +
                "\20\1\20\1\176\2\20\1\20\1\20\1\20\1\20\1" +
                "\20\1\20\1\20\1\20\1\20\1\20\1\20\1\20\1" +
                "\20\1\20\1\20\1\176\2\176\2\176\2\176\2\176\2" +
                "\206\2\20\1\216\2\20\1\20\1\20\1\20\1\20\1" +
                "\20\1\20\1\20\1\20\1\20\1\20\1\20\1\20\1" +
                "\20\1\20\1\20\1\20\1\20\1\20\1\20\1\20\1" +
                "\20\1\20\1\20\1\20\1\20\1\20\1\20\1\20\1" +
                "\20\1\20\1\20\1zzzzzzzzzz" +
                "zzzzzzzzzzzzzzzz" +
                "zzzzzzzzzzzzzzzz" +
                "zz\20\1\20\1zzzzzzzzzz" +
                "zzzzzz\20\1zz\20\1\27\1\233\2" +
                "\252\2zzzzzzzzzzzzzz" +
                "zzzz\272\2\311\2\20\1\331\2\20\1\351\2" +
                "\370\2zzzzzzzzzzzzzz" +
                "\10\3\30\3zzzzzzzzzzzz" +
                "zzzzzzzzzzzzzzzz" +
                "zzzzzzzzzzzzzzzz" +
                "zzzz\20\1\20\1zzzzzzzz" +
                "zzzzzzzzzzzzzzzz" +
                "zzzzzzzzzzzzzzzz" +
                "zzzzzzzz\20\1\20\1\20\1\20\1" +
                "\20\1\20\1\20\1\20\1zzzzzzzz" +
                "zzzzzzzzzzzzzzzz" +
                "zzzzzzzzzzzzzzzz" +
                "zzzzzzzzzzzzzzzz" +
                "zzzz\20\1\20\1\20\1\20\1\20\1\20\1" +
                "\20\1\20\1\20\1\20\1\20\1\20\1\20\1\20\1" +
                "\20\1\20\1\20\1\20\1\20\1zzzzzz" +
                "zz\20\1\20\1\20\1\20\1\20\1\20\1\20\1" +
                "\20\1\20\1zzzzzzzzzzzz" +
                "zzzzzzzzzzzzzzzz" +
                "\20\1\20\1\20\1\20\1\20\1\20\1\20\1\20\1" +
                "\20\1\20\1zzzzzzzzzzzz" +
                "zzzz\50\3\20\1\20\1\20\1\20\1\20\1" +
                "\20\1\20\1\20\1\20\1\20\1\20\1\20\1\20\1" +
                "\20\1\20\1\20\1\20\1\20\1\20\1\20\1\20\1" +
                "\20\1\20\1\20\1\20\1\20\1\20\1\20\1\20\1" +
                "\20\1\20\1\22\1\204z\230z\250z\250z\250z" +
                "\250z\250z\250z\310z\14z\350zz\1\25\1" +
                "\14z\14z\14z\64\1\123\1\162\1\221\1\14z" +
                "\253\1\14z\313\1\353\1\13\2\43\2\43\2\43\2" +
                "\43\2\43\2\43\2\43\2\43\2\43\2\43\2\43\2" +
                "\43\2\43\2\43\2\43\2\43\2\43\2\43\2\43\2" +
                "\43\2\43\2\43\2\43\2\43\2\43\2\43\2\43\2" +
                "\43\2\43\2\43\2\43\2\43\2\373z\14z\103\2" +
                "\14z\43\2\43\2\43\2\43\2\43\2\43\2\43\2" +
                "\43\2\43\2\43\2\43\2\43\2\14z\14z\14z" +
                "\14z\43\2\43\2\43\2\43\2\43\2\43\2\43\2" +
                "\43\2\43\2\43\2\43\2\43\2\43\2\370z\14z" +
                "\142\2\14z\14z\14z\14z\202\2\14z\14z" +
                "\14z\14z\14z\234\2\14z\14z\375z\14z" +
                "\14z\14z\14z\14z\14z\14z\14z\14z" +
                "\14z\43\2\43\2\271\2\14z\14z\14z\14z" +
                "\14z\43\2z\1\14z\14z\14z\14z\14z" +
                "\14z\14z\14z\14z\14z\14z\14z\14z" +
                "\14z\14z\14z\14z\14z\14z\274\2\43\2" +
                "\43\2\43\2\43\2\43\2\43\2\43\2\43\2\370z" +
                "\14z\14z\14z\14z\14z\14z\14z\14z" +
                "\14z\14z\14z\14z\14z\14z\14z\14z" +
                "\14z\14z\332\2\370z\14z\14z\14z\14z" +
                "\14z\14z\14z\14z\43\2\372\2\14z\14z" +
                "\43\2\375z\14z\14z\14z\14z\14z\14z" +
                "\14z\14z\14z\14z\43\2\32\3\43\2\43\2" +
                "\310z\265\2\14z\14z\43\2\43\2\43\2\43\2" +
                "\43\2\43\2\43\2\43\2\43\2\43\2\43\2\43\2" +
                "\43\2\43\2\43\2\43\2\43\2\43\2\43\2\43\2" +
                "\43\2\43\2\43\2\43\2\43\2\43\2\43\2\43\2" +
                "\43\2\43\2\43\2\33\3\14z\14z\14z\14z" +
                "\14z\14z\14z\14z\14z\14z\14z\14z" +
                "\14z\14z\14z\14z\14z\14z\14z\14z" +
                "\14z\14z\14z\14z\14z\14z\14z\14z" +
                "\14z\14z\14z\14zzzzzzzzz" +
                "zzzzzzzzzzzzzzzz" +
                "zzzzzzzzzzzzzzzz" +
                "zzzzzzzzzzzzzzzz" +
                "zzzzzzzzzzzzzzzz" +
                "zzzzzzzzzzzzzzzz" +
                "zzzzzzzzzzzzzzzz" +
                "zzzzzzzzzzzzzzzz" +
                "zzzzzzzz\3z\3zzzz\3" +
                "zz\3zzzzzzzzzz\3\3\3" +
                "zzzzzzzzzzzzzzzz" +
                "zzzzzzz\3zzzzzzzz" +
                "zzzzzzzzzzzzzzzz" +
                "zzzzzzz\3zzzzzzzz" +
                "zzzzzzzzzzzzzzzz" +
                "zzzzzzzzzzzzzzzz" +
                "zz\3\3zzzzzzzzzzzz" +
                "zzzzzzzz\3\3\3\3\3\3\3\3" +
                "\3\3\3\3\3\3\3\3zzzzzzzz" +
                "z\3\3zzz\3zzzz\3\3\3zz" +
                "zzzz\3z\3\3\3zzzzzzz" +
                "zzzz\3\3z\3\3\3\3\3\3\3zz" +
                "zzz\3\3z\3\3zzzzzz\3\3" +
                "\3\3z\3z\3z\3zzzz\3zzz" +
                "zz\3\3\3\3\3\3z\3\3z\3\3\3\3" +
                "\3\3\3\3\3\3zz\3\3\3\3\3\3\3\3" +
                "zzzz\3\3\3\3\3\1\1\3zzzz" +
                "\3\3\3\3\3\3\3\3\3\3\3\3\3\3z\3" +
                "\3\3\3\3\3\3\3\3\3\3zzzz\3\3" +
                "\3z\3\3\3\3\3\3\3\3\3\3\3\3zz" +
                "zzzzzzzzzz\3\3z\3\3\3" +
                "\3\3\3\3\3\3\3\3\3\3\2\2\3\3\3\3" +
                "\3\1\1\1\1\1\1\1\1\3\3\1\1\1\1\1" +
                "\1\1\1\1\1\1\1\3\3\3\3\3\3\3\3\3" +
                "\3\3\3\3\3\3\2\3\2\3\2\3\2\3\2\3" +
                "\3\3\3\3\3\2\3\3\3\3\3\3\3\3\3\3" +
                "\3\3\2\3\2\3\2\3\3\3\3\3\3\2\3\3" +
                "\3\3\3\2\2\3\3\3\3\2\2\3\3\3\1\2" +
                "\3\2\3\2\3\2\3\2\3\3\3\3\3\3\2\2" +
                "\3\3\3\3\3\1\3\3\3\3\3\3\3\2\3\3" +
                "\3\3\3\3\3\3\2\2\2\2\2\2\2\2\2\2" +
                "\2\2\2\2\2\2\3\3\3\3\3\3\3\3\3\3" +
                "\3\2\2\2\2\2\3\3\3\3\3z\1\1\1\1" +
                "\1\1\3\3\3zzzz\3\3\3\3\3\3\3" +
                "\3\3z\2\3\3\3\3\3\3\1\1\3\3\2z" +
                "\2\3\3\3\3\3\3\3\3\3\3\1\1zzz" +
                "\2\3\3\3\3\3\3\3\3\3\3\3\1\3\1\3" +
                "\1\3\3\3\3\3\3\3\3\3\3\3\1\1\1\1" +
                "\1zzzzzzzzzzzzzzz" +
                "\3\3\3\1\3\3\3\3zzzzzzzz" +
                "\3\3\3\3\3\3\3\3\3zzz\3\3zz" +
                "\2\2\3\3\3\3\3\3\3\3\3\3\3\3\3\3" +
                "zzzz");
    }
972 
973     // public methods ----------------------------------------------------
974 
975     /**
976     * Gets the main property value for code point ch.
977     * @param ch code point whose property value is to be retrieved
978     * @return property value of code point
979     */
getProperty(int ch)980     public final int getProperty(int ch)
981     {
982         return m_trie_.get(ch);
983     }
984 
985     /**
986      * Gets the unicode additional properties.
987      * Java version of C u_getUnicodeProperties().
988      * @param codepoint codepoint whose additional properties is to be
989      *                  retrieved
990      * @param column The column index.
991      * @return unicode properties
992      */
getAdditional(int codepoint, int column)993     public int getAdditional(int codepoint, int column) {
994         assert column >= 0;
995         if (column >= m_additionalColumnsCount_) {
996             return 0;
997         }
998         return m_additionalVectors_[m_additionalTrie_.get(codepoint) + column];
999     }
1000 
1001     static final int MY_MASK = UCharacterProperty.TYPE_MASK
1002         & ((1<<UCharacterCategory.UPPERCASE_LETTER) |
1003             (1<<UCharacterCategory.LOWERCASE_LETTER) |
1004             (1<<UCharacterCategory.TITLECASE_LETTER) |
1005             (1<<UCharacterCategory.MODIFIER_LETTER) |
1006             (1<<UCharacterCategory.OTHER_LETTER));
1007 
1008 
1009        /**
1010      * <p>Get the "age" of the code point.</p>
1011      * <p>The "age" is the Unicode version when the code point was first
1012      * designated (as a non-character or for Private Use) or assigned a
1013      * character.</p>
1014      * <p>This can be useful to avoid emitting code points to receiving
1015      * processes that do not accept newer characters.</p>
1016      * <p>The data is from the UCD file DerivedAge.txt.</p>
1017      * <p>This API does not check the validity of the codepoint.</p>
1018      * @param codepoint The code point.
1019      * @return the Unicode version number
1020      */
getAge(int codepoint)1021     public VersionInfo getAge(int codepoint)
1022     {
1023         int version = getAdditional(codepoint, 0) >> AGE_SHIFT_;
1024         return VersionInfo.getInstance(
1025                            (version >> FIRST_NIBBLE_SHIFT_) & LAST_NIBBLE_MASK_,
1026                            version & LAST_NIBBLE_MASK_, 0, 0);
1027     }
1028 
    // General-category masks, each obtained from getMask() for one UCharacter
    // category constant. isgraphPOSIX() tests a code point's category mask
    // against a union of these.
    /** Mask for the UCharacter.UNASSIGNED category. */
    private static final int GC_CN_MASK = getMask(UCharacter.UNASSIGNED);
    /** Mask for the UCharacter.CONTROL category. */
    private static final int GC_CC_MASK = getMask(UCharacter.CONTROL);
    /** Mask for the UCharacter.SURROGATE category. */
    private static final int GC_CS_MASK = getMask(UCharacter.SURROGATE);
    /** Mask for the UCharacter.SPACE_SEPARATOR category. */
    private static final int GC_ZS_MASK = getMask(UCharacter.SPACE_SEPARATOR);
    /** Mask for the UCharacter.LINE_SEPARATOR category. */
    private static final int GC_ZL_MASK = getMask(UCharacter.LINE_SEPARATOR);
    /** Mask for the UCharacter.PARAGRAPH_SEPARATOR category. */
    private static final int GC_ZP_MASK = getMask(UCharacter.PARAGRAPH_SEPARATOR);
    /**
     * Combined mask for all three separator categories
     * (space, line and paragraph separator).
     */
    private static final int GC_Z_MASK = GC_ZS_MASK|GC_ZL_MASK|GC_ZP_MASK;
1037 
1038     /**
1039      * Checks if c is in
1040      * [^\p{space}\p{gc=Control}\p{gc=Surrogate}\p{gc=Unassigned}]
1041      * with space=\p{Whitespace} and Control=Cc.
1042      * Implements UCHAR_POSIX_GRAPH.
1043      * @internal
1044      */
isgraphPOSIX(int c)1045     private static final boolean isgraphPOSIX(int c) {
1046         /* \p{space}\p{gc=Control} == \p{gc=Z}\p{Control} */
1047         /* comparing ==0 returns FALSE for the categories mentioned */
1048         return (getMask(UCharacter.getType(c))&
1049                 (GC_CC_MASK|GC_CS_MASK|GC_CN_MASK|GC_Z_MASK))
1050                ==0;
1051     }
1052 
1053     // binary properties --------------------------------------------------- ***
1054 
1055     private class BinaryProperty {
1056         int column;  // SRC_PROPSVEC column, or "source" if mask==0
1057         int mask;
BinaryProperty(int column, int mask)1058         BinaryProperty(int column, int mask) {
1059             this.column=column;
1060             this.mask=mask;
1061         }
BinaryProperty(int source)1062         BinaryProperty(int source) {
1063             this.column=source;
1064             this.mask=0;
1065         }
getSource()1066         final int getSource() {
1067             return mask==0 ? column : SRC_PROPSVEC;
1068         }
contains(int c)1069         boolean contains(int c) {
1070             // systematic, directly stored properties
1071             return (getAdditional(c, column)&mask)!=0;
1072         }
1073     }
1074 
1075     private class CaseBinaryProperty extends BinaryProperty {  // case mapping properties
1076         int which;
CaseBinaryProperty(int which)1077         CaseBinaryProperty(int which) {
1078             super(SRC_CASE);
1079             this.which=which;
1080         }
1081         @Override
contains(int c)1082         boolean contains(int c) {
1083             return UCaseProps.INSTANCE.hasBinaryProperty(c, which);
1084         }
1085     }
1086 
1087     private class NormInertBinaryProperty extends BinaryProperty {  // UCHAR_NF*_INERT properties
1088         int which;
NormInertBinaryProperty(int source, int which)1089         NormInertBinaryProperty(int source, int which) {
1090             super(source);
1091             this.which=which;
1092         }
1093         @Override
contains(int c)1094         boolean contains(int c) {
1095             return Norm2AllModes.getN2WithImpl(which-UProperty.NFD_INERT).isInert(c);
1096         }
1097     }
1098 
1099     BinaryProperty[] binProps={
1100         /*
1101          * Binary-property implementations must be in order of corresponding UProperty,
1102          * and there must be exactly one entry per binary UProperty.
1103          */
1104         new BinaryProperty(1, (1<<ALPHABETIC_PROPERTY_)),
1105         new BinaryProperty(1, (1<<ASCII_HEX_DIGIT_PROPERTY_)),
1106         new BinaryProperty(SRC_BIDI) {  // UCHAR_BIDI_CONTROL
1107             @Override
1108             boolean contains(int c) {
1109                 return UBiDiProps.INSTANCE.isBidiControl(c);
1110             }
1111         },
1112         new BinaryProperty(SRC_BIDI) {  // UCHAR_BIDI_MIRRORED
1113             @Override
1114             boolean contains(int c) {
1115                 return UBiDiProps.INSTANCE.isMirrored(c);
1116             }
1117         },
1118         new BinaryProperty(1, (1<<DASH_PROPERTY_)),
1119         new BinaryProperty(1, (1<<DEFAULT_IGNORABLE_CODE_POINT_PROPERTY_)),
1120         new BinaryProperty(1, (1<<DEPRECATED_PROPERTY_)),
1121         new BinaryProperty(1, (1<<DIACRITIC_PROPERTY_)),
1122         new BinaryProperty(1, (1<<EXTENDER_PROPERTY_)),
1123         new BinaryProperty(SRC_NFC) {  // UCHAR_FULL_COMPOSITION_EXCLUSION
1124             @Override
1125             boolean contains(int c) {
1126                 // By definition, Full_Composition_Exclusion is the same as NFC_QC=No.
1127                 Normalizer2Impl impl=Norm2AllModes.getNFCInstance().impl;
1128                 return impl.isCompNo(impl.getNorm16(c));
1129             }
1130         },
1131         new BinaryProperty(1, (1<<GRAPHEME_BASE_PROPERTY_)),
1132         new BinaryProperty(1, (1<<GRAPHEME_EXTEND_PROPERTY_)),
1133         new BinaryProperty(1, (1<<GRAPHEME_LINK_PROPERTY_)),
1134         new BinaryProperty(1, (1<<HEX_DIGIT_PROPERTY_)),
1135         new BinaryProperty(1, (1<<HYPHEN_PROPERTY_)),
1136         new BinaryProperty(1, (1<<ID_CONTINUE_PROPERTY_)),
1137         new BinaryProperty(1, (1<<ID_START_PROPERTY_)),
1138         new BinaryProperty(1, (1<<IDEOGRAPHIC_PROPERTY_)),
1139         new BinaryProperty(1, (1<<IDS_BINARY_OPERATOR_PROPERTY_)),
1140         new BinaryProperty(1, (1<<IDS_TRINARY_OPERATOR_PROPERTY_)),
1141         new BinaryProperty(SRC_BIDI) {  // UCHAR_JOIN_CONTROL
1142             @Override
1143             boolean contains(int c) {
1144                 return UBiDiProps.INSTANCE.isJoinControl(c);
1145             }
1146         },
1147         new BinaryProperty(1, (1<<LOGICAL_ORDER_EXCEPTION_PROPERTY_)),
1148         new CaseBinaryProperty(UProperty.LOWERCASE),
1149         new BinaryProperty(1, (1<<MATH_PROPERTY_)),
1150         new BinaryProperty(1, (1<<NONCHARACTER_CODE_POINT_PROPERTY_)),
1151         new BinaryProperty(1, (1<<QUOTATION_MARK_PROPERTY_)),
1152         new BinaryProperty(1, (1<<RADICAL_PROPERTY_)),
1153         new CaseBinaryProperty(UProperty.SOFT_DOTTED),
1154         new BinaryProperty(1, (1<<TERMINAL_PUNCTUATION_PROPERTY_)),
1155         new BinaryProperty(1, (1<<UNIFIED_IDEOGRAPH_PROPERTY_)),
1156         new CaseBinaryProperty(UProperty.UPPERCASE),
1157         new BinaryProperty(1, (1<<WHITE_SPACE_PROPERTY_)),
1158         new BinaryProperty(1, (1<<XID_CONTINUE_PROPERTY_)),
1159         new BinaryProperty(1, (1<<XID_START_PROPERTY_)),
1160         new CaseBinaryProperty(UProperty.CASE_SENSITIVE),
1161         new BinaryProperty(1, (1<<S_TERM_PROPERTY_)),
1162         new BinaryProperty(1, (1<<VARIATION_SELECTOR_PROPERTY_)),
1163         new NormInertBinaryProperty(SRC_NFC, UProperty.NFD_INERT),
1164         new NormInertBinaryProperty(SRC_NFKC, UProperty.NFKD_INERT),
1165         new NormInertBinaryProperty(SRC_NFC, UProperty.NFC_INERT),
1166         new NormInertBinaryProperty(SRC_NFKC, UProperty.NFKC_INERT),
1167         new BinaryProperty(SRC_NFC_CANON_ITER) {  // UCHAR_SEGMENT_STARTER
1168             @Override
1169             boolean contains(int c) {
1170                 return Norm2AllModes.getNFCInstance().impl.
1171                     ensureCanonIterData().isCanonSegmentStarter(c);
1172             }
1173         },
1174         new BinaryProperty(1, (1<<PATTERN_SYNTAX)),
1175         new BinaryProperty(1, (1<<PATTERN_WHITE_SPACE)),
1176         new BinaryProperty(SRC_CHAR_AND_PROPSVEC) {  // UCHAR_POSIX_ALNUM
1177             @Override
1178             boolean contains(int c) {
1179                 return UCharacter.isUAlphabetic(c) || UCharacter.isDigit(c);
1180             }
1181         },
1182         new BinaryProperty(SRC_CHAR) {  // UCHAR_POSIX_BLANK
1183             @Override
1184             boolean contains(int c) {
1185                 // "horizontal space"
1186                 if(c<=0x9f) {
1187                     return c==9 || c==0x20; /* TAB or SPACE */
1188                 } else {
1189                     /* Zs */
1190                     return UCharacter.getType(c)==UCharacter.SPACE_SEPARATOR;
1191                 }
1192             }
1193         },
1194         new BinaryProperty(SRC_CHAR) {  // UCHAR_POSIX_GRAPH
1195             @Override
1196             boolean contains(int c) {
1197                 return isgraphPOSIX(c);
1198             }
1199         },
1200         new BinaryProperty(SRC_CHAR) {  // UCHAR_POSIX_PRINT
1201             @Override
1202             boolean contains(int c) {
1203                 /*
1204                  * Checks if codepoint is in \p{graph}\p{blank} - \p{cntrl}.
1205                  *
1206                  * The only cntrl character in graph+blank is TAB (in blank).
1207                  * Here we implement (blank-TAB)=Zs instead of calling u_isblank().
1208                  */
1209                 return (UCharacter.getType(c)==UCharacter.SPACE_SEPARATOR) || isgraphPOSIX(c);
1210             }
1211         },
1212         new BinaryProperty(SRC_CHAR) {  // UCHAR_POSIX_XDIGIT
1213             @Override
1214             boolean contains(int c) {
1215                 /* check ASCII and Fullwidth ASCII a-fA-F */
1216                 if(
1217                     (c<=0x66 && c>=0x41 && (c<=0x46 || c>=0x61)) ||
1218                     (c>=0xff21 && c<=0xff46 && (c<=0xff26 || c>=0xff41))
1219                 ) {
1220                     return true;
1221                 }
1222                 return UCharacter.getType(c)==UCharacter.DECIMAL_DIGIT_NUMBER;
1223             }
1224         },
1225         new CaseBinaryProperty(UProperty.CASED),
1226         new CaseBinaryProperty(UProperty.CASE_IGNORABLE),
1227         new CaseBinaryProperty(UProperty.CHANGES_WHEN_LOWERCASED),
1228         new CaseBinaryProperty(UProperty.CHANGES_WHEN_UPPERCASED),
1229         new CaseBinaryProperty(UProperty.CHANGES_WHEN_TITLECASED),
1230         new BinaryProperty(SRC_CASE_AND_NORM) {  // UCHAR_CHANGES_WHEN_CASEFOLDED
1231             @Override
1232             boolean contains(int c) {
1233                 String nfd=Norm2AllModes.getNFCInstance().impl.getDecomposition(c);
1234                 if(nfd!=null) {
1235                     /* c has a decomposition */
1236                     c=nfd.codePointAt(0);
1237                     if(Character.charCount(c)!=nfd.length()) {
1238                         /* multiple code points */
1239                         c=-1;
1240                     }
1241                 } else if(c<0) {
1242                     return false;  /* protect against bad input */
1243                 }
1244                 if(c>=0) {
1245                     /* single code point */
1246                     UCaseProps csp=UCaseProps.INSTANCE;
1247                     UCaseProps.dummyStringBuilder.setLength(0);
1248                     return csp.toFullFolding(c, UCaseProps.dummyStringBuilder,
1249                                              UCharacter.FOLD_CASE_DEFAULT)>=0;
1250                 } else {
1251                     String folded=UCharacter.foldCase(nfd, true);
1252                     return !folded.equals(nfd);
1253                 }
1254             }
1255         },
1256         new CaseBinaryProperty(UProperty.CHANGES_WHEN_CASEMAPPED),
1257         new BinaryProperty(SRC_NFKC_CF) {  // UCHAR_CHANGES_WHEN_NFKC_CASEFOLDED
1258             @Override
1259             boolean contains(int c) {
1260                 Normalizer2Impl kcf=Norm2AllModes.getNFKC_CFInstance().impl;
1261                 String src=UTF16.valueOf(c);
1262                 StringBuilder dest=new StringBuilder();
1263                 // Small destCapacity for NFKC_CF(c).
1264                 Normalizer2Impl.ReorderingBuffer buffer=new Normalizer2Impl.ReorderingBuffer(kcf, dest, 5);
1265                 kcf.compose(src, 0, src.length(), false, true, buffer);
1266                 return !Normalizer2Impl.UTF16Plus.equal(dest, src);
1267             }
1268         },
1269         new BinaryProperty(2, 1<<PROPS_2_EMOJI),
1270         new BinaryProperty(2, 1<<PROPS_2_EMOJI_PRESENTATION),
1271         new BinaryProperty(2, 1<<PROPS_2_EMOJI_MODIFIER),
1272         new BinaryProperty(2, 1<<PROPS_2_EMOJI_MODIFIER_BASE),
1273         new BinaryProperty(2, 1<<PROPS_2_EMOJI_COMPONENT),
1274         new BinaryProperty(SRC_PROPSVEC) {  // REGIONAL_INDICATOR
1275             // Property starts are a subset of lb=RI etc.
1276             @Override
1277             boolean contains(int c) {
1278                 return 0x1F1E6<=c && c<=0x1F1FF;
1279             }
1280         },
1281         new BinaryProperty(1, 1<<PREPENDED_CONCATENATION_MARK),
1282         new BinaryProperty(2, 1<<PROPS_2_EXTENDED_PICTOGRAPHIC),
1283     };
1284 
hasBinaryProperty(int c, int which)1285     public boolean hasBinaryProperty(int c, int which) {
1286          if(which<UProperty.BINARY_START || UProperty.BINARY_LIMIT<=which) {
1287             // not a known binary property
1288             return false;
1289         } else {
1290             return binProps[which].contains(c);
1291         }
1292     }
1293 
1294     // int-value and enumerated properties --------------------------------- ***
1295 
getType(int c)1296     public int getType(int c) {
1297         return getProperty(c)&TYPE_MASK;
1298     }
1299 
1300     /*
1301      * Map some of the Grapheme Cluster Break values to Hangul Syllable Types.
1302      * Hangul_Syllable_Type is fully redundant with a subset of Grapheme_Cluster_Break.
1303      */
1304     private static final int /* UHangulSyllableType */ gcbToHst[]={
1305         HangulSyllableType.NOT_APPLICABLE,   /* U_GCB_OTHER */
1306         HangulSyllableType.NOT_APPLICABLE,   /* U_GCB_CONTROL */
1307         HangulSyllableType.NOT_APPLICABLE,   /* U_GCB_CR */
1308         HangulSyllableType.NOT_APPLICABLE,   /* U_GCB_EXTEND */
1309         HangulSyllableType.LEADING_JAMO,     /* U_GCB_L */
1310         HangulSyllableType.NOT_APPLICABLE,   /* U_GCB_LF */
1311         HangulSyllableType.LV_SYLLABLE,      /* U_GCB_LV */
1312         HangulSyllableType.LVT_SYLLABLE,     /* U_GCB_LVT */
1313         HangulSyllableType.TRAILING_JAMO,    /* U_GCB_T */
1314         HangulSyllableType.VOWEL_JAMO        /* U_GCB_V */
1315         /*
1316          * Omit GCB values beyond what we need for hst.
1317          * The code below checks for the array length.
1318          */
1319     };
1320 
1321     private class IntProperty {
1322         int column;  // SRC_PROPSVEC column, or "source" if mask==0
1323         int mask;
1324         int shift;
IntProperty(int column, int mask, int shift)1325         IntProperty(int column, int mask, int shift) {
1326             this.column=column;
1327             this.mask=mask;
1328             this.shift=shift;
1329         }
IntProperty(int source)1330         IntProperty(int source) {
1331             this.column=source;
1332             this.mask=0;
1333         }
getSource()1334         final int getSource() {
1335             return mask==0 ? column : SRC_PROPSVEC;
1336         }
getValue(int c)1337         int getValue(int c) {
1338             // systematic, directly stored properties
1339             return (getAdditional(c, column)&mask)>>>shift;
1340         }
getMaxValue(int which)1341         int getMaxValue(int which) {
1342             return (getMaxValues(column)&mask)>>>shift;
1343         }
1344     }
1345 
1346     private class BiDiIntProperty extends IntProperty {
BiDiIntProperty()1347         BiDiIntProperty() {
1348             super(SRC_BIDI);
1349         }
1350         @Override
getMaxValue(int which)1351         int getMaxValue(int which) {
1352             return UBiDiProps.INSTANCE.getMaxValue(which);
1353         }
1354     }
1355 
1356     private class CombiningClassIntProperty extends IntProperty {
CombiningClassIntProperty(int source)1357         CombiningClassIntProperty(int source) {
1358             super(source);
1359         }
1360         @Override
getMaxValue(int which)1361         int getMaxValue(int which) {
1362             return 0xff;
1363         }
1364     }
1365 
1366     private class NormQuickCheckIntProperty extends IntProperty {  // UCHAR_NF*_QUICK_CHECK properties
1367         int which;
1368         int max;
NormQuickCheckIntProperty(int source, int which, int max)1369         NormQuickCheckIntProperty(int source, int which, int max) {
1370             super(source);
1371             this.which=which;
1372             this.max=max;
1373         }
1374         @Override
getValue(int c)1375         int getValue(int c) {
1376             return Norm2AllModes.getN2WithImpl(which-UProperty.NFD_QUICK_CHECK).getQuickCheck(c);
1377         }
1378         @Override
getMaxValue(int which)1379         int getMaxValue(int which) {
1380             return max;
1381         }
1382     }
1383 
    /*
     * Accessors for the integer/enumerated properties.
     * getIntPropertyValue(), getIntPropertyMaxValue() and getSource() index this
     * table with (which-UProperty.INT_START), so the order of the entries is
     * significant and must not be changed independently of the selectors.
     */
    IntProperty intProps[]={
        new BiDiIntProperty() {  // BIDI_CLASS
            @Override
            int getValue(int c) {
                return UBiDiProps.INSTANCE.getClass(c);
            }
        },
        new IntProperty(0, BLOCK_MASK_, BLOCK_SHIFT_),  // block bits of vector word 0
        new CombiningClassIntProperty(SRC_NFC) {  // CANONICAL_COMBINING_CLASS
            @Override
            int getValue(int c) {
                return Normalizer2.getNFDInstance().getCombiningClass(c);
            }
        },
        new IntProperty(2, DECOMPOSITION_TYPE_MASK_, 0),  // decomposition type bits of vector word 2
        new IntProperty(0, EAST_ASIAN_MASK_, EAST_ASIAN_SHIFT_),  // East Asian Width bits of vector word 0
        new IntProperty(SRC_CHAR) {  // GENERAL_CATEGORY
            @Override
            int getValue(int c) {
                return getType(c);
            }
            @Override
            int getMaxValue(int which) {
                return UCharacterCategory.CHAR_CATEGORY_COUNT-1;
            }
        },
        new BiDiIntProperty() {  // JOINING_GROUP
            @Override
            int getValue(int c) {
                return UBiDiProps.INSTANCE.getJoiningGroup(c);
            }
        },
        new BiDiIntProperty() {  // JOINING_TYPE
            @Override
            int getValue(int c) {
                return UBiDiProps.INSTANCE.getJoiningType(c);
            }
        },
        new IntProperty(2, LB_MASK, LB_SHIFT),  // LINE_BREAK
        new IntProperty(SRC_CHAR) {  // NUMERIC_TYPE
            @Override
            int getValue(int c) {
                return ntvGetType(getNumericTypeValue(getProperty(c)));
            }
            @Override
            int getMaxValue(int which) {
                return NumericType.COUNT-1;
            }
        },
        // Script: the value comes from UScript.getScript(); column and mask are
        // still used by the inherited getSource() and getMaxValue().
        new IntProperty(0, SCRIPT_MASK_, 0) {
            @Override
            int getValue(int c) {
                return UScript.getScript(c);
            }
        },
        new IntProperty(SRC_PROPSVEC) {  // HANGUL_SYLLABLE_TYPE
            @Override
            int getValue(int c) {
                /* see comments on gcbToHst[] above */
                int gcb=(getAdditional(c, 2)&GCB_MASK)>>>GCB_SHIFT;
                if(gcb<gcbToHst.length) {
                    return gcbToHst[gcb];
                } else {
                    return HangulSyllableType.NOT_APPLICABLE;
                }
            }
            @Override
            int getMaxValue(int which) {
                return HangulSyllableType.COUNT-1;
            }
        },
        // max=1=YES -- these are never "maybe", only "no" or "yes"
        new NormQuickCheckIntProperty(SRC_NFC, UProperty.NFD_QUICK_CHECK, 1),
        new NormQuickCheckIntProperty(SRC_NFKC, UProperty.NFKD_QUICK_CHECK, 1),
        // max=2=MAYBE
        new NormQuickCheckIntProperty(SRC_NFC, UProperty.NFC_QUICK_CHECK, 2),
        new NormQuickCheckIntProperty(SRC_NFKC, UProperty.NFKC_QUICK_CHECK, 2),
        new CombiningClassIntProperty(SRC_NFC) {  // LEAD_CANONICAL_COMBINING_CLASS
            @Override
            int getValue(int c) {
                // upper byte of the FCD16 value
                return Norm2AllModes.getNFCInstance().impl.getFCD16(c)>>8;
            }
        },
        new CombiningClassIntProperty(SRC_NFC) {  // TRAIL_CANONICAL_COMBINING_CLASS
            @Override
            int getValue(int c) {
                // lower byte of the FCD16 value
                return Norm2AllModes.getNFCInstance().impl.getFCD16(c)&0xff;
            }
        },
        new IntProperty(2, GCB_MASK, GCB_SHIFT),  // GRAPHEME_CLUSTER_BREAK
        new IntProperty(2, SB_MASK, SB_SHIFT),  // SENTENCE_BREAK
        new IntProperty(2, WB_MASK, WB_SHIFT),  // WORD_BREAK
        new BiDiIntProperty() {  // BIDI_PAIRED_BRACKET_TYPE
            @Override
            int getValue(int c) {
                return UBiDiProps.INSTANCE.getPairedBracketType(c);
            }
        },
        // Value looked up in InPCTrie (presumably Indic_Positional_Category -- confirm).
        new IntProperty(SRC_INPC) {
            @Override
            int getValue(int c) {
                return InPCTrie.INSTANCE.get(c);
            }
            @Override
            int getMaxValue(int which) {
                // NOTE(review): hard-coded maximum; presumably has to track the trie data
                return 14;
            }
        },
        // Value looked up in InSCTrie (presumably Indic_Syllabic_Category -- confirm).
        new IntProperty(SRC_INSC) {
            @Override
            int getValue(int c) {
                return InSCTrie.INSTANCE.get(c);
            }
            @Override
            int getMaxValue(int which) {
                // NOTE(review): hard-coded maximum; presumably has to track the trie data
                return 35;
            }
        },
        // Value looked up in VoTrie (presumably Vertical_Orientation -- confirm).
        new IntProperty(SRC_VO) {
            @Override
            int getValue(int c) {
                return VoTrie.INSTANCE.get(c);
            }
            @Override
            int getMaxValue(int which) {
                // NOTE(review): hard-coded maximum; presumably has to track the trie data
                return 3;
            }
        },
    };
1513 
getIntPropertyValue(int c, int which)1514     public int getIntPropertyValue(int c, int which) {
1515         if(which<UProperty.INT_START) {
1516             if(UProperty.BINARY_START<=which && which<UProperty.BINARY_LIMIT) {
1517                 return binProps[which].contains(c) ? 1 : 0;
1518             }
1519         } else if(which<UProperty.INT_LIMIT) {
1520             return intProps[which-UProperty.INT_START].getValue(c);
1521         } else if (which == UProperty.GENERAL_CATEGORY_MASK) {
1522             return getMask(getType(c));
1523         }
1524         return 0; // undefined
1525     }
1526 
getIntPropertyMaxValue(int which)1527     public int getIntPropertyMaxValue(int which) {
1528         if(which<UProperty.INT_START) {
1529             if(UProperty.BINARY_START<=which && which<UProperty.BINARY_LIMIT) {
1530                 return 1;  // maximum TRUE for all binary properties
1531             }
1532         } else if(which<UProperty.INT_LIMIT) {
1533             return intProps[which-UProperty.INT_START].getMaxValue(which);
1534         }
1535         return -1; // undefined
1536     }
1537 
getSource(int which)1538     final int getSource(int which) {
1539         if(which<UProperty.BINARY_START) {
1540             return SRC_NONE; /* undefined */
1541         } else if(which<UProperty.BINARY_LIMIT) {
1542             return binProps[which].getSource();
1543         } else if(which<UProperty.INT_START) {
1544             return SRC_NONE; /* undefined */
1545         } else if(which<UProperty.INT_LIMIT) {
1546             return intProps[which-UProperty.INT_START].getSource();
1547         } else if(which<UProperty.STRING_START) {
1548             switch(which) {
1549             case UProperty.GENERAL_CATEGORY_MASK:
1550             case UProperty.NUMERIC_VALUE:
1551                 return SRC_CHAR;
1552 
1553             default:
1554                 return SRC_NONE;
1555             }
1556         } else if(which<UProperty.STRING_LIMIT) {
1557             switch(which) {
1558             case UProperty.AGE:
1559                 return SRC_PROPSVEC;
1560 
1561             case UProperty.BIDI_MIRRORING_GLYPH:
1562                 return SRC_BIDI;
1563 
1564             case UProperty.CASE_FOLDING:
1565             case UProperty.LOWERCASE_MAPPING:
1566             case UProperty.SIMPLE_CASE_FOLDING:
1567             case UProperty.SIMPLE_LOWERCASE_MAPPING:
1568             case UProperty.SIMPLE_TITLECASE_MAPPING:
1569             case UProperty.SIMPLE_UPPERCASE_MAPPING:
1570             case UProperty.TITLECASE_MAPPING:
1571             case UProperty.UPPERCASE_MAPPING:
1572                 return SRC_CASE;
1573 
1574             case UProperty.ISO_COMMENT:
1575             case UProperty.NAME:
1576             case UProperty.UNICODE_1_NAME:
1577                 return SRC_NAMES;
1578 
1579             default:
1580                 return SRC_NONE;
1581             }
1582         } else {
1583             switch(which) {
1584             case UProperty.SCRIPT_EXTENSIONS:
1585                 return SRC_PROPSVEC;
1586             default:
1587                 return SRC_NONE; /* undefined */
1588             }
1589         }
1590     }
1591 
1592     /**
1593      * <p>
1594      * Unicode property names and property value names are compared
1595      * "loosely". Property[Value]Aliases.txt say:
1596      * <quote>
1597      *   "With loose matching of property names, the case distinctions,
1598      *    whitespace, and '_' are ignored."
1599      * </quote>
1600      * </p>
1601      * <p>
1602      * This function does just that, for ASCII (char *) name strings.
1603      * It is almost identical to ucnv_compareNames() but also ignores
1604      * ASCII White_Space characters (U+0009..U+000d).
1605      * </p>
1606      * @param name1 name to compare
1607      * @param name2 name to compare
1608      * @return 0 if names are equal, < 0 if name1 is less than name2 and > 0
1609      *         if name1 is greater than name2.
1610      */
1611     /* to be implemented in 2.4
1612      * public static int comparePropertyNames(String name1, String name2)
1613     {
1614         int result = 0;
1615         int i1 = 0;
1616         int i2 = 0;
1617         while (true) {
1618             char ch1 = 0;
1619             char ch2 = 0;
1620             // Ignore delimiters '-', '_', and ASCII White_Space
1621             if (i1 < name1.length()) {
1622                 ch1 = name1.charAt(i1 ++);
1623             }
1624             while (ch1 == '-' || ch1 == '_' || ch1 == ' ' || ch1 == '\t'
1625                    || ch1 == '\n' // synwee what is || ch1 == '\v'
1626                    || ch1 == '\f' || ch1=='\r') {
1627                 if (i1 < name1.length()) {
1628                     ch1 = name1.charAt(i1 ++);
1629                 }
1630                 else {
1631                     ch1 = 0;
1632                 }
1633             }
1634             if (i2 < name2.length()) {
1635                 ch2 = name2.charAt(i2 ++);
1636             }
1637             while (ch2 == '-' || ch2 == '_' || ch2 == ' ' || ch2 == '\t'
1638                    || ch2 == '\n' // synwee what is || ch1 == '\v'
1639                    || ch2 == '\f' || ch2=='\r') {
1640                 if (i2 < name2.length()) {
1641                     ch2 = name2.charAt(i2 ++);
1642                 }
1643                 else {
1644                     ch2 = 0;
1645                 }
1646             }
1647 
1648             // If we reach the ends of both strings then they match
1649             if (ch1 == 0 && ch2 == 0) {
1650                 return 0;
1651             }
1652 
1653             // Case-insensitive comparison
1654             if (ch1 != ch2) {
1655                 result = Character.toLowerCase(ch1)
1656                                                 - Character.toLowerCase(ch2);
1657                 if (result != 0) {
1658                     return result;
1659                 }
1660             }
1661         }
1662     }
1663     */
1664 
1665     /**
1666      * Get the the maximum values for some enum/int properties.
1667      * @return maximum values for the integer properties.
1668      */
getMaxValues(int column)1669     public int getMaxValues(int column)
1670     {
1671        // return m_maxBlockScriptValue_;
1672 
1673         switch(column) {
1674         case 0:
1675             return m_maxBlockScriptValue_;
1676         case 2:
1677             return m_maxJTGValue_;
1678         default:
1679             return 0;
1680         }
1681     }
1682 
1683     /**
1684      * Gets the type mask
1685      * @param type character type
1686      * @return mask
1687      */
getMask(int type)1688     public static final int getMask(int type)
1689     {
1690         return 1 << type;
1691     }
1692 
1693 
1694     /**
1695      * Returns the digit values of characters like 'A' - 'Z', normal,
1696      * half-width and full-width. This method assumes that the other digit
1697      * characters are checked by the calling method.
1698      * @param ch character to test
1699      * @return -1 if ch is not a character of the form 'A' - 'Z', otherwise
1700      *         its corresponding digit will be returned.
1701      */
getEuropeanDigit(int ch)1702     public static int getEuropeanDigit(int ch) {
1703         if ((ch > 0x7a && ch < 0xff21)
1704             || ch < 0x41 || (ch > 0x5a && ch < 0x61)
1705             || ch > 0xff5a || (ch > 0xff3a && ch < 0xff41)) {
1706             return -1;
1707         }
1708         if (ch <= 0x7a) {
1709             // ch >= 0x41 or ch < 0x61
1710             return ch + 10 - ((ch <= 0x5a) ? 0x41 : 0x61);
1711         }
1712         // ch >= 0xff21
1713         if (ch <= 0xff3a) {
1714             return ch + 10 - 0xff21;
1715         }
1716         // ch >= 0xff41 && ch <= 0xff5a
1717         return ch + 10 - 0xff41;
1718     }
1719 
digit(int c)1720     public int digit(int c) {
1721         int value = getNumericTypeValue(getProperty(c)) - NTV_DECIMAL_START_;
1722         if(value<=9) {
1723             return value;
1724         } else {
1725             return -1;
1726         }
1727     }
1728 
getNumericValue(int c)1729     public int getNumericValue(int c) {
1730         // slightly pruned version of getUnicodeNumericValue(), plus getEuropeanDigit()
1731         int ntv = getNumericTypeValue(getProperty(c));
1732 
1733         if(ntv==NTV_NONE_) {
1734             return getEuropeanDigit(c);
1735         } else if(ntv<NTV_DIGIT_START_) {
1736             /* decimal digit */
1737             return ntv-NTV_DECIMAL_START_;
1738         } else if(ntv<NTV_NUMERIC_START_) {
1739             /* other digit */
1740             return ntv-NTV_DIGIT_START_;
1741         } else if(ntv<NTV_FRACTION_START_) {
1742             /* small integer */
1743             return ntv-NTV_NUMERIC_START_;
1744         } else if(ntv<NTV_LARGE_START_) {
1745             /* fraction */
1746             return -2;
1747         } else if(ntv<NTV_BASE60_START_) {
1748             /* large, single-significant-digit integer */
1749             int mant=(ntv>>5)-14;
1750             int exp=(ntv&0x1f)+2;
1751             if(exp<9 || (exp==9 && mant<=2)) {
1752                 int numValue=mant;
1753                 do {
1754                     numValue*=10;
1755                 } while(--exp>0);
1756                 return numValue;
1757             } else {
1758                 return -2;
1759             }
1760         } else if(ntv<NTV_FRACTION20_START_) {
1761             /* sexagesimal (base 60) integer */
1762             int numValue=(ntv>>2)-0xbf;
1763             int exp=(ntv&3)+1;
1764 
1765             switch(exp) {
1766             case 4:
1767                 numValue*=60*60*60*60;
1768                 break;
1769             case 3:
1770                 numValue*=60*60*60;
1771                 break;
1772             case 2:
1773                 numValue*=60*60;
1774                 break;
1775             case 1:
1776                 numValue*=60;
1777                 break;
1778             case 0:
1779             default:
1780                 break;
1781             }
1782 
1783             return numValue;
1784         } else if(ntv<NTV_RESERVED_START_) {
1785             // fraction-20 e.g. 3/80
1786             return -2;
1787         } else {
1788             /* reserved */
1789             return -2;
1790         }
1791     }
1792 
getUnicodeNumericValue(int c)1793     public double getUnicodeNumericValue(int c) {
1794         // equivalent to c version double u_getNumericValue(UChar32 c)
1795         int ntv = getNumericTypeValue(getProperty(c));
1796 
1797         if(ntv==NTV_NONE_) {
1798             return UCharacter.NO_NUMERIC_VALUE;
1799         } else if(ntv<NTV_DIGIT_START_) {
1800             /* decimal digit */
1801             return ntv-NTV_DECIMAL_START_;
1802         } else if(ntv<NTV_NUMERIC_START_) {
1803             /* other digit */
1804             return ntv-NTV_DIGIT_START_;
1805         } else if(ntv<NTV_FRACTION_START_) {
1806             /* small integer */
1807             return ntv-NTV_NUMERIC_START_;
1808         } else if(ntv<NTV_LARGE_START_) {
1809             /* fraction */
1810             int numerator=(ntv>>4)-12;
1811             int denominator=(ntv&0xf)+1;
1812             return (double)numerator/denominator;
1813         } else if(ntv<NTV_BASE60_START_) {
1814             /* large, single-significant-digit integer */
1815             double numValue;
1816             int mant=(ntv>>5)-14;
1817             int exp=(ntv&0x1f)+2;
1818             numValue=mant;
1819 
1820             /* multiply by 10^exp without math.h */
1821             while(exp>=4) {
1822                 numValue*=10000.;
1823                 exp-=4;
1824             }
1825             switch(exp) {
1826             case 3:
1827                 numValue*=1000.;
1828                 break;
1829             case 2:
1830                 numValue*=100.;
1831                 break;
1832             case 1:
1833                 numValue*=10.;
1834                 break;
1835             case 0:
1836             default:
1837                 break;
1838             }
1839 
1840             return numValue;
1841         } else if(ntv<NTV_FRACTION20_START_) {
1842             /* sexagesimal (base 60) integer */
1843             int numValue=(ntv>>2)-0xbf;
1844             int exp=(ntv&3)+1;
1845 
1846             switch(exp) {
1847             case 4:
1848                 numValue*=60*60*60*60;
1849                 break;
1850             case 3:
1851                 numValue*=60*60*60;
1852                 break;
1853             case 2:
1854                 numValue*=60*60;
1855                 break;
1856             case 1:
1857                 numValue*=60;
1858                 break;
1859             case 0:
1860             default:
1861                 break;
1862             }
1863 
1864             return numValue;
1865         } else if(ntv<NTV_RESERVED_START_) {
1866             // fraction-20 e.g. 3/80
1867             int frac20=ntv-NTV_FRACTION20_START_;  // 0..0x17
1868             int numerator=2*(frac20&3)+1;
1869             int denominator=20<<(frac20>>2);
1870             return (double)numerator/denominator;
1871         } else {
1872             /* reserved */
1873             return UCharacter.NO_NUMERIC_VALUE;
1874         }
1875     }
1876 
    // non-private variables (declared without an access modifier unless noted) ---

    /**
     * Extra property trie
     */
    Trie2_16 m_additionalTrie_;
    /**
     * Extra property vectors. According to the layout comments in this file,
     * vector word 0 holds the age together with script, block and East Asian
     * Width, vector word 1 holds one bit per binary property, and vector word 2
     * holds the break properties and the decomposition type.
     */
    int m_additionalVectors_[];
    /**
     * Number of additional columns
     */
    int m_additionalColumnsCount_;
    /**
     * Maximum values for the properties in vector word 0 (such as block and
     * script), with bits used as in vector word 0.
     * Returned by getMaxValues(0).
     */
    int m_maxBlockScriptValue_;
    /**
     * Maximum values for the properties in vector word 2, with bits used as in
     * vector word 2.
     * Returned by getMaxValues(2).
     */
     int m_maxJTGValue_;

    /**
     * Script_Extensions data
     */
    public char[] m_scriptExtensions_;

    // private variables -------------------------------------------------

    /**
    * Default name of the datafile
    */
    private static final String DATA_FILE_NAME_ = "uprops.icu";
1914 
1915     // property data constants -------------------------------------------------
1916 
1917     /**
1918      * Numeric types and values in the main properties words.
1919      */
1920     private static final int NUMERIC_TYPE_VALUE_SHIFT_ = 6;
getNumericTypeValue(int props)1921     private static final int getNumericTypeValue(int props) {
1922         return props >> NUMERIC_TYPE_VALUE_SHIFT_;
1923     }
    /*
     * Constants for the storage form of numeric types and values.
     * Each ..._START_ constant is the first numeric type-value (ntv) of its
     * range; a range ends just before the next ..._START_ constant.
     */
    /** No numeric value. */
    private static final int NTV_NONE_ = 0;
    /** Decimal digits: nv=0..9 (ntv=1..10) */
    private static final int NTV_DECIMAL_START_ = 1;
    /** Other digits: nv=0..9 (ntv=11..20) */
    private static final int NTV_DIGIT_START_ = 11;
    /** Small integers: nv=0..154 (ntv=21..0xaf) */
    private static final int NTV_NUMERIC_START_ = 21;
    /** Fractions: ((ntv>>4)-12) / ((ntv&0xf)+1) = -1..17 / 1..16 */
    private static final int NTV_FRACTION_START_ = 0xb0;
    /**
     * Large integers:
     * ((ntv>>5)-14) * 10^((ntv&0x1f)+2) = (1..9)*(10^2..10^33)
     * (only one significant decimal digit)
     */
    private static final int NTV_LARGE_START_ = 0x1e0;
    /**
     * Sexagesimal numbers:
     * ((ntv>>2)-0xbf) * 60^((ntv&3)+1) = (1..9)*(60^1..60^4)
     */
    private static final int NTV_BASE60_START_=0x300;
    /**
     * Fraction-20 values:
     * frac20 = ntv-0x324 = 0..0x17 -> 1|3|5|7 / 20|40|80|160|320|640
     * numerator: num = 2*(frac20&3)+1
     * denominator: den = 20<<(frac20>>2)
     */
    private static final int NTV_FRACTION20_START_ = NTV_BASE60_START_ + 36;  // 0x300+9*4=0x324
    /** No numeric value (yet). */
    private static final int NTV_RESERVED_START_ = NTV_FRACTION20_START_ + 24;  // 0x324+6*4=0x33c
1955 
ntvGetType(int ntv)1956     private static final int ntvGetType(int ntv) {
1957         return
1958             (ntv==NTV_NONE_) ? NumericType.NONE :
1959             (ntv<NTV_DIGIT_START_) ?  NumericType.DECIMAL :
1960             (ntv<NTV_NUMERIC_START_) ? NumericType.DIGIT :
1961             NumericType.NUMERIC;
1962     }
1963 
    /*
     * Properties in vector word 0
     * Bits
     * 31..24   DerivedAge version major/minor one nibble each
     * 23..22   3..1: Bits 7..0 = Script_Extensions index
     *             3: Script value from Script_Extensions
     *             2: Script=Inherited
     *             1: Script=Common
     *             0: Script=bits 7..0
     * 21..20   reserved
     * 19..17   East Asian Width
     * 16.. 8   UBlockCode
     *  7.. 0   UScriptCode
     */

    /**
     * Script_Extensions: mask includes Script
     * (bits 23..22 together with bits 7..0 of vector word 0).
     */
    public static final int SCRIPT_X_MASK = 0x00c000ff;
    //private static final int SCRIPT_X_SHIFT = 22;
    /**
     * Integer properties mask value for East Asian cell width
     * (bits 19..17 of vector word 0).
     * Equivalent to icu4c UPROPS_EA_MASK
     */
    private static final int EAST_ASIAN_MASK_ = 0x000e0000;
    /**
     * Integer properties shift value for East Asian cell width.
     * Equivalent to icu4c UPROPS_EA_SHIFT
     */
    private static final int EAST_ASIAN_SHIFT_ = 17;
    /**
     * Integer properties mask value for blocks
     * (bits 16..8 of vector word 0).
     * Equivalent to icu4c UPROPS_BLOCK_MASK
     */
    private static final int BLOCK_MASK_ = 0x0001ff00;
    /**
     * Integer properties shift value for blocks.
     * Equivalent to icu4c UPROPS_BLOCK_SHIFT
     */
    private static final int BLOCK_SHIFT_ = 8;
    /**
     * Integer properties mask value for scripts
     * (bits 7..0 of vector word 0; no shift is needed).
     * Equivalent to icu4c UPROPS_SHIFT_MASK
     * (NOTE(review): presumably UPROPS_SCRIPT_MASK is meant -- confirm against icu4c)
     */
    public static final int SCRIPT_MASK_ = 0x000000ff;

    /*
     * Values of bits 23..22 of vector word 0, see the layout above.
     * SCRIPT_X_WITH_COMMON must be the lowest value that involves Script_Extensions.
     */
    public static final int SCRIPT_X_WITH_COMMON = 0x400000;
    public static final int SCRIPT_X_WITH_INHERITED = 0x800000;
    public static final int SCRIPT_X_WITH_OTHER = 0xc00000;
2014 
    /*
     * Additional properties used in internal trie data
     *
     * Properties in vector word 1
     * Each bit encodes one binary property.
     * The following constants represent the bit number, use 1<<UPROPS_XYZ.
     * UPROPS_BINARY_1_TOP<=32!
     * All 32 bit numbers (0..31) are assigned below; bit 31 is the sign bit of
     * the int vector word.
     *
     * Keep this list of property enums in sync with
     * propListNames[] in icu/source/tools/genprops/props2.c!
     *
     * ICU 2.6/uprops format version 3.2 stores full properties instead of "Other_".
     */
    private static final int WHITE_SPACE_PROPERTY_ = 0;
    private static final int DASH_PROPERTY_ = 1;
    private static final int HYPHEN_PROPERTY_ = 2;
    private static final int QUOTATION_MARK_PROPERTY_ = 3;
    private static final int TERMINAL_PUNCTUATION_PROPERTY_ = 4;
    private static final int MATH_PROPERTY_ = 5;
    private static final int HEX_DIGIT_PROPERTY_ = 6;
    private static final int ASCII_HEX_DIGIT_PROPERTY_ = 7;
    private static final int ALPHABETIC_PROPERTY_ = 8;
    private static final int IDEOGRAPHIC_PROPERTY_ = 9;
    private static final int DIACRITIC_PROPERTY_ = 10;
    private static final int EXTENDER_PROPERTY_ = 11;
    private static final int NONCHARACTER_CODE_POINT_PROPERTY_ = 12;
    private static final int GRAPHEME_EXTEND_PROPERTY_ = 13;
    private static final int GRAPHEME_LINK_PROPERTY_ = 14;
    private static final int IDS_BINARY_OPERATOR_PROPERTY_ = 15;
    private static final int IDS_TRINARY_OPERATOR_PROPERTY_ = 16;
    private static final int RADICAL_PROPERTY_ = 17;
    private static final int UNIFIED_IDEOGRAPH_PROPERTY_ = 18;
    private static final int DEFAULT_IGNORABLE_CODE_POINT_PROPERTY_ = 19;
    private static final int DEPRECATED_PROPERTY_ = 20;
    private static final int LOGICAL_ORDER_EXCEPTION_PROPERTY_ = 21;
    private static final int XID_START_PROPERTY_ = 22;
    private static final int XID_CONTINUE_PROPERTY_ = 23;
    private static final int ID_START_PROPERTY_    = 24;
    private static final int ID_CONTINUE_PROPERTY_ = 25;
    private static final int GRAPHEME_BASE_PROPERTY_ = 26;
    private static final int S_TERM_PROPERTY_ = 27;
    private static final int VARIATION_SELECTOR_PROPERTY_ = 28;
    private static final int PATTERN_SYNTAX = 29;                   /* new in ICU 3.4 and Unicode 4.1 */
    private static final int PATTERN_WHITE_SPACE = 30;
    private static final int PREPENDED_CONCATENATION_MARK = 31;     // new in ICU 60 and Unicode 10
2061 
    /*
     * Properties in vector word 2
     * Bits
     * 31..26   http://www.unicode.org/reports/tr51/#Emoji_Properties
     * 25..20   Line Break
     * 19..15   Sentence Break
     * 14..10   Word Break
     *  9.. 5   Grapheme Cluster Break
     *  4.. 0   Decomposition Type
     */
    // Emoji-related entries: bit numbers (not masks), one per bit in 31..26.
    private static final int PROPS_2_EXTENDED_PICTOGRAPHIC=26;
    private static final int PROPS_2_EMOJI_COMPONENT = 27;
    private static final int PROPS_2_EMOJI = 28;
    private static final int PROPS_2_EMOJI_PRESENTATION = 29;
    private static final int PROPS_2_EMOJI_MODIFIER = 30;
    private static final int PROPS_2_EMOJI_MODIFIER_BASE = 31;

    // Line Break: mask for bits 25..20 and the matching right shift.
    private static final int LB_MASK          = 0x03f00000;
    private static final int LB_SHIFT         = 20;

    // Sentence Break: mask for bits 19..15 and the matching right shift.
    private static final int SB_MASK          = 0x000f8000;
    private static final int SB_SHIFT         = 15;

    // Word Break: mask for bits 14..10 and the matching right shift.
    private static final int WB_MASK          = 0x00007c00;
    private static final int WB_SHIFT         = 10;

    // Grapheme Cluster Break: mask for bits 9..5 and the matching right shift.
    private static final int GCB_MASK         = 0x000003e0;
    private static final int GCB_SHIFT        = 5;

    /**
     * Integer properties mask for decomposition type: selects bits 4..0,
     * so no shift is needed.
     * Equivalent to icu4c UPROPS_DT_MASK.
     */
    private static final int DECOMPOSITION_TYPE_MASK_ = 0x0000001f;

    /**
     * First nibble shift: a shift of 4 bits, the width of one nibble.
     */
    private static final int FIRST_NIBBLE_SHIFT_ = 0x4;
    /**
     * Last nibble mask: selects the lowest four bits of a value.
     */
    private static final int LAST_NIBBLE_MASK_ = 0xF;
    /**
     * Age value shift: a shift of 24 bits.
     */
    private static final int AGE_SHIFT_ = 24;
2109 
2110 
2111     // private constructors --------------------------------------------------
2112 
2113     /**
2114      * Constructor
2115      * @exception IOException thrown when data reading fails or data corrupted
2116      */
UCharacterProperty()2117     private UCharacterProperty() throws IOException
2118     {
2119         // consistency check
2120         if(binProps.length!=UProperty.BINARY_LIMIT) {
2121             throw new ICUException("binProps.length!=UProperty.BINARY_LIMIT");
2122         }
2123         if(intProps.length!=(UProperty.INT_LIMIT-UProperty.INT_START)) {
2124             throw new ICUException("intProps.length!=(UProperty.INT_LIMIT-UProperty.INT_START)");
2125         }
2126 
2127         // jar access
2128         ByteBuffer bytes=ICUBinary.getRequiredData(DATA_FILE_NAME_);
2129         m_unicodeVersion_ = ICUBinary.readHeaderAndDataVersion(bytes, DATA_FORMAT, new IsAcceptable());
2130         // Read or skip the 16 indexes.
2131         int propertyOffset = bytes.getInt();
2132         /* exceptionOffset = */ bytes.getInt();
2133         /* caseOffset = */ bytes.getInt();
2134         int additionalOffset = bytes.getInt();
2135         int additionalVectorsOffset = bytes.getInt();
2136         m_additionalColumnsCount_ = bytes.getInt();
2137         int scriptExtensionsOffset = bytes.getInt();
2138         int reservedOffset7 = bytes.getInt();
2139         /* reservedOffset8 = */ bytes.getInt();
2140         /* dataTopOffset = */ bytes.getInt();
2141         m_maxBlockScriptValue_ = bytes.getInt();
2142         m_maxJTGValue_ = bytes.getInt();
2143         ICUBinary.skipBytes(bytes, (16 - 12) << 2);
2144 
2145         // read the main properties trie
2146         m_trie_ = Trie2_16.createFromSerialized(bytes);
2147         int expectedTrieLength = (propertyOffset - 16) * 4;
2148         int trieLength = m_trie_.getSerializedLength();
2149         if(trieLength > expectedTrieLength) {
2150             throw new IOException("uprops.icu: not enough bytes for main trie");
2151         }
2152         // skip padding after trie bytes
2153         ICUBinary.skipBytes(bytes, expectedTrieLength - trieLength);
2154 
2155         // skip unused intervening data structures
2156         ICUBinary.skipBytes(bytes, (additionalOffset - propertyOffset) * 4);
2157 
2158         if(m_additionalColumnsCount_ > 0) {
2159             // reads the additional property block
2160             m_additionalTrie_ = Trie2_16.createFromSerialized(bytes);
2161             expectedTrieLength = (additionalVectorsOffset-additionalOffset)*4;
2162             trieLength = m_additionalTrie_.getSerializedLength();
2163             if(trieLength > expectedTrieLength) {
2164                 throw new IOException("uprops.icu: not enough bytes for additional-properties trie");
2165             }
2166             // skip padding after trie bytes
2167             ICUBinary.skipBytes(bytes, expectedTrieLength - trieLength);
2168 
2169             // additional properties
2170             int size = scriptExtensionsOffset - additionalVectorsOffset;
2171             m_additionalVectors_ = ICUBinary.getInts(bytes, size, 0);
2172         }
2173 
2174         // Script_Extensions
2175         int numChars = (reservedOffset7 - scriptExtensionsOffset) * 2;
2176         if(numChars > 0) {
2177             m_scriptExtensions_ = ICUBinary.getChars(bytes, numChars, 0);
2178         }
2179     }
2180 
2181     private static final class IsAcceptable implements ICUBinary.Authenticate {
2182         @Override
isDataVersionAcceptable(byte version[])2183         public boolean isDataVersionAcceptable(byte version[]) {
2184             return version[0] == 7;
2185         }
2186     }
    // Data format identifier passed to ICUBinary.readHeaderAndDataVersion();
    // its four bytes are the ASCII codes of the string in the trailing comment.
    private static final int DATA_FORMAT = 0x5550726F;  // "UPro"
2188 
2189     // private methods -------------------------------------------------------
2190 
2191     /*
2192      * Compare additional properties to see if it has argument type
2193      * @param property 32 bit properties
2194      * @param type character type
2195      * @return true if property has type
2196      */
2197     /*private boolean compareAdditionalType(int property, int type)
2198     {
2199         return (property & (1 << type)) != 0;
2200     }*/
2201 
    // property starts for UnicodeSet -------------------------------------- ***

    // Code points referenced by addPropertyStarts() for hardcoded properties.
    private static final int TAB     = 0x0009;  // U+0009 CHARACTER TABULATION
    //private static final int LF      = 0x000a;
    //private static final int FF      = 0x000c;
    private static final int CR      = 0x000d;  // U+000D CARRIAGE RETURN
    private static final int U_A     = 0x0041;  // 'A'
    private static final int U_F     = 0x0046;  // 'F'
    private static final int U_Z     = 0x005a;  // 'Z'
    private static final int U_a     = 0x0061;  // 'a'
    private static final int U_f     = 0x0066;  // 'f'
    private static final int U_z     = 0x007a;  // 'z'
    private static final int DEL     = 0x007f;  // U+007F DELETE
    private static final int NL      = 0x0085;  // U+0085 NEXT LINE
    private static final int NBSP    = 0x00a0;  // U+00A0 NO-BREAK SPACE
    private static final int CGJ     = 0x034f;  // U+034F COMBINING GRAPHEME JOINER
    private static final int FIGURESP= 0x2007;  // U+2007 FIGURE SPACE
    private static final int HAIRSP  = 0x200a;  // U+200A HAIR SPACE
    //private static final int ZWNJ    = 0x200c;
    //private static final int ZWJ     = 0x200d;
    private static final int RLM     = 0x200f;  // U+200F RIGHT-TO-LEFT MARK
    private static final int NNBSP   = 0x202f;  // U+202F NARROW NO-BREAK SPACE
    private static final int WJ      = 0x2060;  // U+2060 WORD JOINER
    private static final int INHSWAP = 0x206a;  // U+206A INHIBIT SYMMETRIC SWAPPING
    private static final int NOMDIG  = 0x206f;  // U+206F NOMINAL DIGIT SHAPES
    private static final int U_FW_A  = 0xff21;  // fullwidth 'A'
    private static final int U_FW_F  = 0xff26;  // fullwidth 'F'
    private static final int U_FW_Z  = 0xff3a;  // fullwidth 'Z'
    private static final int U_FW_a  = 0xff41;  // fullwidth 'a'
    private static final int U_FW_f  = 0xff46;  // fullwidth 'f'
    private static final int U_FW_z  = 0xff5a;  // fullwidth 'z'
    private static final int ZWNBSP  = 0xfeff;  // U+FEFF ZERO WIDTH NO-BREAK SPACE
2234 
addPropertyStarts(UnicodeSet set)2235     public UnicodeSet addPropertyStarts(UnicodeSet set) {
2236         /* add the start code point of each same-value range of the main trie */
2237         Iterator<Trie2.Range> trieIterator = m_trie_.iterator();
2238         Trie2.Range range;
2239         while(trieIterator.hasNext() && !(range=trieIterator.next()).leadSurrogate) {
2240             set.add(range.startCodePoint);
2241         }
2242 
2243         /* add code points with hardcoded properties, plus the ones following them */
2244 
2245         /* add for u_isblank() */
2246         set.add(TAB);
2247         set.add(TAB+1);
2248 
2249         /* add for IS_THAT_CONTROL_SPACE() */
2250         set.add(CR+1); /* range TAB..CR */
2251         set.add(0x1c);
2252         set.add(0x1f+1);
2253         set.add(NL);
2254         set.add(NL+1);
2255 
2256         /* add for u_isIDIgnorable() what was not added above */
2257         set.add(DEL); /* range DEL..NBSP-1, NBSP added below */
2258         set.add(HAIRSP);
2259         set.add(RLM+1);
2260         set.add(INHSWAP);
2261         set.add(NOMDIG+1);
2262         set.add(ZWNBSP);
2263         set.add(ZWNBSP+1);
2264 
2265         /* add no-break spaces for u_isWhitespace() what was not added above */
2266         set.add(NBSP);
2267         set.add(NBSP+1);
2268         set.add(FIGURESP);
2269         set.add(FIGURESP+1);
2270         set.add(NNBSP);
2271         set.add(NNBSP+1);
2272 
2273         /* add for u_charDigitValue() */
2274         // TODO remove when UCharacter.getHanNumericValue() is changed to just return
2275         // Unicode numeric values
2276         set.add(0x3007);
2277         set.add(0x3008);
2278         set.add(0x4e00);
2279         set.add(0x4e01);
2280         set.add(0x4e8c);
2281         set.add(0x4e8d);
2282         set.add(0x4e09);
2283         set.add(0x4e0a);
2284         set.add(0x56db);
2285         set.add(0x56dc);
2286         set.add(0x4e94);
2287         set.add(0x4e95);
2288         set.add(0x516d);
2289         set.add(0x516e);
2290         set.add(0x4e03);
2291         set.add(0x4e04);
2292         set.add(0x516b);
2293         set.add(0x516c);
2294         set.add(0x4e5d);
2295         set.add(0x4e5e);
2296 
2297         /* add for u_digit() */
2298         set.add(U_a);
2299         set.add(U_z+1);
2300         set.add(U_A);
2301         set.add(U_Z+1);
2302         set.add(U_FW_a);
2303         set.add(U_FW_z+1);
2304         set.add(U_FW_A);
2305         set.add(U_FW_Z+1);
2306 
2307         /* add for u_isxdigit() */
2308         set.add(U_f+1);
2309         set.add(U_F+1);
2310         set.add(U_FW_f+1);
2311         set.add(U_FW_F+1);
2312 
2313         /* add for UCHAR_DEFAULT_IGNORABLE_CODE_POINT what was not added above */
2314         set.add(WJ); /* range WJ..NOMDIG */
2315         set.add(0xfff0);
2316         set.add(0xfffb+1);
2317         set.add(0xe0000);
2318         set.add(0xe0fff+1);
2319 
2320         /* add for UCHAR_GRAPHEME_BASE and others */
2321         set.add(CGJ);
2322         set.add(CGJ+1);
2323 
2324         return set; // for chaining
2325     }
2326 
upropsvec_addPropertyStarts(UnicodeSet set)2327     public void upropsvec_addPropertyStarts(UnicodeSet set) {
2328         /* add the start code point of each same-value range of the properties vectors trie */
2329         if(m_additionalColumnsCount_>0) {
2330             /* if m_additionalColumnsCount_==0 then the properties vectors trie may not be there at all */
2331             Iterator<Trie2.Range> trieIterator = m_additionalTrie_.iterator();
2332             Trie2.Range range;
2333             while(trieIterator.hasNext() && !(range=trieIterator.next()).leadSurrogate) {
2334                 set.add(range.startCodePoint);
2335             }
2336         }
2337     }
2338 
ulayout_addPropertyStarts(int src, UnicodeSet set)2339     public UnicodeSet ulayout_addPropertyStarts(int src, UnicodeSet set) {
2340         CodePointTrie trie;
2341         switch (src) {
2342         case SRC_INPC:
2343             trie = InPCTrie.INSTANCE;
2344             break;
2345         case SRC_INSC:
2346             trie = InSCTrie.INSTANCE;
2347             break;
2348         case SRC_VO:
2349             trie = VoTrie.INSTANCE;
2350             break;
2351         default:
2352             throw new IllegalStateException();
2353         }
2354 
2355         // Add the start code point of each same-value range of the trie.
2356         CodePointMap.Range range = new CodePointMap.Range();
2357         int start = 0;
2358         while (trie.getRange(start, null, range)) {
2359             set.add(start);
2360             start = range.getEnd() + 1;
2361         }
2362         return set;
2363     }
2364 
2365     // This static initializer block must be placed after
2366     // other static member initialization
2367     static {
2368         try {
2369             INSTANCE = new UCharacterProperty();
2370         }
2371         catch (IOException e) {
2372             throw new MissingResourceException(e.getMessage(),"","");
2373         }
2374     }
2375 
2376 /*----------------------------------------------------------------
2377  * Inclusions list
2378  *----------------------------------------------------------------*/
2379 
2380     /*
2381      * Return a set of characters for property enumeration.
2382      * The set implicitly contains 0x110000 as well, which is one more than the highest
2383      * Unicode code point.
2384      *
2385      * This set is used as an ordered list - its code points are ordered, and
2386      * consecutive code points (in Unicode code point order) in the set define a range.
2387      * For each two consecutive characters (start, limit) in the set,
2388      * all of the UCD/normalization and related properties for
2389      * all code points start..limit-1 are all the same,
2390      * except for character names and ISO comments.
2391      *
2392      * All Unicode code points U+0000..U+10ffff are covered by these ranges.
2393      * The ranges define a partition of the Unicode code space.
2394      * ICU uses the inclusions set to enumerate properties for generating
2395      * UnicodeSets containing all code points that have a certain property value.
2396      *
2397      * The Inclusion List is generated from the UCD. It is generated
2398      * by enumerating the data tries, and code points for hardcoded properties
2399      * are added as well.
2400      *
2401      * --------------------------------------------------------------------------
2402      *
2403      * The following are ideas for getting properties-unique code point ranges,
2404      * with possible optimizations beyond the current implementation.
2405      * These optimizations would require more code and be more fragile.
2406      * The current implementation generates one single list (set) for all properties.
2407      *
2408      * To enumerate properties efficiently, one needs to know ranges of
2409      * repetitive values, so that the value of only each start code point
2410      * can be applied to the whole range.
2411      * This information is in principle available in the uprops.icu/unorm.icu data.
2412      *
2413      * There are two obstacles:
2414      *
2415      * 1. Some properties are computed from multiple data structures,
2416      *    making it necessary to get repetitive ranges by intersecting
2417      *    ranges from multiple tries.
2418      *
2419      * 2. It is not economical to write code for getting repetitive ranges
2420      *    that are precise for each of some 50 properties.
2421      *
2422      * Compromise ideas:
2423      *
2424      * - Get ranges per trie, not per individual property.
2425      *   Each range contains the same values for a whole group of properties.
2426      *   This would generate currently five range sets, two for uprops.icu tries
2427      *   and three for unorm.icu tries.
2428      *
2429      * - Combine sets of ranges for multiple tries to get sufficient sets
2430      *   for properties, e.g., the uprops.icu main and auxiliary tries
2431      *   for all non-normalization properties.
2432      *
2433      * Ideas for representing ranges and combining them:
2434      *
2435      * - A UnicodeSet could hold just the start code points of ranges.
2436      *   Multiple sets are easily combined by or-ing them together.
2437      *
2438      * - Alternatively, a UnicodeSet could hold each even-numbered range.
2439      *   All ranges could be enumerated by using each start code point
2440      *   (for the even-numbered ranges) as well as each limit (end+1) code point
2441      *   (for the odd-numbered ranges).
2442      *   It should be possible to combine two such sets by xor-ing them,
2443      *   but no more than two.
2444      *
2445      * The second way to represent ranges may(?!) yield smaller UnicodeSet arrays,
2446      * but the first one is certainly simpler and applicable for combining more than
2447      * two range sets.
2448      *
2449      * It is possible to combine all range sets for all uprops/unorm tries into one
2450      * set that can be used for all properties.
2451      * As an optimization, there could be less-combined range sets for certain
2452      * groups of properties.
2453      * The relationship of which less-combined range set to use for which property
2454      * depends on the implementation of the properties and must be hardcoded
2455      * - somewhat error-prone and higher maintenance but can be tested easily
2456      * by building property sets "the simple way" in test code.
2457      *
2458      * ---
2459      *
2460      * Do not use a UnicodeSet pattern because that causes infinite recursion;
2461      * UnicodeSet depends on the inclusions set.
2462      *
2463      * ---
2464      *
2465      * getInclusions() is commented out starting 2005-feb-12 because
2466      * UnicodeSet now calls the uxyz_addPropertyStarts() directly,
2467      * and only for the relevant property source.
2468      */
2469     /*
2470     public UnicodeSet getInclusions() {
2471         UnicodeSet set = new UnicodeSet();
2472         NormalizerImpl.addPropertyStarts(set);
2473         addPropertyStarts(set);
2474         return set;
2475     }
2476     */
2477 }
2478