1 /*
2  * Copyright (c) 2002, 2021, Oracle and/or its affiliates. All rights reserved.
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * This code is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 only, as
7  * published by the Free Software Foundation.  Oracle designates this
8  * particular file as subject to the "Classpath" exception as provided
9  * by Oracle in the LICENSE file that accompanied this code.
10  *
11  * This code is distributed in the hope that it will be useful, but WITHOUT
12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14  * version 2 for more details (a copy is included in the LICENSE file that
15  * accompanied this code).
16  *
17  * You should have received a copy of the GNU General Public License version
18  * 2 along with this work; if not, write to the Free Software Foundation,
19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20  *
21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22  * or visit www.oracle.com if you need additional information or have any
23  * questions.
24  */
25 
26 package java.lang;
27 
28 import dalvik.annotation.optimization.FastNative;
29 // Android-removed: CDS is not used on Android.
30 // import jdk.internal.misc.CDS;
31 import jdk.internal.vm.annotation.IntrinsicCandidate;
32 
33 import java.util.Arrays;
34 import java.util.HashMap;
35 import java.util.Locale;
36 import java.util.Map;
37 
38 import java.lang.constant.Constable;
39 import java.lang.constant.DynamicConstantDesc;
40 import java.util.Optional;
41 import static java.lang.constant.ConstantDescs.BSM_EXPLICIT_CAST;
42 import static java.lang.constant.ConstantDescs.CD_char;
43 import static java.lang.constant.ConstantDescs.CD_int;
44 import static java.lang.constant.ConstantDescs.DEFAULT_NAME;
45 
46 // Android-changed: Remove reference to a specific unicode standard version
47 /**
48  * The {@code Character} class wraps a value of the primitive
49  * type {@code char} in an object. An object of class
50  * {@code Character} contains a single field whose type is
51  * {@code char}.
52  * <p>
53  * In addition, this class provides several methods for determining
54  * a character's category (lowercase letter, digit, etc.) and for converting
55  * characters from uppercase to lowercase and vice versa.
56  * <p>
 * Character information is based on the Unicode Standard.
58  * <p>
59  * The methods and data of class {@code Character} are defined by
60  * the information in the <i>UnicodeData</i> file that is part of the
61  * Unicode Character Database maintained by the Unicode
62  * Consortium. This file specifies various properties including name
63  * and general category for every defined Unicode code point or
64  * character range.
65  * <p>
66  * The file and its description are available from the Unicode Consortium at:
67  * <ul>
68  * <li><a href="http://www.unicode.org">http://www.unicode.org</a>
69  * </ul>
70  *
71  * <h2><a id="conformance">Unicode Conformance</a></h2>
72  * <p>
73  * The fields and methods of class {@code Character} are defined in terms
74  * of character information from the Unicode Standard, specifically the
75  * <i>UnicodeData</i> file that is part of the Unicode Character Database.
76  * This file specifies properties including name and category for every
77  * assigned Unicode code point or character range. The file is available
78  * from the Unicode Consortium at
79  * <a href="http://www.unicode.org">http://www.unicode.org</a>.
80  * <p>
81  * Character information is based on the Unicode Standard, version 13.0.
82  * <p>
83  * The Java platform has supported different versions of the Unicode
84  * Standard over time. Upgrades to newer versions of the Unicode Standard
85  * occurred in the following Java releases, each indicating the new version:
86  * <table class="striped">
87  * <caption style="display:none">Shows Java releases and supported Unicode versions</caption>
88  * <thead>
89  * <tr><th scope="col">Java release</th>
90  *     <th scope="col">Unicode version</th></tr>
91  * </thead>
92  * <tbody>
93  * <tr><td>Java SE 15</td>
94  *     <td>Unicode 13.0</td></tr>
95  * <tr><td>Java SE 13</td>
96  *     <td>Unicode 12.1</td></tr>
97  * <tr><td>Java SE 12</td>
98  *     <td>Unicode 11.0</td></tr>
99  * <tr><td>Java SE 11</td>
100  *     <td>Unicode 10.0</td></tr>
101  * <tr><td>Java SE 9</td>
102  *     <td>Unicode 8.0</td></tr>
103  * <tr><td>Java SE 8</td>
104  *     <td>Unicode 6.2</td></tr>
105  * <tr><td>Java SE 7</td>
106  *     <td>Unicode 6.0</td></tr>
107  * <tr><td>Java SE 5.0</td>
108  *     <td>Unicode 4.0</td></tr>
109  * <tr><td>Java SE 1.4</td>
110  *     <td>Unicode 3.0</td></tr>
111  * <tr><td>JDK 1.1</td>
112  *     <td>Unicode 2.0</td></tr>
113  * <tr><td>JDK 1.0.2</td>
114  *     <td>Unicode 1.1.5</td></tr>
115  * </tbody>
116  * </table>
117  * Variations from these base Unicode versions, such as recognized appendixes,
118  * are documented elsewhere.
119  * <h2><a id="unicode">Unicode Character Representations</a></h2>
120  *
121  * <p>The {@code char} data type (and therefore the value that a
122  * {@code Character} object encapsulates) are based on the
123  * original Unicode specification, which defined characters as
124  * fixed-width 16-bit entities. The Unicode Standard has since been
125  * changed to allow for characters whose representation requires more
126  * than 16 bits.  The range of legal <em>code point</em>s is now
127  * U+0000 to U+10FFFF, known as <em>Unicode scalar value</em>.
128  * (Refer to the <a
129  * href="http://www.unicode.org/reports/tr27/#notation"><i>
130  * definition</i></a> of the U+<i>n</i> notation in the Unicode
131  * Standard.)
132  *
133  * <p><a id="BMP">The set of characters from U+0000 to U+FFFF</a> is
134  * sometimes referred to as the <em>Basic Multilingual Plane (BMP)</em>.
135  * <a id="supplementary">Characters</a> whose code points are greater
136  * than U+FFFF are called <em>supplementary character</em>s.  The Java
137  * platform uses the UTF-16 representation in {@code char} arrays and
138  * in the {@code String} and {@code StringBuffer} classes. In
139  * this representation, supplementary characters are represented as a pair
140  * of {@code char} values, the first from the <em>high-surrogates</em>
141  * range, (&#92;uD800-&#92;uDBFF), the second from the
142  * <em>low-surrogates</em> range (&#92;uDC00-&#92;uDFFF).
143  *
144  * <p>A {@code char} value, therefore, represents Basic
145  * Multilingual Plane (BMP) code points, including the surrogate
146  * code points, or code units of the UTF-16 encoding. An
147  * {@code int} value represents all Unicode code points,
148  * including supplementary code points. The lower (least significant)
149  * 21 bits of {@code int} are used to represent Unicode code
150  * points and the upper (most significant) 11 bits must be zero.
151  * Unless otherwise specified, the behavior with respect to
152  * supplementary characters and surrogate {@code char} values is
153  * as follows:
154  *
155  * <ul>
156  * <li>The methods that only accept a {@code char} value cannot support
157  * supplementary characters. They treat {@code char} values from the
158  * surrogate ranges as undefined characters. For example,
159  * {@code Character.isLetter('\u005CuD840')} returns {@code false}, even though
160  * this specific value if followed by any low-surrogate value in a string
161  * would represent a letter.
162  *
163  * <li>The methods that accept an {@code int} value support all
164  * Unicode characters, including supplementary characters. For
165  * example, {@code Character.isLetter(0x2F81A)} returns
166  * {@code true} because the code point value represents a letter
167  * (a CJK ideograph).
168  * </ul>
169  *
170  * <p>In the Java SE API documentation, <em>Unicode code point</em> is
171  * used for character values in the range between U+0000 and U+10FFFF,
172  * and <em>Unicode code unit</em> is used for 16-bit
173  * {@code char} values that are code units of the <em>UTF-16</em>
174  * encoding. For more information on Unicode terminology, refer to the
175  * <a href="http://www.unicode.org/glossary/">Unicode Glossary</a>.
176  *
177  * <!-- Android-removed: paragraph on ValueBased
178  * <p>This is a <a href="{@docRoot}/java.base/java/lang/doc-files/ValueBased.html">value-based</a>
179  * class; programmers should treat instances that are
180  * {@linkplain #equals(Object) equal} as interchangeable and should not
181  * use instances for synchronization, or unpredictable behavior may
182  * occur. For example, in a future release, synchronization may fail.
183  * -->
184  *
185  * @author  Lee Boynton
186  * @author  Guy Steele
187  * @author  Akira Tanaka
188  * @author  Martin Buchholz
189  * @author  Ulf Zibis
190  * @since   1.0
191  */
192 @jdk.internal.ValueBased
193 public final
194 class Character implements java.io.Serializable, Comparable<Character>, Constable {
    /**
     * The minimum radix available for conversion to and from strings.
     * The constant value of this field is the smallest value permitted
     * for the radix argument in radix-conversion methods such as the
     * {@code digit} method, the {@code forDigit} method, and the
     * {@code toString} method of class {@code Integer}.
     * The value is {@code 2}.
     *
     * @see     Character#digit(char, int)
     * @see     Character#forDigit(int, int)
     * @see     Integer#toString(int, int)
     * @see     Integer#valueOf(String)
     */
    public static final int MIN_RADIX = 2;

    /**
     * The maximum radix available for conversion to and from strings.
     * The constant value of this field is the largest value permitted
     * for the radix argument in radix-conversion methods such as the
     * {@code digit} method, the {@code forDigit} method, and the
     * {@code toString} method of class {@code Integer}.
     * The value is {@code 36}.
     *
     * @see     Character#digit(char, int)
     * @see     Character#forDigit(int, int)
     * @see     Integer#toString(int, int)
     * @see     Integer#valueOf(String)
     */
    public static final int MAX_RADIX = 36;

    /**
     * The constant value of this field is the smallest value of type
     * {@code char}, {@code '\u005Cu0000'}.
     *
     * @since   1.0.2
     */
    public static final char MIN_VALUE = '\u0000';

    /**
     * The constant value of this field is the largest value of type
     * {@code char}, {@code '\u005CuFFFF'}.
     *
     * @since   1.0.2
     */
    public static final char MAX_VALUE = '\uFFFF';

    /**
     * The {@code Class} instance representing the primitive type
     * {@code char}.
     *
     * @since   1.1
     */
    // The result of Class.getPrimitiveClass("char") is cast to Class<Character>;
    // the cast is unchecked, which is why the warning is suppressed on this field.
    @SuppressWarnings("unchecked")
    public static final Class<Character> TYPE = (Class<Character>) Class.getPrimitiveClass("char");
247 
248     /*
249      * Normative general types
250      */
251 
252     /*
253      * General character types
254      */
255 
    /**
     * General category "Cn" (unassigned) in the Unicode specification.
     * @since   1.1
     */
    public static final byte UNASSIGNED = 0;

    /**
     * General category "Lu" (uppercase letter) in the Unicode specification.
     * @since   1.1
     */
    public static final byte UPPERCASE_LETTER = 1;

    /**
     * General category "Ll" (lowercase letter) in the Unicode specification.
     * @since   1.1
     */
    public static final byte LOWERCASE_LETTER = 2;

    /**
     * General category "Lt" (titlecase letter) in the Unicode specification.
     * @since   1.1
     */
    public static final byte TITLECASE_LETTER = 3;

    /**
     * General category "Lm" (modifier letter) in the Unicode specification.
     * @since   1.1
     */
    public static final byte MODIFIER_LETTER = 4;

    /**
     * General category "Lo" (other letter) in the Unicode specification.
     * @since   1.1
     */
    public static final byte OTHER_LETTER = 5;

    /**
     * General category "Mn" (non-spacing mark) in the Unicode specification.
     * @since   1.1
     */
    public static final byte NON_SPACING_MARK = 6;

    /**
     * General category "Me" (enclosing mark) in the Unicode specification.
     * @since   1.1
     */
    public static final byte ENCLOSING_MARK = 7;

    /**
     * General category "Mc" (combining spacing mark) in the Unicode specification.
     * @since   1.1
     */
    public static final byte COMBINING_SPACING_MARK = 8;

    /**
     * General category "Nd" (decimal digit number) in the Unicode specification.
     * @since   1.1
     */
    public static final byte DECIMAL_DIGIT_NUMBER = 9;

    /**
     * General category "Nl" (letter number) in the Unicode specification.
     * @since   1.1
     */
    public static final byte LETTER_NUMBER = 10;

    /**
     * General category "No" (other number) in the Unicode specification.
     * @since   1.1
     */
    public static final byte OTHER_NUMBER = 11;

    /**
     * General category "Zs" (space separator) in the Unicode specification.
     * @since   1.1
     */
    public static final byte SPACE_SEPARATOR = 12;

    /**
     * General category "Zl" (line separator) in the Unicode specification.
     * @since   1.1
     */
    public static final byte LINE_SEPARATOR = 13;

    /**
     * General category "Zp" (paragraph separator) in the Unicode specification.
     * @since   1.1
     */
    public static final byte PARAGRAPH_SEPARATOR = 14;

    /**
     * General category "Cc" (control) in the Unicode specification.
     * @since   1.1
     */
    public static final byte CONTROL = 15;

    /**
     * General category "Cf" (format) in the Unicode specification.
     * @since   1.1
     */
    public static final byte FORMAT = 16;

    // Note: no category constant in this run uses the value 17; the numbering
    // jumps from FORMAT (16) to PRIVATE_USE (18).

    /**
     * General category "Co" (private use) in the Unicode specification.
     * @since   1.1
     */
    public static final byte PRIVATE_USE = 18;

    /**
     * General category "Cs" (surrogate) in the Unicode specification.
     * @since   1.1
     */
    public static final byte SURROGATE = 19;

    /**
     * General category "Pd" (dash punctuation) in the Unicode specification.
     * @since   1.1
     */
    public static final byte DASH_PUNCTUATION = 20;

    /**
     * General category "Ps" (start punctuation) in the Unicode specification.
     * @since   1.1
     */
    public static final byte START_PUNCTUATION = 21;

    /**
     * General category "Pe" (end punctuation) in the Unicode specification.
     * @since   1.1
     */
    public static final byte END_PUNCTUATION = 22;

    /**
     * General category "Pc" (connector punctuation) in the Unicode specification.
     * @since   1.1
     */
    public static final byte CONNECTOR_PUNCTUATION = 23;

    /**
     * General category "Po" (other punctuation) in the Unicode specification.
     * @since   1.1
     */
    public static final byte OTHER_PUNCTUATION = 24;

    /**
     * General category "Sm" (math symbol) in the Unicode specification.
     * @since   1.1
     */
    public static final byte MATH_SYMBOL = 25;

    /**
     * General category "Sc" (currency symbol) in the Unicode specification.
     * @since   1.1
     */
    public static final byte CURRENCY_SYMBOL = 26;

    /**
     * General category "Sk" (modifier symbol) in the Unicode specification.
     * @since   1.1
     */
    public static final byte MODIFIER_SYMBOL = 27;

    /**
     * General category "So" (other symbol) in the Unicode specification.
     * @since   1.1
     */
    public static final byte OTHER_SYMBOL = 28;

    /**
     * General category "Pi" (initial quote punctuation) in the Unicode specification.
     * @since   1.4
     */
    public static final byte INITIAL_QUOTE_PUNCTUATION = 29;

    /**
     * General category "Pf" (final quote punctuation) in the Unicode specification.
     * @since   1.4
     */
    public static final byte FINAL_QUOTE_PUNCTUATION = 30;
435 
    /**
     * Error flag. Use int (code point) to avoid confusion with U+FFFF.
     * The literal {@code 0xFFFFFFFF} has all 32 bits set, i.e. the
     * {@code int} value {@code -1}, which a 16-bit {@code char} cannot hold.
     */
    static final int ERROR = 0xFFFFFFFF;
440 
441 
    // Bidirectional character type constants. The values declared here run
    // from -1 (undefined) through 22 with no gaps.

    /**
     * Undefined bidirectional character type. Undefined {@code char}
     * values have undefined directionality in the Unicode specification.
     * @since 1.4
     */
    public static final byte DIRECTIONALITY_UNDEFINED = -1;

    /**
     * Strong bidirectional character type "L" in the Unicode specification.
     * @since 1.4
     */
    public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0;

    /**
     * Strong bidirectional character type "R" in the Unicode specification.
     * @since 1.4
     */
    public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1;

    /**
     * Strong bidirectional character type "AL" in the Unicode specification.
     * @since 1.4
     */
    public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2;

    /**
     * Weak bidirectional character type "EN" in the Unicode specification.
     * @since 1.4
     */
    public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3;

    /**
     * Weak bidirectional character type "ES" in the Unicode specification.
     * @since 1.4
     */
    public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4;

    /**
     * Weak bidirectional character type "ET" in the Unicode specification.
     * @since 1.4
     */
    public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5;

    /**
     * Weak bidirectional character type "AN" in the Unicode specification.
     * @since 1.4
     */
    public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6;

    /**
     * Weak bidirectional character type "CS" in the Unicode specification.
     * @since 1.4
     */
    public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7;

    /**
     * Weak bidirectional character type "NSM" in the Unicode specification.
     * @since 1.4
     */
    public static final byte DIRECTIONALITY_NONSPACING_MARK = 8;

    /**
     * Weak bidirectional character type "BN" in the Unicode specification.
     * @since 1.4
     */
    public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9;

    /**
     * Neutral bidirectional character type "B" in the Unicode specification.
     * @since 1.4
     */
    public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10;

    /**
     * Neutral bidirectional character type "S" in the Unicode specification.
     * @since 1.4
     */
    public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11;

    /**
     * Neutral bidirectional character type "WS" in the Unicode specification.
     * @since 1.4
     */
    public static final byte DIRECTIONALITY_WHITESPACE = 12;

    /**
     * Neutral bidirectional character type "ON" in the Unicode specification.
     * @since 1.4
     */
    public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13;

    /**
     * Strong bidirectional character type "LRE" in the Unicode specification.
     * @since 1.4
     */
    public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14;

    /**
     * Strong bidirectional character type "LRO" in the Unicode specification.
     * @since 1.4
     */
    public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15;

    /**
     * Strong bidirectional character type "RLE" in the Unicode specification.
     * @since 1.4
     */
    public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16;

    /**
     * Strong bidirectional character type "RLO" in the Unicode specification.
     * @since 1.4
     */
    public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17;

    /**
     * Weak bidirectional character type "PDF" in the Unicode specification.
     * @since 1.4
     */
    public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;

    /**
     * Weak bidirectional character type "LRI" in the Unicode specification.
     * @since 9
     */
    public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE = 19;

    /**
     * Weak bidirectional character type "RLI" in the Unicode specification.
     * @since 9
     */
    public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE = 20;

    /**
     * Weak bidirectional character type "FSI" in the Unicode specification.
     * @since 9
     */
    public static final byte DIRECTIONALITY_FIRST_STRONG_ISOLATE = 21;

    /**
     * Weak bidirectional character type "PDI" in the Unicode specification.
     * @since 9
     */
    public static final byte DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE = 22;
586 
    /**
     * The minimum value of a
     * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
     * Unicode high-surrogate code unit</a>
     * in the UTF-16 encoding, constant {@code '\u005CuD800'}.
     * A high-surrogate is also known as a <i>leading-surrogate</i>.
     *
     * @since 1.5
     */
    public static final char MIN_HIGH_SURROGATE = '\uD800';

    /**
     * The maximum value of a
     * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
     * Unicode high-surrogate code unit</a>
     * in the UTF-16 encoding, constant {@code '\u005CuDBFF'}.
     * A high-surrogate is also known as a <i>leading-surrogate</i>.
     *
     * @since 1.5
     */
    public static final char MAX_HIGH_SURROGATE = '\uDBFF';

    /**
     * The minimum value of a
     * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
     * Unicode low-surrogate code unit</a>
     * in the UTF-16 encoding, constant {@code '\u005CuDC00'}.
     * A low-surrogate is also known as a <i>trailing-surrogate</i>.
     *
     * @since 1.5
     */
    public static final char MIN_LOW_SURROGATE  = '\uDC00';

    /**
     * The maximum value of a
     * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
     * Unicode low-surrogate code unit</a>
     * in the UTF-16 encoding, constant {@code '\u005CuDFFF'}.
     * A low-surrogate is also known as a <i>trailing-surrogate</i>.
     *
     * @since 1.5
     */
    public static final char MAX_LOW_SURROGATE  = '\uDFFF';

    /**
     * The minimum value of a Unicode surrogate code unit in the
     * UTF-16 encoding, constant {@code '\u005CuD800'}.
     * Defined as {@link #MIN_HIGH_SURROGATE}, so the two always agree.
     *
     * @since 1.5
     */
    public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE;

    /**
     * The maximum value of a Unicode surrogate code unit in the
     * UTF-16 encoding, constant {@code '\u005CuDFFF'}.
     * Defined as {@link #MAX_LOW_SURROGATE}, so the two always agree.
     *
     * @since 1.5
     */
    public static final char MAX_SURROGATE = MAX_LOW_SURROGATE;

    /**
     * The minimum value of a
     * <a href="http://www.unicode.org/glossary/#supplementary_code_point">
     * Unicode supplementary code point</a>, constant {@code U+10000}.
     * This is one greater than {@code '\u005CuFFFF'}, the largest
     * {@code char} value.
     *
     * @since 1.5
     */
    public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x010000;

    /**
     * The minimum value of a
     * <a href="http://www.unicode.org/glossary/#code_point">
     * Unicode code point</a>, constant {@code U+0000}.
     *
     * @since 1.5
     */
    public static final int MIN_CODE_POINT = 0x000000;

    /**
     * The maximum value of a
     * <a href="http://www.unicode.org/glossary/#code_point">
     * Unicode code point</a>, constant {@code U+10FFFF}.
     *
     * @since 1.5
     */
    public static final int MAX_CODE_POINT = 0X10FFFF;
673 
    // BEGIN Android-added: Use ICU.
    // The indices in byte[] DIRECTIONALITY are based on icu4c's u_charDirection(),
    // accessed via getDirectionalityImpl(), implemented in Character.cpp.
    // Each element is the DIRECTIONALITY_* constant stored at that index. The
    // table holds 19 entries (indices 0-18); it contains neither
    // DIRECTIONALITY_UNDEFINED nor any of the four *_ISOLATE constants.
    // NOTE(review): how out-of-range indices are handled is decided by the
    // lookup code, which is not visible in this part of the file.
    private static final byte[] DIRECTIONALITY = new byte[] {
            DIRECTIONALITY_LEFT_TO_RIGHT, DIRECTIONALITY_RIGHT_TO_LEFT,
            DIRECTIONALITY_EUROPEAN_NUMBER,
            DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR,
            DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR,
            DIRECTIONALITY_ARABIC_NUMBER,
            DIRECTIONALITY_COMMON_NUMBER_SEPARATOR,
            DIRECTIONALITY_PARAGRAPH_SEPARATOR,
            DIRECTIONALITY_SEGMENT_SEPARATOR, DIRECTIONALITY_WHITESPACE,
            DIRECTIONALITY_OTHER_NEUTRALS,
            DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING,
            DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE,
            DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC,
            DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING,
            DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE,
            DIRECTIONALITY_POP_DIRECTIONAL_FORMAT,
            DIRECTIONALITY_NONSPACING_MARK, DIRECTIONALITY_BOUNDARY_NEUTRAL };
    // END Android-added: Use ICU.
695 
696     /**
697      * Returns an {@link Optional} containing the nominal descriptor for this
698      * instance.
699      *
700      * @return an {@link Optional} describing the {@linkplain Character} instance
701      * @since 15
702      * @hide
703      */
704     @Override
describeConstable()705     public Optional<DynamicConstantDesc<Character>> describeConstable() {
706         return Optional.of(DynamicConstantDesc.ofNamed(BSM_EXPLICIT_CAST, DEFAULT_NAME, CD_char, (int) value));
707     }
708 
709     /**
710      * Instances of this class represent particular subsets of the Unicode
711      * character set.  The only family of subsets defined in the
712      * {@code Character} class is {@link Character.UnicodeBlock}.
713      * Other portions of the Java API may define other subsets for their
714      * own purposes.
715      *
716      * @since 1.2
717      */
718     public static class Subset  {
719 
720         private String name;
721 
722         /**
723          * Constructs a new {@code Subset} instance.
724          *
725          * @param  name  The name of this subset
726          * @throws NullPointerException if name is {@code null}
727          */
Subset(String name)728         protected Subset(String name) {
729             if (name == null) {
730                 throw new NullPointerException("name");
731             }
732             this.name = name;
733         }
734 
735         /**
736          * Compares two {@code Subset} objects for equality.
737          * This method returns {@code true} if and only if
738          * {@code this} and the argument refer to the same
739          * object; since this method is {@code final}, this
740          * guarantee holds for all subclasses.
741          */
equals(Object obj)742         public final boolean equals(Object obj) {
743             return (this == obj);
744         }
745 
746         /**
747          * Returns the standard hash code as defined by the
748          * {@link Object#hashCode} method.  This method
749          * is {@code final} in order to ensure that the
750          * {@code equals} and {@code hashCode} methods will
751          * be consistent in all subclasses.
752          */
hashCode()753         public final int hashCode() {
754             return super.hashCode();
755         }
756 
757         /**
758          * Returns the name of this subset.
759          */
toString()760         public final String toString() {
761             return name;
762         }
763     }
764 
765     // See http://www.unicode.org/Public/UNIDATA/Blocks.txt
766     // for the latest specification of Unicode Blocks.
767 
768     /**
769      * A family of character subsets representing the character blocks in the
770      * Unicode specification. Character blocks generally define characters
771      * used for a specific script or purpose. A character is contained by
772      * at most one Unicode block.
773      *
774      * @since 1.2
775      */
776     public static final class UnicodeBlock extends Subset {
        /**
         * 684 - the expected number of entities
         * 0.75 - the default load factor of HashMap
         * <p>
         * Used only to pre-size {@code map} below, so that the expected number
         * of entries fits under that load factor.
         */
        private static final int NUM_ENTITIES = 684;
        // Name-to-block lookup table. Every constructor visible here that
        // registers a block does so with map.put, keyed by the identifier name
        // and by each alias. The initial capacity is NUM_ENTITIES / 0.75 + 1,
        // truncated to an int.
        private static Map<String, UnicodeBlock> map =
                new HashMap<>((int)(NUM_ENTITIES / 0.75f + 1.0f));
784 
        /**
         * Creates a UnicodeBlock with the given identifier name.
         * This name must be the same as the block identifier.
         * The new block is also registered in {@code map} under that name.
         *
         * @param idName the block identifier; passed to the {@code Subset}
         *               constructor and used as the map key
         */
        private UnicodeBlock(String idName) {
            super(idName);
            map.put(idName, this);
        }
793 
794         // BEGIN Android-added: ICU consistency: Don't map deprecated SURROGATES_AREA. b/26140229
795         // Add a (String, boolean) constructor for use by SURROGATES_AREA.
UnicodeBlock(String idName, boolean isMap)796         private UnicodeBlock(String idName, boolean isMap) {
797             super(idName);
798             if (isMap) {
799                 map.put(idName, this);
800             }
801         }
802         // END Android-added: ICU consistency: Don't map deprecated SURROGATES_AREA. b/26140229
803 
804         /**
805          * Creates a UnicodeBlock with the given identifier name and
806          * alias name.
807          */
UnicodeBlock(String idName, String alias)808         private UnicodeBlock(String idName, String alias) {
809             this(idName);
810             map.put(alias, this);
811         }
812 
813         /**
814          * Creates a UnicodeBlock with the given identifier name and
815          * alias names.
816          */
UnicodeBlock(String idName, String... aliases)817         private UnicodeBlock(String idName, String... aliases) {
818             this(idName);
819             for (String alias : aliases)
820                 map.put(alias, this);
821         }
822 
        // Each constant below passes three strings: the identifier name, the
        // block name written with spaces, and the same name with the spaces
        // removed. All three are registered as keys in the lookup map.

        /**
         * Constant for the "Basic Latin" Unicode character block.
         * @since 1.2
         */
        public static final UnicodeBlock  BASIC_LATIN =
            new UnicodeBlock("BASIC_LATIN",
                             "BASIC LATIN",
                             "BASICLATIN");

        /**
         * Constant for the "Latin-1 Supplement" Unicode character block.
         * @since 1.2
         */
        public static final UnicodeBlock LATIN_1_SUPPLEMENT =
            new UnicodeBlock("LATIN_1_SUPPLEMENT",
                             "LATIN-1 SUPPLEMENT",
                             "LATIN-1SUPPLEMENT");

        /**
         * Constant for the "Latin Extended-A" Unicode character block.
         * @since 1.2
         */
        public static final UnicodeBlock LATIN_EXTENDED_A =
            new UnicodeBlock("LATIN_EXTENDED_A",
                             "LATIN EXTENDED-A",
                             "LATINEXTENDED-A");

        /**
         * Constant for the "Latin Extended-B" Unicode character block.
         * @since 1.2
         */
        public static final UnicodeBlock LATIN_EXTENDED_B =
            new UnicodeBlock("LATIN_EXTENDED_B",
                             "LATIN EXTENDED-B",
                             "LATINEXTENDED-B");

        /**
         * Constant for the "IPA Extensions" Unicode character block.
         * @since 1.2
         */
        public static final UnicodeBlock IPA_EXTENSIONS =
            new UnicodeBlock("IPA_EXTENSIONS",
                             "IPA EXTENSIONS",
                             "IPAEXTENSIONS");

        /**
         * Constant for the "Spacing Modifier Letters" Unicode character block.
         * @since 1.2
         */
        public static final UnicodeBlock SPACING_MODIFIER_LETTERS =
            new UnicodeBlock("SPACING_MODIFIER_LETTERS",
                             "SPACING MODIFIER LETTERS",
                             "SPACINGMODIFIERLETTERS");

        /**
         * Constant for the "Combining Diacritical Marks" Unicode character block.
         * @since 1.2
         */
        public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS =
            new UnicodeBlock("COMBINING_DIACRITICAL_MARKS",
                             "COMBINING DIACRITICAL MARKS",
                             "COMBININGDIACRITICALMARKS");

        /**
         * Constant for the "Greek and Coptic" Unicode character block.
         * <p>
         * This block was previously known as the "Greek" block.
         * The identifier name remains {@code "GREEK"}; the two aliases use
         * the "Greek and Coptic" name.
         *
         * @since 1.2
         */
        public static final UnicodeBlock GREEK =
            new UnicodeBlock("GREEK",
                             "GREEK AND COPTIC",
                             "GREEKANDCOPTIC");
897 
898         /**
899          * Constant for the "Cyrillic" Unicode character block.
900          * @since 1.2
901          */
902         public static final UnicodeBlock CYRILLIC =
903             new UnicodeBlock("CYRILLIC");
904 
905         /**
906          * Constant for the "Armenian" Unicode character block.
907          * @since 1.2
908          */
909         public static final UnicodeBlock ARMENIAN =
910             new UnicodeBlock("ARMENIAN");
911 
912         /**
913          * Constant for the "Hebrew" Unicode character block.
914          * @since 1.2
915          */
916         public static final UnicodeBlock HEBREW =
917             new UnicodeBlock("HEBREW");
918 
919         /**
920          * Constant for the "Arabic" Unicode character block.
921          * @since 1.2
922          */
923         public static final UnicodeBlock ARABIC =
924             new UnicodeBlock("ARABIC");
925 
926         /**
927          * Constant for the "Devanagari" Unicode character block.
928          * @since 1.2
929          */
930         public static final UnicodeBlock DEVANAGARI =
931             new UnicodeBlock("DEVANAGARI");
932 
933         /**
934          * Constant for the "Bengali" Unicode character block.
935          * @since 1.2
936          */
937         public static final UnicodeBlock BENGALI =
938             new UnicodeBlock("BENGALI");
939 
940         /**
941          * Constant for the "Gurmukhi" Unicode character block.
942          * @since 1.2
943          */
944         public static final UnicodeBlock GURMUKHI =
945             new UnicodeBlock("GURMUKHI");
946 
947         /**
948          * Constant for the "Gujarati" Unicode character block.
949          * @since 1.2
950          */
951         public static final UnicodeBlock GUJARATI =
952             new UnicodeBlock("GUJARATI");
953 
954         /**
955          * Constant for the "Oriya" Unicode character block.
956          * @since 1.2
957          */
958         public static final UnicodeBlock ORIYA =
959             new UnicodeBlock("ORIYA");
960 
961         /**
962          * Constant for the "Tamil" Unicode character block.
963          * @since 1.2
964          */
965         public static final UnicodeBlock TAMIL =
966             new UnicodeBlock("TAMIL");
967 
968         /**
969          * Constant for the "Telugu" Unicode character block.
970          * @since 1.2
971          */
972         public static final UnicodeBlock TELUGU =
973             new UnicodeBlock("TELUGU");
974 
975         /**
976          * Constant for the "Kannada" Unicode character block.
977          * @since 1.2
978          */
979         public static final UnicodeBlock KANNADA =
980             new UnicodeBlock("KANNADA");
981 
982         /**
983          * Constant for the "Malayalam" Unicode character block.
984          * @since 1.2
985          */
986         public static final UnicodeBlock MALAYALAM =
987             new UnicodeBlock("MALAYALAM");
988 
989         /**
990          * Constant for the "Thai" Unicode character block.
991          * @since 1.2
992          */
993         public static final UnicodeBlock THAI =
994             new UnicodeBlock("THAI");
995 
996         /**
997          * Constant for the "Lao" Unicode character block.
998          * @since 1.2
999          */
1000         public static final UnicodeBlock LAO =
1001             new UnicodeBlock("LAO");
1002 
1003         /**
1004          * Constant for the "Tibetan" Unicode character block.
1005          * @since 1.2
1006          */
1007         public static final UnicodeBlock TIBETAN =
1008             new UnicodeBlock("TIBETAN");
1009 
1010         /**
1011          * Constant for the "Georgian" Unicode character block.
1012          * @since 1.2
1013          */
1014         public static final UnicodeBlock GEORGIAN =
1015             new UnicodeBlock("GEORGIAN");
1016 
1017         /**
1018          * Constant for the "Hangul Jamo" Unicode character block.
1019          * @since 1.2
1020          */
1021         public static final UnicodeBlock HANGUL_JAMO =
1022             new UnicodeBlock("HANGUL_JAMO",
1023                              "HANGUL JAMO",
1024                              "HANGULJAMO");
1025 
1026         /**
1027          * Constant for the "Latin Extended Additional" Unicode character block.
1028          * @since 1.2
1029          */
1030         public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL =
1031             new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL",
1032                              "LATIN EXTENDED ADDITIONAL",
1033                              "LATINEXTENDEDADDITIONAL");
1034 
1035         /**
1036          * Constant for the "Greek Extended" Unicode character block.
1037          * @since 1.2
1038          */
1039         public static final UnicodeBlock GREEK_EXTENDED =
1040             new UnicodeBlock("GREEK_EXTENDED",
1041                              "GREEK EXTENDED",
1042                              "GREEKEXTENDED");
1043 
1044         /**
1045          * Constant for the "General Punctuation" Unicode character block.
1046          * @since 1.2
1047          */
1048         public static final UnicodeBlock GENERAL_PUNCTUATION =
1049             new UnicodeBlock("GENERAL_PUNCTUATION",
1050                              "GENERAL PUNCTUATION",
1051                              "GENERALPUNCTUATION");
1052 
1053         /**
1054          * Constant for the "Superscripts and Subscripts" Unicode character
1055          * block.
1056          * @since 1.2
1057          */
1058         public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS =
1059             new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS",
1060                              "SUPERSCRIPTS AND SUBSCRIPTS",
1061                              "SUPERSCRIPTSANDSUBSCRIPTS");
1062 
1063         /**
1064          * Constant for the "Currency Symbols" Unicode character block.
1065          * @since 1.2
1066          */
1067         public static final UnicodeBlock CURRENCY_SYMBOLS =
1068             new UnicodeBlock("CURRENCY_SYMBOLS",
1069                              "CURRENCY SYMBOLS",
1070                              "CURRENCYSYMBOLS");
1071 
1072         /**
1073          * Constant for the "Combining Diacritical Marks for Symbols" Unicode
1074          * character block.
1075          * <p>
1076          * This block was previously known as "Combining Marks for Symbols".
1077          * @since 1.2
1078          */
1079         public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS =
1080             new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS",
1081                              "COMBINING DIACRITICAL MARKS FOR SYMBOLS",
1082                              "COMBININGDIACRITICALMARKSFORSYMBOLS",
1083                              "COMBINING MARKS FOR SYMBOLS",
1084                              "COMBININGMARKSFORSYMBOLS");
1085 
1086         /**
1087          * Constant for the "Letterlike Symbols" Unicode character block.
1088          * @since 1.2
1089          */
1090         public static final UnicodeBlock LETTERLIKE_SYMBOLS =
1091             new UnicodeBlock("LETTERLIKE_SYMBOLS",
1092                              "LETTERLIKE SYMBOLS",
1093                              "LETTERLIKESYMBOLS");
1094 
1095         /**
1096          * Constant for the "Number Forms" Unicode character block.
1097          * @since 1.2
1098          */
1099         public static final UnicodeBlock NUMBER_FORMS =
1100             new UnicodeBlock("NUMBER_FORMS",
1101                              "NUMBER FORMS",
1102                              "NUMBERFORMS");
1103 
1104         /**
1105          * Constant for the "Arrows" Unicode character block.
1106          * @since 1.2
1107          */
1108         public static final UnicodeBlock ARROWS =
1109             new UnicodeBlock("ARROWS");
1110 
1111         /**
1112          * Constant for the "Mathematical Operators" Unicode character block.
1113          * @since 1.2
1114          */
1115         public static final UnicodeBlock MATHEMATICAL_OPERATORS =
1116             new UnicodeBlock("MATHEMATICAL_OPERATORS",
1117                              "MATHEMATICAL OPERATORS",
1118                              "MATHEMATICALOPERATORS");
1119 
1120         /**
1121          * Constant for the "Miscellaneous Technical" Unicode character block.
1122          * @since 1.2
1123          */
1124         public static final UnicodeBlock MISCELLANEOUS_TECHNICAL =
1125             new UnicodeBlock("MISCELLANEOUS_TECHNICAL",
1126                              "MISCELLANEOUS TECHNICAL",
1127                              "MISCELLANEOUSTECHNICAL");
1128 
1129         /**
1130          * Constant for the "Control Pictures" Unicode character block.
1131          * @since 1.2
1132          */
1133         public static final UnicodeBlock CONTROL_PICTURES =
1134             new UnicodeBlock("CONTROL_PICTURES",
1135                              "CONTROL PICTURES",
1136                              "CONTROLPICTURES");
1137 
1138         /**
1139          * Constant for the "Optical Character Recognition" Unicode character block.
1140          * @since 1.2
1141          */
1142         public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION =
1143             new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION",
1144                              "OPTICAL CHARACTER RECOGNITION",
1145                              "OPTICALCHARACTERRECOGNITION");
1146 
1147         /**
1148          * Constant for the "Enclosed Alphanumerics" Unicode character block.
1149          * @since 1.2
1150          */
1151         public static final UnicodeBlock ENCLOSED_ALPHANUMERICS =
1152             new UnicodeBlock("ENCLOSED_ALPHANUMERICS",
1153                              "ENCLOSED ALPHANUMERICS",
1154                              "ENCLOSEDALPHANUMERICS");
1155 
1156         /**
1157          * Constant for the "Box Drawing" Unicode character block.
1158          * @since 1.2
1159          */
1160         public static final UnicodeBlock BOX_DRAWING =
1161             new UnicodeBlock("BOX_DRAWING",
1162                              "BOX DRAWING",
1163                              "BOXDRAWING");
1164 
1165         /**
1166          * Constant for the "Block Elements" Unicode character block.
1167          * @since 1.2
1168          */
1169         public static final UnicodeBlock BLOCK_ELEMENTS =
1170             new UnicodeBlock("BLOCK_ELEMENTS",
1171                              "BLOCK ELEMENTS",
1172                              "BLOCKELEMENTS");
1173 
1174         /**
1175          * Constant for the "Geometric Shapes" Unicode character block.
1176          * @since 1.2
1177          */
1178         public static final UnicodeBlock GEOMETRIC_SHAPES =
1179             new UnicodeBlock("GEOMETRIC_SHAPES",
1180                              "GEOMETRIC SHAPES",
1181                              "GEOMETRICSHAPES");
1182 
1183         /**
1184          * Constant for the "Miscellaneous Symbols" Unicode character block.
1185          * @since 1.2
1186          */
1187         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS =
1188             new UnicodeBlock("MISCELLANEOUS_SYMBOLS",
1189                              "MISCELLANEOUS SYMBOLS",
1190                              "MISCELLANEOUSSYMBOLS");
1191 
1192         /**
1193          * Constant for the "Dingbats" Unicode character block.
1194          * @since 1.2
1195          */
1196         public static final UnicodeBlock DINGBATS =
1197             new UnicodeBlock("DINGBATS");
1198 
1199         /**
1200          * Constant for the "CJK Symbols and Punctuation" Unicode character block.
1201          * @since 1.2
1202          */
1203         public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION =
1204             new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION",
1205                              "CJK SYMBOLS AND PUNCTUATION",
1206                              "CJKSYMBOLSANDPUNCTUATION");
1207 
1208         /**
1209          * Constant for the "Hiragana" Unicode character block.
1210          * @since 1.2
1211          */
1212         public static final UnicodeBlock HIRAGANA =
1213             new UnicodeBlock("HIRAGANA");
1214 
1215         /**
1216          * Constant for the "Katakana" Unicode character block.
1217          * @since 1.2
1218          */
1219         public static final UnicodeBlock KATAKANA =
1220             new UnicodeBlock("KATAKANA");
1221 
1222         /**
1223          * Constant for the "Bopomofo" Unicode character block.
1224          * @since 1.2
1225          */
1226         public static final UnicodeBlock BOPOMOFO =
1227             new UnicodeBlock("BOPOMOFO");
1228 
1229         /**
1230          * Constant for the "Hangul Compatibility Jamo" Unicode character block.
1231          * @since 1.2
1232          */
1233         public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO =
1234             new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO",
1235                              "HANGUL COMPATIBILITY JAMO",
1236                              "HANGULCOMPATIBILITYJAMO");
1237 
1238         /**
1239          * Constant for the "Kanbun" Unicode character block.
1240          * @since 1.2
1241          */
1242         public static final UnicodeBlock KANBUN =
1243             new UnicodeBlock("KANBUN");
1244 
1245         /**
1246          * Constant for the "Enclosed CJK Letters and Months" Unicode character block.
1247          * @since 1.2
1248          */
1249         public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS =
1250             new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS",
1251                              "ENCLOSED CJK LETTERS AND MONTHS",
1252                              "ENCLOSEDCJKLETTERSANDMONTHS");
1253 
1254         /**
1255          * Constant for the "CJK Compatibility" Unicode character block.
1256          * @since 1.2
1257          */
1258         public static final UnicodeBlock CJK_COMPATIBILITY =
1259             new UnicodeBlock("CJK_COMPATIBILITY",
1260                              "CJK COMPATIBILITY",
1261                              "CJKCOMPATIBILITY");
1262 
1263         /**
1264          * Constant for the "CJK Unified Ideographs" Unicode character block.
1265          * @since 1.2
1266          */
1267         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS =
1268             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS",
1269                              "CJK UNIFIED IDEOGRAPHS",
1270                              "CJKUNIFIEDIDEOGRAPHS");
1271 
1272         /**
1273          * Constant for the "Hangul Syllables" Unicode character block.
1274          * @since 1.2
1275          */
1276         public static final UnicodeBlock HANGUL_SYLLABLES =
1277             new UnicodeBlock("HANGUL_SYLLABLES",
1278                              "HANGUL SYLLABLES",
1279                              "HANGULSYLLABLES");
1280 
1281         /**
1282          * Constant for the "Private Use Area" Unicode character block.
1283          * @since 1.2
1284          */
1285         public static final UnicodeBlock PRIVATE_USE_AREA =
1286             new UnicodeBlock("PRIVATE_USE_AREA",
1287                              "PRIVATE USE AREA",
1288                              "PRIVATEUSEAREA");
1289 
1290         /**
1291          * Constant for the "CJK Compatibility Ideographs" Unicode character
1292          * block.
1293          * @since 1.2
1294          */
1295         public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS =
1296             new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS",
1297                              "CJK COMPATIBILITY IDEOGRAPHS",
1298                              "CJKCOMPATIBILITYIDEOGRAPHS");
1299 
1300         /**
1301          * Constant for the "Alphabetic Presentation Forms" Unicode character block.
1302          * @since 1.2
1303          */
1304         public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS =
1305             new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS",
1306                              "ALPHABETIC PRESENTATION FORMS",
1307                              "ALPHABETICPRESENTATIONFORMS");
1308 
1309         /**
1310          * Constant for the "Arabic Presentation Forms-A" Unicode character
1311          * block.
1312          * @since 1.2
1313          */
1314         public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A =
1315             new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A",
1316                              "ARABIC PRESENTATION FORMS-A",
1317                              "ARABICPRESENTATIONFORMS-A");
1318 
1319         /**
1320          * Constant for the "Combining Half Marks" Unicode character block.
1321          * @since 1.2
1322          */
1323         public static final UnicodeBlock COMBINING_HALF_MARKS =
1324             new UnicodeBlock("COMBINING_HALF_MARKS",
1325                              "COMBINING HALF MARKS",
1326                              "COMBININGHALFMARKS");
1327 
1328         /**
1329          * Constant for the "CJK Compatibility Forms" Unicode character block.
1330          * @since 1.2
1331          */
1332         public static final UnicodeBlock CJK_COMPATIBILITY_FORMS =
1333             new UnicodeBlock("CJK_COMPATIBILITY_FORMS",
1334                              "CJK COMPATIBILITY FORMS",
1335                              "CJKCOMPATIBILITYFORMS");
1336 
1337         /**
1338          * Constant for the "Small Form Variants" Unicode character block.
1339          * @since 1.2
1340          */
1341         public static final UnicodeBlock SMALL_FORM_VARIANTS =
1342             new UnicodeBlock("SMALL_FORM_VARIANTS",
1343                              "SMALL FORM VARIANTS",
1344                              "SMALLFORMVARIANTS");
1345 
1346         /**
1347          * Constant for the "Arabic Presentation Forms-B" Unicode character block.
1348          * @since 1.2
1349          */
1350         public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B =
1351             new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B",
1352                              "ARABIC PRESENTATION FORMS-B",
1353                              "ARABICPRESENTATIONFORMS-B");
1354 
1355         /**
1356          * Constant for the "Halfwidth and Fullwidth Forms" Unicode character
1357          * block.
1358          * @since 1.2
1359          */
1360         public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS =
1361             new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS",
1362                              "HALFWIDTH AND FULLWIDTH FORMS",
1363                              "HALFWIDTHANDFULLWIDTHFORMS");
1364 
1365         /**
1366          * Constant for the "Specials" Unicode character block.
1367          * @since 1.2
1368          */
1369         public static final UnicodeBlock SPECIALS =
1370             new UnicodeBlock("SPECIALS");
1371 
1372         /**
1373          * @deprecated
1374          * Instead of {@code SURROGATES_AREA}, use {@link #HIGH_SURROGATES},
1375          * {@link #HIGH_PRIVATE_USE_SURROGATES}, and {@link #LOW_SURROGATES}.
1376          * These constants match the block definitions of the Unicode Standard.
1377          * The {@link #of(char)} and {@link #of(int)} methods return the
1378          * standard constants.
1379          */
1380         @Deprecated(since="1.5")
1381         public static final UnicodeBlock SURROGATES_AREA =
1382             // Android-changed: ICU consistency: Don't map deprecated SURROGATES_AREA. b/26140229
1383             // new UnicodeBlock("SURROGATES_AREA");
1384             new UnicodeBlock("SURROGATES_AREA", false);
1385 
1386         /**
1387          * Constant for the "Syriac" Unicode character block.
1388          * @since 1.4
1389          */
1390         public static final UnicodeBlock SYRIAC =
1391             new UnicodeBlock("SYRIAC");
1392 
1393         /**
1394          * Constant for the "Thaana" Unicode character block.
1395          * @since 1.4
1396          */
1397         public static final UnicodeBlock THAANA =
1398             new UnicodeBlock("THAANA");
1399 
1400         /**
1401          * Constant for the "Sinhala" Unicode character block.
1402          * @since 1.4
1403          */
1404         public static final UnicodeBlock SINHALA =
1405             new UnicodeBlock("SINHALA");
1406 
1407         /**
1408          * Constant for the "Myanmar" Unicode character block.
1409          * @since 1.4
1410          */
1411         public static final UnicodeBlock MYANMAR =
1412             new UnicodeBlock("MYANMAR");
1413 
1414         /**
1415          * Constant for the "Ethiopic" Unicode character block.
1416          * @since 1.4
1417          */
1418         public static final UnicodeBlock ETHIOPIC =
1419             new UnicodeBlock("ETHIOPIC");
1420 
1421         /**
1422          * Constant for the "Cherokee" Unicode character block.
1423          * @since 1.4
1424          */
1425         public static final UnicodeBlock CHEROKEE =
1426             new UnicodeBlock("CHEROKEE");
1427 
1428         /**
1429          * Constant for the "Unified Canadian Aboriginal Syllabics" Unicode character block.
1430          * @since 1.4
1431          */
1432         public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS =
1433             new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS",
1434                              "UNIFIED CANADIAN ABORIGINAL SYLLABICS",
1435                              "UNIFIEDCANADIANABORIGINALSYLLABICS");
1436 
1437         /**
1438          * Constant for the "Ogham" Unicode character block.
1439          * @since 1.4
1440          */
1441         public static final UnicodeBlock OGHAM =
1442             new UnicodeBlock("OGHAM");
1443 
1444         /**
1445          * Constant for the "Runic" Unicode character block.
1446          * @since 1.4
1447          */
1448         public static final UnicodeBlock RUNIC =
1449             new UnicodeBlock("RUNIC");
1450 
1451         /**
1452          * Constant for the "Khmer" Unicode character block.
1453          * @since 1.4
1454          */
1455         public static final UnicodeBlock KHMER =
1456             new UnicodeBlock("KHMER");
1457 
1458         /**
1459          * Constant for the "Mongolian" Unicode character block.
1460          * @since 1.4
1461          */
1462         public static final UnicodeBlock MONGOLIAN =
1463             new UnicodeBlock("MONGOLIAN");
1464 
1465         /**
1466          * Constant for the "Braille Patterns" Unicode character block.
1467          * @since 1.4
1468          */
1469         public static final UnicodeBlock BRAILLE_PATTERNS =
1470             new UnicodeBlock("BRAILLE_PATTERNS",
1471                              "BRAILLE PATTERNS",
1472                              "BRAILLEPATTERNS");
1473 
1474         /**
1475          * Constant for the "CJK Radicals Supplement" Unicode character block.
1476          * @since 1.4
1477          */
1478         public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT =
1479             new UnicodeBlock("CJK_RADICALS_SUPPLEMENT",
1480                              "CJK RADICALS SUPPLEMENT",
1481                              "CJKRADICALSSUPPLEMENT");
1482 
1483         /**
1484          * Constant for the "Kangxi Radicals" Unicode character block.
1485          * @since 1.4
1486          */
1487         public static final UnicodeBlock KANGXI_RADICALS =
1488             new UnicodeBlock("KANGXI_RADICALS",
1489                              "KANGXI RADICALS",
1490                              "KANGXIRADICALS");
1491 
1492         /**
1493          * Constant for the "Ideographic Description Characters" Unicode character block.
1494          * @since 1.4
1495          */
1496         public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS =
1497             new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS",
1498                              "IDEOGRAPHIC DESCRIPTION CHARACTERS",
1499                              "IDEOGRAPHICDESCRIPTIONCHARACTERS");
1500 
1501         /**
1502          * Constant for the "Bopomofo Extended" Unicode character block.
1503          * @since 1.4
1504          */
1505         public static final UnicodeBlock BOPOMOFO_EXTENDED =
1506             new UnicodeBlock("BOPOMOFO_EXTENDED",
1507                              "BOPOMOFO EXTENDED",
1508                              "BOPOMOFOEXTENDED");
1509 
1510         /**
1511          * Constant for the "CJK Unified Ideographs Extension A" Unicode character block.
1512          * @since 1.4
1513          */
1514         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A =
1515             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A",
1516                              "CJK UNIFIED IDEOGRAPHS EXTENSION A",
1517                              "CJKUNIFIEDIDEOGRAPHSEXTENSIONA");
1518 
1519         /**
1520          * Constant for the "Yi Syllables" Unicode character block.
1521          * @since 1.4
1522          */
1523         public static final UnicodeBlock YI_SYLLABLES =
1524             new UnicodeBlock("YI_SYLLABLES",
1525                              "YI SYLLABLES",
1526                              "YISYLLABLES");
1527 
1528         /**
1529          * Constant for the "Yi Radicals" Unicode character block.
1530          * @since 1.4
1531          */
1532         public static final UnicodeBlock YI_RADICALS =
1533             new UnicodeBlock("YI_RADICALS",
1534                              "YI RADICALS",
1535                              "YIRADICALS");
1536 
1537         /**
1538          * Constant for the "Cyrillic Supplement" Unicode character block.
1539          * This block was previously known as the "Cyrillic Supplementary" block.
1540          * @since 1.5
1541          */
1542         public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY =
1543             new UnicodeBlock("CYRILLIC_SUPPLEMENTARY",
1544                              "CYRILLIC SUPPLEMENTARY",
1545                              "CYRILLICSUPPLEMENTARY",
1546                              "CYRILLIC SUPPLEMENT",
1547                              "CYRILLICSUPPLEMENT");
1548 
1549         /**
1550          * Constant for the "Tagalog" Unicode character block.
1551          * @since 1.5
1552          */
1553         public static final UnicodeBlock TAGALOG =
1554             new UnicodeBlock("TAGALOG");
1555 
1556         /**
1557          * Constant for the "Hanunoo" Unicode character block.
1558          * @since 1.5
1559          */
1560         public static final UnicodeBlock HANUNOO =
1561             new UnicodeBlock("HANUNOO");
1562 
1563         /**
1564          * Constant for the "Buhid" Unicode character block.
1565          * @since 1.5
1566          */
1567         public static final UnicodeBlock BUHID =
1568             new UnicodeBlock("BUHID");
1569 
1570         /**
1571          * Constant for the "Tagbanwa" Unicode character block.
1572          * @since 1.5
1573          */
1574         public static final UnicodeBlock TAGBANWA =
1575             new UnicodeBlock("TAGBANWA");
1576 
1577         /**
1578          * Constant for the "Limbu" Unicode character block.
1579          * @since 1.5
1580          */
1581         public static final UnicodeBlock LIMBU =
1582             new UnicodeBlock("LIMBU");
1583 
1584         /**
1585          * Constant for the "Tai Le" Unicode character block.
1586          * @since 1.5
1587          */
1588         public static final UnicodeBlock TAI_LE =
1589             new UnicodeBlock("TAI_LE",
1590                              "TAI LE",
1591                              "TAILE");
1592 
1593         /**
1594          * Constant for the "Khmer Symbols" Unicode character block.
1595          * @since 1.5
1596          */
1597         public static final UnicodeBlock KHMER_SYMBOLS =
1598             new UnicodeBlock("KHMER_SYMBOLS",
1599                              "KHMER SYMBOLS",
1600                              "KHMERSYMBOLS");
1601 
1602         /**
1603          * Constant for the "Phonetic Extensions" Unicode character block.
1604          * @since 1.5
1605          */
1606         public static final UnicodeBlock PHONETIC_EXTENSIONS =
1607             new UnicodeBlock("PHONETIC_EXTENSIONS",
1608                              "PHONETIC EXTENSIONS",
1609                              "PHONETICEXTENSIONS");
1610 
1611         /**
1612          * Constant for the "Miscellaneous Mathematical Symbols-A" Unicode character block.
1613          * @since 1.5
1614          */
1615         public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A =
1616             new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A",
1617                              "MISCELLANEOUS MATHEMATICAL SYMBOLS-A",
1618                              "MISCELLANEOUSMATHEMATICALSYMBOLS-A");
1619 
1620         /**
1621          * Constant for the "Supplemental Arrows-A" Unicode character block.
1622          * @since 1.5
1623          */
1624         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A =
1625             new UnicodeBlock("SUPPLEMENTAL_ARROWS_A",
1626                              "SUPPLEMENTAL ARROWS-A",
1627                              "SUPPLEMENTALARROWS-A");
1628 
1629         /**
1630          * Constant for the "Supplemental Arrows-B" Unicode character block.
1631          * @since 1.5
1632          */
1633         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B =
1634             new UnicodeBlock("SUPPLEMENTAL_ARROWS_B",
1635                              "SUPPLEMENTAL ARROWS-B",
1636                              "SUPPLEMENTALARROWS-B");
1637 
1638         /**
1639          * Constant for the "Miscellaneous Mathematical Symbols-B" Unicode
1640          * character block.
1641          * @since 1.5
1642          */
1643         public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B =
1644             new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B",
1645                              "MISCELLANEOUS MATHEMATICAL SYMBOLS-B",
1646                              "MISCELLANEOUSMATHEMATICALSYMBOLS-B");
1647 
1648         /**
1649          * Constant for the "Supplemental Mathematical Operators" Unicode
1650          * character block.
1651          * @since 1.5
1652          */
1653         public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS =
1654             new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS",
1655                              "SUPPLEMENTAL MATHEMATICAL OPERATORS",
1656                              "SUPPLEMENTALMATHEMATICALOPERATORS");
1657 
1658         /**
1659          * Constant for the "Miscellaneous Symbols and Arrows" Unicode character
1660          * block.
1661          * @since 1.5
1662          */
1663         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS =
1664             new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS",
1665                              "MISCELLANEOUS SYMBOLS AND ARROWS",
1666                              "MISCELLANEOUSSYMBOLSANDARROWS");
1667 
1668         /**
1669          * Constant for the "Katakana Phonetic Extensions" Unicode character
1670          * block.
1671          * @since 1.5
1672          */
1673         public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS =
1674             new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS",
1675                              "KATAKANA PHONETIC EXTENSIONS",
1676                              "KATAKANAPHONETICEXTENSIONS");
1677 
1678         /**
1679          * Constant for the "Yijing Hexagram Symbols" Unicode character block.
1680          * @since 1.5
1681          */
1682         public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS =
1683             new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS",
1684                              "YIJING HEXAGRAM SYMBOLS",
1685                              "YIJINGHEXAGRAMSYMBOLS");
1686 
1687         /**
1688          * Constant for the "Variation Selectors" Unicode character block.
1689          * @since 1.5
1690          */
1691         public static final UnicodeBlock VARIATION_SELECTORS =
1692             new UnicodeBlock("VARIATION_SELECTORS",
1693                              "VARIATION SELECTORS",
1694                              "VARIATIONSELECTORS");
1695 
1696         /**
1697          * Constant for the "Linear B Syllabary" Unicode character block.
1698          * @since 1.5
1699          */
1700         public static final UnicodeBlock LINEAR_B_SYLLABARY =
1701             new UnicodeBlock("LINEAR_B_SYLLABARY",
1702                              "LINEAR B SYLLABARY",
1703                              "LINEARBSYLLABARY");
1704 
1705         /**
1706          * Constant for the "Linear B Ideograms" Unicode character block.
1707          * @since 1.5
1708          */
1709         public static final UnicodeBlock LINEAR_B_IDEOGRAMS =
1710             new UnicodeBlock("LINEAR_B_IDEOGRAMS",
1711                              "LINEAR B IDEOGRAMS",
1712                              "LINEARBIDEOGRAMS");
1713 
1714         /**
1715          * Constant for the "Aegean Numbers" Unicode character block.
1716          * @since 1.5
1717          */
1718         public static final UnicodeBlock AEGEAN_NUMBERS =
1719             new UnicodeBlock("AEGEAN_NUMBERS",
1720                              "AEGEAN NUMBERS",
1721                              "AEGEANNUMBERS");
1722 
1723         /**
1724          * Constant for the "Old Italic" Unicode character block.
1725          * @since 1.5
1726          */
1727         public static final UnicodeBlock OLD_ITALIC =
1728             new UnicodeBlock("OLD_ITALIC",
1729                              "OLD ITALIC",
1730                              "OLDITALIC");
1731 
1732         /**
1733          * Constant for the "Gothic" Unicode character block.
1734          * @since 1.5
1735          */
1736         public static final UnicodeBlock GOTHIC =
1737             new UnicodeBlock("GOTHIC");
1738 
1739         /**
1740          * Constant for the "Ugaritic" Unicode character block.
1741          * @since 1.5
1742          */
1743         public static final UnicodeBlock UGARITIC =
1744             new UnicodeBlock("UGARITIC");
1745 
1746         /**
1747          * Constant for the "Deseret" Unicode character block.
1748          * @since 1.5
1749          */
1750         public static final UnicodeBlock DESERET =
1751             new UnicodeBlock("DESERET");
1752 
1753         /**
1754          * Constant for the "Shavian" Unicode character block.
1755          * @since 1.5
1756          */
1757         public static final UnicodeBlock SHAVIAN =
1758             new UnicodeBlock("SHAVIAN");
1759 
1760         /**
1761          * Constant for the "Osmanya" Unicode character block.
1762          * @since 1.5
1763          */
1764         public static final UnicodeBlock OSMANYA =
1765             new UnicodeBlock("OSMANYA");
1766 
1767         /**
1768          * Constant for the "Cypriot Syllabary" Unicode character block.
1769          * @since 1.5
1770          */
1771         public static final UnicodeBlock CYPRIOT_SYLLABARY =
1772             new UnicodeBlock("CYPRIOT_SYLLABARY",
1773                              "CYPRIOT SYLLABARY",
1774                              "CYPRIOTSYLLABARY");
1775 
1776         /**
1777          * Constant for the "Byzantine Musical Symbols" Unicode character block.
1778          * @since 1.5
1779          */
1780         public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS =
1781             new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS",
1782                              "BYZANTINE MUSICAL SYMBOLS",
1783                              "BYZANTINEMUSICALSYMBOLS");
1784 
1785         /**
1786          * Constant for the "Musical Symbols" Unicode character block.
1787          * @since 1.5
1788          */
1789         public static final UnicodeBlock MUSICAL_SYMBOLS =
1790             new UnicodeBlock("MUSICAL_SYMBOLS",
1791                              "MUSICAL SYMBOLS",
1792                              "MUSICALSYMBOLS");
1793 
1794         /**
1795          * Constant for the "Tai Xuan Jing Symbols" Unicode character block.
1796          * @since 1.5
1797          */
1798         public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS =
1799             new UnicodeBlock("TAI_XUAN_JING_SYMBOLS",
1800                              "TAI XUAN JING SYMBOLS",
1801                              "TAIXUANJINGSYMBOLS");
1802 
1803         /**
1804          * Constant for the "Mathematical Alphanumeric Symbols" Unicode
1805          * character block.
1806          * @since 1.5
1807          */
1808         public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS =
1809             new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS",
1810                              "MATHEMATICAL ALPHANUMERIC SYMBOLS",
1811                              "MATHEMATICALALPHANUMERICSYMBOLS");
1812 
1813         /**
1814          * Constant for the "CJK Unified Ideographs Extension B" Unicode
1815          * character block.
1816          * @since 1.5
1817          */
1818         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B =
1819             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B",
1820                              "CJK UNIFIED IDEOGRAPHS EXTENSION B",
1821                              "CJKUNIFIEDIDEOGRAPHSEXTENSIONB");
1822 
1823         /**
1824          * Constant for the "CJK Compatibility Ideographs Supplement" Unicode character block.
1825          * @since 1.5
1826          */
1827         public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT =
1828             new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT",
1829                              "CJK COMPATIBILITY IDEOGRAPHS SUPPLEMENT",
1830                              "CJKCOMPATIBILITYIDEOGRAPHSSUPPLEMENT");
1831 
1832         /**
1833          * Constant for the "Tags" Unicode character block.
1834          * @since 1.5
1835          */
1836         public static final UnicodeBlock TAGS =
1837             new UnicodeBlock("TAGS");
1838 
1839         /**
1840          * Constant for the "Variation Selectors Supplement" Unicode character
1841          * block.
1842          * @since 1.5
1843          */
1844         public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT =
1845             new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT",
1846                              "VARIATION SELECTORS SUPPLEMENT",
1847                              "VARIATIONSELECTORSSUPPLEMENT");
1848 
1849         /**
1850          * Constant for the "Supplementary Private Use Area-A" Unicode character
1851          * block.
1852          * @since 1.5
1853          */
1854         public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A =
1855             new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A",
1856                              "SUPPLEMENTARY PRIVATE USE AREA-A",
1857                              "SUPPLEMENTARYPRIVATEUSEAREA-A");
1858 
1859         /**
1860          * Constant for the "Supplementary Private Use Area-B" Unicode character
1861          * block.
1862          * @since 1.5
1863          */
1864         public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B =
1865             new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B",
1866                              "SUPPLEMENTARY PRIVATE USE AREA-B",
1867                              "SUPPLEMENTARYPRIVATEUSEAREA-B");
1868 
1869         /**
1870          * Constant for the "High Surrogates" Unicode character block.
1871          * This block represents codepoint values in the high surrogate
1872          * range: U+D800 through U+DB7F
1873          *
1874          * @since 1.5
1875          */
1876         public static final UnicodeBlock HIGH_SURROGATES =
1877             new UnicodeBlock("HIGH_SURROGATES",
1878                              "HIGH SURROGATES",
1879                              "HIGHSURROGATES");
1880 
1881         /**
1882          * Constant for the "High Private Use Surrogates" Unicode character
1883          * block.
1884          * This block represents codepoint values in the private use high
1885          * surrogate range: U+DB80 through U+DBFF
1886          *
1887          * @since 1.5
1888          */
1889         public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES =
1890             new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES",
1891                              "HIGH PRIVATE USE SURROGATES",
1892                              "HIGHPRIVATEUSESURROGATES");
1893 
1894         /**
1895          * Constant for the "Low Surrogates" Unicode character block.
1896          * This block represents codepoint values in the low surrogate
1897          * range: U+DC00 through U+DFFF
1898          *
1899          * @since 1.5
1900          */
1901         public static final UnicodeBlock LOW_SURROGATES =
1902             new UnicodeBlock("LOW_SURROGATES",
1903                              "LOW SURROGATES",
1904                              "LOWSURROGATES");
1905 
1906         /**
1907          * Constant for the "Arabic Supplement" Unicode character block.
1908          * @since 1.7
1909          */
1910         public static final UnicodeBlock ARABIC_SUPPLEMENT =
1911             new UnicodeBlock("ARABIC_SUPPLEMENT",
1912                              "ARABIC SUPPLEMENT",
1913                              "ARABICSUPPLEMENT");
1914 
1915         /**
1916          * Constant for the "NKo" Unicode character block.
1917          * @since 1.7
1918          */
1919         public static final UnicodeBlock NKO =
1920             new UnicodeBlock("NKO");
1921 
1922         /**
1923          * Constant for the "Samaritan" Unicode character block.
1924          * @since 1.7
1925          */
1926         public static final UnicodeBlock SAMARITAN =
1927             new UnicodeBlock("SAMARITAN");
1928 
1929         /**
1930          * Constant for the "Mandaic" Unicode character block.
1931          * @since 1.7
1932          */
1933         public static final UnicodeBlock MANDAIC =
1934             new UnicodeBlock("MANDAIC");
1935 
1936         /**
1937          * Constant for the "Ethiopic Supplement" Unicode character block.
1938          * @since 1.7
1939          */
1940         public static final UnicodeBlock ETHIOPIC_SUPPLEMENT =
1941             new UnicodeBlock("ETHIOPIC_SUPPLEMENT",
1942                              "ETHIOPIC SUPPLEMENT",
1943                              "ETHIOPICSUPPLEMENT");
1944 
1945         /**
1946          * Constant for the "Unified Canadian Aboriginal Syllabics Extended"
1947          * Unicode character block.
1948          * @since 1.7
1949          */
1950         public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED =
1951             new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED",
1952                              "UNIFIED CANADIAN ABORIGINAL SYLLABICS EXTENDED",
1953                              "UNIFIEDCANADIANABORIGINALSYLLABICSEXTENDED");
1954 
1955         /**
1956          * Constant for the "New Tai Lue" Unicode character block.
1957          * @since 1.7
1958          */
1959         public static final UnicodeBlock NEW_TAI_LUE =
1960             new UnicodeBlock("NEW_TAI_LUE",
1961                              "NEW TAI LUE",
1962                              "NEWTAILUE");
1963 
1964         /**
1965          * Constant for the "Buginese" Unicode character block.
1966          * @since 1.7
1967          */
1968         public static final UnicodeBlock BUGINESE =
1969             new UnicodeBlock("BUGINESE");
1970 
1971         /**
1972          * Constant for the "Tai Tham" Unicode character block.
1973          * @since 1.7
1974          */
1975         public static final UnicodeBlock TAI_THAM =
1976             new UnicodeBlock("TAI_THAM",
1977                              "TAI THAM",
1978                              "TAITHAM");
1979 
1980         /**
1981          * Constant for the "Balinese" Unicode character block.
1982          * @since 1.7
1983          */
1984         public static final UnicodeBlock BALINESE =
1985             new UnicodeBlock("BALINESE");
1986 
1987         /**
1988          * Constant for the "Sundanese" Unicode character block.
1989          * @since 1.7
1990          */
1991         public static final UnicodeBlock SUNDANESE =
1992             new UnicodeBlock("SUNDANESE");
1993 
1994         /**
1995          * Constant for the "Batak" Unicode character block.
1996          * @since 1.7
1997          */
1998         public static final UnicodeBlock BATAK =
1999             new UnicodeBlock("BATAK");
2000 
2001         /**
2002          * Constant for the "Lepcha" Unicode character block.
2003          * @since 1.7
2004          */
2005         public static final UnicodeBlock LEPCHA =
2006             new UnicodeBlock("LEPCHA");
2007 
2008         /**
2009          * Constant for the "Ol Chiki" Unicode character block.
2010          * @since 1.7
2011          */
2012         public static final UnicodeBlock OL_CHIKI =
2013             new UnicodeBlock("OL_CHIKI",
2014                              "OL CHIKI",
2015                              "OLCHIKI");
2016 
2017         /**
2018          * Constant for the "Vedic Extensions" Unicode character block.
2019          * @since 1.7
2020          */
2021         public static final UnicodeBlock VEDIC_EXTENSIONS =
2022             new UnicodeBlock("VEDIC_EXTENSIONS",
2023                              "VEDIC EXTENSIONS",
2024                              "VEDICEXTENSIONS");
2025 
2026         /**
2027          * Constant for the "Phonetic Extensions Supplement" Unicode character
2028          * block.
2029          * @since 1.7
2030          */
2031         public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT =
2032             new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT",
2033                              "PHONETIC EXTENSIONS SUPPLEMENT",
2034                              "PHONETICEXTENSIONSSUPPLEMENT");
2035 
2036         /**
2037          * Constant for the "Combining Diacritical Marks Supplement" Unicode
2038          * character block.
2039          * @since 1.7
2040          */
2041         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT =
2042             new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT",
2043                              "COMBINING DIACRITICAL MARKS SUPPLEMENT",
2044                              "COMBININGDIACRITICALMARKSSUPPLEMENT");
2045 
2046         /**
2047          * Constant for the "Glagolitic" Unicode character block.
2048          * @since 1.7
2049          */
2050         public static final UnicodeBlock GLAGOLITIC =
2051             new UnicodeBlock("GLAGOLITIC");
2052 
2053         /**
2054          * Constant for the "Latin Extended-C" Unicode character block.
2055          * @since 1.7
2056          */
2057         public static final UnicodeBlock LATIN_EXTENDED_C =
2058             new UnicodeBlock("LATIN_EXTENDED_C",
2059                              "LATIN EXTENDED-C",
2060                              "LATINEXTENDED-C");
2061 
2062         /**
2063          * Constant for the "Coptic" Unicode character block.
2064          * @since 1.7
2065          */
2066         public static final UnicodeBlock COPTIC =
2067             new UnicodeBlock("COPTIC");
2068 
2069         /**
2070          * Constant for the "Georgian Supplement" Unicode character block.
2071          * @since 1.7
2072          */
2073         public static final UnicodeBlock GEORGIAN_SUPPLEMENT =
2074             new UnicodeBlock("GEORGIAN_SUPPLEMENT",
2075                              "GEORGIAN SUPPLEMENT",
2076                              "GEORGIANSUPPLEMENT");
2077 
2078         /**
2079          * Constant for the "Tifinagh" Unicode character block.
2080          * @since 1.7
2081          */
2082         public static final UnicodeBlock TIFINAGH =
2083             new UnicodeBlock("TIFINAGH");
2084 
2085         /**
2086          * Constant for the "Ethiopic Extended" Unicode character block.
2087          * @since 1.7
2088          */
2089         public static final UnicodeBlock ETHIOPIC_EXTENDED =
2090             new UnicodeBlock("ETHIOPIC_EXTENDED",
2091                              "ETHIOPIC EXTENDED",
2092                              "ETHIOPICEXTENDED");
2093 
2094         /**
2095          * Constant for the "Cyrillic Extended-A" Unicode character block.
2096          * @since 1.7
2097          */
2098         public static final UnicodeBlock CYRILLIC_EXTENDED_A =
2099             new UnicodeBlock("CYRILLIC_EXTENDED_A",
2100                              "CYRILLIC EXTENDED-A",
2101                              "CYRILLICEXTENDED-A");
2102 
2103         /**
2104          * Constant for the "Supplemental Punctuation" Unicode character block.
2105          * @since 1.7
2106          */
2107         public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION =
2108             new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION",
2109                              "SUPPLEMENTAL PUNCTUATION",
2110                              "SUPPLEMENTALPUNCTUATION");
2111 
2112         /**
2113          * Constant for the "CJK Strokes" Unicode character block.
2114          * @since 1.7
2115          */
2116         public static final UnicodeBlock CJK_STROKES =
2117             new UnicodeBlock("CJK_STROKES",
2118                              "CJK STROKES",
2119                              "CJKSTROKES");
2120 
2121         /**
2122          * Constant for the "Lisu" Unicode character block.
2123          * @since 1.7
2124          */
2125         public static final UnicodeBlock LISU =
2126             new UnicodeBlock("LISU");
2127 
2128         /**
2129          * Constant for the "Vai" Unicode character block.
2130          * @since 1.7
2131          */
2132         public static final UnicodeBlock VAI =
2133             new UnicodeBlock("VAI");
2134 
2135         /**
2136          * Constant for the "Cyrillic Extended-B" Unicode character block.
2137          * @since 1.7
2138          */
2139         public static final UnicodeBlock CYRILLIC_EXTENDED_B =
2140             new UnicodeBlock("CYRILLIC_EXTENDED_B",
2141                              "CYRILLIC EXTENDED-B",
2142                              "CYRILLICEXTENDED-B");
2143 
2144         /**
2145          * Constant for the "Bamum" Unicode character block.
2146          * @since 1.7
2147          */
2148         public static final UnicodeBlock BAMUM =
2149             new UnicodeBlock("BAMUM");
2150 
2151         /**
2152          * Constant for the "Modifier Tone Letters" Unicode character block.
2153          * @since 1.7
2154          */
2155         public static final UnicodeBlock MODIFIER_TONE_LETTERS =
2156             new UnicodeBlock("MODIFIER_TONE_LETTERS",
2157                              "MODIFIER TONE LETTERS",
2158                              "MODIFIERTONELETTERS");
2159 
2160         /**
2161          * Constant for the "Latin Extended-D" Unicode character block.
2162          * @since 1.7
2163          */
2164         public static final UnicodeBlock LATIN_EXTENDED_D =
2165             new UnicodeBlock("LATIN_EXTENDED_D",
2166                              "LATIN EXTENDED-D",
2167                              "LATINEXTENDED-D");
2168 
2169         /**
2170          * Constant for the "Syloti Nagri" Unicode character block.
2171          * @since 1.7
2172          */
2173         public static final UnicodeBlock SYLOTI_NAGRI =
2174             new UnicodeBlock("SYLOTI_NAGRI",
2175                              "SYLOTI NAGRI",
2176                              "SYLOTINAGRI");
2177 
2178         /**
2179          * Constant for the "Common Indic Number Forms" Unicode character block.
2180          * @since 1.7
2181          */
2182         public static final UnicodeBlock COMMON_INDIC_NUMBER_FORMS =
2183             new UnicodeBlock("COMMON_INDIC_NUMBER_FORMS",
2184                              "COMMON INDIC NUMBER FORMS",
2185                              "COMMONINDICNUMBERFORMS");
2186 
2187         /**
2188          * Constant for the "Phags-pa" Unicode character block.
2189          * @since 1.7
2190          */
2191         public static final UnicodeBlock PHAGS_PA =
2192             new UnicodeBlock("PHAGS_PA",
2193                              "PHAGS-PA");
2194 
2195         /**
2196          * Constant for the "Saurashtra" Unicode character block.
2197          * @since 1.7
2198          */
2199         public static final UnicodeBlock SAURASHTRA =
2200             new UnicodeBlock("SAURASHTRA");
2201 
2202         /**
2203          * Constant for the "Devanagari Extended" Unicode character block.
2204          * @since 1.7
2205          */
2206         public static final UnicodeBlock DEVANAGARI_EXTENDED =
2207             new UnicodeBlock("DEVANAGARI_EXTENDED",
2208                              "DEVANAGARI EXTENDED",
2209                              "DEVANAGARIEXTENDED");
2210 
2211         /**
2212          * Constant for the "Kayah Li" Unicode character block.
2213          * @since 1.7
2214          */
2215         public static final UnicodeBlock KAYAH_LI =
2216             new UnicodeBlock("KAYAH_LI",
2217                              "KAYAH LI",
2218                              "KAYAHLI");
2219 
2220         /**
2221          * Constant for the "Rejang" Unicode character block.
2222          * @since 1.7
2223          */
2224         public static final UnicodeBlock REJANG =
2225             new UnicodeBlock("REJANG");
2226 
2227         /**
2228          * Constant for the "Hangul Jamo Extended-A" Unicode character block.
2229          * @since 1.7
2230          */
2231         public static final UnicodeBlock HANGUL_JAMO_EXTENDED_A =
2232             new UnicodeBlock("HANGUL_JAMO_EXTENDED_A",
2233                              "HANGUL JAMO EXTENDED-A",
2234                              "HANGULJAMOEXTENDED-A");
2235 
2236         /**
2237          * Constant for the "Javanese" Unicode character block.
2238          * @since 1.7
2239          */
2240         public static final UnicodeBlock JAVANESE =
2241             new UnicodeBlock("JAVANESE");
2242 
2243         /**
2244          * Constant for the "Cham" Unicode character block.
2245          * @since 1.7
2246          */
2247         public static final UnicodeBlock CHAM =
2248             new UnicodeBlock("CHAM");
2249 
2250         /**
2251          * Constant for the "Myanmar Extended-A" Unicode character block.
2252          * @since 1.7
2253          */
2254         public static final UnicodeBlock MYANMAR_EXTENDED_A =
2255             new UnicodeBlock("MYANMAR_EXTENDED_A",
2256                              "MYANMAR EXTENDED-A",
2257                              "MYANMAREXTENDED-A");
2258 
2259         /**
2260          * Constant for the "Tai Viet" Unicode character block.
2261          * @since 1.7
2262          */
2263         public static final UnicodeBlock TAI_VIET =
2264             new UnicodeBlock("TAI_VIET",
2265                              "TAI VIET",
2266                              "TAIVIET");
2267 
2268         /**
2269          * Constant for the "Ethiopic Extended-A" Unicode character block.
2270          * @since 1.7
2271          */
2272         public static final UnicodeBlock ETHIOPIC_EXTENDED_A =
2273             new UnicodeBlock("ETHIOPIC_EXTENDED_A",
2274                              "ETHIOPIC EXTENDED-A",
2275                              "ETHIOPICEXTENDED-A");
2276 
2277         /**
2278          * Constant for the "Meetei Mayek" Unicode character block.
2279          * @since 1.7
2280          */
2281         public static final UnicodeBlock MEETEI_MAYEK =
2282             new UnicodeBlock("MEETEI_MAYEK",
2283                              "MEETEI MAYEK",
2284                              "MEETEIMAYEK");
2285 
2286         /**
2287          * Constant for the "Hangul Jamo Extended-B" Unicode character block.
2288          * @since 1.7
2289          */
2290         public static final UnicodeBlock HANGUL_JAMO_EXTENDED_B =
2291             new UnicodeBlock("HANGUL_JAMO_EXTENDED_B",
2292                              "HANGUL JAMO EXTENDED-B",
2293                              "HANGULJAMOEXTENDED-B");
2294 
2295         /**
2296          * Constant for the "Vertical Forms" Unicode character block.
2297          * @since 1.7
2298          */
2299         public static final UnicodeBlock VERTICAL_FORMS =
2300             new UnicodeBlock("VERTICAL_FORMS",
2301                              "VERTICAL FORMS",
2302                              "VERTICALFORMS");
2303 
2304         /**
2305          * Constant for the "Ancient Greek Numbers" Unicode character block.
2306          * @since 1.7
2307          */
2308         public static final UnicodeBlock ANCIENT_GREEK_NUMBERS =
2309             new UnicodeBlock("ANCIENT_GREEK_NUMBERS",
2310                              "ANCIENT GREEK NUMBERS",
2311                              "ANCIENTGREEKNUMBERS");
2312 
2313         /**
2314          * Constant for the "Ancient Symbols" Unicode character block.
2315          * @since 1.7
2316          */
2317         public static final UnicodeBlock ANCIENT_SYMBOLS =
2318             new UnicodeBlock("ANCIENT_SYMBOLS",
2319                              "ANCIENT SYMBOLS",
2320                              "ANCIENTSYMBOLS");
2321 
2322         /**
2323          * Constant for the "Phaistos Disc" Unicode character block.
2324          * @since 1.7
2325          */
2326         public static final UnicodeBlock PHAISTOS_DISC =
2327             new UnicodeBlock("PHAISTOS_DISC",
2328                              "PHAISTOS DISC",
2329                              "PHAISTOSDISC");
2330 
2331         /**
2332          * Constant for the "Lycian" Unicode character block.
2333          * @since 1.7
2334          */
2335         public static final UnicodeBlock LYCIAN =
2336             new UnicodeBlock("LYCIAN");
2337 
2338         /**
2339          * Constant for the "Carian" Unicode character block.
2340          * @since 1.7
2341          */
2342         public static final UnicodeBlock CARIAN =
2343             new UnicodeBlock("CARIAN");
2344 
2345         /**
2346          * Constant for the "Old Persian" Unicode character block.
2347          * @since 1.7
2348          */
2349         public static final UnicodeBlock OLD_PERSIAN =
2350             new UnicodeBlock("OLD_PERSIAN",
2351                              "OLD PERSIAN",
2352                              "OLDPERSIAN");
2353 
2354         /**
2355          * Constant for the "Imperial Aramaic" Unicode character block.
2356          * @since 1.7
2357          */
2358         public static final UnicodeBlock IMPERIAL_ARAMAIC =
2359             new UnicodeBlock("IMPERIAL_ARAMAIC",
2360                              "IMPERIAL ARAMAIC",
2361                              "IMPERIALARAMAIC");
2362 
2363         /**
2364          * Constant for the "Phoenician" Unicode character block.
2365          * @since 1.7
2366          */
2367         public static final UnicodeBlock PHOENICIAN =
2368             new UnicodeBlock("PHOENICIAN");
2369 
2370         /**
2371          * Constant for the "Lydian" Unicode character block.
2372          * @since 1.7
2373          */
2374         public static final UnicodeBlock LYDIAN =
2375             new UnicodeBlock("LYDIAN");
2376 
2377         /**
2378          * Constant for the "Kharoshthi" Unicode character block.
2379          * @since 1.7
2380          */
2381         public static final UnicodeBlock KHAROSHTHI =
2382             new UnicodeBlock("KHAROSHTHI");
2383 
2384         /**
2385          * Constant for the "Old South Arabian" Unicode character block.
2386          * @since 1.7
2387          */
2388         public static final UnicodeBlock OLD_SOUTH_ARABIAN =
2389             new UnicodeBlock("OLD_SOUTH_ARABIAN",
2390                              "OLD SOUTH ARABIAN",
2391                              "OLDSOUTHARABIAN");
2392 
2393         /**
2394          * Constant for the "Avestan" Unicode character block.
2395          * @since 1.7
2396          */
2397         public static final UnicodeBlock AVESTAN =
2398             new UnicodeBlock("AVESTAN");
2399 
2400         /**
2401          * Constant for the "Inscriptional Parthian" Unicode character block.
2402          * @since 1.7
2403          */
2404         public static final UnicodeBlock INSCRIPTIONAL_PARTHIAN =
2405             new UnicodeBlock("INSCRIPTIONAL_PARTHIAN",
2406                              "INSCRIPTIONAL PARTHIAN",
2407                              "INSCRIPTIONALPARTHIAN");
2408 
2409         /**
2410          * Constant for the "Inscriptional Pahlavi" Unicode character block.
2411          * @since 1.7
2412          */
2413         public static final UnicodeBlock INSCRIPTIONAL_PAHLAVI =
2414             new UnicodeBlock("INSCRIPTIONAL_PAHLAVI",
2415                              "INSCRIPTIONAL PAHLAVI",
2416                              "INSCRIPTIONALPAHLAVI");
2417 
2418         /**
2419          * Constant for the "Old Turkic" Unicode character block.
2420          * @since 1.7
2421          */
2422         public static final UnicodeBlock OLD_TURKIC =
2423             new UnicodeBlock("OLD_TURKIC",
2424                              "OLD TURKIC",
2425                              "OLDTURKIC");
2426 
2427         /**
2428          * Constant for the "Rumi Numeral Symbols" Unicode character block.
2429          * @since 1.7
2430          */
2431         public static final UnicodeBlock RUMI_NUMERAL_SYMBOLS =
2432             new UnicodeBlock("RUMI_NUMERAL_SYMBOLS",
2433                              "RUMI NUMERAL SYMBOLS",
2434                              "RUMINUMERALSYMBOLS");
2435 
2436         /**
2437          * Constant for the "Brahmi" Unicode character block.
2438          * @since 1.7
2439          */
2440         public static final UnicodeBlock BRAHMI =
2441             new UnicodeBlock("BRAHMI");
2442 
2443         /**
2444          * Constant for the "Kaithi" Unicode character block.
2445          * @since 1.7
2446          */
2447         public static final UnicodeBlock KAITHI =
2448             new UnicodeBlock("KAITHI");
2449 
2450         /**
2451          * Constant for the "Cuneiform" Unicode character block.
2452          * @since 1.7
2453          */
2454         public static final UnicodeBlock CUNEIFORM =
2455             new UnicodeBlock("CUNEIFORM");
2456 
2457         /**
2458          * Constant for the "Cuneiform Numbers and Punctuation" Unicode
2459          * character block.
2460          * @since 1.7
2461          */
2462         public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION =
2463             new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION",
2464                              "CUNEIFORM NUMBERS AND PUNCTUATION",
2465                              "CUNEIFORMNUMBERSANDPUNCTUATION");
2466 
2467         /**
2468          * Constant for the "Egyptian Hieroglyphs" Unicode character block.
2469          * @since 1.7
2470          */
2471         public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS =
2472             new UnicodeBlock("EGYPTIAN_HIEROGLYPHS",
2473                              "EGYPTIAN HIEROGLYPHS",
2474                              "EGYPTIANHIEROGLYPHS");
2475 
2476         /**
2477          * Constant for the "Bamum Supplement" Unicode character block.
2478          * @since 1.7
2479          */
2480         public static final UnicodeBlock BAMUM_SUPPLEMENT =
2481             new UnicodeBlock("BAMUM_SUPPLEMENT",
2482                              "BAMUM SUPPLEMENT",
2483                              "BAMUMSUPPLEMENT");
2484 
2485         /**
2486          * Constant for the "Kana Supplement" Unicode character block.
2487          * @since 1.7
2488          */
2489         public static final UnicodeBlock KANA_SUPPLEMENT =
2490             new UnicodeBlock("KANA_SUPPLEMENT",
2491                              "KANA SUPPLEMENT",
2492                              "KANASUPPLEMENT");
2493 
2494         /**
2495          * Constant for the "Ancient Greek Musical Notation" Unicode character
2496          * block.
2497          * @since 1.7
2498          */
2499         public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION =
2500             new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION",
2501                              "ANCIENT GREEK MUSICAL NOTATION",
2502                              "ANCIENTGREEKMUSICALNOTATION");
2503 
2504         /**
2505          * Constant for the "Counting Rod Numerals" Unicode character block.
2506          * @since 1.7
2507          */
2508         public static final UnicodeBlock COUNTING_ROD_NUMERALS =
2509             new UnicodeBlock("COUNTING_ROD_NUMERALS",
2510                              "COUNTING ROD NUMERALS",
2511                              "COUNTINGRODNUMERALS");
2512 
2513         /**
2514          * Constant for the "Mahjong Tiles" Unicode character block.
2515          * @since 1.7
2516          */
2517         public static final UnicodeBlock MAHJONG_TILES =
2518             new UnicodeBlock("MAHJONG_TILES",
2519                              "MAHJONG TILES",
2520                              "MAHJONGTILES");
2521 
2522         /**
2523          * Constant for the "Domino Tiles" Unicode character block.
2524          * @since 1.7
2525          */
2526         public static final UnicodeBlock DOMINO_TILES =
2527             new UnicodeBlock("DOMINO_TILES",
2528                              "DOMINO TILES",
2529                              "DOMINOTILES");
2530 
2531         /**
2532          * Constant for the "Playing Cards" Unicode character block.
2533          * @since 1.7
2534          */
2535         public static final UnicodeBlock PLAYING_CARDS =
2536             new UnicodeBlock("PLAYING_CARDS",
2537                              "PLAYING CARDS",
2538                              "PLAYINGCARDS");
2539 
2540         /**
2541          * Constant for the "Enclosed Alphanumeric Supplement" Unicode character
2542          * block.
2543          * @since 1.7
2544          */
2545         public static final UnicodeBlock ENCLOSED_ALPHANUMERIC_SUPPLEMENT =
2546             new UnicodeBlock("ENCLOSED_ALPHANUMERIC_SUPPLEMENT",
2547                              "ENCLOSED ALPHANUMERIC SUPPLEMENT",
2548                              "ENCLOSEDALPHANUMERICSUPPLEMENT");
2549 
2550         /**
2551          * Constant for the "Enclosed Ideographic Supplement" Unicode character
2552          * block.
2553          * @since 1.7
2554          */
2555         public static final UnicodeBlock ENCLOSED_IDEOGRAPHIC_SUPPLEMENT =
2556             new UnicodeBlock("ENCLOSED_IDEOGRAPHIC_SUPPLEMENT",
2557                              "ENCLOSED IDEOGRAPHIC SUPPLEMENT",
2558                              "ENCLOSEDIDEOGRAPHICSUPPLEMENT");
2559 
2560         /**
2561          * Constant for the "Miscellaneous Symbols And Pictographs" Unicode
2562          * character block.
2563          * @since 1.7
2564          */
2565         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS =
2566             new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS",
2567                              "MISCELLANEOUS SYMBOLS AND PICTOGRAPHS",
2568                              "MISCELLANEOUSSYMBOLSANDPICTOGRAPHS");
2569 
2570         /**
2571          * Constant for the "Emoticons" Unicode character block.
2572          * @since 1.7
2573          */
2574         public static final UnicodeBlock EMOTICONS =
2575             new UnicodeBlock("EMOTICONS");
2576 
2577         /**
2578          * Constant for the "Transport And Map Symbols" Unicode character block.
2579          * @since 1.7
2580          */
2581         public static final UnicodeBlock TRANSPORT_AND_MAP_SYMBOLS =
2582             new UnicodeBlock("TRANSPORT_AND_MAP_SYMBOLS",
2583                              "TRANSPORT AND MAP SYMBOLS",
2584                              "TRANSPORTANDMAPSYMBOLS");
2585 
2586         /**
2587          * Constant for the "Alchemical Symbols" Unicode character block.
2588          * @since 1.7
2589          */
2590         public static final UnicodeBlock ALCHEMICAL_SYMBOLS =
2591             new UnicodeBlock("ALCHEMICAL_SYMBOLS",
2592                              "ALCHEMICAL SYMBOLS",
2593                              "ALCHEMICALSYMBOLS");
2594 
2595         /**
2596          * Constant for the "CJK Unified Ideographs Extension C" Unicode
2597          * character block.
2598          * @since 1.7
2599          */
2600         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C =
2601             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C",
2602                              "CJK UNIFIED IDEOGRAPHS EXTENSION C",
2603                              "CJKUNIFIEDIDEOGRAPHSEXTENSIONC");
2604 
2605         /**
2606          * Constant for the "CJK Unified Ideographs Extension D" Unicode
2607          * character block.
2608          * @since 1.7
2609          */
2610         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D =
2611             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D",
2612                              "CJK UNIFIED IDEOGRAPHS EXTENSION D",
2613                              "CJKUNIFIEDIDEOGRAPHSEXTENSIOND");
2614 
2615         /**
2616          * Constant for the "Arabic Extended-A" Unicode character block.
2617          * @since 1.8
2618          */
2619         public static final UnicodeBlock ARABIC_EXTENDED_A =
2620             new UnicodeBlock("ARABIC_EXTENDED_A",
2621                              "ARABIC EXTENDED-A",
2622                              "ARABICEXTENDED-A");
2623 
2624         /**
2625          * Constant for the "Sundanese Supplement" Unicode character block.
2626          * @since 1.8
2627          */
2628         public static final UnicodeBlock SUNDANESE_SUPPLEMENT =
2629             new UnicodeBlock("SUNDANESE_SUPPLEMENT",
2630                              "SUNDANESE SUPPLEMENT",
2631                              "SUNDANESESUPPLEMENT");
2632 
2633         /**
2634          * Constant for the "Meetei Mayek Extensions" Unicode character block.
2635          * @since 1.8
2636          */
2637         public static final UnicodeBlock MEETEI_MAYEK_EXTENSIONS =
2638             new UnicodeBlock("MEETEI_MAYEK_EXTENSIONS",
2639                              "MEETEI MAYEK EXTENSIONS",
2640                              "MEETEIMAYEKEXTENSIONS");
2641 
2642         /**
2643          * Constant for the "Meroitic Hieroglyphs" Unicode character block.
2644          * @since 1.8
2645          */
2646         public static final UnicodeBlock MEROITIC_HIEROGLYPHS =
2647             new UnicodeBlock("MEROITIC_HIEROGLYPHS",
2648                              "MEROITIC HIEROGLYPHS",
2649                              "MEROITICHIEROGLYPHS");
2650 
2651         /**
2652          * Constant for the "Meroitic Cursive" Unicode character block.
2653          * @since 1.8
2654          */
2655         public static final UnicodeBlock MEROITIC_CURSIVE =
2656             new UnicodeBlock("MEROITIC_CURSIVE",
2657                              "MEROITIC CURSIVE",
2658                              "MEROITICCURSIVE");
2659 
2660         /**
2661          * Constant for the "Sora Sompeng" Unicode character block.
2662          * @since 1.8
2663          */
2664         public static final UnicodeBlock SORA_SOMPENG =
2665             new UnicodeBlock("SORA_SOMPENG",
2666                              "SORA SOMPENG",
2667                              "SORASOMPENG");
2668 
2669         /**
2670          * Constant for the "Chakma" Unicode character block.
2671          * @since 1.8
2672          */
2673         public static final UnicodeBlock CHAKMA =
2674             new UnicodeBlock("CHAKMA");
2675 
2676         /**
2677          * Constant for the "Sharada" Unicode character block.
2678          * @since 1.8
2679          */
2680         public static final UnicodeBlock SHARADA =
2681             new UnicodeBlock("SHARADA");
2682 
2683         /**
2684          * Constant for the "Takri" Unicode character block.
2685          * @since 1.8
2686          */
2687         public static final UnicodeBlock TAKRI =
2688             new UnicodeBlock("TAKRI");
2689 
2690         /**
2691          * Constant for the "Miao" Unicode character block.
2692          * @since 1.8
2693          */
2694         public static final UnicodeBlock MIAO =
2695             new UnicodeBlock("MIAO");
2696 
2697         /**
2698          * Constant for the "Arabic Mathematical Alphabetic Symbols" Unicode
2699          * character block.
2700          * @since 1.8
2701          */
2702         public static final UnicodeBlock ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS =
2703             new UnicodeBlock("ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS",
2704                              "ARABIC MATHEMATICAL ALPHABETIC SYMBOLS",
2705                              "ARABICMATHEMATICALALPHABETICSYMBOLS");
2706 
2707         /**
2708          * Constant for the "Combining Diacritical Marks Extended" Unicode
2709          * character block.
2710          * @since 9
2711          */
2712         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_EXTENDED =
2713             new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_EXTENDED",
2714                              "COMBINING DIACRITICAL MARKS EXTENDED",
2715                              "COMBININGDIACRITICALMARKSEXTENDED");
2716 
2717         /**
2718          * Constant for the "Myanmar Extended-B" Unicode character block.
2719          * @since 9
2720          */
2721         public static final UnicodeBlock MYANMAR_EXTENDED_B =
2722             new UnicodeBlock("MYANMAR_EXTENDED_B",
2723                              "MYANMAR EXTENDED-B",
2724                              "MYANMAREXTENDED-B");
2725 
2726         /**
2727          * Constant for the "Latin Extended-E" Unicode character block.
2728          * @since 9
2729          */
2730         public static final UnicodeBlock LATIN_EXTENDED_E =
2731             new UnicodeBlock("LATIN_EXTENDED_E",
2732                              "LATIN EXTENDED-E",
2733                              "LATINEXTENDED-E");
2734 
2735         /**
2736          * Constant for the "Coptic Epact Numbers" Unicode character block.
2737          * @since 9
2738          */
2739         public static final UnicodeBlock COPTIC_EPACT_NUMBERS =
2740             new UnicodeBlock("COPTIC_EPACT_NUMBERS",
2741                              "COPTIC EPACT NUMBERS",
2742                              "COPTICEPACTNUMBERS");
2743 
2744         /**
2745          * Constant for the "Old Permic" Unicode character block.
2746          * @since 9
2747          */
2748         public static final UnicodeBlock OLD_PERMIC =
2749             new UnicodeBlock("OLD_PERMIC",
2750                              "OLD PERMIC",
2751                              "OLDPERMIC");
2752 
2753         /**
2754          * Constant for the "Elbasan" Unicode character block.
2755          * @since 9
2756          */
2757         public static final UnicodeBlock ELBASAN =
2758             new UnicodeBlock("ELBASAN");
2759 
2760         /**
2761          * Constant for the "Caucasian Albanian" Unicode character block.
2762          * @since 9
2763          */
2764         public static final UnicodeBlock CAUCASIAN_ALBANIAN =
2765             new UnicodeBlock("CAUCASIAN_ALBANIAN",
2766                              "CAUCASIAN ALBANIAN",
2767                              "CAUCASIANALBANIAN");
2768 
2769         /**
2770          * Constant for the "Linear A" Unicode character block.
2771          * @since 9
2772          */
2773         public static final UnicodeBlock LINEAR_A =
2774             new UnicodeBlock("LINEAR_A",
2775                              "LINEAR A",
2776                              "LINEARA");
2777 
2778         /**
2779          * Constant for the "Palmyrene" Unicode character block.
2780          * @since 9
2781          */
2782         public static final UnicodeBlock PALMYRENE =
2783             new UnicodeBlock("PALMYRENE");
2784 
2785         /**
2786          * Constant for the "Nabataean" Unicode character block.
2787          * @since 9
2788          */
2789         public static final UnicodeBlock NABATAEAN =
2790             new UnicodeBlock("NABATAEAN");
2791 
2792         /**
2793          * Constant for the "Old North Arabian" Unicode character block.
2794          * @since 9
2795          */
2796         public static final UnicodeBlock OLD_NORTH_ARABIAN =
2797             new UnicodeBlock("OLD_NORTH_ARABIAN",
2798                              "OLD NORTH ARABIAN",
2799                              "OLDNORTHARABIAN");
2800 
2801         /**
2802          * Constant for the "Manichaean" Unicode character block.
2803          * @since 9
2804          */
2805         public static final UnicodeBlock MANICHAEAN =
2806             new UnicodeBlock("MANICHAEAN");
2807 
2808         /**
2809          * Constant for the "Psalter Pahlavi" Unicode character block.
2810          * @since 9
2811          */
2812         public static final UnicodeBlock PSALTER_PAHLAVI =
2813             new UnicodeBlock("PSALTER_PAHLAVI",
2814                              "PSALTER PAHLAVI",
2815                              "PSALTERPAHLAVI");
2816 
2817         /**
2818          * Constant for the "Mahajani" Unicode character block.
2819          * @since 9
2820          */
2821         public static final UnicodeBlock MAHAJANI =
2822             new UnicodeBlock("MAHAJANI");
2823 
2824         /**
2825          * Constant for the "Sinhala Archaic Numbers" Unicode character block.
2826          * @since 9
2827          */
2828         public static final UnicodeBlock SINHALA_ARCHAIC_NUMBERS =
2829             new UnicodeBlock("SINHALA_ARCHAIC_NUMBERS",
2830                              "SINHALA ARCHAIC NUMBERS",
2831                              "SINHALAARCHAICNUMBERS");
2832 
2833         /**
2834          * Constant for the "Khojki" Unicode character block.
2835          * @since 9
2836          */
2837         public static final UnicodeBlock KHOJKI =
2838             new UnicodeBlock("KHOJKI");
2839 
2840         /**
2841          * Constant for the "Khudawadi" Unicode character block.
2842          * @since 9
2843          */
2844         public static final UnicodeBlock KHUDAWADI =
2845             new UnicodeBlock("KHUDAWADI");
2846 
2847         /**
2848          * Constant for the "Grantha" Unicode character block.
2849          * @since 9
2850          */
2851         public static final UnicodeBlock GRANTHA =
2852             new UnicodeBlock("GRANTHA");
2853 
2854         /**
2855          * Constant for the "Tirhuta" Unicode character block.
2856          * @since 9
2857          */
2858         public static final UnicodeBlock TIRHUTA =
2859             new UnicodeBlock("TIRHUTA");
2860 
2861         /**
2862          * Constant for the "Siddham" Unicode character block.
2863          * @since 9
2864          */
2865         public static final UnicodeBlock SIDDHAM =
2866             new UnicodeBlock("SIDDHAM");
2867 
2868         /**
2869          * Constant for the "Modi" Unicode character block.
2870          * @since 9
2871          */
2872         public static final UnicodeBlock MODI =
2873             new UnicodeBlock("MODI");
2874 
2875         /**
2876          * Constant for the "Warang Citi" Unicode character block.
2877          * @since 9
2878          */
2879         public static final UnicodeBlock WARANG_CITI =
2880             new UnicodeBlock("WARANG_CITI",
2881                              "WARANG CITI",
2882                              "WARANGCITI");
2883 
2884         /**
2885          * Constant for the "Pau Cin Hau" Unicode character block.
2886          * @since 9
2887          */
2888         public static final UnicodeBlock PAU_CIN_HAU =
2889             new UnicodeBlock("PAU_CIN_HAU",
2890                              "PAU CIN HAU",
2891                              "PAUCINHAU");
2892 
2893         /**
2894          * Constant for the "Mro" Unicode character block.
2895          * @since 9
2896          */
2897         public static final UnicodeBlock MRO =
2898             new UnicodeBlock("MRO");
2899 
2900         /**
2901          * Constant for the "Bassa Vah" Unicode character block.
2902          * @since 9
2903          */
2904         public static final UnicodeBlock BASSA_VAH =
2905             new UnicodeBlock("BASSA_VAH",
2906                              "BASSA VAH",
2907                              "BASSAVAH");
2908 
2909         /**
2910          * Constant for the "Pahawh Hmong" Unicode character block.
2911          * @since 9
2912          */
2913         public static final UnicodeBlock PAHAWH_HMONG =
2914             new UnicodeBlock("PAHAWH_HMONG",
2915                              "PAHAWH HMONG",
2916                              "PAHAWHHMONG");
2917 
2918         /**
2919          * Constant for the "Duployan" Unicode character block.
2920          * @since 9
2921          */
2922         public static final UnicodeBlock DUPLOYAN =
2923             new UnicodeBlock("DUPLOYAN");
2924 
2925         /**
2926          * Constant for the "Shorthand Format Controls" Unicode character block.
2927          * @since 9
2928          */
2929         public static final UnicodeBlock SHORTHAND_FORMAT_CONTROLS =
2930             new UnicodeBlock("SHORTHAND_FORMAT_CONTROLS",
2931                              "SHORTHAND FORMAT CONTROLS",
2932                              "SHORTHANDFORMATCONTROLS");
2933 
2934         /**
2935          * Constant for the "Mende Kikakui" Unicode character block.
2936          * @since 9
2937          */
2938         public static final UnicodeBlock MENDE_KIKAKUI =
2939             new UnicodeBlock("MENDE_KIKAKUI",
2940                              "MENDE KIKAKUI",
2941                              "MENDEKIKAKUI");
2942 
2943         /**
2944          * Constant for the "Ornamental Dingbats" Unicode character block.
2945          * @since 9
2946          */
2947         public static final UnicodeBlock ORNAMENTAL_DINGBATS =
2948             new UnicodeBlock("ORNAMENTAL_DINGBATS",
2949                              "ORNAMENTAL DINGBATS",
2950                              "ORNAMENTALDINGBATS");
2951 
2952         /**
2953          * Constant for the "Geometric Shapes Extended" Unicode character block.
2954          * @since 9
2955          */
2956         public static final UnicodeBlock GEOMETRIC_SHAPES_EXTENDED =
2957             new UnicodeBlock("GEOMETRIC_SHAPES_EXTENDED",
2958                              "GEOMETRIC SHAPES EXTENDED",
2959                              "GEOMETRICSHAPESEXTENDED");
2960 
2961         /**
2962          * Constant for the "Supplemental Arrows-C" Unicode character block.
2963          * @since 9
2964          */
2965         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_C =
2966             new UnicodeBlock("SUPPLEMENTAL_ARROWS_C",
2967                              "SUPPLEMENTAL ARROWS-C",
2968                              "SUPPLEMENTALARROWS-C");
2969 
2970         /**
2971          * Constant for the "Cherokee Supplement" Unicode character block.
2972          * @since 9
2973          */
2974         public static final UnicodeBlock CHEROKEE_SUPPLEMENT =
2975             new UnicodeBlock("CHEROKEE_SUPPLEMENT",
2976                              "CHEROKEE SUPPLEMENT",
2977                              "CHEROKEESUPPLEMENT");
2978 
2979         /**
2980          * Constant for the "Hatran" Unicode character block.
2981          * @since 9
2982          */
2983         public static final UnicodeBlock HATRAN =
2984             new UnicodeBlock("HATRAN");
2985 
2986         /**
2987          * Constant for the "Old Hungarian" Unicode character block.
2988          * @since 9
2989          */
2990         public static final UnicodeBlock OLD_HUNGARIAN =
2991             new UnicodeBlock("OLD_HUNGARIAN",
2992                              "OLD HUNGARIAN",
2993                              "OLDHUNGARIAN");
2994 
2995         /**
2996          * Constant for the "Multani" Unicode character block.
2997          * @since 9
2998          */
2999         public static final UnicodeBlock MULTANI =
3000             new UnicodeBlock("MULTANI");
3001 
3002         /**
3003          * Constant for the "Ahom" Unicode character block.
3004          * @since 9
3005          */
3006         public static final UnicodeBlock AHOM =
3007             new UnicodeBlock("AHOM");
3008 
3009         /**
3010          * Constant for the "Early Dynastic Cuneiform" Unicode character block.
3011          * @since 9
3012          */
3013         public static final UnicodeBlock EARLY_DYNASTIC_CUNEIFORM =
3014             new UnicodeBlock("EARLY_DYNASTIC_CUNEIFORM",
3015                              "EARLY DYNASTIC CUNEIFORM",
3016                              "EARLYDYNASTICCUNEIFORM");
3017 
3018         /**
3019          * Constant for the "Anatolian Hieroglyphs" Unicode character block.
3020          * @since 9
3021          */
3022         public static final UnicodeBlock ANATOLIAN_HIEROGLYPHS =
3023             new UnicodeBlock("ANATOLIAN_HIEROGLYPHS",
3024                              "ANATOLIAN HIEROGLYPHS",
3025                              "ANATOLIANHIEROGLYPHS");
3026 
3027         /**
3028          * Constant for the "Sutton SignWriting" Unicode character block.
3029          * @since 9
3030          */
3031         public static final UnicodeBlock SUTTON_SIGNWRITING =
3032             new UnicodeBlock("SUTTON_SIGNWRITING",
3033                              "SUTTON SIGNWRITING",
3034                              "SUTTONSIGNWRITING");
3035 
3036         /**
3037          * Constant for the "Supplemental Symbols and Pictographs" Unicode
3038          * character block.
3039          * @since 9
3040          */
3041         public static final UnicodeBlock SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS =
3042             new UnicodeBlock("SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS",
3043                              "SUPPLEMENTAL SYMBOLS AND PICTOGRAPHS",
3044                              "SUPPLEMENTALSYMBOLSANDPICTOGRAPHS");
3045 
3046         /**
3047          * Constant for the "CJK Unified Ideographs Extension E" Unicode
3048          * character block.
3049          * @since 9
3050          */
3051         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E =
3052             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E",
3053                              "CJK UNIFIED IDEOGRAPHS EXTENSION E",
3054                              "CJKUNIFIEDIDEOGRAPHSEXTENSIONE");
3055 
3056         /**
3057          * Constant for the "Syriac Supplement" Unicode
3058          * character block.
3059          * @since 11
3060          */
3061         public static final UnicodeBlock SYRIAC_SUPPLEMENT =
3062             new UnicodeBlock("SYRIAC_SUPPLEMENT",
3063                              "SYRIAC SUPPLEMENT",
3064                              "SYRIACSUPPLEMENT");
3065 
3066         /**
3067          * Constant for the "Cyrillic Extended-C" Unicode
3068          * character block.
3069          * @since 11
3070          */
3071         public static final UnicodeBlock CYRILLIC_EXTENDED_C =
3072             new UnicodeBlock("CYRILLIC_EXTENDED_C",
3073                              "CYRILLIC EXTENDED-C",
3074                              "CYRILLICEXTENDED-C");
3075 
3076         /**
3077          * Constant for the "Osage" Unicode
3078          * character block.
3079          * @since 11
3080          */
3081         public static final UnicodeBlock OSAGE =
3082             new UnicodeBlock("OSAGE");
3083 
3084         /**
3085          * Constant for the "Newa" Unicode
3086          * character block.
3087          * @since 11
3088          */
3089         public static final UnicodeBlock NEWA =
3090             new UnicodeBlock("NEWA");
3091 
3092         /**
3093          * Constant for the "Mongolian Supplement" Unicode
3094          * character block.
3095          * @since 11
3096          */
3097         public static final UnicodeBlock MONGOLIAN_SUPPLEMENT =
3098             new UnicodeBlock("MONGOLIAN_SUPPLEMENT",
3099                              "MONGOLIAN SUPPLEMENT",
3100                              "MONGOLIANSUPPLEMENT");
3101 
3102         /**
3103          * Constant for the "Marchen" Unicode
3104          * character block.
3105          * @since 11
3106          */
3107         public static final UnicodeBlock MARCHEN =
3108             new UnicodeBlock("MARCHEN");
3109 
3110         /**
3111          * Constant for the "Ideographic Symbols and Punctuation" Unicode
3112          * character block.
3113          * @since 11
3114          */
3115         public static final UnicodeBlock IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION =
3116             new UnicodeBlock("IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION",
3117                              "IDEOGRAPHIC SYMBOLS AND PUNCTUATION",
3118                              "IDEOGRAPHICSYMBOLSANDPUNCTUATION");
3119 
3120         /**
3121          * Constant for the "Tangut" Unicode
3122          * character block.
3123          * @since 11
3124          */
3125         public static final UnicodeBlock TANGUT =
3126             new UnicodeBlock("TANGUT");
3127 
3128         /**
3129          * Constant for the "Tangut Components" Unicode
3130          * character block.
3131          * @since 11
3132          */
3133         public static final UnicodeBlock TANGUT_COMPONENTS =
3134             new UnicodeBlock("TANGUT_COMPONENTS",
3135                              "TANGUT COMPONENTS",
3136                              "TANGUTCOMPONENTS");
3137 
3138         /**
3139          * Constant for the "Kana Extended-A" Unicode
3140          * character block.
3141          * @since 11
3142          */
3143         public static final UnicodeBlock KANA_EXTENDED_A =
3144             new UnicodeBlock("KANA_EXTENDED_A",
3145                              "KANA EXTENDED-A",
3146                              "KANAEXTENDED-A");
3147         /**
3148          * Constant for the "Glagolitic Supplement" Unicode
3149          * character block.
3150          * @since 11
3151          */
3152         public static final UnicodeBlock GLAGOLITIC_SUPPLEMENT =
3153             new UnicodeBlock("GLAGOLITIC_SUPPLEMENT",
3154                              "GLAGOLITIC SUPPLEMENT",
3155                              "GLAGOLITICSUPPLEMENT");
3156         /**
3157          * Constant for the "Adlam" Unicode
3158          * character block.
3159          * @since 11
3160          */
3161         public static final UnicodeBlock ADLAM =
3162             new UnicodeBlock("ADLAM");
3163 
3164         /**
3165          * Constant for the "Masaram Gondi" Unicode
3166          * character block.
3167          * @since 11
3168          */
3169         public static final UnicodeBlock MASARAM_GONDI =
3170             new UnicodeBlock("MASARAM_GONDI",
3171                              "MASARAM GONDI",
3172                              "MASARAMGONDI");
3173 
3174         /**
3175          * Constant for the "Zanabazar Square" Unicode
3176          * character block.
3177          * @since 11
3178          */
3179         public static final UnicodeBlock ZANABAZAR_SQUARE =
3180             new UnicodeBlock("ZANABAZAR_SQUARE",
3181                              "ZANABAZAR SQUARE",
3182                              "ZANABAZARSQUARE");
3183 
3184         /**
3185          * Constant for the "Nushu" Unicode
3186          * character block.
3187          * @since 11
3188          */
3189         public static final UnicodeBlock NUSHU =
3190             new UnicodeBlock("NUSHU");
3191 
3192         /**
3193          * Constant for the "Soyombo" Unicode
3194          * character block.
3195          * @since 11
3196          */
3197         public static final UnicodeBlock SOYOMBO =
3198             new UnicodeBlock("SOYOMBO");
3199 
3200         /**
3201          * Constant for the "Bhaiksuki" Unicode
3202          * character block.
3203          * @since 11
3204          */
3205         public static final UnicodeBlock BHAIKSUKI =
3206             new UnicodeBlock("BHAIKSUKI");
3207 
3208         /**
3209          * Constant for the "CJK Unified Ideographs Extension F" Unicode
3210          * character block.
3211          * @since 11
3212          */
3213         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F =
3214             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F",
3215                              "CJK UNIFIED IDEOGRAPHS EXTENSION F",
3216                              "CJKUNIFIEDIDEOGRAPHSEXTENSIONF");
3217         /**
3218          * Constant for the "Georgian Extended" Unicode
3219          * character block.
3220          * @since 12
3221          */
3222         public static final UnicodeBlock GEORGIAN_EXTENDED =
3223             new UnicodeBlock("GEORGIAN_EXTENDED",
3224                              "GEORGIAN EXTENDED",
3225                              "GEORGIANEXTENDED");
3226 
3227         /**
3228          * Constant for the "Hanifi Rohingya" Unicode
3229          * character block.
3230          * @since 12
3231          */
3232         public static final UnicodeBlock HANIFI_ROHINGYA =
3233             new UnicodeBlock("HANIFI_ROHINGYA",
3234                              "HANIFI ROHINGYA",
3235                              "HANIFIROHINGYA");
3236 
3237         /**
3238          * Constant for the "Old Sogdian" Unicode
3239          * character block.
3240          * @since 12
3241          */
3242         public static final UnicodeBlock OLD_SOGDIAN =
3243             new UnicodeBlock("OLD_SOGDIAN",
3244                              "OLD SOGDIAN",
3245                              "OLDSOGDIAN");
3246 
3247         /**
3248          * Constant for the "Sogdian" Unicode
3249          * character block.
3250          * @since 12
3251          */
3252         public static final UnicodeBlock SOGDIAN =
3253             new UnicodeBlock("SOGDIAN");
3254 
3255         /**
3256          * Constant for the "Dogra" Unicode
3257          * character block.
3258          * @since 12
3259          */
3260         public static final UnicodeBlock DOGRA =
3261             new UnicodeBlock("DOGRA");
3262 
3263         /**
3264          * Constant for the "Gunjala Gondi" Unicode
3265          * character block.
3266          * @since 12
3267          */
3268         public static final UnicodeBlock GUNJALA_GONDI =
3269             new UnicodeBlock("GUNJALA_GONDI",
3270                              "GUNJALA GONDI",
3271                              "GUNJALAGONDI");
3272 
3273         /**
3274          * Constant for the "Makasar" Unicode
3275          * character block.
3276          * @since 12
3277          */
3278         public static final UnicodeBlock MAKASAR =
3279             new UnicodeBlock("MAKASAR");
3280 
3281         /**
3282          * Constant for the "Medefaidrin" Unicode
3283          * character block.
3284          * @since 12
3285          */
3286         public static final UnicodeBlock MEDEFAIDRIN =
3287             new UnicodeBlock("MEDEFAIDRIN");
3288 
3289         /**
3290          * Constant for the "Mayan Numerals" Unicode
3291          * character block.
3292          * @since 12
3293          */
3294         public static final UnicodeBlock MAYAN_NUMERALS =
3295             new UnicodeBlock("MAYAN_NUMERALS",
3296                              "MAYAN NUMERALS",
3297                              "MAYANNUMERALS");
3298 
3299         /**
3300          * Constant for the "Indic Siyaq Numbers" Unicode
3301          * character block.
3302          * @since 12
3303          */
3304         public static final UnicodeBlock INDIC_SIYAQ_NUMBERS =
3305             new UnicodeBlock("INDIC_SIYAQ_NUMBERS",
3306                              "INDIC SIYAQ NUMBERS",
3307                              "INDICSIYAQNUMBERS");
3308 
3309         /**
3310          * Constant for the "Chess Symbols" Unicode
3311          * character block.
3312          * @since 12
3313          */
3314         public static final UnicodeBlock CHESS_SYMBOLS =
3315             new UnicodeBlock("CHESS_SYMBOLS",
3316                              "CHESS SYMBOLS",
3317                              "CHESSSYMBOLS");
3318 
3319         /**
3320          * Constant for the "Elymaic" Unicode
3321          * character block.
3322          * @since 13
3323          */
3324         public static final UnicodeBlock ELYMAIC =
3325             new UnicodeBlock("ELYMAIC");
3326 
3327         /**
3328          * Constant for the "Nandinagari" Unicode
3329          * character block.
3330          * @since 13
3331          */
3332         public static final UnicodeBlock NANDINAGARI =
3333             new UnicodeBlock("NANDINAGARI");
3334 
3335         /**
3336          * Constant for the "Tamil Supplement" Unicode
3337          * character block.
3338          * @since 13
3339          */
3340         public static final UnicodeBlock TAMIL_SUPPLEMENT =
3341             new UnicodeBlock("TAMIL_SUPPLEMENT",
3342                              "TAMIL SUPPLEMENT",
3343                              "TAMILSUPPLEMENT");
3344 
3345         /**
3346          * Constant for the "Egyptian Hieroglyph Format Controls" Unicode
3347          * character block.
3348          * @since 13
3349          */
3350         public static final UnicodeBlock EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS =
3351             new UnicodeBlock("EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS",
3352                              "EGYPTIAN HIEROGLYPH FORMAT CONTROLS",
3353                              "EGYPTIANHIEROGLYPHFORMATCONTROLS");
3354 
3355         /**
3356          * Constant for the "Small Kana Extension" Unicode
3357          * character block.
3358          * @since 13
3359          */
3360         public static final UnicodeBlock SMALL_KANA_EXTENSION =
3361             new UnicodeBlock("SMALL_KANA_EXTENSION",
3362                              "SMALL KANA EXTENSION",
3363                              "SMALLKANAEXTENSION");
3364 
3365         /**
3366          * Constant for the "Nyiakeng Puachue Hmong" Unicode
3367          * character block.
3368          * @since 13
3369          */
3370         public static final UnicodeBlock NYIAKENG_PUACHUE_HMONG =
3371             new UnicodeBlock("NYIAKENG_PUACHUE_HMONG",
3372                              "NYIAKENG PUACHUE HMONG",
3373                              "NYIAKENGPUACHUEHMONG");
3374 
3375         /**
3376          * Constant for the "Wancho" Unicode
3377          * character block.
3378          * @since 13
3379          */
3380         public static final UnicodeBlock WANCHO =
3381             new UnicodeBlock("WANCHO");
3382 
3383         /**
3384          * Constant for the "Ottoman Siyaq Numbers" Unicode
3385          * character block.
3386          * @since 13
3387          */
3388         public static final UnicodeBlock OTTOMAN_SIYAQ_NUMBERS =
3389             new UnicodeBlock("OTTOMAN_SIYAQ_NUMBERS",
3390                              "OTTOMAN SIYAQ NUMBERS",
3391                              "OTTOMANSIYAQNUMBERS");
3392 
3393         /**
3394          * Constant for the "Symbols and Pictographs Extended-A" Unicode
3395          * character block.
3396          * @since 13
3397          */
3398         public static final UnicodeBlock SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A =
3399             new UnicodeBlock("SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A",
3400                              "SYMBOLS AND PICTOGRAPHS EXTENDED-A",
3401                              "SYMBOLSANDPICTOGRAPHSEXTENDED-A");
3402 
3403         /**
3404          * Constant for the "Yezidi" Unicode
3405          * character block.
3406          * @since 15
3407          */
3408         public static final UnicodeBlock YEZIDI =
3409             new UnicodeBlock("YEZIDI");
3410 
3411         /**
3412          * Constant for the "Chorasmian" Unicode
3413          * character block.
3414          * @since 15
3415          */
3416         public static final UnicodeBlock CHORASMIAN =
3417             new UnicodeBlock("CHORASMIAN");
3418 
3419         /**
3420          * Constant for the "Dives Akuru" Unicode
3421          * character block.
3422          * @since 15
3423          */
3424         public static final UnicodeBlock DIVES_AKURU =
3425             new UnicodeBlock("DIVES_AKURU",
3426                              "DIVES AKURU",
3427                              "DIVESAKURU");
3428 
3429         /**
3430          * Constant for the "Lisu Supplement" Unicode
3431          * character block.
3432          * @since 15
3433          */
3434         public static final UnicodeBlock LISU_SUPPLEMENT =
3435             new UnicodeBlock("LISU_SUPPLEMENT",
3436                              "LISU SUPPLEMENT",
3437                              "LISUSUPPLEMENT");
3438 
3439         /**
3440          * Constant for the "Khitan Small Script" Unicode
3441          * character block.
3442          * @since 15
3443          */
3444         public static final UnicodeBlock KHITAN_SMALL_SCRIPT =
3445             new UnicodeBlock("KHITAN_SMALL_SCRIPT",
3446                              "KHITAN SMALL SCRIPT",
3447                              "KHITANSMALLSCRIPT");
3448 
3449         /**
3450          * Constant for the "Tangut Supplement" Unicode
3451          * character block.
3452          * @since 15
3453          */
3454         public static final UnicodeBlock TANGUT_SUPPLEMENT =
3455             new UnicodeBlock("TANGUT_SUPPLEMENT",
3456                              "TANGUT SUPPLEMENT",
3457                              "TANGUTSUPPLEMENT");
3458 
3459         /**
3460          * Constant for the "Symbols for Legacy Computing" Unicode
3461          * character block.
3462          * @since 15
3463          */
3464         public static final UnicodeBlock SYMBOLS_FOR_LEGACY_COMPUTING =
3465             new UnicodeBlock("SYMBOLS_FOR_LEGACY_COMPUTING",
3466                              "SYMBOLS FOR LEGACY COMPUTING",
3467                              "SYMBOLSFORLEGACYCOMPUTING");
3468 
3469         /**
3470          * Constant for the "CJK Unified Ideographs Extension G" Unicode
3471          * character block.
3472          * @since 15
3473          */
3474         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G =
3475             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G",
3476                              "CJK UNIFIED IDEOGRAPHS EXTENSION G",
3477                              "CJKUNIFIEDIDEOGRAPHSEXTENSIONG");
3478 
        // Table of range-start code points, listed in strictly ascending order.
        // Each entry is the first code point of a range; the trailing comment
        // gives that range and its block name, or "unassigned" where the entry
        // opens a gap between two named blocks. A range runs up to, but not
        // including, the value of the following entry.
        // NOTE(review): this table appears to be index-aligned with the
        // `blocks` array declared right after it (an "unassigned" entry here
        // pairing with a null there) -- confirm, and keep both arrays in sync
        // whenever an entry is added or removed.
        private static final int[] blockStarts = {
            0x0000,   // 0000..007F; Basic Latin
            0x0080,   // 0080..00FF; Latin-1 Supplement
            0x0100,   // 0100..017F; Latin Extended-A
            0x0180,   // 0180..024F; Latin Extended-B
            0x0250,   // 0250..02AF; IPA Extensions
            0x02B0,   // 02B0..02FF; Spacing Modifier Letters
            0x0300,   // 0300..036F; Combining Diacritical Marks
            0x0370,   // 0370..03FF; Greek and Coptic
            0x0400,   // 0400..04FF; Cyrillic
            0x0500,   // 0500..052F; Cyrillic Supplement
            0x0530,   // 0530..058F; Armenian
            0x0590,   // 0590..05FF; Hebrew
            0x0600,   // 0600..06FF; Arabic
            0x0700,   // 0700..074F; Syriac
            0x0750,   // 0750..077F; Arabic Supplement
            0x0780,   // 0780..07BF; Thaana
            0x07C0,   // 07C0..07FF; NKo
            0x0800,   // 0800..083F; Samaritan
            0x0840,   // 0840..085F; Mandaic
            0x0860,   // 0860..086F; Syriac Supplement
            0x0870,   //             unassigned
            0x08A0,   // 08A0..08FF; Arabic Extended-A
            0x0900,   // 0900..097F; Devanagari
            0x0980,   // 0980..09FF; Bengali
            0x0A00,   // 0A00..0A7F; Gurmukhi
            0x0A80,   // 0A80..0AFF; Gujarati
            0x0B00,   // 0B00..0B7F; Oriya
            0x0B80,   // 0B80..0BFF; Tamil
            0x0C00,   // 0C00..0C7F; Telugu
            0x0C80,   // 0C80..0CFF; Kannada
            0x0D00,   // 0D00..0D7F; Malayalam
            0x0D80,   // 0D80..0DFF; Sinhala
            0x0E00,   // 0E00..0E7F; Thai
            0x0E80,   // 0E80..0EFF; Lao
            0x0F00,   // 0F00..0FFF; Tibetan
            0x1000,   // 1000..109F; Myanmar
            0x10A0,   // 10A0..10FF; Georgian
            0x1100,   // 1100..11FF; Hangul Jamo
            0x1200,   // 1200..137F; Ethiopic
            0x1380,   // 1380..139F; Ethiopic Supplement
            0x13A0,   // 13A0..13FF; Cherokee
            0x1400,   // 1400..167F; Unified Canadian Aboriginal Syllabics
            0x1680,   // 1680..169F; Ogham
            0x16A0,   // 16A0..16FF; Runic
            0x1700,   // 1700..171F; Tagalog
            0x1720,   // 1720..173F; Hanunoo
            0x1740,   // 1740..175F; Buhid
            0x1760,   // 1760..177F; Tagbanwa
            0x1780,   // 1780..17FF; Khmer
            0x1800,   // 1800..18AF; Mongolian
            0x18B0,   // 18B0..18FF; Unified Canadian Aboriginal Syllabics Extended
            0x1900,   // 1900..194F; Limbu
            0x1950,   // 1950..197F; Tai Le
            0x1980,   // 1980..19DF; New Tai Lue
            0x19E0,   // 19E0..19FF; Khmer Symbols
            0x1A00,   // 1A00..1A1F; Buginese
            0x1A20,   // 1A20..1AAF; Tai Tham
            0x1AB0,   // 1AB0..1AFF; Combining Diacritical Marks Extended
            0x1B00,   // 1B00..1B7F; Balinese
            0x1B80,   // 1B80..1BBF; Sundanese
            0x1BC0,   // 1BC0..1BFF; Batak
            0x1C00,   // 1C00..1C4F; Lepcha
            0x1C50,   // 1C50..1C7F; Ol Chiki
            0x1C80,   // 1C80..1C8F; Cyrillic Extended-C
            0x1C90,   // 1C90..1CBF; Georgian Extended
            0x1CC0,   // 1CC0..1CCF; Sundanese Supplement
            0x1CD0,   // 1CD0..1CFF; Vedic Extensions
            0x1D00,   // 1D00..1D7F; Phonetic Extensions
            0x1D80,   // 1D80..1DBF; Phonetic Extensions Supplement
            0x1DC0,   // 1DC0..1DFF; Combining Diacritical Marks Supplement
            0x1E00,   // 1E00..1EFF; Latin Extended Additional
            0x1F00,   // 1F00..1FFF; Greek Extended
            0x2000,   // 2000..206F; General Punctuation
            0x2070,   // 2070..209F; Superscripts and Subscripts
            0x20A0,   // 20A0..20CF; Currency Symbols
            0x20D0,   // 20D0..20FF; Combining Diacritical Marks for Symbols
            0x2100,   // 2100..214F; Letterlike Symbols
            0x2150,   // 2150..218F; Number Forms
            0x2190,   // 2190..21FF; Arrows
            0x2200,   // 2200..22FF; Mathematical Operators
            0x2300,   // 2300..23FF; Miscellaneous Technical
            0x2400,   // 2400..243F; Control Pictures
            0x2440,   // 2440..245F; Optical Character Recognition
            0x2460,   // 2460..24FF; Enclosed Alphanumerics
            0x2500,   // 2500..257F; Box Drawing
            0x2580,   // 2580..259F; Block Elements
            0x25A0,   // 25A0..25FF; Geometric Shapes
            0x2600,   // 2600..26FF; Miscellaneous Symbols
            0x2700,   // 2700..27BF; Dingbats
            0x27C0,   // 27C0..27EF; Miscellaneous Mathematical Symbols-A
            0x27F0,   // 27F0..27FF; Supplemental Arrows-A
            0x2800,   // 2800..28FF; Braille Patterns
            0x2900,   // 2900..297F; Supplemental Arrows-B
            0x2980,   // 2980..29FF; Miscellaneous Mathematical Symbols-B
            0x2A00,   // 2A00..2AFF; Supplemental Mathematical Operators
            0x2B00,   // 2B00..2BFF; Miscellaneous Symbols and Arrows
            0x2C00,   // 2C00..2C5F; Glagolitic
            0x2C60,   // 2C60..2C7F; Latin Extended-C
            0x2C80,   // 2C80..2CFF; Coptic
            0x2D00,   // 2D00..2D2F; Georgian Supplement
            0x2D30,   // 2D30..2D7F; Tifinagh
            0x2D80,   // 2D80..2DDF; Ethiopic Extended
            0x2DE0,   // 2DE0..2DFF; Cyrillic Extended-A
            0x2E00,   // 2E00..2E7F; Supplemental Punctuation
            0x2E80,   // 2E80..2EFF; CJK Radicals Supplement
            0x2F00,   // 2F00..2FDF; Kangxi Radicals
            0x2FE0,   //             unassigned
            0x2FF0,   // 2FF0..2FFF; Ideographic Description Characters
            0x3000,   // 3000..303F; CJK Symbols and Punctuation
            0x3040,   // 3040..309F; Hiragana
            0x30A0,   // 30A0..30FF; Katakana
            0x3100,   // 3100..312F; Bopomofo
            0x3130,   // 3130..318F; Hangul Compatibility Jamo
            0x3190,   // 3190..319F; Kanbun
            0x31A0,   // 31A0..31BF; Bopomofo Extended
            0x31C0,   // 31C0..31EF; CJK Strokes
            0x31F0,   // 31F0..31FF; Katakana Phonetic Extensions
            0x3200,   // 3200..32FF; Enclosed CJK Letters and Months
            0x3300,   // 3300..33FF; CJK Compatibility
            0x3400,   // 3400..4DBF; CJK Unified Ideographs Extension A
            0x4DC0,   // 4DC0..4DFF; Yijing Hexagram Symbols
            0x4E00,   // 4E00..9FFF; CJK Unified Ideographs
            0xA000,   // A000..A48F; Yi Syllables
            0xA490,   // A490..A4CF; Yi Radicals
            0xA4D0,   // A4D0..A4FF; Lisu
            0xA500,   // A500..A63F; Vai
            0xA640,   // A640..A69F; Cyrillic Extended-B
            0xA6A0,   // A6A0..A6FF; Bamum
            0xA700,   // A700..A71F; Modifier Tone Letters
            0xA720,   // A720..A7FF; Latin Extended-D
            0xA800,   // A800..A82F; Syloti Nagri
            0xA830,   // A830..A83F; Common Indic Number Forms
            0xA840,   // A840..A87F; Phags-pa
            0xA880,   // A880..A8DF; Saurashtra
            0xA8E0,   // A8E0..A8FF; Devanagari Extended
            0xA900,   // A900..A92F; Kayah Li
            0xA930,   // A930..A95F; Rejang
            0xA960,   // A960..A97F; Hangul Jamo Extended-A
            0xA980,   // A980..A9DF; Javanese
            0xA9E0,   // A9E0..A9FF; Myanmar Extended-B
            0xAA00,   // AA00..AA5F; Cham
            0xAA60,   // AA60..AA7F; Myanmar Extended-A
            0xAA80,   // AA80..AADF; Tai Viet
            0xAAE0,   // AAE0..AAFF; Meetei Mayek Extensions
            0xAB00,   // AB00..AB2F; Ethiopic Extended-A
            0xAB30,   // AB30..AB6F; Latin Extended-E
            0xAB70,   // AB70..ABBF; Cherokee Supplement
            0xABC0,   // ABC0..ABFF; Meetei Mayek
            0xAC00,   // AC00..D7AF; Hangul Syllables
            0xD7B0,   // D7B0..D7FF; Hangul Jamo Extended-B
            0xD800,   // D800..DB7F; High Surrogates
            0xDB80,   // DB80..DBFF; High Private Use Surrogates
            0xDC00,   // DC00..DFFF; Low Surrogates
            0xE000,   // E000..F8FF; Private Use Area
            0xF900,   // F900..FAFF; CJK Compatibility Ideographs
            0xFB00,   // FB00..FB4F; Alphabetic Presentation Forms
            0xFB50,   // FB50..FDFF; Arabic Presentation Forms-A
            0xFE00,   // FE00..FE0F; Variation Selectors
            0xFE10,   // FE10..FE1F; Vertical Forms
            0xFE20,   // FE20..FE2F; Combining Half Marks
            0xFE30,   // FE30..FE4F; CJK Compatibility Forms
            0xFE50,   // FE50..FE6F; Small Form Variants
            0xFE70,   // FE70..FEFF; Arabic Presentation Forms-B
            0xFF00,   // FF00..FFEF; Halfwidth and Fullwidth Forms
            0xFFF0,   // FFF0..FFFF; Specials
            0x10000,  // 10000..1007F; Linear B Syllabary
            0x10080,  // 10080..100FF; Linear B Ideograms
            0x10100,  // 10100..1013F; Aegean Numbers
            0x10140,  // 10140..1018F; Ancient Greek Numbers
            0x10190,  // 10190..101CF; Ancient Symbols
            0x101D0,  // 101D0..101FF; Phaistos Disc
            0x10200,  //               unassigned
            0x10280,  // 10280..1029F; Lycian
            0x102A0,  // 102A0..102DF; Carian
            0x102E0,  // 102E0..102FF; Coptic Epact Numbers
            0x10300,  // 10300..1032F; Old Italic
            0x10330,  // 10330..1034F; Gothic
            0x10350,  // 10350..1037F; Old Permic
            0x10380,  // 10380..1039F; Ugaritic
            0x103A0,  // 103A0..103DF; Old Persian
            0x103E0,  //               unassigned
            0x10400,  // 10400..1044F; Deseret
            0x10450,  // 10450..1047F; Shavian
            0x10480,  // 10480..104AF; Osmanya
            0x104B0,  // 104B0..104FF; Osage
            0x10500,  // 10500..1052F; Elbasan
            0x10530,  // 10530..1056F; Caucasian Albanian
            0x10570,  //               unassigned
            0x10600,  // 10600..1077F; Linear A
            0x10780,  //               unassigned
            0x10800,  // 10800..1083F; Cypriot Syllabary
            0x10840,  // 10840..1085F; Imperial Aramaic
            0x10860,  // 10860..1087F; Palmyrene
            0x10880,  // 10880..108AF; Nabataean
            0x108B0,  //               unassigned
            0x108E0,  // 108E0..108FF; Hatran
            0x10900,  // 10900..1091F; Phoenician
            0x10920,  // 10920..1093F; Lydian
            0x10940,  //               unassigned
            0x10980,  // 10980..1099F; Meroitic Hieroglyphs
            0x109A0,  // 109A0..109FF; Meroitic Cursive
            0x10A00,  // 10A00..10A5F; Kharoshthi
            0x10A60,  // 10A60..10A7F; Old South Arabian
            0x10A80,  // 10A80..10A9F; Old North Arabian
            0x10AA0,  //               unassigned
            0x10AC0,  // 10AC0..10AFF; Manichaean
            0x10B00,  // 10B00..10B3F; Avestan
            0x10B40,  // 10B40..10B5F; Inscriptional Parthian
            0x10B60,  // 10B60..10B7F; Inscriptional Pahlavi
            0x10B80,  // 10B80..10BAF; Psalter Pahlavi
            0x10BB0,  //               unassigned
            0x10C00,  // 10C00..10C4F; Old Turkic
            0x10C50,  //               unassigned
            0x10C80,  // 10C80..10CFF; Old Hungarian
            0x10D00,  // 10D00..10D3F; Hanifi Rohingya
            0x10D40,  //               unassigned
            0x10E60,  // 10E60..10E7F; Rumi Numeral Symbols
            0x10E80,  // 10E80..10EBF; Yezidi
            0x10EC0,  //               unassigned
            0x10F00,  // 10F00..10F2F; Old Sogdian
            0x10F30,  // 10F30..10F6F; Sogdian
            0x10F70,  //               unassigned
            0x10FB0,  // 10FB0..10FDF; Chorasmian
            0x10FE0,  // 10FE0..10FFF; Elymaic
            0x11000,  // 11000..1107F; Brahmi
            0x11080,  // 11080..110CF; Kaithi
            0x110D0,  // 110D0..110FF; Sora Sompeng
            0x11100,  // 11100..1114F; Chakma
            0x11150,  // 11150..1117F; Mahajani
            0x11180,  // 11180..111DF; Sharada
            0x111E0,  // 111E0..111FF; Sinhala Archaic Numbers
            0x11200,  // 11200..1124F; Khojki
            0x11250,  //               unassigned
            0x11280,  // 11280..112AF; Multani
            0x112B0,  // 112B0..112FF; Khudawadi
            0x11300,  // 11300..1137F; Grantha
            0x11380,  //               unassigned
            0x11400,  // 11400..1147F; Newa
            0x11480,  // 11480..114DF; Tirhuta
            0x114E0,  //               unassigned
            0x11580,  // 11580..115FF; Siddham
            0x11600,  // 11600..1165F; Modi
            0x11660,  // 11660..1167F; Mongolian Supplement
            0x11680,  // 11680..116CF; Takri
            0x116D0,  //               unassigned
            0x11700,  // 11700..1173F; Ahom
            0x11740,  //               unassigned
            0x11800,  // 11800..1184F; Dogra
            0x11850,  //               unassigned
            0x118A0,  // 118A0..118FF; Warang Citi
            0x11900,  // 11900..1195F; Dives Akuru
            0x11960,  //               unassigned
            0x119A0,  // 119A0..119FF; Nandinagari
            0x11A00,  // 11A00..11A4F; Zanabazar Square
            0x11A50,  // 11A50..11AAF; Soyombo
            0x11AB0,  //               unassigned
            0x11AC0,  // 11AC0..11AFF; Pau Cin Hau
            0x11B00,  //               unassigned
            0x11C00,  // 11C00..11C6F; Bhaiksuki
            0x11C70,  // 11C70..11CBF; Marchen
            0x11CC0,  //               unassigned
            0x11D00,  // 11D00..11D5F; Masaram Gondi
            0x11D60,  // 11D60..11DAF; Gunjala Gondi
            0x11DB0,  //               unassigned
            0x11EE0,  // 11EE0..11EFF; Makasar
            0x11F00,  //               unassigned
            0x11FB0,  // 11FB0..11FBF; Lisu Supplement
            0x11FC0,  // 11FC0..11FFF; Tamil Supplement
            0x12000,  // 12000..123FF; Cuneiform
            0x12400,  // 12400..1247F; Cuneiform Numbers and Punctuation
            0x12480,  // 12480..1254F; Early Dynastic Cuneiform
            0x12550,  //               unassigned
            0x13000,  // 13000..1342F; Egyptian Hieroglyphs
            0x13430,  // 13430..1343F; Egyptian Hieroglyph Format Controls
            0x13440,  //               unassigned
            0x14400,  // 14400..1467F; Anatolian Hieroglyphs
            0x14680,  //               unassigned
            0x16800,  // 16800..16A3F; Bamum Supplement
            0x16A40,  // 16A40..16A6F; Mro
            0x16A70,  //               unassigned
            0x16AD0,  // 16AD0..16AFF; Bassa Vah
            0x16B00,  // 16B00..16B8F; Pahawh Hmong
            0x16B90,  //               unassigned
            0x16E40,  // 16E40..16E9F; Medefaidrin
            0x16EA0,  //               unassigned
            0x16F00,  // 16F00..16F9F; Miao
            0x16FA0,  //               unassigned
            0x16FE0,  // 16FE0..16FFF; Ideographic Symbols and Punctuation
            0x17000,  // 17000..187FF; Tangut
            0x18800,  // 18800..18AFF; Tangut Components
            0x18B00,  // 18B00..18CFF; Khitan Small Script
            0x18D00,  // 18D00..18D8F; Tangut Supplement
            0x18D90,  //               unassigned
            0x1B000,  // 1B000..1B0FF; Kana Supplement
            0x1B100,  // 1B100..1B12F; Kana Extended-A
            0x1B130,  // 1B130..1B16F; Small Kana Extension
            0x1B170,  // 1B170..1B2FF; Nushu
            0x1B300,  //               unassigned
            0x1BC00,  // 1BC00..1BC9F; Duployan
            0x1BCA0,  // 1BCA0..1BCAF; Shorthand Format Controls
            0x1BCB0,  //               unassigned
            0x1D000,  // 1D000..1D0FF; Byzantine Musical Symbols
            0x1D100,  // 1D100..1D1FF; Musical Symbols
            0x1D200,  // 1D200..1D24F; Ancient Greek Musical Notation
            0x1D250,  //               unassigned
            0x1D2E0,  // 1D2E0..1D2FF; Mayan Numerals
            0x1D300,  // 1D300..1D35F; Tai Xuan Jing Symbols
            0x1D360,  // 1D360..1D37F; Counting Rod Numerals
            0x1D380,  //               unassigned
            0x1D400,  // 1D400..1D7FF; Mathematical Alphanumeric Symbols
            0x1D800,  // 1D800..1DAAF; Sutton SignWriting
            0x1DAB0,  //               unassigned
            0x1E000,  // 1E000..1E02F; Glagolitic Supplement
            0x1E030,  //               unassigned
            0x1E100,  // 1E100..1E14F; Nyiakeng Puachue Hmong
            0x1E150,  //               unassigned
            0x1E2C0,  // 1E2C0..1E2FF; Wancho
            0x1E300,  //               unassigned
            0x1E800,  // 1E800..1E8DF; Mende Kikakui
            0x1E8E0,  //               unassigned
            0x1E900,  // 1E900..1E95F; Adlam
            0x1E960,  //               unassigned
            0x1EC70,  // 1EC70..1ECBF; Indic Siyaq Numbers
            0x1ECC0,  //               unassigned
            0x1ED00,  // 1ED00..1ED4F; Ottoman Siyaq Numbers
            0x1ED50,  //               unassigned
            0x1EE00,  // 1EE00..1EEFF; Arabic Mathematical Alphabetic Symbols
            0x1EF00,  //               unassigned
            0x1F000,  // 1F000..1F02F; Mahjong Tiles
            0x1F030,  // 1F030..1F09F; Domino Tiles
            0x1F0A0,  // 1F0A0..1F0FF; Playing Cards
            0x1F100,  // 1F100..1F1FF; Enclosed Alphanumeric Supplement
            0x1F200,  // 1F200..1F2FF; Enclosed Ideographic Supplement
            0x1F300,  // 1F300..1F5FF; Miscellaneous Symbols and Pictographs
            0x1F600,  // 1F600..1F64F; Emoticons
            0x1F650,  // 1F650..1F67F; Ornamental Dingbats
            0x1F680,  // 1F680..1F6FF; Transport and Map Symbols
            0x1F700,  // 1F700..1F77F; Alchemical Symbols
            0x1F780,  // 1F780..1F7FF; Geometric Shapes Extended
            0x1F800,  // 1F800..1F8FF; Supplemental Arrows-C
            0x1F900,  // 1F900..1F9FF; Supplemental Symbols and Pictographs
            0x1FA00,  // 1FA00..1FA6F; Chess Symbols
            0x1FA70,  // 1FA70..1FAFF; Symbols and Pictographs Extended-A
            0x1FB00,  // 1FB00..1FBFF; Symbols for Legacy Computing
            0x1FC00,  //               unassigned
            0x20000,  // 20000..2A6DF; CJK Unified Ideographs Extension B
            0x2A6E0,  //               unassigned
            0x2A700,  // 2A700..2B73F; CJK Unified Ideographs Extension C
            0x2B740,  // 2B740..2B81F; CJK Unified Ideographs Extension D
            0x2B820,  // 2B820..2CEAF; CJK Unified Ideographs Extension E
            0x2CEB0,  // 2CEB0..2EBEF; CJK Unified Ideographs Extension F
            0x2EBF0,  //               unassigned
            0x2F800,  // 2F800..2FA1F; CJK Compatibility Ideographs Supplement
            0x2FA20,  //               unassigned
            0x30000,  // 30000..3134F; CJK Unified Ideographs Extension G
            0x31350,  //               unassigned
            0xE0000,  // E0000..E007F; Tags
            0xE0080,  //               unassigned
            0xE0100,  // E0100..E01EF; Variation Selectors Supplement
            0xE01F0,  //               unassigned
            0xF0000,  // F0000..FFFFF; Supplementary Private Use Area-A
            0x100000, // 100000..10FFFF; Supplementary Private Use Area-B
        };
3843 
        // Lookup table used by of(int): the element at index i is the block
        // for the code point range that begins at blockStarts[i], so the two
        // arrays must be kept in the same order. A null element marks a range
        // that is not assigned to any block.
        private static final UnicodeBlock[] blocks = {
            BASIC_LATIN,
            LATIN_1_SUPPLEMENT,
            LATIN_EXTENDED_A,
            LATIN_EXTENDED_B,
            IPA_EXTENSIONS,
            SPACING_MODIFIER_LETTERS,
            COMBINING_DIACRITICAL_MARKS,
            GREEK,
            CYRILLIC,
            CYRILLIC_SUPPLEMENTARY,
            ARMENIAN,
            HEBREW,
            ARABIC,
            SYRIAC,
            ARABIC_SUPPLEMENT,
            THAANA,
            NKO,
            SAMARITAN,
            MANDAIC,
            SYRIAC_SUPPLEMENT,
            null,
            ARABIC_EXTENDED_A,
            DEVANAGARI,
            BENGALI,
            GURMUKHI,
            GUJARATI,
            ORIYA,
            TAMIL,
            TELUGU,
            KANNADA,
            MALAYALAM,
            SINHALA,
            THAI,
            LAO,
            TIBETAN,
            MYANMAR,
            GEORGIAN,
            HANGUL_JAMO,
            ETHIOPIC,
            ETHIOPIC_SUPPLEMENT,
            CHEROKEE,
            UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,
            OGHAM,
            RUNIC,
            TAGALOG,
            HANUNOO,
            BUHID,
            TAGBANWA,
            KHMER,
            MONGOLIAN,
            UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED,
            LIMBU,
            TAI_LE,
            NEW_TAI_LUE,
            KHMER_SYMBOLS,
            BUGINESE,
            TAI_THAM,
            COMBINING_DIACRITICAL_MARKS_EXTENDED,
            BALINESE,
            SUNDANESE,
            BATAK,
            LEPCHA,
            OL_CHIKI,
            CYRILLIC_EXTENDED_C,
            GEORGIAN_EXTENDED,
            SUNDANESE_SUPPLEMENT,
            VEDIC_EXTENSIONS,
            PHONETIC_EXTENSIONS,
            PHONETIC_EXTENSIONS_SUPPLEMENT,
            COMBINING_DIACRITICAL_MARKS_SUPPLEMENT,
            LATIN_EXTENDED_ADDITIONAL,
            GREEK_EXTENDED,
            GENERAL_PUNCTUATION,
            SUPERSCRIPTS_AND_SUBSCRIPTS,
            CURRENCY_SYMBOLS,
            COMBINING_MARKS_FOR_SYMBOLS,
            LETTERLIKE_SYMBOLS,
            NUMBER_FORMS,
            ARROWS,
            MATHEMATICAL_OPERATORS,
            MISCELLANEOUS_TECHNICAL,
            CONTROL_PICTURES,
            OPTICAL_CHARACTER_RECOGNITION,
            ENCLOSED_ALPHANUMERICS,
            BOX_DRAWING,
            BLOCK_ELEMENTS,
            GEOMETRIC_SHAPES,
            MISCELLANEOUS_SYMBOLS,
            DINGBATS,
            MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A,
            SUPPLEMENTAL_ARROWS_A,
            BRAILLE_PATTERNS,
            SUPPLEMENTAL_ARROWS_B,
            MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B,
            SUPPLEMENTAL_MATHEMATICAL_OPERATORS,
            MISCELLANEOUS_SYMBOLS_AND_ARROWS,
            GLAGOLITIC,
            LATIN_EXTENDED_C,
            COPTIC,
            GEORGIAN_SUPPLEMENT,
            TIFINAGH,
            ETHIOPIC_EXTENDED,
            CYRILLIC_EXTENDED_A,
            SUPPLEMENTAL_PUNCTUATION,
            CJK_RADICALS_SUPPLEMENT,
            KANGXI_RADICALS,
            null,
            IDEOGRAPHIC_DESCRIPTION_CHARACTERS,
            CJK_SYMBOLS_AND_PUNCTUATION,
            HIRAGANA,
            KATAKANA,
            BOPOMOFO,
            HANGUL_COMPATIBILITY_JAMO,
            KANBUN,
            BOPOMOFO_EXTENDED,
            CJK_STROKES,
            KATAKANA_PHONETIC_EXTENSIONS,
            ENCLOSED_CJK_LETTERS_AND_MONTHS,
            CJK_COMPATIBILITY,
            CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
            YIJING_HEXAGRAM_SYMBOLS,
            CJK_UNIFIED_IDEOGRAPHS,
            YI_SYLLABLES,
            YI_RADICALS,
            LISU,
            VAI,
            CYRILLIC_EXTENDED_B,
            BAMUM,
            MODIFIER_TONE_LETTERS,
            LATIN_EXTENDED_D,
            SYLOTI_NAGRI,
            COMMON_INDIC_NUMBER_FORMS,
            PHAGS_PA,
            SAURASHTRA,
            DEVANAGARI_EXTENDED,
            KAYAH_LI,
            REJANG,
            HANGUL_JAMO_EXTENDED_A,
            JAVANESE,
            MYANMAR_EXTENDED_B,
            CHAM,
            MYANMAR_EXTENDED_A,
            TAI_VIET,
            MEETEI_MAYEK_EXTENSIONS,
            ETHIOPIC_EXTENDED_A,
            LATIN_EXTENDED_E,
            CHEROKEE_SUPPLEMENT,
            MEETEI_MAYEK,
            HANGUL_SYLLABLES,
            HANGUL_JAMO_EXTENDED_B,
            HIGH_SURROGATES,
            HIGH_PRIVATE_USE_SURROGATES,
            LOW_SURROGATES,
            PRIVATE_USE_AREA,
            CJK_COMPATIBILITY_IDEOGRAPHS,
            ALPHABETIC_PRESENTATION_FORMS,
            ARABIC_PRESENTATION_FORMS_A,
            VARIATION_SELECTORS,
            VERTICAL_FORMS,
            COMBINING_HALF_MARKS,
            CJK_COMPATIBILITY_FORMS,
            SMALL_FORM_VARIANTS,
            ARABIC_PRESENTATION_FORMS_B,
            HALFWIDTH_AND_FULLWIDTH_FORMS,
            SPECIALS,
            LINEAR_B_SYLLABARY,
            LINEAR_B_IDEOGRAMS,
            AEGEAN_NUMBERS,
            ANCIENT_GREEK_NUMBERS,
            ANCIENT_SYMBOLS,
            PHAISTOS_DISC,
            null,
            LYCIAN,
            CARIAN,
            COPTIC_EPACT_NUMBERS,
            OLD_ITALIC,
            GOTHIC,
            OLD_PERMIC,
            UGARITIC,
            OLD_PERSIAN,
            null,
            DESERET,
            SHAVIAN,
            OSMANYA,
            OSAGE,
            ELBASAN,
            CAUCASIAN_ALBANIAN,
            null,
            LINEAR_A,
            null,
            CYPRIOT_SYLLABARY,
            IMPERIAL_ARAMAIC,
            PALMYRENE,
            NABATAEAN,
            null,
            HATRAN,
            PHOENICIAN,
            LYDIAN,
            null,
            MEROITIC_HIEROGLYPHS,
            MEROITIC_CURSIVE,
            KHAROSHTHI,
            OLD_SOUTH_ARABIAN,
            OLD_NORTH_ARABIAN,
            null,
            MANICHAEAN,
            AVESTAN,
            INSCRIPTIONAL_PARTHIAN,
            INSCRIPTIONAL_PAHLAVI,
            PSALTER_PAHLAVI,
            null,
            OLD_TURKIC,
            null,
            OLD_HUNGARIAN,
            HANIFI_ROHINGYA,
            null,
            RUMI_NUMERAL_SYMBOLS,
            YEZIDI,
            null,
            OLD_SOGDIAN,
            SOGDIAN,
            null,
            CHORASMIAN,
            ELYMAIC,
            BRAHMI,
            KAITHI,
            SORA_SOMPENG,
            CHAKMA,
            MAHAJANI,
            SHARADA,
            SINHALA_ARCHAIC_NUMBERS,
            KHOJKI,
            null,
            MULTANI,
            KHUDAWADI,
            GRANTHA,
            null,
            NEWA,
            TIRHUTA,
            null,
            SIDDHAM,
            MODI,
            MONGOLIAN_SUPPLEMENT,
            TAKRI,
            null,
            AHOM,
            null,
            DOGRA,
            null,
            WARANG_CITI,
            DIVES_AKURU,
            null,
            NANDINAGARI,
            ZANABAZAR_SQUARE,
            SOYOMBO,
            null,
            PAU_CIN_HAU,
            null,
            BHAIKSUKI,
            MARCHEN,
            null,
            MASARAM_GONDI,
            GUNJALA_GONDI,
            null,
            MAKASAR,
            null,
            LISU_SUPPLEMENT,
            TAMIL_SUPPLEMENT,
            CUNEIFORM,
            CUNEIFORM_NUMBERS_AND_PUNCTUATION,
            EARLY_DYNASTIC_CUNEIFORM,
            null,
            EGYPTIAN_HIEROGLYPHS,
            EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS,
            null,
            ANATOLIAN_HIEROGLYPHS,
            null,
            BAMUM_SUPPLEMENT,
            MRO,
            null,
            BASSA_VAH,
            PAHAWH_HMONG,
            null,
            MEDEFAIDRIN,
            null,
            MIAO,
            null,
            IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION,
            TANGUT,
            TANGUT_COMPONENTS,
            KHITAN_SMALL_SCRIPT,
            TANGUT_SUPPLEMENT,
            null,
            KANA_SUPPLEMENT,
            KANA_EXTENDED_A,
            SMALL_KANA_EXTENSION,
            NUSHU,
            null,
            DUPLOYAN,
            SHORTHAND_FORMAT_CONTROLS,
            null,
            BYZANTINE_MUSICAL_SYMBOLS,
            MUSICAL_SYMBOLS,
            ANCIENT_GREEK_MUSICAL_NOTATION,
            null,
            MAYAN_NUMERALS,
            TAI_XUAN_JING_SYMBOLS,
            COUNTING_ROD_NUMERALS,
            null,
            MATHEMATICAL_ALPHANUMERIC_SYMBOLS,
            SUTTON_SIGNWRITING,
            null,
            GLAGOLITIC_SUPPLEMENT,
            null,
            NYIAKENG_PUACHUE_HMONG,
            null,
            WANCHO,
            null,
            MENDE_KIKAKUI,
            null,
            ADLAM,
            null,
            INDIC_SIYAQ_NUMBERS,
            null,
            OTTOMAN_SIYAQ_NUMBERS,
            null,
            ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS,
            null,
            MAHJONG_TILES,
            DOMINO_TILES,
            PLAYING_CARDS,
            ENCLOSED_ALPHANUMERIC_SUPPLEMENT,
            ENCLOSED_IDEOGRAPHIC_SUPPLEMENT,
            MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS,
            EMOTICONS,
            ORNAMENTAL_DINGBATS,
            TRANSPORT_AND_MAP_SYMBOLS,
            ALCHEMICAL_SYMBOLS,
            GEOMETRIC_SHAPES_EXTENDED,
            SUPPLEMENTAL_ARROWS_C,
            SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS,
            CHESS_SYMBOLS,
            SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A,
            SYMBOLS_FOR_LEGACY_COMPUTING,
            null,
            CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B,
            null,
            CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C,
            CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D,
            CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E,
            CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F,
            null,
            CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT,
            null,
            CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G,
            null,
            TAGS,
            null,
            VARIATION_SELECTORS_SUPPLEMENT,
            null,
            SUPPLEMENTARY_PRIVATE_USE_AREA_A,
            SUPPLEMENTARY_PRIVATE_USE_AREA_B,
        };
4208 
4209 
4210         /**
4211          * Returns the object representing the Unicode block containing the
4212          * given character, or {@code null} if the character is not a
4213          * member of a defined block.
4214          *
4215          * <p><b>Note:</b> This method cannot handle
4216          * <a href="Character.html#supplementary"> supplementary
4217          * characters</a>.  To support all Unicode characters, including
4218          * supplementary characters, use the {@link #of(int)} method.
4219          *
4220          * @param   c  The character in question
4221          * @return  The {@code UnicodeBlock} instance representing the
4222          *          Unicode block of which this character is a member, or
4223          *          {@code null} if the character is not a member of any
4224          *          Unicode block
4225          */
of(char c)4226         public static UnicodeBlock of(char c) {
4227             return of((int)c);
4228         }
4229 
4230         /**
4231          * Returns the object representing the Unicode block
4232          * containing the given character (Unicode code point), or
4233          * {@code null} if the character is not a member of a
4234          * defined block.
4235          *
4236          * @param   codePoint the character (Unicode code point) in question.
4237          * @return  The {@code UnicodeBlock} instance representing the
4238          *          Unicode block of which this character is a member, or
4239          *          {@code null} if the character is not a member of any
4240          *          Unicode block
4241          * @throws  IllegalArgumentException if the specified
4242          * {@code codePoint} is an invalid Unicode code point.
4243          * @see Character#isValidCodePoint(int)
4244          * @since   1.5
4245          */
of(int codePoint)4246         public static UnicodeBlock of(int codePoint) {
4247             if (!isValidCodePoint(codePoint)) {
4248                 throw new IllegalArgumentException(
4249                     String.format("Not a valid Unicode code point: 0x%X", codePoint));
4250             }
4251 
4252             int top, bottom, current;
4253             bottom = 0;
4254             top = blockStarts.length;
4255             current = top/2;
4256 
4257             // invariant: top > current >= bottom && codePoint >= unicodeBlockStarts[bottom]
4258             while (top - bottom > 1) {
4259                 if (codePoint >= blockStarts[current]) {
4260                     bottom = current;
4261                 } else {
4262                     top = current;
4263                 }
4264                 current = (top + bottom) / 2;
4265             }
4266             return blocks[current];
4267         }
4268 
4269         /**
4270          * Returns the UnicodeBlock with the given name. Block
4271          * names are determined by The Unicode Standard. The file
4272          * {@code Blocks-<version>.txt} defines blocks for a particular
4273          * version of the standard. The {@link Character} class specifies
4274          * the version of the standard that it supports.
4275          * <p>
4276          * This method accepts block names in the following forms:
4277          * <ol>
4278          * <li> Canonical block names as defined by the Unicode Standard.
4279          * For example, the standard defines a "Basic Latin" block. Therefore, this
4280          * method accepts "Basic Latin" as a valid block name. The documentation of
4281          * each UnicodeBlock provides the canonical name.
4282          * <li>Canonical block names with all spaces removed. For example, "BasicLatin"
4283          * is a valid block name for the "Basic Latin" block.
4284          * <li>The text representation of each constant UnicodeBlock identifier.
4285          * For example, this method will return the {@link #BASIC_LATIN} block if
4286          * provided with the "BASIC_LATIN" name. This form replaces all spaces and
4287          * hyphens in the canonical name with underscores.
4288          * </ol>
4289          * Finally, character case is ignored for all of the valid block name forms.
4290          * For example, "BASIC_LATIN" and "basic_latin" are both valid block names.
4291          * The en_US locale's case mapping rules are used to provide case-insensitive
4292          * string comparisons for block name validation.
4293          * <p>
4294          * If the Unicode Standard changes block names, both the previous and
4295          * current names will be accepted.
4296          *
4297          * @param blockName A {@code UnicodeBlock} name.
4298          * @return The {@code UnicodeBlock} instance identified
4299          *         by {@code blockName}
4300          * @throws IllegalArgumentException if {@code blockName} is an
4301          *         invalid name
4302          * @throws NullPointerException if {@code blockName} is null
4303          * @since 1.5
4304          */
forName(String blockName)4305         public static final UnicodeBlock forName(String blockName) {
4306             UnicodeBlock block = map.get(blockName.toUpperCase(Locale.US));
4307             if (block == null) {
4308                 throw new IllegalArgumentException("Not a valid block name: "
4309                             + blockName);
4310             }
4311             return block;
4312         }
4313     }
4314 
4315 
4316     /**
4317      * A family of character subsets representing the character scripts
4318      * defined in the <a href="http://www.unicode.org/reports/tr24/">
4319      * <i>Unicode Standard Annex #24: Script Names</i></a>. Every Unicode
4320      * character is assigned to a single Unicode script, either a specific
4321      * script, such as {@link Character.UnicodeScript#LATIN Latin}, or
4322      * one of the following three special values,
4323      * {@link Character.UnicodeScript#INHERITED Inherited},
4324      * {@link Character.UnicodeScript#COMMON Common} or
4325      * {@link Character.UnicodeScript#UNKNOWN Unknown}.
4326      *
4327      * @since 1.7
4328      */
4329     public static enum UnicodeScript {
4330         /**
4331          * Unicode script "Common".
4332          */
4333         COMMON,
4334 
4335         /**
4336          * Unicode script "Latin".
4337          */
4338         LATIN,
4339 
4340         /**
4341          * Unicode script "Greek".
4342          */
4343         GREEK,
4344 
4345         /**
4346          * Unicode script "Cyrillic".
4347          */
4348         CYRILLIC,
4349 
4350         /**
4351          * Unicode script "Armenian".
4352          */
4353         ARMENIAN,
4354 
4355         /**
4356          * Unicode script "Hebrew".
4357          */
4358         HEBREW,
4359 
4360         /**
4361          * Unicode script "Arabic".
4362          */
4363         ARABIC,
4364 
4365         /**
4366          * Unicode script "Syriac".
4367          */
4368         SYRIAC,
4369 
4370         /**
4371          * Unicode script "Thaana".
4372          */
4373         THAANA,
4374 
4375         /**
4376          * Unicode script "Devanagari".
4377          */
4378         DEVANAGARI,
4379 
4380         /**
4381          * Unicode script "Bengali".
4382          */
4383         BENGALI,
4384 
4385         /**
4386          * Unicode script "Gurmukhi".
4387          */
4388         GURMUKHI,
4389 
4390         /**
4391          * Unicode script "Gujarati".
4392          */
4393         GUJARATI,
4394 
4395         /**
4396          * Unicode script "Oriya".
4397          */
4398         ORIYA,
4399 
4400         /**
4401          * Unicode script "Tamil".
4402          */
4403         TAMIL,
4404 
4405         /**
4406          * Unicode script "Telugu".
4407          */
4408         TELUGU,
4409 
4410         /**
4411          * Unicode script "Kannada".
4412          */
4413         KANNADA,
4414 
4415         /**
4416          * Unicode script "Malayalam".
4417          */
4418         MALAYALAM,
4419 
4420         /**
4421          * Unicode script "Sinhala".
4422          */
4423         SINHALA,
4424 
4425         /**
4426          * Unicode script "Thai".
4427          */
4428         THAI,
4429 
4430         /**
4431          * Unicode script "Lao".
4432          */
4433         LAO,
4434 
4435         /**
4436          * Unicode script "Tibetan".
4437          */
4438         TIBETAN,
4439 
4440         /**
4441          * Unicode script "Myanmar".
4442          */
4443         MYANMAR,
4444 
4445         /**
4446          * Unicode script "Georgian".
4447          */
4448         GEORGIAN,
4449 
4450         /**
4451          * Unicode script "Hangul".
4452          */
4453         HANGUL,
4454 
4455         /**
4456          * Unicode script "Ethiopic".
4457          */
4458         ETHIOPIC,
4459 
4460         /**
4461          * Unicode script "Cherokee".
4462          */
4463         CHEROKEE,
4464 
4465         /**
4466          * Unicode script "Canadian_Aboriginal".
4467          */
4468         CANADIAN_ABORIGINAL,
4469 
4470         /**
4471          * Unicode script "Ogham".
4472          */
4473         OGHAM,
4474 
4475         /**
4476          * Unicode script "Runic".
4477          */
4478         RUNIC,
4479 
4480         /**
4481          * Unicode script "Khmer".
4482          */
4483         KHMER,
4484 
4485         /**
4486          * Unicode script "Mongolian".
4487          */
4488         MONGOLIAN,
4489 
4490         /**
4491          * Unicode script "Hiragana".
4492          */
4493         HIRAGANA,
4494 
4495         /**
4496          * Unicode script "Katakana".
4497          */
4498         KATAKANA,
4499 
4500         /**
4501          * Unicode script "Bopomofo".
4502          */
4503         BOPOMOFO,
4504 
4505         /**
4506          * Unicode script "Han".
4507          */
4508         HAN,
4509 
4510         /**
4511          * Unicode script "Yi".
4512          */
4513         YI,
4514 
4515         /**
4516          * Unicode script "Old_Italic".
4517          */
4518         OLD_ITALIC,
4519 
4520         /**
4521          * Unicode script "Gothic".
4522          */
4523         GOTHIC,
4524 
4525         /**
4526          * Unicode script "Deseret".
4527          */
4528         DESERET,
4529 
4530         /**
4531          * Unicode script "Inherited".
4532          */
4533         INHERITED,
4534 
4535         /**
4536          * Unicode script "Tagalog".
4537          */
4538         TAGALOG,
4539 
4540         /**
4541          * Unicode script "Hanunoo".
4542          */
4543         HANUNOO,
4544 
4545         /**
4546          * Unicode script "Buhid".
4547          */
4548         BUHID,
4549 
4550         /**
4551          * Unicode script "Tagbanwa".
4552          */
4553         TAGBANWA,
4554 
4555         /**
4556          * Unicode script "Limbu".
4557          */
4558         LIMBU,
4559 
4560         /**
4561          * Unicode script "Tai_Le".
4562          */
4563         TAI_LE,
4564 
4565         /**
4566          * Unicode script "Linear_B".
4567          */
4568         LINEAR_B,
4569 
4570         /**
4571          * Unicode script "Ugaritic".
4572          */
4573         UGARITIC,
4574 
4575         /**
4576          * Unicode script "Shavian".
4577          */
4578         SHAVIAN,
4579 
4580         /**
4581          * Unicode script "Osmanya".
4582          */
4583         OSMANYA,
4584 
4585         /**
4586          * Unicode script "Cypriot".
4587          */
4588         CYPRIOT,
4589 
4590         /**
4591          * Unicode script "Braille".
4592          */
4593         BRAILLE,
4594 
4595         /**
4596          * Unicode script "Buginese".
4597          */
4598         BUGINESE,
4599 
4600         /**
4601          * Unicode script "Coptic".
4602          */
4603         COPTIC,
4604 
4605         /**
4606          * Unicode script "New_Tai_Lue".
4607          */
4608         NEW_TAI_LUE,
4609 
4610         /**
4611          * Unicode script "Glagolitic".
4612          */
4613         GLAGOLITIC,
4614 
4615         /**
4616          * Unicode script "Tifinagh".
4617          */
4618         TIFINAGH,
4619 
4620         /**
4621          * Unicode script "Syloti_Nagri".
4622          */
4623         SYLOTI_NAGRI,
4624 
4625         /**
4626          * Unicode script "Old_Persian".
4627          */
4628         OLD_PERSIAN,
4629 
4630         /**
4631          * Unicode script "Kharoshthi".
4632          */
4633         KHAROSHTHI,
4634 
4635         /**
4636          * Unicode script "Balinese".
4637          */
4638         BALINESE,
4639 
4640         /**
4641          * Unicode script "Cuneiform".
4642          */
4643         CUNEIFORM,
4644 
4645         /**
4646          * Unicode script "Phoenician".
4647          */
4648         PHOENICIAN,
4649 
4650         /**
4651          * Unicode script "Phags_Pa".
4652          */
4653         PHAGS_PA,
4654 
4655         /**
4656          * Unicode script "Nko".
4657          */
4658         NKO,
4659 
4660         /**
4661          * Unicode script "Sundanese".
4662          */
4663         SUNDANESE,
4664 
4665         /**
4666          * Unicode script "Batak".
4667          */
4668         BATAK,
4669 
4670         /**
4671          * Unicode script "Lepcha".
4672          */
4673         LEPCHA,
4674 
4675         /**
4676          * Unicode script "Ol_Chiki".
4677          */
4678         OL_CHIKI,
4679 
4680         /**
4681          * Unicode script "Vai".
4682          */
4683         VAI,
4684 
4685         /**
4686          * Unicode script "Saurashtra".
4687          */
4688         SAURASHTRA,
4689 
4690         /**
4691          * Unicode script "Kayah_Li".
4692          */
4693         KAYAH_LI,
4694 
4695         /**
4696          * Unicode script "Rejang".
4697          */
4698         REJANG,
4699 
4700         /**
4701          * Unicode script "Lycian".
4702          */
4703         LYCIAN,
4704 
4705         /**
4706          * Unicode script "Carian".
4707          */
4708         CARIAN,
4709 
4710         /**
4711          * Unicode script "Lydian".
4712          */
4713         LYDIAN,
4714 
4715         /**
4716          * Unicode script "Cham".
4717          */
4718         CHAM,
4719 
4720         /**
4721          * Unicode script "Tai_Tham".
4722          */
4723         TAI_THAM,
4724 
4725         /**
4726          * Unicode script "Tai_Viet".
4727          */
4728         TAI_VIET,
4729 
4730         /**
4731          * Unicode script "Avestan".
4732          */
4733         AVESTAN,
4734 
4735         /**
4736          * Unicode script "Egyptian_Hieroglyphs".
4737          */
4738         EGYPTIAN_HIEROGLYPHS,
4739 
4740         /**
4741          * Unicode script "Samaritan".
4742          */
4743         SAMARITAN,
4744 
4745         /**
4746          * Unicode script "Mandaic".
4747          */
4748         MANDAIC,
4749 
4750         /**
4751          * Unicode script "Lisu".
4752          */
4753         LISU,
4754 
4755         /**
4756          * Unicode script "Bamum".
4757          */
4758         BAMUM,
4759 
4760         /**
4761          * Unicode script "Javanese".
4762          */
4763         JAVANESE,
4764 
4765         /**
4766          * Unicode script "Meetei_Mayek".
4767          */
4768         MEETEI_MAYEK,
4769 
4770         /**
4771          * Unicode script "Imperial_Aramaic".
4772          */
4773         IMPERIAL_ARAMAIC,
4774 
4775         /**
4776          * Unicode script "Old_South_Arabian".
4777          */
4778         OLD_SOUTH_ARABIAN,
4779 
4780         /**
4781          * Unicode script "Inscriptional_Parthian".
4782          */
4783         INSCRIPTIONAL_PARTHIAN,
4784 
4785         /**
4786          * Unicode script "Inscriptional_Pahlavi".
4787          */
4788         INSCRIPTIONAL_PAHLAVI,
4789 
4790         /**
4791          * Unicode script "Old_Turkic".
4792          */
4793         OLD_TURKIC,
4794 
4795         /**
4796          * Unicode script "Brahmi".
4797          */
4798         BRAHMI,
4799 
4800         /**
4801          * Unicode script "Kaithi".
4802          */
4803         KAITHI,
4804 
4805         /**
4806          * Unicode script "Meroitic Hieroglyphs".
4807          * @since 1.8
4808          */
4809         MEROITIC_HIEROGLYPHS,
4810 
4811         /**
4812          * Unicode script "Meroitic Cursive".
4813          * @since 1.8
4814          */
4815         MEROITIC_CURSIVE,
4816 
4817         /**
4818          * Unicode script "Sora Sompeng".
4819          * @since 1.8
4820          */
4821         SORA_SOMPENG,
4822 
4823         /**
4824          * Unicode script "Chakma".
4825          * @since 1.8
4826          */
4827         CHAKMA,
4828 
4829         /**
4830          * Unicode script "Sharada".
4831          * @since 1.8
4832          */
4833         SHARADA,
4834 
4835         /**
4836          * Unicode script "Takri".
4837          * @since 1.8
4838          */
4839         TAKRI,
4840 
4841         /**
4842          * Unicode script "Miao".
4843          * @since 1.8
4844          */
4845         MIAO,
4846 
4847         /**
4848          * Unicode script "Caucasian Albanian".
4849          * @since 9
4850          */
4851         CAUCASIAN_ALBANIAN,
4852 
4853         /**
4854          * Unicode script "Bassa Vah".
4855          * @since 9
4856          */
4857         BASSA_VAH,
4858 
4859         /**
4860          * Unicode script "Duployan".
4861          * @since 9
4862          */
4863         DUPLOYAN,
4864 
4865         /**
4866          * Unicode script "Elbasan".
4867          * @since 9
4868          */
4869         ELBASAN,
4870 
4871         /**
4872          * Unicode script "Grantha".
4873          * @since 9
4874          */
4875         GRANTHA,
4876 
4877         /**
4878          * Unicode script "Pahawh Hmong".
4879          * @since 9
4880          */
4881         PAHAWH_HMONG,
4882 
4883         /**
4884          * Unicode script "Khojki".
4885          * @since 9
4886          */
4887         KHOJKI,
4888 
4889         /**
4890          * Unicode script "Linear A".
4891          * @since 9
4892          */
4893         LINEAR_A,
4894 
4895         /**
4896          * Unicode script "Mahajani".
4897          * @since 9
4898          */
4899         MAHAJANI,
4900 
4901         /**
4902          * Unicode script "Manichaean".
4903          * @since 9
4904          */
4905         MANICHAEAN,
4906 
4907         /**
4908          * Unicode script "Mende Kikakui".
4909          * @since 9
4910          */
4911         MENDE_KIKAKUI,
4912 
4913         /**
4914          * Unicode script "Modi".
4915          * @since 9
4916          */
4917         MODI,
4918 
4919         /**
4920          * Unicode script "Mro".
4921          * @since 9
4922          */
4923         MRO,
4924 
4925         /**
4926          * Unicode script "Old North Arabian".
4927          * @since 9
4928          */
4929         OLD_NORTH_ARABIAN,
4930 
4931         /**
4932          * Unicode script "Nabataean".
4933          * @since 9
4934          */
4935         NABATAEAN,
4936 
4937         /**
4938          * Unicode script "Palmyrene".
4939          * @since 9
4940          */
4941         PALMYRENE,
4942 
4943         /**
4944          * Unicode script "Pau Cin Hau".
4945          * @since 9
4946          */
4947         PAU_CIN_HAU,
4948 
4949         /**
4950          * Unicode script "Old Permic".
4951          * @since 9
4952          */
4953         OLD_PERMIC,
4954 
4955         /**
4956          * Unicode script "Psalter Pahlavi".
4957          * @since 9
4958          */
4959         PSALTER_PAHLAVI,
4960 
4961         /**
4962          * Unicode script "Siddham".
4963          * @since 9
4964          */
4965         SIDDHAM,
4966 
4967         /**
4968          * Unicode script "Khudawadi".
4969          * @since 9
4970          */
4971         KHUDAWADI,
4972 
4973         /**
4974          * Unicode script "Tirhuta".
4975          * @since 9
4976          */
4977         TIRHUTA,
4978 
4979         /**
4980          * Unicode script "Warang Citi".
4981          * @since 9
4982          */
4983         WARANG_CITI,
4984 
4985         /**
4986          * Unicode script "Ahom".
4987          * @since 9
4988          */
4989         AHOM,
4990 
4991         /**
4992          * Unicode script "Anatolian Hieroglyphs".
4993          * @since 9
4994          */
4995         ANATOLIAN_HIEROGLYPHS,
4996 
4997         /**
4998          * Unicode script "Hatran".
4999          * @since 9
5000          */
5001         HATRAN,
5002 
5003         /**
5004          * Unicode script "Multani".
5005          * @since 9
5006          */
5007         MULTANI,
5008 
5009         /**
5010          * Unicode script "Old Hungarian".
5011          * @since 9
5012          */
5013         OLD_HUNGARIAN,
5014 
5015         /**
5016          * Unicode script "SignWriting".
5017          * @since 9
5018          */
5019         SIGNWRITING,
5020 
5021         /**
5022          * Unicode script "Adlam".
5023          * @since 11
5024          */
5025         ADLAM,
5026 
5027         /**
5028          * Unicode script "Bhaiksuki".
5029          * @since 11
5030          */
5031         BHAIKSUKI,
5032 
5033         /**
5034          * Unicode script "Marchen".
5035          * @since 11
5036          */
5037         MARCHEN,
5038 
5039         /**
5040          * Unicode script "Newa".
5041          * @since 11
5042          */
5043         NEWA,
5044 
5045         /**
5046          * Unicode script "Osage".
5047          * @since 11
5048          */
5049         OSAGE,
5050 
5051         /**
5052          * Unicode script "Tangut".
5053          * @since 11
5054          */
5055         TANGUT,
5056 
5057         /**
5058          * Unicode script "Masaram Gondi".
5059          * @since 11
5060          */
5061         MASARAM_GONDI,
5062 
5063         /**
5064          * Unicode script "Nushu".
5065          * @since 11
5066          */
5067         NUSHU,
5068 
5069         /**
5070          * Unicode script "Soyombo".
5071          * @since 11
5072          */
5073         SOYOMBO,
5074 
5075         /**
5076          * Unicode script "Zanabazar Square".
5077          * @since 11
5078          */
5079         ZANABAZAR_SQUARE,
5080 
5081         /**
5082          * Unicode script "Hanifi Rohingya".
5083          * @since 12
5084          */
5085         HANIFI_ROHINGYA,
5086 
5087         /**
5088          * Unicode script "Old Sogdian".
5089          * @since 12
5090          */
5091         OLD_SOGDIAN,
5092 
5093         /**
5094          * Unicode script "Sogdian".
5095          * @since 12
5096          */
5097         SOGDIAN,
5098 
5099         /**
5100          * Unicode script "Dogra".
5101          * @since 12
5102          */
5103         DOGRA,
5104 
5105         /**
5106          * Unicode script "Gunjala Gondi".
5107          * @since 12
5108          */
5109         GUNJALA_GONDI,
5110 
5111         /**
5112          * Unicode script "Makasar".
5113          * @since 12
5114          */
5115         MAKASAR,
5116 
5117         /**
5118          * Unicode script "Medefaidrin".
5119          * @since 12
5120          */
5121         MEDEFAIDRIN,
5122 
5123         /**
5124          * Unicode script "Elymaic".
5125          * @since 13
5126          */
5127         ELYMAIC,
5128 
5129         /**
5130          * Unicode script "Nandinagari".
5131          * @since 13
5132          */
5133         NANDINAGARI,
5134 
5135         /**
5136          * Unicode script "Nyiakeng Puachue Hmong".
5137          * @since 13
5138          */
5139         NYIAKENG_PUACHUE_HMONG,
5140 
5141         /**
5142          * Unicode script "Wancho".
5143          * @since 13
5144          */
5145         WANCHO,
5146 
5147         /**
5148          * Unicode script "Yezidi".
5149          * @since 15
5150          */
5151         YEZIDI,
5152 
5153         /**
5154          * Unicode script "Chorasmian".
5155          * @since 15
5156          */
5157         CHORASMIAN,
5158 
5159         /**
5160          * Unicode script "Dives Akuru".
5161          * @since 15
5162          */
5163         DIVES_AKURU,
5164 
5165         /**
5166          * Unicode script "Khitan Small Script".
5167          * @since 15
5168          */
5169         KHITAN_SMALL_SCRIPT,
5170 
5171         /**
5172          * Unicode script "Unknown".
5173          */
5174         UNKNOWN;
5175 
5176         private static final int[] scriptStarts = {
5177             0x0000,   // 0000..0040; COMMON
5178             0x0041,   // 0041..005A; LATIN
5179             0x005B,   // 005B..0060; COMMON
5180             0x0061,   // 0061..007A; LATIN
5181             0x007B,   // 007B..00A9; COMMON
5182             0x00AA,   // 00AA      ; LATIN
5183             0x00AB,   // 00AB..00B9; COMMON
5184             0x00BA,   // 00BA      ; LATIN
5185             0x00BB,   // 00BB..00BF; COMMON
5186             0x00C0,   // 00C0..00D6; LATIN
5187             0x00D7,   // 00D7      ; COMMON
5188             0x00D8,   // 00D8..00F6; LATIN
5189             0x00F7,   // 00F7      ; COMMON
5190             0x00F8,   // 00F8..02B8; LATIN
5191             0x02B9,   // 02B9..02DF; COMMON
5192             0x02E0,   // 02E0..02E4; LATIN
5193             0x02E5,   // 02E5..02E9; COMMON
5194             0x02EA,   // 02EA..02EB; BOPOMOFO
5195             0x02EC,   // 02EC..02FF; COMMON
5196             0x0300,   // 0300..036F; INHERITED
5197             0x0370,   // 0370..0373; GREEK
5198             0x0374,   // 0374      ; COMMON
5199             0x0375,   // 0375..0377; GREEK
5200             0x0378,   // 0378..0379; UNKNOWN
5201             0x037A,   // 037A..037D; GREEK
5202             0x037E,   // 037E      ; COMMON
5203             0x037F,   // 037F      ; GREEK
5204             0x0380,   // 0380..0383; UNKNOWN
5205             0x0384,   // 0384      ; GREEK
5206             0x0385,   // 0385      ; COMMON
5207             0x0386,   // 0386      ; GREEK
5208             0x0387,   // 0387      ; COMMON
5209             0x0388,   // 0388..038A; GREEK
5210             0x038B,   // 038B      ; UNKNOWN
5211             0x038C,   // 038C      ; GREEK
5212             0x038D,   // 038D      ; UNKNOWN
5213             0x038E,   // 038E..03A1; GREEK
5214             0x03A2,   // 03A2      ; UNKNOWN
5215             0x03A3,   // 03A3..03E1; GREEK
5216             0x03E2,   // 03E2..03EF; COPTIC
5217             0x03F0,   // 03F0..03FF; GREEK
5218             0x0400,   // 0400..0484; CYRILLIC
5219             0x0485,   // 0485..0486; INHERITED
5220             0x0487,   // 0487..052F; CYRILLIC
5221             0x0530,   // 0530      ; UNKNOWN
5222             0x0531,   // 0531..0556; ARMENIAN
5223             0x0557,   // 0557..0558; UNKNOWN
5224             0x0559,   // 0559..058A; ARMENIAN
5225             0x058B,   // 058B..058C; UNKNOWN
5226             0x058D,   // 058D..058F; ARMENIAN
5227             0x0590,   // 0590      ; UNKNOWN
5228             0x0591,   // 0591..05C7; HEBREW
5229             0x05C8,   // 05C8..05CF; UNKNOWN
5230             0x05D0,   // 05D0..05EA; HEBREW
5231             0x05EB,   // 05EB..05EE; UNKNOWN
5232             0x05EF,   // 05EF..05F4; HEBREW
5233             0x05F5,   // 05F5..05FF; UNKNOWN
5234             0x0600,   // 0600..0604; ARABIC
5235             0x0605,   // 0605      ; COMMON
5236             0x0606,   // 0606..060B; ARABIC
5237             0x060C,   // 060C      ; COMMON
5238             0x060D,   // 060D..061A; ARABIC
5239             0x061B,   // 061B      ; COMMON
5240             0x061C,   // 061C      ; ARABIC
5241             0x061D,   // 061D      ; UNKNOWN
5242             0x061E,   // 061E      ; ARABIC
5243             0x061F,   // 061F      ; COMMON
5244             0x0620,   // 0620..063F; ARABIC
5245             0x0640,   // 0640      ; COMMON
5246             0x0641,   // 0641..064A; ARABIC
5247             0x064B,   // 064B..0655; INHERITED
5248             0x0656,   // 0656..066F; ARABIC
5249             0x0670,   // 0670      ; INHERITED
5250             0x0671,   // 0671..06DC; ARABIC
5251             0x06DD,   // 06DD      ; COMMON
5252             0x06DE,   // 06DE..06FF; ARABIC
5253             0x0700,   // 0700..070D; SYRIAC
5254             0x070E,   // 070E      ; UNKNOWN
5255             0x070F,   // 070F..074A; SYRIAC
5256             0x074B,   // 074B..074C; UNKNOWN
5257             0x074D,   // 074D..074F; SYRIAC
5258             0x0750,   // 0750..077F; ARABIC
5259             0x0780,   // 0780..07B1; THAANA
5260             0x07B2,   // 07B2..07BF; UNKNOWN
5261             0x07C0,   // 07C0..07FA; NKO
5262             0x07FB,   // 07FB..07FC; UNKNOWN
5263             0x07FD,   // 07FD..07FF; NKO
5264             0x0800,   // 0800..082D; SAMARITAN
5265             0x082E,   // 082E..082F; UNKNOWN
5266             0x0830,   // 0830..083E; SAMARITAN
5267             0x083F,   // 083F      ; UNKNOWN
5268             0x0840,   // 0840..085B; MANDAIC
5269             0x085C,   // 085C..085D; UNKNOWN
5270             0x085E,   // 085E      ; MANDAIC
5271             0x085F,   // 085F      ; UNKNOWN
5272             0x0860,   // 0860..086A; SYRIAC
5273             0x086B,   // 086B..089F; UNKNOWN
5274             0x08A0,   // 08A0..08B4; ARABIC
5275             0x08B5,   // 08B5      ; UNKNOWN
5276             0x08B6,   // 08B6..08C7; ARABIC
5277             0x08C8,   // 08C8..08D2; UNKNOWN
5278             0x08D3,   // 08D3..08E1; ARABIC
5279             0x08E2,   // 08E2      ; COMMON
5280             0x08E3,   // 08E3..08FF; ARABIC
5281             0x0900,   // 0900..0950; DEVANAGARI
5282             0x0951,   // 0951..0954; INHERITED
5283             0x0955,   // 0955..0963; DEVANAGARI
5284             0x0964,   // 0964..0965; COMMON
5285             0x0966,   // 0966..097F; DEVANAGARI
5286             0x0980,   // 0980..0983; BENGALI
5287             0x0984,   // 0984      ; UNKNOWN
5288             0x0985,   // 0985..098C; BENGALI
5289             0x098D,   // 098D..098E; UNKNOWN
5290             0x098F,   // 098F..0990; BENGALI
5291             0x0991,   // 0991..0992; UNKNOWN
5292             0x0993,   // 0993..09A8; BENGALI
5293             0x09A9,   // 09A9      ; UNKNOWN
5294             0x09AA,   // 09AA..09B0; BENGALI
5295             0x09B1,   // 09B1      ; UNKNOWN
5296             0x09B2,   // 09B2      ; BENGALI
5297             0x09B3,   // 09B3..09B5; UNKNOWN
5298             0x09B6,   // 09B6..09B9; BENGALI
5299             0x09BA,   // 09BA..09BB; UNKNOWN
5300             0x09BC,   // 09BC..09C4; BENGALI
5301             0x09C5,   // 09C5..09C6; UNKNOWN
5302             0x09C7,   // 09C7..09C8; BENGALI
5303             0x09C9,   // 09C9..09CA; UNKNOWN
5304             0x09CB,   // 09CB..09CE; BENGALI
5305             0x09CF,   // 09CF..09D6; UNKNOWN
5306             0x09D7,   // 09D7      ; BENGALI
5307             0x09D8,   // 09D8..09DB; UNKNOWN
5308             0x09DC,   // 09DC..09DD; BENGALI
5309             0x09DE,   // 09DE      ; UNKNOWN
5310             0x09DF,   // 09DF..09E3; BENGALI
5311             0x09E4,   // 09E4..09E5; UNKNOWN
5312             0x09E6,   // 09E6..09FE; BENGALI
5313             0x09FF,   // 09FF..0A00; UNKNOWN
5314             0x0A01,   // 0A01..0A03; GURMUKHI
5315             0x0A04,   // 0A04      ; UNKNOWN
5316             0x0A05,   // 0A05..0A0A; GURMUKHI
5317             0x0A0B,   // 0A0B..0A0E; UNKNOWN
5318             0x0A0F,   // 0A0F..0A10; GURMUKHI
5319             0x0A11,   // 0A11..0A12; UNKNOWN
5320             0x0A13,   // 0A13..0A28; GURMUKHI
5321             0x0A29,   // 0A29      ; UNKNOWN
5322             0x0A2A,   // 0A2A..0A30; GURMUKHI
5323             0x0A31,   // 0A31      ; UNKNOWN
5324             0x0A32,   // 0A32..0A33; GURMUKHI
5325             0x0A34,   // 0A34      ; UNKNOWN
5326             0x0A35,   // 0A35..0A36; GURMUKHI
5327             0x0A37,   // 0A37      ; UNKNOWN
5328             0x0A38,   // 0A38..0A39; GURMUKHI
5329             0x0A3A,   // 0A3A..0A3B; UNKNOWN
5330             0x0A3C,   // 0A3C      ; GURMUKHI
5331             0x0A3D,   // 0A3D      ; UNKNOWN
5332             0x0A3E,   // 0A3E..0A42; GURMUKHI
5333             0x0A43,   // 0A43..0A46; UNKNOWN
5334             0x0A47,   // 0A47..0A48; GURMUKHI
5335             0x0A49,   // 0A49..0A4A; UNKNOWN
5336             0x0A4B,   // 0A4B..0A4D; GURMUKHI
5337             0x0A4E,   // 0A4E..0A50; UNKNOWN
5338             0x0A51,   // 0A51      ; GURMUKHI
5339             0x0A52,   // 0A52..0A58; UNKNOWN
5340             0x0A59,   // 0A59..0A5C; GURMUKHI
5341             0x0A5D,   // 0A5D      ; UNKNOWN
5342             0x0A5E,   // 0A5E      ; GURMUKHI
5343             0x0A5F,   // 0A5F..0A65; UNKNOWN
5344             0x0A66,   // 0A66..0A76; GURMUKHI
5345             0x0A77,   // 0A77..0A80; UNKNOWN
5346             0x0A81,   // 0A81..0A83; GUJARATI
5347             0x0A84,   // 0A84      ; UNKNOWN
5348             0x0A85,   // 0A85..0A8D; GUJARATI
5349             0x0A8E,   // 0A8E      ; UNKNOWN
5350             0x0A8F,   // 0A8F..0A91; GUJARATI
5351             0x0A92,   // 0A92      ; UNKNOWN
5352             0x0A93,   // 0A93..0AA8; GUJARATI
5353             0x0AA9,   // 0AA9      ; UNKNOWN
5354             0x0AAA,   // 0AAA..0AB0; GUJARATI
5355             0x0AB1,   // 0AB1      ; UNKNOWN
5356             0x0AB2,   // 0AB2..0AB3; GUJARATI
5357             0x0AB4,   // 0AB4      ; UNKNOWN
5358             0x0AB5,   // 0AB5..0AB9; GUJARATI
5359             0x0ABA,   // 0ABA..0ABB; UNKNOWN
5360             0x0ABC,   // 0ABC..0AC5; GUJARATI
5361             0x0AC6,   // 0AC6      ; UNKNOWN
5362             0x0AC7,   // 0AC7..0AC9; GUJARATI
5363             0x0ACA,   // 0ACA      ; UNKNOWN
5364             0x0ACB,   // 0ACB..0ACD; GUJARATI
5365             0x0ACE,   // 0ACE..0ACF; UNKNOWN
5366             0x0AD0,   // 0AD0      ; GUJARATI
5367             0x0AD1,   // 0AD1..0ADF; UNKNOWN
5368             0x0AE0,   // 0AE0..0AE3; GUJARATI
5369             0x0AE4,   // 0AE4..0AE5; UNKNOWN
5370             0x0AE6,   // 0AE6..0AF1; GUJARATI
5371             0x0AF2,   // 0AF2..0AF8; UNKNOWN
5372             0x0AF9,   // 0AF9..0AFF; GUJARATI
5373             0x0B00,   // 0B00      ; UNKNOWN
5374             0x0B01,   // 0B01..0B03; ORIYA
5375             0x0B04,   // 0B04      ; UNKNOWN
5376             0x0B05,   // 0B05..0B0C; ORIYA
5377             0x0B0D,   // 0B0D..0B0E; UNKNOWN
5378             0x0B0F,   // 0B0F..0B10; ORIYA
5379             0x0B11,   // 0B11..0B12; UNKNOWN
5380             0x0B13,   // 0B13..0B28; ORIYA
5381             0x0B29,   // 0B29      ; UNKNOWN
5382             0x0B2A,   // 0B2A..0B30; ORIYA
5383             0x0B31,   // 0B31      ; UNKNOWN
5384             0x0B32,   // 0B32..0B33; ORIYA
5385             0x0B34,   // 0B34      ; UNKNOWN
5386             0x0B35,   // 0B35..0B39; ORIYA
5387             0x0B3A,   // 0B3A..0B3B; UNKNOWN
5388             0x0B3C,   // 0B3C..0B44; ORIYA
5389             0x0B45,   // 0B45..0B46; UNKNOWN
5390             0x0B47,   // 0B47..0B48; ORIYA
5391             0x0B49,   // 0B49..0B4A; UNKNOWN
5392             0x0B4B,   // 0B4B..0B4D; ORIYA
5393             0x0B4E,   // 0B4E..0B54; UNKNOWN
5394             0x0B55,   // 0B55..0B57; ORIYA
5395             0x0B58,   // 0B58..0B5B; UNKNOWN
5396             0x0B5C,   // 0B5C..0B5D; ORIYA
5397             0x0B5E,   // 0B5E      ; UNKNOWN
5398             0x0B5F,   // 0B5F..0B63; ORIYA
5399             0x0B64,   // 0B64..0B65; UNKNOWN
5400             0x0B66,   // 0B66..0B77; ORIYA
5401             0x0B78,   // 0B78..0B81; UNKNOWN
5402             0x0B82,   // 0B82..0B83; TAMIL
5403             0x0B84,   // 0B84      ; UNKNOWN
5404             0x0B85,   // 0B85..0B8A; TAMIL
5405             0x0B8B,   // 0B8B..0B8D; UNKNOWN
5406             0x0B8E,   // 0B8E..0B90; TAMIL
5407             0x0B91,   // 0B91      ; UNKNOWN
5408             0x0B92,   // 0B92..0B95; TAMIL
5409             0x0B96,   // 0B96..0B98; UNKNOWN
5410             0x0B99,   // 0B99..0B9A; TAMIL
5411             0x0B9B,   // 0B9B      ; UNKNOWN
5412             0x0B9C,   // 0B9C      ; TAMIL
5413             0x0B9D,   // 0B9D      ; UNKNOWN
5414             0x0B9E,   // 0B9E..0B9F; TAMIL
5415             0x0BA0,   // 0BA0..0BA2; UNKNOWN
5416             0x0BA3,   // 0BA3..0BA4; TAMIL
5417             0x0BA5,   // 0BA5..0BA7; UNKNOWN
5418             0x0BA8,   // 0BA8..0BAA; TAMIL
5419             0x0BAB,   // 0BAB..0BAD; UNKNOWN
5420             0x0BAE,   // 0BAE..0BB9; TAMIL
5421             0x0BBA,   // 0BBA..0BBD; UNKNOWN
5422             0x0BBE,   // 0BBE..0BC2; TAMIL
5423             0x0BC3,   // 0BC3..0BC5; UNKNOWN
5424             0x0BC6,   // 0BC6..0BC8; TAMIL
5425             0x0BC9,   // 0BC9      ; UNKNOWN
5426             0x0BCA,   // 0BCA..0BCD; TAMIL
5427             0x0BCE,   // 0BCE..0BCF; UNKNOWN
5428             0x0BD0,   // 0BD0      ; TAMIL
5429             0x0BD1,   // 0BD1..0BD6; UNKNOWN
5430             0x0BD7,   // 0BD7      ; TAMIL
5431             0x0BD8,   // 0BD8..0BE5; UNKNOWN
5432             0x0BE6,   // 0BE6..0BFA; TAMIL
5433             0x0BFB,   // 0BFB..0BFF; UNKNOWN
5434             0x0C00,   // 0C00..0C0C; TELUGU
5435             0x0C0D,   // 0C0D      ; UNKNOWN
5436             0x0C0E,   // 0C0E..0C10; TELUGU
5437             0x0C11,   // 0C11      ; UNKNOWN
5438             0x0C12,   // 0C12..0C28; TELUGU
5439             0x0C29,   // 0C29      ; UNKNOWN
5440             0x0C2A,   // 0C2A..0C39; TELUGU
5441             0x0C3A,   // 0C3A..0C3C; UNKNOWN
5442             0x0C3D,   // 0C3D..0C44; TELUGU
5443             0x0C45,   // 0C45      ; UNKNOWN
5444             0x0C46,   // 0C46..0C48; TELUGU
5445             0x0C49,   // 0C49      ; UNKNOWN
5446             0x0C4A,   // 0C4A..0C4D; TELUGU
5447             0x0C4E,   // 0C4E..0C54; UNKNOWN
5448             0x0C55,   // 0C55..0C56; TELUGU
5449             0x0C57,   // 0C57      ; UNKNOWN
5450             0x0C58,   // 0C58..0C5A; TELUGU
5451             0x0C5B,   // 0C5B..0C5F; UNKNOWN
5452             0x0C60,   // 0C60..0C63; TELUGU
5453             0x0C64,   // 0C64..0C65; UNKNOWN
5454             0x0C66,   // 0C66..0C6F; TELUGU
5455             0x0C70,   // 0C70..0C76; UNKNOWN
5456             0x0C77,   // 0C77..0C7F; TELUGU
5457             0x0C80,   // 0C80..0C8C; KANNADA
5458             0x0C8D,   // 0C8D      ; UNKNOWN
5459             0x0C8E,   // 0C8E..0C90; KANNADA
5460             0x0C91,   // 0C91      ; UNKNOWN
5461             0x0C92,   // 0C92..0CA8; KANNADA
5462             0x0CA9,   // 0CA9      ; UNKNOWN
5463             0x0CAA,   // 0CAA..0CB3; KANNADA
5464             0x0CB4,   // 0CB4      ; UNKNOWN
5465             0x0CB5,   // 0CB5..0CB9; KANNADA
5466             0x0CBA,   // 0CBA..0CBB; UNKNOWN
5467             0x0CBC,   // 0CBC..0CC4; KANNADA
5468             0x0CC5,   // 0CC5      ; UNKNOWN
5469             0x0CC6,   // 0CC6..0CC8; KANNADA
5470             0x0CC9,   // 0CC9      ; UNKNOWN
5471             0x0CCA,   // 0CCA..0CCD; KANNADA
5472             0x0CCE,   // 0CCE..0CD4; UNKNOWN
5473             0x0CD5,   // 0CD5..0CD6; KANNADA
5474             0x0CD7,   // 0CD7..0CDD; UNKNOWN
5475             0x0CDE,   // 0CDE      ; KANNADA
5476             0x0CDF,   // 0CDF      ; UNKNOWN
5477             0x0CE0,   // 0CE0..0CE3; KANNADA
5478             0x0CE4,   // 0CE4..0CE5; UNKNOWN
5479             0x0CE6,   // 0CE6..0CEF; KANNADA
5480             0x0CF0,   // 0CF0      ; UNKNOWN
5481             0x0CF1,   // 0CF1..0CF2; KANNADA
5482             0x0CF3,   // 0CF3..0CFF; UNKNOWN
5483             0x0D00,   // 0D00..0D0C; MALAYALAM
5484             0x0D0D,   // 0D0D      ; UNKNOWN
5485             0x0D0E,   // 0D0E..0D10; MALAYALAM
5486             0x0D11,   // 0D11      ; UNKNOWN
5487             0x0D12,   // 0D12..0D44; MALAYALAM
5488             0x0D45,   // 0D45      ; UNKNOWN
5489             0x0D46,   // 0D46..0D48; MALAYALAM
5490             0x0D49,   // 0D49      ; UNKNOWN
5491             0x0D4A,   // 0D4A..0D4F; MALAYALAM
5492             0x0D50,   // 0D50..0D53; UNKNOWN
5493             0x0D54,   // 0D54..0D63; MALAYALAM
5494             0x0D64,   // 0D64..0D65; UNKNOWN
5495             0x0D66,   // 0D66..0D7F; MALAYALAM
5496             0x0D80,   // 0D80      ; UNKNOWN
5497             0x0D81,   // 0D81..0D83; SINHALA
5498             0x0D84,   // 0D84      ; UNKNOWN
5499             0x0D85,   // 0D85..0D96; SINHALA
5500             0x0D97,   // 0D97..0D99; UNKNOWN
5501             0x0D9A,   // 0D9A..0DB1; SINHALA
5502             0x0DB2,   // 0DB2      ; UNKNOWN
5503             0x0DB3,   // 0DB3..0DBB; SINHALA
5504             0x0DBC,   // 0DBC      ; UNKNOWN
5505             0x0DBD,   // 0DBD      ; SINHALA
5506             0x0DBE,   // 0DBE..0DBF; UNKNOWN
5507             0x0DC0,   // 0DC0..0DC6; SINHALA
5508             0x0DC7,   // 0DC7..0DC9; UNKNOWN
5509             0x0DCA,   // 0DCA      ; SINHALA
5510             0x0DCB,   // 0DCB..0DCE; UNKNOWN
5511             0x0DCF,   // 0DCF..0DD4; SINHALA
5512             0x0DD5,   // 0DD5      ; UNKNOWN
5513             0x0DD6,   // 0DD6      ; SINHALA
5514             0x0DD7,   // 0DD7      ; UNKNOWN
5515             0x0DD8,   // 0DD8..0DDF; SINHALA
5516             0x0DE0,   // 0DE0..0DE5; UNKNOWN
5517             0x0DE6,   // 0DE6..0DEF; SINHALA
5518             0x0DF0,   // 0DF0..0DF1; UNKNOWN
5519             0x0DF2,   // 0DF2..0DF4; SINHALA
5520             0x0DF5,   // 0DF5..0E00; UNKNOWN
5521             0x0E01,   // 0E01..0E3A; THAI
5522             0x0E3B,   // 0E3B..0E3E; UNKNOWN
5523             0x0E3F,   // 0E3F      ; COMMON
5524             0x0E40,   // 0E40..0E5B; THAI
5525             0x0E5C,   // 0E5C..0E80; UNKNOWN
5526             0x0E81,   // 0E81..0E82; LAO
5527             0x0E83,   // 0E83      ; UNKNOWN
5528             0x0E84,   // 0E84      ; LAO
5529             0x0E85,   // 0E85      ; UNKNOWN
5530             0x0E86,   // 0E86..0E8A; LAO
5531             0x0E8B,   // 0E8B      ; UNKNOWN
5532             0x0E8C,   // 0E8C..0EA3; LAO
5533             0x0EA4,   // 0EA4      ; UNKNOWN
5534             0x0EA5,   // 0EA5      ; LAO
5535             0x0EA6,   // 0EA6      ; UNKNOWN
5536             0x0EA7,   // 0EA7..0EBD; LAO
5537             0x0EBE,   // 0EBE..0EBF; UNKNOWN
5538             0x0EC0,   // 0EC0..0EC4; LAO
5539             0x0EC5,   // 0EC5      ; UNKNOWN
5540             0x0EC6,   // 0EC6      ; LAO
5541             0x0EC7,   // 0EC7      ; UNKNOWN
5542             0x0EC8,   // 0EC8..0ECD; LAO
5543             0x0ECE,   // 0ECE..0ECF; UNKNOWN
5544             0x0ED0,   // 0ED0..0ED9; LAO
5545             0x0EDA,   // 0EDA..0EDB; UNKNOWN
5546             0x0EDC,   // 0EDC..0EDF; LAO
5547             0x0EE0,   // 0EE0..0EFF; UNKNOWN
5548             0x0F00,   // 0F00..0F47; TIBETAN
5549             0x0F48,   // 0F48      ; UNKNOWN
5550             0x0F49,   // 0F49..0F6C; TIBETAN
5551             0x0F6D,   // 0F6D..0F70; UNKNOWN
5552             0x0F71,   // 0F71..0F97; TIBETAN
5553             0x0F98,   // 0F98      ; UNKNOWN
5554             0x0F99,   // 0F99..0FBC; TIBETAN
5555             0x0FBD,   // 0FBD      ; UNKNOWN
5556             0x0FBE,   // 0FBE..0FCC; TIBETAN
5557             0x0FCD,   // 0FCD      ; UNKNOWN
5558             0x0FCE,   // 0FCE..0FD4; TIBETAN
5559             0x0FD5,   // 0FD5..0FD8; COMMON
5560             0x0FD9,   // 0FD9..0FDA; TIBETAN
5561             0x0FDB,   // 0FDB..0FFF; UNKNOWN
5562             0x1000,   // 1000..109F; MYANMAR
5563             0x10A0,   // 10A0..10C5; GEORGIAN
5564             0x10C6,   // 10C6      ; UNKNOWN
5565             0x10C7,   // 10C7      ; GEORGIAN
5566             0x10C8,   // 10C8..10CC; UNKNOWN
5567             0x10CD,   // 10CD      ; GEORGIAN
5568             0x10CE,   // 10CE..10CF; UNKNOWN
5569             0x10D0,   // 10D0..10FA; GEORGIAN
5570             0x10FB,   // 10FB      ; COMMON
5571             0x10FC,   // 10FC..10FF; GEORGIAN
5572             0x1100,   // 1100..11FF; HANGUL
5573             0x1200,   // 1200..1248; ETHIOPIC
5574             0x1249,   // 1249      ; UNKNOWN
5575             0x124A,   // 124A..124D; ETHIOPIC
5576             0x124E,   // 124E..124F; UNKNOWN
5577             0x1250,   // 1250..1256; ETHIOPIC
5578             0x1257,   // 1257      ; UNKNOWN
5579             0x1258,   // 1258      ; ETHIOPIC
5580             0x1259,   // 1259      ; UNKNOWN
5581             0x125A,   // 125A..125D; ETHIOPIC
5582             0x125E,   // 125E..125F; UNKNOWN
5583             0x1260,   // 1260..1288; ETHIOPIC
5584             0x1289,   // 1289      ; UNKNOWN
5585             0x128A,   // 128A..128D; ETHIOPIC
5586             0x128E,   // 128E..128F; UNKNOWN
5587             0x1290,   // 1290..12B0; ETHIOPIC
5588             0x12B1,   // 12B1      ; UNKNOWN
5589             0x12B2,   // 12B2..12B5; ETHIOPIC
5590             0x12B6,   // 12B6..12B7; UNKNOWN
5591             0x12B8,   // 12B8..12BE; ETHIOPIC
5592             0x12BF,   // 12BF      ; UNKNOWN
5593             0x12C0,   // 12C0      ; ETHIOPIC
5594             0x12C1,   // 12C1      ; UNKNOWN
5595             0x12C2,   // 12C2..12C5; ETHIOPIC
5596             0x12C6,   // 12C6..12C7; UNKNOWN
5597             0x12C8,   // 12C8..12D6; ETHIOPIC
5598             0x12D7,   // 12D7      ; UNKNOWN
5599             0x12D8,   // 12D8..1310; ETHIOPIC
5600             0x1311,   // 1311      ; UNKNOWN
5601             0x1312,   // 1312..1315; ETHIOPIC
5602             0x1316,   // 1316..1317; UNKNOWN
5603             0x1318,   // 1318..135A; ETHIOPIC
5604             0x135B,   // 135B..135C; UNKNOWN
5605             0x135D,   // 135D..137C; ETHIOPIC
5606             0x137D,   // 137D..137F; UNKNOWN
5607             0x1380,   // 1380..1399; ETHIOPIC
5608             0x139A,   // 139A..139F; UNKNOWN
5609             0x13A0,   // 13A0..13F5; CHEROKEE
5610             0x13F6,   // 13F6..13F7; UNKNOWN
5611             0x13F8,   // 13F8..13FD; CHEROKEE
5612             0x13FE,   // 13FE..13FF; UNKNOWN
5613             0x1400,   // 1400..167F; CANADIAN_ABORIGINAL
5614             0x1680,   // 1680..169C; OGHAM
5615             0x169D,   // 169D..169F; UNKNOWN
5616             0x16A0,   // 16A0..16EA; RUNIC
5617             0x16EB,   // 16EB..16ED; COMMON
5618             0x16EE,   // 16EE..16F8; RUNIC
5619             0x16F9,   // 16F9..16FF; UNKNOWN
5620             0x1700,   // 1700..170C; TAGALOG
5621             0x170D,   // 170D      ; UNKNOWN
5622             0x170E,   // 170E..1714; TAGALOG
5623             0x1715,   // 1715..171F; UNKNOWN
5624             0x1720,   // 1720..1734; HANUNOO
5625             0x1735,   // 1735..1736; COMMON
5626             0x1737,   // 1737..173F; UNKNOWN
5627             0x1740,   // 1740..1753; BUHID
5628             0x1754,   // 1754..175F; UNKNOWN
5629             0x1760,   // 1760..176C; TAGBANWA
5630             0x176D,   // 176D      ; UNKNOWN
5631             0x176E,   // 176E..1770; TAGBANWA
5632             0x1771,   // 1771      ; UNKNOWN
5633             0x1772,   // 1772..1773; TAGBANWA
5634             0x1774,   // 1774..177F; UNKNOWN
5635             0x1780,   // 1780..17DD; KHMER
5636             0x17DE,   // 17DE..17DF; UNKNOWN
5637             0x17E0,   // 17E0..17E9; KHMER
5638             0x17EA,   // 17EA..17EF; UNKNOWN
5639             0x17F0,   // 17F0..17F9; KHMER
5640             0x17FA,   // 17FA..17FF; UNKNOWN
5641             0x1800,   // 1800..1801; MONGOLIAN
5642             0x1802,   // 1802..1803; COMMON
5643             0x1804,   // 1804      ; MONGOLIAN
5644             0x1805,   // 1805      ; COMMON
5645             0x1806,   // 1806..180E; MONGOLIAN
5646             0x180F,   // 180F      ; UNKNOWN
5647             0x1810,   // 1810..1819; MONGOLIAN
5648             0x181A,   // 181A..181F; UNKNOWN
5649             0x1820,   // 1820..1878; MONGOLIAN
5650             0x1879,   // 1879..187F; UNKNOWN
5651             0x1880,   // 1880..18AA; MONGOLIAN
5652             0x18AB,   // 18AB..18AF; UNKNOWN
5653             0x18B0,   // 18B0..18F5; CANADIAN_ABORIGINAL
5654             0x18F6,   // 18F6..18FF; UNKNOWN
5655             0x1900,   // 1900..191E; LIMBU
5656             0x191F,   // 191F      ; UNKNOWN
5657             0x1920,   // 1920..192B; LIMBU
5658             0x192C,   // 192C..192F; UNKNOWN
5659             0x1930,   // 1930..193B; LIMBU
5660             0x193C,   // 193C..193F; UNKNOWN
5661             0x1940,   // 1940      ; LIMBU
5662             0x1941,   // 1941..1943; UNKNOWN
5663             0x1944,   // 1944..194F; LIMBU
5664             0x1950,   // 1950..196D; TAI_LE
5665             0x196E,   // 196E..196F; UNKNOWN
5666             0x1970,   // 1970..1974; TAI_LE
5667             0x1975,   // 1975..197F; UNKNOWN
5668             0x1980,   // 1980..19AB; NEW_TAI_LUE
5669             0x19AC,   // 19AC..19AF; UNKNOWN
5670             0x19B0,   // 19B0..19C9; NEW_TAI_LUE
5671             0x19CA,   // 19CA..19CF; UNKNOWN
5672             0x19D0,   // 19D0..19DA; NEW_TAI_LUE
5673             0x19DB,   // 19DB..19DD; UNKNOWN
5674             0x19DE,   // 19DE..19DF; NEW_TAI_LUE
5675             0x19E0,   // 19E0..19FF; KHMER
5676             0x1A00,   // 1A00..1A1B; BUGINESE
5677             0x1A1C,   // 1A1C..1A1D; UNKNOWN
5678             0x1A1E,   // 1A1E..1A1F; BUGINESE
5679             0x1A20,   // 1A20..1A5E; TAI_THAM
5680             0x1A5F,   // 1A5F      ; UNKNOWN
5681             0x1A60,   // 1A60..1A7C; TAI_THAM
5682             0x1A7D,   // 1A7D..1A7E; UNKNOWN
5683             0x1A7F,   // 1A7F..1A89; TAI_THAM
5684             0x1A8A,   // 1A8A..1A8F; UNKNOWN
5685             0x1A90,   // 1A90..1A99; TAI_THAM
5686             0x1A9A,   // 1A9A..1A9F; UNKNOWN
5687             0x1AA0,   // 1AA0..1AAD; TAI_THAM
5688             0x1AAE,   // 1AAE..1AAF; UNKNOWN
5689             0x1AB0,   // 1AB0..1AC0; INHERITED
5690             0x1AC1,   // 1AC1..1AFF; UNKNOWN
5691             0x1B00,   // 1B00..1B4B; BALINESE
5692             0x1B4C,   // 1B4C..1B4F; UNKNOWN
5693             0x1B50,   // 1B50..1B7C; BALINESE
5694             0x1B7D,   // 1B7D..1B7F; UNKNOWN
5695             0x1B80,   // 1B80..1BBF; SUNDANESE
5696             0x1BC0,   // 1BC0..1BF3; BATAK
5697             0x1BF4,   // 1BF4..1BFB; UNKNOWN
5698             0x1BFC,   // 1BFC..1BFF; BATAK
5699             0x1C00,   // 1C00..1C37; LEPCHA
5700             0x1C38,   // 1C38..1C3A; UNKNOWN
5701             0x1C3B,   // 1C3B..1C49; LEPCHA
5702             0x1C4A,   // 1C4A..1C4C; UNKNOWN
5703             0x1C4D,   // 1C4D..1C4F; LEPCHA
5704             0x1C50,   // 1C50..1C7F; OL_CHIKI
5705             0x1C80,   // 1C80..1C88; CYRILLIC
5706             0x1C89,   // 1C89..1C8F; UNKNOWN
5707             0x1C90,   // 1C90..1CBA; GEORGIAN
5708             0x1CBB,   // 1CBB..1CBC; UNKNOWN
5709             0x1CBD,   // 1CBD..1CBF; GEORGIAN
5710             0x1CC0,   // 1CC0..1CC7; SUNDANESE
5711             0x1CC8,   // 1CC8..1CCF; UNKNOWN
5712             0x1CD0,   // 1CD0..1CD2; INHERITED
5713             0x1CD3,   // 1CD3      ; COMMON
5714             0x1CD4,   // 1CD4..1CE0; INHERITED
5715             0x1CE1,   // 1CE1      ; COMMON
5716             0x1CE2,   // 1CE2..1CE8; INHERITED
5717             0x1CE9,   // 1CE9..1CEC; COMMON
5718             0x1CED,   // 1CED      ; INHERITED
5719             0x1CEE,   // 1CEE..1CF3; COMMON
5720             0x1CF4,   // 1CF4      ; INHERITED
5721             0x1CF5,   // 1CF5..1CF7; COMMON
5722             0x1CF8,   // 1CF8..1CF9; INHERITED
5723             0x1CFA,   // 1CFA      ; COMMON
5724             0x1CFB,   // 1CFB..1CFF; UNKNOWN
5725             0x1D00,   // 1D00..1D25; LATIN
5726             0x1D26,   // 1D26..1D2A; GREEK
5727             0x1D2B,   // 1D2B      ; CYRILLIC
5728             0x1D2C,   // 1D2C..1D5C; LATIN
5729             0x1D5D,   // 1D5D..1D61; GREEK
5730             0x1D62,   // 1D62..1D65; LATIN
5731             0x1D66,   // 1D66..1D6A; GREEK
5732             0x1D6B,   // 1D6B..1D77; LATIN
5733             0x1D78,   // 1D78      ; CYRILLIC
5734             0x1D79,   // 1D79..1DBE; LATIN
5735             0x1DBF,   // 1DBF      ; GREEK
5736             0x1DC0,   // 1DC0..1DF9; INHERITED
5737             0x1DFA,   // 1DFA      ; UNKNOWN
5738             0x1DFB,   // 1DFB..1DFF; INHERITED
5739             0x1E00,   // 1E00..1EFF; LATIN
5740             0x1F00,   // 1F00..1F15; GREEK
5741             0x1F16,   // 1F16..1F17; UNKNOWN
5742             0x1F18,   // 1F18..1F1D; GREEK
5743             0x1F1E,   // 1F1E..1F1F; UNKNOWN
5744             0x1F20,   // 1F20..1F45; GREEK
5745             0x1F46,   // 1F46..1F47; UNKNOWN
5746             0x1F48,   // 1F48..1F4D; GREEK
5747             0x1F4E,   // 1F4E..1F4F; UNKNOWN
5748             0x1F50,   // 1F50..1F57; GREEK
5749             0x1F58,   // 1F58      ; UNKNOWN
5750             0x1F59,   // 1F59      ; GREEK
5751             0x1F5A,   // 1F5A      ; UNKNOWN
5752             0x1F5B,   // 1F5B      ; GREEK
5753             0x1F5C,   // 1F5C      ; UNKNOWN
5754             0x1F5D,   // 1F5D      ; GREEK
5755             0x1F5E,   // 1F5E      ; UNKNOWN
5756             0x1F5F,   // 1F5F..1F7D; GREEK
5757             0x1F7E,   // 1F7E..1F7F; UNKNOWN
5758             0x1F80,   // 1F80..1FB4; GREEK
5759             0x1FB5,   // 1FB5      ; UNKNOWN
5760             0x1FB6,   // 1FB6..1FC4; GREEK
5761             0x1FC5,   // 1FC5      ; UNKNOWN
5762             0x1FC6,   // 1FC6..1FD3; GREEK
5763             0x1FD4,   // 1FD4..1FD5; UNKNOWN
5764             0x1FD6,   // 1FD6..1FDB; GREEK
5765             0x1FDC,   // 1FDC      ; UNKNOWN
5766             0x1FDD,   // 1FDD..1FEF; GREEK
5767             0x1FF0,   // 1FF0..1FF1; UNKNOWN
5768             0x1FF2,   // 1FF2..1FF4; GREEK
5769             0x1FF5,   // 1FF5      ; UNKNOWN
5770             0x1FF6,   // 1FF6..1FFE; GREEK
5771             0x1FFF,   // 1FFF      ; UNKNOWN
5772             0x2000,   // 2000..200B; COMMON
5773             0x200C,   // 200C..200D; INHERITED
5774             0x200E,   // 200E..2064; COMMON
5775             0x2065,   // 2065      ; UNKNOWN
5776             0x2066,   // 2066..2070; COMMON
5777             0x2071,   // 2071      ; LATIN
5778             0x2072,   // 2072..2073; UNKNOWN
5779             0x2074,   // 2074..207E; COMMON
5780             0x207F,   // 207F      ; LATIN
5781             0x2080,   // 2080..208E; COMMON
5782             0x208F,   // 208F      ; UNKNOWN
5783             0x2090,   // 2090..209C; LATIN
5784             0x209D,   // 209D..209F; UNKNOWN
5785             0x20A0,   // 20A0..20BF; COMMON
5786             0x20C0,   // 20C0..20CF; UNKNOWN
5787             0x20D0,   // 20D0..20F0; INHERITED
5788             0x20F1,   // 20F1..20FF; UNKNOWN
5789             0x2100,   // 2100..2125; COMMON
5790             0x2126,   // 2126      ; GREEK
5791             0x2127,   // 2127..2129; COMMON
5792             0x212A,   // 212A..212B; LATIN
5793             0x212C,   // 212C..2131; COMMON
5794             0x2132,   // 2132      ; LATIN
5795             0x2133,   // 2133..214D; COMMON
5796             0x214E,   // 214E      ; LATIN
5797             0x214F,   // 214F..215F; COMMON
5798             0x2160,   // 2160..2188; LATIN
5799             0x2189,   // 2189..218B; COMMON
5800             0x218C,   // 218C..218F; UNKNOWN
5801             0x2190,   // 2190..2426; COMMON
5802             0x2427,   // 2427..243F; UNKNOWN
5803             0x2440,   // 2440..244A; COMMON
5804             0x244B,   // 244B..245F; UNKNOWN
5805             0x2460,   // 2460..27FF; COMMON
5806             0x2800,   // 2800..28FF; BRAILLE
5807             0x2900,   // 2900..2B73; COMMON
5808             0x2B74,   // 2B74..2B75; UNKNOWN
5809             0x2B76,   // 2B76..2B95; COMMON
5810             0x2B96,   // 2B96      ; UNKNOWN
5811             0x2B97,   // 2B97..2BFF; COMMON
5812             0x2C00,   // 2C00..2C2E; GLAGOLITIC
5813             0x2C2F,   // 2C2F      ; UNKNOWN
5814             0x2C30,   // 2C30..2C5E; GLAGOLITIC
5815             0x2C5F,   // 2C5F      ; UNKNOWN
5816             0x2C60,   // 2C60..2C7F; LATIN
5817             0x2C80,   // 2C80..2CF3; COPTIC
5818             0x2CF4,   // 2CF4..2CF8; UNKNOWN
5819             0x2CF9,   // 2CF9..2CFF; COPTIC
5820             0x2D00,   // 2D00..2D25; GEORGIAN
5821             0x2D26,   // 2D26      ; UNKNOWN
5822             0x2D27,   // 2D27      ; GEORGIAN
5823             0x2D28,   // 2D28..2D2C; UNKNOWN
5824             0x2D2D,   // 2D2D      ; GEORGIAN
5825             0x2D2E,   // 2D2E..2D2F; UNKNOWN
5826             0x2D30,   // 2D30..2D67; TIFINAGH
5827             0x2D68,   // 2D68..2D6E; UNKNOWN
5828             0x2D6F,   // 2D6F..2D70; TIFINAGH
5829             0x2D71,   // 2D71..2D7E; UNKNOWN
5830             0x2D7F,   // 2D7F      ; TIFINAGH
5831             0x2D80,   // 2D80..2D96; ETHIOPIC
5832             0x2D97,   // 2D97..2D9F; UNKNOWN
5833             0x2DA0,   // 2DA0..2DA6; ETHIOPIC
5834             0x2DA7,   // 2DA7      ; UNKNOWN
5835             0x2DA8,   // 2DA8..2DAE; ETHIOPIC
5836             0x2DAF,   // 2DAF      ; UNKNOWN
5837             0x2DB0,   // 2DB0..2DB6; ETHIOPIC
5838             0x2DB7,   // 2DB7      ; UNKNOWN
5839             0x2DB8,   // 2DB8..2DBE; ETHIOPIC
5840             0x2DBF,   // 2DBF      ; UNKNOWN
5841             0x2DC0,   // 2DC0..2DC6; ETHIOPIC
5842             0x2DC7,   // 2DC7      ; UNKNOWN
5843             0x2DC8,   // 2DC8..2DCE; ETHIOPIC
5844             0x2DCF,   // 2DCF      ; UNKNOWN
5845             0x2DD0,   // 2DD0..2DD6; ETHIOPIC
5846             0x2DD7,   // 2DD7      ; UNKNOWN
5847             0x2DD8,   // 2DD8..2DDE; ETHIOPIC
5848             0x2DDF,   // 2DDF      ; UNKNOWN
5849             0x2DE0,   // 2DE0..2DFF; CYRILLIC
5850             0x2E00,   // 2E00..2E52; COMMON
5851             0x2E53,   // 2E53..2E7F; UNKNOWN
5852             0x2E80,   // 2E80..2E99; HAN
5853             0x2E9A,   // 2E9A      ; UNKNOWN
5854             0x2E9B,   // 2E9B..2EF3; HAN
5855             0x2EF4,   // 2EF4..2EFF; UNKNOWN
5856             0x2F00,   // 2F00..2FD5; HAN
5857             0x2FD6,   // 2FD6..2FEF; UNKNOWN
5858             0x2FF0,   // 2FF0..2FFB; COMMON
5859             0x2FFC,   // 2FFC..2FFF; UNKNOWN
5860             0x3000,   // 3000..3004; COMMON
5861             0x3005,   // 3005      ; HAN
5862             0x3006,   // 3006      ; COMMON
5863             0x3007,   // 3007      ; HAN
5864             0x3008,   // 3008..3020; COMMON
5865             0x3021,   // 3021..3029; HAN
5866             0x302A,   // 302A..302D; INHERITED
5867             0x302E,   // 302E..302F; HANGUL
5868             0x3030,   // 3030..3037; COMMON
5869             0x3038,   // 3038..303B; HAN
5870             0x303C,   // 303C..303F; COMMON
5871             0x3040,   // 3040      ; UNKNOWN
5872             0x3041,   // 3041..3096; HIRAGANA
5873             0x3097,   // 3097..3098; UNKNOWN
5874             0x3099,   // 3099..309A; INHERITED
5875             0x309B,   // 309B..309C; COMMON
5876             0x309D,   // 309D..309F; HIRAGANA
5877             0x30A0,   // 30A0      ; COMMON
5878             0x30A1,   // 30A1..30FA; KATAKANA
5879             0x30FB,   // 30FB..30FC; COMMON
5880             0x30FD,   // 30FD..30FF; KATAKANA
5881             0x3100,   // 3100..3104; UNKNOWN
5882             0x3105,   // 3105..312F; BOPOMOFO
5883             0x3130,   // 3130      ; UNKNOWN
5884             0x3131,   // 3131..318E; HANGUL
5885             0x318F,   // 318F      ; UNKNOWN
5886             0x3190,   // 3190..319F; COMMON
5887             0x31A0,   // 31A0..31BF; BOPOMOFO
5888             0x31C0,   // 31C0..31E3; COMMON
5889             0x31E4,   // 31E4..31EF; UNKNOWN
5890             0x31F0,   // 31F0..31FF; KATAKANA
5891             0x3200,   // 3200..321E; HANGUL
5892             0x321F,   // 321F      ; UNKNOWN
5893             0x3220,   // 3220..325F; COMMON
5894             0x3260,   // 3260..327E; HANGUL
5895             0x327F,   // 327F..32CF; COMMON
5896             0x32D0,   // 32D0..32FE; KATAKANA
5897             0x32FF,   // 32FF      ; COMMON
5898             0x3300,   // 3300..3357; KATAKANA
5899             0x3358,   // 3358..33FF; COMMON
5900             0x3400,   // 3400..4DBF; HAN
5901             0x4DC0,   // 4DC0..4DFF; COMMON
5902             0x4E00,   // 4E00..9FFC; HAN
5903             0x9FFD,   // 9FFD..9FFF; UNKNOWN
5904             0xA000,   // A000..A48C; YI
5905             0xA48D,   // A48D..A48F; UNKNOWN
5906             0xA490,   // A490..A4C6; YI
5907             0xA4C7,   // A4C7..A4CF; UNKNOWN
5908             0xA4D0,   // A4D0..A4FF; LISU
5909             0xA500,   // A500..A62B; VAI
5910             0xA62C,   // A62C..A63F; UNKNOWN
5911             0xA640,   // A640..A69F; CYRILLIC
5912             0xA6A0,   // A6A0..A6F7; BAMUM
5913             0xA6F8,   // A6F8..A6FF; UNKNOWN
5914             0xA700,   // A700..A721; COMMON
5915             0xA722,   // A722..A787; LATIN
5916             0xA788,   // A788..A78A; COMMON
5917             0xA78B,   // A78B..A7BF; LATIN
5918             0xA7C0,   // A7C0..A7C1; UNKNOWN
5919             0xA7C2,   // A7C2..A7CA; LATIN
5920             0xA7CB,   // A7CB..A7F4; UNKNOWN
5921             0xA7F5,   // A7F5..A7FF; LATIN
5922             0xA800,   // A800..A82C; SYLOTI_NAGRI
5923             0xA82D,   // A82D..A82F; UNKNOWN
5924             0xA830,   // A830..A839; COMMON
5925             0xA83A,   // A83A..A83F; UNKNOWN
5926             0xA840,   // A840..A877; PHAGS_PA
5927             0xA878,   // A878..A87F; UNKNOWN
5928             0xA880,   // A880..A8C5; SAURASHTRA
5929             0xA8C6,   // A8C6..A8CD; UNKNOWN
5930             0xA8CE,   // A8CE..A8D9; SAURASHTRA
5931             0xA8DA,   // A8DA..A8DF; UNKNOWN
5932             0xA8E0,   // A8E0..A8FF; DEVANAGARI
5933             0xA900,   // A900..A92D; KAYAH_LI
5934             0xA92E,   // A92E      ; COMMON
5935             0xA92F,   // A92F      ; KAYAH_LI
5936             0xA930,   // A930..A953; REJANG
5937             0xA954,   // A954..A95E; UNKNOWN
5938             0xA95F,   // A95F      ; REJANG
5939             0xA960,   // A960..A97C; HANGUL
5940             0xA97D,   // A97D..A97F; UNKNOWN
5941             0xA980,   // A980..A9CD; JAVANESE
5942             0xA9CE,   // A9CE      ; UNKNOWN
5943             0xA9CF,   // A9CF      ; COMMON
5944             0xA9D0,   // A9D0..A9D9; JAVANESE
5945             0xA9DA,   // A9DA..A9DD; UNKNOWN
5946             0xA9DE,   // A9DE..A9DF; JAVANESE
5947             0xA9E0,   // A9E0..A9FE; MYANMAR
5948             0xA9FF,   // A9FF      ; UNKNOWN
5949             0xAA00,   // AA00..AA36; CHAM
5950             0xAA37,   // AA37..AA3F; UNKNOWN
5951             0xAA40,   // AA40..AA4D; CHAM
5952             0xAA4E,   // AA4E..AA4F; UNKNOWN
5953             0xAA50,   // AA50..AA59; CHAM
5954             0xAA5A,   // AA5A..AA5B; UNKNOWN
5955             0xAA5C,   // AA5C..AA5F; CHAM
5956             0xAA60,   // AA60..AA7F; MYANMAR
5957             0xAA80,   // AA80..AAC2; TAI_VIET
5958             0xAAC3,   // AAC3..AADA; UNKNOWN
5959             0xAADB,   // AADB..AADF; TAI_VIET
5960             0xAAE0,   // AAE0..AAF6; MEETEI_MAYEK
5961             0xAAF7,   // AAF7..AB00; UNKNOWN
5962             0xAB01,   // AB01..AB06; ETHIOPIC
5963             0xAB07,   // AB07..AB08; UNKNOWN
5964             0xAB09,   // AB09..AB0E; ETHIOPIC
5965             0xAB0F,   // AB0F..AB10; UNKNOWN
5966             0xAB11,   // AB11..AB16; ETHIOPIC
5967             0xAB17,   // AB17..AB1F; UNKNOWN
5968             0xAB20,   // AB20..AB26; ETHIOPIC
5969             0xAB27,   // AB27      ; UNKNOWN
5970             0xAB28,   // AB28..AB2E; ETHIOPIC
5971             0xAB2F,   // AB2F      ; UNKNOWN
5972             0xAB30,   // AB30..AB5A; LATIN
5973             0xAB5B,   // AB5B      ; COMMON
5974             0xAB5C,   // AB5C..AB64; LATIN
5975             0xAB65,   // AB65      ; GREEK
5976             0xAB66,   // AB66..AB69; LATIN
5977             0xAB6A,   // AB6A..AB6B; COMMON
5978             0xAB6C,   // AB6C..AB6F; UNKNOWN
5979             0xAB70,   // AB70..ABBF; CHEROKEE
5980             0xABC0,   // ABC0..ABED; MEETEI_MAYEK
5981             0xABEE,   // ABEE..ABEF; UNKNOWN
5982             0xABF0,   // ABF0..ABF9; MEETEI_MAYEK
5983             0xABFA,   // ABFA..ABFF; UNKNOWN
5984             0xAC00,   // AC00..D7A3; HANGUL
5985             0xD7A4,   // D7A4..D7AF; UNKNOWN
5986             0xD7B0,   // D7B0..D7C6; HANGUL
5987             0xD7C7,   // D7C7..D7CA; UNKNOWN
5988             0xD7CB,   // D7CB..D7FB; HANGUL
5989             0xD7FC,   // D7FC..F8FF; UNKNOWN
5990             0xF900,   // F900..FA6D; HAN
5991             0xFA6E,   // FA6E..FA6F; UNKNOWN
5992             0xFA70,   // FA70..FAD9; HAN
5993             0xFADA,   // FADA..FAFF; UNKNOWN
5994             0xFB00,   // FB00..FB06; LATIN
5995             0xFB07,   // FB07..FB12; UNKNOWN
5996             0xFB13,   // FB13..FB17; ARMENIAN
5997             0xFB18,   // FB18..FB1C; UNKNOWN
5998             0xFB1D,   // FB1D..FB36; HEBREW
5999             0xFB37,   // FB37      ; UNKNOWN
6000             0xFB38,   // FB38..FB3C; HEBREW
6001             0xFB3D,   // FB3D      ; UNKNOWN
6002             0xFB3E,   // FB3E      ; HEBREW
6003             0xFB3F,   // FB3F      ; UNKNOWN
6004             0xFB40,   // FB40..FB41; HEBREW
6005             0xFB42,   // FB42      ; UNKNOWN
6006             0xFB43,   // FB43..FB44; HEBREW
6007             0xFB45,   // FB45      ; UNKNOWN
6008             0xFB46,   // FB46..FB4F; HEBREW
6009             0xFB50,   // FB50..FBC1; ARABIC
6010             0xFBC2,   // FBC2..FBD2; UNKNOWN
6011             0xFBD3,   // FBD3..FD3D; ARABIC
6012             0xFD3E,   // FD3E..FD3F; COMMON
6013             0xFD40,   // FD40..FD4F; UNKNOWN
6014             0xFD50,   // FD50..FD8F; ARABIC
6015             0xFD90,   // FD90..FD91; UNKNOWN
6016             0xFD92,   // FD92..FDC7; ARABIC
6017             0xFDC8,   // FDC8..FDEF; UNKNOWN
6018             0xFDF0,   // FDF0..FDFD; ARABIC
6019             0xFDFE,   // FDFE..FDFF; UNKNOWN
6020             0xFE00,   // FE00..FE0F; INHERITED
6021             0xFE10,   // FE10..FE19; COMMON
6022             0xFE1A,   // FE1A..FE1F; UNKNOWN
6023             0xFE20,   // FE20..FE2D; INHERITED
6024             0xFE2E,   // FE2E..FE2F; CYRILLIC
6025             0xFE30,   // FE30..FE52; COMMON
6026             0xFE53,   // FE53      ; UNKNOWN
6027             0xFE54,   // FE54..FE66; COMMON
6028             0xFE67,   // FE67      ; UNKNOWN
6029             0xFE68,   // FE68..FE6B; COMMON
6030             0xFE6C,   // FE6C..FE6F; UNKNOWN
6031             0xFE70,   // FE70..FE74; ARABIC
6032             0xFE75,   // FE75      ; UNKNOWN
6033             0xFE76,   // FE76..FEFC; ARABIC
6034             0xFEFD,   // FEFD..FEFE; UNKNOWN
6035             0xFEFF,   // FEFF      ; COMMON
6036             0xFF00,   // FF00      ; UNKNOWN
6037             0xFF01,   // FF01..FF20; COMMON
6038             0xFF21,   // FF21..FF3A; LATIN
6039             0xFF3B,   // FF3B..FF40; COMMON
6040             0xFF41,   // FF41..FF5A; LATIN
6041             0xFF5B,   // FF5B..FF65; COMMON
6042             0xFF66,   // FF66..FF6F; KATAKANA
6043             0xFF70,   // FF70      ; COMMON
6044             0xFF71,   // FF71..FF9D; KATAKANA
6045             0xFF9E,   // FF9E..FF9F; COMMON
6046             0xFFA0,   // FFA0..FFBE; HANGUL
6047             0xFFBF,   // FFBF..FFC1; UNKNOWN
6048             0xFFC2,   // FFC2..FFC7; HANGUL
6049             0xFFC8,   // FFC8..FFC9; UNKNOWN
6050             0xFFCA,   // FFCA..FFCF; HANGUL
6051             0xFFD0,   // FFD0..FFD1; UNKNOWN
6052             0xFFD2,   // FFD2..FFD7; HANGUL
6053             0xFFD8,   // FFD8..FFD9; UNKNOWN
6054             0xFFDA,   // FFDA..FFDC; HANGUL
6055             0xFFDD,   // FFDD..FFDF; UNKNOWN
6056             0xFFE0,   // FFE0..FFE6; COMMON
6057             0xFFE7,   // FFE7      ; UNKNOWN
6058             0xFFE8,   // FFE8..FFEE; COMMON
6059             0xFFEF,   // FFEF..FFF8; UNKNOWN
6060             0xFFF9,   // FFF9..FFFD; COMMON
6061             0xFFFE,   // FFFE..FFFF; UNKNOWN
6062             0x10000,  // 10000..1000B; LINEAR_B
6063             0x1000C,  // 1000C       ; UNKNOWN
6064             0x1000D,  // 1000D..10026; LINEAR_B
6065             0x10027,  // 10027       ; UNKNOWN
6066             0x10028,  // 10028..1003A; LINEAR_B
6067             0x1003B,  // 1003B       ; UNKNOWN
6068             0x1003C,  // 1003C..1003D; LINEAR_B
6069             0x1003E,  // 1003E       ; UNKNOWN
6070             0x1003F,  // 1003F..1004D; LINEAR_B
6071             0x1004E,  // 1004E..1004F; UNKNOWN
6072             0x10050,  // 10050..1005D; LINEAR_B
6073             0x1005E,  // 1005E..1007F; UNKNOWN
6074             0x10080,  // 10080..100FA; LINEAR_B
6075             0x100FB,  // 100FB..100FF; UNKNOWN
6076             0x10100,  // 10100..10102; COMMON
6077             0x10103,  // 10103..10106; UNKNOWN
6078             0x10107,  // 10107..10133; COMMON
6079             0x10134,  // 10134..10136; UNKNOWN
6080             0x10137,  // 10137..1013F; COMMON
6081             0x10140,  // 10140..1018E; GREEK
6082             0x1018F,  // 1018F       ; UNKNOWN
6083             0x10190,  // 10190..1019C; COMMON
6084             0x1019D,  // 1019D..1019F; UNKNOWN
6085             0x101A0,  // 101A0       ; GREEK
6086             0x101A1,  // 101A1..101CF; UNKNOWN
6087             0x101D0,  // 101D0..101FC; COMMON
6088             0x101FD,  // 101FD       ; INHERITED
6089             0x101FE,  // 101FE..1027F; UNKNOWN
6090             0x10280,  // 10280..1029C; LYCIAN
6091             0x1029D,  // 1029D..1029F; UNKNOWN
6092             0x102A0,  // 102A0..102D0; CARIAN
6093             0x102D1,  // 102D1..102DF; UNKNOWN
6094             0x102E0,  // 102E0       ; INHERITED
6095             0x102E1,  // 102E1..102FB; COMMON
6096             0x102FC,  // 102FC..102FF; UNKNOWN
6097             0x10300,  // 10300..10323; OLD_ITALIC
6098             0x10324,  // 10324..1032C; UNKNOWN
6099             0x1032D,  // 1032D..1032F; OLD_ITALIC
6100             0x10330,  // 10330..1034A; GOTHIC
6101             0x1034B,  // 1034B..1034F; UNKNOWN
6102             0x10350,  // 10350..1037A; OLD_PERMIC
6103             0x1037B,  // 1037B..1037F; UNKNOWN
6104             0x10380,  // 10380..1039D; UGARITIC
6105             0x1039E,  // 1039E       ; UNKNOWN
6106             0x1039F,  // 1039F       ; UGARITIC
6107             0x103A0,  // 103A0..103C3; OLD_PERSIAN
6108             0x103C4,  // 103C4..103C7; UNKNOWN
6109             0x103C8,  // 103C8..103D5; OLD_PERSIAN
6110             0x103D6,  // 103D6..103FF; UNKNOWN
6111             0x10400,  // 10400..1044F; DESERET
6112             0x10450,  // 10450..1047F; SHAVIAN
6113             0x10480,  // 10480..1049D; OSMANYA
6114             0x1049E,  // 1049E..1049F; UNKNOWN
6115             0x104A0,  // 104A0..104A9; OSMANYA
6116             0x104AA,  // 104AA..104AF; UNKNOWN
6117             0x104B0,  // 104B0..104D3; OSAGE
6118             0x104D4,  // 104D4..104D7; UNKNOWN
6119             0x104D8,  // 104D8..104FB; OSAGE
6120             0x104FC,  // 104FC..104FF; UNKNOWN
6121             0x10500,  // 10500..10527; ELBASAN
6122             0x10528,  // 10528..1052F; UNKNOWN
6123             0x10530,  // 10530..10563; CAUCASIAN_ALBANIAN
6124             0x10564,  // 10564..1056E; UNKNOWN
6125             0x1056F,  // 1056F       ; CAUCASIAN_ALBANIAN
6126             0x10570,  // 10570..105FF; UNKNOWN
6127             0x10600,  // 10600..10736; LINEAR_A
6128             0x10737,  // 10737..1073F; UNKNOWN
6129             0x10740,  // 10740..10755; LINEAR_A
6130             0x10756,  // 10756..1075F; UNKNOWN
6131             0x10760,  // 10760..10767; LINEAR_A
6132             0x10768,  // 10768..107FF; UNKNOWN
6133             0x10800,  // 10800..10805; CYPRIOT
6134             0x10806,  // 10806..10807; UNKNOWN
6135             0x10808,  // 10808       ; CYPRIOT
6136             0x10809,  // 10809       ; UNKNOWN
6137             0x1080A,  // 1080A..10835; CYPRIOT
6138             0x10836,  // 10836       ; UNKNOWN
6139             0x10837,  // 10837..10838; CYPRIOT
6140             0x10839,  // 10839..1083B; UNKNOWN
6141             0x1083C,  // 1083C       ; CYPRIOT
6142             0x1083D,  // 1083D..1083E; UNKNOWN
6143             0x1083F,  // 1083F       ; CYPRIOT
6144             0x10840,  // 10840..10855; IMPERIAL_ARAMAIC
6145             0x10856,  // 10856       ; UNKNOWN
6146             0x10857,  // 10857..1085F; IMPERIAL_ARAMAIC
6147             0x10860,  // 10860..1087F; PALMYRENE
6148             0x10880,  // 10880..1089E; NABATAEAN
6149             0x1089F,  // 1089F..108A6; UNKNOWN
6150             0x108A7,  // 108A7..108AF; NABATAEAN
6151             0x108B0,  // 108B0..108DF; UNKNOWN
6152             0x108E0,  // 108E0..108F2; HATRAN
6153             0x108F3,  // 108F3       ; UNKNOWN
6154             0x108F4,  // 108F4..108F5; HATRAN
6155             0x108F6,  // 108F6..108FA; UNKNOWN
6156             0x108FB,  // 108FB..108FF; HATRAN
6157             0x10900,  // 10900..1091B; PHOENICIAN
6158             0x1091C,  // 1091C..1091E; UNKNOWN
6159             0x1091F,  // 1091F       ; PHOENICIAN
6160             0x10920,  // 10920..10939; LYDIAN
6161             0x1093A,  // 1093A..1093E; UNKNOWN
6162             0x1093F,  // 1093F       ; LYDIAN
6163             0x10940,  // 10940..1097F; UNKNOWN
6164             0x10980,  // 10980..1099F; MEROITIC_HIEROGLYPHS
6165             0x109A0,  // 109A0..109B7; MEROITIC_CURSIVE
6166             0x109B8,  // 109B8..109BB; UNKNOWN
6167             0x109BC,  // 109BC..109CF; MEROITIC_CURSIVE
6168             0x109D0,  // 109D0..109D1; UNKNOWN
6169             0x109D2,  // 109D2..109FF; MEROITIC_CURSIVE
6170             0x10A00,  // 10A00..10A03; KHAROSHTHI
6171             0x10A04,  // 10A04       ; UNKNOWN
6172             0x10A05,  // 10A05..10A06; KHAROSHTHI
6173             0x10A07,  // 10A07..10A0B; UNKNOWN
6174             0x10A0C,  // 10A0C..10A13; KHAROSHTHI
6175             0x10A14,  // 10A14       ; UNKNOWN
6176             0x10A15,  // 10A15..10A17; KHAROSHTHI
6177             0x10A18,  // 10A18       ; UNKNOWN
6178             0x10A19,  // 10A19..10A35; KHAROSHTHI
6179             0x10A36,  // 10A36..10A37; UNKNOWN
6180             0x10A38,  // 10A38..10A3A; KHAROSHTHI
6181             0x10A3B,  // 10A3B..10A3E; UNKNOWN
6182             0x10A3F,  // 10A3F..10A48; KHAROSHTHI
6183             0x10A49,  // 10A49..10A4F; UNKNOWN
6184             0x10A50,  // 10A50..10A58; KHAROSHTHI
6185             0x10A59,  // 10A59..10A5F; UNKNOWN
6186             0x10A60,  // 10A60..10A7F; OLD_SOUTH_ARABIAN
6187             0x10A80,  // 10A80..10A9F; OLD_NORTH_ARABIAN
6188             0x10AA0,  // 10AA0..10ABF; UNKNOWN
6189             0x10AC0,  // 10AC0..10AE6; MANICHAEAN
6190             0x10AE7,  // 10AE7..10AEA; UNKNOWN
6191             0x10AEB,  // 10AEB..10AF6; MANICHAEAN
6192             0x10AF7,  // 10AF7..10AFF; UNKNOWN
6193             0x10B00,  // 10B00..10B35; AVESTAN
6194             0x10B36,  // 10B36..10B38; UNKNOWN
6195             0x10B39,  // 10B39..10B3F; AVESTAN
6196             0x10B40,  // 10B40..10B55; INSCRIPTIONAL_PARTHIAN
6197             0x10B56,  // 10B56..10B57; UNKNOWN
6198             0x10B58,  // 10B58..10B5F; INSCRIPTIONAL_PARTHIAN
6199             0x10B60,  // 10B60..10B72; INSCRIPTIONAL_PAHLAVI
6200             0x10B73,  // 10B73..10B77; UNKNOWN
6201             0x10B78,  // 10B78..10B7F; INSCRIPTIONAL_PAHLAVI
6202             0x10B80,  // 10B80..10B91; PSALTER_PAHLAVI
6203             0x10B92,  // 10B92..10B98; UNKNOWN
6204             0x10B99,  // 10B99..10B9C; PSALTER_PAHLAVI
6205             0x10B9D,  // 10B9D..10BA8; UNKNOWN
6206             0x10BA9,  // 10BA9..10BAF; PSALTER_PAHLAVI
6207             0x10BB0,  // 10BB0..10BFF; UNKNOWN
6208             0x10C00,  // 10C00..10C48; OLD_TURKIC
6209             0x10C49,  // 10C49..10C7F; UNKNOWN
6210             0x10C80,  // 10C80..10CB2; OLD_HUNGARIAN
6211             0x10CB3,  // 10CB3..10CBF; UNKNOWN
6212             0x10CC0,  // 10CC0..10CF2; OLD_HUNGARIAN
6213             0x10CF3,  // 10CF3..10CF9; UNKNOWN
6214             0x10CFA,  // 10CFA..10CFF; OLD_HUNGARIAN
6215             0x10D00,  // 10D00..10D27; HANIFI_ROHINGYA
6216             0x10D28,  // 10D28..10D2F; UNKNOWN
6217             0x10D30,  // 10D30..10D39; HANIFI_ROHINGYA
6218             0x10D3A,  // 10D3A..10E5F; UNKNOWN
6219             0x10E60,  // 10E60..10E7E; ARABIC
6220             0x10E7F,  // 10E7F       ; UNKNOWN
6221             0x10E80,  // 10E80..10EA9; YEZIDI
6222             0x10EAA,  // 10EAA       ; UNKNOWN
6223             0x10EAB,  // 10EAB..10EAD; YEZIDI
6224             0x10EAE,  // 10EAE..10EAF; UNKNOWN
6225             0x10EB0,  // 10EB0..10EB1; YEZIDI
6226             0x10EB2,  // 10EB2..10EFF; UNKNOWN
6227             0x10F00,  // 10F00..10F27; OLD_SOGDIAN
6228             0x10F28,  // 10F28..10F2F; UNKNOWN
6229             0x10F30,  // 10F30..10F59; SOGDIAN
6230             0x10F5A,  // 10F5A..10FAF; UNKNOWN
6231             0x10FB0,  // 10FB0..10FCB; CHORASMIAN
6232             0x10FCC,  // 10FCC..10FDF; UNKNOWN
6233             0x10FE0,  // 10FE0..10FF6; ELYMAIC
6234             0x10FF7,  // 10FF7..10FFF; UNKNOWN
6235             0x11000,  // 11000..1104D; BRAHMI
6236             0x1104E,  // 1104E..11051; UNKNOWN
6237             0x11052,  // 11052..1106F; BRAHMI
6238             0x11070,  // 11070..1107E; UNKNOWN
6239             0x1107F,  // 1107F       ; BRAHMI
6240             0x11080,  // 11080..110C1; KAITHI
6241             0x110C2,  // 110C2..110CC; UNKNOWN
6242             0x110CD,  // 110CD       ; KAITHI
6243             0x110CE,  // 110CE..110CF; UNKNOWN
6244             0x110D0,  // 110D0..110E8; SORA_SOMPENG
6245             0x110E9,  // 110E9..110EF; UNKNOWN
6246             0x110F0,  // 110F0..110F9; SORA_SOMPENG
6247             0x110FA,  // 110FA..110FF; UNKNOWN
6248             0x11100,  // 11100..11134; CHAKMA
6249             0x11135,  // 11135       ; UNKNOWN
6250             0x11136,  // 11136..11147; CHAKMA
6251             0x11148,  // 11148..1114F; UNKNOWN
6252             0x11150,  // 11150..11176; MAHAJANI
6253             0x11177,  // 11177..1117F; UNKNOWN
6254             0x11180,  // 11180..111DF; SHARADA
6255             0x111E0,  // 111E0       ; UNKNOWN
6256             0x111E1,  // 111E1..111F4; SINHALA
6257             0x111F5,  // 111F5..111FF; UNKNOWN
6258             0x11200,  // 11200..11211; KHOJKI
6259             0x11212,  // 11212       ; UNKNOWN
6260             0x11213,  // 11213..1123E; KHOJKI
6261             0x1123F,  // 1123F..1127F; UNKNOWN
6262             0x11280,  // 11280..11286; MULTANI
6263             0x11287,  // 11287       ; UNKNOWN
6264             0x11288,  // 11288       ; MULTANI
6265             0x11289,  // 11289       ; UNKNOWN
6266             0x1128A,  // 1128A..1128D; MULTANI
6267             0x1128E,  // 1128E       ; UNKNOWN
6268             0x1128F,  // 1128F..1129D; MULTANI
6269             0x1129E,  // 1129E       ; UNKNOWN
6270             0x1129F,  // 1129F..112A9; MULTANI
6271             0x112AA,  // 112AA..112AF; UNKNOWN
6272             0x112B0,  // 112B0..112EA; KHUDAWADI
6273             0x112EB,  // 112EB..112EF; UNKNOWN
6274             0x112F0,  // 112F0..112F9; KHUDAWADI
6275             0x112FA,  // 112FA..112FF; UNKNOWN
6276             0x11300,  // 11300..11303; GRANTHA
6277             0x11304,  // 11304       ; UNKNOWN
6278             0x11305,  // 11305..1130C; GRANTHA
6279             0x1130D,  // 1130D..1130E; UNKNOWN
6280             0x1130F,  // 1130F..11310; GRANTHA
6281             0x11311,  // 11311..11312; UNKNOWN
6282             0x11313,  // 11313..11328; GRANTHA
6283             0x11329,  // 11329       ; UNKNOWN
6284             0x1132A,  // 1132A..11330; GRANTHA
6285             0x11331,  // 11331       ; UNKNOWN
6286             0x11332,  // 11332..11333; GRANTHA
6287             0x11334,  // 11334       ; UNKNOWN
6288             0x11335,  // 11335..11339; GRANTHA
6289             0x1133A,  // 1133A       ; UNKNOWN
6290             0x1133B,  // 1133B       ; INHERITED
6291             0x1133C,  // 1133C..11344; GRANTHA
6292             0x11345,  // 11345..11346; UNKNOWN
6293             0x11347,  // 11347..11348; GRANTHA
6294             0x11349,  // 11349..1134A; UNKNOWN
6295             0x1134B,  // 1134B..1134D; GRANTHA
6296             0x1134E,  // 1134E..1134F; UNKNOWN
6297             0x11350,  // 11350       ; GRANTHA
6298             0x11351,  // 11351..11356; UNKNOWN
6299             0x11357,  // 11357       ; GRANTHA
6300             0x11358,  // 11358..1135C; UNKNOWN
6301             0x1135D,  // 1135D..11363; GRANTHA
6302             0x11364,  // 11364..11365; UNKNOWN
6303             0x11366,  // 11366..1136C; GRANTHA
6304             0x1136D,  // 1136D..1136F; UNKNOWN
6305             0x11370,  // 11370..11374; GRANTHA
6306             0x11375,  // 11375..113FF; UNKNOWN
6307             0x11400,  // 11400..1145B; NEWA
6308             0x1145C,  // 1145C       ; UNKNOWN
6309             0x1145D,  // 1145D..11461; NEWA
6310             0x11462,  // 11462..1147F; UNKNOWN
6311             0x11480,  // 11480..114C7; TIRHUTA
6312             0x114C8,  // 114C8..114CF; UNKNOWN
6313             0x114D0,  // 114D0..114D9; TIRHUTA
6314             0x114DA,  // 114DA..1157F; UNKNOWN
6315             0x11580,  // 11580..115B5; SIDDHAM
6316             0x115B6,  // 115B6..115B7; UNKNOWN
6317             0x115B8,  // 115B8..115DD; SIDDHAM
6318             0x115DE,  // 115DE..115FF; UNKNOWN
6319             0x11600,  // 11600..11644; MODI
6320             0x11645,  // 11645..1164F; UNKNOWN
6321             0x11650,  // 11650..11659; MODI
6322             0x1165A,  // 1165A..1165F; UNKNOWN
6323             0x11660,  // 11660..1166C; MONGOLIAN
6324             0x1166D,  // 1166D..1167F; UNKNOWN
6325             0x11680,  // 11680..116B8; TAKRI
6326             0x116B9,  // 116B9..116BF; UNKNOWN
6327             0x116C0,  // 116C0..116C9; TAKRI
6328             0x116CA,  // 116CA..116FF; UNKNOWN
6329             0x11700,  // 11700..1171A; AHOM
6330             0x1171B,  // 1171B..1171C; UNKNOWN
6331             0x1171D,  // 1171D..1172B; AHOM
6332             0x1172C,  // 1172C..1172F; UNKNOWN
6333             0x11730,  // 11730..1173F; AHOM
6334             0x11740,  // 11740..117FF; UNKNOWN
6335             0x11800,  // 11800..1183B; DOGRA
6336             0x1183C,  // 1183C..1189F; UNKNOWN
6337             0x118A0,  // 118A0..118F2; WARANG_CITI
6338             0x118F3,  // 118F3..118FE; UNKNOWN
6339             0x118FF,  // 118FF       ; WARANG_CITI
6340             0x11900,  // 11900..11906; DIVES_AKURU
6341             0x11907,  // 11907..11908; UNKNOWN
6342             0x11909,  // 11909       ; DIVES_AKURU
6343             0x1190A,  // 1190A..1190B; UNKNOWN
6344             0x1190C,  // 1190C..11913; DIVES_AKURU
6345             0x11914,  // 11914       ; UNKNOWN
6346             0x11915,  // 11915..11916; DIVES_AKURU
6347             0x11917,  // 11917       ; UNKNOWN
6348             0x11918,  // 11918..11935; DIVES_AKURU
6349             0x11936,  // 11936       ; UNKNOWN
6350             0x11937,  // 11937..11938; DIVES_AKURU
6351             0x11939,  // 11939..1193A; UNKNOWN
6352             0x1193B,  // 1193B..11946; DIVES_AKURU
6353             0x11947,  // 11947..1194F; UNKNOWN
6354             0x11950,  // 11950..11959; DIVES_AKURU
6355             0x1195A,  // 1195A..1199F; UNKNOWN
6356             0x119A0,  // 119A0..119A7; NANDINAGARI
6357             0x119A8,  // 119A8..119A9; UNKNOWN
6358             0x119AA,  // 119AA..119D7; NANDINAGARI
6359             0x119D8,  // 119D8..119D9; UNKNOWN
6360             0x119DA,  // 119DA..119E4; NANDINAGARI
6361             0x119E5,  // 119E5..119FF; UNKNOWN
6362             0x11A00,  // 11A00..11A47; ZANABAZAR_SQUARE
6363             0x11A48,  // 11A48..11A4F; UNKNOWN
6364             0x11A50,  // 11A50..11AA2; SOYOMBO
6365             0x11AA3,  // 11AA3..11ABF; UNKNOWN
6366             0x11AC0,  // 11AC0..11AF8; PAU_CIN_HAU
6367             0x11AF9,  // 11AF9..11BFF; UNKNOWN
6368             0x11C00,  // 11C00..11C08; BHAIKSUKI
6369             0x11C09,  // 11C09       ; UNKNOWN
6370             0x11C0A,  // 11C0A..11C36; BHAIKSUKI
6371             0x11C37,  // 11C37       ; UNKNOWN
6372             0x11C38,  // 11C38..11C45; BHAIKSUKI
6373             0x11C46,  // 11C46..11C4F; UNKNOWN
6374             0x11C50,  // 11C50..11C6C; BHAIKSUKI
6375             0x11C6D,  // 11C6D..11C6F; UNKNOWN
6376             0x11C70,  // 11C70..11C8F; MARCHEN
6377             0x11C90,  // 11C90..11C91; UNKNOWN
6378             0x11C92,  // 11C92..11CA7; MARCHEN
6379             0x11CA8,  // 11CA8       ; UNKNOWN
6380             0x11CA9,  // 11CA9..11CB6; MARCHEN
6381             0x11CB7,  // 11CB7..11CFF; UNKNOWN
6382             0x11D00,  // 11D00..11D06; MASARAM_GONDI
6383             0x11D07,  // 11D07       ; UNKNOWN
6384             0x11D08,  // 11D08..11D09; MASARAM_GONDI
6385             0x11D0A,  // 11D0A       ; UNKNOWN
6386             0x11D0B,  // 11D0B..11D36; MASARAM_GONDI
6387             0x11D37,  // 11D37..11D39; UNKNOWN
6388             0x11D3A,  // 11D3A       ; MASARAM_GONDI
6389             0x11D3B,  // 11D3B       ; UNKNOWN
6390             0x11D3C,  // 11D3C..11D3D; MASARAM_GONDI
6391             0x11D3E,  // 11D3E       ; UNKNOWN
6392             0x11D3F,  // 11D3F..11D47; MASARAM_GONDI
6393             0x11D48,  // 11D48..11D4F; UNKNOWN
6394             0x11D50,  // 11D50..11D59; MASARAM_GONDI
6395             0x11D5A,  // 11D5A..11D5F; UNKNOWN
6396             0x11D60,  // 11D60..11D65; GUNJALA_GONDI
6397             0x11D66,  // 11D66       ; UNKNOWN
6398             0x11D67,  // 11D67..11D68; GUNJALA_GONDI
6399             0x11D69,  // 11D69       ; UNKNOWN
6400             0x11D6A,  // 11D6A..11D8E; GUNJALA_GONDI
6401             0x11D8F,  // 11D8F       ; UNKNOWN
6402             0x11D90,  // 11D90..11D91; GUNJALA_GONDI
6403             0x11D92,  // 11D92       ; UNKNOWN
6404             0x11D93,  // 11D93..11D98; GUNJALA_GONDI
6405             0x11D99,  // 11D99..11D9F; UNKNOWN
6406             0x11DA0,  // 11DA0..11DA9; GUNJALA_GONDI
6407             0x11DAA,  // 11DAA..11EDF; UNKNOWN
6408             0x11EE0,  // 11EE0..11EF8; MAKASAR
6409             0x11EF9,  // 11EF9..11FAF; UNKNOWN
6410             0x11FB0,  // 11FB0       ; LISU
6411             0x11FB1,  // 11FB1..11FBF; UNKNOWN
6412             0x11FC0,  // 11FC0..11FF1; TAMIL
6413             0x11FF2,  // 11FF2..11FFE; UNKNOWN
6414             0x11FFF,  // 11FFF       ; TAMIL
6415             0x12000,  // 12000..12399; CUNEIFORM
6416             0x1239A,  // 1239A..123FF; UNKNOWN
6417             0x12400,  // 12400..1246E; CUNEIFORM
6418             0x1246F,  // 1246F       ; UNKNOWN
6419             0x12470,  // 12470..12474; CUNEIFORM
6420             0x12475,  // 12475..1247F; UNKNOWN
6421             0x12480,  // 12480..12543; CUNEIFORM
6422             0x12544,  // 12544..12FFF; UNKNOWN
6423             0x13000,  // 13000..1342E; EGYPTIAN_HIEROGLYPHS
6424             0x1342F,  // 1342F       ; UNKNOWN
6425             0x13430,  // 13430..13438; EGYPTIAN_HIEROGLYPHS
6426             0x13439,  // 13439..143FF; UNKNOWN
6427             0x14400,  // 14400..14646; ANATOLIAN_HIEROGLYPHS
6428             0x14647,  // 14647..167FF; UNKNOWN
6429             0x16800,  // 16800..16A38; BAMUM
6430             0x16A39,  // 16A39..16A3F; UNKNOWN
6431             0x16A40,  // 16A40..16A5E; MRO
6432             0x16A5F,  // 16A5F       ; UNKNOWN
6433             0x16A60,  // 16A60..16A69; MRO
6434             0x16A6A,  // 16A6A..16A6D; UNKNOWN
6435             0x16A6E,  // 16A6E..16A6F; MRO
6436             0x16A70,  // 16A70..16ACF; UNKNOWN
6437             0x16AD0,  // 16AD0..16AED; BASSA_VAH
6438             0x16AEE,  // 16AEE..16AEF; UNKNOWN
6439             0x16AF0,  // 16AF0..16AF5; BASSA_VAH
6440             0x16AF6,  // 16AF6..16AFF; UNKNOWN
6441             0x16B00,  // 16B00..16B45; PAHAWH_HMONG
6442             0x16B46,  // 16B46..16B4F; UNKNOWN
6443             0x16B50,  // 16B50..16B59; PAHAWH_HMONG
6444             0x16B5A,  // 16B5A       ; UNKNOWN
6445             0x16B5B,  // 16B5B..16B61; PAHAWH_HMONG
6446             0x16B62,  // 16B62       ; UNKNOWN
6447             0x16B63,  // 16B63..16B77; PAHAWH_HMONG
6448             0x16B78,  // 16B78..16B7C; UNKNOWN
6449             0x16B7D,  // 16B7D..16B8F; PAHAWH_HMONG
6450             0x16B90,  // 16B90..16E3F; UNKNOWN
6451             0x16E40,  // 16E40..16E9A; MEDEFAIDRIN
6452             0x16E9B,  // 16E9B..16EFF; UNKNOWN
6453             0x16F00,  // 16F00..16F4A; MIAO
6454             0x16F4B,  // 16F4B..16F4E; UNKNOWN
6455             0x16F4F,  // 16F4F..16F87; MIAO
6456             0x16F88,  // 16F88..16F8E; UNKNOWN
6457             0x16F8F,  // 16F8F..16F9F; MIAO
6458             0x16FA0,  // 16FA0..16FDF; UNKNOWN
6459             0x16FE0,  // 16FE0       ; TANGUT
6460             0x16FE1,  // 16FE1       ; NUSHU
6461             0x16FE2,  // 16FE2..16FE3; COMMON
6462             0x16FE4,  // 16FE4       ; KHITAN_SMALL_SCRIPT
6463             0x16FE5,  // 16FE5..16FEF; UNKNOWN
6464             0x16FF0,  // 16FF0..16FF1; HAN
6465             0x16FF2,  // 16FF2..16FFF; UNKNOWN
6466             0x17000,  // 17000..187F7; TANGUT
6467             0x187F8,  // 187F8..187FF; UNKNOWN
6468             0x18800,  // 18800..18AFF; TANGUT
6469             0x18B00,  // 18B00..18CD5; KHITAN_SMALL_SCRIPT
6470             0x18CD6,  // 18CD6..18CFF; UNKNOWN
6471             0x18D00,  // 18D00..18D08; TANGUT
6472             0x18D09,  // 18D09..1AFFF; UNKNOWN
6473             0x1B000,  // 1B000       ; KATAKANA
6474             0x1B001,  // 1B001..1B11E; HIRAGANA
6475             0x1B11F,  // 1B11F..1B14F; UNKNOWN
6476             0x1B150,  // 1B150..1B152; HIRAGANA
6477             0x1B153,  // 1B153..1B163; UNKNOWN
6478             0x1B164,  // 1B164..1B167; KATAKANA
6479             0x1B168,  // 1B168..1B16F; UNKNOWN
6480             0x1B170,  // 1B170..1B2FB; NUSHU
6481             0x1B2FC,  // 1B2FC..1BBFF; UNKNOWN
6482             0x1BC00,  // 1BC00..1BC6A; DUPLOYAN
6483             0x1BC6B,  // 1BC6B..1BC6F; UNKNOWN
6484             0x1BC70,  // 1BC70..1BC7C; DUPLOYAN
6485             0x1BC7D,  // 1BC7D..1BC7F; UNKNOWN
6486             0x1BC80,  // 1BC80..1BC88; DUPLOYAN
6487             0x1BC89,  // 1BC89..1BC8F; UNKNOWN
6488             0x1BC90,  // 1BC90..1BC99; DUPLOYAN
6489             0x1BC9A,  // 1BC9A..1BC9B; UNKNOWN
6490             0x1BC9C,  // 1BC9C..1BC9F; DUPLOYAN
6491             0x1BCA0,  // 1BCA0..1BCA3; COMMON
6492             0x1BCA4,  // 1BCA4..1CFFF; UNKNOWN
6493             0x1D000,  // 1D000..1D0F5; COMMON
6494             0x1D0F6,  // 1D0F6..1D0FF; UNKNOWN
6495             0x1D100,  // 1D100..1D126; COMMON
6496             0x1D127,  // 1D127..1D128; UNKNOWN
6497             0x1D129,  // 1D129..1D166; COMMON
6498             0x1D167,  // 1D167..1D169; INHERITED
6499             0x1D16A,  // 1D16A..1D17A; COMMON
6500             0x1D17B,  // 1D17B..1D182; INHERITED
6501             0x1D183,  // 1D183..1D184; COMMON
6502             0x1D185,  // 1D185..1D18B; INHERITED
6503             0x1D18C,  // 1D18C..1D1A9; COMMON
6504             0x1D1AA,  // 1D1AA..1D1AD; INHERITED
6505             0x1D1AE,  // 1D1AE..1D1E8; COMMON
6506             0x1D1E9,  // 1D1E9..1D1FF; UNKNOWN
6507             0x1D200,  // 1D200..1D245; GREEK
6508             0x1D246,  // 1D246..1D2DF; UNKNOWN
6509             0x1D2E0,  // 1D2E0..1D2F3; COMMON
6510             0x1D2F4,  // 1D2F4..1D2FF; UNKNOWN
6511             0x1D300,  // 1D300..1D356; COMMON
6512             0x1D357,  // 1D357..1D35F; UNKNOWN
6513             0x1D360,  // 1D360..1D378; COMMON
6514             0x1D379,  // 1D379..1D3FF; UNKNOWN
6515             0x1D400,  // 1D400..1D454; COMMON
6516             0x1D455,  // 1D455       ; UNKNOWN
6517             0x1D456,  // 1D456..1D49C; COMMON
6518             0x1D49D,  // 1D49D       ; UNKNOWN
6519             0x1D49E,  // 1D49E..1D49F; COMMON
6520             0x1D4A0,  // 1D4A0..1D4A1; UNKNOWN
6521             0x1D4A2,  // 1D4A2       ; COMMON
6522             0x1D4A3,  // 1D4A3..1D4A4; UNKNOWN
6523             0x1D4A5,  // 1D4A5..1D4A6; COMMON
6524             0x1D4A7,  // 1D4A7..1D4A8; UNKNOWN
6525             0x1D4A9,  // 1D4A9..1D4AC; COMMON
6526             0x1D4AD,  // 1D4AD       ; UNKNOWN
6527             0x1D4AE,  // 1D4AE..1D4B9; COMMON
6528             0x1D4BA,  // 1D4BA       ; UNKNOWN
6529             0x1D4BB,  // 1D4BB       ; COMMON
6530             0x1D4BC,  // 1D4BC       ; UNKNOWN
6531             0x1D4BD,  // 1D4BD..1D4C3; COMMON
6532             0x1D4C4,  // 1D4C4       ; UNKNOWN
6533             0x1D4C5,  // 1D4C5..1D505; COMMON
6534             0x1D506,  // 1D506       ; UNKNOWN
6535             0x1D507,  // 1D507..1D50A; COMMON
6536             0x1D50B,  // 1D50B..1D50C; UNKNOWN
6537             0x1D50D,  // 1D50D..1D514; COMMON
6538             0x1D515,  // 1D515       ; UNKNOWN
6539             0x1D516,  // 1D516..1D51C; COMMON
6540             0x1D51D,  // 1D51D       ; UNKNOWN
6541             0x1D51E,  // 1D51E..1D539; COMMON
6542             0x1D53A,  // 1D53A       ; UNKNOWN
6543             0x1D53B,  // 1D53B..1D53E; COMMON
6544             0x1D53F,  // 1D53F       ; UNKNOWN
6545             0x1D540,  // 1D540..1D544; COMMON
6546             0x1D545,  // 1D545       ; UNKNOWN
6547             0x1D546,  // 1D546       ; COMMON
6548             0x1D547,  // 1D547..1D549; UNKNOWN
6549             0x1D54A,  // 1D54A..1D550; COMMON
6550             0x1D551,  // 1D551       ; UNKNOWN
6551             0x1D552,  // 1D552..1D6A5; COMMON
6552             0x1D6A6,  // 1D6A6..1D6A7; UNKNOWN
6553             0x1D6A8,  // 1D6A8..1D7CB; COMMON
6554             0x1D7CC,  // 1D7CC..1D7CD; UNKNOWN
6555             0x1D7CE,  // 1D7CE..1D7FF; COMMON
6556             0x1D800,  // 1D800..1DA8B; SIGNWRITING
6557             0x1DA8C,  // 1DA8C..1DA9A; UNKNOWN
6558             0x1DA9B,  // 1DA9B..1DA9F; SIGNWRITING
6559             0x1DAA0,  // 1DAA0       ; UNKNOWN
6560             0x1DAA1,  // 1DAA1..1DAAF; SIGNWRITING
6561             0x1DAB0,  // 1DAB0..1DFFF; UNKNOWN
6562             0x1E000,  // 1E000..1E006; GLAGOLITIC
6563             0x1E007,  // 1E007       ; UNKNOWN
6564             0x1E008,  // 1E008..1E018; GLAGOLITIC
6565             0x1E019,  // 1E019..1E01A; UNKNOWN
6566             0x1E01B,  // 1E01B..1E021; GLAGOLITIC
6567             0x1E022,  // 1E022       ; UNKNOWN
6568             0x1E023,  // 1E023..1E024; GLAGOLITIC
6569             0x1E025,  // 1E025       ; UNKNOWN
6570             0x1E026,  // 1E026..1E02A; GLAGOLITIC
6571             0x1E02B,  // 1E02B..1E0FF; UNKNOWN
6572             0x1E100,  // 1E100..1E12C; NYIAKENG_PUACHUE_HMONG
6573             0x1E12D,  // 1E12D..1E12F; UNKNOWN
6574             0x1E130,  // 1E130..1E13D; NYIAKENG_PUACHUE_HMONG
6575             0x1E13E,  // 1E13E..1E13F; UNKNOWN
6576             0x1E140,  // 1E140..1E149; NYIAKENG_PUACHUE_HMONG
6577             0x1E14A,  // 1E14A..1E14D; UNKNOWN
6578             0x1E14E,  // 1E14E..1E14F; NYIAKENG_PUACHUE_HMONG
6579             0x1E150,  // 1E150..1E2BF; UNKNOWN
6580             0x1E2C0,  // 1E2C0..1E2F9; WANCHO
6581             0x1E2FA,  // 1E2FA..1E2FE; UNKNOWN
6582             0x1E2FF,  // 1E2FF       ; WANCHO
6583             0x1E300,  // 1E300..1E7FF; UNKNOWN
6584             0x1E800,  // 1E800..1E8C4; MENDE_KIKAKUI
6585             0x1E8C5,  // 1E8C5..1E8C6; UNKNOWN
6586             0x1E8C7,  // 1E8C7..1E8D6; MENDE_KIKAKUI
6587             0x1E8D7,  // 1E8D7..1E8FF; UNKNOWN
6588             0x1E900,  // 1E900..1E94B; ADLAM
6589             0x1E94C,  // 1E94C..1E94F; UNKNOWN
6590             0x1E950,  // 1E950..1E959; ADLAM
6591             0x1E95A,  // 1E95A..1E95D; UNKNOWN
6592             0x1E95E,  // 1E95E..1E95F; ADLAM
6593             0x1E960,  // 1E960..1EC70; UNKNOWN
6594             0x1EC71,  // 1EC71..1ECB4; COMMON
6595             0x1ECB5,  // 1ECB5..1ED00; UNKNOWN
6596             0x1ED01,  // 1ED01..1ED3D; COMMON
6597             0x1ED3E,  // 1ED3E..1EDFF; UNKNOWN
6598             0x1EE00,  // 1EE00..1EE03; ARABIC
6599             0x1EE04,  // 1EE04       ; UNKNOWN
6600             0x1EE05,  // 1EE05..1EE1F; ARABIC
6601             0x1EE20,  // 1EE20       ; UNKNOWN
6602             0x1EE21,  // 1EE21..1EE22; ARABIC
6603             0x1EE23,  // 1EE23       ; UNKNOWN
6604             0x1EE24,  // 1EE24       ; ARABIC
6605             0x1EE25,  // 1EE25..1EE26; UNKNOWN
6606             0x1EE27,  // 1EE27       ; ARABIC
6607             0x1EE28,  // 1EE28       ; UNKNOWN
6608             0x1EE29,  // 1EE29..1EE32; ARABIC
6609             0x1EE33,  // 1EE33       ; UNKNOWN
6610             0x1EE34,  // 1EE34..1EE37; ARABIC
6611             0x1EE38,  // 1EE38       ; UNKNOWN
6612             0x1EE39,  // 1EE39       ; ARABIC
6613             0x1EE3A,  // 1EE3A       ; UNKNOWN
6614             0x1EE3B,  // 1EE3B       ; ARABIC
6615             0x1EE3C,  // 1EE3C..1EE41; UNKNOWN
6616             0x1EE42,  // 1EE42       ; ARABIC
6617             0x1EE43,  // 1EE43..1EE46; UNKNOWN
6618             0x1EE47,  // 1EE47       ; ARABIC
6619             0x1EE48,  // 1EE48       ; UNKNOWN
6620             0x1EE49,  // 1EE49       ; ARABIC
6621             0x1EE4A,  // 1EE4A       ; UNKNOWN
6622             0x1EE4B,  // 1EE4B       ; ARABIC
6623             0x1EE4C,  // 1EE4C       ; UNKNOWN
6624             0x1EE4D,  // 1EE4D..1EE4F; ARABIC
6625             0x1EE50,  // 1EE50       ; UNKNOWN
6626             0x1EE51,  // 1EE51..1EE52; ARABIC
6627             0x1EE53,  // 1EE53       ; UNKNOWN
6628             0x1EE54,  // 1EE54       ; ARABIC
6629             0x1EE55,  // 1EE55..1EE56; UNKNOWN
6630             0x1EE57,  // 1EE57       ; ARABIC
6631             0x1EE58,  // 1EE58       ; UNKNOWN
6632             0x1EE59,  // 1EE59       ; ARABIC
6633             0x1EE5A,  // 1EE5A       ; UNKNOWN
6634             0x1EE5B,  // 1EE5B       ; ARABIC
6635             0x1EE5C,  // 1EE5C       ; UNKNOWN
6636             0x1EE5D,  // 1EE5D       ; ARABIC
6637             0x1EE5E,  // 1EE5E       ; UNKNOWN
6638             0x1EE5F,  // 1EE5F       ; ARABIC
6639             0x1EE60,  // 1EE60       ; UNKNOWN
6640             0x1EE61,  // 1EE61..1EE62; ARABIC
6641             0x1EE63,  // 1EE63       ; UNKNOWN
6642             0x1EE64,  // 1EE64       ; ARABIC
6643             0x1EE65,  // 1EE65..1EE66; UNKNOWN
6644             0x1EE67,  // 1EE67..1EE6A; ARABIC
6645             0x1EE6B,  // 1EE6B       ; UNKNOWN
6646             0x1EE6C,  // 1EE6C..1EE72; ARABIC
6647             0x1EE73,  // 1EE73       ; UNKNOWN
6648             0x1EE74,  // 1EE74..1EE77; ARABIC
6649             0x1EE78,  // 1EE78       ; UNKNOWN
6650             0x1EE79,  // 1EE79..1EE7C; ARABIC
6651             0x1EE7D,  // 1EE7D       ; UNKNOWN
6652             0x1EE7E,  // 1EE7E       ; ARABIC
6653             0x1EE7F,  // 1EE7F       ; UNKNOWN
6654             0x1EE80,  // 1EE80..1EE89; ARABIC
6655             0x1EE8A,  // 1EE8A       ; UNKNOWN
6656             0x1EE8B,  // 1EE8B..1EE9B; ARABIC
6657             0x1EE9C,  // 1EE9C..1EEA0; UNKNOWN
6658             0x1EEA1,  // 1EEA1..1EEA3; ARABIC
6659             0x1EEA4,  // 1EEA4       ; UNKNOWN
6660             0x1EEA5,  // 1EEA5..1EEA9; ARABIC
6661             0x1EEAA,  // 1EEAA       ; UNKNOWN
6662             0x1EEAB,  // 1EEAB..1EEBB; ARABIC
6663             0x1EEBC,  // 1EEBC..1EEEF; UNKNOWN
6664             0x1EEF0,  // 1EEF0..1EEF1; ARABIC
6665             0x1EEF2,  // 1EEF2..1EFFF; UNKNOWN
6666             0x1F000,  // 1F000..1F02B; COMMON
6667             0x1F02C,  // 1F02C..1F02F; UNKNOWN
6668             0x1F030,  // 1F030..1F093; COMMON
6669             0x1F094,  // 1F094..1F09F; UNKNOWN
6670             0x1F0A0,  // 1F0A0..1F0AE; COMMON
6671             0x1F0AF,  // 1F0AF..1F0B0; UNKNOWN
6672             0x1F0B1,  // 1F0B1..1F0BF; COMMON
6673             0x1F0C0,  // 1F0C0       ; UNKNOWN
6674             0x1F0C1,  // 1F0C1..1F0CF; COMMON
6675             0x1F0D0,  // 1F0D0       ; UNKNOWN
6676             0x1F0D1,  // 1F0D1..1F0F5; COMMON
6677             0x1F0F6,  // 1F0F6..1F0FF; UNKNOWN
6678             0x1F100,  // 1F100..1F1AD; COMMON
6679             0x1F1AE,  // 1F1AE..1F1E5; UNKNOWN
6680             0x1F1E6,  // 1F1E6..1F1FF; COMMON
6681             0x1F200,  // 1F200       ; HIRAGANA
6682             0x1F201,  // 1F201..1F202; COMMON
6683             0x1F203,  // 1F203..1F20F; UNKNOWN
6684             0x1F210,  // 1F210..1F23B; COMMON
6685             0x1F23C,  // 1F23C..1F23F; UNKNOWN
6686             0x1F240,  // 1F240..1F248; COMMON
6687             0x1F249,  // 1F249..1F24F; UNKNOWN
6688             0x1F250,  // 1F250..1F251; COMMON
6689             0x1F252,  // 1F252..1F25F; UNKNOWN
6690             0x1F260,  // 1F260..1F265; COMMON
6691             0x1F266,  // 1F266..1F2FF; UNKNOWN
6692             0x1F300,  // 1F300..1F6D7; COMMON
6693             0x1F6D8,  // 1F6D8..1F6DF; UNKNOWN
6694             0x1F6E0,  // 1F6E0..1F6EC; COMMON
6695             0x1F6ED,  // 1F6ED..1F6EF; UNKNOWN
6696             0x1F6F0,  // 1F6F0..1F6FC; COMMON
6697             0x1F6FD,  // 1F6FD..1F6FF; UNKNOWN
6698             0x1F700,  // 1F700..1F773; COMMON
6699             0x1F774,  // 1F774..1F77F; UNKNOWN
6700             0x1F780,  // 1F780..1F7D8; COMMON
6701             0x1F7D9,  // 1F7D9..1F7DF; UNKNOWN
6702             0x1F7E0,  // 1F7E0..1F7EB; COMMON
6703             0x1F7EC,  // 1F7EC..1F7FF; UNKNOWN
6704             0x1F800,  // 1F800..1F80B; COMMON
6705             0x1F80C,  // 1F80C..1F80F; UNKNOWN
6706             0x1F810,  // 1F810..1F847; COMMON
6707             0x1F848,  // 1F848..1F84F; UNKNOWN
6708             0x1F850,  // 1F850..1F859; COMMON
6709             0x1F85A,  // 1F85A..1F85F; UNKNOWN
6710             0x1F860,  // 1F860..1F887; COMMON
6711             0x1F888,  // 1F888..1F88F; UNKNOWN
6712             0x1F890,  // 1F890..1F8AD; COMMON
6713             0x1F8AE,  // 1F8AE..1F8AF; UNKNOWN
6714             0x1F8B0,  // 1F8B0..1F8B1; COMMON
6715             0x1F8B2,  // 1F8B2..1F8FF; UNKNOWN
6716             0x1F900,  // 1F900..1F978; COMMON
6717             0x1F979,  // 1F979       ; UNKNOWN
6718             0x1F97A,  // 1F97A..1F9CB; COMMON
6719             0x1F9CC,  // 1F9CC       ; UNKNOWN
6720             0x1F9CD,  // 1F9CD..1FA53; COMMON
6721             0x1FA54,  // 1FA54..1FA5F; UNKNOWN
6722             0x1FA60,  // 1FA60..1FA6D; COMMON
6723             0x1FA6E,  // 1FA6E..1FA6F; UNKNOWN
6724             0x1FA70,  // 1FA70..1FA74; COMMON
6725             0x1FA75,  // 1FA75..1FA77; UNKNOWN
6726             0x1FA78,  // 1FA78..1FA7A; COMMON
6727             0x1FA7B,  // 1FA7B..1FA7F; UNKNOWN
6728             0x1FA80,  // 1FA80..1FA86; COMMON
6729             0x1FA87,  // 1FA87..1FA8F; UNKNOWN
6730             0x1FA90,  // 1FA90..1FAA8; COMMON
6731             0x1FAA9,  // 1FAA9..1FAAF; UNKNOWN
6732             0x1FAB0,  // 1FAB0..1FAB6; COMMON
6733             0x1FAB7,  // 1FAB7..1FABF; UNKNOWN
6734             0x1FAC0,  // 1FAC0..1FAC2; COMMON
6735             0x1FAC3,  // 1FAC3..1FACF; UNKNOWN
6736             0x1FAD0,  // 1FAD0..1FAD6; COMMON
6737             0x1FAD7,  // 1FAD7..1FAFF; UNKNOWN
6738             0x1FB00,  // 1FB00..1FB92; COMMON
6739             0x1FB93,  // 1FB93       ; UNKNOWN
6740             0x1FB94,  // 1FB94..1FBCA; COMMON
6741             0x1FBCB,  // 1FBCB..1FBEF; UNKNOWN
6742             0x1FBF0,  // 1FBF0..1FBF9; COMMON
6743             0x1FBFA,  // 1FBFA..1FFFF; UNKNOWN
6744             0x20000,  // 20000..2A6DD; HAN
6745             0x2A6DE,  // 2A6DE..2A6FF; UNKNOWN
6746             0x2A700,  // 2A700..2B734; HAN
6747             0x2B735,  // 2B735..2B73F; UNKNOWN
6748             0x2B740,  // 2B740..2B81D; HAN
6749             0x2B81E,  // 2B81E..2B81F; UNKNOWN
6750             0x2B820,  // 2B820..2CEA1; HAN
6751             0x2CEA2,  // 2CEA2..2CEAF; UNKNOWN
6752             0x2CEB0,  // 2CEB0..2EBE0; HAN
6753             0x2EBE1,  // 2EBE1..2F7FF; UNKNOWN
6754             0x2F800,  // 2F800..2FA1D; HAN
6755             0x2FA1E,  // 2FA1E..2FFFF; UNKNOWN
6756             0x30000,  // 30000..3134A; HAN
6757             0x3134B,  // 3134B..E0000; UNKNOWN
6758             0xE0001,  // E0001       ; COMMON
6759             0xE0002,  // E0002..E001F; UNKNOWN
6760             0xE0020,  // E0020..E007F; COMMON
6761             0xE0080,  // E0080..E00FF; UNKNOWN
6762             0xE0100,  // E0100..E01EF; INHERITED
6763             0xE01F0,  // E01F0..10FFFF; UNKNOWN
6764         };
6765 
6766         private static final UnicodeScript[] scripts = {
6767             COMMON,                   // 0000..0040
6768             LATIN,                    // 0041..005A
6769             COMMON,                   // 005B..0060
6770             LATIN,                    // 0061..007A
6771             COMMON,                   // 007B..00A9
6772             LATIN,                    // 00AA
6773             COMMON,                   // 00AB..00B9
6774             LATIN,                    // 00BA
6775             COMMON,                   // 00BB..00BF
6776             LATIN,                    // 00C0..00D6
6777             COMMON,                   // 00D7
6778             LATIN,                    // 00D8..00F6
6779             COMMON,                   // 00F7
6780             LATIN,                    // 00F8..02B8
6781             COMMON,                   // 02B9..02DF
6782             LATIN,                    // 02E0..02E4
6783             COMMON,                   // 02E5..02E9
6784             BOPOMOFO,                 // 02EA..02EB
6785             COMMON,                   // 02EC..02FF
6786             INHERITED,                // 0300..036F
6787             GREEK,                    // 0370..0373
6788             COMMON,                   // 0374
6789             GREEK,                    // 0375..0377
6790             UNKNOWN,                  // 0378..0379
6791             GREEK,                    // 037A..037D
6792             COMMON,                   // 037E
6793             GREEK,                    // 037F
6794             UNKNOWN,                  // 0380..0383
6795             GREEK,                    // 0384
6796             COMMON,                   // 0385
6797             GREEK,                    // 0386
6798             COMMON,                   // 0387
6799             GREEK,                    // 0388..038A
6800             UNKNOWN,                  // 038B
6801             GREEK,                    // 038C
6802             UNKNOWN,                  // 038D
6803             GREEK,                    // 038E..03A1
6804             UNKNOWN,                  // 03A2
6805             GREEK,                    // 03A3..03E1
6806             COPTIC,                   // 03E2..03EF
6807             GREEK,                    // 03F0..03FF
6808             CYRILLIC,                 // 0400..0484
6809             INHERITED,                // 0485..0486
6810             CYRILLIC,                 // 0487..052F
6811             UNKNOWN,                  // 0530
6812             ARMENIAN,                 // 0531..0556
6813             UNKNOWN,                  // 0557..0558
6814             ARMENIAN,                 // 0559..058A
6815             UNKNOWN,                  // 058B..058C
6816             ARMENIAN,                 // 058D..058F
6817             UNKNOWN,                  // 0590
6818             HEBREW,                   // 0591..05C7
6819             UNKNOWN,                  // 05C8..05CF
6820             HEBREW,                   // 05D0..05EA
6821             UNKNOWN,                  // 05EB..05EE
6822             HEBREW,                   // 05EF..05F4
6823             UNKNOWN,                  // 05F5..05FF
6824             ARABIC,                   // 0600..0604
6825             COMMON,                   // 0605
6826             ARABIC,                   // 0606..060B
6827             COMMON,                   // 060C
6828             ARABIC,                   // 060D..061A
6829             COMMON,                   // 061B
6830             ARABIC,                   // 061C
6831             UNKNOWN,                  // 061D
6832             ARABIC,                   // 061E
6833             COMMON,                   // 061F
6834             ARABIC,                   // 0620..063F
6835             COMMON,                   // 0640
6836             ARABIC,                   // 0641..064A
6837             INHERITED,                // 064B..0655
6838             ARABIC,                   // 0656..066F
6839             INHERITED,                // 0670
6840             ARABIC,                   // 0671..06DC
6841             COMMON,                   // 06DD
6842             ARABIC,                   // 06DE..06FF
6843             SYRIAC,                   // 0700..070D
6844             UNKNOWN,                  // 070E
6845             SYRIAC,                   // 070F..074A
6846             UNKNOWN,                  // 074B..074C
6847             SYRIAC,                   // 074D..074F
6848             ARABIC,                   // 0750..077F
6849             THAANA,                   // 0780..07B1
6850             UNKNOWN,                  // 07B2..07BF
6851             NKO,                      // 07C0..07FA
6852             UNKNOWN,                  // 07FB..07FC
6853             NKO,                      // 07FD..07FF
6854             SAMARITAN,                // 0800..082D
6855             UNKNOWN,                  // 082E..082F
6856             SAMARITAN,                // 0830..083E
6857             UNKNOWN,                  // 083F
6858             MANDAIC,                  // 0840..085B
6859             UNKNOWN,                  // 085C..085D
6860             MANDAIC,                  // 085E
6861             UNKNOWN,                  // 085F
6862             SYRIAC,                   // 0860..086A
6863             UNKNOWN,                  // 086B..089F
6864             ARABIC,                   // 08A0..08B4
6865             UNKNOWN,                  // 08B5
6866             ARABIC,                   // 08B6..08C7
6867             UNKNOWN,                  // 08C8..08D2
6868             ARABIC,                   // 08D3..08E1
6869             COMMON,                   // 08E2
6870             ARABIC,                   // 08E3..08FF
6871             DEVANAGARI,               // 0900..0950
6872             INHERITED,                // 0951..0954
6873             DEVANAGARI,               // 0955..0963
6874             COMMON,                   // 0964..0965
6875             DEVANAGARI,               // 0966..097F
6876             BENGALI,                  // 0980..0983
6877             UNKNOWN,                  // 0984
6878             BENGALI,                  // 0985..098C
6879             UNKNOWN,                  // 098D..098E
6880             BENGALI,                  // 098F..0990
6881             UNKNOWN,                  // 0991..0992
6882             BENGALI,                  // 0993..09A8
6883             UNKNOWN,                  // 09A9
6884             BENGALI,                  // 09AA..09B0
6885             UNKNOWN,                  // 09B1
6886             BENGALI,                  // 09B2
6887             UNKNOWN,                  // 09B3..09B5
6888             BENGALI,                  // 09B6..09B9
6889             UNKNOWN,                  // 09BA..09BB
6890             BENGALI,                  // 09BC..09C4
6891             UNKNOWN,                  // 09C5..09C6
6892             BENGALI,                  // 09C7..09C8
6893             UNKNOWN,                  // 09C9..09CA
6894             BENGALI,                  // 09CB..09CE
6895             UNKNOWN,                  // 09CF..09D6
6896             BENGALI,                  // 09D7
6897             UNKNOWN,                  // 09D8..09DB
6898             BENGALI,                  // 09DC..09DD
6899             UNKNOWN,                  // 09DE
6900             BENGALI,                  // 09DF..09E3
6901             UNKNOWN,                  // 09E4..09E5
6902             BENGALI,                  // 09E6..09FE
6903             UNKNOWN,                  // 09FF..0A00
6904             GURMUKHI,                 // 0A01..0A03
6905             UNKNOWN,                  // 0A04
6906             GURMUKHI,                 // 0A05..0A0A
6907             UNKNOWN,                  // 0A0B..0A0E
6908             GURMUKHI,                 // 0A0F..0A10
6909             UNKNOWN,                  // 0A11..0A12
6910             GURMUKHI,                 // 0A13..0A28
6911             UNKNOWN,                  // 0A29
6912             GURMUKHI,                 // 0A2A..0A30
6913             UNKNOWN,                  // 0A31
6914             GURMUKHI,                 // 0A32..0A33
6915             UNKNOWN,                  // 0A34
6916             GURMUKHI,                 // 0A35..0A36
6917             UNKNOWN,                  // 0A37
6918             GURMUKHI,                 // 0A38..0A39
6919             UNKNOWN,                  // 0A3A..0A3B
6920             GURMUKHI,                 // 0A3C
6921             UNKNOWN,                  // 0A3D
6922             GURMUKHI,                 // 0A3E..0A42
6923             UNKNOWN,                  // 0A43..0A46
6924             GURMUKHI,                 // 0A47..0A48
6925             UNKNOWN,                  // 0A49..0A4A
6926             GURMUKHI,                 // 0A4B..0A4D
6927             UNKNOWN,                  // 0A4E..0A50
6928             GURMUKHI,                 // 0A51
6929             UNKNOWN,                  // 0A52..0A58
6930             GURMUKHI,                 // 0A59..0A5C
6931             UNKNOWN,                  // 0A5D
6932             GURMUKHI,                 // 0A5E
6933             UNKNOWN,                  // 0A5F..0A65
6934             GURMUKHI,                 // 0A66..0A76
6935             UNKNOWN,                  // 0A77..0A80
6936             GUJARATI,                 // 0A81..0A83
6937             UNKNOWN,                  // 0A84
6938             GUJARATI,                 // 0A85..0A8D
6939             UNKNOWN,                  // 0A8E
6940             GUJARATI,                 // 0A8F..0A91
6941             UNKNOWN,                  // 0A92
6942             GUJARATI,                 // 0A93..0AA8
6943             UNKNOWN,                  // 0AA9
6944             GUJARATI,                 // 0AAA..0AB0
6945             UNKNOWN,                  // 0AB1
6946             GUJARATI,                 // 0AB2..0AB3
6947             UNKNOWN,                  // 0AB4
6948             GUJARATI,                 // 0AB5..0AB9
6949             UNKNOWN,                  // 0ABA..0ABB
6950             GUJARATI,                 // 0ABC..0AC5
6951             UNKNOWN,                  // 0AC6
6952             GUJARATI,                 // 0AC7..0AC9
6953             UNKNOWN,                  // 0ACA
6954             GUJARATI,                 // 0ACB..0ACD
6955             UNKNOWN,                  // 0ACE..0ACF
6956             GUJARATI,                 // 0AD0
6957             UNKNOWN,                  // 0AD1..0ADF
6958             GUJARATI,                 // 0AE0..0AE3
6959             UNKNOWN,                  // 0AE4..0AE5
6960             GUJARATI,                 // 0AE6..0AF1
6961             UNKNOWN,                  // 0AF2..0AF8
6962             GUJARATI,                 // 0AF9..0AFF
6963             UNKNOWN,                  // 0B00
6964             ORIYA,                    // 0B01..0B03
6965             UNKNOWN,                  // 0B04
6966             ORIYA,                    // 0B05..0B0C
6967             UNKNOWN,                  // 0B0D..0B0E
6968             ORIYA,                    // 0B0F..0B10
6969             UNKNOWN,                  // 0B11..0B12
6970             ORIYA,                    // 0B13..0B28
6971             UNKNOWN,                  // 0B29
6972             ORIYA,                    // 0B2A..0B30
6973             UNKNOWN,                  // 0B31
6974             ORIYA,                    // 0B32..0B33
6975             UNKNOWN,                  // 0B34
6976             ORIYA,                    // 0B35..0B39
6977             UNKNOWN,                  // 0B3A..0B3B
6978             ORIYA,                    // 0B3C..0B44
6979             UNKNOWN,                  // 0B45..0B46
6980             ORIYA,                    // 0B47..0B48
6981             UNKNOWN,                  // 0B49..0B4A
6982             ORIYA,                    // 0B4B..0B4D
6983             UNKNOWN,                  // 0B4E..0B54
6984             ORIYA,                    // 0B55..0B57
6985             UNKNOWN,                  // 0B58..0B5B
6986             ORIYA,                    // 0B5C..0B5D
6987             UNKNOWN,                  // 0B5E
6988             ORIYA,                    // 0B5F..0B63
6989             UNKNOWN,                  // 0B64..0B65
6990             ORIYA,                    // 0B66..0B77
6991             UNKNOWN,                  // 0B78..0B81
6992             TAMIL,                    // 0B82..0B83
6993             UNKNOWN,                  // 0B84
6994             TAMIL,                    // 0B85..0B8A
6995             UNKNOWN,                  // 0B8B..0B8D
6996             TAMIL,                    // 0B8E..0B90
6997             UNKNOWN,                  // 0B91
6998             TAMIL,                    // 0B92..0B95
6999             UNKNOWN,                  // 0B96..0B98
7000             TAMIL,                    // 0B99..0B9A
7001             UNKNOWN,                  // 0B9B
7002             TAMIL,                    // 0B9C
7003             UNKNOWN,                  // 0B9D
7004             TAMIL,                    // 0B9E..0B9F
7005             UNKNOWN,                  // 0BA0..0BA2
7006             TAMIL,                    // 0BA3..0BA4
7007             UNKNOWN,                  // 0BA5..0BA7
7008             TAMIL,                    // 0BA8..0BAA
7009             UNKNOWN,                  // 0BAB..0BAD
7010             TAMIL,                    // 0BAE..0BB9
7011             UNKNOWN,                  // 0BBA..0BBD
7012             TAMIL,                    // 0BBE..0BC2
7013             UNKNOWN,                  // 0BC3..0BC5
7014             TAMIL,                    // 0BC6..0BC8
7015             UNKNOWN,                  // 0BC9
7016             TAMIL,                    // 0BCA..0BCD
7017             UNKNOWN,                  // 0BCE..0BCF
7018             TAMIL,                    // 0BD0
7019             UNKNOWN,                  // 0BD1..0BD6
7020             TAMIL,                    // 0BD7
7021             UNKNOWN,                  // 0BD8..0BE5
7022             TAMIL,                    // 0BE6..0BFA
7023             UNKNOWN,                  // 0BFB..0BFF
7024             TELUGU,                   // 0C00..0C0C
7025             UNKNOWN,                  // 0C0D
7026             TELUGU,                   // 0C0E..0C10
7027             UNKNOWN,                  // 0C11
7028             TELUGU,                   // 0C12..0C28
7029             UNKNOWN,                  // 0C29
7030             TELUGU,                   // 0C2A..0C39
7031             UNKNOWN,                  // 0C3A..0C3C
7032             TELUGU,                   // 0C3D..0C44
7033             UNKNOWN,                  // 0C45
7034             TELUGU,                   // 0C46..0C48
7035             UNKNOWN,                  // 0C49
7036             TELUGU,                   // 0C4A..0C4D
7037             UNKNOWN,                  // 0C4E..0C54
7038             TELUGU,                   // 0C55..0C56
7039             UNKNOWN,                  // 0C57
7040             TELUGU,                   // 0C58..0C5A
7041             UNKNOWN,                  // 0C5B..0C5F
7042             TELUGU,                   // 0C60..0C63
7043             UNKNOWN,                  // 0C64..0C65
7044             TELUGU,                   // 0C66..0C6F
7045             UNKNOWN,                  // 0C70..0C76
7046             TELUGU,                   // 0C77..0C7F
7047             KANNADA,                  // 0C80..0C8C
7048             UNKNOWN,                  // 0C8D
7049             KANNADA,                  // 0C8E..0C90
7050             UNKNOWN,                  // 0C91
7051             KANNADA,                  // 0C92..0CA8
7052             UNKNOWN,                  // 0CA9
7053             KANNADA,                  // 0CAA..0CB3
7054             UNKNOWN,                  // 0CB4
7055             KANNADA,                  // 0CB5..0CB9
7056             UNKNOWN,                  // 0CBA..0CBB
7057             KANNADA,                  // 0CBC..0CC4
7058             UNKNOWN,                  // 0CC5
7059             KANNADA,                  // 0CC6..0CC8
7060             UNKNOWN,                  // 0CC9
7061             KANNADA,                  // 0CCA..0CCD
7062             UNKNOWN,                  // 0CCE..0CD4
7063             KANNADA,                  // 0CD5..0CD6
7064             UNKNOWN,                  // 0CD7..0CDD
7065             KANNADA,                  // 0CDE
7066             UNKNOWN,                  // 0CDF
7067             KANNADA,                  // 0CE0..0CE3
7068             UNKNOWN,                  // 0CE4..0CE5
7069             KANNADA,                  // 0CE6..0CEF
7070             UNKNOWN,                  // 0CF0
7071             KANNADA,                  // 0CF1..0CF2
7072             UNKNOWN,                  // 0CF3..0CFF
7073             MALAYALAM,                // 0D00..0D0C
7074             UNKNOWN,                  // 0D0D
7075             MALAYALAM,                // 0D0E..0D10
7076             UNKNOWN,                  // 0D11
7077             MALAYALAM,                // 0D12..0D44
7078             UNKNOWN,                  // 0D45
7079             MALAYALAM,                // 0D46..0D48
7080             UNKNOWN,                  // 0D49
7081             MALAYALAM,                // 0D4A..0D4F
7082             UNKNOWN,                  // 0D50..0D53
7083             MALAYALAM,                // 0D54..0D63
7084             UNKNOWN,                  // 0D64..0D65
7085             MALAYALAM,                // 0D66..0D7F
7086             UNKNOWN,                  // 0D80
7087             SINHALA,                  // 0D81..0D83
7088             UNKNOWN,                  // 0D84
7089             SINHALA,                  // 0D85..0D96
7090             UNKNOWN,                  // 0D97..0D99
7091             SINHALA,                  // 0D9A..0DB1
7092             UNKNOWN,                  // 0DB2
7093             SINHALA,                  // 0DB3..0DBB
7094             UNKNOWN,                  // 0DBC
7095             SINHALA,                  // 0DBD
7096             UNKNOWN,                  // 0DBE..0DBF
7097             SINHALA,                  // 0DC0..0DC6
7098             UNKNOWN,                  // 0DC7..0DC9
7099             SINHALA,                  // 0DCA
7100             UNKNOWN,                  // 0DCB..0DCE
7101             SINHALA,                  // 0DCF..0DD4
7102             UNKNOWN,                  // 0DD5
7103             SINHALA,                  // 0DD6
7104             UNKNOWN,                  // 0DD7
7105             SINHALA,                  // 0DD8..0DDF
7106             UNKNOWN,                  // 0DE0..0DE5
7107             SINHALA,                  // 0DE6..0DEF
7108             UNKNOWN,                  // 0DF0..0DF1
7109             SINHALA,                  // 0DF2..0DF4
7110             UNKNOWN,                  // 0DF5..0E00
7111             THAI,                     // 0E01..0E3A
7112             UNKNOWN,                  // 0E3B..0E3E
7113             COMMON,                   // 0E3F
7114             THAI,                     // 0E40..0E5B
7115             UNKNOWN,                  // 0E5C..0E80
7116             LAO,                      // 0E81..0E82
7117             UNKNOWN,                  // 0E83
7118             LAO,                      // 0E84
7119             UNKNOWN,                  // 0E85
7120             LAO,                      // 0E86..0E8A
7121             UNKNOWN,                  // 0E8B
7122             LAO,                      // 0E8C..0EA3
7123             UNKNOWN,                  // 0EA4
7124             LAO,                      // 0EA5
7125             UNKNOWN,                  // 0EA6
7126             LAO,                      // 0EA7..0EBD
7127             UNKNOWN,                  // 0EBE..0EBF
7128             LAO,                      // 0EC0..0EC4
7129             UNKNOWN,                  // 0EC5
7130             LAO,                      // 0EC6
7131             UNKNOWN,                  // 0EC7
7132             LAO,                      // 0EC8..0ECD
7133             UNKNOWN,                  // 0ECE..0ECF
7134             LAO,                      // 0ED0..0ED9
7135             UNKNOWN,                  // 0EDA..0EDB
7136             LAO,                      // 0EDC..0EDF
7137             UNKNOWN,                  // 0EE0..0EFF
7138             TIBETAN,                  // 0F00..0F47
7139             UNKNOWN,                  // 0F48
7140             TIBETAN,                  // 0F49..0F6C
7141             UNKNOWN,                  // 0F6D..0F70
7142             TIBETAN,                  // 0F71..0F97
7143             UNKNOWN,                  // 0F98
7144             TIBETAN,                  // 0F99..0FBC
7145             UNKNOWN,                  // 0FBD
7146             TIBETAN,                  // 0FBE..0FCC
7147             UNKNOWN,                  // 0FCD
7148             TIBETAN,                  // 0FCE..0FD4
7149             COMMON,                   // 0FD5..0FD8
7150             TIBETAN,                  // 0FD9..0FDA
7151             UNKNOWN,                  // 0FDB..0FFF
7152             MYANMAR,                  // 1000..109F
7153             GEORGIAN,                 // 10A0..10C5
7154             UNKNOWN,                  // 10C6
7155             GEORGIAN,                 // 10C7
7156             UNKNOWN,                  // 10C8..10CC
7157             GEORGIAN,                 // 10CD
7158             UNKNOWN,                  // 10CE..10CF
7159             GEORGIAN,                 // 10D0..10FA
7160             COMMON,                   // 10FB
7161             GEORGIAN,                 // 10FC..10FF
7162             HANGUL,                   // 1100..11FF
7163             ETHIOPIC,                 // 1200..1248
7164             UNKNOWN,                  // 1249
7165             ETHIOPIC,                 // 124A..124D
7166             UNKNOWN,                  // 124E..124F
7167             ETHIOPIC,                 // 1250..1256
7168             UNKNOWN,                  // 1257
7169             ETHIOPIC,                 // 1258
7170             UNKNOWN,                  // 1259
7171             ETHIOPIC,                 // 125A..125D
7172             UNKNOWN,                  // 125E..125F
7173             ETHIOPIC,                 // 1260..1288
7174             UNKNOWN,                  // 1289
7175             ETHIOPIC,                 // 128A..128D
7176             UNKNOWN,                  // 128E..128F
7177             ETHIOPIC,                 // 1290..12B0
7178             UNKNOWN,                  // 12B1
7179             ETHIOPIC,                 // 12B2..12B5
7180             UNKNOWN,                  // 12B6..12B7
7181             ETHIOPIC,                 // 12B8..12BE
7182             UNKNOWN,                  // 12BF
7183             ETHIOPIC,                 // 12C0
7184             UNKNOWN,                  // 12C1
7185             ETHIOPIC,                 // 12C2..12C5
7186             UNKNOWN,                  // 12C6..12C7
7187             ETHIOPIC,                 // 12C8..12D6
7188             UNKNOWN,                  // 12D7
7189             ETHIOPIC,                 // 12D8..1310
7190             UNKNOWN,                  // 1311
7191             ETHIOPIC,                 // 1312..1315
7192             UNKNOWN,                  // 1316..1317
7193             ETHIOPIC,                 // 1318..135A
7194             UNKNOWN,                  // 135B..135C
7195             ETHIOPIC,                 // 135D..137C
7196             UNKNOWN,                  // 137D..137F
7197             ETHIOPIC,                 // 1380..1399
7198             UNKNOWN,                  // 139A..139F
7199             CHEROKEE,                 // 13A0..13F5
7200             UNKNOWN,                  // 13F6..13F7
7201             CHEROKEE,                 // 13F8..13FD
7202             UNKNOWN,                  // 13FE..13FF
7203             CANADIAN_ABORIGINAL,      // 1400..167F
7204             OGHAM,                    // 1680..169C
7205             UNKNOWN,                  // 169D..169F
7206             RUNIC,                    // 16A0..16EA
7207             COMMON,                   // 16EB..16ED
7208             RUNIC,                    // 16EE..16F8
7209             UNKNOWN,                  // 16F9..16FF
7210             TAGALOG,                  // 1700..170C
7211             UNKNOWN,                  // 170D
7212             TAGALOG,                  // 170E..1714
7213             UNKNOWN,                  // 1715..171F
7214             HANUNOO,                  // 1720..1734
7215             COMMON,                   // 1735..1736
7216             UNKNOWN,                  // 1737..173F
7217             BUHID,                    // 1740..1753
7218             UNKNOWN,                  // 1754..175F
7219             TAGBANWA,                 // 1760..176C
7220             UNKNOWN,                  // 176D
7221             TAGBANWA,                 // 176E..1770
7222             UNKNOWN,                  // 1771
7223             TAGBANWA,                 // 1772..1773
7224             UNKNOWN,                  // 1774..177F
7225             KHMER,                    // 1780..17DD
7226             UNKNOWN,                  // 17DE..17DF
7227             KHMER,                    // 17E0..17E9
7228             UNKNOWN,                  // 17EA..17EF
7229             KHMER,                    // 17F0..17F9
7230             UNKNOWN,                  // 17FA..17FF
7231             MONGOLIAN,                // 1800..1801
7232             COMMON,                   // 1802..1803
7233             MONGOLIAN,                // 1804
7234             COMMON,                   // 1805
7235             MONGOLIAN,                // 1806..180E
7236             UNKNOWN,                  // 180F
7237             MONGOLIAN,                // 1810..1819
7238             UNKNOWN,                  // 181A..181F
7239             MONGOLIAN,                // 1820..1878
7240             UNKNOWN,                  // 1879..187F
7241             MONGOLIAN,                // 1880..18AA
7242             UNKNOWN,                  // 18AB..18AF
7243             CANADIAN_ABORIGINAL,      // 18B0..18F5
7244             UNKNOWN,                  // 18F6..18FF
7245             LIMBU,                    // 1900..191E
7246             UNKNOWN,                  // 191F
7247             LIMBU,                    // 1920..192B
7248             UNKNOWN,                  // 192C..192F
7249             LIMBU,                    // 1930..193B
7250             UNKNOWN,                  // 193C..193F
7251             LIMBU,                    // 1940
7252             UNKNOWN,                  // 1941..1943
7253             LIMBU,                    // 1944..194F
7254             TAI_LE,                   // 1950..196D
7255             UNKNOWN,                  // 196E..196F
7256             TAI_LE,                   // 1970..1974
7257             UNKNOWN,                  // 1975..197F
7258             NEW_TAI_LUE,              // 1980..19AB
7259             UNKNOWN,                  // 19AC..19AF
7260             NEW_TAI_LUE,              // 19B0..19C9
7261             UNKNOWN,                  // 19CA..19CF
7262             NEW_TAI_LUE,              // 19D0..19DA
7263             UNKNOWN,                  // 19DB..19DD
7264             NEW_TAI_LUE,              // 19DE..19DF
7265             KHMER,                    // 19E0..19FF
7266             BUGINESE,                 // 1A00..1A1B
7267             UNKNOWN,                  // 1A1C..1A1D
7268             BUGINESE,                 // 1A1E..1A1F
7269             TAI_THAM,                 // 1A20..1A5E
7270             UNKNOWN,                  // 1A5F
7271             TAI_THAM,                 // 1A60..1A7C
7272             UNKNOWN,                  // 1A7D..1A7E
7273             TAI_THAM,                 // 1A7F..1A89
7274             UNKNOWN,                  // 1A8A..1A8F
7275             TAI_THAM,                 // 1A90..1A99
7276             UNKNOWN,                  // 1A9A..1A9F
7277             TAI_THAM,                 // 1AA0..1AAD
7278             UNKNOWN,                  // 1AAE..1AAF
7279             INHERITED,                // 1AB0..1AC0
7280             UNKNOWN,                  // 1AC1..1AFF
7281             BALINESE,                 // 1B00..1B4B
7282             UNKNOWN,                  // 1B4C..1B4F
7283             BALINESE,                 // 1B50..1B7C
7284             UNKNOWN,                  // 1B7D..1B7F
7285             SUNDANESE,                // 1B80..1BBF
7286             BATAK,                    // 1BC0..1BF3
7287             UNKNOWN,                  // 1BF4..1BFB
7288             BATAK,                    // 1BFC..1BFF
7289             LEPCHA,                   // 1C00..1C37
7290             UNKNOWN,                  // 1C38..1C3A
7291             LEPCHA,                   // 1C3B..1C49
7292             UNKNOWN,                  // 1C4A..1C4C
7293             LEPCHA,                   // 1C4D..1C4F
7294             OL_CHIKI,                 // 1C50..1C7F
7295             CYRILLIC,                 // 1C80..1C88
7296             UNKNOWN,                  // 1C89..1C8F
7297             GEORGIAN,                 // 1C90..1CBA
7298             UNKNOWN,                  // 1CBB..1CBC
7299             GEORGIAN,                 // 1CBD..1CBF
7300             SUNDANESE,                // 1CC0..1CC7
7301             UNKNOWN,                  // 1CC8..1CCF
7302             INHERITED,                // 1CD0..1CD2
7303             COMMON,                   // 1CD3
7304             INHERITED,                // 1CD4..1CE0
7305             COMMON,                   // 1CE1
7306             INHERITED,                // 1CE2..1CE8
7307             COMMON,                   // 1CE9..1CEC
7308             INHERITED,                // 1CED
7309             COMMON,                   // 1CEE..1CF3
7310             INHERITED,                // 1CF4
7311             COMMON,                   // 1CF5..1CF7
7312             INHERITED,                // 1CF8..1CF9
7313             COMMON,                   // 1CFA
7314             UNKNOWN,                  // 1CFB..1CFF
7315             LATIN,                    // 1D00..1D25
7316             GREEK,                    // 1D26..1D2A
7317             CYRILLIC,                 // 1D2B
7318             LATIN,                    // 1D2C..1D5C
7319             GREEK,                    // 1D5D..1D61
7320             LATIN,                    // 1D62..1D65
7321             GREEK,                    // 1D66..1D6A
7322             LATIN,                    // 1D6B..1D77
7323             CYRILLIC,                 // 1D78
7324             LATIN,                    // 1D79..1DBE
7325             GREEK,                    // 1DBF
7326             INHERITED,                // 1DC0..1DF9
7327             UNKNOWN,                  // 1DFA
7328             INHERITED,                // 1DFB..1DFF
7329             LATIN,                    // 1E00..1EFF
7330             GREEK,                    // 1F00..1F15
7331             UNKNOWN,                  // 1F16..1F17
7332             GREEK,                    // 1F18..1F1D
7333             UNKNOWN,                  // 1F1E..1F1F
7334             GREEK,                    // 1F20..1F45
7335             UNKNOWN,                  // 1F46..1F47
7336             GREEK,                    // 1F48..1F4D
7337             UNKNOWN,                  // 1F4E..1F4F
7338             GREEK,                    // 1F50..1F57
7339             UNKNOWN,                  // 1F58
7340             GREEK,                    // 1F59
7341             UNKNOWN,                  // 1F5A
7342             GREEK,                    // 1F5B
7343             UNKNOWN,                  // 1F5C
7344             GREEK,                    // 1F5D
7345             UNKNOWN,                  // 1F5E
7346             GREEK,                    // 1F5F..1F7D
7347             UNKNOWN,                  // 1F7E..1F7F
7348             GREEK,                    // 1F80..1FB4
7349             UNKNOWN,                  // 1FB5
7350             GREEK,                    // 1FB6..1FC4
7351             UNKNOWN,                  // 1FC5
7352             GREEK,                    // 1FC6..1FD3
7353             UNKNOWN,                  // 1FD4..1FD5
7354             GREEK,                    // 1FD6..1FDB
7355             UNKNOWN,                  // 1FDC
7356             GREEK,                    // 1FDD..1FEF
7357             UNKNOWN,                  // 1FF0..1FF1
7358             GREEK,                    // 1FF2..1FF4
7359             UNKNOWN,                  // 1FF5
7360             GREEK,                    // 1FF6..1FFE
7361             UNKNOWN,                  // 1FFF
7362             COMMON,                   // 2000..200B
7363             INHERITED,                // 200C..200D
7364             COMMON,                   // 200E..2064
7365             UNKNOWN,                  // 2065
7366             COMMON,                   // 2066..2070
7367             LATIN,                    // 2071
7368             UNKNOWN,                  // 2072..2073
7369             COMMON,                   // 2074..207E
7370             LATIN,                    // 207F
7371             COMMON,                   // 2080..208E
7372             UNKNOWN,                  // 208F
7373             LATIN,                    // 2090..209C
7374             UNKNOWN,                  // 209D..209F
7375             COMMON,                   // 20A0..20BF
7376             UNKNOWN,                  // 20C0..20CF
7377             INHERITED,                // 20D0..20F0
7378             UNKNOWN,                  // 20F1..20FF
7379             COMMON,                   // 2100..2125
7380             GREEK,                    // 2126
7381             COMMON,                   // 2127..2129
7382             LATIN,                    // 212A..212B
7383             COMMON,                   // 212C..2131
7384             LATIN,                    // 2132
7385             COMMON,                   // 2133..214D
7386             LATIN,                    // 214E
7387             COMMON,                   // 214F..215F
7388             LATIN,                    // 2160..2188
7389             COMMON,                   // 2189..218B
7390             UNKNOWN,                  // 218C..218F
7391             COMMON,                   // 2190..2426
7392             UNKNOWN,                  // 2427..243F
7393             COMMON,                   // 2440..244A
7394             UNKNOWN,                  // 244B..245F
7395             COMMON,                   // 2460..27FF
7396             BRAILLE,                  // 2800..28FF
7397             COMMON,                   // 2900..2B73
7398             UNKNOWN,                  // 2B74..2B75
7399             COMMON,                   // 2B76..2B95
7400             UNKNOWN,                  // 2B96
7401             COMMON,                   // 2B97..2BFF
7402             GLAGOLITIC,               // 2C00..2C2E
7403             UNKNOWN,                  // 2C2F
7404             GLAGOLITIC,               // 2C30..2C5E
7405             UNKNOWN,                  // 2C5F
7406             LATIN,                    // 2C60..2C7F
7407             COPTIC,                   // 2C80..2CF3
7408             UNKNOWN,                  // 2CF4..2CF8
7409             COPTIC,                   // 2CF9..2CFF
7410             GEORGIAN,                 // 2D00..2D25
7411             UNKNOWN,                  // 2D26
7412             GEORGIAN,                 // 2D27
7413             UNKNOWN,                  // 2D28..2D2C
7414             GEORGIAN,                 // 2D2D
7415             UNKNOWN,                  // 2D2E..2D2F
7416             TIFINAGH,                 // 2D30..2D67
7417             UNKNOWN,                  // 2D68..2D6E
7418             TIFINAGH,                 // 2D6F..2D70
7419             UNKNOWN,                  // 2D71..2D7E
7420             TIFINAGH,                 // 2D7F
7421             ETHIOPIC,                 // 2D80..2D96
7422             UNKNOWN,                  // 2D97..2D9F
7423             ETHIOPIC,                 // 2DA0..2DA6
7424             UNKNOWN,                  // 2DA7
7425             ETHIOPIC,                 // 2DA8..2DAE
7426             UNKNOWN,                  // 2DAF
7427             ETHIOPIC,                 // 2DB0..2DB6
7428             UNKNOWN,                  // 2DB7
7429             ETHIOPIC,                 // 2DB8..2DBE
7430             UNKNOWN,                  // 2DBF
7431             ETHIOPIC,                 // 2DC0..2DC6
7432             UNKNOWN,                  // 2DC7
7433             ETHIOPIC,                 // 2DC8..2DCE
7434             UNKNOWN,                  // 2DCF
7435             ETHIOPIC,                 // 2DD0..2DD6
7436             UNKNOWN,                  // 2DD7
7437             ETHIOPIC,                 // 2DD8..2DDE
7438             UNKNOWN,                  // 2DDF
7439             CYRILLIC,                 // 2DE0..2DFF
7440             COMMON,                   // 2E00..2E52
7441             UNKNOWN,                  // 2E53..2E7F
7442             HAN,                      // 2E80..2E99
7443             UNKNOWN,                  // 2E9A
7444             HAN,                      // 2E9B..2EF3
7445             UNKNOWN,                  // 2EF4..2EFF
7446             HAN,                      // 2F00..2FD5
7447             UNKNOWN,                  // 2FD6..2FEF
7448             COMMON,                   // 2FF0..2FFB
7449             UNKNOWN,                  // 2FFC..2FFF
7450             COMMON,                   // 3000..3004
7451             HAN,                      // 3005
7452             COMMON,                   // 3006
7453             HAN,                      // 3007
7454             COMMON,                   // 3008..3020
7455             HAN,                      // 3021..3029
7456             INHERITED,                // 302A..302D
7457             HANGUL,                   // 302E..302F
7458             COMMON,                   // 3030..3037
7459             HAN,                      // 3038..303B
7460             COMMON,                   // 303C..303F
7461             UNKNOWN,                  // 3040
7462             HIRAGANA,                 // 3041..3096
7463             UNKNOWN,                  // 3097..3098
7464             INHERITED,                // 3099..309A
7465             COMMON,                   // 309B..309C
7466             HIRAGANA,                 // 309D..309F
7467             COMMON,                   // 30A0
7468             KATAKANA,                 // 30A1..30FA
7469             COMMON,                   // 30FB..30FC
7470             KATAKANA,                 // 30FD..30FF
7471             UNKNOWN,                  // 3100..3104
7472             BOPOMOFO,                 // 3105..312F
7473             UNKNOWN,                  // 3130
7474             HANGUL,                   // 3131..318E
7475             UNKNOWN,                  // 318F
7476             COMMON,                   // 3190..319F
7477             BOPOMOFO,                 // 31A0..31BF
7478             COMMON,                   // 31C0..31E3
7479             UNKNOWN,                  // 31E4..31EF
7480             KATAKANA,                 // 31F0..31FF
7481             HANGUL,                   // 3200..321E
7482             UNKNOWN,                  // 321F
7483             COMMON,                   // 3220..325F
7484             HANGUL,                   // 3260..327E
7485             COMMON,                   // 327F..32CF
7486             KATAKANA,                 // 32D0..32FE
7487             COMMON,                   // 32FF
7488             KATAKANA,                 // 3300..3357
7489             COMMON,                   // 3358..33FF
7490             HAN,                      // 3400..4DBF
7491             COMMON,                   // 4DC0..4DFF
7492             HAN,                      // 4E00..9FFC
7493             UNKNOWN,                  // 9FFD..9FFF
7494             YI,                       // A000..A48C
7495             UNKNOWN,                  // A48D..A48F
7496             YI,                       // A490..A4C6
7497             UNKNOWN,                  // A4C7..A4CF
7498             LISU,                     // A4D0..A4FF
7499             VAI,                      // A500..A62B
7500             UNKNOWN,                  // A62C..A63F
7501             CYRILLIC,                 // A640..A69F
7502             BAMUM,                    // A6A0..A6F7
7503             UNKNOWN,                  // A6F8..A6FF
7504             COMMON,                   // A700..A721
7505             LATIN,                    // A722..A787
7506             COMMON,                   // A788..A78A
7507             LATIN,                    // A78B..A7BF
7508             UNKNOWN,                  // A7C0..A7C1
7509             LATIN,                    // A7C2..A7CA
7510             UNKNOWN,                  // A7CB..A7F4
7511             LATIN,                    // A7F5..A7FF
7512             SYLOTI_NAGRI,             // A800..A82C
7513             UNKNOWN,                  // A82D..A82F
7514             COMMON,                   // A830..A839
7515             UNKNOWN,                  // A83A..A83F
7516             PHAGS_PA,                 // A840..A877
7517             UNKNOWN,                  // A878..A87F
7518             SAURASHTRA,               // A880..A8C5
7519             UNKNOWN,                  // A8C6..A8CD
7520             SAURASHTRA,               // A8CE..A8D9
7521             UNKNOWN,                  // A8DA..A8DF
7522             DEVANAGARI,               // A8E0..A8FF
7523             KAYAH_LI,                 // A900..A92D
7524             COMMON,                   // A92E
7525             KAYAH_LI,                 // A92F
7526             REJANG,                   // A930..A953
7527             UNKNOWN,                  // A954..A95E
7528             REJANG,                   // A95F
7529             HANGUL,                   // A960..A97C
7530             UNKNOWN,                  // A97D..A97F
7531             JAVANESE,                 // A980..A9CD
7532             UNKNOWN,                  // A9CE
7533             COMMON,                   // A9CF
7534             JAVANESE,                 // A9D0..A9D9
7535             UNKNOWN,                  // A9DA..A9DD
7536             JAVANESE,                 // A9DE..A9DF
7537             MYANMAR,                  // A9E0..A9FE
7538             UNKNOWN,                  // A9FF
7539             CHAM,                     // AA00..AA36
7540             UNKNOWN,                  // AA37..AA3F
7541             CHAM,                     // AA40..AA4D
7542             UNKNOWN,                  // AA4E..AA4F
7543             CHAM,                     // AA50..AA59
7544             UNKNOWN,                  // AA5A..AA5B
7545             CHAM,                     // AA5C..AA5F
7546             MYANMAR,                  // AA60..AA7F
7547             TAI_VIET,                 // AA80..AAC2
7548             UNKNOWN,                  // AAC3..AADA
7549             TAI_VIET,                 // AADB..AADF
7550             MEETEI_MAYEK,             // AAE0..AAF6
7551             UNKNOWN,                  // AAF7..AB00
7552             ETHIOPIC,                 // AB01..AB06
7553             UNKNOWN,                  // AB07..AB08
7554             ETHIOPIC,                 // AB09..AB0E
7555             UNKNOWN,                  // AB0F..AB10
7556             ETHIOPIC,                 // AB11..AB16
7557             UNKNOWN,                  // AB17..AB1F
7558             ETHIOPIC,                 // AB20..AB26
7559             UNKNOWN,                  // AB27
7560             ETHIOPIC,                 // AB28..AB2E
7561             UNKNOWN,                  // AB2F
7562             LATIN,                    // AB30..AB5A
7563             COMMON,                   // AB5B
7564             LATIN,                    // AB5C..AB64
7565             GREEK,                    // AB65
7566             LATIN,                    // AB66..AB69
7567             COMMON,                   // AB6A..AB6B
7568             UNKNOWN,                  // AB6C..AB6F
7569             CHEROKEE,                 // AB70..ABBF
7570             MEETEI_MAYEK,             // ABC0..ABED
7571             UNKNOWN,                  // ABEE..ABEF
7572             MEETEI_MAYEK,             // ABF0..ABF9
7573             UNKNOWN,                  // ABFA..ABFF
7574             HANGUL,                   // AC00..D7A3
7575             UNKNOWN,                  // D7A4..D7AF
7576             HANGUL,                   // D7B0..D7C6
7577             UNKNOWN,                  // D7C7..D7CA
7578             HANGUL,                   // D7CB..D7FB
7579             UNKNOWN,                  // D7FC..F8FF
7580             HAN,                      // F900..FA6D
7581             UNKNOWN,                  // FA6E..FA6F
7582             HAN,                      // FA70..FAD9
7583             UNKNOWN,                  // FADA..FAFF
7584             LATIN,                    // FB00..FB06
7585             UNKNOWN,                  // FB07..FB12
7586             ARMENIAN,                 // FB13..FB17
7587             UNKNOWN,                  // FB18..FB1C
7588             HEBREW,                   // FB1D..FB36
7589             UNKNOWN,                  // FB37
7590             HEBREW,                   // FB38..FB3C
7591             UNKNOWN,                  // FB3D
7592             HEBREW,                   // FB3E
7593             UNKNOWN,                  // FB3F
7594             HEBREW,                   // FB40..FB41
7595             UNKNOWN,                  // FB42
7596             HEBREW,                   // FB43..FB44
7597             UNKNOWN,                  // FB45
7598             HEBREW,                   // FB46..FB4F
7599             ARABIC,                   // FB50..FBC1
7600             UNKNOWN,                  // FBC2..FBD2
7601             ARABIC,                   // FBD3..FD3D
7602             COMMON,                   // FD3E..FD3F
7603             UNKNOWN,                  // FD40..FD4F
7604             ARABIC,                   // FD50..FD8F
7605             UNKNOWN,                  // FD90..FD91
7606             ARABIC,                   // FD92..FDC7
7607             UNKNOWN,                  // FDC8..FDEF
7608             ARABIC,                   // FDF0..FDFD
7609             UNKNOWN,                  // FDFE..FDFF
7610             INHERITED,                // FE00..FE0F
7611             COMMON,                   // FE10..FE19
7612             UNKNOWN,                  // FE1A..FE1F
7613             INHERITED,                // FE20..FE2D
7614             CYRILLIC,                 // FE2E..FE2F
7615             COMMON,                   // FE30..FE52
7616             UNKNOWN,                  // FE53
7617             COMMON,                   // FE54..FE66
7618             UNKNOWN,                  // FE67
7619             COMMON,                   // FE68..FE6B
7620             UNKNOWN,                  // FE6C..FE6F
7621             ARABIC,                   // FE70..FE74
7622             UNKNOWN,                  // FE75
7623             ARABIC,                   // FE76..FEFC
7624             UNKNOWN,                  // FEFD..FEFE
7625             COMMON,                   // FEFF
7626             UNKNOWN,                  // FF00
7627             COMMON,                   // FF01..FF20
7628             LATIN,                    // FF21..FF3A
7629             COMMON,                   // FF3B..FF40
7630             LATIN,                    // FF41..FF5A
7631             COMMON,                   // FF5B..FF65
7632             KATAKANA,                 // FF66..FF6F
7633             COMMON,                   // FF70
7634             KATAKANA,                 // FF71..FF9D
7635             COMMON,                   // FF9E..FF9F
7636             HANGUL,                   // FFA0..FFBE
7637             UNKNOWN,                  // FFBF..FFC1
7638             HANGUL,                   // FFC2..FFC7
7639             UNKNOWN,                  // FFC8..FFC9
7640             HANGUL,                   // FFCA..FFCF
7641             UNKNOWN,                  // FFD0..FFD1
7642             HANGUL,                   // FFD2..FFD7
7643             UNKNOWN,                  // FFD8..FFD9
7644             HANGUL,                   // FFDA..FFDC
7645             UNKNOWN,                  // FFDD..FFDF
7646             COMMON,                   // FFE0..FFE6
7647             UNKNOWN,                  // FFE7
7648             COMMON,                   // FFE8..FFEE
7649             UNKNOWN,                  // FFEF..FFF8
7650             COMMON,                   // FFF9..FFFD
7651             UNKNOWN,                  // FFFE..FFFF
7652             LINEAR_B,                 // 10000..1000B
7653             UNKNOWN,                  // 1000C
7654             LINEAR_B,                 // 1000D..10026
7655             UNKNOWN,                  // 10027
7656             LINEAR_B,                 // 10028..1003A
7657             UNKNOWN,                  // 1003B
7658             LINEAR_B,                 // 1003C..1003D
7659             UNKNOWN,                  // 1003E
7660             LINEAR_B,                 // 1003F..1004D
7661             UNKNOWN,                  // 1004E..1004F
7662             LINEAR_B,                 // 10050..1005D
7663             UNKNOWN,                  // 1005E..1007F
7664             LINEAR_B,                 // 10080..100FA
7665             UNKNOWN,                  // 100FB..100FF
7666             COMMON,                   // 10100..10102
7667             UNKNOWN,                  // 10103..10106
7668             COMMON,                   // 10107..10133
7669             UNKNOWN,                  // 10134..10136
7670             COMMON,                   // 10137..1013F
7671             GREEK,                    // 10140..1018E
7672             UNKNOWN,                  // 1018F
7673             COMMON,                   // 10190..1019C
7674             UNKNOWN,                  // 1019D..1019F
7675             GREEK,                    // 101A0
7676             UNKNOWN,                  // 101A1..101CF
7677             COMMON,                   // 101D0..101FC
7678             INHERITED,                // 101FD
7679             UNKNOWN,                  // 101FE..1027F
7680             LYCIAN,                   // 10280..1029C
7681             UNKNOWN,                  // 1029D..1029F
7682             CARIAN,                   // 102A0..102D0
7683             UNKNOWN,                  // 102D1..102DF
7684             INHERITED,                // 102E0
7685             COMMON,                   // 102E1..102FB
7686             UNKNOWN,                  // 102FC..102FF
7687             OLD_ITALIC,               // 10300..10323
7688             UNKNOWN,                  // 10324..1032C
7689             OLD_ITALIC,               // 1032D..1032F
7690             GOTHIC,                   // 10330..1034A
7691             UNKNOWN,                  // 1034B..1034F
7692             OLD_PERMIC,               // 10350..1037A
7693             UNKNOWN,                  // 1037B..1037F
7694             UGARITIC,                 // 10380..1039D
7695             UNKNOWN,                  // 1039E
7696             UGARITIC,                 // 1039F
7697             OLD_PERSIAN,              // 103A0..103C3
7698             UNKNOWN,                  // 103C4..103C7
7699             OLD_PERSIAN,              // 103C8..103D5
7700             UNKNOWN,                  // 103D6..103FF
7701             DESERET,                  // 10400..1044F
7702             SHAVIAN,                  // 10450..1047F
7703             OSMANYA,                  // 10480..1049D
7704             UNKNOWN,                  // 1049E..1049F
7705             OSMANYA,                  // 104A0..104A9
7706             UNKNOWN,                  // 104AA..104AF
7707             OSAGE,                    // 104B0..104D3
7708             UNKNOWN,                  // 104D4..104D7
7709             OSAGE,                    // 104D8..104FB
7710             UNKNOWN,                  // 104FC..104FF
7711             ELBASAN,                  // 10500..10527
7712             UNKNOWN,                  // 10528..1052F
7713             CAUCASIAN_ALBANIAN,       // 10530..10563
7714             UNKNOWN,                  // 10564..1056E
7715             CAUCASIAN_ALBANIAN,       // 1056F
7716             UNKNOWN,                  // 10570..105FF
7717             LINEAR_A,                 // 10600..10736
7718             UNKNOWN,                  // 10737..1073F
7719             LINEAR_A,                 // 10740..10755
7720             UNKNOWN,                  // 10756..1075F
7721             LINEAR_A,                 // 10760..10767
7722             UNKNOWN,                  // 10768..107FF
7723             CYPRIOT,                  // 10800..10805
7724             UNKNOWN,                  // 10806..10807
7725             CYPRIOT,                  // 10808
7726             UNKNOWN,                  // 10809
7727             CYPRIOT,                  // 1080A..10835
7728             UNKNOWN,                  // 10836
7729             CYPRIOT,                  // 10837..10838
7730             UNKNOWN,                  // 10839..1083B
7731             CYPRIOT,                  // 1083C
7732             UNKNOWN,                  // 1083D..1083E
7733             CYPRIOT,                  // 1083F
7734             IMPERIAL_ARAMAIC,         // 10840..10855
7735             UNKNOWN,                  // 10856
7736             IMPERIAL_ARAMAIC,         // 10857..1085F
7737             PALMYRENE,                // 10860..1087F
7738             NABATAEAN,                // 10880..1089E
7739             UNKNOWN,                  // 1089F..108A6
7740             NABATAEAN,                // 108A7..108AF
7741             UNKNOWN,                  // 108B0..108DF
7742             HATRAN,                   // 108E0..108F2
7743             UNKNOWN,                  // 108F3
7744             HATRAN,                   // 108F4..108F5
7745             UNKNOWN,                  // 108F6..108FA
7746             HATRAN,                   // 108FB..108FF
7747             PHOENICIAN,               // 10900..1091B
7748             UNKNOWN,                  // 1091C..1091E
7749             PHOENICIAN,               // 1091F
7750             LYDIAN,                   // 10920..10939
7751             UNKNOWN,                  // 1093A..1093E
7752             LYDIAN,                   // 1093F
7753             UNKNOWN,                  // 10940..1097F
7754             MEROITIC_HIEROGLYPHS,     // 10980..1099F
7755             MEROITIC_CURSIVE,         // 109A0..109B7
7756             UNKNOWN,                  // 109B8..109BB
7757             MEROITIC_CURSIVE,         // 109BC..109CF
7758             UNKNOWN,                  // 109D0..109D1
7759             MEROITIC_CURSIVE,         // 109D2..109FF
7760             KHAROSHTHI,               // 10A00..10A03
7761             UNKNOWN,                  // 10A04
7762             KHAROSHTHI,               // 10A05..10A06
7763             UNKNOWN,                  // 10A07..10A0B
7764             KHAROSHTHI,               // 10A0C..10A13
7765             UNKNOWN,                  // 10A14
7766             KHAROSHTHI,               // 10A15..10A17
7767             UNKNOWN,                  // 10A18
7768             KHAROSHTHI,               // 10A19..10A35
7769             UNKNOWN,                  // 10A36..10A37
7770             KHAROSHTHI,               // 10A38..10A3A
7771             UNKNOWN,                  // 10A3B..10A3E
7772             KHAROSHTHI,               // 10A3F..10A48
7773             UNKNOWN,                  // 10A49..10A4F
7774             KHAROSHTHI,               // 10A50..10A58
7775             UNKNOWN,                  // 10A59..10A5F
7776             OLD_SOUTH_ARABIAN,        // 10A60..10A7F
7777             OLD_NORTH_ARABIAN,        // 10A80..10A9F
7778             UNKNOWN,                  // 10AA0..10ABF
7779             MANICHAEAN,               // 10AC0..10AE6
7780             UNKNOWN,                  // 10AE7..10AEA
7781             MANICHAEAN,               // 10AEB..10AF6
7782             UNKNOWN,                  // 10AF7..10AFF
7783             AVESTAN,                  // 10B00..10B35
7784             UNKNOWN,                  // 10B36..10B38
7785             AVESTAN,                  // 10B39..10B3F
7786             INSCRIPTIONAL_PARTHIAN,   // 10B40..10B55
7787             UNKNOWN,                  // 10B56..10B57
7788             INSCRIPTIONAL_PARTHIAN,   // 10B58..10B5F
7789             INSCRIPTIONAL_PAHLAVI,    // 10B60..10B72
7790             UNKNOWN,                  // 10B73..10B77
7791             INSCRIPTIONAL_PAHLAVI,    // 10B78..10B7F
7792             PSALTER_PAHLAVI,          // 10B80..10B91
7793             UNKNOWN,                  // 10B92..10B98
7794             PSALTER_PAHLAVI,          // 10B99..10B9C
7795             UNKNOWN,                  // 10B9D..10BA8
7796             PSALTER_PAHLAVI,          // 10BA9..10BAF
7797             UNKNOWN,                  // 10BB0..10BFF
7798             OLD_TURKIC,               // 10C00..10C48
7799             UNKNOWN,                  // 10C49..10C7F
7800             OLD_HUNGARIAN,            // 10C80..10CB2
7801             UNKNOWN,                  // 10CB3..10CBF
7802             OLD_HUNGARIAN,            // 10CC0..10CF2
7803             UNKNOWN,                  // 10CF3..10CF9
7804             OLD_HUNGARIAN,            // 10CFA..10CFF
7805             HANIFI_ROHINGYA,          // 10D00..10D27
7806             UNKNOWN,                  // 10D28..10D2F
7807             HANIFI_ROHINGYA,          // 10D30..10D39
7808             UNKNOWN,                  // 10D3A..10E5F
7809             ARABIC,                   // 10E60..10E7E
7810             UNKNOWN,                  // 10E7F
7811             YEZIDI,                   // 10E80..10EA9
7812             UNKNOWN,                  // 10EAA
7813             YEZIDI,                   // 10EAB..10EAD
7814             UNKNOWN,                  // 10EAE..10EAF
7815             YEZIDI,                   // 10EB0..10EB1
7816             UNKNOWN,                  // 10EB2..10EFF
7817             OLD_SOGDIAN,              // 10F00..10F27
7818             UNKNOWN,                  // 10F28..10F2F
7819             SOGDIAN,                  // 10F30..10F59
7820             UNKNOWN,                  // 10F5A..10FAF
7821             CHORASMIAN,               // 10FB0..10FCB
7822             UNKNOWN,                  // 10FCC..10FDF
7823             ELYMAIC,                  // 10FE0..10FF6
7824             UNKNOWN,                  // 10FF7..10FFF
7825             BRAHMI,                   // 11000..1104D
7826             UNKNOWN,                  // 1104E..11051
7827             BRAHMI,                   // 11052..1106F
7828             UNKNOWN,                  // 11070..1107E
7829             BRAHMI,                   // 1107F
7830             KAITHI,                   // 11080..110C1
7831             UNKNOWN,                  // 110C2..110CC
7832             KAITHI,                   // 110CD
7833             UNKNOWN,                  // 110CE..110CF
7834             SORA_SOMPENG,             // 110D0..110E8
7835             UNKNOWN,                  // 110E9..110EF
7836             SORA_SOMPENG,             // 110F0..110F9
7837             UNKNOWN,                  // 110FA..110FF
7838             CHAKMA,                   // 11100..11134
7839             UNKNOWN,                  // 11135
7840             CHAKMA,                   // 11136..11147
7841             UNKNOWN,                  // 11148..1114F
7842             MAHAJANI,                 // 11150..11176
7843             UNKNOWN,                  // 11177..1117F
7844             SHARADA,                  // 11180..111DF
7845             UNKNOWN,                  // 111E0
7846             SINHALA,                  // 111E1..111F4
7847             UNKNOWN,                  // 111F5..111FF
7848             KHOJKI,                   // 11200..11211
7849             UNKNOWN,                  // 11212
7850             KHOJKI,                   // 11213..1123E
7851             UNKNOWN,                  // 1123F..1127F
7852             MULTANI,                  // 11280..11286
7853             UNKNOWN,                  // 11287
7854             MULTANI,                  // 11288
7855             UNKNOWN,                  // 11289
7856             MULTANI,                  // 1128A..1128D
7857             UNKNOWN,                  // 1128E
7858             MULTANI,                  // 1128F..1129D
7859             UNKNOWN,                  // 1129E
7860             MULTANI,                  // 1129F..112A9
7861             UNKNOWN,                  // 112AA..112AF
7862             KHUDAWADI,                // 112B0..112EA
7863             UNKNOWN,                  // 112EB..112EF
7864             KHUDAWADI,                // 112F0..112F9
7865             UNKNOWN,                  // 112FA..112FF
7866             GRANTHA,                  // 11300..11303
7867             UNKNOWN,                  // 11304
7868             GRANTHA,                  // 11305..1130C
7869             UNKNOWN,                  // 1130D..1130E
7870             GRANTHA,                  // 1130F..11310
7871             UNKNOWN,                  // 11311..11312
7872             GRANTHA,                  // 11313..11328
7873             UNKNOWN,                  // 11329
7874             GRANTHA,                  // 1132A..11330
7875             UNKNOWN,                  // 11331
7876             GRANTHA,                  // 11332..11333
7877             UNKNOWN,                  // 11334
7878             GRANTHA,                  // 11335..11339
7879             UNKNOWN,                  // 1133A
7880             INHERITED,                // 1133B
7881             GRANTHA,                  // 1133C..11344
7882             UNKNOWN,                  // 11345..11346
7883             GRANTHA,                  // 11347..11348
7884             UNKNOWN,                  // 11349..1134A
7885             GRANTHA,                  // 1134B..1134D
7886             UNKNOWN,                  // 1134E..1134F
7887             GRANTHA,                  // 11350
7888             UNKNOWN,                  // 11351..11356
7889             GRANTHA,                  // 11357
7890             UNKNOWN,                  // 11358..1135C
7891             GRANTHA,                  // 1135D..11363
7892             UNKNOWN,                  // 11364..11365
7893             GRANTHA,                  // 11366..1136C
7894             UNKNOWN,                  // 1136D..1136F
7895             GRANTHA,                  // 11370..11374
7896             UNKNOWN,                  // 11375..113FF
7897             NEWA,                     // 11400..1145B
7898             UNKNOWN,                  // 1145C
7899             NEWA,                     // 1145D..11461
7900             UNKNOWN,                  // 11462..1147F
7901             TIRHUTA,                  // 11480..114C7
7902             UNKNOWN,                  // 114C8..114CF
7903             TIRHUTA,                  // 114D0..114D9
7904             UNKNOWN,                  // 114DA..1157F
7905             SIDDHAM,                  // 11580..115B5
7906             UNKNOWN,                  // 115B6..115B7
7907             SIDDHAM,                  // 115B8..115DD
7908             UNKNOWN,                  // 115DE..115FF
7909             MODI,                     // 11600..11644
7910             UNKNOWN,                  // 11645..1164F
7911             MODI,                     // 11650..11659
7912             UNKNOWN,                  // 1165A..1165F
7913             MONGOLIAN,                // 11660..1166C
7914             UNKNOWN,                  // 1166D..1167F
7915             TAKRI,                    // 11680..116B8
7916             UNKNOWN,                  // 116B9..116BF
7917             TAKRI,                    // 116C0..116C9
7918             UNKNOWN,                  // 116CA..116FF
7919             AHOM,                     // 11700..1171A
7920             UNKNOWN,                  // 1171B..1171C
7921             AHOM,                     // 1171D..1172B
7922             UNKNOWN,                  // 1172C..1172F
7923             AHOM,                     // 11730..1173F
7924             UNKNOWN,                  // 11740..117FF
7925             DOGRA,                    // 11800..1183B
7926             UNKNOWN,                  // 1183C..1189F
7927             WARANG_CITI,              // 118A0..118F2
7928             UNKNOWN,                  // 118F3..118FE
7929             WARANG_CITI,              // 118FF
7930             DIVES_AKURU,              // 11900..11906
7931             UNKNOWN,                  // 11907..11908
7932             DIVES_AKURU,              // 11909
7933             UNKNOWN,                  // 1190A..1190B
7934             DIVES_AKURU,              // 1190C..11913
7935             UNKNOWN,                  // 11914
7936             DIVES_AKURU,              // 11915..11916
7937             UNKNOWN,                  // 11917
7938             DIVES_AKURU,              // 11918..11935
7939             UNKNOWN,                  // 11936
7940             DIVES_AKURU,              // 11937..11938
7941             UNKNOWN,                  // 11939..1193A
7942             DIVES_AKURU,              // 1193B..11946
7943             UNKNOWN,                  // 11947..1194F
7944             DIVES_AKURU,              // 11950..11959
7945             UNKNOWN,                  // 1195A..1199F
7946             NANDINAGARI,              // 119A0..119A7
7947             UNKNOWN,                  // 119A8..119A9
7948             NANDINAGARI,              // 119AA..119D7
7949             UNKNOWN,                  // 119D8..119D9
7950             NANDINAGARI,              // 119DA..119E4
7951             UNKNOWN,                  // 119E5..119FF
7952             ZANABAZAR_SQUARE,         // 11A00..11A47
7953             UNKNOWN,                  // 11A48..11A4F
7954             SOYOMBO,                  // 11A50..11AA2
7955             UNKNOWN,                  // 11AA3..11ABF
7956             PAU_CIN_HAU,              // 11AC0..11AF8
7957             UNKNOWN,                  // 11AF9..11BFF
7958             BHAIKSUKI,                // 11C00..11C08
7959             UNKNOWN,                  // 11C09
7960             BHAIKSUKI,                // 11C0A..11C36
7961             UNKNOWN,                  // 11C37
7962             BHAIKSUKI,                // 11C38..11C45
7963             UNKNOWN,                  // 11C46..11C4F
7964             BHAIKSUKI,                // 11C50..11C6C
7965             UNKNOWN,                  // 11C6D..11C6F
7966             MARCHEN,                  // 11C70..11C8F
7967             UNKNOWN,                  // 11C90..11C91
7968             MARCHEN,                  // 11C92..11CA7
7969             UNKNOWN,                  // 11CA8
7970             MARCHEN,                  // 11CA9..11CB6
7971             UNKNOWN,                  // 11CB7..11CFF
7972             MASARAM_GONDI,            // 11D00..11D06
7973             UNKNOWN,                  // 11D07
7974             MASARAM_GONDI,            // 11D08..11D09
7975             UNKNOWN,                  // 11D0A
7976             MASARAM_GONDI,            // 11D0B..11D36
7977             UNKNOWN,                  // 11D37..11D39
7978             MASARAM_GONDI,            // 11D3A
7979             UNKNOWN,                  // 11D3B
7980             MASARAM_GONDI,            // 11D3C..11D3D
7981             UNKNOWN,                  // 11D3E
7982             MASARAM_GONDI,            // 11D3F..11D47
7983             UNKNOWN,                  // 11D48..11D4F
7984             MASARAM_GONDI,            // 11D50..11D59
7985             UNKNOWN,                  // 11D5A..11D5F
7986             GUNJALA_GONDI,            // 11D60..11D65
7987             UNKNOWN,                  // 11D66
7988             GUNJALA_GONDI,            // 11D67..11D68
7989             UNKNOWN,                  // 11D69
7990             GUNJALA_GONDI,            // 11D6A..11D8E
7991             UNKNOWN,                  // 11D8F
7992             GUNJALA_GONDI,            // 11D90..11D91
7993             UNKNOWN,                  // 11D92
7994             GUNJALA_GONDI,            // 11D93..11D98
7995             UNKNOWN,                  // 11D99..11D9F
7996             GUNJALA_GONDI,            // 11DA0..11DA9
7997             UNKNOWN,                  // 11DAA..11EDF
7998             MAKASAR,                  // 11EE0..11EF8
7999             UNKNOWN,                  // 11EF9..11FAF
8000             LISU,                     // 11FB0
8001             UNKNOWN,                  // 11FB1..11FBF
8002             TAMIL,                    // 11FC0..11FF1
8003             UNKNOWN,                  // 11FF2..11FFE
8004             TAMIL,                    // 11FFF
8005             CUNEIFORM,                // 12000..12399
8006             UNKNOWN,                  // 1239A..123FF
8007             CUNEIFORM,                // 12400..1246E
8008             UNKNOWN,                  // 1246F
8009             CUNEIFORM,                // 12470..12474
8010             UNKNOWN,                  // 12475..1247F
8011             CUNEIFORM,                // 12480..12543
8012             UNKNOWN,                  // 12544..12FFF
8013             EGYPTIAN_HIEROGLYPHS,     // 13000..1342E
8014             UNKNOWN,                  // 1342F
8015             EGYPTIAN_HIEROGLYPHS,     // 13430..13438
8016             UNKNOWN,                  // 13439..143FF
8017             ANATOLIAN_HIEROGLYPHS,    // 14400..14646
8018             UNKNOWN,                  // 14647..167FF
8019             BAMUM,                    // 16800..16A38
8020             UNKNOWN,                  // 16A39..16A3F
8021             MRO,                      // 16A40..16A5E
8022             UNKNOWN,                  // 16A5F
8023             MRO,                      // 16A60..16A69
8024             UNKNOWN,                  // 16A6A..16A6D
8025             MRO,                      // 16A6E..16A6F
8026             UNKNOWN,                  // 16A70..16ACF
8027             BASSA_VAH,                // 16AD0..16AED
8028             UNKNOWN,                  // 16AEE..16AEF
8029             BASSA_VAH,                // 16AF0..16AF5
8030             UNKNOWN,                  // 16AF6..16AFF
8031             PAHAWH_HMONG,             // 16B00..16B45
8032             UNKNOWN,                  // 16B46..16B4F
8033             PAHAWH_HMONG,             // 16B50..16B59
8034             UNKNOWN,                  // 16B5A
8035             PAHAWH_HMONG,             // 16B5B..16B61
8036             UNKNOWN,                  // 16B62
8037             PAHAWH_HMONG,             // 16B63..16B77
8038             UNKNOWN,                  // 16B78..16B7C
8039             PAHAWH_HMONG,             // 16B7D..16B8F
8040             UNKNOWN,                  // 16B90..16E3F
8041             MEDEFAIDRIN,              // 16E40..16E9A
8042             UNKNOWN,                  // 16E9B..16EFF
8043             MIAO,                     // 16F00..16F4A
8044             UNKNOWN,                  // 16F4B..16F4E
8045             MIAO,                     // 16F4F..16F87
8046             UNKNOWN,                  // 16F88..16F8E
8047             MIAO,                     // 16F8F..16F9F
8048             UNKNOWN,                  // 16FA0..16FDF
8049             TANGUT,                   // 16FE0
8050             NUSHU,                    // 16FE1
8051             COMMON,                   // 16FE2..16FE3
8052             KHITAN_SMALL_SCRIPT,      // 16FE4
8053             UNKNOWN,                  // 16FE5..16FEF
8054             HAN,                      // 16FF0..16FF1
8055             UNKNOWN,                  // 16FF2..16FFF
8056             TANGUT,                   // 17000..187F7
8057             UNKNOWN,                  // 187F8..187FF
8058             TANGUT,                   // 18800..18AFF
8059             KHITAN_SMALL_SCRIPT,      // 18B00..18CD5
8060             UNKNOWN,                  // 18CD6..18CFF
8061             TANGUT,                   // 18D00..18D08
8062             UNKNOWN,                  // 18D09..1AFFF
8063             KATAKANA,                 // 1B000
8064             HIRAGANA,                 // 1B001..1B11E
8065             UNKNOWN,                  // 1B11F..1B14F
8066             HIRAGANA,                 // 1B150..1B152
8067             UNKNOWN,                  // 1B153..1B163
8068             KATAKANA,                 // 1B164..1B167
8069             UNKNOWN,                  // 1B168..1B16F
8070             NUSHU,                    // 1B170..1B2FB
8071             UNKNOWN,                  // 1B2FC..1BBFF
8072             DUPLOYAN,                 // 1BC00..1BC6A
8073             UNKNOWN,                  // 1BC6B..1BC6F
8074             DUPLOYAN,                 // 1BC70..1BC7C
8075             UNKNOWN,                  // 1BC7D..1BC7F
8076             DUPLOYAN,                 // 1BC80..1BC88
8077             UNKNOWN,                  // 1BC89..1BC8F
8078             DUPLOYAN,                 // 1BC90..1BC99
8079             UNKNOWN,                  // 1BC9A..1BC9B
8080             DUPLOYAN,                 // 1BC9C..1BC9F
8081             COMMON,                   // 1BCA0..1BCA3
8082             UNKNOWN,                  // 1BCA4..1CFFF
8083             COMMON,                   // 1D000..1D0F5
8084             UNKNOWN,                  // 1D0F6..1D0FF
8085             COMMON,                   // 1D100..1D126
8086             UNKNOWN,                  // 1D127..1D128
8087             COMMON,                   // 1D129..1D166
8088             INHERITED,                // 1D167..1D169
8089             COMMON,                   // 1D16A..1D17A
8090             INHERITED,                // 1D17B..1D182
8091             COMMON,                   // 1D183..1D184
8092             INHERITED,                // 1D185..1D18B
8093             COMMON,                   // 1D18C..1D1A9
8094             INHERITED,                // 1D1AA..1D1AD
8095             COMMON,                   // 1D1AE..1D1E8
8096             UNKNOWN,                  // 1D1E9..1D1FF
8097             GREEK,                    // 1D200..1D245
8098             UNKNOWN,                  // 1D246..1D2DF
8099             COMMON,                   // 1D2E0..1D2F3
8100             UNKNOWN,                  // 1D2F4..1D2FF
8101             COMMON,                   // 1D300..1D356
8102             UNKNOWN,                  // 1D357..1D35F
8103             COMMON,                   // 1D360..1D378
8104             UNKNOWN,                  // 1D379..1D3FF
8105             COMMON,                   // 1D400..1D454
8106             UNKNOWN,                  // 1D455
8107             COMMON,                   // 1D456..1D49C
8108             UNKNOWN,                  // 1D49D
8109             COMMON,                   // 1D49E..1D49F
8110             UNKNOWN,                  // 1D4A0..1D4A1
8111             COMMON,                   // 1D4A2
8112             UNKNOWN,                  // 1D4A3..1D4A4
8113             COMMON,                   // 1D4A5..1D4A6
8114             UNKNOWN,                  // 1D4A7..1D4A8
8115             COMMON,                   // 1D4A9..1D4AC
8116             UNKNOWN,                  // 1D4AD
8117             COMMON,                   // 1D4AE..1D4B9
8118             UNKNOWN,                  // 1D4BA
8119             COMMON,                   // 1D4BB
8120             UNKNOWN,                  // 1D4BC
8121             COMMON,                   // 1D4BD..1D4C3
8122             UNKNOWN,                  // 1D4C4
8123             COMMON,                   // 1D4C5..1D505
8124             UNKNOWN,                  // 1D506
8125             COMMON,                   // 1D507..1D50A
8126             UNKNOWN,                  // 1D50B..1D50C
8127             COMMON,                   // 1D50D..1D514
8128             UNKNOWN,                  // 1D515
8129             COMMON,                   // 1D516..1D51C
8130             UNKNOWN,                  // 1D51D
8131             COMMON,                   // 1D51E..1D539
8132             UNKNOWN,                  // 1D53A
8133             COMMON,                   // 1D53B..1D53E
8134             UNKNOWN,                  // 1D53F
8135             COMMON,                   // 1D540..1D544
8136             UNKNOWN,                  // 1D545
8137             COMMON,                   // 1D546
8138             UNKNOWN,                  // 1D547..1D549
8139             COMMON,                   // 1D54A..1D550
8140             UNKNOWN,                  // 1D551
8141             COMMON,                   // 1D552..1D6A5
8142             UNKNOWN,                  // 1D6A6..1D6A7
8143             COMMON,                   // 1D6A8..1D7CB
8144             UNKNOWN,                  // 1D7CC..1D7CD
8145             COMMON,                   // 1D7CE..1D7FF
8146             SIGNWRITING,              // 1D800..1DA8B
8147             UNKNOWN,                  // 1DA8C..1DA9A
8148             SIGNWRITING,              // 1DA9B..1DA9F
8149             UNKNOWN,                  // 1DAA0
8150             SIGNWRITING,              // 1DAA1..1DAAF
8151             UNKNOWN,                  // 1DAB0..1DFFF
8152             GLAGOLITIC,               // 1E000..1E006
8153             UNKNOWN,                  // 1E007
8154             GLAGOLITIC,               // 1E008..1E018
8155             UNKNOWN,                  // 1E019..1E01A
8156             GLAGOLITIC,               // 1E01B..1E021
8157             UNKNOWN,                  // 1E022
8158             GLAGOLITIC,               // 1E023..1E024
8159             UNKNOWN,                  // 1E025
8160             GLAGOLITIC,               // 1E026..1E02A
8161             UNKNOWN,                  // 1E02B..1E0FF
8162             NYIAKENG_PUACHUE_HMONG,   // 1E100..1E12C
8163             UNKNOWN,                  // 1E12D..1E12F
8164             NYIAKENG_PUACHUE_HMONG,   // 1E130..1E13D
8165             UNKNOWN,                  // 1E13E..1E13F
8166             NYIAKENG_PUACHUE_HMONG,   // 1E140..1E149
8167             UNKNOWN,                  // 1E14A..1E14D
8168             NYIAKENG_PUACHUE_HMONG,   // 1E14E..1E14F
8169             UNKNOWN,                  // 1E150..1E2BF
8170             WANCHO,                   // 1E2C0..1E2F9
8171             UNKNOWN,                  // 1E2FA..1E2FE
8172             WANCHO,                   // 1E2FF
8173             UNKNOWN,                  // 1E300..1E7FF
8174             MENDE_KIKAKUI,            // 1E800..1E8C4
8175             UNKNOWN,                  // 1E8C5..1E8C6
8176             MENDE_KIKAKUI,            // 1E8C7..1E8D6
8177             UNKNOWN,                  // 1E8D7..1E8FF
8178             ADLAM,                    // 1E900..1E94B
8179             UNKNOWN,                  // 1E94C..1E94F
8180             ADLAM,                    // 1E950..1E959
8181             UNKNOWN,                  // 1E95A..1E95D
8182             ADLAM,                    // 1E95E..1E95F
8183             UNKNOWN,                  // 1E960..1EC70
8184             COMMON,                   // 1EC71..1ECB4
8185             UNKNOWN,                  // 1ECB5..1ED00
8186             COMMON,                   // 1ED01..1ED3D
8187             UNKNOWN,                  // 1ED3E..1EDFF
8188             ARABIC,                   // 1EE00..1EE03
8189             UNKNOWN,                  // 1EE04
8190             ARABIC,                   // 1EE05..1EE1F
8191             UNKNOWN,                  // 1EE20
8192             ARABIC,                   // 1EE21..1EE22
8193             UNKNOWN,                  // 1EE23
8194             ARABIC,                   // 1EE24
8195             UNKNOWN,                  // 1EE25..1EE26
8196             ARABIC,                   // 1EE27
8197             UNKNOWN,                  // 1EE28
8198             ARABIC,                   // 1EE29..1EE32
8199             UNKNOWN,                  // 1EE33
8200             ARABIC,                   // 1EE34..1EE37
8201             UNKNOWN,                  // 1EE38
8202             ARABIC,                   // 1EE39
8203             UNKNOWN,                  // 1EE3A
8204             ARABIC,                   // 1EE3B
8205             UNKNOWN,                  // 1EE3C..1EE41
8206             ARABIC,                   // 1EE42
8207             UNKNOWN,                  // 1EE43..1EE46
8208             ARABIC,                   // 1EE47
8209             UNKNOWN,                  // 1EE48
8210             ARABIC,                   // 1EE49
8211             UNKNOWN,                  // 1EE4A
8212             ARABIC,                   // 1EE4B
8213             UNKNOWN,                  // 1EE4C
8214             ARABIC,                   // 1EE4D..1EE4F
8215             UNKNOWN,                  // 1EE50
8216             ARABIC,                   // 1EE51..1EE52
8217             UNKNOWN,                  // 1EE53
8218             ARABIC,                   // 1EE54
8219             UNKNOWN,                  // 1EE55..1EE56
8220             ARABIC,                   // 1EE57
8221             UNKNOWN,                  // 1EE58
8222             ARABIC,                   // 1EE59
8223             UNKNOWN,                  // 1EE5A
8224             ARABIC,                   // 1EE5B
8225             UNKNOWN,                  // 1EE5C
8226             ARABIC,                   // 1EE5D
8227             UNKNOWN,                  // 1EE5E
8228             ARABIC,                   // 1EE5F
8229             UNKNOWN,                  // 1EE60
8230             ARABIC,                   // 1EE61..1EE62
8231             UNKNOWN,                  // 1EE63
8232             ARABIC,                   // 1EE64
8233             UNKNOWN,                  // 1EE65..1EE66
8234             ARABIC,                   // 1EE67..1EE6A
8235             UNKNOWN,                  // 1EE6B
8236             ARABIC,                   // 1EE6C..1EE72
8237             UNKNOWN,                  // 1EE73
8238             ARABIC,                   // 1EE74..1EE77
8239             UNKNOWN,                  // 1EE78
8240             ARABIC,                   // 1EE79..1EE7C
8241             UNKNOWN,                  // 1EE7D
8242             ARABIC,                   // 1EE7E
8243             UNKNOWN,                  // 1EE7F
8244             ARABIC,                   // 1EE80..1EE89
8245             UNKNOWN,                  // 1EE8A
8246             ARABIC,                   // 1EE8B..1EE9B
8247             UNKNOWN,                  // 1EE9C..1EEA0
8248             ARABIC,                   // 1EEA1..1EEA3
8249             UNKNOWN,                  // 1EEA4
8250             ARABIC,                   // 1EEA5..1EEA9
8251             UNKNOWN,                  // 1EEAA
8252             ARABIC,                   // 1EEAB..1EEBB
8253             UNKNOWN,                  // 1EEBC..1EEEF
8254             ARABIC,                   // 1EEF0..1EEF1
8255             UNKNOWN,                  // 1EEF2..1EFFF
8256             COMMON,                   // 1F000..1F02B
8257             UNKNOWN,                  // 1F02C..1F02F
8258             COMMON,                   // 1F030..1F093
8259             UNKNOWN,                  // 1F094..1F09F
8260             COMMON,                   // 1F0A0..1F0AE
8261             UNKNOWN,                  // 1F0AF..1F0B0
8262             COMMON,                   // 1F0B1..1F0BF
8263             UNKNOWN,                  // 1F0C0
8264             COMMON,                   // 1F0C1..1F0CF
8265             UNKNOWN,                  // 1F0D0
8266             COMMON,                   // 1F0D1..1F0F5
8267             UNKNOWN,                  // 1F0F6..1F0FF
8268             COMMON,                   // 1F100..1F1AD
8269             UNKNOWN,                  // 1F1AE..1F1E5
8270             COMMON,                   // 1F1E6..1F1FF
8271             HIRAGANA,                 // 1F200
8272             COMMON,                   // 1F201..1F202
8273             UNKNOWN,                  // 1F203..1F20F
8274             COMMON,                   // 1F210..1F23B
8275             UNKNOWN,                  // 1F23C..1F23F
8276             COMMON,                   // 1F240..1F248
8277             UNKNOWN,                  // 1F249..1F24F
8278             COMMON,                   // 1F250..1F251
8279             UNKNOWN,                  // 1F252..1F25F
8280             COMMON,                   // 1F260..1F265
8281             UNKNOWN,                  // 1F266..1F2FF
8282             COMMON,                   // 1F300..1F6D7
8283             UNKNOWN,                  // 1F6D8..1F6DF
8284             COMMON,                   // 1F6E0..1F6EC
8285             UNKNOWN,                  // 1F6ED..1F6EF
8286             COMMON,                   // 1F6F0..1F6FC
8287             UNKNOWN,                  // 1F6FD..1F6FF
8288             COMMON,                   // 1F700..1F773
8289             UNKNOWN,                  // 1F774..1F77F
8290             COMMON,                   // 1F780..1F7D8
8291             UNKNOWN,                  // 1F7D9..1F7DF
8292             COMMON,                   // 1F7E0..1F7EB
8293             UNKNOWN,                  // 1F7EC..1F7FF
8294             COMMON,                   // 1F800..1F80B
8295             UNKNOWN,                  // 1F80C..1F80F
8296             COMMON,                   // 1F810..1F847
8297             UNKNOWN,                  // 1F848..1F84F
8298             COMMON,                   // 1F850..1F859
8299             UNKNOWN,                  // 1F85A..1F85F
8300             COMMON,                   // 1F860..1F887
8301             UNKNOWN,                  // 1F888..1F88F
8302             COMMON,                   // 1F890..1F8AD
8303             UNKNOWN,                  // 1F8AE..1F8AF
8304             COMMON,                   // 1F8B0..1F8B1
8305             UNKNOWN,                  // 1F8B2..1F8FF
8306             COMMON,                   // 1F900..1F978
8307             UNKNOWN,                  // 1F979
8308             COMMON,                   // 1F97A..1F9CB
8309             UNKNOWN,                  // 1F9CC
8310             COMMON,                   // 1F9CD..1FA53
8311             UNKNOWN,                  // 1FA54..1FA5F
8312             COMMON,                   // 1FA60..1FA6D
8313             UNKNOWN,                  // 1FA6E..1FA6F
8314             COMMON,                   // 1FA70..1FA74
8315             UNKNOWN,                  // 1FA75..1FA77
8316             COMMON,                   // 1FA78..1FA7A
8317             UNKNOWN,                  // 1FA7B..1FA7F
8318             COMMON,                   // 1FA80..1FA86
8319             UNKNOWN,                  // 1FA87..1FA8F
8320             COMMON,                   // 1FA90..1FAA8
8321             UNKNOWN,                  // 1FAA9..1FAAF
8322             COMMON,                   // 1FAB0..1FAB6
8323             UNKNOWN,                  // 1FAB7..1FABF
8324             COMMON,                   // 1FAC0..1FAC2
8325             UNKNOWN,                  // 1FAC3..1FACF
8326             COMMON,                   // 1FAD0..1FAD6
8327             UNKNOWN,                  // 1FAD7..1FAFF
8328             COMMON,                   // 1FB00..1FB92
8329             UNKNOWN,                  // 1FB93
8330             COMMON,                   // 1FB94..1FBCA
8331             UNKNOWN,                  // 1FBCB..1FBEF
8332             COMMON,                   // 1FBF0..1FBF9
8333             UNKNOWN,                  // 1FBFA..1FFFF
8334             HAN,                      // 20000..2A6DD
8335             UNKNOWN,                  // 2A6DE..2A6FF
8336             HAN,                      // 2A700..2B734
8337             UNKNOWN,                  // 2B735..2B73F
8338             HAN,                      // 2B740..2B81D
8339             UNKNOWN,                  // 2B81E..2B81F
8340             HAN,                      // 2B820..2CEA1
8341             UNKNOWN,                  // 2CEA2..2CEAF
8342             HAN,                      // 2CEB0..2EBE0
8343             UNKNOWN,                  // 2EBE1..2F7FF
8344             HAN,                      // 2F800..2FA1D
8345             UNKNOWN,                  // 2FA1E..2FFFF
8346             HAN,                      // 30000..3134A
8347             UNKNOWN,                  // 3134B..E0000
8348             COMMON,                   // E0001
8349             UNKNOWN,                  // E0002..E001F
8350             COMMON,                   // E0020..E007F
8351             UNKNOWN,                  // E0080..E00FF
8352             INHERITED,                // E0100..E01EF
8353             UNKNOWN,                  // E01F0..10FFFF
8354         };
8355 
8356         private static final HashMap<String, Character.UnicodeScript> aliases;
8357         static {
8358             aliases = new HashMap<>((int)(157 / 0.75f + 1.0f));
8359             aliases.put("ADLM", ADLAM);
8360             aliases.put("AGHB", CAUCASIAN_ALBANIAN);
8361             aliases.put("AHOM", AHOM);
8362             aliases.put("ARAB", ARABIC);
8363             aliases.put("ARMI", IMPERIAL_ARAMAIC);
8364             aliases.put("ARMN", ARMENIAN);
8365             aliases.put("AVST", AVESTAN);
8366             aliases.put("BALI", BALINESE);
8367             aliases.put("BAMU", BAMUM);
8368             aliases.put("BASS", BASSA_VAH);
8369             aliases.put("BATK", BATAK);
8370             aliases.put("BENG", BENGALI);
8371             aliases.put("BHKS", BHAIKSUKI);
8372             aliases.put("BOPO", BOPOMOFO);
8373             aliases.put("BRAH", BRAHMI);
8374             aliases.put("BRAI", BRAILLE);
8375             aliases.put("BUGI", BUGINESE);
8376             aliases.put("BUHD", BUHID);
8377             aliases.put("CAKM", CHAKMA);
8378             aliases.put("CANS", CANADIAN_ABORIGINAL);
8379             aliases.put("CARI", CARIAN);
8380             aliases.put("CHAM", CHAM);
8381             aliases.put("CHER", CHEROKEE);
8382             aliases.put("CHRS", CHORASMIAN);
8383             aliases.put("COPT", COPTIC);
8384             aliases.put("CPRT", CYPRIOT);
8385             aliases.put("CYRL", CYRILLIC);
8386             aliases.put("DEVA", DEVANAGARI);
8387             aliases.put("DIAK", DIVES_AKURU);
8388             aliases.put("DOGR", DOGRA);
8389             aliases.put("DSRT", DESERET);
8390             aliases.put("DUPL", DUPLOYAN);
8391             aliases.put("EGYP", EGYPTIAN_HIEROGLYPHS);
8392             aliases.put("ELBA", ELBASAN);
8393             aliases.put("ELYM", ELYMAIC);
8394             aliases.put("ETHI", ETHIOPIC);
8395             aliases.put("GEOR", GEORGIAN);
8396             aliases.put("GLAG", GLAGOLITIC);
8397             aliases.put("GONM", MASARAM_GONDI);
8398             aliases.put("GOTH", GOTHIC);
8399             aliases.put("GONG", GUNJALA_GONDI);
8400             aliases.put("GRAN", GRANTHA);
8401             aliases.put("GREK", GREEK);
8402             aliases.put("GUJR", GUJARATI);
8403             aliases.put("GURU", GURMUKHI);
8404             aliases.put("HANG", HANGUL);
8405             aliases.put("HANI", HAN);
8406             aliases.put("HANO", HANUNOO);
8407             aliases.put("HATR", HATRAN);
8408             aliases.put("HEBR", HEBREW);
8409             aliases.put("HIRA", HIRAGANA);
8410             aliases.put("HLUW", ANATOLIAN_HIEROGLYPHS);
8411             aliases.put("HMNG", PAHAWH_HMONG);
8412             aliases.put("HMNP", NYIAKENG_PUACHUE_HMONG);
8413             // it appears we don't have the KATAKANA_OR_HIRAGANA
8414             //aliases.put("HRKT", KATAKANA_OR_HIRAGANA);
8415             aliases.put("HUNG", OLD_HUNGARIAN);
8416             aliases.put("ITAL", OLD_ITALIC);
8417             aliases.put("JAVA", JAVANESE);
8418             aliases.put("KALI", KAYAH_LI);
8419             aliases.put("KANA", KATAKANA);
8420             aliases.put("KHAR", KHAROSHTHI);
8421             aliases.put("KHMR", KHMER);
8422             aliases.put("KHOJ", KHOJKI);
8423             aliases.put("KITS", KHITAN_SMALL_SCRIPT);
8424             aliases.put("KNDA", KANNADA);
8425             aliases.put("KTHI", KAITHI);
8426             aliases.put("LANA", TAI_THAM);
8427             aliases.put("LAOO", LAO);
8428             aliases.put("LATN", LATIN);
8429             aliases.put("LEPC", LEPCHA);
8430             aliases.put("LIMB", LIMBU);
8431             aliases.put("LINA", LINEAR_A);
8432             aliases.put("LINB", LINEAR_B);
8433             aliases.put("LISU", LISU);
8434             aliases.put("LYCI", LYCIAN);
8435             aliases.put("LYDI", LYDIAN);
8436             aliases.put("MAHJ", MAHAJANI);
8437             aliases.put("MAKA", MAKASAR);
8438             aliases.put("MARC", MARCHEN);
8439             aliases.put("MAND", MANDAIC);
8440             aliases.put("MANI", MANICHAEAN);
8441             aliases.put("MEDF", MEDEFAIDRIN);
8442             aliases.put("MEND", MENDE_KIKAKUI);
8443             aliases.put("MERC", MEROITIC_CURSIVE);
8444             aliases.put("MERO", MEROITIC_HIEROGLYPHS);
8445             aliases.put("MLYM", MALAYALAM);
8446             aliases.put("MODI", MODI);
8447             aliases.put("MONG", MONGOLIAN);
8448             aliases.put("MROO", MRO);
8449             aliases.put("MTEI", MEETEI_MAYEK);
8450             aliases.put("MULT", MULTANI);
8451             aliases.put("MYMR", MYANMAR);
8452             aliases.put("NAND", NANDINAGARI);
8453             aliases.put("NARB", OLD_NORTH_ARABIAN);
8454             aliases.put("NBAT", NABATAEAN);
8455             aliases.put("NEWA", NEWA);
8456             aliases.put("NKOO", NKO);
8457             aliases.put("NSHU", NUSHU);
8458             aliases.put("OGAM", OGHAM);
8459             aliases.put("OLCK", OL_CHIKI);
8460             aliases.put("ORKH", OLD_TURKIC);
8461             aliases.put("ORYA", ORIYA);
8462             aliases.put("OSGE", OSAGE);
8463             aliases.put("OSMA", OSMANYA);
8464             aliases.put("PALM", PALMYRENE);
8465             aliases.put("PAUC", PAU_CIN_HAU);
8466             aliases.put("PERM", OLD_PERMIC);
8467             aliases.put("PHAG", PHAGS_PA);
8468             aliases.put("PHLI", INSCRIPTIONAL_PAHLAVI);
8469             aliases.put("PHLP", PSALTER_PAHLAVI);
8470             aliases.put("PHNX", PHOENICIAN);
8471             aliases.put("PLRD", MIAO);
8472             aliases.put("PRTI", INSCRIPTIONAL_PARTHIAN);
8473             aliases.put("RJNG", REJANG);
8474             aliases.put("ROHG", HANIFI_ROHINGYA);
8475             aliases.put("RUNR", RUNIC);
8476             aliases.put("SAMR", SAMARITAN);
8477             aliases.put("SARB", OLD_SOUTH_ARABIAN);
8478             aliases.put("SAUR", SAURASHTRA);
8479             aliases.put("SGNW", SIGNWRITING);
8480             aliases.put("SHAW", SHAVIAN);
8481             aliases.put("SHRD", SHARADA);
8482             aliases.put("SIDD", SIDDHAM);
8483             aliases.put("SIND", KHUDAWADI);
8484             aliases.put("SINH", SINHALA);
8485             aliases.put("SOGD", SOGDIAN);
8486             aliases.put("SOGO", OLD_SOGDIAN);
8487             aliases.put("SORA", SORA_SOMPENG);
8488             aliases.put("SOYO", SOYOMBO);
8489             aliases.put("SUND", SUNDANESE);
8490             aliases.put("SYLO", SYLOTI_NAGRI);
8491             aliases.put("SYRC", SYRIAC);
8492             aliases.put("TAGB", TAGBANWA);
8493             aliases.put("TAKR", TAKRI);
8494             aliases.put("TALE", TAI_LE);
8495             aliases.put("TALU", NEW_TAI_LUE);
8496             aliases.put("TAML", TAMIL);
8497             aliases.put("TANG", TANGUT);
8498             aliases.put("TAVT", TAI_VIET);
8499             aliases.put("TELU", TELUGU);
8500             aliases.put("TFNG", TIFINAGH);
8501             aliases.put("TGLG", TAGALOG);
8502             aliases.put("THAA", THAANA);
8503             aliases.put("THAI", THAI);
8504             aliases.put("TIBT", TIBETAN);
8505             aliases.put("TIRH", TIRHUTA);
8506             aliases.put("UGAR", UGARITIC);
8507             aliases.put("VAII", VAI);
8508             aliases.put("WARA", WARANG_CITI);
8509             aliases.put("WCHO", WANCHO);
8510             aliases.put("XPEO", OLD_PERSIAN);
8511             aliases.put("XSUX", CUNEIFORM);
8512             aliases.put("YIII", YI);
8513             aliases.put("YEZI", YEZIDI);
8514             aliases.put("ZANB", ZANABAZAR_SQUARE);
8515             aliases.put("ZINH", INHERITED);
8516             aliases.put("ZYYY", COMMON);
8517             aliases.put("ZZZZ", UNKNOWN);
8518         }
8519 
8520         /**
8521          * Returns the enum constant representing the Unicode script of which
8522          * the given character (Unicode code point) is assigned to.
8523          *
8524          * @param   codePoint the character (Unicode code point) in question.
8525          * @return  The {@code UnicodeScript} constant representing the
8526          *          Unicode script of which this character is assigned to.
8527          *
8528          * @throws  IllegalArgumentException if the specified
8529          * {@code codePoint} is an invalid Unicode code point.
8530          * @see Character#isValidCodePoint(int)
8531          *
8532          */
of(int codePoint)8533         public static UnicodeScript of(int codePoint) {
8534             if (!isValidCodePoint(codePoint))
8535                 throw new IllegalArgumentException(
8536                     String.format("Not a valid Unicode code point: 0x%X", codePoint));
8537             int type = getType(codePoint);
8538             // leave SURROGATE and PRIVATE_USE for table lookup
8539             if (type == UNASSIGNED)
8540                 return UNKNOWN;
8541             int index = Arrays.binarySearch(scriptStarts, codePoint);
8542             if (index < 0)
8543                 index = -index - 2;
8544             return scripts[index];
8545         }
8546 
8547         /**
8548          * Returns the UnicodeScript constant with the given Unicode script
8549          * name or the script name alias. Script names and their aliases are
8550          * determined by The Unicode Standard. The files {@code Scripts<version>.txt}
8551          * and {@code PropertyValueAliases<version>.txt} define script names
8552          * and the script name aliases for a particular version of the
8553          * standard. The {@link Character} class specifies the version of
8554          * the standard that it supports.
8555          * <p>
8556          * Character case is ignored for all of the valid script names.
8557          * The en_US locale's case mapping rules are used to provide
8558          * case-insensitive string comparisons for script name validation.
8559          *
8560          * @param scriptName A {@code UnicodeScript} name.
8561          * @return The {@code UnicodeScript} constant identified
8562          *         by {@code scriptName}
8563          * @throws IllegalArgumentException if {@code scriptName} is an
8564          *         invalid name
8565          * @throws NullPointerException if {@code scriptName} is null
8566          */
forName(String scriptName)8567         public static final UnicodeScript forName(String scriptName) {
8568             scriptName = scriptName.toUpperCase(Locale.ENGLISH);
8569                                  //.replace(' ', '_'));
8570             UnicodeScript sc = aliases.get(scriptName);
8571             if (sc != null)
8572                 return sc;
8573             return valueOf(scriptName);
8574         }
8575     }
8576 
    /**
     * The primitive {@code char} wrapped by this {@code Character}.
     * Being {@code final}, it is assigned once by the constructor and
     * never modified afterwards.
     *
     * @serial
     */
    private final char value;

    /** use serialVersionUID from JDK 1.0.2 for interoperability */
    @java.io.Serial
    private static final long serialVersionUID = 3786198910865385080L;
8587 
8588     /**
8589      * Constructs a newly allocated {@code Character} object that
8590      * represents the specified {@code char} value.
8591      *
8592      * @param  value   the value to be represented by the
8593      *                  {@code Character} object.
8594      *
8595      * @deprecated
8596      * It is rarely appropriate to use this constructor. The static factory
8597      * {@link #valueOf(char)} is generally a better choice, as it is
8598      * likely to yield significantly better space and time performance.
8599      */
8600     // Android-changed: not yet forRemoval on Android.
8601     @Deprecated(since="9"/*, forRemoval = true*/)
Character(char value)8602     public Character(char value) {
8603         this.value = value;
8604     }
8605 
8606     private static class CharacterCache {
CharacterCache()8607         private CharacterCache(){}
8608 
8609         static final Character[] cache;
8610         static Character[] archivedCache;
8611 
8612         static {
8613             int size = 127 + 1;
8614 
8615             // Load and use the archived cache if it exists
8616             // Android-removed: CDS is not used on Android.
8617             // CDS.initializeFromArchive(CharacterCache.class);
8618             if (archivedCache == null || archivedCache.length != size) {
8619                 Character[] c = new Character[size];
8620                 for (int i = 0; i < size; i++) {
8621                     c[i] = new Character((char) i);
8622                 }
8623                 archivedCache = c;
8624             }
8625             cache = archivedCache;
8626         }
8627     }
8628 
8629     /**
8630      * Returns a {@code Character} instance representing the specified
8631      * {@code char} value.
8632      * If a new {@code Character} instance is not required, this method
8633      * should generally be used in preference to the constructor
8634      * {@link #Character(char)}, as this method is likely to yield
8635      * significantly better space and time performance by caching
8636      * frequently requested values.
8637      *
8638      * This method will always cache values in the range {@code
8639      * '\u005Cu0000'} to {@code '\u005Cu007F'}, inclusive, and may
8640      * cache other values outside of this range.
8641      *
8642      * @param  c a char value.
8643      * @return a {@code Character} instance representing {@code c}.
8644      * @since  1.5
8645      */
8646     @IntrinsicCandidate
valueOf(char c)8647     public static Character valueOf(char c) {
8648         if (c <= 127) { // must cache
8649             return CharacterCache.cache[(int)c];
8650         }
8651         return new Character(c);
8652     }
8653 
8654     /**
8655      * Returns the value of this {@code Character} object.
8656      * @return  the primitive {@code char} value represented by
8657      *          this object.
8658      */
8659     @IntrinsicCandidate
charValue()8660     public char charValue() {
8661         return value;
8662     }
8663 
8664     /**
8665      * Returns a hash code for this {@code Character}; equal to the result
8666      * of invoking {@code charValue()}.
8667      *
8668      * @return a hash code value for this {@code Character}
8669      */
8670     @Override
hashCode()8671     public int hashCode() {
8672         return Character.hashCode(value);
8673     }
8674 
8675     /**
8676      * Returns a hash code for a {@code char} value; compatible with
8677      * {@code Character.hashCode()}.
8678      *
8679      * @since 1.8
8680      *
8681      * @param value The {@code char} for which to return a hash code.
8682      * @return a hash code value for a {@code char} value.
8683      */
hashCode(char value)8684     public static int hashCode(char value) {
8685         return (int)value;
8686     }
8687 
8688     /**
8689      * Compares this object against the specified object.
8690      * The result is {@code true} if and only if the argument is not
8691      * {@code null} and is a {@code Character} object that
8692      * represents the same {@code char} value as this object.
8693      *
8694      * @param   obj   the object to compare with.
8695      * @return  {@code true} if the objects are the same;
8696      *          {@code false} otherwise.
8697      */
equals(Object obj)8698     public boolean equals(Object obj) {
8699         if (obj instanceof Character) {
8700             return value == ((Character)obj).charValue();
8701         }
8702         return false;
8703     }
8704 
8705     /**
8706      * Returns a {@code String} object representing this
8707      * {@code Character}'s value.  The result is a string of
8708      * length 1 whose sole component is the primitive
8709      * {@code char} value represented by this
8710      * {@code Character} object.
8711      *
8712      * @return  a string representation of this object.
8713      */
toString()8714     public String toString() {
8715         return String.valueOf(value);
8716     }
8717 
8718     // Android-removed: reference to Character.toString(int) in javadoc.
8719     /**
8720      * Returns a {@code String} object representing the
8721      * specified {@code char}.  The result is a string of length
8722      * 1 consisting solely of the specified {@code char}.
8723      *
8724      * @param c the {@code char} to be converted
8725      * @return the string representation of the specified {@code char}
8726      * @since 1.4
8727      */
toString(char c)8728     public static String toString(char c) {
8729         return String.valueOf(c);
8730     }
8731 
8732     // BEGIN Android-removed: expose after String.valueOfCodePoint() is imported.
8733     /**
8734      * Returns a {@code String} object representing the
8735      * specified character (Unicode code point).  The result is a string of
8736      * length 1 or 2, consisting solely of the specified {@code codePoint}.
8737      *
8738      * @param codePoint the {@code codePoint} to be converted
8739      * @return the string representation of the specified {@code codePoint}
8740      * @throws IllegalArgumentException if the specified
8741      *      {@code codePoint} is not a {@linkplain #isValidCodePoint
8742      *      valid Unicode code point}.
8743      * @since 11
8744      */
toString(int codePoint)8745     public static String toString(int codePoint) {
8746         return String.valueOfCodePoint(codePoint);
8747     }
8748 
8749     /**
8750      * Determines whether the specified code point is a valid
8751      * <a href="http://www.unicode.org/glossary/#code_point">
8752      * Unicode code point value</a>.
8753      *
8754      * @param  codePoint the Unicode code point to be tested
8755      * @return {@code true} if the specified code point value is between
8756      *         {@link #MIN_CODE_POINT} and
8757      *         {@link #MAX_CODE_POINT} inclusive;
8758      *         {@code false} otherwise.
8759      * @since  1.5
8760      */
isValidCodePoint(int codePoint)8761     public static boolean isValidCodePoint(int codePoint) {
8762         // Optimized form of:
8763         //     codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT
8764         int plane = codePoint >>> 16;
8765         return plane < ((MAX_CODE_POINT + 1) >>> 16);
8766     }
8767 
8768     /**
8769      * Determines whether the specified character (Unicode code point)
8770      * is in the <a href="#BMP">Basic Multilingual Plane (BMP)</a>.
8771      * Such code points can be represented using a single {@code char}.
8772      *
     * @param  codePoint the character (Unicode code point) to be tested
8774      * @return {@code true} if the specified code point is between
8775      *         {@link #MIN_VALUE} and {@link #MAX_VALUE} inclusive;
8776      *         {@code false} otherwise.
8777      * @since  1.7
8778      */
isBmpCodePoint(int codePoint)8779     public static boolean isBmpCodePoint(int codePoint) {
8780         return codePoint >>> 16 == 0;
8781         // Optimized form of:
8782         //     codePoint >= MIN_VALUE && codePoint <= MAX_VALUE
8783         // We consistently use logical shift (>>>) to facilitate
8784         // additional runtime optimizations.
8785     }
8786 
8787     /**
8788      * Determines whether the specified character (Unicode code point)
8789      * is in the <a href="#supplementary">supplementary character</a> range.
8790      *
8791      * @param  codePoint the character (Unicode code point) to be tested
8792      * @return {@code true} if the specified code point is between
8793      *         {@link #MIN_SUPPLEMENTARY_CODE_POINT} and
8794      *         {@link #MAX_CODE_POINT} inclusive;
8795      *         {@code false} otherwise.
8796      * @since  1.5
8797      */
isSupplementaryCodePoint(int codePoint)8798     public static boolean isSupplementaryCodePoint(int codePoint) {
8799         return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT
8800             && codePoint <  MAX_CODE_POINT + 1;
8801     }
8802 
8803     /**
8804      * Determines if the given {@code char} value is a
8805      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
8806      * Unicode high-surrogate code unit</a>
8807      * (also known as <i>leading-surrogate code unit</i>).
8808      *
8809      * <p>Such values do not represent characters by themselves,
8810      * but are used in the representation of
8811      * <a href="#supplementary">supplementary characters</a>
8812      * in the UTF-16 encoding.
8813      *
8814      * @param  ch the {@code char} value to be tested.
8815      * @return {@code true} if the {@code char} value is between
8816      *         {@link #MIN_HIGH_SURROGATE} and
8817      *         {@link #MAX_HIGH_SURROGATE} inclusive;
8818      *         {@code false} otherwise.
8819      * @see    Character#isLowSurrogate(char)
8820      * @see    Character.UnicodeBlock#of(int)
8821      * @since  1.5
8822      */
isHighSurrogate(char ch)8823     public static boolean isHighSurrogate(char ch) {
8824         // Help VM constant-fold; MAX_HIGH_SURROGATE + 1 == MIN_LOW_SURROGATE
8825         return ch >= MIN_HIGH_SURROGATE && ch < (MAX_HIGH_SURROGATE + 1);
8826     }
8827 
8828     /**
8829      * Determines if the given {@code char} value is a
8830      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
8831      * Unicode low-surrogate code unit</a>
8832      * (also known as <i>trailing-surrogate code unit</i>).
8833      *
8834      * <p>Such values do not represent characters by themselves,
8835      * but are used in the representation of
8836      * <a href="#supplementary">supplementary characters</a>
8837      * in the UTF-16 encoding.
8838      *
8839      * @param  ch the {@code char} value to be tested.
8840      * @return {@code true} if the {@code char} value is between
8841      *         {@link #MIN_LOW_SURROGATE} and
8842      *         {@link #MAX_LOW_SURROGATE} inclusive;
8843      *         {@code false} otherwise.
8844      * @see    Character#isHighSurrogate(char)
8845      * @since  1.5
8846      */
isLowSurrogate(char ch)8847     public static boolean isLowSurrogate(char ch) {
8848         return ch >= MIN_LOW_SURROGATE && ch < (MAX_LOW_SURROGATE + 1);
8849     }
8850 
8851     /**
8852      * Determines if the given {@code char} value is a Unicode
8853      * <i>surrogate code unit</i>.
8854      *
8855      * <p>Such values do not represent characters by themselves,
8856      * but are used in the representation of
8857      * <a href="#supplementary">supplementary characters</a>
8858      * in the UTF-16 encoding.
8859      *
8860      * <p>A char value is a surrogate code unit if and only if it is either
8861      * a {@linkplain #isLowSurrogate(char) low-surrogate code unit} or
8862      * a {@linkplain #isHighSurrogate(char) high-surrogate code unit}.
8863      *
8864      * @param  ch the {@code char} value to be tested.
8865      * @return {@code true} if the {@code char} value is between
8866      *         {@link #MIN_SURROGATE} and
8867      *         {@link #MAX_SURROGATE} inclusive;
8868      *         {@code false} otherwise.
8869      * @since  1.7
8870      */
isSurrogate(char ch)8871     public static boolean isSurrogate(char ch) {
8872         return ch >= MIN_SURROGATE && ch < (MAX_SURROGATE + 1);
8873     }
8874 
8875     /**
8876      * Determines whether the specified pair of {@code char}
8877      * values is a valid
8878      * <a href="http://www.unicode.org/glossary/#surrogate_pair">
8879      * Unicode surrogate pair</a>.
8880      *
8881      * <p>This method is equivalent to the expression:
8882      * <blockquote><pre>{@code
8883      * isHighSurrogate(high) && isLowSurrogate(low)
8884      * }</pre></blockquote>
8885      *
8886      * @param  high the high-surrogate code value to be tested
8887      * @param  low the low-surrogate code value to be tested
8888      * @return {@code true} if the specified high and
8889      * low-surrogate code values represent a valid surrogate pair;
8890      * {@code false} otherwise.
8891      * @since  1.5
8892      */
isSurrogatePair(char high, char low)8893     public static boolean isSurrogatePair(char high, char low) {
8894         return isHighSurrogate(high) && isLowSurrogate(low);
8895     }
8896 
8897     /**
8898      * Determines the number of {@code char} values needed to
8899      * represent the specified character (Unicode code point). If the
8900      * specified character is equal to or greater than 0x10000, then
8901      * the method returns 2. Otherwise, the method returns 1.
8902      *
8903      * <p>This method doesn't validate the specified character to be a
8904      * valid Unicode code point. The caller must validate the
8905      * character value using {@link #isValidCodePoint(int) isValidCodePoint}
8906      * if necessary.
8907      *
8908      * @param   codePoint the character (Unicode code point) to be tested.
     * @return  2 if the character is equal to or greater than 0x10000
     *          (the value is not validated); 1 otherwise.
8910      * @see     Character#isSupplementaryCodePoint(int)
8911      * @since   1.5
8912      */
charCount(int codePoint)8913     public static int charCount(int codePoint) {
8914         return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT ? 2 : 1;
8915     }
8916 
8917     /**
8918      * Converts the specified surrogate pair to its supplementary code
8919      * point value. This method does not validate the specified
8920      * surrogate pair. The caller must validate it using {@link
8921      * #isSurrogatePair(char, char) isSurrogatePair} if necessary.
8922      *
8923      * @param  high the high-surrogate code unit
8924      * @param  low the low-surrogate code unit
8925      * @return the supplementary code point composed from the
8926      *         specified surrogate pair.
8927      * @since  1.5
8928      */
toCodePoint(char high, char low)8929     public static int toCodePoint(char high, char low) {
8930         // Optimized form of:
8931         // return ((high - MIN_HIGH_SURROGATE) << 10)
8932         //         + (low - MIN_LOW_SURROGATE)
8933         //         + MIN_SUPPLEMENTARY_CODE_POINT;
8934         return ((high << 10) + low) + (MIN_SUPPLEMENTARY_CODE_POINT
8935                                        - (MIN_HIGH_SURROGATE << 10)
8936                                        - MIN_LOW_SURROGATE);
8937     }
8938 
8939     /**
8940      * Returns the code point at the given index of the
8941      * {@code CharSequence}. If the {@code char} value at
8942      * the given index in the {@code CharSequence} is in the
8943      * high-surrogate range, the following index is less than the
8944      * length of the {@code CharSequence}, and the
8945      * {@code char} value at the following index is in the
8946      * low-surrogate range, then the supplementary code point
8947      * corresponding to this surrogate pair is returned. Otherwise,
8948      * the {@code char} value at the given index is returned.
8949      *
8950      * @param seq a sequence of {@code char} values (Unicode code
8951      * units)
8952      * @param index the index to the {@code char} values (Unicode
8953      * code units) in {@code seq} to be converted
8954      * @return the Unicode code point at the given index
8955      * @throws NullPointerException if {@code seq} is null.
8956      * @throws IndexOutOfBoundsException if the value
8957      * {@code index} is negative or not less than
8958      * {@link CharSequence#length() seq.length()}.
8959      * @since  1.5
8960      */
codePointAt(CharSequence seq, int index)8961     public static int codePointAt(CharSequence seq, int index) {
8962         char c1 = seq.charAt(index);
8963         if (isHighSurrogate(c1) && ++index < seq.length()) {
8964             char c2 = seq.charAt(index);
8965             if (isLowSurrogate(c2)) {
8966                 return toCodePoint(c1, c2);
8967             }
8968         }
8969         return c1;
8970     }
8971 
8972     /**
8973      * Returns the code point at the given index of the
8974      * {@code char} array. If the {@code char} value at
8975      * the given index in the {@code char} array is in the
8976      * high-surrogate range, the following index is less than the
8977      * length of the {@code char} array, and the
8978      * {@code char} value at the following index is in the
8979      * low-surrogate range, then the supplementary code point
8980      * corresponding to this surrogate pair is returned. Otherwise,
8981      * the {@code char} value at the given index is returned.
8982      *
8983      * @param a the {@code char} array
8984      * @param index the index to the {@code char} values (Unicode
8985      * code units) in the {@code char} array to be converted
8986      * @return the Unicode code point at the given index
8987      * @throws NullPointerException if {@code a} is null.
8988      * @throws IndexOutOfBoundsException if the value
8989      * {@code index} is negative or not less than
8990      * the length of the {@code char} array.
8991      * @since  1.5
8992      */
codePointAt(char[] a, int index)8993     public static int codePointAt(char[] a, int index) {
8994         return codePointAtImpl(a, index, a.length);
8995     }
8996 
8997     /**
8998      * Returns the code point at the given index of the
8999      * {@code char} array, where only array elements with
9000      * {@code index} less than {@code limit} can be used. If
9001      * the {@code char} value at the given index in the
9002      * {@code char} array is in the high-surrogate range, the
9003      * following index is less than the {@code limit}, and the
9004      * {@code char} value at the following index is in the
9005      * low-surrogate range, then the supplementary code point
9006      * corresponding to this surrogate pair is returned. Otherwise,
9007      * the {@code char} value at the given index is returned.
9008      *
9009      * @param a the {@code char} array
9010      * @param index the index to the {@code char} values (Unicode
9011      * code units) in the {@code char} array to be converted
9012      * @param limit the index after the last array element that
9013      * can be used in the {@code char} array
9014      * @return the Unicode code point at the given index
9015      * @throws NullPointerException if {@code a} is null.
9016      * @throws IndexOutOfBoundsException if the {@code index}
9017      * argument is negative or not less than the {@code limit}
9018      * argument, or if the {@code limit} argument is negative or
9019      * greater than the length of the {@code char} array.
9020      * @since  1.5
9021      */
codePointAt(char[] a, int index, int limit)9022     public static int codePointAt(char[] a, int index, int limit) {
9023         if (index >= limit || limit < 0 || limit > a.length) {
9024             throw new IndexOutOfBoundsException();
9025         }
9026         return codePointAtImpl(a, index, limit);
9027     }
9028 
9029     // throws ArrayIndexOutOfBoundsException if index out of bounds
codePointAtImpl(char[] a, int index, int limit)9030     static int codePointAtImpl(char[] a, int index, int limit) {
9031         char c1 = a[index];
9032         if (isHighSurrogate(c1) && ++index < limit) {
9033             char c2 = a[index];
9034             if (isLowSurrogate(c2)) {
9035                 return toCodePoint(c1, c2);
9036             }
9037         }
9038         return c1;
9039     }
9040 
9041     /**
9042      * Returns the code point preceding the given index of the
9043      * {@code CharSequence}. If the {@code char} value at
9044      * {@code (index - 1)} in the {@code CharSequence} is in
9045      * the low-surrogate range, {@code (index - 2)} is not
9046      * negative, and the {@code char} value at {@code (index - 2)}
9047      * in the {@code CharSequence} is in the
9048      * high-surrogate range, then the supplementary code point
9049      * corresponding to this surrogate pair is returned. Otherwise,
9050      * the {@code char} value at {@code (index - 1)} is
9051      * returned.
9052      *
9053      * @param seq the {@code CharSequence} instance
9054      * @param index the index following the code point that should be returned
9055      * @return the Unicode code point value before the given index.
9056      * @throws NullPointerException if {@code seq} is null.
9057      * @throws IndexOutOfBoundsException if the {@code index}
9058      * argument is less than 1 or greater than {@link
9059      * CharSequence#length() seq.length()}.
9060      * @since  1.5
9061      */
codePointBefore(CharSequence seq, int index)9062     public static int codePointBefore(CharSequence seq, int index) {
9063         char c2 = seq.charAt(--index);
9064         if (isLowSurrogate(c2) && index > 0) {
9065             char c1 = seq.charAt(--index);
9066             if (isHighSurrogate(c1)) {
9067                 return toCodePoint(c1, c2);
9068             }
9069         }
9070         return c2;
9071     }
9072 
9073     /**
9074      * Returns the code point preceding the given index of the
9075      * {@code char} array. If the {@code char} value at
9076      * {@code (index - 1)} in the {@code char} array is in
9077      * the low-surrogate range, {@code (index - 2)} is not
9078      * negative, and the {@code char} value at {@code (index - 2)}
9079      * in the {@code char} array is in the
9080      * high-surrogate range, then the supplementary code point
9081      * corresponding to this surrogate pair is returned. Otherwise,
9082      * the {@code char} value at {@code (index - 1)} is
9083      * returned.
9084      *
9085      * @param a the {@code char} array
9086      * @param index the index following the code point that should be returned
9087      * @return the Unicode code point value before the given index.
9088      * @throws NullPointerException if {@code a} is null.
9089      * @throws IndexOutOfBoundsException if the {@code index}
9090      * argument is less than 1 or greater than the length of the
9091      * {@code char} array
9092      * @since  1.5
9093      */
codePointBefore(char[] a, int index)9094     public static int codePointBefore(char[] a, int index) {
9095         return codePointBeforeImpl(a, index, 0);
9096     }
9097 
9098     /**
9099      * Returns the code point preceding the given index of the
9100      * {@code char} array, where only array elements with
9101      * {@code index} greater than or equal to {@code start}
9102      * can be used. If the {@code char} value at {@code (index - 1)}
9103      * in the {@code char} array is in the
9104      * low-surrogate range, {@code (index - 2)} is not less than
9105      * {@code start}, and the {@code char} value at
9106      * {@code (index - 2)} in the {@code char} array is in
9107      * the high-surrogate range, then the supplementary code point
9108      * corresponding to this surrogate pair is returned. Otherwise,
9109      * the {@code char} value at {@code (index - 1)} is
9110      * returned.
9111      *
9112      * @param a the {@code char} array
9113      * @param index the index following the code point that should be returned
9114      * @param start the index of the first array element in the
9115      * {@code char} array
9116      * @return the Unicode code point value before the given index.
9117      * @throws NullPointerException if {@code a} is null.
9118      * @throws IndexOutOfBoundsException if the {@code index}
9119      * argument is not greater than the {@code start} argument or
9120      * is greater than the length of the {@code char} array, or
9121      * if the {@code start} argument is negative or not less than
9122      * the length of the {@code char} array.
9123      * @since  1.5
9124      */
codePointBefore(char[] a, int index, int start)9125     public static int codePointBefore(char[] a, int index, int start) {
9126         if (index <= start || start < 0 || start >= a.length) {
9127             throw new IndexOutOfBoundsException();
9128         }
9129         return codePointBeforeImpl(a, index, start);
9130     }
9131 
9132     // throws ArrayIndexOutOfBoundsException if index-1 out of bounds
codePointBeforeImpl(char[] a, int index, int start)9133     static int codePointBeforeImpl(char[] a, int index, int start) {
9134         char c2 = a[--index];
9135         if (isLowSurrogate(c2) && index > start) {
9136             char c1 = a[--index];
9137             if (isHighSurrogate(c1)) {
9138                 return toCodePoint(c1, c2);
9139             }
9140         }
9141         return c2;
9142     }
9143 
9144     /**
9145      * Returns the leading surrogate (a
9146      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
9147      * high surrogate code unit</a>) of the
9148      * <a href="http://www.unicode.org/glossary/#surrogate_pair">
9149      * surrogate pair</a>
9150      * representing the specified supplementary character (Unicode
9151      * code point) in the UTF-16 encoding.  If the specified character
9152      * is not a
9153      * <a href="Character.html#supplementary">supplementary character</a>,
9154      * an unspecified {@code char} is returned.
9155      *
9156      * <p>If
9157      * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
9158      * is {@code true}, then
9159      * {@link #isHighSurrogate isHighSurrogate}{@code (highSurrogate(x))} and
9160      * {@link #toCodePoint toCodePoint}{@code (highSurrogate(x), }{@link #lowSurrogate lowSurrogate}{@code (x)) == x}
9161      * are also always {@code true}.
9162      *
9163      * @param   codePoint a supplementary character (Unicode code point)
9164      * @return  the leading surrogate code unit used to represent the
9165      *          character in the UTF-16 encoding
9166      * @since   1.7
9167      */
highSurrogate(int codePoint)9168     public static char highSurrogate(int codePoint) {
9169         return (char) ((codePoint >>> 10)
9170             + (MIN_HIGH_SURROGATE - (MIN_SUPPLEMENTARY_CODE_POINT >>> 10)));
9171     }
9172 
9173     /**
9174      * Returns the trailing surrogate (a
9175      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
9176      * low surrogate code unit</a>) of the
9177      * <a href="http://www.unicode.org/glossary/#surrogate_pair">
9178      * surrogate pair</a>
9179      * representing the specified supplementary character (Unicode
9180      * code point) in the UTF-16 encoding.  If the specified character
9181      * is not a
9182      * <a href="Character.html#supplementary">supplementary character</a>,
9183      * an unspecified {@code char} is returned.
9184      *
9185      * <p>If
9186      * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
9187      * is {@code true}, then
9188      * {@link #isLowSurrogate isLowSurrogate}{@code (lowSurrogate(x))} and
9189      * {@link #toCodePoint toCodePoint}{@code (}{@link #highSurrogate highSurrogate}{@code (x), lowSurrogate(x)) == x}
9190      * are also always {@code true}.
9191      *
9192      * @param   codePoint a supplementary character (Unicode code point)
9193      * @return  the trailing surrogate code unit used to represent the
9194      *          character in the UTF-16 encoding
9195      * @since   1.7
9196      */
lowSurrogate(int codePoint)9197     public static char lowSurrogate(int codePoint) {
9198         return (char) ((codePoint & 0x3ff) + MIN_LOW_SURROGATE);
9199     }
9200 
9201     /**
9202      * Converts the specified character (Unicode code point) to its
9203      * UTF-16 representation. If the specified code point is a BMP
9204      * (Basic Multilingual Plane or Plane 0) value, the same value is
9205      * stored in {@code dst[dstIndex]}, and 1 is returned. If the
9206      * specified code point is a supplementary character, its
9207      * surrogate values are stored in {@code dst[dstIndex]}
9208      * (high-surrogate) and {@code dst[dstIndex+1]}
9209      * (low-surrogate), and 2 is returned.
9210      *
9211      * @param  codePoint the character (Unicode code point) to be converted.
9212      * @param  dst an array of {@code char} in which the
9213      * {@code codePoint}'s UTF-16 value is stored.
9214      * @param dstIndex the start index into the {@code dst}
9215      * array where the converted value is stored.
9216      * @return 1 if the code point is a BMP code point, 2 if the
9217      * code point is a supplementary code point.
9218      * @throws IllegalArgumentException if the specified
9219      * {@code codePoint} is not a valid Unicode code point.
9220      * @throws NullPointerException if the specified {@code dst} is null.
9221      * @throws IndexOutOfBoundsException if {@code dstIndex}
9222      * is negative or not less than {@code dst.length}, or if
9223      * {@code dst} at {@code dstIndex} doesn't have enough
9224      * array element(s) to store the resulting {@code char}
9225      * value(s). (If {@code dstIndex} is equal to
9226      * {@code dst.length-1} and the specified
9227      * {@code codePoint} is a supplementary character, the
9228      * high-surrogate value is not stored in
9229      * {@code dst[dstIndex]}.)
9230      * @since  1.5
9231      */
toChars(int codePoint, char[] dst, int dstIndex)9232     public static int toChars(int codePoint, char[] dst, int dstIndex) {
9233         if (isBmpCodePoint(codePoint)) {
9234             dst[dstIndex] = (char) codePoint;
9235             return 1;
9236         } else if (isValidCodePoint(codePoint)) {
9237             toSurrogates(codePoint, dst, dstIndex);
9238             return 2;
9239         } else {
9240             throw new IllegalArgumentException(
9241                 String.format("Not a valid Unicode code point: 0x%X", codePoint));
9242         }
9243     }
9244 
9245     /**
9246      * Converts the specified character (Unicode code point) to its
9247      * UTF-16 representation stored in a {@code char} array. If
9248      * the specified code point is a BMP (Basic Multilingual Plane or
9249      * Plane 0) value, the resulting {@code char} array has
9250      * the same value as {@code codePoint}. If the specified code
9251      * point is a supplementary code point, the resulting
9252      * {@code char} array has the corresponding surrogate pair.
9253      *
9254      * @param  codePoint a Unicode code point
9255      * @return a {@code char} array having
9256      *         {@code codePoint}'s UTF-16 representation.
9257      * @throws IllegalArgumentException if the specified
9258      * {@code codePoint} is not a valid Unicode code point.
9259      * @since  1.5
9260      */
toChars(int codePoint)9261     public static char[] toChars(int codePoint) {
9262         if (isBmpCodePoint(codePoint)) {
9263             return new char[] { (char) codePoint };
9264         } else if (isValidCodePoint(codePoint)) {
9265             char[] result = new char[2];
9266             toSurrogates(codePoint, result, 0);
9267             return result;
9268         } else {
9269             throw new IllegalArgumentException(
9270                 String.format("Not a valid Unicode code point: 0x%X", codePoint));
9271         }
9272     }
9273 
toSurrogates(int codePoint, char[] dst, int index)9274     static void toSurrogates(int codePoint, char[] dst, int index) {
9275         // We write elements "backwards" to guarantee all-or-nothing
9276         dst[index+1] = lowSurrogate(codePoint);
9277         dst[index] = highSurrogate(codePoint);
9278     }
9279 
9280     /**
9281      * Returns the number of Unicode code points in the text range of
9282      * the specified char sequence. The text range begins at the
9283      * specified {@code beginIndex} and extends to the
9284      * {@code char} at index {@code endIndex - 1}. Thus the
9285      * length (in {@code char}s) of the text range is
9286      * {@code endIndex-beginIndex}. Unpaired surrogates within
9287      * the text range count as one code point each.
9288      *
9289      * @param seq the char sequence
9290      * @param beginIndex the index to the first {@code char} of
9291      * the text range.
9292      * @param endIndex the index after the last {@code char} of
9293      * the text range.
9294      * @return the number of Unicode code points in the specified text
9295      * range
9296      * @throws NullPointerException if {@code seq} is null.
9297      * @throws IndexOutOfBoundsException if the
9298      * {@code beginIndex} is negative, or {@code endIndex}
9299      * is larger than the length of the given sequence, or
9300      * {@code beginIndex} is larger than {@code endIndex}.
9301      * @since  1.5
9302      */
codePointCount(CharSequence seq, int beginIndex, int endIndex)9303     public static int codePointCount(CharSequence seq, int beginIndex, int endIndex) {
9304         int length = seq.length();
9305         if (beginIndex < 0 || endIndex > length || beginIndex > endIndex) {
9306             throw new IndexOutOfBoundsException();
9307         }
9308         int n = endIndex - beginIndex;
9309         for (int i = beginIndex; i < endIndex; ) {
9310             if (isHighSurrogate(seq.charAt(i++)) && i < endIndex &&
9311                 isLowSurrogate(seq.charAt(i))) {
9312                 n--;
9313                 i++;
9314             }
9315         }
9316         return n;
9317     }
9318 
9319     /**
9320      * Returns the number of Unicode code points in a subarray of the
9321      * {@code char} array argument. The {@code offset}
9322      * argument is the index of the first {@code char} of the
9323      * subarray and the {@code count} argument specifies the
9324      * length of the subarray in {@code char}s. Unpaired
9325      * surrogates within the subarray count as one code point each.
9326      *
9327      * @param a the {@code char} array
9328      * @param offset the index of the first {@code char} in the
9329      * given {@code char} array
9330      * @param count the length of the subarray in {@code char}s
9331      * @return the number of Unicode code points in the specified subarray
9332      * @throws NullPointerException if {@code a} is null.
9333      * @throws IndexOutOfBoundsException if {@code offset} or
9334      * {@code count} is negative, or if {@code offset +
9335      * count} is larger than the length of the given array.
9336      * @since  1.5
9337      */
codePointCount(char[] a, int offset, int count)9338     public static int codePointCount(char[] a, int offset, int count) {
9339         if (count > a.length - offset || offset < 0 || count < 0) {
9340             throw new IndexOutOfBoundsException();
9341         }
9342         return codePointCountImpl(a, offset, count);
9343     }
9344 
codePointCountImpl(char[] a, int offset, int count)9345     static int codePointCountImpl(char[] a, int offset, int count) {
9346         int endIndex = offset + count;
9347         int n = count;
9348         for (int i = offset; i < endIndex; ) {
9349             if (isHighSurrogate(a[i++]) && i < endIndex &&
9350                 isLowSurrogate(a[i])) {
9351                 n--;
9352                 i++;
9353             }
9354         }
9355         return n;
9356     }
9357 
9358     /**
9359      * Returns the index within the given char sequence that is offset
9360      * from the given {@code index} by {@code codePointOffset}
9361      * code points. Unpaired surrogates within the text range given by
9362      * {@code index} and {@code codePointOffset} count as
9363      * one code point each.
9364      *
9365      * @param seq the char sequence
9366      * @param index the index to be offset
9367      * @param codePointOffset the offset in code points
9368      * @return the index within the char sequence
9369      * @throws NullPointerException if {@code seq} is null.
9370      * @throws IndexOutOfBoundsException if {@code index}
     *   is negative or larger than the length of the char sequence,
9372      *   or if {@code codePointOffset} is positive and the
9373      *   subsequence starting with {@code index} has fewer than
9374      *   {@code codePointOffset} code points, or if
9375      *   {@code codePointOffset} is negative and the subsequence
9376      *   before {@code index} has fewer than the absolute value
9377      *   of {@code codePointOffset} code points.
9378      * @since 1.5
9379      */
offsetByCodePoints(CharSequence seq, int index, int codePointOffset)9380     public static int offsetByCodePoints(CharSequence seq, int index,
9381                                          int codePointOffset) {
9382         int length = seq.length();
9383         if (index < 0 || index > length) {
9384             throw new IndexOutOfBoundsException();
9385         }
9386 
9387         int x = index;
9388         if (codePointOffset >= 0) {
9389             int i;
9390             for (i = 0; x < length && i < codePointOffset; i++) {
9391                 if (isHighSurrogate(seq.charAt(x++)) && x < length &&
9392                     isLowSurrogate(seq.charAt(x))) {
9393                     x++;
9394                 }
9395             }
9396             if (i < codePointOffset) {
9397                 throw new IndexOutOfBoundsException();
9398             }
9399         } else {
9400             int i;
9401             for (i = codePointOffset; x > 0 && i < 0; i++) {
9402                 if (isLowSurrogate(seq.charAt(--x)) && x > 0 &&
9403                     isHighSurrogate(seq.charAt(x-1))) {
9404                     x--;
9405                 }
9406             }
9407             if (i < 0) {
9408                 throw new IndexOutOfBoundsException();
9409             }
9410         }
9411         return x;
9412     }
9413 
9414     /**
9415      * Returns the index within the given {@code char} subarray
9416      * that is offset from the given {@code index} by
9417      * {@code codePointOffset} code points. The
9418      * {@code start} and {@code count} arguments specify a
9419      * subarray of the {@code char} array. Unpaired surrogates
9420      * within the text range given by {@code index} and
9421      * {@code codePointOffset} count as one code point each.
9422      *
9423      * @param a the {@code char} array
9424      * @param start the index of the first {@code char} of the
9425      * subarray
9426      * @param count the length of the subarray in {@code char}s
9427      * @param index the index to be offset
9428      * @param codePointOffset the offset in code points
9429      * @return the index within the subarray
9430      * @throws NullPointerException if {@code a} is null.
9431      * @throws IndexOutOfBoundsException
9432      *   if {@code start} or {@code count} is negative,
9433      *   or if {@code start + count} is larger than the length of
9434      *   the given array,
9435      *   or if {@code index} is less than {@code start} or
     *   larger than {@code start + count},
9437      *   or if {@code codePointOffset} is positive and the text range
9438      *   starting with {@code index} and ending with {@code start + count - 1}
9439      *   has fewer than {@code codePointOffset} code
9440      *   points,
9441      *   or if {@code codePointOffset} is negative and the text range
9442      *   starting with {@code start} and ending with {@code index - 1}
9443      *   has fewer than the absolute value of
9444      *   {@code codePointOffset} code points.
9445      * @since 1.5
9446      */
offsetByCodePoints(char[] a, int start, int count, int index, int codePointOffset)9447     public static int offsetByCodePoints(char[] a, int start, int count,
9448                                          int index, int codePointOffset) {
9449         if (count > a.length-start || start < 0 || count < 0
9450             || index < start || index > start+count) {
9451             throw new IndexOutOfBoundsException();
9452         }
9453         return offsetByCodePointsImpl(a, start, count, index, codePointOffset);
9454     }
9455 
offsetByCodePointsImpl(char[]a, int start, int count, int index, int codePointOffset)9456     static int offsetByCodePointsImpl(char[]a, int start, int count,
9457                                       int index, int codePointOffset) {
9458         int x = index;
9459         if (codePointOffset >= 0) {
9460             int limit = start + count;
9461             int i;
9462             for (i = 0; x < limit && i < codePointOffset; i++) {
9463                 if (isHighSurrogate(a[x++]) && x < limit &&
9464                     isLowSurrogate(a[x])) {
9465                     x++;
9466                 }
9467             }
9468             if (i < codePointOffset) {
9469                 throw new IndexOutOfBoundsException();
9470             }
9471         } else {
9472             int i;
9473             for (i = codePointOffset; x > start && i < 0; i++) {
9474                 if (isLowSurrogate(a[--x]) && x > start &&
9475                     isHighSurrogate(a[x-1])) {
9476                     x--;
9477                 }
9478             }
9479             if (i < 0) {
9480                 throw new IndexOutOfBoundsException();
9481             }
9482         }
9483         return x;
9484     }
9485 
9486     /**
9487      * Determines if the specified character is a lowercase character.
9488      * <p>
9489      * A character is lowercase if its general category type, provided
9490      * by {@code Character.getType(ch)}, is
9491      * {@code LOWERCASE_LETTER}, or it has contributory property
9492      * Other_Lowercase as defined by the Unicode Standard.
9493      * <p>
9494      * The following are examples of lowercase characters:
9495      * <blockquote><pre>
9496      * a b c d e f g h i j k l m n o p q r s t u v w x y z
9497      * '&#92;u00DF' '&#92;u00E0' '&#92;u00E1' '&#92;u00E2' '&#92;u00E3' '&#92;u00E4' '&#92;u00E5' '&#92;u00E6'
9498      * '&#92;u00E7' '&#92;u00E8' '&#92;u00E9' '&#92;u00EA' '&#92;u00EB' '&#92;u00EC' '&#92;u00ED' '&#92;u00EE'
9499      * '&#92;u00EF' '&#92;u00F0' '&#92;u00F1' '&#92;u00F2' '&#92;u00F3' '&#92;u00F4' '&#92;u00F5' '&#92;u00F6'
9500      * '&#92;u00F8' '&#92;u00F9' '&#92;u00FA' '&#92;u00FB' '&#92;u00FC' '&#92;u00FD' '&#92;u00FE' '&#92;u00FF'
9501      * </pre></blockquote>
9502      * <p> Many other Unicode characters are lowercase too.
9503      *
9504      * <p><b>Note:</b> This method cannot handle <a
9505      * href="#supplementary"> supplementary characters</a>. To support
9506      * all Unicode characters, including supplementary characters, use
9507      * the {@link #isLowerCase(int)} method.
9508      *
9509      * @param   ch   the character to be tested.
9510      * @return  {@code true} if the character is lowercase;
9511      *          {@code false} otherwise.
9512      * @see     Character#isLowerCase(char)
9513      * @see     Character#isTitleCase(char)
9514      * @see     Character#toLowerCase(char)
9515      * @see     Character#getType(char)
9516      */
isLowerCase(char ch)9517     public static boolean isLowerCase(char ch) {
9518         return isLowerCase((int)ch);
9519     }
9520 
9521     /**
9522      * Determines if the specified character (Unicode code point) is a
9523      * lowercase character.
9524      * <p>
9525      * A character is lowercase if its general category type, provided
9526      * by {@link Character#getType getType(codePoint)}, is
9527      * {@code LOWERCASE_LETTER}, or it has contributory property
9528      * Other_Lowercase as defined by the Unicode Standard.
9529      * <p>
9530      * The following are examples of lowercase characters:
9531      * <blockquote><pre>
9532      * a b c d e f g h i j k l m n o p q r s t u v w x y z
9533      * '&#92;u00DF' '&#92;u00E0' '&#92;u00E1' '&#92;u00E2' '&#92;u00E3' '&#92;u00E4' '&#92;u00E5' '&#92;u00E6'
9534      * '&#92;u00E7' '&#92;u00E8' '&#92;u00E9' '&#92;u00EA' '&#92;u00EB' '&#92;u00EC' '&#92;u00ED' '&#92;u00EE'
9535      * '&#92;u00EF' '&#92;u00F0' '&#92;u00F1' '&#92;u00F2' '&#92;u00F3' '&#92;u00F4' '&#92;u00F5' '&#92;u00F6'
9536      * '&#92;u00F8' '&#92;u00F9' '&#92;u00FA' '&#92;u00FB' '&#92;u00FC' '&#92;u00FD' '&#92;u00FE' '&#92;u00FF'
9537      * </pre></blockquote>
9538      * <p> Many other Unicode characters are lowercase too.
9539      *
9540      * @param   codePoint the character (Unicode code point) to be tested.
9541      * @return  {@code true} if the character is lowercase;
9542      *          {@code false} otherwise.
9543      * @see     Character#isLowerCase(int)
9544      * @see     Character#isTitleCase(int)
9545      * @see     Character#toLowerCase(int)
9546      * @see     Character#getType(int)
9547      * @since   1.5
9548      */
9549     // BEGIN Android-changed: Reimplement methods natively on top of ICU4C.
9550     /*
9551     public static boolean isLowerCase(int codePoint) {
9552         return CharacterData.of(codePoint).isLowerCase(codePoint);
9553     }
9554     */
isLowerCase(int codePoint)9555     public static boolean isLowerCase(int codePoint) {
9556         return isLowerCaseImpl(codePoint);
9557     }
9558 
9559     @FastNative
isLowerCaseImpl(int codePoint)9560     static native boolean isLowerCaseImpl(int codePoint);
9561     // END Android-changed: Reimplement methods natively on top of ICU4C.
9562 
9563     /**
9564      * Determines if the specified character is an uppercase character.
9565      * <p>
9566      * A character is uppercase if its general category type, provided by
     * {@code Character.getType(ch)}, is {@code UPPERCASE_LETTER},
     * or it has contributory property Other_Uppercase as defined by the Unicode Standard.
9569      * <p>
9570      * The following are examples of uppercase characters:
9571      * <blockquote><pre>
9572      * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
9573      * '&#92;u00C0' '&#92;u00C1' '&#92;u00C2' '&#92;u00C3' '&#92;u00C4' '&#92;u00C5' '&#92;u00C6' '&#92;u00C7'
9574      * '&#92;u00C8' '&#92;u00C9' '&#92;u00CA' '&#92;u00CB' '&#92;u00CC' '&#92;u00CD' '&#92;u00CE' '&#92;u00CF'
9575      * '&#92;u00D0' '&#92;u00D1' '&#92;u00D2' '&#92;u00D3' '&#92;u00D4' '&#92;u00D5' '&#92;u00D6' '&#92;u00D8'
9576      * '&#92;u00D9' '&#92;u00DA' '&#92;u00DB' '&#92;u00DC' '&#92;u00DD' '&#92;u00DE'
9577      * </pre></blockquote>
9578      * <p> Many other Unicode characters are uppercase too.
9579      *
9580      * <p><b>Note:</b> This method cannot handle <a
9581      * href="#supplementary"> supplementary characters</a>. To support
9582      * all Unicode characters, including supplementary characters, use
9583      * the {@link #isUpperCase(int)} method.
9584      *
9585      * @param   ch   the character to be tested.
9586      * @return  {@code true} if the character is uppercase;
9587      *          {@code false} otherwise.
9588      * @see     Character#isLowerCase(char)
9589      * @see     Character#isTitleCase(char)
9590      * @see     Character#toUpperCase(char)
9591      * @see     Character#getType(char)
9592      * @since   1.0
9593      */
isUpperCase(char ch)9594     public static boolean isUpperCase(char ch) {
9595         return isUpperCase((int)ch);
9596     }
9597 
9598     /**
9599      * Determines if the specified character (Unicode code point) is an uppercase character.
9600      * <p>
9601      * A character is uppercase if its general category type, provided by
9602      * {@link Character#getType(int) getType(codePoint)}, is {@code UPPERCASE_LETTER},
9603      * or it has contributory property Other_Uppercase as defined by the Unicode Standard.
9604      * <p>
9605      * The following are examples of uppercase characters:
9606      * <blockquote><pre>
9607      * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
9608      * '&#92;u00C0' '&#92;u00C1' '&#92;u00C2' '&#92;u00C3' '&#92;u00C4' '&#92;u00C5' '&#92;u00C6' '&#92;u00C7'
9609      * '&#92;u00C8' '&#92;u00C9' '&#92;u00CA' '&#92;u00CB' '&#92;u00CC' '&#92;u00CD' '&#92;u00CE' '&#92;u00CF'
9610      * '&#92;u00D0' '&#92;u00D1' '&#92;u00D2' '&#92;u00D3' '&#92;u00D4' '&#92;u00D5' '&#92;u00D6' '&#92;u00D8'
9611      * '&#92;u00D9' '&#92;u00DA' '&#92;u00DB' '&#92;u00DC' '&#92;u00DD' '&#92;u00DE'
9612      * </pre></blockquote>
9613      * <p> Many other Unicode characters are uppercase too.
9614      *
9615      * @param   codePoint the character (Unicode code point) to be tested.
9616      * @return  {@code true} if the character is uppercase;
9617      *          {@code false} otherwise.
9618      * @see     Character#isLowerCase(int)
9619      * @see     Character#isTitleCase(int)
9620      * @see     Character#toUpperCase(int)
9621      * @see     Character#getType(int)
9622      * @since   1.5
9623      */
9624     // BEGIN Android-changed: Reimplement methods natively on top of ICU4C.
9625     /*
9626     public static boolean isUpperCase(int codePoint) {
9627         return CharacterData.of(codePoint).isUpperCase(codePoint);
9628     }
9629     */
isUpperCase(int codePoint)9630     public static boolean isUpperCase(int codePoint) {
9631         return isUpperCaseImpl(codePoint);
9632     }
9633 
9634     @FastNative
isUpperCaseImpl(int codePoint)9635     static native boolean isUpperCaseImpl(int codePoint);
9636     // END Android-changed: Reimplement methods natively on top of ICU4C.
9637 
9638     /**
9639      * Determines if the specified character is a titlecase character.
9640      * <p>
9641      * A character is a titlecase character if its general
9642      * category type, provided by {@code Character.getType(ch)},
9643      * is {@code TITLECASE_LETTER}.
9644      * <p>
9645      * Some characters look like pairs of Latin letters. For example, there
9646      * is an uppercase letter that looks like "LJ" and has a corresponding
9647      * lowercase letter that looks like "lj". A third form, which looks like "Lj",
9648      * is the appropriate form to use when rendering a word in lowercase
9649      * with initial capitals, as for a book title.
9650      * <p>
9651      * These are some of the Unicode characters for which this method returns
9652      * {@code true}:
9653      * <ul>
9654      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
9655      * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
9656      * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
9657      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
9658      * </ul>
9659      * <p> Many other Unicode characters are titlecase too.
9660      *
9661      * <p><b>Note:</b> This method cannot handle <a
9662      * href="#supplementary"> supplementary characters</a>. To support
9663      * all Unicode characters, including supplementary characters, use
9664      * the {@link #isTitleCase(int)} method.
9665      *
9666      * @param   ch   the character to be tested.
9667      * @return  {@code true} if the character is titlecase;
9668      *          {@code false} otherwise.
9669      * @see     Character#isLowerCase(char)
9670      * @see     Character#isUpperCase(char)
9671      * @see     Character#toTitleCase(char)
9672      * @see     Character#getType(char)
9673      * @since   1.0.2
9674      */
isTitleCase(char ch)9675     public static boolean isTitleCase(char ch) {
9676         return isTitleCase((int)ch);
9677     }
9678 
9679     /**
9680      * Determines if the specified character (Unicode code point) is a titlecase character.
9681      * <p>
9682      * A character is a titlecase character if its general
9683      * category type, provided by {@link Character#getType(int) getType(codePoint)},
9684      * is {@code TITLECASE_LETTER}.
9685      * <p>
9686      * Some characters look like pairs of Latin letters. For example, there
9687      * is an uppercase letter that looks like "LJ" and has a corresponding
9688      * lowercase letter that looks like "lj". A third form, which looks like "Lj",
9689      * is the appropriate form to use when rendering a word in lowercase
9690      * with initial capitals, as for a book title.
9691      * <p>
9692      * These are some of the Unicode characters for which this method returns
9693      * {@code true}:
9694      * <ul>
9695      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
9696      * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
9697      * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
9698      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
9699      * </ul>
9700      * <p> Many other Unicode characters are titlecase too.
9701      *
9702      * @param   codePoint the character (Unicode code point) to be tested.
9703      * @return  {@code true} if the character is titlecase;
9704      *          {@code false} otherwise.
9705      * @see     Character#isLowerCase(int)
9706      * @see     Character#isUpperCase(int)
9707      * @see     Character#toTitleCase(int)
9708      * @see     Character#getType(int)
9709      * @since   1.5
9710      */
9711     // BEGIN Android-changed: Reimplement methods natively on top of ICU4C.
9712     /*
9713     public static boolean isTitleCase(int codePoint) {
9714         return getType(codePoint) == Character.TITLECASE_LETTER;
9715     }
9716     */
isTitleCase(int codePoint)9717     public static boolean isTitleCase(int codePoint) {
9718         return isTitleCaseImpl(codePoint);
9719     }
9720 
9721     @FastNative
isTitleCaseImpl(int codePoint)9722     static native boolean isTitleCaseImpl(int codePoint);
9723     // END Android-changed: Reimplement methods natively on top of ICU4C.
9724 
9725     /**
9726      * Determines if the specified character is a digit.
9727      * <p>
9728      * A character is a digit if its general category type, provided
9729      * by {@code Character.getType(ch)}, is
9730      * {@code DECIMAL_DIGIT_NUMBER}.
9731      * <p>
9732      * Some Unicode character ranges that contain digits:
9733      * <ul>
9734      * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},
9735      *     ISO-LATIN-1 digits ({@code '0'} through {@code '9'})
9736      * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},
9737      *     Arabic-Indic digits
9738      * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},
9739      *     Extended Arabic-Indic digits
9740      * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},
9741      *     Devanagari digits
9742      * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},
9743      *     Fullwidth digits
9744      * </ul>
9745      *
9746      * Many other character ranges contain digits as well.
9747      *
9748      * <p><b>Note:</b> This method cannot handle <a
9749      * href="#supplementary"> supplementary characters</a>. To support
9750      * all Unicode characters, including supplementary characters, use
9751      * the {@link #isDigit(int)} method.
9752      *
9753      * @param   ch   the character to be tested.
9754      * @return  {@code true} if the character is a digit;
9755      *          {@code false} otherwise.
9756      * @see     Character#digit(char, int)
9757      * @see     Character#forDigit(int, int)
9758      * @see     Character#getType(char)
9759      */
isDigit(char ch)9760     public static boolean isDigit(char ch) {
9761         return isDigit((int)ch);
9762     }
9763 
9764     /**
9765      * Determines if the specified character (Unicode code point) is a digit.
9766      * <p>
9767      * A character is a digit if its general category type, provided
9768      * by {@link Character#getType(int) getType(codePoint)}, is
9769      * {@code DECIMAL_DIGIT_NUMBER}.
9770      * <p>
9771      * Some Unicode character ranges that contain digits:
9772      * <ul>
9773      * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},
9774      *     ISO-LATIN-1 digits ({@code '0'} through {@code '9'})
9775      * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},
9776      *     Arabic-Indic digits
9777      * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},
9778      *     Extended Arabic-Indic digits
9779      * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},
9780      *     Devanagari digits
9781      * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},
9782      *     Fullwidth digits
9783      * </ul>
9784      *
9785      * Many other character ranges contain digits as well.
9786      *
9787      * @param   codePoint the character (Unicode code point) to be tested.
9788      * @return  {@code true} if the character is a digit;
9789      *          {@code false} otherwise.
9790      * @see     Character#forDigit(int, int)
9791      * @see     Character#getType(int)
9792      * @since   1.5
9793      */
9794     // BEGIN Android-changed: Reimplement methods natively on top of ICU4C.
9795     /*
9796     public static boolean isDigit(int codePoint) {
9797         return CharacterData.of(codePoint).isDigit(codePoint);
9798     }
9799     */
isDigit(int codePoint)9800     public static boolean isDigit(int codePoint) {
9801         return isDigitImpl(codePoint);
9802     }
9803 
9804     @FastNative
isDigitImpl(int codePoint)9805     static native boolean isDigitImpl(int codePoint);
9806     // END Android-changed: Reimplement methods natively on top of ICU4C.
9807 
9808     /**
9809      * Determines if a character is defined in Unicode.
9810      * <p>
9811      * A character is defined if at least one of the following is true:
9812      * <ul>
9813      * <li>It has an entry in the UnicodeData file.
9814      * <li>It has a value in a range defined by the UnicodeData file.
9815      * </ul>
9816      *
9817      * <p><b>Note:</b> This method cannot handle <a
9818      * href="#supplementary"> supplementary characters</a>. To support
9819      * all Unicode characters, including supplementary characters, use
9820      * the {@link #isDefined(int)} method.
9821      *
9822      * @param   ch   the character to be tested
9823      * @return  {@code true} if the character has a defined meaning
9824      *          in Unicode; {@code false} otherwise.
9825      * @see     Character#isDigit(char)
9826      * @see     Character#isLetter(char)
9827      * @see     Character#isLetterOrDigit(char)
9828      * @see     Character#isLowerCase(char)
9829      * @see     Character#isTitleCase(char)
9830      * @see     Character#isUpperCase(char)
9831      * @since   1.0.2
9832      */
isDefined(char ch)9833     public static boolean isDefined(char ch) {
9834         return isDefined((int)ch);
9835     }
9836 
9837     /**
9838      * Determines if a character (Unicode code point) is defined in Unicode.
9839      * <p>
9840      * A character is defined if at least one of the following is true:
9841      * <ul>
9842      * <li>It has an entry in the UnicodeData file.
9843      * <li>It has a value in a range defined by the UnicodeData file.
9844      * </ul>
9845      *
9846      * @param   codePoint the character (Unicode code point) to be tested.
9847      * @return  {@code true} if the character has a defined meaning
9848      *          in Unicode; {@code false} otherwise.
9849      * @see     Character#isDigit(int)
9850      * @see     Character#isLetter(int)
9851      * @see     Character#isLetterOrDigit(int)
9852      * @see     Character#isLowerCase(int)
9853      * @see     Character#isTitleCase(int)
9854      * @see     Character#isUpperCase(int)
9855      * @since   1.5
9856      */
9857     // BEGIN Android-changed: Reimplement methods natively on top of ICU4C.
9858     /*
9859     public static boolean isDefined(int codePoint) {
9860         return getType(codePoint) != Character.UNASSIGNED;
9861     }
9862     */
isDefined(int codePoint)9863     public static boolean isDefined(int codePoint) {
9864         return isDefinedImpl(codePoint);
9865     }
9866 
9867     @FastNative
isDefinedImpl(int codePoint)9868     static native boolean isDefinedImpl(int codePoint);
9869     // END Android-changed: Reimplement methods natively on top of ICU4C.
9870 
9871     /**
9872      * Determines if the specified character is a letter.
9873      * <p>
9874      * A character is considered to be a letter if its general
9875      * category type, provided by {@code Character.getType(ch)},
9876      * is any of the following:
9877      * <ul>
9878      * <li> {@code UPPERCASE_LETTER}
9879      * <li> {@code LOWERCASE_LETTER}
9880      * <li> {@code TITLECASE_LETTER}
9881      * <li> {@code MODIFIER_LETTER}
9882      * <li> {@code OTHER_LETTER}
9883      * </ul>
9884      *
9885      * Not all letters have case. Many characters are
9886      * letters but are neither uppercase nor lowercase nor titlecase.
9887      *
9888      * <p><b>Note:</b> This method cannot handle <a
9889      * href="#supplementary"> supplementary characters</a>. To support
9890      * all Unicode characters, including supplementary characters, use
9891      * the {@link #isLetter(int)} method.
9892      *
9893      * @param   ch   the character to be tested.
9894      * @return  {@code true} if the character is a letter;
9895      *          {@code false} otherwise.
9896      * @see     Character#isDigit(char)
9897      * @see     Character#isJavaIdentifierStart(char)
9898      * @see     Character#isJavaLetter(char)
9899      * @see     Character#isJavaLetterOrDigit(char)
9900      * @see     Character#isLetterOrDigit(char)
9901      * @see     Character#isLowerCase(char)
9902      * @see     Character#isTitleCase(char)
9903      * @see     Character#isUnicodeIdentifierStart(char)
9904      * @see     Character#isUpperCase(char)
9905      */
isLetter(char ch)9906     public static boolean isLetter(char ch) {
9907         return isLetter((int)ch);
9908     }
9909 
9910     /**
9911      * Determines if the specified character (Unicode code point) is a letter.
9912      * <p>
9913      * A character is considered to be a letter if its general
9914      * category type, provided by {@link Character#getType(int) getType(codePoint)},
9915      * is any of the following:
9916      * <ul>
9917      * <li> {@code UPPERCASE_LETTER}
9918      * <li> {@code LOWERCASE_LETTER}
9919      * <li> {@code TITLECASE_LETTER}
9920      * <li> {@code MODIFIER_LETTER}
9921      * <li> {@code OTHER_LETTER}
9922      * </ul>
9923      *
9924      * Not all letters have case. Many characters are
9925      * letters but are neither uppercase nor lowercase nor titlecase.
9926      *
9927      * @param   codePoint the character (Unicode code point) to be tested.
9928      * @return  {@code true} if the character is a letter;
9929      *          {@code false} otherwise.
9930      * @see     Character#isDigit(int)
9931      * @see     Character#isJavaIdentifierStart(int)
9932      * @see     Character#isLetterOrDigit(int)
9933      * @see     Character#isLowerCase(int)
9934      * @see     Character#isTitleCase(int)
9935      * @see     Character#isUnicodeIdentifierStart(int)
9936      * @see     Character#isUpperCase(int)
9937      * @since   1.5
9938      */
9939     // BEGIN Android-changed: Reimplement methods natively on top of ICU4C.
9940     /*
9941     public static boolean isLetter(int codePoint) {
9942         return ((((1 << Character.UPPERCASE_LETTER) |
9943             (1 << Character.LOWERCASE_LETTER) |
9944             (1 << Character.TITLECASE_LETTER) |
9945             (1 << Character.MODIFIER_LETTER) |
9946             (1 << Character.OTHER_LETTER)) >> getType(codePoint)) & 1)
9947             != 0;
9948     }
9949     */
isLetter(int codePoint)9950     public static boolean isLetter(int codePoint) {
9951         return isLetterImpl(codePoint);
9952     }
9953 
9954     @FastNative
isLetterImpl(int codePoint)9955     static native boolean isLetterImpl(int codePoint);
9956     // END Android-changed: Reimplement methods natively on top of ICU4C.
9957 
9958     /**
9959      * Determines if the specified character is a letter or digit.
9960      * <p>
9961      * A character is considered to be a letter or digit if either
9962      * {@code Character.isLetter(char ch)} or
9963      * {@code Character.isDigit(char ch)} returns
9964      * {@code true} for the character.
9965      *
9966      * <p><b>Note:</b> This method cannot handle <a
9967      * href="#supplementary"> supplementary characters</a>. To support
9968      * all Unicode characters, including supplementary characters, use
9969      * the {@link #isLetterOrDigit(int)} method.
9970      *
9971      * @param   ch   the character to be tested.
9972      * @return  {@code true} if the character is a letter or digit;
9973      *          {@code false} otherwise.
9974      * @see     Character#isDigit(char)
9975      * @see     Character#isJavaIdentifierPart(char)
9976      * @see     Character#isJavaLetter(char)
9977      * @see     Character#isJavaLetterOrDigit(char)
9978      * @see     Character#isLetter(char)
9979      * @see     Character#isUnicodeIdentifierPart(char)
9980      * @since   1.0.2
9981      */
isLetterOrDigit(char ch)9982     public static boolean isLetterOrDigit(char ch) {
9983         return isLetterOrDigit((int)ch);
9984     }
9985 
9986     /**
9987      * Determines if the specified character (Unicode code point) is a letter or digit.
9988      * <p>
9989      * A character is considered to be a letter or digit if either
9990      * {@link #isLetter(int) isLetter(codePoint)} or
9991      * {@link #isDigit(int) isDigit(codePoint)} returns
9992      * {@code true} for the character.
9993      *
9994      * @param   codePoint the character (Unicode code point) to be tested.
9995      * @return  {@code true} if the character is a letter or digit;
9996      *          {@code false} otherwise.
9997      * @see     Character#isDigit(int)
9998      * @see     Character#isJavaIdentifierPart(int)
9999      * @see     Character#isLetter(int)
10000      * @see     Character#isUnicodeIdentifierPart(int)
10001      * @since   1.5
10002      */
10003     // BEGIN Android-changed: Reimplement methods natively on top of ICU4C.
10004     /*
10005     public static boolean isLetterOrDigit(int codePoint) {
10006         return ((((1 << Character.UPPERCASE_LETTER) |
10007             (1 << Character.LOWERCASE_LETTER) |
10008             (1 << Character.TITLECASE_LETTER) |
10009             (1 << Character.MODIFIER_LETTER) |
10010             (1 << Character.OTHER_LETTER) |
10011             (1 << Character.DECIMAL_DIGIT_NUMBER)) >> getType(codePoint)) & 1)
10012             != 0;
10013     }
10014     */
isLetterOrDigit(int codePoint)10015     public static boolean isLetterOrDigit(int codePoint) {
10016         return isLetterOrDigitImpl(codePoint);
10017     }
10018 
10019     @FastNative
isLetterOrDigitImpl(int codePoint)10020     static native boolean isLetterOrDigitImpl(int codePoint);
10021     // END Android-changed: Reimplement methods natively on top of ICU4C.
10022 
10023     /**
10024      * Determines if the specified character is permissible as the first
10025      * character in a Java identifier.
10026      * <p>
10027      * A character may start a Java identifier if and only if
10028      * one of the following conditions is true:
10029      * <ul>
10030      * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
10031      * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER}
10032      * <li> {@code ch} is a currency symbol (such as {@code '$'})
10033      * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}).
10034      * </ul>
10035      *
10036      * @param   ch the character to be tested.
10037      * @return  {@code true} if the character may start a Java
10038      *          identifier; {@code false} otherwise.
10039      * @see     Character#isJavaLetterOrDigit(char)
10040      * @see     Character#isJavaIdentifierStart(char)
10041      * @see     Character#isJavaIdentifierPart(char)
10042      * @see     Character#isLetter(char)
10043      * @see     Character#isLetterOrDigit(char)
10044      * @see     Character#isUnicodeIdentifierStart(char)
10045      * @since   1.0.2
10046      * @deprecated Replaced by isJavaIdentifierStart(char).
10047      */
10048     @Deprecated(since="1.1")
isJavaLetter(char ch)10049     public static boolean isJavaLetter(char ch) {
10050         return isJavaIdentifierStart(ch);
10051     }
10052 
10053     /**
10054      * Determines if the specified character may be part of a Java
10055      * identifier as other than the first character.
10056      * <p>
10057      * A character may be part of a Java identifier if and only if one
10058      * of the following conditions is true:
10059      * <ul>
10060      * <li>  it is a letter
10061      * <li>  it is a currency symbol (such as {@code '$'})
10062      * <li>  it is a connecting punctuation character (such as {@code '_'})
10063      * <li>  it is a digit
10064      * <li>  it is a numeric letter (such as a Roman numeral character)
10065      * <li>  it is a combining mark
10066      * <li>  it is a non-spacing mark
10067      * <li> {@code isIdentifierIgnorable} returns
10068      * {@code true} for the character.
10069      * </ul>
10070      *
10071      * @param   ch the character to be tested.
10072      * @return  {@code true} if the character may be part of a
10073      *          Java identifier; {@code false} otherwise.
10074      * @see     Character#isJavaLetter(char)
10075      * @see     Character#isJavaIdentifierStart(char)
10076      * @see     Character#isJavaIdentifierPart(char)
10077      * @see     Character#isLetter(char)
10078      * @see     Character#isLetterOrDigit(char)
10079      * @see     Character#isUnicodeIdentifierPart(char)
10080      * @see     Character#isIdentifierIgnorable(char)
10081      * @since   1.0.2
10082      * @deprecated Replaced by isJavaIdentifierPart(char).
10083      */
10084     @Deprecated(since="1.1")
isJavaLetterOrDigit(char ch)10085     public static boolean isJavaLetterOrDigit(char ch) {
10086         return isJavaIdentifierPart(ch);
10087     }
10088 
10089     /**
10090      * Determines if the specified character (Unicode code point) is alphabetic.
10091      * <p>
10092      * A character is considered to be alphabetic if its general category type,
10093      * provided by {@link Character#getType(int) getType(codePoint)}, is any of
10094      * the following:
10095      * <ul>
10096      * <li> {@code UPPERCASE_LETTER}
10097      * <li> {@code LOWERCASE_LETTER}
10098      * <li> {@code TITLECASE_LETTER}
10099      * <li> {@code MODIFIER_LETTER}
10100      * <li> {@code OTHER_LETTER}
10101      * <li> {@code LETTER_NUMBER}
10102      * </ul>
10103      * or it has contributory property Other_Alphabetic as defined by the
10104      * Unicode Standard.
10105      *
10106      * @param   codePoint the character (Unicode code point) to be tested.
10107      * @return  {@code true} if the character is a Unicode alphabet
10108      *          character, {@code false} otherwise.
10109      * @since   1.7
10110      */
10111     // BEGIN Android-changed: Reimplement methods natively on top of ICU4C.
10112     /*
10113     public static boolean isAlphabetic(int codePoint) {
10114         return (((((1 << Character.UPPERCASE_LETTER) |
10115             (1 << Character.LOWERCASE_LETTER) |
10116             (1 << Character.TITLECASE_LETTER) |
10117             (1 << Character.MODIFIER_LETTER) |
10118             (1 << Character.OTHER_LETTER) |
10119             (1 << Character.LETTER_NUMBER)) >> getType(codePoint)) & 1) != 0) ||
10120             CharacterData.of(codePoint).isOtherAlphabetic(codePoint);
10121     }
10122     */
isAlphabetic(int codePoint)10123     public static boolean isAlphabetic(int codePoint) {
10124         return isAlphabeticImpl(codePoint);
10125     }
10126 
10127     @FastNative
isAlphabeticImpl(int codePoint)10128     static native boolean isAlphabeticImpl(int codePoint);
10129     // END Android-changed: Reimplement methods natively on top of ICU4C.
10130 
10131     /**
10132      * Determines if the specified character (Unicode code point) is a CJKV
10133      * (Chinese, Japanese, Korean and Vietnamese) ideograph, as defined by
10134      * the Unicode Standard.
10135      *
10136      * @param   codePoint the character (Unicode code point) to be tested.
10137      * @return  {@code true} if the character is a Unicode ideograph
10138      *          character, {@code false} otherwise.
10139      * @since   1.7
10140      */
10141     // BEGIN Android-changed: Reimplement methods natively on top of ICU4C.
10142     /*
10143     public static boolean isIdeographic(int codePoint) {
10144         return CharacterData.of(codePoint).isIdeographic(codePoint);
10145     }
10146     */
isIdeographic(int codePoint)10147     public static boolean isIdeographic(int codePoint) {
10148         return isIdeographicImpl(codePoint);
10149     }
10150     @FastNative
isIdeographicImpl(int codePoint)10151     static native boolean isIdeographicImpl(int codePoint);
10152     // END Android-changed: Reimplement methods natively on top of ICU4C.
10153 
10154     // Android-changed: Removed @see tag (target does not exist on Android):
10155     // @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
10156     /**
10157      * Determines if the specified character is
10158      * permissible as the first character in a Java identifier.
10159      * <p>
10160      * A character may start a Java identifier if and only if
10161      * one of the following conditions is true:
10162      * <ul>
10163      * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
10164      * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER}
10165      * <li> {@code ch} is a currency symbol (such as {@code '$'})
10166      * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}).
10167      * </ul>
10168      *
10169      * <p><b>Note:</b> This method cannot handle <a
10170      * href="#supplementary"> supplementary characters</a>. To support
10171      * all Unicode characters, including supplementary characters, use
10172      * the {@link #isJavaIdentifierStart(int)} method.
10173      *
10174      * @param   ch the character to be tested.
10175      * @return  {@code true} if the character may start a Java identifier;
10176      *          {@code false} otherwise.
10177      * @see     Character#isJavaIdentifierPart(char)
10178      * @see     Character#isLetter(char)
10179      * @see     Character#isUnicodeIdentifierStart(char)
10180      * @since   1.1
10181      */
isJavaIdentifierStart(char ch)10182     public static boolean isJavaIdentifierStart(char ch) {
10183         return isJavaIdentifierStart((int)ch);
10184     }
10185 
10186     // Android-changed: Removed @see tag (target does not exist on Android):
10187     // @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
10188     /**
10189      * Determines if the character (Unicode code point) is
10190      * permissible as the first character in a Java identifier.
10191      * <p>
10192      * A character may start a Java identifier if and only if
10193      * one of the following conditions is true:
10194      * <ul>
10195      * <li> {@link #isLetter(int) isLetter(codePoint)}
10196      *      returns {@code true}
10197      * <li> {@link #getType(int) getType(codePoint)}
10198      *      returns {@code LETTER_NUMBER}
10199      * <li> the referenced character is a currency symbol (such as {@code '$'})
10200      * <li> the referenced character is a connecting punctuation character
10201      *      (such as {@code '_'}).
10202      * </ul>
10203      *
10204      * @param   codePoint the character (Unicode code point) to be tested.
10205      * @return  {@code true} if the character may start a Java identifier;
10206      *          {@code false} otherwise.
10207      * @see     Character#isJavaIdentifierPart(int)
10208      * @see     Character#isLetter(int)
10209      * @see     Character#isUnicodeIdentifierStart(int)
10210      * @since   1.5
10211      */
10212     // BEGIN Android-changed: Use ICU.
10213     /*
10214     public static boolean isJavaIdentifierStart(int codePoint) {
10215         return CharacterData.of(codePoint).isJavaIdentifierStart(codePoint);
10216     }
10217     */
isJavaIdentifierStart(int codePoint)10218     public static boolean isJavaIdentifierStart(int codePoint) {
10219         // Use precomputed bitmasks to optimize the ASCII range.
10220         if (codePoint < 64) {
10221             return (codePoint == '$'); // There's only one character in this range.
10222         } else if (codePoint < 128) {
10223             return (0x7fffffe87fffffeL & (1L << (codePoint - 64))) != 0;
10224         }
10225         return ((1 << getType(codePoint))
10226                 & ((1 << UPPERCASE_LETTER)
10227                    | (1 << LOWERCASE_LETTER)
10228                    | (1  << TITLECASE_LETTER)
10229                    | (1  << MODIFIER_LETTER)
10230                    | (1  << OTHER_LETTER)
10231                    | (1  << CURRENCY_SYMBOL)
10232                    | (1  << CONNECTOR_PUNCTUATION)
10233                    | (1  << LETTER_NUMBER))) != 0;
10234     }
10235     // END Android-changed: Use ICU.
10236 
10237     // Android-changed: Removed @see tag (target does not exist on Android):
10238     // @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
10239     /**
10240      * Determines if the specified character may be part of a Java
10241      * identifier as other than the first character.
10242      * <p>
10243      * A character may be part of a Java identifier if any of the following
10244      * conditions are true:
10245      * <ul>
10246      * <li>  it is a letter
10247      * <li>  it is a currency symbol (such as {@code '$'})
10248      * <li>  it is a connecting punctuation character (such as {@code '_'})
10249      * <li>  it is a digit
10250      * <li>  it is a numeric letter (such as a Roman numeral character)
10251      * <li>  it is a combining mark
10252      * <li>  it is a non-spacing mark
10253      * <li> {@code isIdentifierIgnorable} returns
10254      * {@code true} for the character
10255      * </ul>
10256      *
10257      * <p><b>Note:</b> This method cannot handle <a
10258      * href="#supplementary"> supplementary characters</a>. To support
10259      * all Unicode characters, including supplementary characters, use
10260      * the {@link #isJavaIdentifierPart(int)} method.
10261      *
10262      * @param   ch      the character to be tested.
10263      * @return {@code true} if the character may be part of a
10264      *          Java identifier; {@code false} otherwise.
10265      * @see     Character#isIdentifierIgnorable(char)
10266      * @see     Character#isJavaIdentifierStart(char)
10267      * @see     Character#isLetterOrDigit(char)
10268      * @see     Character#isUnicodeIdentifierPart(char)
10269      * @since   1.1
10270      */
isJavaIdentifierPart(char ch)10271     public static boolean isJavaIdentifierPart(char ch) {
10272         return isJavaIdentifierPart((int)ch);
10273     }
10274 
10275     // Android-changed: Removed @see tag (target does not exist on Android):
10276     // @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
10277     /**
10278      * Determines if the character (Unicode code point) may be part of a Java
10279      * identifier as other than the first character.
10280      * <p>
10281      * A character may be part of a Java identifier if any of the following
10282      * conditions are true:
10283      * <ul>
10284      * <li>  it is a letter
10285      * <li>  it is a currency symbol (such as {@code '$'})
10286      * <li>  it is a connecting punctuation character (such as {@code '_'})
10287      * <li>  it is a digit
10288      * <li>  it is a numeric letter (such as a Roman numeral character)
10289      * <li>  it is a combining mark
10290      * <li>  it is a non-spacing mark
10291      * <li> {@link #isIdentifierIgnorable(int)
10292      * isIdentifierIgnorable(codePoint)} returns {@code true} for
10293      * the code point
10294      * </ul>
10295      *
10296      * @param   codePoint the character (Unicode code point) to be tested.
10297      * @return {@code true} if the character may be part of a
10298      *          Java identifier; {@code false} otherwise.
10299      * @see     Character#isIdentifierIgnorable(int)
10300      * @see     Character#isJavaIdentifierStart(int)
10301      * @see     Character#isLetterOrDigit(int)
10302      * @see     Character#isUnicodeIdentifierPart(int)
10303      * @since   1.5
10304      */
10305     // BEGIN Android-changed: Use ICU.
10306     /*
10307     public static boolean isJavaIdentifierPart(int codePoint) {
10308         return CharacterData.of(codePoint).isJavaIdentifierPart(codePoint);
10309     }
10310     */
isJavaIdentifierPart(int codePoint)10311     public static boolean isJavaIdentifierPart(int codePoint) {
10312         // Use precomputed bitmasks to optimize the ASCII range.
10313         if (codePoint < 64) {
10314             return (0x3ff00100fffc1ffL & (1L << codePoint)) != 0;
10315         } else if (codePoint < 128) {
10316             return (0x87fffffe87fffffeL & (1L << (codePoint - 64))) != 0;
10317         }
10318         return ((1 << getType(codePoint))
10319                 & ((1 << UPPERCASE_LETTER)
10320                    | (1 << LOWERCASE_LETTER)
10321                    | (1 << TITLECASE_LETTER)
10322                    | (1 << MODIFIER_LETTER)
10323                    | (1 << OTHER_LETTER)
10324                    | (1 << CURRENCY_SYMBOL)
10325                    | (1 << CONNECTOR_PUNCTUATION)
10326                    | (1 << DECIMAL_DIGIT_NUMBER)
10327                    | (1 << LETTER_NUMBER)
10328                    | (1 << FORMAT)
10329                    | (1 << COMBINING_SPACING_MARK)
10330                    | (1 << NON_SPACING_MARK))) != 0
10331                 || (codePoint >= 0 && codePoint <= 8) || (codePoint >= 0xe && codePoint <= 0x1b)
10332                 || (codePoint >= 0x7f && codePoint <= 0x9f);
10333     }
10334     // END Android-changed: Use ICU.
10335 
10336     /**
10337      * Determines if the specified character is permissible as the
10338      * first character in a Unicode identifier.
10339      * <p>
10340      * A character may start a Unicode identifier if and only if
10341      * one of the following conditions is true:
10342      * <ul>
10343      * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
10344      * <li> {@link #getType(char) getType(ch)} returns
10345      *      {@code LETTER_NUMBER}.
10346      * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Start">
10347      *      {@code Other_ID_Start}</a> character.
10348      * </ul>
10349      * <p>
10350      * This method conforms to <a href="https://unicode.org/reports/tr31/#R1">
10351      * UAX31-R1: Default Identifiers</a> requirement of the Unicode Standard,
10352      * with the following profile of UAX31:
10353      * <pre>
10354      * Start := ID_Start + 'VERTICAL TILDE' (U+2E2F)
10355      * </pre>
10356      * {@code 'VERTICAL TILDE'} is added to {@code Start} for backward
10357      * compatibility.
10358      *
10359      * <p><b>Note:</b> This method cannot handle <a
10360      * href="#supplementary"> supplementary characters</a>. To support
10361      * all Unicode characters, including supplementary characters, use
10362      * the {@link #isUnicodeIdentifierStart(int)} method.
10363      *
10364      * @param   ch      the character to be tested.
10365      * @return  {@code true} if the character may start a Unicode
10366      *          identifier; {@code false} otherwise.
10367      * @see     Character#isJavaIdentifierStart(char)
10368      * @see     Character#isLetter(char)
10369      * @see     Character#isUnicodeIdentifierPart(char)
10370      * @since   1.1
10371      */
isUnicodeIdentifierStart(char ch)10372     public static boolean isUnicodeIdentifierStart(char ch) {
10373         return isUnicodeIdentifierStart((int)ch);
10374     }
10375 
10376     /**
10377      * Determines if the specified character (Unicode code point) is permissible as the
10378      * first character in a Unicode identifier.
10379      * <p>
10380      * A character may start a Unicode identifier if and only if
10381      * one of the following conditions is true:
10382      * <ul>
10383      * <li> {@link #isLetter(int) isLetter(codePoint)}
10384      *      returns {@code true}
10385      * <li> {@link #getType(int) getType(codePoint)}
10386      *      returns {@code LETTER_NUMBER}.
10387      * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Start">
10388      *      {@code Other_ID_Start}</a> character.
10389      * </ul>
10390      * <p>
10391      * This method conforms to <a href="https://unicode.org/reports/tr31/#R1">
10392      * UAX31-R1: Default Identifiers</a> requirement of the Unicode Standard,
10393      * with the following profile of UAX31:
10394      * <pre>
10395      * Start := ID_Start + 'VERTICAL TILDE' (U+2E2F)
10396      * </pre>
10397      * {@code 'VERTICAL TILDE'} is added to {@code Start} for backward
10398      * compatibility.
10399      *
10400      * @param   codePoint the character (Unicode code point) to be tested.
10401      * @return  {@code true} if the character may start a Unicode
10402      *          identifier; {@code false} otherwise.
10403      * @see     Character#isJavaIdentifierStart(int)
10404      * @see     Character#isLetter(int)
10405      * @see     Character#isUnicodeIdentifierPart(int)
10406      * @since   1.5
10407      */
10408     // BEGIN Android-changed: Reimplement methods natively on top of ICU4C.
10409     /*
10410     public static boolean isUnicodeIdentifierStart(int codePoint) {
10411         return CharacterData.of(codePoint).isUnicodeIdentifierStart(codePoint);
10412     }
10413     */
isUnicodeIdentifierStart(int codePoint)10414     public static boolean isUnicodeIdentifierStart(int codePoint) {
10415         return isUnicodeIdentifierStartImpl(codePoint);
10416     }
10417 
10418     @FastNative
isUnicodeIdentifierStartImpl(int codePoint)10419     static native boolean isUnicodeIdentifierStartImpl(int codePoint);
10420     // END Android-changed: Reimplement methods natively on top of ICU4C.
10421 
10422     /**
10423      * Determines if the specified character may be part of a Unicode
10424      * identifier as other than the first character.
10425      * <p>
10426      * A character may be part of a Unicode identifier if and only if
10427      * one of the following statements is true:
10428      * <ul>
10429      * <li>  it is a letter
10430      * <li>  it is a connecting punctuation character (such as {@code '_'})
10431      * <li>  it is a digit
10432      * <li>  it is a numeric letter (such as a Roman numeral character)
10433      * <li>  it is a combining mark
10434      * <li>  it is a non-spacing mark
10435      * <li> {@code isIdentifierIgnorable} returns
10436      * {@code true} for this character.
10437      * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Start">
10438      *      {@code Other_ID_Start}</a> character.
10439      * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Continue">
10440      *      {@code Other_ID_Continue}</a> character.
10441      * </ul>
10442      * <p>
10443      * This method conforms to <a href="https://unicode.org/reports/tr31/#R1">
10444      * UAX31-R1: Default Identifiers</a> requirement of the Unicode Standard,
10445      * with the following profile of UAX31:
10446      * <pre>
10447      * Continue := Start + ID_Continue + ignorable
10448      * Medial := empty
10449      * ignorable := isIdentifierIgnorable(char) returns true for the character
10450      * </pre>
10451      * {@code ignorable} is added to {@code Continue} for backward
10452      * compatibility.
10453      *
10454      * <p><b>Note:</b> This method cannot handle <a
10455      * href="#supplementary"> supplementary characters</a>. To support
10456      * all Unicode characters, including supplementary characters, use
10457      * the {@link #isUnicodeIdentifierPart(int)} method.
10458      *
10459      * @param   ch      the character to be tested.
10460      * @return  {@code true} if the character may be part of a
10461      *          Unicode identifier; {@code false} otherwise.
10462      * @see     Character#isIdentifierIgnorable(char)
10463      * @see     Character#isJavaIdentifierPart(char)
10464      * @see     Character#isLetterOrDigit(char)
10465      * @see     Character#isUnicodeIdentifierStart(char)
10466      * @since   1.1
10467      */
isUnicodeIdentifierPart(char ch)10468     public static boolean isUnicodeIdentifierPart(char ch) {
10469         return isUnicodeIdentifierPart((int)ch);
10470     }
10471 
10472     /**
10473      * Determines if the specified character (Unicode code point) may be part of a Unicode
10474      * identifier as other than the first character.
10475      * <p>
10476      * A character may be part of a Unicode identifier if and only if
10477      * one of the following statements is true:
10478      * <ul>
10479      * <li>  it is a letter
10480      * <li>  it is a connecting punctuation character (such as {@code '_'})
10481      * <li>  it is a digit
10482      * <li>  it is a numeric letter (such as a Roman numeral character)
10483      * <li>  it is a combining mark
10484      * <li>  it is a non-spacing mark
10485      * <li> {@code isIdentifierIgnorable} returns
10486      * {@code true} for this character.
10487      * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Start">
10488      *      {@code Other_ID_Start}</a> character.
10489      * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Continue">
10490      *      {@code Other_ID_Continue}</a> character.
10491      * </ul>
10492      * <p>
10493      * This method conforms to <a href="https://unicode.org/reports/tr31/#R1">
10494      * UAX31-R1: Default Identifiers</a> requirement of the Unicode Standard,
10495      * with the following profile of UAX31:
10496      * <pre>
10497      * Continue := Start + ID_Continue + ignorable
10498      * Medial := empty
10499      * ignorable := isIdentifierIgnorable(int) returns true for the character
10500      * </pre>
10501      * {@code ignorable} is added to {@code Continue} for backward
10502      * compatibility.
10503      *
10504      * @param   codePoint the character (Unicode code point) to be tested.
10505      * @return  {@code true} if the character may be part of a
10506      *          Unicode identifier; {@code false} otherwise.
10507      * @see     Character#isIdentifierIgnorable(int)
10508      * @see     Character#isJavaIdentifierPart(int)
10509      * @see     Character#isLetterOrDigit(int)
10510      * @see     Character#isUnicodeIdentifierStart(int)
10511      * @since   1.5
10512      */
10513     // BEGIN Android-changed: Reimplement methods natively on top of ICU4C.
10514     /*
10515     public static boolean isUnicodeIdentifierPart(int codePoint) {
10516         return CharacterData.of(codePoint).isUnicodeIdentifierPart(codePoint);
10517     }
10518     */
isUnicodeIdentifierPart(int codePoint)10519     public static boolean isUnicodeIdentifierPart(int codePoint) {
10520         return isUnicodeIdentifierPartImpl(codePoint);
10521     }
10522 
10523     @FastNative
isUnicodeIdentifierPartImpl(int codePoint)10524     static native boolean isUnicodeIdentifierPartImpl(int codePoint);
10525     // END Android-changed: Reimplement methods natively on top of ICU4C.
10526 
10527     /**
10528      * Determines if the specified character should be regarded as
10529      * an ignorable character in a Java identifier or a Unicode identifier.
10530      * <p>
10531      * The following Unicode characters are ignorable in a Java identifier
10532      * or a Unicode identifier:
10533      * <ul>
10534      * <li>ISO control characters that are not whitespace
10535      * <ul>
10536      * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'}
10537      * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'}
10538      * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'}
10539      * </ul>
10540      *
10541      * <li>all characters that have the {@code FORMAT} general
10542      * category value
10543      * </ul>
10544      *
10545      * <p><b>Note:</b> This method cannot handle <a
10546      * href="#supplementary"> supplementary characters</a>. To support
10547      * all Unicode characters, including supplementary characters, use
10548      * the {@link #isIdentifierIgnorable(int)} method.
10549      *
10550      * @param   ch      the character to be tested.
10551      * @return  {@code true} if the character is an ignorable control
10552      *          character that may be part of a Java or Unicode identifier;
10553      *           {@code false} otherwise.
10554      * @see     Character#isJavaIdentifierPart(char)
10555      * @see     Character#isUnicodeIdentifierPart(char)
10556      * @since   1.1
10557      */
isIdentifierIgnorable(char ch)10558     public static boolean isIdentifierIgnorable(char ch) {
10559         return isIdentifierIgnorable((int)ch);
10560     }
10561 
10562     /**
10563      * Determines if the specified character (Unicode code point) should be regarded as
10564      * an ignorable character in a Java identifier or a Unicode identifier.
10565      * <p>
10566      * The following Unicode characters are ignorable in a Java identifier
10567      * or a Unicode identifier:
10568      * <ul>
10569      * <li>ISO control characters that are not whitespace
10570      * <ul>
10571      * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'}
10572      * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'}
10573      * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'}
10574      * </ul>
10575      *
10576      * <li>all characters that have the {@code FORMAT} general
10577      * category value
10578      * </ul>
10579      *
10580      * @param   codePoint the character (Unicode code point) to be tested.
10581      * @return  {@code true} if the character is an ignorable control
10582      *          character that may be part of a Java or Unicode identifier;
10583      *          {@code false} otherwise.
10584      * @see     Character#isJavaIdentifierPart(int)
10585      * @see     Character#isUnicodeIdentifierPart(int)
10586      * @since   1.5
10587      */
10588     // BEGIN Android-changed: Reimplement methods natively on top of ICU4C.
10589     /*
10590     public static boolean isIdentifierIgnorable(int codePoint) {
10591         return CharacterData.of(codePoint).isIdentifierIgnorable(codePoint);
10592     }
10593     */
isIdentifierIgnorable(int codePoint)10594     public static boolean isIdentifierIgnorable(int codePoint) {
10595         return isIdentifierIgnorableImpl(codePoint);
10596     }
10597 
10598     @FastNative
isIdentifierIgnorableImpl(int codePoint)10599     static native boolean isIdentifierIgnorableImpl(int codePoint);
10600     // END Android-changed: Reimplement methods natively on top of ICU4C.
10601 
10602     /**
10603      * Converts the character argument to lowercase using case
10604      * mapping information from the UnicodeData file.
10605      * <p>
10606      * Note that
10607      * {@code Character.isLowerCase(Character.toLowerCase(ch))}
10608      * does not always return {@code true} for some ranges of
10609      * characters, particularly those that are symbols or ideographs.
10610      *
10611      * <p>In general, {@link String#toLowerCase()} should be used to map
10612      * characters to lowercase. {@code String} case mapping methods
10613      * have several benefits over {@code Character} case mapping methods.
10614      * {@code String} case mapping methods can perform locale-sensitive
10615      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
10616      * the {@code Character} case mapping methods cannot.
10617      *
10618      * <p><b>Note:</b> This method cannot handle <a
10619      * href="#supplementary"> supplementary characters</a>. To support
10620      * all Unicode characters, including supplementary characters, use
10621      * the {@link #toLowerCase(int)} method.
10622      *
10623      * @param   ch   the character to be converted.
10624      * @return  the lowercase equivalent of the character, if any;
10625      *          otherwise, the character itself.
10626      * @see     Character#isLowerCase(char)
10627      * @see     String#toLowerCase()
10628      */
toLowerCase(char ch)10629     public static char toLowerCase(char ch) {
10630         return (char)toLowerCase((int)ch);
10631     }
10632 
10633     /**
10634      * Converts the character (Unicode code point) argument to
10635      * lowercase using case mapping information from the UnicodeData
10636      * file.
10637      *
10638      * <p> Note that
10639      * {@code Character.isLowerCase(Character.toLowerCase(codePoint))}
10640      * does not always return {@code true} for some ranges of
10641      * characters, particularly those that are symbols or ideographs.
10642      *
10643      * <p>In general, {@link String#toLowerCase()} should be used to map
10644      * characters to lowercase. {@code String} case mapping methods
10645      * have several benefits over {@code Character} case mapping methods.
10646      * {@code String} case mapping methods can perform locale-sensitive
10647      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
10648      * the {@code Character} case mapping methods cannot.
10649      *
10650      * @param   codePoint   the character (Unicode code point) to be converted.
10651      * @return  the lowercase equivalent of the character (Unicode code
10652      *          point), if any; otherwise, the character itself.
10653      * @see     Character#isLowerCase(int)
10654      * @see     String#toLowerCase()
10655      *
10656      * @since   1.5
10657      */
10658     // BEGIN Android-changed: Reimplement methods natively on top of ICU4C.
10659     /*
10660     public static int toLowerCase(int codePoint) {
10661         return CharacterData.of(codePoint).toLowerCase(codePoint);
10662     }
10663     */
toLowerCase(int codePoint)10664     public static int toLowerCase(int codePoint) {
10665         if (codePoint >= 'A' && codePoint <= 'Z') {
10666             return codePoint + ('a' - 'A');
10667         }
10668 
10669         // All ASCII codepoints except the ones above remain unchanged.
10670         if (codePoint < 0x80) {
10671             return codePoint;
10672         }
10673 
10674         return toLowerCaseImpl(codePoint);
10675     }
10676 
10677     @FastNative
toLowerCaseImpl(int codePoint)10678     static native int toLowerCaseImpl(int codePoint);
10679     // END Android-changed: Reimplement methods natively on top of ICU4C.
10680 
10681     /**
10682      * Converts the character argument to uppercase using case mapping
10683      * information from the UnicodeData file.
10684      * <p>
10685      * Note that
10686      * {@code Character.isUpperCase(Character.toUpperCase(ch))}
10687      * does not always return {@code true} for some ranges of
10688      * characters, particularly those that are symbols or ideographs.
10689      *
10690      * <p>In general, {@link String#toUpperCase()} should be used to map
10691      * characters to uppercase. {@code String} case mapping methods
10692      * have several benefits over {@code Character} case mapping methods.
10693      * {@code String} case mapping methods can perform locale-sensitive
10694      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
10695      * the {@code Character} case mapping methods cannot.
10696      *
10697      * <p><b>Note:</b> This method cannot handle <a
10698      * href="#supplementary"> supplementary characters</a>. To support
10699      * all Unicode characters, including supplementary characters, use
10700      * the {@link #toUpperCase(int)} method.
10701      *
10702      * @param   ch   the character to be converted.
10703      * @return  the uppercase equivalent of the character, if any;
10704      *          otherwise, the character itself.
10705      * @see     Character#isUpperCase(char)
10706      * @see     String#toUpperCase()
10707      */
toUpperCase(char ch)10708     public static char toUpperCase(char ch) {
10709         return (char)toUpperCase((int)ch);
10710     }
10711 
10712     /**
10713      * Converts the character (Unicode code point) argument to
10714      * uppercase using case mapping information from the UnicodeData
10715      * file.
10716      *
10717      * <p>Note that
10718      * {@code Character.isUpperCase(Character.toUpperCase(codePoint))}
10719      * does not always return {@code true} for some ranges of
10720      * characters, particularly those that are symbols or ideographs.
10721      *
10722      * <p>In general, {@link String#toUpperCase()} should be used to map
10723      * characters to uppercase. {@code String} case mapping methods
10724      * have several benefits over {@code Character} case mapping methods.
10725      * {@code String} case mapping methods can perform locale-sensitive
10726      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
10727      * the {@code Character} case mapping methods cannot.
10728      *
10729      * @param   codePoint   the character (Unicode code point) to be converted.
10730      * @return  the uppercase equivalent of the character, if any;
10731      *          otherwise, the character itself.
10732      * @see     Character#isUpperCase(int)
10733      * @see     String#toUpperCase()
10734      *
10735      * @since   1.5
10736      */
10737     // BEGIN Android-changed: Reimplement methods natively on top of ICU4C.
10738     /*
10739     public static int toUpperCase(int codePoint) {
10740         return CharacterData.of(codePoint).toUpperCase(codePoint);
10741     }
10742     */
toUpperCase(int codePoint)10743     public static int toUpperCase(int codePoint) {
10744         if (codePoint >= 'a' && codePoint <= 'z') {
10745             return codePoint - ('a' - 'A');
10746         }
10747 
10748         // All ASCII codepoints except the ones above remain unchanged.
10749         if (codePoint < 0x80) {
10750             return codePoint;
10751         }
10752 
10753         return toUpperCaseImpl(codePoint);
10754     }
10755 
10756     @FastNative
toUpperCaseImpl(int codePoint)10757     static native int toUpperCaseImpl(int codePoint);
10758     // END Android-changed: Reimplement methods natively on top of ICU4C.
10759 
10760     /**
10761      * Converts the character argument to titlecase using case mapping
10762      * information from the UnicodeData file. If a character has no
10763      * explicit titlecase mapping and is not itself a titlecase char
10764      * according to UnicodeData, then the uppercase mapping is
10765      * returned as an equivalent titlecase mapping. If the
10766      * {@code char} argument is already a titlecase
10767      * {@code char}, the same {@code char} value will be
10768      * returned.
10769      * <p>
10770      * Note that
10771      * {@code Character.isTitleCase(Character.toTitleCase(ch))}
10772      * does not always return {@code true} for some ranges of
10773      * characters.
10774      *
10775      * <p><b>Note:</b> This method cannot handle <a
10776      * href="#supplementary"> supplementary characters</a>. To support
10777      * all Unicode characters, including supplementary characters, use
10778      * the {@link #toTitleCase(int)} method.
10779      *
10780      * @param   ch   the character to be converted.
10781      * @return  the titlecase equivalent of the character, if any;
10782      *          otherwise, the character itself.
10783      * @see     Character#isTitleCase(char)
10784      * @see     Character#toLowerCase(char)
10785      * @see     Character#toUpperCase(char)
10786      * @since   1.0.2
10787      */
toTitleCase(char ch)10788     public static char toTitleCase(char ch) {
10789         return (char)toTitleCase((int)ch);
10790     }
10791 
10792     /**
10793      * Converts the character (Unicode code point) argument to titlecase using case mapping
10794      * information from the UnicodeData file. If a character has no
10795      * explicit titlecase mapping and is not itself a titlecase char
10796      * according to UnicodeData, then the uppercase mapping is
10797      * returned as an equivalent titlecase mapping. If the
10798      * character argument is already a titlecase
10799      * character, the same character value will be
10800      * returned.
10801      *
10802      * <p>Note that
10803      * {@code Character.isTitleCase(Character.toTitleCase(codePoint))}
10804      * does not always return {@code true} for some ranges of
10805      * characters.
10806      *
10807      * @param   codePoint   the character (Unicode code point) to be converted.
10808      * @return  the titlecase equivalent of the character, if any;
10809      *          otherwise, the character itself.
10810      * @see     Character#isTitleCase(int)
10811      * @see     Character#toLowerCase(int)
10812      * @see     Character#toUpperCase(int)
10813      * @since   1.5
10814      */
10815     // BEGIN Android-changed: Reimplement methods natively on top of ICU4C.
10816     /*
10817     public static int toTitleCase(int codePoint) {
10818         return CharacterData.of(codePoint).toTitleCase(codePoint);
10819     }
10820     */
toTitleCase(int codePoint)10821     public static int toTitleCase(int codePoint) {
10822         return toTitleCaseImpl(codePoint);
10823     }
10824 
10825     @FastNative
toTitleCaseImpl(int codePoint)10826     static native int toTitleCaseImpl(int codePoint);
10827     // END Android-changed: Reimplement methods natively on top of ICU4C.
10828 
10829     /**
10830      * Returns the numeric value of the character {@code ch} in the
10831      * specified radix.
10832      * <p>
10833      * If the radix is not in the range {@code MIN_RADIX} &le;
10834      * {@code radix} &le; {@code MAX_RADIX} or if the
10835      * value of {@code ch} is not a valid digit in the specified
10836      * radix, {@code -1} is returned. A character is a valid digit
10837      * if at least one of the following is true:
10838      * <ul>
10839      * <li>The method {@code isDigit} is {@code true} of the character
10840      *     and the Unicode decimal digit value of the character (or its
10841      *     single-character decomposition) is less than the specified radix.
10842      *     In this case the decimal digit value is returned.
10843      * <li>The character is one of the uppercase Latin letters
10844      *     {@code 'A'} through {@code 'Z'} and its code is less than
10845      *     {@code radix + 'A' - 10}.
10846      *     In this case, {@code ch - 'A' + 10}
10847      *     is returned.
10848      * <li>The character is one of the lowercase Latin letters
10849      *     {@code 'a'} through {@code 'z'} and its code is less than
10850      *     {@code radix + 'a' - 10}.
10851      *     In this case, {@code ch - 'a' + 10}
10852      *     is returned.
10853      * <li>The character is one of the fullwidth uppercase Latin letters A
10854      *     ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})
10855      *     and its code is less than
10856      *     {@code radix + '\u005CuFF21' - 10}.
10857      *     In this case, {@code ch - '\u005CuFF21' + 10}
10858      *     is returned.
10859      * <li>The character is one of the fullwidth lowercase Latin letters a
10860      *     ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})
10861      *     and its code is less than
10862      *     {@code radix + '\u005CuFF41' - 10}.
10863      *     In this case, {@code ch - '\u005CuFF41' + 10}
10864      *     is returned.
10865      * </ul>
10866      *
10867      * <p><b>Note:</b> This method cannot handle <a
10868      * href="#supplementary"> supplementary characters</a>. To support
10869      * all Unicode characters, including supplementary characters, use
10870      * the {@link #digit(int, int)} method.
10871      *
10872      * @param   ch      the character to be converted.
10873      * @param   radix   the radix.
10874      * @return  the numeric value represented by the character in the
10875      *          specified radix.
10876      * @see     Character#forDigit(int, int)
10877      * @see     Character#isDigit(char)
10878      */
digit(char ch, int radix)10879     public static int digit(char ch, int radix) {
10880         return digit((int)ch, radix);
10881     }
10882 
10883     /**
10884      * Returns the numeric value of the specified character (Unicode
10885      * code point) in the specified radix.
10886      *
10887      * <p>If the radix is not in the range {@code MIN_RADIX} &le;
10888      * {@code radix} &le; {@code MAX_RADIX} or if the
10889      * character is not a valid digit in the specified
10890      * radix, {@code -1} is returned. A character is a valid digit
10891      * if at least one of the following is true:
10892      * <ul>
10893      * <li>The method {@link #isDigit(int) isDigit(codePoint)} is {@code true} of the character
10894      *     and the Unicode decimal digit value of the character (or its
10895      *     single-character decomposition) is less than the specified radix.
10896      *     In this case the decimal digit value is returned.
10897      * <li>The character is one of the uppercase Latin letters
10898      *     {@code 'A'} through {@code 'Z'} and its code is less than
10899      *     {@code radix + 'A' - 10}.
10900      *     In this case, {@code codePoint - 'A' + 10}
10901      *     is returned.
10902      * <li>The character is one of the lowercase Latin letters
10903      *     {@code 'a'} through {@code 'z'} and its code is less than
10904      *     {@code radix + 'a' - 10}.
10905      *     In this case, {@code codePoint - 'a' + 10}
10906      *     is returned.
10907      * <li>The character is one of the fullwidth uppercase Latin letters A
10908      *     ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})
10909      *     and its code is less than
10910      *     {@code radix + '\u005CuFF21' - 10}.
10911      *     In this case,
10912      *     {@code codePoint - '\u005CuFF21' + 10}
10913      *     is returned.
10914      * <li>The character is one of the fullwidth lowercase Latin letters a
10915      *     ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})
10916      *     and its code is less than
     *     {@code radix + '\u005CuFF41' - 10}.
10918      *     In this case,
10919      *     {@code codePoint - '\u005CuFF41' + 10}
10920      *     is returned.
10921      * </ul>
10922      *
10923      * @param   codePoint the character (Unicode code point) to be converted.
10924      * @param   radix   the radix.
10925      * @return  the numeric value represented by the character in the
10926      *          specified radix.
10927      * @see     Character#forDigit(int, int)
10928      * @see     Character#isDigit(int)
10929      * @since   1.5
10930      */
10931     // BEGIN Android-changed: Reimplement methods natively on top of ICU4C.
10932     /*
10933     public static int digit(int codePoint, int radix) {
10934         return CharacterData.of(codePoint).digit(codePoint, radix);
10935     }
10936     */
digit(int codePoint, int radix)10937     public static int digit(int codePoint, int radix) {
10938         if (radix < MIN_RADIX || radix > MAX_RADIX) {
10939             return -1;
10940         }
10941         if (codePoint < 128) {
10942             // Optimized for ASCII
10943             int result = -1;
10944             if ('0' <= codePoint && codePoint <= '9') {
10945                 result = codePoint - '0';
10946             } else if ('a' <= codePoint && codePoint <= 'z') {
10947                 result = 10 + (codePoint - 'a');
10948             } else if ('A' <= codePoint && codePoint <= 'Z') {
10949                 result = 10 + (codePoint - 'A');
10950             }
10951             return result < radix ? result : -1;
10952         }
10953         return digitImpl(codePoint, radix);
10954     }
10955 
10956     @FastNative
digitImpl(int codePoint, int radix)10957     native static int digitImpl(int codePoint, int radix);
10958     // END Android-changed: Reimplement methods natively on top of ICU4C.
10959 
10960     /**
10961      * Returns the {@code int} value that the specified Unicode
10962      * character represents. For example, the character
10963      * {@code '\u005Cu216C'} (the roman numeral fifty) will return
10964      * an int with a value of 50.
10965      * <p>
10966      * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through
10967      * {@code '\u005Cu005A'}), lowercase
10968      * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
10969      * full width variant ({@code '\u005CuFF21'} through
10970      * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
10971      * {@code '\u005CuFF5A'}) forms have numeric values from 10
10972      * through 35. This is independent of the Unicode specification,
10973      * which does not assign numeric values to these {@code char}
10974      * values.
10975      * <p>
10976      * If the character does not have a numeric value, then -1 is returned.
10977      * If the character has a numeric value that cannot be represented as a
10978      * nonnegative integer (for example, a fractional value), then -2
10979      * is returned.
10980      *
10981      * <p><b>Note:</b> This method cannot handle <a
10982      * href="#supplementary"> supplementary characters</a>. To support
10983      * all Unicode characters, including supplementary characters, use
10984      * the {@link #getNumericValue(int)} method.
10985      *
10986      * @param   ch      the character to be converted.
10987      * @return  the numeric value of the character, as a nonnegative {@code int}
10988      *          value; -2 if the character has a numeric value but the value
10989      *          can not be represented as a nonnegative {@code int} value;
10990      *          -1 if the character has no numeric value.
10991      * @see     Character#forDigit(int, int)
10992      * @see     Character#isDigit(char)
10993      * @since   1.1
10994      */
getNumericValue(char ch)10995     public static int getNumericValue(char ch) {
10996         return getNumericValue((int)ch);
10997     }
10998 
10999     /**
11000      * Returns the {@code int} value that the specified
11001      * character (Unicode code point) represents. For example, the character
11002      * {@code '\u005Cu216C'} (the Roman numeral fifty) will return
11003      * an {@code int} with a value of 50.
11004      * <p>
11005      * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through
11006      * {@code '\u005Cu005A'}), lowercase
11007      * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
11008      * full width variant ({@code '\u005CuFF21'} through
11009      * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
11010      * {@code '\u005CuFF5A'}) forms have numeric values from 10
11011      * through 35. This is independent of the Unicode specification,
11012      * which does not assign numeric values to these {@code char}
11013      * values.
11014      * <p>
11015      * If the character does not have a numeric value, then -1 is returned.
11016      * If the character has a numeric value that cannot be represented as a
11017      * nonnegative integer (for example, a fractional value), then -2
11018      * is returned.
11019      *
11020      * @param   codePoint the character (Unicode code point) to be converted.
11021      * @return  the numeric value of the character, as a nonnegative {@code int}
11022      *          value; -2 if the character has a numeric value but the value
11023      *          can not be represented as a nonnegative {@code int} value;
11024      *          -1 if the character has no numeric value.
11025      * @see     Character#forDigit(int, int)
11026      * @see     Character#isDigit(int)
11027      * @since   1.5
11028      */
11029     // BEGIN Android-changed: Reimplement methods natively on top of ICU4C.
11030     /*
11031     public static int getNumericValue(int codePoint) {
11032         return CharacterData.of(codePoint).getNumericValue(codePoint);
11033     }
11034     */
getNumericValue(int codePoint)11035     public static int getNumericValue(int codePoint) {
11036         // This is both an optimization and papers over differences between Java and ICU.
11037         if (codePoint < 128) {
11038             if (codePoint >= '0' && codePoint <= '9') {
11039                 return codePoint - '0';
11040             }
11041             if (codePoint >= 'a' && codePoint <= 'z') {
11042                 return codePoint - ('a' - 10);
11043             }
11044             if (codePoint >= 'A' && codePoint <= 'Z') {
11045                 return codePoint - ('A' - 10);
11046             }
11047             return -1;
11048         }
11049         // Full-width uppercase A-Z.
11050         if (codePoint >= 0xff21 && codePoint <= 0xff3a) {
11051             return codePoint - 0xff17;
11052         }
11053         // Full-width lowercase a-z.
11054         if (codePoint >= 0xff41 && codePoint <= 0xff5a) {
11055             return codePoint - 0xff37;
11056         }
11057         return getNumericValueImpl(codePoint);
11058     }
11059 
11060     @FastNative
getNumericValueImpl(int codePoint)11061     native static int getNumericValueImpl(int codePoint);
11062     // END Android-changed: Reimplement methods natively on top of ICU4C.
11063 
11064     /**
11065      * Determines if the specified character is ISO-LATIN-1 white space.
11066      * This method returns {@code true} for the following five
11067      * characters only:
11068      * <table class="striped">
11069      * <caption style="display:none">truechars</caption>
11070      * <thead>
11071      * <tr><th scope="col">Character
11072      *     <th scope="col">Code
11073      *     <th scope="col">Name
11074      * </thead>
11075      * <tbody>
11076      * <tr><th scope="row">{@code '\t'}</th>            <td>{@code U+0009}</td>
11077      *     <td>{@code HORIZONTAL TABULATION}</td></tr>
11078      * <tr><th scope="row">{@code '\n'}</th>            <td>{@code U+000A}</td>
11079      *     <td>{@code NEW LINE}</td></tr>
11080      * <tr><th scope="row">{@code '\f'}</th>            <td>{@code U+000C}</td>
11081      *     <td>{@code FORM FEED}</td></tr>
11082      * <tr><th scope="row">{@code '\r'}</th>            <td>{@code U+000D}</td>
11083      *     <td>{@code CARRIAGE RETURN}</td></tr>
11084      * <tr><th scope="row">{@code ' '}</th>  <td>{@code U+0020}</td>
11085      *     <td>{@code SPACE}</td></tr>
11086      * </tbody>
11087      * </table>
11088      *
11089      * @param      ch   the character to be tested.
11090      * @return     {@code true} if the character is ISO-LATIN-1 white
11091      *             space; {@code false} otherwise.
11092      * @see        Character#isSpaceChar(char)
11093      * @see        Character#isWhitespace(char)
11094      * @deprecated Replaced by isWhitespace(char).
11095      */
11096     @Deprecated(since="1.1")
isSpace(char ch)11097     public static boolean isSpace(char ch) {
11098         return (ch <= 0x0020) &&
11099             (((((1L << 0x0009) |
11100             (1L << 0x000A) |
11101             (1L << 0x000C) |
11102             (1L << 0x000D) |
11103             (1L << 0x0020)) >> ch) & 1L) != 0);
11104     }
11105 
11106 
11107     /**
11108      * Determines if the specified character is a Unicode space character.
11109      * A character is considered to be a space character if and only if
11110      * it is specified to be a space character by the Unicode Standard. This
11111      * method returns true if the character's general category type is any of
11112      * the following:
11113      * <ul>
11114      * <li> {@code SPACE_SEPARATOR}
11115      * <li> {@code LINE_SEPARATOR}
11116      * <li> {@code PARAGRAPH_SEPARATOR}
11117      * </ul>
11118      *
11119      * <p><b>Note:</b> This method cannot handle <a
11120      * href="#supplementary"> supplementary characters</a>. To support
11121      * all Unicode characters, including supplementary characters, use
11122      * the {@link #isSpaceChar(int)} method.
11123      *
11124      * @param   ch      the character to be tested.
11125      * @return  {@code true} if the character is a space character;
11126      *          {@code false} otherwise.
11127      * @see     Character#isWhitespace(char)
11128      * @since   1.1
11129      */
isSpaceChar(char ch)11130     public static boolean isSpaceChar(char ch) {
11131         return isSpaceChar((int)ch);
11132     }
11133 
11134     /**
11135      * Determines if the specified character (Unicode code point) is a
11136      * Unicode space character.  A character is considered to be a
11137      * space character if and only if it is specified to be a space
11138      * character by the Unicode Standard. This method returns true if
11139      * the character's general category type is any of the following:
11140      *
11141      * <ul>
11142      * <li> {@link #SPACE_SEPARATOR}
11143      * <li> {@link #LINE_SEPARATOR}
11144      * <li> {@link #PARAGRAPH_SEPARATOR}
11145      * </ul>
11146      *
11147      * @param   codePoint the character (Unicode code point) to be tested.
11148      * @return  {@code true} if the character is a space character;
11149      *          {@code false} otherwise.
11150      * @see     Character#isWhitespace(int)
11151      * @since   1.5
11152      */
11153     // BEGIN Android-changed: Reimplement methods natively on top of ICU4C.
11154     /*
11155     public static boolean isSpaceChar(int codePoint) {
11156         return ((((1 << Character.SPACE_SEPARATOR) |
11157                   (1 << Character.LINE_SEPARATOR) |
11158                   (1 << Character.PARAGRAPH_SEPARATOR)) >> getType(codePoint)) & 1)
11159             != 0;
11160     }
11161     */
isSpaceChar(int codePoint)11162     public static boolean isSpaceChar(int codePoint) {
11163         // We don't just call into icu4c because of the JNI overhead. Ideally we'd fix that.
11164         // SPACE or NO-BREAK SPACE?
11165         if (codePoint == 0x20 || codePoint == 0xa0) {
11166             return true;
11167         }
11168         if (codePoint < 0x1000) {
11169             return false;
11170         }
11171         // OGHAM SPACE MARK or MONGOLIAN VOWEL SEPARATOR?
11172         if (codePoint == 0x1680 || codePoint == 0x180e) {
11173             return true;
11174         }
11175         if (codePoint < 0x2000) {
11176             return false;
11177         }
11178         if (codePoint <= 0xffff) {
11179             // Other whitespace from General Punctuation...
11180             return codePoint <= 0x200a || codePoint == 0x2028 || codePoint == 0x2029 || codePoint == 0x202f || codePoint == 0x205f ||
11181                 codePoint == 0x3000; // ...or CJK Symbols and Punctuation?
11182         }
11183         // Let icu4c worry about non-BMP code points.
11184         return isSpaceCharImpl(codePoint);
11185     }
11186 
11187     @FastNative
isSpaceCharImpl(int codePoint)11188     static native boolean isSpaceCharImpl(int codePoint);
11189     // END Android-changed: Reimplement methods natively on top of ICU4C.
11190 
11191     /**
11192      * Determines if the specified character is white space according to Java.
11193      * A character is a Java whitespace character if and only if it satisfies
11194      * one of the following criteria:
11195      * <ul>
11196      * <li> It is a Unicode space character ({@code SPACE_SEPARATOR},
11197      *      {@code LINE_SEPARATOR}, or {@code PARAGRAPH_SEPARATOR})
11198      *      but is not also a non-breaking space ({@code '\u005Cu00A0'},
11199      *      {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
11200      * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
11201      * <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
11202      * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
11203      * <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
11204      * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
11205      * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
11206      * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
11207      * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
11208      * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
11209      * </ul>
11210      *
11211      * <p><b>Note:</b> This method cannot handle <a
11212      * href="#supplementary"> supplementary characters</a>. To support
11213      * all Unicode characters, including supplementary characters, use
11214      * the {@link #isWhitespace(int)} method.
11215      *
11216      * @param   ch the character to be tested.
11217      * @return  {@code true} if the character is a Java whitespace
11218      *          character; {@code false} otherwise.
11219      * @see     Character#isSpaceChar(char)
11220      * @since   1.1
11221      */
isWhitespace(char ch)11222     public static boolean isWhitespace(char ch) {
11223         return isWhitespace((int)ch);
11224     }
11225 
11226     /**
11227      * Determines if the specified character (Unicode code point) is
11228      * white space according to Java.  A character is a Java
11229      * whitespace character if and only if it satisfies one of the
11230      * following criteria:
11231      * <ul>
11232      * <li> It is a Unicode space character ({@link #SPACE_SEPARATOR},
11233      *      {@link #LINE_SEPARATOR}, or {@link #PARAGRAPH_SEPARATOR})
11234      *      but is not also a non-breaking space ({@code '\u005Cu00A0'},
11235      *      {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
11236      * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
11237      * <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
11238      * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
11239      * <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
11240      * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
11241      * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
11242      * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
11243      * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
11244      * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
11245      * </ul>
11246      *
11247      * @param   codePoint the character (Unicode code point) to be tested.
11248      * @return  {@code true} if the character is a Java whitespace
11249      *          character; {@code false} otherwise.
11250      * @see     Character#isSpaceChar(int)
11251      * @since   1.5
11252      */
11253     // BEGIN Android-changed: Reimplement methods natively on top of ICU4C.
11254     /*
11255     public static boolean isWhitespace(int codePoint) {
11256         return CharacterData.of(codePoint).isWhitespace(codePoint);
11257     }
11258     */
isWhitespace(int codePoint)11259     public static boolean isWhitespace(int codePoint) {
11260         // We don't just call into icu4c because of the JNI overhead. Ideally we'd fix that.
11261         // Any ASCII whitespace character?
11262         if ((codePoint >= 0x1c && codePoint <= 0x20) || (codePoint >= 0x09 && codePoint <= 0x0d)) {
11263             return true;
11264         }
11265         if (codePoint < 0x1000) {
11266             return false;
11267         }
11268         // OGHAM SPACE MARK or MONGOLIAN VOWEL SEPARATOR?
11269         if (codePoint == 0x1680 || codePoint == 0x180e) {
11270             return true;
11271         }
11272         if (codePoint < 0x2000) {
11273             return false;
11274         }
11275         // Exclude General Punctuation's non-breaking spaces (which includes FIGURE SPACE).
11276         if (codePoint == 0x2007 || codePoint == 0x202f) {
11277             return false;
11278         }
11279         if (codePoint <= 0xffff) {
11280             // Other whitespace from General Punctuation...
11281             return codePoint <= 0x200a || codePoint == 0x2028 || codePoint == 0x2029 || codePoint == 0x205f ||
11282                 codePoint == 0x3000; // ...or CJK Symbols and Punctuation?
11283         }
11284         // Let icu4c worry about non-BMP code points.
11285         return isWhitespaceImpl(codePoint);
11286     }
11287 
11288     @FastNative
isWhitespaceImpl(int codePoint)11289     native static boolean isWhitespaceImpl(int codePoint);
11290     // END Android-changed: Reimplement methods natively on top of ICU4C.
11291 
11292     /**
11293      * Determines if the specified character is an ISO control
11294      * character.  A character is considered to be an ISO control
11295      * character if its code is in the range {@code '\u005Cu0000'}
11296      * through {@code '\u005Cu001F'} or in the range
11297      * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
11298      *
11299      * <p><b>Note:</b> This method cannot handle <a
11300      * href="#supplementary"> supplementary characters</a>. To support
11301      * all Unicode characters, including supplementary characters, use
11302      * the {@link #isISOControl(int)} method.
11303      *
11304      * @param   ch      the character to be tested.
11305      * @return  {@code true} if the character is an ISO control character;
11306      *          {@code false} otherwise.
11307      *
11308      * @see     Character#isSpaceChar(char)
11309      * @see     Character#isWhitespace(char)
11310      * @since   1.1
11311      */
isISOControl(char ch)11312     public static boolean isISOControl(char ch) {
11313         return isISOControl((int)ch);
11314     }
11315 
11316     /**
11317      * Determines if the referenced character (Unicode code point) is an ISO control
11318      * character.  A character is considered to be an ISO control
11319      * character if its code is in the range {@code '\u005Cu0000'}
11320      * through {@code '\u005Cu001F'} or in the range
11321      * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
11322      *
11323      * @param   codePoint the character (Unicode code point) to be tested.
11324      * @return  {@code true} if the character is an ISO control character;
11325      *          {@code false} otherwise.
11326      * @see     Character#isSpaceChar(int)
11327      * @see     Character#isWhitespace(int)
11328      * @since   1.5
11329      */
isISOControl(int codePoint)11330     public static boolean isISOControl(int codePoint) {
11331         // Optimized form of:
11332         //     (codePoint >= 0x00 && codePoint <= 0x1F) ||
11333         //     (codePoint >= 0x7F && codePoint <= 0x9F);
11334         return codePoint <= 0x9F &&
11335             (codePoint >= 0x7F || (codePoint >>> 5 == 0));
11336     }
11337 
11338     /**
11339      * Returns a value indicating a character's general category.
11340      *
11341      * <p><b>Note:</b> This method cannot handle <a
11342      * href="#supplementary"> supplementary characters</a>. To support
11343      * all Unicode characters, including supplementary characters, use
11344      * the {@link #getType(int)} method.
11345      *
11346      * @param   ch      the character to be tested.
11347      * @return  a value of type {@code int} representing the
11348      *          character's general category.
11349      * @see     Character#COMBINING_SPACING_MARK
11350      * @see     Character#CONNECTOR_PUNCTUATION
11351      * @see     Character#CONTROL
11352      * @see     Character#CURRENCY_SYMBOL
11353      * @see     Character#DASH_PUNCTUATION
11354      * @see     Character#DECIMAL_DIGIT_NUMBER
11355      * @see     Character#ENCLOSING_MARK
11356      * @see     Character#END_PUNCTUATION
11357      * @see     Character#FINAL_QUOTE_PUNCTUATION
11358      * @see     Character#FORMAT
11359      * @see     Character#INITIAL_QUOTE_PUNCTUATION
11360      * @see     Character#LETTER_NUMBER
11361      * @see     Character#LINE_SEPARATOR
11362      * @see     Character#LOWERCASE_LETTER
11363      * @see     Character#MATH_SYMBOL
11364      * @see     Character#MODIFIER_LETTER
11365      * @see     Character#MODIFIER_SYMBOL
11366      * @see     Character#NON_SPACING_MARK
11367      * @see     Character#OTHER_LETTER
11368      * @see     Character#OTHER_NUMBER
11369      * @see     Character#OTHER_PUNCTUATION
11370      * @see     Character#OTHER_SYMBOL
11371      * @see     Character#PARAGRAPH_SEPARATOR
11372      * @see     Character#PRIVATE_USE
11373      * @see     Character#SPACE_SEPARATOR
11374      * @see     Character#START_PUNCTUATION
11375      * @see     Character#SURROGATE
11376      * @see     Character#TITLECASE_LETTER
11377      * @see     Character#UNASSIGNED
11378      * @see     Character#UPPERCASE_LETTER
11379      * @since   1.1
11380      */
getType(char ch)11381     public static int getType(char ch) {
11382         return getType((int)ch);
11383     }
11384 
11385     /**
11386      * Returns a value indicating a character's general category.
11387      *
11388      * @param   codePoint the character (Unicode code point) to be tested.
11389      * @return  a value of type {@code int} representing the
11390      *          character's general category.
11391      * @see     Character#COMBINING_SPACING_MARK COMBINING_SPACING_MARK
11392      * @see     Character#CONNECTOR_PUNCTUATION CONNECTOR_PUNCTUATION
11393      * @see     Character#CONTROL CONTROL
11394      * @see     Character#CURRENCY_SYMBOL CURRENCY_SYMBOL
11395      * @see     Character#DASH_PUNCTUATION DASH_PUNCTUATION
11396      * @see     Character#DECIMAL_DIGIT_NUMBER DECIMAL_DIGIT_NUMBER
11397      * @see     Character#ENCLOSING_MARK ENCLOSING_MARK
11398      * @see     Character#END_PUNCTUATION END_PUNCTUATION
11399      * @see     Character#FINAL_QUOTE_PUNCTUATION FINAL_QUOTE_PUNCTUATION
11400      * @see     Character#FORMAT FORMAT
11401      * @see     Character#INITIAL_QUOTE_PUNCTUATION INITIAL_QUOTE_PUNCTUATION
11402      * @see     Character#LETTER_NUMBER LETTER_NUMBER
11403      * @see     Character#LINE_SEPARATOR LINE_SEPARATOR
11404      * @see     Character#LOWERCASE_LETTER LOWERCASE_LETTER
11405      * @see     Character#MATH_SYMBOL MATH_SYMBOL
11406      * @see     Character#MODIFIER_LETTER MODIFIER_LETTER
11407      * @see     Character#MODIFIER_SYMBOL MODIFIER_SYMBOL
11408      * @see     Character#NON_SPACING_MARK NON_SPACING_MARK
11409      * @see     Character#OTHER_LETTER OTHER_LETTER
11410      * @see     Character#OTHER_NUMBER OTHER_NUMBER
11411      * @see     Character#OTHER_PUNCTUATION OTHER_PUNCTUATION
11412      * @see     Character#OTHER_SYMBOL OTHER_SYMBOL
11413      * @see     Character#PARAGRAPH_SEPARATOR PARAGRAPH_SEPARATOR
11414      * @see     Character#PRIVATE_USE PRIVATE_USE
11415      * @see     Character#SPACE_SEPARATOR SPACE_SEPARATOR
11416      * @see     Character#START_PUNCTUATION START_PUNCTUATION
11417      * @see     Character#SURROGATE SURROGATE
11418      * @see     Character#TITLECASE_LETTER TITLECASE_LETTER
11419      * @see     Character#UNASSIGNED UNASSIGNED
11420      * @see     Character#UPPERCASE_LETTER UPPERCASE_LETTER
11421      * @since   1.5
11422      */
11423     // BEGIN Android-changed: Reimplement methods natively on top of ICU4C.
11424     /*
11425     public static int getType(int codePoint) {
11426         return CharacterData.of(codePoint).getType(codePoint);
11427     }
11428     */
getType(int codePoint)11429     public static int getType(int codePoint) {
11430         int type = getTypeImpl(codePoint);
11431         // The type values returned by ICU are not RI-compatible. The RI skips the value 17.
11432         if (type <= Character.FORMAT) {
11433             return type;
11434         }
11435         return (type + 1);
11436     }
11437 
    // Native general-category lookup. The returned value is not RI-compatible;
    // getType(int) adjusts it before handing it to callers.
    @FastNative
    static native int getTypeImpl(int codePoint);
11440     // END Android-changed: Reimplement methods natively on top of ICU4C.
11441 
11442     /**
11443      * Determines the character representation for a specific digit in
11444      * the specified radix. If the value of {@code radix} is not a
11445      * valid radix, or the value of {@code digit} is not a valid
11446      * digit in the specified radix, the null character
11447      * ({@code '\u005Cu0000'}) is returned.
11448      * <p>
11449      * The {@code radix} argument is valid if it is greater than or
11450      * equal to {@code MIN_RADIX} and less than or equal to
11451      * {@code MAX_RADIX}. The {@code digit} argument is valid if
11452      * {@code 0 <= digit < radix}.
11453      * <p>
11454      * If the digit is less than 10, then
11455      * {@code '0' + digit} is returned. Otherwise, the value
11456      * {@code 'a' + digit - 10} is returned.
11457      *
11458      * @param   digit   the number to convert to a character.
11459      * @param   radix   the radix.
11460      * @return  the {@code char} representation of the specified digit
11461      *          in the specified radix.
11462      * @see     Character#MIN_RADIX
11463      * @see     Character#MAX_RADIX
11464      * @see     Character#digit(char, int)
11465      */
forDigit(int digit, int radix)11466     public static char forDigit(int digit, int radix) {
11467         if ((digit >= radix) || (digit < 0)) {
11468             return '\0';
11469         }
11470         if ((radix < Character.MIN_RADIX) || (radix > Character.MAX_RADIX)) {
11471             return '\0';
11472         }
11473         if (digit < 10) {
11474             return (char)('0' + digit);
11475         }
11476         return (char)('a' - 10 + digit);
11477     }
11478 
11479     /**
11480      * Returns the Unicode directionality property for the given
11481      * character.  Character directionality is used to calculate the
11482      * visual ordering of text. The directionality value of undefined
11483      * {@code char} values is {@code DIRECTIONALITY_UNDEFINED}.
11484      *
11485      * <p><b>Note:</b> This method cannot handle <a
11486      * href="#supplementary"> supplementary characters</a>. To support
11487      * all Unicode characters, including supplementary characters, use
11488      * the {@link #getDirectionality(int)} method.
11489      *
11490      * @param  ch {@code char} for which the directionality property
11491      *            is requested.
11492      * @return the directionality property of the {@code char} value.
11493      *
11494      * @see Character#DIRECTIONALITY_UNDEFINED
11495      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT
11496      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT
11497      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
11498      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER
11499      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
11500      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
11501      * @see Character#DIRECTIONALITY_ARABIC_NUMBER
11502      * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
11503      * @see Character#DIRECTIONALITY_NONSPACING_MARK
11504      * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL
11505      * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR
11506      * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR
11507      * @see Character#DIRECTIONALITY_WHITESPACE
11508      * @see Character#DIRECTIONALITY_OTHER_NEUTRALS
11509      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
11510      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
11511      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
11512      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
11513      * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
11514      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE
11515      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE
11516      * @see Character#DIRECTIONALITY_FIRST_STRONG_ISOLATE
11517      * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE
11518      * @since 1.4
11519      */
getDirectionality(char ch)11520     public static byte getDirectionality(char ch) {
11521         return getDirectionality((int)ch);
11522     }
11523 
11524     /**
11525      * Returns the Unicode directionality property for the given
11526      * character (Unicode code point).  Character directionality is
11527      * used to calculate the visual ordering of text. The
     * directionality value of an undefined character is {@link
11529      * #DIRECTIONALITY_UNDEFINED}.
11530      *
11531      * @param   codePoint the character (Unicode code point) for which
11532      *          the directionality property is requested.
11533      * @return the directionality property of the character.
11534      *
11535      * @see Character#DIRECTIONALITY_UNDEFINED DIRECTIONALITY_UNDEFINED
11536      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT DIRECTIONALITY_LEFT_TO_RIGHT
11537      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT DIRECTIONALITY_RIGHT_TO_LEFT
11538      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
11539      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER DIRECTIONALITY_EUROPEAN_NUMBER
11540      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
11541      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
11542      * @see Character#DIRECTIONALITY_ARABIC_NUMBER DIRECTIONALITY_ARABIC_NUMBER
11543      * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
11544      * @see Character#DIRECTIONALITY_NONSPACING_MARK DIRECTIONALITY_NONSPACING_MARK
11545      * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL DIRECTIONALITY_BOUNDARY_NEUTRAL
11546      * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR DIRECTIONALITY_PARAGRAPH_SEPARATOR
11547      * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR DIRECTIONALITY_SEGMENT_SEPARATOR
11548      * @see Character#DIRECTIONALITY_WHITESPACE DIRECTIONALITY_WHITESPACE
11549      * @see Character#DIRECTIONALITY_OTHER_NEUTRALS DIRECTIONALITY_OTHER_NEUTRALS
11550      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
11551      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
11552      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
11553      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
11554      * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
11555      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE
11556      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE
11557      * @see Character#DIRECTIONALITY_FIRST_STRONG_ISOLATE DIRECTIONALITY_FIRST_STRONG_ISOLATE
11558      * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE
11559      * @since    1.5
11560      */
11561     // BEGIN Android-changed: Reimplement methods natively on top of ICU4C.
11562     /*
11563     public static byte getDirectionality(int codePoint) {
11564         return CharacterData.of(codePoint).getDirectionality(codePoint);
11565     }
11566     */
getDirectionality(int codePoint)11567     public static byte getDirectionality(int codePoint) {
11568         if (getType(codePoint) == Character.UNASSIGNED) {
11569             return Character.DIRECTIONALITY_UNDEFINED;
11570         }
11571 
11572         byte directionality = getDirectionalityImpl(codePoint);
11573         if (directionality >= 0 && directionality < DIRECTIONALITY.length) {
11574             return DIRECTIONALITY[directionality];
11575         }
11576         return Character.DIRECTIONALITY_UNDEFINED;
11577     }
11578 
11579     @FastNative
getDirectionalityImpl(int codePoint)11580     native static byte getDirectionalityImpl(int codePoint);
11581     // END Android-changed: Reimplement methods natively on top of ICU4C.
11582 
11583     /**
11584      * Determines whether the character is mirrored according to the
11585      * Unicode specification.  Mirrored characters should have their
11586      * glyphs horizontally mirrored when displayed in text that is
11587      * right-to-left.  For example, {@code '\u005Cu0028'} LEFT
11588      * PARENTHESIS is semantically defined to be an <i>opening
11589      * parenthesis</i>.  This will appear as a "(" in text that is
11590      * left-to-right but as a ")" in text that is right-to-left.
11591      *
11592      * <p><b>Note:</b> This method cannot handle <a
11593      * href="#supplementary"> supplementary characters</a>. To support
11594      * all Unicode characters, including supplementary characters, use
11595      * the {@link #isMirrored(int)} method.
11596      *
11597      * @param  ch {@code char} for which the mirrored property is requested
11598      * @return {@code true} if the char is mirrored, {@code false}
11599      *         if the {@code char} is not mirrored or is not defined.
11600      * @since 1.4
11601      */
isMirrored(char ch)11602     public static boolean isMirrored(char ch) {
11603         return isMirrored((int)ch);
11604     }
11605 
11606     /**
11607      * Determines whether the specified character (Unicode code point)
11608      * is mirrored according to the Unicode specification.  Mirrored
11609      * characters should have their glyphs horizontally mirrored when
11610      * displayed in text that is right-to-left.  For example,
11611      * {@code '\u005Cu0028'} LEFT PARENTHESIS is semantically
11612      * defined to be an <i>opening parenthesis</i>.  This will appear
11613      * as a "(" in text that is left-to-right but as a ")" in text
11614      * that is right-to-left.
11615      *
11616      * @param   codePoint the character (Unicode code point) to be tested.
11617      * @return  {@code true} if the character is mirrored, {@code false}
11618      *          if the character is not mirrored or is not defined.
11619      * @since   1.5
11620      */
11621     // BEGIN Android-changed: Reimplement methods natively on top of ICU4C.
11622     /*
11623     public static boolean isMirrored(int codePoint) {
11624         return CharacterData.of(codePoint).isMirrored(codePoint);
11625     }
11626     */
isMirrored(int codePoint)11627     public static boolean isMirrored(int codePoint) {
11628         return isMirroredImpl(codePoint);
11629     }
11630 
11631     @FastNative
isMirroredImpl(int codePoint)11632     native static boolean isMirroredImpl(int codePoint);
11633     // END Android-changed: Reimplement methods natively on top of ICU4C.
11634 
11635     /**
11636      * Compares two {@code Character} objects numerically.
11637      *
11638      * @param   anotherCharacter   the {@code Character} to be compared.
11639      * @return  the value {@code 0} if the argument {@code Character}
11640      *          is equal to this {@code Character}; a value less than
11641      *          {@code 0} if this {@code Character} is numerically less
11642      *          than the {@code Character} argument; and a value greater than
11643      *          {@code 0} if this {@code Character} is numerically greater
11644      *          than the {@code Character} argument (unsigned comparison).
11645      *          Note that this is strictly a numerical comparison; it is not
11646      *          locale-dependent.
11647      * @since   1.2
11648      */
compareTo(Character anotherCharacter)11649     public int compareTo(Character anotherCharacter) {
11650         return compare(this.value, anotherCharacter.value);
11651     }
11652 
11653     /**
11654      * Compares two {@code char} values numerically.
11655      * The value returned is identical to what would be returned by:
11656      * <pre>
11657      *    Character.valueOf(x).compareTo(Character.valueOf(y))
11658      * </pre>
11659      *
11660      * @param  x the first {@code char} to compare
11661      * @param  y the second {@code char} to compare
11662      * @return the value {@code 0} if {@code x == y};
11663      *         a value less than {@code 0} if {@code x < y}; and
11664      *         a value greater than {@code 0} if {@code x > y}
11665      * @since 1.7
11666      */
compare(char x, char y)11667     public static int compare(char x, char y) {
11668         return x - y;
11669     }
11670 
11671     // BEGIN Android-removed: Use ICU.
11672     /**
11673      * Converts the character (Unicode code point) argument to uppercase using
11674      * information from the UnicodeData file.
11675      *
11676      * @param   codePoint   the character (Unicode code point) to be converted.
11677      * @return  either the uppercase equivalent of the character, if
11678      *          any, or an error flag ({@code Character.ERROR})
11679      *          that indicates that a 1:M {@code char} mapping exists.
11680      * @see     Character#isLowerCase(char)
11681      * @see     Character#isUpperCase(char)
11682      * @see     Character#toLowerCase(char)
11683      * @see     Character#toTitleCase(char)
11684      * @since 1.4
11685      *
11686     static int toUpperCaseEx(int codePoint) {
11687         assert isValidCodePoint(codePoint);
11688         return CharacterData.of(codePoint).toUpperCaseEx(codePoint);
11689     }
11690 
11691     /**
11692      * Converts the character (Unicode code point) argument to uppercase using case
11693      * mapping information from the SpecialCasing file in the Unicode
11694      * specification. If a character has no explicit uppercase
11695      * mapping, then the {@code char} itself is returned in the
11696      * {@code char[]}.
11697      *
11698      * @param   codePoint   the character (Unicode code point) to be converted.
11699      * @return a {@code char[]} with the uppercased character.
11700      * @since 1.4
11701      *
11702     static char[] toUpperCaseCharArray(int codePoint) {
11703         // As of Unicode 6.0, 1:M uppercasings only happen in the BMP.
11704         assert isBmpCodePoint(codePoint);
11705         return CharacterData.of(codePoint).toUpperCaseCharArray(codePoint);
11706     }
11707     */
11708     // END Android-removed: Use ICU.
11709 
    /**
     * The number of bits used to represent a {@code char} value in unsigned
     * binary form, constant {@code 16}.
     *
     * @see #BYTES
     * @since 1.5
     */
    public static final int SIZE = 16;
11717 
    /**
     * The number of bytes used to represent a {@code char} value in unsigned
     * binary form, derived as {@link #SIZE} divided by {@link Byte#SIZE}.
     *
     * @see #SIZE
     * @since 1.8
     */
    public static final int BYTES = SIZE / Byte.SIZE;
11725 
11726     /**
11727      * Returns the value obtained by reversing the order of the bytes in the
11728      * specified {@code char} value.
11729      *
11730      * @param ch The {@code char} of which to reverse the byte order.
11731      * @return the value obtained by reversing (or, equivalently, swapping)
11732      *     the bytes in the specified {@code char} value.
11733      * @since 1.5
11734      */
11735     @IntrinsicCandidate
reverseBytes(char ch)11736     public static char reverseBytes(char ch) {
11737         return (char) (((ch & 0xFF00) >> 8) | (ch << 8));
11738     }
11739 
11740     /**
11741      * Returns the Unicode name of the specified character
11742      * {@code codePoint}, or null if the code point is
11743      * {@link #UNASSIGNED unassigned}.
11744      * <p>
11745      * Note: if the specified character is not assigned a name by
11746      * the <i>UnicodeData</i> file (part of the Unicode Character
11747      * Database maintained by the Unicode Consortium), the returned
11748      * name is the same as the result of expression:
11749      *
11750      * <blockquote>{@code
11751      *     Character.UnicodeBlock.of(codePoint).toString().replace('_', ' ')
11752      *     + " "
11753      *     + Integer.toHexString(codePoint).toUpperCase(Locale.ROOT);
11754      *
11755      * }</blockquote>
11756      *
11757      * @param  codePoint the character (Unicode code point)
11758      *
11759      * @return the Unicode name of the specified character, or null if
11760      *         the code point is unassigned.
11761      *
11762      * @throws IllegalArgumentException if the specified
11763      *            {@code codePoint} is not a valid Unicode
11764      *            code point.
11765      *
11766      * @since 1.7
11767      */
getName(int codePoint)11768     public static String getName(int codePoint) {
11769         if (!isValidCodePoint(codePoint)) {
11770             throw new IllegalArgumentException(
11771                 String.format("Not a valid Unicode code point: 0x%X", codePoint));
11772         }
11773         // Android-changed: Use ICU.
11774         // String name = CharacterName.get(codePoint);
11775         String name = getNameImpl(codePoint);
11776         if (name != null)
11777             return name;
11778         if (getType(codePoint) == UNASSIGNED)
11779             return null;
11780         UnicodeBlock block = UnicodeBlock.of(codePoint);
11781         if (block != null)
11782             return block.toString().replace('_', ' ') + " "
11783                    + Integer.toHexString(codePoint).toUpperCase(Locale.ROOT);
11784         // should never come here
11785         return Integer.toHexString(codePoint).toUpperCase(Locale.ROOT);
11786     }
11787 
11788     // BEGIN Android-removed: expose after CharacterName.getCodePoint() is imported.
11789     /**
11790      * Returns the code point value of the Unicode character specified by
11791      * the given Unicode character name.
11792      * <p>
11793      * Note: if a character is not assigned a name by the <i>UnicodeData</i>
11794      * file (part of the Unicode Character Database maintained by the Unicode
11795      * Consortium), its name is defined as the result of expression:
11796      *
11797      * <blockquote>{@code
11798      *     Character.UnicodeBlock.of(codePoint).toString().replace('_', ' ')
11799      *     + " "
11800      *     + Integer.toHexString(codePoint).toUpperCase(Locale.ROOT);
11801      *
11802      * }</blockquote>
11803      * <p>
11804      * The {@code name} matching is case insensitive, with any leading and
     * trailing whitespace characters removed.
11806      *
11807      * @param  name the Unicode character name
11808      *
11809      * @return the code point value of the character specified by its name.
11810      *
11811      * @throws IllegalArgumentException if the specified {@code name}
11812      *         is not a valid Unicode character name.
11813      * @throws NullPointerException if {@code name} is {@code null}
11814      *
11815      * @since 9
11816      */
codePointOf(String name)11817     public static int codePointOf(String name) {
11818         name = name.trim().toUpperCase(Locale.ROOT);
11819         // Android-changed: Use ICU4C.
11820         // int cp = CharacterName.getInstance().getCodePoint(name);
11821         int cp = codePointOfImpl(name);
11822         if (cp != -1)
11823             return cp;
11824         try {
11825             int off = name.lastIndexOf(' ');
11826             if (off != -1) {
11827                 cp = Integer.parseInt(name, off + 1, name.length(), 16);
11828                 if (isValidCodePoint(cp) && name.equals(getName(cp)))
11829                     return cp;
11830             }
11831         } catch (Exception x) {}
11832         throw new IllegalArgumentException("Unrecognized character name :" + name);
11833     }
11834     // END Android-removed: expose after CharacterName.getCodePoint() is imported.
11835 
11836     // Android-added: Use ICU.
11837     // Implement getNameImpl() and codePointOfImpl() natively.
    // Native name lookup used by getName(int), which handles a null result itself
    // (unassigned check, then block-based fallback name).
    private static native String getNameImpl(int codePoint);
11839 
    // Native name-to-code-point lookup used by codePointOf(String), which treats a
    // result of -1 as "no match" and then tries its own fallback parsing.
    @FastNative
    private static native int codePointOfImpl(String name);
11842 }
11843