1 /* GENERATED SOURCE. DO NOT MODIFY. */
2 // © 2016 and later: Unicode, Inc. and others.
3 // License & terms of use: http://www.unicode.org/copyright.html#License
4 /**
5  *******************************************************************************
6  * Copyright (C) 1996-2016, International Business Machines Corporation and
7  * others. All Rights Reserved.
8  *******************************************************************************
9  */
10 
11 package android.icu.lang;
12 
13 import java.lang.ref.SoftReference;
14 import java.util.HashMap;
15 import java.util.Iterator;
16 import java.util.Locale;
17 import java.util.Map;
18 
19 import android.icu.impl.CaseMapImpl;
20 import android.icu.impl.IllegalIcuArgumentException;
21 import android.icu.impl.Trie2;
22 import android.icu.impl.UBiDiProps;
23 import android.icu.impl.UCaseProps;
24 import android.icu.impl.UCharacterName;
25 import android.icu.impl.UCharacterNameChoice;
26 import android.icu.impl.UCharacterProperty;
27 import android.icu.impl.UCharacterUtility;
28 import android.icu.impl.UPropertyAliases;
29 import android.icu.lang.UCharacterEnums.ECharacterCategory;
30 import android.icu.lang.UCharacterEnums.ECharacterDirection;
31 import android.icu.text.BreakIterator;
32 import android.icu.text.Edits;
33 import android.icu.text.Normalizer2;
34 import android.icu.util.RangeValueIterator;
35 import android.icu.util.ULocale;
36 import android.icu.util.ValueIterator;
37 import android.icu.util.VersionInfo;
38 
39 /**
40  * <strong>[icu enhancement]</strong> ICU's replacement for {@link java.lang.Character}.&nbsp;Methods, fields, and other functionality specific to ICU are labeled '<strong>[icu]</strong>'.
41  *
42  * <p>The UCharacter class provides extensions to the {@link java.lang.Character} class.
43  * These extensions provide support for more Unicode properties.
44  * Each ICU release supports the latest version of Unicode available at that time.
45  *
46  * <p>For some time before Java 5 added support for supplementary Unicode code points,
47  * the ICU UCharacter class and many other ICU classes already supported them.
48  * Some UCharacter methods and constants were widened slightly differently than
49  * how the Character class methods and constants were widened later.
50  * In particular, {@link Character#MAX_VALUE} is still a char with the value U+FFFF,
51  * while the {@link UCharacter#MAX_VALUE} is an int with the value U+10FFFF.
52  *
53  * <p>Code points are represented in these APIs using ints. While it would be
54  * more convenient in Java to have a separate primitive datatype for them,
55  * ints suffice in the meantime.
56  *
57  * <p>Aside from the additions for UTF-16 support, and the updated Unicode
58  * properties, the main differences between UCharacter and Character are:
59  * <ul>
60  * <li> UCharacter is not designed to be a char wrapper and does not have
61  *      APIs that involve management of that single char.<br>
62  *      These include:
63  *      <ul>
64  *        <li> char charValue(),
65  *        <li> int compareTo(java.lang.Character, java.lang.Character), etc.
66  *      </ul>
67  * <li> UCharacter does not include Character APIs that are deprecated, nor
68  *      does it include the Java-specific character information, such as
69  *      boolean isJavaIdentifierPart(char ch).
70  * <li> Character maps characters 'A' - 'Z' and 'a' - 'z' to the numeric
71  *      values '10' - '35'. UCharacter also does this in digit and
72  *      getNumericValue, to adhere to the java semantics of these
73  *      methods.  New methods unicodeDigit, and
74  *      getUnicodeNumericValue do not treat the above code points
75  *      as having numeric values.  This is a semantic change from ICU4J 1.3.1.
76  * </ul>
77  * <p>
78  * In addition to Java compatibility functions, which calculate derived properties,
79  * this API provides low-level access to the Unicode Character Database.
80  * <p>
81  * Unicode assigns each code point (not just assigned characters) values for
82  * many properties.
83  * Most of them are simple boolean flags, or constants from a small enumerated list.
84  * For some properties, values are strings or other relatively more complex types.
85  * <p>
86  * For more information see
87  * <a href="http://www.unicode.org/ucd/">"About the Unicode Character Database"</a>
88  * (http://www.unicode.org/ucd/)
89  * and the <a href="http://www.icu-project.org/userguide/properties.html">ICU
90  * User Guide chapter on Properties</a>
91  * (http://www.icu-project.org/userguide/properties.html).
92  * <p>
93  * There are also functions that provide easy migration from C/POSIX functions
94  * like isblank(). Their use is generally discouraged because the C/POSIX
95  * standards do not define their semantics beyond the ASCII range, which means
96  * that different implementations exhibit very different behavior.
97  * Instead, Unicode properties should be used directly.
98  * <p>
99  * There are also only a few, broad C/POSIX character classes, and they tend
100  * to be used for conflicting purposes. For example, the "isalpha()" class
101  * is sometimes used to determine word boundaries, while a more sophisticated
102  * approach would at least distinguish initial letters from continuation
103  * characters (the latter including combining marks).
104  * (In ICU, BreakIterator is the most sophisticated API for word boundaries.)
105  * Another example: There is no "istitle()" class for titlecase characters.
106  * <p>
107  * ICU 3.4 and later provides API access for all twelve C/POSIX character classes.
108  * ICU implements them according to the Standard Recommendations in
109  * Annex C: Compatibility Properties of UTS #18 Unicode Regular Expressions
110  * (http://www.unicode.org/reports/tr18/#Compatibility_Properties).
111  * <p>
112  * API access for C/POSIX character classes is as follows:
113  * <pre>{@code
114  * - alpha:     isUAlphabetic(c) or hasBinaryProperty(c, UProperty.ALPHABETIC)
115  * - lower:     isULowercase(c) or hasBinaryProperty(c, UProperty.LOWERCASE)
116  * - upper:     isUUppercase(c) or hasBinaryProperty(c, UProperty.UPPERCASE)
117  * - punct:     ((1<<getType(c)) & ((1<<DASH_PUNCTUATION)|(1<<START_PUNCTUATION)|
118  *               (1<<END_PUNCTUATION)|(1<<CONNECTOR_PUNCTUATION)|(1<<OTHER_PUNCTUATION)|
119  *               (1<<INITIAL_PUNCTUATION)|(1<<FINAL_PUNCTUATION)))!=0
120  * - digit:     isDigit(c) or getType(c)==DECIMAL_DIGIT_NUMBER
121  * - xdigit:    hasBinaryProperty(c, UProperty.POSIX_XDIGIT)
122  * - alnum:     hasBinaryProperty(c, UProperty.POSIX_ALNUM)
123  * - space:     isUWhiteSpace(c) or hasBinaryProperty(c, UProperty.WHITE_SPACE)
124  * - blank:     hasBinaryProperty(c, UProperty.POSIX_BLANK)
125  * - cntrl:     getType(c)==CONTROL
126  * - graph:     hasBinaryProperty(c, UProperty.POSIX_GRAPH)
127  * - print:     hasBinaryProperty(c, UProperty.POSIX_PRINT)}</pre>
128  * <p>
129  * The C/POSIX character classes are also available in UnicodeSet patterns,
130  * using patterns like [:graph:] or \p{graph}.
131  *
132  * <p><strong>[icu] Note:</strong> There are several ICU (and Java) whitespace functions.
133  * Comparison:<ul>
134  * <li> isUWhiteSpace=UCHAR_WHITE_SPACE: Unicode White_Space property;
135  *       most of general categories "Z" (separators) + most whitespace ISO controls
136  *       (including no-break spaces, but excluding IS1..IS4 and ZWSP)
137  * <li> isWhitespace: Java isWhitespace; Z + whitespace ISO controls but excluding no-break spaces
138  * <li> isSpaceChar: just Z (including no-break spaces)</ul>
139  *
140  * <p>
141  * This class is not subclassable.
142  *
143  * @author Syn Wee Quek
144  * @see android.icu.lang.UCharacterEnums
145  */
146 
147 public final class UCharacter implements ECharacterCategory, ECharacterDirection
148 {
149     // public inner classes ----------------------------------------------
150 
151     /**
152      * <strong>[icu enhancement]</strong> ICU's replacement for {@link java.lang.Character.UnicodeBlock}.&nbsp;Methods, fields, and other functionality specific to ICU are labeled '<strong>[icu]</strong>'.
153      *
154      * A family of character subsets representing the character blocks in the
155      * Unicode specification, generated from Unicode Data file Blocks.txt.
156      * Character blocks generally define characters used for a specific script
157      * or purpose. A character is contained by at most one Unicode block.
158      *
159      * <strong>[icu] Note:</strong> All fields named XXX_ID are specific to ICU.
160      */
161     public static final class UnicodeBlock extends Character.Subset
162     {
163         // block id corresponding to icu4c -----------------------------------
164 
165         /**
166          */
167         public static final int INVALID_CODE_ID = -1;
168         /**
169          */
170         public static final int BASIC_LATIN_ID = 1;
171         /**
172          */
173         public static final int LATIN_1_SUPPLEMENT_ID = 2;
174         /**
175          */
176         public static final int LATIN_EXTENDED_A_ID = 3;
177         /**
178          */
179         public static final int LATIN_EXTENDED_B_ID = 4;
180         /**
181          */
182         public static final int IPA_EXTENSIONS_ID = 5;
183         /**
184          */
185         public static final int SPACING_MODIFIER_LETTERS_ID = 6;
186         /**
187          */
188         public static final int COMBINING_DIACRITICAL_MARKS_ID = 7;
189         /**
190          * Unicode 3.2 renames this block to "Greek and Coptic".
191          */
192         public static final int GREEK_ID = 8;
193         /**
194          */
195         public static final int CYRILLIC_ID = 9;
196         /**
197          */
198         public static final int ARMENIAN_ID = 10;
199         /**
200          */
201         public static final int HEBREW_ID = 11;
202         /**
203          */
204         public static final int ARABIC_ID = 12;
205         /**
206          */
207         public static final int SYRIAC_ID = 13;
208         /**
209          */
210         public static final int THAANA_ID = 14;
211         /**
212          */
213         public static final int DEVANAGARI_ID = 15;
214         /**
215          */
216         public static final int BENGALI_ID = 16;
217         /**
218          */
219         public static final int GURMUKHI_ID = 17;
220         /**
221          */
222         public static final int GUJARATI_ID = 18;
223         /**
224          */
225         public static final int ORIYA_ID = 19;
226         /**
227          */
228         public static final int TAMIL_ID = 20;
229         /**
230          */
231         public static final int TELUGU_ID = 21;
232         /**
233          */
234         public static final int KANNADA_ID = 22;
235         /**
236          */
237         public static final int MALAYALAM_ID = 23;
238         /**
239          */
240         public static final int SINHALA_ID = 24;
241         /**
242          */
243         public static final int THAI_ID = 25;
244         /**
245          */
246         public static final int LAO_ID = 26;
247         /**
248          */
249         public static final int TIBETAN_ID = 27;
250         /**
251          */
252         public static final int MYANMAR_ID = 28;
253         /**
254          */
255         public static final int GEORGIAN_ID = 29;
256         /**
257          */
258         public static final int HANGUL_JAMO_ID = 30;
259         /**
260          */
261         public static final int ETHIOPIC_ID = 31;
262         /**
263          */
264         public static final int CHEROKEE_ID = 32;
265         /**
266          */
267         public static final int UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_ID = 33;
268         /**
269          */
270         public static final int OGHAM_ID = 34;
271         /**
272          */
273         public static final int RUNIC_ID = 35;
274         /**
275          */
276         public static final int KHMER_ID = 36;
277         /**
278          */
279         public static final int MONGOLIAN_ID = 37;
280         /**
281          */
282         public static final int LATIN_EXTENDED_ADDITIONAL_ID = 38;
283         /**
284          */
285         public static final int GREEK_EXTENDED_ID = 39;
286         /**
287          */
288         public static final int GENERAL_PUNCTUATION_ID = 40;
289         /**
290          */
291         public static final int SUPERSCRIPTS_AND_SUBSCRIPTS_ID = 41;
292         /**
293          */
294         public static final int CURRENCY_SYMBOLS_ID = 42;
295         /**
296          * Unicode 3.2 renames this block to "Combining Diacritical Marks for
297          * Symbols".
298          */
299         public static final int COMBINING_MARKS_FOR_SYMBOLS_ID = 43;
300         /**
301          */
302         public static final int LETTERLIKE_SYMBOLS_ID = 44;
303         /**
304          */
305         public static final int NUMBER_FORMS_ID = 45;
306         /**
307          */
308         public static final int ARROWS_ID = 46;
309         /**
310          */
311         public static final int MATHEMATICAL_OPERATORS_ID = 47;
312         /**
313          */
314         public static final int MISCELLANEOUS_TECHNICAL_ID = 48;
315         /**
316          */
317         public static final int CONTROL_PICTURES_ID = 49;
318         /**
319          */
320         public static final int OPTICAL_CHARACTER_RECOGNITION_ID = 50;
321         /**
322          */
323         public static final int ENCLOSED_ALPHANUMERICS_ID = 51;
324         /**
325          */
326         public static final int BOX_DRAWING_ID = 52;
327         /**
328          */
329         public static final int BLOCK_ELEMENTS_ID = 53;
330         /**
331          */
332         public static final int GEOMETRIC_SHAPES_ID = 54;
333         /**
334          */
335         public static final int MISCELLANEOUS_SYMBOLS_ID = 55;
336         /**
337          */
338         public static final int DINGBATS_ID = 56;
339         /**
340          */
341         public static final int BRAILLE_PATTERNS_ID = 57;
342         /**
343          */
344         public static final int CJK_RADICALS_SUPPLEMENT_ID = 58;
345         /**
346          */
347         public static final int KANGXI_RADICALS_ID = 59;
348         /**
349          */
350         public static final int IDEOGRAPHIC_DESCRIPTION_CHARACTERS_ID = 60;
351         /**
352          */
353         public static final int CJK_SYMBOLS_AND_PUNCTUATION_ID = 61;
354         /**
355          */
356         public static final int HIRAGANA_ID = 62;
357         /**
358          */
359         public static final int KATAKANA_ID = 63;
360         /**
361          */
362         public static final int BOPOMOFO_ID = 64;
363         /**
364          */
365         public static final int HANGUL_COMPATIBILITY_JAMO_ID = 65;
366         /**
367          */
368         public static final int KANBUN_ID = 66;
369         /**
370          */
371         public static final int BOPOMOFO_EXTENDED_ID = 67;
372         /**
373          */
374         public static final int ENCLOSED_CJK_LETTERS_AND_MONTHS_ID = 68;
375         /**
376          */
377         public static final int CJK_COMPATIBILITY_ID = 69;
378         /**
379          */
380         public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_ID = 70;
381         /**
382          */
383         public static final int CJK_UNIFIED_IDEOGRAPHS_ID = 71;
384         /**
385          */
386         public static final int YI_SYLLABLES_ID = 72;
387         /**
388          */
389         public static final int YI_RADICALS_ID = 73;
390         /**
391          */
392         public static final int HANGUL_SYLLABLES_ID = 74;
393         /**
394          */
395         public static final int HIGH_SURROGATES_ID = 75;
396         /**
397          */
398         public static final int HIGH_PRIVATE_USE_SURROGATES_ID = 76;
399         /**
400          */
401         public static final int LOW_SURROGATES_ID = 77;
402         /**
403          * Same as public static final int PRIVATE_USE_ID.
404          * Until Unicode 3.1.1, the corresponding block name was "Private Use",
405          * and multiple code point ranges had this block.
406          * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area"
407          * and adds separate blocks for the supplementary PUAs.
408          */
409         public static final int PRIVATE_USE_AREA_ID = 78;
410         /**
411          * Same as public static final int PRIVATE_USE_AREA_ID.
412          * Until Unicode 3.1.1, the corresponding block name was "Private Use",
413          * and multiple code point ranges had this block.
414          * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area"
415          * and adds separate blocks for the supplementary PUAs.
416          */
417         public static final int PRIVATE_USE_ID = PRIVATE_USE_AREA_ID;
418         /**
419          */
420         public static final int CJK_COMPATIBILITY_IDEOGRAPHS_ID = 79;
421         /**
422          */
423         public static final int ALPHABETIC_PRESENTATION_FORMS_ID = 80;
424         /**
425          */
426         public static final int ARABIC_PRESENTATION_FORMS_A_ID = 81;
427         /**
428          */
429         public static final int COMBINING_HALF_MARKS_ID = 82;
430         /**
431          */
432         public static final int CJK_COMPATIBILITY_FORMS_ID = 83;
433         /**
434          */
435         public static final int SMALL_FORM_VARIANTS_ID = 84;
436         /**
437          */
438         public static final int ARABIC_PRESENTATION_FORMS_B_ID = 85;
439         /**
440          */
441         public static final int SPECIALS_ID = 86;
442         /**
443          */
444         public static final int HALFWIDTH_AND_FULLWIDTH_FORMS_ID = 87;
445         /**
446          */
447         public static final int OLD_ITALIC_ID = 88;
448         /**
449          */
450         public static final int GOTHIC_ID = 89;
451         /**
452          */
453         public static final int DESERET_ID = 90;
454         /**
455          */
456         public static final int BYZANTINE_MUSICAL_SYMBOLS_ID = 91;
457         /**
458          */
459         public static final int MUSICAL_SYMBOLS_ID = 92;
460         /**
461          */
462         public static final int MATHEMATICAL_ALPHANUMERIC_SYMBOLS_ID = 93;
463         /**
464          */
465         public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_ID = 94;
466         /**
467          */
468         public static final int
469         CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_ID = 95;
470         /**
471          */
472         public static final int TAGS_ID = 96;
473 
474         // New blocks in Unicode 3.2
475 
476         /**
477          * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement".
478          */
479         public static final int CYRILLIC_SUPPLEMENTARY_ID = 97;
480         /**
481          * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement".
482          */
483 
484         public static final int CYRILLIC_SUPPLEMENT_ID = 97;
485         /**
486          */
487         public static final int TAGALOG_ID = 98;
488         /**
489          */
490         public static final int HANUNOO_ID = 99;
491         /**
492          */
493         public static final int BUHID_ID = 100;
494         /**
495          */
496         public static final int TAGBANWA_ID = 101;
497         /**
498          */
499         public static final int MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A_ID = 102;
500         /**
501          */
502         public static final int SUPPLEMENTAL_ARROWS_A_ID = 103;
503         /**
504          */
505         public static final int SUPPLEMENTAL_ARROWS_B_ID = 104;
506         /**
507          */
508         public static final int MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B_ID = 105;
509         /**
510          */
511         public static final int SUPPLEMENTAL_MATHEMATICAL_OPERATORS_ID = 106;
512         /**
513          */
514         public static final int KATAKANA_PHONETIC_EXTENSIONS_ID = 107;
515         /**
516          */
517         public static final int VARIATION_SELECTORS_ID = 108;
518         /**
519          */
520         public static final int SUPPLEMENTARY_PRIVATE_USE_AREA_A_ID = 109;
521         /**
522          */
523         public static final int SUPPLEMENTARY_PRIVATE_USE_AREA_B_ID = 110;
524 
525         /**
526          */
527         public static final int LIMBU_ID = 111; /*[1900]*/
528         /**
529          */
530         public static final int TAI_LE_ID = 112; /*[1950]*/
531         /**
532          */
533         public static final int KHMER_SYMBOLS_ID = 113; /*[19E0]*/
534         /**
535          */
536         public static final int PHONETIC_EXTENSIONS_ID = 114; /*[1D00]*/
537         /**
538          */
539         public static final int MISCELLANEOUS_SYMBOLS_AND_ARROWS_ID = 115; /*[2B00]*/
540         /**
541          */
542         public static final int YIJING_HEXAGRAM_SYMBOLS_ID = 116; /*[4DC0]*/
543         /**
544          */
545         public static final int LINEAR_B_SYLLABARY_ID = 117; /*[10000]*/
546         /**
547          */
548         public static final int LINEAR_B_IDEOGRAMS_ID = 118; /*[10080]*/
549         /**
550          */
551         public static final int AEGEAN_NUMBERS_ID = 119; /*[10100]*/
552         /**
553          */
554         public static final int UGARITIC_ID = 120; /*[10380]*/
555         /**
556          */
557         public static final int SHAVIAN_ID = 121; /*[10450]*/
558         /**
559          */
560         public static final int OSMANYA_ID = 122; /*[10480]*/
561         /**
562          */
563         public static final int CYPRIOT_SYLLABARY_ID = 123; /*[10800]*/
564         /**
565          */
566         public static final int TAI_XUAN_JING_SYMBOLS_ID = 124; /*[1D300]*/
567         /**
568          */
569         public static final int VARIATION_SELECTORS_SUPPLEMENT_ID = 125; /*[E0100]*/
570 
571         /* New blocks in Unicode 4.1 */
572 
573         /**
574          */
575         public static final int ANCIENT_GREEK_MUSICAL_NOTATION_ID = 126; /*[1D200]*/
576 
577         /**
578          */
579         public static final int ANCIENT_GREEK_NUMBERS_ID = 127; /*[10140]*/
580 
581         /**
582          */
583         public static final int ARABIC_SUPPLEMENT_ID = 128; /*[0750]*/
584 
585         /**
586          */
587         public static final int BUGINESE_ID = 129; /*[1A00]*/
588 
589         /**
590          */
591         public static final int CJK_STROKES_ID = 130; /*[31C0]*/
592 
593         /**
594          */
595         public static final int COMBINING_DIACRITICAL_MARKS_SUPPLEMENT_ID = 131; /*[1DC0]*/
596 
597         /**
598          */
599         public static final int COPTIC_ID = 132; /*[2C80]*/
600 
601         /**
602          */
603         public static final int ETHIOPIC_EXTENDED_ID = 133; /*[2D80]*/
604 
605         /**
606          */
607         public static final int ETHIOPIC_SUPPLEMENT_ID = 134; /*[1380]*/
608 
609         /**
610          */
611         public static final int GEORGIAN_SUPPLEMENT_ID = 135; /*[2D00]*/
612 
613         /**
614          */
615         public static final int GLAGOLITIC_ID = 136; /*[2C00]*/
616 
617         /**
618          */
619         public static final int KHAROSHTHI_ID = 137; /*[10A00]*/
620 
621         /**
622          */
623         public static final int MODIFIER_TONE_LETTERS_ID = 138; /*[A700]*/
624 
625         /**
626          */
627         public static final int NEW_TAI_LUE_ID = 139; /*[1980]*/
628 
629         /**
630          */
631         public static final int OLD_PERSIAN_ID = 140; /*[103A0]*/
632 
633         /**
634          */
635         public static final int PHONETIC_EXTENSIONS_SUPPLEMENT_ID = 141; /*[1D80]*/
636 
637         /**
638          */
639         public static final int SUPPLEMENTAL_PUNCTUATION_ID = 142; /*[2E00]*/
640 
641         /**
642          */
643         public static final int SYLOTI_NAGRI_ID = 143; /*[A800]*/
644 
645         /**
646          */
647         public static final int TIFINAGH_ID = 144; /*[2D30]*/
648 
649         /**
650          */
651         public static final int VERTICAL_FORMS_ID = 145; /*[FE10]*/
652 
653         /* New blocks in Unicode 5.0 */
654 
655         /**
656          */
657         public static final int NKO_ID = 146; /*[07C0]*/
658         /**
659          */
660         public static final int BALINESE_ID = 147; /*[1B00]*/
661         /**
662          */
663         public static final int LATIN_EXTENDED_C_ID = 148; /*[2C60]*/
664         /**
665          */
666         public static final int LATIN_EXTENDED_D_ID = 149; /*[A720]*/
667         /**
668          */
669         public static final int PHAGS_PA_ID = 150; /*[A840]*/
670         /**
671          */
672         public static final int PHOENICIAN_ID = 151; /*[10900]*/
673         /**
674          */
675         public static final int CUNEIFORM_ID = 152; /*[12000]*/
676         /**
677          */
678         public static final int CUNEIFORM_NUMBERS_AND_PUNCTUATION_ID = 153; /*[12400]*/
679         /**
680          */
681         public static final int COUNTING_ROD_NUMERALS_ID = 154; /*[1D360]*/
682 
683         /**
684          */
685         public static final int SUNDANESE_ID = 155; /* [1B80] */
686 
687         /**
688          */
689         public static final int LEPCHA_ID = 156; /* [1C00] */
690 
691         /**
692          */
693         public static final int OL_CHIKI_ID = 157; /* [1C50] */
694 
695         /**
696          */
697         public static final int CYRILLIC_EXTENDED_A_ID = 158; /* [2DE0] */
698 
699         /**
700          */
701         public static final int VAI_ID = 159; /* [A500] */
702 
703         /**
704          */
705         public static final int CYRILLIC_EXTENDED_B_ID = 160; /* [A640] */
706 
707         /**
708          */
709         public static final int SAURASHTRA_ID = 161; /* [A880] */
710 
711         /**
712          */
713         public static final int KAYAH_LI_ID = 162; /* [A900] */
714 
715         /**
716          */
717         public static final int REJANG_ID = 163; /* [A930] */
718 
719         /**
720          */
721         public static final int CHAM_ID = 164; /* [AA00] */
722 
723         /**
724          */
725         public static final int ANCIENT_SYMBOLS_ID = 165; /* [10190] */
726 
727         /**
728          */
729         public static final int PHAISTOS_DISC_ID = 166; /* [101D0] */
730 
731         /**
732          */
733         public static final int LYCIAN_ID = 167; /* [10280] */
734 
735         /**
736          */
737         public static final int CARIAN_ID = 168; /* [102A0] */
738 
739         /**
740          */
741         public static final int LYDIAN_ID = 169; /* [10920] */
742 
743         /**
744          */
745         public static final int MAHJONG_TILES_ID = 170; /* [1F000] */
746 
747         /**
748          */
749         public static final int DOMINO_TILES_ID = 171; /* [1F030] */
750 
751         /* New blocks in Unicode 5.2 */
752 
753         /***/
754         public static final int SAMARITAN_ID = 172; /*[0800]*/
755         /***/
756         public static final int UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_ID = 173; /*[18B0]*/
757         /***/
758         public static final int TAI_THAM_ID = 174; /*[1A20]*/
759         /***/
760         public static final int VEDIC_EXTENSIONS_ID = 175; /*[1CD0]*/
761         /***/
762         public static final int LISU_ID = 176; /*[A4D0]*/
763         /***/
764         public static final int BAMUM_ID = 177; /*[A6A0]*/
765         /***/
766         public static final int COMMON_INDIC_NUMBER_FORMS_ID = 178; /*[A830]*/
767         /***/
768         public static final int DEVANAGARI_EXTENDED_ID = 179; /*[A8E0]*/
769         /***/
770         public static final int HANGUL_JAMO_EXTENDED_A_ID = 180; /*[A960]*/
771         /***/
772         public static final int JAVANESE_ID = 181; /*[A980]*/
773         /***/
774         public static final int MYANMAR_EXTENDED_A_ID = 182; /*[AA60]*/
775         /***/
776         public static final int TAI_VIET_ID = 183; /*[AA80]*/
777         /***/
778         public static final int MEETEI_MAYEK_ID = 184; /*[ABC0]*/
779         /***/
780         public static final int HANGUL_JAMO_EXTENDED_B_ID = 185; /*[D7B0]*/
781         /***/
782         public static final int IMPERIAL_ARAMAIC_ID = 186; /*[10840]*/
783         /***/
784         public static final int OLD_SOUTH_ARABIAN_ID = 187; /*[10A60]*/
785         /***/
786         public static final int AVESTAN_ID = 188; /*[10B00]*/
787         /***/
788         public static final int INSCRIPTIONAL_PARTHIAN_ID = 189; /*[10B40]*/
789         /***/
790         public static final int INSCRIPTIONAL_PAHLAVI_ID = 190; /*[10B60]*/
791         /***/
792         public static final int OLD_TURKIC_ID = 191; /*[10C00]*/
793         /***/
794         public static final int RUMI_NUMERAL_SYMBOLS_ID = 192; /*[10E60]*/
795         /***/
796         public static final int KAITHI_ID = 193; /*[11080]*/
797         /***/
798         public static final int EGYPTIAN_HIEROGLYPHS_ID = 194; /*[13000]*/
799         /***/
800         public static final int ENCLOSED_ALPHANUMERIC_SUPPLEMENT_ID = 195; /*[1F100]*/
801         /***/
802         public static final int ENCLOSED_IDEOGRAPHIC_SUPPLEMENT_ID = 196; /*[1F200]*/
803         /***/
804         public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_ID = 197; /*[2A700]*/
805 
806         /* New blocks in Unicode 6.0 */
807 
808         /***/
809         public static final int MANDAIC_ID = 198; /*[0840]*/
810         /***/
811         public static final int BATAK_ID = 199; /*[1BC0]*/
812         /***/
813         public static final int ETHIOPIC_EXTENDED_A_ID = 200; /*[AB00]*/
814         /***/
815         public static final int BRAHMI_ID = 201; /*[11000]*/
816         /***/
817         public static final int BAMUM_SUPPLEMENT_ID = 202; /*[16800]*/
818         /***/
819         public static final int KANA_SUPPLEMENT_ID = 203; /*[1B000]*/
820         /***/
821         public static final int PLAYING_CARDS_ID = 204; /*[1F0A0]*/
822         /***/
823         public static final int MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS_ID = 205; /*[1F300]*/
824         /***/
825         public static final int EMOTICONS_ID = 206; /*[1F600]*/
826         /***/
827         public static final int TRANSPORT_AND_MAP_SYMBOLS_ID = 207; /*[1F680]*/
828         /***/
829         public static final int ALCHEMICAL_SYMBOLS_ID = 208; /*[1F700]*/
830         /***/
831         public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_ID = 209; /*[2B740]*/
832 
833         /* New blocks in Unicode 6.1 */
834 
835         /***/
836         public static final int ARABIC_EXTENDED_A_ID = 210; /*[08A0]*/
837         /***/
838         public static final int ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS_ID = 211; /*[1EE00]*/
839         /***/
840         public static final int CHAKMA_ID = 212; /*[11100]*/
841         /***/
842         public static final int MEETEI_MAYEK_EXTENSIONS_ID = 213; /*[AAE0]*/
843         /***/
844         public static final int MEROITIC_CURSIVE_ID = 214; /*[109A0]*/
845         /***/
846         public static final int MEROITIC_HIEROGLYPHS_ID = 215; /*[10980]*/
847         /***/
848         public static final int MIAO_ID = 216; /*[16F00]*/
849         /***/
850         public static final int SHARADA_ID = 217; /*[11180]*/
851         /***/
852         public static final int SORA_SOMPENG_ID = 218; /*[110D0]*/
853         /***/
854         public static final int SUNDANESE_SUPPLEMENT_ID = 219; /*[1CC0]*/
855         /***/
856         public static final int TAKRI_ID = 220; /*[11680]*/
857 
        /*
         * New blocks in Unicode 7.0: IDs 221 through 252, assigned consecutively.
         * The bracketed hex value after each constant is presumably the first code
         * point of that block -- TODO confirm against the Unicode block data.
         */

        /***/
        public static final int BASSA_VAH_ID = 221; /*[16AD0]*/
        /***/
        public static final int CAUCASIAN_ALBANIAN_ID = 222; /*[10530]*/
        /***/
        public static final int COPTIC_EPACT_NUMBERS_ID = 223; /*[102E0]*/
        /***/
        public static final int COMBINING_DIACRITICAL_MARKS_EXTENDED_ID = 224; /*[1AB0]*/
        /***/
        public static final int DUPLOYAN_ID = 225; /*[1BC00]*/
        /***/
        public static final int ELBASAN_ID = 226; /*[10500]*/
        /***/
        public static final int GEOMETRIC_SHAPES_EXTENDED_ID = 227; /*[1F780]*/
        /***/
        public static final int GRANTHA_ID = 228; /*[11300]*/
        /***/
        public static final int KHOJKI_ID = 229; /*[11200]*/
        /***/
        public static final int KHUDAWADI_ID = 230; /*[112B0]*/
        /***/
        public static final int LATIN_EXTENDED_E_ID = 231; /*[AB30]*/
        /***/
        public static final int LINEAR_A_ID = 232; /*[10600]*/
        /***/
        public static final int MAHAJANI_ID = 233; /*[11150]*/
        /***/
        public static final int MANICHAEAN_ID = 234; /*[10AC0]*/
        /***/
        public static final int MENDE_KIKAKUI_ID = 235; /*[1E800]*/
        /***/
        public static final int MODI_ID = 236; /*[11600]*/
        /***/
        public static final int MRO_ID = 237; /*[16A40]*/
        /***/
        public static final int MYANMAR_EXTENDED_B_ID = 238; /*[A9E0]*/
        /***/
        public static final int NABATAEAN_ID = 239; /*[10880]*/
        /***/
        public static final int OLD_NORTH_ARABIAN_ID = 240; /*[10A80]*/
        /***/
        public static final int OLD_PERMIC_ID = 241; /*[10350]*/
        /***/
        public static final int ORNAMENTAL_DINGBATS_ID = 242; /*[1F650]*/
        /***/
        public static final int PAHAWH_HMONG_ID = 243; /*[16B00]*/
        /***/
        public static final int PALMYRENE_ID = 244; /*[10860]*/
        /***/
        public static final int PAU_CIN_HAU_ID = 245; /*[11AC0]*/
        /***/
        public static final int PSALTER_PAHLAVI_ID = 246; /*[10B80]*/
        /***/
        public static final int SHORTHAND_FORMAT_CONTROLS_ID = 247; /*[1BCA0]*/
        /***/
        public static final int SIDDHAM_ID = 248; /*[11580]*/
        /***/
        public static final int SINHALA_ARCHAIC_NUMBERS_ID = 249; /*[111E0]*/
        /***/
        public static final int SUPPLEMENTAL_ARROWS_C_ID = 250; /*[1F800]*/
        /***/
        public static final int TIRHUTA_ID = 251; /*[11480]*/
        /***/
        public static final int WARANG_CITI_ID = 252; /*[118A0]*/
924 
        /*
         * New blocks in Unicode 8.0: IDs 253 through 262, assigned consecutively.
         * The bracketed hex value after each constant is presumably the first code
         * point of that block -- TODO confirm against the Unicode block data.
         */

        /***/
        public static final int AHOM_ID = 253; /*[11700]*/
        /***/
        public static final int ANATOLIAN_HIEROGLYPHS_ID = 254; /*[14400]*/
        /***/
        public static final int CHEROKEE_SUPPLEMENT_ID = 255; /*[AB70]*/
        /***/
        public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E_ID = 256; /*[2B820]*/
        /***/
        public static final int EARLY_DYNASTIC_CUNEIFORM_ID = 257; /*[12480]*/
        /***/
        public static final int HATRAN_ID = 258; /*[108E0]*/
        /***/
        public static final int MULTANI_ID = 259; /*[11280]*/
        /***/
        public static final int OLD_HUNGARIAN_ID = 260; /*[10C80]*/
        /***/
        public static final int SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS_ID = 261; /*[1F900]*/
        /***/
        public static final int SUTTON_SIGNWRITING_ID = 262; /*[1D800]*/
947 
        /*
         * New blocks in Unicode 9.0: IDs 263 through 273, assigned consecutively.
         * The bracketed hex value after each constant is presumably the first code
         * point of that block -- TODO confirm against the Unicode block data.
         * COUNT (declared right after this group) is one more than the last ID here.
         */

        /***/
        public static final int ADLAM_ID = 263; /*[1E900]*/
        /***/
        public static final int BHAIKSUKI_ID = 264; /*[11C00]*/
        /***/
        public static final int CYRILLIC_EXTENDED_C_ID = 265; /*[1C80]*/
        /***/
        public static final int GLAGOLITIC_SUPPLEMENT_ID = 266; /*[1E000]*/
        /***/
        public static final int IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION_ID = 267; /*[16FE0]*/
        /***/
        public static final int MARCHEN_ID = 268; /*[11C70]*/
        /***/
        public static final int MONGOLIAN_SUPPLEMENT_ID = 269; /*[11660]*/
        /***/
        public static final int NEWA_ID = 270; /*[11400]*/
        /***/
        public static final int OSAGE_ID = 271; /*[104B0]*/
        /***/
        public static final int TANGUT_ID = 272; /*[17000]*/
        /***/
        public static final int TANGUT_COMPONENTS_ID = 273; /*[18800]*/
972 
973         /**
974          * One more than the highest normal UnicodeBlock value.
975          * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.BLOCK).
976          *
977          * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
978          * @hide unsupported on Android
979          */
980         @Deprecated
981         public static final int COUNT = 274;
982 
983         // blocks objects ---------------------------------------------------
984 
985         /**
986          * Array of UnicodeBlocks, for easy access in getInstance(int)
987          */
988         private final static UnicodeBlock BLOCKS_[] = new UnicodeBlock[COUNT];
989 
990         /**
991          */
992         public static final UnicodeBlock NO_BLOCK
993         = new UnicodeBlock("NO_BLOCK", 0);
994 
995         /**
996          */
997         public static final UnicodeBlock BASIC_LATIN
998         = new UnicodeBlock("BASIC_LATIN", BASIC_LATIN_ID);
999         /**
1000          */
1001         public static final UnicodeBlock LATIN_1_SUPPLEMENT
1002         = new UnicodeBlock("LATIN_1_SUPPLEMENT", LATIN_1_SUPPLEMENT_ID);
1003         /**
1004          */
1005         public static final UnicodeBlock LATIN_EXTENDED_A
1006         = new UnicodeBlock("LATIN_EXTENDED_A", LATIN_EXTENDED_A_ID);
1007         /**
1008          */
1009         public static final UnicodeBlock LATIN_EXTENDED_B
1010         = new UnicodeBlock("LATIN_EXTENDED_B", LATIN_EXTENDED_B_ID);
1011         /**
1012          */
1013         public static final UnicodeBlock IPA_EXTENSIONS
1014         = new UnicodeBlock("IPA_EXTENSIONS", IPA_EXTENSIONS_ID);
1015         /**
1016          */
1017         public static final UnicodeBlock SPACING_MODIFIER_LETTERS
1018         = new UnicodeBlock("SPACING_MODIFIER_LETTERS", SPACING_MODIFIER_LETTERS_ID);
1019         /**
1020          */
1021         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS
1022         = new UnicodeBlock("COMBINING_DIACRITICAL_MARKS", COMBINING_DIACRITICAL_MARKS_ID);
1023         /**
1024          * Unicode 3.2 renames this block to "Greek and Coptic".
1025          */
1026         public static final UnicodeBlock GREEK
1027         = new UnicodeBlock("GREEK", GREEK_ID);
1028         /**
1029          */
1030         public static final UnicodeBlock CYRILLIC
1031         = new UnicodeBlock("CYRILLIC", CYRILLIC_ID);
1032         /**
1033          */
1034         public static final UnicodeBlock ARMENIAN
1035         = new UnicodeBlock("ARMENIAN", ARMENIAN_ID);
1036         /**
1037          */
1038         public static final UnicodeBlock HEBREW
1039         = new UnicodeBlock("HEBREW", HEBREW_ID);
1040         /**
1041          */
1042         public static final UnicodeBlock ARABIC
1043         = new UnicodeBlock("ARABIC", ARABIC_ID);
1044         /**
1045          */
1046         public static final UnicodeBlock SYRIAC
1047         = new UnicodeBlock("SYRIAC", SYRIAC_ID);
1048         /**
1049          */
1050         public static final UnicodeBlock THAANA
1051         = new UnicodeBlock("THAANA", THAANA_ID);
1052         /**
1053          */
1054         public static final UnicodeBlock DEVANAGARI
1055         = new UnicodeBlock("DEVANAGARI", DEVANAGARI_ID);
1056         /**
1057          */
1058         public static final UnicodeBlock BENGALI
1059         = new UnicodeBlock("BENGALI", BENGALI_ID);
1060         /**
1061          */
1062         public static final UnicodeBlock GURMUKHI
1063         = new UnicodeBlock("GURMUKHI", GURMUKHI_ID);
1064         /**
1065          */
1066         public static final UnicodeBlock GUJARATI
1067         = new UnicodeBlock("GUJARATI", GUJARATI_ID);
1068         /**
1069          */
1070         public static final UnicodeBlock ORIYA
1071         = new UnicodeBlock("ORIYA", ORIYA_ID);
1072         /**
1073          */
1074         public static final UnicodeBlock TAMIL
1075         = new UnicodeBlock("TAMIL", TAMIL_ID);
1076         /**
1077          */
1078         public static final UnicodeBlock TELUGU
1079         = new UnicodeBlock("TELUGU", TELUGU_ID);
1080         /**
1081          */
1082         public static final UnicodeBlock KANNADA
1083         = new UnicodeBlock("KANNADA", KANNADA_ID);
1084         /**
1085          */
1086         public static final UnicodeBlock MALAYALAM
1087         = new UnicodeBlock("MALAYALAM", MALAYALAM_ID);
1088         /**
1089          */
1090         public static final UnicodeBlock SINHALA
1091         = new UnicodeBlock("SINHALA", SINHALA_ID);
1092         /**
1093          */
1094         public static final UnicodeBlock THAI
1095         = new UnicodeBlock("THAI", THAI_ID);
1096         /**
1097          */
1098         public static final UnicodeBlock LAO
1099         = new UnicodeBlock("LAO", LAO_ID);
1100         /**
1101          */
1102         public static final UnicodeBlock TIBETAN
1103         = new UnicodeBlock("TIBETAN", TIBETAN_ID);
1104         /**
1105          */
1106         public static final UnicodeBlock MYANMAR
1107         = new UnicodeBlock("MYANMAR", MYANMAR_ID);
1108         /**
1109          */
1110         public static final UnicodeBlock GEORGIAN
1111         = new UnicodeBlock("GEORGIAN", GEORGIAN_ID);
1112         /**
1113          */
1114         public static final UnicodeBlock HANGUL_JAMO
1115         = new UnicodeBlock("HANGUL_JAMO", HANGUL_JAMO_ID);
1116         /**
1117          */
1118         public static final UnicodeBlock ETHIOPIC
1119         = new UnicodeBlock("ETHIOPIC", ETHIOPIC_ID);
1120         /**
1121          */
1122         public static final UnicodeBlock CHEROKEE
1123         = new UnicodeBlock("CHEROKEE", CHEROKEE_ID);
1124         /**
1125          */
1126         public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS
1127         = new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS",
1128                 UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_ID);
1129         /**
1130          */
1131         public static final UnicodeBlock OGHAM
1132         = new UnicodeBlock("OGHAM", OGHAM_ID);
1133         /**
1134          */
1135         public static final UnicodeBlock RUNIC
1136         = new UnicodeBlock("RUNIC", RUNIC_ID);
1137         /**
1138          */
1139         public static final UnicodeBlock KHMER
1140         = new UnicodeBlock("KHMER", KHMER_ID);
1141         /**
1142          */
1143         public static final UnicodeBlock MONGOLIAN
1144         = new UnicodeBlock("MONGOLIAN", MONGOLIAN_ID);
1145         /**
1146          */
1147         public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL
1148         = new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL", LATIN_EXTENDED_ADDITIONAL_ID);
1149         /**
1150          */
1151         public static final UnicodeBlock GREEK_EXTENDED
1152         = new UnicodeBlock("GREEK_EXTENDED", GREEK_EXTENDED_ID);
1153         /**
1154          */
1155         public static final UnicodeBlock GENERAL_PUNCTUATION
1156         = new UnicodeBlock("GENERAL_PUNCTUATION", GENERAL_PUNCTUATION_ID);
1157         /**
1158          */
1159         public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS
1160         = new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS", SUPERSCRIPTS_AND_SUBSCRIPTS_ID);
1161         /**
1162          */
1163         public static final UnicodeBlock CURRENCY_SYMBOLS
1164         = new UnicodeBlock("CURRENCY_SYMBOLS", CURRENCY_SYMBOLS_ID);
1165         /**
1166          * Unicode 3.2 renames this block to "Combining Diacritical Marks for
1167          * Symbols".
1168          */
1169         public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS
1170         = new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS", COMBINING_MARKS_FOR_SYMBOLS_ID);
1171         /**
1172          */
1173         public static final UnicodeBlock LETTERLIKE_SYMBOLS
1174         = new UnicodeBlock("LETTERLIKE_SYMBOLS", LETTERLIKE_SYMBOLS_ID);
1175         /**
1176          */
1177         public static final UnicodeBlock NUMBER_FORMS
1178         = new UnicodeBlock("NUMBER_FORMS", NUMBER_FORMS_ID);
1179         /**
1180          */
1181         public static final UnicodeBlock ARROWS
1182         = new UnicodeBlock("ARROWS", ARROWS_ID);
1183         /**
1184          */
1185         public static final UnicodeBlock MATHEMATICAL_OPERATORS
1186         = new UnicodeBlock("MATHEMATICAL_OPERATORS", MATHEMATICAL_OPERATORS_ID);
1187         /**
1188          */
1189         public static final UnicodeBlock MISCELLANEOUS_TECHNICAL
1190         = new UnicodeBlock("MISCELLANEOUS_TECHNICAL", MISCELLANEOUS_TECHNICAL_ID);
1191         /**
1192          */
1193         public static final UnicodeBlock CONTROL_PICTURES
1194         = new UnicodeBlock("CONTROL_PICTURES", CONTROL_PICTURES_ID);
1195         /**
1196          */
1197         public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION
1198         = new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION", OPTICAL_CHARACTER_RECOGNITION_ID);
1199         /**
1200          */
1201         public static final UnicodeBlock ENCLOSED_ALPHANUMERICS
1202         = new UnicodeBlock("ENCLOSED_ALPHANUMERICS", ENCLOSED_ALPHANUMERICS_ID);
1203         /**
1204          */
1205         public static final UnicodeBlock BOX_DRAWING
1206         = new UnicodeBlock("BOX_DRAWING", BOX_DRAWING_ID);
1207         /**
1208          */
1209         public static final UnicodeBlock BLOCK_ELEMENTS
1210         = new UnicodeBlock("BLOCK_ELEMENTS", BLOCK_ELEMENTS_ID);
1211         /**
1212          */
1213         public static final UnicodeBlock GEOMETRIC_SHAPES
1214         = new UnicodeBlock("GEOMETRIC_SHAPES", GEOMETRIC_SHAPES_ID);
1215         /**
1216          */
1217         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS
1218         = new UnicodeBlock("MISCELLANEOUS_SYMBOLS", MISCELLANEOUS_SYMBOLS_ID);
1219         /**
1220          */
1221         public static final UnicodeBlock DINGBATS
1222         = new UnicodeBlock("DINGBATS", DINGBATS_ID);
1223         /**
1224          */
1225         public static final UnicodeBlock BRAILLE_PATTERNS
1226         = new UnicodeBlock("BRAILLE_PATTERNS", BRAILLE_PATTERNS_ID);
1227         /**
1228          */
1229         public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT
1230         = new UnicodeBlock("CJK_RADICALS_SUPPLEMENT", CJK_RADICALS_SUPPLEMENT_ID);
1231         /**
1232          */
1233         public static final UnicodeBlock KANGXI_RADICALS
1234         = new UnicodeBlock("KANGXI_RADICALS", KANGXI_RADICALS_ID);
1235         /**
1236          */
1237         public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS
1238         = new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS",
1239                 IDEOGRAPHIC_DESCRIPTION_CHARACTERS_ID);
1240         /**
1241          */
1242         public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION
1243         = new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION", CJK_SYMBOLS_AND_PUNCTUATION_ID);
1244         /**
1245          */
1246         public static final UnicodeBlock HIRAGANA
1247         = new UnicodeBlock("HIRAGANA", HIRAGANA_ID);
1248         /**
1249          */
1250         public static final UnicodeBlock KATAKANA
1251         = new UnicodeBlock("KATAKANA", KATAKANA_ID);
1252         /**
1253          */
1254         public static final UnicodeBlock BOPOMOFO
1255         = new UnicodeBlock("BOPOMOFO", BOPOMOFO_ID);
1256         /**
1257          */
1258         public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO
1259         = new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO", HANGUL_COMPATIBILITY_JAMO_ID);
1260         /**
1261          */
1262         public static final UnicodeBlock KANBUN
1263         = new UnicodeBlock("KANBUN", KANBUN_ID);
1264         /**
1265          */
1266         public static final UnicodeBlock BOPOMOFO_EXTENDED
1267         = new UnicodeBlock("BOPOMOFO_EXTENDED", BOPOMOFO_EXTENDED_ID);
1268         /**
1269          */
1270         public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS
1271         = new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS",
1272                 ENCLOSED_CJK_LETTERS_AND_MONTHS_ID);
1273         /**
1274          */
1275         public static final UnicodeBlock CJK_COMPATIBILITY
1276         = new UnicodeBlock("CJK_COMPATIBILITY", CJK_COMPATIBILITY_ID);
1277         /**
1278          */
1279         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
1280         = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A",
1281                 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_ID);
1282         /**
1283          */
1284         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS
1285         = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS", CJK_UNIFIED_IDEOGRAPHS_ID);
1286         /**
1287          */
1288         public static final UnicodeBlock YI_SYLLABLES
1289         = new UnicodeBlock("YI_SYLLABLES", YI_SYLLABLES_ID);
1290         /**
1291          */
1292         public static final UnicodeBlock YI_RADICALS
1293         = new UnicodeBlock("YI_RADICALS", YI_RADICALS_ID);
1294         /**
1295          */
1296         public static final UnicodeBlock HANGUL_SYLLABLES
1297         = new UnicodeBlock("HANGUL_SYLLABLES", HANGUL_SYLLABLES_ID);
1298         /**
1299          */
1300         public static final UnicodeBlock HIGH_SURROGATES
1301         = new UnicodeBlock("HIGH_SURROGATES", HIGH_SURROGATES_ID);
1302         /**
1303          */
1304         public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES
1305         = new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES", HIGH_PRIVATE_USE_SURROGATES_ID);
1306         /**
1307          */
1308         public static final UnicodeBlock LOW_SURROGATES
1309         = new UnicodeBlock("LOW_SURROGATES", LOW_SURROGATES_ID);
1310         /**
1311          * Same as public static final int PRIVATE_USE.
1312          * Until Unicode 3.1.1; the corresponding block name was "Private Use";
1313          * and multiple code point ranges had this block.
1314          * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area"
1315          * and adds separate blocks for the supplementary PUAs.
1316          */
1317         public static final UnicodeBlock PRIVATE_USE_AREA
1318         = new UnicodeBlock("PRIVATE_USE_AREA",  78);
1319         /**
1320          * Same as public static final int PRIVATE_USE_AREA.
1321          * Until Unicode 3.1.1; the corresponding block name was "Private Use";
1322          * and multiple code point ranges had this block.
1323          * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area"
1324          * and adds separate blocks for the supplementary PUAs.
1325          */
1326         public static final UnicodeBlock PRIVATE_USE
1327         = PRIVATE_USE_AREA;
1328         /**
1329          */
1330         public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS
1331         = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS", CJK_COMPATIBILITY_IDEOGRAPHS_ID);
1332         /**
1333          */
1334         public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS
1335         = new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS", ALPHABETIC_PRESENTATION_FORMS_ID);
1336         /**
1337          */
1338         public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A
1339         = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A", ARABIC_PRESENTATION_FORMS_A_ID);
1340         /**
1341          */
1342         public static final UnicodeBlock COMBINING_HALF_MARKS
1343         = new UnicodeBlock("COMBINING_HALF_MARKS", COMBINING_HALF_MARKS_ID);
1344         /**
1345          */
1346         public static final UnicodeBlock CJK_COMPATIBILITY_FORMS
1347         = new UnicodeBlock("CJK_COMPATIBILITY_FORMS", CJK_COMPATIBILITY_FORMS_ID);
1348         /**
1349          */
1350         public static final UnicodeBlock SMALL_FORM_VARIANTS
1351         = new UnicodeBlock("SMALL_FORM_VARIANTS", SMALL_FORM_VARIANTS_ID);
1352         /**
1353          */
1354         public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B
1355         = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B", ARABIC_PRESENTATION_FORMS_B_ID);
1356         /**
1357          */
1358         public static final UnicodeBlock SPECIALS
1359         = new UnicodeBlock("SPECIALS", SPECIALS_ID);
1360         /**
1361          */
1362         public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS
1363         = new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS", HALFWIDTH_AND_FULLWIDTH_FORMS_ID);
1364         /**
1365          */
1366         public static final UnicodeBlock OLD_ITALIC
1367         = new UnicodeBlock("OLD_ITALIC", OLD_ITALIC_ID);
1368         /**
1369          */
1370         public static final UnicodeBlock GOTHIC
1371         = new UnicodeBlock("GOTHIC", GOTHIC_ID);
1372         /**
1373          */
1374         public static final UnicodeBlock DESERET
1375         = new UnicodeBlock("DESERET", DESERET_ID);
1376         /**
1377          */
1378         public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS
1379         = new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS", BYZANTINE_MUSICAL_SYMBOLS_ID);
1380         /**
1381          */
1382         public static final UnicodeBlock MUSICAL_SYMBOLS
1383         = new UnicodeBlock("MUSICAL_SYMBOLS", MUSICAL_SYMBOLS_ID);
1384         /**
1385          */
1386         public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS
1387         = new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS",
1388                 MATHEMATICAL_ALPHANUMERIC_SYMBOLS_ID);
1389         /**
1390          */
1391         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B
1392         = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B",
1393                 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_ID);
1394         /**
1395          */
1396         public static final UnicodeBlock
1397         CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT
1398         = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT",
1399                 CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_ID);
1400         /**
1401          */
1402         public static final UnicodeBlock TAGS
1403         = new UnicodeBlock("TAGS", TAGS_ID);
1404 
1405         // New blocks in Unicode 3.2
1406 
1407         /**
1408          * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement".
1409          */
1410         public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY
1411         = new UnicodeBlock("CYRILLIC_SUPPLEMENTARY", CYRILLIC_SUPPLEMENTARY_ID);
1412         /**
1413          * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement".
1414          */
1415         public static final UnicodeBlock CYRILLIC_SUPPLEMENT
1416         = new UnicodeBlock("CYRILLIC_SUPPLEMENT", CYRILLIC_SUPPLEMENT_ID);
1417         /**
1418          */
1419         public static final UnicodeBlock TAGALOG
1420         = new UnicodeBlock("TAGALOG", TAGALOG_ID);
1421         /**
1422          */
1423         public static final UnicodeBlock HANUNOO
1424         = new UnicodeBlock("HANUNOO", HANUNOO_ID);
1425         /**
1426          */
1427         public static final UnicodeBlock BUHID
1428         = new UnicodeBlock("BUHID", BUHID_ID);
1429         /**
1430          */
1431         public static final UnicodeBlock TAGBANWA
1432         = new UnicodeBlock("TAGBANWA", TAGBANWA_ID);
1433         /**
1434          */
1435         public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A
1436         = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A",
1437                 MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A_ID);
1438         /**
1439          */
1440         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A
1441         = new UnicodeBlock("SUPPLEMENTAL_ARROWS_A", SUPPLEMENTAL_ARROWS_A_ID);
1442         /**
1443          */
1444         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B
1445         = new UnicodeBlock("SUPPLEMENTAL_ARROWS_B", SUPPLEMENTAL_ARROWS_B_ID);
1446         /**
1447          */
1448         public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B
1449         = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B",
1450                 MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B_ID);
1451         /**
1452          */
1453         public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS
1454         = new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS",
1455                 SUPPLEMENTAL_MATHEMATICAL_OPERATORS_ID);
1456         /**
1457          */
1458         public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS
1459         = new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS", KATAKANA_PHONETIC_EXTENSIONS_ID);
1460         /**
1461          */
1462         public static final UnicodeBlock VARIATION_SELECTORS
1463         = new UnicodeBlock("VARIATION_SELECTORS", VARIATION_SELECTORS_ID);
1464         /**
1465          */
1466         public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A
1467         = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A",
1468                 SUPPLEMENTARY_PRIVATE_USE_AREA_A_ID);
1469         /**
1470          */
1471         public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B
1472         = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B",
1473                 SUPPLEMENTARY_PRIVATE_USE_AREA_B_ID);
1474 
1475         /**
1476          */
1477         public static final UnicodeBlock LIMBU
1478         = new UnicodeBlock("LIMBU", LIMBU_ID);
1479         /**
1480          */
1481         public static final UnicodeBlock TAI_LE
1482         = new UnicodeBlock("TAI_LE", TAI_LE_ID);
1483         /**
1484          */
1485         public static final UnicodeBlock KHMER_SYMBOLS
1486         = new UnicodeBlock("KHMER_SYMBOLS", KHMER_SYMBOLS_ID);
1487 
1488         /**
1489          */
1490         public static final UnicodeBlock PHONETIC_EXTENSIONS
1491         = new UnicodeBlock("PHONETIC_EXTENSIONS", PHONETIC_EXTENSIONS_ID);
1492 
1493         /**
1494          */
1495         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS
1496         = new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS",
1497                 MISCELLANEOUS_SYMBOLS_AND_ARROWS_ID);
1498         /**
1499          */
1500         public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS
1501         = new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS", YIJING_HEXAGRAM_SYMBOLS_ID);
1502         /**
1503          */
1504         public static final UnicodeBlock LINEAR_B_SYLLABARY
1505         = new UnicodeBlock("LINEAR_B_SYLLABARY", LINEAR_B_SYLLABARY_ID);
1506         /**
1507          */
1508         public static final UnicodeBlock LINEAR_B_IDEOGRAMS
1509         = new UnicodeBlock("LINEAR_B_IDEOGRAMS", LINEAR_B_IDEOGRAMS_ID);
1510         /**
1511          */
1512         public static final UnicodeBlock AEGEAN_NUMBERS
1513         = new UnicodeBlock("AEGEAN_NUMBERS", AEGEAN_NUMBERS_ID);
1514         /**
1515          */
1516         public static final UnicodeBlock UGARITIC
1517         = new UnicodeBlock("UGARITIC", UGARITIC_ID);
1518         /**
1519          */
1520         public static final UnicodeBlock SHAVIAN
1521         = new UnicodeBlock("SHAVIAN", SHAVIAN_ID);
1522         /**
1523          */
1524         public static final UnicodeBlock OSMANYA
1525         = new UnicodeBlock("OSMANYA", OSMANYA_ID);
1526         /**
1527          */
1528         public static final UnicodeBlock CYPRIOT_SYLLABARY
1529         = new UnicodeBlock("CYPRIOT_SYLLABARY", CYPRIOT_SYLLABARY_ID);
1530         /**
1531          */
1532         public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS
1533         = new UnicodeBlock("TAI_XUAN_JING_SYMBOLS", TAI_XUAN_JING_SYMBOLS_ID);
1534 
1535         /**
1536          */
1537         public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT
1538         = new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT", VARIATION_SELECTORS_SUPPLEMENT_ID);
1539 
        /*
         * New blocks in Unicode 4.1.
         * The bracketed hex value after each declaration is presumably the first code
         * point of that block -- TODO confirm against the Unicode block data.
         */

        /**
         * Block object created with {@link #ANCIENT_GREEK_MUSICAL_NOTATION_ID}.
         */
        public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION =
                new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION",
                        ANCIENT_GREEK_MUSICAL_NOTATION_ID); /*[1D200]*/

        /**
         * Block object created with {@link #ANCIENT_GREEK_NUMBERS_ID}.
         */
        public static final UnicodeBlock ANCIENT_GREEK_NUMBERS =
                new UnicodeBlock("ANCIENT_GREEK_NUMBERS", ANCIENT_GREEK_NUMBERS_ID); /*[10140]*/

        /**
         * Block object created with {@link #ARABIC_SUPPLEMENT_ID}.
         */
        public static final UnicodeBlock ARABIC_SUPPLEMENT =
                new UnicodeBlock("ARABIC_SUPPLEMENT", ARABIC_SUPPLEMENT_ID); /*[0750]*/

        /**
         * Block object created with {@link #BUGINESE_ID}.
         */
        public static final UnicodeBlock BUGINESE =
                new UnicodeBlock("BUGINESE", BUGINESE_ID); /*[1A00]*/

        /**
         * Block object created with {@link #CJK_STROKES_ID}.
         */
        public static final UnicodeBlock CJK_STROKES =
                new UnicodeBlock("CJK_STROKES", CJK_STROKES_ID); /*[31C0]*/

        /**
         * Block object created with {@link #COMBINING_DIACRITICAL_MARKS_SUPPLEMENT_ID}.
         */
        public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT =
                new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT",
                        COMBINING_DIACRITICAL_MARKS_SUPPLEMENT_ID); /*[1DC0]*/

        /**
         * Block object created with {@link #COPTIC_ID}.
         */
        public static final UnicodeBlock COPTIC = new UnicodeBlock("COPTIC", COPTIC_ID); /*[2C80]*/

        /**
         * Block object created with {@link #ETHIOPIC_EXTENDED_ID}.
         */
        public static final UnicodeBlock ETHIOPIC_EXTENDED =
                new UnicodeBlock("ETHIOPIC_EXTENDED", ETHIOPIC_EXTENDED_ID); /*[2D80]*/

        /**
         * Block object created with {@link #ETHIOPIC_SUPPLEMENT_ID}.
         */
        public static final UnicodeBlock ETHIOPIC_SUPPLEMENT =
                new UnicodeBlock("ETHIOPIC_SUPPLEMENT", ETHIOPIC_SUPPLEMENT_ID); /*[1380]*/

        /**
         * Block object created with {@link #GEORGIAN_SUPPLEMENT_ID}.
         */
        public static final UnicodeBlock GEORGIAN_SUPPLEMENT =
                new UnicodeBlock("GEORGIAN_SUPPLEMENT", GEORGIAN_SUPPLEMENT_ID); /*[2D00]*/

        /**
         * Block object created with {@link #GLAGOLITIC_ID}.
         */
        public static final UnicodeBlock GLAGOLITIC =
                new UnicodeBlock("GLAGOLITIC", GLAGOLITIC_ID); /*[2C00]*/
1597 
1598         /**
1599          */
1600         public static final UnicodeBlock KHAROSHTHI =
1601                 new UnicodeBlock("KHAROSHTHI", KHAROSHTHI_ID); /*[10A00]*/
1602 
1603         /**
1604          */
1605         public static final UnicodeBlock MODIFIER_TONE_LETTERS =
1606                 new UnicodeBlock("MODIFIER_TONE_LETTERS", MODIFIER_TONE_LETTERS_ID); /*[A700]*/
1607 
1608         /**
1609          */
1610         public static final UnicodeBlock NEW_TAI_LUE =
1611                 new UnicodeBlock("NEW_TAI_LUE", NEW_TAI_LUE_ID); /*[1980]*/
1612 
1613         /**
1614          */
1615         public static final UnicodeBlock OLD_PERSIAN =
1616                 new UnicodeBlock("OLD_PERSIAN", OLD_PERSIAN_ID); /*[103A0]*/
1617 
1618         /**
1619          */
1620         public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT =
1621                 new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT",
1622                         PHONETIC_EXTENSIONS_SUPPLEMENT_ID); /*[1D80]*/
1623 
1624         /**
1625          */
1626         public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION =
1627                 new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION", SUPPLEMENTAL_PUNCTUATION_ID); /*[2E00]*/
1628 
1629         /**
1630          */
1631         public static final UnicodeBlock SYLOTI_NAGRI =
1632                 new UnicodeBlock("SYLOTI_NAGRI", SYLOTI_NAGRI_ID); /*[A800]*/
1633 
1634         /**
1635          */
1636         public static final UnicodeBlock TIFINAGH =
1637                 new UnicodeBlock("TIFINAGH", TIFINAGH_ID); /*[2D30]*/
1638 
1639         /**
1640          */
1641         public static final UnicodeBlock VERTICAL_FORMS =
1642                 new UnicodeBlock("VERTICAL_FORMS", VERTICAL_FORMS_ID); /*[FE10]*/
1643 
1644         /**
1645          */
1646         public static final UnicodeBlock NKO = new UnicodeBlock("NKO", NKO_ID); /*[07C0]*/
1647         /**
1648          */
1649         public static final UnicodeBlock BALINESE =
1650                 new UnicodeBlock("BALINESE", BALINESE_ID); /*[1B00]*/
1651         /**
1652          */
1653         public static final UnicodeBlock LATIN_EXTENDED_C =
1654                 new UnicodeBlock("LATIN_EXTENDED_C", LATIN_EXTENDED_C_ID); /*[2C60]*/
1655         /**
1656          */
1657         public static final UnicodeBlock LATIN_EXTENDED_D =
1658                 new UnicodeBlock("LATIN_EXTENDED_D", LATIN_EXTENDED_D_ID); /*[A720]*/
1659         /**
1660          */
1661         public static final UnicodeBlock PHAGS_PA =
1662                 new UnicodeBlock("PHAGS_PA", PHAGS_PA_ID); /*[A840]*/
1663         /**
1664          */
1665         public static final UnicodeBlock PHOENICIAN =
1666                 new UnicodeBlock("PHOENICIAN", PHOENICIAN_ID); /*[10900]*/
1667         /**
1668          */
1669         public static final UnicodeBlock CUNEIFORM =
1670                 new UnicodeBlock("CUNEIFORM", CUNEIFORM_ID); /*[12000]*/
1671         /**
1672          */
1673         public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION =
1674                 new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION",
1675                         CUNEIFORM_NUMBERS_AND_PUNCTUATION_ID); /*[12400]*/
1676         /**
1677          */
1678         public static final UnicodeBlock COUNTING_ROD_NUMERALS =
1679                 new UnicodeBlock("COUNTING_ROD_NUMERALS", COUNTING_ROD_NUMERALS_ID); /*[1D360]*/
1680 
        // Each constant below is the single UnicodeBlock instance for one block.
        // The private constructor stores the instance in BLOCKS_ at index *_ID
        // (when that ID is non-negative). The bracketed hex value in each trailing
        // comment is presumably the block's first code point -- TODO confirm.
        /**
         */
        public static final UnicodeBlock SUNDANESE =
                new UnicodeBlock("SUNDANESE", SUNDANESE_ID); /* [1B80] */

        /**
         */
        public static final UnicodeBlock LEPCHA =
                new UnicodeBlock("LEPCHA", LEPCHA_ID); /* [1C00] */

        /**
         */
        public static final UnicodeBlock OL_CHIKI =
                new UnicodeBlock("OL_CHIKI", OL_CHIKI_ID); /* [1C50] */

        /**
         */
        public static final UnicodeBlock CYRILLIC_EXTENDED_A =
                new UnicodeBlock("CYRILLIC_EXTENDED_A", CYRILLIC_EXTENDED_A_ID); /* [2DE0] */

        /**
         */
        public static final UnicodeBlock VAI = new UnicodeBlock("VAI", VAI_ID); /* [A500] */

        /**
         */
        public static final UnicodeBlock CYRILLIC_EXTENDED_B =
                new UnicodeBlock("CYRILLIC_EXTENDED_B", CYRILLIC_EXTENDED_B_ID); /* [A640] */

        /**
         */
        public static final UnicodeBlock SAURASHTRA =
                new UnicodeBlock("SAURASHTRA", SAURASHTRA_ID); /* [A880] */

        /**
         */
        public static final UnicodeBlock KAYAH_LI =
                new UnicodeBlock("KAYAH_LI", KAYAH_LI_ID); /* [A900] */

        /**
         */
        public static final UnicodeBlock REJANG =
                new UnicodeBlock("REJANG", REJANG_ID); /* [A930] */

        /**
         */
        public static final UnicodeBlock CHAM =
                new UnicodeBlock("CHAM", CHAM_ID); /* [AA00] */

        /**
         */
        public static final UnicodeBlock ANCIENT_SYMBOLS =
                new UnicodeBlock("ANCIENT_SYMBOLS", ANCIENT_SYMBOLS_ID); /* [10190] */

        /**
         */
        public static final UnicodeBlock PHAISTOS_DISC =
                new UnicodeBlock("PHAISTOS_DISC", PHAISTOS_DISC_ID); /* [101D0] */

        /**
         */
        public static final UnicodeBlock LYCIAN =
                new UnicodeBlock("LYCIAN", LYCIAN_ID); /* [10280] */

        /**
         */
        public static final UnicodeBlock CARIAN =
                new UnicodeBlock("CARIAN", CARIAN_ID); /* [102A0] */

        /**
         */
        public static final UnicodeBlock LYDIAN =
                new UnicodeBlock("LYDIAN", LYDIAN_ID); /* [10920] */

        /**
         */
        public static final UnicodeBlock MAHJONG_TILES =
                new UnicodeBlock("MAHJONG_TILES", MAHJONG_TILES_ID); /* [1F000] */

        /**
         */
        public static final UnicodeBlock DOMINO_TILES =
                new UnicodeBlock("DOMINO_TILES", DOMINO_TILES_ID); /* [1F030] */
1764 
        /*
         * New blocks in Unicode 5.2.
         * Each constant is the single UnicodeBlock instance for one block; the
         * private constructor stores it in BLOCKS_ at index *_ID (when that ID is
         * non-negative). The bracketed hex value in each trailing comment is
         * presumably the block's first code point -- TODO confirm.
         */

        /***/
        public static final UnicodeBlock SAMARITAN =
                new UnicodeBlock("SAMARITAN", SAMARITAN_ID); /*[0800]*/
        /***/
        public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED =
                new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED",
                        UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_ID); /*[18B0]*/
        /***/
        public static final UnicodeBlock TAI_THAM =
                new UnicodeBlock("TAI_THAM", TAI_THAM_ID); /*[1A20]*/
        /***/
        public static final UnicodeBlock VEDIC_EXTENSIONS =
                new UnicodeBlock("VEDIC_EXTENSIONS", VEDIC_EXTENSIONS_ID); /*[1CD0]*/
        /***/
        public static final UnicodeBlock LISU =
                new UnicodeBlock("LISU", LISU_ID); /*[A4D0]*/
        /***/
        public static final UnicodeBlock BAMUM =
                new UnicodeBlock("BAMUM", BAMUM_ID); /*[A6A0]*/
        /***/
        public static final UnicodeBlock COMMON_INDIC_NUMBER_FORMS =
                new UnicodeBlock("COMMON_INDIC_NUMBER_FORMS", COMMON_INDIC_NUMBER_FORMS_ID); /*[A830]*/
        /***/
        public static final UnicodeBlock DEVANAGARI_EXTENDED =
                new UnicodeBlock("DEVANAGARI_EXTENDED", DEVANAGARI_EXTENDED_ID); /*[A8E0]*/
        /***/
        public static final UnicodeBlock HANGUL_JAMO_EXTENDED_A =
                new UnicodeBlock("HANGUL_JAMO_EXTENDED_A", HANGUL_JAMO_EXTENDED_A_ID); /*[A960]*/
        /***/
        public static final UnicodeBlock JAVANESE =
                new UnicodeBlock("JAVANESE", JAVANESE_ID); /*[A980]*/
        /***/
        public static final UnicodeBlock MYANMAR_EXTENDED_A =
                new UnicodeBlock("MYANMAR_EXTENDED_A", MYANMAR_EXTENDED_A_ID); /*[AA60]*/
        /***/
        public static final UnicodeBlock TAI_VIET =
                new UnicodeBlock("TAI_VIET", TAI_VIET_ID); /*[AA80]*/
        /***/
        public static final UnicodeBlock MEETEI_MAYEK =
                new UnicodeBlock("MEETEI_MAYEK", MEETEI_MAYEK_ID); /*[ABC0]*/
        /***/
        public static final UnicodeBlock HANGUL_JAMO_EXTENDED_B =
                new UnicodeBlock("HANGUL_JAMO_EXTENDED_B", HANGUL_JAMO_EXTENDED_B_ID); /*[D7B0]*/
        /***/
        public static final UnicodeBlock IMPERIAL_ARAMAIC =
                new UnicodeBlock("IMPERIAL_ARAMAIC", IMPERIAL_ARAMAIC_ID); /*[10840]*/
        /***/
        public static final UnicodeBlock OLD_SOUTH_ARABIAN =
                new UnicodeBlock("OLD_SOUTH_ARABIAN", OLD_SOUTH_ARABIAN_ID); /*[10A60]*/
        /***/
        public static final UnicodeBlock AVESTAN =
                new UnicodeBlock("AVESTAN", AVESTAN_ID); /*[10B00]*/
        /***/
        public static final UnicodeBlock INSCRIPTIONAL_PARTHIAN =
                new UnicodeBlock("INSCRIPTIONAL_PARTHIAN", INSCRIPTIONAL_PARTHIAN_ID); /*[10B40]*/
        /***/
        public static final UnicodeBlock INSCRIPTIONAL_PAHLAVI =
                new UnicodeBlock("INSCRIPTIONAL_PAHLAVI", INSCRIPTIONAL_PAHLAVI_ID); /*[10B60]*/
        /***/
        public static final UnicodeBlock OLD_TURKIC =
                new UnicodeBlock("OLD_TURKIC", OLD_TURKIC_ID); /*[10C00]*/
        /***/
        public static final UnicodeBlock RUMI_NUMERAL_SYMBOLS =
                new UnicodeBlock("RUMI_NUMERAL_SYMBOLS", RUMI_NUMERAL_SYMBOLS_ID); /*[10E60]*/
        /***/
        public static final UnicodeBlock KAITHI =
                new UnicodeBlock("KAITHI", KAITHI_ID); /*[11080]*/
        /***/
        public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS =
                new UnicodeBlock("EGYPTIAN_HIEROGLYPHS", EGYPTIAN_HIEROGLYPHS_ID); /*[13000]*/
        /***/
        public static final UnicodeBlock ENCLOSED_ALPHANUMERIC_SUPPLEMENT =
                new UnicodeBlock("ENCLOSED_ALPHANUMERIC_SUPPLEMENT",
                        ENCLOSED_ALPHANUMERIC_SUPPLEMENT_ID); /*[1F100]*/
        /***/
        public static final UnicodeBlock ENCLOSED_IDEOGRAPHIC_SUPPLEMENT =
                new UnicodeBlock("ENCLOSED_IDEOGRAPHIC_SUPPLEMENT",
                        ENCLOSED_IDEOGRAPHIC_SUPPLEMENT_ID); /*[1F200]*/
        /***/
        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C =
                new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C",
                        CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_ID); /*[2A700]*/
1849 
        /*
         * New blocks in Unicode 6.0.
         * Each constant is the single UnicodeBlock instance for one block; the
         * private constructor stores it in BLOCKS_ at index *_ID (when that ID is
         * non-negative). The bracketed hex value in each trailing comment is
         * presumably the block's first code point -- TODO confirm.
         */

        /***/
        public static final UnicodeBlock MANDAIC =
                new UnicodeBlock("MANDAIC", MANDAIC_ID); /*[0840]*/
        /***/
        public static final UnicodeBlock BATAK =
                new UnicodeBlock("BATAK", BATAK_ID); /*[1BC0]*/
        /***/
        public static final UnicodeBlock ETHIOPIC_EXTENDED_A =
                new UnicodeBlock("ETHIOPIC_EXTENDED_A", ETHIOPIC_EXTENDED_A_ID); /*[AB00]*/
        /***/
        public static final UnicodeBlock BRAHMI =
                new UnicodeBlock("BRAHMI", BRAHMI_ID); /*[11000]*/
        /***/
        public static final UnicodeBlock BAMUM_SUPPLEMENT =
                new UnicodeBlock("BAMUM_SUPPLEMENT", BAMUM_SUPPLEMENT_ID); /*[16800]*/
        /***/
        public static final UnicodeBlock KANA_SUPPLEMENT =
                new UnicodeBlock("KANA_SUPPLEMENT", KANA_SUPPLEMENT_ID); /*[1B000]*/
        /***/
        public static final UnicodeBlock PLAYING_CARDS =
                new UnicodeBlock("PLAYING_CARDS", PLAYING_CARDS_ID); /*[1F0A0]*/
        /***/
        public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS =
                new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS",
                        MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS_ID); /*[1F300]*/
        /***/
        public static final UnicodeBlock EMOTICONS =
                new UnicodeBlock("EMOTICONS", EMOTICONS_ID); /*[1F600]*/
        /***/
        public static final UnicodeBlock TRANSPORT_AND_MAP_SYMBOLS =
                new UnicodeBlock("TRANSPORT_AND_MAP_SYMBOLS", TRANSPORT_AND_MAP_SYMBOLS_ID); /*[1F680]*/
        /***/
        public static final UnicodeBlock ALCHEMICAL_SYMBOLS =
                new UnicodeBlock("ALCHEMICAL_SYMBOLS", ALCHEMICAL_SYMBOLS_ID); /*[1F700]*/
        /***/
        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D =
                new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D",
                        CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_ID); /*[2B740]*/
1890 
        /*
         * New blocks in Unicode 6.1.
         * Each constant is the single UnicodeBlock instance for one block; the
         * private constructor stores it in BLOCKS_ at index *_ID (when that ID is
         * non-negative). The bracketed hex value in each trailing comment is
         * presumably the block's first code point -- TODO confirm.
         */

        /***/
        public static final UnicodeBlock ARABIC_EXTENDED_A =
                new UnicodeBlock("ARABIC_EXTENDED_A", ARABIC_EXTENDED_A_ID); /*[08A0]*/
        /***/
        public static final UnicodeBlock ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS =
                new UnicodeBlock("ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS", ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS_ID); /*[1EE00]*/
        /***/
        public static final UnicodeBlock CHAKMA = new UnicodeBlock("CHAKMA", CHAKMA_ID); /*[11100]*/
        /***/
        public static final UnicodeBlock MEETEI_MAYEK_EXTENSIONS =
                new UnicodeBlock("MEETEI_MAYEK_EXTENSIONS", MEETEI_MAYEK_EXTENSIONS_ID); /*[AAE0]*/
        /***/
        public static final UnicodeBlock MEROITIC_CURSIVE =
                new UnicodeBlock("MEROITIC_CURSIVE", MEROITIC_CURSIVE_ID); /*[109A0]*/
        /***/
        public static final UnicodeBlock MEROITIC_HIEROGLYPHS =
                new UnicodeBlock("MEROITIC_HIEROGLYPHS", MEROITIC_HIEROGLYPHS_ID); /*[10980]*/
        /***/
        public static final UnicodeBlock MIAO = new UnicodeBlock("MIAO", MIAO_ID); /*[16F00]*/
        /***/
        public static final UnicodeBlock SHARADA = new UnicodeBlock("SHARADA", SHARADA_ID); /*[11180]*/
        /***/
        public static final UnicodeBlock SORA_SOMPENG =
                new UnicodeBlock("SORA_SOMPENG", SORA_SOMPENG_ID); /*[110D0]*/
        /***/
        public static final UnicodeBlock SUNDANESE_SUPPLEMENT =
                new UnicodeBlock("SUNDANESE_SUPPLEMENT", SUNDANESE_SUPPLEMENT_ID); /*[1CC0]*/
        /***/
        public static final UnicodeBlock TAKRI = new UnicodeBlock("TAKRI", TAKRI_ID); /*[11680]*/
1922 
        /*
         * New blocks in Unicode 7.0.
         * Each constant is the single UnicodeBlock instance for one block; the
         * private constructor stores it in BLOCKS_ at index *_ID (when that ID is
         * non-negative). The bracketed hex value in each trailing comment is
         * presumably the block's first code point -- TODO confirm.
         */

        /***/
        public static final UnicodeBlock BASSA_VAH = new UnicodeBlock("BASSA_VAH", BASSA_VAH_ID); /*[16AD0]*/
        /***/
        public static final UnicodeBlock CAUCASIAN_ALBANIAN =
                new UnicodeBlock("CAUCASIAN_ALBANIAN", CAUCASIAN_ALBANIAN_ID); /*[10530]*/
        /***/
        public static final UnicodeBlock COPTIC_EPACT_NUMBERS =
                new UnicodeBlock("COPTIC_EPACT_NUMBERS", COPTIC_EPACT_NUMBERS_ID); /*[102E0]*/
        /***/
        public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_EXTENDED =
                new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_EXTENDED", COMBINING_DIACRITICAL_MARKS_EXTENDED_ID); /*[1AB0]*/
        /***/
        public static final UnicodeBlock DUPLOYAN = new UnicodeBlock("DUPLOYAN", DUPLOYAN_ID); /*[1BC00]*/
        /***/
        public static final UnicodeBlock ELBASAN = new UnicodeBlock("ELBASAN", ELBASAN_ID); /*[10500]*/
        /***/
        public static final UnicodeBlock GEOMETRIC_SHAPES_EXTENDED =
                new UnicodeBlock("GEOMETRIC_SHAPES_EXTENDED", GEOMETRIC_SHAPES_EXTENDED_ID); /*[1F780]*/
        /***/
        public static final UnicodeBlock GRANTHA = new UnicodeBlock("GRANTHA", GRANTHA_ID); /*[11300]*/
        /***/
        public static final UnicodeBlock KHOJKI = new UnicodeBlock("KHOJKI", KHOJKI_ID); /*[11200]*/
        /***/
        public static final UnicodeBlock KHUDAWADI = new UnicodeBlock("KHUDAWADI", KHUDAWADI_ID); /*[112B0]*/
        /***/
        public static final UnicodeBlock LATIN_EXTENDED_E =
                new UnicodeBlock("LATIN_EXTENDED_E", LATIN_EXTENDED_E_ID); /*[AB30]*/
        /***/
        public static final UnicodeBlock LINEAR_A = new UnicodeBlock("LINEAR_A", LINEAR_A_ID); /*[10600]*/
        /***/
        public static final UnicodeBlock MAHAJANI = new UnicodeBlock("MAHAJANI", MAHAJANI_ID); /*[11150]*/
        /***/
        public static final UnicodeBlock MANICHAEAN = new UnicodeBlock("MANICHAEAN", MANICHAEAN_ID); /*[10AC0]*/
        /***/
        public static final UnicodeBlock MENDE_KIKAKUI =
                new UnicodeBlock("MENDE_KIKAKUI", MENDE_KIKAKUI_ID); /*[1E800]*/
        /***/
        public static final UnicodeBlock MODI = new UnicodeBlock("MODI", MODI_ID); /*[11600]*/
        /***/
        public static final UnicodeBlock MRO = new UnicodeBlock("MRO", MRO_ID); /*[16A40]*/
        /***/
        public static final UnicodeBlock MYANMAR_EXTENDED_B =
                new UnicodeBlock("MYANMAR_EXTENDED_B", MYANMAR_EXTENDED_B_ID); /*[A9E0]*/
        /***/
        public static final UnicodeBlock NABATAEAN = new UnicodeBlock("NABATAEAN", NABATAEAN_ID); /*[10880]*/
        /***/
        public static final UnicodeBlock OLD_NORTH_ARABIAN =
                new UnicodeBlock("OLD_NORTH_ARABIAN", OLD_NORTH_ARABIAN_ID); /*[10A80]*/
        /***/
        public static final UnicodeBlock OLD_PERMIC = new UnicodeBlock("OLD_PERMIC", OLD_PERMIC_ID); /*[10350]*/
        /***/
        public static final UnicodeBlock ORNAMENTAL_DINGBATS =
                new UnicodeBlock("ORNAMENTAL_DINGBATS", ORNAMENTAL_DINGBATS_ID); /*[1F650]*/
        /***/
        public static final UnicodeBlock PAHAWH_HMONG = new UnicodeBlock("PAHAWH_HMONG", PAHAWH_HMONG_ID); /*[16B00]*/
        /***/
        public static final UnicodeBlock PALMYRENE = new UnicodeBlock("PALMYRENE", PALMYRENE_ID); /*[10860]*/
        /***/
        public static final UnicodeBlock PAU_CIN_HAU = new UnicodeBlock("PAU_CIN_HAU", PAU_CIN_HAU_ID); /*[11AC0]*/
        /***/
        public static final UnicodeBlock PSALTER_PAHLAVI =
                new UnicodeBlock("PSALTER_PAHLAVI", PSALTER_PAHLAVI_ID); /*[10B80]*/
        /***/
        public static final UnicodeBlock SHORTHAND_FORMAT_CONTROLS =
                new UnicodeBlock("SHORTHAND_FORMAT_CONTROLS", SHORTHAND_FORMAT_CONTROLS_ID); /*[1BCA0]*/
        /***/
        public static final UnicodeBlock SIDDHAM = new UnicodeBlock("SIDDHAM", SIDDHAM_ID); /*[11580]*/
        /***/
        public static final UnicodeBlock SINHALA_ARCHAIC_NUMBERS =
                new UnicodeBlock("SINHALA_ARCHAIC_NUMBERS", SINHALA_ARCHAIC_NUMBERS_ID); /*[111E0]*/
        /***/
        public static final UnicodeBlock SUPPLEMENTAL_ARROWS_C =
                new UnicodeBlock("SUPPLEMENTAL_ARROWS_C", SUPPLEMENTAL_ARROWS_C_ID); /*[1F800]*/
        /***/
        public static final UnicodeBlock TIRHUTA = new UnicodeBlock("TIRHUTA", TIRHUTA_ID); /*[11480]*/
        /***/
        public static final UnicodeBlock WARANG_CITI = new UnicodeBlock("WARANG_CITI", WARANG_CITI_ID); /*[118A0]*/
2002 
        /*
         * New blocks in Unicode 8.0.
         * Each constant is the single UnicodeBlock instance for one block; the
         * private constructor stores it in BLOCKS_ at index *_ID (when that ID is
         * non-negative). The bracketed hex value in each trailing comment is
         * presumably the block's first code point -- TODO confirm.
         */

        /***/
        public static final UnicodeBlock AHOM = new UnicodeBlock("AHOM", AHOM_ID); /*[11700]*/
        /***/
        public static final UnicodeBlock ANATOLIAN_HIEROGLYPHS =
                new UnicodeBlock("ANATOLIAN_HIEROGLYPHS", ANATOLIAN_HIEROGLYPHS_ID); /*[14400]*/
        /***/
        public static final UnicodeBlock CHEROKEE_SUPPLEMENT =
                new UnicodeBlock("CHEROKEE_SUPPLEMENT", CHEROKEE_SUPPLEMENT_ID); /*[AB70]*/
        /***/
        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E =
                new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E",
                        CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E_ID); /*[2B820]*/
        /***/
        public static final UnicodeBlock EARLY_DYNASTIC_CUNEIFORM =
                new UnicodeBlock("EARLY_DYNASTIC_CUNEIFORM", EARLY_DYNASTIC_CUNEIFORM_ID); /*[12480]*/
        /***/
        public static final UnicodeBlock HATRAN = new UnicodeBlock("HATRAN", HATRAN_ID); /*[108E0]*/
        /***/
        public static final UnicodeBlock MULTANI = new UnicodeBlock("MULTANI", MULTANI_ID); /*[11280]*/
        /***/
        public static final UnicodeBlock OLD_HUNGARIAN =
                new UnicodeBlock("OLD_HUNGARIAN", OLD_HUNGARIAN_ID); /*[10C80]*/
        /***/
        public static final UnicodeBlock SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS =
                new UnicodeBlock("SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS",
                        SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS_ID); /*[1F900]*/
        /***/
        public static final UnicodeBlock SUTTON_SIGNWRITING =
                new UnicodeBlock("SUTTON_SIGNWRITING", SUTTON_SIGNWRITING_ID); /*[1D800]*/
2034 
        /*
         * New blocks in Unicode 9.0.
         * Each constant is the single UnicodeBlock instance for one block; the
         * private constructor stores it in BLOCKS_ at index *_ID (when that ID is
         * non-negative). The bracketed hex value in each trailing comment is
         * presumably the block's first code point -- TODO confirm.
         */

        /***/
        public static final UnicodeBlock ADLAM = new UnicodeBlock("ADLAM", ADLAM_ID); /*[1E900]*/
        /***/
        public static final UnicodeBlock BHAIKSUKI = new UnicodeBlock("BHAIKSUKI", BHAIKSUKI_ID); /*[11C00]*/
        /***/
        public static final UnicodeBlock CYRILLIC_EXTENDED_C =
                new UnicodeBlock("CYRILLIC_EXTENDED_C", CYRILLIC_EXTENDED_C_ID); /*[1C80]*/
        /***/
        public static final UnicodeBlock GLAGOLITIC_SUPPLEMENT =
                new UnicodeBlock("GLAGOLITIC_SUPPLEMENT", GLAGOLITIC_SUPPLEMENT_ID); /*[1E000]*/
        /***/
        public static final UnicodeBlock IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION =
                new UnicodeBlock("IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION", IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION_ID); /*[16FE0]*/
        /***/
        public static final UnicodeBlock MARCHEN = new UnicodeBlock("MARCHEN", MARCHEN_ID); /*[11C70]*/
        /***/
        public static final UnicodeBlock MONGOLIAN_SUPPLEMENT =
                new UnicodeBlock("MONGOLIAN_SUPPLEMENT", MONGOLIAN_SUPPLEMENT_ID); /*[11660]*/
        /***/
        public static final UnicodeBlock NEWA = new UnicodeBlock("NEWA", NEWA_ID); /*[11400]*/
        /***/
        public static final UnicodeBlock OSAGE = new UnicodeBlock("OSAGE", OSAGE_ID); /*[104B0]*/
        /***/
        public static final UnicodeBlock TANGUT = new UnicodeBlock("TANGUT", TANGUT_ID); /*[17000]*/
        /***/
        public static final UnicodeBlock TANGUT_COMPONENTS =
                new UnicodeBlock("TANGUT_COMPONENTS", TANGUT_COMPONENTS_ID); /*[18800]*/
2064 
        /**
         * Fallback instance: returned by {@link #getInstance(int)} for an ID outside
         * the index range of BLOCKS_, and by {@link #of(int)} for a code point
         * greater than MAX_VALUE.
         */
        public static final UnicodeBlock INVALID_CODE
        = new UnicodeBlock("INVALID_CODE", INVALID_CODE_ID);
2069 
2070         static {
2071             for (int blockId = 0; blockId < COUNT; ++blockId) {
2072                 if (BLOCKS_[blockId] == null) {
2073                     throw new java.lang.IllegalStateException(
2074                             "UnicodeBlock.BLOCKS_[" + blockId + "] not initialized");
2075                 }
2076             }
2077         }
2078 
2079         // public methods --------------------------------------------------
2080 
        /**
         * <strong>[icu]</strong> Returns the only instance of the UnicodeBlock with the argument ID.
         * If no such ID exists, an INVALID_CODE UnicodeBlock will be returned.
         * @param id UnicodeBlock ID
         * @return the only instance of the UnicodeBlock with the argument ID
         *         if it exists, otherwise an INVALID_CODE UnicodeBlock will be
         *         returned.
         */
getInstance(int id)2089         public static UnicodeBlock getInstance(int id)
2090         {
2091             if (id >= 0 && id < BLOCKS_.length) {
2092                 return BLOCKS_[id];
2093             }
2094             return INVALID_CODE;
2095         }
2096 
        /**
         * Returns the Unicode allocation block that contains the code point.
         * A code point greater than MAX_VALUE yields {@link #INVALID_CODE}
         * rather than null.
         * @param ch code point to be tested
         * @return the UnicodeBlock instance for the code point's
         *         {@link UProperty#BLOCK} value, or {@link #INVALID_CODE} if
         *         {@code ch} is greater than MAX_VALUE or that value is not a
         *         registered block ID
         */
of(int ch)2103         public static UnicodeBlock of(int ch)
2104         {
2105             if (ch > MAX_VALUE) {
2106                 return INVALID_CODE;
2107             }
2108 
2109             return UnicodeBlock.getInstance(
2110                     UCharacterProperty.INSTANCE.getIntPropertyValue(ch, UProperty.BLOCK));
2111         }
2112 
2113         /**
2114          * Alternative to the {@link java.lang.Character.UnicodeBlock#forName(String)} method.
2115          * Returns the Unicode block with the given name. <strong>[icu] Note:</strong> Unlike
2116          * {@link java.lang.Character.UnicodeBlock#forName(String)}, this only matches
2117          * against the official UCD name and the Java block name
2118          * (ignoring case).
2119          * @param blockName the name of the block to match
2120          * @return the UnicodeBlock with that name
2121          * @throws IllegalArgumentException if the blockName could not be matched
2122          */
forName(String blockName)2123         public static final UnicodeBlock forName(String blockName) {
2124             Map<String, UnicodeBlock> m = null;
2125             if (mref != null) {
2126                 m = mref.get();
2127             }
2128             if (m == null) {
2129                 m = new HashMap<String, UnicodeBlock>(BLOCKS_.length);
2130                 for (int i = 0; i < BLOCKS_.length; ++i) {
2131                     UnicodeBlock b = BLOCKS_[i];
2132                     String name = trimBlockName(
2133                             getPropertyValueName(UProperty.BLOCK, b.getID(),
2134                                     UProperty.NameChoice.LONG));
2135                     m.put(name, b);
2136                 }
2137                 mref = new SoftReference<Map<String, UnicodeBlock>>(m);
2138             }
2139             UnicodeBlock b = m.get(trimBlockName(blockName));
2140             if (b == null) {
2141                 throw new IllegalArgumentException();
2142             }
2143             return b;
2144         }
2145         private static SoftReference<Map<String, UnicodeBlock>> mref;
2146 
trimBlockName(String name)2147         private static String trimBlockName(String name) {
2148             String upper = name.toUpperCase(Locale.ENGLISH);
2149             StringBuilder result = new StringBuilder(upper.length());
2150             for (int i = 0; i < upper.length(); i++) {
2151                 char c = upper.charAt(i);
2152                 if (c != ' ' && c != '_' && c != '-') {
2153                     result.append(c);
2154                 }
2155             }
2156             return result.toString();
2157         }
2158 
        /**
         * <strong>[icu]</strong> Returns the type ID of this Unicode block.
         * @return integer type ID of this Unicode block
         */
getID()2163         public int getID()
2164         {
2165             return m_id_;
2166         }
2167 
2168         // private data members ---------------------------------------------
2169 
        /**
         * Identification code for this UnicodeBlock.
         * Assigned from the constructor's {@code id} argument and returned by
         * {@link #getID()}.
         */
        private int m_id_;
2174 
2175         // private constructor ----------------------------------------------
2176 
2177         /**
2178          * UnicodeBlock constructor
2179          * @param name name of this UnicodeBlock
2180          * @param id unique id of this UnicodeBlock
2181          * @exception NullPointerException if name is <code>null</code>
2182          */
UnicodeBlock(String name, int id)2183         private UnicodeBlock(String name, int id)
2184         {
2185             super(name);
2186             m_id_ = id;
2187             if (id >= 0) {
2188                 BLOCKS_[id] = this;
2189             }
2190         }
2191     }
2192 
2193     /**
2194      * East Asian Width constants.
2195      * @see UProperty#EAST_ASIAN_WIDTH
2196      * @see UCharacter#getIntPropertyValue
2197      */
2198     public static interface EastAsianWidth
2199     {
2200         /**
2201          */
2202         public static final int NEUTRAL = 0;
2203         /**
2204          */
2205         public static final int AMBIGUOUS = 1;
2206         /**
2207          */
2208         public static final int HALFWIDTH = 2;
2209         /**
2210          */
2211         public static final int FULLWIDTH = 3;
2212         /**
2213          */
2214         public static final int NARROW = 4;
2215         /**
2216          */
2217         public static final int WIDE = 5;
2218         /**
2219          * One more than the highest normal EastAsianWidth value.
2220          * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.EAST_ASIAN_WIDTH).
2221          *
2222          * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
2223          * @hide unsupported on Android
2224          */
2225         @Deprecated
2226         public static final int COUNT = 6;
2227     }
2228 
/**
 * Decomposition Type constants.
 *
 * <p>Each field is the integer value of one Decomposition Type; the fields
 * are implicitly {@code public static final} because they are declared in
 * an interface.
 *
 * @see UProperty#DECOMPOSITION_TYPE
 */
public interface DecompositionType {
    int NONE = 0;
    int CANONICAL = 1;
    int COMPAT = 2;
    int CIRCLE = 3;
    int FINAL = 4;
    int FONT = 5;
    int FRACTION = 6;
    int INITIAL = 7;
    int ISOLATED = 8;
    int MEDIAL = 9;
    int NARROW = 10;
    int NOBREAK = 11;
    int SMALL = 12;
    int SQUARE = 13;
    int SUB = 14;
    int SUPER = 15;
    int VERTICAL = 16;
    int WIDE = 17;

    /**
     * One more than the highest normal DecompositionType value.
     * The highest value is available via
     * UCharacter.getIntPropertyMaxValue(UProperty.DECOMPOSITION_TYPE).
     *
     * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
     * @hide unsupported on Android
     */
    @Deprecated
    int COUNT = 18;
}
2299 
/**
 * Joining Type constants.
 *
 * <p>Each field is the integer value of one Joining Type; the fields are
 * implicitly {@code public static final} because they are declared in an
 * interface.
 *
 * @see UProperty#JOINING_TYPE
 */
public interface JoiningType {
    int NON_JOINING = 0;
    int JOIN_CAUSING = 1;
    int DUAL_JOINING = 2;
    int LEFT_JOINING = 3;
    int RIGHT_JOINING = 4;
    int TRANSPARENT = 5;

    /**
     * One more than the highest normal JoiningType value.
     * The highest value is available via
     * UCharacter.getIntPropertyMaxValue(UProperty.JOINING_TYPE).
     *
     * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
     * @hide unsupported on Android
     */
    @Deprecated
    int COUNT = 6;
}
2334 
/**
 * Joining Group constants.
 *
 * <p>Each field is the integer value of one Joining Group; the fields are
 * implicitly {@code public static final} because they are declared in an
 * interface.
 *
 * @see UProperty#JOINING_GROUP
 */
public interface JoiningGroup {
    int NO_JOINING_GROUP = 0;
    int AIN = 1;
    int ALAPH = 2;
    int ALEF = 3;
    int BEH = 4;
    int BETH = 5;
    int DAL = 6;
    int DALATH_RISH = 7;
    int E = 8;
    int FEH = 9;
    int FINAL_SEMKATH = 10;
    int GAF = 11;
    int GAMAL = 12;
    int HAH = 13;
    int TEH_MARBUTA_GOAL = 14;
    /** Second name for the same value as {@link #TEH_MARBUTA_GOAL}. */
    int HAMZA_ON_HEH_GOAL = TEH_MARBUTA_GOAL;
    int HE = 15;
    int HEH = 16;
    int HEH_GOAL = 17;
    int HETH = 18;
    int KAF = 19;
    int KAPH = 20;
    int KNOTTED_HEH = 21;
    int LAM = 22;
    int LAMADH = 23;
    int MEEM = 24;
    int MIM = 25;
    int NOON = 26;
    int NUN = 27;
    int PE = 28;
    int QAF = 29;
    int QAPH = 30;
    int REH = 31;
    int REVERSED_PE = 32;
    int SAD = 33;
    int SADHE = 34;
    int SEEN = 35;
    int SEMKATH = 36;
    int SHIN = 37;
    int SWASH_KAF = 38;
    int SYRIAC_WAW = 39;
    int TAH = 40;
    int TAW = 41;
    int TEH_MARBUTA = 42;
    int TETH = 43;
    int WAW = 44;
    int YEH = 45;
    int YEH_BARREE = 46;
    int YEH_WITH_TAIL = 47;
    int YUDH = 48;
    int YUDH_HE = 49;
    int ZAIN = 50;
    int FE = 51;
    int KHAPH = 52;
    int ZHAIN = 53;
    int BURUSHASKI_YEH_BARREE = 54;
    int FARSI_YEH = 55;
    int NYA = 56;
    int ROHINGYA_YEH = 57;

    int MANICHAEAN_ALEPH = 58;
    int MANICHAEAN_AYIN = 59;
    int MANICHAEAN_BETH = 60;
    int MANICHAEAN_DALETH = 61;
    int MANICHAEAN_DHAMEDH = 62;
    int MANICHAEAN_FIVE = 63;
    int MANICHAEAN_GIMEL = 64;
    int MANICHAEAN_HETH = 65;
    int MANICHAEAN_HUNDRED = 66;
    int MANICHAEAN_KAPH = 67;
    int MANICHAEAN_LAMEDH = 68;
    int MANICHAEAN_MEM = 69;
    int MANICHAEAN_NUN = 70;
    int MANICHAEAN_ONE = 71;
    int MANICHAEAN_PE = 72;
    int MANICHAEAN_QOPH = 73;
    int MANICHAEAN_RESH = 74;
    int MANICHAEAN_SADHE = 75;
    int MANICHAEAN_SAMEKH = 76;
    int MANICHAEAN_TAW = 77;
    int MANICHAEAN_TEN = 78;
    int MANICHAEAN_TETH = 79;
    int MANICHAEAN_THAMEDH = 80;
    int MANICHAEAN_TWENTY = 81;
    int MANICHAEAN_WAW = 82;
    int MANICHAEAN_YODH = 83;
    int MANICHAEAN_ZAYIN = 84;
    int STRAIGHT_WAW = 85;

    int AFRICAN_FEH = 86;
    int AFRICAN_NOON = 87;
    int AFRICAN_QAF = 88;

    /**
     * One more than the highest normal JoiningGroup value.
     * The highest value is available via
     * UCharacter.getIntPropertyMaxValue(UProperty.JOINING_GROUP).
     *
     * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
     * @hide unsupported on Android
     */
    @Deprecated
    int COUNT = 89;
}
2589 
/**
 * Grapheme Cluster Break constants.
 *
 * <p>Each field is the integer value of one Grapheme Cluster Break class;
 * the fields are implicitly {@code public static final} because they are
 * declared in an interface. Bracketed tags in the comments are the short
 * tags carried over from the original declarations.
 *
 * @see UProperty#GRAPHEME_CLUSTER_BREAK
 */
public interface GraphemeClusterBreak {
    int OTHER = 0;
    int CONTROL = 1;
    int CR = 2;
    int EXTEND = 3;
    int L = 4;
    int LF = 5;
    int LV = 6;
    int LVT = 7;
    int T = 8;
    int V = 9;
    int SPACING_MARK = 10;
    int PREPEND = 11;

    // New in Unicode 6.2/ICU 50
    int REGIONAL_INDICATOR = 12;  // [RI]

    // From here on: new in Unicode 9.0/ICU 58
    int E_BASE = 13;              // [EB]
    int E_BASE_GAZ = 14;          // [EBG]
    int E_MODIFIER = 15;          // [EM]
    int GLUE_AFTER_ZWJ = 16;      // [GAZ]
    int ZWJ = 17;                 // [ZWJ]

    /**
     * One more than the highest normal GraphemeClusterBreak value.
     * The highest value is available via
     * UCharacter.getIntPropertyMaxValue(UProperty.GRAPHEME_CLUSTER_BREAK).
     *
     * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
     * @hide unsupported on Android
     */
    @Deprecated
    int COUNT = 18;
}
2653 
/**
 * Word Break constants.
 *
 * <p>Each field is the integer value of one Word Break class; the fields
 * are implicitly {@code public static final} because they are declared in
 * an interface. Bracketed tags in the comments are the short tags carried
 * over from the original declarations.
 *
 * @see UProperty#WORD_BREAK
 */
public interface WordBreak {
    int OTHER = 0;
    int ALETTER = 1;
    int FORMAT = 2;
    int KATAKANA = 3;
    int MIDLETTER = 4;
    int MIDNUM = 5;
    int NUMERIC = 6;
    int EXTENDNUMLET = 7;
    int CR = 8;
    int EXTEND = 9;
    int LF = 10;
    int MIDNUMLET = 11;
    int NEWLINE = 12;

    // New in Unicode 6.2/ICU 50
    int REGIONAL_INDICATOR = 13;  // [RI]

    // From here on: new in Unicode 6.3/ICU 52
    int HEBREW_LETTER = 14;       // [HL]
    int SINGLE_QUOTE = 15;        // [SQ]
    int DOUBLE_QUOTE = 16;        // [DQ]

    // From here on: new in Unicode 9.0/ICU 58
    int E_BASE = 17;              // [EB]
    int E_BASE_GAZ = 18;          // [EBG]
    int E_MODIFIER = 19;          // [EM]
    int GLUE_AFTER_ZWJ = 20;      // [GAZ]
    int ZWJ = 21;                 // [ZWJ]

    /**
     * One more than the highest normal WordBreak value.
     * The highest value is available via
     * UCharacter.getIntPropertyMaxValue(UProperty.WORD_BREAK).
     *
     * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
     * @hide unsupported on Android
     */
    @Deprecated
    int COUNT = 22;
}
2726 
/**
 * Sentence Break constants.
 *
 * <p>Each field is the integer value of one Sentence Break class; the
 * fields are implicitly {@code public static final} because they are
 * declared in an interface.
 *
 * @see UProperty#SENTENCE_BREAK
 */
public interface SentenceBreak {
    int OTHER = 0;
    int ATERM = 1;
    int CLOSE = 2;
    int FORMAT = 3;
    int LOWER = 4;
    int NUMERIC = 5;
    int OLETTER = 6;
    int SEP = 7;
    int SP = 8;
    int STERM = 9;
    int UPPER = 10;
    int CR = 11;
    int EXTEND = 12;
    int LF = 13;
    int SCONTINUE = 14;

    /**
     * One more than the highest normal SentenceBreak value.
     * The highest value is available via
     * UCharacter.getIntPropertyMaxValue(UProperty.SENTENCE_BREAK).
     *
     * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
     * @hide unsupported on Android
     */
    @Deprecated
    int COUNT = 15;
}
2787 
/**
 * Line Break constants.
 *
 * <p>Each field is the integer value of one Line Break class; the fields
 * are implicitly {@code public static final} because they are declared in
 * an interface. Bracketed tags in the comments are the short tags carried
 * over from the original declarations.
 *
 * @see UProperty#LINE_BREAK
 */
public interface LineBreak {
    int UNKNOWN = 0;
    int AMBIGUOUS = 1;
    int ALPHABETIC = 2;
    int BREAK_BOTH = 3;
    int BREAK_AFTER = 4;
    int BREAK_BEFORE = 5;
    int MANDATORY_BREAK = 6;
    int CONTINGENT_BREAK = 7;
    int CLOSE_PUNCTUATION = 8;
    int COMBINING_MARK = 9;
    int CARRIAGE_RETURN = 10;
    int EXCLAMATION = 11;
    int GLUE = 12;
    int HYPHEN = 13;
    int IDEOGRAPHIC = 14;
    /**
     * Misspelled name that shares its value with {@link #INSEPARABLE}.
     *
     * @see #INSEPARABLE
     */
    int INSEPERABLE = 15;
    /**
     * Renamed from the misspelled "inseperable" in Unicode 4.0.1.
     */
    int INSEPARABLE = 15;
    int INFIX_NUMERIC = 16;
    int LINE_FEED = 17;
    int NONSTARTER = 18;
    int NUMERIC = 19;
    int OPEN_PUNCTUATION = 20;
    int POSTFIX_NUMERIC = 21;
    int PREFIX_NUMERIC = 22;
    int QUOTATION = 23;
    int COMPLEX_CONTEXT = 24;
    int SURROGATE = 25;
    int SPACE = 26;
    int BREAK_SYMBOLS = 27;
    int ZWSPACE = 28;

    // From here on: new in Unicode 4/ICU 2.6
    int NEXT_LINE = 29;                     // [NL]
    int WORD_JOINER = 30;                   // [WJ]

    // From here on: new in Unicode 4.1/ICU 3.4
    int H2 = 31;
    int H3 = 32;
    int JL = 33;
    int JT = 34;
    int JV = 35;

    // New in Unicode 5.2/ICU 4.4
    int CLOSE_PARENTHESIS = 36;             // [CP]

    // New in Unicode 6.1/ICU 49
    int CONDITIONAL_JAPANESE_STARTER = 37;  // [CJ]
    int HEBREW_LETTER = 38;                 // [HL]

    // New in Unicode 6.2/ICU 50
    int REGIONAL_INDICATOR = 39;            // [RI]

    // From here on: new in Unicode 9.0/ICU 58
    int E_BASE = 40;                        // [EB]
    int E_MODIFIER = 41;                    // [EM]
    int ZWJ = 42;                           // [ZWJ]

    /**
     * One more than the highest normal LineBreak value.
     * The highest value is available via
     * UCharacter.getIntPropertyMaxValue(UProperty.LINE_BREAK).
     *
     * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
     * @hide unsupported on Android
     */
    @Deprecated
    int COUNT = 43;
}
2931 
/**
 * Numeric Type constants.
 *
 * <p>Each field is the integer value of one Numeric Type; the fields are
 * implicitly {@code public static final} because they are declared in an
 * interface.
 *
 * @see UProperty#NUMERIC_TYPE
 */
public interface NumericType {
    int NONE = 0;
    int DECIMAL = 1;
    int DIGIT = 2;
    int NUMERIC = 3;

    /**
     * One more than the highest normal NumericType value.
     * The highest value is available via
     * UCharacter.getIntPropertyMaxValue(UProperty.NUMERIC_TYPE).
     *
     * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
     * @hide unsupported on Android
     */
    @Deprecated
    int COUNT = 4;
}
2960 
/**
 * Hangul Syllable Type constants.
 *
 * <p>Each field is the integer value of one Hangul Syllable Type; the
 * fields are implicitly {@code public static final} because they are
 * declared in an interface. Bracketed tags in the comments are the short
 * tags carried over from the original declarations.
 *
 * @see UProperty#HANGUL_SYLLABLE_TYPE
 */
public interface HangulSyllableType {
    int NOT_APPLICABLE = 0;  /*[NA]*/ /*See note !!*/
    int LEADING_JAMO = 1;    // [L]
    int VOWEL_JAMO = 2;      // [V]
    int TRAILING_JAMO = 3;   // [T]
    int LV_SYLLABLE = 4;     // [LV]
    int LVT_SYLLABLE = 5;    // [LVT]

    /**
     * One more than the highest normal HangulSyllableType value.
     * The highest value is available via
     * UCharacter.getIntPropertyMaxValue(UProperty.HANGUL_SYLLABLE_TYPE).
     *
     * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
     * @hide unsupported on Android
     */
    @Deprecated
    int COUNT = 6;
}
2996 
/**
 * Bidi Paired Bracket Type constants.
 *
 * <p>Each field is the integer value of one Bidi Paired Bracket Type; the
 * fields are implicitly {@code public static final} because they are
 * declared in an interface.
 *
 * @see UProperty#BIDI_PAIRED_BRACKET_TYPE
 */
public interface BidiPairedBracketType {
    /** Not a paired bracket. */
    int NONE = 0;

    /** Open paired bracket. */
    int OPEN = 1;

    /** Close paired bracket. */
    int CLOSE = 2;

    /**
     * One more than the highest normal BidiPairedBracketType value.
     * The highest value is available via
     * UCharacter.getIntPropertyMaxValue(UProperty.BIDI_PAIRED_BRACKET_TYPE).
     *
     * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
     * @hide unsupported on Android
     */
    @Deprecated
    int COUNT = 3;
}
3025 
3026     // public data members -----------------------------------------------
3027 
3028     /**
3029      * The lowest Unicode code point value, constant 0.
3030      * Same as {@link Character#MIN_CODE_POINT}, same integer value as {@link Character#MIN_VALUE}.
3031      */
3032     public static final int MIN_VALUE = Character.MIN_CODE_POINT;
3033 
3034     /**
3035      * The highest Unicode code point value (scalar value), constant U+10FFFF (uses 21 bits).
3036      * Same as {@link Character#MAX_CODE_POINT}.
3037      *
3038      * <p>Up-to-date Unicode implementation of {@link Character#MAX_VALUE}
3039      * which is still a char with the value U+FFFF.
3040      */
3041     public static final int MAX_VALUE = Character.MAX_CODE_POINT;
3042 
3043     /**
3044      * The minimum value for Supplementary code points, constant U+10000.
3045      * Same as {@link Character#MIN_SUPPLEMENTARY_CODE_POINT}.
3046      */
3047     public static final int SUPPLEMENTARY_MIN_VALUE = Character.MIN_SUPPLEMENTARY_CODE_POINT;
3048 
3049     /**
3050      * Unicode value used when translating into Unicode encoding form and there
3051      * is no existing character.
3052      */
3053     public static final int REPLACEMENT_CHAR = '\uFFFD';
3054 
3055     /**
3056      * Special value that is returned by getUnicodeNumericValue(int) when no
3057      * numeric value is defined for a code point.
3058      * @see #getUnicodeNumericValue
3059      */
3060     public static final double NO_NUMERIC_VALUE = -123456789;
3061 
3062     /**
3063      * Compatibility constant for Java Character's MIN_RADIX.
3064      */
3065     public static final int MIN_RADIX = java.lang.Character.MIN_RADIX;
3066 
3067     /**
3068      * Compatibility constant for Java Character's MAX_RADIX.
3069      */
3070     public static final int MAX_RADIX = java.lang.Character.MAX_RADIX;
3071 
3072     /**
3073      * Do not lowercase non-initial parts of words when titlecasing.
3074      * Option bit for titlecasing APIs that take an options bit set.
3075      *
3076      * By default, titlecasing will titlecase the first cased character
3077      * of a word and lowercase all other characters.
3078      * With this option, the other characters will not be modified.
3079      *
3080      * @see #toTitleCase
3081      */
3082     public static final int TITLECASE_NO_LOWERCASE = 0x100;
3083 
3084     /**
3085      * Do not adjust the titlecasing indexes from BreakIterator::next() indexes;
3086      * titlecase exactly the characters at breaks from the iterator.
3087      * Option bit for titlecasing APIs that take an options bit set.
3088      *
3089      * By default, titlecasing will take each break iterator index,
3090      * adjust it by looking for the next cased character, and titlecase that one.
3091      * Other characters are lowercased.
3092      *
3093      * This follows Unicode 4 &amp; 5 section 3.13 Default Case Operations:
3094      *
3095      * R3  toTitlecase(X): Find the word boundaries based on Unicode Standard Annex
3096      * #29, "Text Boundaries." Between each pair of word boundaries, find the first
3097      * cased character F. If F exists, map F to default_title(F); then map each
3098      * subsequent character C to default_lower(C).
3099      *
3100      * @see #toTitleCase
3101      * @see #TITLECASE_NO_LOWERCASE
3102      */
3103     public static final int TITLECASE_NO_BREAK_ADJUSTMENT = 0x200;
3104 
3105     // public methods ----------------------------------------------------
3106 
3107     /**
3108      * Returnss the numeric value of a decimal digit code point.
3109      * <br>This method observes the semantics of
3110      * <code>java.lang.Character.digit()</code>.  Note that this
3111      * will return positive values for code points for which isDigit
3112      * returns false, just like java.lang.Character.
3113      * <br><em>Semantic Change:</em> In release 1.3.1 and
3114      * prior, this did not treat the European letters as having a
3115      * digit value, and also treated numeric letters and other numbers as
3116      * digits.
3117      * This has been changed to conform to the java semantics.
3118      * <br>A code point is a valid digit if and only if:
3119      * <ul>
3120      *   <li>ch is a decimal digit or one of the european letters, and
3121      *   <li>the value of ch is less than the specified radix.
3122      * </ul>
3123      * @param ch the code point to query
3124      * @param radix the radix
3125      * @return the numeric value represented by the code point in the
3126      * specified radix, or -1 if the code point is not a decimal digit
3127      * or if its value is too large for the radix
3128      */
digit(int ch, int radix)3129     public static int digit(int ch, int radix)
3130     {
3131         if (2 <= radix && radix <= 36) {
3132             int value = digit(ch);
3133             if (value < 0) {
3134                 // ch is not a decimal digit, try latin letters
3135                 value = UCharacterProperty.getEuropeanDigit(ch);
3136             }
3137             return (value < radix) ? value : -1;
3138         } else {
3139             return -1;  // invalid radix
3140         }
3141     }
3142 
3143     /**
3144      * Returnss the numeric value of a decimal digit code point.
3145      * <br>This is a convenience overload of <code>digit(int, int)</code>
3146      * that provides a decimal radix.
3147      * <br><em>Semantic Change:</em> In release 1.3.1 and prior, this
3148      * treated numeric letters and other numbers as digits.  This has
3149      * been changed to conform to the java semantics.
3150      * @param ch the code point to query
3151      * @return the numeric value represented by the code point,
3152      * or -1 if the code point is not a decimal digit or if its
3153      * value is too large for a decimal radix
3154      */
digit(int ch)3155     public static int digit(int ch)
3156     {
3157         return UCharacterProperty.INSTANCE.digit(ch);
3158     }
3159 
3160     /**
3161      * Returns the numeric value of the code point as a nonnegative
3162      * integer.
3163      * <br>If the code point does not have a numeric value, then -1 is returned.
3164      * <br>
3165      * If the code point has a numeric value that cannot be represented as a
3166      * nonnegative integer (for example, a fractional value), then -2 is
3167      * returned.
3168      * @param ch the code point to query
3169      * @return the numeric value of the code point, or -1 if it has no numeric
3170      * value, or -2 if it has a numeric value that cannot be represented as a
3171      * nonnegative integer
3172      */
getNumericValue(int ch)3173     public static int getNumericValue(int ch)
3174     {
3175         return UCharacterProperty.INSTANCE.getNumericValue(ch);
3176     }
3177 
3178     /**
3179      * <strong>[icu]</strong> Returns the numeric value for a Unicode code point as defined in the
3180      * Unicode Character Database.
3181      * <p>A "double" return type is necessary because some numeric values are
3182      * fractions, negative, or too large for int.
3183      * <p>For characters without any numeric values in the Unicode Character
3184      * Database, this function will return NO_NUMERIC_VALUE.
3185      * Note: This is different from the Unicode Standard which specifies NaN as the default value.
3186      * <p><em>API Change:</em> In release 2.2 and prior, this API has a
3187      * return type int and returns -1 when the argument ch does not have a
3188      * corresponding numeric value. This has been changed to synch with ICU4C
3189      *
3190      * This corresponds to the ICU4C function u_getNumericValue.
3191      * @param ch Code point to get the numeric value for.
3192      * @return numeric value of ch, or NO_NUMERIC_VALUE if none is defined.
3193      */
getUnicodeNumericValue(int ch)3194     public static double getUnicodeNumericValue(int ch)
3195     {
3196         return UCharacterProperty.INSTANCE.getUnicodeNumericValue(ch);
3197     }
3198 
3199     /**
3200      * Compatibility override of Java deprecated method.  This
3201      * method will always remain deprecated.
3202      * Same as java.lang.Character.isSpace().
3203      * @param ch the code point
3204      * @return true if the code point is a space character as
3205      * defined by java.lang.Character.isSpace.
3206      * @deprecated ICU 3.4 (Java)
3207      * @hide original deprecated declaration
3208      */
3209     @Deprecated
isSpace(int ch)3210     public static boolean isSpace(int ch) {
3211         return ch <= 0x20 &&
3212                 (ch == 0x20 || ch == 0x09 || ch == 0x0a || ch == 0x0c || ch == 0x0d);
3213     }
3214 
3215     /**
3216      * Returns a value indicating a code point's Unicode category.
3217      * Up-to-date Unicode implementation of java.lang.Character.getType()
3218      * except for the above mentioned code points that had their category
3219      * changed.<br>
3220      * Return results are constants from the interface
3221      * <a href=UCharacterCategory.html>UCharacterCategory</a><br>
3222      * <em>NOTE:</em> the UCharacterCategory values are <em>not</em> compatible with
3223      * those returned by java.lang.Character.getType.  UCharacterCategory values
3224      * match the ones used in ICU4C, while java.lang.Character type
3225      * values, though similar, skip the value 17.
3226      * @param ch code point whose type is to be determined
3227      * @return category which is a value of UCharacterCategory
3228      */
getType(int ch)3229     public static int getType(int ch)
3230     {
3231         return UCharacterProperty.INSTANCE.getType(ch);
3232     }
3233 
3234     /**
3235      * Determines if a code point has a defined meaning in the up-to-date
3236      * Unicode standard.
3237      * E.g. supplementary code points though allocated space are not defined in
3238      * Unicode yet.<br>
3239      * Up-to-date Unicode implementation of java.lang.Character.isDefined()
3240      * @param ch code point to be determined if it is defined in the most
3241      *        current version of Unicode
3242      * @return true if this code point is defined in unicode
3243      */
isDefined(int ch)3244     public static boolean isDefined(int ch)
3245     {
3246         return getType(ch) != 0;
3247     }
3248 
3249     /**
3250      * Determines if a code point is a Java digit.
3251      * <br>This method observes the semantics of
3252      * <code>java.lang.Character.isDigit()</code>. It returns true for decimal
3253      * digits only.
3254      * <br><em>Semantic Change:</em> In release 1.3.1 and prior, this treated
3255      * numeric letters and other numbers as digits.
3256      * This has been changed to conform to the java semantics.
3257      * @param ch code point to query
3258      * @return true if this code point is a digit
3259      */
isDigit(int ch)3260     public static boolean isDigit(int ch)
3261     {
3262         return getType(ch) == UCharacterCategory.DECIMAL_DIGIT_NUMBER;
3263     }
3264 
3265     /**
3266      * Determines if the specified code point is an ISO control character.
3267      * A code point is considered to be an ISO control character if it is in
3268      * the range &#92;u0000 through &#92;u001F or in the range &#92;u007F through
3269      * &#92;u009F.<br>
3270      * Up-to-date Unicode implementation of java.lang.Character.isISOControl()
3271      * @param ch code point to determine if it is an ISO control character
3272      * @return true if code point is a ISO control character
3273      */
isISOControl(int ch)3274     public static boolean isISOControl(int ch)
3275     {
3276         return ch >= 0 && ch <= APPLICATION_PROGRAM_COMMAND_ &&
3277                 ((ch <= UNIT_SEPARATOR_) || (ch >= DELETE_));
3278     }
3279 
3280     /**
3281      * Determines if the specified code point is a letter.
3282      * Up-to-date Unicode implementation of java.lang.Character.isLetter()
3283      * @param ch code point to determine if it is a letter
3284      * @return true if code point is a letter
3285      */
isLetter(int ch)3286     public static boolean isLetter(int ch)
3287     {
3288         // if props == 0, it will just fall through and return false
3289         return ((1 << getType(ch))
3290                 & ((1 << UCharacterCategory.UPPERCASE_LETTER)
3291                         | (1 << UCharacterCategory.LOWERCASE_LETTER)
3292                         | (1 << UCharacterCategory.TITLECASE_LETTER)
3293                         | (1 << UCharacterCategory.MODIFIER_LETTER)
3294                         | (1 << UCharacterCategory.OTHER_LETTER))) != 0;
3295     }
3296 
3297     /**
3298      * Determines if the specified code point is a letter or digit.
3299      * <strong>[icu] Note:</strong> This method, unlike java.lang.Character does not regard the ascii
3300      * characters 'A' - 'Z' and 'a' - 'z' as digits.
3301      * @param ch code point to determine if it is a letter or a digit
3302      * @return true if code point is a letter or a digit
3303      */
isLetterOrDigit(int ch)3304     public static boolean isLetterOrDigit(int ch)
3305     {
3306         return ((1 << getType(ch))
3307                 & ((1 << UCharacterCategory.UPPERCASE_LETTER)
3308                         | (1 << UCharacterCategory.LOWERCASE_LETTER)
3309                         | (1 << UCharacterCategory.TITLECASE_LETTER)
3310                         | (1 << UCharacterCategory.MODIFIER_LETTER)
3311                         | (1 << UCharacterCategory.OTHER_LETTER)
3312                         | (1 << UCharacterCategory.DECIMAL_DIGIT_NUMBER))) != 0;
3313     }
3314 
3315     /**
3316      * Compatibility override of Java deprecated method.  This
3317      * method will always remain deprecated.  Delegates to
3318      * java.lang.Character.isJavaIdentifierStart.
3319      * @param cp the code point
3320      * @return true if the code point can start a java identifier.
3321      * @deprecated ICU 3.4 (Java)
3322      * @hide original deprecated declaration
3323      */
3324     @Deprecated
isJavaLetter(int cp)3325     public static boolean isJavaLetter(int cp) {
3326         return isJavaIdentifierStart(cp);
3327     }
3328 
3329     /**
3330      * Compatibility override of Java deprecated method.  This
3331      * method will always remain deprecated.  Delegates to
3332      * java.lang.Character.isJavaIdentifierPart.
3333      * @param cp the code point
3334      * @return true if the code point can continue a java identifier.
3335      * @deprecated ICU 3.4 (Java)
3336      * @hide original deprecated declaration
3337      */
3338     @Deprecated
isJavaLetterOrDigit(int cp)3339     public static boolean isJavaLetterOrDigit(int cp) {
3340         return isJavaIdentifierPart(cp);
3341     }
3342 
3343     /**
3344      * Compatibility override of Java method, delegates to
3345      * java.lang.Character.isJavaIdentifierStart.
3346      * @param cp the code point
3347      * @return true if the code point can start a java identifier.
3348      */
isJavaIdentifierStart(int cp)3349     public static boolean isJavaIdentifierStart(int cp) {
3350         // note, downcast to char for jdk 1.4 compatibility
3351         return java.lang.Character.isJavaIdentifierStart((char)cp);
3352     }
3353 
3354     /**
3355      * Compatibility override of Java method, delegates to
3356      * java.lang.Character.isJavaIdentifierPart.
3357      * @param cp the code point
3358      * @return true if the code point can continue a java identifier.
3359      */
isJavaIdentifierPart(int cp)3360     public static boolean isJavaIdentifierPart(int cp) {
3361         // note, downcast to char for jdk 1.4 compatibility
3362         return java.lang.Character.isJavaIdentifierPart((char)cp);
3363     }
3364 
3365     /**
3366      * Determines if the specified code point is a lowercase character.
3367      * UnicodeData only contains case mappings for code points where they are
3368      * one-to-one mappings; it also omits information about context-sensitive
3369      * case mappings.<br> For more information about Unicode case mapping
3370      * please refer to the
3371      * <a href=http://www.unicode.org/unicode/reports/tr21/>Technical report
3372      * #21</a>.<br>
3373      * Up-to-date Unicode implementation of java.lang.Character.isLowerCase()
3374      * @param ch code point to determine if it is in lowercase
3375      * @return true if code point is a lowercase character
3376      */
isLowerCase(int ch)3377     public static boolean isLowerCase(int ch)
3378     {
3379         // if props == 0, it will just fall through and return false
3380         return getType(ch) == UCharacterCategory.LOWERCASE_LETTER;
3381     }
3382 
3383     /**
3384      * Determines if the specified code point is a white space character.
3385      * A code point is considered to be an whitespace character if and only
3386      * if it satisfies one of the following criteria:
3387      * <ul>
3388      * <li> It is a Unicode Separator character (categories "Z" = "Zs" or "Zl" or "Zp"), but is not
3389      *      also a non-breaking space (&#92;u00A0 or &#92;u2007 or &#92;u202F).
3390      * <li> It is &#92;u0009, HORIZONTAL TABULATION.
3391      * <li> It is &#92;u000A, LINE FEED.
3392      * <li> It is &#92;u000B, VERTICAL TABULATION.
3393      * <li> It is &#92;u000C, FORM FEED.
3394      * <li> It is &#92;u000D, CARRIAGE RETURN.
3395      * <li> It is &#92;u001C, FILE SEPARATOR.
3396      * <li> It is &#92;u001D, GROUP SEPARATOR.
3397      * <li> It is &#92;u001E, RECORD SEPARATOR.
3398      * <li> It is &#92;u001F, UNIT SEPARATOR.
3399      * </ul>
3400      *
3401      * This API tries to sync with the semantics of Java's
3402      * java.lang.Character.isWhitespace(), but it may not return
3403      * the exact same results because of the Unicode version
3404      * difference.
3405      * <p>Note: Unicode 4.0.1 changed U+200B ZERO WIDTH SPACE from a Space Separator (Zs)
3406      * to a Format Control (Cf). Since then, isWhitespace(0x200b) returns false.
3407      * See http://www.unicode.org/versions/Unicode4.0.1/
3408      * @param ch code point to determine if it is a white space
3409      * @return true if the specified code point is a white space character
3410      */
isWhitespace(int ch)3411     public static boolean isWhitespace(int ch)
3412     {
3413         // exclude no-break spaces
3414         // if props == 0, it will just fall through and return false
3415         return ((1 << getType(ch)) &
3416                 ((1 << UCharacterCategory.SPACE_SEPARATOR)
3417                         | (1 << UCharacterCategory.LINE_SEPARATOR)
3418                         | (1 << UCharacterCategory.PARAGRAPH_SEPARATOR))) != 0
3419                         && (ch != NO_BREAK_SPACE_) && (ch != FIGURE_SPACE_) && (ch != NARROW_NO_BREAK_SPACE_)
3420                         // TAB VT LF FF CR FS GS RS US NL are all control characters
3421                         // that are white spaces.
3422                         || (ch >= 0x9 && ch <= 0xd) || (ch >= 0x1c && ch <= 0x1f);
3423     }
3424 
3425     /**
3426      * Determines if the specified code point is a Unicode specified space
3427      * character, i.e. if code point is in the category Zs, Zl and Zp.
3428      * Up-to-date Unicode implementation of java.lang.Character.isSpaceChar().
3429      * @param ch code point to determine if it is a space
3430      * @return true if the specified code point is a space character
3431      */
isSpaceChar(int ch)3432     public static boolean isSpaceChar(int ch)
3433     {
3434         // if props == 0, it will just fall through and return false
3435         return ((1 << getType(ch)) & ((1 << UCharacterCategory.SPACE_SEPARATOR)
3436                 | (1 << UCharacterCategory.LINE_SEPARATOR)
3437                 | (1 << UCharacterCategory.PARAGRAPH_SEPARATOR)))
3438                 != 0;
3439     }
3440 
3441     /**
3442      * Determines if the specified code point is a titlecase character.
3443      * UnicodeData only contains case mappings for code points where they are
3444      * one-to-one mappings; it also omits information about context-sensitive
3445      * case mappings.<br>
3446      * For more information about Unicode case mapping please refer to the
3447      * <a href=http://www.unicode.org/unicode/reports/tr21/>
3448      * Technical report #21</a>.<br>
3449      * Up-to-date Unicode implementation of java.lang.Character.isTitleCase().
3450      * @param ch code point to determine if it is in title case
3451      * @return true if the specified code point is a titlecase character
3452      */
isTitleCase(int ch)3453     public static boolean isTitleCase(int ch)
3454     {
3455         // if props == 0, it will just fall through and return false
3456         return getType(ch) == UCharacterCategory.TITLECASE_LETTER;
3457     }
3458 
3459     /**
3460      * Determines if the specified code point may be any part of a Unicode
3461      * identifier other than the starting character.
3462      * A code point may be part of a Unicode identifier if and only if it is
3463      * one of the following:
3464      * <ul>
3465      * <li> Lu Uppercase letter
3466      * <li> Ll Lowercase letter
3467      * <li> Lt Titlecase letter
3468      * <li> Lm Modifier letter
3469      * <li> Lo Other letter
3470      * <li> Nl Letter number
3471      * <li> Pc Connecting punctuation character
3472      * <li> Nd decimal number
3473      * <li> Mc Spacing combining mark
3474      * <li> Mn Non-spacing mark
3475      * <li> Cf formatting code
3476      * </ul>
3477      * Up-to-date Unicode implementation of
3478      * java.lang.Character.isUnicodeIdentifierPart().<br>
3479      * See <a href=http://www.unicode.org/unicode/reports/tr8/>UTR #8</a>.
3480      * @param ch code point to determine if is can be part of a Unicode
3481      *        identifier
3482      * @return true if code point is any character belonging a unicode
3483      *         identifier suffix after the first character
3484      */
isUnicodeIdentifierPart(int ch)3485     public static boolean isUnicodeIdentifierPart(int ch)
3486     {
3487         // if props == 0, it will just fall through and return false
3488         // cat == format
3489         return ((1 << getType(ch))
3490                 & ((1 << UCharacterCategory.UPPERCASE_LETTER)
3491                         | (1 << UCharacterCategory.LOWERCASE_LETTER)
3492                         | (1 << UCharacterCategory.TITLECASE_LETTER)
3493                         | (1 << UCharacterCategory.MODIFIER_LETTER)
3494                         | (1 << UCharacterCategory.OTHER_LETTER)
3495                         | (1 << UCharacterCategory.LETTER_NUMBER)
3496                         | (1 << UCharacterCategory.CONNECTOR_PUNCTUATION)
3497                         | (1 << UCharacterCategory.DECIMAL_DIGIT_NUMBER)
3498                         | (1 << UCharacterCategory.COMBINING_SPACING_MARK)
3499                         | (1 << UCharacterCategory.NON_SPACING_MARK))) != 0
3500                         || isIdentifierIgnorable(ch);
3501     }
3502 
3503     /**
3504      * Determines if the specified code point is permissible as the first
3505      * character in a Unicode identifier.
3506      * A code point may start a Unicode identifier if it is of type either
3507      * <ul>
3508      * <li> Lu Uppercase letter
3509      * <li> Ll Lowercase letter
3510      * <li> Lt Titlecase letter
3511      * <li> Lm Modifier letter
3512      * <li> Lo Other letter
3513      * <li> Nl Letter number
3514      * </ul>
3515      * Up-to-date Unicode implementation of
3516      * java.lang.Character.isUnicodeIdentifierStart().<br>
3517      * See <a href=http://www.unicode.org/unicode/reports/tr8/>UTR #8</a>.
3518      * @param ch code point to determine if it can start a Unicode identifier
3519      * @return true if code point is the first character belonging a unicode
3520      *              identifier
3521      */
isUnicodeIdentifierStart(int ch)3522     public static boolean isUnicodeIdentifierStart(int ch)
3523     {
3524         /*int cat = getType(ch);*/
3525         // if props == 0, it will just fall through and return false
3526         return ((1 << getType(ch))
3527                 & ((1 << UCharacterCategory.UPPERCASE_LETTER)
3528                         | (1 << UCharacterCategory.LOWERCASE_LETTER)
3529                         | (1 << UCharacterCategory.TITLECASE_LETTER)
3530                         | (1 << UCharacterCategory.MODIFIER_LETTER)
3531                         | (1 << UCharacterCategory.OTHER_LETTER)
3532                         | (1 << UCharacterCategory.LETTER_NUMBER))) != 0;
3533     }
3534 
3535     /**
3536      * Determines if the specified code point should be regarded as an
3537      * ignorable character in a Java identifier.
3538      * A character is Java-identifier-ignorable if it has the general category
3539      * Cf Formatting Control, or it is a non-Java-whitespace ISO control:
3540      * U+0000..U+0008, U+000E..U+001B, U+007F..U+009F.<br>
3541      * Up-to-date Unicode implementation of
3542      * java.lang.Character.isIdentifierIgnorable().<br>
3543      * See <a href=http://www.unicode.org/unicode/reports/tr8/>UTR #8</a>.
3544      * <p>Note that Unicode just recommends to ignore Cf (format controls).
3545      * @param ch code point to be determined if it can be ignored in a Unicode
3546      *        identifier.
3547      * @return true if the code point is ignorable
3548      */
isIdentifierIgnorable(int ch)3549     public static boolean isIdentifierIgnorable(int ch)
3550     {
3551         // see java.lang.Character.isIdentifierIgnorable() on range of
3552         // ignorable characters.
3553         if (ch <= 0x9f) {
3554             return isISOControl(ch)
3555                     && !((ch >= 0x9 && ch <= 0xd)
3556                             || (ch >= 0x1c && ch <= 0x1f));
3557         }
3558         return getType(ch) == UCharacterCategory.FORMAT;
3559     }
3560 
3561     /**
3562      * Determines if the specified code point is an uppercase character.
3563      * UnicodeData only contains case mappings for code point where they are
3564      * one-to-one mappings; it also omits information about context-sensitive
3565      * case mappings.<br>
3566      * For language specific case conversion behavior, use
3567      * toUpperCase(locale, str). <br>
3568      * For example, the case conversion for dot-less i and dotted I in Turkish,
3569      * or for final sigma in Greek.
3570      * For more information about Unicode case mapping please refer to the
3571      * <a href=http://www.unicode.org/unicode/reports/tr21/>
3572      * Technical report #21</a>.<br>
3573      * Up-to-date Unicode implementation of java.lang.Character.isUpperCase().
3574      * @param ch code point to determine if it is in uppercase
3575      * @return true if the code point is an uppercase character
3576      */
isUpperCase(int ch)3577     public static boolean isUpperCase(int ch)
3578     {
3579         // if props == 0, it will just fall through and return false
3580         return getType(ch) == UCharacterCategory.UPPERCASE_LETTER;
3581     }
3582 
3583     /**
3584      * The given code point is mapped to its lowercase equivalent; if the code
3585      * point has no lowercase equivalent, the code point itself is returned.
3586      * Up-to-date Unicode implementation of java.lang.Character.toLowerCase()
3587      *
3588      * <p>This function only returns the simple, single-code point case mapping.
3589      * Full case mappings should be used whenever possible because they produce
3590      * better results by working on whole strings.
3591      * They take into account the string context and the language and can map
3592      * to a result string with a different length as appropriate.
3593      * Full case mappings are applied by the case mapping functions
3594      * that take String parameters rather than code points (int).
3595      * See also the User Guide chapter on C/POSIX migration:
3596      * http://www.icu-project.org/userguide/posix.html#case_mappings
3597      *
3598      * @param ch code point whose lowercase equivalent is to be retrieved
3599      * @return the lowercase equivalent code point
3600      */
toLowerCase(int ch)3601     public static int toLowerCase(int ch) {
3602         return UCaseProps.INSTANCE.tolower(ch);
3603     }
3604 
3605     /**
3606      * Converts argument code point and returns a String object representing
3607      * the code point's value in UTF-16 format.
3608      * The result is a string whose length is 1 for BMP code points, 2 for supplementary ones.
3609      *
3610      * <p>Up-to-date Unicode implementation of java.lang.Character.toString().
3611      *
3612      * @param ch code point
3613      * @return string representation of the code point, null if code point is not
3614      *         defined in unicode
3615      */
toString(int ch)3616     public static String toString(int ch)
3617     {
3618         if (ch < MIN_VALUE || ch > MAX_VALUE) {
3619             return null;
3620         }
3621 
3622         if (ch < SUPPLEMENTARY_MIN_VALUE) {
3623             return String.valueOf((char)ch);
3624         }
3625 
3626         return new String(Character.toChars(ch));
3627     }
3628 
3629     /**
3630      * Converts the code point argument to titlecase.
3631      * If no titlecase is available, the uppercase is returned. If no uppercase
3632      * is available, the code point itself is returned.
3633      * Up-to-date Unicode implementation of java.lang.Character.toTitleCase()
3634      *
3635      * <p>This function only returns the simple, single-code point case mapping.
3636      * Full case mappings should be used whenever possible because they produce
3637      * better results by working on whole strings.
3638      * They take into account the string context and the language and can map
3639      * to a result string with a different length as appropriate.
3640      * Full case mappings are applied by the case mapping functions
3641      * that take String parameters rather than code points (int).
3642      * See also the User Guide chapter on C/POSIX migration:
3643      * http://www.icu-project.org/userguide/posix.html#case_mappings
3644      *
3645      * @param ch code point  whose title case is to be retrieved
3646      * @return titlecase code point
3647      */
toTitleCase(int ch)3648     public static int toTitleCase(int ch) {
3649         return UCaseProps.INSTANCE.totitle(ch);
3650     }
3651 
3652     /**
3653      * Converts the character argument to uppercase.
3654      * If no uppercase is available, the character itself is returned.
3655      * Up-to-date Unicode implementation of java.lang.Character.toUpperCase()
3656      *
3657      * <p>This function only returns the simple, single-code point case mapping.
3658      * Full case mappings should be used whenever possible because they produce
3659      * better results by working on whole strings.
3660      * They take into account the string context and the language and can map
3661      * to a result string with a different length as appropriate.
3662      * Full case mappings are applied by the case mapping functions
3663      * that take String parameters rather than code points (int).
3664      * See also the User Guide chapter on C/POSIX migration:
3665      * http://www.icu-project.org/userguide/posix.html#case_mappings
3666      *
3667      * @param ch code point whose uppercase is to be retrieved
3668      * @return uppercase code point
3669      */
toUpperCase(int ch)3670     public static int toUpperCase(int ch) {
3671         return UCaseProps.INSTANCE.toupper(ch);
3672     }
3673 
3674     // extra methods not in java.lang.Character --------------------------
3675 
3676     /**
3677      * <strong>[icu]</strong> Determines if the code point is a supplementary character.
3678      * A code point is a supplementary character if and only if it is greater
3679      * than <a href=#SUPPLEMENTARY_MIN_VALUE>SUPPLEMENTARY_MIN_VALUE</a>
3680      * @param ch code point to be determined if it is in the supplementary
3681      *        plane
3682      * @return true if code point is a supplementary character
3683      */
isSupplementary(int ch)3684     public static boolean isSupplementary(int ch)
3685     {
3686         return ch >= UCharacter.SUPPLEMENTARY_MIN_VALUE &&
3687                 ch <= UCharacter.MAX_VALUE;
3688     }
3689 
3690     /**
3691      * <strong>[icu]</strong> Determines if the code point is in the BMP plane.
3692      * @param ch code point to be determined if it is not a supplementary
3693      *        character
3694      * @return true if code point is not a supplementary character
3695      */
isBMP(int ch)3696     public static boolean isBMP(int ch)
3697     {
3698         return (ch >= 0 && ch <= LAST_CHAR_MASK_);
3699     }
3700 
3701     /**
3702      * <strong>[icu]</strong> Determines whether the specified code point is a printable character
3703      * according to the Unicode standard.
3704      * @param ch code point to be determined if it is printable
3705      * @return true if the code point is a printable character
3706      */
isPrintable(int ch)3707     public static boolean isPrintable(int ch)
3708     {
3709         int cat = getType(ch);
3710         // if props == 0, it will just fall through and return false
3711         return (cat != UCharacterCategory.UNASSIGNED &&
3712                 cat != UCharacterCategory.CONTROL &&
3713                 cat != UCharacterCategory.FORMAT &&
3714                 cat != UCharacterCategory.PRIVATE_USE &&
3715                 cat != UCharacterCategory.SURROGATE &&
3716                 cat != UCharacterCategory.GENERAL_OTHER_TYPES);
3717     }
3718 
3719     /**
3720      * <strong>[icu]</strong> Determines whether the specified code point is of base form.
3721      * A code point of base form does not graphically combine with preceding
3722      * characters, and is neither a control nor a format character.
3723      * @param ch code point to be determined if it is of base form
3724      * @return true if the code point is of base form
3725      */
isBaseForm(int ch)3726     public static boolean isBaseForm(int ch)
3727     {
3728         int cat = getType(ch);
3729         // if props == 0, it will just fall through and return false
3730         return cat == UCharacterCategory.DECIMAL_DIGIT_NUMBER ||
3731                 cat == UCharacterCategory.OTHER_NUMBER ||
3732                 cat == UCharacterCategory.LETTER_NUMBER ||
3733                 cat == UCharacterCategory.UPPERCASE_LETTER ||
3734                 cat == UCharacterCategory.LOWERCASE_LETTER ||
3735                 cat == UCharacterCategory.TITLECASE_LETTER ||
3736                 cat == UCharacterCategory.MODIFIER_LETTER ||
3737                 cat == UCharacterCategory.OTHER_LETTER ||
3738                 cat == UCharacterCategory.NON_SPACING_MARK ||
3739                 cat == UCharacterCategory.ENCLOSING_MARK ||
3740                 cat == UCharacterCategory.COMBINING_SPACING_MARK;
3741     }
3742 
3743     /**
3744      * <strong>[icu]</strong> Returns the Bidirection property of a code point.
3745      * For example, 0x0041 (letter A) has the LEFT_TO_RIGHT directional
3746      * property.<br>
3747      * Result returned belongs to the interface
3748      * <a href=UCharacterDirection.html>UCharacterDirection</a>
3749      * @param ch the code point to be determined its direction
3750      * @return direction constant from UCharacterDirection.
3751      */
getDirection(int ch)3752     public static int getDirection(int ch)
3753     {
3754         return UBiDiProps.INSTANCE.getClass(ch);
3755     }
3756 
3757     /**
3758      * Determines whether the code point has the "mirrored" property.
3759      * This property is set for characters that are commonly used in
3760      * Right-To-Left contexts and need to be displayed with a "mirrored"
3761      * glyph.
3762      * @param ch code point whose mirror is to be determined
3763      * @return true if the code point has the "mirrored" property
3764      */
isMirrored(int ch)3765     public static boolean isMirrored(int ch)
3766     {
3767         return UBiDiProps.INSTANCE.isMirrored(ch);
3768     }
3769 
3770     /**
3771      * <strong>[icu]</strong> Maps the specified code point to a "mirror-image" code point.
3772      * For code points with the "mirrored" property, implementations sometimes
3773      * need a "poor man's" mapping to another code point such that the default
3774      * glyph may serve as the mirror-image of the default glyph of the
3775      * specified code point.<br>
3776      * This is useful for text conversion to and from codepages with visual
3777      * order, and for displays without glyph selection capabilities.
3778      * @param ch code point whose mirror is to be retrieved
3779      * @return another code point that may serve as a mirror-image substitute,
3780      *         or ch itself if there is no such mapping or ch does not have the
3781      *         "mirrored" property
3782      */
getMirror(int ch)3783     public static int getMirror(int ch)
3784     {
3785         return UBiDiProps.INSTANCE.getMirror(ch);
3786     }
3787 
3788     /**
3789      * <strong>[icu]</strong> Maps the specified character to its paired bracket character.
3790      * For Bidi_Paired_Bracket_Type!=None, this is the same as getMirror(int).
3791      * Otherwise c itself is returned.
3792      * See http://www.unicode.org/reports/tr9/
3793      *
3794      * @param c the code point to be mapped
3795      * @return the paired bracket code point,
3796      *         or c itself if there is no such mapping
3797      *         (Bidi_Paired_Bracket_Type=None)
3798      *
3799      * @see UProperty#BIDI_PAIRED_BRACKET
3800      * @see UProperty#BIDI_PAIRED_BRACKET_TYPE
3801      * @see #getMirror(int)
3802      */
getBidiPairedBracket(int c)3803     public static int getBidiPairedBracket(int c) {
3804         return UBiDiProps.INSTANCE.getPairedBracket(c);
3805     }
3806 
3807     /**
3808      * <strong>[icu]</strong> Returns the combining class of the argument codepoint
3809      * @param ch code point whose combining is to be retrieved
3810      * @return the combining class of the codepoint
3811      */
getCombiningClass(int ch)3812     public static int getCombiningClass(int ch)
3813     {
3814         return Normalizer2.getNFDInstance().getCombiningClass(ch);
3815     }
3816 
3817     /**
3818      * <strong>[icu]</strong> A code point is illegal if and only if
3819      * <ul>
3820      * <li> Out of bounds, less than 0 or greater than UCharacter.MAX_VALUE
3821      * <li> A surrogate value, 0xD800 to 0xDFFF
3822      * <li> Not-a-character, having the form 0x xxFFFF or 0x xxFFFE
3823      * </ul>
3824      * Note: legal does not mean that it is assigned in this version of Unicode.
3825      * @param ch code point to determine if it is a legal code point by itself
3826      * @return true if and only if legal.
3827      */
isLegal(int ch)3828     public static boolean isLegal(int ch)
3829     {
3830         if (ch < MIN_VALUE) {
3831             return false;
3832         }
3833         if (ch < Character.MIN_SURROGATE) {
3834             return true;
3835         }
3836         if (ch <= Character.MAX_SURROGATE) {
3837             return false;
3838         }
3839         if (UCharacterUtility.isNonCharacter(ch)) {
3840             return false;
3841         }
3842         return (ch <= MAX_VALUE);
3843     }
3844 
3845     /**
3846      * <strong>[icu]</strong> A string is legal iff all its code points are legal.
3847      * A code point is illegal if and only if
3848      * <ul>
3849      * <li> Out of bounds, less than 0 or greater than UCharacter.MAX_VALUE
3850      * <li> A surrogate value, 0xD800 to 0xDFFF
3851      * <li> Not-a-character, having the form 0x xxFFFF or 0x xxFFFE
3852      * </ul>
3853      * Note: legal does not mean that it is assigned in this version of Unicode.
3854      * @param str containing code points to examin
3855      * @return true if and only if legal.
3856      */
isLegal(String str)3857     public static boolean isLegal(String str)
3858     {
3859         int size = str.length();
3860         int codepoint;
3861         for (int i = 0; i < size; i += Character.charCount(codepoint))
3862         {
3863             codepoint = str.codePointAt(i);
3864             if (!isLegal(codepoint)) {
3865                 return false;
3866             }
3867         }
3868         return true;
3869     }
3870 
3871     /**
3872      * <strong>[icu]</strong> Returns the version of Unicode data used.
3873      * @return the unicode version number used
3874      */
getUnicodeVersion()3875     public static VersionInfo getUnicodeVersion()
3876     {
3877         return UCharacterProperty.INSTANCE.m_unicodeVersion_;
3878     }
3879 
3880     /**
3881      * <strong>[icu]</strong> Returns the most current Unicode name of the argument code point, or
3882      * null if the character is unassigned or outside the range
3883      * UCharacter.MIN_VALUE and UCharacter.MAX_VALUE or does not have a name.
3884      * <br>
3885      * Note calling any methods related to code point names, e.g. get*Name*()
3886      * incurs a one-time initialisation cost to construct the name tables.
3887      * @param ch the code point for which to get the name
3888      * @return most current Unicode name
3889      */
getName(int ch)3890     public static String getName(int ch)
3891     {
3892         return UCharacterName.INSTANCE.getName(ch, UCharacterNameChoice.UNICODE_CHAR_NAME);
3893     }
3894 
3895     /**
3896      * <strong>[icu]</strong> Returns the names for each of the characters in a string
3897      * @param s string to format
3898      * @param separator string to go between names
3899      * @return string of names
3900      */
getName(String s, String separator)3901     public static String getName(String s, String separator) {
3902         if (s.length() == 1) { // handle common case
3903             return getName(s.charAt(0));
3904         }
3905         int cp;
3906         StringBuilder sb = new StringBuilder();
3907         for (int i = 0; i < s.length(); i += Character.charCount(cp)) {
3908             cp = s.codePointAt(i);
3909             if (i != 0) sb.append(separator);
3910             sb.append(UCharacter.getName(cp));
3911         }
3912         return sb.toString();
3913     }
3914 
3915     /**
3916      * <strong>[icu]</strong> Returns null.
3917      * Used to return the Unicode_1_Name property value which was of little practical value.
3918      * @param ch the code point for which to get the name
3919      * @return null
3920      * @deprecated ICU 49
3921      * @hide original deprecated declaration
3922      */
3923     @Deprecated
getName1_0(int ch)3924     public static String getName1_0(int ch)
3925     {
3926         return null;
3927     }
3928 
3929     /**
3930      * <strong>[icu]</strong> Returns a name for a valid codepoint. Unlike, getName(int) and
3931      * getName1_0(int), this method will return a name even for codepoints that
3932      * are not assigned a name in UnicodeData.txt.
3933      *
3934      * <p>The names are returned in the following order.
3935      * <ul>
3936      * <li> Most current Unicode name if there is any
3937      * <li> Unicode 1.0 name if there is any
3938      * <li> Extended name in the form of
3939      *      "&lt;codepoint_type-codepoint_hex_digits&gt;". E.g., &lt;noncharacter-fffe&gt;
3940      * </ul>
3941      * Note calling any methods related to code point names, e.g. get*Name*()
3942      * incurs a one-time initialisation cost to construct the name tables.
3943      * @param ch the code point for which to get the name
3944      * @return a name for the argument codepoint
3945      */
getExtendedName(int ch)3946     public static String getExtendedName(int ch) {
3947         return UCharacterName.INSTANCE.getName(ch, UCharacterNameChoice.EXTENDED_CHAR_NAME);
3948     }
3949 
3950     /**
3951      * <strong>[icu]</strong> Returns the corrected name from NameAliases.txt if there is one.
3952      * Returns null if the character is unassigned or outside the range
3953      * UCharacter.MIN_VALUE and UCharacter.MAX_VALUE or does not have a name.
3954      * <br>
3955      * Note calling any methods related to code point names, e.g. get*Name*()
3956      * incurs a one-time initialisation cost to construct the name tables.
3957      * @param ch the code point for which to get the name alias
3958      * @return Unicode name alias, or null
3959      */
getNameAlias(int ch)3960     public static String getNameAlias(int ch)
3961     {
3962         return UCharacterName.INSTANCE.getName(ch, UCharacterNameChoice.CHAR_NAME_ALIAS);
3963     }
3964 
3965     /**
3966      * <strong>[icu]</strong> Returns null.
3967      * Used to return the ISO 10646 comment for a character.
3968      * The Unicode ISO_Comment property is deprecated and has no values.
3969      *
3970      * @param ch The code point for which to get the ISO comment.
3971      *           It must be the case that {@code 0 <= ch <= 0x10ffff}.
3972      * @return null
3973      * @deprecated ICU 49
3974      * @hide original deprecated declaration
3975      */
3976     @Deprecated
getISOComment(int ch)3977     public static String getISOComment(int ch)
3978     {
3979         return null;
3980     }
3981 
3982     /**
3983      * <strong>[icu]</strong> <p>Finds a Unicode code point by its most current Unicode name and
3984      * return its code point value. All Unicode names are in uppercase.
3985      * Note calling any methods related to code point names, e.g. get*Name*()
3986      * incurs a one-time initialisation cost to construct the name tables.
3987      * @param name most current Unicode character name whose code point is to
3988      *        be returned
3989      * @return code point or -1 if name is not found
3990      */
getCharFromName(String name)3991     public static int getCharFromName(String name){
3992         return UCharacterName.INSTANCE.getCharFromName(
3993                 UCharacterNameChoice.UNICODE_CHAR_NAME, name);
3994     }
3995 
3996     /**
3997      * <strong>[icu]</strong> Returns -1.
3998      * <p>Used to find a Unicode character by its version 1.0 Unicode name and return
3999      * its code point value.
4000      * @param name Unicode 1.0 code point name whose code point is to be
4001      *             returned
4002      * @return -1
4003      * @deprecated ICU 49
4004      * @see #getName1_0(int)
4005      * @hide original deprecated declaration
4006      */
4007     @Deprecated
getCharFromName1_0(String name)4008     public static int getCharFromName1_0(String name){
4009         return -1;
4010     }
4011 
4012     /**
4013      * <strong>[icu]</strong> <p>Find a Unicode character by either its name and return its code
4014      * point value. All Unicode names are in uppercase.
4015      * Extended names are all lowercase except for numbers and are contained
4016      * within angle brackets.
4017      * The names are searched in the following order
4018      * <ul>
4019      * <li> Most current Unicode name if there is any
4020      * <li> Unicode 1.0 name if there is any
4021      * <li> Extended name in the form of
4022      *      "&lt;codepoint_type-codepoint_hex_digits&gt;". E.g. &lt;noncharacter-FFFE&gt;
4023      * </ul>
4024      * Note calling any methods related to code point names, e.g. get*Name*()
4025      * incurs a one-time initialisation cost to construct the name tables.
4026      * @param name codepoint name
4027      * @return code point associated with the name or -1 if the name is not
4028      *         found.
4029      */
getCharFromExtendedName(String name)4030     public static int getCharFromExtendedName(String name){
4031         return UCharacterName.INSTANCE.getCharFromName(
4032                 UCharacterNameChoice.EXTENDED_CHAR_NAME, name);
4033     }
4034 
4035     /**
4036      * <strong>[icu]</strong> <p>Find a Unicode character by its corrected name alias and return
4037      * its code point value. All Unicode names are in uppercase.
4038      * Note calling any methods related to code point names, e.g. get*Name*()
4039      * incurs a one-time initialisation cost to construct the name tables.
4040      * @param name Unicode name alias whose code point is to be returned
4041      * @return code point or -1 if name is not found
4042      */
getCharFromNameAlias(String name)4043     public static int getCharFromNameAlias(String name){
4044         return UCharacterName.INSTANCE.getCharFromName(UCharacterNameChoice.CHAR_NAME_ALIAS, name);
4045     }
4046 
4047     /**
4048      * <strong>[icu]</strong> Return the Unicode name for a given property, as given in the
4049      * Unicode database file PropertyAliases.txt.  Most properties
4050      * have more than one name.  The nameChoice determines which one
4051      * is returned.
4052      *
4053      * In addition, this function maps the property
4054      * UProperty.GENERAL_CATEGORY_MASK to the synthetic names "gcm" /
4055      * "General_Category_Mask".  These names are not in
4056      * PropertyAliases.txt.
4057      *
4058      * @param property UProperty selector.
4059      *
4060      * @param nameChoice UProperty.NameChoice selector for which name
4061      * to get.  All properties have a long name.  Most have a short
4062      * name, but some do not.  Unicode allows for additional names; if
4063      * present these will be returned by UProperty.NameChoice.LONG + i,
4064      * where i=1, 2,...
4065      *
4066      * @return a name, or null if Unicode explicitly defines no name
4067      * ("n/a") for a given property/nameChoice.  If a given nameChoice
4068      * throws an exception, then all larger values of nameChoice will
4069      * throw an exception.  If null is returned for a given
4070      * nameChoice, then other nameChoice values may return non-null
4071      * results.
4072      *
4073      * @exception IllegalArgumentException thrown if property or
4074      * nameChoice are invalid.
4075      *
4076      * @see UProperty
4077      * @see UProperty.NameChoice
4078      */
getPropertyName(int property, int nameChoice)4079     public static String getPropertyName(int property,
4080             int nameChoice) {
4081         return UPropertyAliases.INSTANCE.getPropertyName(property, nameChoice);
4082     }
4083 
4084     /**
4085      * <strong>[icu]</strong> Return the UProperty selector for a given property name, as
4086      * specified in the Unicode database file PropertyAliases.txt.
4087      * Short, long, and any other variants are recognized.
4088      *
4089      * In addition, this function maps the synthetic names "gcm" /
4090      * "General_Category_Mask" to the property
4091      * UProperty.GENERAL_CATEGORY_MASK.  These names are not in
4092      * PropertyAliases.txt.
4093      *
4094      * @param propertyAlias the property name to be matched.  The name
4095      * is compared using "loose matching" as described in
4096      * PropertyAliases.txt.
4097      *
4098      * @return a UProperty enum.
4099      *
4100      * @exception IllegalArgumentException thrown if propertyAlias
4101      * is not recognized.
4102      *
4103      * @see UProperty
4104      */
getPropertyEnum(CharSequence propertyAlias)4105     public static int getPropertyEnum(CharSequence propertyAlias) {
4106         int propEnum = UPropertyAliases.INSTANCE.getPropertyEnum(propertyAlias);
4107         if (propEnum == UProperty.UNDEFINED) {
4108             throw new IllegalIcuArgumentException("Invalid name: " + propertyAlias);
4109         }
4110         return propEnum;
4111     }
4112 
4113     /**
4114      * <strong>[icu]</strong> Return the Unicode name for a given property value, as given in
4115      * the Unicode database file PropertyValueAliases.txt.  Most
4116      * values have more than one name.  The nameChoice determines
4117      * which one is returned.
4118      *
4119      * Note: Some of the names in PropertyValueAliases.txt can only be
4120      * retrieved using UProperty.GENERAL_CATEGORY_MASK, not
4121      * UProperty.GENERAL_CATEGORY.  These include: "C" / "Other", "L" /
4122      * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P"
4123      * / "Punctuation", "S" / "Symbol", and "Z" / "Separator".
4124      *
4125      * @param property UProperty selector constant.
4126      * UProperty.INT_START &lt;= property &lt; UProperty.INT_LIMIT or
4127      * UProperty.BINARY_START &lt;= property &lt; UProperty.BINARY_LIMIT or
4128      * UProperty.MASK_START &lt; = property &lt; UProperty.MASK_LIMIT.
4129      * If out of range, null is returned.
4130      *
4131      * @param value selector for a value for the given property.  In
4132      * general, valid values range from 0 up to some maximum.  There
4133      * are a few exceptions: (1.) UProperty.BLOCK values begin at the
4134      * non-zero value BASIC_LATIN.getID().  (2.)
4135      * UProperty.CANONICAL_COMBINING_CLASS values are not contiguous
4136      * and range from 0..240.  (3.)  UProperty.GENERAL_CATEGORY_MASK values
4137      * are mask values produced by left-shifting 1 by
4138      * UCharacter.getType().  This allows grouped categories such as
4139      * [:L:] to be represented.  Mask values are non-contiguous.
4140      *
4141      * @param nameChoice UProperty.NameChoice selector for which name
4142      * to get.  All values have a long name.  Most have a short name,
4143      * but some do not.  Unicode allows for additional names; if
4144      * present these will be returned by UProperty.NameChoice.LONG + i,
4145      * where i=1, 2,...
4146      *
4147      * @return a name, or null if Unicode explicitly defines no name
4148      * ("n/a") for a given property/value/nameChoice.  If a given
4149      * nameChoice throws an exception, then all larger values of
4150      * nameChoice will throw an exception.  If null is returned for a
4151      * given nameChoice, then other nameChoice values may return
4152      * non-null results.
4153      *
4154      * @exception IllegalArgumentException thrown if property, value,
4155      * or nameChoice are invalid.
4156      *
4157      * @see UProperty
4158      * @see UProperty.NameChoice
4159      */
getPropertyValueName(int property, int value, int nameChoice)4160     public static String getPropertyValueName(int property,
4161             int value,
4162             int nameChoice)
4163     {
4164         if ((property == UProperty.CANONICAL_COMBINING_CLASS
4165                 || property == UProperty.LEAD_CANONICAL_COMBINING_CLASS
4166                 || property == UProperty.TRAIL_CANONICAL_COMBINING_CLASS)
4167                 && value >= UCharacter.getIntPropertyMinValue(
4168                         UProperty.CANONICAL_COMBINING_CLASS)
4169                         && value <= UCharacter.getIntPropertyMaxValue(
4170                                 UProperty.CANONICAL_COMBINING_CLASS)
4171                                 && nameChoice >= 0 && nameChoice < UProperty.NameChoice.COUNT) {
4172             // this is hard coded for the valid cc
4173             // because PropertyValueAliases.txt does not contain all of them
4174             try {
4175                 return UPropertyAliases.INSTANCE.getPropertyValueName(property, value,
4176                         nameChoice);
4177             }
4178             catch (IllegalArgumentException e) {
4179                 return null;
4180             }
4181         }
4182         return UPropertyAliases.INSTANCE.getPropertyValueName(property, value, nameChoice);
4183     }
4184 
4185     /**
4186      * <strong>[icu]</strong> Return the property value integer for a given value name, as
4187      * specified in the Unicode database file PropertyValueAliases.txt.
4188      * Short, long, and any other variants are recognized.
4189      *
4190      * Note: Some of the names in PropertyValueAliases.txt will only be
4191      * recognized with UProperty.GENERAL_CATEGORY_MASK, not
4192      * UProperty.GENERAL_CATEGORY.  These include: "C" / "Other", "L" /
4193      * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P"
4194      * / "Punctuation", "S" / "Symbol", and "Z" / "Separator".
4195      *
4196      * @param property UProperty selector constant.
4197      * UProperty.INT_START &lt;= property &lt; UProperty.INT_LIMIT or
4198      * UProperty.BINARY_START &lt;= property &lt; UProperty.BINARY_LIMIT or
4199      * UProperty.MASK_START &lt; = property &lt; UProperty.MASK_LIMIT.
4200      * Only these properties can be enumerated.
4201      *
4202      * @param valueAlias the value name to be matched.  The name is
4203      * compared using "loose matching" as described in
4204      * PropertyValueAliases.txt.
4205      *
4206      * @return a value integer.  Note: UProperty.GENERAL_CATEGORY
4207      * values are mask values produced by left-shifting 1 by
4208      * UCharacter.getType().  This allows grouped categories such as
4209      * [:L:] to be represented.
4210      *
4211      * @see UProperty
4212      * @throws IllegalArgumentException if property is not a valid UProperty
4213      *         selector or valueAlias is not a value of this property
4214      */
getPropertyValueEnum(int property, CharSequence valueAlias)4215     public static int getPropertyValueEnum(int property, CharSequence valueAlias) {
4216         int propEnum = UPropertyAliases.INSTANCE.getPropertyValueEnum(property, valueAlias);
4217         if (propEnum == UProperty.UNDEFINED) {
4218             throw new IllegalIcuArgumentException("Invalid name: " + valueAlias);
4219         }
4220         return propEnum;
4221     }
4222 
4223     /**
4224      * Same as {@link #getPropertyValueEnum(int, CharSequence)}, except doesn't throw exception. Instead, returns UProperty.UNDEFINED.
4225      * @param property  Same as {@link #getPropertyValueEnum(int, CharSequence)}
4226      * @param valueAlias    Same as {@link #getPropertyValueEnum(int, CharSequence)}
4227      * @return returns UProperty.UNDEFINED if the value is not valid, otherwise the value.
4228      * @deprecated This API is ICU internal only.
4229      * @hide original deprecated declaration
4230      * @hide draft / provisional / internal are hidden on Android
4231      */
4232     @Deprecated
getPropertyValueEnumNoThrow(int property, CharSequence valueAlias)4233     public static int getPropertyValueEnumNoThrow(int property, CharSequence valueAlias) {
4234         return UPropertyAliases.INSTANCE.getPropertyValueEnumNoThrow(property, valueAlias);
4235     }
4236 
4237 
4238     /**
4239      * <strong>[icu]</strong> Returns a code point corresponding to the two surrogate code units.
4240      *
4241      * @param lead the lead char
4242      * @param trail the trail char
4243      * @return code point if surrogate characters are valid.
4244      * @exception IllegalArgumentException thrown when the code units do
4245      *            not form a valid code point
4246      */
getCodePoint(char lead, char trail)4247     public static int getCodePoint(char lead, char trail)
4248     {
4249         if (Character.isSurrogatePair(lead, trail)) {
4250             return Character.toCodePoint(lead, trail);
4251         }
4252         throw new IllegalArgumentException("Illegal surrogate characters");
4253     }
4254 
4255     /**
4256      * <strong>[icu]</strong> Returns the code point corresponding to the BMP code point.
4257      *
4258      * @param char16 the BMP code point
4259      * @return code point if argument is a valid character.
4260      * @exception IllegalArgumentException thrown when char16 is not a valid
4261      *            code point
4262      */
getCodePoint(char char16)4263     public static int getCodePoint(char char16)
4264     {
4265         if (UCharacter.isLegal(char16)) {
4266             return char16;
4267         }
4268         throw new IllegalArgumentException("Illegal codepoint");
4269     }
4270 
4271     /**
4272      * Returns the uppercase version of the argument string.
4273      * Casing is dependent on the default locale and context-sensitive.
4274      * @param str source string to be performed on
4275      * @return uppercase version of the argument string
4276      */
toUpperCase(String str)4277     public static String toUpperCase(String str)
4278     {
4279         return toUpperCase(getDefaultCaseLocale(), str);
4280     }
4281 
4282     /**
4283      * Returns the lowercase version of the argument string.
4284      * Casing is dependent on the default locale and context-sensitive
4285      * @param str source string to be performed on
4286      * @return lowercase version of the argument string
4287      */
toLowerCase(String str)4288     public static String toLowerCase(String str)
4289     {
4290         return toLowerCase(getDefaultCaseLocale(), str);
4291     }
4292 
4293     /**
4294      * <p>Returns the titlecase version of the argument string.
4295      * <p>Position for titlecasing is determined by the argument break
4296      * iterator, hence the user can customize his break iterator for
4297      * a specialized titlecasing. In this case only the forward iteration
4298      * needs to be implemented.
4299      * If the break iterator passed in is null, the default Unicode algorithm
4300      * will be used to determine the titlecase positions.
4301      *
4302      * <p>Only positions returned by the break iterator will be title cased,
4303      * character in between the positions will all be in lower case.
4304      * <p>Casing is dependent on the default locale and context-sensitive
4305      * @param str source string to be performed on
4306      * @param breakiter break iterator to determine the positions in which
4307      *        the character should be title cased.
4308      * @return lowercase version of the argument string
4309      */
toTitleCase(String str, BreakIterator breakiter)4310     public static String toTitleCase(String str, BreakIterator breakiter)
4311     {
4312         return toTitleCase(Locale.getDefault(), str, breakiter, 0);
4313     }
4314 
getDefaultCaseLocale()4315     private static int getDefaultCaseLocale() {
4316         return UCaseProps.getCaseLocale(Locale.getDefault());
4317     }
4318 
getCaseLocale(Locale locale)4319     private static int getCaseLocale(Locale locale) {
4320         if (locale == null) {
4321             locale = Locale.getDefault();
4322         }
4323         return UCaseProps.getCaseLocale(locale);
4324     }
4325 
getCaseLocale(ULocale locale)4326     private static int getCaseLocale(ULocale locale) {
4327         if (locale == null) {
4328             locale = ULocale.getDefault();
4329         }
4330         return UCaseProps.getCaseLocale(locale);
4331     }
4332 
toLowerCase(int caseLocale, String str)4333     private static String toLowerCase(int caseLocale, String str) {
4334         if (str.length() <= 100) {
4335             if (str.isEmpty()) {
4336                 return str;
4337             }
4338             // Collect and apply only changes.
4339             // Good if no or few changes. Bad (slow) if many changes.
4340             Edits edits = new Edits();
4341             StringBuilder replacementChars = CaseMapImpl.toLower(
4342                     caseLocale, CaseMapImpl.OMIT_UNCHANGED_TEXT, str, new StringBuilder(), edits);
4343             return applyEdits(str, replacementChars, edits);
4344         } else {
4345             return CaseMapImpl.toLower(caseLocale, 0, str,
4346                     new StringBuilder(str.length()), null).toString();
4347         }
4348     }
4349 
toUpperCase(int caseLocale, String str)4350     private static String toUpperCase(int caseLocale, String str) {
4351         if (str.length() <= 100) {
4352             if (str.isEmpty()) {
4353                 return str;
4354             }
4355             // Collect and apply only changes.
4356             // Good if no or few changes. Bad (slow) if many changes.
4357             Edits edits = new Edits();
4358             StringBuilder replacementChars = CaseMapImpl.toUpper(
4359                     caseLocale, CaseMapImpl.OMIT_UNCHANGED_TEXT, str, new StringBuilder(), edits);
4360             return applyEdits(str, replacementChars, edits);
4361         } else {
4362             return CaseMapImpl.toUpper(caseLocale, 0, str,
4363                     new StringBuilder(str.length()), null).toString();
4364         }
4365     }
4366 
toTitleCase(int caseLocale, int options, BreakIterator titleIter, String str)4367     private static String toTitleCase(int caseLocale, int options, BreakIterator titleIter, String str) {
4368         if (str.length() <= 100) {
4369             if (str.isEmpty()) {
4370                 return str;
4371             }
4372             // Collect and apply only changes.
4373             // Good if no or few changes. Bad (slow) if many changes.
4374             Edits edits = new Edits();
4375             StringBuilder replacementChars = CaseMapImpl.toTitle(
4376                     caseLocale, options | CaseMapImpl.OMIT_UNCHANGED_TEXT, titleIter, str,
4377                     new StringBuilder(), edits);
4378             return applyEdits(str, replacementChars, edits);
4379         } else {
4380             return CaseMapImpl.toTitle(caseLocale, options, titleIter, str,
4381                     new StringBuilder(str.length()), null).toString();
4382         }
4383     }
4384 
applyEdits(String str, StringBuilder replacementChars, Edits edits)4385     private static String applyEdits(String str, StringBuilder replacementChars, Edits edits) {
4386         if (!edits.hasChanges()) {
4387             return str;
4388         }
4389         StringBuilder result = new StringBuilder(str.length() + edits.lengthDelta());
4390         for (Edits.Iterator ei = edits.getCoarseIterator(); ei.next();) {
4391             if (ei.hasChange()) {
4392                 int i = ei.replacementIndex();
4393                 result.append(replacementChars, i, i + ei.newLength());
4394             } else {
4395                 int i = ei.sourceIndex();
4396                 result.append(str, i, i + ei.oldLength());
4397             }
4398         }
4399         return result.toString();
4400     }
4401 
4402     /**
4403      * Returns the uppercase version of the argument string.
4404      * Casing is dependent on the argument locale and context-sensitive.
4405      * @param locale which string is to be converted in
4406      * @param str source string to be performed on
4407      * @return uppercase version of the argument string
4408      */
toUpperCase(Locale locale, String str)4409     public static String toUpperCase(Locale locale, String str)
4410     {
4411         return toUpperCase(getCaseLocale(locale), str);
4412     }
4413 
4414     /**
4415      * Returns the uppercase version of the argument string.
4416      * Casing is dependent on the argument locale and context-sensitive.
4417      * @param locale which string is to be converted in
4418      * @param str source string to be performed on
4419      * @return uppercase version of the argument string
4420      */
toUpperCase(ULocale locale, String str)4421     public static String toUpperCase(ULocale locale, String str) {
4422         return toUpperCase(getCaseLocale(locale), str);
4423     }
4424 
4425     /**
4426      * Returns the lowercase version of the argument string.
4427      * Casing is dependent on the argument locale and context-sensitive
4428      * @param locale which string is to be converted in
4429      * @param str source string to be performed on
4430      * @return lowercase version of the argument string
4431      */
toLowerCase(Locale locale, String str)4432     public static String toLowerCase(Locale locale, String str)
4433     {
4434         return toLowerCase(getCaseLocale(locale), str);
4435     }
4436 
4437     /**
4438      * Returns the lowercase version of the argument string.
4439      * Casing is dependent on the argument locale and context-sensitive
4440      * @param locale which string is to be converted in
4441      * @param str source string to be performed on
4442      * @return lowercase version of the argument string
4443      */
toLowerCase(ULocale locale, String str)4444     public static String toLowerCase(ULocale locale, String str) {
4445         return toLowerCase(getCaseLocale(locale), str);
4446     }
4447 
4448     /**
4449      * <p>Returns the titlecase version of the argument string.
4450      * <p>Position for titlecasing is determined by the argument break
4451      * iterator, hence the user can customize his break iterator for
4452      * a specialized titlecasing. In this case only the forward iteration
4453      * needs to be implemented.
4454      * If the break iterator passed in is null, the default Unicode algorithm
4455      * will be used to determine the titlecase positions.
4456      *
4457      * <p>Only positions returned by the break iterator will be title cased,
4458      * character in between the positions will all be in lower case.
4459      * <p>Casing is dependent on the argument locale and context-sensitive
4460      * @param locale which string is to be converted in
4461      * @param str source string to be performed on
4462      * @param breakiter break iterator to determine the positions in which
4463      *        the character should be title cased.
4464      * @return lowercase version of the argument string
4465      */
toTitleCase(Locale locale, String str, BreakIterator breakiter)4466     public static String toTitleCase(Locale locale, String str,
4467             BreakIterator breakiter)
4468     {
4469         return toTitleCase(locale, str, breakiter, 0);
4470     }
4471 
4472     /**
4473      * <p>Returns the titlecase version of the argument string.
4474      * <p>Position for titlecasing is determined by the argument break
4475      * iterator, hence the user can customize his break iterator for
4476      * a specialized titlecasing. In this case only the forward iteration
4477      * needs to be implemented.
4478      * If the break iterator passed in is null, the default Unicode algorithm
4479      * will be used to determine the titlecase positions.
4480      *
4481      * <p>Only positions returned by the break iterator will be title cased,
4482      * character in between the positions will all be in lower case.
4483      * <p>Casing is dependent on the argument locale and context-sensitive
4484      * @param locale which string is to be converted in
4485      * @param str source string to be performed on
4486      * @param titleIter break iterator to determine the positions in which
4487      *        the character should be title cased.
4488      * @return lowercase version of the argument string
4489      */
toTitleCase(ULocale locale, String str, BreakIterator titleIter)4490     public static String toTitleCase(ULocale locale, String str,
4491             BreakIterator titleIter) {
4492         return toTitleCase(locale, str, titleIter, 0);
4493     }
4494 
4495     /**
4496      * <p>Returns the titlecase version of the argument string.
4497      * <p>Position for titlecasing is determined by the argument break
4498      * iterator, hence the user can customize his break iterator for
4499      * a specialized titlecasing. In this case only the forward iteration
4500      * needs to be implemented.
4501      * If the break iterator passed in is null, the default Unicode algorithm
4502      * will be used to determine the titlecase positions.
4503      *
4504      * <p>Only positions returned by the break iterator will be title cased,
4505      * character in between the positions will all be in lower case.
4506      * <p>Casing is dependent on the argument locale and context-sensitive
4507      * @param locale which string is to be converted in
4508      * @param str source string to be performed on
4509      * @param titleIter break iterator to determine the positions in which
4510      *        the character should be title cased.
4511      * @param options bit set to modify the titlecasing operation
4512      * @return lowercase version of the argument string
4513      * @see #TITLECASE_NO_LOWERCASE
4514      * @see #TITLECASE_NO_BREAK_ADJUSTMENT
4515      */
toTitleCase(ULocale locale, String str, BreakIterator titleIter, int options)4516     public static String toTitleCase(ULocale locale, String str,
4517             BreakIterator titleIter, int options) {
4518         if(titleIter == null) {
4519             if (locale == null) {
4520                 locale = ULocale.getDefault();
4521             }
4522             titleIter = BreakIterator.getWordInstance(locale);
4523         }
4524         titleIter.setText(str);
4525         return toTitleCase(getCaseLocale(locale), options, titleIter, str);
4526     }
4527 
4528 
4529     private static final int BREAK_MASK =
4530             (1<<UCharacterCategory.DECIMAL_DIGIT_NUMBER)
4531             | (1<<UCharacterCategory.OTHER_LETTER)
4532             | (1<<UCharacterCategory.MODIFIER_LETTER);
4533 
4534     /**
4535      * Return a string with just the first word titlecased, for menus and UI, etc. This does not affect most of the string,
4536      * and sometimes has no effect at all; the original string is returned whenever casing
4537      * would not be appropriate for the first word (such as for CJK characters or initial numbers).
4538      * Initial non-letters are skipped in order to find the character to change.
4539      * Characters past the first affected are left untouched: see also TITLECASE_NO_LOWERCASE.
4540      * <p>Examples:
4541      * <table border='1'><tr><th>Source</th><th>Result</th><th>Locale</th></tr>
4542      * <tr><td>anglo-American locale</td><td>Anglo-American locale</td></tr>
4543      * <tr><td>“contact us”</td><td>“Contact us”</td></tr>
4544      * <tr><td>49ers win!</td><td>49ers win!</td></tr>
4545      * <tr><td>丰(abc)</td><td>丰(abc)</td></tr>
4546      * <tr><td>«ijs»</td><td>«Ijs»</td></tr>
4547      * <tr><td>«ijs»</td><td>«IJs»</td><td>nl-BE</td></tr>
4548      * <tr><td>«ijs»</td><td>«İjs»</td><td>tr-DE</td></tr>
4549      * </table>
4550      * @param locale the locale for accessing exceptional behavior (eg for tr).
4551      * @param str the source string to change
4552      * @return the modified string, or the original if no modifications were necessary.
4553      * @deprecated ICU internal only
4554      * @hide original deprecated declaration
4555      * @hide draft / provisional / internal are hidden on Android
4556      */
4557     @Deprecated
toTitleFirst(ULocale locale, String str)4558     public static String toTitleFirst(ULocale locale, String str) {
4559         int c = 0;
4560         for (int i = 0; i < str.length(); i += UCharacter.charCount(c)) {
4561             c = UCharacter.codePointAt(str, i);
4562             int propertyMask = UCharacter.getIntPropertyValue(c, UProperty.GENERAL_CATEGORY_MASK);
4563             if ((propertyMask & BREAK_MASK) != 0) { // handle "49ers", initial CJK
4564                 break;
4565             }
4566             if (UCaseProps.INSTANCE.getType(c) == UCaseProps.NONE) {
4567                 continue;
4568             }
4569 
4570             // we now have the first cased character
4571             // What we really want is something like:
4572             // String titled = UCharacter.toTitleCase(locale, str, i, outputCharsTaken);
4573             // That is, just give us the titlecased string, for the locale, at i and following,
4574             // and tell us how many characters are replaced.
4575             // The following won't work completely: it needs some more substantial changes to UCaseProps
4576 
4577             String substring = str.substring(i, i+UCharacter.charCount(c));
4578             String titled = UCharacter.toTitleCase(locale, substring, BreakIterator.getSentenceInstance(locale), 0);
4579 
4580             // skip if no change
4581             if (titled.codePointAt(0) == c) {
4582                 // Using 0 is safe, since any change in titling will not have first initial character
4583                 break;
4584             }
4585             StringBuilder result = new StringBuilder(str.length()).append(str, 0, i);
4586             int startOfSuffix;
4587 
4588             // handle dutch, but check first for 'i', since that's faster. Should be built into UCaseProps.
4589 
4590             if (c == 'i' && locale.getLanguage().equals("nl") && i < str.length() && str.charAt(i+1) == 'j') {
4591                 result.append("IJ");
4592                 startOfSuffix = 2;
4593             } else {
4594                 result.append(titled);
4595                 startOfSuffix = i + UCharacter.charCount(c);
4596             }
4597 
4598             // add the remainder, and return
4599             return result.append(str, startOfSuffix, str.length()).toString();
4600         }
4601         return str; // no change
4602     }
4603 
4604     /**
4605      * <strong>[icu]</strong> <p>Returns the titlecase version of the argument string.
4606      * <p>Position for titlecasing is determined by the argument break
4607      * iterator, hence the user can customize his break iterator for
4608      * a specialized titlecasing. In this case only the forward iteration
4609      * needs to be implemented.
4610      * If the break iterator passed in is null, the default Unicode algorithm
4611      * will be used to determine the titlecase positions.
4612      *
4613      * <p>Only positions returned by the break iterator will be title cased,
4614      * character in between the positions will all be in lower case.
4615      * <p>Casing is dependent on the argument locale and context-sensitive
4616      * @param locale which string is to be converted in
4617      * @param str source string to be performed on
4618      * @param titleIter break iterator to determine the positions in which
4619      *        the character should be title cased.
4620      * @param options bit set to modify the titlecasing operation
4621      * @return lowercase version of the argument string
4622      * @see #TITLECASE_NO_LOWERCASE
4623      * @see #TITLECASE_NO_BREAK_ADJUSTMENT
4624      */
toTitleCase(Locale locale, String str, BreakIterator titleIter, int options)4625     public static String toTitleCase(Locale locale, String str,
4626             BreakIterator titleIter,
4627             int options) {
4628         if(titleIter == null) {
4629             titleIter = BreakIterator.getWordInstance(locale);
4630         }
4631         titleIter.setText(str);
4632         return toTitleCase(getCaseLocale(locale), options, titleIter, str);
4633     }
4634 
4635     /**
4636      * <strong>[icu]</strong> The given character is mapped to its case folding equivalent according
4637      * to UnicodeData.txt and CaseFolding.txt; if the character has no case
4638      * folding equivalent, the character itself is returned.
4639      *
4640      * <p>This function only returns the simple, single-code point case mapping.
4641      * Full case mappings should be used whenever possible because they produce
4642      * better results by working on whole strings.
4643      * They can map to a result string with a different length as appropriate.
4644      * Full case mappings are applied by the case mapping functions
4645      * that take String parameters rather than code points (int).
4646      * See also the User Guide chapter on C/POSIX migration:
4647      * http://www.icu-project.org/userguide/posix.html#case_mappings
4648      *
4649      * @param ch             the character to be converted
4650      * @param defaultmapping Indicates whether the default mappings defined in
4651      *                       CaseFolding.txt are to be used, otherwise the
4652      *                       mappings for dotted I and dotless i marked with
4653      *                       'T' in CaseFolding.txt are included.
4654      * @return               the case folding equivalent of the character, if
4655      *                       any; otherwise the character itself.
4656      * @see                  #foldCase(String, boolean)
4657      */
foldCase(int ch, boolean defaultmapping)4658     public static int foldCase(int ch, boolean defaultmapping) {
4659         return foldCase(ch, defaultmapping ? FOLD_CASE_DEFAULT : FOLD_CASE_EXCLUDE_SPECIAL_I);
4660     }
4661 
4662     /**
4663      * <strong>[icu]</strong> The given string is mapped to its case folding equivalent according to
4664      * UnicodeData.txt and CaseFolding.txt; if any character has no case
4665      * folding equivalent, the character itself is returned.
4666      * "Full", multiple-code point case folding mappings are returned here.
4667      * For "simple" single-code point mappings use the API
4668      * foldCase(int ch, boolean defaultmapping).
4669      * @param str            the String to be converted
4670      * @param defaultmapping Indicates whether the default mappings defined in
4671      *                       CaseFolding.txt are to be used, otherwise the
4672      *                       mappings for dotted I and dotless i marked with
4673      *                       'T' in CaseFolding.txt are included.
4674      * @return               the case folding equivalent of the character, if
4675      *                       any; otherwise the character itself.
4676      * @see                  #foldCase(int, boolean)
4677      */
foldCase(String str, boolean defaultmapping)4678     public static String foldCase(String str, boolean defaultmapping) {
4679         return foldCase(str, defaultmapping ? FOLD_CASE_DEFAULT : FOLD_CASE_EXCLUDE_SPECIAL_I);
4680     }
4681 
4682     /**
4683      * <strong>[icu]</strong> Option value for case folding: use default mappings defined in
4684      * CaseFolding.txt.
4685      */
4686     public static final int FOLD_CASE_DEFAULT    =      0x0000;
4687     /**
4688      * <strong>[icu]</strong> Option value for case folding:
4689      * Use the modified set of mappings provided in CaseFolding.txt to handle dotted I
4690      * and dotless i appropriately for Turkic languages (tr, az).
4691      *
4692      * <p>Before Unicode 3.2, CaseFolding.txt contains mappings marked with 'I' that
4693      * are to be included for default mappings and
4694      * excluded for the Turkic-specific mappings.
4695      *
4696      * <p>Unicode 3.2 CaseFolding.txt instead contains mappings marked with 'T' that
4697      * are to be excluded for default mappings and
4698      * included for the Turkic-specific mappings.
4699      */
4700     public static final int FOLD_CASE_EXCLUDE_SPECIAL_I = 0x0001;
4701 
4702     /**
4703      * <strong>[icu]</strong> The given character is mapped to its case folding equivalent according
4704      * to UnicodeData.txt and CaseFolding.txt; if the character has no case
4705      * folding equivalent, the character itself is returned.
4706      *
4707      * <p>This function only returns the simple, single-code point case mapping.
4708      * Full case mappings should be used whenever possible because they produce
4709      * better results by working on whole strings.
4710      * They can map to a result string with a different length as appropriate.
4711      * Full case mappings are applied by the case mapping functions
4712      * that take String parameters rather than code points (int).
4713      * See also the User Guide chapter on C/POSIX migration:
4714      * http://www.icu-project.org/userguide/posix.html#case_mappings
4715      *
4716      * @param ch the character to be converted
4717      * @param options A bit set for special processing. Currently the recognised options
4718      * are FOLD_CASE_EXCLUDE_SPECIAL_I and FOLD_CASE_DEFAULT
4719      * @return the case folding equivalent of the character, if any; otherwise the
4720      * character itself.
4721      * @see #foldCase(String, boolean)
4722      */
foldCase(int ch, int options)4723     public static int foldCase(int ch, int options) {
4724         return UCaseProps.INSTANCE.fold(ch, options);
4725     }
4726 
4727     /**
4728      * <strong>[icu]</strong> The given string is mapped to its case folding equivalent according to
4729      * UnicodeData.txt and CaseFolding.txt; if any character has no case
4730      * folding equivalent, the character itself is returned.
4731      * "Full", multiple-code point case folding mappings are returned here.
4732      * For "simple" single-code point mappings use the API
4733      * foldCase(int ch, boolean defaultmapping).
4734      * @param str the String to be converted
4735      * @param options A bit set for special processing. Currently the recognised options
4736      *                are FOLD_CASE_EXCLUDE_SPECIAL_I and FOLD_CASE_DEFAULT
4737      * @return the case folding equivalent of the character, if any; otherwise the
4738      *         character itself.
4739      * @see #foldCase(int, boolean)
4740      */
foldCase(String str, int options)4741     public static final String foldCase(String str, int options) {
4742         if (str.length() <= 100) {
4743             if (str.isEmpty()) {
4744                 return str;
4745             }
4746             // Collect and apply only changes.
4747             // Good if no or few changes. Bad (slow) if many changes.
4748             Edits edits = new Edits();
4749             StringBuilder replacementChars = CaseMapImpl.fold(
4750                     options | CaseMapImpl.OMIT_UNCHANGED_TEXT, str, new StringBuilder(), edits);
4751             return applyEdits(str, replacementChars, edits);
4752         } else {
4753             return CaseMapImpl.fold(options, str, new StringBuilder(str.length()), null).toString();
4754         }
4755     }
4756 
4757     /**
4758      * <strong>[icu]</strong> Returns the numeric value of a Han character.
4759      *
4760      * <p>This returns the value of Han 'numeric' code points,
4761      * including those for zero, ten, hundred, thousand, ten thousand,
4762      * and hundred million.
4763      * This includes both the standard and 'checkwriting'
4764      * characters, the 'big circle' zero character, and the standard
4765      * zero character.
4766      *
4767      * <p>Note: The Unicode Standard has numeric values for more
4768      * Han characters recognized by this method
4769      * (see {@link #getNumericValue(int)} and the UCD file DerivedNumericValues.txt),
4770      * and a {@link android.icu.text.NumberFormat} can be used with
4771      * a Chinese {@link android.icu.text.NumberingSystem}.
4772      *
4773      * @param ch code point to query
4774      * @return value if it is a Han 'numeric character,' otherwise return -1.
4775      */
getHanNumericValue(int ch)4776     public static int getHanNumericValue(int ch)
4777     {
4778         switch(ch)
4779         {
4780         case IDEOGRAPHIC_NUMBER_ZERO_ :
4781         case CJK_IDEOGRAPH_COMPLEX_ZERO_ :
4782             return 0; // Han Zero
4783         case CJK_IDEOGRAPH_FIRST_ :
4784         case CJK_IDEOGRAPH_COMPLEX_ONE_ :
4785             return 1; // Han One
4786         case CJK_IDEOGRAPH_SECOND_ :
4787         case CJK_IDEOGRAPH_COMPLEX_TWO_ :
4788             return 2; // Han Two
4789         case CJK_IDEOGRAPH_THIRD_ :
4790         case CJK_IDEOGRAPH_COMPLEX_THREE_ :
4791             return 3; // Han Three
4792         case CJK_IDEOGRAPH_FOURTH_ :
4793         case CJK_IDEOGRAPH_COMPLEX_FOUR_ :
4794             return 4; // Han Four
4795         case CJK_IDEOGRAPH_FIFTH_ :
4796         case CJK_IDEOGRAPH_COMPLEX_FIVE_ :
4797             return 5; // Han Five
4798         case CJK_IDEOGRAPH_SIXTH_ :
4799         case CJK_IDEOGRAPH_COMPLEX_SIX_ :
4800             return 6; // Han Six
4801         case CJK_IDEOGRAPH_SEVENTH_ :
4802         case CJK_IDEOGRAPH_COMPLEX_SEVEN_ :
4803             return 7; // Han Seven
4804         case CJK_IDEOGRAPH_EIGHTH_ :
4805         case CJK_IDEOGRAPH_COMPLEX_EIGHT_ :
4806             return 8; // Han Eight
4807         case CJK_IDEOGRAPH_NINETH_ :
4808         case CJK_IDEOGRAPH_COMPLEX_NINE_ :
4809             return 9; // Han Nine
4810         case CJK_IDEOGRAPH_TEN_ :
4811         case CJK_IDEOGRAPH_COMPLEX_TEN_ :
4812             return 10;
4813         case CJK_IDEOGRAPH_HUNDRED_ :
4814         case CJK_IDEOGRAPH_COMPLEX_HUNDRED_ :
4815             return 100;
4816         case CJK_IDEOGRAPH_THOUSAND_ :
4817         case CJK_IDEOGRAPH_COMPLEX_THOUSAND_ :
4818             return 1000;
4819         case CJK_IDEOGRAPH_TEN_THOUSAND_ :
4820             return 10000;
4821         case CJK_IDEOGRAPH_HUNDRED_MILLION_ :
4822             return 100000000;
4823         }
4824         return -1; // no value
4825     }
4826 
4827     /**
4828      * <strong>[icu]</strong> <p>Returns an iterator for character types, iterating over codepoints.
4829      * <p>Example of use:<br>
4830      * <pre>
4831      * RangeValueIterator iterator = UCharacter.getTypeIterator();
4832      * RangeValueIterator.Element element = new RangeValueIterator.Element();
4833      * while (iterator.next(element)) {
4834      *     System.out.println("Codepoint \\u" +
4835      *                        Integer.toHexString(element.start) +
4836      *                        " to codepoint \\u" +
4837      *                        Integer.toHexString(element.limit - 1) +
4838      *                        " has the character type " +
4839      *                        element.value);
4840      * }
4841      * </pre>
4842      * @return an iterator
4843      */
getTypeIterator()4844     public static RangeValueIterator getTypeIterator()
4845     {
4846         return new UCharacterTypeIterator();
4847     }
4848 
4849     private static final class UCharacterTypeIterator implements RangeValueIterator {
UCharacterTypeIterator()4850         UCharacterTypeIterator() {
4851             reset();
4852         }
4853 
4854         // implements RangeValueIterator
4855         @Override
next(Element element)4856         public boolean next(Element element) {
4857             if(trieIterator.hasNext() && !(range=trieIterator.next()).leadSurrogate) {
4858                 element.start=range.startCodePoint;
4859                 element.limit=range.endCodePoint+1;
4860                 element.value=range.value;
4861                 return true;
4862             } else {
4863                 return false;
4864             }
4865         }
4866 
4867         // implements RangeValueIterator
4868         @Override
reset()4869         public void reset() {
4870             trieIterator=UCharacterProperty.INSTANCE.m_trie_.iterator(MASK_TYPE);
4871         }
4872 
4873         private Iterator<Trie2.Range> trieIterator;
4874         private Trie2.Range range;
4875 
4876         private static final class MaskType implements Trie2.ValueMapper {
4877             // Extracts the general category ("character type") from the trie value.
4878             @Override
map(int value)4879             public int map(int value) {
4880                 return value & UCharacterProperty.TYPE_MASK;
4881             }
4882         }
4883         private static final MaskType MASK_TYPE=new MaskType();
4884     }
4885 
4886     /**
4887      * <strong>[icu]</strong> <p>Returns an iterator for character names, iterating over codepoints.
4888      * <p>This API only gets the iterator for the modern, most up-to-date
4889      * Unicode names. For older 1.0 Unicode names use get1_0NameIterator() or
4890      * for extended names use getExtendedNameIterator().
4891      * <p>Example of use:<br>
4892      * <pre>
4893      * ValueIterator iterator = UCharacter.getNameIterator();
4894      * ValueIterator.Element element = new ValueIterator.Element();
4895      * while (iterator.next(element)) {
4896      *     System.out.println("Codepoint \\u" +
4897      *                        Integer.toHexString(element.codepoint) +
4898      *                        " has the name " + (String)element.value);
4899      * }
4900      * </pre>
4901      * <p>The maximal range which the name iterator iterates is from
4902      * UCharacter.MIN_VALUE to UCharacter.MAX_VALUE.
4903      * @return an iterator
4904      */
getNameIterator()4905     public static ValueIterator getNameIterator(){
4906         return new UCharacterNameIterator(UCharacterName.INSTANCE,
4907                 UCharacterNameChoice.UNICODE_CHAR_NAME);
4908     }
4909 
4910     /**
4911      * <strong>[icu]</strong> Returns an empty iterator.
4912      * <p>Used to return an iterator for the older 1.0 Unicode character names, iterating over codepoints.
4913      * @return an empty iterator
4914      * @deprecated ICU 49
4915      * @see #getName1_0(int)
4916      * @hide original deprecated declaration
4917      */
4918     @Deprecated
getName1_0Iterator()4919     public static ValueIterator getName1_0Iterator(){
4920         return new DummyValueIterator();
4921     }
4922 
4923     private static final class DummyValueIterator implements ValueIterator {
4924         @Override
next(Element element)4925         public boolean next(Element element) { return false; }
4926         @Override
reset()4927         public void reset() {}
4928         @Override
setRange(int start, int limit)4929         public void setRange(int start, int limit) {}
4930     }
4931 
4932     /**
4933      * <strong>[icu]</strong> <p>Returns an iterator for character names, iterating over codepoints.
4934      * <p>This API only gets the iterator for the extended names.
4935      * For modern, most up-to-date Unicode names use getNameIterator() or
4936      * for older 1.0 Unicode names use get1_0NameIterator().
4937      * <p>Example of use:<br>
4938      * <pre>
4939      * ValueIterator iterator = UCharacter.getExtendedNameIterator();
4940      * ValueIterator.Element element = new ValueIterator.Element();
4941      * while (iterator.next(element)) {
4942      *     System.out.println("Codepoint \\u" +
4943      *                        Integer.toHexString(element.codepoint) +
4944      *                        " has the name " + (String)element.value);
4945      * }
4946      * </pre>
4947      * <p>The maximal range which the name iterator iterates is from
4948      * @return an iterator
4949      */
getExtendedNameIterator()4950     public static ValueIterator getExtendedNameIterator(){
4951         return new UCharacterNameIterator(UCharacterName.INSTANCE,
4952                 UCharacterNameChoice.EXTENDED_CHAR_NAME);
4953     }
4954 
4955     /**
4956      * <strong>[icu]</strong> Returns the "age" of the code point.
4957      * <p>The "age" is the Unicode version when the code point was first
4958      * designated (as a non-character or for Private Use) or assigned a
4959      * character.
4960      * <p>This can be useful to avoid emitting code points to receiving
4961      * processes that do not accept newer characters.
4962      * <p>The data is from the UCD file DerivedAge.txt.
4963      * @param ch The code point.
4964      * @return the Unicode version number
4965      */
getAge(int ch)4966     public static VersionInfo getAge(int ch)
4967     {
4968         if (ch < MIN_VALUE || ch > MAX_VALUE) {
4969             throw new IllegalArgumentException("Codepoint out of bounds");
4970         }
4971         return UCharacterProperty.INSTANCE.getAge(ch);
4972     }
4973 
4974     /**
4975      * <strong>[icu]</strong> <p>Check a binary Unicode property for a code point.
4976      * <p>Unicode, especially in version 3.2, defines many more properties
4977      * than the original set in UnicodeData.txt.
4978      * <p>This API is intended to reflect Unicode properties as defined in
4979      * the Unicode Character Database (UCD) and Unicode Technical Reports
4980      * (UTR).
4981      * <p>For details about the properties see
4982      * <a href=http://www.unicode.org/>http://www.unicode.org/</a>.
4983      * <p>For names of Unicode properties see the UCD file
4984      * PropertyAliases.txt.
4985      * <p>This API does not check the validity of the codepoint.
4986      * <p>Important: If ICU is built with UCD files from Unicode versions
4987      * below 3.2, then properties marked with "new" are not or
4988      * not fully available.
4989      * @param ch code point to test.
4990      * @param property selector constant from android.icu.lang.UProperty,
4991      *        identifies which binary property to check.
4992      * @return true or false according to the binary Unicode property value
4993      *         for ch. Also false if property is out of bounds or if the
4994      *         Unicode version does not have data for the property at all, or
4995      *         not for this code point.
4996      * @see android.icu.lang.UProperty
4997      */
hasBinaryProperty(int ch, int property)4998     public static boolean hasBinaryProperty(int ch, int property)
4999     {
5000         return UCharacterProperty.INSTANCE.hasBinaryProperty(ch, property);
5001     }
5002 
5003     /**
5004      * <strong>[icu]</strong> <p>Check if a code point has the Alphabetic Unicode property.
5005      * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.ALPHABETIC).
5006      * <p>Different from UCharacter.isLetter(ch)!
5007      * @param ch codepoint to be tested
5008      */
isUAlphabetic(int ch)5009     public static boolean isUAlphabetic(int ch)
5010     {
5011         return hasBinaryProperty(ch, UProperty.ALPHABETIC);
5012     }
5013 
5014     /**
5015      * <strong>[icu]</strong> <p>Check if a code point has the Lowercase Unicode property.
5016      * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.LOWERCASE).
5017      * <p>This is different from UCharacter.isLowerCase(ch)!
5018      * @param ch codepoint to be tested
5019      */
isULowercase(int ch)5020     public static boolean isULowercase(int ch)
5021     {
5022         return hasBinaryProperty(ch, UProperty.LOWERCASE);
5023     }
5024 
5025     /**
5026      * <strong>[icu]</strong> <p>Check if a code point has the Uppercase Unicode property.
5027      * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.UPPERCASE).
5028      * <p>This is different from UCharacter.isUpperCase(ch)!
5029      * @param ch codepoint to be tested
5030      */
isUUppercase(int ch)5031     public static boolean isUUppercase(int ch)
5032     {
5033         return hasBinaryProperty(ch, UProperty.UPPERCASE);
5034     }
5035 
5036     /**
5037      * <strong>[icu]</strong> <p>Check if a code point has the White_Space Unicode property.
5038      * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.WHITE_SPACE).
5039      * <p>This is different from both UCharacter.isSpace(ch) and
5040      * UCharacter.isWhitespace(ch)!
5041      * @param ch codepoint to be tested
5042      */
isUWhiteSpace(int ch)5043     public static boolean isUWhiteSpace(int ch)
5044     {
5045         return hasBinaryProperty(ch, UProperty.WHITE_SPACE);
5046     }
5047 
5048     /**
5049      * <strong>[icu]</strong> <p>Returns the property value for an Unicode property type of a code point.
5050      * Also returns binary and mask property values.
5051      * <p>Unicode, especially in version 3.2, defines many more properties than
5052      * the original set in UnicodeData.txt.
5053      * <p>The properties APIs are intended to reflect Unicode properties as
5054      * defined in the Unicode Character Database (UCD) and Unicode Technical
5055      * Reports (UTR). For details about the properties see
5056      * http://www.unicode.org/.
5057      * <p>For names of Unicode properties see the UCD file PropertyAliases.txt.
5058      *
5059      * <pre>
5060      * Sample usage:
5061      * int ea = UCharacter.getIntPropertyValue(c, UProperty.EAST_ASIAN_WIDTH);
5062      * int ideo = UCharacter.getIntPropertyValue(c, UProperty.IDEOGRAPHIC);
5063      * boolean b = (ideo == 1) ? true : false;
5064      * </pre>
5065      * @param ch code point to test.
5066      * @param type UProperty selector constant, identifies which binary
5067      *        property to check. Must be
5068      *        UProperty.BINARY_START &lt;= type &lt; UProperty.BINARY_LIMIT or
5069      *        UProperty.INT_START &lt;= type &lt; UProperty.INT_LIMIT or
5070      *        UProperty.MASK_START &lt;= type &lt; UProperty.MASK_LIMIT.
5071      * @return numeric value that is directly the property value or,
5072      *         for enumerated properties, corresponds to the numeric value of
5073      *         the enumerated constant of the respective property value
5074      *         enumeration type (cast to enum type if necessary).
5075      *         Returns 0 or 1 (for false / true) for binary Unicode properties.
5076      *         Returns a bit-mask for mask properties.
5077      *         Returns 0 if 'type' is out of bounds or if the Unicode version
5078      *         does not have data for the property at all, or not for this code
5079      *         point.
5080      * @see UProperty
5081      * @see #hasBinaryProperty
5082      * @see #getIntPropertyMinValue
5083      * @see #getIntPropertyMaxValue
5084      * @see #getUnicodeVersion
5085      */
getIntPropertyValue(int ch, int type)5086     public static int getIntPropertyValue(int ch, int type)
5087     {
5088         return UCharacterProperty.INSTANCE.getIntPropertyValue(ch, type);
5089     }
5090     /**
5091      * <strong>[icu]</strong> Returns a string version of the property value.
5092      * @param propertyEnum The property enum value.
5093      * @param codepoint The codepoint value.
5094      * @param nameChoice The choice of the name.
5095      * @return value as string
5096      * @deprecated This API is ICU internal only.
5097      * @hide original deprecated declaration
5098      * @hide draft / provisional / internal are hidden on Android
5099      */
5100     @Deprecated
5101     ///CLOVER:OFF
getStringPropertyValue(int propertyEnum, int codepoint, int nameChoice)5102     public static String getStringPropertyValue(int propertyEnum, int codepoint, int nameChoice) {
5103         if ((propertyEnum >= UProperty.BINARY_START && propertyEnum < UProperty.BINARY_LIMIT) ||
5104                 (propertyEnum >= UProperty.INT_START && propertyEnum < UProperty.INT_LIMIT)) {
5105             return getPropertyValueName(propertyEnum, getIntPropertyValue(codepoint, propertyEnum),
5106                     nameChoice);
5107         }
5108         if (propertyEnum == UProperty.NUMERIC_VALUE) {
5109             return String.valueOf(getUnicodeNumericValue(codepoint));
5110         }
5111         // otherwise must be string property
5112         switch (propertyEnum) {
5113         case UProperty.AGE: return getAge(codepoint).toString();
5114         case UProperty.ISO_COMMENT: return getISOComment(codepoint);
5115         case UProperty.BIDI_MIRRORING_GLYPH: return toString(getMirror(codepoint));
5116         case UProperty.CASE_FOLDING: return toString(foldCase(codepoint, true));
5117         case UProperty.LOWERCASE_MAPPING: return toString(toLowerCase(codepoint));
5118         case UProperty.NAME: return getName(codepoint);
5119         case UProperty.SIMPLE_CASE_FOLDING: return toString(foldCase(codepoint, true));
5120         case UProperty.SIMPLE_LOWERCASE_MAPPING: return toString(toLowerCase(codepoint));
5121         case UProperty.SIMPLE_TITLECASE_MAPPING: return toString(toTitleCase(codepoint));
5122         case UProperty.SIMPLE_UPPERCASE_MAPPING: return toString(toUpperCase(codepoint));
5123         case UProperty.TITLECASE_MAPPING: return toString(toTitleCase(codepoint));
5124         case UProperty.UNICODE_1_NAME: return getName1_0(codepoint);
5125         case UProperty.UPPERCASE_MAPPING: return toString(toUpperCase(codepoint));
5126         }
5127         throw new IllegalArgumentException("Illegal Property Enum");
5128     }
5129     ///CLOVER:ON
5130 
5131     /**
5132      * <strong>[icu]</strong> Returns the minimum value for an integer/binary Unicode property type.
5133      * Can be used together with UCharacter.getIntPropertyMaxValue(int)
5134      * to allocate arrays of android.icu.text.UnicodeSet or similar.
5135      * @param type UProperty selector constant, identifies which binary
5136      *        property to check. Must be
5137      *        UProperty.BINARY_START &lt;= type &lt; UProperty.BINARY_LIMIT or
5138      *        UProperty.INT_START &lt;= type &lt; UProperty.INT_LIMIT.
5139      * @return Minimum value returned by UCharacter.getIntPropertyValue(int)
5140      *         for a Unicode property. 0 if the property
5141      *         selector 'type' is out of range.
5142      * @see UProperty
5143      * @see #hasBinaryProperty
5144      * @see #getUnicodeVersion
5145      * @see #getIntPropertyMaxValue
5146      * @see #getIntPropertyValue
5147      */
getIntPropertyMinValue(int type)5148     public static int getIntPropertyMinValue(int type){
5149 
5150         return 0; // undefined; and: all other properties have a minimum value of 0
5151     }
5152 
5153 
5154     /**
5155      * <strong>[icu]</strong> Returns the maximum value for an integer/binary Unicode property.
5156      * Can be used together with UCharacter.getIntPropertyMinValue(int)
5157      * to allocate arrays of android.icu.text.UnicodeSet or similar.
5158      * Examples for min/max values (for Unicode 3.2):
5159      * <ul>
5160      * <li> UProperty.BIDI_CLASS:    0/18
5161      * (UCharacterDirection.LEFT_TO_RIGHT/UCharacterDirection.BOUNDARY_NEUTRAL)
5162      * <li> UProperty.SCRIPT:        0/45 (UScript.COMMON/UScript.TAGBANWA)
5163      * <li> UProperty.IDEOGRAPHIC:   0/1  (false/true)
5164      * </ul>
5165      * For undefined UProperty constant values, min/max values will be 0/-1.
5166      * @param type UProperty selector constant, identifies which binary
5167      *        property to check. Must be
5168      *        UProperty.BINARY_START &lt;= type &lt; UProperty.BINARY_LIMIT or
5169      *        UProperty.INT_START &lt;= type &lt; UProperty.INT_LIMIT.
5170      * @return Maximum value returned by u_getIntPropertyValue for a Unicode
5171      *         property. &lt;= 0 if the property selector 'type' is out of range.
5172      * @see UProperty
5173      * @see #hasBinaryProperty
5174      * @see #getUnicodeVersion
5175      * @see #getIntPropertyMaxValue
5176      * @see #getIntPropertyValue
5177      */
getIntPropertyMaxValue(int type)5178     public static int getIntPropertyMaxValue(int type)
5179     {
5180         return UCharacterProperty.INSTANCE.getIntPropertyMaxValue(type);
5181     }
5182 
5183     /**
5184      * Provide the java.lang.Character forDigit API, for convenience.
5185      */
forDigit(int digit, int radix)5186     public static char forDigit(int digit, int radix) {
5187         return java.lang.Character.forDigit(digit, radix);
5188     }
5189 
// JDK 1.5 API coverage: constants and methods mirroring java.lang.Character

/**
 * Constant U+D800, same as {@link Character#MIN_HIGH_SURROGATE}:
 * the smallest UTF-16 high (lead) surrogate code unit.
 */
public static final char MIN_HIGH_SURROGATE = Character.MIN_HIGH_SURROGATE;

/**
 * Constant U+DBFF, same as {@link Character#MAX_HIGH_SURROGATE}:
 * the largest UTF-16 high (lead) surrogate code unit.
 */
public static final char MAX_HIGH_SURROGATE = Character.MAX_HIGH_SURROGATE;

/**
 * Constant U+DC00, same as {@link Character#MIN_LOW_SURROGATE}:
 * the smallest UTF-16 low (trail) surrogate code unit.
 */
public static final char MIN_LOW_SURROGATE = Character.MIN_LOW_SURROGATE;

/**
 * Constant U+DFFF, same as {@link Character#MAX_LOW_SURROGATE}:
 * the largest UTF-16 low (trail) surrogate code unit.
 */
public static final char MAX_LOW_SURROGATE = Character.MAX_LOW_SURROGATE;

/**
 * Constant U+D800, same as {@link Character#MIN_SURROGATE}:
 * the smallest surrogate code unit of either kind.
 */
public static final char MIN_SURROGATE = Character.MIN_SURROGATE;

/**
 * Constant U+DFFF, same as {@link Character#MAX_SURROGATE}:
 * the largest surrogate code unit of either kind.
 */
public static final char MAX_SURROGATE = Character.MAX_SURROGATE;

/**
 * Constant U+10000, same as {@link Character#MIN_SUPPLEMENTARY_CODE_POINT}:
 * the first code point that needs two UTF-16 code units.
 */
public static final int MIN_SUPPLEMENTARY_CODE_POINT = Character.MIN_SUPPLEMENTARY_CODE_POINT;

/**
 * Constant U+10FFFF, same as {@link Character#MAX_CODE_POINT}:
 * the largest Unicode code point.
 */
public static final int MAX_CODE_POINT = Character.MAX_CODE_POINT;

/**
 * Constant U+0000, same as {@link Character#MIN_CODE_POINT}:
 * the smallest Unicode code point.
 */
public static final int MIN_CODE_POINT = Character.MIN_CODE_POINT;
5236 
5237     /**
5238      * Equivalent to {@link Character#isValidCodePoint}.
5239      *
5240      * @param cp the code point to check
5241      * @return true if cp is a valid code point
5242      */
isValidCodePoint(int cp)5243     public static final boolean isValidCodePoint(int cp) {
5244         return cp >= 0 && cp <= MAX_CODE_POINT;
5245     }
5246 
5247     /**
5248      * Same as {@link Character#isSupplementaryCodePoint}.
5249      *
5250      * @param cp the code point to check
5251      * @return true if cp is a supplementary code point
5252      */
isSupplementaryCodePoint(int cp)5253     public static final boolean isSupplementaryCodePoint(int cp) {
5254         return Character.isSupplementaryCodePoint(cp);
5255     }
5256 
5257     /**
5258      * Same as {@link Character#isHighSurrogate}.
5259      *
5260      * @param ch the char to check
5261      * @return true if ch is a high (lead) surrogate
5262      */
isHighSurrogate(char ch)5263     public static boolean isHighSurrogate(char ch) {
5264         return Character.isHighSurrogate(ch);
5265     }
5266 
5267     /**
5268      * Same as {@link Character#isLowSurrogate}.
5269      *
5270      * @param ch the char to check
5271      * @return true if ch is a low (trail) surrogate
5272      */
isLowSurrogate(char ch)5273     public static boolean isLowSurrogate(char ch) {
5274         return Character.isLowSurrogate(ch);
5275     }
5276 
5277     /**
5278      * Same as {@link Character#isSurrogatePair}.
5279      *
5280      * @param high the high (lead) char
5281      * @param low the low (trail) char
5282      * @return true if high, low form a surrogate pair
5283      */
isSurrogatePair(char high, char low)5284     public static final boolean isSurrogatePair(char high, char low) {
5285         return Character.isSurrogatePair(high, low);
5286     }
5287 
5288     /**
5289      * Same as {@link Character#charCount}.
5290      * Returns the number of chars needed to represent the code point (1 or 2).
5291      * This does not check the code point for validity.
5292      *
5293      * @param cp the code point to check
5294      * @return the number of chars needed to represent the code point
5295      */
charCount(int cp)5296     public static int charCount(int cp) {
5297         return Character.charCount(cp);
5298     }
5299 
5300     /**
5301      * Same as {@link Character#toCodePoint}.
5302      * Returns the code point represented by the two surrogate code units.
5303      * This does not check the surrogate pair for validity.
5304      *
5305      * @param high the high (lead) surrogate
5306      * @param low the low (trail) surrogate
5307      * @return the code point formed by the surrogate pair
5308      */
toCodePoint(char high, char low)5309     public static final int toCodePoint(char high, char low) {
5310         return Character.toCodePoint(high, low);
5311     }
5312 
5313     /**
5314      * Same as {@link Character#codePointAt(CharSequence, int)}.
5315      * Returns the code point at index.
5316      * This examines only the characters at index and index+1.
5317      *
5318      * @param seq the characters to check
5319      * @param index the index of the first or only char forming the code point
5320      * @return the code point at the index
5321      */
codePointAt(CharSequence seq, int index)5322     public static final int codePointAt(CharSequence seq, int index) {
5323         char c1 = seq.charAt(index++);
5324         if (isHighSurrogate(c1)) {
5325             if (index < seq.length()) {
5326                 char c2 = seq.charAt(index);
5327                 if (isLowSurrogate(c2)) {
5328                     return toCodePoint(c1, c2);
5329                 }
5330             }
5331         }
5332         return c1;
5333     }
5334 
5335     /**
5336      * Same as {@link Character#codePointAt(char[], int)}.
5337      * Returns the code point at index.
5338      * This examines only the characters at index and index+1.
5339      *
5340      * @param text the characters to check
5341      * @param index the index of the first or only char forming the code point
5342      * @return the code point at the index
5343      */
codePointAt(char[] text, int index)5344     public static final int codePointAt(char[] text, int index) {
5345         char c1 = text[index++];
5346         if (isHighSurrogate(c1)) {
5347             if (index < text.length) {
5348                 char c2 = text[index];
5349                 if (isLowSurrogate(c2)) {
5350                     return toCodePoint(c1, c2);
5351                 }
5352             }
5353         }
5354         return c1;
5355     }
5356 
5357     /**
5358      * Same as {@link Character#codePointAt(char[], int, int)}.
5359      * Returns the code point at index.
5360      * This examines only the characters at index and index+1.
5361      *
5362      * @param text the characters to check
5363      * @param index the index of the first or only char forming the code point
5364      * @param limit the limit of the valid text
5365      * @return the code point at the index
5366      */
codePointAt(char[] text, int index, int limit)5367     public static final int codePointAt(char[] text, int index, int limit) {
5368         if (index >= limit || limit > text.length) {
5369             throw new IndexOutOfBoundsException();
5370         }
5371         char c1 = text[index++];
5372         if (isHighSurrogate(c1)) {
5373             if (index < limit) {
5374                 char c2 = text[index];
5375                 if (isLowSurrogate(c2)) {
5376                     return toCodePoint(c1, c2);
5377                 }
5378             }
5379         }
5380         return c1;
5381     }
5382 
5383     /**
5384      * Same as {@link Character#codePointBefore(CharSequence, int)}.
5385      * Return the code point before index.
5386      * This examines only the characters at index-1 and index-2.
5387      *
5388      * @param seq the characters to check
5389      * @param index the index after the last or only char forming the code point
5390      * @return the code point before the index
5391      */
codePointBefore(CharSequence seq, int index)5392     public static final int codePointBefore(CharSequence seq, int index) {
5393         char c2 = seq.charAt(--index);
5394         if (isLowSurrogate(c2)) {
5395             if (index > 0) {
5396                 char c1 = seq.charAt(--index);
5397                 if (isHighSurrogate(c1)) {
5398                     return toCodePoint(c1, c2);
5399                 }
5400             }
5401         }
5402         return c2;
5403     }
5404 
5405     /**
5406      * Same as {@link Character#codePointBefore(char[], int)}.
5407      * Returns the code point before index.
5408      * This examines only the characters at index-1 and index-2.
5409      *
5410      * @param text the characters to check
5411      * @param index the index after the last or only char forming the code point
5412      * @return the code point before the index
5413      */
codePointBefore(char[] text, int index)5414     public static final int codePointBefore(char[] text, int index) {
5415         char c2 = text[--index];
5416         if (isLowSurrogate(c2)) {
5417             if (index > 0) {
5418                 char c1 = text[--index];
5419                 if (isHighSurrogate(c1)) {
5420                     return toCodePoint(c1, c2);
5421                 }
5422             }
5423         }
5424         return c2;
5425     }
5426 
5427     /**
5428      * Same as {@link Character#codePointBefore(char[], int, int)}.
5429      * Return the code point before index.
5430      * This examines only the characters at index-1 and index-2.
5431      *
5432      * @param text the characters to check
5433      * @param index the index after the last or only char forming the code point
5434      * @param limit the start of the valid text
5435      * @return the code point before the index
5436      */
codePointBefore(char[] text, int index, int limit)5437     public static final int codePointBefore(char[] text, int index, int limit) {
5438         if (index <= limit || limit < 0) {
5439             throw new IndexOutOfBoundsException();
5440         }
5441         char c2 = text[--index];
5442         if (isLowSurrogate(c2)) {
5443             if (index > limit) {
5444                 char c1 = text[--index];
5445                 if (isHighSurrogate(c1)) {
5446                     return toCodePoint(c1, c2);
5447                 }
5448             }
5449         }
5450         return c2;
5451     }
5452 
5453     /**
5454      * Same as {@link Character#toChars(int, char[], int)}.
5455      * Writes the chars representing the
5456      * code point into the destination at the given index.
5457      *
5458      * @param cp the code point to convert
5459      * @param dst the destination array into which to put the char(s) representing the code point
5460      * @param dstIndex the index at which to put the first (or only) char
5461      * @return the count of the number of chars written (1 or 2)
5462      * @throws IllegalArgumentException if cp is not a valid code point
5463      */
toChars(int cp, char[] dst, int dstIndex)5464     public static final int toChars(int cp, char[] dst, int dstIndex) {
5465         return Character.toChars(cp, dst, dstIndex);
5466     }
5467 
5468     /**
5469      * Same as {@link Character#toChars(int)}.
5470      * Returns a char array representing the code point.
5471      *
5472      * @param cp the code point to convert
5473      * @return an array containing the char(s) representing the code point
5474      * @throws IllegalArgumentException if cp is not a valid code point
5475      */
toChars(int cp)5476     public static final char[] toChars(int cp) {
5477         return Character.toChars(cp);
5478     }
5479 
5480     /**
5481      * Equivalent to the {@link Character#getDirectionality(char)} method, for
5482      * convenience. Returns a byte representing the directionality of the
5483      * character.
5484      *
5485      * <strong>[icu] Note:</strong> Unlike {@link Character#getDirectionality(char)}, this returns
5486      * DIRECTIONALITY_LEFT_TO_RIGHT for undefined or out-of-bounds characters.
5487      *
5488      * <strong>[icu] Note:</strong> The return value must be tested using the constants defined in {@link
5489      * UCharacterDirection} and its interface {@link
5490      * UCharacterEnums.ECharacterDirection} since the values are different from the ones
5491      * defined by <code>java.lang.Character</code>.
5492      * @param cp the code point to check
5493      * @return the directionality of the code point
5494      * @see #getDirection
5495      */
getDirectionality(int cp)5496     public static byte getDirectionality(int cp)
5497     {
5498         return (byte)getDirection(cp);
5499     }
5500 
5501     /**
5502      * Equivalent to the {@link Character#codePointCount(CharSequence, int, int)}
5503      * method, for convenience.  Counts the number of code points in the range
5504      * of text.
5505      * @param text the characters to check
5506      * @param start the start of the range
5507      * @param limit the limit of the range
5508      * @return the number of code points in the range
5509      */
codePointCount(CharSequence text, int start, int limit)5510     public static int codePointCount(CharSequence text, int start, int limit) {
5511         if (start < 0 || limit < start || limit > text.length()) {
5512             throw new IndexOutOfBoundsException("start (" + start +
5513                     ") or limit (" + limit +
5514                     ") invalid or out of range 0, " + text.length());
5515         }
5516 
5517         int len = limit - start;
5518         while (limit > start) {
5519             char ch = text.charAt(--limit);
5520             while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && limit > start) {
5521                 ch = text.charAt(--limit);
5522                 if (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE) {
5523                     --len;
5524                     break;
5525                 }
5526             }
5527         }
5528         return len;
5529     }
5530 
5531     /**
5532      * Equivalent to the {@link Character#codePointCount(char[], int, int)} method, for
5533      * convenience. Counts the number of code points in the range of text.
5534      * @param text the characters to check
5535      * @param start the start of the range
5536      * @param limit the limit of the range
5537      * @return the number of code points in the range
5538      */
codePointCount(char[] text, int start, int limit)5539     public static int codePointCount(char[] text, int start, int limit) {
5540         if (start < 0 || limit < start || limit > text.length) {
5541             throw new IndexOutOfBoundsException("start (" + start +
5542                     ") or limit (" + limit +
5543                     ") invalid or out of range 0, " + text.length);
5544         }
5545 
5546         int len = limit - start;
5547         while (limit > start) {
5548             char ch = text[--limit];
5549             while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && limit > start) {
5550                 ch = text[--limit];
5551                 if (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE) {
5552                     --len;
5553                     break;
5554                 }
5555             }
5556         }
5557         return len;
5558     }
5559 
5560     /**
5561      * Equivalent to the {@link Character#offsetByCodePoints(CharSequence, int, int)}
5562      * method, for convenience.  Adjusts the char index by a code point offset.
5563      * @param text the characters to check
5564      * @param index the index to adjust
5565      * @param codePointOffset the number of code points by which to offset the index
5566      * @return the adjusted index
5567      */
offsetByCodePoints(CharSequence text, int index, int codePointOffset)5568     public static int offsetByCodePoints(CharSequence text, int index, int codePointOffset) {
5569         if (index < 0 || index > text.length()) {
5570             throw new IndexOutOfBoundsException("index ( " + index +
5571                     ") out of range 0, " + text.length());
5572         }
5573 
5574         if (codePointOffset < 0) {
5575             while (++codePointOffset <= 0) {
5576                 char ch = text.charAt(--index);
5577                 while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && index > 0) {
5578                     ch = text.charAt(--index);
5579                     if (ch < MIN_HIGH_SURROGATE || ch > MAX_HIGH_SURROGATE) {
5580                         if (++codePointOffset > 0) {
5581                             return index+1;
5582                         }
5583                     }
5584                 }
5585             }
5586         } else {
5587             int limit = text.length();
5588             while (--codePointOffset >= 0) {
5589                 char ch = text.charAt(index++);
5590                 while (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE && index < limit) {
5591                     ch = text.charAt(index++);
5592                     if (ch < MIN_LOW_SURROGATE || ch > MAX_LOW_SURROGATE) {
5593                         if (--codePointOffset < 0) {
5594                             return index-1;
5595                         }
5596                     }
5597                 }
5598             }
5599         }
5600 
5601         return index;
5602     }
5603 
5604     /**
5605      * Equivalent to the
5606      * {@link Character#offsetByCodePoints(char[], int, int, int, int)}
5607      * method, for convenience.  Adjusts the char index by a code point offset.
5608      * @param text the characters to check
5609      * @param start the start of the range to check
5610      * @param count the length of the range to check
5611      * @param index the index to adjust
5612      * @param codePointOffset the number of code points by which to offset the index
5613      * @return the adjusted index
5614      */
offsetByCodePoints(char[] text, int start, int count, int index, int codePointOffset)5615     public static int offsetByCodePoints(char[] text, int start, int count, int index,
5616             int codePointOffset) {
5617         int limit = start + count;
5618         if (start < 0 || limit < start || limit > text.length || index < start || index > limit) {
5619             throw new IndexOutOfBoundsException("index ( " + index +
5620                     ") out of range " + start +
5621                     ", " + limit +
5622                     " in array 0, " + text.length);
5623         }
5624 
5625         if (codePointOffset < 0) {
5626             while (++codePointOffset <= 0) {
5627                 char ch = text[--index];
5628                 if (index < start) {
5629                     throw new IndexOutOfBoundsException("index ( " + index +
5630                             ") < start (" + start +
5631                             ")");
5632                 }
5633                 while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && index > start) {
5634                     ch = text[--index];
5635                     if (ch < MIN_HIGH_SURROGATE || ch > MAX_HIGH_SURROGATE) {
5636                         if (++codePointOffset > 0) {
5637                             return index+1;
5638                         }
5639                     }
5640                 }
5641             }
5642         } else {
5643             while (--codePointOffset >= 0) {
5644                 char ch = text[index++];
5645                 if (index > limit) {
5646                     throw new IndexOutOfBoundsException("index ( " + index +
5647                             ") > limit (" + limit +
5648                             ")");
5649                 }
5650                 while (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE && index < limit) {
5651                     ch = text[index++];
5652                     if (ch < MIN_LOW_SURROGATE || ch > MAX_LOW_SURROGATE) {
5653                         if (--codePointOffset < 0) {
5654                             return index-1;
5655                         }
5656                     }
5657                 }
5658             }
5659         }
5660 
5661         return index;
5662     }
5663 
// private variables -------------------------------------------------

/**
 * Mask that keeps the low 16 bits of a value, i.e. its last char (UTF-16 code unit).
 */
private static final int LAST_CHAR_MASK_ = 0xFFFF;

// The following constants are commented out and therefore not compiled.
//    /**
//     * To get the last byte out from a data type
//     */
//    private static final int LAST_BYTE_MASK_ = 0xFF;
//
//    /**
//     * Shift 16 bits
//     */
//    private static final int SHIFT_16_ = 16;
//
//    /**
//     * Shift 24 bits
//     */
//    private static final int SHIFT_24_ = 24;
//
//    /**
//     * Decimal radix
//     */
//    private static final int DECIMAL_RADIX_ = 10;

/**
 * No-break space code point (U+00A0)
 */
private static final int NO_BREAK_SPACE_ = 0xA0;

/**
 * Figure space code point (U+2007)
 */
private static final int FIGURE_SPACE_ = 0x2007;

/**
 * Narrow no-break space code point (U+202F)
 */
private static final int NARROW_NO_BREAK_SPACE_ = 0x202F;

/**
 * Ideographic number zero code point (U+3007)
 */
private static final int IDEOGRAPHIC_NUMBER_ZERO_ = 0x3007;

/**
 * CJK ideograph for the number one (U+4E00)
 */
private static final int CJK_IDEOGRAPH_FIRST_ = 0x4e00;

/**
 * CJK ideograph for the number two (U+4E8C)
 */
private static final int CJK_IDEOGRAPH_SECOND_ = 0x4e8c;

/**
 * CJK ideograph for the number three (U+4E09)
 */
private static final int CJK_IDEOGRAPH_THIRD_ = 0x4e09;

/**
 * CJK ideograph for the number four (U+56DB)
 */
private static final int CJK_IDEOGRAPH_FOURTH_ = 0x56db;

/**
 * CJK ideograph for the number five (U+4E94)
 */
private static final int CJK_IDEOGRAPH_FIFTH_ = 0x4e94;

/**
 * CJK ideograph for the number six (U+516D)
 */
private static final int CJK_IDEOGRAPH_SIXTH_ = 0x516d;

/**
 * CJK ideograph for the number seven (U+4E03)
 */
private static final int CJK_IDEOGRAPH_SEVENTH_ = 0x4e03;

/**
 * CJK ideograph for the number eight (U+516B)
 */
private static final int CJK_IDEOGRAPH_EIGHTH_ = 0x516b;

/**
 * CJK ideograph for the number nine (U+4E5D)
 */
private static final int CJK_IDEOGRAPH_NINETH_ = 0x4e5d;

/**
 * Application Program Command control code point (U+009F)
 */
private static final int APPLICATION_PROGRAM_COMMAND_ = 0x009F;

/**
 * Unit separator control code point (U+001F)
 */
private static final int UNIT_SEPARATOR_ = 0x001F;

/**
 * Delete control code point (U+007F)
 */
private static final int DELETE_ = 0x007F;
5770 
5771     /**
5772      * Han digit characters
5773      */
5774     private static final int CJK_IDEOGRAPH_COMPLEX_ZERO_     = 0x96f6;
5775     private static final int CJK_IDEOGRAPH_COMPLEX_ONE_      = 0x58f9;
5776     private static final int CJK_IDEOGRAPH_COMPLEX_TWO_      = 0x8cb3;
5777     private static final int CJK_IDEOGRAPH_COMPLEX_THREE_    = 0x53c3;
5778     private static final int CJK_IDEOGRAPH_COMPLEX_FOUR_     = 0x8086;
5779     private static final int CJK_IDEOGRAPH_COMPLEX_FIVE_     = 0x4f0d;
5780     private static final int CJK_IDEOGRAPH_COMPLEX_SIX_      = 0x9678;
5781     private static final int CJK_IDEOGRAPH_COMPLEX_SEVEN_    = 0x67d2;
5782     private static final int CJK_IDEOGRAPH_COMPLEX_EIGHT_    = 0x634c;
5783     private static final int CJK_IDEOGRAPH_COMPLEX_NINE_     = 0x7396;
5784     private static final int CJK_IDEOGRAPH_TEN_              = 0x5341;
5785     private static final int CJK_IDEOGRAPH_COMPLEX_TEN_      = 0x62fe;
5786     private static final int CJK_IDEOGRAPH_HUNDRED_          = 0x767e;
5787     private static final int CJK_IDEOGRAPH_COMPLEX_HUNDRED_  = 0x4f70;
5788     private static final int CJK_IDEOGRAPH_THOUSAND_         = 0x5343;
5789     private static final int CJK_IDEOGRAPH_COMPLEX_THOUSAND_ = 0x4edf;
5790     private static final int CJK_IDEOGRAPH_TEN_THOUSAND_     = 0x824c;
5791     private static final int CJK_IDEOGRAPH_HUNDRED_MILLION_  = 0x5104;
5792 
5793     // private constructor -----------------------------------------------
5794     ///CLOVER:OFF
5795     /**
5796      * Private constructor to prevent instantiation
5797      */
UCharacter()5798     private UCharacter()
5799     {
5800     }
5801     ///CLOVER:ON
5802 }
5803