1 /* GENERATED SOURCE. DO NOT MODIFY. */
2 /**
3  *******************************************************************************
4  * Copyright (C) 1996-2016, International Business Machines Corporation and
5  * others. All Rights Reserved.
6  *******************************************************************************
7  */
8 
9 package android.icu.lang;
10 
11 import java.lang.ref.SoftReference;
12 import java.util.HashMap;
13 import java.util.Iterator;
14 import java.util.Locale;
15 import java.util.Map;
16 
17 import android.icu.impl.IllegalIcuArgumentException;
18 import android.icu.impl.Trie2;
19 import android.icu.impl.UBiDiProps;
20 import android.icu.impl.UCaseProps;
21 import android.icu.impl.UCharacterName;
22 import android.icu.impl.UCharacterNameChoice;
23 import android.icu.impl.UCharacterProperty;
24 import android.icu.impl.UCharacterUtility;
25 import android.icu.impl.UPropertyAliases;
26 import android.icu.lang.UCharacterEnums.ECharacterCategory;
27 import android.icu.lang.UCharacterEnums.ECharacterDirection;
28 import android.icu.text.BreakIterator;
29 import android.icu.text.Normalizer2;
30 import android.icu.util.RangeValueIterator;
31 import android.icu.util.ULocale;
32 import android.icu.util.ValueIterator;
33 import android.icu.util.VersionInfo;
34 
35 /**
36  * <strong>[icu enhancement]</strong> ICU's replacement for {@link java.lang.Character}.&nbsp;Methods, fields, and other functionality specific to ICU are labeled '<strong>[icu]</strong>'.
37  *
38  * <p>The UCharacter class provides extensions to the {@link java.lang.Character} class.
39  * These extensions provide support for more Unicode properties.
40  * Each ICU release supports the latest version of Unicode available at that time.
41  *
42  * <p>For some time before Java 5 added support for supplementary Unicode code points,
43  * the ICU UCharacter class and many other ICU classes already supported them.
44  * Some UCharacter methods and constants were widened slightly differently than
45  * how the Character class methods and constants were widened later.
46  * In particular, {@link Character#MAX_VALUE} is still a char with the value U+FFFF,
47  * while the {@link UCharacter#MAX_VALUE} is an int with the value U+10FFFF.
48  *
49  * <p>Code points are represented in this API using ints. While it would be
50  * more convenient in Java to have a separate primitive datatype for them,
51  * ints suffice in the meantime.
52  *
53  * <p>Aside from the additions for UTF-16 support, and the updated Unicode
54  * properties, the main differences between UCharacter and Character are:
55  * <ul>
56  * <li> UCharacter is not designed to be a char wrapper and does not have
57  *      APIs that involve management of that single char.<br>
58  *      These include:
59  *      <ul>
60  *        <li> char charValue(),
61  *        <li> int compareTo(java.lang.Character, java.lang.Character), etc.
62  *      </ul>
63  * <li> UCharacter does not include Character APIs that are deprecated, nor
64  *      does it include the Java-specific character information, such as
65  *      boolean isJavaIdentifierPart(char ch).
66  * <li> Character maps characters 'A' - 'Z' and 'a' - 'z' to the numeric
67  *      values '10' - '35'. UCharacter also does this in digit and
68  *      getNumericValue, to adhere to the java semantics of these
69  *      methods.  New methods unicodeDigit, and
70  *      getUnicodeNumericValue do not treat the above code points
71  *      as having numeric values.  This is a semantic change from ICU4J 1.3.1.
72  * </ul>
73  * <p>
74  * In addition to Java compatibility functions, which calculate derived properties,
75  * this API provides low-level access to the Unicode Character Database.
76  * </p>
77  * <p>
78  * Unicode assigns each code point (not just assigned characters) values for
79  * many properties.
80  * Most of them are simple boolean flags, or constants from a small enumerated list.
81  * For some properties, values are strings or other relatively more complex types.
82  * </p>
83  * <p>
84  * For more information see
85  * <a href="http://www.unicode.org/ucd/">"About the Unicode Character Database"</a>
86  * (http://www.unicode.org/ucd/)
87  * and the <a href="http://www.icu-project.org/userguide/properties.html">ICU
88  * User Guide chapter on Properties</a>
89  * (http://www.icu-project.org/userguide/properties.html).
90  * </p>
91  * <p>
92  * There are also functions that provide easy migration from C/POSIX functions
93  * like isblank(). Their use is generally discouraged because the C/POSIX
94  * standards do not define their semantics beyond the ASCII range, which means
95  * that different implementations exhibit very different behavior.
96  * Instead, Unicode properties should be used directly.
97  * </p>
98  * <p>
99  * There are also only a few, broad C/POSIX character classes, and they tend
100  * to be used for conflicting purposes. For example, the "isalpha()" class
101  * is sometimes used to determine word boundaries, while a more sophisticated
102  * approach would at least distinguish initial letters from continuation
103  * characters (the latter including combining marks).
104  * (In ICU, BreakIterator is the most sophisticated API for word boundaries.)
105  * Another example: There is no "istitle()" class for titlecase characters.
106  * </p>
107  * <p>
108  * ICU 3.4 and later provides API access for all twelve C/POSIX character classes.
109  * ICU implements them according to the Standard Recommendations in
110  * Annex C: Compatibility Properties of UTS #18 Unicode Regular Expressions
111  * (http://www.unicode.org/reports/tr18/#Compatibility_Properties).
112  * </p>
113  * <p>
114  * API access for C/POSIX character classes is as follows:
115  * <pre>{@code
116  * - alpha:     isUAlphabetic(c) or hasBinaryProperty(c, UProperty.ALPHABETIC)
117  * - lower:     isULowercase(c) or hasBinaryProperty(c, UProperty.LOWERCASE)
118  * - upper:     isUUppercase(c) or hasBinaryProperty(c, UProperty.UPPERCASE)
119  * - punct:     ((1<<getType(c)) & ((1<<DASH_PUNCTUATION)|(1<<START_PUNCTUATION)|
120  *               (1<<END_PUNCTUATION)|(1<<CONNECTOR_PUNCTUATION)|(1<<OTHER_PUNCTUATION)|
121  *               (1<<INITIAL_PUNCTUATION)|(1<<FINAL_PUNCTUATION)))!=0
122  * - digit:     isDigit(c) or getType(c)==DECIMAL_DIGIT_NUMBER
123  * - xdigit:    hasBinaryProperty(c, UProperty.POSIX_XDIGIT)
124  * - alnum:     hasBinaryProperty(c, UProperty.POSIX_ALNUM)
125  * - space:     isUWhiteSpace(c) or hasBinaryProperty(c, UProperty.WHITE_SPACE)
126  * - blank:     hasBinaryProperty(c, UProperty.POSIX_BLANK)
127  * - cntrl:     getType(c)==CONTROL
128  * - graph:     hasBinaryProperty(c, UProperty.POSIX_GRAPH)
129  * - print:     hasBinaryProperty(c, UProperty.POSIX_PRINT)}</pre>
130  * </p>
131  * <p>
132  * The C/POSIX character classes are also available in UnicodeSet patterns,
133  * using patterns like [:graph:] or \p{graph}.
134  * </p>
135  *
136  * <strong>[icu] Note:</strong> There are several ICU (and Java) whitespace functions.
137  * Comparison:<ul>
138  * <li> isUWhiteSpace=UCHAR_WHITE_SPACE: Unicode White_Space property;
139  *       most of general categories "Z" (separators) + most whitespace ISO controls
140  *       (including no-break spaces, but excluding IS1..IS4 and ZWSP)
141  * <li> isWhitespace: Java isWhitespace; Z + whitespace ISO controls but excluding no-break spaces
142  * <li> isSpaceChar: just Z (including no-break spaces)</ul>
143  * </p>
144  * <p>
145  * This class is not subclassable.
146  * </p>
147  * @author Syn Wee Quek
148  * @see android.icu.lang.UCharacterEnums
149  */
150 
151 public final class UCharacter implements ECharacterCategory, ECharacterDirection
152 {
153     // public inner classes ----------------------------------------------
154 
155     /**
156      * <strong>[icu enhancement]</strong> ICU's replacement for {@link java.lang.Character.UnicodeBlock}.&nbsp;Methods, fields, and other functionality specific to ICU are labeled '<strong>[icu]</strong>'.
157      *
158      * A family of character subsets representing the character blocks in the
159      * Unicode specification, generated from Unicode Data file Blocks.txt.
160      * Character blocks generally define characters used for a specific script
161      * or purpose. A character is contained by at most one Unicode block.
162      *
163      * <strong>[icu] Note:</strong> All fields named XXX_ID are specific to ICU.
164      */
165     public static final class UnicodeBlock extends Character.Subset
166     {
167         // block id corresponding to icu4c -----------------------------------
168 
169         /**
170          */
171         public static final int INVALID_CODE_ID = -1;
172         /**
173          */
174         public static final int BASIC_LATIN_ID = 1;
175         /**
176          */
177         public static final int LATIN_1_SUPPLEMENT_ID = 2;
178         /**
179          */
180         public static final int LATIN_EXTENDED_A_ID = 3;
181         /**
182          */
183         public static final int LATIN_EXTENDED_B_ID = 4;
184         /**
185          */
186         public static final int IPA_EXTENSIONS_ID = 5;
187         /**
188          */
189         public static final int SPACING_MODIFIER_LETTERS_ID = 6;
190         /**
191          */
192         public static final int COMBINING_DIACRITICAL_MARKS_ID = 7;
193         /**
194          * Unicode 3.2 renames this block to "Greek and Coptic".
195          */
196         public static final int GREEK_ID = 8;
197         /**
198          */
199         public static final int CYRILLIC_ID = 9;
200         /**
201          */
202         public static final int ARMENIAN_ID = 10;
203         /**
204          */
205         public static final int HEBREW_ID = 11;
206         /**
207          */
208         public static final int ARABIC_ID = 12;
209         /**
210          */
211         public static final int SYRIAC_ID = 13;
212         /**
213          */
214         public static final int THAANA_ID = 14;
215         /**
216          */
217         public static final int DEVANAGARI_ID = 15;
218         /**
219          */
220         public static final int BENGALI_ID = 16;
221         /**
222          */
223         public static final int GURMUKHI_ID = 17;
224         /**
225          */
226         public static final int GUJARATI_ID = 18;
227         /**
228          */
229         public static final int ORIYA_ID = 19;
230         /**
231          */
232         public static final int TAMIL_ID = 20;
233         /**
234          */
235         public static final int TELUGU_ID = 21;
236         /**
237          */
238         public static final int KANNADA_ID = 22;
239         /**
240          */
241         public static final int MALAYALAM_ID = 23;
242         /**
243          */
244         public static final int SINHALA_ID = 24;
245         /**
246          */
247         public static final int THAI_ID = 25;
248         /**
249          */
250         public static final int LAO_ID = 26;
251         /**
252          */
253         public static final int TIBETAN_ID = 27;
254         /**
255          */
256         public static final int MYANMAR_ID = 28;
257         /**
258          */
259         public static final int GEORGIAN_ID = 29;
260         /**
261          */
262         public static final int HANGUL_JAMO_ID = 30;
263         /**
264          */
265         public static final int ETHIOPIC_ID = 31;
266         /**
267          */
268         public static final int CHEROKEE_ID = 32;
269         /**
270          */
271         public static final int UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_ID = 33;
272         /**
273          */
274         public static final int OGHAM_ID = 34;
275         /**
276          */
277         public static final int RUNIC_ID = 35;
278         /**
279          */
280         public static final int KHMER_ID = 36;
281         /**
282          */
283         public static final int MONGOLIAN_ID = 37;
284         /**
285          */
286         public static final int LATIN_EXTENDED_ADDITIONAL_ID = 38;
287         /**
288          */
289         public static final int GREEK_EXTENDED_ID = 39;
290         /**
291          */
292         public static final int GENERAL_PUNCTUATION_ID = 40;
293         /**
294          */
295         public static final int SUPERSCRIPTS_AND_SUBSCRIPTS_ID = 41;
296         /**
297          */
298         public static final int CURRENCY_SYMBOLS_ID = 42;
299         /**
300          * Unicode 3.2 renames this block to "Combining Diacritical Marks for
301          * Symbols".
302          */
303         public static final int COMBINING_MARKS_FOR_SYMBOLS_ID = 43;
304         /**
305          */
306         public static final int LETTERLIKE_SYMBOLS_ID = 44;
307         /**
308          */
309         public static final int NUMBER_FORMS_ID = 45;
310         /**
311          */
312         public static final int ARROWS_ID = 46;
313         /**
314          */
315         public static final int MATHEMATICAL_OPERATORS_ID = 47;
316         /**
317          */
318         public static final int MISCELLANEOUS_TECHNICAL_ID = 48;
319         /**
320          */
321         public static final int CONTROL_PICTURES_ID = 49;
322         /**
323          */
324         public static final int OPTICAL_CHARACTER_RECOGNITION_ID = 50;
325         /**
326          */
327         public static final int ENCLOSED_ALPHANUMERICS_ID = 51;
328         /**
329          */
330         public static final int BOX_DRAWING_ID = 52;
331         /**
332          */
333         public static final int BLOCK_ELEMENTS_ID = 53;
334         /**
335          */
336         public static final int GEOMETRIC_SHAPES_ID = 54;
337         /**
338          */
339         public static final int MISCELLANEOUS_SYMBOLS_ID = 55;
340         /**
341          */
342         public static final int DINGBATS_ID = 56;
343         /**
344          */
345         public static final int BRAILLE_PATTERNS_ID = 57;
346         /**
347          */
348         public static final int CJK_RADICALS_SUPPLEMENT_ID = 58;
349         /**
350          */
351         public static final int KANGXI_RADICALS_ID = 59;
352         /**
353          */
354         public static final int IDEOGRAPHIC_DESCRIPTION_CHARACTERS_ID = 60;
355         /**
356          */
357         public static final int CJK_SYMBOLS_AND_PUNCTUATION_ID = 61;
358         /**
359          */
360         public static final int HIRAGANA_ID = 62;
361         /**
362          */
363         public static final int KATAKANA_ID = 63;
364         /**
365          */
366         public static final int BOPOMOFO_ID = 64;
367         /**
368          */
369         public static final int HANGUL_COMPATIBILITY_JAMO_ID = 65;
370         /**
371          */
372         public static final int KANBUN_ID = 66;
373         /**
374          */
375         public static final int BOPOMOFO_EXTENDED_ID = 67;
376         /**
377          */
378         public static final int ENCLOSED_CJK_LETTERS_AND_MONTHS_ID = 68;
379         /**
380          */
381         public static final int CJK_COMPATIBILITY_ID = 69;
382         /**
383          */
384         public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_ID = 70;
385         /**
386          */
387         public static final int CJK_UNIFIED_IDEOGRAPHS_ID = 71;
388         /**
389          */
390         public static final int YI_SYLLABLES_ID = 72;
391         /**
392          */
393         public static final int YI_RADICALS_ID = 73;
394         /**
395          */
396         public static final int HANGUL_SYLLABLES_ID = 74;
397         /**
398          */
399         public static final int HIGH_SURROGATES_ID = 75;
400         /**
401          */
402         public static final int HIGH_PRIVATE_USE_SURROGATES_ID = 76;
403         /**
404          */
405         public static final int LOW_SURROGATES_ID = 77;
406         /**
407          * Same as public static final int PRIVATE_USE_ID.
408          * Until Unicode 3.1.1, the corresponding block name was "Private Use",
409          * and multiple code point ranges had this block.
410          * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area"
411          * and adds separate blocks for the supplementary PUAs.
412          */
413         public static final int PRIVATE_USE_AREA_ID = 78;
414         /**
415          * Same as public static final int PRIVATE_USE_AREA_ID.
416          * Until Unicode 3.1.1, the corresponding block name was "Private Use",
417          * and multiple code point ranges had this block.
418          * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area"
419          * and adds separate blocks for the supplementary PUAs.
420          */
421         public static final int PRIVATE_USE_ID = PRIVATE_USE_AREA_ID;
422         /**
423          */
424         public static final int CJK_COMPATIBILITY_IDEOGRAPHS_ID = 79;
425         /**
426          */
427         public static final int ALPHABETIC_PRESENTATION_FORMS_ID = 80;
428         /**
429          */
430         public static final int ARABIC_PRESENTATION_FORMS_A_ID = 81;
431         /**
432          */
433         public static final int COMBINING_HALF_MARKS_ID = 82;
434         /**
435          */
436         public static final int CJK_COMPATIBILITY_FORMS_ID = 83;
437         /**
438          */
439         public static final int SMALL_FORM_VARIANTS_ID = 84;
440         /**
441          */
442         public static final int ARABIC_PRESENTATION_FORMS_B_ID = 85;
443         /**
444          */
445         public static final int SPECIALS_ID = 86;
446         /**
447          */
448         public static final int HALFWIDTH_AND_FULLWIDTH_FORMS_ID = 87;
449         /**
450          */
451         public static final int OLD_ITALIC_ID = 88;
452         /**
453          */
454         public static final int GOTHIC_ID = 89;
455         /**
456          */
457         public static final int DESERET_ID = 90;
458         /**
459          */
460         public static final int BYZANTINE_MUSICAL_SYMBOLS_ID = 91;
461         /**
462          */
463         public static final int MUSICAL_SYMBOLS_ID = 92;
464         /**
465          */
466         public static final int MATHEMATICAL_ALPHANUMERIC_SYMBOLS_ID = 93;
467         /**
468          */
469         public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_ID = 94;
470         /**
471          */
472         public static final int
473         CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_ID = 95;
474         /**
475          */
476         public static final int TAGS_ID = 96;
477 
478         // New blocks in Unicode 3.2
479 
480         /**
481          * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement".
482          */
483         public static final int CYRILLIC_SUPPLEMENTARY_ID = 97;
484         /**
485          * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement".
486          */
487 
488         public static final int CYRILLIC_SUPPLEMENT_ID = 97;
489         /**
490          */
491         public static final int TAGALOG_ID = 98;
492         /**
493          */
494         public static final int HANUNOO_ID = 99;
495         /**
496          */
497         public static final int BUHID_ID = 100;
498         /**
499          */
500         public static final int TAGBANWA_ID = 101;
501         /**
502          */
503         public static final int MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A_ID = 102;
504         /**
505          */
506         public static final int SUPPLEMENTAL_ARROWS_A_ID = 103;
507         /**
508          */
509         public static final int SUPPLEMENTAL_ARROWS_B_ID = 104;
510         /**
511          */
512         public static final int MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B_ID = 105;
513         /**
514          */
515         public static final int SUPPLEMENTAL_MATHEMATICAL_OPERATORS_ID = 106;
516         /**
517          */
518         public static final int KATAKANA_PHONETIC_EXTENSIONS_ID = 107;
519         /**
520          */
521         public static final int VARIATION_SELECTORS_ID = 108;
522         /**
523          */
524         public static final int SUPPLEMENTARY_PRIVATE_USE_AREA_A_ID = 109;
525         /**
526          */
527         public static final int SUPPLEMENTARY_PRIVATE_USE_AREA_B_ID = 110;
528 
529         /**
530          */
531         public static final int LIMBU_ID = 111; /*[1900]*/
532         /**
533          */
534         public static final int TAI_LE_ID = 112; /*[1950]*/
535         /**
536          */
537         public static final int KHMER_SYMBOLS_ID = 113; /*[19E0]*/
538         /**
539          */
540         public static final int PHONETIC_EXTENSIONS_ID = 114; /*[1D00]*/
541         /**
542          */
543         public static final int MISCELLANEOUS_SYMBOLS_AND_ARROWS_ID = 115; /*[2B00]*/
544         /**
545          */
546         public static final int YIJING_HEXAGRAM_SYMBOLS_ID = 116; /*[4DC0]*/
547         /**
548          */
549         public static final int LINEAR_B_SYLLABARY_ID = 117; /*[10000]*/
550         /**
551          */
552         public static final int LINEAR_B_IDEOGRAMS_ID = 118; /*[10080]*/
553         /**
554          */
555         public static final int AEGEAN_NUMBERS_ID = 119; /*[10100]*/
556         /**
557          */
558         public static final int UGARITIC_ID = 120; /*[10380]*/
559         /**
560          */
561         public static final int SHAVIAN_ID = 121; /*[10450]*/
562         /**
563          */
564         public static final int OSMANYA_ID = 122; /*[10480]*/
565         /**
566          */
567         public static final int CYPRIOT_SYLLABARY_ID = 123; /*[10800]*/
568         /**
569          */
570         public static final int TAI_XUAN_JING_SYMBOLS_ID = 124; /*[1D300]*/
571         /**
572          */
573         public static final int VARIATION_SELECTORS_SUPPLEMENT_ID = 125; /*[E0100]*/
574 
575         /* New blocks in Unicode 4.1 */
576 
577         /**
578          */
579         public static final int ANCIENT_GREEK_MUSICAL_NOTATION_ID = 126; /*[1D200]*/
580 
581         /**
582          */
583         public static final int ANCIENT_GREEK_NUMBERS_ID = 127; /*[10140]*/
584 
585         /**
586          */
587         public static final int ARABIC_SUPPLEMENT_ID = 128; /*[0750]*/
588 
589         /**
590          */
591         public static final int BUGINESE_ID = 129; /*[1A00]*/
592 
593         /**
594          */
595         public static final int CJK_STROKES_ID = 130; /*[31C0]*/
596 
597         /**
598          */
599         public static final int COMBINING_DIACRITICAL_MARKS_SUPPLEMENT_ID = 131; /*[1DC0]*/
600 
601         /**
602          */
603         public static final int COPTIC_ID = 132; /*[2C80]*/
604 
605         /**
606          */
607         public static final int ETHIOPIC_EXTENDED_ID = 133; /*[2D80]*/
608 
609         /**
610          */
611         public static final int ETHIOPIC_SUPPLEMENT_ID = 134; /*[1380]*/
612 
613         /**
614          */
615         public static final int GEORGIAN_SUPPLEMENT_ID = 135; /*[2D00]*/
616 
617         /**
618          */
619         public static final int GLAGOLITIC_ID = 136; /*[2C00]*/
620 
621         /**
622          */
623         public static final int KHAROSHTHI_ID = 137; /*[10A00]*/
624 
625         /**
626          */
627         public static final int MODIFIER_TONE_LETTERS_ID = 138; /*[A700]*/
628 
629         /**
630          */
631         public static final int NEW_TAI_LUE_ID = 139; /*[1980]*/
632 
633         /**
634          */
635         public static final int OLD_PERSIAN_ID = 140; /*[103A0]*/
636 
637         /**
638          */
639         public static final int PHONETIC_EXTENSIONS_SUPPLEMENT_ID = 141; /*[1D80]*/
640 
641         /**
642          */
643         public static final int SUPPLEMENTAL_PUNCTUATION_ID = 142; /*[2E00]*/
644 
645         /**
646          */
647         public static final int SYLOTI_NAGRI_ID = 143; /*[A800]*/
648 
649         /**
650          */
651         public static final int TIFINAGH_ID = 144; /*[2D30]*/
652 
653         /**
654          */
655         public static final int VERTICAL_FORMS_ID = 145; /*[FE10]*/
656 
657         /* New blocks in Unicode 5.0 */
658 
659         /**
660          */
661         public static final int NKO_ID = 146; /*[07C0]*/
662         /**
663          */
664         public static final int BALINESE_ID = 147; /*[1B00]*/
665         /**
666          */
667         public static final int LATIN_EXTENDED_C_ID = 148; /*[2C60]*/
668         /**
669          */
670         public static final int LATIN_EXTENDED_D_ID = 149; /*[A720]*/
671         /**
672          */
673         public static final int PHAGS_PA_ID = 150; /*[A840]*/
674         /**
675          */
676         public static final int PHOENICIAN_ID = 151; /*[10900]*/
677         /**
678          */
679         public static final int CUNEIFORM_ID = 152; /*[12000]*/
680         /**
681          */
682         public static final int CUNEIFORM_NUMBERS_AND_PUNCTUATION_ID = 153; /*[12400]*/
683         /**
684          */
685         public static final int COUNTING_ROD_NUMERALS_ID = 154; /*[1D360]*/
686 
687         /**
688          */
689         public static final int SUNDANESE_ID = 155; /* [1B80] */
690 
691         /**
692          */
693         public static final int LEPCHA_ID = 156; /* [1C00] */
694 
695         /**
696          */
697         public static final int OL_CHIKI_ID = 157; /* [1C50] */
698 
699         /**
700          */
701         public static final int CYRILLIC_EXTENDED_A_ID = 158; /* [2DE0] */
702 
703         /**
704          */
705         public static final int VAI_ID = 159; /* [A500] */
706 
707         /**
708          */
709         public static final int CYRILLIC_EXTENDED_B_ID = 160; /* [A640] */
710 
711         /**
712          */
713         public static final int SAURASHTRA_ID = 161; /* [A880] */
714 
715         /**
716          */
717         public static final int KAYAH_LI_ID = 162; /* [A900] */
718 
719         /**
720          */
721         public static final int REJANG_ID = 163; /* [A930] */
722 
723         /**
724          */
725         public static final int CHAM_ID = 164; /* [AA00] */
726 
727         /**
728          */
729         public static final int ANCIENT_SYMBOLS_ID = 165; /* [10190] */
730 
731         /**
732          */
733         public static final int PHAISTOS_DISC_ID = 166; /* [101D0] */
734 
735         /**
736          */
737         public static final int LYCIAN_ID = 167; /* [10280] */
738 
739         /**
740          */
741         public static final int CARIAN_ID = 168; /* [102A0] */
742 
743         /**
744          */
745         public static final int LYDIAN_ID = 169; /* [10920] */
746 
747         /**
748          */
749         public static final int MAHJONG_TILES_ID = 170; /* [1F000] */
750 
751         /**
752          */
753         public static final int DOMINO_TILES_ID = 171; /* [1F030] */
754 
755         /* New blocks in Unicode 5.2 */
756 
757         /***/
758         public static final int SAMARITAN_ID = 172; /*[0800]*/
759         /***/
760         public static final int UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_ID = 173; /*[18B0]*/
761         /***/
762         public static final int TAI_THAM_ID = 174; /*[1A20]*/
763         /***/
764         public static final int VEDIC_EXTENSIONS_ID = 175; /*[1CD0]*/
765         /***/
766         public static final int LISU_ID = 176; /*[A4D0]*/
767         /***/
768         public static final int BAMUM_ID = 177; /*[A6A0]*/
769         /***/
770         public static final int COMMON_INDIC_NUMBER_FORMS_ID = 178; /*[A830]*/
771         /***/
772         public static final int DEVANAGARI_EXTENDED_ID = 179; /*[A8E0]*/
773         /***/
774         public static final int HANGUL_JAMO_EXTENDED_A_ID = 180; /*[A960]*/
775         /***/
776         public static final int JAVANESE_ID = 181; /*[A980]*/
777         /***/
778         public static final int MYANMAR_EXTENDED_A_ID = 182; /*[AA60]*/
779         /***/
780         public static final int TAI_VIET_ID = 183; /*[AA80]*/
781         /***/
782         public static final int MEETEI_MAYEK_ID = 184; /*[ABC0]*/
783         /***/
784         public static final int HANGUL_JAMO_EXTENDED_B_ID = 185; /*[D7B0]*/
785         /***/
786         public static final int IMPERIAL_ARAMAIC_ID = 186; /*[10840]*/
787         /***/
788         public static final int OLD_SOUTH_ARABIAN_ID = 187; /*[10A60]*/
789         /***/
790         public static final int AVESTAN_ID = 188; /*[10B00]*/
791         /***/
792         public static final int INSCRIPTIONAL_PARTHIAN_ID = 189; /*[10B40]*/
793         /***/
794         public static final int INSCRIPTIONAL_PAHLAVI_ID = 190; /*[10B60]*/
795         /***/
796         public static final int OLD_TURKIC_ID = 191; /*[10C00]*/
797         /***/
798         public static final int RUMI_NUMERAL_SYMBOLS_ID = 192; /*[10E60]*/
799         /***/
800         public static final int KAITHI_ID = 193; /*[11080]*/
801         /***/
802         public static final int EGYPTIAN_HIEROGLYPHS_ID = 194; /*[13000]*/
803         /***/
804         public static final int ENCLOSED_ALPHANUMERIC_SUPPLEMENT_ID = 195; /*[1F100]*/
805         /***/
806         public static final int ENCLOSED_IDEOGRAPHIC_SUPPLEMENT_ID = 196; /*[1F200]*/
807         /***/
808         public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_ID = 197; /*[2A700]*/
809 
810         /* New blocks in Unicode 6.0 */
811 
812         /***/
813         public static final int MANDAIC_ID = 198; /*[0840]*/
814         /***/
815         public static final int BATAK_ID = 199; /*[1BC0]*/
816         /***/
817         public static final int ETHIOPIC_EXTENDED_A_ID = 200; /*[AB00]*/
818         /***/
819         public static final int BRAHMI_ID = 201; /*[11000]*/
820         /***/
821         public static final int BAMUM_SUPPLEMENT_ID = 202; /*[16800]*/
822         /***/
823         public static final int KANA_SUPPLEMENT_ID = 203; /*[1B000]*/
824         /***/
825         public static final int PLAYING_CARDS_ID = 204; /*[1F0A0]*/
826         /***/
827         public static final int MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS_ID = 205; /*[1F300]*/
828         /***/
829         public static final int EMOTICONS_ID = 206; /*[1F600]*/
830         /***/
831         public static final int TRANSPORT_AND_MAP_SYMBOLS_ID = 207; /*[1F680]*/
832         /***/
833         public static final int ALCHEMICAL_SYMBOLS_ID = 208; /*[1F700]*/
834         /***/
835         public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_ID = 209; /*[2B740]*/
836 
837         /* New blocks in Unicode 6.1 */
838 
839         /***/
840         public static final int ARABIC_EXTENDED_A_ID = 210; /*[08A0]*/
841         /***/
842         public static final int ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS_ID = 211; /*[1EE00]*/
843         /***/
844         public static final int CHAKMA_ID = 212; /*[11100]*/
845         /***/
846         public static final int MEETEI_MAYEK_EXTENSIONS_ID = 213; /*[AAE0]*/
847         /***/
848         public static final int MEROITIC_CURSIVE_ID = 214; /*[109A0]*/
849         /***/
850         public static final int MEROITIC_HIEROGLYPHS_ID = 215; /*[10980]*/
851         /***/
852         public static final int MIAO_ID = 216; /*[16F00]*/
853         /***/
854         public static final int SHARADA_ID = 217; /*[11180]*/
855         /***/
856         public static final int SORA_SOMPENG_ID = 218; /*[110D0]*/
857         /***/
858         public static final int SUNDANESE_SUPPLEMENT_ID = 219; /*[1CC0]*/
859         /***/
860         public static final int TAKRI_ID = 220; /*[11680]*/
861 
        /*
         * New blocks in Unicode 7.0 (IDs 221 through 252).
         * NOTE(review): the bracketed hex value after each ID presumably records the block's
         * first code point -- confirm against the Unicode block data.
         */

        /***/
        public static final int BASSA_VAH_ID = 221; /*[16AD0]*/
        /***/
        public static final int CAUCASIAN_ALBANIAN_ID = 222; /*[10530]*/
        /***/
        public static final int COPTIC_EPACT_NUMBERS_ID = 223; /*[102E0]*/
        /***/
        public static final int COMBINING_DIACRITICAL_MARKS_EXTENDED_ID = 224; /*[1AB0]*/
        /***/
        public static final int DUPLOYAN_ID = 225; /*[1BC00]*/
        /***/
        public static final int ELBASAN_ID = 226; /*[10500]*/
        /***/
        public static final int GEOMETRIC_SHAPES_EXTENDED_ID = 227; /*[1F780]*/
        /***/
        public static final int GRANTHA_ID = 228; /*[11300]*/
        /***/
        public static final int KHOJKI_ID = 229; /*[11200]*/
        /***/
        public static final int KHUDAWADI_ID = 230; /*[112B0]*/
        /***/
        public static final int LATIN_EXTENDED_E_ID = 231; /*[AB30]*/
        /***/
        public static final int LINEAR_A_ID = 232; /*[10600]*/
        /***/
        public static final int MAHAJANI_ID = 233; /*[11150]*/
        /***/
        public static final int MANICHAEAN_ID = 234; /*[10AC0]*/
        /***/
        public static final int MENDE_KIKAKUI_ID = 235; /*[1E800]*/
        /***/
        public static final int MODI_ID = 236; /*[11600]*/
        /***/
        public static final int MRO_ID = 237; /*[16A40]*/
        /***/
        public static final int MYANMAR_EXTENDED_B_ID = 238; /*[A9E0]*/
        /***/
        public static final int NABATAEAN_ID = 239; /*[10880]*/
        /***/
        public static final int OLD_NORTH_ARABIAN_ID = 240; /*[10A80]*/
        /***/
        public static final int OLD_PERMIC_ID = 241; /*[10350]*/
        /***/
        public static final int ORNAMENTAL_DINGBATS_ID = 242; /*[1F650]*/
        /***/
        public static final int PAHAWH_HMONG_ID = 243; /*[16B00]*/
        /***/
        public static final int PALMYRENE_ID = 244; /*[10860]*/
        /***/
        public static final int PAU_CIN_HAU_ID = 245; /*[11AC0]*/
        /***/
        public static final int PSALTER_PAHLAVI_ID = 246; /*[10B80]*/
        /***/
        public static final int SHORTHAND_FORMAT_CONTROLS_ID = 247; /*[1BCA0]*/
        /***/
        public static final int SIDDHAM_ID = 248; /*[11580]*/
        /***/
        public static final int SINHALA_ARCHAIC_NUMBERS_ID = 249; /*[111E0]*/
        /***/
        public static final int SUPPLEMENTAL_ARROWS_C_ID = 250; /*[1F800]*/
        /***/
        public static final int TIRHUTA_ID = 251; /*[11480]*/
        /***/
        public static final int WARANG_CITI_ID = 252; /*[118A0]*/
928 
        /*
         * New blocks in Unicode 8.0 (IDs 253 through 262).
         * NOTE(review): the bracketed hex value after each ID presumably records the block's
         * first code point -- confirm against the Unicode block data.
         */

        /***/
        public static final int AHOM_ID = 253; /*[11700]*/
        /***/
        public static final int ANATOLIAN_HIEROGLYPHS_ID = 254; /*[14400]*/
        /***/
        public static final int CHEROKEE_SUPPLEMENT_ID = 255; /*[AB70]*/
        /***/
        public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E_ID = 256; /*[2B820]*/
        /***/
        public static final int EARLY_DYNASTIC_CUNEIFORM_ID = 257; /*[12480]*/
        /***/
        public static final int HATRAN_ID = 258; /*[108E0]*/
        /***/
        public static final int MULTANI_ID = 259; /*[11280]*/
        /***/
        public static final int OLD_HUNGARIAN_ID = 260; /*[10C80]*/
        /***/
        public static final int SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS_ID = 261; /*[1F900]*/
        /***/
        public static final int SUTTON_SIGNWRITING_ID = 262; /*[1D800]*/
951 
        /**
         * Number of block ID values: one more than {@link #SUTTON_SIGNWRITING_ID} (262),
         * the last ID declared before it. It is also the length of the {@code BLOCKS_} array.
         * @hide unsupported on Android
         */
        public static final int COUNT = 263;
956 
        // block objects ----------------------------------------------------
958 
959         /**
960          * Array of UnicodeBlocks, for easy access in getInstance(int)
961          */
962         private final static UnicodeBlock BLOCKS_[] = new UnicodeBlock[COUNT];
963 
964         /**
965          */
966         public static final UnicodeBlock NO_BLOCK
967         = new UnicodeBlock("NO_BLOCK", 0);
968 
969         /**
970          */
971         public static final UnicodeBlock BASIC_LATIN
972         = new UnicodeBlock("BASIC_LATIN", BASIC_LATIN_ID);
973         /**
974          */
975         public static final UnicodeBlock LATIN_1_SUPPLEMENT
976         = new UnicodeBlock("LATIN_1_SUPPLEMENT", LATIN_1_SUPPLEMENT_ID);
977         /**
978          */
979         public static final UnicodeBlock LATIN_EXTENDED_A
980         = new UnicodeBlock("LATIN_EXTENDED_A", LATIN_EXTENDED_A_ID);
981         /**
982          */
983         public static final UnicodeBlock LATIN_EXTENDED_B
984         = new UnicodeBlock("LATIN_EXTENDED_B", LATIN_EXTENDED_B_ID);
985         /**
986          */
987         public static final UnicodeBlock IPA_EXTENSIONS
988         = new UnicodeBlock("IPA_EXTENSIONS", IPA_EXTENSIONS_ID);
989         /**
990          */
991         public static final UnicodeBlock SPACING_MODIFIER_LETTERS
992         = new UnicodeBlock("SPACING_MODIFIER_LETTERS", SPACING_MODIFIER_LETTERS_ID);
993         /**
994          */
995         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS
996         = new UnicodeBlock("COMBINING_DIACRITICAL_MARKS", COMBINING_DIACRITICAL_MARKS_ID);
997         /**
998          * Unicode 3.2 renames this block to "Greek and Coptic".
999          */
1000         public static final UnicodeBlock GREEK
1001         = new UnicodeBlock("GREEK", GREEK_ID);
1002         /**
1003          */
1004         public static final UnicodeBlock CYRILLIC
1005         = new UnicodeBlock("CYRILLIC", CYRILLIC_ID);
1006         /**
1007          */
1008         public static final UnicodeBlock ARMENIAN
1009         = new UnicodeBlock("ARMENIAN", ARMENIAN_ID);
1010         /**
1011          */
1012         public static final UnicodeBlock HEBREW
1013         = new UnicodeBlock("HEBREW", HEBREW_ID);
1014         /**
1015          */
1016         public static final UnicodeBlock ARABIC
1017         = new UnicodeBlock("ARABIC", ARABIC_ID);
1018         /**
1019          */
1020         public static final UnicodeBlock SYRIAC
1021         = new UnicodeBlock("SYRIAC", SYRIAC_ID);
1022         /**
1023          */
1024         public static final UnicodeBlock THAANA
1025         = new UnicodeBlock("THAANA", THAANA_ID);
1026         /**
1027          */
1028         public static final UnicodeBlock DEVANAGARI
1029         = new UnicodeBlock("DEVANAGARI", DEVANAGARI_ID);
1030         /**
1031          */
1032         public static final UnicodeBlock BENGALI
1033         = new UnicodeBlock("BENGALI", BENGALI_ID);
1034         /**
1035          */
1036         public static final UnicodeBlock GURMUKHI
1037         = new UnicodeBlock("GURMUKHI", GURMUKHI_ID);
1038         /**
1039          */
1040         public static final UnicodeBlock GUJARATI
1041         = new UnicodeBlock("GUJARATI", GUJARATI_ID);
1042         /**
1043          */
1044         public static final UnicodeBlock ORIYA
1045         = new UnicodeBlock("ORIYA", ORIYA_ID);
1046         /**
1047          */
1048         public static final UnicodeBlock TAMIL
1049         = new UnicodeBlock("TAMIL", TAMIL_ID);
1050         /**
1051          */
1052         public static final UnicodeBlock TELUGU
1053         = new UnicodeBlock("TELUGU", TELUGU_ID);
1054         /**
1055          */
1056         public static final UnicodeBlock KANNADA
1057         = new UnicodeBlock("KANNADA", KANNADA_ID);
1058         /**
1059          */
1060         public static final UnicodeBlock MALAYALAM
1061         = new UnicodeBlock("MALAYALAM", MALAYALAM_ID);
1062         /**
1063          */
1064         public static final UnicodeBlock SINHALA
1065         = new UnicodeBlock("SINHALA", SINHALA_ID);
1066         /**
1067          */
1068         public static final UnicodeBlock THAI
1069         = new UnicodeBlock("THAI", THAI_ID);
1070         /**
1071          */
1072         public static final UnicodeBlock LAO
1073         = new UnicodeBlock("LAO", LAO_ID);
1074         /**
1075          */
1076         public static final UnicodeBlock TIBETAN
1077         = new UnicodeBlock("TIBETAN", TIBETAN_ID);
1078         /**
1079          */
1080         public static final UnicodeBlock MYANMAR
1081         = new UnicodeBlock("MYANMAR", MYANMAR_ID);
1082         /**
1083          */
1084         public static final UnicodeBlock GEORGIAN
1085         = new UnicodeBlock("GEORGIAN", GEORGIAN_ID);
1086         /**
1087          */
1088         public static final UnicodeBlock HANGUL_JAMO
1089         = new UnicodeBlock("HANGUL_JAMO", HANGUL_JAMO_ID);
1090         /**
1091          */
1092         public static final UnicodeBlock ETHIOPIC
1093         = new UnicodeBlock("ETHIOPIC", ETHIOPIC_ID);
1094         /**
1095          */
1096         public static final UnicodeBlock CHEROKEE
1097         = new UnicodeBlock("CHEROKEE", CHEROKEE_ID);
1098         /**
1099          */
1100         public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS
1101         = new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS",
1102                 UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_ID);
1103         /**
1104          */
1105         public static final UnicodeBlock OGHAM
1106         = new UnicodeBlock("OGHAM", OGHAM_ID);
1107         /**
1108          */
1109         public static final UnicodeBlock RUNIC
1110         = new UnicodeBlock("RUNIC", RUNIC_ID);
1111         /**
1112          */
1113         public static final UnicodeBlock KHMER
1114         = new UnicodeBlock("KHMER", KHMER_ID);
1115         /**
1116          */
1117         public static final UnicodeBlock MONGOLIAN
1118         = new UnicodeBlock("MONGOLIAN", MONGOLIAN_ID);
1119         /**
1120          */
1121         public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL
1122         = new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL", LATIN_EXTENDED_ADDITIONAL_ID);
1123         /**
1124          */
1125         public static final UnicodeBlock GREEK_EXTENDED
1126         = new UnicodeBlock("GREEK_EXTENDED", GREEK_EXTENDED_ID);
1127         /**
1128          */
1129         public static final UnicodeBlock GENERAL_PUNCTUATION
1130         = new UnicodeBlock("GENERAL_PUNCTUATION", GENERAL_PUNCTUATION_ID);
1131         /**
1132          */
1133         public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS
1134         = new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS", SUPERSCRIPTS_AND_SUBSCRIPTS_ID);
1135         /**
1136          */
1137         public static final UnicodeBlock CURRENCY_SYMBOLS
1138         = new UnicodeBlock("CURRENCY_SYMBOLS", CURRENCY_SYMBOLS_ID);
1139         /**
1140          * Unicode 3.2 renames this block to "Combining Diacritical Marks for
1141          * Symbols".
1142          */
1143         public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS
1144         = new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS", COMBINING_MARKS_FOR_SYMBOLS_ID);
1145         /**
1146          */
1147         public static final UnicodeBlock LETTERLIKE_SYMBOLS
1148         = new UnicodeBlock("LETTERLIKE_SYMBOLS", LETTERLIKE_SYMBOLS_ID);
1149         /**
1150          */
1151         public static final UnicodeBlock NUMBER_FORMS
1152         = new UnicodeBlock("NUMBER_FORMS", NUMBER_FORMS_ID);
1153         /**
1154          */
1155         public static final UnicodeBlock ARROWS
1156         = new UnicodeBlock("ARROWS", ARROWS_ID);
1157         /**
1158          */
1159         public static final UnicodeBlock MATHEMATICAL_OPERATORS
1160         = new UnicodeBlock("MATHEMATICAL_OPERATORS", MATHEMATICAL_OPERATORS_ID);
1161         /**
1162          */
1163         public static final UnicodeBlock MISCELLANEOUS_TECHNICAL
1164         = new UnicodeBlock("MISCELLANEOUS_TECHNICAL", MISCELLANEOUS_TECHNICAL_ID);
1165         /**
1166          */
1167         public static final UnicodeBlock CONTROL_PICTURES
1168         = new UnicodeBlock("CONTROL_PICTURES", CONTROL_PICTURES_ID);
1169         /**
1170          */
1171         public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION
1172         = new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION", OPTICAL_CHARACTER_RECOGNITION_ID);
1173         /**
1174          */
1175         public static final UnicodeBlock ENCLOSED_ALPHANUMERICS
1176         = new UnicodeBlock("ENCLOSED_ALPHANUMERICS", ENCLOSED_ALPHANUMERICS_ID);
1177         /**
1178          */
1179         public static final UnicodeBlock BOX_DRAWING
1180         = new UnicodeBlock("BOX_DRAWING", BOX_DRAWING_ID);
1181         /**
1182          */
1183         public static final UnicodeBlock BLOCK_ELEMENTS
1184         = new UnicodeBlock("BLOCK_ELEMENTS", BLOCK_ELEMENTS_ID);
1185         /**
1186          */
1187         public static final UnicodeBlock GEOMETRIC_SHAPES
1188         = new UnicodeBlock("GEOMETRIC_SHAPES", GEOMETRIC_SHAPES_ID);
1189         /**
1190          */
1191         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS
1192         = new UnicodeBlock("MISCELLANEOUS_SYMBOLS", MISCELLANEOUS_SYMBOLS_ID);
1193         /**
1194          */
1195         public static final UnicodeBlock DINGBATS
1196         = new UnicodeBlock("DINGBATS", DINGBATS_ID);
1197         /**
1198          */
1199         public static final UnicodeBlock BRAILLE_PATTERNS
1200         = new UnicodeBlock("BRAILLE_PATTERNS", BRAILLE_PATTERNS_ID);
1201         /**
1202          */
1203         public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT
1204         = new UnicodeBlock("CJK_RADICALS_SUPPLEMENT", CJK_RADICALS_SUPPLEMENT_ID);
1205         /**
1206          */
1207         public static final UnicodeBlock KANGXI_RADICALS
1208         = new UnicodeBlock("KANGXI_RADICALS", KANGXI_RADICALS_ID);
1209         /**
1210          */
1211         public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS
1212         = new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS",
1213                 IDEOGRAPHIC_DESCRIPTION_CHARACTERS_ID);
1214         /**
1215          */
1216         public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION
1217         = new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION", CJK_SYMBOLS_AND_PUNCTUATION_ID);
1218         /**
1219          */
1220         public static final UnicodeBlock HIRAGANA
1221         = new UnicodeBlock("HIRAGANA", HIRAGANA_ID);
1222         /**
1223          */
1224         public static final UnicodeBlock KATAKANA
1225         = new UnicodeBlock("KATAKANA", KATAKANA_ID);
1226         /**
1227          */
1228         public static final UnicodeBlock BOPOMOFO
1229         = new UnicodeBlock("BOPOMOFO", BOPOMOFO_ID);
1230         /**
1231          */
1232         public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO
1233         = new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO", HANGUL_COMPATIBILITY_JAMO_ID);
1234         /**
1235          */
1236         public static final UnicodeBlock KANBUN
1237         = new UnicodeBlock("KANBUN", KANBUN_ID);
1238         /**
1239          */
1240         public static final UnicodeBlock BOPOMOFO_EXTENDED
1241         = new UnicodeBlock("BOPOMOFO_EXTENDED", BOPOMOFO_EXTENDED_ID);
1242         /**
1243          */
1244         public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS
1245         = new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS",
1246                 ENCLOSED_CJK_LETTERS_AND_MONTHS_ID);
1247         /**
1248          */
1249         public static final UnicodeBlock CJK_COMPATIBILITY
1250         = new UnicodeBlock("CJK_COMPATIBILITY", CJK_COMPATIBILITY_ID);
1251         /**
1252          */
1253         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
1254         = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A",
1255                 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_ID);
1256         /**
1257          */
1258         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS
1259         = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS", CJK_UNIFIED_IDEOGRAPHS_ID);
1260         /**
1261          */
1262         public static final UnicodeBlock YI_SYLLABLES
1263         = new UnicodeBlock("YI_SYLLABLES", YI_SYLLABLES_ID);
1264         /**
1265          */
1266         public static final UnicodeBlock YI_RADICALS
1267         = new UnicodeBlock("YI_RADICALS", YI_RADICALS_ID);
1268         /**
1269          */
1270         public static final UnicodeBlock HANGUL_SYLLABLES
1271         = new UnicodeBlock("HANGUL_SYLLABLES", HANGUL_SYLLABLES_ID);
1272         /**
1273          */
1274         public static final UnicodeBlock HIGH_SURROGATES
1275         = new UnicodeBlock("HIGH_SURROGATES", HIGH_SURROGATES_ID);
1276         /**
1277          */
1278         public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES
1279         = new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES", HIGH_PRIVATE_USE_SURROGATES_ID);
1280         /**
1281          */
1282         public static final UnicodeBlock LOW_SURROGATES
1283         = new UnicodeBlock("LOW_SURROGATES", LOW_SURROGATES_ID);
1284         /**
1285          * Same as public static final int PRIVATE_USE.
1286          * Until Unicode 3.1.1; the corresponding block name was "Private Use";
1287          * and multiple code point ranges had this block.
1288          * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area"
1289          * and adds separate blocks for the supplementary PUAs.
1290          */
1291         public static final UnicodeBlock PRIVATE_USE_AREA
1292         = new UnicodeBlock("PRIVATE_USE_AREA",  78);
1293         /**
1294          * Same as public static final int PRIVATE_USE_AREA.
1295          * Until Unicode 3.1.1; the corresponding block name was "Private Use";
1296          * and multiple code point ranges had this block.
1297          * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area"
1298          * and adds separate blocks for the supplementary PUAs.
1299          */
1300         public static final UnicodeBlock PRIVATE_USE
1301         = PRIVATE_USE_AREA;
1302         /**
1303          */
1304         public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS
1305         = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS", CJK_COMPATIBILITY_IDEOGRAPHS_ID);
1306         /**
1307          */
1308         public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS
1309         = new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS", ALPHABETIC_PRESENTATION_FORMS_ID);
1310         /**
1311          */
1312         public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A
1313         = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A", ARABIC_PRESENTATION_FORMS_A_ID);
1314         /**
1315          */
1316         public static final UnicodeBlock COMBINING_HALF_MARKS
1317         = new UnicodeBlock("COMBINING_HALF_MARKS", COMBINING_HALF_MARKS_ID);
1318         /**
1319          */
1320         public static final UnicodeBlock CJK_COMPATIBILITY_FORMS
1321         = new UnicodeBlock("CJK_COMPATIBILITY_FORMS", CJK_COMPATIBILITY_FORMS_ID);
1322         /**
1323          */
1324         public static final UnicodeBlock SMALL_FORM_VARIANTS
1325         = new UnicodeBlock("SMALL_FORM_VARIANTS", SMALL_FORM_VARIANTS_ID);
1326         /**
1327          */
1328         public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B
1329         = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B", ARABIC_PRESENTATION_FORMS_B_ID);
1330         /**
1331          */
1332         public static final UnicodeBlock SPECIALS
1333         = new UnicodeBlock("SPECIALS", SPECIALS_ID);
1334         /**
1335          */
1336         public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS
1337         = new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS", HALFWIDTH_AND_FULLWIDTH_FORMS_ID);
1338         /**
1339          */
1340         public static final UnicodeBlock OLD_ITALIC
1341         = new UnicodeBlock("OLD_ITALIC", OLD_ITALIC_ID);
1342         /**
1343          */
1344         public static final UnicodeBlock GOTHIC
1345         = new UnicodeBlock("GOTHIC", GOTHIC_ID);
1346         /**
1347          */
1348         public static final UnicodeBlock DESERET
1349         = new UnicodeBlock("DESERET", DESERET_ID);
1350         /**
1351          */
1352         public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS
1353         = new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS", BYZANTINE_MUSICAL_SYMBOLS_ID);
1354         /**
1355          */
1356         public static final UnicodeBlock MUSICAL_SYMBOLS
1357         = new UnicodeBlock("MUSICAL_SYMBOLS", MUSICAL_SYMBOLS_ID);
1358         /**
1359          */
1360         public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS
1361         = new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS",
1362                 MATHEMATICAL_ALPHANUMERIC_SYMBOLS_ID);
1363         /**
1364          */
1365         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B
1366         = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B",
1367                 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_ID);
1368         /**
1369          */
1370         public static final UnicodeBlock
1371         CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT
1372         = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT",
1373                 CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_ID);
1374         /**
1375          */
1376         public static final UnicodeBlock TAGS
1377         = new UnicodeBlock("TAGS", TAGS_ID);
1378 
1379         // New blocks in Unicode 3.2
1380 
1381         /**
1382          * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement".
1383          */
1384         public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY
1385         = new UnicodeBlock("CYRILLIC_SUPPLEMENTARY", CYRILLIC_SUPPLEMENTARY_ID);
1386         /**
1387          * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement".
1388          */
1389         public static final UnicodeBlock CYRILLIC_SUPPLEMENT
1390         = new UnicodeBlock("CYRILLIC_SUPPLEMENT", CYRILLIC_SUPPLEMENT_ID);
1391         /**
1392          */
1393         public static final UnicodeBlock TAGALOG
1394         = new UnicodeBlock("TAGALOG", TAGALOG_ID);
1395         /**
1396          */
1397         public static final UnicodeBlock HANUNOO
1398         = new UnicodeBlock("HANUNOO", HANUNOO_ID);
1399         /**
1400          */
1401         public static final UnicodeBlock BUHID
1402         = new UnicodeBlock("BUHID", BUHID_ID);
1403         /**
1404          */
1405         public static final UnicodeBlock TAGBANWA
1406         = new UnicodeBlock("TAGBANWA", TAGBANWA_ID);
1407         /**
1408          */
1409         public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A
1410         = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A",
1411                 MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A_ID);
1412         /**
1413          */
1414         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A
1415         = new UnicodeBlock("SUPPLEMENTAL_ARROWS_A", SUPPLEMENTAL_ARROWS_A_ID);
1416         /**
1417          */
1418         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B
1419         = new UnicodeBlock("SUPPLEMENTAL_ARROWS_B", SUPPLEMENTAL_ARROWS_B_ID);
1420         /**
1421          */
1422         public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B
1423         = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B",
1424                 MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B_ID);
1425         /**
1426          */
1427         public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS
1428         = new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS",
1429                 SUPPLEMENTAL_MATHEMATICAL_OPERATORS_ID);
1430         /**
1431          */
1432         public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS
1433         = new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS", KATAKANA_PHONETIC_EXTENSIONS_ID);
1434         /**
1435          */
1436         public static final UnicodeBlock VARIATION_SELECTORS
1437         = new UnicodeBlock("VARIATION_SELECTORS", VARIATION_SELECTORS_ID);
1438         /**
1439          */
1440         public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A
1441         = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A",
1442                 SUPPLEMENTARY_PRIVATE_USE_AREA_A_ID);
1443         /**
1444          */
1445         public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B
1446         = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B",
1447                 SUPPLEMENTARY_PRIVATE_USE_AREA_B_ID);
1448 
1449         /**
1450          */
1451         public static final UnicodeBlock LIMBU
1452         = new UnicodeBlock("LIMBU", LIMBU_ID);
1453         /**
1454          */
1455         public static final UnicodeBlock TAI_LE
1456         = new UnicodeBlock("TAI_LE", TAI_LE_ID);
1457         /**
1458          */
1459         public static final UnicodeBlock KHMER_SYMBOLS
1460         = new UnicodeBlock("KHMER_SYMBOLS", KHMER_SYMBOLS_ID);
1461 
1462         /**
1463          */
1464         public static final UnicodeBlock PHONETIC_EXTENSIONS
1465         = new UnicodeBlock("PHONETIC_EXTENSIONS", PHONETIC_EXTENSIONS_ID);
1466 
1467         /**
1468          */
1469         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS
1470         = new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS",
1471                 MISCELLANEOUS_SYMBOLS_AND_ARROWS_ID);
1472         /**
1473          */
1474         public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS
1475         = new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS", YIJING_HEXAGRAM_SYMBOLS_ID);
1476         /**
1477          */
1478         public static final UnicodeBlock LINEAR_B_SYLLABARY
1479         = new UnicodeBlock("LINEAR_B_SYLLABARY", LINEAR_B_SYLLABARY_ID);
1480         /**
1481          */
1482         public static final UnicodeBlock LINEAR_B_IDEOGRAMS
1483         = new UnicodeBlock("LINEAR_B_IDEOGRAMS", LINEAR_B_IDEOGRAMS_ID);
1484         /**
1485          */
1486         public static final UnicodeBlock AEGEAN_NUMBERS
1487         = new UnicodeBlock("AEGEAN_NUMBERS", AEGEAN_NUMBERS_ID);
1488         /**
1489          */
1490         public static final UnicodeBlock UGARITIC
1491         = new UnicodeBlock("UGARITIC", UGARITIC_ID);
1492         /**
1493          */
1494         public static final UnicodeBlock SHAVIAN
1495         = new UnicodeBlock("SHAVIAN", SHAVIAN_ID);
1496         /**
1497          */
1498         public static final UnicodeBlock OSMANYA
1499         = new UnicodeBlock("OSMANYA", OSMANYA_ID);
1500         /**
1501          */
1502         public static final UnicodeBlock CYPRIOT_SYLLABARY
1503         = new UnicodeBlock("CYPRIOT_SYLLABARY", CYPRIOT_SYLLABARY_ID);
1504         /**
1505          */
1506         public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS
1507         = new UnicodeBlock("TAI_XUAN_JING_SYMBOLS", TAI_XUAN_JING_SYMBOLS_ID);
1508 
1509         /**
1510          */
1511         public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT
1512         = new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT", VARIATION_SELECTORS_SUPPLEMENT_ID);
1513 
        /*
         * New blocks in Unicode 4.1.
         * NOTE(review): the bracketed hex value trailing each declaration presumably records
         * the block's first code point -- confirm against the Unicode block data.
         */

        /** Block object for {@link #ANCIENT_GREEK_MUSICAL_NOTATION_ID}. */
        public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION =
                new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION",
                        ANCIENT_GREEK_MUSICAL_NOTATION_ID); /*[1D200]*/

        /** Block object for {@link #ANCIENT_GREEK_NUMBERS_ID}. */
        public static final UnicodeBlock ANCIENT_GREEK_NUMBERS =
                new UnicodeBlock("ANCIENT_GREEK_NUMBERS", ANCIENT_GREEK_NUMBERS_ID); /*[10140]*/

        /** Block object for {@link #ARABIC_SUPPLEMENT_ID}. */
        public static final UnicodeBlock ARABIC_SUPPLEMENT =
                new UnicodeBlock("ARABIC_SUPPLEMENT", ARABIC_SUPPLEMENT_ID); /*[0750]*/

        /** Block object for {@link #BUGINESE_ID}. */
        public static final UnicodeBlock BUGINESE =
                new UnicodeBlock("BUGINESE", BUGINESE_ID); /*[1A00]*/

        /** Block object for {@link #CJK_STROKES_ID}. */
        public static final UnicodeBlock CJK_STROKES =
                new UnicodeBlock("CJK_STROKES", CJK_STROKES_ID); /*[31C0]*/

        /** Block object for {@link #COMBINING_DIACRITICAL_MARKS_SUPPLEMENT_ID}. */
        public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT =
                new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT",
                        COMBINING_DIACRITICAL_MARKS_SUPPLEMENT_ID); /*[1DC0]*/

        /** Block object for {@link #COPTIC_ID}. */
        public static final UnicodeBlock COPTIC = new UnicodeBlock("COPTIC", COPTIC_ID); /*[2C80]*/

        /** Block object for {@link #ETHIOPIC_EXTENDED_ID}. */
        public static final UnicodeBlock ETHIOPIC_EXTENDED =
                new UnicodeBlock("ETHIOPIC_EXTENDED", ETHIOPIC_EXTENDED_ID); /*[2D80]*/

        /** Block object for {@link #ETHIOPIC_SUPPLEMENT_ID}. */
        public static final UnicodeBlock ETHIOPIC_SUPPLEMENT =
                new UnicodeBlock("ETHIOPIC_SUPPLEMENT", ETHIOPIC_SUPPLEMENT_ID); /*[1380]*/

        /** Block object for {@link #GEORGIAN_SUPPLEMENT_ID}. */
        public static final UnicodeBlock GEORGIAN_SUPPLEMENT =
                new UnicodeBlock("GEORGIAN_SUPPLEMENT", GEORGIAN_SUPPLEMENT_ID); /*[2D00]*/

        /** Block object for {@link #GLAGOLITIC_ID}. */
        public static final UnicodeBlock GLAGOLITIC =
                new UnicodeBlock("GLAGOLITIC", GLAGOLITIC_ID); /*[2C00]*/

        /** Block object for {@link #KHAROSHTHI_ID}. */
        public static final UnicodeBlock KHAROSHTHI =
                new UnicodeBlock("KHAROSHTHI", KHAROSHTHI_ID); /*[10A00]*/

        /** Block object for {@link #MODIFIER_TONE_LETTERS_ID}. */
        public static final UnicodeBlock MODIFIER_TONE_LETTERS =
                new UnicodeBlock("MODIFIER_TONE_LETTERS", MODIFIER_TONE_LETTERS_ID); /*[A700]*/

        /** Block object for {@link #NEW_TAI_LUE_ID}. */
        public static final UnicodeBlock NEW_TAI_LUE =
                new UnicodeBlock("NEW_TAI_LUE", NEW_TAI_LUE_ID); /*[1980]*/

        /** Block object for {@link #OLD_PERSIAN_ID}. */
        public static final UnicodeBlock OLD_PERSIAN =
                new UnicodeBlock("OLD_PERSIAN", OLD_PERSIAN_ID); /*[103A0]*/

        /** Block object for {@link #PHONETIC_EXTENSIONS_SUPPLEMENT_ID}. */
        public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT =
                new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT",
                        PHONETIC_EXTENSIONS_SUPPLEMENT_ID); /*[1D80]*/

        /** Block object for {@link #SUPPLEMENTAL_PUNCTUATION_ID}. */
        public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION =
                new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION", SUPPLEMENTAL_PUNCTUATION_ID); /*[2E00]*/

        /** Block object for {@link #SYLOTI_NAGRI_ID}. */
        public static final UnicodeBlock SYLOTI_NAGRI =
                new UnicodeBlock("SYLOTI_NAGRI", SYLOTI_NAGRI_ID); /*[A800]*/

        /** Block object for {@link #TIFINAGH_ID}. */
        public static final UnicodeBlock TIFINAGH =
                new UnicodeBlock("TIFINAGH", TIFINAGH_ID); /*[2D30]*/

        /** Block object for {@link #VERTICAL_FORMS_ID}. */
        public static final UnicodeBlock VERTICAL_FORMS =
                new UnicodeBlock("VERTICAL_FORMS", VERTICAL_FORMS_ID); /*[FE10]*/
1617 
        /** Block object for {@link #NKO_ID}. */
        public static final UnicodeBlock NKO = new UnicodeBlock("NKO", NKO_ID); /*[07C0]*/
        /** Block object for {@link #BALINESE_ID}. */
        public static final UnicodeBlock BALINESE =
                new UnicodeBlock("BALINESE", BALINESE_ID); /*[1B00]*/
        /** Block object for {@link #LATIN_EXTENDED_C_ID}. */
        public static final UnicodeBlock LATIN_EXTENDED_C =
                new UnicodeBlock("LATIN_EXTENDED_C", LATIN_EXTENDED_C_ID); /*[2C60]*/
        /** Block object for {@link #LATIN_EXTENDED_D_ID}. */
        public static final UnicodeBlock LATIN_EXTENDED_D =
                new UnicodeBlock("LATIN_EXTENDED_D", LATIN_EXTENDED_D_ID); /*[A720]*/
        /** Block object for {@link #PHAGS_PA_ID}. */
        public static final UnicodeBlock PHAGS_PA =
                new UnicodeBlock("PHAGS_PA", PHAGS_PA_ID); /*[A840]*/
        /** Block object for {@link #PHOENICIAN_ID}. */
        public static final UnicodeBlock PHOENICIAN =
                new UnicodeBlock("PHOENICIAN", PHOENICIAN_ID); /*[10900]*/
        /** Block object for {@link #CUNEIFORM_ID}. */
        public static final UnicodeBlock CUNEIFORM =
                new UnicodeBlock("CUNEIFORM", CUNEIFORM_ID); /*[12000]*/
        /** Block object for {@link #CUNEIFORM_NUMBERS_AND_PUNCTUATION_ID}. */
        public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION =
                new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION",
                        CUNEIFORM_NUMBERS_AND_PUNCTUATION_ID); /*[12400]*/
        /** Block object for {@link #COUNTING_ROD_NUMERALS_ID}. */
        public static final UnicodeBlock COUNTING_ROD_NUMERALS =
                new UnicodeBlock("COUNTING_ROD_NUMERALS", COUNTING_ROD_NUMERALS_ID); /*[1D360]*/
1654 
1655         /**
1656          */
1657         public static final UnicodeBlock SUNDANESE =
1658                 new UnicodeBlock("SUNDANESE", SUNDANESE_ID); /* [1B80] */
1659 
1660         /**
1661          */
1662         public static final UnicodeBlock LEPCHA =
1663                 new UnicodeBlock("LEPCHA", LEPCHA_ID); /* [1C00] */
1664 
1665         /**
1666          */
1667         public static final UnicodeBlock OL_CHIKI =
1668                 new UnicodeBlock("OL_CHIKI", OL_CHIKI_ID); /* [1C50] */
1669 
1670         /**
1671          */
1672         public static final UnicodeBlock CYRILLIC_EXTENDED_A =
1673                 new UnicodeBlock("CYRILLIC_EXTENDED_A", CYRILLIC_EXTENDED_A_ID); /* [2DE0] */
1674 
1675         /**
1676          */
1677         public static final UnicodeBlock VAI = new UnicodeBlock("VAI", VAI_ID); /* [A500] */
1678 
1679         /**
1680          */
1681         public static final UnicodeBlock CYRILLIC_EXTENDED_B =
1682                 new UnicodeBlock("CYRILLIC_EXTENDED_B", CYRILLIC_EXTENDED_B_ID); /* [A640] */
1683 
1684         /**
1685          */
1686         public static final UnicodeBlock SAURASHTRA =
1687                 new UnicodeBlock("SAURASHTRA", SAURASHTRA_ID); /* [A880] */
1688 
1689         /**
1690          */
1691         public static final UnicodeBlock KAYAH_LI =
1692                 new UnicodeBlock("KAYAH_LI", KAYAH_LI_ID); /* [A900] */
1693 
1694         /**
1695          */
1696         public static final UnicodeBlock REJANG =
1697                 new UnicodeBlock("REJANG", REJANG_ID); /* [A930] */
1698 
1699         /**
1700          */
1701         public static final UnicodeBlock CHAM =
1702                 new UnicodeBlock("CHAM", CHAM_ID); /* [AA00] */
1703 
1704         /**
1705          */
1706         public static final UnicodeBlock ANCIENT_SYMBOLS =
1707                 new UnicodeBlock("ANCIENT_SYMBOLS", ANCIENT_SYMBOLS_ID); /* [10190] */
1708 
1709         /**
1710          */
1711         public static final UnicodeBlock PHAISTOS_DISC =
1712                 new UnicodeBlock("PHAISTOS_DISC", PHAISTOS_DISC_ID); /* [101D0] */
1713 
1714         /**
1715          */
1716         public static final UnicodeBlock LYCIAN =
1717                 new UnicodeBlock("LYCIAN", LYCIAN_ID); /* [10280] */
1718 
1719         /**
1720          */
1721         public static final UnicodeBlock CARIAN =
1722                 new UnicodeBlock("CARIAN", CARIAN_ID); /* [102A0] */
1723 
1724         /**
1725          */
1726         public static final UnicodeBlock LYDIAN =
1727                 new UnicodeBlock("LYDIAN", LYDIAN_ID); /* [10920] */
1728 
1729         /**
1730          */
1731         public static final UnicodeBlock MAHJONG_TILES =
1732                 new UnicodeBlock("MAHJONG_TILES", MAHJONG_TILES_ID); /* [1F000] */
1733 
1734         /**
1735          */
1736         public static final UnicodeBlock DOMINO_TILES =
1737                 new UnicodeBlock("DOMINO_TILES", DOMINO_TILES_ID); /* [1F030] */
1738 
1739         /* New blocks in Unicode 5.2 */
1740 
1741         /***/
1742         public static final UnicodeBlock SAMARITAN =
1743                 new UnicodeBlock("SAMARITAN", SAMARITAN_ID); /*[0800]*/
1744         /***/
1745         public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED =
1746                 new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED",
1747                         UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_ID); /*[18B0]*/
1748         /***/
1749         public static final UnicodeBlock TAI_THAM =
1750                 new UnicodeBlock("TAI_THAM", TAI_THAM_ID); /*[1A20]*/
1751         /***/
1752         public static final UnicodeBlock VEDIC_EXTENSIONS =
1753                 new UnicodeBlock("VEDIC_EXTENSIONS", VEDIC_EXTENSIONS_ID); /*[1CD0]*/
1754         /***/
1755         public static final UnicodeBlock LISU =
1756                 new UnicodeBlock("LISU", LISU_ID); /*[A4D0]*/
1757         /***/
1758         public static final UnicodeBlock BAMUM =
1759                 new UnicodeBlock("BAMUM", BAMUM_ID); /*[A6A0]*/
1760         /***/
1761         public static final UnicodeBlock COMMON_INDIC_NUMBER_FORMS =
1762                 new UnicodeBlock("COMMON_INDIC_NUMBER_FORMS", COMMON_INDIC_NUMBER_FORMS_ID); /*[A830]*/
1763         /***/
1764         public static final UnicodeBlock DEVANAGARI_EXTENDED =
1765                 new UnicodeBlock("DEVANAGARI_EXTENDED", DEVANAGARI_EXTENDED_ID); /*[A8E0]*/
1766         /***/
1767         public static final UnicodeBlock HANGUL_JAMO_EXTENDED_A =
1768                 new UnicodeBlock("HANGUL_JAMO_EXTENDED_A", HANGUL_JAMO_EXTENDED_A_ID); /*[A960]*/
1769         /***/
1770         public static final UnicodeBlock JAVANESE =
1771                 new UnicodeBlock("JAVANESE", JAVANESE_ID); /*[A980]*/
1772         /***/
1773         public static final UnicodeBlock MYANMAR_EXTENDED_A =
1774                 new UnicodeBlock("MYANMAR_EXTENDED_A", MYANMAR_EXTENDED_A_ID); /*[AA60]*/
1775         /***/
1776         public static final UnicodeBlock TAI_VIET =
1777                 new UnicodeBlock("TAI_VIET", TAI_VIET_ID); /*[AA80]*/
1778         /***/
1779         public static final UnicodeBlock MEETEI_MAYEK =
1780                 new UnicodeBlock("MEETEI_MAYEK", MEETEI_MAYEK_ID); /*[ABC0]*/
1781         /***/
1782         public static final UnicodeBlock HANGUL_JAMO_EXTENDED_B =
1783                 new UnicodeBlock("HANGUL_JAMO_EXTENDED_B", HANGUL_JAMO_EXTENDED_B_ID); /*[D7B0]*/
1784         /***/
1785         public static final UnicodeBlock IMPERIAL_ARAMAIC =
1786                 new UnicodeBlock("IMPERIAL_ARAMAIC", IMPERIAL_ARAMAIC_ID); /*[10840]*/
1787         /***/
1788         public static final UnicodeBlock OLD_SOUTH_ARABIAN =
1789                 new UnicodeBlock("OLD_SOUTH_ARABIAN", OLD_SOUTH_ARABIAN_ID); /*[10A60]*/
1790         /***/
1791         public static final UnicodeBlock AVESTAN =
1792                 new UnicodeBlock("AVESTAN", AVESTAN_ID); /*[10B00]*/
1793         /***/
1794         public static final UnicodeBlock INSCRIPTIONAL_PARTHIAN =
1795                 new UnicodeBlock("INSCRIPTIONAL_PARTHIAN", INSCRIPTIONAL_PARTHIAN_ID); /*[10B40]*/
1796         /***/
1797         public static final UnicodeBlock INSCRIPTIONAL_PAHLAVI =
1798                 new UnicodeBlock("INSCRIPTIONAL_PAHLAVI", INSCRIPTIONAL_PAHLAVI_ID); /*[10B60]*/
1799         /***/
1800         public static final UnicodeBlock OLD_TURKIC =
1801                 new UnicodeBlock("OLD_TURKIC", OLD_TURKIC_ID); /*[10C00]*/
1802         /***/
1803         public static final UnicodeBlock RUMI_NUMERAL_SYMBOLS =
1804                 new UnicodeBlock("RUMI_NUMERAL_SYMBOLS", RUMI_NUMERAL_SYMBOLS_ID); /*[10E60]*/
1805         /***/
1806         public static final UnicodeBlock KAITHI =
1807                 new UnicodeBlock("KAITHI", KAITHI_ID); /*[11080]*/
1808         /***/
1809         public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS =
1810                 new UnicodeBlock("EGYPTIAN_HIEROGLYPHS", EGYPTIAN_HIEROGLYPHS_ID); /*[13000]*/
1811         /***/
1812         public static final UnicodeBlock ENCLOSED_ALPHANUMERIC_SUPPLEMENT =
1813                 new UnicodeBlock("ENCLOSED_ALPHANUMERIC_SUPPLEMENT",
1814                         ENCLOSED_ALPHANUMERIC_SUPPLEMENT_ID); /*[1F100]*/
1815         /***/
1816         public static final UnicodeBlock ENCLOSED_IDEOGRAPHIC_SUPPLEMENT =
1817                 new UnicodeBlock("ENCLOSED_IDEOGRAPHIC_SUPPLEMENT",
1818                         ENCLOSED_IDEOGRAPHIC_SUPPLEMENT_ID); /*[1F200]*/
1819         /***/
1820         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C =
1821                 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C",
1822                         CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_ID); /*[2A700]*/
1823 
1824         /* New blocks in Unicode 6.0 */
1825 
1826         /***/
1827         public static final UnicodeBlock MANDAIC =
1828                 new UnicodeBlock("MANDAIC", MANDAIC_ID); /*[0840]*/
1829         /***/
1830         public static final UnicodeBlock BATAK =
1831                 new UnicodeBlock("BATAK", BATAK_ID); /*[1BC0]*/
1832         /***/
1833         public static final UnicodeBlock ETHIOPIC_EXTENDED_A =
1834                 new UnicodeBlock("ETHIOPIC_EXTENDED_A", ETHIOPIC_EXTENDED_A_ID); /*[AB00]*/
1835         /***/
1836         public static final UnicodeBlock BRAHMI =
1837                 new UnicodeBlock("BRAHMI", BRAHMI_ID); /*[11000]*/
1838         /***/
1839         public static final UnicodeBlock BAMUM_SUPPLEMENT =
1840                 new UnicodeBlock("BAMUM_SUPPLEMENT", BAMUM_SUPPLEMENT_ID); /*[16800]*/
1841         /***/
1842         public static final UnicodeBlock KANA_SUPPLEMENT =
1843                 new UnicodeBlock("KANA_SUPPLEMENT", KANA_SUPPLEMENT_ID); /*[1B000]*/
1844         /***/
1845         public static final UnicodeBlock PLAYING_CARDS =
1846                 new UnicodeBlock("PLAYING_CARDS", PLAYING_CARDS_ID); /*[1F0A0]*/
1847         /***/
1848         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS =
1849                 new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS",
1850                         MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS_ID); /*[1F300]*/
1851         /***/
1852         public static final UnicodeBlock EMOTICONS =
1853                 new UnicodeBlock("EMOTICONS", EMOTICONS_ID); /*[1F600]*/
1854         /***/
1855         public static final UnicodeBlock TRANSPORT_AND_MAP_SYMBOLS =
1856                 new UnicodeBlock("TRANSPORT_AND_MAP_SYMBOLS", TRANSPORT_AND_MAP_SYMBOLS_ID); /*[1F680]*/
1857         /***/
1858         public static final UnicodeBlock ALCHEMICAL_SYMBOLS =
1859                 new UnicodeBlock("ALCHEMICAL_SYMBOLS", ALCHEMICAL_SYMBOLS_ID); /*[1F700]*/
1860         /***/
1861         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D =
1862                 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D",
1863                         CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_ID); /*[2B740]*/
1864 
1865         /* New blocks in Unicode 6.1 */
1866 
1867         /***/
1868         public static final UnicodeBlock ARABIC_EXTENDED_A =
1869                 new UnicodeBlock("ARABIC_EXTENDED_A", ARABIC_EXTENDED_A_ID); /*[08A0]*/
1870         /***/
1871         public static final UnicodeBlock ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS =
1872                 new UnicodeBlock("ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS", ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS_ID); /*[1EE00]*/
1873         /***/
1874         public static final UnicodeBlock CHAKMA = new UnicodeBlock("CHAKMA", CHAKMA_ID); /*[11100]*/
1875         /***/
1876         public static final UnicodeBlock MEETEI_MAYEK_EXTENSIONS =
1877                 new UnicodeBlock("MEETEI_MAYEK_EXTENSIONS", MEETEI_MAYEK_EXTENSIONS_ID); /*[AAE0]*/
1878         /***/
1879         public static final UnicodeBlock MEROITIC_CURSIVE =
1880                 new UnicodeBlock("MEROITIC_CURSIVE", MEROITIC_CURSIVE_ID); /*[109A0]*/
1881         /***/
1882         public static final UnicodeBlock MEROITIC_HIEROGLYPHS =
1883                 new UnicodeBlock("MEROITIC_HIEROGLYPHS", MEROITIC_HIEROGLYPHS_ID); /*[10980]*/
1884         /***/
1885         public static final UnicodeBlock MIAO = new UnicodeBlock("MIAO", MIAO_ID); /*[16F00]*/
1886         /***/
1887         public static final UnicodeBlock SHARADA = new UnicodeBlock("SHARADA", SHARADA_ID); /*[11180]*/
1888         /***/
1889         public static final UnicodeBlock SORA_SOMPENG =
1890                 new UnicodeBlock("SORA_SOMPENG", SORA_SOMPENG_ID); /*[110D0]*/
1891         /***/
1892         public static final UnicodeBlock SUNDANESE_SUPPLEMENT =
1893                 new UnicodeBlock("SUNDANESE_SUPPLEMENT", SUNDANESE_SUPPLEMENT_ID); /*[1CC0]*/
1894         /***/
1895         public static final UnicodeBlock TAKRI = new UnicodeBlock("TAKRI", TAKRI_ID); /*[11680]*/
1896 
1897         /* New blocks in Unicode 7.0 */
1898 
1899         /***/
1900         public static final UnicodeBlock BASSA_VAH = new UnicodeBlock("BASSA_VAH", BASSA_VAH_ID); /*[16AD0]*/
1901         /***/
1902         public static final UnicodeBlock CAUCASIAN_ALBANIAN =
1903                 new UnicodeBlock("CAUCASIAN_ALBANIAN", CAUCASIAN_ALBANIAN_ID); /*[10530]*/
1904         /***/
1905         public static final UnicodeBlock COPTIC_EPACT_NUMBERS =
1906                 new UnicodeBlock("COPTIC_EPACT_NUMBERS", COPTIC_EPACT_NUMBERS_ID); /*[102E0]*/
1907         /***/
1908         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_EXTENDED =
1909                 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_EXTENDED", COMBINING_DIACRITICAL_MARKS_EXTENDED_ID); /*[1AB0]*/
1910         /***/
1911         public static final UnicodeBlock DUPLOYAN = new UnicodeBlock("DUPLOYAN", DUPLOYAN_ID); /*[1BC00]*/
1912         /***/
1913         public static final UnicodeBlock ELBASAN = new UnicodeBlock("ELBASAN", ELBASAN_ID); /*[10500]*/
1914         /***/
1915         public static final UnicodeBlock GEOMETRIC_SHAPES_EXTENDED =
1916                 new UnicodeBlock("GEOMETRIC_SHAPES_EXTENDED", GEOMETRIC_SHAPES_EXTENDED_ID); /*[1F780]*/
1917         /***/
1918         public static final UnicodeBlock GRANTHA = new UnicodeBlock("GRANTHA", GRANTHA_ID); /*[11300]*/
1919         /***/
1920         public static final UnicodeBlock KHOJKI = new UnicodeBlock("KHOJKI", KHOJKI_ID); /*[11200]*/
1921         /***/
1922         public static final UnicodeBlock KHUDAWADI = new UnicodeBlock("KHUDAWADI", KHUDAWADI_ID); /*[112B0]*/
1923         /***/
1924         public static final UnicodeBlock LATIN_EXTENDED_E =
1925                 new UnicodeBlock("LATIN_EXTENDED_E", LATIN_EXTENDED_E_ID); /*[AB30]*/
1926         /***/
1927         public static final UnicodeBlock LINEAR_A = new UnicodeBlock("LINEAR_A", LINEAR_A_ID); /*[10600]*/
1928         /***/
1929         public static final UnicodeBlock MAHAJANI = new UnicodeBlock("MAHAJANI", MAHAJANI_ID); /*[11150]*/
1930         /***/
1931         public static final UnicodeBlock MANICHAEAN = new UnicodeBlock("MANICHAEAN", MANICHAEAN_ID); /*[10AC0]*/
1932         /***/
1933         public static final UnicodeBlock MENDE_KIKAKUI =
1934                 new UnicodeBlock("MENDE_KIKAKUI", MENDE_KIKAKUI_ID); /*[1E800]*/
1935         /***/
1936         public static final UnicodeBlock MODI = new UnicodeBlock("MODI", MODI_ID); /*[11600]*/
1937         /***/
1938         public static final UnicodeBlock MRO = new UnicodeBlock("MRO", MRO_ID); /*[16A40]*/
1939         /***/
1940         public static final UnicodeBlock MYANMAR_EXTENDED_B =
1941                 new UnicodeBlock("MYANMAR_EXTENDED_B", MYANMAR_EXTENDED_B_ID); /*[A9E0]*/
1942         /***/
1943         public static final UnicodeBlock NABATAEAN = new UnicodeBlock("NABATAEAN", NABATAEAN_ID); /*[10880]*/
1944         /***/
1945         public static final UnicodeBlock OLD_NORTH_ARABIAN =
1946                 new UnicodeBlock("OLD_NORTH_ARABIAN", OLD_NORTH_ARABIAN_ID); /*[10A80]*/
1947         /***/
1948         public static final UnicodeBlock OLD_PERMIC = new UnicodeBlock("OLD_PERMIC", OLD_PERMIC_ID); /*[10350]*/
1949         /***/
1950         public static final UnicodeBlock ORNAMENTAL_DINGBATS =
1951                 new UnicodeBlock("ORNAMENTAL_DINGBATS", ORNAMENTAL_DINGBATS_ID); /*[1F650]*/
1952         /***/
1953         public static final UnicodeBlock PAHAWH_HMONG = new UnicodeBlock("PAHAWH_HMONG", PAHAWH_HMONG_ID); /*[16B00]*/
1954         /***/
1955         public static final UnicodeBlock PALMYRENE = new UnicodeBlock("PALMYRENE", PALMYRENE_ID); /*[10860]*/
1956         /***/
1957         public static final UnicodeBlock PAU_CIN_HAU = new UnicodeBlock("PAU_CIN_HAU", PAU_CIN_HAU_ID); /*[11AC0]*/
1958         /***/
1959         public static final UnicodeBlock PSALTER_PAHLAVI =
1960                 new UnicodeBlock("PSALTER_PAHLAVI", PSALTER_PAHLAVI_ID); /*[10B80]*/
1961         /***/
1962         public static final UnicodeBlock SHORTHAND_FORMAT_CONTROLS =
1963                 new UnicodeBlock("SHORTHAND_FORMAT_CONTROLS", SHORTHAND_FORMAT_CONTROLS_ID); /*[1BCA0]*/
1964         /***/
1965         public static final UnicodeBlock SIDDHAM = new UnicodeBlock("SIDDHAM", SIDDHAM_ID); /*[11580]*/
1966         /***/
1967         public static final UnicodeBlock SINHALA_ARCHAIC_NUMBERS =
1968                 new UnicodeBlock("SINHALA_ARCHAIC_NUMBERS", SINHALA_ARCHAIC_NUMBERS_ID); /*[111E0]*/
1969         /***/
1970         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_C =
1971                 new UnicodeBlock("SUPPLEMENTAL_ARROWS_C", SUPPLEMENTAL_ARROWS_C_ID); /*[1F800]*/
1972         /***/
1973         public static final UnicodeBlock TIRHUTA = new UnicodeBlock("TIRHUTA", TIRHUTA_ID); /*[11480]*/
1974         /***/
1975         public static final UnicodeBlock WARANG_CITI = new UnicodeBlock("WARANG_CITI", WARANG_CITI_ID); /*[118A0]*/
1976 
1977         /* New blocks in Unicode 8.0 */
1978 
1979         /***/
1980         public static final UnicodeBlock AHOM = new UnicodeBlock("AHOM", AHOM_ID); /*[11700]*/
1981         /***/
1982         public static final UnicodeBlock ANATOLIAN_HIEROGLYPHS =
1983                 new UnicodeBlock("ANATOLIAN_HIEROGLYPHS", ANATOLIAN_HIEROGLYPHS_ID); /*[14400]*/
1984         /***/
1985         public static final UnicodeBlock CHEROKEE_SUPPLEMENT =
1986                 new UnicodeBlock("CHEROKEE_SUPPLEMENT", CHEROKEE_SUPPLEMENT_ID); /*[AB70]*/
1987         /***/
1988         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E =
1989                 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E",
1990                         CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E_ID); /*[2B820]*/
1991         /***/
1992         public static final UnicodeBlock EARLY_DYNASTIC_CUNEIFORM =
1993                 new UnicodeBlock("EARLY_DYNASTIC_CUNEIFORM", EARLY_DYNASTIC_CUNEIFORM_ID); /*[12480]*/
1994         /***/
1995         public static final UnicodeBlock HATRAN = new UnicodeBlock("HATRAN", HATRAN_ID); /*[108E0]*/
1996         /***/
1997         public static final UnicodeBlock MULTANI = new UnicodeBlock("MULTANI", MULTANI_ID); /*[11280]*/
1998         /***/
1999         public static final UnicodeBlock OLD_HUNGARIAN =
2000                 new UnicodeBlock("OLD_HUNGARIAN", OLD_HUNGARIAN_ID); /*[10C80]*/
2001         /***/
2002         public static final UnicodeBlock SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS =
2003                 new UnicodeBlock("SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS",
2004                         SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS_ID); /*[1F900]*/
2005         /***/
2006         public static final UnicodeBlock SUTTON_SIGNWRITING =
2007                 new UnicodeBlock("SUTTON_SIGNWRITING", SUTTON_SIGNWRITING_ID); /*[1D800]*/
2008 
2009         /**
2010          */
2011         public static final UnicodeBlock INVALID_CODE
2012         = new UnicodeBlock("INVALID_CODE", INVALID_CODE_ID);
2013 
2014         static {
2015             for (int blockId = 0; blockId < COUNT; ++blockId) {
2016                 if (BLOCKS_[blockId] == null) {
2017                     throw new java.lang.IllegalStateException(
2018                             "UnicodeBlock.BLOCKS_[" + blockId + "] not initialized");
2019                 }
2020             }
2021         }
2022 
2023         // public methods --------------------------------------------------
2024 
2025         /**
2026          * <strong>[icu]</strong> Returns the only instance of the UnicodeBlock with the argument ID.
2027          * If no such ID exists, a INVALID_CODE UnicodeBlock will be returned.
2028          * @param id UnicodeBlock ID
2029          * @return the only instance of the UnicodeBlock with the argument ID
2030          *         if it exists, otherwise a INVALID_CODE UnicodeBlock will be
2031          *         returned.
2032          */
getInstance(int id)2033         public static UnicodeBlock getInstance(int id)
2034         {
2035             if (id >= 0 && id < BLOCKS_.length) {
2036                 return BLOCKS_[id];
2037             }
2038             return INVALID_CODE;
2039         }
2040 
2041         /**
2042          * Returns the Unicode allocation block that contains the code point,
2043          * or null if the code point is not a member of a defined block.
2044          * @param ch code point to be tested
2045          * @return the Unicode allocation block that contains the code point
2046          */
of(int ch)2047         public static UnicodeBlock of(int ch)
2048         {
2049             if (ch > MAX_VALUE) {
2050                 return INVALID_CODE;
2051             }
2052 
2053             return UnicodeBlock.getInstance(
2054                     UCharacterProperty.INSTANCE.getIntPropertyValue(ch, UProperty.BLOCK));
2055         }
2056 
2057         /**
2058          * Alternative to the {@link java.lang.Character.UnicodeBlock#forName(String)} method.
2059          * Returns the Unicode block with the given name. <strong>[icu] Note:</strong> Unlike
2060          * {@link java.lang.Character.UnicodeBlock#forName(String)}, this only matches
2061          * against the official UCD name and the Java block name
2062          * (ignoring case).
2063          * @param blockName the name of the block to match
2064          * @return the UnicodeBlock with that name
2065          * @throws IllegalArgumentException if the blockName could not be matched
2066          */
forName(String blockName)2067         public static final UnicodeBlock forName(String blockName) {
2068             Map<String, UnicodeBlock> m = null;
2069             if (mref != null) {
2070                 m = mref.get();
2071             }
2072             if (m == null) {
2073                 m = new HashMap<String, UnicodeBlock>(BLOCKS_.length);
2074                 for (int i = 0; i < BLOCKS_.length; ++i) {
2075                     UnicodeBlock b = BLOCKS_[i];
2076                     String name = trimBlockName(
2077                             getPropertyValueName(UProperty.BLOCK, b.getID(),
2078                                     UProperty.NameChoice.LONG));
2079                     m.put(name, b);
2080                 }
2081                 mref = new SoftReference<Map<String, UnicodeBlock>>(m);
2082             }
2083             UnicodeBlock b = m.get(trimBlockName(blockName));
2084             if (b == null) {
2085                 throw new IllegalArgumentException();
2086             }
2087             return b;
2088         }
2089         private static SoftReference<Map<String, UnicodeBlock>> mref;
2090 
trimBlockName(String name)2091         private static String trimBlockName(String name) {
2092             String upper = name.toUpperCase(Locale.ENGLISH);
2093             StringBuilder result = new StringBuilder(upper.length());
2094             for (int i = 0; i < upper.length(); i++) {
2095                 char c = upper.charAt(i);
2096                 if (c != ' ' && c != '_' && c != '-') {
2097                     result.append(c);
2098                 }
2099             }
2100             return result.toString();
2101         }
2102 
2103         /**
2104          * {icu} Returns the type ID of this Unicode block
2105          * @return integer type ID of this Unicode block
2106          */
getID()2107         public int getID()
2108         {
2109             return m_id_;
2110         }
2111 
2112         // private data members ---------------------------------------------
2113 
2114         /**
2115          * Identification code for this UnicodeBlock
2116          */
2117         private int m_id_;
2118 
2119         // private constructor ----------------------------------------------
2120 
2121         /**
2122          * UnicodeBlock constructor
2123          * @param name name of this UnicodeBlock
2124          * @param id unique id of this UnicodeBlock
2125          * @exception NullPointerException if name is <code>null</code>
2126          */
UnicodeBlock(String name, int id)2127         private UnicodeBlock(String name, int id)
2128         {
2129             super(name);
2130             m_id_ = id;
2131             if (id >= 0) {
2132                 BLOCKS_[id] = this;
2133             }
2134         }
2135     }
2136 
2137     /**
2138      * East Asian Width constants.
2139      * @see UProperty#EAST_ASIAN_WIDTH
2140      * @see UCharacter#getIntPropertyValue
2141      */
2142     public static interface EastAsianWidth
2143     {
2144         /**
2145          */
2146         public static final int NEUTRAL = 0;
2147         /**
2148          */
2149         public static final int AMBIGUOUS = 1;
2150         /**
2151          */
2152         public static final int HALFWIDTH = 2;
2153         /**
2154          */
2155         public static final int FULLWIDTH = 3;
2156         /**
2157          */
2158         public static final int NARROW = 4;
2159         /**
2160          */
2161         public static final int WIDE = 5;
2162         /**
2163          * @hide unsupported on Android
2164          */
2165         public static final int COUNT = 6;
2166     }
2167 
2168     /**
2169      * Decomposition Type constants.
2170      * @see UProperty#DECOMPOSITION_TYPE
2171      */
2172     public static interface DecompositionType
2173     {
2174         /**
2175          */
2176         public static final int NONE = 0;
2177         /**
2178          */
2179         public static final int CANONICAL = 1;
2180         /**
2181          */
2182         public static final int COMPAT = 2;
2183         /**
2184          */
2185         public static final int CIRCLE = 3;
2186         /**
2187          */
2188         public static final int FINAL = 4;
2189         /**
2190          */
2191         public static final int FONT = 5;
2192         /**
2193          */
2194         public static final int FRACTION = 6;
2195         /**
2196          */
2197         public static final int INITIAL = 7;
2198         /**
2199          */
2200         public static final int ISOLATED = 8;
2201         /**
2202          */
2203         public static final int MEDIAL = 9;
2204         /**
2205          */
2206         public static final int NARROW = 10;
2207         /**
2208          */
2209         public static final int NOBREAK = 11;
2210         /**
2211          */
2212         public static final int SMALL = 12;
2213         /**
2214          */
2215         public static final int SQUARE = 13;
2216         /**
2217          */
2218         public static final int SUB = 14;
2219         /**
2220          */
2221         public static final int SUPER = 15;
2222         /**
2223          */
2224         public static final int VERTICAL = 16;
2225         /**
2226          */
2227         public static final int WIDE = 17;
2228         /**
2229          * @hide unsupported on Android
2230          */
2231         public static final int COUNT = 18;
2232     }
2233 
2234     /**
2235      * Joining Type constants.
2236      * @see UProperty#JOINING_TYPE
2237      */
2238     public static interface JoiningType
2239     {
2240         /**
2241          */
2242         public static final int NON_JOINING = 0;
2243         /**
2244          */
2245         public static final int JOIN_CAUSING = 1;
2246         /**
2247          */
2248         public static final int DUAL_JOINING = 2;
2249         /**
2250          */
2251         public static final int LEFT_JOINING = 3;
2252         /**
2253          */
2254         public static final int RIGHT_JOINING = 4;
2255         /**
2256          */
2257         public static final int TRANSPARENT = 5;
2258         /**
2259          * @hide unsupported on Android
2260          */
2261         public static final int COUNT = 6;
2262     }
2263 
2264     /**
2265      * Joining Group constants.
2266      * @see UProperty#JOINING_GROUP
2267      */
2268     public static interface JoiningGroup
2269     {
2270         /**
2271          */
2272         public static final int NO_JOINING_GROUP = 0;
2273         /**
2274          */
2275         public static final int AIN = 1;
2276         /**
2277          */
2278         public static final int ALAPH = 2;
2279         /**
2280          */
2281         public static final int ALEF = 3;
2282         /**
2283          */
2284         public static final int BEH = 4;
2285         /**
2286          */
2287         public static final int BETH = 5;
2288         /**
2289          */
2290         public static final int DAL = 6;
2291         /**
2292          */
2293         public static final int DALATH_RISH = 7;
2294         /**
2295          */
2296         public static final int E = 8;
2297         /**
2298          */
2299         public static final int FEH = 9;
2300         /**
2301          */
2302         public static final int FINAL_SEMKATH = 10;
2303         /**
2304          */
2305         public static final int GAF = 11;
2306         /**
2307          */
2308         public static final int GAMAL = 12;
2309         /**
2310          */
2311         public static final int HAH = 13;
2312         /***/
2313         public static final int TEH_MARBUTA_GOAL = 14;
2314         /**
2315          */
2316         public static final int HAMZA_ON_HEH_GOAL = TEH_MARBUTA_GOAL;
2317         /**
2318          */
2319         public static final int HE = 15;
2320         /**
2321          */
2322         public static final int HEH = 16;
2323         /**
2324          */
2325         public static final int HEH_GOAL = 17;
2326         /**
2327          */
2328         public static final int HETH = 18;
2329         /**
2330          */
2331         public static final int KAF = 19;
2332         /**
2333          */
2334         public static final int KAPH = 20;
2335         /**
2336          */
2337         public static final int KNOTTED_HEH = 21;
2338         /**
2339          */
2340         public static final int LAM = 22;
2341         /**
2342          */
2343         public static final int LAMADH = 23;
2344         /**
2345          */
2346         public static final int MEEM = 24;
2347         /**
2348          */
2349         public static final int MIM = 25;
2350         /**
2351          */
2352         public static final int NOON = 26;
2353         /**
2354          */
2355         public static final int NUN = 27;
2356         /**
2357          */
2358         public static final int PE = 28;
2359         /**
2360          */
2361         public static final int QAF = 29;
2362         /**
2363          */
2364         public static final int QAPH = 30;
2365         /**
2366          */
2367         public static final int REH = 31;
2368         /**
2369          */
2370         public static final int REVERSED_PE = 32;
2371         /**
2372          */
2373         public static final int SAD = 33;
2374         /**
2375          */
2376         public static final int SADHE = 34;
2377         /**
2378          */
2379         public static final int SEEN = 35;
2380         /**
2381          */
2382         public static final int SEMKATH = 36;
2383         /**
2384          */
2385         public static final int SHIN = 37;
2386         /**
2387          */
2388         public static final int SWASH_KAF = 38;
2389         /**
2390          */
2391         public static final int SYRIAC_WAW = 39;
2392         /**
2393          */
2394         public static final int TAH = 40;
2395         /**
2396          */
2397         public static final int TAW = 41;
2398         /**
2399          */
2400         public static final int TEH_MARBUTA = 42;
2401         /**
2402          */
2403         public static final int TETH = 43;
2404         /**
2405          */
2406         public static final int WAW = 44;
2407         /**
2408          */
2409         public static final int YEH = 45;
2410         /**
2411          */
2412         public static final int YEH_BARREE = 46;
2413         /**
2414          */
2415         public static final int YEH_WITH_TAIL = 47;
2416         /**
2417          */
2418         public static final int YUDH = 48;
2419         /**
2420          */
2421         public static final int YUDH_HE = 49;
2422         /**
2423          */
2424         public static final int ZAIN = 50;
2425         /**
2426          */
2427         public static final int FE = 51;
2428         /**
2429          */
2430         public static final int KHAPH = 52;
2431         /**
2432          */
2433         public static final int ZHAIN = 53;
2434         /**
2435          */
2436         public static final int BURUSHASKI_YEH_BARREE = 54;
2437         /***/
2438         public static final int FARSI_YEH = 55;
2439         /***/
2440         public static final int NYA = 56;
2441         /***/
2442         public static final int ROHINGYA_YEH = 57;
2443 
2444         /***/
2445         public static final int MANICHAEAN_ALEPH = 58;
2446         /***/
2447         public static final int MANICHAEAN_AYIN = 59;
2448         /***/
2449         public static final int MANICHAEAN_BETH = 60;
2450         /***/
2451         public static final int MANICHAEAN_DALETH = 61;
2452         /***/
2453         public static final int MANICHAEAN_DHAMEDH = 62;
2454         /***/
2455         public static final int MANICHAEAN_FIVE = 63;
2456         /***/
2457         public static final int MANICHAEAN_GIMEL = 64;
2458         /***/
2459         public static final int MANICHAEAN_HETH = 65;
2460         /***/
2461         public static final int MANICHAEAN_HUNDRED = 66;
2462         /***/
2463         public static final int MANICHAEAN_KAPH = 67;
2464         /***/
2465         public static final int MANICHAEAN_LAMEDH = 68;
2466         /***/
2467         public static final int MANICHAEAN_MEM = 69;
2468         /***/
2469         public static final int MANICHAEAN_NUN = 70;
2470         /***/
2471         public static final int MANICHAEAN_ONE = 71;
2472         /***/
2473         public static final int MANICHAEAN_PE = 72;
2474         /***/
2475         public static final int MANICHAEAN_QOPH = 73;
2476         /***/
2477         public static final int MANICHAEAN_RESH = 74;
2478         /***/
2479         public static final int MANICHAEAN_SADHE = 75;
2480         /***/
2481         public static final int MANICHAEAN_SAMEKH = 76;
2482         /***/
2483         public static final int MANICHAEAN_TAW = 77;
2484         /***/
2485         public static final int MANICHAEAN_TEN = 78;
2486         /***/
2487         public static final int MANICHAEAN_TETH = 79;
2488         /***/
2489         public static final int MANICHAEAN_THAMEDH = 80;
2490         /***/
2491         public static final int MANICHAEAN_TWENTY = 81;
2492         /***/
2493         public static final int MANICHAEAN_WAW = 82;
2494         /***/
2495         public static final int MANICHAEAN_YODH = 83;
2496         /***/
2497         public static final int MANICHAEAN_ZAYIN = 84;
2498         /***/
2499         public static final int STRAIGHT_WAW = 85;
2500 
2501         /**
2502          * @hide unsupported on Android
2503          */
2504         public static final int COUNT = 86;
2505     }
2506 
2507     /**
2508      * Grapheme Cluster Break constants.
2509      * @see UProperty#GRAPHEME_CLUSTER_BREAK
2510      */
2511     public static interface GraphemeClusterBreak {
2512         /**
2513          */
2514         public static final int OTHER = 0;
2515         /**
2516          */
2517         public static final int CONTROL = 1;
2518         /**
2519          */
2520         public static final int CR = 2;
2521         /**
2522          */
2523         public static final int EXTEND = 3;
2524         /**
2525          */
2526         public static final int L = 4;
2527         /**
2528          */
2529         public static final int LF = 5;
2530         /**
2531          */
2532         public static final int LV = 6;
2533         /**
2534          */
2535         public static final int LVT = 7;
2536         /**
2537          */
2538         public static final int T = 8;
2539         /**
2540          */
2541         public static final int V = 9;
2542         /**
2543          */
2544         public static final int SPACING_MARK = 10;
2545         /**
2546          */
2547         public static final int PREPEND = 11;
2548         /***/
2549         public static final int REGIONAL_INDICATOR = 12;  /*[RI]*/ /* new in Unicode 6.2/ICU 50 */
2550         /**
2551          * @hide unsupported on Android
2552          */
2553         public static final int COUNT = 13;
2554     }
2555 
2556     /**
2557      * Word Break constants.
2558      * @see UProperty#WORD_BREAK
2559      */
2560     public static interface WordBreak {
2561         /**
2562          */
2563         public static final int OTHER = 0;
2564         /**
2565          */
2566         public static final int ALETTER = 1;
2567         /**
2568          */
2569         public static final int FORMAT = 2;
2570         /**
2571          */
2572         public static final int KATAKANA = 3;
2573         /**
2574          */
2575         public static final int MIDLETTER = 4;
2576         /**
2577          */
2578         public static final int MIDNUM = 5;
2579         /**
2580          */
2581         public static final int NUMERIC = 6;
2582         /**
2583          */
2584         public static final int EXTENDNUMLET = 7;
2585         /**
2586          */
2587         public static final int CR = 8;
2588         /**
2589          */
2590         public static final int EXTEND = 9;
2591         /**
2592          */
2593         public static final int LF = 10;
2594         /**
2595          */
2596         public static final int MIDNUMLET = 11;
2597         /**
2598          */
2599         public static final int NEWLINE = 12;
2600         /***/
2601         public static final int REGIONAL_INDICATOR = 13;  /*[RI]*/ /* new in Unicode 6.2/ICU 50 */
2602         /***/
2603         public static final int HEBREW_LETTER = 14;    /*[HL]*/ /* from here on: new in Unicode 6.3/ICU 52 */
2604         /***/
2605         public static final int SINGLE_QUOTE = 15;     /*[SQ]*/
2606         /***/
2607         public static final int DOUBLE_QUOTE = 16;     /*[DQ]*/
2608         /**
2609          * @hide unsupported on Android
2610          */
2611         public static final int COUNT = 17;
2612     }
2613 
2614     /**
2615      * Sentence Break constants.
2616      * @see UProperty#SENTENCE_BREAK
2617      */
2618     public static interface SentenceBreak {
2619         /**
2620          */
2621         public static final int OTHER = 0;
2622         /**
2623          */
2624         public static final int ATERM = 1;
2625         /**
2626          */
2627         public static final int CLOSE = 2;
2628         /**
2629          */
2630         public static final int FORMAT = 3;
2631         /**
2632          */
2633         public static final int LOWER = 4;
2634         /**
2635          */
2636         public static final int NUMERIC = 5;
2637         /**
2638          */
2639         public static final int OLETTER = 6;
2640         /**
2641          */
2642         public static final int SEP = 7;
2643         /**
2644          */
2645         public static final int SP = 8;
2646         /**
2647          */
2648         public static final int STERM = 9;
2649         /**
2650          */
2651         public static final int UPPER = 10;
2652         /**
2653          */
2654         public static final int CR = 11;
2655         /**
2656          */
2657         public static final int EXTEND = 12;
2658         /**
2659          */
2660         public static final int LF = 13;
2661         /**
2662          */
2663         public static final int SCONTINUE = 14;
2664         /**
2665          * @hide unsupported on Android
2666          */
2667         public static final int COUNT = 15;
2668     }
2669 
2670     /**
2671      * Line Break constants.
2672      * @see UProperty#LINE_BREAK
2673      */
2674     public static interface LineBreak
2675     {
2676         /**
2677          */
2678         public static final int UNKNOWN = 0;
2679         /**
2680          */
2681         public static final int AMBIGUOUS = 1;
2682         /**
2683          */
2684         public static final int ALPHABETIC = 2;
2685         /**
2686          */
2687         public static final int BREAK_BOTH = 3;
2688         /**
2689          */
2690         public static final int BREAK_AFTER = 4;
2691         /**
2692          */
2693         public static final int BREAK_BEFORE = 5;
2694         /**
2695          */
2696         public static final int MANDATORY_BREAK = 6;
2697         /**
2698          */
2699         public static final int CONTINGENT_BREAK = 7;
2700         /**
2701          */
2702         public static final int CLOSE_PUNCTUATION = 8;
2703         /**
2704          */
2705         public static final int COMBINING_MARK = 9;
2706         /**
2707          */
2708         public static final int CARRIAGE_RETURN = 10;
2709         /**
2710          */
2711         public static final int EXCLAMATION = 11;
2712         /**
2713          */
2714         public static final int GLUE = 12;
2715         /**
2716          */
2717         public static final int HYPHEN = 13;
2718         /**
2719          */
2720         public static final int IDEOGRAPHIC = 14;
2721         /**
2722          * @see #INSEPARABLE
2723          */
2724         public static final int INSEPERABLE = 15;
2725         /**
2726          * Renamed from the misspelled "inseperable" in Unicode 4.0.1.
2727          */
2728         public static final int INSEPARABLE = 15;
2729         /**
2730          */
2731         public static final int INFIX_NUMERIC = 16;
2732         /**
2733          */
2734         public static final int LINE_FEED = 17;
2735         /**
2736          */
2737         public static final int NONSTARTER = 18;
2738         /**
2739          */
2740         public static final int NUMERIC = 19;
2741         /**
2742          */
2743         public static final int OPEN_PUNCTUATION = 20;
2744         /**
2745          */
2746         public static final int POSTFIX_NUMERIC = 21;
2747         /**
2748          */
2749         public static final int PREFIX_NUMERIC = 22;
2750         /**
2751          */
2752         public static final int QUOTATION = 23;
2753         /**
2754          */
2755         public static final int COMPLEX_CONTEXT = 24;
2756         /**
2757          */
2758         public static final int SURROGATE = 25;
2759         /**
2760          */
2761         public static final int SPACE = 26;
2762         /**
2763          */
2764         public static final int BREAK_SYMBOLS = 27;
2765         /**
2766          */
2767         public static final int ZWSPACE = 28;
2768         /**
2769          */
2770         public static final int NEXT_LINE = 29;  /*[NL]*/ /* from here on: new in Unicode 4/ICU 2.6 */
2771         /**
2772          */
2773         public static final int WORD_JOINER = 30;      /*[WJ]*/
2774         /**
2775          */
2776         public static final int H2 = 31;  /* from here on: new in Unicode 4.1/ICU 3.4 */
2777         /**
2778          */
2779         public static final int H3 = 32;
2780         /**
2781          */
2782         public static final int JL = 33;
2783         /**
2784          */
2785         public static final int JT = 34;
2786         /**
2787          */
2788         public static final int JV = 35;
2789         /***/
2790         public static final int CLOSE_PARENTHESIS = 36; /*[CP]*/ /* new in Unicode 5.2/ICU 4.4 */
2791         /***/
2792         public static final int CONDITIONAL_JAPANESE_STARTER = 37;  /*[CJ]*/ /* new in Unicode 6.1/ICU 49 */
2793         /***/
2794         public static final int HEBREW_LETTER = 38;  /*[HL]*/ /* new in Unicode 6.1/ICU 49 */
2795         /***/
2796         public static final int REGIONAL_INDICATOR = 39;  /*[RI]*/ /* new in Unicode 6.2/ICU 50 */
2797         /**
2798          * @hide unsupported on Android
2799          */
2800         public static final int COUNT = 40;
2801     }
2802 
2803     /**
2804      * Numeric Type constants.
2805      * @see UProperty#NUMERIC_TYPE
2806      */
2807     public static interface NumericType
2808     {
2809         /**
2810          */
2811         public static final int NONE = 0;
2812         /**
2813          */
2814         public static final int DECIMAL = 1;
2815         /**
2816          */
2817         public static final int DIGIT = 2;
2818         /**
2819          */
2820         public static final int NUMERIC = 3;
2821         /**
2822          * @hide unsupported on Android
2823          */
2824         public static final int COUNT = 4;
2825     }
2826 
2827     /**
2828      * Hangul Syllable Type constants.
2829      *
2830      * @see UProperty#HANGUL_SYLLABLE_TYPE
2831      */
2832     public static interface HangulSyllableType
2833     {
2834         /**
2835          */
2836         public static final int NOT_APPLICABLE      = 0;   /*[NA]*/ /*See note !!*/
2837         /**
2838          */
2839         public static final int LEADING_JAMO        = 1;   /*[L]*/
2840         /**
2841          */
2842         public static final int VOWEL_JAMO          = 2;   /*[V]*/
2843         /**
2844          */
2845         public static final int TRAILING_JAMO       = 3;   /*[T]*/
2846         /**
2847          */
2848         public static final int LV_SYLLABLE         = 4;   /*[LV]*/
2849         /**
2850          */
2851         public static final int LVT_SYLLABLE        = 5;   /*[LVT]*/
2852         /**
2853          * @hide unsupported on Android
2854          */
2855         public static final int COUNT               = 6;
2856     }
2857 
2858     /**
2859      * Bidi Paired Bracket Type constants.
2860      *
2861      * @see UProperty#BIDI_PAIRED_BRACKET_TYPE
2862      */
2863     public static interface BidiPairedBracketType {
2864         /**
2865          * Not a paired bracket.
2866          */
2867         public static final int NONE = 0;
2868         /**
2869          * Open paired bracket.
2870          */
2871         public static final int OPEN = 1;
2872         /**
2873          * Close paired bracket.
2874          */
2875         public static final int CLOSE = 2;
2876         /**
2877          * @hide unsupported on Android
2878          */
2879         public static final int COUNT = 3;
2880     }
2881 
2882     // public data members -----------------------------------------------
2883 
2884     /**
2885      * The lowest Unicode code point value, constant 0.
2886      * Same as {@link Character#MIN_CODE_POINT}, same integer value as {@link Character#MIN_VALUE}.
2887      */
2888     public static final int MIN_VALUE = Character.MIN_CODE_POINT;
2889 
2890     /**
2891      * The highest Unicode code point value (scalar value), constant U+10FFFF (uses 21 bits).
2892      * Same as {@link Character#MAX_CODE_POINT}.
2893      *
2894      * <p>Up-to-date Unicode implementation of {@link Character#MAX_VALUE}
2895      * which is still a char with the value U+FFFF.
2896      */
2897     public static final int MAX_VALUE = Character.MAX_CODE_POINT;
2898 
2899     /**
2900      * The minimum value for Supplementary code points, constant U+10000.
2901      * Same as {@link Character#MIN_SUPPLEMENTARY_CODE_POINT}.
2902      */
2903     public static final int SUPPLEMENTARY_MIN_VALUE = Character.MIN_SUPPLEMENTARY_CODE_POINT;
2904 
2905     /**
2906      * Unicode value used when translating into Unicode encoding form and there
2907      * is no existing character.
2908      */
2909     public static final int REPLACEMENT_CHAR = '\uFFFD';
2910 
2911     /**
2912      * Special value that is returned by getUnicodeNumericValue(int) when no
2913      * numeric value is defined for a code point.
2914      * @see #getUnicodeNumericValue
2915      */
2916     public static final double NO_NUMERIC_VALUE = -123456789;
2917 
2918     /**
2919      * Compatibility constant for Java Character's MIN_RADIX.
2920      */
2921     public static final int MIN_RADIX = java.lang.Character.MIN_RADIX;
2922 
2923     /**
2924      * Compatibility constant for Java Character's MAX_RADIX.
2925      */
2926     public static final int MAX_RADIX = java.lang.Character.MAX_RADIX;
2927 
2928     /**
2929      * Do not lowercase non-initial parts of words when titlecasing.
2930      * Option bit for titlecasing APIs that take an options bit set.
2931      *
2932      * By default, titlecasing will titlecase the first cased character
2933      * of a word and lowercase all other characters.
2934      * With this option, the other characters will not be modified.
2935      *
2936      * @see #toTitleCase
2937      */
2938     public static final int TITLECASE_NO_LOWERCASE = 0x100;
2939 
2940     /**
2941      * Do not adjust the titlecasing indexes from BreakIterator::next() indexes;
2942      * titlecase exactly the characters at breaks from the iterator.
2943      * Option bit for titlecasing APIs that take an options bit set.
2944      *
2945      * By default, titlecasing will take each break iterator index,
2946      * adjust it by looking for the next cased character, and titlecase that one.
2947      * Other characters are lowercased.
2948      *
2949      * This follows Unicode 4 &amp; 5 section 3.13 Default Case Operations:
2950      *
2951      * R3  toTitlecase(X): Find the word boundaries based on Unicode Standard Annex
2952      * #29, "Text Boundaries." Between each pair of word boundaries, find the first
2953      * cased character F. If F exists, map F to default_title(F); then map each
2954      * subsequent character C to default_lower(C).
2955      *
2956      * @see #toTitleCase
2957      * @see #TITLECASE_NO_LOWERCASE
2958      */
2959     public static final int TITLECASE_NO_BREAK_ADJUSTMENT = 0x200;
2960 
2961     // public methods ----------------------------------------------------
2962 
2963     /**
2964      * Returnss the numeric value of a decimal digit code point.
2965      * <br>This method observes the semantics of
2966      * <code>java.lang.Character.digit()</code>.  Note that this
2967      * will return positive values for code points for which isDigit
2968      * returns false, just like java.lang.Character.
2969      * <br><em>Semantic Change:</em> In release 1.3.1 and
2970      * prior, this did not treat the European letters as having a
2971      * digit value, and also treated numeric letters and other numbers as
2972      * digits.
2973      * This has been changed to conform to the java semantics.
2974      * <br>A code point is a valid digit if and only if:
2975      * <ul>
2976      *   <li>ch is a decimal digit or one of the european letters, and
2977      *   <li>the value of ch is less than the specified radix.
2978      * </ul>
2979      * @param ch the code point to query
2980      * @param radix the radix
2981      * @return the numeric value represented by the code point in the
2982      * specified radix, or -1 if the code point is not a decimal digit
2983      * or if its value is too large for the radix
2984      */
digit(int ch, int radix)2985     public static int digit(int ch, int radix)
2986     {
2987         if (2 <= radix && radix <= 36) {
2988             int value = digit(ch);
2989             if (value < 0) {
2990                 // ch is not a decimal digit, try latin letters
2991                 value = UCharacterProperty.getEuropeanDigit(ch);
2992             }
2993             return (value < radix) ? value : -1;
2994         } else {
2995             return -1;  // invalid radix
2996         }
2997     }
2998 
2999     /**
3000      * Returnss the numeric value of a decimal digit code point.
3001      * <br>This is a convenience overload of <code>digit(int, int)</code>
3002      * that provides a decimal radix.
3003      * <br><em>Semantic Change:</em> In release 1.3.1 and prior, this
3004      * treated numeric letters and other numbers as digits.  This has
3005      * been changed to conform to the java semantics.
3006      * @param ch the code point to query
3007      * @return the numeric value represented by the code point,
3008      * or -1 if the code point is not a decimal digit or if its
3009      * value is too large for a decimal radix
3010      */
digit(int ch)3011     public static int digit(int ch)
3012     {
3013         return UCharacterProperty.INSTANCE.digit(ch);
3014     }
3015 
3016     /**
3017      * Returns the numeric value of the code point as a nonnegative
3018      * integer.
3019      * <br>If the code point does not have a numeric value, then -1 is returned.
3020      * <br>
3021      * If the code point has a numeric value that cannot be represented as a
3022      * nonnegative integer (for example, a fractional value), then -2 is
3023      * returned.
3024      * @param ch the code point to query
3025      * @return the numeric value of the code point, or -1 if it has no numeric
3026      * value, or -2 if it has a numeric value that cannot be represented as a
3027      * nonnegative integer
3028      */
getNumericValue(int ch)3029     public static int getNumericValue(int ch)
3030     {
3031         return UCharacterProperty.INSTANCE.getNumericValue(ch);
3032     }
3033 
3034     /**
3035      * <strong>[icu]</strong> Returns the numeric value for a Unicode code point as defined in the
3036      * Unicode Character Database.
3037      * <p>A "double" return type is necessary because some numeric values are
3038      * fractions, negative, or too large for int.
3039      * <p>For characters without any numeric values in the Unicode Character
3040      * Database, this function will return NO_NUMERIC_VALUE.
3041      * Note: This is different from the Unicode Standard which specifies NaN as the default value.
3042      * <p><em>API Change:</em> In release 2.2 and prior, this API has a
3043      * return type int and returns -1 when the argument ch does not have a
3044      * corresponding numeric value. This has been changed to synch with ICU4C
3045      *
3046      * This corresponds to the ICU4C function u_getNumericValue.
3047      * @param ch Code point to get the numeric value for.
3048      * @return numeric value of ch, or NO_NUMERIC_VALUE if none is defined.
3049      */
getUnicodeNumericValue(int ch)3050     public static double getUnicodeNumericValue(int ch)
3051     {
3052         return UCharacterProperty.INSTANCE.getUnicodeNumericValue(ch);
3053     }
3054 
3055     /**
3056      * Compatibility override of Java deprecated method.  This
3057      * method will always remain deprecated.
3058      * Same as java.lang.Character.isSpace().
3059      * @param ch the code point
3060      * @return true if the code point is a space character as
3061      * defined by java.lang.Character.isSpace.
3062      * @deprecated ICU 3.4 (Java)
3063      * @hide original deprecated declaration
3064      */
3065     @Deprecated
isSpace(int ch)3066     public static boolean isSpace(int ch) {
3067         return ch <= 0x20 &&
3068                 (ch == 0x20 || ch == 0x09 || ch == 0x0a || ch == 0x0c || ch == 0x0d);
3069     }
3070 
3071     /**
3072      * Returns a value indicating a code point's Unicode category.
3073      * Up-to-date Unicode implementation of java.lang.Character.getType()
3074      * except for the above mentioned code points that had their category
3075      * changed.<br>
3076      * Return results are constants from the interface
3077      * <a href=UCharacterCategory.html>UCharacterCategory</a><br>
3078      * <em>NOTE:</em> the UCharacterCategory values are <em>not</em> compatible with
3079      * those returned by java.lang.Character.getType.  UCharacterCategory values
3080      * match the ones used in ICU4C, while java.lang.Character type
3081      * values, though similar, skip the value 17.
3082      * @param ch code point whose type is to be determined
3083      * @return category which is a value of UCharacterCategory
3084      */
getType(int ch)3085     public static int getType(int ch)
3086     {
3087         return UCharacterProperty.INSTANCE.getType(ch);
3088     }
3089 
3090     /**
3091      * Determines if a code point has a defined meaning in the up-to-date
3092      * Unicode standard.
3093      * E.g. supplementary code points though allocated space are not defined in
3094      * Unicode yet.<br>
3095      * Up-to-date Unicode implementation of java.lang.Character.isDefined()
3096      * @param ch code point to be determined if it is defined in the most
3097      *        current version of Unicode
3098      * @return true if this code point is defined in unicode
3099      */
isDefined(int ch)3100     public static boolean isDefined(int ch)
3101     {
3102         return getType(ch) != 0;
3103     }
3104 
3105     /**
3106      * Determines if a code point is a Java digit.
3107      * <br>This method observes the semantics of
3108      * <code>java.lang.Character.isDigit()</code>. It returns true for decimal
3109      * digits only.
3110      * <br><em>Semantic Change:</em> In release 1.3.1 and prior, this treated
3111      * numeric letters and other numbers as digits.
3112      * This has been changed to conform to the java semantics.
3113      * @param ch code point to query
3114      * @return true if this code point is a digit
3115      */
isDigit(int ch)3116     public static boolean isDigit(int ch)
3117     {
3118         return getType(ch) == UCharacterCategory.DECIMAL_DIGIT_NUMBER;
3119     }
3120 
3121     /**
3122      * Determines if the specified code point is an ISO control character.
3123      * A code point is considered to be an ISO control character if it is in
3124      * the range &#92;u0000 through &#92;u001F or in the range &#92;u007F through
3125      * &#92;u009F.<br>
3126      * Up-to-date Unicode implementation of java.lang.Character.isISOControl()
3127      * @param ch code point to determine if it is an ISO control character
3128      * @return true if code point is a ISO control character
3129      */
isISOControl(int ch)3130     public static boolean isISOControl(int ch)
3131     {
3132         return ch >= 0 && ch <= APPLICATION_PROGRAM_COMMAND_ &&
3133                 ((ch <= UNIT_SEPARATOR_) || (ch >= DELETE_));
3134     }
3135 
3136     /**
3137      * Determines if the specified code point is a letter.
3138      * Up-to-date Unicode implementation of java.lang.Character.isLetter()
3139      * @param ch code point to determine if it is a letter
3140      * @return true if code point is a letter
3141      */
isLetter(int ch)3142     public static boolean isLetter(int ch)
3143     {
3144         // if props == 0, it will just fall through and return false
3145         return ((1 << getType(ch))
3146                 & ((1 << UCharacterCategory.UPPERCASE_LETTER)
3147                         | (1 << UCharacterCategory.LOWERCASE_LETTER)
3148                         | (1 << UCharacterCategory.TITLECASE_LETTER)
3149                         | (1 << UCharacterCategory.MODIFIER_LETTER)
3150                         | (1 << UCharacterCategory.OTHER_LETTER))) != 0;
3151     }
3152 
3153     /**
3154      * Determines if the specified code point is a letter or digit.
3155      * <strong>[icu] Note:</strong> This method, unlike java.lang.Character does not regard the ascii
3156      * characters 'A' - 'Z' and 'a' - 'z' as digits.
3157      * @param ch code point to determine if it is a letter or a digit
3158      * @return true if code point is a letter or a digit
3159      */
isLetterOrDigit(int ch)3160     public static boolean isLetterOrDigit(int ch)
3161     {
3162         return ((1 << getType(ch))
3163                 & ((1 << UCharacterCategory.UPPERCASE_LETTER)
3164                         | (1 << UCharacterCategory.LOWERCASE_LETTER)
3165                         | (1 << UCharacterCategory.TITLECASE_LETTER)
3166                         | (1 << UCharacterCategory.MODIFIER_LETTER)
3167                         | (1 << UCharacterCategory.OTHER_LETTER)
3168                         | (1 << UCharacterCategory.DECIMAL_DIGIT_NUMBER))) != 0;
3169     }
3170 
3171     /**
3172      * Compatibility override of Java deprecated method.  This
3173      * method will always remain deprecated.  Delegates to
3174      * java.lang.Character.isJavaIdentifierStart.
3175      * @param cp the code point
3176      * @return true if the code point can start a java identifier.
3177      * @deprecated ICU 3.4 (Java)
3178      * @hide original deprecated declaration
3179      */
3180     @Deprecated
isJavaLetter(int cp)3181     public static boolean isJavaLetter(int cp) {
3182         return isJavaIdentifierStart(cp);
3183     }
3184 
3185     /**
3186      * Compatibility override of Java deprecated method.  This
3187      * method will always remain deprecated.  Delegates to
3188      * java.lang.Character.isJavaIdentifierPart.
3189      * @param cp the code point
3190      * @return true if the code point can continue a java identifier.
3191      * @deprecated ICU 3.4 (Java)
3192      * @hide original deprecated declaration
3193      */
3194     @Deprecated
isJavaLetterOrDigit(int cp)3195     public static boolean isJavaLetterOrDigit(int cp) {
3196         return isJavaIdentifierPart(cp);
3197     }
3198 
3199     /**
3200      * Compatibility override of Java method, delegates to
3201      * java.lang.Character.isJavaIdentifierStart.
3202      * @param cp the code point
3203      * @return true if the code point can start a java identifier.
3204      */
isJavaIdentifierStart(int cp)3205     public static boolean isJavaIdentifierStart(int cp) {
3206         // note, downcast to char for jdk 1.4 compatibility
3207         return java.lang.Character.isJavaIdentifierStart((char)cp);
3208     }
3209 
3210     /**
3211      * Compatibility override of Java method, delegates to
3212      * java.lang.Character.isJavaIdentifierPart.
3213      * @param cp the code point
3214      * @return true if the code point can continue a java identifier.
3215      */
isJavaIdentifierPart(int cp)3216     public static boolean isJavaIdentifierPart(int cp) {
3217         // note, downcast to char for jdk 1.4 compatibility
3218         return java.lang.Character.isJavaIdentifierPart((char)cp);
3219     }
3220 
3221     /**
3222      * Determines if the specified code point is a lowercase character.
3223      * UnicodeData only contains case mappings for code points where they are
3224      * one-to-one mappings; it also omits information about context-sensitive
3225      * case mappings.<br> For more information about Unicode case mapping
3226      * please refer to the
3227      * <a href=http://www.unicode.org/unicode/reports/tr21/>Technical report
3228      * #21</a>.<br>
3229      * Up-to-date Unicode implementation of java.lang.Character.isLowerCase()
3230      * @param ch code point to determine if it is in lowercase
3231      * @return true if code point is a lowercase character
3232      */
isLowerCase(int ch)3233     public static boolean isLowerCase(int ch)
3234     {
3235         // if props == 0, it will just fall through and return false
3236         return getType(ch) == UCharacterCategory.LOWERCASE_LETTER;
3237     }
3238 
3239     /**
3240      * Determines if the specified code point is a white space character.
3241      * A code point is considered to be an whitespace character if and only
3242      * if it satisfies one of the following criteria:
3243      * <ul>
3244      * <li> It is a Unicode Separator character (categories "Z" = "Zs" or "Zl" or "Zp"), but is not
3245      *      also a non-breaking space (&#92;u00A0 or &#92;u2007 or &#92;u202F).
3246      * <li> It is &#92;u0009, HORIZONTAL TABULATION.
3247      * <li> It is &#92;u000A, LINE FEED.
3248      * <li> It is &#92;u000B, VERTICAL TABULATION.
3249      * <li> It is &#92;u000C, FORM FEED.
3250      * <li> It is &#92;u000D, CARRIAGE RETURN.
3251      * <li> It is &#92;u001C, FILE SEPARATOR.
3252      * <li> It is &#92;u001D, GROUP SEPARATOR.
3253      * <li> It is &#92;u001E, RECORD SEPARATOR.
3254      * <li> It is &#92;u001F, UNIT SEPARATOR.
3255      * </ul>
3256      *
3257      * This API tries to sync with the semantics of Java's
3258      * java.lang.Character.isWhitespace(), but it may not return
3259      * the exact same results because of the Unicode version
3260      * difference.
3261      * <p>Note: Unicode 4.0.1 changed U+200B ZERO WIDTH SPACE from a Space Separator (Zs)
3262      * to a Format Control (Cf). Since then, isWhitespace(0x200b) returns false.
3263      * See http://www.unicode.org/versions/Unicode4.0.1/
3264      * @param ch code point to determine if it is a white space
3265      * @return true if the specified code point is a white space character
3266      */
isWhitespace(int ch)3267     public static boolean isWhitespace(int ch)
3268     {
3269         // exclude no-break spaces
3270         // if props == 0, it will just fall through and return false
3271         return ((1 << getType(ch)) &
3272                 ((1 << UCharacterCategory.SPACE_SEPARATOR)
3273                         | (1 << UCharacterCategory.LINE_SEPARATOR)
3274                         | (1 << UCharacterCategory.PARAGRAPH_SEPARATOR))) != 0
3275                         && (ch != NO_BREAK_SPACE_) && (ch != FIGURE_SPACE_) && (ch != NARROW_NO_BREAK_SPACE_)
3276                         // TAB VT LF FF CR FS GS RS US NL are all control characters
3277                         // that are white spaces.
3278                         || (ch >= 0x9 && ch <= 0xd) || (ch >= 0x1c && ch <= 0x1f);
3279     }
3280 
3281     /**
3282      * Determines if the specified code point is a Unicode specified space
3283      * character, i.e. if code point is in the category Zs, Zl and Zp.
3284      * Up-to-date Unicode implementation of java.lang.Character.isSpaceChar().
3285      * @param ch code point to determine if it is a space
3286      * @return true if the specified code point is a space character
3287      */
isSpaceChar(int ch)3288     public static boolean isSpaceChar(int ch)
3289     {
3290         // if props == 0, it will just fall through and return false
3291         return ((1 << getType(ch)) & ((1 << UCharacterCategory.SPACE_SEPARATOR)
3292                 | (1 << UCharacterCategory.LINE_SEPARATOR)
3293                 | (1 << UCharacterCategory.PARAGRAPH_SEPARATOR)))
3294                 != 0;
3295     }
3296 
3297     /**
3298      * Determines if the specified code point is a titlecase character.
3299      * UnicodeData only contains case mappings for code points where they are
3300      * one-to-one mappings; it also omits information about context-sensitive
3301      * case mappings.<br>
3302      * For more information about Unicode case mapping please refer to the
3303      * <a href=http://www.unicode.org/unicode/reports/tr21/>
3304      * Technical report #21</a>.<br>
3305      * Up-to-date Unicode implementation of java.lang.Character.isTitleCase().
3306      * @param ch code point to determine if it is in title case
3307      * @return true if the specified code point is a titlecase character
3308      */
isTitleCase(int ch)3309     public static boolean isTitleCase(int ch)
3310     {
3311         // if props == 0, it will just fall through and return false
3312         return getType(ch) == UCharacterCategory.TITLECASE_LETTER;
3313     }
3314 
3315     /**
3316      * Determines if the specified code point may be any part of a Unicode
3317      * identifier other than the starting character.
3318      * A code point may be part of a Unicode identifier if and only if it is
3319      * one of the following:
3320      * <ul>
3321      * <li> Lu Uppercase letter
3322      * <li> Ll Lowercase letter
3323      * <li> Lt Titlecase letter
3324      * <li> Lm Modifier letter
3325      * <li> Lo Other letter
3326      * <li> Nl Letter number
3327      * <li> Pc Connecting punctuation character
3328      * <li> Nd decimal number
3329      * <li> Mc Spacing combining mark
3330      * <li> Mn Non-spacing mark
3331      * <li> Cf formatting code
3332      * </ul>
3333      * Up-to-date Unicode implementation of
3334      * java.lang.Character.isUnicodeIdentifierPart().<br>
3335      * See <a href=http://www.unicode.org/unicode/reports/tr8/>UTR #8</a>.
3336      * @param ch code point to determine if is can be part of a Unicode
3337      *        identifier
3338      * @return true if code point is any character belonging a unicode
3339      *         identifier suffix after the first character
3340      */
isUnicodeIdentifierPart(int ch)3341     public static boolean isUnicodeIdentifierPart(int ch)
3342     {
3343         // if props == 0, it will just fall through and return false
3344         // cat == format
3345         return ((1 << getType(ch))
3346                 & ((1 << UCharacterCategory.UPPERCASE_LETTER)
3347                         | (1 << UCharacterCategory.LOWERCASE_LETTER)
3348                         | (1 << UCharacterCategory.TITLECASE_LETTER)
3349                         | (1 << UCharacterCategory.MODIFIER_LETTER)
3350                         | (1 << UCharacterCategory.OTHER_LETTER)
3351                         | (1 << UCharacterCategory.LETTER_NUMBER)
3352                         | (1 << UCharacterCategory.CONNECTOR_PUNCTUATION)
3353                         | (1 << UCharacterCategory.DECIMAL_DIGIT_NUMBER)
3354                         | (1 << UCharacterCategory.COMBINING_SPACING_MARK)
3355                         | (1 << UCharacterCategory.NON_SPACING_MARK))) != 0
3356                         || isIdentifierIgnorable(ch);
3357     }
3358 
3359     /**
3360      * Determines if the specified code point is permissible as the first
3361      * character in a Unicode identifier.
3362      * A code point may start a Unicode identifier if it is of type either
3363      * <ul>
3364      * <li> Lu Uppercase letter
3365      * <li> Ll Lowercase letter
3366      * <li> Lt Titlecase letter
3367      * <li> Lm Modifier letter
3368      * <li> Lo Other letter
3369      * <li> Nl Letter number
3370      * </ul>
3371      * Up-to-date Unicode implementation of
3372      * java.lang.Character.isUnicodeIdentifierStart().<br>
3373      * See <a href=http://www.unicode.org/unicode/reports/tr8/>UTR #8</a>.
3374      * @param ch code point to determine if it can start a Unicode identifier
3375      * @return true if code point is the first character belonging a unicode
3376      *              identifier
3377      */
isUnicodeIdentifierStart(int ch)3378     public static boolean isUnicodeIdentifierStart(int ch)
3379     {
3380         /*int cat = getType(ch);*/
3381         // if props == 0, it will just fall through and return false
3382         return ((1 << getType(ch))
3383                 & ((1 << UCharacterCategory.UPPERCASE_LETTER)
3384                         | (1 << UCharacterCategory.LOWERCASE_LETTER)
3385                         | (1 << UCharacterCategory.TITLECASE_LETTER)
3386                         | (1 << UCharacterCategory.MODIFIER_LETTER)
3387                         | (1 << UCharacterCategory.OTHER_LETTER)
3388                         | (1 << UCharacterCategory.LETTER_NUMBER))) != 0;
3389     }
3390 
3391     /**
3392      * Determines if the specified code point should be regarded as an
3393      * ignorable character in a Java identifier.
3394      * A character is Java-identifier-ignorable if it has the general category
3395      * Cf Formatting Control, or it is a non-Java-whitespace ISO control:
3396      * U+0000..U+0008, U+000E..U+001B, U+007F..U+009F.<br>
3397      * Up-to-date Unicode implementation of
3398      * java.lang.Character.isIdentifierIgnorable().<br>
3399      * See <a href=http://www.unicode.org/unicode/reports/tr8/>UTR #8</a>.
3400      * <p>Note that Unicode just recommends to ignore Cf (format controls).
3401      * @param ch code point to be determined if it can be ignored in a Unicode
3402      *        identifier.
3403      * @return true if the code point is ignorable
3404      */
isIdentifierIgnorable(int ch)3405     public static boolean isIdentifierIgnorable(int ch)
3406     {
3407         // see java.lang.Character.isIdentifierIgnorable() on range of
3408         // ignorable characters.
3409         if (ch <= 0x9f) {
3410             return isISOControl(ch)
3411                     && !((ch >= 0x9 && ch <= 0xd)
3412                             || (ch >= 0x1c && ch <= 0x1f));
3413         }
3414         return getType(ch) == UCharacterCategory.FORMAT;
3415     }
3416 
3417     /**
3418      * Determines if the specified code point is an uppercase character.
3419      * UnicodeData only contains case mappings for code point where they are
3420      * one-to-one mappings; it also omits information about context-sensitive
3421      * case mappings.<br>
3422      * For language specific case conversion behavior, use
3423      * toUpperCase(locale, str). <br>
3424      * For example, the case conversion for dot-less i and dotted I in Turkish,
3425      * or for final sigma in Greek.
3426      * For more information about Unicode case mapping please refer to the
3427      * <a href=http://www.unicode.org/unicode/reports/tr21/>
3428      * Technical report #21</a>.<br>
3429      * Up-to-date Unicode implementation of java.lang.Character.isUpperCase().
3430      * @param ch code point to determine if it is in uppercase
3431      * @return true if the code point is an uppercase character
3432      */
isUpperCase(int ch)3433     public static boolean isUpperCase(int ch)
3434     {
3435         // if props == 0, it will just fall through and return false
3436         return getType(ch) == UCharacterCategory.UPPERCASE_LETTER;
3437     }
3438 
3439     /**
3440      * The given code point is mapped to its lowercase equivalent; if the code
3441      * point has no lowercase equivalent, the code point itself is returned.
3442      * Up-to-date Unicode implementation of java.lang.Character.toLowerCase()
3443      *
3444      * <p>This function only returns the simple, single-code point case mapping.
3445      * Full case mappings should be used whenever possible because they produce
3446      * better results by working on whole strings.
3447      * They take into account the string context and the language and can map
3448      * to a result string with a different length as appropriate.
3449      * Full case mappings are applied by the case mapping functions
3450      * that take String parameters rather than code points (int).
3451      * See also the User Guide chapter on C/POSIX migration:
3452      * http://www.icu-project.org/userguide/posix.html#case_mappings
3453      *
3454      * @param ch code point whose lowercase equivalent is to be retrieved
3455      * @return the lowercase equivalent code point
3456      */
toLowerCase(int ch)3457     public static int toLowerCase(int ch) {
3458         return UCaseProps.INSTANCE.tolower(ch);
3459     }
3460 
3461     /**
3462      * Converts argument code point and returns a String object representing
3463      * the code point's value in UTF-16 format.
3464      * The result is a string whose length is 1 for BMP code points, 2 for supplementary ones.
3465      *
3466      * <p>Up-to-date Unicode implementation of java.lang.Character.toString().
3467      *
3468      * @param ch code point
3469      * @return string representation of the code point, null if code point is not
3470      *         defined in unicode
3471      */
toString(int ch)3472     public static String toString(int ch)
3473     {
3474         if (ch < MIN_VALUE || ch > MAX_VALUE) {
3475             return null;
3476         }
3477 
3478         if (ch < SUPPLEMENTARY_MIN_VALUE) {
3479             return String.valueOf((char)ch);
3480         }
3481 
3482         return new String(Character.toChars(ch));
3483     }
3484 
3485     /**
3486      * Converts the code point argument to titlecase.
3487      * If no titlecase is available, the uppercase is returned. If no uppercase
3488      * is available, the code point itself is returned.
3489      * Up-to-date Unicode implementation of java.lang.Character.toTitleCase()
3490      *
3491      * <p>This function only returns the simple, single-code point case mapping.
3492      * Full case mappings should be used whenever possible because they produce
3493      * better results by working on whole strings.
3494      * They take into account the string context and the language and can map
3495      * to a result string with a different length as appropriate.
3496      * Full case mappings are applied by the case mapping functions
3497      * that take String parameters rather than code points (int).
3498      * See also the User Guide chapter on C/POSIX migration:
3499      * http://www.icu-project.org/userguide/posix.html#case_mappings
3500      *
3501      * @param ch code point  whose title case is to be retrieved
3502      * @return titlecase code point
3503      */
toTitleCase(int ch)3504     public static int toTitleCase(int ch) {
3505         return UCaseProps.INSTANCE.totitle(ch);
3506     }
3507 
3508     /**
3509      * Converts the character argument to uppercase.
3510      * If no uppercase is available, the character itself is returned.
3511      * Up-to-date Unicode implementation of java.lang.Character.toUpperCase()
3512      *
3513      * <p>This function only returns the simple, single-code point case mapping.
3514      * Full case mappings should be used whenever possible because they produce
3515      * better results by working on whole strings.
3516      * They take into account the string context and the language and can map
3517      * to a result string with a different length as appropriate.
3518      * Full case mappings are applied by the case mapping functions
3519      * that take String parameters rather than code points (int).
3520      * See also the User Guide chapter on C/POSIX migration:
3521      * http://www.icu-project.org/userguide/posix.html#case_mappings
3522      *
3523      * @param ch code point whose uppercase is to be retrieved
3524      * @return uppercase code point
3525      */
toUpperCase(int ch)3526     public static int toUpperCase(int ch) {
3527         return UCaseProps.INSTANCE.toupper(ch);
3528     }
3529 
3530     // extra methods not in java.lang.Character --------------------------
3531 
3532     /**
3533      * <strong>[icu]</strong> Determines if the code point is a supplementary character.
3534      * A code point is a supplementary character if and only if it is greater
3535      * than <a href=#SUPPLEMENTARY_MIN_VALUE>SUPPLEMENTARY_MIN_VALUE</a>
3536      * @param ch code point to be determined if it is in the supplementary
3537      *        plane
3538      * @return true if code point is a supplementary character
3539      */
isSupplementary(int ch)3540     public static boolean isSupplementary(int ch)
3541     {
3542         return ch >= UCharacter.SUPPLEMENTARY_MIN_VALUE &&
3543                 ch <= UCharacter.MAX_VALUE;
3544     }
3545 
3546     /**
3547      * <strong>[icu]</strong> Determines if the code point is in the BMP plane.
3548      * @param ch code point to be determined if it is not a supplementary
3549      *        character
3550      * @return true if code point is not a supplementary character
3551      */
isBMP(int ch)3552     public static boolean isBMP(int ch)
3553     {
3554         return (ch >= 0 && ch <= LAST_CHAR_MASK_);
3555     }
3556 
3557     /**
3558      * <strong>[icu]</strong> Determines whether the specified code point is a printable character
3559      * according to the Unicode standard.
3560      * @param ch code point to be determined if it is printable
3561      * @return true if the code point is a printable character
3562      */
isPrintable(int ch)3563     public static boolean isPrintable(int ch)
3564     {
3565         int cat = getType(ch);
3566         // if props == 0, it will just fall through and return false
3567         return (cat != UCharacterCategory.UNASSIGNED &&
3568                 cat != UCharacterCategory.CONTROL &&
3569                 cat != UCharacterCategory.FORMAT &&
3570                 cat != UCharacterCategory.PRIVATE_USE &&
3571                 cat != UCharacterCategory.SURROGATE &&
3572                 cat != UCharacterCategory.GENERAL_OTHER_TYPES);
3573     }
3574 
3575     /**
3576      * <strong>[icu]</strong> Determines whether the specified code point is of base form.
3577      * A code point of base form does not graphically combine with preceding
3578      * characters, and is neither a control nor a format character.
3579      * @param ch code point to be determined if it is of base form
3580      * @return true if the code point is of base form
3581      */
isBaseForm(int ch)3582     public static boolean isBaseForm(int ch)
3583     {
3584         int cat = getType(ch);
3585         // if props == 0, it will just fall through and return false
3586         return cat == UCharacterCategory.DECIMAL_DIGIT_NUMBER ||
3587                 cat == UCharacterCategory.OTHER_NUMBER ||
3588                 cat == UCharacterCategory.LETTER_NUMBER ||
3589                 cat == UCharacterCategory.UPPERCASE_LETTER ||
3590                 cat == UCharacterCategory.LOWERCASE_LETTER ||
3591                 cat == UCharacterCategory.TITLECASE_LETTER ||
3592                 cat == UCharacterCategory.MODIFIER_LETTER ||
3593                 cat == UCharacterCategory.OTHER_LETTER ||
3594                 cat == UCharacterCategory.NON_SPACING_MARK ||
3595                 cat == UCharacterCategory.ENCLOSING_MARK ||
3596                 cat == UCharacterCategory.COMBINING_SPACING_MARK;
3597     }
3598 
3599     /**
3600      * <strong>[icu]</strong> Returns the Bidirection property of a code point.
3601      * For example, 0x0041 (letter A) has the LEFT_TO_RIGHT directional
3602      * property.<br>
3603      * Result returned belongs to the interface
3604      * <a href=UCharacterDirection.html>UCharacterDirection</a>
3605      * @param ch the code point to be determined its direction
3606      * @return direction constant from UCharacterDirection.
3607      */
getDirection(int ch)3608     public static int getDirection(int ch)
3609     {
3610         return UBiDiProps.INSTANCE.getClass(ch);
3611     }
3612 
3613     /**
3614      * Determines whether the code point has the "mirrored" property.
3615      * This property is set for characters that are commonly used in
3616      * Right-To-Left contexts and need to be displayed with a "mirrored"
3617      * glyph.
3618      * @param ch code point whose mirror is to be determined
3619      * @return true if the code point has the "mirrored" property
3620      */
isMirrored(int ch)3621     public static boolean isMirrored(int ch)
3622     {
3623         return UBiDiProps.INSTANCE.isMirrored(ch);
3624     }
3625 
3626     /**
3627      * <strong>[icu]</strong> Maps the specified code point to a "mirror-image" code point.
3628      * For code points with the "mirrored" property, implementations sometimes
3629      * need a "poor man's" mapping to another code point such that the default
3630      * glyph may serve as the mirror-image of the default glyph of the
3631      * specified code point.<br>
3632      * This is useful for text conversion to and from codepages with visual
3633      * order, and for displays without glyph selection capabilities.
3634      * @param ch code point whose mirror is to be retrieved
3635      * @return another code point that may serve as a mirror-image substitute,
3636      *         or ch itself if there is no such mapping or ch does not have the
3637      *         "mirrored" property
3638      */
getMirror(int ch)3639     public static int getMirror(int ch)
3640     {
3641         return UBiDiProps.INSTANCE.getMirror(ch);
3642     }
3643 
3644     /**
3645      * <strong>[icu]</strong> Maps the specified character to its paired bracket character.
3646      * For Bidi_Paired_Bracket_Type!=None, this is the same as getMirror(int).
3647      * Otherwise c itself is returned.
3648      * See http://www.unicode.org/reports/tr9/
3649      *
3650      * @param c the code point to be mapped
3651      * @return the paired bracket code point,
3652      *         or c itself if there is no such mapping
3653      *         (Bidi_Paired_Bracket_Type=None)
3654      *
3655      * @see UProperty#BIDI_PAIRED_BRACKET
3656      * @see UProperty#BIDI_PAIRED_BRACKET_TYPE
3657      * @see #getMirror(int)
3658      */
getBidiPairedBracket(int c)3659     public static int getBidiPairedBracket(int c) {
3660         return UBiDiProps.INSTANCE.getPairedBracket(c);
3661     }
3662 
3663     /**
3664      * <strong>[icu]</strong> Returns the combining class of the argument codepoint
3665      * @param ch code point whose combining is to be retrieved
3666      * @return the combining class of the codepoint
3667      */
getCombiningClass(int ch)3668     public static int getCombiningClass(int ch)
3669     {
3670         return Normalizer2.getNFDInstance().getCombiningClass(ch);
3671     }
3672 
3673     /**
3674      * <strong>[icu]</strong> A code point is illegal if and only if
3675      * <ul>
3676      * <li> Out of bounds, less than 0 or greater than UCharacter.MAX_VALUE
3677      * <li> A surrogate value, 0xD800 to 0xDFFF
3678      * <li> Not-a-character, having the form 0x xxFFFF or 0x xxFFFE
3679      * </ul>
3680      * Note: legal does not mean that it is assigned in this version of Unicode.
3681      * @param ch code point to determine if it is a legal code point by itself
3682      * @return true if and only if legal.
3683      */
isLegal(int ch)3684     public static boolean isLegal(int ch)
3685     {
3686         if (ch < MIN_VALUE) {
3687             return false;
3688         }
3689         if (ch < Character.MIN_SURROGATE) {
3690             return true;
3691         }
3692         if (ch <= Character.MAX_SURROGATE) {
3693             return false;
3694         }
3695         if (UCharacterUtility.isNonCharacter(ch)) {
3696             return false;
3697         }
3698         return (ch <= MAX_VALUE);
3699     }
3700 
3701     /**
3702      * <strong>[icu]</strong> A string is legal iff all its code points are legal.
3703      * A code point is illegal if and only if
3704      * <ul>
3705      * <li> Out of bounds, less than 0 or greater than UCharacter.MAX_VALUE
3706      * <li> A surrogate value, 0xD800 to 0xDFFF
3707      * <li> Not-a-character, having the form 0x xxFFFF or 0x xxFFFE
3708      * </ul>
3709      * Note: legal does not mean that it is assigned in this version of Unicode.
3710      * @param str containing code points to examin
3711      * @return true if and only if legal.
3712      */
isLegal(String str)3713     public static boolean isLegal(String str)
3714     {
3715         int size = str.length();
3716         int codepoint;
3717         for (int i = 0; i < size; i += Character.charCount(codepoint))
3718         {
3719             codepoint = str.codePointAt(i);
3720             if (!isLegal(codepoint)) {
3721                 return false;
3722             }
3723         }
3724         return true;
3725     }
3726 
3727     /**
3728      * <strong>[icu]</strong> Returns the version of Unicode data used.
3729      * @return the unicode version number used
3730      */
getUnicodeVersion()3731     public static VersionInfo getUnicodeVersion()
3732     {
3733         return UCharacterProperty.INSTANCE.m_unicodeVersion_;
3734     }
3735 
3736     /**
3737      * <strong>[icu]</strong> Returns the most current Unicode name of the argument code point, or
3738      * null if the character is unassigned or outside the range
3739      * UCharacter.MIN_VALUE and UCharacter.MAX_VALUE or does not have a name.
3740      * <br>
3741      * Note calling any methods related to code point names, e.g. get*Name*()
3742      * incurs a one-time initialisation cost to construct the name tables.
3743      * @param ch the code point for which to get the name
3744      * @return most current Unicode name
3745      */
getName(int ch)3746     public static String getName(int ch)
3747     {
3748         return UCharacterName.INSTANCE.getName(ch, UCharacterNameChoice.UNICODE_CHAR_NAME);
3749     }
3750 
3751     /**
3752      * <strong>[icu]</strong> Returns the names for each of the characters in a string
3753      * @param s string to format
3754      * @param separator string to go between names
3755      * @return string of names
3756      */
getName(String s, String separator)3757     public static String getName(String s, String separator) {
3758         if (s.length() == 1) { // handle common case
3759             return getName(s.charAt(0));
3760         }
3761         int cp;
3762         StringBuilder sb = new StringBuilder();
3763         for (int i = 0; i < s.length(); i += Character.charCount(cp)) {
3764             cp = s.codePointAt(i);
3765             if (i != 0) sb.append(separator);
3766             sb.append(UCharacter.getName(cp));
3767         }
3768         return sb.toString();
3769     }
3770 
3771     /**
3772      * <strong>[icu]</strong> Returns null.
3773      * Used to return the Unicode_1_Name property value which was of little practical value.
3774      * @param ch the code point for which to get the name
3775      * @return null
3776      * @deprecated ICU 49
3777      * @hide original deprecated declaration
3778      */
3779     @Deprecated
getName1_0(int ch)3780     public static String getName1_0(int ch)
3781     {
3782         return null;
3783     }
3784 
3785     /**
3786      * <strong>[icu]</strong> Returns a name for a valid codepoint. Unlike, getName(int) and
3787      * getName1_0(int), this method will return a name even for codepoints that
3788      * are not assigned a name in UnicodeData.txt.
3789      *
3790      * <p>The names are returned in the following order.
3791      * <ul>
3792      * <li> Most current Unicode name if there is any
3793      * <li> Unicode 1.0 name if there is any
3794      * <li> Extended name in the form of
3795      *      "&lt;codepoint_type-codepoint_hex_digits&gt;". E.g., &lt;noncharacter-fffe&gt;
3796      * </ul>
3797      * Note calling any methods related to code point names, e.g. get*Name*()
3798      * incurs a one-time initialisation cost to construct the name tables.
3799      * @param ch the code point for which to get the name
3800      * @return a name for the argument codepoint
3801      */
getExtendedName(int ch)3802     public static String getExtendedName(int ch) {
3803         return UCharacterName.INSTANCE.getName(ch, UCharacterNameChoice.EXTENDED_CHAR_NAME);
3804     }
3805 
3806     /**
3807      * <strong>[icu]</strong> Returns the corrected name from NameAliases.txt if there is one.
3808      * Returns null if the character is unassigned or outside the range
3809      * UCharacter.MIN_VALUE and UCharacter.MAX_VALUE or does not have a name.
3810      * <br>
3811      * Note calling any methods related to code point names, e.g. get*Name*()
3812      * incurs a one-time initialisation cost to construct the name tables.
3813      * @param ch the code point for which to get the name alias
3814      * @return Unicode name alias, or null
3815      */
getNameAlias(int ch)3816     public static String getNameAlias(int ch)
3817     {
3818         return UCharacterName.INSTANCE.getName(ch, UCharacterNameChoice.CHAR_NAME_ALIAS);
3819     }
3820 
3821     /**
3822      * <strong>[icu]</strong> Returns null.
3823      * Used to return the ISO 10646 comment for a character.
3824      * The Unicode ISO_Comment property is deprecated and has no values.
3825      *
3826      * @param ch The code point for which to get the ISO comment.
3827      *           It must be the case that {@code 0 <= ch <= 0x10ffff}.
3828      * @return null
3829      * @deprecated ICU 49
3830      * @hide original deprecated declaration
3831      */
3832     @Deprecated
getISOComment(int ch)3833     public static String getISOComment(int ch)
3834     {
3835         return null;
3836     }
3837 
3838     /**
3839      * <strong>[icu]</strong> <p>Finds a Unicode code point by its most current Unicode name and
3840      * return its code point value. All Unicode names are in uppercase.
3841      * Note calling any methods related to code point names, e.g. get*Name*()
3842      * incurs a one-time initialisation cost to construct the name tables.
3843      * @param name most current Unicode character name whose code point is to
3844      *        be returned
3845      * @return code point or -1 if name is not found
3846      */
getCharFromName(String name)3847     public static int getCharFromName(String name){
3848         return UCharacterName.INSTANCE.getCharFromName(
3849                 UCharacterNameChoice.UNICODE_CHAR_NAME, name);
3850     }
3851 
3852     /**
3853      * <strong>[icu]</strong> Returns -1.
3854      * <p>Used to find a Unicode character by its version 1.0 Unicode name and return
3855      * its code point value.
3856      * @param name Unicode 1.0 code point name whose code point is to be
3857      *             returned
3858      * @return -1
3859      * @deprecated ICU 49
3860      * @see #getName1_0(int)
3861      * @hide original deprecated declaration
3862      */
3863     @Deprecated
getCharFromName1_0(String name)3864     public static int getCharFromName1_0(String name){
3865         return -1;
3866     }
3867 
3868     /**
3869      * <strong>[icu]</strong> <p>Find a Unicode character by either its name and return its code
3870      * point value. All Unicode names are in uppercase.
3871      * Extended names are all lowercase except for numbers and are contained
3872      * within angle brackets.
3873      * The names are searched in the following order
3874      * <ul>
3875      * <li> Most current Unicode name if there is any
3876      * <li> Unicode 1.0 name if there is any
3877      * <li> Extended name in the form of
3878      *      "&lt;codepoint_type-codepoint_hex_digits&gt;". E.g. &lt;noncharacter-FFFE&gt;
3879      * </ul>
3880      * Note calling any methods related to code point names, e.g. get*Name*()
3881      * incurs a one-time initialisation cost to construct the name tables.
3882      * @param name codepoint name
3883      * @return code point associated with the name or -1 if the name is not
3884      *         found.
3885      */
getCharFromExtendedName(String name)3886     public static int getCharFromExtendedName(String name){
3887         return UCharacterName.INSTANCE.getCharFromName(
3888                 UCharacterNameChoice.EXTENDED_CHAR_NAME, name);
3889     }
3890 
3891     /**
3892      * <strong>[icu]</strong> <p>Find a Unicode character by its corrected name alias and return
3893      * its code point value. All Unicode names are in uppercase.
3894      * Note calling any methods related to code point names, e.g. get*Name*()
3895      * incurs a one-time initialisation cost to construct the name tables.
3896      * @param name Unicode name alias whose code point is to be returned
3897      * @return code point or -1 if name is not found
3898      */
getCharFromNameAlias(String name)3899     public static int getCharFromNameAlias(String name){
3900         return UCharacterName.INSTANCE.getCharFromName(UCharacterNameChoice.CHAR_NAME_ALIAS, name);
3901     }
3902 
3903     /**
3904      * <strong>[icu]</strong> Return the Unicode name for a given property, as given in the
3905      * Unicode database file PropertyAliases.txt.  Most properties
3906      * have more than one name.  The nameChoice determines which one
3907      * is returned.
3908      *
3909      * In addition, this function maps the property
3910      * UProperty.GENERAL_CATEGORY_MASK to the synthetic names "gcm" /
3911      * "General_Category_Mask".  These names are not in
3912      * PropertyAliases.txt.
3913      *
3914      * @param property UProperty selector.
3915      *
3916      * @param nameChoice UProperty.NameChoice selector for which name
3917      * to get.  All properties have a long name.  Most have a short
3918      * name, but some do not.  Unicode allows for additional names; if
3919      * present these will be returned by UProperty.NameChoice.LONG + i,
3920      * where i=1, 2,...
3921      *
3922      * @return a name, or null if Unicode explicitly defines no name
3923      * ("n/a") for a given property/nameChoice.  If a given nameChoice
3924      * throws an exception, then all larger values of nameChoice will
3925      * throw an exception.  If null is returned for a given
3926      * nameChoice, then other nameChoice values may return non-null
3927      * results.
3928      *
3929      * @exception IllegalArgumentException thrown if property or
3930      * nameChoice are invalid.
3931      *
3932      * @see UProperty
3933      * @see UProperty.NameChoice
3934      */
getPropertyName(int property, int nameChoice)3935     public static String getPropertyName(int property,
3936             int nameChoice) {
3937         return UPropertyAliases.INSTANCE.getPropertyName(property, nameChoice);
3938     }
3939 
3940     /**
3941      * <strong>[icu]</strong> Return the UProperty selector for a given property name, as
3942      * specified in the Unicode database file PropertyAliases.txt.
3943      * Short, long, and any other variants are recognized.
3944      *
3945      * In addition, this function maps the synthetic names "gcm" /
3946      * "General_Category_Mask" to the property
3947      * UProperty.GENERAL_CATEGORY_MASK.  These names are not in
3948      * PropertyAliases.txt.
3949      *
3950      * @param propertyAlias the property name to be matched.  The name
3951      * is compared using "loose matching" as described in
3952      * PropertyAliases.txt.
3953      *
3954      * @return a UProperty enum.
3955      *
3956      * @exception IllegalArgumentException thrown if propertyAlias
3957      * is not recognized.
3958      *
3959      * @see UProperty
3960      */
getPropertyEnum(CharSequence propertyAlias)3961     public static int getPropertyEnum(CharSequence propertyAlias) {
3962         int propEnum = UPropertyAliases.INSTANCE.getPropertyEnum(propertyAlias);
3963         if (propEnum == UProperty.UNDEFINED) {
3964             throw new IllegalIcuArgumentException("Invalid name: " + propertyAlias);
3965         }
3966         return propEnum;
3967     }
3968 
3969     /**
3970      * <strong>[icu]</strong> Return the Unicode name for a given property value, as given in
3971      * the Unicode database file PropertyValueAliases.txt.  Most
3972      * values have more than one name.  The nameChoice determines
3973      * which one is returned.
3974      *
3975      * Note: Some of the names in PropertyValueAliases.txt can only be
3976      * retrieved using UProperty.GENERAL_CATEGORY_MASK, not
3977      * UProperty.GENERAL_CATEGORY.  These include: "C" / "Other", "L" /
3978      * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P"
3979      * / "Punctuation", "S" / "Symbol", and "Z" / "Separator".
3980      *
3981      * @param property UProperty selector constant.
3982      * UProperty.INT_START &lt;= property &lt; UProperty.INT_LIMIT or
3983      * UProperty.BINARY_START &lt;= property &lt; UProperty.BINARY_LIMIT or
3984      * UProperty.MASK_START &lt;= property &lt; UProperty.MASK_LIMIT.
3985      * If out of range, null is returned.
3986      *
3987      * @param value selector for a value for the given property.  In
3988      * general, valid values range from 0 up to some maximum.  There
3989      * are a few exceptions: (1.) UProperty.BLOCK values begin at the
3990      * non-zero value BASIC_LATIN.getID().  (2.)
3991      * UProperty.CANONICAL_COMBINING_CLASS values are not contiguous
3992      * and range from 0..240.  (3.)  UProperty.GENERAL_CATEGORY_MASK values
3993      * are mask values produced by left-shifting 1 by
3994      * UCharacter.getType().  This allows grouped categories such as
3995      * [:L:] to be represented.  Mask values are non-contiguous.
3996      *
3997      * @param nameChoice UProperty.NameChoice selector for which name
3998      * to get.  All values have a long name.  Most have a short name,
3999      * but some do not.  Unicode allows for additional names; if
4000      * present these will be returned by UProperty.NameChoice.LONG + i,
4001      * where i=1, 2,...
4002      *
4003      * @return a name, or null if Unicode explicitly defines no name
4004      * ("n/a") for a given property/value/nameChoice.  If a given
4005      * nameChoice throws an exception, then all larger values of
4006      * nameChoice will throw an exception.  If null is returned for a
4007      * given nameChoice, then other nameChoice values may return
4008      * non-null results.
4009      *
4010      * @exception IllegalArgumentException thrown if property, value,
4011      * or nameChoice are invalid.
4012      *
4013      * @see UProperty
4014      * @see UProperty.NameChoice
4015      */
getPropertyValueName(int property, int value, int nameChoice)4016     public static String getPropertyValueName(int property,
4017             int value,
4018             int nameChoice)
4019     {
4020         if ((property == UProperty.CANONICAL_COMBINING_CLASS
4021                 || property == UProperty.LEAD_CANONICAL_COMBINING_CLASS
4022                 || property == UProperty.TRAIL_CANONICAL_COMBINING_CLASS)
4023                 && value >= UCharacter.getIntPropertyMinValue(
4024                         UProperty.CANONICAL_COMBINING_CLASS)
4025                         && value <= UCharacter.getIntPropertyMaxValue(
4026                                 UProperty.CANONICAL_COMBINING_CLASS)
4027                                 && nameChoice >= 0 && nameChoice < UProperty.NameChoice.COUNT) {
4028             // this is hard coded for the valid cc
4029             // because PropertyValueAliases.txt does not contain all of them
4030             try {
4031                 return UPropertyAliases.INSTANCE.getPropertyValueName(property, value,
4032                         nameChoice);
4033             }
4034             catch (IllegalArgumentException e) {
4035                 return null;
4036             }
4037         }
4038         return UPropertyAliases.INSTANCE.getPropertyValueName(property, value, nameChoice);
4039     }
4040 
4041     /**
4042      * <strong>[icu]</strong> Return the property value integer for a given value name, as
4043      * specified in the Unicode database file PropertyValueAliases.txt.
4044      * Short, long, and any other variants are recognized.
4045      *
4046      * Note: Some of the names in PropertyValueAliases.txt will only be
4047      * recognized with UProperty.GENERAL_CATEGORY_MASK, not
4048      * UProperty.GENERAL_CATEGORY.  These include: "C" / "Other", "L" /
4049      * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P"
4050      * / "Punctuation", "S" / "Symbol", and "Z" / "Separator".
4051      *
4052      * @param property UProperty selector constant.
4053      * UProperty.INT_START &lt;= property &lt; UProperty.INT_LIMIT or
4054      * UProperty.BINARY_START &lt;= property &lt; UProperty.BINARY_LIMIT or
4055      * UProperty.MASK_START &lt;= property &lt; UProperty.MASK_LIMIT.
4056      * Only these properties can be enumerated.
4057      *
4058      * @param valueAlias the value name to be matched.  The name is
4059      * compared using "loose matching" as described in
4060      * PropertyValueAliases.txt.
4061      *
4062      * @return a value integer.  Note: UProperty.GENERAL_CATEGORY_MASK
4063      * values are mask values produced by left-shifting 1 by
4064      * UCharacter.getType().  This allows grouped categories such as
4065      * [:L:] to be represented.
4066      *
4067      * @see UProperty
4068      * @throws IllegalArgumentException if property is not a valid UProperty
4069      *         selector or valueAlias is not a value of this property
4070      */
getPropertyValueEnum(int property, CharSequence valueAlias)4071     public static int getPropertyValueEnum(int property, CharSequence valueAlias) {
4072         int propEnum = UPropertyAliases.INSTANCE.getPropertyValueEnum(property, valueAlias);
4073         if (propEnum == UProperty.UNDEFINED) {
4074             throw new IllegalIcuArgumentException("Invalid name: " + valueAlias);
4075         }
4076         return propEnum;
4077     }
4078 
4079     /**
4080      * Same as {@link #getPropertyValueEnum(int, CharSequence)}, except doesn't throw exception. Instead, returns UProperty.UNDEFINED.
4081      * @param property  Same as {@link #getPropertyValueEnum(int, CharSequence)}
4082      * @param valueAlias    Same as {@link #getPropertyValueEnum(int, CharSequence)}
4083      * @return returns UProperty.UNDEFINED if the value is not valid, otherwise the value.
4084      * @deprecated This API is ICU internal only.
4085      * @hide original deprecated declaration
4086      * @hide draft / provisional / internal are hidden on Android
4087      */
4088     @Deprecated
getPropertyValueEnumNoThrow(int property, CharSequence valueAlias)4089     public static int getPropertyValueEnumNoThrow(int property, CharSequence valueAlias) {
4090         return UPropertyAliases.INSTANCE.getPropertyValueEnumNoThrow(property, valueAlias);
4091     }
4092 
4093 
4094     /**
4095      * <strong>[icu]</strong> Returns a code point corresponding to the two surrogate code units.
4096      *
4097      * @param lead the lead char
4098      * @param trail the trail char
4099      * @return code point if surrogate characters are valid.
4100      * @exception IllegalArgumentException thrown when the code units do
4101      *            not form a valid code point
4102      */
getCodePoint(char lead, char trail)4103     public static int getCodePoint(char lead, char trail)
4104     {
4105         if (Character.isSurrogatePair(lead, trail)) {
4106             return Character.toCodePoint(lead, trail);
4107         }
4108         throw new IllegalArgumentException("Illegal surrogate characters");
4109     }
4110 
4111     /**
4112      * <strong>[icu]</strong> Returns the code point corresponding to the BMP code point.
4113      *
4114      * @param char16 the BMP code point
4115      * @return code point if argument is a valid character.
4116      * @exception IllegalArgumentException thrown when char16 is not a valid
4117      *            code point
4118      */
getCodePoint(char char16)4119     public static int getCodePoint(char char16)
4120     {
4121         if (UCharacter.isLegal(char16)) {
4122             return char16;
4123         }
4124         throw new IllegalArgumentException("Illegal codepoint");
4125     }
4126 
4127     /**
4128      * Implementation of UCaseProps.ContextIterator, iterates over a String.
4129      * See ustrcase.c/utf16_caseContextIterator().
4130      */
4131     private static class StringContextIterator implements UCaseProps.ContextIterator {
4132         /**
4133          * Constructor.
4134          * @param s String to iterate over.
4135          */
StringContextIterator(String s)4136         StringContextIterator(String s) {
4137             this.s=s;
4138             limit=s.length();
4139             cpStart=cpLimit=index=0;
4140             dir=0;
4141         }
4142 
4143         /**
4144          * Set the iteration limit for nextCaseMapCP() to an index within the string.
4145          * If the limit parameter is negative or past the string, then the
4146          * string length is restored as the iteration limit.
4147          *
4148          * <p>This limit does not affect the next() function which always
4149          * iterates to the very end of the string.
4150          *
4151          * @param lim The iteration limit.
4152          */
setLimit(int lim)4153         public void setLimit(int lim) {
4154             if(0<=lim && lim<=s.length()) {
4155                 limit=lim;
4156             } else {
4157                 limit=s.length();
4158             }
4159         }
4160 
4161         /**
4162          * Move to the iteration limit without fetching code points up to there.
4163          */
moveToLimit()4164         public void moveToLimit() {
4165             cpStart=cpLimit=limit;
4166         }
4167 
4168         /**
4169          * Iterate forward through the string to fetch the next code point
4170          * to be case-mapped, and set the context indexes for it.
4171          *
4172          * <p>When the iteration limit is reached (and -1 is returned),
4173          * getCPStart() will be at the iteration limit.
4174          *
4175          * <p>Iteration with next() does not affect the position for nextCaseMapCP().
4176          *
4177          * @return The next code point to be case-mapped, or <0 when the iteration is done.
4178          */
nextCaseMapCP()4179         public int nextCaseMapCP() {
4180             cpStart=cpLimit;
4181             if(cpLimit<limit) {
4182                 int c=s.codePointAt(cpLimit);
4183                 cpLimit+=Character.charCount(c);
4184                 return c;
4185             } else {
4186                 return -1;
4187             }
4188         }
4189 
4190         /**
4191          * Returns the start of the code point that was last returned
4192          * by nextCaseMapCP().
4193          */
getCPStart()4194         public int getCPStart() {
4195             return cpStart;
4196         }
4197 
4198         /**
4199          * Returns the limit of the code point that was last returned
4200          * by nextCaseMapCP().
4201          */
getCPLimit()4202         public int getCPLimit() {
4203             return cpLimit;
4204         }
4205 
4206         // implement UCaseProps.ContextIterator
4207         // The following code is not used anywhere in this private class
reset(int direction)4208         public void reset(int direction) {
4209             if(direction>0) {
4210                 /* reset for forward iteration */
4211                 dir=1;
4212                 index=cpLimit;
4213             } else if(direction<0) {
4214                 /* reset for backward iteration */
4215                 dir=-1;
4216                 index=cpStart;
4217             } else {
4218                 // not a valid direction
4219                 dir=0;
4220                 index=0;
4221             }
4222         }
4223 
next()4224         public int next() {
4225             int c;
4226 
4227             if(dir>0 && index<s.length()) {
4228                 c=s.codePointAt(index);
4229                 index+=Character.charCount(c);
4230                 return c;
4231             } else if(dir<0 && index>0) {
4232                 c=s.codePointBefore(index);
4233                 index-=Character.charCount(c);
4234                 return c;
4235             }
4236             return -1;
4237         }
4238 
4239         // variables
4240         protected String s;
4241         protected int index, limit, cpStart, cpLimit;
4242         protected int dir; // 0=initial state  >0=forward  <0=backward
4243     }
4244 
4245     /**
4246      * Returns the uppercase version of the argument string.
4247      * Casing is dependent on the default locale and context-sensitive.
4248      * @param str source string to be performed on
4249      * @return uppercase version of the argument string
4250      */
toUpperCase(String str)4251     public static String toUpperCase(String str)
4252     {
4253         return toUpperCase(ULocale.getDefault(), str);
4254     }
4255 
4256     /**
4257      * Returns the lowercase version of the argument string.
4258      * Casing is dependent on the default locale and context-sensitive
4259      * @param str source string to be performed on
4260      * @return lowercase version of the argument string
4261      */
toLowerCase(String str)4262     public static String toLowerCase(String str)
4263     {
4264         return toLowerCase(ULocale.getDefault(), str);
4265     }
4266 
4267     /**
4268      * <p>Returns the titlecase version of the argument string.
4269      * <p>Position for titlecasing is determined by the argument break
4270      * iterator, hence the user can customize his break iterator for
4271      * a specialized titlecasing. In this case only the forward iteration
4272      * needs to be implemented.
4273      * If the break iterator passed in is null, the default Unicode algorithm
4274      * will be used to determine the titlecase positions.
4275      *
4276      * <p>Only positions returned by the break iterator will be title cased,
4277      * character in between the positions will all be in lower case.
4278      * <p>Casing is dependent on the default locale and context-sensitive
4279      * @param str source string to be performed on
4280      * @param breakiter break iterator to determine the positions in which
4281      *        the character should be title cased.
4282      * @return titlecase version of the argument string
4283      */
toTitleCase(String str, BreakIterator breakiter)4284     public static String toTitleCase(String str, BreakIterator breakiter)
4285     {
4286         return toTitleCase(ULocale.getDefault(), str, breakiter);
4287     }
4288 
4289     /**
4290      * Returns the uppercase version of the argument string.
4291      * Casing is dependent on the argument locale and context-sensitive.
4292      * @param locale which string is to be converted in
4293      * @param str source string to be performed on
4294      * @return uppercase version of the argument string
4295      */
toUpperCase(Locale locale, String str)4296     public static String toUpperCase(Locale locale, String str)
4297     {
4298         return toUpperCase(ULocale.forLocale(locale), str);
4299     }
4300 
4301     /**
4302      * Returns the uppercase version of the argument string.
4303      * Casing is dependent on the argument locale and context-sensitive.
4304      * @param locale which string is to be converted in
4305      * @param str source string to be performed on
4306      * @return uppercase version of the argument string
4307      */
toUpperCase(ULocale locale, String str)4308     public static String toUpperCase(ULocale locale, String str) {
4309         StringContextIterator iter = new StringContextIterator(str);
4310         StringBuilder result = new StringBuilder(str.length());
4311         int[] locCache = new int[1];
4312         int c;
4313 
4314         if (locale == null) {
4315             locale = ULocale.getDefault();
4316         }
4317         locCache[0]=0;
4318 
4319         while((c=iter.nextCaseMapCP())>=0) {
4320             c = UCaseProps.INSTANCE.toFullUpper(c, iter, result, locale, locCache);
4321 
4322             /* decode the result */
4323             if(c<0) {
4324                 /* (not) original code point */
4325                 c=~c;
4326             } else if(c<=UCaseProps.MAX_STRING_LENGTH) {
4327                 /* mapping already appended to result */
4328                 continue;
4329                 /* } else { append single-code point mapping */
4330             }
4331             result.appendCodePoint(c);
4332         }
4333         return result.toString();
4334     }
4335 
4336     /**
4337      * Returns the lowercase version of the argument string.
4338      * Casing is dependent on the argument locale and context-sensitive
4339      * @param locale which string is to be converted in
4340      * @param str source string to be performed on
4341      * @return lowercase version of the argument string
4342      */
toLowerCase(Locale locale, String str)4343     public static String toLowerCase(Locale locale, String str)
4344     {
4345         return toLowerCase(ULocale.forLocale(locale), str);
4346     }
4347 
4348     /**
4349      * Returns the lowercase version of the argument string.
4350      * Casing is dependent on the argument locale and context-sensitive
4351      * @param locale which string is to be converted in
4352      * @param str source string to be performed on
4353      * @return lowercase version of the argument string
4354      */
toLowerCase(ULocale locale, String str)4355     public static String toLowerCase(ULocale locale, String str) {
4356         StringContextIterator iter = new StringContextIterator(str);
4357         StringBuilder result = new StringBuilder(str.length());
4358         int[] locCache = new int[1];
4359         int c;
4360 
4361         if (locale == null) {
4362             locale = ULocale.getDefault();
4363         }
4364         locCache[0]=0;
4365 
4366         while((c=iter.nextCaseMapCP())>=0) {
4367             c = UCaseProps.INSTANCE.toFullLower(c, iter, result, locale, locCache);
4368 
4369             /* decode the result */
4370             if(c<0) {
4371                 /* (not) original code point */
4372                 c=~c;
4373             } else if(c<=UCaseProps.MAX_STRING_LENGTH) {
4374                 /* mapping already appended to result */
4375                 continue;
4376                 /* } else { append single-code point mapping */
4377             }
4378             result.appendCodePoint(c);
4379         }
4380         return result.toString();
4381     }
4382 
4383     /**
4384      * <p>Returns the titlecase version of the argument string.
4385      * <p>Position for titlecasing is determined by the argument break
4386      * iterator, hence the user can customize his break iterator for
4387      * a specialized titlecasing. In this case only the forward iteration
4388      * needs to be implemented.
4389      * If the break iterator passed in is null, the default Unicode algorithm
4390      * will be used to determine the titlecase positions.
4391      *
4392      * <p>Only positions returned by the break iterator will be title cased,
4393      * character in between the positions will all be in lower case.
4394      * <p>Casing is dependent on the argument locale and context-sensitive
4395      * @param locale which string is to be converted in
4396      * @param str source string to be performed on
4397      * @param breakiter break iterator to determine the positions in which
4398      *        the character should be title cased.
4399      * @return titlecase version of the argument string
4400      */
toTitleCase(Locale locale, String str, BreakIterator breakiter)4401     public static String toTitleCase(Locale locale, String str,
4402             BreakIterator breakiter)
4403     {
4404         return toTitleCase(ULocale.forLocale(locale), str, breakiter);
4405     }
4406 
4407     /**
4408      * <p>Returns the titlecase version of the argument string.
4409      * <p>Position for titlecasing is determined by the argument break
4410      * iterator, hence the user can customize his break iterator for
4411      * a specialized titlecasing. In this case only the forward iteration
4412      * needs to be implemented.
4413      * If the break iterator passed in is null, the default Unicode algorithm
4414      * will be used to determine the titlecase positions.
4415      *
4416      * <p>Only positions returned by the break iterator will be title cased,
4417      * character in between the positions will all be in lower case.
4418      * <p>Casing is dependent on the argument locale and context-sensitive
4419      * @param locale which string is to be converted in
4420      * @param str source string to be performed on
4421      * @param titleIter break iterator to determine the positions in which
4422      *        the character should be title cased.
4423      * @return titlecase version of the argument string
4424      */
toTitleCase(ULocale locale, String str, BreakIterator titleIter)4425     public static String toTitleCase(ULocale locale, String str,
4426             BreakIterator titleIter) {
4427         return toTitleCase(locale, str, titleIter, 0);
4428     }
4429 
4430     /**
4431      * <p>Returns the titlecase version of the argument string.
4432      * <p>Position for titlecasing is determined by the argument break
4433      * iterator, hence the user can customize his break iterator for
4434      * a specialized titlecasing. In this case only the forward iteration
4435      * needs to be implemented.
4436      * If the break iterator passed in is null, the default Unicode algorithm
4437      * will be used to determine the titlecase positions.
4438      *
4439      * <p>Only positions returned by the break iterator will be title cased,
4440      * character in between the positions will all be in lower case.
4441      * <p>Casing is dependent on the argument locale and context-sensitive
4442      * @param locale which string is to be converted in
4443      * @param str source string to be performed on
4444      * @param titleIter break iterator to determine the positions in which
4445      *        the character should be title cased.
4446      * @param options bit set to modify the titlecasing operation
4447      * @return titlecase version of the argument string
4448      * @see #TITLECASE_NO_LOWERCASE
4449      * @see #TITLECASE_NO_BREAK_ADJUSTMENT
4450      */
toTitleCase(ULocale locale, String str, BreakIterator titleIter, int options)4451     public static String toTitleCase(ULocale locale, String str,
4452             BreakIterator titleIter,
4453             int options) {
             // iter walks str one code point at a time and is also handed to the
             // UCaseProps mapping calls as their context iterator; result collects
             // the output text.
4454         StringContextIterator iter = new StringContextIterator(str);
4455         StringBuilder result = new StringBuilder(str.length());
             // One-element array passed to every toFullTitle()/toFullLower() call.
             // NOTE(review): presumably a locale cache owned by UCaseProps - confirm.
4456         int[] locCache = new int[1];
4457         int c, nc, srcLength = str.length();
4458 
             // A null locale means "use the default locale".
4459         if (locale == null) {
4460             locale = ULocale.getDefault();
4461         }
4462         locCache[0]=0;
4463 
             // Without a caller-supplied iterator, segment the text with the
             // locale's word break iterator.
4464         if(titleIter == null) {
4465             titleIter = BreakIterator.getWordInstance(locale);
4466         }
4467         titleIter.setText(str);
4468 
             // prev/index delimit the current segment [prev..index[ ; titleStart is
             // where the titlecased code point of that segment begins.
4469         int prev, titleStart, index;
4470         boolean isFirstIndex;
             // Dutch gets the special "IJ" handling below. FirstIJ is re-armed for
             // every segment and cleared once the rule has been applied in the
             // lowercasing path.
4471         boolean isDutch = locale.getLanguage().equals("nl");
4472         boolean FirstIJ = true;
4473 
4474         /* set up local variables */
4475         prev=0;
4476         isFirstIndex=true;
4477 
4478         /* titlecasing loop */
4479         while(prev<srcLength) {
4480             /* find next index where to titlecase */
4481             if(isFirstIndex) {
4482                 isFirstIndex=false;
4483                 index=titleIter.first();
4484             } else {
4485                 index=titleIter.next();
4486             }
                 // DONE or a boundary past the end both mean "the rest of the string".
4487             if(index==BreakIterator.DONE || index>srcLength) {
4488                 index=srcLength;
4489             }
4490 
4491             /*
4492              * Unicode 4 & 5 section 3.13 Default Case Operations:
4493              *
4494              * R3  toTitlecase(X): Find the word boundaries based on Unicode Standard Annex
4495              * #29, "Text Boundaries." Between each pair of word boundaries, find the first
4496              * cased character F. If F exists, map F to default_title(F); then map each
4497              * subsequent character C to default_lower(C).
4498              *
4499              * In this implementation, segment [prev..index[ into 3 parts:
4500              * a) uncased characters (copy as-is) [prev..titleStart[
4501              * b) first case letter (titlecase)         [titleStart..titleLimit[
4502              * c) subsequent characters (lowercase)                 [titleLimit..index[
4503              */
4504             if(prev<index) {
4505                 /* find and copy uncased characters [prev..titleStart[ */
4506                 iter.setLimit(index);
4507                 c=iter.nextCaseMapCP();
                     // Unless break adjustment is disabled, skip leading code points
                     // whose case type is NONE and copy them to the output unchanged.
4508                 if((options&TITLECASE_NO_BREAK_ADJUSTMENT)==0
4509                         && UCaseProps.NONE==UCaseProps.INSTANCE.getType(c)) {
4510                     while((c=iter.nextCaseMapCP())>=0
4511                             && UCaseProps.NONE==UCaseProps.INSTANCE.getType(c)) {}
                         // Either the start of the first cased code point, or the
                         // segment limit when the whole segment was uncased (c<0).
4512                     titleStart=iter.getCPStart();
4513                     if(prev<titleStart) {
4514                         result.append(str, prev, titleStart);
4515                     }
4516                 } else {
4517                     titleStart=prev;
4518                 }
4519 
4520                 if(titleStart<index) {
4521                     FirstIJ = true;
4522                     /* titlecase c which is from titleStart */
4523                     c = UCaseProps.INSTANCE.toFullTitle(c, iter, result, locale, locCache);
4524 
4525                     /* decode the result and lowercase up to index */
                         // Each pass first emits the mapping result held in c, then
                         // produces the next c (or leaves the loop).
4526                     for(;;) {
4527                         if(c<0) {
4528                             /* (not) original code point */
4529                             c=~c;
4530                             result.appendCodePoint(c);
4531                         } else if(c<=UCaseProps.MAX_STRING_LENGTH) {
4532                             /* mapping already appended to result */
4533                         } else {
4534                             /* append single-code point mapping */
4535                             result.appendCodePoint(c);
4536                         }
4537 
4538                         if((options&TITLECASE_NO_LOWERCASE)!=0) {
4539                             /* Optionally just copy the rest of the word unchanged. */
4540 
4541                             int titleLimit=iter.getCPLimit();
4542                             if(titleLimit<index) {
4543                                 /* Special Case - Dutch IJ Titlecasing */
                                     // Only a lowercase 'j' directly after the 'I' is
                                     // rewritten to 'J'; the rest of the segment is
                                     // copied as it is.
4544                                 if (isDutch && c == 0x0049 && str.charAt(titleLimit) == 'j') {
4545                                     result.append('J').append(str, titleLimit + 1, index);
4546                                 } else {
4547                                     result.append(str, titleLimit, index);
4548                                 }
4549                             }
4550                             iter.moveToLimit();
4551                             break;
4552                         } else if((nc=iter.nextCaseMapCP())>=0) {
                                 // Dutch IJ: when the code point just emitted is 'I'
                                 // (0x49) and the next one is 'J' or 'j', make the next
                                 // output 'J' instead of lowercasing it; at most once
                                 // per segment.
4553                             if (isDutch && (nc == 0x004A ||  nc == 0x006A)
4554                                     && (c == 0x0049) && (FirstIJ == true)) {
4555                                 c = 0x004A; /* J */
4556                                 FirstIJ = false;
4557                             } else {
4558                                 /* Normal operation: Lowercase the rest of the word. */
4559                                 c = UCaseProps.INSTANCE.toFullLower(nc, iter, result, locale,
4560                                         locCache);
4561                             }
4562                         } else {
                                 // Segment limit reached: nothing left to lowercase.
4563                             break;
4564                         }
4565                     }
4566                 }
4567             }
4568 
                 // The next segment starts where this one ended.
4569             prev=index;
4570         }
4571         return result.toString();
4572     }
4573 
4574 
4575     private static final int BREAK_MASK =
4576             (1<<UCharacterCategory.DECIMAL_DIGIT_NUMBER)
4577             | (1<<UCharacterCategory.OTHER_LETTER)
4578             | (1<<UCharacterCategory.MODIFIER_LETTER);
4579 
4580     /**
4581      * Return a string with just the first word titlecased, for menus and UI, etc. This does not affect most of the string,
4582      * and sometimes has no effect at all; the original string is returned whenever casing
4583      * would not be appropriate for the first word (such as for CJK characters or initial numbers).
4584      * Initial non-letters are skipped in order to find the character to change.
4585      * Characters past the first affected are left untouched: see also TITLECASE_NO_LOWERCASE.
4586      * <p>Examples:
4587      * <table border='1'><tr><th>Source</th><th>Result</th><th>Locale</th></tr>
4588      * <tr><td>anglo-American locale</td><td>Anglo-American locale</td></tr>
4589      * <tr><td>“contact us”</td><td>“Contact us”</td></tr>
4590      * <tr><td>49ers win!</td><td>49ers win!</td></tr>
4591      * <tr><td>丰(abc)</td><td>丰(abc)</td></tr>
4592      * <tr><td>«ijs»</td><td>«Ijs»</td></tr>
4593      * <tr><td>«ijs»</td><td>«IJs»</td><td>nl-BE</td></tr>
4594      * <tr><td>«ijs»</td><td>«İjs»</td><td>tr-DE</td></tr>
4595      * </table>
4596      * @param locale the locale for accessing exceptional behavior (eg for tr).
4597      * @param str the source string to change
4598      * @return the modified string, or the original if no modifications were necessary.
4599      * @deprecated ICU internal only
4600      * @hide original deprecated declaration
4601      * @hide draft / provisional / internal are hidden on Android
4602      */
4603     @Deprecated
toTitleFirst(ULocale locale, String str)4604     public static String toTitleFirst(ULocale locale, String str) {
4605         int c = 0;
4606         for (int i = 0; i < str.length(); i += UCharacter.charCount(c)) {
4607             c = UCharacter.codePointAt(str, i);
4608             int propertyMask = UCharacter.getIntPropertyValue(c, UProperty.GENERAL_CATEGORY_MASK);
4609             if ((propertyMask & BREAK_MASK) != 0) { // handle "49ers", initial CJK
4610                 break;
4611             }
4612             if (UCaseProps.INSTANCE.getType(c) == UCaseProps.NONE) {
4613                 continue;
4614             }
4615 
4616             // we now have the first cased character
4617             // What we really want is something like:
4618             // String titled = UCharacter.toTitleCase(locale, str, i, outputCharsTaken);
4619             // That is, just give us the titlecased string, for the locale, at i and following,
4620             // and tell us how many characters are replaced.
4621             // The following won't work completely: it needs some more substantial changes to UCaseProps
4622 
4623             String substring = str.substring(i, i+UCharacter.charCount(c));
4624             String titled = UCharacter.toTitleCase(locale, substring, BreakIterator.getSentenceInstance(locale), 0);
4625 
4626             // skip if no change
4627             if (titled.codePointAt(0) == c) {
4628                 // Using 0 is safe, since any change in titling will not have first initial character
4629                 break;
4630             }
4631             StringBuilder result = new StringBuilder(str.length()).append(str, 0, i);
4632             int startOfSuffix;
4633 
4634             // handle dutch, but check first for 'i', since that's faster. Should be built into UCaseProps.
4635 
4636             if (c == 'i' && locale.getLanguage().equals("nl") && i < str.length() && str.charAt(i+1) == 'j') {
4637                 result.append("IJ");
4638                 startOfSuffix = 2;
4639             } else {
4640                 result.append(titled);
4641                 startOfSuffix = i + UCharacter.charCount(c);
4642             }
4643 
4644             // add the remainder, and return
4645             return result.append(str, startOfSuffix, str.length()).toString();
4646         }
4647         return str; // no change
4648     }
4649 
4650     /**
4651      * <strong>[icu]</strong> <p>Returns the titlecase version of the argument string.
4652      * <p>Position for titlecasing is determined by the argument break
4653      * iterator, hence the user can customize his break iterator for
4654      * a specialized titlecasing. In this case only the forward iteration
4655      * needs to be implemented.
4656      * If the break iterator passed in is null, the default Unicode algorithm
4657      * will be used to determine the titlecase positions.
4658      *
4659      * <p>Only positions returned by the break iterator will be title cased,
4660      * character in between the positions will all be in lower case.
4661      * <p>Casing is dependent on the argument locale and context-sensitive
4662      * @param locale which string is to be converted in
4663      * @param str source string to be performed on
4664      * @param titleIter break iterator to determine the positions in which
4665      *        the character should be title cased.
4666      * @param options bit set to modify the titlecasing operation
     * @return titlecase version of the argument string
4668      * @see #TITLECASE_NO_LOWERCASE
4669      * @see #TITLECASE_NO_BREAK_ADJUSTMENT
4670      */
toTitleCase(Locale locale, String str, BreakIterator titleIter, int options)4671     public static String toTitleCase(Locale locale, String str,
4672             BreakIterator titleIter,
4673             int options) {
4674         return toTitleCase(ULocale.forLocale(locale), str, titleIter, options);
4675     }
4676 
4677     /**
4678      * <strong>[icu]</strong> The given character is mapped to its case folding equivalent according
4679      * to UnicodeData.txt and CaseFolding.txt; if the character has no case
4680      * folding equivalent, the character itself is returned.
4681      *
4682      * <p>This function only returns the simple, single-code point case mapping.
4683      * Full case mappings should be used whenever possible because they produce
4684      * better results by working on whole strings.
4685      * They can map to a result string with a different length as appropriate.
4686      * Full case mappings are applied by the case mapping functions
4687      * that take String parameters rather than code points (int).
4688      * See also the User Guide chapter on C/POSIX migration:
4689      * http://www.icu-project.org/userguide/posix.html#case_mappings
4690      *
4691      * @param ch             the character to be converted
4692      * @param defaultmapping Indicates whether the default mappings defined in
4693      *                       CaseFolding.txt are to be used, otherwise the
4694      *                       mappings for dotted I and dotless i marked with
4695      *                       'T' in CaseFolding.txt are included.
4696      * @return               the case folding equivalent of the character, if
4697      *                       any; otherwise the character itself.
4698      * @see                  #foldCase(String, boolean)
4699      */
foldCase(int ch, boolean defaultmapping)4700     public static int foldCase(int ch, boolean defaultmapping) {
4701         return foldCase(ch, defaultmapping ? FOLD_CASE_DEFAULT : FOLD_CASE_EXCLUDE_SPECIAL_I);
4702     }
4703 
4704     /**
4705      * <strong>[icu]</strong> The given string is mapped to its case folding equivalent according to
4706      * UnicodeData.txt and CaseFolding.txt; if any character has no case
4707      * folding equivalent, the character itself is returned.
4708      * "Full", multiple-code point case folding mappings are returned here.
4709      * For "simple" single-code point mappings use the API
4710      * foldCase(int ch, boolean defaultmapping).
4711      * @param str            the String to be converted
4712      * @param defaultmapping Indicates whether the default mappings defined in
4713      *                       CaseFolding.txt are to be used, otherwise the
4714      *                       mappings for dotted I and dotless i marked with
4715      *                       'T' in CaseFolding.txt are included.
     * @return               the case folding equivalent of the string, if
     *                       any; otherwise the string itself.
4718      * @see                  #foldCase(int, boolean)
4719      */
foldCase(String str, boolean defaultmapping)4720     public static String foldCase(String str, boolean defaultmapping) {
4721         return foldCase(str, defaultmapping ? FOLD_CASE_DEFAULT : FOLD_CASE_EXCLUDE_SPECIAL_I);
4722     }
4723 
4724     /**
4725      * <strong>[icu]</strong> Option value for case folding: use default mappings defined in
4726      * CaseFolding.txt.
4727      */
4728     public static final int FOLD_CASE_DEFAULT    =      0x0000;
4729     /**
4730      * <strong>[icu]</strong> Option value for case folding:
4731      * Use the modified set of mappings provided in CaseFolding.txt to handle dotted I
4732      * and dotless i appropriately for Turkic languages (tr, az).
4733      *
4734      * <p>Before Unicode 3.2, CaseFolding.txt contains mappings marked with 'I' that
4735      * are to be included for default mappings and
4736      * excluded for the Turkic-specific mappings.
4737      *
4738      * <p>Unicode 3.2 CaseFolding.txt instead contains mappings marked with 'T' that
4739      * are to be excluded for default mappings and
4740      * included for the Turkic-specific mappings.
4741      */
4742     public static final int FOLD_CASE_EXCLUDE_SPECIAL_I = 0x0001;
4743 
4744     /**
4745      * <strong>[icu]</strong> The given character is mapped to its case folding equivalent according
4746      * to UnicodeData.txt and CaseFolding.txt; if the character has no case
4747      * folding equivalent, the character itself is returned.
4748      *
4749      * <p>This function only returns the simple, single-code point case mapping.
4750      * Full case mappings should be used whenever possible because they produce
4751      * better results by working on whole strings.
4752      * They can map to a result string with a different length as appropriate.
4753      * Full case mappings are applied by the case mapping functions
4754      * that take String parameters rather than code points (int).
4755      * See also the User Guide chapter on C/POSIX migration:
4756      * http://www.icu-project.org/userguide/posix.html#case_mappings
4757      *
4758      * @param ch the character to be converted
4759      * @param options A bit set for special processing. Currently the recognised options
4760      * are FOLD_CASE_EXCLUDE_SPECIAL_I and FOLD_CASE_DEFAULT
4761      * @return the case folding equivalent of the character, if any; otherwise the
4762      * character itself.
4763      * @see #foldCase(String, boolean)
4764      */
foldCase(int ch, int options)4765     public static int foldCase(int ch, int options) {
4766         return UCaseProps.INSTANCE.fold(ch, options);
4767     }
4768 
4769     /**
4770      * <strong>[icu]</strong> The given string is mapped to its case folding equivalent according to
4771      * UnicodeData.txt and CaseFolding.txt; if any character has no case
4772      * folding equivalent, the character itself is returned.
4773      * "Full", multiple-code point case folding mappings are returned here.
4774      * For "simple" single-code point mappings use the API
4775      * foldCase(int ch, boolean defaultmapping).
4776      * @param str the String to be converted
4777      * @param options A bit set for special processing. Currently the recognised options
4778      *                are FOLD_CASE_EXCLUDE_SPECIAL_I and FOLD_CASE_DEFAULT
     * @return the case folding equivalent of the string, if any; otherwise the
     *         string itself.
4781      * @see #foldCase(int, boolean)
4782      */
foldCase(String str, int options)4783     public static final String foldCase(String str, int options) {
4784         StringBuilder result = new StringBuilder(str.length());
4785         int c, i, length;
4786 
4787         length = str.length();
4788         for(i=0; i<length;) {
4789             c=str.codePointAt(i);
4790             i+=Character.charCount(c);
4791             c = UCaseProps.INSTANCE.toFullFolding(c, result, options);
4792 
4793             /* decode the result */
4794             if(c<0) {
4795                 /* (not) original code point */
4796                 c=~c;
4797             } else if(c<=UCaseProps.MAX_STRING_LENGTH) {
4798                 /* mapping already appended to result */
4799                 continue;
4800                 /* } else { append single-code point mapping */
4801             }
4802             result.appendCodePoint(c);
4803         }
4804         return result.toString();
4805     }
4806 
4807     /**
4808      * <strong>[icu]</strong> Returns the numeric value of a Han character.
4809      *
4810      * <p>This returns the value of Han 'numeric' code points,
4811      * including those for zero, ten, hundred, thousand, ten thousand,
4812      * and hundred million.
4813      * This includes both the standard and 'checkwriting'
4814      * characters, the 'big circle' zero character, and the standard
4815      * zero character.
4816      *
     * <p>Note: The Unicode Standard has numeric values for more
     * Han characters than are recognized by this method
4819      * (see {@link #getNumericValue(int)} and the UCD file DerivedNumericValues.txt),
4820      * and a {@link android.icu.text.NumberFormat} can be used with
4821      * a Chinese {@link android.icu.text.NumberingSystem}.
4822      *
4823      * @param ch code point to query
4824      * @return value if it is a Han 'numeric character,' otherwise return -1.
4825      */
getHanNumericValue(int ch)4826     public static int getHanNumericValue(int ch)
4827     {
4828         switch(ch)
4829         {
4830         case IDEOGRAPHIC_NUMBER_ZERO_ :
4831         case CJK_IDEOGRAPH_COMPLEX_ZERO_ :
4832             return 0; // Han Zero
4833         case CJK_IDEOGRAPH_FIRST_ :
4834         case CJK_IDEOGRAPH_COMPLEX_ONE_ :
4835             return 1; // Han One
4836         case CJK_IDEOGRAPH_SECOND_ :
4837         case CJK_IDEOGRAPH_COMPLEX_TWO_ :
4838             return 2; // Han Two
4839         case CJK_IDEOGRAPH_THIRD_ :
4840         case CJK_IDEOGRAPH_COMPLEX_THREE_ :
4841             return 3; // Han Three
4842         case CJK_IDEOGRAPH_FOURTH_ :
4843         case CJK_IDEOGRAPH_COMPLEX_FOUR_ :
4844             return 4; // Han Four
4845         case CJK_IDEOGRAPH_FIFTH_ :
4846         case CJK_IDEOGRAPH_COMPLEX_FIVE_ :
4847             return 5; // Han Five
4848         case CJK_IDEOGRAPH_SIXTH_ :
4849         case CJK_IDEOGRAPH_COMPLEX_SIX_ :
4850             return 6; // Han Six
4851         case CJK_IDEOGRAPH_SEVENTH_ :
4852         case CJK_IDEOGRAPH_COMPLEX_SEVEN_ :
4853             return 7; // Han Seven
4854         case CJK_IDEOGRAPH_EIGHTH_ :
4855         case CJK_IDEOGRAPH_COMPLEX_EIGHT_ :
4856             return 8; // Han Eight
4857         case CJK_IDEOGRAPH_NINETH_ :
4858         case CJK_IDEOGRAPH_COMPLEX_NINE_ :
4859             return 9; // Han Nine
4860         case CJK_IDEOGRAPH_TEN_ :
4861         case CJK_IDEOGRAPH_COMPLEX_TEN_ :
4862             return 10;
4863         case CJK_IDEOGRAPH_HUNDRED_ :
4864         case CJK_IDEOGRAPH_COMPLEX_HUNDRED_ :
4865             return 100;
4866         case CJK_IDEOGRAPH_THOUSAND_ :
4867         case CJK_IDEOGRAPH_COMPLEX_THOUSAND_ :
4868             return 1000;
4869         case CJK_IDEOGRAPH_TEN_THOUSAND_ :
4870             return 10000;
4871         case CJK_IDEOGRAPH_HUNDRED_MILLION_ :
4872             return 100000000;
4873         }
4874         return -1; // no value
4875     }
4876 
4877     /**
4878      * <strong>[icu]</strong> <p>Returns an iterator for character types, iterating over codepoints.
4879      * <p>Example of use:<br>
4880      * <pre>
4881      * RangeValueIterator iterator = UCharacter.getTypeIterator();
4882      * RangeValueIterator.Element element = new RangeValueIterator.Element();
4883      * while (iterator.next(element)) {
4884      *     System.out.println("Codepoint \\u" +
4885      *                        Integer.toHexString(element.start) +
4886      *                        " to codepoint \\u" +
4887      *                        Integer.toHexString(element.limit - 1) +
4888      *                        " has the character type " +
4889      *                        element.value);
4890      * }
4891      * </pre>
4892      * @return an iterator
4893      */
getTypeIterator()4894     public static RangeValueIterator getTypeIterator()
4895     {
4896         return new UCharacterTypeIterator();
4897     }
4898 
4899     private static final class UCharacterTypeIterator implements RangeValueIterator {
UCharacterTypeIterator()4900         UCharacterTypeIterator() {
4901             reset();
4902         }
4903 
4904         // implements RangeValueIterator
next(Element element)4905         public boolean next(Element element) {
4906             if(trieIterator.hasNext() && !(range=trieIterator.next()).leadSurrogate) {
4907                 element.start=range.startCodePoint;
4908                 element.limit=range.endCodePoint+1;
4909                 element.value=range.value;
4910                 return true;
4911             } else {
4912                 return false;
4913             }
4914         }
4915 
4916         // implements RangeValueIterator
reset()4917         public void reset() {
4918             trieIterator=UCharacterProperty.INSTANCE.m_trie_.iterator(MASK_TYPE);
4919         }
4920 
4921         private Iterator<Trie2.Range> trieIterator;
4922         private Trie2.Range range;
4923 
4924         private static final class MaskType implements Trie2.ValueMapper {
4925             // Extracts the general category ("character type") from the trie value.
map(int value)4926             public int map(int value) {
4927                 return value & UCharacterProperty.TYPE_MASK;
4928             }
4929         }
4930         private static final MaskType MASK_TYPE=new MaskType();
4931     }
4932 
4933     /**
4934      * <strong>[icu]</strong> <p>Returns an iterator for character names, iterating over codepoints.
4935      * <p>This API only gets the iterator for the modern, most up-to-date
     * Unicode names. For older 1.0 Unicode names use getName1_0Iterator() or
4937      * for extended names use getExtendedNameIterator().
4938      * <p>Example of use:<br>
4939      * <pre>
4940      * ValueIterator iterator = UCharacter.getNameIterator();
4941      * ValueIterator.Element element = new ValueIterator.Element();
4942      * while (iterator.next(element)) {
4943      *     System.out.println("Codepoint \\u" +
4944      *                        Integer.toHexString(element.codepoint) +
4945      *                        " has the name " + (String)element.value);
4946      * }
4947      * </pre>
4948      * <p>The maximal range which the name iterator iterates is from
4949      * UCharacter.MIN_VALUE to UCharacter.MAX_VALUE.
4950      * @return an iterator
4951      */
getNameIterator()4952     public static ValueIterator getNameIterator(){
4953         return new UCharacterNameIterator(UCharacterName.INSTANCE,
4954                 UCharacterNameChoice.UNICODE_CHAR_NAME);
4955     }
4956 
4957     /**
4958      * <strong>[icu]</strong> Returns an empty iterator.
4959      * <p>Used to return an iterator for the older 1.0 Unicode character names, iterating over codepoints.
4960      * @return an empty iterator
4961      * @deprecated ICU 49
4962      * @see #getName1_0(int)
4963      * @hide original deprecated declaration
4964      */
4965     @Deprecated
getName1_0Iterator()4966     public static ValueIterator getName1_0Iterator(){
4967         return new DummyValueIterator();
4968     }
4969 
4970     private static final class DummyValueIterator implements ValueIterator {
next(Element element)4971         public boolean next(Element element) { return false; }
reset()4972         public void reset() {}
setRange(int start, int limit)4973         public void setRange(int start, int limit) {}
4974     }
4975 
4976     /**
4977      * <strong>[icu]</strong> <p>Returns an iterator for character names, iterating over codepoints.
4978      * <p>This API only gets the iterator for the extended names.
4979      * For modern, most up-to-date Unicode names use getNameIterator() or
     * for older 1.0 Unicode names use getName1_0Iterator().
4981      * <p>Example of use:<br>
4982      * <pre>
4983      * ValueIterator iterator = UCharacter.getExtendedNameIterator();
4984      * ValueIterator.Element element = new ValueIterator.Element();
4985      * while (iterator.next(element)) {
4986      *     System.out.println("Codepoint \\u" +
4987      *                        Integer.toHexString(element.codepoint) +
4988      *                        " has the name " + (String)element.value);
4989      * }
4990      * </pre>
     * <p>The maximal range which the name iterator iterates is from
     * UCharacter.MIN_VALUE to UCharacter.MAX_VALUE.
4992      * @return an iterator
4993      */
getExtendedNameIterator()4994     public static ValueIterator getExtendedNameIterator(){
4995         return new UCharacterNameIterator(UCharacterName.INSTANCE,
4996                 UCharacterNameChoice.EXTENDED_CHAR_NAME);
4997     }
4998 
4999     /**
5000      * <strong>[icu]</strong> Returns the "age" of the code point.
5001      * <p>The "age" is the Unicode version when the code point was first
5002      * designated (as a non-character or for Private Use) or assigned a
5003      * character.
5004      * <p>This can be useful to avoid emitting code points to receiving
5005      * processes that do not accept newer characters.
5006      * <p>The data is from the UCD file DerivedAge.txt.
5007      * @param ch The code point.
5008      * @return the Unicode version number
5009      */
getAge(int ch)5010     public static VersionInfo getAge(int ch)
5011     {
5012         if (ch < MIN_VALUE || ch > MAX_VALUE) {
5013             throw new IllegalArgumentException("Codepoint out of bounds");
5014         }
5015         return UCharacterProperty.INSTANCE.getAge(ch);
5016     }
5017 
5018     /**
5019      * <strong>[icu]</strong> <p>Check a binary Unicode property for a code point.
5020      * <p>Unicode, especially in version 3.2, defines many more properties
5021      * than the original set in UnicodeData.txt.
5022      * <p>This API is intended to reflect Unicode properties as defined in
5023      * the Unicode Character Database (UCD) and Unicode Technical Reports
5024      * (UTR).
5025      * <p>For details about the properties see
     * <a href="http://www.unicode.org/">http://www.unicode.org/</a>.
5027      * <p>For names of Unicode properties see the UCD file
5028      * PropertyAliases.txt.
5029      * <p>This API does not check the validity of the codepoint.
5030      * <p>Important: If ICU is built with UCD files from Unicode versions
5031      * below 3.2, then properties marked with "new" are not or
5032      * not fully available.
5033      * @param ch code point to test.
5034      * @param property selector constant from android.icu.lang.UProperty,
5035      *        identifies which binary property to check.
5036      * @return true or false according to the binary Unicode property value
5037      *         for ch. Also false if property is out of bounds or if the
5038      *         Unicode version does not have data for the property at all, or
5039      *         not for this code point.
5040      * @see android.icu.lang.UProperty
5041      */
hasBinaryProperty(int ch, int property)5042     public static boolean hasBinaryProperty(int ch, int property)
5043     {
5044         return UCharacterProperty.INSTANCE.hasBinaryProperty(ch, property);
5045     }
5046 
5047     /**
5048      * <strong>[icu]</strong> <p>Check if a code point has the Alphabetic Unicode property.
5049      * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.ALPHABETIC).
5050      * <p>Different from UCharacter.isLetter(ch)!
5051      * @param ch codepoint to be tested
5052      */
isUAlphabetic(int ch)5053     public static boolean isUAlphabetic(int ch)
5054     {
5055         return hasBinaryProperty(ch, UProperty.ALPHABETIC);
5056     }
5057 
5058     /**
5059      * <strong>[icu]</strong> <p>Check if a code point has the Lowercase Unicode property.
5060      * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.LOWERCASE).
5061      * <p>This is different from UCharacter.isLowerCase(ch)!
5062      * @param ch codepoint to be tested
5063      */
isULowercase(int ch)5064     public static boolean isULowercase(int ch)
5065     {
5066         return hasBinaryProperty(ch, UProperty.LOWERCASE);
5067     }
5068 
5069     /**
5070      * <strong>[icu]</strong> <p>Check if a code point has the Uppercase Unicode property.
5071      * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.UPPERCASE).
5072      * <p>This is different from UCharacter.isUpperCase(ch)!
5073      * @param ch codepoint to be tested
5074      */
isUUppercase(int ch)5075     public static boolean isUUppercase(int ch)
5076     {
5077         return hasBinaryProperty(ch, UProperty.UPPERCASE);
5078     }
5079 
5080     /**
5081      * <strong>[icu]</strong> <p>Check if a code point has the White_Space Unicode property.
5082      * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.WHITE_SPACE).
5083      * <p>This is different from both UCharacter.isSpace(ch) and
5084      * UCharacter.isWhitespace(ch)!
5085      * @param ch codepoint to be tested
5086      */
isUWhiteSpace(int ch)5087     public static boolean isUWhiteSpace(int ch)
5088     {
5089         return hasBinaryProperty(ch, UProperty.WHITE_SPACE);
5090     }
5091 
5092     /**
5093      * <strong>[icu]</strong> <p>Returns the property value for an Unicode property type of a code point.
5094      * Also returns binary and mask property values.
5095      * <p>Unicode, especially in version 3.2, defines many more properties than
5096      * the original set in UnicodeData.txt.
5097      * <p>The properties APIs are intended to reflect Unicode properties as
5098      * defined in the Unicode Character Database (UCD) and Unicode Technical
5099      * Reports (UTR). For details about the properties see
5100      * http://www.unicode.org/.
5101      * <p>For names of Unicode properties see the UCD file PropertyAliases.txt.
5102      *
5103      * <pre>
5104      * Sample usage:
5105      * int ea = UCharacter.getIntPropertyValue(c, UProperty.EAST_ASIAN_WIDTH);
5106      * int ideo = UCharacter.getIntPropertyValue(c, UProperty.IDEOGRAPHIC);
5107      * boolean b = (ideo == 1) ? true : false;
5108      * </pre>
5109      * @param ch code point to test.
5110      * @param type UProperty selector constant, identifies which binary
5111      *        property to check. Must be
5112      *        UProperty.BINARY_START &lt;= type &lt; UProperty.BINARY_LIMIT or
5113      *        UProperty.INT_START &lt;= type &lt; UProperty.INT_LIMIT or
5114      *        UProperty.MASK_START &lt;= type &lt; UProperty.MASK_LIMIT.
5115      * @return numeric value that is directly the property value or,
5116      *         for enumerated properties, corresponds to the numeric value of
5117      *         the enumerated constant of the respective property value
5118      *         enumeration type (cast to enum type if necessary).
5119      *         Returns 0 or 1 (for false / true) for binary Unicode properties.
5120      *         Returns a bit-mask for mask properties.
5121      *         Returns 0 if 'type' is out of bounds or if the Unicode version
5122      *         does not have data for the property at all, or not for this code
5123      *         point.
5124      * @see UProperty
5125      * @see #hasBinaryProperty
5126      * @see #getIntPropertyMinValue
5127      * @see #getIntPropertyMaxValue
5128      * @see #getUnicodeVersion
5129      */
getIntPropertyValue(int ch, int type)5130     public static int getIntPropertyValue(int ch, int type)
5131     {
5132         return UCharacterProperty.INSTANCE.getIntPropertyValue(ch, type);
5133     }
5134     /**
5135      * <strong>[icu]</strong> Returns a string version of the property value.
5136      * @param propertyEnum The property enum value.
5137      * @param codepoint The codepoint value.
5138      * @param nameChoice The choice of the name.
5139      * @return value as string
5140      * @deprecated This API is ICU internal only.
5141      * @hide original deprecated declaration
5142      * @hide draft / provisional / internal are hidden on Android
5143      */
5144     @Deprecated
5145     ///CLOVER:OFF
getStringPropertyValue(int propertyEnum, int codepoint, int nameChoice)5146     public static String getStringPropertyValue(int propertyEnum, int codepoint, int nameChoice) {
5147         if ((propertyEnum >= UProperty.BINARY_START && propertyEnum < UProperty.BINARY_LIMIT) ||
5148                 (propertyEnum >= UProperty.INT_START && propertyEnum < UProperty.INT_LIMIT)) {
5149             return getPropertyValueName(propertyEnum, getIntPropertyValue(codepoint, propertyEnum),
5150                     nameChoice);
5151         }
5152         if (propertyEnum == UProperty.NUMERIC_VALUE) {
5153             return String.valueOf(getUnicodeNumericValue(codepoint));
5154         }
5155         // otherwise must be string property
5156         switch (propertyEnum) {
5157         case UProperty.AGE: return getAge(codepoint).toString();
5158         case UProperty.ISO_COMMENT: return getISOComment(codepoint);
5159         case UProperty.BIDI_MIRRORING_GLYPH: return toString(getMirror(codepoint));
5160         case UProperty.CASE_FOLDING: return toString(foldCase(codepoint, true));
5161         case UProperty.LOWERCASE_MAPPING: return toString(toLowerCase(codepoint));
5162         case UProperty.NAME: return getName(codepoint);
5163         case UProperty.SIMPLE_CASE_FOLDING: return toString(foldCase(codepoint, true));
5164         case UProperty.SIMPLE_LOWERCASE_MAPPING: return toString(toLowerCase(codepoint));
5165         case UProperty.SIMPLE_TITLECASE_MAPPING: return toString(toTitleCase(codepoint));
5166         case UProperty.SIMPLE_UPPERCASE_MAPPING: return toString(toUpperCase(codepoint));
5167         case UProperty.TITLECASE_MAPPING: return toString(toTitleCase(codepoint));
5168         case UProperty.UNICODE_1_NAME: return getName1_0(codepoint);
5169         case UProperty.UPPERCASE_MAPPING: return toString(toUpperCase(codepoint));
5170         }
5171         throw new IllegalArgumentException("Illegal Property Enum");
5172     }
5173     ///CLOVER:ON
5174 
5175     /**
5176      * <strong>[icu]</strong> Returns the minimum value for an integer/binary Unicode property type.
5177      * Can be used together with UCharacter.getIntPropertyMaxValue(int)
5178      * to allocate arrays of android.icu.text.UnicodeSet or similar.
5179      * @param type UProperty selector constant, identifies which binary
5180      *        property to check. Must be
5181      *        UProperty.BINARY_START &lt;= type &lt; UProperty.BINARY_LIMIT or
5182      *        UProperty.INT_START &lt;= type &lt; UProperty.INT_LIMIT.
5183      * @return Minimum value returned by UCharacter.getIntPropertyValue(int)
5184      *         for a Unicode property. 0 if the property
5185      *         selector 'type' is out of range.
5186      * @see UProperty
5187      * @see #hasBinaryProperty
5188      * @see #getUnicodeVersion
5189      * @see #getIntPropertyMaxValue
5190      * @see #getIntPropertyValue
5191      */
getIntPropertyMinValue(int type)5192     public static int getIntPropertyMinValue(int type){
5193 
5194         return 0; // undefined; and: all other properties have a minimum value of 0
5195     }
5196 
5197 
5198     /**
5199      * <strong>[icu]</strong> Returns the maximum value for an integer/binary Unicode property.
5200      * Can be used together with UCharacter.getIntPropertyMinValue(int)
5201      * to allocate arrays of android.icu.text.UnicodeSet or similar.
5202      * Examples for min/max values (for Unicode 3.2):
5203      * <ul>
5204      * <li> UProperty.BIDI_CLASS:    0/18
5205      * (UCharacterDirection.LEFT_TO_RIGHT/UCharacterDirection.BOUNDARY_NEUTRAL)
5206      * <li> UProperty.SCRIPT:        0/45 (UScript.COMMON/UScript.TAGBANWA)
5207      * <li> UProperty.IDEOGRAPHIC:   0/1  (false/true)
5208      * </ul>
5209      * For undefined UProperty constant values, min/max values will be 0/-1.
5210      * @param type UProperty selector constant, identifies which binary
5211      *        property to check. Must be
5212      *        UProperty.BINARY_START &lt;= type &lt; UProperty.BINARY_LIMIT or
5213      *        UProperty.INT_START &lt;= type &lt; UProperty.INT_LIMIT.
5214      * @return Maximum value returned by u_getIntPropertyValue for a Unicode
5215      *         property. &lt;= 0 if the property selector 'type' is out of range.
5216      * @see UProperty
5217      * @see #hasBinaryProperty
5218      * @see #getUnicodeVersion
5219      * @see #getIntPropertyMaxValue
5220      * @see #getIntPropertyValue
5221      */
getIntPropertyMaxValue(int type)5222     public static int getIntPropertyMaxValue(int type)
5223     {
5224         return UCharacterProperty.INSTANCE.getIntPropertyMaxValue(type);
5225     }
5226 
5227     /**
5228      * Provide the java.lang.Character forDigit API, for convenience.
5229      */
forDigit(int digit, int radix)5230     public static char forDigit(int digit, int radix) {
5231         return java.lang.Character.forDigit(digit, radix);
5232     }
5233 
5234     // JDK 1.5 API coverage
5235 
5236     /**
5237      * Constant U+D800, same as {@link Character#MIN_HIGH_SURROGATE}.
5238      */
5239     public static final char MIN_HIGH_SURROGATE = Character.MIN_HIGH_SURROGATE;
5240 
5241     /**
5242      * Constant U+DBFF, same as {@link Character#MAX_HIGH_SURROGATE}.
5243      */
5244     public static final char MAX_HIGH_SURROGATE = Character.MAX_HIGH_SURROGATE;
5245 
5246     /**
5247      * Constant U+DC00, same as {@link Character#MIN_LOW_SURROGATE}.
5248      */
5249     public static final char MIN_LOW_SURROGATE = Character.MIN_LOW_SURROGATE;
5250 
5251     /**
5252      * Constant U+DFFF, same as {@link Character#MAX_LOW_SURROGATE}.
5253      */
5254     public static final char MAX_LOW_SURROGATE = Character.MAX_LOW_SURROGATE;
5255 
5256     /**
5257      * Constant U+D800, same as {@link Character#MIN_SURROGATE}.
5258      */
5259     public static final char MIN_SURROGATE = Character.MIN_SURROGATE;
5260 
5261     /**
5262      * Constant U+DFFF, same as {@link Character#MAX_SURROGATE}.
5263      */
5264     public static final char MAX_SURROGATE = Character.MAX_SURROGATE;
5265 
5266     /**
5267      * Constant U+10000, same as {@link Character#MIN_SUPPLEMENTARY_CODE_POINT}.
5268      */
5269     public static final int MIN_SUPPLEMENTARY_CODE_POINT = Character.MIN_SUPPLEMENTARY_CODE_POINT;
5270 
5271     /**
5272      * Constant U+10FFFF, same as {@link Character#MAX_CODE_POINT}.
5273      */
5274     public static final int MAX_CODE_POINT = Character.MAX_CODE_POINT;
5275 
5276     /**
5277      * Constant U+0000, same as {@link Character#MIN_CODE_POINT}.
5278      */
5279     public static final int MIN_CODE_POINT = Character.MIN_CODE_POINT;
5280 
5281     /**
5282      * Equivalent to {@link Character#isValidCodePoint}.
5283      *
5284      * @param cp the code point to check
5285      * @return true if cp is a valid code point
5286      */
isValidCodePoint(int cp)5287     public static final boolean isValidCodePoint(int cp) {
5288         return cp >= 0 && cp <= MAX_CODE_POINT;
5289     }
5290 
5291     /**
5292      * Same as {@link Character#isSupplementaryCodePoint}.
5293      *
5294      * @param cp the code point to check
5295      * @return true if cp is a supplementary code point
5296      */
isSupplementaryCodePoint(int cp)5297     public static final boolean isSupplementaryCodePoint(int cp) {
5298         return Character.isSupplementaryCodePoint(cp);
5299     }
5300 
5301     /**
5302      * Same as {@link Character#isHighSurrogate}.
5303      *
5304      * @param ch the char to check
5305      * @return true if ch is a high (lead) surrogate
5306      */
isHighSurrogate(char ch)5307     public static boolean isHighSurrogate(char ch) {
5308         return Character.isHighSurrogate(ch);
5309     }
5310 
5311     /**
5312      * Same as {@link Character#isLowSurrogate}.
5313      *
5314      * @param ch the char to check
5315      * @return true if ch is a low (trail) surrogate
5316      */
isLowSurrogate(char ch)5317     public static boolean isLowSurrogate(char ch) {
5318         return Character.isLowSurrogate(ch);
5319     }
5320 
5321     /**
5322      * Same as {@link Character#isSurrogatePair}.
5323      *
5324      * @param high the high (lead) char
5325      * @param low the low (trail) char
5326      * @return true if high, low form a surrogate pair
5327      */
isSurrogatePair(char high, char low)5328     public static final boolean isSurrogatePair(char high, char low) {
5329         return Character.isSurrogatePair(high, low);
5330     }
5331 
5332     /**
5333      * Same as {@link Character#charCount}.
5334      * Returns the number of chars needed to represent the code point (1 or 2).
5335      * This does not check the code point for validity.
5336      *
5337      * @param cp the code point to check
5338      * @return the number of chars needed to represent the code point
5339      */
charCount(int cp)5340     public static int charCount(int cp) {
5341         return Character.charCount(cp);
5342     }
5343 
5344     /**
5345      * Same as {@link Character#toCodePoint}.
5346      * Returns the code point represented by the two surrogate code units.
5347      * This does not check the surrogate pair for validity.
5348      *
5349      * @param high the high (lead) surrogate
5350      * @param low the low (trail) surrogate
5351      * @return the code point formed by the surrogate pair
5352      */
toCodePoint(char high, char low)5353     public static final int toCodePoint(char high, char low) {
5354         return Character.toCodePoint(high, low);
5355     }
5356 
5357     /**
5358      * Same as {@link Character#codePointAt(CharSequence, int)}.
5359      * Returns the code point at index.
5360      * This examines only the characters at index and index+1.
5361      *
5362      * @param seq the characters to check
5363      * @param index the index of the first or only char forming the code point
5364      * @return the code point at the index
5365      */
codePointAt(CharSequence seq, int index)5366     public static final int codePointAt(CharSequence seq, int index) {
5367         char c1 = seq.charAt(index++);
5368         if (isHighSurrogate(c1)) {
5369             if (index < seq.length()) {
5370                 char c2 = seq.charAt(index);
5371                 if (isLowSurrogate(c2)) {
5372                     return toCodePoint(c1, c2);
5373                 }
5374             }
5375         }
5376         return c1;
5377     }
5378 
5379     /**
5380      * Same as {@link Character#codePointAt(char[], int)}.
5381      * Returns the code point at index.
5382      * This examines only the characters at index and index+1.
5383      *
5384      * @param text the characters to check
5385      * @param index the index of the first or only char forming the code point
5386      * @return the code point at the index
5387      */
codePointAt(char[] text, int index)5388     public static final int codePointAt(char[] text, int index) {
5389         char c1 = text[index++];
5390         if (isHighSurrogate(c1)) {
5391             if (index < text.length) {
5392                 char c2 = text[index];
5393                 if (isLowSurrogate(c2)) {
5394                     return toCodePoint(c1, c2);
5395                 }
5396             }
5397         }
5398         return c1;
5399     }
5400 
5401     /**
5402      * Same as {@link Character#codePointAt(char[], int, int)}.
5403      * Returns the code point at index.
5404      * This examines only the characters at index and index+1.
5405      *
5406      * @param text the characters to check
5407      * @param index the index of the first or only char forming the code point
5408      * @param limit the limit of the valid text
5409      * @return the code point at the index
5410      */
codePointAt(char[] text, int index, int limit)5411     public static final int codePointAt(char[] text, int index, int limit) {
5412         if (index >= limit || limit > text.length) {
5413             throw new IndexOutOfBoundsException();
5414         }
5415         char c1 = text[index++];
5416         if (isHighSurrogate(c1)) {
5417             if (index < limit) {
5418                 char c2 = text[index];
5419                 if (isLowSurrogate(c2)) {
5420                     return toCodePoint(c1, c2);
5421                 }
5422             }
5423         }
5424         return c1;
5425     }
5426 
5427     /**
5428      * Same as {@link Character#codePointBefore(CharSequence, int)}.
5429      * Return the code point before index.
5430      * This examines only the characters at index-1 and index-2.
5431      *
5432      * @param seq the characters to check
5433      * @param index the index after the last or only char forming the code point
5434      * @return the code point before the index
5435      */
codePointBefore(CharSequence seq, int index)5436     public static final int codePointBefore(CharSequence seq, int index) {
5437         char c2 = seq.charAt(--index);
5438         if (isLowSurrogate(c2)) {
5439             if (index > 0) {
5440                 char c1 = seq.charAt(--index);
5441                 if (isHighSurrogate(c1)) {
5442                     return toCodePoint(c1, c2);
5443                 }
5444             }
5445         }
5446         return c2;
5447     }
5448 
5449     /**
5450      * Same as {@link Character#codePointBefore(char[], int)}.
5451      * Returns the code point before index.
5452      * This examines only the characters at index-1 and index-2.
5453      *
5454      * @param text the characters to check
5455      * @param index the index after the last or only char forming the code point
5456      * @return the code point before the index
5457      */
codePointBefore(char[] text, int index)5458     public static final int codePointBefore(char[] text, int index) {
5459         char c2 = text[--index];
5460         if (isLowSurrogate(c2)) {
5461             if (index > 0) {
5462                 char c1 = text[--index];
5463                 if (isHighSurrogate(c1)) {
5464                     return toCodePoint(c1, c2);
5465                 }
5466             }
5467         }
5468         return c2;
5469     }
5470 
5471     /**
5472      * Same as {@link Character#codePointBefore(char[], int, int)}.
5473      * Return the code point before index.
5474      * This examines only the characters at index-1 and index-2.
5475      *
5476      * @param text the characters to check
5477      * @param index the index after the last or only char forming the code point
5478      * @param limit the start of the valid text
5479      * @return the code point before the index
5480      */
codePointBefore(char[] text, int index, int limit)5481     public static final int codePointBefore(char[] text, int index, int limit) {
5482         if (index <= limit || limit < 0) {
5483             throw new IndexOutOfBoundsException();
5484         }
5485         char c2 = text[--index];
5486         if (isLowSurrogate(c2)) {
5487             if (index > limit) {
5488                 char c1 = text[--index];
5489                 if (isHighSurrogate(c1)) {
5490                     return toCodePoint(c1, c2);
5491                 }
5492             }
5493         }
5494         return c2;
5495     }
5496 
5497     /**
5498      * Same as {@link Character#toChars(int, char[], int)}.
5499      * Writes the chars representing the
5500      * code point into the destination at the given index.
5501      *
5502      * @param cp the code point to convert
5503      * @param dst the destination array into which to put the char(s) representing the code point
5504      * @param dstIndex the index at which to put the first (or only) char
5505      * @return the count of the number of chars written (1 or 2)
5506      * @throws IllegalArgumentException if cp is not a valid code point
5507      */
toChars(int cp, char[] dst, int dstIndex)5508     public static final int toChars(int cp, char[] dst, int dstIndex) {
5509         return Character.toChars(cp, dst, dstIndex);
5510     }
5511 
5512     /**
5513      * Same as {@link Character#toChars(int)}.
5514      * Returns a char array representing the code point.
5515      *
5516      * @param cp the code point to convert
5517      * @return an array containing the char(s) representing the code point
5518      * @throws IllegalArgumentException if cp is not a valid code point
5519      */
toChars(int cp)5520     public static final char[] toChars(int cp) {
5521         return Character.toChars(cp);
5522     }
5523 
5524     /**
5525      * Equivalent to the {@link Character#getDirectionality(char)} method, for
5526      * convenience. Returns a byte representing the directionality of the
5527      * character.
5528      *
5529      * <strong>[icu] Note:</strong> Unlike {@link Character#getDirectionality(char)}, this returns
5530      * DIRECTIONALITY_LEFT_TO_RIGHT for undefined or out-of-bounds characters.
5531      *
5532      * <strong>[icu] Note:</strong> The return value must be tested using the constants defined in {@link
5533      * UCharacterDirection} and its interface {@link
5534      * UCharacterEnums.ECharacterDirection} since the values are different from the ones
5535      * defined by <code>java.lang.Character</code>.
5536      * @param cp the code point to check
5537      * @return the directionality of the code point
5538      * @see #getDirection
5539      */
getDirectionality(int cp)5540     public static byte getDirectionality(int cp)
5541     {
5542         return (byte)getDirection(cp);
5543     }
5544 
5545     /**
5546      * Equivalent to the {@link Character#codePointCount(CharSequence, int, int)}
5547      * method, for convenience.  Counts the number of code points in the range
5548      * of text.
5549      * @param text the characters to check
5550      * @param start the start of the range
5551      * @param limit the limit of the range
5552      * @return the number of code points in the range
5553      */
codePointCount(CharSequence text, int start, int limit)5554     public static int codePointCount(CharSequence text, int start, int limit) {
5555         if (start < 0 || limit < start || limit > text.length()) {
5556             throw new IndexOutOfBoundsException("start (" + start +
5557                     ") or limit (" + limit +
5558                     ") invalid or out of range 0, " + text.length());
5559         }
5560 
5561         int len = limit - start;
5562         while (limit > start) {
5563             char ch = text.charAt(--limit);
5564             while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && limit > start) {
5565                 ch = text.charAt(--limit);
5566                 if (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE) {
5567                     --len;
5568                     break;
5569                 }
5570             }
5571         }
5572         return len;
5573     }
5574 
5575     /**
5576      * Equivalent to the {@link Character#codePointCount(char[], int, int)} method, for
5577      * convenience. Counts the number of code points in the range of text.
5578      * @param text the characters to check
5579      * @param start the start of the range
5580      * @param limit the limit of the range
5581      * @return the number of code points in the range
5582      */
codePointCount(char[] text, int start, int limit)5583     public static int codePointCount(char[] text, int start, int limit) {
5584         if (start < 0 || limit < start || limit > text.length) {
5585             throw new IndexOutOfBoundsException("start (" + start +
5586                     ") or limit (" + limit +
5587                     ") invalid or out of range 0, " + text.length);
5588         }
5589 
5590         int len = limit - start;
5591         while (limit > start) {
5592             char ch = text[--limit];
5593             while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && limit > start) {
5594                 ch = text[--limit];
5595                 if (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE) {
5596                     --len;
5597                     break;
5598                 }
5599             }
5600         }
5601         return len;
5602     }
5603 
5604     /**
5605      * Equivalent to the {@link Character#offsetByCodePoints(CharSequence, int, int)}
5606      * method, for convenience.  Adjusts the char index by a code point offset.
5607      * @param text the characters to check
5608      * @param index the index to adjust
5609      * @param codePointOffset the number of code points by which to offset the index
5610      * @return the adjusted index
5611      */
offsetByCodePoints(CharSequence text, int index, int codePointOffset)5612     public static int offsetByCodePoints(CharSequence text, int index, int codePointOffset) {
5613         if (index < 0 || index > text.length()) {
5614             throw new IndexOutOfBoundsException("index ( " + index +
5615                     ") out of range 0, " + text.length());
5616         }
5617 
5618         if (codePointOffset < 0) {
5619             while (++codePointOffset <= 0) {
5620                 char ch = text.charAt(--index);
5621                 while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && index > 0) {
5622                     ch = text.charAt(--index);
5623                     if (ch < MIN_HIGH_SURROGATE || ch > MAX_HIGH_SURROGATE) {
5624                         if (++codePointOffset > 0) {
5625                             return index+1;
5626                         }
5627                     }
5628                 }
5629             }
5630         } else {
5631             int limit = text.length();
5632             while (--codePointOffset >= 0) {
5633                 char ch = text.charAt(index++);
5634                 while (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE && index < limit) {
5635                     ch = text.charAt(index++);
5636                     if (ch < MIN_LOW_SURROGATE || ch > MAX_LOW_SURROGATE) {
5637                         if (--codePointOffset < 0) {
5638                             return index-1;
5639                         }
5640                     }
5641                 }
5642             }
5643         }
5644 
5645         return index;
5646     }
5647 
5648     /**
5649      * Equivalent to the
5650      * {@link Character#offsetByCodePoints(char[], int, int, int, int)}
5651      * method, for convenience.  Adjusts the char index by a code point offset.
5652      * @param text the characters to check
5653      * @param start the start of the range to check
5654      * @param count the length of the range to check
5655      * @param index the index to adjust
5656      * @param codePointOffset the number of code points by which to offset the index
5657      * @return the adjusted index
5658      */
offsetByCodePoints(char[] text, int start, int count, int index, int codePointOffset)5659     public static int offsetByCodePoints(char[] text, int start, int count, int index,
5660             int codePointOffset) {
5661         int limit = start + count;
5662         if (start < 0 || limit < start || limit > text.length || index < start || index > limit) {
5663             throw new IndexOutOfBoundsException("index ( " + index +
5664                     ") out of range " + start +
5665                     ", " + limit +
5666                     " in array 0, " + text.length);
5667         }
5668 
5669         if (codePointOffset < 0) {
5670             while (++codePointOffset <= 0) {
5671                 char ch = text[--index];
5672                 if (index < start) {
5673                     throw new IndexOutOfBoundsException("index ( " + index +
5674                             ") < start (" + start +
5675                             ")");
5676                 }
5677                 while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && index > start) {
5678                     ch = text[--index];
5679                     if (ch < MIN_HIGH_SURROGATE || ch > MAX_HIGH_SURROGATE) {
5680                         if (++codePointOffset > 0) {
5681                             return index+1;
5682                         }
5683                     }
5684                 }
5685             }
5686         } else {
5687             while (--codePointOffset >= 0) {
5688                 char ch = text[index++];
5689                 if (index > limit) {
5690                     throw new IndexOutOfBoundsException("index ( " + index +
5691                             ") > limit (" + limit +
5692                             ")");
5693                 }
5694                 while (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE && index < limit) {
5695                     ch = text[index++];
5696                     if (ch < MIN_LOW_SURROGATE || ch > MAX_LOW_SURROGATE) {
5697                         if (--codePointOffset < 0) {
5698                             return index-1;
5699                         }
5700                     }
5701                 }
5702             }
5703         }
5704 
5705         return index;
5706     }
5707 
5708     // private variables -------------------------------------------------
5709 
    /**
     * Mask that keeps only the low 16 bits (one char) of an int value
     */
    private static final int LAST_CHAR_MASK_ = 0xFFFF;

    //    /**
    //     * To get the last byte out from a data type
    //     */
    //    private static final int LAST_BYTE_MASK_ = 0xFF;
    //
    //    /**
    //     * Shift 16 bits
    //     */
    //    private static final int SHIFT_16_ = 16;
    //
    //    /**
    //     * Shift 24 bits
    //     */
    //    private static final int SHIFT_24_ = 24;
    //
    //    /**
    //     * Decimal radix
    //     */
    //    private static final int DECIMAL_RADIX_ = 10;

    /**
     * No-break space code point (U+00A0)
     */
    private static final int NO_BREAK_SPACE_ = 0xA0;

    /**
     * Figure space code point (U+2007)
     */
    private static final int FIGURE_SPACE_ = 0x2007;

    /**
     * Narrow no-break space code point (U+202F)
     */
    private static final int NARROW_NO_BREAK_SPACE_ = 0x202F;

    /**
     * Ideographic number zero code point (U+3007)
     */
    private static final int IDEOGRAPHIC_NUMBER_ZERO_ = 0x3007;

    /**
     * CJK ideograph for the number one (U+4E00)
     */
    private static final int CJK_IDEOGRAPH_FIRST_ = 0x4e00;

    /**
     * CJK ideograph for the number two (U+4E8C)
     */
    private static final int CJK_IDEOGRAPH_SECOND_ = 0x4e8c;

    /**
     * CJK ideograph for the number three (U+4E09)
     */
    private static final int CJK_IDEOGRAPH_THIRD_ = 0x4e09;

    /**
     * CJK ideograph for the number four (U+56DB)
     */
    private static final int CJK_IDEOGRAPH_FOURTH_ = 0x56db;

    /**
     * CJK ideograph for the number five (U+4E94)
     */
    private static final int CJK_IDEOGRAPH_FIFTH_ = 0x4e94;

    /**
     * CJK ideograph for the number six (U+516D)
     */
    private static final int CJK_IDEOGRAPH_SIXTH_ = 0x516d;

    /**
     * CJK ideograph for the number seven (U+4E03)
     */
    private static final int CJK_IDEOGRAPH_SEVENTH_ = 0x4e03;

    /**
     * CJK ideograph for the number eight (U+516B)
     */
    private static final int CJK_IDEOGRAPH_EIGHTH_ = 0x516b;

    /**
     * CJK ideograph for the number nine (U+4E5D); the constant name keeps its
     * historical spelling
     */
    private static final int CJK_IDEOGRAPH_NINETH_ = 0x4e5d;

    /**
     * Application Program Command control code point (U+009F)
     */
    private static final int APPLICATION_PROGRAM_COMMAND_ = 0x009F;

    /**
     * Unit separator control code point (U+001F)
     */
    private static final int UNIT_SEPARATOR_ = 0x001F;

    /**
     * Delete control code point (U+007F)
     */
    private static final int DELETE_ = 0x007F;
5814 
5815     /**
5816      * Han digit characters
5817      */
5818     private static final int CJK_IDEOGRAPH_COMPLEX_ZERO_     = 0x96f6;
5819     private static final int CJK_IDEOGRAPH_COMPLEX_ONE_      = 0x58f9;
5820     private static final int CJK_IDEOGRAPH_COMPLEX_TWO_      = 0x8cb3;
5821     private static final int CJK_IDEOGRAPH_COMPLEX_THREE_    = 0x53c3;
5822     private static final int CJK_IDEOGRAPH_COMPLEX_FOUR_     = 0x8086;
5823     private static final int CJK_IDEOGRAPH_COMPLEX_FIVE_     = 0x4f0d;
5824     private static final int CJK_IDEOGRAPH_COMPLEX_SIX_      = 0x9678;
5825     private static final int CJK_IDEOGRAPH_COMPLEX_SEVEN_    = 0x67d2;
5826     private static final int CJK_IDEOGRAPH_COMPLEX_EIGHT_    = 0x634c;
5827     private static final int CJK_IDEOGRAPH_COMPLEX_NINE_     = 0x7396;
5828     private static final int CJK_IDEOGRAPH_TEN_              = 0x5341;
5829     private static final int CJK_IDEOGRAPH_COMPLEX_TEN_      = 0x62fe;
5830     private static final int CJK_IDEOGRAPH_HUNDRED_          = 0x767e;
5831     private static final int CJK_IDEOGRAPH_COMPLEX_HUNDRED_  = 0x4f70;
5832     private static final int CJK_IDEOGRAPH_THOUSAND_         = 0x5343;
5833     private static final int CJK_IDEOGRAPH_COMPLEX_THOUSAND_ = 0x4edf;
5834     private static final int CJK_IDEOGRAPH_TEN_THOUSAND_     = 0x824c;
5835     private static final int CJK_IDEOGRAPH_HUNDRED_MILLION_  = 0x5104;
5836 
5837     // private constructor -----------------------------------------------
5838     ///CLOVER:OFF
5839     /**
5840      * Private constructor to prevent instantiation
5841      */
UCharacter()5842     private UCharacter()
5843     {
5844     }
5845     ///CLOVER:ON
5846 }
5847