1 /**
2  *******************************************************************************
3  * Copyright (C) 1996-2016, International Business Machines Corporation and
4  * others. All Rights Reserved.
5  *******************************************************************************
6  */
7 
8 package com.ibm.icu.lang;
9 
10 import java.lang.ref.SoftReference;
11 import java.util.HashMap;
12 import java.util.Iterator;
13 import java.util.Locale;
14 import java.util.Map;
15 
16 import com.ibm.icu.impl.IllegalIcuArgumentException;
17 import com.ibm.icu.impl.Trie2;
18 import com.ibm.icu.impl.UBiDiProps;
19 import com.ibm.icu.impl.UCaseProps;
20 import com.ibm.icu.impl.UCharacterName;
21 import com.ibm.icu.impl.UCharacterNameChoice;
22 import com.ibm.icu.impl.UCharacterProperty;
23 import com.ibm.icu.impl.UCharacterUtility;
24 import com.ibm.icu.impl.UPropertyAliases;
25 import com.ibm.icu.lang.UCharacterEnums.ECharacterCategory;
26 import com.ibm.icu.lang.UCharacterEnums.ECharacterDirection;
27 import com.ibm.icu.text.BreakIterator;
28 import com.ibm.icu.text.Normalizer2;
29 import com.ibm.icu.util.RangeValueIterator;
30 import com.ibm.icu.util.ULocale;
31 import com.ibm.icu.util.ValueIterator;
32 import com.ibm.icu.util.VersionInfo;
33 
34 /**
35  * {@icuenhanced java.lang.Character}.{@icu _usage_}
36  *
37  * <p>The UCharacter class provides extensions to the {@link java.lang.Character} class.
38  * These extensions provide support for more Unicode properties.
39  * Each ICU release supports the latest version of Unicode available at that time.
40  *
41  * <p>For some time before Java 5 added support for supplementary Unicode code points,
42  * the ICU UCharacter class and many other ICU classes already supported them.
43  * Some UCharacter methods and constants were widened slightly differently than
44  * how the Character class methods and constants were widened later.
45  * In particular, {@link Character#MAX_VALUE} is still a char with the value U+FFFF,
46  * while the {@link UCharacter#MAX_VALUE} is an int with the value U+10FFFF.
47  *
48  * <p>Code points are represented in these APIs using ints. While it would be
49  * more convenient in Java to have a separate primitive datatype for them,
50  * ints suffice in the meantime.
51  *
52  * <p>To use this class please add the jar file name icu4j.jar to the
53  * class path, since it contains data files which supply the information used
54  * by this file.<br>
55  * E.g. In Windows <br>
56  * <code>set CLASSPATH=%CLASSPATH%;$JAR_FILE_PATH/icu4j.jar</code>.<br>
57  * Otherwise, another method would be to copy the files uprops.dat and
58  * unames.icu from the icu4j source subdirectory
59  * <i>$ICU4J_SRC/src/com.ibm.icu.impl.data</i> to your class directory
60  * <i>$ICU4J_CLASS/com.ibm.icu.impl.data</i>.
61  *
62  * <p>Aside from the additions for UTF-16 support, and the updated Unicode
63  * properties, the main differences between UCharacter and Character are:
64  * <ul>
65  * <li> UCharacter is not designed to be a char wrapper and does not have
66  *      APIs that involve management of that single char.<br>
67  *      These include:
68  *      <ul>
69  *        <li> char charValue(),
70  *        <li> int compareTo(java.lang.Character, java.lang.Character), etc.
71  *      </ul>
72  * <li> UCharacter does not include Character APIs that are deprecated, nor
73  *      does it include the Java-specific character information, such as
74  *      boolean isJavaIdentifierPart(char ch).
75  * <li> Character maps characters 'A' - 'Z' and 'a' - 'z' to the numeric
76  *      values '10' - '35'. UCharacter also does this in digit and
77  *      getNumericValue, to adhere to the java semantics of these
78  *      methods.  New methods unicodeDigit, and
79  *      getUnicodeNumericValue do not treat the above code points
80  *      as having numeric values.  This is a semantic change from ICU4J 1.3.1.
81  * </ul>
82  * <p>
83  * Further detail on differences can be determined using the program
84  *        <a href=
85  * "http://source.icu-project.org/repos/icu/icu4j/trunk/src/com/ibm/icu/dev/test/lang/UCharacterCompare.java">
86  *        com.ibm.icu.dev.test.lang.UCharacterCompare</a>
87  * <p>
88  * In addition to Java compatibility functions, which calculate derived properties,
89  * this API provides low-level access to the Unicode Character Database.
90  * <p>
91  * Unicode assigns each code point (not just each assigned character) values for
92  * many properties.
93  * Most of them are simple boolean flags, or constants from a small enumerated list.
94  * For some properties, values are strings or other relatively more complex types.
95  * <p>
96  * For more information see
97  * <a href="http://www.unicode.org/ucd/">"About the Unicode Character Database"</a>
98  * (http://www.unicode.org/ucd/)
99  * and the <a href="http://www.icu-project.org/userguide/properties.html">ICU
100  * User Guide chapter on Properties</a>
101  * (http://www.icu-project.org/userguide/properties.html).
102  * <p>
103  * There are also functions that provide easy migration from C/POSIX functions
104  * like isblank(). Their use is generally discouraged because the C/POSIX
105  * standards do not define their semantics beyond the ASCII range, which means
106  * that different implementations exhibit very different behavior.
107  * Instead, Unicode properties should be used directly.
108  * <p>
109  * There are also only a few, broad C/POSIX character classes, and they tend
110  * to be used for conflicting purposes. For example, the "isalpha()" class
111  * is sometimes used to determine word boundaries, while a more sophisticated
112  * approach would at least distinguish initial letters from continuation
113  * characters (the latter including combining marks).
114  * (In ICU, BreakIterator is the most sophisticated API for word boundaries.)
115  * Another example: There is no "istitle()" class for titlecase characters.
116  * <p>
117  * ICU 3.4 and later provide API access for all twelve C/POSIX character classes.
118  * ICU implements them according to the Standard Recommendations in
119  * Annex C: Compatibility Properties of UTS #18 Unicode Regular Expressions
120  * (http://www.unicode.org/reports/tr18/#Compatibility_Properties).
121  * <p>
122  * API access for C/POSIX character classes is as follows:
123  * <pre>{@code
124  * - alpha:     isUAlphabetic(c) or hasBinaryProperty(c, UProperty.ALPHABETIC)
125  * - lower:     isULowercase(c) or hasBinaryProperty(c, UProperty.LOWERCASE)
126  * - upper:     isUUppercase(c) or hasBinaryProperty(c, UProperty.UPPERCASE)
127  * - punct:     ((1<<getType(c)) & ((1<<DASH_PUNCTUATION)|(1<<START_PUNCTUATION)|
128  *               (1<<END_PUNCTUATION)|(1<<CONNECTOR_PUNCTUATION)|(1<<OTHER_PUNCTUATION)|
129  *               (1<<INITIAL_PUNCTUATION)|(1<<FINAL_PUNCTUATION)))!=0
130  * - digit:     isDigit(c) or getType(c)==DECIMAL_DIGIT_NUMBER
131  * - xdigit:    hasBinaryProperty(c, UProperty.POSIX_XDIGIT)
132  * - alnum:     hasBinaryProperty(c, UProperty.POSIX_ALNUM)
133  * - space:     isUWhiteSpace(c) or hasBinaryProperty(c, UProperty.WHITE_SPACE)
134  * - blank:     hasBinaryProperty(c, UProperty.POSIX_BLANK)
135  * - cntrl:     getType(c)==CONTROL
136  * - graph:     hasBinaryProperty(c, UProperty.POSIX_GRAPH)
137  * - print:     hasBinaryProperty(c, UProperty.POSIX_PRINT)}</pre>
138  * <p>
139  * The C/POSIX character classes are also available in UnicodeSet patterns,
140  * using patterns like [:graph:] or \p{graph}.
141  *
142  * <p>{@icunote} There are several ICU (and Java) whitespace functions.
143  * Comparison:<ul>
144  * <li> isUWhiteSpace=UCHAR_WHITE_SPACE: Unicode White_Space property;
145  *       most of general categories "Z" (separators) + most whitespace ISO controls
146  *       (including no-break spaces, but excluding IS1..IS4 and ZWSP)
147  * <li> isWhitespace: Java isWhitespace; Z + whitespace ISO controls but excluding no-break spaces
148  * <li> isSpaceChar: just Z (including no-break spaces)</ul>
149  *
150  * <p>
151  * This class is not subclassable.
152  *
153  * @author Syn Wee Quek
154  * @stable ICU 2.1
155  * @see com.ibm.icu.lang.UCharacterEnums
156  */
157 
158 public final class UCharacter implements ECharacterCategory, ECharacterDirection
159 {
160     // public inner classes ----------------------------------------------
161 
162     /**
163      * {@icuenhanced java.lang.Character.UnicodeBlock}.{@icu _usage_}
164      *
165      * A family of character subsets representing the character blocks in the
166      * Unicode specification, generated from Unicode Data file Blocks.txt.
167      * Character blocks generally define characters used for a specific script
168      * or purpose. A character is contained by at most one Unicode block.
169      *
170      * {@icunote} All fields named XXX_ID are specific to ICU.
171      *
172      * @stable ICU 2.4
173      */
174     public static final class UnicodeBlock extends Character.Subset
175     {
176         // block id corresponding to icu4c -----------------------------------
177 
178         /**
179          * @stable ICU 2.4
180          */
181         public static final int INVALID_CODE_ID = -1;
182         /**
183          * @stable ICU 2.4
184          */
185         public static final int BASIC_LATIN_ID = 1;
186         /**
187          * @stable ICU 2.4
188          */
189         public static final int LATIN_1_SUPPLEMENT_ID = 2;
190         /**
191          * @stable ICU 2.4
192          */
193         public static final int LATIN_EXTENDED_A_ID = 3;
194         /**
195          * @stable ICU 2.4
196          */
197         public static final int LATIN_EXTENDED_B_ID = 4;
198         /**
199          * @stable ICU 2.4
200          */
201         public static final int IPA_EXTENSIONS_ID = 5;
202         /**
203          * @stable ICU 2.4
204          */
205         public static final int SPACING_MODIFIER_LETTERS_ID = 6;
206         /**
207          * @stable ICU 2.4
208          */
209         public static final int COMBINING_DIACRITICAL_MARKS_ID = 7;
210         /**
211          * Unicode 3.2 renames this block to "Greek and Coptic".
212          * @stable ICU 2.4
213          */
214         public static final int GREEK_ID = 8;
215         /**
216          * @stable ICU 2.4
217          */
218         public static final int CYRILLIC_ID = 9;
219         /**
220          * @stable ICU 2.4
221          */
222         public static final int ARMENIAN_ID = 10;
223         /**
224          * @stable ICU 2.4
225          */
226         public static final int HEBREW_ID = 11;
227         /**
228          * @stable ICU 2.4
229          */
230         public static final int ARABIC_ID = 12;
231         /**
232          * @stable ICU 2.4
233          */
234         public static final int SYRIAC_ID = 13;
235         /**
236          * @stable ICU 2.4
237          */
238         public static final int THAANA_ID = 14;
239         /**
240          * @stable ICU 2.4
241          */
242         public static final int DEVANAGARI_ID = 15;
243         /**
244          * @stable ICU 2.4
245          */
246         public static final int BENGALI_ID = 16;
247         /**
248          * @stable ICU 2.4
249          */
250         public static final int GURMUKHI_ID = 17;
251         /**
252          * @stable ICU 2.4
253          */
254         public static final int GUJARATI_ID = 18;
255         /**
256          * @stable ICU 2.4
257          */
258         public static final int ORIYA_ID = 19;
259         /**
260          * @stable ICU 2.4
261          */
262         public static final int TAMIL_ID = 20;
263         /**
264          * @stable ICU 2.4
265          */
266         public static final int TELUGU_ID = 21;
267         /**
268          * @stable ICU 2.4
269          */
270         public static final int KANNADA_ID = 22;
271         /**
272          * @stable ICU 2.4
273          */
274         public static final int MALAYALAM_ID = 23;
275         /**
276          * @stable ICU 2.4
277          */
278         public static final int SINHALA_ID = 24;
279         /**
280          * @stable ICU 2.4
281          */
282         public static final int THAI_ID = 25;
283         /**
284          * @stable ICU 2.4
285          */
286         public static final int LAO_ID = 26;
287         /**
288          * @stable ICU 2.4
289          */
290         public static final int TIBETAN_ID = 27;
291         /**
292          * @stable ICU 2.4
293          */
294         public static final int MYANMAR_ID = 28;
295         /**
296          * @stable ICU 2.4
297          */
298         public static final int GEORGIAN_ID = 29;
299         /**
300          * @stable ICU 2.4
301          */
302         public static final int HANGUL_JAMO_ID = 30;
303         /**
304          * @stable ICU 2.4
305          */
306         public static final int ETHIOPIC_ID = 31;
307         /**
308          * @stable ICU 2.4
309          */
310         public static final int CHEROKEE_ID = 32;
311         /**
312          * @stable ICU 2.4
313          */
314         public static final int UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_ID = 33;
315         /**
316          * @stable ICU 2.4
317          */
318         public static final int OGHAM_ID = 34;
319         /**
320          * @stable ICU 2.4
321          */
322         public static final int RUNIC_ID = 35;
323         /**
324          * @stable ICU 2.4
325          */
326         public static final int KHMER_ID = 36;
327         /**
328          * @stable ICU 2.4
329          */
330         public static final int MONGOLIAN_ID = 37;
331         /**
332          * @stable ICU 2.4
333          */
334         public static final int LATIN_EXTENDED_ADDITIONAL_ID = 38;
335         /**
336          * @stable ICU 2.4
337          */
338         public static final int GREEK_EXTENDED_ID = 39;
339         /**
340          * @stable ICU 2.4
341          */
342         public static final int GENERAL_PUNCTUATION_ID = 40;
343         /**
344          * @stable ICU 2.4
345          */
346         public static final int SUPERSCRIPTS_AND_SUBSCRIPTS_ID = 41;
347         /**
348          * @stable ICU 2.4
349          */
350         public static final int CURRENCY_SYMBOLS_ID = 42;
351         /**
352          * Unicode 3.2 renames this block to "Combining Diacritical Marks for
353          * Symbols".
354          * @stable ICU 2.4
355          */
356         public static final int COMBINING_MARKS_FOR_SYMBOLS_ID = 43;
357         /**
358          * @stable ICU 2.4
359          */
360         public static final int LETTERLIKE_SYMBOLS_ID = 44;
361         /**
362          * @stable ICU 2.4
363          */
364         public static final int NUMBER_FORMS_ID = 45;
365         /**
366          * @stable ICU 2.4
367          */
368         public static final int ARROWS_ID = 46;
369         /**
370          * @stable ICU 2.4
371          */
372         public static final int MATHEMATICAL_OPERATORS_ID = 47;
373         /**
374          * @stable ICU 2.4
375          */
376         public static final int MISCELLANEOUS_TECHNICAL_ID = 48;
377         /**
378          * @stable ICU 2.4
379          */
380         public static final int CONTROL_PICTURES_ID = 49;
381         /**
382          * @stable ICU 2.4
383          */
384         public static final int OPTICAL_CHARACTER_RECOGNITION_ID = 50;
385         /**
386          * @stable ICU 2.4
387          */
388         public static final int ENCLOSED_ALPHANUMERICS_ID = 51;
389         /**
390          * @stable ICU 2.4
391          */
392         public static final int BOX_DRAWING_ID = 52;
393         /**
394          * @stable ICU 2.4
395          */
396         public static final int BLOCK_ELEMENTS_ID = 53;
397         /**
398          * @stable ICU 2.4
399          */
400         public static final int GEOMETRIC_SHAPES_ID = 54;
401         /**
402          * @stable ICU 2.4
403          */
404         public static final int MISCELLANEOUS_SYMBOLS_ID = 55;
405         /**
406          * @stable ICU 2.4
407          */
408         public static final int DINGBATS_ID = 56;
409         /**
410          * @stable ICU 2.4
411          */
412         public static final int BRAILLE_PATTERNS_ID = 57;
413         /**
414          * @stable ICU 2.4
415          */
416         public static final int CJK_RADICALS_SUPPLEMENT_ID = 58;
417         /**
418          * @stable ICU 2.4
419          */
420         public static final int KANGXI_RADICALS_ID = 59;
421         /**
422          * @stable ICU 2.4
423          */
424         public static final int IDEOGRAPHIC_DESCRIPTION_CHARACTERS_ID = 60;
425         /**
426          * @stable ICU 2.4
427          */
428         public static final int CJK_SYMBOLS_AND_PUNCTUATION_ID = 61;
429         /**
430          * @stable ICU 2.4
431          */
432         public static final int HIRAGANA_ID = 62;
433         /**
434          * @stable ICU 2.4
435          */
436         public static final int KATAKANA_ID = 63;
437         /**
438          * @stable ICU 2.4
439          */
440         public static final int BOPOMOFO_ID = 64;
441         /**
442          * @stable ICU 2.4
443          */
444         public static final int HANGUL_COMPATIBILITY_JAMO_ID = 65;
445         /**
446          * @stable ICU 2.4
447          */
448         public static final int KANBUN_ID = 66;
449         /**
450          * @stable ICU 2.4
451          */
452         public static final int BOPOMOFO_EXTENDED_ID = 67;
453         /**
454          * @stable ICU 2.4
455          */
456         public static final int ENCLOSED_CJK_LETTERS_AND_MONTHS_ID = 68;
457         /**
458          * @stable ICU 2.4
459          */
460         public static final int CJK_COMPATIBILITY_ID = 69;
461         /**
462          * @stable ICU 2.4
463          */
464         public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_ID = 70;
465         /**
466          * @stable ICU 2.4
467          */
468         public static final int CJK_UNIFIED_IDEOGRAPHS_ID = 71;
469         /**
470          * @stable ICU 2.4
471          */
472         public static final int YI_SYLLABLES_ID = 72;
473         /**
474          * @stable ICU 2.4
475          */
476         public static final int YI_RADICALS_ID = 73;
477         /**
478          * @stable ICU 2.4
479          */
480         public static final int HANGUL_SYLLABLES_ID = 74;
481         /**
482          * @stable ICU 2.4
483          */
484         public static final int HIGH_SURROGATES_ID = 75;
485         /**
486          * @stable ICU 2.4
487          */
488         public static final int HIGH_PRIVATE_USE_SURROGATES_ID = 76;
489         /**
490          * @stable ICU 2.4
491          */
492         public static final int LOW_SURROGATES_ID = 77;
493         /**
494          * Same as public static final int PRIVATE_USE.
495          * Until Unicode 3.1.1, the corresponding block name was "Private Use",
496          * and multiple code point ranges had this block.
497          * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area"
498          * and adds separate blocks for the supplementary PUAs.
499          * @stable ICU 2.4
500          */
501         public static final int PRIVATE_USE_AREA_ID = 78;
502         /**
503          * Same as public static final int PRIVATE_USE_AREA.
504          * Until Unicode 3.1.1, the corresponding block name was "Private Use",
505          * and multiple code point ranges had this block.
506          * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area"
507          * and adds separate blocks for the supplementary PUAs.
508          * @stable ICU 2.4
509          */
510         public static final int PRIVATE_USE_ID = PRIVATE_USE_AREA_ID;
511         /**
512          * @stable ICU 2.4
513          */
514         public static final int CJK_COMPATIBILITY_IDEOGRAPHS_ID = 79;
515         /**
516          * @stable ICU 2.4
517          */
518         public static final int ALPHABETIC_PRESENTATION_FORMS_ID = 80;
519         /**
520          * @stable ICU 2.4
521          */
522         public static final int ARABIC_PRESENTATION_FORMS_A_ID = 81;
523         /**
524          * @stable ICU 2.4
525          */
526         public static final int COMBINING_HALF_MARKS_ID = 82;
527         /**
528          * @stable ICU 2.4
529          */
530         public static final int CJK_COMPATIBILITY_FORMS_ID = 83;
531         /**
532          * @stable ICU 2.4
533          */
534         public static final int SMALL_FORM_VARIANTS_ID = 84;
535         /**
536          * @stable ICU 2.4
537          */
538         public static final int ARABIC_PRESENTATION_FORMS_B_ID = 85;
539         /**
540          * @stable ICU 2.4
541          */
542         public static final int SPECIALS_ID = 86;
543         /**
544          * @stable ICU 2.4
545          */
546         public static final int HALFWIDTH_AND_FULLWIDTH_FORMS_ID = 87;
547         /**
548          * @stable ICU 2.4
549          */
550         public static final int OLD_ITALIC_ID = 88;
551         /**
552          * @stable ICU 2.4
553          */
554         public static final int GOTHIC_ID = 89;
555         /**
556          * @stable ICU 2.4
557          */
558         public static final int DESERET_ID = 90;
559         /**
560          * @stable ICU 2.4
561          */
562         public static final int BYZANTINE_MUSICAL_SYMBOLS_ID = 91;
563         /**
564          * @stable ICU 2.4
565          */
566         public static final int MUSICAL_SYMBOLS_ID = 92;
567         /**
568          * @stable ICU 2.4
569          */
570         public static final int MATHEMATICAL_ALPHANUMERIC_SYMBOLS_ID = 93;
571         /**
572          * @stable ICU 2.4
573          */
574         public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_ID = 94;
575         /**
576          * @stable ICU 2.4
577          */
578         public static final int
579         CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_ID = 95;
580         /**
581          * @stable ICU 2.4
582          */
583         public static final int TAGS_ID = 96;
584 
585         // New blocks in Unicode 3.2
586 
587         /**
588          * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement".
589          * @stable ICU 2.4
590          */
591         public static final int CYRILLIC_SUPPLEMENTARY_ID = 97;
592         /**
593          * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement".
594          * @stable ICU 3.0
595          */
596 
597         public static final int CYRILLIC_SUPPLEMENT_ID = 97;
598         /**
599          * @stable ICU 2.4
600          */
601         public static final int TAGALOG_ID = 98;
602         /**
603          * @stable ICU 2.4
604          */
605         public static final int HANUNOO_ID = 99;
606         /**
607          * @stable ICU 2.4
608          */
609         public static final int BUHID_ID = 100;
610         /**
611          * @stable ICU 2.4
612          */
613         public static final int TAGBANWA_ID = 101;
614         /**
615          * @stable ICU 2.4
616          */
617         public static final int MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A_ID = 102;
618         /**
619          * @stable ICU 2.4
620          */
621         public static final int SUPPLEMENTAL_ARROWS_A_ID = 103;
622         /**
623          * @stable ICU 2.4
624          */
625         public static final int SUPPLEMENTAL_ARROWS_B_ID = 104;
626         /**
627          * @stable ICU 2.4
628          */
629         public static final int MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B_ID = 105;
630         /**
631          * @stable ICU 2.4
632          */
633         public static final int SUPPLEMENTAL_MATHEMATICAL_OPERATORS_ID = 106;
634         /**
635          * @stable ICU 2.4
636          */
637         public static final int KATAKANA_PHONETIC_EXTENSIONS_ID = 107;
638         /**
639          * @stable ICU 2.4
640          */
641         public static final int VARIATION_SELECTORS_ID = 108;
642         /**
643          * @stable ICU 2.4
644          */
645         public static final int SUPPLEMENTARY_PRIVATE_USE_AREA_A_ID = 109;
646         /**
647          * @stable ICU 2.4
648          */
649         public static final int SUPPLEMENTARY_PRIVATE_USE_AREA_B_ID = 110;
650 
651         /**
652          * @stable ICU 2.6
653          */
654         public static final int LIMBU_ID = 111; /*[1900]*/
655         /**
656          * @stable ICU 2.6
657          */
658         public static final int TAI_LE_ID = 112; /*[1950]*/
659         /**
660          * @stable ICU 2.6
661          */
662         public static final int KHMER_SYMBOLS_ID = 113; /*[19E0]*/
663         /**
664          * @stable ICU 2.6
665          */
666         public static final int PHONETIC_EXTENSIONS_ID = 114; /*[1D00]*/
667         /**
668          * @stable ICU 2.6
669          */
670         public static final int MISCELLANEOUS_SYMBOLS_AND_ARROWS_ID = 115; /*[2B00]*/
671         /**
672          * @stable ICU 2.6
673          */
674         public static final int YIJING_HEXAGRAM_SYMBOLS_ID = 116; /*[4DC0]*/
675         /**
676          * @stable ICU 2.6
677          */
678         public static final int LINEAR_B_SYLLABARY_ID = 117; /*[10000]*/
679         /**
680          * @stable ICU 2.6
681          */
682         public static final int LINEAR_B_IDEOGRAMS_ID = 118; /*[10080]*/
683         /**
684          * @stable ICU 2.6
685          */
686         public static final int AEGEAN_NUMBERS_ID = 119; /*[10100]*/
687         /**
688          * @stable ICU 2.6
689          */
690         public static final int UGARITIC_ID = 120; /*[10380]*/
691         /**
692          * @stable ICU 2.6
693          */
694         public static final int SHAVIAN_ID = 121; /*[10450]*/
695         /**
696          * @stable ICU 2.6
697          */
698         public static final int OSMANYA_ID = 122; /*[10480]*/
699         /**
700          * @stable ICU 2.6
701          */
702         public static final int CYPRIOT_SYLLABARY_ID = 123; /*[10800]*/
703         /**
704          * @stable ICU 2.6
705          */
706         public static final int TAI_XUAN_JING_SYMBOLS_ID = 124; /*[1D300]*/
707         /**
708          * @stable ICU 2.6
709          */
710         public static final int VARIATION_SELECTORS_SUPPLEMENT_ID = 125; /*[E0100]*/
711 
712         /* New blocks in Unicode 4.1 */
713 
714         /**
715          * @stable ICU 3.4
716          */
717         public static final int ANCIENT_GREEK_MUSICAL_NOTATION_ID = 126; /*[1D200]*/
718 
719         /**
720          * @stable ICU 3.4
721          */
722         public static final int ANCIENT_GREEK_NUMBERS_ID = 127; /*[10140]*/
723 
724         /**
725          * @stable ICU 3.4
726          */
727         public static final int ARABIC_SUPPLEMENT_ID = 128; /*[0750]*/
728 
729         /**
730          * @stable ICU 3.4
731          */
732         public static final int BUGINESE_ID = 129; /*[1A00]*/
733 
734         /**
735          * @stable ICU 3.4
736          */
737         public static final int CJK_STROKES_ID = 130; /*[31C0]*/
738 
739         /**
740          * @stable ICU 3.4
741          */
742         public static final int COMBINING_DIACRITICAL_MARKS_SUPPLEMENT_ID = 131; /*[1DC0]*/
743 
744         /**
745          * @stable ICU 3.4
746          */
747         public static final int COPTIC_ID = 132; /*[2C80]*/
748 
749         /**
750          * @stable ICU 3.4
751          */
752         public static final int ETHIOPIC_EXTENDED_ID = 133; /*[2D80]*/
753 
754         /**
755          * @stable ICU 3.4
756          */
757         public static final int ETHIOPIC_SUPPLEMENT_ID = 134; /*[1380]*/
758 
759         /**
760          * @stable ICU 3.4
761          */
762         public static final int GEORGIAN_SUPPLEMENT_ID = 135; /*[2D00]*/
763 
764         /**
765          * @stable ICU 3.4
766          */
767         public static final int GLAGOLITIC_ID = 136; /*[2C00]*/
768 
769         /**
770          * @stable ICU 3.4
771          */
772         public static final int KHAROSHTHI_ID = 137; /*[10A00]*/
773 
774         /**
775          * @stable ICU 3.4
776          */
777         public static final int MODIFIER_TONE_LETTERS_ID = 138; /*[A700]*/
778 
779         /**
780          * @stable ICU 3.4
781          */
782         public static final int NEW_TAI_LUE_ID = 139; /*[1980]*/
783 
784         /**
785          * @stable ICU 3.4
786          */
787         public static final int OLD_PERSIAN_ID = 140; /*[103A0]*/
788 
789         /**
790          * @stable ICU 3.4
791          */
792         public static final int PHONETIC_EXTENSIONS_SUPPLEMENT_ID = 141; /*[1D80]*/
793 
794         /**
795          * @stable ICU 3.4
796          */
797         public static final int SUPPLEMENTAL_PUNCTUATION_ID = 142; /*[2E00]*/
798 
799         /**
800          * @stable ICU 3.4
801          */
802         public static final int SYLOTI_NAGRI_ID = 143; /*[A800]*/
803 
804         /**
805          * @stable ICU 3.4
806          */
807         public static final int TIFINAGH_ID = 144; /*[2D30]*/
808 
809         /**
810          * @stable ICU 3.4
811          */
812         public static final int VERTICAL_FORMS_ID = 145; /*[FE10]*/
813 
814         /* New blocks in Unicode 5.0 */
815 
816         /**
817          * @stable ICU 3.6
818          */
819         public static final int NKO_ID = 146; /*[07C0]*/
820         /**
821          * @stable ICU 3.6
822          */
823         public static final int BALINESE_ID = 147; /*[1B00]*/
824         /**
825          * @stable ICU 3.6
826          */
827         public static final int LATIN_EXTENDED_C_ID = 148; /*[2C60]*/
828         /**
829          * @stable ICU 3.6
830          */
831         public static final int LATIN_EXTENDED_D_ID = 149; /*[A720]*/
832         /**
833          * @stable ICU 3.6
834          */
835         public static final int PHAGS_PA_ID = 150; /*[A840]*/
836         /**
837          * @stable ICU 3.6
838          */
839         public static final int PHOENICIAN_ID = 151; /*[10900]*/
840         /**
841          * @stable ICU 3.6
842          */
843         public static final int CUNEIFORM_ID = 152; /*[12000]*/
844         /**
845          * @stable ICU 3.6
846          */
847         public static final int CUNEIFORM_NUMBERS_AND_PUNCTUATION_ID = 153; /*[12400]*/
848         /**
849          * @stable ICU 3.6
850          */
851         public static final int COUNTING_ROD_NUMERALS_ID = 154; /*[1D360]*/
852 
853         /** ID of the Sundanese block.
854          * @stable ICU 4.0
855          */
856         public static final int SUNDANESE_ID = 155; /* [1B80]: presumably the block's first code point; same notation below */
857 
858         /** ID of the Lepcha block.
859          * @stable ICU 4.0
860          */
861         public static final int LEPCHA_ID = 156; /* [1C00] */
862 
863         /** ID of the Ol Chiki block.
864          * @stable ICU 4.0
865          */
866         public static final int OL_CHIKI_ID = 157; /* [1C50] */
867 
868         /** ID of the Cyrillic Extended-A block.
869          * @stable ICU 4.0
870          */
871         public static final int CYRILLIC_EXTENDED_A_ID = 158; /* [2DE0] */
872 
873         /** ID of the Vai block.
874          * @stable ICU 4.0
875          */
876         public static final int VAI_ID = 159; /* [A500] */
877 
878         /** ID of the Cyrillic Extended-B block.
879          * @stable ICU 4.0
880          */
881         public static final int CYRILLIC_EXTENDED_B_ID = 160; /* [A640] */
882 
883         /** ID of the Saurashtra block.
884          * @stable ICU 4.0
885          */
886         public static final int SAURASHTRA_ID = 161; /* [A880] */
887 
888         /** ID of the Kayah Li block.
889          * @stable ICU 4.0
890          */
891         public static final int KAYAH_LI_ID = 162; /* [A900] */
892 
893         /** ID of the Rejang block.
894          * @stable ICU 4.0
895          */
896         public static final int REJANG_ID = 163; /* [A930] */
897 
898         /** ID of the Cham block.
899          * @stable ICU 4.0
900          */
901         public static final int CHAM_ID = 164; /* [AA00] */
902 
903         /** ID of the Ancient Symbols block.
904          * @stable ICU 4.0
905          */
906         public static final int ANCIENT_SYMBOLS_ID = 165; /* [10190] */
907 
908         /** ID of the Phaistos Disc block.
909          * @stable ICU 4.0
910          */
911         public static final int PHAISTOS_DISC_ID = 166; /* [101D0] */
912 
913         /** ID of the Lycian block.
914          * @stable ICU 4.0
915          */
916         public static final int LYCIAN_ID = 167; /* [10280] */
917 
918         /** ID of the Carian block.
919          * @stable ICU 4.0
920          */
921         public static final int CARIAN_ID = 168; /* [102A0] */
922 
923         /** ID of the Lydian block.
924          * @stable ICU 4.0
925          */
926         public static final int LYDIAN_ID = 169; /* [10920] */
927 
928         /** ID of the Mahjong Tiles block.
929          * @stable ICU 4.0
930          */
931         public static final int MAHJONG_TILES_ID = 170; /* [1F000] */
932 
933         /** ID of the Domino Tiles block.
934          * @stable ICU 4.0
935          */
936         public static final int DOMINO_TILES_ID = 171; /* [1F030] */
937 
938         /* New blocks in Unicode 5.2: integer block IDs; each bracketed hex is presumably the block's first code point */
939 
940         /** @stable ICU 4.4 */
941         public static final int SAMARITAN_ID = 172; /*[0800]*/
942         /** @stable ICU 4.4 */
943         public static final int UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_ID = 173; /*[18B0]*/
944         /** @stable ICU 4.4 */
945         public static final int TAI_THAM_ID = 174; /*[1A20]*/
946         /** @stable ICU 4.4 */
947         public static final int VEDIC_EXTENSIONS_ID = 175; /*[1CD0]*/
948         /** @stable ICU 4.4 */
949         public static final int LISU_ID = 176; /*[A4D0]*/
950         /** @stable ICU 4.4 */
951         public static final int BAMUM_ID = 177; /*[A6A0]*/
952         /** @stable ICU 4.4 */
953         public static final int COMMON_INDIC_NUMBER_FORMS_ID = 178; /*[A830]*/
954         /** @stable ICU 4.4 */
955         public static final int DEVANAGARI_EXTENDED_ID = 179; /*[A8E0]*/
956         /** @stable ICU 4.4 */
957         public static final int HANGUL_JAMO_EXTENDED_A_ID = 180; /*[A960]*/
958         /** @stable ICU 4.4 */
959         public static final int JAVANESE_ID = 181; /*[A980]*/
960         /** @stable ICU 4.4 */
961         public static final int MYANMAR_EXTENDED_A_ID = 182; /*[AA60]*/
962         /** @stable ICU 4.4 */
963         public static final int TAI_VIET_ID = 183; /*[AA80]*/
964         /** @stable ICU 4.4 */
965         public static final int MEETEI_MAYEK_ID = 184; /*[ABC0]*/
966         /** @stable ICU 4.4 */
967         public static final int HANGUL_JAMO_EXTENDED_B_ID = 185; /*[D7B0]*/
968         /** @stable ICU 4.4 */
969         public static final int IMPERIAL_ARAMAIC_ID = 186; /*[10840]*/
970         /** @stable ICU 4.4 */
971         public static final int OLD_SOUTH_ARABIAN_ID = 187; /*[10A60]*/
972         /** @stable ICU 4.4 */
973         public static final int AVESTAN_ID = 188; /*[10B00]*/
974         /** @stable ICU 4.4 */
975         public static final int INSCRIPTIONAL_PARTHIAN_ID = 189; /*[10B40]*/
976         /** @stable ICU 4.4 */
977         public static final int INSCRIPTIONAL_PAHLAVI_ID = 190; /*[10B60]*/
978         /** @stable ICU 4.4 */
979         public static final int OLD_TURKIC_ID = 191; /*[10C00]*/
980         /** @stable ICU 4.4 */
981         public static final int RUMI_NUMERAL_SYMBOLS_ID = 192; /*[10E60]*/
982         /** @stable ICU 4.4 */
983         public static final int KAITHI_ID = 193; /*[11080]*/
984         /** @stable ICU 4.4 */
985         public static final int EGYPTIAN_HIEROGLYPHS_ID = 194; /*[13000]*/
986         /** @stable ICU 4.4 */
987         public static final int ENCLOSED_ALPHANUMERIC_SUPPLEMENT_ID = 195; /*[1F100]*/
988         /** @stable ICU 4.4 */
989         public static final int ENCLOSED_IDEOGRAPHIC_SUPPLEMENT_ID = 196; /*[1F200]*/
990         /** @stable ICU 4.4 */
991         public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_ID = 197; /*[2A700]*/
992 
993         /* New blocks in Unicode 6.0: integer block IDs; each bracketed hex is presumably the block's first code point */
994 
995         /** @stable ICU 4.6 */
996         public static final int MANDAIC_ID = 198; /*[0840]*/
997         /** @stable ICU 4.6 */
998         public static final int BATAK_ID = 199; /*[1BC0]*/
999         /** @stable ICU 4.6 */
1000         public static final int ETHIOPIC_EXTENDED_A_ID = 200; /*[AB00]*/
1001         /** @stable ICU 4.6 */
1002         public static final int BRAHMI_ID = 201; /*[11000]*/
1003         /** @stable ICU 4.6 */
1004         public static final int BAMUM_SUPPLEMENT_ID = 202; /*[16800]*/
1005         /** @stable ICU 4.6 */
1006         public static final int KANA_SUPPLEMENT_ID = 203; /*[1B000]*/
1007         /** @stable ICU 4.6 */
1008         public static final int PLAYING_CARDS_ID = 204; /*[1F0A0]*/
1009         /** @stable ICU 4.6 */
1010         public static final int MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS_ID = 205; /*[1F300]*/
1011         /** @stable ICU 4.6 */
1012         public static final int EMOTICONS_ID = 206; /*[1F600]*/
1013         /** @stable ICU 4.6 */
1014         public static final int TRANSPORT_AND_MAP_SYMBOLS_ID = 207; /*[1F680]*/
1015         /** @stable ICU 4.6 */
1016         public static final int ALCHEMICAL_SYMBOLS_ID = 208; /*[1F700]*/
1017         /** @stable ICU 4.6 */
1018         public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_ID = 209; /*[2B740]*/
1019 
1020         /* New blocks in Unicode 6.1: integer block IDs; each bracketed hex is presumably the block's first code point */
1021 
1022         /** @stable ICU 49 */
1023         public static final int ARABIC_EXTENDED_A_ID = 210; /*[08A0]*/
1024         /** @stable ICU 49 */
1025         public static final int ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS_ID = 211; /*[1EE00]*/
1026         /** @stable ICU 49 */
1027         public static final int CHAKMA_ID = 212; /*[11100]*/
1028         /** @stable ICU 49 */
1029         public static final int MEETEI_MAYEK_EXTENSIONS_ID = 213; /*[AAE0]*/
1030         /** @stable ICU 49 */
1031         public static final int MEROITIC_CURSIVE_ID = 214; /*[109A0]*/
1032         /** @stable ICU 49 */
1033         public static final int MEROITIC_HIEROGLYPHS_ID = 215; /*[10980]*/
1034         /** @stable ICU 49 */
1035         public static final int MIAO_ID = 216; /*[16F00]*/
1036         /** @stable ICU 49 */
1037         public static final int SHARADA_ID = 217; /*[11180]*/
1038         /** @stable ICU 49 */
1039         public static final int SORA_SOMPENG_ID = 218; /*[110D0]*/
1040         /** @stable ICU 49 */
1041         public static final int SUNDANESE_SUPPLEMENT_ID = 219; /*[1CC0]*/
1042         /** @stable ICU 49 */
1043         public static final int TAKRI_ID = 220; /*[11680]*/
1044 
1045         /* New blocks in Unicode 7.0: integer block IDs; each bracketed hex is presumably the block's first code point */
1046 
1047         /** @stable ICU 54 */
1048         public static final int BASSA_VAH_ID = 221; /*[16AD0]*/
1049         /** @stable ICU 54 */
1050         public static final int CAUCASIAN_ALBANIAN_ID = 222; /*[10530]*/
1051         /** @stable ICU 54 */
1052         public static final int COPTIC_EPACT_NUMBERS_ID = 223; /*[102E0]*/
1053         /** @stable ICU 54 */
1054         public static final int COMBINING_DIACRITICAL_MARKS_EXTENDED_ID = 224; /*[1AB0]*/
1055         /** @stable ICU 54 */
1056         public static final int DUPLOYAN_ID = 225; /*[1BC00]*/
1057         /** @stable ICU 54 */
1058         public static final int ELBASAN_ID = 226; /*[10500]*/
1059         /** @stable ICU 54 */
1060         public static final int GEOMETRIC_SHAPES_EXTENDED_ID = 227; /*[1F780]*/
1061         /** @stable ICU 54 */
1062         public static final int GRANTHA_ID = 228; /*[11300]*/
1063         /** @stable ICU 54 */
1064         public static final int KHOJKI_ID = 229; /*[11200]*/
1065         /** @stable ICU 54 */
1066         public static final int KHUDAWADI_ID = 230; /*[112B0]*/
1067         /** @stable ICU 54 */
1068         public static final int LATIN_EXTENDED_E_ID = 231; /*[AB30]*/
1069         /** @stable ICU 54 */
1070         public static final int LINEAR_A_ID = 232; /*[10600]*/
1071         /** @stable ICU 54 */
1072         public static final int MAHAJANI_ID = 233; /*[11150]*/
1073         /** @stable ICU 54 */
1074         public static final int MANICHAEAN_ID = 234; /*[10AC0]*/
1075         /** @stable ICU 54 */
1076         public static final int MENDE_KIKAKUI_ID = 235; /*[1E800]*/
1077         /** @stable ICU 54 */
1078         public static final int MODI_ID = 236; /*[11600]*/
1079         /** @stable ICU 54 */
1080         public static final int MRO_ID = 237; /*[16A40]*/
1081         /** @stable ICU 54 */
1082         public static final int MYANMAR_EXTENDED_B_ID = 238; /*[A9E0]*/
1083         /** @stable ICU 54 */
1084         public static final int NABATAEAN_ID = 239; /*[10880]*/
1085         /** @stable ICU 54 */
1086         public static final int OLD_NORTH_ARABIAN_ID = 240; /*[10A80]*/
1087         /** @stable ICU 54 */
1088         public static final int OLD_PERMIC_ID = 241; /*[10350]*/
1089         /** @stable ICU 54 */
1090         public static final int ORNAMENTAL_DINGBATS_ID = 242; /*[1F650]*/
1091         /** @stable ICU 54 */
1092         public static final int PAHAWH_HMONG_ID = 243; /*[16B00]*/
1093         /** @stable ICU 54 */
1094         public static final int PALMYRENE_ID = 244; /*[10860]*/
1095         /** @stable ICU 54 */
1096         public static final int PAU_CIN_HAU_ID = 245; /*[11AC0]*/
1097         /** @stable ICU 54 */
1098         public static final int PSALTER_PAHLAVI_ID = 246; /*[10B80]*/
1099         /** @stable ICU 54 */
1100         public static final int SHORTHAND_FORMAT_CONTROLS_ID = 247; /*[1BCA0]*/
1101         /** @stable ICU 54 */
1102         public static final int SIDDHAM_ID = 248; /*[11580]*/
1103         /** @stable ICU 54 */
1104         public static final int SINHALA_ARCHAIC_NUMBERS_ID = 249; /*[111E0]*/
1105         /** @stable ICU 54 */
1106         public static final int SUPPLEMENTAL_ARROWS_C_ID = 250; /*[1F800]*/
1107         /** @stable ICU 54 */
1108         public static final int TIRHUTA_ID = 251; /*[11480]*/
1109         /** @stable ICU 54 */
1110         public static final int WARANG_CITI_ID = 252; /*[118A0]*/
1111 
1112         /* New blocks in Unicode 8.0: integer block IDs; each bracketed hex is presumably the block's first code point */
1113 
1114         /** @stable ICU 56 */
1115         public static final int AHOM_ID = 253; /*[11700]*/
1116         /** @stable ICU 56 */
1117         public static final int ANATOLIAN_HIEROGLYPHS_ID = 254; /*[14400]*/
1118         /** @stable ICU 56 */
1119         public static final int CHEROKEE_SUPPLEMENT_ID = 255; /*[AB70]*/
1120         /** @stable ICU 56 */
1121         public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E_ID = 256; /*[2B820]*/
1122         /** @stable ICU 56 */
1123         public static final int EARLY_DYNASTIC_CUNEIFORM_ID = 257; /*[12480]*/
1124         /** @stable ICU 56 */
1125         public static final int HATRAN_ID = 258; /*[108E0]*/
1126         /** @stable ICU 56 */
1127         public static final int MULTANI_ID = 259; /*[11280]*/
1128         /** @stable ICU 56 */
1129         public static final int OLD_HUNGARIAN_ID = 260; /*[10C80]*/
1130         /** @stable ICU 56 */
1131         public static final int SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS_ID = 261; /*[1F900]*/
1132         /** @stable ICU 56 */
1133         public static final int SUTTON_SIGNWRITING_ID = 262; /*[1D800]*/
1134 
1135         /** One more than the highest block ID declared above (262); used as the length of the BLOCKS_ array.
1136          * @stable ICU 2.4
1137          */
1138         public static final int COUNT = 263; // presumably must be raised whenever a new block ID is added -- confirm
1139 
1140         // blocks objects ---------------------------------------------------
1141 
1142         /** Table with COUNT slots; how it is filled is not shown here (presumably indexed by block ID -- confirm).
1143          * Array of UnicodeBlocks, for easy access in getInstance(int)
1144          */
1145         private final static UnicodeBlock BLOCKS_[] = new UnicodeBlock[COUNT];
1146 
1147         /** Block constant created with the name "NO_BLOCK" and the literal ID 0; presumably stands for "no block assigned".
1148          * @stable ICU 2.6
1149          */
1150         public static final UnicodeBlock NO_BLOCK
1151         = new UnicodeBlock("NO_BLOCK", 0);
1152 
1153         /** Constant for the Basic Latin block.
1154          * @stable ICU 2.4
1155          */
1156         public static final UnicodeBlock BASIC_LATIN
1157         = new UnicodeBlock("BASIC_LATIN", BASIC_LATIN_ID);
1158         /** Constant for the Latin-1 Supplement block.
1159          * @stable ICU 2.4
1160          */
1161         public static final UnicodeBlock LATIN_1_SUPPLEMENT
1162         = new UnicodeBlock("LATIN_1_SUPPLEMENT", LATIN_1_SUPPLEMENT_ID);
1163         /** Constant for the Latin Extended-A block.
1164          * @stable ICU 2.4
1165          */
1166         public static final UnicodeBlock LATIN_EXTENDED_A
1167         = new UnicodeBlock("LATIN_EXTENDED_A", LATIN_EXTENDED_A_ID);
1168         /** Constant for the Latin Extended-B block.
1169          * @stable ICU 2.4
1170          */
1171         public static final UnicodeBlock LATIN_EXTENDED_B
1172         = new UnicodeBlock("LATIN_EXTENDED_B", LATIN_EXTENDED_B_ID);
1173         /** Constant for the IPA Extensions block.
1174          * @stable ICU 2.4
1175          */
1176         public static final UnicodeBlock IPA_EXTENSIONS
1177         = new UnicodeBlock("IPA_EXTENSIONS", IPA_EXTENSIONS_ID);
1178         /** Constant for the Spacing Modifier Letters block.
1179          * @stable ICU 2.4
1180          */
1181         public static final UnicodeBlock SPACING_MODIFIER_LETTERS
1182         = new UnicodeBlock("SPACING_MODIFIER_LETTERS", SPACING_MODIFIER_LETTERS_ID);
1183         /** Constant for the Combining Diacritical Marks block.
1184          * @stable ICU 2.4
1185          */
1186         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS
1187         = new UnicodeBlock("COMBINING_DIACRITICAL_MARKS", COMBINING_DIACRITICAL_MARKS_ID);
1188         /** Constant for the Greek block.
1189          * Unicode 3.2 renames this block to "Greek and Coptic".
1190          * @stable ICU 2.4
1191          */
1192         public static final UnicodeBlock GREEK
1193         = new UnicodeBlock("GREEK", GREEK_ID);
1194         /** Constant for the Cyrillic block.
1195          * @stable ICU 2.4
1196          */
1197         public static final UnicodeBlock CYRILLIC
1198         = new UnicodeBlock("CYRILLIC", CYRILLIC_ID);
1199         /** Constant for the Armenian block.
1200          * @stable ICU 2.4
1201          */
1202         public static final UnicodeBlock ARMENIAN
1203         = new UnicodeBlock("ARMENIAN", ARMENIAN_ID);
1204         /** Constant for the Hebrew block.
1205          * @stable ICU 2.4
1206          */
1207         public static final UnicodeBlock HEBREW
1208         = new UnicodeBlock("HEBREW", HEBREW_ID);
1209         /** Constant for the Arabic block.
1210          * @stable ICU 2.4
1211          */
1212         public static final UnicodeBlock ARABIC
1213         = new UnicodeBlock("ARABIC", ARABIC_ID);
1214         /** Constant for the Syriac block.
1215          * @stable ICU 2.4
1216          */
1217         public static final UnicodeBlock SYRIAC
1218         = new UnicodeBlock("SYRIAC", SYRIAC_ID);
1219         /** Constant for the Thaana block.
1220          * @stable ICU 2.4
1221          */
1222         public static final UnicodeBlock THAANA
1223         = new UnicodeBlock("THAANA", THAANA_ID);
1224         /** Constant for the Devanagari block.
1225          * @stable ICU 2.4
1226          */
1227         public static final UnicodeBlock DEVANAGARI
1228         = new UnicodeBlock("DEVANAGARI", DEVANAGARI_ID);
1229         /** Constant for the Bengali block.
1230          * @stable ICU 2.4
1231          */
1232         public static final UnicodeBlock BENGALI
1233         = new UnicodeBlock("BENGALI", BENGALI_ID);
1234         /** Constant for the Gurmukhi block.
1235          * @stable ICU 2.4
1236          */
1237         public static final UnicodeBlock GURMUKHI
1238         = new UnicodeBlock("GURMUKHI", GURMUKHI_ID);
1239         /** Constant for the Gujarati block.
1240          * @stable ICU 2.4
1241          */
1242         public static final UnicodeBlock GUJARATI
1243         = new UnicodeBlock("GUJARATI", GUJARATI_ID);
1244         /** Constant for the Oriya block.
1245          * @stable ICU 2.4
1246          */
1247         public static final UnicodeBlock ORIYA
1248         = new UnicodeBlock("ORIYA", ORIYA_ID);
1249         /** Constant for the Tamil block.
1250          * @stable ICU 2.4
1251          */
1252         public static final UnicodeBlock TAMIL
1253         = new UnicodeBlock("TAMIL", TAMIL_ID);
1254         /** Constant for the Telugu block.
1255          * @stable ICU 2.4
1256          */
1257         public static final UnicodeBlock TELUGU
1258         = new UnicodeBlock("TELUGU", TELUGU_ID);
1259         /** Constant for the Kannada block.
1260          * @stable ICU 2.4
1261          */
1262         public static final UnicodeBlock KANNADA
1263         = new UnicodeBlock("KANNADA", KANNADA_ID);
1264         /** Constant for the Malayalam block.
1265          * @stable ICU 2.4
1266          */
1267         public static final UnicodeBlock MALAYALAM
1268         = new UnicodeBlock("MALAYALAM", MALAYALAM_ID);
1269         /** Constant for the Sinhala block.
1270          * @stable ICU 2.4
1271          */
1272         public static final UnicodeBlock SINHALA
1273         = new UnicodeBlock("SINHALA", SINHALA_ID);
1274         /** Constant for the Thai block.
1275          * @stable ICU 2.4
1276          */
1277         public static final UnicodeBlock THAI
1278         = new UnicodeBlock("THAI", THAI_ID);
1279         /** Constant for the Lao block.
1280          * @stable ICU 2.4
1281          */
1282         public static final UnicodeBlock LAO
1283         = new UnicodeBlock("LAO", LAO_ID);
1284         /** Constant for the Tibetan block.
1285          * @stable ICU 2.4
1286          */
1287         public static final UnicodeBlock TIBETAN
1288         = new UnicodeBlock("TIBETAN", TIBETAN_ID);
1289         /** Constant for the Myanmar block.
1290          * @stable ICU 2.4
1291          */
1292         public static final UnicodeBlock MYANMAR
1293         = new UnicodeBlock("MYANMAR", MYANMAR_ID);
1294         /** Constant for the Georgian block.
1295          * @stable ICU 2.4
1296          */
1297         public static final UnicodeBlock GEORGIAN
1298         = new UnicodeBlock("GEORGIAN", GEORGIAN_ID);
1299         /** Constant for the Hangul Jamo block.
1300          * @stable ICU 2.4
1301          */
1302         public static final UnicodeBlock HANGUL_JAMO
1303         = new UnicodeBlock("HANGUL_JAMO", HANGUL_JAMO_ID);
1304         /** Constant for the Ethiopic block.
1305          * @stable ICU 2.4
1306          */
1307         public static final UnicodeBlock ETHIOPIC
1308         = new UnicodeBlock("ETHIOPIC", ETHIOPIC_ID);
1309         /** Constant for the Cherokee block.
1310          * @stable ICU 2.4
1311          */
1312         public static final UnicodeBlock CHEROKEE
1313         = new UnicodeBlock("CHEROKEE", CHEROKEE_ID);
1314         /** Constant for the Unified Canadian Aboriginal Syllabics block.
1315          * @stable ICU 2.4
1316          */
1317         public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS
1318         = new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS",
1319                 UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_ID);
1320         /** Constant for the Ogham block.
1321          * @stable ICU 2.4
1322          */
1323         public static final UnicodeBlock OGHAM
1324         = new UnicodeBlock("OGHAM", OGHAM_ID);
1325         /** Constant for the Runic block.
1326          * @stable ICU 2.4
1327          */
1328         public static final UnicodeBlock RUNIC
1329         = new UnicodeBlock("RUNIC", RUNIC_ID);
1330         /** Constant for the Khmer block.
1331          * @stable ICU 2.4
1332          */
1333         public static final UnicodeBlock KHMER
1334         = new UnicodeBlock("KHMER", KHMER_ID);
1335         /** Constant for the Mongolian block.
1336          * @stable ICU 2.4
1337          */
1338         public static final UnicodeBlock MONGOLIAN
1339         = new UnicodeBlock("MONGOLIAN", MONGOLIAN_ID);
1340         /** Constant for the Latin Extended Additional block.
1341          * @stable ICU 2.4
1342          */
1343         public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL
1344         = new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL", LATIN_EXTENDED_ADDITIONAL_ID);
1345         /** Constant for the Greek Extended block.
1346          * @stable ICU 2.4
1347          */
1348         public static final UnicodeBlock GREEK_EXTENDED
1349         = new UnicodeBlock("GREEK_EXTENDED", GREEK_EXTENDED_ID);
1350         /** Constant for the General Punctuation block.
1351          * @stable ICU 2.4
1352          */
1353         public static final UnicodeBlock GENERAL_PUNCTUATION
1354         = new UnicodeBlock("GENERAL_PUNCTUATION", GENERAL_PUNCTUATION_ID);
1355         /** Constant for the Superscripts and Subscripts block.
1356          * @stable ICU 2.4
1357          */
1358         public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS
1359         = new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS", SUPERSCRIPTS_AND_SUBSCRIPTS_ID);
1360         /** Constant for the Currency Symbols block.
1361          * @stable ICU 2.4
1362          */
1363         public static final UnicodeBlock CURRENCY_SYMBOLS
1364         = new UnicodeBlock("CURRENCY_SYMBOLS", CURRENCY_SYMBOLS_ID);
1365         /** Constant for the Combining Marks for Symbols block.
1366          * Unicode 3.2 renames this block to "Combining Diacritical Marks for
1367          * Symbols".
1368          * @stable ICU 2.4
1369          */
1370         public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS
1371         = new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS", COMBINING_MARKS_FOR_SYMBOLS_ID);
1372         /** Constant for the Letterlike Symbols block.
1373          * @stable ICU 2.4
1374          */
1375         public static final UnicodeBlock LETTERLIKE_SYMBOLS
1376         = new UnicodeBlock("LETTERLIKE_SYMBOLS", LETTERLIKE_SYMBOLS_ID);
1377         /** Constant for the Number Forms block.
1378          * @stable ICU 2.4
1379          */
1380         public static final UnicodeBlock NUMBER_FORMS
1381         = new UnicodeBlock("NUMBER_FORMS", NUMBER_FORMS_ID);
1382         /** Constant for the Arrows block.
1383          * @stable ICU 2.4
1384          */
1385         public static final UnicodeBlock ARROWS
1386         = new UnicodeBlock("ARROWS", ARROWS_ID);
1387         /** Constant for the Mathematical Operators block.
1388          * @stable ICU 2.4
1389          */
1390         public static final UnicodeBlock MATHEMATICAL_OPERATORS
1391         = new UnicodeBlock("MATHEMATICAL_OPERATORS", MATHEMATICAL_OPERATORS_ID);
1392         /** Constant for the Miscellaneous Technical block.
1393          * @stable ICU 2.4
1394          */
1395         public static final UnicodeBlock MISCELLANEOUS_TECHNICAL
1396         = new UnicodeBlock("MISCELLANEOUS_TECHNICAL", MISCELLANEOUS_TECHNICAL_ID);
1397         /** Constant for the Control Pictures block.
1398          * @stable ICU 2.4
1399          */
1400         public static final UnicodeBlock CONTROL_PICTURES
1401         = new UnicodeBlock("CONTROL_PICTURES", CONTROL_PICTURES_ID);
1402         /** Constant for the Optical Character Recognition block.
1403          * @stable ICU 2.4
1404          */
1405         public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION
1406         = new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION", OPTICAL_CHARACTER_RECOGNITION_ID);
1407         /** Constant for the Enclosed Alphanumerics block.
1408          * @stable ICU 2.4
1409          */
1410         public static final UnicodeBlock ENCLOSED_ALPHANUMERICS
1411         = new UnicodeBlock("ENCLOSED_ALPHANUMERICS", ENCLOSED_ALPHANUMERICS_ID);
1412         /** Constant for the Box Drawing block.
1413          * @stable ICU 2.4
1414          */
1415         public static final UnicodeBlock BOX_DRAWING
1416         = new UnicodeBlock("BOX_DRAWING", BOX_DRAWING_ID);
1417         /** Constant for the Block Elements block.
1418          * @stable ICU 2.4
1419          */
1420         public static final UnicodeBlock BLOCK_ELEMENTS
1421         = new UnicodeBlock("BLOCK_ELEMENTS", BLOCK_ELEMENTS_ID);
1422         /** Constant for the Geometric Shapes block.
1423          * @stable ICU 2.4
1424          */
1425         public static final UnicodeBlock GEOMETRIC_SHAPES
1426         = new UnicodeBlock("GEOMETRIC_SHAPES", GEOMETRIC_SHAPES_ID);
1427         /** Constant for the Miscellaneous Symbols block.
1428          * @stable ICU 2.4
1429          */
1430         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS
1431         = new UnicodeBlock("MISCELLANEOUS_SYMBOLS", MISCELLANEOUS_SYMBOLS_ID);
1432         /** Constant for the Dingbats block.
1433          * @stable ICU 2.4
1434          */
1435         public static final UnicodeBlock DINGBATS
1436         = new UnicodeBlock("DINGBATS", DINGBATS_ID);
1437         /** Constant for the Braille Patterns block.
1438          * @stable ICU 2.4
1439          */
1440         public static final UnicodeBlock BRAILLE_PATTERNS
1441         = new UnicodeBlock("BRAILLE_PATTERNS", BRAILLE_PATTERNS_ID);
1442         /** Constant for the CJK Radicals Supplement block.
1443          * @stable ICU 2.4
1444          */
1445         public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT
1446         = new UnicodeBlock("CJK_RADICALS_SUPPLEMENT", CJK_RADICALS_SUPPLEMENT_ID);
1447         /** Constant for the Kangxi Radicals block.
1448          * @stable ICU 2.4
1449          */
1450         public static final UnicodeBlock KANGXI_RADICALS
1451         = new UnicodeBlock("KANGXI_RADICALS", KANGXI_RADICALS_ID);
1452         /** Constant for the Ideographic Description Characters block.
1453          * @stable ICU 2.4
1454          */
1455         public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS
1456         = new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS",
1457                 IDEOGRAPHIC_DESCRIPTION_CHARACTERS_ID);
1458         /** Constant for the CJK Symbols and Punctuation block.
1459          * @stable ICU 2.4
1460          */
1461         public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION
1462         = new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION", CJK_SYMBOLS_AND_PUNCTUATION_ID);
1463         /** Constant for the Hiragana block.
1464          * @stable ICU 2.4
1465          */
1466         public static final UnicodeBlock HIRAGANA
1467         = new UnicodeBlock("HIRAGANA", HIRAGANA_ID);
1468         /** Constant for the Katakana block.
1469          * @stable ICU 2.4
1470          */
1471         public static final UnicodeBlock KATAKANA
1472         = new UnicodeBlock("KATAKANA", KATAKANA_ID);
1473         /** Constant for the Bopomofo block.
1474          * @stable ICU 2.4
1475          */
1476         public static final UnicodeBlock BOPOMOFO
1477         = new UnicodeBlock("BOPOMOFO", BOPOMOFO_ID);
1478         /** Constant for the Hangul Compatibility Jamo block.
1479          * @stable ICU 2.4
1480          */
1481         public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO
1482         = new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO", HANGUL_COMPATIBILITY_JAMO_ID);
1483         /** Constant for the Kanbun block.
1484          * @stable ICU 2.4
1485          */
1486         public static final UnicodeBlock KANBUN
1487         = new UnicodeBlock("KANBUN", KANBUN_ID);
1488         /** Constant for the Bopomofo Extended block.
1489          * @stable ICU 2.4
1490          */
1491         public static final UnicodeBlock BOPOMOFO_EXTENDED
1492         = new UnicodeBlock("BOPOMOFO_EXTENDED", BOPOMOFO_EXTENDED_ID);
1493         /** Constant for the Enclosed CJK Letters and Months block.
1494          * @stable ICU 2.4
1495          */
1496         public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS
1497         = new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS",
1498                 ENCLOSED_CJK_LETTERS_AND_MONTHS_ID);
1499         /** Constant for the CJK Compatibility block.
1500          * @stable ICU 2.4
1501          */
1502         public static final UnicodeBlock CJK_COMPATIBILITY
1503         = new UnicodeBlock("CJK_COMPATIBILITY", CJK_COMPATIBILITY_ID);
1504         /** Constant for the CJK Unified Ideographs Extension A block.
1505          * @stable ICU 2.4
1506          */
1507         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
1508         = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A",
1509                 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_ID);
1510         /** Constant for the CJK Unified Ideographs block.
1511          * @stable ICU 2.4
1512          */
1513         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS
1514         = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS", CJK_UNIFIED_IDEOGRAPHS_ID);
1515         /** Constant for the Yi Syllables block.
1516          * @stable ICU 2.4
1517          */
1518         public static final UnicodeBlock YI_SYLLABLES
1519         = new UnicodeBlock("YI_SYLLABLES", YI_SYLLABLES_ID);
1520         /** Constant for the Yi Radicals block.
1521          * @stable ICU 2.4
1522          */
1523         public static final UnicodeBlock YI_RADICALS
1524         = new UnicodeBlock("YI_RADICALS", YI_RADICALS_ID);
1525         /** Constant for the Hangul Syllables block.
1526          * @stable ICU 2.4
1527          */
1528         public static final UnicodeBlock HANGUL_SYLLABLES
1529         = new UnicodeBlock("HANGUL_SYLLABLES", HANGUL_SYLLABLES_ID);
1530         /** Constant for the High Surrogates block.
1531          * @stable ICU 2.4
1532          */
1533         public static final UnicodeBlock HIGH_SURROGATES
1534         = new UnicodeBlock("HIGH_SURROGATES", HIGH_SURROGATES_ID);
1535         /** Constant for the High Private Use Surrogates block.
1536          * @stable ICU 2.4
1537          */
1538         public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES
1539         = new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES", HIGH_PRIVATE_USE_SURROGATES_ID);
1540         /** Constant for the Low Surrogates block.
1541          * @stable ICU 2.4
1542          */
1543         public static final UnicodeBlock LOW_SURROGATES
1544         = new UnicodeBlock("LOW_SURROGATES", LOW_SURROGATES_ID);
1545         /** Constant for the Private Use Area block.
1546          * Same object as {@link #PRIVATE_USE}, which is declared as an alias of this constant.
1547          * Until Unicode 3.1.1, the corresponding block name was "Private Use",
1548          * and multiple code point ranges had this block.
1549          * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area"
1550          * and adds separate blocks for the supplementary PUAs.
1551          * @stable ICU 2.4
1552          */
1553         public static final UnicodeBlock PRIVATE_USE_AREA
1554         = new UnicodeBlock("PRIVATE_USE_AREA",  78); // NOTE(review): literal ID; neighboring blocks pass a named *_ID constant -- confirm it matches
1555         /** Alias for the Private Use Area block.
1556          * Same object as {@link #PRIVATE_USE_AREA}.
1557          * Until Unicode 3.1.1, the corresponding block name was "Private Use",
1558          * and multiple code point ranges had this block.
1559          * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area"
1560          * and adds separate blocks for the supplementary PUAs.
1561          * @stable ICU 2.4
1562          */
1563         public static final UnicodeBlock PRIVATE_USE
1564         = PRIVATE_USE_AREA;
1565         /** Constant for the CJK Compatibility Ideographs block.
1566          * @stable ICU 2.4
1567          */
1568         public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS
1569         = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS", CJK_COMPATIBILITY_IDEOGRAPHS_ID);
1570         /** Constant for the Alphabetic Presentation Forms block.
1571          * @stable ICU 2.4
1572          */
1573         public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS
1574         = new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS", ALPHABETIC_PRESENTATION_FORMS_ID);
1575         /** Constant for the Arabic Presentation Forms-A block.
1576          * @stable ICU 2.4
1577          */
1578         public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A
1579         = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A", ARABIC_PRESENTATION_FORMS_A_ID);
1580         /** Constant for the Combining Half Marks block.
1581          * @stable ICU 2.4
1582          */
1583         public static final UnicodeBlock COMBINING_HALF_MARKS
1584         = new UnicodeBlock("COMBINING_HALF_MARKS", COMBINING_HALF_MARKS_ID);
1585         /** Constant for the CJK Compatibility Forms block.
1586          * @stable ICU 2.4
1587          */
1588         public static final UnicodeBlock CJK_COMPATIBILITY_FORMS
1589         = new UnicodeBlock("CJK_COMPATIBILITY_FORMS", CJK_COMPATIBILITY_FORMS_ID);
1590         /** Constant for the Small Form Variants block.
1591          * @stable ICU 2.4
1592          */
1593         public static final UnicodeBlock SMALL_FORM_VARIANTS
1594         = new UnicodeBlock("SMALL_FORM_VARIANTS", SMALL_FORM_VARIANTS_ID);
1595         /** Constant for the Arabic Presentation Forms-B block.
1596          * @stable ICU 2.4
1597          */
1598         public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B
1599         = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B", ARABIC_PRESENTATION_FORMS_B_ID);
1600         /** Constant for the Specials block.
1601          * @stable ICU 2.4
1602          */
1603         public static final UnicodeBlock SPECIALS
1604         = new UnicodeBlock("SPECIALS", SPECIALS_ID);
1605         /** Constant for the Halfwidth and Fullwidth Forms block.
1606          * @stable ICU 2.4
1607          */
1608         public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS
1609         = new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS", HALFWIDTH_AND_FULLWIDTH_FORMS_ID);
1610         /** Constant for the Old Italic block.
1611          * @stable ICU 2.4
1612          */
1613         public static final UnicodeBlock OLD_ITALIC
1614         = new UnicodeBlock("OLD_ITALIC", OLD_ITALIC_ID);
1615         /** Constant for the Gothic block.
1616          * @stable ICU 2.4
1617          */
1618         public static final UnicodeBlock GOTHIC
1619         = new UnicodeBlock("GOTHIC", GOTHIC_ID);
1620         /** Constant for the Deseret block.
1621          * @stable ICU 2.4
1622          */
1623         public static final UnicodeBlock DESERET
1624         = new UnicodeBlock("DESERET", DESERET_ID);
1625         /** Constant for the Byzantine Musical Symbols block.
1626          * @stable ICU 2.4
1627          */
1628         public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS
1629         = new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS", BYZANTINE_MUSICAL_SYMBOLS_ID);
1630         /** Constant for the Musical Symbols block.
1631          * @stable ICU 2.4
1632          */
1633         public static final UnicodeBlock MUSICAL_SYMBOLS
1634         = new UnicodeBlock("MUSICAL_SYMBOLS", MUSICAL_SYMBOLS_ID);
1635         /** Constant for the Mathematical Alphanumeric Symbols block.
1636          * @stable ICU 2.4
1637          */
1638         public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS
1639         = new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS",
1640                 MATHEMATICAL_ALPHANUMERIC_SYMBOLS_ID);
1641         /** Constant for the CJK Unified Ideographs Extension B block.
1642          * @stable ICU 2.4
1643          */
1644         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B
1645         = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B",
1646                 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_ID);
1647         /** Constant for the CJK Compatibility Ideographs Supplement block.
1648          * @stable ICU 2.4
1649          */
1650         public static final UnicodeBlock
1651         CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT
1652         = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT",
1653                 CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_ID);
1654         /** Constant for the Tags block.
1655          * @stable ICU 2.4
1656          */
1657         public static final UnicodeBlock TAGS
1658         = new UnicodeBlock("TAGS", TAGS_ID);
1659 
1660         // New blocks in Unicode 3.2
1661 
1662         /** UnicodeBlock constant for "CYRILLIC_SUPPLEMENTARY" (the block name before the rename described below).
1663          * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement".
1664          * @stable ICU 2.4
1665          */
1666         public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY
1667         = new UnicodeBlock("CYRILLIC_SUPPLEMENTARY", CYRILLIC_SUPPLEMENTARY_ID);
1668         /** UnicodeBlock constant for "CYRILLIC_SUPPLEMENT" (the block name after the rename described below).
1669          * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement".
1670          * @stable ICU 3.0
1671          */
1672         public static final UnicodeBlock CYRILLIC_SUPPLEMENT
1673         = new UnicodeBlock("CYRILLIC_SUPPLEMENT", CYRILLIC_SUPPLEMENT_ID);
1674         /** UnicodeBlock constant for "TAGALOG".
1675          * @stable ICU 2.4
1676          */
1677         public static final UnicodeBlock TAGALOG
1678         = new UnicodeBlock("TAGALOG", TAGALOG_ID);
1679         /** UnicodeBlock constant for "HANUNOO".
1680          * @stable ICU 2.4
1681          */
1682         public static final UnicodeBlock HANUNOO
1683         = new UnicodeBlock("HANUNOO", HANUNOO_ID);
1684         /** UnicodeBlock constant for "BUHID".
1685          * @stable ICU 2.4
1686          */
1687         public static final UnicodeBlock BUHID
1688         = new UnicodeBlock("BUHID", BUHID_ID);
1689         /** UnicodeBlock constant for "TAGBANWA".
1690          * @stable ICU 2.4
1691          */
1692         public static final UnicodeBlock TAGBANWA
1693         = new UnicodeBlock("TAGBANWA", TAGBANWA_ID);
1694         /** UnicodeBlock constant for "MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A".
1695          * @stable ICU 2.4
1696          */
1697         public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A
1698         = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A",
1699                 MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A_ID);
1700         /** UnicodeBlock constant for "SUPPLEMENTAL_ARROWS_A".
1701          * @stable ICU 2.4
1702          */
1703         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A
1704         = new UnicodeBlock("SUPPLEMENTAL_ARROWS_A", SUPPLEMENTAL_ARROWS_A_ID);
1705         /** UnicodeBlock constant for "SUPPLEMENTAL_ARROWS_B".
1706          * @stable ICU 2.4
1707          */
1708         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B
1709         = new UnicodeBlock("SUPPLEMENTAL_ARROWS_B", SUPPLEMENTAL_ARROWS_B_ID);
1710         /** UnicodeBlock constant for "MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B".
1711          * @stable ICU 2.4
1712          */
1713         public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B
1714         = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B",
1715                 MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B_ID);
1716         /** UnicodeBlock constant for "SUPPLEMENTAL_MATHEMATICAL_OPERATORS".
1717          * @stable ICU 2.4
1718          */
1719         public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS
1720         = new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS",
1721                 SUPPLEMENTAL_MATHEMATICAL_OPERATORS_ID);
1722         /** UnicodeBlock constant for "KATAKANA_PHONETIC_EXTENSIONS".
1723          * @stable ICU 2.4
1724          */
1725         public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS
1726         = new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS", KATAKANA_PHONETIC_EXTENSIONS_ID);
1727         /** UnicodeBlock constant for "VARIATION_SELECTORS".
1728          * @stable ICU 2.4
1729          */
1730         public static final UnicodeBlock VARIATION_SELECTORS
1731         = new UnicodeBlock("VARIATION_SELECTORS", VARIATION_SELECTORS_ID);
1732         /** UnicodeBlock constant for "SUPPLEMENTARY_PRIVATE_USE_AREA_A".
1733          * @stable ICU 2.4
1734          */
1735         public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A
1736         = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A",
1737                 SUPPLEMENTARY_PRIVATE_USE_AREA_A_ID);
1738         /** UnicodeBlock constant for "SUPPLEMENTARY_PRIVATE_USE_AREA_B".
1739          * @stable ICU 2.4
1740          */
1741         public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B
1742         = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B",
1743                 SUPPLEMENTARY_PRIVATE_USE_AREA_B_ID);
1744         // Constants tagged @stable ICU 2.6 follow. NOTE(review): presumably the Unicode 4.0 additions - confirm.
1745         /** UnicodeBlock constant for "LIMBU".
1746          * @stable ICU 2.6
1747          */
1748         public static final UnicodeBlock LIMBU
1749         = new UnicodeBlock("LIMBU", LIMBU_ID);
1750         /** UnicodeBlock constant for "TAI_LE".
1751          * @stable ICU 2.6
1752          */
1753         public static final UnicodeBlock TAI_LE
1754         = new UnicodeBlock("TAI_LE", TAI_LE_ID);
1755         /** UnicodeBlock constant for "KHMER_SYMBOLS".
1756          * @stable ICU 2.6
1757          */
1758         public static final UnicodeBlock KHMER_SYMBOLS
1759         = new UnicodeBlock("KHMER_SYMBOLS", KHMER_SYMBOLS_ID);
1760 
1761         /** UnicodeBlock constant for "PHONETIC_EXTENSIONS".
1762          * @stable ICU 2.6
1763          */
1764         public static final UnicodeBlock PHONETIC_EXTENSIONS
1765         = new UnicodeBlock("PHONETIC_EXTENSIONS", PHONETIC_EXTENSIONS_ID);
1766 
1767         /** UnicodeBlock constant for "MISCELLANEOUS_SYMBOLS_AND_ARROWS".
1768          * @stable ICU 2.6
1769          */
1770         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS
1771         = new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS",
1772                 MISCELLANEOUS_SYMBOLS_AND_ARROWS_ID);
1773         /** UnicodeBlock constant for "YIJING_HEXAGRAM_SYMBOLS".
1774          * @stable ICU 2.6
1775          */
1776         public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS
1777         = new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS", YIJING_HEXAGRAM_SYMBOLS_ID);
1778         /** UnicodeBlock constant for "LINEAR_B_SYLLABARY".
1779          * @stable ICU 2.6
1780          */
1781         public static final UnicodeBlock LINEAR_B_SYLLABARY
1782         = new UnicodeBlock("LINEAR_B_SYLLABARY", LINEAR_B_SYLLABARY_ID);
1783         /** UnicodeBlock constant for "LINEAR_B_IDEOGRAMS".
1784          * @stable ICU 2.6
1785          */
1786         public static final UnicodeBlock LINEAR_B_IDEOGRAMS
1787         = new UnicodeBlock("LINEAR_B_IDEOGRAMS", LINEAR_B_IDEOGRAMS_ID);
1788         /** UnicodeBlock constant for "AEGEAN_NUMBERS".
1789          * @stable ICU 2.6
1790          */
1791         public static final UnicodeBlock AEGEAN_NUMBERS
1792         = new UnicodeBlock("AEGEAN_NUMBERS", AEGEAN_NUMBERS_ID);
1793         /** UnicodeBlock constant for "UGARITIC".
1794          * @stable ICU 2.6
1795          */
1796         public static final UnicodeBlock UGARITIC
1797         = new UnicodeBlock("UGARITIC", UGARITIC_ID);
1798         /** UnicodeBlock constant for "SHAVIAN".
1799          * @stable ICU 2.6
1800          */
1801         public static final UnicodeBlock SHAVIAN
1802         = new UnicodeBlock("SHAVIAN", SHAVIAN_ID);
1803         /** UnicodeBlock constant for "OSMANYA".
1804          * @stable ICU 2.6
1805          */
1806         public static final UnicodeBlock OSMANYA
1807         = new UnicodeBlock("OSMANYA", OSMANYA_ID);
1808         /** UnicodeBlock constant for "CYPRIOT_SYLLABARY".
1809          * @stable ICU 2.6
1810          */
1811         public static final UnicodeBlock CYPRIOT_SYLLABARY
1812         = new UnicodeBlock("CYPRIOT_SYLLABARY", CYPRIOT_SYLLABARY_ID);
1813         /** UnicodeBlock constant for "TAI_XUAN_JING_SYMBOLS".
1814          * @stable ICU 2.6
1815          */
1816         public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS
1817         = new UnicodeBlock("TAI_XUAN_JING_SYMBOLS", TAI_XUAN_JING_SYMBOLS_ID);
1818 
1819         /** UnicodeBlock constant for "VARIATION_SELECTORS_SUPPLEMENT".
1820          * @stable ICU 2.6
1821          */
1822         public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT
1823         = new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT", VARIATION_SELECTORS_SUPPLEMENT_ID);
1824 
1825         /* New blocks in Unicode 4.1. NOTE(review): the trailing bracketed hex values look like each block's first code point - confirm. */
1826 
1827         /** UnicodeBlock constant for "ANCIENT_GREEK_MUSICAL_NOTATION".
1828          * @stable ICU 3.4
1829          */
1830         public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION =
1831                 new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION",
1832                         ANCIENT_GREEK_MUSICAL_NOTATION_ID); /*[1D200]*/
1833 
1834         /** UnicodeBlock constant for "ANCIENT_GREEK_NUMBERS".
1835          * @stable ICU 3.4
1836          */
1837         public static final UnicodeBlock ANCIENT_GREEK_NUMBERS =
1838                 new UnicodeBlock("ANCIENT_GREEK_NUMBERS", ANCIENT_GREEK_NUMBERS_ID); /*[10140]*/
1839 
1840         /** UnicodeBlock constant for "ARABIC_SUPPLEMENT".
1841          * @stable ICU 3.4
1842          */
1843         public static final UnicodeBlock ARABIC_SUPPLEMENT =
1844                 new UnicodeBlock("ARABIC_SUPPLEMENT", ARABIC_SUPPLEMENT_ID); /*[0750]*/
1845 
1846         /** UnicodeBlock constant for "BUGINESE".
1847          * @stable ICU 3.4
1848          */
1849         public static final UnicodeBlock BUGINESE =
1850                 new UnicodeBlock("BUGINESE", BUGINESE_ID); /*[1A00]*/
1851 
1852         /** UnicodeBlock constant for "CJK_STROKES".
1853          * @stable ICU 3.4
1854          */
1855         public static final UnicodeBlock CJK_STROKES =
1856                 new UnicodeBlock("CJK_STROKES", CJK_STROKES_ID); /*[31C0]*/
1857 
1858         /** UnicodeBlock constant for "COMBINING_DIACRITICAL_MARKS_SUPPLEMENT".
1859          * @stable ICU 3.4
1860          */
1861         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT =
1862                 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT",
1863                         COMBINING_DIACRITICAL_MARKS_SUPPLEMENT_ID); /*[1DC0]*/
1864 
1865         /** UnicodeBlock constant for "COPTIC".
1866          * @stable ICU 3.4
1867          */
1868         public static final UnicodeBlock COPTIC = new UnicodeBlock("COPTIC", COPTIC_ID); /*[2C80]*/
1869 
1870         /** UnicodeBlock constant for "ETHIOPIC_EXTENDED".
1871          * @stable ICU 3.4
1872          */
1873         public static final UnicodeBlock ETHIOPIC_EXTENDED =
1874                 new UnicodeBlock("ETHIOPIC_EXTENDED", ETHIOPIC_EXTENDED_ID); /*[2D80]*/
1875 
1876         /** UnicodeBlock constant for "ETHIOPIC_SUPPLEMENT".
1877          * @stable ICU 3.4
1878          */
1879         public static final UnicodeBlock ETHIOPIC_SUPPLEMENT =
1880                 new UnicodeBlock("ETHIOPIC_SUPPLEMENT", ETHIOPIC_SUPPLEMENT_ID); /*[1380]*/
1881 
1882         /** UnicodeBlock constant for "GEORGIAN_SUPPLEMENT".
1883          * @stable ICU 3.4
1884          */
1885         public static final UnicodeBlock GEORGIAN_SUPPLEMENT =
1886                 new UnicodeBlock("GEORGIAN_SUPPLEMENT", GEORGIAN_SUPPLEMENT_ID); /*[2D00]*/
1887 
1888         /** UnicodeBlock constant for "GLAGOLITIC".
1889          * @stable ICU 3.4
1890          */
1891         public static final UnicodeBlock GLAGOLITIC =
1892                 new UnicodeBlock("GLAGOLITIC", GLAGOLITIC_ID); /*[2C00]*/
1893 
1894         /** UnicodeBlock constant for "KHAROSHTHI".
1895          * @stable ICU 3.4
1896          */
1897         public static final UnicodeBlock KHAROSHTHI =
1898                 new UnicodeBlock("KHAROSHTHI", KHAROSHTHI_ID); /*[10A00]*/
1899 
1900         /** UnicodeBlock constant for "MODIFIER_TONE_LETTERS".
1901          * @stable ICU 3.4
1902          */
1903         public static final UnicodeBlock MODIFIER_TONE_LETTERS =
1904                 new UnicodeBlock("MODIFIER_TONE_LETTERS", MODIFIER_TONE_LETTERS_ID); /*[A700]*/
1905 
1906         /** UnicodeBlock constant for "NEW_TAI_LUE".
1907          * @stable ICU 3.4
1908          */
1909         public static final UnicodeBlock NEW_TAI_LUE =
1910                 new UnicodeBlock("NEW_TAI_LUE", NEW_TAI_LUE_ID); /*[1980]*/
1911 
1912         /** UnicodeBlock constant for "OLD_PERSIAN".
1913          * @stable ICU 3.4
1914          */
1915         public static final UnicodeBlock OLD_PERSIAN =
1916                 new UnicodeBlock("OLD_PERSIAN", OLD_PERSIAN_ID); /*[103A0]*/
1917 
1918         /** UnicodeBlock constant for "PHONETIC_EXTENSIONS_SUPPLEMENT".
1919          * @stable ICU 3.4
1920          */
1921         public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT =
1922                 new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT",
1923                         PHONETIC_EXTENSIONS_SUPPLEMENT_ID); /*[1D80]*/
1924 
1925         /** UnicodeBlock constant for "SUPPLEMENTAL_PUNCTUATION".
1926          * @stable ICU 3.4
1927          */
1928         public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION =
1929                 new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION", SUPPLEMENTAL_PUNCTUATION_ID); /*[2E00]*/
1930 
1931         /** UnicodeBlock constant for "SYLOTI_NAGRI".
1932          * @stable ICU 3.4
1933          */
1934         public static final UnicodeBlock SYLOTI_NAGRI =
1935                 new UnicodeBlock("SYLOTI_NAGRI", SYLOTI_NAGRI_ID); /*[A800]*/
1936 
1937         /** UnicodeBlock constant for "TIFINAGH".
1938          * @stable ICU 3.4
1939          */
1940         public static final UnicodeBlock TIFINAGH =
1941                 new UnicodeBlock("TIFINAGH", TIFINAGH_ID); /*[2D30]*/
1942 
1943         /** UnicodeBlock constant for "VERTICAL_FORMS".
1944          * @stable ICU 3.4
1945          */
1946         public static final UnicodeBlock VERTICAL_FORMS =
1947                 new UnicodeBlock("VERTICAL_FORMS", VERTICAL_FORMS_ID); /*[FE10]*/
1948         // Constants tagged @stable ICU 3.6 follow. NOTE(review): presumably the Unicode 5.0 additions - confirm.
1949         /** UnicodeBlock constant for "NKO".
1950          * @stable ICU 3.6
1951          */
1952         public static final UnicodeBlock NKO = new UnicodeBlock("NKO", NKO_ID); /*[07C0]*/
1953         /** UnicodeBlock constant for "BALINESE".
1954          * @stable ICU 3.6
1955          */
1956         public static final UnicodeBlock BALINESE =
1957                 new UnicodeBlock("BALINESE", BALINESE_ID); /*[1B00]*/
1958         /** UnicodeBlock constant for "LATIN_EXTENDED_C".
1959          * @stable ICU 3.6
1960          */
1961         public static final UnicodeBlock LATIN_EXTENDED_C =
1962                 new UnicodeBlock("LATIN_EXTENDED_C", LATIN_EXTENDED_C_ID); /*[2C60]*/
1963         /** UnicodeBlock constant for "LATIN_EXTENDED_D".
1964          * @stable ICU 3.6
1965          */
1966         public static final UnicodeBlock LATIN_EXTENDED_D =
1967                 new UnicodeBlock("LATIN_EXTENDED_D", LATIN_EXTENDED_D_ID); /*[A720]*/
1968         /** UnicodeBlock constant for "PHAGS_PA".
1969          * @stable ICU 3.6
1970          */
1971         public static final UnicodeBlock PHAGS_PA =
1972                 new UnicodeBlock("PHAGS_PA", PHAGS_PA_ID); /*[A840]*/
1973         /** UnicodeBlock constant for "PHOENICIAN".
1974          * @stable ICU 3.6
1975          */
1976         public static final UnicodeBlock PHOENICIAN =
1977                 new UnicodeBlock("PHOENICIAN", PHOENICIAN_ID); /*[10900]*/
1978         /** UnicodeBlock constant for "CUNEIFORM".
1979          * @stable ICU 3.6
1980          */
1981         public static final UnicodeBlock CUNEIFORM =
1982                 new UnicodeBlock("CUNEIFORM", CUNEIFORM_ID); /*[12000]*/
1983         /** UnicodeBlock constant for "CUNEIFORM_NUMBERS_AND_PUNCTUATION".
1984          * @stable ICU 3.6
1985          */
1986         public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION =
1987                 new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION",
1988                         CUNEIFORM_NUMBERS_AND_PUNCTUATION_ID); /*[12400]*/
1989         /** UnicodeBlock constant for "COUNTING_ROD_NUMERALS".
1990          * @stable ICU 3.6
1991          */
1992         public static final UnicodeBlock COUNTING_ROD_NUMERALS =
1993                 new UnicodeBlock("COUNTING_ROD_NUMERALS", COUNTING_ROD_NUMERALS_ID); /*[1D360]*/
1994         // Constants tagged @stable ICU 4.0 follow. NOTE(review): presumably the Unicode 5.1 additions - confirm.
1995         /** UnicodeBlock constant for "SUNDANESE".
1996          * @stable ICU 4.0
1997          */
1998         public static final UnicodeBlock SUNDANESE =
1999                 new UnicodeBlock("SUNDANESE", SUNDANESE_ID); /* [1B80] */
2000 
2001         /** UnicodeBlock constant for "LEPCHA".
2002          * @stable ICU 4.0
2003          */
2004         public static final UnicodeBlock LEPCHA =
2005                 new UnicodeBlock("LEPCHA", LEPCHA_ID); /* [1C00] */
2006 
2007         /** UnicodeBlock constant for "OL_CHIKI".
2008          * @stable ICU 4.0
2009          */
2010         public static final UnicodeBlock OL_CHIKI =
2011                 new UnicodeBlock("OL_CHIKI", OL_CHIKI_ID); /* [1C50] */
2012 
2013         /** UnicodeBlock constant for "CYRILLIC_EXTENDED_A".
2014          * @stable ICU 4.0
2015          */
2016         public static final UnicodeBlock CYRILLIC_EXTENDED_A =
2017                 new UnicodeBlock("CYRILLIC_EXTENDED_A", CYRILLIC_EXTENDED_A_ID); /* [2DE0] */
2018 
2019         /** UnicodeBlock constant for "VAI".
2020          * @stable ICU 4.0
2021          */
2022         public static final UnicodeBlock VAI = new UnicodeBlock("VAI", VAI_ID); /* [A500] */
2023 
2024         /** UnicodeBlock constant for "CYRILLIC_EXTENDED_B".
2025          * @stable ICU 4.0
2026          */
2027         public static final UnicodeBlock CYRILLIC_EXTENDED_B =
2028                 new UnicodeBlock("CYRILLIC_EXTENDED_B", CYRILLIC_EXTENDED_B_ID); /* [A640] */
2029 
2030         /** UnicodeBlock constant for "SAURASHTRA".
2031          * @stable ICU 4.0
2032          */
2033         public static final UnicodeBlock SAURASHTRA =
2034                 new UnicodeBlock("SAURASHTRA", SAURASHTRA_ID); /* [A880] */
2035 
2036         /** UnicodeBlock constant for "KAYAH_LI".
2037          * @stable ICU 4.0
2038          */
2039         public static final UnicodeBlock KAYAH_LI =
2040                 new UnicodeBlock("KAYAH_LI", KAYAH_LI_ID); /* [A900] */
2041 
2042         /** UnicodeBlock constant for "REJANG".
2043          * @stable ICU 4.0
2044          */
2045         public static final UnicodeBlock REJANG =
2046                 new UnicodeBlock("REJANG", REJANG_ID); /* [A930] */
2047 
2048         /** UnicodeBlock constant for "CHAM".
2049          * @stable ICU 4.0
2050          */
2051         public static final UnicodeBlock CHAM =
2052                 new UnicodeBlock("CHAM", CHAM_ID); /* [AA00] */
2053 
2054         /** UnicodeBlock constant for "ANCIENT_SYMBOLS".
2055          * @stable ICU 4.0
2056          */
2057         public static final UnicodeBlock ANCIENT_SYMBOLS =
2058                 new UnicodeBlock("ANCIENT_SYMBOLS", ANCIENT_SYMBOLS_ID); /* [10190] */
2059 
2060         /** UnicodeBlock constant for "PHAISTOS_DISC".
2061          * @stable ICU 4.0
2062          */
2063         public static final UnicodeBlock PHAISTOS_DISC =
2064                 new UnicodeBlock("PHAISTOS_DISC", PHAISTOS_DISC_ID); /* [101D0] */
2065 
2066         /** UnicodeBlock constant for "LYCIAN".
2067          * @stable ICU 4.0
2068          */
2069         public static final UnicodeBlock LYCIAN =
2070                 new UnicodeBlock("LYCIAN", LYCIAN_ID); /* [10280] */
2071 
2072         /** UnicodeBlock constant for "CARIAN".
2073          * @stable ICU 4.0
2074          */
2075         public static final UnicodeBlock CARIAN =
2076                 new UnicodeBlock("CARIAN", CARIAN_ID); /* [102A0] */
2077 
2078         /** UnicodeBlock constant for "LYDIAN".
2079          * @stable ICU 4.0
2080          */
2081         public static final UnicodeBlock LYDIAN =
2082                 new UnicodeBlock("LYDIAN", LYDIAN_ID); /* [10920] */
2083 
2084         /** UnicodeBlock constant for "MAHJONG_TILES".
2085          * @stable ICU 4.0
2086          */
2087         public static final UnicodeBlock MAHJONG_TILES =
2088                 new UnicodeBlock("MAHJONG_TILES", MAHJONG_TILES_ID); /* [1F000] */
2089 
2090         /** UnicodeBlock constant for "DOMINO_TILES".
2091          * @stable ICU 4.0
2092          */
2093         public static final UnicodeBlock DOMINO_TILES =
2094                 new UnicodeBlock("DOMINO_TILES", DOMINO_TILES_ID); /* [1F030] */
2095 
2096         /* New blocks in Unicode 5.2. NOTE(review): the trailing bracketed hex values look like each block's first code point - confirm. */
2097 
2098         /** @stable ICU 4.4 */
2099         public static final UnicodeBlock SAMARITAN =
2100                 new UnicodeBlock("SAMARITAN", SAMARITAN_ID); /*[0800]*/
2101         /** @stable ICU 4.4 */
2102         public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED =
2103                 new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED",
2104                         UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_ID); /*[18B0]*/
2105         /** @stable ICU 4.4 */
2106         public static final UnicodeBlock TAI_THAM =
2107                 new UnicodeBlock("TAI_THAM", TAI_THAM_ID); /*[1A20]*/
2108         /** @stable ICU 4.4 */
2109         public static final UnicodeBlock VEDIC_EXTENSIONS =
2110                 new UnicodeBlock("VEDIC_EXTENSIONS", VEDIC_EXTENSIONS_ID); /*[1CD0]*/
2111         /** @stable ICU 4.4 */
2112         public static final UnicodeBlock LISU =
2113                 new UnicodeBlock("LISU", LISU_ID); /*[A4D0]*/
2114         /** @stable ICU 4.4 */
2115         public static final UnicodeBlock BAMUM =
2116                 new UnicodeBlock("BAMUM", BAMUM_ID); /*[A6A0]*/
2117         /** @stable ICU 4.4 */
2118         public static final UnicodeBlock COMMON_INDIC_NUMBER_FORMS =
2119                 new UnicodeBlock("COMMON_INDIC_NUMBER_FORMS", COMMON_INDIC_NUMBER_FORMS_ID); /*[A830]*/
2120         /** @stable ICU 4.4 */
2121         public static final UnicodeBlock DEVANAGARI_EXTENDED =
2122                 new UnicodeBlock("DEVANAGARI_EXTENDED", DEVANAGARI_EXTENDED_ID); /*[A8E0]*/
2123         /** @stable ICU 4.4 */
2124         public static final UnicodeBlock HANGUL_JAMO_EXTENDED_A =
2125                 new UnicodeBlock("HANGUL_JAMO_EXTENDED_A", HANGUL_JAMO_EXTENDED_A_ID); /*[A960]*/
2126         /** @stable ICU 4.4 */
2127         public static final UnicodeBlock JAVANESE =
2128                 new UnicodeBlock("JAVANESE", JAVANESE_ID); /*[A980]*/
2129         /** @stable ICU 4.4 */
2130         public static final UnicodeBlock MYANMAR_EXTENDED_A =
2131                 new UnicodeBlock("MYANMAR_EXTENDED_A", MYANMAR_EXTENDED_A_ID); /*[AA60]*/
2132         /** @stable ICU 4.4 */
2133         public static final UnicodeBlock TAI_VIET =
2134                 new UnicodeBlock("TAI_VIET", TAI_VIET_ID); /*[AA80]*/
2135         /** @stable ICU 4.4 */
2136         public static final UnicodeBlock MEETEI_MAYEK =
2137                 new UnicodeBlock("MEETEI_MAYEK", MEETEI_MAYEK_ID); /*[ABC0]*/
2138         /** @stable ICU 4.4 */
2139         public static final UnicodeBlock HANGUL_JAMO_EXTENDED_B =
2140                 new UnicodeBlock("HANGUL_JAMO_EXTENDED_B", HANGUL_JAMO_EXTENDED_B_ID); /*[D7B0]*/
2141         /** @stable ICU 4.4 */
2142         public static final UnicodeBlock IMPERIAL_ARAMAIC =
2143                 new UnicodeBlock("IMPERIAL_ARAMAIC", IMPERIAL_ARAMAIC_ID); /*[10840]*/
2144         /** @stable ICU 4.4 */
2145         public static final UnicodeBlock OLD_SOUTH_ARABIAN =
2146                 new UnicodeBlock("OLD_SOUTH_ARABIAN", OLD_SOUTH_ARABIAN_ID); /*[10A60]*/
2147         /** @stable ICU 4.4 */
2148         public static final UnicodeBlock AVESTAN =
2149                 new UnicodeBlock("AVESTAN", AVESTAN_ID); /*[10B00]*/
2150         /** @stable ICU 4.4 */
2151         public static final UnicodeBlock INSCRIPTIONAL_PARTHIAN =
2152                 new UnicodeBlock("INSCRIPTIONAL_PARTHIAN", INSCRIPTIONAL_PARTHIAN_ID); /*[10B40]*/
2153         /** @stable ICU 4.4 */
2154         public static final UnicodeBlock INSCRIPTIONAL_PAHLAVI =
2155                 new UnicodeBlock("INSCRIPTIONAL_PAHLAVI", INSCRIPTIONAL_PAHLAVI_ID); /*[10B60]*/
2156         /** @stable ICU 4.4 */
2157         public static final UnicodeBlock OLD_TURKIC =
2158                 new UnicodeBlock("OLD_TURKIC", OLD_TURKIC_ID); /*[10C00]*/
2159         /** @stable ICU 4.4 */
2160         public static final UnicodeBlock RUMI_NUMERAL_SYMBOLS =
2161                 new UnicodeBlock("RUMI_NUMERAL_SYMBOLS", RUMI_NUMERAL_SYMBOLS_ID); /*[10E60]*/
2162         /** @stable ICU 4.4 */
2163         public static final UnicodeBlock KAITHI =
2164                 new UnicodeBlock("KAITHI", KAITHI_ID); /*[11080]*/
2165         /** @stable ICU 4.4 */
2166         public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS =
2167                 new UnicodeBlock("EGYPTIAN_HIEROGLYPHS", EGYPTIAN_HIEROGLYPHS_ID); /*[13000]*/
2168         /** @stable ICU 4.4 */
2169         public static final UnicodeBlock ENCLOSED_ALPHANUMERIC_SUPPLEMENT =
2170                 new UnicodeBlock("ENCLOSED_ALPHANUMERIC_SUPPLEMENT",
2171                         ENCLOSED_ALPHANUMERIC_SUPPLEMENT_ID); /*[1F100]*/
2172         /** @stable ICU 4.4 */
2173         public static final UnicodeBlock ENCLOSED_IDEOGRAPHIC_SUPPLEMENT =
2174                 new UnicodeBlock("ENCLOSED_IDEOGRAPHIC_SUPPLEMENT",
2175                         ENCLOSED_IDEOGRAPHIC_SUPPLEMENT_ID); /*[1F200]*/
2176         /** @stable ICU 4.4 */
2177         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C =
2178                 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C",
2179                         CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_ID); /*[2A700]*/
2180 
2181         /* New blocks in Unicode 6.0. NOTE(review): the trailing bracketed hex values look like each block's first code point - confirm. */
2182 
2183         /** @stable ICU 4.6 */
2184         public static final UnicodeBlock MANDAIC =
2185                 new UnicodeBlock("MANDAIC", MANDAIC_ID); /*[0840]*/
2186         /** @stable ICU 4.6 */
2187         public static final UnicodeBlock BATAK =
2188                 new UnicodeBlock("BATAK", BATAK_ID); /*[1BC0]*/
2189         /** @stable ICU 4.6 */
2190         public static final UnicodeBlock ETHIOPIC_EXTENDED_A =
2191                 new UnicodeBlock("ETHIOPIC_EXTENDED_A", ETHIOPIC_EXTENDED_A_ID); /*[AB00]*/
2192         /** @stable ICU 4.6 */
2193         public static final UnicodeBlock BRAHMI =
2194                 new UnicodeBlock("BRAHMI", BRAHMI_ID); /*[11000]*/
2195         /** @stable ICU 4.6 */
2196         public static final UnicodeBlock BAMUM_SUPPLEMENT =
2197                 new UnicodeBlock("BAMUM_SUPPLEMENT", BAMUM_SUPPLEMENT_ID); /*[16800]*/
2198         /** @stable ICU 4.6 */
2199         public static final UnicodeBlock KANA_SUPPLEMENT =
2200                 new UnicodeBlock("KANA_SUPPLEMENT", KANA_SUPPLEMENT_ID); /*[1B000]*/
2201         /** @stable ICU 4.6 */
2202         public static final UnicodeBlock PLAYING_CARDS =
2203                 new UnicodeBlock("PLAYING_CARDS", PLAYING_CARDS_ID); /*[1F0A0]*/
2204         /** @stable ICU 4.6 */
2205         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS =
2206                 new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS",
2207                         MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS_ID); /*[1F300]*/
2208         /** @stable ICU 4.6 */
2209         public static final UnicodeBlock EMOTICONS =
2210                 new UnicodeBlock("EMOTICONS", EMOTICONS_ID); /*[1F600]*/
2211         /** @stable ICU 4.6 */
2212         public static final UnicodeBlock TRANSPORT_AND_MAP_SYMBOLS =
2213                 new UnicodeBlock("TRANSPORT_AND_MAP_SYMBOLS", TRANSPORT_AND_MAP_SYMBOLS_ID); /*[1F680]*/
2214         /** @stable ICU 4.6 */
2215         public static final UnicodeBlock ALCHEMICAL_SYMBOLS =
2216                 new UnicodeBlock("ALCHEMICAL_SYMBOLS", ALCHEMICAL_SYMBOLS_ID); /*[1F700]*/
2217         /** @stable ICU 4.6 */
2218         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D =
2219                 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D",
2220                         CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_ID); /*[2B740]*/
2221 
2222         /* New blocks in Unicode 6.1. NOTE(review): the trailing bracketed hex values look like each block's first code point - confirm. */
2223 
2224         /** @stable ICU 49 */
2225         public static final UnicodeBlock ARABIC_EXTENDED_A =
2226                 new UnicodeBlock("ARABIC_EXTENDED_A", ARABIC_EXTENDED_A_ID); /*[08A0]*/
2227         /** @stable ICU 49 */
2228         public static final UnicodeBlock ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS =
2229                 new UnicodeBlock("ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS", ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS_ID); /*[1EE00]*/
2230         /** @stable ICU 49 */
2231         public static final UnicodeBlock CHAKMA = new UnicodeBlock("CHAKMA", CHAKMA_ID); /*[11100]*/
2232         /** @stable ICU 49 */
2233         public static final UnicodeBlock MEETEI_MAYEK_EXTENSIONS =
2234                 new UnicodeBlock("MEETEI_MAYEK_EXTENSIONS", MEETEI_MAYEK_EXTENSIONS_ID); /*[AAE0]*/
2235         /** @stable ICU 49 */
2236         public static final UnicodeBlock MEROITIC_CURSIVE =
2237                 new UnicodeBlock("MEROITIC_CURSIVE", MEROITIC_CURSIVE_ID); /*[109A0]*/
2238         /** @stable ICU 49 */
2239         public static final UnicodeBlock MEROITIC_HIEROGLYPHS =
2240                 new UnicodeBlock("MEROITIC_HIEROGLYPHS", MEROITIC_HIEROGLYPHS_ID); /*[10980]*/
2241         /** @stable ICU 49 */
2242         public static final UnicodeBlock MIAO = new UnicodeBlock("MIAO", MIAO_ID); /*[16F00]*/
2243         /** @stable ICU 49 */
2244         public static final UnicodeBlock SHARADA = new UnicodeBlock("SHARADA", SHARADA_ID); /*[11180]*/
2245         /** @stable ICU 49 */
2246         public static final UnicodeBlock SORA_SOMPENG =
2247                 new UnicodeBlock("SORA_SOMPENG", SORA_SOMPENG_ID); /*[110D0]*/
2248         /** @stable ICU 49 */
2249         public static final UnicodeBlock SUNDANESE_SUPPLEMENT =
2250                 new UnicodeBlock("SUNDANESE_SUPPLEMENT", SUNDANESE_SUPPLEMENT_ID); /*[1CC0]*/
2251         /** @stable ICU 49 */
2252         public static final UnicodeBlock TAKRI = new UnicodeBlock("TAKRI", TAKRI_ID); /*[11680]*/
2253 
2254         /* New blocks in Unicode 7.0. NOTE(review): the trailing bracketed hex values look like each block's first code point - confirm. */
2255 
2256         /** @stable ICU 54 */
2257         public static final UnicodeBlock BASSA_VAH = new UnicodeBlock("BASSA_VAH", BASSA_VAH_ID); /*[16AD0]*/
2258         /** @stable ICU 54 */
2259         public static final UnicodeBlock CAUCASIAN_ALBANIAN =
2260                 new UnicodeBlock("CAUCASIAN_ALBANIAN", CAUCASIAN_ALBANIAN_ID); /*[10530]*/
2261         /** @stable ICU 54 */
2262         public static final UnicodeBlock COPTIC_EPACT_NUMBERS =
2263                 new UnicodeBlock("COPTIC_EPACT_NUMBERS", COPTIC_EPACT_NUMBERS_ID); /*[102E0]*/
2264         /** @stable ICU 54 */
2265         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_EXTENDED =
2266                 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_EXTENDED", COMBINING_DIACRITICAL_MARKS_EXTENDED_ID); /*[1AB0]*/
2267         /** @stable ICU 54 */
2268         public static final UnicodeBlock DUPLOYAN = new UnicodeBlock("DUPLOYAN", DUPLOYAN_ID); /*[1BC00]*/
2269         /** @stable ICU 54 */
2270         public static final UnicodeBlock ELBASAN = new UnicodeBlock("ELBASAN", ELBASAN_ID); /*[10500]*/
2271         /** @stable ICU 54 */
2272         public static final UnicodeBlock GEOMETRIC_SHAPES_EXTENDED =
2273                 new UnicodeBlock("GEOMETRIC_SHAPES_EXTENDED", GEOMETRIC_SHAPES_EXTENDED_ID); /*[1F780]*/
2274         /** @stable ICU 54 */
2275         public static final UnicodeBlock GRANTHA = new UnicodeBlock("GRANTHA", GRANTHA_ID); /*[11300]*/
2276         /** @stable ICU 54 */
2277         public static final UnicodeBlock KHOJKI = new UnicodeBlock("KHOJKI", KHOJKI_ID); /*[11200]*/
2278         /** @stable ICU 54 */
2279         public static final UnicodeBlock KHUDAWADI = new UnicodeBlock("KHUDAWADI", KHUDAWADI_ID); /*[112B0]*/
2280         /** @stable ICU 54 */
2281         public static final UnicodeBlock LATIN_EXTENDED_E =
2282                 new UnicodeBlock("LATIN_EXTENDED_E", LATIN_EXTENDED_E_ID); /*[AB30]*/
2283         /** @stable ICU 54 */
2284         public static final UnicodeBlock LINEAR_A = new UnicodeBlock("LINEAR_A", LINEAR_A_ID); /*[10600]*/
2285         /** @stable ICU 54 */
2286         public static final UnicodeBlock MAHAJANI = new UnicodeBlock("MAHAJANI", MAHAJANI_ID); /*[11150]*/
2287         /** @stable ICU 54 */
2288         public static final UnicodeBlock MANICHAEAN = new UnicodeBlock("MANICHAEAN", MANICHAEAN_ID); /*[10AC0]*/
2289         /** @stable ICU 54 */
2290         public static final UnicodeBlock MENDE_KIKAKUI =
2291                 new UnicodeBlock("MENDE_KIKAKUI", MENDE_KIKAKUI_ID); /*[1E800]*/
2292         /** @stable ICU 54 */
2293         public static final UnicodeBlock MODI = new UnicodeBlock("MODI", MODI_ID); /*[11600]*/
2294         /** @stable ICU 54 */
2295         public static final UnicodeBlock MRO = new UnicodeBlock("MRO", MRO_ID); /*[16A40]*/
2296         /** @stable ICU 54 */
2297         public static final UnicodeBlock MYANMAR_EXTENDED_B =
2298                 new UnicodeBlock("MYANMAR_EXTENDED_B", MYANMAR_EXTENDED_B_ID); /*[A9E0]*/
2299         /** @stable ICU 54 */
2300         public static final UnicodeBlock NABATAEAN = new UnicodeBlock("NABATAEAN", NABATAEAN_ID); /*[10880]*/
2301         /** @stable ICU 54 */
2302         public static final UnicodeBlock OLD_NORTH_ARABIAN =
2303                 new UnicodeBlock("OLD_NORTH_ARABIAN", OLD_NORTH_ARABIAN_ID); /*[10A80]*/
2304         /** @stable ICU 54 */
2305         public static final UnicodeBlock OLD_PERMIC = new UnicodeBlock("OLD_PERMIC", OLD_PERMIC_ID); /*[10350]*/
2306         /** @stable ICU 54 */
2307         public static final UnicodeBlock ORNAMENTAL_DINGBATS =
2308                 new UnicodeBlock("ORNAMENTAL_DINGBATS", ORNAMENTAL_DINGBATS_ID); /*[1F650]*/
2309         /** @stable ICU 54 */
2310         public static final UnicodeBlock PAHAWH_HMONG = new UnicodeBlock("PAHAWH_HMONG", PAHAWH_HMONG_ID); /*[16B00]*/
2311         /** @stable ICU 54 */
2312         public static final UnicodeBlock PALMYRENE = new UnicodeBlock("PALMYRENE", PALMYRENE_ID); /*[10860]*/
2313         /** @stable ICU 54 */
2314         public static final UnicodeBlock PAU_CIN_HAU = new UnicodeBlock("PAU_CIN_HAU", PAU_CIN_HAU_ID); /*[11AC0]*/
2315         /** @stable ICU 54 */
2316         public static final UnicodeBlock PSALTER_PAHLAVI =
2317                 new UnicodeBlock("PSALTER_PAHLAVI", PSALTER_PAHLAVI_ID); /*[10B80]*/
2318         /** @stable ICU 54 */
2319         public static final UnicodeBlock SHORTHAND_FORMAT_CONTROLS =
2320                 new UnicodeBlock("SHORTHAND_FORMAT_CONTROLS", SHORTHAND_FORMAT_CONTROLS_ID); /*[1BCA0]*/
2321         /** @stable ICU 54 */
2322         public static final UnicodeBlock SIDDHAM = new UnicodeBlock("SIDDHAM", SIDDHAM_ID); /*[11580]*/
2323         /** @stable ICU 54 */
2324         public static final UnicodeBlock SINHALA_ARCHAIC_NUMBERS =
2325                 new UnicodeBlock("SINHALA_ARCHAIC_NUMBERS", SINHALA_ARCHAIC_NUMBERS_ID); /*[111E0]*/
2326         /** @stable ICU 54 */
2327         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_C =
2328                 new UnicodeBlock("SUPPLEMENTAL_ARROWS_C", SUPPLEMENTAL_ARROWS_C_ID); /*[1F800]*/
2329         /** @stable ICU 54 */
2330         public static final UnicodeBlock TIRHUTA = new UnicodeBlock("TIRHUTA", TIRHUTA_ID); /*[11480]*/
2331         /** @stable ICU 54 */
2332         public static final UnicodeBlock WARANG_CITI = new UnicodeBlock("WARANG_CITI", WARANG_CITI_ID); /*[118A0]*/
2333 
        /*
         * New blocks in Unicode 8.0.
         * All of them are tagged @stable ICU 56. As with every other block constant,
         * the private constructor stores each instance in BLOCKS_ under its *_ID index
         * when that ID is non-negative.
         */

        /** @stable ICU 56 */
        public static final UnicodeBlock AHOM = new UnicodeBlock("AHOM", AHOM_ID); /*[11700]*/
        /** @stable ICU 56 */
        public static final UnicodeBlock ANATOLIAN_HIEROGLYPHS =
                new UnicodeBlock("ANATOLIAN_HIEROGLYPHS", ANATOLIAN_HIEROGLYPHS_ID); /*[14400]*/
        /** @stable ICU 56 */
        public static final UnicodeBlock CHEROKEE_SUPPLEMENT =
                new UnicodeBlock("CHEROKEE_SUPPLEMENT", CHEROKEE_SUPPLEMENT_ID); /*[AB70]*/
        /** @stable ICU 56 */
        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E =
                new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E",
                        CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E_ID); /*[2B820]*/
        /** @stable ICU 56 */
        public static final UnicodeBlock EARLY_DYNASTIC_CUNEIFORM =
                new UnicodeBlock("EARLY_DYNASTIC_CUNEIFORM", EARLY_DYNASTIC_CUNEIFORM_ID); /*[12480]*/
        /** @stable ICU 56 */
        public static final UnicodeBlock HATRAN = new UnicodeBlock("HATRAN", HATRAN_ID); /*[108E0]*/
        /** @stable ICU 56 */
        public static final UnicodeBlock MULTANI = new UnicodeBlock("MULTANI", MULTANI_ID); /*[11280]*/
        /** @stable ICU 56 */
        public static final UnicodeBlock OLD_HUNGARIAN =
                new UnicodeBlock("OLD_HUNGARIAN", OLD_HUNGARIAN_ID); /*[10C80]*/
        /** @stable ICU 56 */
        public static final UnicodeBlock SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS =
                new UnicodeBlock("SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS",
                        SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS_ID); /*[1F900]*/
        /** @stable ICU 56 */
        public static final UnicodeBlock SUTTON_SIGNWRITING =
                new UnicodeBlock("SUTTON_SIGNWRITING", SUTTON_SIGNWRITING_ID); /*[1D800]*/
2365 
        /**
         * Sentinel block used for invalid input.
         * {@link #getInstance(int)} returns it for an ID that is negative or not below
         * the size of the internal block table, and {@link #of(int)} returns it for a
         * code point greater than {@link UCharacter#MAX_VALUE}.
         * @stable ICU 2.4
         */
        public static final UnicodeBlock INVALID_CODE
        = new UnicodeBlock("INVALID_CODE", INVALID_CODE_ID);
2371 
2372         static {
2373             for (int blockId = 0; blockId < COUNT; ++blockId) {
2374                 if (BLOCKS_[blockId] == null) {
2375                     throw new java.lang.IllegalStateException(
2376                             "UnicodeBlock.BLOCKS_[" + blockId + "] not initialized");
2377                 }
2378             }
2379         }
2380 
2381         // public methods --------------------------------------------------
2382 
2383         /**
2384          * {@icu} Returns the only instance of the UnicodeBlock with the argument ID.
2385          * If no such ID exists, a INVALID_CODE UnicodeBlock will be returned.
2386          * @param id UnicodeBlock ID
2387          * @return the only instance of the UnicodeBlock with the argument ID
2388          *         if it exists, otherwise a INVALID_CODE UnicodeBlock will be
2389          *         returned.
2390          * @stable ICU 2.4
2391          */
getInstance(int id)2392         public static UnicodeBlock getInstance(int id)
2393         {
2394             if (id >= 0 && id < BLOCKS_.length) {
2395                 return BLOCKS_[id];
2396             }
2397             return INVALID_CODE;
2398         }
2399 
2400         /**
2401          * Returns the Unicode allocation block that contains the code point,
2402          * or null if the code point is not a member of a defined block.
2403          * @param ch code point to be tested
2404          * @return the Unicode allocation block that contains the code point
2405          * @stable ICU 2.4
2406          */
of(int ch)2407         public static UnicodeBlock of(int ch)
2408         {
2409             if (ch > MAX_VALUE) {
2410                 return INVALID_CODE;
2411             }
2412 
2413             return UnicodeBlock.getInstance(
2414                     UCharacterProperty.INSTANCE.getIntPropertyValue(ch, UProperty.BLOCK));
2415         }
2416 
2417         /**
2418          * Alternative to the {@link java.lang.Character.UnicodeBlock#forName(String)} method.
2419          * Returns the Unicode block with the given name. {@icunote} Unlike
2420          * {@link java.lang.Character.UnicodeBlock#forName(String)}, this only matches
2421          * against the official UCD name and the Java block name
2422          * (ignoring case).
2423          * @param blockName the name of the block to match
2424          * @return the UnicodeBlock with that name
2425          * @throws IllegalArgumentException if the blockName could not be matched
2426          * @stable ICU 3.0
2427          */
forName(String blockName)2428         public static final UnicodeBlock forName(String blockName) {
2429             Map<String, UnicodeBlock> m = null;
2430             if (mref != null) {
2431                 m = mref.get();
2432             }
2433             if (m == null) {
2434                 m = new HashMap<String, UnicodeBlock>(BLOCKS_.length);
2435                 for (int i = 0; i < BLOCKS_.length; ++i) {
2436                     UnicodeBlock b = BLOCKS_[i];
2437                     String name = trimBlockName(
2438                             getPropertyValueName(UProperty.BLOCK, b.getID(),
2439                                     UProperty.NameChoice.LONG));
2440                     m.put(name, b);
2441                 }
2442                 mref = new SoftReference<Map<String, UnicodeBlock>>(m);
2443             }
2444             UnicodeBlock b = m.get(trimBlockName(blockName));
2445             if (b == null) {
2446                 throw new IllegalArgumentException();
2447             }
2448             return b;
2449         }
2450         private static SoftReference<Map<String, UnicodeBlock>> mref;
2451 
trimBlockName(String name)2452         private static String trimBlockName(String name) {
2453             String upper = name.toUpperCase(Locale.ENGLISH);
2454             StringBuilder result = new StringBuilder(upper.length());
2455             for (int i = 0; i < upper.length(); i++) {
2456                 char c = upper.charAt(i);
2457                 if (c != ' ' && c != '_' && c != '-') {
2458                     result.append(c);
2459                 }
2460             }
2461             return result.toString();
2462         }
2463 
2464         /**
2465          * {icu} Returns the type ID of this Unicode block
2466          * @return integer type ID of this Unicode block
2467          * @stable ICU 2.4
2468          */
getID()2469         public int getID()
2470         {
2471             return m_id_;
2472         }
2473 
2474         // private data members ---------------------------------------------
2475 
2476         /**
2477          * Identification code for this UnicodeBlock
2478          */
2479         private int m_id_;
2480 
2481         // private constructor ----------------------------------------------
2482 
2483         /**
2484          * UnicodeBlock constructor
2485          * @param name name of this UnicodeBlock
2486          * @param id unique id of this UnicodeBlock
2487          * @exception NullPointerException if name is <code>null</code>
2488          */
UnicodeBlock(String name, int id)2489         private UnicodeBlock(String name, int id)
2490         {
2491             super(name);
2492             m_id_ = id;
2493             if (id >= 0) {
2494                 BLOCKS_[id] = this;
2495             }
2496         }
2497     }
2498 
2499     /**
2500      * East Asian Width constants.
2501      * @see UProperty#EAST_ASIAN_WIDTH
2502      * @see UCharacter#getIntPropertyValue
2503      * @stable ICU 2.4
2504      */
2505     public static interface EastAsianWidth
2506     {
2507         /**
2508          * @stable ICU 2.4
2509          */
2510         public static final int NEUTRAL = 0;
2511         /**
2512          * @stable ICU 2.4
2513          */
2514         public static final int AMBIGUOUS = 1;
2515         /**
2516          * @stable ICU 2.4
2517          */
2518         public static final int HALFWIDTH = 2;
2519         /**
2520          * @stable ICU 2.4
2521          */
2522         public static final int FULLWIDTH = 3;
2523         /**
2524          * @stable ICU 2.4
2525          */
2526         public static final int NARROW = 4;
2527         /**
2528          * @stable ICU 2.4
2529          */
2530         public static final int WIDE = 5;
2531         /**
2532          * @stable ICU 2.4
2533          */
2534         public static final int COUNT = 6;
2535     }
2536 
2537     /**
2538      * Decomposition Type constants.
2539      * @see UProperty#DECOMPOSITION_TYPE
2540      * @stable ICU 2.4
2541      */
2542     public static interface DecompositionType
2543     {
2544         /**
2545          * @stable ICU 2.4
2546          */
2547         public static final int NONE = 0;
2548         /**
2549          * @stable ICU 2.4
2550          */
2551         public static final int CANONICAL = 1;
2552         /**
2553          * @stable ICU 2.4
2554          */
2555         public static final int COMPAT = 2;
2556         /**
2557          * @stable ICU 2.4
2558          */
2559         public static final int CIRCLE = 3;
2560         /**
2561          * @stable ICU 2.4
2562          */
2563         public static final int FINAL = 4;
2564         /**
2565          * @stable ICU 2.4
2566          */
2567         public static final int FONT = 5;
2568         /**
2569          * @stable ICU 2.4
2570          */
2571         public static final int FRACTION = 6;
2572         /**
2573          * @stable ICU 2.4
2574          */
2575         public static final int INITIAL = 7;
2576         /**
2577          * @stable ICU 2.4
2578          */
2579         public static final int ISOLATED = 8;
2580         /**
2581          * @stable ICU 2.4
2582          */
2583         public static final int MEDIAL = 9;
2584         /**
2585          * @stable ICU 2.4
2586          */
2587         public static final int NARROW = 10;
2588         /**
2589          * @stable ICU 2.4
2590          */
2591         public static final int NOBREAK = 11;
2592         /**
2593          * @stable ICU 2.4
2594          */
2595         public static final int SMALL = 12;
2596         /**
2597          * @stable ICU 2.4
2598          */
2599         public static final int SQUARE = 13;
2600         /**
2601          * @stable ICU 2.4
2602          */
2603         public static final int SUB = 14;
2604         /**
2605          * @stable ICU 2.4
2606          */
2607         public static final int SUPER = 15;
2608         /**
2609          * @stable ICU 2.4
2610          */
2611         public static final int VERTICAL = 16;
2612         /**
2613          * @stable ICU 2.4
2614          */
2615         public static final int WIDE = 17;
2616         /**
2617          * @stable ICU 2.4
2618          */
2619         public static final int COUNT = 18;
2620     }
2621 
2622     /**
2623      * Joining Type constants.
2624      * @see UProperty#JOINING_TYPE
2625      * @stable ICU 2.4
2626      */
2627     public static interface JoiningType
2628     {
2629         /**
2630          * @stable ICU 2.4
2631          */
2632         public static final int NON_JOINING = 0;
2633         /**
2634          * @stable ICU 2.4
2635          */
2636         public static final int JOIN_CAUSING = 1;
2637         /**
2638          * @stable ICU 2.4
2639          */
2640         public static final int DUAL_JOINING = 2;
2641         /**
2642          * @stable ICU 2.4
2643          */
2644         public static final int LEFT_JOINING = 3;
2645         /**
2646          * @stable ICU 2.4
2647          */
2648         public static final int RIGHT_JOINING = 4;
2649         /**
2650          * @stable ICU 2.4
2651          */
2652         public static final int TRANSPARENT = 5;
2653         /**
2654          * @stable ICU 2.4
2655          */
2656         public static final int COUNT = 6;
2657     }
2658 
2659     /**
2660      * Joining Group constants.
2661      * @see UProperty#JOINING_GROUP
2662      * @stable ICU 2.4
2663      */
2664     public static interface JoiningGroup
2665     {
2666         /**
2667          * @stable ICU 2.4
2668          */
2669         public static final int NO_JOINING_GROUP = 0;
2670         /**
2671          * @stable ICU 2.4
2672          */
2673         public static final int AIN = 1;
2674         /**
2675          * @stable ICU 2.4
2676          */
2677         public static final int ALAPH = 2;
2678         /**
2679          * @stable ICU 2.4
2680          */
2681         public static final int ALEF = 3;
2682         /**
2683          * @stable ICU 2.4
2684          */
2685         public static final int BEH = 4;
2686         /**
2687          * @stable ICU 2.4
2688          */
2689         public static final int BETH = 5;
2690         /**
2691          * @stable ICU 2.4
2692          */
2693         public static final int DAL = 6;
2694         /**
2695          * @stable ICU 2.4
2696          */
2697         public static final int DALATH_RISH = 7;
2698         /**
2699          * @stable ICU 2.4
2700          */
2701         public static final int E = 8;
2702         /**
2703          * @stable ICU 2.4
2704          */
2705         public static final int FEH = 9;
2706         /**
2707          * @stable ICU 2.4
2708          */
2709         public static final int FINAL_SEMKATH = 10;
2710         /**
2711          * @stable ICU 2.4
2712          */
2713         public static final int GAF = 11;
2714         /**
2715          * @stable ICU 2.4
2716          */
2717         public static final int GAMAL = 12;
2718         /**
2719          * @stable ICU 2.4
2720          */
2721         public static final int HAH = 13;
2722         /** @stable ICU 4.6 */
2723         public static final int TEH_MARBUTA_GOAL = 14;
2724         /**
2725          * @stable ICU 2.4
2726          */
2727         public static final int HAMZA_ON_HEH_GOAL = TEH_MARBUTA_GOAL;
2728         /**
2729          * @stable ICU 2.4
2730          */
2731         public static final int HE = 15;
2732         /**
2733          * @stable ICU 2.4
2734          */
2735         public static final int HEH = 16;
2736         /**
2737          * @stable ICU 2.4
2738          */
2739         public static final int HEH_GOAL = 17;
2740         /**
2741          * @stable ICU 2.4
2742          */
2743         public static final int HETH = 18;
2744         /**
2745          * @stable ICU 2.4
2746          */
2747         public static final int KAF = 19;
2748         /**
2749          * @stable ICU 2.4
2750          */
2751         public static final int KAPH = 20;
2752         /**
2753          * @stable ICU 2.4
2754          */
2755         public static final int KNOTTED_HEH = 21;
2756         /**
2757          * @stable ICU 2.4
2758          */
2759         public static final int LAM = 22;
2760         /**
2761          * @stable ICU 2.4
2762          */
2763         public static final int LAMADH = 23;
2764         /**
2765          * @stable ICU 2.4
2766          */
2767         public static final int MEEM = 24;
2768         /**
2769          * @stable ICU 2.4
2770          */
2771         public static final int MIM = 25;
2772         /**
2773          * @stable ICU 2.4
2774          */
2775         public static final int NOON = 26;
2776         /**
2777          * @stable ICU 2.4
2778          */
2779         public static final int NUN = 27;
2780         /**
2781          * @stable ICU 2.4
2782          */
2783         public static final int PE = 28;
2784         /**
2785          * @stable ICU 2.4
2786          */
2787         public static final int QAF = 29;
2788         /**
2789          * @stable ICU 2.4
2790          */
2791         public static final int QAPH = 30;
2792         /**
2793          * @stable ICU 2.4
2794          */
2795         public static final int REH = 31;
2796         /**
2797          * @stable ICU 2.4
2798          */
2799         public static final int REVERSED_PE = 32;
2800         /**
2801          * @stable ICU 2.4
2802          */
2803         public static final int SAD = 33;
2804         /**
2805          * @stable ICU 2.4
2806          */
2807         public static final int SADHE = 34;
2808         /**
2809          * @stable ICU 2.4
2810          */
2811         public static final int SEEN = 35;
2812         /**
2813          * @stable ICU 2.4
2814          */
2815         public static final int SEMKATH = 36;
2816         /**
2817          * @stable ICU 2.4
2818          */
2819         public static final int SHIN = 37;
2820         /**
2821          * @stable ICU 2.4
2822          */
2823         public static final int SWASH_KAF = 38;
2824         /**
2825          * @stable ICU 2.4
2826          */
2827         public static final int SYRIAC_WAW = 39;
2828         /**
2829          * @stable ICU 2.4
2830          */
2831         public static final int TAH = 40;
2832         /**
2833          * @stable ICU 2.4
2834          */
2835         public static final int TAW = 41;
2836         /**
2837          * @stable ICU 2.4
2838          */
2839         public static final int TEH_MARBUTA = 42;
2840         /**
2841          * @stable ICU 2.4
2842          */
2843         public static final int TETH = 43;
2844         /**
2845          * @stable ICU 2.4
2846          */
2847         public static final int WAW = 44;
2848         /**
2849          * @stable ICU 2.4
2850          */
2851         public static final int YEH = 45;
2852         /**
2853          * @stable ICU 2.4
2854          */
2855         public static final int YEH_BARREE = 46;
2856         /**
2857          * @stable ICU 2.4
2858          */
2859         public static final int YEH_WITH_TAIL = 47;
2860         /**
2861          * @stable ICU 2.4
2862          */
2863         public static final int YUDH = 48;
2864         /**
2865          * @stable ICU 2.4
2866          */
2867         public static final int YUDH_HE = 49;
2868         /**
2869          * @stable ICU 2.4
2870          */
2871         public static final int ZAIN = 50;
2872         /**
2873          * @stable ICU 2.6
2874          */
2875         public static final int FE = 51;
2876         /**
2877          * @stable ICU 2.6
2878          */
2879         public static final int KHAPH = 52;
2880         /**
2881          * @stable ICU 2.6
2882          */
2883         public static final int ZHAIN = 53;
2884         /**
2885          * @stable ICU 4.0
2886          */
2887         public static final int BURUSHASKI_YEH_BARREE = 54;
2888         /** @stable ICU 4.4 */
2889         public static final int FARSI_YEH = 55;
2890         /** @stable ICU 4.4 */
2891         public static final int NYA = 56;
2892         /** @stable ICU 49 */
2893         public static final int ROHINGYA_YEH = 57;
2894 
2895         /** @stable ICU 54 */
2896         public static final int MANICHAEAN_ALEPH = 58;
2897         /** @stable ICU 54 */
2898         public static final int MANICHAEAN_AYIN = 59;
2899         /** @stable ICU 54 */
2900         public static final int MANICHAEAN_BETH = 60;
2901         /** @stable ICU 54 */
2902         public static final int MANICHAEAN_DALETH = 61;
2903         /** @stable ICU 54 */
2904         public static final int MANICHAEAN_DHAMEDH = 62;
2905         /** @stable ICU 54 */
2906         public static final int MANICHAEAN_FIVE = 63;
2907         /** @stable ICU 54 */
2908         public static final int MANICHAEAN_GIMEL = 64;
2909         /** @stable ICU 54 */
2910         public static final int MANICHAEAN_HETH = 65;
2911         /** @stable ICU 54 */
2912         public static final int MANICHAEAN_HUNDRED = 66;
2913         /** @stable ICU 54 */
2914         public static final int MANICHAEAN_KAPH = 67;
2915         /** @stable ICU 54 */
2916         public static final int MANICHAEAN_LAMEDH = 68;
2917         /** @stable ICU 54 */
2918         public static final int MANICHAEAN_MEM = 69;
2919         /** @stable ICU 54 */
2920         public static final int MANICHAEAN_NUN = 70;
2921         /** @stable ICU 54 */
2922         public static final int MANICHAEAN_ONE = 71;
2923         /** @stable ICU 54 */
2924         public static final int MANICHAEAN_PE = 72;
2925         /** @stable ICU 54 */
2926         public static final int MANICHAEAN_QOPH = 73;
2927         /** @stable ICU 54 */
2928         public static final int MANICHAEAN_RESH = 74;
2929         /** @stable ICU 54 */
2930         public static final int MANICHAEAN_SADHE = 75;
2931         /** @stable ICU 54 */
2932         public static final int MANICHAEAN_SAMEKH = 76;
2933         /** @stable ICU 54 */
2934         public static final int MANICHAEAN_TAW = 77;
2935         /** @stable ICU 54 */
2936         public static final int MANICHAEAN_TEN = 78;
2937         /** @stable ICU 54 */
2938         public static final int MANICHAEAN_TETH = 79;
2939         /** @stable ICU 54 */
2940         public static final int MANICHAEAN_THAMEDH = 80;
2941         /** @stable ICU 54 */
2942         public static final int MANICHAEAN_TWENTY = 81;
2943         /** @stable ICU 54 */
2944         public static final int MANICHAEAN_WAW = 82;
2945         /** @stable ICU 54 */
2946         public static final int MANICHAEAN_YODH = 83;
2947         /** @stable ICU 54 */
2948         public static final int MANICHAEAN_ZAYIN = 84;
2949         /** @stable ICU 54 */
2950         public static final int STRAIGHT_WAW = 85;
2951 
2952         /**
2953          * @stable ICU 2.4
2954          */
2955         public static final int COUNT = 86;
2956     }
2957 
2958     /**
2959      * Grapheme Cluster Break constants.
2960      * @see UProperty#GRAPHEME_CLUSTER_BREAK
2961      * @stable ICU 3.4
2962      */
2963     public static interface GraphemeClusterBreak {
2964         /**
2965          * @stable ICU 3.4
2966          */
2967         public static final int OTHER = 0;
2968         /**
2969          * @stable ICU 3.4
2970          */
2971         public static final int CONTROL = 1;
2972         /**
2973          * @stable ICU 3.4
2974          */
2975         public static final int CR = 2;
2976         /**
2977          * @stable ICU 3.4
2978          */
2979         public static final int EXTEND = 3;
2980         /**
2981          * @stable ICU 3.4
2982          */
2983         public static final int L = 4;
2984         /**
2985          * @stable ICU 3.4
2986          */
2987         public static final int LF = 5;
2988         /**
2989          * @stable ICU 3.4
2990          */
2991         public static final int LV = 6;
2992         /**
2993          * @stable ICU 3.4
2994          */
2995         public static final int LVT = 7;
2996         /**
2997          * @stable ICU 3.4
2998          */
2999         public static final int T = 8;
3000         /**
3001          * @stable ICU 3.4
3002          */
3003         public static final int V = 9;
3004         /**
3005          * @stable ICU 4.0
3006          */
3007         public static final int SPACING_MARK = 10;
3008         /**
3009          * @stable ICU 4.0
3010          */
3011         public static final int PREPEND = 11;
3012         /** @stable ICU 50 */
3013         public static final int REGIONAL_INDICATOR = 12;  /*[RI]*/ /* new in Unicode 6.2/ICU 50 */
3014         /**
3015          * @stable ICU 3.4
3016          */
3017         public static final int COUNT = 13;
3018     }
3019 
3020     /**
3021      * Word Break constants.
3022      * @see UProperty#WORD_BREAK
3023      * @stable ICU 3.4
3024      */
3025     public static interface WordBreak {
3026         /**
3027          * @stable ICU 3.8
3028          */
3029         public static final int OTHER = 0;
3030         /**
3031          * @stable ICU 3.8
3032          */
3033         public static final int ALETTER = 1;
3034         /**
3035          * @stable ICU 3.8
3036          */
3037         public static final int FORMAT = 2;
3038         /**
3039          * @stable ICU 3.8
3040          */
3041         public static final int KATAKANA = 3;
3042         /**
3043          * @stable ICU 3.8
3044          */
3045         public static final int MIDLETTER = 4;
3046         /**
3047          * @stable ICU 3.8
3048          */
3049         public static final int MIDNUM = 5;
3050         /**
3051          * @stable ICU 3.8
3052          */
3053         public static final int NUMERIC = 6;
3054         /**
3055          * @stable ICU 3.8
3056          */
3057         public static final int EXTENDNUMLET = 7;
3058         /**
3059          * @stable ICU 4.0
3060          */
3061         public static final int CR = 8;
3062         /**
3063          * @stable ICU 4.0
3064          */
3065         public static final int EXTEND = 9;
3066         /**
3067          * @stable ICU 4.0
3068          */
3069         public static final int LF = 10;
3070         /**
3071          * @stable ICU 4.0
3072          */
3073         public static final int MIDNUMLET = 11;
3074         /**
3075          * @stable ICU 4.0
3076          */
3077         public static final int NEWLINE = 12;
3078         /** @stable ICU 50 */
3079         public static final int REGIONAL_INDICATOR = 13;  /*[RI]*/ /* new in Unicode 6.2/ICU 50 */
3080         /** @stable ICU 52 */
3081         public static final int HEBREW_LETTER = 14;    /*[HL]*/ /* from here on: new in Unicode 6.3/ICU 52 */
3082         /** @stable ICU 52 */
3083         public static final int SINGLE_QUOTE = 15;     /*[SQ]*/
3084         /** @stable ICU 52 */
3085         public static final int DOUBLE_QUOTE = 16;     /*[DQ]*/
3086         /**
3087          * @stable ICU 4.0
3088          */
3089         public static final int COUNT = 17;
3090     }
3091 
3092     /**
3093      * Sentence Break constants.
3094      * @see UProperty#SENTENCE_BREAK
3095      * @stable ICU 3.4
3096      */
3097     public static interface SentenceBreak {
3098         /**
3099          * @stable ICU 3.8
3100          */
3101         public static final int OTHER = 0;
3102         /**
3103          * @stable ICU 3.8
3104          */
3105         public static final int ATERM = 1;
3106         /**
3107          * @stable ICU 3.8
3108          */
3109         public static final int CLOSE = 2;
3110         /**
3111          * @stable ICU 3.8
3112          */
3113         public static final int FORMAT = 3;
3114         /**
3115          * @stable ICU 3.8
3116          */
3117         public static final int LOWER = 4;
3118         /**
3119          * @stable ICU 3.8
3120          */
3121         public static final int NUMERIC = 5;
3122         /**
3123          * @stable ICU 3.8
3124          */
3125         public static final int OLETTER = 6;
3126         /**
3127          * @stable ICU 3.8
3128          */
3129         public static final int SEP = 7;
3130         /**
3131          * @stable ICU 3.8
3132          */
3133         public static final int SP = 8;
3134         /**
3135          * @stable ICU 3.8
3136          */
3137         public static final int STERM = 9;
3138         /**
3139          * @stable ICU 3.8
3140          */
3141         public static final int UPPER = 10;
3142         /**
3143          * @stable ICU 4.0
3144          */
3145         public static final int CR = 11;
3146         /**
3147          * @stable ICU 4.0
3148          */
3149         public static final int EXTEND = 12;
3150         /**
3151          * @stable ICU 4.0
3152          */
3153         public static final int LF = 13;
3154         /**
3155          * @stable ICU 4.0
3156          */
3157         public static final int SCONTINUE = 14;
3158         /**
3159          * @stable ICU 4.0
3160          */
3161         public static final int COUNT = 15;
3162     }
3163 
3164     /**
3165      * Line Break constants.
3166      * @see UProperty#LINE_BREAK
3167      * @stable ICU 2.4
3168      */
3169     public static interface LineBreak
3170     {
3171         /**
3172          * @stable ICU 2.4
3173          */
3174         public static final int UNKNOWN = 0;
3175         /**
3176          * @stable ICU 2.4
3177          */
3178         public static final int AMBIGUOUS = 1;
3179         /**
3180          * @stable ICU 2.4
3181          */
3182         public static final int ALPHABETIC = 2;
3183         /**
3184          * @stable ICU 2.4
3185          */
3186         public static final int BREAK_BOTH = 3;
3187         /**
3188          * @stable ICU 2.4
3189          */
3190         public static final int BREAK_AFTER = 4;
3191         /**
3192          * @stable ICU 2.4
3193          */
3194         public static final int BREAK_BEFORE = 5;
3195         /**
3196          * @stable ICU 2.4
3197          */
3198         public static final int MANDATORY_BREAK = 6;
3199         /**
3200          * @stable ICU 2.4
3201          */
3202         public static final int CONTINGENT_BREAK = 7;
3203         /**
3204          * @stable ICU 2.4
3205          */
3206         public static final int CLOSE_PUNCTUATION = 8;
3207         /**
3208          * @stable ICU 2.4
3209          */
3210         public static final int COMBINING_MARK = 9;
3211         /**
3212          * @stable ICU 2.4
3213          */
3214         public static final int CARRIAGE_RETURN = 10;
3215         /**
3216          * @stable ICU 2.4
3217          */
3218         public static final int EXCLAMATION = 11;
3219         /**
3220          * @stable ICU 2.4
3221          */
3222         public static final int GLUE = 12;
3223         /**
3224          * @stable ICU 2.4
3225          */
3226         public static final int HYPHEN = 13;
3227         /**
3228          * @stable ICU 2.4
3229          */
3230         public static final int IDEOGRAPHIC = 14;
3231         /**
3232          * @see #INSEPARABLE
3233          * @stable ICU 2.4
3234          */
3235         public static final int INSEPERABLE = 15;
3236         /**
3237          * Renamed from the misspelled "inseperable" in Unicode 4.0.1.
3238          * @stable ICU 3.0
3239          */
3240         public static final int INSEPARABLE = 15;
3241         /**
3242          * @stable ICU 2.4
3243          */
3244         public static final int INFIX_NUMERIC = 16;
3245         /**
3246          * @stable ICU 2.4
3247          */
3248         public static final int LINE_FEED = 17;
3249         /**
3250          * @stable ICU 2.4
3251          */
3252         public static final int NONSTARTER = 18;
3253         /**
3254          * @stable ICU 2.4
3255          */
3256         public static final int NUMERIC = 19;
3257         /**
3258          * @stable ICU 2.4
3259          */
3260         public static final int OPEN_PUNCTUATION = 20;
3261         /**
3262          * @stable ICU 2.4
3263          */
3264         public static final int POSTFIX_NUMERIC = 21;
3265         /**
3266          * @stable ICU 2.4
3267          */
3268         public static final int PREFIX_NUMERIC = 22;
3269         /**
3270          * @stable ICU 2.4
3271          */
3272         public static final int QUOTATION = 23;
3273         /**
3274          * @stable ICU 2.4
3275          */
3276         public static final int COMPLEX_CONTEXT = 24;
3277         /**
3278          * @stable ICU 2.4
3279          */
3280         public static final int SURROGATE = 25;
3281         /**
3282          * @stable ICU 2.4
3283          */
3284         public static final int SPACE = 26;
3285         /**
3286          * @stable ICU 2.4
3287          */
3288         public static final int BREAK_SYMBOLS = 27;
3289         /**
3290          * @stable ICU 2.4
3291          */
3292         public static final int ZWSPACE = 28;
3293         /**
3294          * @stable ICU 2.6
3295          */
3296         public static final int NEXT_LINE = 29;  /*[NL]*/ /* from here on: new in Unicode 4/ICU 2.6 */
3297         /**
3298          * @stable ICU 2.6
3299          */
3300         public static final int WORD_JOINER = 30;      /*[WJ]*/
3301         /**
3302          * @stable ICU 3.4
3303          */
3304         public static final int H2 = 31;  /* from here on: new in Unicode 4.1/ICU 3.4 */
3305         /**
3306          * @stable ICU 3.4
3307          */
3308         public static final int H3 = 32;
3309         /**
3310          * @stable ICU 3.4
3311          */
3312         public static final int JL = 33;
3313         /**
3314          * @stable ICU 3.4
3315          */
3316         public static final int JT = 34;
3317         /**
3318          * @stable ICU 3.4
3319          */
3320         public static final int JV = 35;
3321         /** @stable ICU 4.4 */
3322         public static final int CLOSE_PARENTHESIS = 36; /*[CP]*/ /* new in Unicode 5.2/ICU 4.4 */
3323         /** @stable ICU 49 */
3324         public static final int CONDITIONAL_JAPANESE_STARTER = 37;  /*[CJ]*/ /* new in Unicode 6.1/ICU 49 */
3325         /** @stable ICU 49 */
3326         public static final int HEBREW_LETTER = 38;  /*[HL]*/ /* new in Unicode 6.1/ICU 49 */
3327         /** @stable ICU 50 */
3328         public static final int REGIONAL_INDICATOR = 39;  /*[RI]*/ /* new in Unicode 6.2/ICU 50 */
3329         /**
3330          * @stable ICU 2.4
3331          */
3332         public static final int COUNT = 40;
3333     }
3334 
    /**
     * Numeric Type constants.
     *
     * <p>The values are consecutive integers starting at 0; {@link #COUNT}
     * is one greater than the largest of them.
     *
     * @see UProperty#NUMERIC_TYPE
     * @stable ICU 2.4
     */
    public static interface NumericType
    {
        /**
         * Numeric type NONE (value 0); by its name, the type of code points
         * that have no numeric type.
         * @stable ICU 2.4
         */
        public static final int NONE = 0;
        /**
         * Numeric type DECIMAL (value 1).
         * @stable ICU 2.4
         */
        public static final int DECIMAL = 1;
        /**
         * Numeric type DIGIT (value 2).
         * @stable ICU 2.4
         */
        public static final int DIGIT = 2;
        /**
         * Numeric type NUMERIC (value 3).
         * @stable ICU 2.4
         */
        public static final int NUMERIC = 3;
        /**
         * Number of numeric type constants declared in this interface
         * (one more than the highest value above).
         * @stable ICU 2.4
         */
        public static final int COUNT = 4;
    }
3363 
    /**
     * Hangul Syllable Type constants.
     *
     * <p>The values are consecutive integers starting at 0; {@link #COUNT}
     * is one greater than the largest of them. The bracketed tag after each
     * constant is its short name.
     *
     * @see UProperty#HANGUL_SYLLABLE_TYPE
     * @stable ICU 2.6
     */
    public static interface HangulSyllableType
    {
        /**
         * Hangul syllable type NOT_APPLICABLE, short name NA (value 0).
         * @stable ICU 2.6
         */
        public static final int NOT_APPLICABLE      = 0;   /*[NA]*/ /* NOTE(review): the original comment said "See note !!" but no such note is visible nearby */
        /**
         * Hangul syllable type LEADING_JAMO, short name L (value 1).
         * @stable ICU 2.6
         */
        public static final int LEADING_JAMO        = 1;   /*[L]*/
        /**
         * Hangul syllable type VOWEL_JAMO, short name V (value 2).
         * @stable ICU 2.6
         */
        public static final int VOWEL_JAMO          = 2;   /*[V]*/
        /**
         * Hangul syllable type TRAILING_JAMO, short name T (value 3).
         * @stable ICU 2.6
         */
        public static final int TRAILING_JAMO       = 3;   /*[T]*/
        /**
         * Hangul syllable type LV_SYLLABLE, short name LV (value 4).
         * @stable ICU 2.6
         */
        public static final int LV_SYLLABLE         = 4;   /*[LV]*/
        /**
         * Hangul syllable type LVT_SYLLABLE, short name LVT (value 5).
         * @stable ICU 2.6
         */
        public static final int LVT_SYLLABLE        = 5;   /*[LVT]*/
        /**
         * Number of Hangul syllable type constants declared in this interface
         * (one more than the highest value above).
         * @stable ICU 2.6
         */
        public static final int COUNT               = 6;
    }
3401 
    /**
     * Bidi Paired Bracket Type constants.
     *
     * <p>The values are consecutive integers starting at 0; {@link #COUNT}
     * is one greater than the largest of them.
     *
     * @see UProperty#BIDI_PAIRED_BRACKET_TYPE
     * @stable ICU 52
     */
    public static interface BidiPairedBracketType {
        /**
         * Not a paired bracket.
         * @stable ICU 52
         */
        public static final int NONE = 0;
        /**
         * Open paired bracket.
         * @stable ICU 52
         */
        public static final int OPEN = 1;
        /**
         * Close paired bracket.
         * @stable ICU 52
         */
        public static final int CLOSE = 2;
        /**
         * Number of bidi paired bracket type constants declared in this
         * interface (one more than the highest value above).
         * @stable ICU 52
         */
        public static final int COUNT = 3;
    }
3429 
3430     // public data members -----------------------------------------------
3431 
    /**
     * The lowest Unicode code point value, constant 0.
     * Same as {@link Character#MIN_CODE_POINT}, same integer value as {@link Character#MIN_VALUE}.
     *
     * @stable ICU 2.1
     */
    public static final int MIN_VALUE = Character.MIN_CODE_POINT;

    /**
     * The highest Unicode code point value (scalar value), constant U+10FFFF (uses 21 bits).
     * Same as {@link Character#MAX_CODE_POINT}.
     *
     * <p>Up-to-date Unicode implementation of {@link Character#MAX_VALUE},
     * which is still a char with the value U+FFFF.
     *
     * @stable ICU 2.1
     */
    public static final int MAX_VALUE = Character.MAX_CODE_POINT;

    /**
     * The minimum value for supplementary code points, constant U+10000.
     * Same as {@link Character#MIN_SUPPLEMENTARY_CODE_POINT}.
     *
     * @stable ICU 2.1
     */
    public static final int SUPPLEMENTARY_MIN_VALUE = Character.MIN_SUPPLEMENTARY_CODE_POINT;

    /**
     * Unicode value (U+FFFD) used when translating into a Unicode encoding
     * form and there is no existing character.
     * @stable ICU 2.1
     */
    public static final int REPLACEMENT_CHAR = '\uFFFD';

    /**
     * Special value that is returned by {@link #getUnicodeNumericValue(int)}
     * when no numeric value is defined for a code point.
     * @stable ICU 2.4
     * @see #getUnicodeNumericValue
     */
    public static final double NO_NUMERIC_VALUE = -123456789;

    /**
     * Compatibility constant for Java Character's MIN_RADIX.
     * Same as {@link Character#MIN_RADIX}.
     * @stable ICU 3.4
     */
    public static final int MIN_RADIX = java.lang.Character.MIN_RADIX;

    /**
     * Compatibility constant for Java Character's MAX_RADIX.
     * Same as {@link Character#MAX_RADIX}.
     * @stable ICU 3.4
     */
    public static final int MAX_RADIX = java.lang.Character.MAX_RADIX;

    /**
     * Do not lowercase non-initial parts of words when titlecasing.
     * Option bit (0x100) for titlecasing APIs that take an options bit set.
     *
     * <p>By default, titlecasing will titlecase the first cased character
     * of a word and lowercase all other characters.
     * With this option, the other characters will not be modified.
     *
     * @see #toTitleCase
     * @stable ICU 3.8
     */
    public static final int TITLECASE_NO_LOWERCASE = 0x100;

    /**
     * Do not adjust the titlecasing indexes from the indexes returned by the
     * BreakIterator's next(); titlecase exactly the characters at breaks
     * from the iterator.
     * Option bit (0x200) for titlecasing APIs that take an options bit set.
     *
     * <p>By default, titlecasing will take each break iterator index,
     * adjust it by looking for the next cased character, and titlecase that one.
     * Other characters are lowercased.
     *
     * <p>This follows Unicode 4 &amp; 5 section 3.13 Default Case Operations:
     *
     * <p>R3  toTitlecase(X): Find the word boundaries based on Unicode Standard Annex
     * #29, "Text Boundaries." Between each pair of word boundaries, find the first
     * cased character F. If F exists, map F to default_title(F); then map each
     * subsequent character C to default_lower(C).
     *
     * @see #toTitleCase
     * @see #TITLECASE_NO_LOWERCASE
     * @stable ICU 3.8
     */
    public static final int TITLECASE_NO_BREAK_ADJUSTMENT = 0x200;
3520 
3521     // public methods ----------------------------------------------------
3522 
3523     /**
3524      * Returnss the numeric value of a decimal digit code point.
3525      * <br>This method observes the semantics of
3526      * <code>java.lang.Character.digit()</code>.  Note that this
3527      * will return positive values for code points for which isDigit
3528      * returns false, just like java.lang.Character.
3529      * <br><em>Semantic Change:</em> In release 1.3.1 and
3530      * prior, this did not treat the European letters as having a
3531      * digit value, and also treated numeric letters and other numbers as
3532      * digits.
3533      * This has been changed to conform to the java semantics.
3534      * <br>A code point is a valid digit if and only if:
3535      * <ul>
3536      *   <li>ch is a decimal digit or one of the european letters, and
3537      *   <li>the value of ch is less than the specified radix.
3538      * </ul>
3539      * @param ch the code point to query
3540      * @param radix the radix
3541      * @return the numeric value represented by the code point in the
3542      * specified radix, or -1 if the code point is not a decimal digit
3543      * or if its value is too large for the radix
3544      * @stable ICU 2.1
3545      */
digit(int ch, int radix)3546     public static int digit(int ch, int radix)
3547     {
3548         if (2 <= radix && radix <= 36) {
3549             int value = digit(ch);
3550             if (value < 0) {
3551                 // ch is not a decimal digit, try latin letters
3552                 value = UCharacterProperty.getEuropeanDigit(ch);
3553             }
3554             return (value < radix) ? value : -1;
3555         } else {
3556             return -1;  // invalid radix
3557         }
3558     }
3559 
3560     /**
3561      * Returnss the numeric value of a decimal digit code point.
3562      * <br>This is a convenience overload of <code>digit(int, int)</code>
3563      * that provides a decimal radix.
3564      * <br><em>Semantic Change:</em> In release 1.3.1 and prior, this
3565      * treated numeric letters and other numbers as digits.  This has
3566      * been changed to conform to the java semantics.
3567      * @param ch the code point to query
3568      * @return the numeric value represented by the code point,
3569      * or -1 if the code point is not a decimal digit or if its
3570      * value is too large for a decimal radix
3571      * @stable ICU 2.1
3572      */
digit(int ch)3573     public static int digit(int ch)
3574     {
3575         return UCharacterProperty.INSTANCE.digit(ch);
3576     }
3577 
3578     /**
3579      * Returns the numeric value of the code point as a nonnegative
3580      * integer.
3581      * <br>If the code point does not have a numeric value, then -1 is returned.
3582      * <br>
3583      * If the code point has a numeric value that cannot be represented as a
3584      * nonnegative integer (for example, a fractional value), then -2 is
3585      * returned.
3586      * @param ch the code point to query
3587      * @return the numeric value of the code point, or -1 if it has no numeric
3588      * value, or -2 if it has a numeric value that cannot be represented as a
3589      * nonnegative integer
3590      * @stable ICU 2.1
3591      */
getNumericValue(int ch)3592     public static int getNumericValue(int ch)
3593     {
3594         return UCharacterProperty.INSTANCE.getNumericValue(ch);
3595     }
3596 
3597     /**
3598      * {@icu} Returns the numeric value for a Unicode code point as defined in the
3599      * Unicode Character Database.
3600      * <p>A "double" return type is necessary because some numeric values are
3601      * fractions, negative, or too large for int.
3602      * <p>For characters without any numeric values in the Unicode Character
3603      * Database, this function will return NO_NUMERIC_VALUE.
3604      * Note: This is different from the Unicode Standard which specifies NaN as the default value.
3605      * <p><em>API Change:</em> In release 2.2 and prior, this API has a
3606      * return type int and returns -1 when the argument ch does not have a
3607      * corresponding numeric value. This has been changed to synch with ICU4C
3608      *
3609      * This corresponds to the ICU4C function u_getNumericValue.
3610      * @param ch Code point to get the numeric value for.
3611      * @return numeric value of ch, or NO_NUMERIC_VALUE if none is defined.
3612      * @stable ICU 2.4
3613      */
getUnicodeNumericValue(int ch)3614     public static double getUnicodeNumericValue(int ch)
3615     {
3616         return UCharacterProperty.INSTANCE.getUnicodeNumericValue(ch);
3617     }
3618 
3619     /**
3620      * Compatibility override of Java deprecated method.  This
3621      * method will always remain deprecated.
3622      * Same as java.lang.Character.isSpace().
3623      * @param ch the code point
3624      * @return true if the code point is a space character as
3625      * defined by java.lang.Character.isSpace.
3626      * @deprecated ICU 3.4 (Java)
3627      */
3628     @Deprecated
isSpace(int ch)3629     public static boolean isSpace(int ch) {
3630         return ch <= 0x20 &&
3631                 (ch == 0x20 || ch == 0x09 || ch == 0x0a || ch == 0x0c || ch == 0x0d);
3632     }
3633 
3634     /**
3635      * Returns a value indicating a code point's Unicode category.
3636      * Up-to-date Unicode implementation of java.lang.Character.getType()
3637      * except for the above mentioned code points that had their category
3638      * changed.<br>
3639      * Return results are constants from the interface
3640      * <a href=UCharacterCategory.html>UCharacterCategory</a><br>
3641      * <em>NOTE:</em> the UCharacterCategory values are <em>not</em> compatible with
3642      * those returned by java.lang.Character.getType.  UCharacterCategory values
3643      * match the ones used in ICU4C, while java.lang.Character type
3644      * values, though similar, skip the value 17.
3645      * @param ch code point whose type is to be determined
3646      * @return category which is a value of UCharacterCategory
3647      * @stable ICU 2.1
3648      */
getType(int ch)3649     public static int getType(int ch)
3650     {
3651         return UCharacterProperty.INSTANCE.getType(ch);
3652     }
3653 
3654     /**
3655      * Determines if a code point has a defined meaning in the up-to-date
3656      * Unicode standard.
3657      * E.g. supplementary code points though allocated space are not defined in
3658      * Unicode yet.<br>
3659      * Up-to-date Unicode implementation of java.lang.Character.isDefined()
3660      * @param ch code point to be determined if it is defined in the most
3661      *        current version of Unicode
3662      * @return true if this code point is defined in unicode
3663      * @stable ICU 2.1
3664      */
isDefined(int ch)3665     public static boolean isDefined(int ch)
3666     {
3667         return getType(ch) != 0;
3668     }
3669 
3670     /**
3671      * Determines if a code point is a Java digit.
3672      * <br>This method observes the semantics of
3673      * <code>java.lang.Character.isDigit()</code>. It returns true for decimal
3674      * digits only.
3675      * <br><em>Semantic Change:</em> In release 1.3.1 and prior, this treated
3676      * numeric letters and other numbers as digits.
3677      * This has been changed to conform to the java semantics.
3678      * @param ch code point to query
3679      * @return true if this code point is a digit
3680      * @stable ICU 2.1
3681      */
isDigit(int ch)3682     public static boolean isDigit(int ch)
3683     {
3684         return getType(ch) == UCharacterCategory.DECIMAL_DIGIT_NUMBER;
3685     }
3686 
3687     /**
3688      * Determines if the specified code point is an ISO control character.
3689      * A code point is considered to be an ISO control character if it is in
3690      * the range &#92;u0000 through &#92;u001F or in the range &#92;u007F through
3691      * &#92;u009F.<br>
3692      * Up-to-date Unicode implementation of java.lang.Character.isISOControl()
3693      * @param ch code point to determine if it is an ISO control character
3694      * @return true if code point is a ISO control character
3695      * @stable ICU 2.1
3696      */
isISOControl(int ch)3697     public static boolean isISOControl(int ch)
3698     {
3699         return ch >= 0 && ch <= APPLICATION_PROGRAM_COMMAND_ &&
3700                 ((ch <= UNIT_SEPARATOR_) || (ch >= DELETE_));
3701     }
3702 
3703     /**
3704      * Determines if the specified code point is a letter.
3705      * Up-to-date Unicode implementation of java.lang.Character.isLetter()
3706      * @param ch code point to determine if it is a letter
3707      * @return true if code point is a letter
3708      * @stable ICU 2.1
3709      */
isLetter(int ch)3710     public static boolean isLetter(int ch)
3711     {
3712         // if props == 0, it will just fall through and return false
3713         return ((1 << getType(ch))
3714                 & ((1 << UCharacterCategory.UPPERCASE_LETTER)
3715                         | (1 << UCharacterCategory.LOWERCASE_LETTER)
3716                         | (1 << UCharacterCategory.TITLECASE_LETTER)
3717                         | (1 << UCharacterCategory.MODIFIER_LETTER)
3718                         | (1 << UCharacterCategory.OTHER_LETTER))) != 0;
3719     }
3720 
3721     /**
3722      * Determines if the specified code point is a letter or digit.
3723      * {@icunote} This method, unlike java.lang.Character does not regard the ascii
3724      * characters 'A' - 'Z' and 'a' - 'z' as digits.
3725      * @param ch code point to determine if it is a letter or a digit
3726      * @return true if code point is a letter or a digit
3727      * @stable ICU 2.1
3728      */
isLetterOrDigit(int ch)3729     public static boolean isLetterOrDigit(int ch)
3730     {
3731         return ((1 << getType(ch))
3732                 & ((1 << UCharacterCategory.UPPERCASE_LETTER)
3733                         | (1 << UCharacterCategory.LOWERCASE_LETTER)
3734                         | (1 << UCharacterCategory.TITLECASE_LETTER)
3735                         | (1 << UCharacterCategory.MODIFIER_LETTER)
3736                         | (1 << UCharacterCategory.OTHER_LETTER)
3737                         | (1 << UCharacterCategory.DECIMAL_DIGIT_NUMBER))) != 0;
3738     }
3739 
3740     /**
3741      * Compatibility override of Java deprecated method.  This
3742      * method will always remain deprecated.  Delegates to
3743      * java.lang.Character.isJavaIdentifierStart.
3744      * @param cp the code point
3745      * @return true if the code point can start a java identifier.
3746      * @deprecated ICU 3.4 (Java)
3747      */
3748     @Deprecated
isJavaLetter(int cp)3749     public static boolean isJavaLetter(int cp) {
3750         return isJavaIdentifierStart(cp);
3751     }
3752 
3753     /**
3754      * Compatibility override of Java deprecated method.  This
3755      * method will always remain deprecated.  Delegates to
3756      * java.lang.Character.isJavaIdentifierPart.
3757      * @param cp the code point
3758      * @return true if the code point can continue a java identifier.
3759      * @deprecated ICU 3.4 (Java)
3760      */
3761     @Deprecated
isJavaLetterOrDigit(int cp)3762     public static boolean isJavaLetterOrDigit(int cp) {
3763         return isJavaIdentifierPart(cp);
3764     }
3765 
3766     /**
3767      * Compatibility override of Java method, delegates to
3768      * java.lang.Character.isJavaIdentifierStart.
3769      * @param cp the code point
3770      * @return true if the code point can start a java identifier.
3771      * @stable ICU 3.4
3772      */
isJavaIdentifierStart(int cp)3773     public static boolean isJavaIdentifierStart(int cp) {
3774         // note, downcast to char for jdk 1.4 compatibility
3775         return java.lang.Character.isJavaIdentifierStart((char)cp);
3776     }
3777 
3778     /**
3779      * Compatibility override of Java method, delegates to
3780      * java.lang.Character.isJavaIdentifierPart.
3781      * @param cp the code point
3782      * @return true if the code point can continue a java identifier.
3783      * @stable ICU 3.4
3784      */
isJavaIdentifierPart(int cp)3785     public static boolean isJavaIdentifierPart(int cp) {
3786         // note, downcast to char for jdk 1.4 compatibility
3787         return java.lang.Character.isJavaIdentifierPart((char)cp);
3788     }
3789 
3790     /**
3791      * Determines if the specified code point is a lowercase character.
3792      * UnicodeData only contains case mappings for code points where they are
3793      * one-to-one mappings; it also omits information about context-sensitive
3794      * case mappings.<br> For more information about Unicode case mapping
3795      * please refer to the
3796      * <a href=http://www.unicode.org/unicode/reports/tr21/>Technical report
3797      * #21</a>.<br>
3798      * Up-to-date Unicode implementation of java.lang.Character.isLowerCase()
3799      * @param ch code point to determine if it is in lowercase
3800      * @return true if code point is a lowercase character
3801      * @stable ICU 2.1
3802      */
isLowerCase(int ch)3803     public static boolean isLowerCase(int ch)
3804     {
3805         // if props == 0, it will just fall through and return false
3806         return getType(ch) == UCharacterCategory.LOWERCASE_LETTER;
3807     }
3808 
3809     /**
3810      * Determines if the specified code point is a white space character.
3811      * A code point is considered to be an whitespace character if and only
3812      * if it satisfies one of the following criteria:
3813      * <ul>
3814      * <li> It is a Unicode Separator character (categories "Z" = "Zs" or "Zl" or "Zp"), but is not
3815      *      also a non-breaking space (&#92;u00A0 or &#92;u2007 or &#92;u202F).
3816      * <li> It is &#92;u0009, HORIZONTAL TABULATION.
3817      * <li> It is &#92;u000A, LINE FEED.
3818      * <li> It is &#92;u000B, VERTICAL TABULATION.
3819      * <li> It is &#92;u000C, FORM FEED.
3820      * <li> It is &#92;u000D, CARRIAGE RETURN.
3821      * <li> It is &#92;u001C, FILE SEPARATOR.
3822      * <li> It is &#92;u001D, GROUP SEPARATOR.
3823      * <li> It is &#92;u001E, RECORD SEPARATOR.
3824      * <li> It is &#92;u001F, UNIT SEPARATOR.
3825      * </ul>
3826      *
3827      * This API tries to sync with the semantics of Java's
3828      * java.lang.Character.isWhitespace(), but it may not return
3829      * the exact same results because of the Unicode version
3830      * difference.
3831      * <p>Note: Unicode 4.0.1 changed U+200B ZERO WIDTH SPACE from a Space Separator (Zs)
3832      * to a Format Control (Cf). Since then, isWhitespace(0x200b) returns false.
3833      * See http://www.unicode.org/versions/Unicode4.0.1/
3834      * @param ch code point to determine if it is a white space
3835      * @return true if the specified code point is a white space character
3836      * @stable ICU 2.1
3837      */
isWhitespace(int ch)3838     public static boolean isWhitespace(int ch)
3839     {
3840         // exclude no-break spaces
3841         // if props == 0, it will just fall through and return false
3842         return ((1 << getType(ch)) &
3843                 ((1 << UCharacterCategory.SPACE_SEPARATOR)
3844                         | (1 << UCharacterCategory.LINE_SEPARATOR)
3845                         | (1 << UCharacterCategory.PARAGRAPH_SEPARATOR))) != 0
3846                         && (ch != NO_BREAK_SPACE_) && (ch != FIGURE_SPACE_) && (ch != NARROW_NO_BREAK_SPACE_)
3847                         // TAB VT LF FF CR FS GS RS US NL are all control characters
3848                         // that are white spaces.
3849                         || (ch >= 0x9 && ch <= 0xd) || (ch >= 0x1c && ch <= 0x1f);
3850     }
3851 
3852     /**
3853      * Determines if the specified code point is a Unicode specified space
3854      * character, i.e. if code point is in the category Zs, Zl and Zp.
3855      * Up-to-date Unicode implementation of java.lang.Character.isSpaceChar().
3856      * @param ch code point to determine if it is a space
3857      * @return true if the specified code point is a space character
3858      * @stable ICU 2.1
3859      */
isSpaceChar(int ch)3860     public static boolean isSpaceChar(int ch)
3861     {
3862         // if props == 0, it will just fall through and return false
3863         return ((1 << getType(ch)) & ((1 << UCharacterCategory.SPACE_SEPARATOR)
3864                 | (1 << UCharacterCategory.LINE_SEPARATOR)
3865                 | (1 << UCharacterCategory.PARAGRAPH_SEPARATOR)))
3866                 != 0;
3867     }
3868 
3869     /**
3870      * Determines if the specified code point is a titlecase character.
3871      * UnicodeData only contains case mappings for code points where they are
3872      * one-to-one mappings; it also omits information about context-sensitive
3873      * case mappings.<br>
3874      * For more information about Unicode case mapping please refer to the
3875      * <a href=http://www.unicode.org/unicode/reports/tr21/>
3876      * Technical report #21</a>.<br>
3877      * Up-to-date Unicode implementation of java.lang.Character.isTitleCase().
3878      * @param ch code point to determine if it is in title case
3879      * @return true if the specified code point is a titlecase character
3880      * @stable ICU 2.1
3881      */
isTitleCase(int ch)3882     public static boolean isTitleCase(int ch)
3883     {
3884         // if props == 0, it will just fall through and return false
3885         return getType(ch) == UCharacterCategory.TITLECASE_LETTER;
3886     }
3887 
3888     /**
3889      * Determines if the specified code point may be any part of a Unicode
3890      * identifier other than the starting character.
3891      * A code point may be part of a Unicode identifier if and only if it is
3892      * one of the following:
3893      * <ul>
3894      * <li> Lu Uppercase letter
3895      * <li> Ll Lowercase letter
3896      * <li> Lt Titlecase letter
3897      * <li> Lm Modifier letter
3898      * <li> Lo Other letter
3899      * <li> Nl Letter number
3900      * <li> Pc Connecting punctuation character
3901      * <li> Nd decimal number
3902      * <li> Mc Spacing combining mark
3903      * <li> Mn Non-spacing mark
3904      * <li> Cf formatting code
3905      * </ul>
3906      * Up-to-date Unicode implementation of
3907      * java.lang.Character.isUnicodeIdentifierPart().<br>
3908      * See <a href=http://www.unicode.org/unicode/reports/tr8/>UTR #8</a>.
3909      * @param ch code point to determine if is can be part of a Unicode
3910      *        identifier
3911      * @return true if code point is any character belonging a unicode
3912      *         identifier suffix after the first character
3913      * @stable ICU 2.1
3914      */
isUnicodeIdentifierPart(int ch)3915     public static boolean isUnicodeIdentifierPart(int ch)
3916     {
3917         // if props == 0, it will just fall through and return false
3918         // cat == format
3919         return ((1 << getType(ch))
3920                 & ((1 << UCharacterCategory.UPPERCASE_LETTER)
3921                         | (1 << UCharacterCategory.LOWERCASE_LETTER)
3922                         | (1 << UCharacterCategory.TITLECASE_LETTER)
3923                         | (1 << UCharacterCategory.MODIFIER_LETTER)
3924                         | (1 << UCharacterCategory.OTHER_LETTER)
3925                         | (1 << UCharacterCategory.LETTER_NUMBER)
3926                         | (1 << UCharacterCategory.CONNECTOR_PUNCTUATION)
3927                         | (1 << UCharacterCategory.DECIMAL_DIGIT_NUMBER)
3928                         | (1 << UCharacterCategory.COMBINING_SPACING_MARK)
3929                         | (1 << UCharacterCategory.NON_SPACING_MARK))) != 0
3930                         || isIdentifierIgnorable(ch);
3931     }
3932 
3933     /**
3934      * Determines if the specified code point is permissible as the first
3935      * character in a Unicode identifier.
3936      * A code point may start a Unicode identifier if it is of type either
3937      * <ul>
3938      * <li> Lu Uppercase letter
3939      * <li> Ll Lowercase letter
3940      * <li> Lt Titlecase letter
3941      * <li> Lm Modifier letter
3942      * <li> Lo Other letter
3943      * <li> Nl Letter number
3944      * </ul>
3945      * Up-to-date Unicode implementation of
3946      * java.lang.Character.isUnicodeIdentifierStart().<br>
3947      * See <a href=http://www.unicode.org/unicode/reports/tr8/>UTR #8</a>.
3948      * @param ch code point to determine if it can start a Unicode identifier
3949      * @return true if code point is the first character belonging a unicode
3950      *              identifier
3951      * @stable ICU 2.1
3952      */
isUnicodeIdentifierStart(int ch)3953     public static boolean isUnicodeIdentifierStart(int ch)
3954     {
3955         /*int cat = getType(ch);*/
3956         // if props == 0, it will just fall through and return false
3957         return ((1 << getType(ch))
3958                 & ((1 << UCharacterCategory.UPPERCASE_LETTER)
3959                         | (1 << UCharacterCategory.LOWERCASE_LETTER)
3960                         | (1 << UCharacterCategory.TITLECASE_LETTER)
3961                         | (1 << UCharacterCategory.MODIFIER_LETTER)
3962                         | (1 << UCharacterCategory.OTHER_LETTER)
3963                         | (1 << UCharacterCategory.LETTER_NUMBER))) != 0;
3964     }
3965 
3966     /**
3967      * Determines if the specified code point should be regarded as an
3968      * ignorable character in a Java identifier.
3969      * A character is Java-identifier-ignorable if it has the general category
3970      * Cf Formatting Control, or it is a non-Java-whitespace ISO control:
3971      * U+0000..U+0008, U+000E..U+001B, U+007F..U+009F.<br>
3972      * Up-to-date Unicode implementation of
3973      * java.lang.Character.isIdentifierIgnorable().<br>
3974      * See <a href=http://www.unicode.org/unicode/reports/tr8/>UTR #8</a>.
3975      * <p>Note that Unicode just recommends to ignore Cf (format controls).
3976      * @param ch code point to be determined if it can be ignored in a Unicode
3977      *        identifier.
3978      * @return true if the code point is ignorable
3979      * @stable ICU 2.1
3980      */
isIdentifierIgnorable(int ch)3981     public static boolean isIdentifierIgnorable(int ch)
3982     {
3983         // see java.lang.Character.isIdentifierIgnorable() on range of
3984         // ignorable characters.
3985         if (ch <= 0x9f) {
3986             return isISOControl(ch)
3987                     && !((ch >= 0x9 && ch <= 0xd)
3988                             || (ch >= 0x1c && ch <= 0x1f));
3989         }
3990         return getType(ch) == UCharacterCategory.FORMAT;
3991     }
3992 
3993     /**
3994      * Determines if the specified code point is an uppercase character.
3995      * UnicodeData only contains case mappings for code point where they are
3996      * one-to-one mappings; it also omits information about context-sensitive
3997      * case mappings.<br>
3998      * For language specific case conversion behavior, use
3999      * toUpperCase(locale, str). <br>
4000      * For example, the case conversion for dot-less i and dotted I in Turkish,
4001      * or for final sigma in Greek.
4002      * For more information about Unicode case mapping please refer to the
4003      * <a href=http://www.unicode.org/unicode/reports/tr21/>
4004      * Technical report #21</a>.<br>
4005      * Up-to-date Unicode implementation of java.lang.Character.isUpperCase().
4006      * @param ch code point to determine if it is in uppercase
4007      * @return true if the code point is an uppercase character
4008      * @stable ICU 2.1
4009      */
isUpperCase(int ch)4010     public static boolean isUpperCase(int ch)
4011     {
4012         // if props == 0, it will just fall through and return false
4013         return getType(ch) == UCharacterCategory.UPPERCASE_LETTER;
4014     }
4015 
4016     /**
4017      * The given code point is mapped to its lowercase equivalent; if the code
4018      * point has no lowercase equivalent, the code point itself is returned.
4019      * Up-to-date Unicode implementation of java.lang.Character.toLowerCase()
4020      *
4021      * <p>This function only returns the simple, single-code point case mapping.
4022      * Full case mappings should be used whenever possible because they produce
4023      * better results by working on whole strings.
4024      * They take into account the string context and the language and can map
4025      * to a result string with a different length as appropriate.
4026      * Full case mappings are applied by the case mapping functions
4027      * that take String parameters rather than code points (int).
4028      * See also the User Guide chapter on C/POSIX migration:
4029      * http://www.icu-project.org/userguide/posix.html#case_mappings
4030      *
4031      * @param ch code point whose lowercase equivalent is to be retrieved
4032      * @return the lowercase equivalent code point
4033      * @stable ICU 2.1
4034      */
toLowerCase(int ch)4035     public static int toLowerCase(int ch) {
4036         return UCaseProps.INSTANCE.tolower(ch);
4037     }
4038 
4039     /**
4040      * Converts argument code point and returns a String object representing
4041      * the code point's value in UTF-16 format.
4042      * The result is a string whose length is 1 for BMP code points, 2 for supplementary ones.
4043      *
4044      * <p>Up-to-date Unicode implementation of java.lang.Character.toString().
4045      *
4046      * @param ch code point
4047      * @return string representation of the code point, null if code point is not
4048      *         defined in unicode
4049      * @stable ICU 2.1
4050      */
toString(int ch)4051     public static String toString(int ch)
4052     {
4053         if (ch < MIN_VALUE || ch > MAX_VALUE) {
4054             return null;
4055         }
4056 
4057         if (ch < SUPPLEMENTARY_MIN_VALUE) {
4058             return String.valueOf((char)ch);
4059         }
4060 
4061         return new String(Character.toChars(ch));
4062     }
4063 
4064     /**
4065      * Converts the code point argument to titlecase.
4066      * If no titlecase is available, the uppercase is returned. If no uppercase
4067      * is available, the code point itself is returned.
4068      * Up-to-date Unicode implementation of java.lang.Character.toTitleCase()
4069      *
4070      * <p>This function only returns the simple, single-code point case mapping.
4071      * Full case mappings should be used whenever possible because they produce
4072      * better results by working on whole strings.
4073      * They take into account the string context and the language and can map
4074      * to a result string with a different length as appropriate.
4075      * Full case mappings are applied by the case mapping functions
4076      * that take String parameters rather than code points (int).
4077      * See also the User Guide chapter on C/POSIX migration:
4078      * http://www.icu-project.org/userguide/posix.html#case_mappings
4079      *
4080      * @param ch code point  whose title case is to be retrieved
4081      * @return titlecase code point
4082      * @stable ICU 2.1
4083      */
toTitleCase(int ch)4084     public static int toTitleCase(int ch) {
4085         return UCaseProps.INSTANCE.totitle(ch);
4086     }
4087 
4088     /**
4089      * Converts the character argument to uppercase.
4090      * If no uppercase is available, the character itself is returned.
4091      * Up-to-date Unicode implementation of java.lang.Character.toUpperCase()
4092      *
4093      * <p>This function only returns the simple, single-code point case mapping.
4094      * Full case mappings should be used whenever possible because they produce
4095      * better results by working on whole strings.
4096      * They take into account the string context and the language and can map
4097      * to a result string with a different length as appropriate.
4098      * Full case mappings are applied by the case mapping functions
4099      * that take String parameters rather than code points (int).
4100      * See also the User Guide chapter on C/POSIX migration:
4101      * http://www.icu-project.org/userguide/posix.html#case_mappings
4102      *
4103      * @param ch code point whose uppercase is to be retrieved
4104      * @return uppercase code point
4105      * @stable ICU 2.1
4106      */
toUpperCase(int ch)4107     public static int toUpperCase(int ch) {
4108         return UCaseProps.INSTANCE.toupper(ch);
4109     }
4110 
4111     // extra methods not in java.lang.Character --------------------------
4112 
4113     /**
4114      * {@icu} Determines if the code point is a supplementary character.
4115      * A code point is a supplementary character if and only if it is greater
4116      * than <a href=#SUPPLEMENTARY_MIN_VALUE>SUPPLEMENTARY_MIN_VALUE</a>
4117      * @param ch code point to be determined if it is in the supplementary
4118      *        plane
4119      * @return true if code point is a supplementary character
4120      * @stable ICU 2.1
4121      */
isSupplementary(int ch)4122     public static boolean isSupplementary(int ch)
4123     {
4124         return ch >= UCharacter.SUPPLEMENTARY_MIN_VALUE &&
4125                 ch <= UCharacter.MAX_VALUE;
4126     }
4127 
4128     /**
4129      * {@icu} Determines if the code point is in the BMP plane.
4130      * @param ch code point to be determined if it is not a supplementary
4131      *        character
4132      * @return true if code point is not a supplementary character
4133      * @stable ICU 2.1
4134      */
isBMP(int ch)4135     public static boolean isBMP(int ch)
4136     {
4137         return (ch >= 0 && ch <= LAST_CHAR_MASK_);
4138     }
4139 
4140     /**
4141      * {@icu} Determines whether the specified code point is a printable character
4142      * according to the Unicode standard.
4143      * @param ch code point to be determined if it is printable
4144      * @return true if the code point is a printable character
4145      * @stable ICU 2.1
4146      */
isPrintable(int ch)4147     public static boolean isPrintable(int ch)
4148     {
4149         int cat = getType(ch);
4150         // if props == 0, it will just fall through and return false
4151         return (cat != UCharacterCategory.UNASSIGNED &&
4152                 cat != UCharacterCategory.CONTROL &&
4153                 cat != UCharacterCategory.FORMAT &&
4154                 cat != UCharacterCategory.PRIVATE_USE &&
4155                 cat != UCharacterCategory.SURROGATE &&
4156                 cat != UCharacterCategory.GENERAL_OTHER_TYPES);
4157     }
4158 
4159     /**
4160      * {@icu} Determines whether the specified code point is of base form.
4161      * A code point of base form does not graphically combine with preceding
4162      * characters, and is neither a control nor a format character.
4163      * @param ch code point to be determined if it is of base form
4164      * @return true if the code point is of base form
4165      * @stable ICU 2.1
4166      */
isBaseForm(int ch)4167     public static boolean isBaseForm(int ch)
4168     {
4169         int cat = getType(ch);
4170         // if props == 0, it will just fall through and return false
4171         return cat == UCharacterCategory.DECIMAL_DIGIT_NUMBER ||
4172                 cat == UCharacterCategory.OTHER_NUMBER ||
4173                 cat == UCharacterCategory.LETTER_NUMBER ||
4174                 cat == UCharacterCategory.UPPERCASE_LETTER ||
4175                 cat == UCharacterCategory.LOWERCASE_LETTER ||
4176                 cat == UCharacterCategory.TITLECASE_LETTER ||
4177                 cat == UCharacterCategory.MODIFIER_LETTER ||
4178                 cat == UCharacterCategory.OTHER_LETTER ||
4179                 cat == UCharacterCategory.NON_SPACING_MARK ||
4180                 cat == UCharacterCategory.ENCLOSING_MARK ||
4181                 cat == UCharacterCategory.COMBINING_SPACING_MARK;
4182     }
4183 
4184     /**
4185      * {@icu} Returns the Bidirection property of a code point.
4186      * For example, 0x0041 (letter A) has the LEFT_TO_RIGHT directional
4187      * property.<br>
4188      * Result returned belongs to the interface
4189      * <a href=UCharacterDirection.html>UCharacterDirection</a>
4190      * @param ch the code point to be determined its direction
4191      * @return direction constant from UCharacterDirection.
4192      * @stable ICU 2.1
4193      */
getDirection(int ch)4194     public static int getDirection(int ch)
4195     {
4196         return UBiDiProps.INSTANCE.getClass(ch);
4197     }
4198 
4199     /**
4200      * Determines whether the code point has the "mirrored" property.
4201      * This property is set for characters that are commonly used in
4202      * Right-To-Left contexts and need to be displayed with a "mirrored"
4203      * glyph.
4204      * @param ch code point whose mirror is to be determined
4205      * @return true if the code point has the "mirrored" property
4206      * @stable ICU 2.1
4207      */
isMirrored(int ch)4208     public static boolean isMirrored(int ch)
4209     {
4210         return UBiDiProps.INSTANCE.isMirrored(ch);
4211     }
4212 
4213     /**
4214      * {@icu} Maps the specified code point to a "mirror-image" code point.
4215      * For code points with the "mirrored" property, implementations sometimes
4216      * need a "poor man's" mapping to another code point such that the default
4217      * glyph may serve as the mirror-image of the default glyph of the
4218      * specified code point.<br>
4219      * This is useful for text conversion to and from codepages with visual
4220      * order, and for displays without glyph selection capabilities.
4221      * @param ch code point whose mirror is to be retrieved
4222      * @return another code point that may serve as a mirror-image substitute,
4223      *         or ch itself if there is no such mapping or ch does not have the
4224      *         "mirrored" property
4225      * @stable ICU 2.1
4226      */
getMirror(int ch)4227     public static int getMirror(int ch)
4228     {
4229         return UBiDiProps.INSTANCE.getMirror(ch);
4230     }
4231 
4232     /**
4233      * {@icu} Maps the specified character to its paired bracket character.
4234      * For Bidi_Paired_Bracket_Type!=None, this is the same as getMirror(int).
4235      * Otherwise c itself is returned.
4236      * See http://www.unicode.org/reports/tr9/
4237      *
4238      * @param c the code point to be mapped
4239      * @return the paired bracket code point,
4240      *         or c itself if there is no such mapping
4241      *         (Bidi_Paired_Bracket_Type=None)
4242      *
4243      * @see UProperty#BIDI_PAIRED_BRACKET
4244      * @see UProperty#BIDI_PAIRED_BRACKET_TYPE
4245      * @see #getMirror(int)
4246      * @stable ICU 52
4247      */
getBidiPairedBracket(int c)4248     public static int getBidiPairedBracket(int c) {
4249         return UBiDiProps.INSTANCE.getPairedBracket(c);
4250     }
4251 
4252     /**
4253      * {@icu} Returns the combining class of the argument codepoint
4254      * @param ch code point whose combining is to be retrieved
4255      * @return the combining class of the codepoint
4256      * @stable ICU 2.1
4257      */
getCombiningClass(int ch)4258     public static int getCombiningClass(int ch)
4259     {
4260         return Normalizer2.getNFDInstance().getCombiningClass(ch);
4261     }
4262 
4263     /**
4264      * {@icu} A code point is illegal if and only if
4265      * <ul>
4266      * <li> Out of bounds, less than 0 or greater than UCharacter.MAX_VALUE
4267      * <li> A surrogate value, 0xD800 to 0xDFFF
4268      * <li> Not-a-character, having the form 0x xxFFFF or 0x xxFFFE
4269      * </ul>
4270      * Note: legal does not mean that it is assigned in this version of Unicode.
4271      * @param ch code point to determine if it is a legal code point by itself
4272      * @return true if and only if legal.
4273      * @stable ICU 2.1
4274      */
isLegal(int ch)4275     public static boolean isLegal(int ch)
4276     {
4277         if (ch < MIN_VALUE) {
4278             return false;
4279         }
4280         if (ch < Character.MIN_SURROGATE) {
4281             return true;
4282         }
4283         if (ch <= Character.MAX_SURROGATE) {
4284             return false;
4285         }
4286         if (UCharacterUtility.isNonCharacter(ch)) {
4287             return false;
4288         }
4289         return (ch <= MAX_VALUE);
4290     }
4291 
4292     /**
4293      * {@icu} A string is legal iff all its code points are legal.
4294      * A code point is illegal if and only if
4295      * <ul>
4296      * <li> Out of bounds, less than 0 or greater than UCharacter.MAX_VALUE
4297      * <li> A surrogate value, 0xD800 to 0xDFFF
4298      * <li> Not-a-character, having the form 0x xxFFFF or 0x xxFFFE
4299      * </ul>
4300      * Note: legal does not mean that it is assigned in this version of Unicode.
4301      * @param str containing code points to examin
4302      * @return true if and only if legal.
4303      * @stable ICU 2.1
4304      */
isLegal(String str)4305     public static boolean isLegal(String str)
4306     {
4307         int size = str.length();
4308         int codepoint;
4309         for (int i = 0; i < size; i += Character.charCount(codepoint))
4310         {
4311             codepoint = str.codePointAt(i);
4312             if (!isLegal(codepoint)) {
4313                 return false;
4314             }
4315         }
4316         return true;
4317     }
4318 
4319     /**
4320      * {@icu} Returns the version of Unicode data used.
4321      * @return the unicode version number used
4322      * @stable ICU 2.1
4323      */
getUnicodeVersion()4324     public static VersionInfo getUnicodeVersion()
4325     {
4326         return UCharacterProperty.INSTANCE.m_unicodeVersion_;
4327     }
4328 
4329     /**
4330      * {@icu} Returns the most current Unicode name of the argument code point, or
4331      * null if the character is unassigned or outside the range
4332      * UCharacter.MIN_VALUE and UCharacter.MAX_VALUE or does not have a name.
4333      * <br>
4334      * Note calling any methods related to code point names, e.g. get*Name*()
4335      * incurs a one-time initialisation cost to construct the name tables.
4336      * @param ch the code point for which to get the name
4337      * @return most current Unicode name
4338      * @stable ICU 2.1
4339      */
getName(int ch)4340     public static String getName(int ch)
4341     {
4342         return UCharacterName.INSTANCE.getName(ch, UCharacterNameChoice.UNICODE_CHAR_NAME);
4343     }
4344 
4345     /**
4346      * {@icu} Returns the names for each of the characters in a string
4347      * @param s string to format
4348      * @param separator string to go between names
4349      * @return string of names
4350      * @stable ICU 3.8
4351      */
getName(String s, String separator)4352     public static String getName(String s, String separator) {
4353         if (s.length() == 1) { // handle common case
4354             return getName(s.charAt(0));
4355         }
4356         int cp;
4357         StringBuilder sb = new StringBuilder();
4358         for (int i = 0; i < s.length(); i += Character.charCount(cp)) {
4359             cp = s.codePointAt(i);
4360             if (i != 0) sb.append(separator);
4361             sb.append(UCharacter.getName(cp));
4362         }
4363         return sb.toString();
4364     }
4365 
4366     /**
4367      * {@icu} Returns null.
4368      * Used to return the Unicode_1_Name property value which was of little practical value.
4369      * @param ch the code point for which to get the name
4370      * @return null
4371      * @deprecated ICU 49
4372      */
4373     @Deprecated
getName1_0(int ch)4374     public static String getName1_0(int ch)
4375     {
4376         return null;
4377     }
4378 
4379     /**
4380      * {@icu} Returns a name for a valid codepoint. Unlike, getName(int) and
4381      * getName1_0(int), this method will return a name even for codepoints that
4382      * are not assigned a name in UnicodeData.txt.
4383      *
4384      * <p>The names are returned in the following order.
4385      * <ul>
4386      * <li> Most current Unicode name if there is any
4387      * <li> Unicode 1.0 name if there is any
4388      * <li> Extended name in the form of
4389      *      "&lt;codepoint_type-codepoint_hex_digits&gt;". E.g., &lt;noncharacter-fffe&gt;
4390      * </ul>
4391      * Note calling any methods related to code point names, e.g. get*Name*()
4392      * incurs a one-time initialisation cost to construct the name tables.
4393      * @param ch the code point for which to get the name
4394      * @return a name for the argument codepoint
4395      * @stable ICU 2.6
4396      */
getExtendedName(int ch)4397     public static String getExtendedName(int ch) {
4398         return UCharacterName.INSTANCE.getName(ch, UCharacterNameChoice.EXTENDED_CHAR_NAME);
4399     }
4400 
4401     /**
4402      * {@icu} Returns the corrected name from NameAliases.txt if there is one.
4403      * Returns null if the character is unassigned or outside the range
4404      * UCharacter.MIN_VALUE and UCharacter.MAX_VALUE or does not have a name.
4405      * <br>
4406      * Note calling any methods related to code point names, e.g. get*Name*()
4407      * incurs a one-time initialisation cost to construct the name tables.
4408      * @param ch the code point for which to get the name alias
4409      * @return Unicode name alias, or null
4410      * @stable ICU 4.4
4411      */
getNameAlias(int ch)4412     public static String getNameAlias(int ch)
4413     {
4414         return UCharacterName.INSTANCE.getName(ch, UCharacterNameChoice.CHAR_NAME_ALIAS);
4415     }
4416 
4417     /**
4418      * {@icu} Returns null.
4419      * Used to return the ISO 10646 comment for a character.
4420      * The Unicode ISO_Comment property is deprecated and has no values.
4421      *
4422      * @param ch The code point for which to get the ISO comment.
4423      *           It must be the case that {@code 0 <= ch <= 0x10ffff}.
4424      * @return null
4425      * @deprecated ICU 49
4426      */
4427     @Deprecated
getISOComment(int ch)4428     public static String getISOComment(int ch)
4429     {
4430         return null;
4431     }
4432 
4433     /**
4434      * {@icu} <p>Finds a Unicode code point by its most current Unicode name and
4435      * return its code point value. All Unicode names are in uppercase.
4436      * Note calling any methods related to code point names, e.g. get*Name*()
4437      * incurs a one-time initialisation cost to construct the name tables.
4438      * @param name most current Unicode character name whose code point is to
4439      *        be returned
4440      * @return code point or -1 if name is not found
4441      * @stable ICU 2.1
4442      */
getCharFromName(String name)4443     public static int getCharFromName(String name){
4444         return UCharacterName.INSTANCE.getCharFromName(
4445                 UCharacterNameChoice.UNICODE_CHAR_NAME, name);
4446     }
4447 
4448     /**
4449      * {@icu} Returns -1.
4450      * <p>Used to find a Unicode character by its version 1.0 Unicode name and return
4451      * its code point value.
4452      * @param name Unicode 1.0 code point name whose code point is to be
4453      *             returned
4454      * @return -1
4455      * @deprecated ICU 49
4456      * @see #getName1_0(int)
4457      */
4458     @Deprecated
getCharFromName1_0(String name)4459     public static int getCharFromName1_0(String name){
4460         return -1;
4461     }
4462 
4463     /**
4464      * {@icu} <p>Find a Unicode character by either its name and return its code
4465      * point value. All Unicode names are in uppercase.
4466      * Extended names are all lowercase except for numbers and are contained
4467      * within angle brackets.
4468      * The names are searched in the following order
4469      * <ul>
4470      * <li> Most current Unicode name if there is any
4471      * <li> Unicode 1.0 name if there is any
4472      * <li> Extended name in the form of
4473      *      "&lt;codepoint_type-codepoint_hex_digits&gt;". E.g. &lt;noncharacter-FFFE&gt;
4474      * </ul>
4475      * Note calling any methods related to code point names, e.g. get*Name*()
4476      * incurs a one-time initialisation cost to construct the name tables.
4477      * @param name codepoint name
4478      * @return code point associated with the name or -1 if the name is not
4479      *         found.
4480      * @stable ICU 2.6
4481      */
getCharFromExtendedName(String name)4482     public static int getCharFromExtendedName(String name){
4483         return UCharacterName.INSTANCE.getCharFromName(
4484                 UCharacterNameChoice.EXTENDED_CHAR_NAME, name);
4485     }
4486 
4487     /**
4488      * {@icu} <p>Find a Unicode character by its corrected name alias and return
4489      * its code point value. All Unicode names are in uppercase.
4490      * Note calling any methods related to code point names, e.g. get*Name*()
4491      * incurs a one-time initialisation cost to construct the name tables.
4492      * @param name Unicode name alias whose code point is to be returned
4493      * @return code point or -1 if name is not found
4494      * @stable ICU 4.4
4495      */
getCharFromNameAlias(String name)4496     public static int getCharFromNameAlias(String name){
4497         return UCharacterName.INSTANCE.getCharFromName(UCharacterNameChoice.CHAR_NAME_ALIAS, name);
4498     }
4499 
4500     /**
4501      * {@icu} Return the Unicode name for a given property, as given in the
4502      * Unicode database file PropertyAliases.txt.  Most properties
4503      * have more than one name.  The nameChoice determines which one
4504      * is returned.
4505      *
4506      * In addition, this function maps the property
4507      * UProperty.GENERAL_CATEGORY_MASK to the synthetic names "gcm" /
4508      * "General_Category_Mask".  These names are not in
4509      * PropertyAliases.txt.
4510      *
4511      * @param property UProperty selector.
4512      *
4513      * @param nameChoice UProperty.NameChoice selector for which name
4514      * to get.  All properties have a long name.  Most have a short
4515      * name, but some do not.  Unicode allows for additional names; if
4516      * present these will be returned by UProperty.NameChoice.LONG + i,
4517      * where i=1, 2,...
4518      *
4519      * @return a name, or null if Unicode explicitly defines no name
4520      * ("n/a") for a given property/nameChoice.  If a given nameChoice
4521      * throws an exception, then all larger values of nameChoice will
4522      * throw an exception.  If null is returned for a given
4523      * nameChoice, then other nameChoice values may return non-null
4524      * results.
4525      *
4526      * @exception IllegalArgumentException thrown if property or
4527      * nameChoice are invalid.
4528      *
4529      * @see UProperty
4530      * @see UProperty.NameChoice
4531      * @stable ICU 2.4
4532      */
getPropertyName(int property, int nameChoice)4533     public static String getPropertyName(int property,
4534             int nameChoice) {
4535         return UPropertyAliases.INSTANCE.getPropertyName(property, nameChoice);
4536     }
4537 
4538     /**
4539      * {@icu} Return the UProperty selector for a given property name, as
4540      * specified in the Unicode database file PropertyAliases.txt.
4541      * Short, long, and any other variants are recognized.
4542      *
4543      * In addition, this function maps the synthetic names "gcm" /
4544      * "General_Category_Mask" to the property
4545      * UProperty.GENERAL_CATEGORY_MASK.  These names are not in
4546      * PropertyAliases.txt.
4547      *
4548      * @param propertyAlias the property name to be matched.  The name
4549      * is compared using "loose matching" as described in
4550      * PropertyAliases.txt.
4551      *
4552      * @return a UProperty enum.
4553      *
4554      * @exception IllegalArgumentException thrown if propertyAlias
4555      * is not recognized.
4556      *
4557      * @see UProperty
4558      * @stable ICU 2.4
4559      */
getPropertyEnum(CharSequence propertyAlias)4560     public static int getPropertyEnum(CharSequence propertyAlias) {
4561         int propEnum = UPropertyAliases.INSTANCE.getPropertyEnum(propertyAlias);
4562         if (propEnum == UProperty.UNDEFINED) {
4563             throw new IllegalIcuArgumentException("Invalid name: " + propertyAlias);
4564         }
4565         return propEnum;
4566     }
4567 
4568     /**
4569      * {@icu} Return the Unicode name for a given property value, as given in
4570      * the Unicode database file PropertyValueAliases.txt.  Most
4571      * values have more than one name.  The nameChoice determines
4572      * which one is returned.
4573      *
4574      * Note: Some of the names in PropertyValueAliases.txt can only be
4575      * retrieved using UProperty.GENERAL_CATEGORY_MASK, not
4576      * UProperty.GENERAL_CATEGORY.  These include: "C" / "Other", "L" /
4577      * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P"
4578      * / "Punctuation", "S" / "Symbol", and "Z" / "Separator".
4579      *
4580      * @param property UProperty selector constant.
4581      * UProperty.INT_START &lt;= property &lt; UProperty.INT_LIMIT or
4582      * UProperty.BINARY_START &lt;= property &lt; UProperty.BINARY_LIMIT or
4583      * UProperty.MASK_START &lt; = property &lt; UProperty.MASK_LIMIT.
4584      * If out of range, null is returned.
4585      *
4586      * @param value selector for a value for the given property.  In
4587      * general, valid values range from 0 up to some maximum.  There
4588      * are a few exceptions: (1.) UProperty.BLOCK values begin at the
4589      * non-zero value BASIC_LATIN.getID().  (2.)
4590      * UProperty.CANONICAL_COMBINING_CLASS values are not contiguous
4591      * and range from 0..240.  (3.)  UProperty.GENERAL_CATEGORY_MASK values
4592      * are mask values produced by left-shifting 1 by
4593      * UCharacter.getType().  This allows grouped categories such as
4594      * [:L:] to be represented.  Mask values are non-contiguous.
4595      *
4596      * @param nameChoice UProperty.NameChoice selector for which name
4597      * to get.  All values have a long name.  Most have a short name,
4598      * but some do not.  Unicode allows for additional names; if
4599      * present these will be returned by UProperty.NameChoice.LONG + i,
4600      * where i=1, 2,...
4601      *
4602      * @return a name, or null if Unicode explicitly defines no name
4603      * ("n/a") for a given property/value/nameChoice.  If a given
4604      * nameChoice throws an exception, then all larger values of
4605      * nameChoice will throw an exception.  If null is returned for a
4606      * given nameChoice, then other nameChoice values may return
4607      * non-null results.
4608      *
4609      * @exception IllegalArgumentException thrown if property, value,
4610      * or nameChoice are invalid.
4611      *
4612      * @see UProperty
4613      * @see UProperty.NameChoice
4614      * @stable ICU 2.4
4615      */
getPropertyValueName(int property, int value, int nameChoice)4616     public static String getPropertyValueName(int property,
4617             int value,
4618             int nameChoice)
4619     {
4620         if ((property == UProperty.CANONICAL_COMBINING_CLASS
4621                 || property == UProperty.LEAD_CANONICAL_COMBINING_CLASS
4622                 || property == UProperty.TRAIL_CANONICAL_COMBINING_CLASS)
4623                 && value >= UCharacter.getIntPropertyMinValue(
4624                         UProperty.CANONICAL_COMBINING_CLASS)
4625                         && value <= UCharacter.getIntPropertyMaxValue(
4626                                 UProperty.CANONICAL_COMBINING_CLASS)
4627                                 && nameChoice >= 0 && nameChoice < UProperty.NameChoice.COUNT) {
4628             // this is hard coded for the valid cc
4629             // because PropertyValueAliases.txt does not contain all of them
4630             try {
4631                 return UPropertyAliases.INSTANCE.getPropertyValueName(property, value,
4632                         nameChoice);
4633             }
4634             catch (IllegalArgumentException e) {
4635                 return null;
4636             }
4637         }
4638         return UPropertyAliases.INSTANCE.getPropertyValueName(property, value, nameChoice);
4639     }
4640 
4641     /**
4642      * {@icu} Return the property value integer for a given value name, as
4643      * specified in the Unicode database file PropertyValueAliases.txt.
4644      * Short, long, and any other variants are recognized.
4645      *
4646      * Note: Some of the names in PropertyValueAliases.txt will only be
4647      * recognized with UProperty.GENERAL_CATEGORY_MASK, not
4648      * UProperty.GENERAL_CATEGORY.  These include: "C" / "Other", "L" /
4649      * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P"
4650      * / "Punctuation", "S" / "Symbol", and "Z" / "Separator".
4651      *
4652      * @param property UProperty selector constant.
4653      * UProperty.INT_START &lt;= property &lt; UProperty.INT_LIMIT or
4654      * UProperty.BINARY_START &lt;= property &lt; UProperty.BINARY_LIMIT or
4655      * UProperty.MASK_START &lt; = property &lt; UProperty.MASK_LIMIT.
4656      * Only these properties can be enumerated.
4657      *
4658      * @param valueAlias the value name to be matched.  The name is
4659      * compared using "loose matching" as described in
4660      * PropertyValueAliases.txt.
4661      *
4662      * @return a value integer.  Note: UProperty.GENERAL_CATEGORY
4663      * values are mask values produced by left-shifting 1 by
4664      * UCharacter.getType().  This allows grouped categories such as
4665      * [:L:] to be represented.
4666      *
4667      * @see UProperty
4668      * @throws IllegalArgumentException if property is not a valid UProperty
4669      *         selector or valueAlias is not a value of this property
4670      * @stable ICU 2.4
4671      */
getPropertyValueEnum(int property, CharSequence valueAlias)4672     public static int getPropertyValueEnum(int property, CharSequence valueAlias) {
4673         int propEnum = UPropertyAliases.INSTANCE.getPropertyValueEnum(property, valueAlias);
4674         if (propEnum == UProperty.UNDEFINED) {
4675             throw new IllegalIcuArgumentException("Invalid name: " + valueAlias);
4676         }
4677         return propEnum;
4678     }
4679 
4680     /**
4681      * Same as {@link #getPropertyValueEnum(int, CharSequence)}, except doesn't throw exception. Instead, returns UProperty.UNDEFINED.
4682      * @param property  Same as {@link #getPropertyValueEnum(int, CharSequence)}
4683      * @param valueAlias    Same as {@link #getPropertyValueEnum(int, CharSequence)}
4684      * @return returns UProperty.UNDEFINED if the value is not valid, otherwise the value.
4685      * @internal
4686      * @deprecated This API is ICU internal only.
4687      */
4688     @Deprecated
getPropertyValueEnumNoThrow(int property, CharSequence valueAlias)4689     public static int getPropertyValueEnumNoThrow(int property, CharSequence valueAlias) {
4690         return UPropertyAliases.INSTANCE.getPropertyValueEnumNoThrow(property, valueAlias);
4691     }
4692 
4693 
4694     /**
4695      * {@icu} Returns a code point corresponding to the two surrogate code units.
4696      *
4697      * @param lead the lead char
4698      * @param trail the trail char
4699      * @return code point if surrogate characters are valid.
4700      * @exception IllegalArgumentException thrown when the code units do
4701      *            not form a valid code point
4702      * @stable ICU 2.1
4703      */
getCodePoint(char lead, char trail)4704     public static int getCodePoint(char lead, char trail)
4705     {
4706         if (Character.isSurrogatePair(lead, trail)) {
4707             return Character.toCodePoint(lead, trail);
4708         }
4709         throw new IllegalArgumentException("Illegal surrogate characters");
4710     }
4711 
4712     /**
4713      * {@icu} Returns the code point corresponding to the BMP code point.
4714      *
4715      * @param char16 the BMP code point
4716      * @return code point if argument is a valid character.
4717      * @exception IllegalArgumentException thrown when char16 is not a valid
4718      *            code point
4719      * @stable ICU 2.1
4720      */
getCodePoint(char char16)4721     public static int getCodePoint(char char16)
4722     {
4723         if (UCharacter.isLegal(char16)) {
4724             return char16;
4725         }
4726         throw new IllegalArgumentException("Illegal codepoint");
4727     }
4728 
4729     /**
4730      * Implementation of UCaseProps.ContextIterator, iterates over a String.
4731      * See ustrcase.c/utf16_caseContextIterator().
4732      */
4733     private static class StringContextIterator implements UCaseProps.ContextIterator {
4734         /**
4735          * Constructor.
4736          * @param s String to iterate over.
4737          */
StringContextIterator(String s)4738         StringContextIterator(String s) {
4739             this.s=s;
4740             limit=s.length();
4741             cpStart=cpLimit=index=0;
4742             dir=0;
4743         }
4744 
4745         /**
4746          * Set the iteration limit for nextCaseMapCP() to an index within the string.
4747          * If the limit parameter is negative or past the string, then the
4748          * string length is restored as the iteration limit.
4749          *
4750          * <p>This limit does not affect the next() function which always
4751          * iterates to the very end of the string.
4752          *
4753          * @param lim The iteration limit.
4754          */
setLimit(int lim)4755         public void setLimit(int lim) {
4756             if(0<=lim && lim<=s.length()) {
4757                 limit=lim;
4758             } else {
4759                 limit=s.length();
4760             }
4761         }
4762 
4763         /**
4764          * Move to the iteration limit without fetching code points up to there.
4765          */
moveToLimit()4766         public void moveToLimit() {
4767             cpStart=cpLimit=limit;
4768         }
4769 
4770         /**
4771          * Iterate forward through the string to fetch the next code point
4772          * to be case-mapped, and set the context indexes for it.
4773          *
4774          * <p>When the iteration limit is reached (and -1 is returned),
4775          * getCPStart() will be at the iteration limit.
4776          *
4777          * <p>Iteration with next() does not affect the position for nextCaseMapCP().
4778          *
4779          * @return The next code point to be case-mapped, or <0 when the iteration is done.
4780          */
nextCaseMapCP()4781         public int nextCaseMapCP() {
4782             cpStart=cpLimit;
4783             if(cpLimit<limit) {
4784                 int c=s.codePointAt(cpLimit);
4785                 cpLimit+=Character.charCount(c);
4786                 return c;
4787             } else {
4788                 return -1;
4789             }
4790         }
4791 
4792         /**
4793          * Returns the start of the code point that was last returned
4794          * by nextCaseMapCP().
4795          */
getCPStart()4796         public int getCPStart() {
4797             return cpStart;
4798         }
4799 
4800         /**
4801          * Returns the limit of the code point that was last returned
4802          * by nextCaseMapCP().
4803          */
getCPLimit()4804         public int getCPLimit() {
4805             return cpLimit;
4806         }
4807 
4808         // implement UCaseProps.ContextIterator
4809         // The following code is not used anywhere in this private class
reset(int direction)4810         public void reset(int direction) {
4811             if(direction>0) {
4812                 /* reset for forward iteration */
4813                 dir=1;
4814                 index=cpLimit;
4815             } else if(direction<0) {
4816                 /* reset for backward iteration */
4817                 dir=-1;
4818                 index=cpStart;
4819             } else {
4820                 // not a valid direction
4821                 dir=0;
4822                 index=0;
4823             }
4824         }
4825 
next()4826         public int next() {
4827             int c;
4828 
4829             if(dir>0 && index<s.length()) {
4830                 c=s.codePointAt(index);
4831                 index+=Character.charCount(c);
4832                 return c;
4833             } else if(dir<0 && index>0) {
4834                 c=s.codePointBefore(index);
4835                 index-=Character.charCount(c);
4836                 return c;
4837             }
4838             return -1;
4839         }
4840 
4841         // variables
4842         protected String s;
4843         protected int index, limit, cpStart, cpLimit;
4844         protected int dir; // 0=initial state  >0=forward  <0=backward
4845     }
4846 
4847     /**
4848      * Returns the uppercase version of the argument string.
4849      * Casing is dependent on the default locale and context-sensitive.
4850      * @param str source string to be performed on
4851      * @return uppercase version of the argument string
4852      * @stable ICU 2.1
4853      */
toUpperCase(String str)4854     public static String toUpperCase(String str)
4855     {
4856         return toUpperCase(ULocale.getDefault(), str);
4857     }
4858 
4859     /**
4860      * Returns the lowercase version of the argument string.
4861      * Casing is dependent on the default locale and context-sensitive
4862      * @param str source string to be performed on
4863      * @return lowercase version of the argument string
4864      * @stable ICU 2.1
4865      */
toLowerCase(String str)4866     public static String toLowerCase(String str)
4867     {
4868         return toLowerCase(ULocale.getDefault(), str);
4869     }
4870 
4871     /**
4872      * <p>Returns the titlecase version of the argument string.
4873      * <p>Position for titlecasing is determined by the argument break
4874      * iterator, hence the user can customize his break iterator for
4875      * a specialized titlecasing. In this case only the forward iteration
4876      * needs to be implemented.
4877      * If the break iterator passed in is null, the default Unicode algorithm
4878      * will be used to determine the titlecase positions.
4879      *
4880      * <p>Only positions returned by the break iterator will be title cased,
4881      * character in between the positions will all be in lower case.
4882      * <p>Casing is dependent on the default locale and context-sensitive
4883      * @param str source string to be performed on
4884      * @param breakiter break iterator to determine the positions in which
4885      *        the character should be title cased.
4886      * @return lowercase version of the argument string
4887      * @stable ICU 2.6
4888      */
toTitleCase(String str, BreakIterator breakiter)4889     public static String toTitleCase(String str, BreakIterator breakiter)
4890     {
4891         return toTitleCase(ULocale.getDefault(), str, breakiter);
4892     }
4893 
4894     /**
4895      * Returns the uppercase version of the argument string.
4896      * Casing is dependent on the argument locale and context-sensitive.
4897      * @param locale which string is to be converted in
4898      * @param str source string to be performed on
4899      * @return uppercase version of the argument string
4900      * @stable ICU 2.1
4901      */
toUpperCase(Locale locale, String str)4902     public static String toUpperCase(Locale locale, String str)
4903     {
4904         return toUpperCase(ULocale.forLocale(locale), str);
4905     }
4906 
4907     /**
4908      * Returns the uppercase version of the argument string.
4909      * Casing is dependent on the argument locale and context-sensitive.
4910      * @param locale which string is to be converted in
4911      * @param str source string to be performed on
4912      * @return uppercase version of the argument string
4913      * @stable ICU 3.2
4914      */
toUpperCase(ULocale locale, String str)4915     public static String toUpperCase(ULocale locale, String str) {
4916         StringContextIterator iter = new StringContextIterator(str);
4917         StringBuilder result = new StringBuilder(str.length());
4918         int[] locCache = new int[1];
4919         int c;
4920 
4921         if (locale == null) {
4922             locale = ULocale.getDefault();
4923         }
4924         locCache[0]=0;
4925 
4926         while((c=iter.nextCaseMapCP())>=0) {
4927             c = UCaseProps.INSTANCE.toFullUpper(c, iter, result, locale, locCache);
4928 
4929             /* decode the result */
4930             if(c<0) {
4931                 /* (not) original code point */
4932                 c=~c;
4933             } else if(c<=UCaseProps.MAX_STRING_LENGTH) {
4934                 /* mapping already appended to result */
4935                 continue;
4936                 /* } else { append single-code point mapping */
4937             }
4938             result.appendCodePoint(c);
4939         }
4940         return result.toString();
4941     }
4942 
4943     /**
4944      * Returns the lowercase version of the argument string.
4945      * Casing is dependent on the argument locale and context-sensitive
4946      * @param locale which string is to be converted in
4947      * @param str source string to be performed on
4948      * @return lowercase version of the argument string
4949      * @stable ICU 2.1
4950      */
toLowerCase(Locale locale, String str)4951     public static String toLowerCase(Locale locale, String str)
4952     {
4953         return toLowerCase(ULocale.forLocale(locale), str);
4954     }
4955 
4956     /**
4957      * Returns the lowercase version of the argument string.
4958      * Casing is dependent on the argument locale and context-sensitive
4959      * @param locale which string is to be converted in
4960      * @param str source string to be performed on
4961      * @return lowercase version of the argument string
4962      * @stable ICU 3.2
4963      */
toLowerCase(ULocale locale, String str)4964     public static String toLowerCase(ULocale locale, String str) {
4965         StringContextIterator iter = new StringContextIterator(str);
4966         StringBuilder result = new StringBuilder(str.length());
4967         int[] locCache = new int[1];
4968         int c;
4969 
4970         if (locale == null) {
4971             locale = ULocale.getDefault();
4972         }
4973         locCache[0]=0;
4974 
4975         while((c=iter.nextCaseMapCP())>=0) {
4976             c = UCaseProps.INSTANCE.toFullLower(c, iter, result, locale, locCache);
4977 
4978             /* decode the result */
4979             if(c<0) {
4980                 /* (not) original code point */
4981                 c=~c;
4982             } else if(c<=UCaseProps.MAX_STRING_LENGTH) {
4983                 /* mapping already appended to result */
4984                 continue;
4985                 /* } else { append single-code point mapping */
4986             }
4987             result.appendCodePoint(c);
4988         }
4989         return result.toString();
4990     }
4991 
4992     /**
4993      * <p>Returns the titlecase version of the argument string.
4994      * <p>Position for titlecasing is determined by the argument break
4995      * iterator, hence the user can customize his break iterator for
4996      * a specialized titlecasing. In this case only the forward iteration
4997      * needs to be implemented.
4998      * If the break iterator passed in is null, the default Unicode algorithm
4999      * will be used to determine the titlecase positions.
5000      *
5001      * <p>Only positions returned by the break iterator will be title cased,
5002      * character in between the positions will all be in lower case.
5003      * <p>Casing is dependent on the argument locale and context-sensitive
5004      * @param locale which string is to be converted in
5005      * @param str source string to be performed on
5006      * @param breakiter break iterator to determine the positions in which
5007      *        the character should be title cased.
5008      * @return lowercase version of the argument string
5009      * @stable ICU 2.6
5010      */
toTitleCase(Locale locale, String str, BreakIterator breakiter)5011     public static String toTitleCase(Locale locale, String str,
5012             BreakIterator breakiter)
5013     {
5014         return toTitleCase(ULocale.forLocale(locale), str, breakiter);
5015     }
5016 
5017     /**
5018      * <p>Returns the titlecase version of the argument string.
5019      * <p>Position for titlecasing is determined by the argument break
5020      * iterator, hence the user can customize his break iterator for
5021      * a specialized titlecasing. In this case only the forward iteration
5022      * needs to be implemented.
5023      * If the break iterator passed in is null, the default Unicode algorithm
5024      * will be used to determine the titlecase positions.
5025      *
5026      * <p>Only positions returned by the break iterator will be title cased,
5027      * character in between the positions will all be in lower case.
5028      * <p>Casing is dependent on the argument locale and context-sensitive
5029      * @param locale which string is to be converted in
5030      * @param str source string to be performed on
5031      * @param titleIter break iterator to determine the positions in which
5032      *        the character should be title cased.
5033      * @return lowercase version of the argument string
5034      * @stable ICU 3.2
5035      */
toTitleCase(ULocale locale, String str, BreakIterator titleIter)5036     public static String toTitleCase(ULocale locale, String str,
5037             BreakIterator titleIter) {
5038         return toTitleCase(locale, str, titleIter, 0);
5039     }
5040 
    /**
     * <p>Returns the titlecase version of the argument string.
     * <p>Position for titlecasing is determined by the argument break
     * iterator, hence the user can customize his break iterator for
     * a specialized titlecasing. In this case only the forward iteration
     * needs to be implemented.
     * If the break iterator passed in is null, a word break iterator for the
     * locale is used to determine the titlecase positions.
     *
     * <p>Only positions returned by the break iterator will be title cased,
     * character in between the positions will all be in lower case
     * (unless TITLECASE_NO_LOWERCASE is set, in which case they are copied unchanged).
     * <p>Casing is dependent on the argument locale and context-sensitive
     * @param locale which string is to be converted in; if null, the default
     *        locale is used
     * @param str source string to be performed on
     * @param titleIter break iterator to determine the positions in which
     *        the character should be title cased.
     * @param options bit set to modify the titlecasing operation
     * @return titlecase version of the argument string
     * @stable ICU 3.8
     * @see #TITLECASE_NO_LOWERCASE
     * @see #TITLECASE_NO_BREAK_ADJUSTMENT
     */
    public static String toTitleCase(ULocale locale, String str,
            BreakIterator titleIter,
            int options) {
        StringContextIterator iter = new StringContextIterator(str);
        StringBuilder result = new StringBuilder(str.length());
        int[] locCache = new int[1];
        int c, nc, srcLength = str.length();

        // a null locale falls back to the default locale
        if (locale == null) {
            locale = ULocale.getDefault();
        }
        locCache[0]=0;

        // no caller-supplied iterator: use a word break iterator for the locale
        if(titleIter == null) {
            titleIter = BreakIterator.getWordInstance(locale);
        }
        titleIter.setText(str);

        int prev, titleStart, index;
        boolean isFirstIndex;
        boolean isDutch = locale.getLanguage().equals("nl");
        // true until the Dutch "IJ" special case has been applied in the current segment
        boolean FirstIJ = true;

        /* set up local variables */
        prev=0;
        isFirstIndex=true;

        /* titlecasing loop */
        while(prev<srcLength) {
            /* find next index where to titlecase */
            if(isFirstIndex) {
                isFirstIndex=false;
                index=titleIter.first();
            } else {
                index=titleIter.next();
            }
            // treat "no more boundaries" or an out-of-range boundary as end of string
            if(index==BreakIterator.DONE || index>srcLength) {
                index=srcLength;
            }

            /*
             * Unicode 4 & 5 section 3.13 Default Case Operations:
             *
             * R3  toTitlecase(X): Find the word boundaries based on Unicode Standard Annex
             * #29, "Text Boundaries." Between each pair of word boundaries, find the first
             * cased character F. If F exists, map F to default_title(F); then map each
             * subsequent character C to default_lower(C).
             *
             * In this implementation, segment [prev..index[ into 3 parts:
             * a) uncased characters (copy as-is) [prev..titleStart[
             * b) first case letter (titlecase)         [titleStart..titleLimit[
             * c) subsequent characters (lowercase)                 [titleLimit..index[
             */
            if(prev<index) {
                /* find and copy uncased characters [prev..titleStart[ */
                iter.setLimit(index);
                c=iter.nextCaseMapCP();
                // break adjustment is skipped when TITLECASE_NO_BREAK_ADJUSTMENT is set
                if((options&TITLECASE_NO_BREAK_ADJUSTMENT)==0
                        && UCaseProps.NONE==UCaseProps.INSTANCE.getType(c)) {
                    // skip further uncased code points; stops at the first cased one or at the limit
                    while((c=iter.nextCaseMapCP())>=0
                            && UCaseProps.NONE==UCaseProps.INSTANCE.getType(c)) {}
                    titleStart=iter.getCPStart();
                    if(prev<titleStart) {
                        result.append(str, prev, titleStart);
                    }
                } else {
                    titleStart=prev;
                }

                if(titleStart<index) {
                    FirstIJ = true;
                    /* titlecase c which is from titleStart */
                    c = UCaseProps.INSTANCE.toFullTitle(c, iter, result, locale, locCache);

                    /* decode the result and lowercase up to index */
                    for(;;) {
                        if(c<0) {
                            /* (not) original code point */
                            c=~c;
                            result.appendCodePoint(c);
                        } else if(c<=UCaseProps.MAX_STRING_LENGTH) {
                            /* mapping already appended to result */
                        } else {
                            /* append single-code point mapping */
                            result.appendCodePoint(c);
                        }

                        if((options&TITLECASE_NO_LOWERCASE)!=0) {
                            /* Optionally just copy the rest of the word unchanged. */

                            int titleLimit=iter.getCPLimit();
                            if(titleLimit<index) {
                                /* Special Case - Dutch IJ Titlecasing */
                                // a titlecased 'I' followed by 'j' becomes "IJ"; the rest is copied as-is
                                if (isDutch && c == 0x0049 && str.charAt(titleLimit) == 'j') {
                                    result.append('J').append(str, titleLimit + 1, index);
                                } else {
                                    result.append(str, titleLimit, index);
                                }
                            }
                            iter.moveToLimit();
                            break;
                        } else if((nc=iter.nextCaseMapCP())>=0) {
                            // Dutch: 'J' or 'j' right after a titlecased 'I' is emitted as 'J',
                            // at most once per segment (guarded by FirstIJ)
                            if (isDutch && (nc == 0x004A ||  nc == 0x006A)
                                    && (c == 0x0049) && (FirstIJ == true)) {
                                c = 0x004A; /* J */
                                FirstIJ = false;
                            } else {
                                /* Normal operation: Lowercase the rest of the word. */
                                c = UCaseProps.INSTANCE.toFullLower(nc, iter, result, locale,
                                        locCache);
                            }
                        } else {
                            // reached the segment limit
                            break;
                        }
                    }
                }
            }

            prev=index;
        }
        return result.toString();
    }
5185 
5186 
    // General-category bit mask (decimal digits, other letters, modifier letters).
    // toTitleFirst() stops scanning and returns the string unchanged when it meets
    // a character in one of these categories before any cased character.
    private static final int BREAK_MASK =
            (1<<UCharacterCategory.DECIMAL_DIGIT_NUMBER)
            | (1<<UCharacterCategory.OTHER_LETTER)
            | (1<<UCharacterCategory.MODIFIER_LETTER);
5191 
5192     /**
5193      * Return a string with just the first word titlecased, for menus and UI, etc. This does not affect most of the string,
5194      * and sometimes has no effect at all; the original string is returned whenever casing
5195      * would not be appropriate for the first word (such as for CJK characters or initial numbers).
5196      * Initial non-letters are skipped in order to find the character to change.
5197      * Characters past the first affected are left untouched: see also TITLECASE_NO_LOWERCASE.
5198      * <p>Examples:
5199      * <table border='1'><tr><th>Source</th><th>Result</th><th>Locale</th></tr>
5200      * <tr><td>anglo-American locale</td><td>Anglo-American locale</td></tr>
5201      * <tr><td>“contact us”</td><td>“Contact us”</td></tr>
5202      * <tr><td>49ers win!</td><td>49ers win!</td></tr>
5203      * <tr><td>丰(abc)</td><td>丰(abc)</td></tr>
5204      * <tr><td>«ijs»</td><td>«Ijs»</td></tr>
5205      * <tr><td>«ijs»</td><td>«IJs»</td><td>nl-BE</td></tr>
5206      * <tr><td>«ijs»</td><td>«İjs»</td><td>tr-DE</td></tr>
5207      * </table>
5208      * @param locale the locale for accessing exceptional behavior (eg for tr).
5209      * @param str the source string to change
5210      * @return the modified string, or the original if no modifications were necessary.
5211      * @internal
5212      * @deprecated ICU internal only
5213      */
5214     @Deprecated
toTitleFirst(ULocale locale, String str)5215     public static String toTitleFirst(ULocale locale, String str) {
5216         int c = 0;
5217         for (int i = 0; i < str.length(); i += UCharacter.charCount(c)) {
5218             c = UCharacter.codePointAt(str, i);
5219             int propertyMask = UCharacter.getIntPropertyValue(c, UProperty.GENERAL_CATEGORY_MASK);
5220             if ((propertyMask & BREAK_MASK) != 0) { // handle "49ers", initial CJK
5221                 break;
5222             }
5223             if (UCaseProps.INSTANCE.getType(c) == UCaseProps.NONE) {
5224                 continue;
5225             }
5226 
5227             // we now have the first cased character
5228             // What we really want is something like:
5229             // String titled = UCharacter.toTitleCase(locale, str, i, outputCharsTaken);
5230             // That is, just give us the titlecased string, for the locale, at i and following,
5231             // and tell us how many characters are replaced.
5232             // The following won't work completely: it needs some more substantial changes to UCaseProps
5233 
5234             String substring = str.substring(i, i+UCharacter.charCount(c));
5235             String titled = UCharacter.toTitleCase(locale, substring, BreakIterator.getSentenceInstance(locale), 0);
5236 
5237             // skip if no change
5238             if (titled.codePointAt(0) == c) {
5239                 // Using 0 is safe, since any change in titling will not have first initial character
5240                 break;
5241             }
5242             StringBuilder result = new StringBuilder(str.length()).append(str, 0, i);
5243             int startOfSuffix;
5244 
5245             // handle dutch, but check first for 'i', since that's faster. Should be built into UCaseProps.
5246 
5247             if (c == 'i' && locale.getLanguage().equals("nl") && i < str.length() && str.charAt(i+1) == 'j') {
5248                 result.append("IJ");
5249                 startOfSuffix = 2;
5250             } else {
5251                 result.append(titled);
5252                 startOfSuffix = i + UCharacter.charCount(c);
5253             }
5254 
5255             // add the remainder, and return
5256             return result.append(str, startOfSuffix, str.length()).toString();
5257         }
5258         return str; // no change
5259     }
5260 
5261     /**
5262      * {@icu} <p>Returns the titlecase version of the argument string.
5263      * <p>Position for titlecasing is determined by the argument break
5264      * iterator, hence the user can customize his break iterator for
5265      * a specialized titlecasing. In this case only the forward iteration
5266      * needs to be implemented.
5267      * If the break iterator passed in is null, the default Unicode algorithm
5268      * will be used to determine the titlecase positions.
5269      *
5270      * <p>Only positions returned by the break iterator will be title cased,
5271      * character in between the positions will all be in lower case.
5272      * <p>Casing is dependent on the argument locale and context-sensitive
5273      * @param locale which string is to be converted in
5274      * @param str source string to be performed on
5275      * @param titleIter break iterator to determine the positions in which
5276      *        the character should be title cased.
5277      * @param options bit set to modify the titlecasing operation
5278      * @return lowercase version of the argument string
5279      * @see #TITLECASE_NO_LOWERCASE
5280      * @see #TITLECASE_NO_BREAK_ADJUSTMENT
5281      * @stable ICU 54
5282      */
toTitleCase(Locale locale, String str, BreakIterator titleIter, int options)5283     public static String toTitleCase(Locale locale, String str,
5284             BreakIterator titleIter,
5285             int options) {
5286         return toTitleCase(ULocale.forLocale(locale), str, titleIter, options);
5287     }
5288 
5289     /**
5290      * {@icu} The given character is mapped to its case folding equivalent according
5291      * to UnicodeData.txt and CaseFolding.txt; if the character has no case
5292      * folding equivalent, the character itself is returned.
5293      *
5294      * <p>This function only returns the simple, single-code point case mapping.
5295      * Full case mappings should be used whenever possible because they produce
5296      * better results by working on whole strings.
5297      * They can map to a result string with a different length as appropriate.
5298      * Full case mappings are applied by the case mapping functions
5299      * that take String parameters rather than code points (int).
5300      * See also the User Guide chapter on C/POSIX migration:
5301      * http://www.icu-project.org/userguide/posix.html#case_mappings
5302      *
5303      * @param ch             the character to be converted
5304      * @param defaultmapping Indicates whether the default mappings defined in
5305      *                       CaseFolding.txt are to be used, otherwise the
5306      *                       mappings for dotted I and dotless i marked with
5307      *                       'T' in CaseFolding.txt are included.
5308      * @return               the case folding equivalent of the character, if
5309      *                       any; otherwise the character itself.
5310      * @see                  #foldCase(String, boolean)
5311      * @stable ICU 2.1
5312      */
foldCase(int ch, boolean defaultmapping)5313     public static int foldCase(int ch, boolean defaultmapping) {
5314         return foldCase(ch, defaultmapping ? FOLD_CASE_DEFAULT : FOLD_CASE_EXCLUDE_SPECIAL_I);
5315     }
5316 
5317     /**
5318      * {@icu} The given string is mapped to its case folding equivalent according to
5319      * UnicodeData.txt and CaseFolding.txt; if any character has no case
5320      * folding equivalent, the character itself is returned.
5321      * "Full", multiple-code point case folding mappings are returned here.
5322      * For "simple" single-code point mappings use the API
5323      * foldCase(int ch, boolean defaultmapping).
5324      * @param str            the String to be converted
5325      * @param defaultmapping Indicates whether the default mappings defined in
5326      *                       CaseFolding.txt are to be used, otherwise the
5327      *                       mappings for dotted I and dotless i marked with
5328      *                       'T' in CaseFolding.txt are included.
5329      * @return               the case folding equivalent of the character, if
5330      *                       any; otherwise the character itself.
5331      * @see                  #foldCase(int, boolean)
5332      * @stable ICU 2.1
5333      */
foldCase(String str, boolean defaultmapping)5334     public static String foldCase(String str, boolean defaultmapping) {
5335         return foldCase(str, defaultmapping ? FOLD_CASE_DEFAULT : FOLD_CASE_EXCLUDE_SPECIAL_I);
5336     }
5337 
    /**
     * {@icu} Option value for case folding: use default mappings defined in
     * CaseFolding.txt.
     * This is the option that the boolean foldCase overloads select when
     * their defaultmapping argument is true.
     * @see #FOLD_CASE_EXCLUDE_SPECIAL_I
     * @stable ICU 2.6
     */
    public static final int FOLD_CASE_DEFAULT    =      0x0000;
    /**
     * {@icu} Option value for case folding:
     * Use the modified set of mappings provided in CaseFolding.txt to handle dotted I
     * and dotless i appropriately for Turkic languages (tr, az).
     * This is the option that the boolean foldCase overloads select when
     * their defaultmapping argument is false.
     *
     * <p>Before Unicode 3.2, CaseFolding.txt contains mappings marked with 'I' that
     * are to be included for default mappings and
     * excluded for the Turkic-specific mappings.
     *
     * <p>Unicode 3.2 CaseFolding.txt instead contains mappings marked with 'T' that
     * are to be excluded for default mappings and
     * included for the Turkic-specific mappings.
     *
     * @see #FOLD_CASE_DEFAULT
     * @stable ICU 2.6
     */
    public static final int FOLD_CASE_EXCLUDE_SPECIAL_I = 0x0001;
5360 
5361     /**
5362      * {@icu} The given character is mapped to its case folding equivalent according
5363      * to UnicodeData.txt and CaseFolding.txt; if the character has no case
5364      * folding equivalent, the character itself is returned.
5365      *
5366      * <p>This function only returns the simple, single-code point case mapping.
5367      * Full case mappings should be used whenever possible because they produce
5368      * better results by working on whole strings.
5369      * They can map to a result string with a different length as appropriate.
5370      * Full case mappings are applied by the case mapping functions
5371      * that take String parameters rather than code points (int).
5372      * See also the User Guide chapter on C/POSIX migration:
5373      * http://www.icu-project.org/userguide/posix.html#case_mappings
5374      *
5375      * @param ch the character to be converted
5376      * @param options A bit set for special processing. Currently the recognised options
5377      * are FOLD_CASE_EXCLUDE_SPECIAL_I and FOLD_CASE_DEFAULT
5378      * @return the case folding equivalent of the character, if any; otherwise the
5379      * character itself.
5380      * @see #foldCase(String, boolean)
5381      * @stable ICU 2.6
5382      */
foldCase(int ch, int options)5383     public static int foldCase(int ch, int options) {
5384         return UCaseProps.INSTANCE.fold(ch, options);
5385     }
5386 
5387     /**
5388      * {@icu} The given string is mapped to its case folding equivalent according to
5389      * UnicodeData.txt and CaseFolding.txt; if any character has no case
5390      * folding equivalent, the character itself is returned.
5391      * "Full", multiple-code point case folding mappings are returned here.
5392      * For "simple" single-code point mappings use the API
5393      * foldCase(int ch, boolean defaultmapping).
5394      * @param str the String to be converted
5395      * @param options A bit set for special processing. Currently the recognised options
5396      *                are FOLD_CASE_EXCLUDE_SPECIAL_I and FOLD_CASE_DEFAULT
5397      * @return the case folding equivalent of the character, if any; otherwise the
5398      *         character itself.
5399      * @see #foldCase(int, boolean)
5400      * @stable ICU 2.6
5401      */
foldCase(String str, int options)5402     public static final String foldCase(String str, int options) {
5403         StringBuilder result = new StringBuilder(str.length());
5404         int c, i, length;
5405 
5406         length = str.length();
5407         for(i=0; i<length;) {
5408             c=str.codePointAt(i);
5409             i+=Character.charCount(c);
5410             c = UCaseProps.INSTANCE.toFullFolding(c, result, options);
5411 
5412             /* decode the result */
5413             if(c<0) {
5414                 /* (not) original code point */
5415                 c=~c;
5416             } else if(c<=UCaseProps.MAX_STRING_LENGTH) {
5417                 /* mapping already appended to result */
5418                 continue;
5419                 /* } else { append single-code point mapping */
5420             }
5421             result.appendCodePoint(c);
5422         }
5423         return result.toString();
5424     }
5425 
5426     /**
5427      * {@icu} Returns the numeric value of a Han character.
5428      *
5429      * <p>This returns the value of Han 'numeric' code points,
5430      * including those for zero, ten, hundred, thousand, ten thousand,
5431      * and hundred million.
5432      * This includes both the standard and 'checkwriting'
5433      * characters, the 'big circle' zero character, and the standard
5434      * zero character.
5435      *
5436      * <p>Note: The Unicode Standard has numeric values for more
5437      * Han characters recognized by this method
5438      * (see {@link #getNumericValue(int)} and the UCD file DerivedNumericValues.txt),
5439      * and a {@link com.ibm.icu.text.NumberFormat} can be used with
5440      * a Chinese {@link com.ibm.icu.text.NumberingSystem}.
5441      *
5442      * @param ch code point to query
5443      * @return value if it is a Han 'numeric character,' otherwise return -1.
5444      * @stable ICU 2.4
5445      */
getHanNumericValue(int ch)5446     public static int getHanNumericValue(int ch)
5447     {
5448         switch(ch)
5449         {
5450         case IDEOGRAPHIC_NUMBER_ZERO_ :
5451         case CJK_IDEOGRAPH_COMPLEX_ZERO_ :
5452             return 0; // Han Zero
5453         case CJK_IDEOGRAPH_FIRST_ :
5454         case CJK_IDEOGRAPH_COMPLEX_ONE_ :
5455             return 1; // Han One
5456         case CJK_IDEOGRAPH_SECOND_ :
5457         case CJK_IDEOGRAPH_COMPLEX_TWO_ :
5458             return 2; // Han Two
5459         case CJK_IDEOGRAPH_THIRD_ :
5460         case CJK_IDEOGRAPH_COMPLEX_THREE_ :
5461             return 3; // Han Three
5462         case CJK_IDEOGRAPH_FOURTH_ :
5463         case CJK_IDEOGRAPH_COMPLEX_FOUR_ :
5464             return 4; // Han Four
5465         case CJK_IDEOGRAPH_FIFTH_ :
5466         case CJK_IDEOGRAPH_COMPLEX_FIVE_ :
5467             return 5; // Han Five
5468         case CJK_IDEOGRAPH_SIXTH_ :
5469         case CJK_IDEOGRAPH_COMPLEX_SIX_ :
5470             return 6; // Han Six
5471         case CJK_IDEOGRAPH_SEVENTH_ :
5472         case CJK_IDEOGRAPH_COMPLEX_SEVEN_ :
5473             return 7; // Han Seven
5474         case CJK_IDEOGRAPH_EIGHTH_ :
5475         case CJK_IDEOGRAPH_COMPLEX_EIGHT_ :
5476             return 8; // Han Eight
5477         case CJK_IDEOGRAPH_NINETH_ :
5478         case CJK_IDEOGRAPH_COMPLEX_NINE_ :
5479             return 9; // Han Nine
5480         case CJK_IDEOGRAPH_TEN_ :
5481         case CJK_IDEOGRAPH_COMPLEX_TEN_ :
5482             return 10;
5483         case CJK_IDEOGRAPH_HUNDRED_ :
5484         case CJK_IDEOGRAPH_COMPLEX_HUNDRED_ :
5485             return 100;
5486         case CJK_IDEOGRAPH_THOUSAND_ :
5487         case CJK_IDEOGRAPH_COMPLEX_THOUSAND_ :
5488             return 1000;
5489         case CJK_IDEOGRAPH_TEN_THOUSAND_ :
5490             return 10000;
5491         case CJK_IDEOGRAPH_HUNDRED_MILLION_ :
5492             return 100000000;
5493         }
5494         return -1; // no value
5495     }
5496 
5497     /**
5498      * {@icu} <p>Returns an iterator for character types, iterating over codepoints.
5499      * <p>Example of use:<br>
5500      * <pre>
5501      * RangeValueIterator iterator = UCharacter.getTypeIterator();
5502      * RangeValueIterator.Element element = new RangeValueIterator.Element();
5503      * while (iterator.next(element)) {
5504      *     System.out.println("Codepoint \\u" +
5505      *                        Integer.toHexString(element.start) +
5506      *                        " to codepoint \\u" +
5507      *                        Integer.toHexString(element.limit - 1) +
5508      *                        " has the character type " +
5509      *                        element.value);
5510      * }
5511      * </pre>
5512      * @return an iterator
5513      * @stable ICU 2.6
5514      */
getTypeIterator()5515     public static RangeValueIterator getTypeIterator()
5516     {
5517         return new UCharacterTypeIterator();
5518     }
5519 
5520     private static final class UCharacterTypeIterator implements RangeValueIterator {
UCharacterTypeIterator()5521         UCharacterTypeIterator() {
5522             reset();
5523         }
5524 
5525         // implements RangeValueIterator
next(Element element)5526         public boolean next(Element element) {
5527             if(trieIterator.hasNext() && !(range=trieIterator.next()).leadSurrogate) {
5528                 element.start=range.startCodePoint;
5529                 element.limit=range.endCodePoint+1;
5530                 element.value=range.value;
5531                 return true;
5532             } else {
5533                 return false;
5534             }
5535         }
5536 
5537         // implements RangeValueIterator
reset()5538         public void reset() {
5539             trieIterator=UCharacterProperty.INSTANCE.m_trie_.iterator(MASK_TYPE);
5540         }
5541 
5542         private Iterator<Trie2.Range> trieIterator;
5543         private Trie2.Range range;
5544 
5545         private static final class MaskType implements Trie2.ValueMapper {
5546             // Extracts the general category ("character type") from the trie value.
map(int value)5547             public int map(int value) {
5548                 return value & UCharacterProperty.TYPE_MASK;
5549             }
5550         }
5551         private static final MaskType MASK_TYPE=new MaskType();
5552     }
5553 
5554     /**
5555      * {@icu} <p>Returns an iterator for character names, iterating over codepoints.
5556      * <p>This API only gets the iterator for the modern, most up-to-date
5557      * Unicode names. For older 1.0 Unicode names use get1_0NameIterator() or
5558      * for extended names use getExtendedNameIterator().
5559      * <p>Example of use:<br>
5560      * <pre>
5561      * ValueIterator iterator = UCharacter.getNameIterator();
5562      * ValueIterator.Element element = new ValueIterator.Element();
5563      * while (iterator.next(element)) {
5564      *     System.out.println("Codepoint \\u" +
5565      *                        Integer.toHexString(element.codepoint) +
5566      *                        " has the name " + (String)element.value);
5567      * }
5568      * </pre>
5569      * <p>The maximal range which the name iterator iterates is from
5570      * UCharacter.MIN_VALUE to UCharacter.MAX_VALUE.
5571      * @return an iterator
5572      * @stable ICU 2.6
5573      */
getNameIterator()5574     public static ValueIterator getNameIterator(){
5575         return new UCharacterNameIterator(UCharacterName.INSTANCE,
5576                 UCharacterNameChoice.UNICODE_CHAR_NAME);
5577     }
5578 
5579     /**
5580      * {@icu} Returns an empty iterator.
5581      * <p>Used to return an iterator for the older 1.0 Unicode character names, iterating over codepoints.
5582      * @return an empty iterator
5583      * @deprecated ICU 49
5584      * @see #getName1_0(int)
5585      */
5586     @Deprecated
getName1_0Iterator()5587     public static ValueIterator getName1_0Iterator(){
5588         return new DummyValueIterator();
5589     }
5590 
5591     private static final class DummyValueIterator implements ValueIterator {
next(Element element)5592         public boolean next(Element element) { return false; }
reset()5593         public void reset() {}
setRange(int start, int limit)5594         public void setRange(int start, int limit) {}
5595     }
5596 
5597     /**
5598      * {@icu} <p>Returns an iterator for character names, iterating over codepoints.
5599      * <p>This API only gets the iterator for the extended names.
5600      * For modern, most up-to-date Unicode names use getNameIterator() or
5601      * for older 1.0 Unicode names use get1_0NameIterator().
5602      * <p>Example of use:<br>
5603      * <pre>
5604      * ValueIterator iterator = UCharacter.getExtendedNameIterator();
5605      * ValueIterator.Element element = new ValueIterator.Element();
5606      * while (iterator.next(element)) {
5607      *     System.out.println("Codepoint \\u" +
5608      *                        Integer.toHexString(element.codepoint) +
5609      *                        " has the name " + (String)element.value);
5610      * }
5611      * </pre>
5612      * <p>The maximal range which the name iterator iterates is from
5613      * @return an iterator
5614      * @stable ICU 2.6
5615      */
getExtendedNameIterator()5616     public static ValueIterator getExtendedNameIterator(){
5617         return new UCharacterNameIterator(UCharacterName.INSTANCE,
5618                 UCharacterNameChoice.EXTENDED_CHAR_NAME);
5619     }
5620 
5621     /**
5622      * {@icu} Returns the "age" of the code point.
5623      * <p>The "age" is the Unicode version when the code point was first
5624      * designated (as a non-character or for Private Use) or assigned a
5625      * character.
5626      * <p>This can be useful to avoid emitting code points to receiving
5627      * processes that do not accept newer characters.
5628      * <p>The data is from the UCD file DerivedAge.txt.
5629      * @param ch The code point.
5630      * @return the Unicode version number
5631      * @stable ICU 2.6
5632      */
getAge(int ch)5633     public static VersionInfo getAge(int ch)
5634     {
5635         if (ch < MIN_VALUE || ch > MAX_VALUE) {
5636             throw new IllegalArgumentException("Codepoint out of bounds");
5637         }
5638         return UCharacterProperty.INSTANCE.getAge(ch);
5639     }
5640 
5641     /**
5642      * {@icu} <p>Check a binary Unicode property for a code point.
5643      * <p>Unicode, especially in version 3.2, defines many more properties
5644      * than the original set in UnicodeData.txt.
5645      * <p>This API is intended to reflect Unicode properties as defined in
5646      * the Unicode Character Database (UCD) and Unicode Technical Reports
5647      * (UTR).
5648      * <p>For details about the properties see
5649      * <a href=http://www.unicode.org/>http://www.unicode.org/</a>.
5650      * <p>For names of Unicode properties see the UCD file
5651      * PropertyAliases.txt.
5652      * <p>This API does not check the validity of the codepoint.
5653      * <p>Important: If ICU is built with UCD files from Unicode versions
5654      * below 3.2, then properties marked with "new" are not or
5655      * not fully available.
5656      * @param ch code point to test.
5657      * @param property selector constant from com.ibm.icu.lang.UProperty,
5658      *        identifies which binary property to check.
5659      * @return true or false according to the binary Unicode property value
5660      *         for ch. Also false if property is out of bounds or if the
5661      *         Unicode version does not have data for the property at all, or
5662      *         not for this code point.
5663      * @see com.ibm.icu.lang.UProperty
5664      * @stable ICU 2.6
5665      */
hasBinaryProperty(int ch, int property)5666     public static boolean hasBinaryProperty(int ch, int property)
5667     {
5668         return UCharacterProperty.INSTANCE.hasBinaryProperty(ch, property);
5669     }
5670 
5671     /**
5672      * {@icu} <p>Check if a code point has the Alphabetic Unicode property.
5673      * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.ALPHABETIC).
5674      * <p>Different from UCharacter.isLetter(ch)!
5675      * @stable ICU 2.6
5676      * @param ch codepoint to be tested
5677      */
isUAlphabetic(int ch)5678     public static boolean isUAlphabetic(int ch)
5679     {
5680         return hasBinaryProperty(ch, UProperty.ALPHABETIC);
5681     }
5682 
5683     /**
5684      * {@icu} <p>Check if a code point has the Lowercase Unicode property.
5685      * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.LOWERCASE).
5686      * <p>This is different from UCharacter.isLowerCase(ch)!
5687      * @param ch codepoint to be tested
5688      * @stable ICU 2.6
5689      */
isULowercase(int ch)5690     public static boolean isULowercase(int ch)
5691     {
5692         return hasBinaryProperty(ch, UProperty.LOWERCASE);
5693     }
5694 
5695     /**
5696      * {@icu} <p>Check if a code point has the Uppercase Unicode property.
5697      * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.UPPERCASE).
5698      * <p>This is different from UCharacter.isUpperCase(ch)!
5699      * @param ch codepoint to be tested
5700      * @stable ICU 2.6
5701      */
isUUppercase(int ch)5702     public static boolean isUUppercase(int ch)
5703     {
5704         return hasBinaryProperty(ch, UProperty.UPPERCASE);
5705     }
5706 
5707     /**
5708      * {@icu} <p>Check if a code point has the White_Space Unicode property.
5709      * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.WHITE_SPACE).
5710      * <p>This is different from both UCharacter.isSpace(ch) and
5711      * UCharacter.isWhitespace(ch)!
5712      * @param ch codepoint to be tested
5713      * @stable ICU 2.6
5714      */
isUWhiteSpace(int ch)5715     public static boolean isUWhiteSpace(int ch)
5716     {
5717         return hasBinaryProperty(ch, UProperty.WHITE_SPACE);
5718     }
5719 
5720     /**
5721      * {@icu} <p>Returns the property value for an Unicode property type of a code point.
5722      * Also returns binary and mask property values.
5723      * <p>Unicode, especially in version 3.2, defines many more properties than
5724      * the original set in UnicodeData.txt.
5725      * <p>The properties APIs are intended to reflect Unicode properties as
5726      * defined in the Unicode Character Database (UCD) and Unicode Technical
5727      * Reports (UTR). For details about the properties see
5728      * http://www.unicode.org/.
5729      * <p>For names of Unicode properties see the UCD file PropertyAliases.txt.
5730      *
5731      * <pre>
5732      * Sample usage:
5733      * int ea = UCharacter.getIntPropertyValue(c, UProperty.EAST_ASIAN_WIDTH);
5734      * int ideo = UCharacter.getIntPropertyValue(c, UProperty.IDEOGRAPHIC);
5735      * boolean b = (ideo == 1) ? true : false;
5736      * </pre>
5737      * @param ch code point to test.
5738      * @param type UProperty selector constant, identifies which binary
5739      *        property to check. Must be
5740      *        UProperty.BINARY_START &lt;= type &lt; UProperty.BINARY_LIMIT or
5741      *        UProperty.INT_START &lt;= type &lt; UProperty.INT_LIMIT or
5742      *        UProperty.MASK_START &lt;= type &lt; UProperty.MASK_LIMIT.
5743      * @return numeric value that is directly the property value or,
5744      *         for enumerated properties, corresponds to the numeric value of
5745      *         the enumerated constant of the respective property value
5746      *         enumeration type (cast to enum type if necessary).
5747      *         Returns 0 or 1 (for false / true) for binary Unicode properties.
5748      *         Returns a bit-mask for mask properties.
5749      *         Returns 0 if 'type' is out of bounds or if the Unicode version
5750      *         does not have data for the property at all, or not for this code
5751      *         point.
5752      * @see UProperty
5753      * @see #hasBinaryProperty
5754      * @see #getIntPropertyMinValue
5755      * @see #getIntPropertyMaxValue
5756      * @see #getUnicodeVersion
5757      * @stable ICU 2.4
5758      */
getIntPropertyValue(int ch, int type)5759     public static int getIntPropertyValue(int ch, int type)
5760     {
5761         return UCharacterProperty.INSTANCE.getIntPropertyValue(ch, type);
5762     }
5763     /**
5764      * {@icu} Returns a string version of the property value.
5765      * @param propertyEnum The property enum value.
5766      * @param codepoint The codepoint value.
5767      * @param nameChoice The choice of the name.
5768      * @return value as string
5769      * @internal
5770      * @deprecated This API is ICU internal only.
5771      */
5772     @Deprecated
5773     ///CLOVER:OFF
getStringPropertyValue(int propertyEnum, int codepoint, int nameChoice)5774     public static String getStringPropertyValue(int propertyEnum, int codepoint, int nameChoice) {
5775         if ((propertyEnum >= UProperty.BINARY_START && propertyEnum < UProperty.BINARY_LIMIT) ||
5776                 (propertyEnum >= UProperty.INT_START && propertyEnum < UProperty.INT_LIMIT)) {
5777             return getPropertyValueName(propertyEnum, getIntPropertyValue(codepoint, propertyEnum),
5778                     nameChoice);
5779         }
5780         if (propertyEnum == UProperty.NUMERIC_VALUE) {
5781             return String.valueOf(getUnicodeNumericValue(codepoint));
5782         }
5783         // otherwise must be string property
5784         switch (propertyEnum) {
5785         case UProperty.AGE: return getAge(codepoint).toString();
5786         case UProperty.ISO_COMMENT: return getISOComment(codepoint);
5787         case UProperty.BIDI_MIRRORING_GLYPH: return toString(getMirror(codepoint));
5788         case UProperty.CASE_FOLDING: return toString(foldCase(codepoint, true));
5789         case UProperty.LOWERCASE_MAPPING: return toString(toLowerCase(codepoint));
5790         case UProperty.NAME: return getName(codepoint);
5791         case UProperty.SIMPLE_CASE_FOLDING: return toString(foldCase(codepoint, true));
5792         case UProperty.SIMPLE_LOWERCASE_MAPPING: return toString(toLowerCase(codepoint));
5793         case UProperty.SIMPLE_TITLECASE_MAPPING: return toString(toTitleCase(codepoint));
5794         case UProperty.SIMPLE_UPPERCASE_MAPPING: return toString(toUpperCase(codepoint));
5795         case UProperty.TITLECASE_MAPPING: return toString(toTitleCase(codepoint));
5796         case UProperty.UNICODE_1_NAME: return getName1_0(codepoint);
5797         case UProperty.UPPERCASE_MAPPING: return toString(toUpperCase(codepoint));
5798         }
5799         throw new IllegalArgumentException("Illegal Property Enum");
5800     }
5801     ///CLOVER:ON
5802 
5803     /**
5804      * {@icu} Returns the minimum value for an integer/binary Unicode property type.
5805      * Can be used together with UCharacter.getIntPropertyMaxValue(int)
5806      * to allocate arrays of com.ibm.icu.text.UnicodeSet or similar.
5807      * @param type UProperty selector constant, identifies which binary
5808      *        property to check. Must be
5809      *        UProperty.BINARY_START &lt;= type &lt; UProperty.BINARY_LIMIT or
5810      *        UProperty.INT_START &lt;= type &lt; UProperty.INT_LIMIT.
5811      * @return Minimum value returned by UCharacter.getIntPropertyValue(int)
5812      *         for a Unicode property. 0 if the property
5813      *         selector 'type' is out of range.
5814      * @see UProperty
5815      * @see #hasBinaryProperty
5816      * @see #getUnicodeVersion
5817      * @see #getIntPropertyMaxValue
5818      * @see #getIntPropertyValue
5819      * @stable ICU 2.4
5820      */
getIntPropertyMinValue(int type)5821     public static int getIntPropertyMinValue(int type){
5822 
5823         return 0; // undefined; and: all other properties have a minimum value of 0
5824     }
5825 
5826 
5827     /**
5828      * {@icu} Returns the maximum value for an integer/binary Unicode property.
5829      * Can be used together with UCharacter.getIntPropertyMinValue(int)
5830      * to allocate arrays of com.ibm.icu.text.UnicodeSet or similar.
5831      * Examples for min/max values (for Unicode 3.2):
5832      * <ul>
5833      * <li> UProperty.BIDI_CLASS:    0/18
5834      * (UCharacterDirection.LEFT_TO_RIGHT/UCharacterDirection.BOUNDARY_NEUTRAL)
5835      * <li> UProperty.SCRIPT:        0/45 (UScript.COMMON/UScript.TAGBANWA)
5836      * <li> UProperty.IDEOGRAPHIC:   0/1  (false/true)
5837      * </ul>
5838      * For undefined UProperty constant values, min/max values will be 0/-1.
5839      * @param type UProperty selector constant, identifies which binary
5840      *        property to check. Must be
5841      *        UProperty.BINARY_START &lt;= type &lt; UProperty.BINARY_LIMIT or
5842      *        UProperty.INT_START &lt;= type &lt; UProperty.INT_LIMIT.
5843      * @return Maximum value returned by u_getIntPropertyValue for a Unicode
5844      *         property. &lt;= 0 if the property selector 'type' is out of range.
5845      * @see UProperty
5846      * @see #hasBinaryProperty
5847      * @see #getUnicodeVersion
5848      * @see #getIntPropertyMaxValue
5849      * @see #getIntPropertyValue
5850      * @stable ICU 2.4
5851      */
getIntPropertyMaxValue(int type)5852     public static int getIntPropertyMaxValue(int type)
5853     {
5854         return UCharacterProperty.INSTANCE.getIntPropertyMaxValue(type);
5855     }
5856 
5857     /**
5858      * Provide the java.lang.Character forDigit API, for convenience.
5859      * @stable ICU 3.0
5860      */
forDigit(int digit, int radix)5861     public static char forDigit(int digit, int radix) {
5862         return java.lang.Character.forDigit(digit, radix);
5863     }
5864 
5865     // JDK 1.5 API coverage
5866 
5867     /**
5868      * Constant U+D800, same as {@link Character#MIN_HIGH_SURROGATE}.
5869      *
5870      * @stable ICU 3.0
5871      */
5872     public static final char MIN_HIGH_SURROGATE = Character.MIN_HIGH_SURROGATE;
5873 
5874     /**
5875      * Constant U+DBFF, same as {@link Character#MAX_HIGH_SURROGATE}.
5876      *
5877      * @stable ICU 3.0
5878      */
5879     public static final char MAX_HIGH_SURROGATE = Character.MAX_HIGH_SURROGATE;
5880 
5881     /**
5882      * Constant U+DC00, same as {@link Character#MIN_LOW_SURROGATE}.
5883      *
5884      * @stable ICU 3.0
5885      */
5886     public static final char MIN_LOW_SURROGATE = Character.MIN_LOW_SURROGATE;
5887 
5888     /**
5889      * Constant U+DFFF, same as {@link Character#MAX_LOW_SURROGATE}.
5890      *
5891      * @stable ICU 3.0
5892      */
5893     public static final char MAX_LOW_SURROGATE = Character.MAX_LOW_SURROGATE;
5894 
5895     /**
5896      * Constant U+D800, same as {@link Character#MIN_SURROGATE}.
5897      *
5898      * @stable ICU 3.0
5899      */
5900     public static final char MIN_SURROGATE = Character.MIN_SURROGATE;
5901 
5902     /**
5903      * Constant U+DFFF, same as {@link Character#MAX_SURROGATE}.
5904      *
5905      * @stable ICU 3.0
5906      */
5907     public static final char MAX_SURROGATE = Character.MAX_SURROGATE;
5908 
5909     /**
5910      * Constant U+10000, same as {@link Character#MIN_SUPPLEMENTARY_CODE_POINT}.
5911      *
5912      * @stable ICU 3.0
5913      */
5914     public static final int MIN_SUPPLEMENTARY_CODE_POINT = Character.MIN_SUPPLEMENTARY_CODE_POINT;
5915 
5916     /**
5917      * Constant U+10FFFF, same as {@link Character#MAX_CODE_POINT}.
5918      *
5919      * @stable ICU 3.0
5920      */
5921     public static final int MAX_CODE_POINT = Character.MAX_CODE_POINT;
5922 
5923     /**
5924      * Constant U+0000, same as {@link Character#MIN_CODE_POINT}.
5925      *
5926      * @stable ICU 3.0
5927      */
5928     public static final int MIN_CODE_POINT = Character.MIN_CODE_POINT;
5929 
5930     /**
5931      * Equivalent to {@link Character#isValidCodePoint}.
5932      *
5933      * @param cp the code point to check
5934      * @return true if cp is a valid code point
5935      * @stable ICU 3.0
5936      */
isValidCodePoint(int cp)5937     public static final boolean isValidCodePoint(int cp) {
5938         return cp >= 0 && cp <= MAX_CODE_POINT;
5939     }
5940 
5941     /**
5942      * Same as {@link Character#isSupplementaryCodePoint}.
5943      *
5944      * @param cp the code point to check
5945      * @return true if cp is a supplementary code point
5946      * @stable ICU 3.0
5947      */
isSupplementaryCodePoint(int cp)5948     public static final boolean isSupplementaryCodePoint(int cp) {
5949         return Character.isSupplementaryCodePoint(cp);
5950     }
5951 
5952     /**
5953      * Same as {@link Character#isHighSurrogate}.
5954      *
5955      * @param ch the char to check
5956      * @return true if ch is a high (lead) surrogate
5957      * @stable ICU 3.0
5958      */
isHighSurrogate(char ch)5959     public static boolean isHighSurrogate(char ch) {
5960         return Character.isHighSurrogate(ch);
5961     }
5962 
5963     /**
5964      * Same as {@link Character#isLowSurrogate}.
5965      *
5966      * @param ch the char to check
5967      * @return true if ch is a low (trail) surrogate
5968      * @stable ICU 3.0
5969      */
isLowSurrogate(char ch)5970     public static boolean isLowSurrogate(char ch) {
5971         return Character.isLowSurrogate(ch);
5972     }
5973 
5974     /**
5975      * Same as {@link Character#isSurrogatePair}.
5976      *
5977      * @param high the high (lead) char
5978      * @param low the low (trail) char
5979      * @return true if high, low form a surrogate pair
5980      * @stable ICU 3.0
5981      */
isSurrogatePair(char high, char low)5982     public static final boolean isSurrogatePair(char high, char low) {
5983         return Character.isSurrogatePair(high, low);
5984     }
5985 
5986     /**
5987      * Same as {@link Character#charCount}.
5988      * Returns the number of chars needed to represent the code point (1 or 2).
5989      * This does not check the code point for validity.
5990      *
5991      * @param cp the code point to check
5992      * @return the number of chars needed to represent the code point
5993      * @stable ICU 3.0
5994      */
charCount(int cp)5995     public static int charCount(int cp) {
5996         return Character.charCount(cp);
5997     }
5998 
5999     /**
6000      * Same as {@link Character#toCodePoint}.
6001      * Returns the code point represented by the two surrogate code units.
6002      * This does not check the surrogate pair for validity.
6003      *
6004      * @param high the high (lead) surrogate
6005      * @param low the low (trail) surrogate
6006      * @return the code point formed by the surrogate pair
6007      * @stable ICU 3.0
6008      */
toCodePoint(char high, char low)6009     public static final int toCodePoint(char high, char low) {
6010         return Character.toCodePoint(high, low);
6011     }
6012 
6013     /**
6014      * Same as {@link Character#codePointAt(CharSequence, int)}.
6015      * Returns the code point at index.
6016      * This examines only the characters at index and index+1.
6017      *
6018      * @param seq the characters to check
6019      * @param index the index of the first or only char forming the code point
6020      * @return the code point at the index
6021      * @stable ICU 3.0
6022      */
codePointAt(CharSequence seq, int index)6023     public static final int codePointAt(CharSequence seq, int index) {
6024         char c1 = seq.charAt(index++);
6025         if (isHighSurrogate(c1)) {
6026             if (index < seq.length()) {
6027                 char c2 = seq.charAt(index);
6028                 if (isLowSurrogate(c2)) {
6029                     return toCodePoint(c1, c2);
6030                 }
6031             }
6032         }
6033         return c1;
6034     }
6035 
6036     /**
6037      * Same as {@link Character#codePointAt(char[], int)}.
6038      * Returns the code point at index.
6039      * This examines only the characters at index and index+1.
6040      *
6041      * @param text the characters to check
6042      * @param index the index of the first or only char forming the code point
6043      * @return the code point at the index
6044      * @stable ICU 3.0
6045      */
codePointAt(char[] text, int index)6046     public static final int codePointAt(char[] text, int index) {
6047         char c1 = text[index++];
6048         if (isHighSurrogate(c1)) {
6049             if (index < text.length) {
6050                 char c2 = text[index];
6051                 if (isLowSurrogate(c2)) {
6052                     return toCodePoint(c1, c2);
6053                 }
6054             }
6055         }
6056         return c1;
6057     }
6058 
6059     /**
6060      * Same as {@link Character#codePointAt(char[], int, int)}.
6061      * Returns the code point at index.
6062      * This examines only the characters at index and index+1.
6063      *
6064      * @param text the characters to check
6065      * @param index the index of the first or only char forming the code point
6066      * @param limit the limit of the valid text
6067      * @return the code point at the index
6068      * @stable ICU 3.0
6069      */
codePointAt(char[] text, int index, int limit)6070     public static final int codePointAt(char[] text, int index, int limit) {
6071         if (index >= limit || limit > text.length) {
6072             throw new IndexOutOfBoundsException();
6073         }
6074         char c1 = text[index++];
6075         if (isHighSurrogate(c1)) {
6076             if (index < limit) {
6077                 char c2 = text[index];
6078                 if (isLowSurrogate(c2)) {
6079                     return toCodePoint(c1, c2);
6080                 }
6081             }
6082         }
6083         return c1;
6084     }
6085 
6086     /**
6087      * Same as {@link Character#codePointBefore(CharSequence, int)}.
6088      * Return the code point before index.
6089      * This examines only the characters at index-1 and index-2.
6090      *
6091      * @param seq the characters to check
6092      * @param index the index after the last or only char forming the code point
6093      * @return the code point before the index
6094      * @stable ICU 3.0
6095      */
codePointBefore(CharSequence seq, int index)6096     public static final int codePointBefore(CharSequence seq, int index) {
6097         char c2 = seq.charAt(--index);
6098         if (isLowSurrogate(c2)) {
6099             if (index > 0) {
6100                 char c1 = seq.charAt(--index);
6101                 if (isHighSurrogate(c1)) {
6102                     return toCodePoint(c1, c2);
6103                 }
6104             }
6105         }
6106         return c2;
6107     }
6108 
6109     /**
6110      * Same as {@link Character#codePointBefore(char[], int)}.
6111      * Returns the code point before index.
6112      * This examines only the characters at index-1 and index-2.
6113      *
6114      * @param text the characters to check
6115      * @param index the index after the last or only char forming the code point
6116      * @return the code point before the index
6117      * @stable ICU 3.0
6118      */
codePointBefore(char[] text, int index)6119     public static final int codePointBefore(char[] text, int index) {
6120         char c2 = text[--index];
6121         if (isLowSurrogate(c2)) {
6122             if (index > 0) {
6123                 char c1 = text[--index];
6124                 if (isHighSurrogate(c1)) {
6125                     return toCodePoint(c1, c2);
6126                 }
6127             }
6128         }
6129         return c2;
6130     }
6131 
6132     /**
6133      * Same as {@link Character#codePointBefore(char[], int, int)}.
6134      * Return the code point before index.
6135      * This examines only the characters at index-1 and index-2.
6136      *
6137      * @param text the characters to check
6138      * @param index the index after the last or only char forming the code point
6139      * @param limit the start of the valid text
6140      * @return the code point before the index
6141      * @stable ICU 3.0
6142      */
codePointBefore(char[] text, int index, int limit)6143     public static final int codePointBefore(char[] text, int index, int limit) {
6144         if (index <= limit || limit < 0) {
6145             throw new IndexOutOfBoundsException();
6146         }
6147         char c2 = text[--index];
6148         if (isLowSurrogate(c2)) {
6149             if (index > limit) {
6150                 char c1 = text[--index];
6151                 if (isHighSurrogate(c1)) {
6152                     return toCodePoint(c1, c2);
6153                 }
6154             }
6155         }
6156         return c2;
6157     }
6158 
6159     /**
6160      * Same as {@link Character#toChars(int, char[], int)}.
6161      * Writes the chars representing the
6162      * code point into the destination at the given index.
6163      *
6164      * @param cp the code point to convert
6165      * @param dst the destination array into which to put the char(s) representing the code point
6166      * @param dstIndex the index at which to put the first (or only) char
6167      * @return the count of the number of chars written (1 or 2)
6168      * @throws IllegalArgumentException if cp is not a valid code point
6169      * @stable ICU 3.0
6170      */
toChars(int cp, char[] dst, int dstIndex)6171     public static final int toChars(int cp, char[] dst, int dstIndex) {
6172         return Character.toChars(cp, dst, dstIndex);
6173     }
6174 
6175     /**
6176      * Same as {@link Character#toChars(int)}.
6177      * Returns a char array representing the code point.
6178      *
6179      * @param cp the code point to convert
6180      * @return an array containing the char(s) representing the code point
6181      * @throws IllegalArgumentException if cp is not a valid code point
6182      * @stable ICU 3.0
6183      */
toChars(int cp)6184     public static final char[] toChars(int cp) {
6185         return Character.toChars(cp);
6186     }
6187 
6188     /**
6189      * Equivalent to the {@link Character#getDirectionality(char)} method, for
6190      * convenience. Returns a byte representing the directionality of the
6191      * character.
6192      *
6193      * {@icunote} Unlike {@link Character#getDirectionality(char)}, this returns
6194      * DIRECTIONALITY_LEFT_TO_RIGHT for undefined or out-of-bounds characters.
6195      *
6196      * {@icunote} The return value must be tested using the constants defined in {@link
6197      * UCharacterDirection} and its interface {@link
6198      * UCharacterEnums.ECharacterDirection} since the values are different from the ones
6199      * defined by <code>java.lang.Character</code>.
6200      * @param cp the code point to check
6201      * @return the directionality of the code point
6202      * @see #getDirection
6203      * @stable ICU 3.0
6204      */
getDirectionality(int cp)6205     public static byte getDirectionality(int cp)
6206     {
6207         return (byte)getDirection(cp);
6208     }
6209 
6210     /**
6211      * Equivalent to the {@link Character#codePointCount(CharSequence, int, int)}
6212      * method, for convenience.  Counts the number of code points in the range
6213      * of text.
6214      * @param text the characters to check
6215      * @param start the start of the range
6216      * @param limit the limit of the range
6217      * @return the number of code points in the range
6218      * @stable ICU 3.0
6219      */
codePointCount(CharSequence text, int start, int limit)6220     public static int codePointCount(CharSequence text, int start, int limit) {
6221         if (start < 0 || limit < start || limit > text.length()) {
6222             throw new IndexOutOfBoundsException("start (" + start +
6223                     ") or limit (" + limit +
6224                     ") invalid or out of range 0, " + text.length());
6225         }
6226 
6227         int len = limit - start;
6228         while (limit > start) {
6229             char ch = text.charAt(--limit);
6230             while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && limit > start) {
6231                 ch = text.charAt(--limit);
6232                 if (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE) {
6233                     --len;
6234                     break;
6235                 }
6236             }
6237         }
6238         return len;
6239     }
6240 
6241     /**
6242      * Equivalent to the {@link Character#codePointCount(char[], int, int)} method, for
6243      * convenience. Counts the number of code points in the range of text.
6244      * @param text the characters to check
6245      * @param start the start of the range
6246      * @param limit the limit of the range
6247      * @return the number of code points in the range
6248      * @stable ICU 3.0
6249      */
codePointCount(char[] text, int start, int limit)6250     public static int codePointCount(char[] text, int start, int limit) {
6251         if (start < 0 || limit < start || limit > text.length) {
6252             throw new IndexOutOfBoundsException("start (" + start +
6253                     ") or limit (" + limit +
6254                     ") invalid or out of range 0, " + text.length);
6255         }
6256 
6257         int len = limit - start;
6258         while (limit > start) {
6259             char ch = text[--limit];
6260             while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && limit > start) {
6261                 ch = text[--limit];
6262                 if (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE) {
6263                     --len;
6264                     break;
6265                 }
6266             }
6267         }
6268         return len;
6269     }
6270 
6271     /**
6272      * Equivalent to the {@link Character#offsetByCodePoints(CharSequence, int, int)}
6273      * method, for convenience.  Adjusts the char index by a code point offset.
6274      * @param text the characters to check
6275      * @param index the index to adjust
6276      * @param codePointOffset the number of code points by which to offset the index
6277      * @return the adjusted index
6278      * @stable ICU 3.0
6279      */
offsetByCodePoints(CharSequence text, int index, int codePointOffset)6280     public static int offsetByCodePoints(CharSequence text, int index, int codePointOffset) {
6281         if (index < 0 || index > text.length()) {
6282             throw new IndexOutOfBoundsException("index ( " + index +
6283                     ") out of range 0, " + text.length());
6284         }
6285 
6286         if (codePointOffset < 0) {
6287             while (++codePointOffset <= 0) {
6288                 char ch = text.charAt(--index);
6289                 while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && index > 0) {
6290                     ch = text.charAt(--index);
6291                     if (ch < MIN_HIGH_SURROGATE || ch > MAX_HIGH_SURROGATE) {
6292                         if (++codePointOffset > 0) {
6293                             return index+1;
6294                         }
6295                     }
6296                 }
6297             }
6298         } else {
6299             int limit = text.length();
6300             while (--codePointOffset >= 0) {
6301                 char ch = text.charAt(index++);
6302                 while (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE && index < limit) {
6303                     ch = text.charAt(index++);
6304                     if (ch < MIN_LOW_SURROGATE || ch > MAX_LOW_SURROGATE) {
6305                         if (--codePointOffset < 0) {
6306                             return index-1;
6307                         }
6308                     }
6309                 }
6310             }
6311         }
6312 
6313         return index;
6314     }
6315 
6316     /**
6317      * Equivalent to the
6318      * {@link Character#offsetByCodePoints(char[], int, int, int, int)}
6319      * method, for convenience.  Adjusts the char index by a code point offset.
6320      * @param text the characters to check
6321      * @param start the start of the range to check
6322      * @param count the length of the range to check
6323      * @param index the index to adjust
6324      * @param codePointOffset the number of code points by which to offset the index
6325      * @return the adjusted index
6326      * @stable ICU 3.0
6327      */
offsetByCodePoints(char[] text, int start, int count, int index, int codePointOffset)6328     public static int offsetByCodePoints(char[] text, int start, int count, int index,
6329             int codePointOffset) {
6330         int limit = start + count;
6331         if (start < 0 || limit < start || limit > text.length || index < start || index > limit) {
6332             throw new IndexOutOfBoundsException("index ( " + index +
6333                     ") out of range " + start +
6334                     ", " + limit +
6335                     " in array 0, " + text.length);
6336         }
6337 
6338         if (codePointOffset < 0) {
6339             while (++codePointOffset <= 0) {
6340                 char ch = text[--index];
6341                 if (index < start) {
6342                     throw new IndexOutOfBoundsException("index ( " + index +
6343                             ") < start (" + start +
6344                             ")");
6345                 }
6346                 while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && index > start) {
6347                     ch = text[--index];
6348                     if (ch < MIN_HIGH_SURROGATE || ch > MAX_HIGH_SURROGATE) {
6349                         if (++codePointOffset > 0) {
6350                             return index+1;
6351                         }
6352                     }
6353                 }
6354             }
6355         } else {
6356             while (--codePointOffset >= 0) {
6357                 char ch = text[index++];
6358                 if (index > limit) {
6359                     throw new IndexOutOfBoundsException("index ( " + index +
6360                             ") > limit (" + limit +
6361                             ")");
6362                 }
6363                 while (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE && index < limit) {
6364                     ch = text[index++];
6365                     if (ch < MIN_LOW_SURROGATE || ch > MAX_LOW_SURROGATE) {
6366                         if (--codePointOffset < 0) {
6367                             return index-1;
6368                         }
6369                     }
6370                 }
6371             }
6372         }
6373 
6374         return index;
6375     }
6376 
6377     // private variables -------------------------------------------------
6378 
    /**
     * Bit mask selecting the low 16 bits of an int, i.e. the last
     * (least significant) char-sized unit of a wider value.
     */
    private static final int LAST_CHAR_MASK_ = 0xFFFF;
6383 
6384     //    /**
6385     //     * To get the last byte out from a data type
6386     //     */
6387     //    private static final int LAST_BYTE_MASK_ = 0xFF;
6388     //
6389     //    /**
6390     //     * Shift 16 bits
6391     //     */
6392     //    private static final int SHIFT_16_ = 16;
6393     //
6394     //    /**
6395     //     * Shift 24 bits
6396     //     */
6397     //    private static final int SHIFT_24_ = 24;
6398     //
6399     //    /**
6400     //     * Decimal radix
6401     //     */
6402     //    private static final int DECIMAL_RADIX_ = 10;
6403 
    /**
     * No-break space code point (U+00A0)
     */
    private static final int NO_BREAK_SPACE_ = 0xA0;

    /**
     * Figure space code point (U+2007)
     */
    private static final int FIGURE_SPACE_ = 0x2007;

    /**
     * Narrow no-break space code point (U+202F)
     */
    private static final int NARROW_NO_BREAK_SPACE_ = 0x202F;

    /**
     * Ideographic number zero code point (U+3007)
     */
    private static final int IDEOGRAPHIC_NUMBER_ZERO_ = 0x3007;

    /**
     * CJK ideograph for the number one (U+4E00)
     */
    private static final int CJK_IDEOGRAPH_FIRST_ = 0x4e00;

    /**
     * CJK ideograph for the number two (U+4E8C)
     */
    private static final int CJK_IDEOGRAPH_SECOND_ = 0x4e8c;

    /**
     * CJK ideograph for the number three (U+4E09)
     */
    private static final int CJK_IDEOGRAPH_THIRD_ = 0x4e09;

    /**
     * CJK ideograph for the number four (U+56DB)
     */
    private static final int CJK_IDEOGRAPH_FOURTH_ = 0x56db;

    /**
     * CJK ideograph for the number five (U+4E94)
     */
    private static final int CJK_IDEOGRAPH_FIFTH_ = 0x4e94;

    /**
     * CJK ideograph for the number six (U+516D)
     */
    private static final int CJK_IDEOGRAPH_SIXTH_ = 0x516d;

    /**
     * CJK ideograph for the number seven (U+4E03)
     */
    private static final int CJK_IDEOGRAPH_SEVENTH_ = 0x4e03;

    /**
     * CJK ideograph for the number eight (U+516B)
     */
    private static final int CJK_IDEOGRAPH_EIGHTH_ = 0x516b;

    /**
     * CJK ideograph for the number nine (U+4E5D).
     * The identifier keeps its historical spelling.
     */
    private static final int CJK_IDEOGRAPH_NINETH_ = 0x4e5d;

    /**
     * Application program command control code point (U+009F)
     */
    private static final int APPLICATION_PROGRAM_COMMAND_ = 0x009F;

    /**
     * Unit separator control code point (U+001F)
     */
    private static final int UNIT_SEPARATOR_ = 0x001F;

    /**
     * Delete control code point (U+007F)
     */
    private static final int DELETE_ = 0x007F;
6483 
6484     /**
6485      * Han digit characters
6486      */
6487     private static final int CJK_IDEOGRAPH_COMPLEX_ZERO_     = 0x96f6;
6488     private static final int CJK_IDEOGRAPH_COMPLEX_ONE_      = 0x58f9;
6489     private static final int CJK_IDEOGRAPH_COMPLEX_TWO_      = 0x8cb3;
6490     private static final int CJK_IDEOGRAPH_COMPLEX_THREE_    = 0x53c3;
6491     private static final int CJK_IDEOGRAPH_COMPLEX_FOUR_     = 0x8086;
6492     private static final int CJK_IDEOGRAPH_COMPLEX_FIVE_     = 0x4f0d;
6493     private static final int CJK_IDEOGRAPH_COMPLEX_SIX_      = 0x9678;
6494     private static final int CJK_IDEOGRAPH_COMPLEX_SEVEN_    = 0x67d2;
6495     private static final int CJK_IDEOGRAPH_COMPLEX_EIGHT_    = 0x634c;
6496     private static final int CJK_IDEOGRAPH_COMPLEX_NINE_     = 0x7396;
6497     private static final int CJK_IDEOGRAPH_TEN_              = 0x5341;
6498     private static final int CJK_IDEOGRAPH_COMPLEX_TEN_      = 0x62fe;
6499     private static final int CJK_IDEOGRAPH_HUNDRED_          = 0x767e;
6500     private static final int CJK_IDEOGRAPH_COMPLEX_HUNDRED_  = 0x4f70;
6501     private static final int CJK_IDEOGRAPH_THOUSAND_         = 0x5343;
6502     private static final int CJK_IDEOGRAPH_COMPLEX_THOUSAND_ = 0x4edf;
6503     private static final int CJK_IDEOGRAPH_TEN_THOUSAND_     = 0x824c;
6504     private static final int CJK_IDEOGRAPH_HUNDRED_MILLION_  = 0x5104;
6505 
6506     // private constructor -----------------------------------------------
6507     ///CLOVER:OFF
    /**
     * Private, empty constructor: prevents code outside this class from
     * creating instances.
     */
    private UCharacter()
    {
    }
6514     ///CLOVER:ON
6515 }
6516