1 /**
2  *******************************************************************************
3  * Copyright (C) 1996-2015, International Business Machines Corporation and
4  * others. All Rights Reserved.
5  *******************************************************************************
6  */
7 
8 package com.ibm.icu.lang;
9 
10 import java.lang.ref.SoftReference;
11 import java.util.HashMap;
12 import java.util.Iterator;
13 import java.util.Locale;
14 import java.util.Map;
15 
16 import com.ibm.icu.impl.IllegalIcuArgumentException;
17 import com.ibm.icu.impl.Trie2;
18 import com.ibm.icu.impl.UBiDiProps;
19 import com.ibm.icu.impl.UCaseProps;
20 import com.ibm.icu.impl.UCharacterName;
21 import com.ibm.icu.impl.UCharacterNameChoice;
22 import com.ibm.icu.impl.UCharacterProperty;
23 import com.ibm.icu.impl.UCharacterUtility;
24 import com.ibm.icu.impl.UPropertyAliases;
25 import com.ibm.icu.lang.UCharacterEnums.ECharacterCategory;
26 import com.ibm.icu.lang.UCharacterEnums.ECharacterDirection;
27 import com.ibm.icu.text.BreakIterator;
28 import com.ibm.icu.text.Normalizer2;
29 import com.ibm.icu.text.UTF16;
30 import com.ibm.icu.util.RangeValueIterator;
31 import com.ibm.icu.util.ULocale;
32 import com.ibm.icu.util.ValueIterator;
33 import com.ibm.icu.util.VersionInfo;
34 
35 /**
36  * {@icuenhanced java.lang.Character}.{@icu _usage_}
37  *
38  * <p>The UCharacter class provides extensions to the
39  * <a href="http://java.sun.com/j2se/1.5/docs/api/java/lang/Character.html">
40  * java.lang.Character</a> class. These extensions provide support for
41  * more Unicode properties and together with the <a href=../text/UTF16.html>UTF16</a>
42  * class, provide support for supplementary characters (those with code
43  * points above U+FFFF).
44  * Each ICU release supports the latest version of Unicode available at that time.
45  *
46  * <p>Code points are represented in this API using ints. While it would be
47  * more convenient in Java to have a separate primitive datatype for them,
48  * ints suffice in the meantime.
49  *
50  * <p>To use this class please add the jar file icu4j.jar to the
51  * class path, since it contains data files which supply the information used
52  * by this class.<br>
53  * E.g. in Windows <br>
54  * <code>set CLASSPATH=%CLASSPATH%;$JAR_FILE_PATH/icu4j.jar</code>.<br>
55  * Otherwise, another method would be to copy the files uprops.dat and
56  * unames.icu from the icu4j source subdirectory
57  * <i>$ICU4J_SRC/src/com.ibm.icu.impl.data</i> to your class directory
58  * <i>$ICU4J_CLASS/com.ibm.icu.impl.data</i>.
59  *
60  * <p>Aside from the additions for UTF-16 support, and the updated Unicode
61  * properties, the main differences between UCharacter and Character are:
62  * <ul>
63  * <li> UCharacter is not designed to be a char wrapper and does not have
64  *      APIs that involve management of that single char.<br>
65  *      These include:
66  *      <ul>
67  *        <li> char charValue(),
68  *        <li> int compareTo(java.lang.Character, java.lang.Character), etc.
69  *      </ul>
70  * <li> UCharacter does not include Character APIs that are deprecated, nor
71  *      does it include the Java-specific character information, such as
72  *      boolean isJavaIdentifierPart(char ch).
73  * <li> Character maps characters 'A' - 'Z' and 'a' - 'z' to the numeric
74  *      values '10' - '35'. UCharacter also does this in digit and
75  *      getNumericValue, to adhere to the java semantics of these
76  *      methods.  New methods unicodeDigit, and
77  *      getUnicodeNumericValue do not treat the above code points
78  *      as having numeric values.  This is a semantic change from ICU4J 1.3.1.
79  * </ul>
80  * <p>
81  * Further detail on differences can be determined using the program
82  *        <a href=
83  * "http://source.icu-project.org/repos/icu/icu4j/trunk/src/com/ibm/icu/dev/test/lang/UCharacterCompare.java">
84  *        com.ibm.icu.dev.test.lang.UCharacterCompare</a>
85  * </p>
86  * <p>
87  * In addition to Java compatibility functions, which calculate derived properties,
88  * this API provides low-level access to the Unicode Character Database.
89  * </p>
90  * <p>
91  * Unicode assigns each code point (not just assigned character) values for
92  * many properties.
93  * Most of them are simple boolean flags, or constants from a small enumerated list.
94  * For some properties, values are strings or other relatively more complex types.
95  * </p>
96  * <p>
97  * For more information see
98  * <a href="http://www.unicode.org/ucd/">"About the Unicode Character Database"</a>
99  * (http://www.unicode.org/ucd/)
100  * and the <a href="http://www.icu-project.org/userguide/properties.html">ICU
101  * User Guide chapter on Properties</a>
102  * (http://www.icu-project.org/userguide/properties.html).
103  * </p>
104  * <p>
105  * There are also functions that provide easy migration from C/POSIX functions
106  * like isblank(). Their use is generally discouraged because the C/POSIX
107  * standards do not define their semantics beyond the ASCII range, which means
108  * that different implementations exhibit very different behavior.
109  * Instead, Unicode properties should be used directly.
110  * </p>
111  * <p>
112  * There are also only a few, broad C/POSIX character classes, and they tend
113  * to be used for conflicting purposes. For example, the "isalpha()" class
114  * is sometimes used to determine word boundaries, while a more sophisticated
115  * approach would at least distinguish initial letters from continuation
116  * characters (the latter including combining marks).
117  * (In ICU, BreakIterator is the most sophisticated API for word boundaries.)
118  * Another example: There is no "istitle()" class for titlecase characters.
119  * </p>
120  * <p>
121  * ICU 3.4 and later provides API access for all twelve C/POSIX character classes.
122  * ICU implements them according to the Standard Recommendations in
123  * Annex C: Compatibility Properties of UTS #18 Unicode Regular Expressions
124  * (http://www.unicode.org/reports/tr18/#Compatibility_Properties).
125  * </p>
126  * <p>
127  * API access for C/POSIX character classes is as follows:
128  * <pre>{@code
129  * - alpha:     isUAlphabetic(c) or hasBinaryProperty(c, UProperty.ALPHABETIC)
130  * - lower:     isULowercase(c) or hasBinaryProperty(c, UProperty.LOWERCASE)
131  * - upper:     isUUppercase(c) or hasBinaryProperty(c, UProperty.UPPERCASE)
132  * - punct:     ((1<<getType(c)) & ((1<<DASH_PUNCTUATION)|(1<<START_PUNCTUATION)|
133  *               (1<<END_PUNCTUATION)|(1<<CONNECTOR_PUNCTUATION)|(1<<OTHER_PUNCTUATION)|
134  *               (1<<INITIAL_PUNCTUATION)|(1<<FINAL_PUNCTUATION)))!=0
135  * - digit:     isDigit(c) or getType(c)==DECIMAL_DIGIT_NUMBER
136  * - xdigit:    hasBinaryProperty(c, UProperty.POSIX_XDIGIT)
137  * - alnum:     hasBinaryProperty(c, UProperty.POSIX_ALNUM)
138  * - space:     isUWhiteSpace(c) or hasBinaryProperty(c, UProperty.WHITE_SPACE)
139  * - blank:     hasBinaryProperty(c, UProperty.POSIX_BLANK)
140  * - cntrl:     getType(c)==CONTROL
141  * - graph:     hasBinaryProperty(c, UProperty.POSIX_GRAPH)
142  * - print:     hasBinaryProperty(c, UProperty.POSIX_PRINT)}</pre>
143  * </p>
144  * <p>
145  * The C/POSIX character classes are also available in UnicodeSet patterns,
146  * using patterns like [:graph:] or \p{graph}.
147  * </p>
148  *
149  * <p>{@icunote} There are several ICU (and Java) whitespace functions.
150  * Comparison:<ul>
151  * <li> isUWhiteSpace=UCHAR_WHITE_SPACE: Unicode White_Space property;
152  *       most of general categories "Z" (separators) + most whitespace ISO controls
153  *       (including no-break spaces, but excluding IS1..IS4 and ZWSP)
154  * <li> isWhitespace: Java isWhitespace; Z + whitespace ISO controls but excluding no-break spaces
155  * <li> isSpaceChar: just Z (including no-break spaces)</ul>
156  * </p>
157  * <p>
158  * This class is not subclassable.
159  * </p>
160  * @author Syn Wee Quek
161  * @stable ICU 2.1
162  * @see com.ibm.icu.lang.UCharacterEnums
163  */
164 
165 public final class UCharacter implements ECharacterCategory, ECharacterDirection
166 {
167     // public inner classes ----------------------------------------------
168 
169     /**
170      * {@icuenhanced java.lang.Character.UnicodeBlock}.{@icu _usage_}
171      *
172      * A family of character subsets representing the character blocks in the
173      * Unicode specification, generated from Unicode Data file Blocks.txt.
174      * Character blocks generally define characters used for a specific script
175      * or purpose. A character is contained by at most one Unicode block.
176      *
177      * {@icunote} All fields named XXX_ID are specific to ICU.
178      *
179      * @stable ICU 2.4
180      */
181     public static final class UnicodeBlock extends Character.Subset
182     {
183         // block id corresponding to icu4c -----------------------------------
184 
185         /**
186          * @stable ICU 2.4
187          */
188         public static final int INVALID_CODE_ID = -1;
189         /**
190          * @stable ICU 2.4
191          */
192         public static final int BASIC_LATIN_ID = 1;
193         /**
194          * @stable ICU 2.4
195          */
196         public static final int LATIN_1_SUPPLEMENT_ID = 2;
197         /**
198          * @stable ICU 2.4
199          */
200         public static final int LATIN_EXTENDED_A_ID = 3;
201         /**
202          * @stable ICU 2.4
203          */
204         public static final int LATIN_EXTENDED_B_ID = 4;
205         /**
206          * @stable ICU 2.4
207          */
208         public static final int IPA_EXTENSIONS_ID = 5;
209         /**
210          * @stable ICU 2.4
211          */
212         public static final int SPACING_MODIFIER_LETTERS_ID = 6;
213         /**
214          * @stable ICU 2.4
215          */
216         public static final int COMBINING_DIACRITICAL_MARKS_ID = 7;
217         /**
218          * Unicode 3.2 renames this block to "Greek and Coptic".
219          * @stable ICU 2.4
220          */
221         public static final int GREEK_ID = 8;
222         /**
223          * @stable ICU 2.4
224          */
225         public static final int CYRILLIC_ID = 9;
226         /**
227          * @stable ICU 2.4
228          */
229         public static final int ARMENIAN_ID = 10;
230         /**
231          * @stable ICU 2.4
232          */
233         public static final int HEBREW_ID = 11;
234         /**
235          * @stable ICU 2.4
236          */
237         public static final int ARABIC_ID = 12;
238         /**
239          * @stable ICU 2.4
240          */
241         public static final int SYRIAC_ID = 13;
242         /**
243          * @stable ICU 2.4
244          */
245         public static final int THAANA_ID = 14;
246         /**
247          * @stable ICU 2.4
248          */
249         public static final int DEVANAGARI_ID = 15;
250         /**
251          * @stable ICU 2.4
252          */
253         public static final int BENGALI_ID = 16;
254         /**
255          * @stable ICU 2.4
256          */
257         public static final int GURMUKHI_ID = 17;
258         /**
259          * @stable ICU 2.4
260          */
261         public static final int GUJARATI_ID = 18;
262         /**
263          * @stable ICU 2.4
264          */
265         public static final int ORIYA_ID = 19;
266         /**
267          * @stable ICU 2.4
268          */
269         public static final int TAMIL_ID = 20;
270         /**
271          * @stable ICU 2.4
272          */
273         public static final int TELUGU_ID = 21;
274         /**
275          * @stable ICU 2.4
276          */
277         public static final int KANNADA_ID = 22;
278         /**
279          * @stable ICU 2.4
280          */
281         public static final int MALAYALAM_ID = 23;
282         /**
283          * @stable ICU 2.4
284          */
285         public static final int SINHALA_ID = 24;
286         /**
287          * @stable ICU 2.4
288          */
289         public static final int THAI_ID = 25;
290         /**
291          * @stable ICU 2.4
292          */
293         public static final int LAO_ID = 26;
294         /**
295          * @stable ICU 2.4
296          */
297         public static final int TIBETAN_ID = 27;
298         /**
299          * @stable ICU 2.4
300          */
301         public static final int MYANMAR_ID = 28;
302         /**
303          * @stable ICU 2.4
304          */
305         public static final int GEORGIAN_ID = 29;
306         /**
307          * @stable ICU 2.4
308          */
309         public static final int HANGUL_JAMO_ID = 30;
310         /**
311          * @stable ICU 2.4
312          */
313         public static final int ETHIOPIC_ID = 31;
314         /**
315          * @stable ICU 2.4
316          */
317         public static final int CHEROKEE_ID = 32;
318         /**
319          * @stable ICU 2.4
320          */
321         public static final int UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_ID = 33;
322         /**
323          * @stable ICU 2.4
324          */
325         public static final int OGHAM_ID = 34;
326         /**
327          * @stable ICU 2.4
328          */
329         public static final int RUNIC_ID = 35;
330         /**
331          * @stable ICU 2.4
332          */
333         public static final int KHMER_ID = 36;
334         /**
335          * @stable ICU 2.4
336          */
337         public static final int MONGOLIAN_ID = 37;
338         /**
339          * @stable ICU 2.4
340          */
341         public static final int LATIN_EXTENDED_ADDITIONAL_ID = 38;
342         /**
343          * @stable ICU 2.4
344          */
345         public static final int GREEK_EXTENDED_ID = 39;
346         /**
347          * @stable ICU 2.4
348          */
349         public static final int GENERAL_PUNCTUATION_ID = 40;
350         /**
351          * @stable ICU 2.4
352          */
353         public static final int SUPERSCRIPTS_AND_SUBSCRIPTS_ID = 41;
354         /**
355          * @stable ICU 2.4
356          */
357         public static final int CURRENCY_SYMBOLS_ID = 42;
358         /**
359          * Unicode 3.2 renames this block to "Combining Diacritical Marks for
360          * Symbols".
361          * @stable ICU 2.4
362          */
363         public static final int COMBINING_MARKS_FOR_SYMBOLS_ID = 43;
364         /**
365          * @stable ICU 2.4
366          */
367         public static final int LETTERLIKE_SYMBOLS_ID = 44;
368         /**
369          * @stable ICU 2.4
370          */
371         public static final int NUMBER_FORMS_ID = 45;
372         /**
373          * @stable ICU 2.4
374          */
375         public static final int ARROWS_ID = 46;
376         /**
377          * @stable ICU 2.4
378          */
379         public static final int MATHEMATICAL_OPERATORS_ID = 47;
380         /**
381          * @stable ICU 2.4
382          */
383         public static final int MISCELLANEOUS_TECHNICAL_ID = 48;
384         /**
385          * @stable ICU 2.4
386          */
387         public static final int CONTROL_PICTURES_ID = 49;
388         /**
389          * @stable ICU 2.4
390          */
391         public static final int OPTICAL_CHARACTER_RECOGNITION_ID = 50;
392         /**
393          * @stable ICU 2.4
394          */
395         public static final int ENCLOSED_ALPHANUMERICS_ID = 51;
396         /**
397          * @stable ICU 2.4
398          */
399         public static final int BOX_DRAWING_ID = 52;
400         /**
401          * @stable ICU 2.4
402          */
403         public static final int BLOCK_ELEMENTS_ID = 53;
404         /**
405          * @stable ICU 2.4
406          */
407         public static final int GEOMETRIC_SHAPES_ID = 54;
408         /**
409          * @stable ICU 2.4
410          */
411         public static final int MISCELLANEOUS_SYMBOLS_ID = 55;
412         /**
413          * @stable ICU 2.4
414          */
415         public static final int DINGBATS_ID = 56;
416         /**
417          * @stable ICU 2.4
418          */
419         public static final int BRAILLE_PATTERNS_ID = 57;
420         /**
421          * @stable ICU 2.4
422          */
423         public static final int CJK_RADICALS_SUPPLEMENT_ID = 58;
424         /**
425          * @stable ICU 2.4
426          */
427         public static final int KANGXI_RADICALS_ID = 59;
428         /**
429          * @stable ICU 2.4
430          */
431         public static final int IDEOGRAPHIC_DESCRIPTION_CHARACTERS_ID = 60;
432         /**
433          * @stable ICU 2.4
434          */
435         public static final int CJK_SYMBOLS_AND_PUNCTUATION_ID = 61;
436         /**
437          * @stable ICU 2.4
438          */
439         public static final int HIRAGANA_ID = 62;
440         /**
441          * @stable ICU 2.4
442          */
443         public static final int KATAKANA_ID = 63;
444         /**
445          * @stable ICU 2.4
446          */
447         public static final int BOPOMOFO_ID = 64;
448         /**
449          * @stable ICU 2.4
450          */
451         public static final int HANGUL_COMPATIBILITY_JAMO_ID = 65;
452         /**
453          * @stable ICU 2.4
454          */
455         public static final int KANBUN_ID = 66;
456         /**
457          * @stable ICU 2.4
458          */
459         public static final int BOPOMOFO_EXTENDED_ID = 67;
460         /**
461          * @stable ICU 2.4
462          */
463         public static final int ENCLOSED_CJK_LETTERS_AND_MONTHS_ID = 68;
464         /**
465          * @stable ICU 2.4
466          */
467         public static final int CJK_COMPATIBILITY_ID = 69;
468         /**
469          * @stable ICU 2.4
470          */
471         public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_ID = 70;
472         /**
473          * @stable ICU 2.4
474          */
475         public static final int CJK_UNIFIED_IDEOGRAPHS_ID = 71;
476         /**
477          * @stable ICU 2.4
478          */
479         public static final int YI_SYLLABLES_ID = 72;
480         /**
481          * @stable ICU 2.4
482          */
483         public static final int YI_RADICALS_ID = 73;
484         /**
485          * @stable ICU 2.4
486          */
487         public static final int HANGUL_SYLLABLES_ID = 74;
488         /**
489          * @stable ICU 2.4
490          */
491         public static final int HIGH_SURROGATES_ID = 75;
492         /**
493          * @stable ICU 2.4
494          */
495         public static final int HIGH_PRIVATE_USE_SURROGATES_ID = 76;
496         /**
497          * @stable ICU 2.4
498          */
499         public static final int LOW_SURROGATES_ID = 77;
500         /**
501          * Same as public static final int PRIVATE_USE_ID.
502          * Until Unicode 3.1.1, the corresponding block name was "Private Use",
503          * and multiple code point ranges had this block.
504          * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area"
505          * and adds separate blocks for the supplementary PUAs.
506          * @stable ICU 2.4
507          */
508         public static final int PRIVATE_USE_AREA_ID = 78;
509         /**
510          * Same as public static final int PRIVATE_USE_AREA_ID.
511          * Until Unicode 3.1.1, the corresponding block name was "Private Use",
512          * and multiple code point ranges had this block.
513          * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area"
514          * and adds separate blocks for the supplementary PUAs.
515          * @stable ICU 2.4
516          */
517         public static final int PRIVATE_USE_ID = PRIVATE_USE_AREA_ID;
518         /**
519          * @stable ICU 2.4
520          */
521         public static final int CJK_COMPATIBILITY_IDEOGRAPHS_ID = 79;
522         /**
523          * @stable ICU 2.4
524          */
525         public static final int ALPHABETIC_PRESENTATION_FORMS_ID = 80;
526         /**
527          * @stable ICU 2.4
528          */
529         public static final int ARABIC_PRESENTATION_FORMS_A_ID = 81;
530         /**
531          * @stable ICU 2.4
532          */
533         public static final int COMBINING_HALF_MARKS_ID = 82;
534         /**
535          * @stable ICU 2.4
536          */
537         public static final int CJK_COMPATIBILITY_FORMS_ID = 83;
538         /**
539          * @stable ICU 2.4
540          */
541         public static final int SMALL_FORM_VARIANTS_ID = 84;
542         /**
543          * @stable ICU 2.4
544          */
545         public static final int ARABIC_PRESENTATION_FORMS_B_ID = 85;
546         /**
547          * @stable ICU 2.4
548          */
549         public static final int SPECIALS_ID = 86;
550         /**
551          * @stable ICU 2.4
552          */
553         public static final int HALFWIDTH_AND_FULLWIDTH_FORMS_ID = 87;
554         /**
555          * @stable ICU 2.4
556          */
557         public static final int OLD_ITALIC_ID = 88;
558         /**
559          * @stable ICU 2.4
560          */
561         public static final int GOTHIC_ID = 89;
562         /**
563          * @stable ICU 2.4
564          */
565         public static final int DESERET_ID = 90;
566         /**
567          * @stable ICU 2.4
568          */
569         public static final int BYZANTINE_MUSICAL_SYMBOLS_ID = 91;
570         /**
571          * @stable ICU 2.4
572          */
573         public static final int MUSICAL_SYMBOLS_ID = 92;
574         /**
575          * @stable ICU 2.4
576          */
577         public static final int MATHEMATICAL_ALPHANUMERIC_SYMBOLS_ID = 93;
578         /**
579          * @stable ICU 2.4
580          */
581         public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_ID = 94;
582         /**
583          * @stable ICU 2.4
584          */
585         public static final int
586         CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_ID = 95;
587         /**
588          * @stable ICU 2.4
589          */
590         public static final int TAGS_ID = 96;
591 
592         // New blocks in Unicode 3.2
593 
594         /**
595          * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement".
596          * @stable ICU 2.4
597          */
598         public static final int CYRILLIC_SUPPLEMENTARY_ID = 97;
599         /**
600          * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement".
601          * @stable ICU 3.0
602          */
603 
604         public static final int CYRILLIC_SUPPLEMENT_ID = 97;
605         /**
606          * @stable ICU 2.4
607          */
608         public static final int TAGALOG_ID = 98;
609         /**
610          * @stable ICU 2.4
611          */
612         public static final int HANUNOO_ID = 99;
613         /**
614          * @stable ICU 2.4
615          */
616         public static final int BUHID_ID = 100;
617         /**
618          * @stable ICU 2.4
619          */
620         public static final int TAGBANWA_ID = 101;
621         /**
622          * @stable ICU 2.4
623          */
624         public static final int MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A_ID = 102;
625         /**
626          * @stable ICU 2.4
627          */
628         public static final int SUPPLEMENTAL_ARROWS_A_ID = 103;
629         /**
630          * @stable ICU 2.4
631          */
632         public static final int SUPPLEMENTAL_ARROWS_B_ID = 104;
633         /**
634          * @stable ICU 2.4
635          */
636         public static final int MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B_ID = 105;
637         /**
638          * @stable ICU 2.4
639          */
640         public static final int SUPPLEMENTAL_MATHEMATICAL_OPERATORS_ID = 106;
641         /**
642          * @stable ICU 2.4
643          */
644         public static final int KATAKANA_PHONETIC_EXTENSIONS_ID = 107;
645         /**
646          * @stable ICU 2.4
647          */
648         public static final int VARIATION_SELECTORS_ID = 108;
649         /**
650          * @stable ICU 2.4
651          */
652         public static final int SUPPLEMENTARY_PRIVATE_USE_AREA_A_ID = 109;
653         /**
654          * @stable ICU 2.4
655          */
656         public static final int SUPPLEMENTARY_PRIVATE_USE_AREA_B_ID = 110;
657 
658         /**
659          * @stable ICU 2.6
660          */
661         public static final int LIMBU_ID = 111; /*[1900]*/
662         /**
663          * @stable ICU 2.6
664          */
665         public static final int TAI_LE_ID = 112; /*[1950]*/
666         /**
667          * @stable ICU 2.6
668          */
669         public static final int KHMER_SYMBOLS_ID = 113; /*[19E0]*/
670         /**
671          * @stable ICU 2.6
672          */
673         public static final int PHONETIC_EXTENSIONS_ID = 114; /*[1D00]*/
674         /**
675          * @stable ICU 2.6
676          */
677         public static final int MISCELLANEOUS_SYMBOLS_AND_ARROWS_ID = 115; /*[2B00]*/
678         /**
679          * @stable ICU 2.6
680          */
681         public static final int YIJING_HEXAGRAM_SYMBOLS_ID = 116; /*[4DC0]*/
682         /**
683          * @stable ICU 2.6
684          */
685         public static final int LINEAR_B_SYLLABARY_ID = 117; /*[10000]*/
686         /**
687          * @stable ICU 2.6
688          */
689         public static final int LINEAR_B_IDEOGRAMS_ID = 118; /*[10080]*/
690         /**
691          * @stable ICU 2.6
692          */
693         public static final int AEGEAN_NUMBERS_ID = 119; /*[10100]*/
694         /**
695          * @stable ICU 2.6
696          */
697         public static final int UGARITIC_ID = 120; /*[10380]*/
698         /**
699          * @stable ICU 2.6
700          */
701         public static final int SHAVIAN_ID = 121; /*[10450]*/
702         /**
703          * @stable ICU 2.6
704          */
705         public static final int OSMANYA_ID = 122; /*[10480]*/
706         /**
707          * @stable ICU 2.6
708          */
709         public static final int CYPRIOT_SYLLABARY_ID = 123; /*[10800]*/
710         /**
711          * @stable ICU 2.6
712          */
713         public static final int TAI_XUAN_JING_SYMBOLS_ID = 124; /*[1D300]*/
714         /**
715          * @stable ICU 2.6
716          */
717         public static final int VARIATION_SELECTORS_SUPPLEMENT_ID = 125; /*[E0100]*/
718 
719         /* New blocks in Unicode 4.1 */
720 
721         /**
722          * @stable ICU 3.4
723          */
724         public static final int ANCIENT_GREEK_MUSICAL_NOTATION_ID = 126; /*[1D200]*/
725 
726         /**
727          * @stable ICU 3.4
728          */
729         public static final int ANCIENT_GREEK_NUMBERS_ID = 127; /*[10140]*/
730 
731         /**
732          * @stable ICU 3.4
733          */
734         public static final int ARABIC_SUPPLEMENT_ID = 128; /*[0750]*/
735 
736         /**
737          * @stable ICU 3.4
738          */
739         public static final int BUGINESE_ID = 129; /*[1A00]*/
740 
741         /**
742          * @stable ICU 3.4
743          */
744         public static final int CJK_STROKES_ID = 130; /*[31C0]*/
745 
746         /**
747          * @stable ICU 3.4
748          */
749         public static final int COMBINING_DIACRITICAL_MARKS_SUPPLEMENT_ID = 131; /*[1DC0]*/
750 
751         /**
752          * @stable ICU 3.4
753          */
754         public static final int COPTIC_ID = 132; /*[2C80]*/
755 
756         /**
757          * @stable ICU 3.4
758          */
759         public static final int ETHIOPIC_EXTENDED_ID = 133; /*[2D80]*/
760 
761         /**
762          * @stable ICU 3.4
763          */
764         public static final int ETHIOPIC_SUPPLEMENT_ID = 134; /*[1380]*/
765 
766         /**
767          * @stable ICU 3.4
768          */
769         public static final int GEORGIAN_SUPPLEMENT_ID = 135; /*[2D00]*/
770 
771         /**
772          * @stable ICU 3.4
773          */
774         public static final int GLAGOLITIC_ID = 136; /*[2C00]*/
775 
776         /**
777          * @stable ICU 3.4
778          */
779         public static final int KHAROSHTHI_ID = 137; /*[10A00]*/
780 
781         /**
782          * @stable ICU 3.4
783          */
784         public static final int MODIFIER_TONE_LETTERS_ID = 138; /*[A700]*/
785 
786         /**
787          * @stable ICU 3.4
788          */
789         public static final int NEW_TAI_LUE_ID = 139; /*[1980]*/
790 
791         /**
792          * @stable ICU 3.4
793          */
794         public static final int OLD_PERSIAN_ID = 140; /*[103A0]*/
795 
796         /**
797          * @stable ICU 3.4
798          */
799         public static final int PHONETIC_EXTENSIONS_SUPPLEMENT_ID = 141; /*[1D80]*/
800 
801         /**
802          * @stable ICU 3.4
803          */
804         public static final int SUPPLEMENTAL_PUNCTUATION_ID = 142; /*[2E00]*/
805 
806         /**
807          * @stable ICU 3.4
808          */
809         public static final int SYLOTI_NAGRI_ID = 143; /*[A800]*/
810 
811         /**
812          * @stable ICU 3.4
813          */
814         public static final int TIFINAGH_ID = 144; /*[2D30]*/
815 
816         /**
817          * @stable ICU 3.4
818          */
819         public static final int VERTICAL_FORMS_ID = 145; /*[FE10]*/
820 
821         /* New blocks in Unicode 5.0 */
822 
823         /**
824          * @stable ICU 3.6
825          */
826         public static final int NKO_ID = 146; /*[07C0]*/
827         /**
828          * @stable ICU 3.6
829          */
830         public static final int BALINESE_ID = 147; /*[1B00]*/
831         /**
832          * @stable ICU 3.6
833          */
834         public static final int LATIN_EXTENDED_C_ID = 148; /*[2C60]*/
835         /**
836          * @stable ICU 3.6
837          */
838         public static final int LATIN_EXTENDED_D_ID = 149; /*[A720]*/
839         /**
840          * @stable ICU 3.6
841          */
842         public static final int PHAGS_PA_ID = 150; /*[A840]*/
843         /**
844          * @stable ICU 3.6
845          */
846         public static final int PHOENICIAN_ID = 151; /*[10900]*/
847         /**
848          * @stable ICU 3.6
849          */
850         public static final int CUNEIFORM_ID = 152; /*[12000]*/
851         /**
852          * @stable ICU 3.6
853          */
854         public static final int CUNEIFORM_NUMBERS_AND_PUNCTUATION_ID = 153; /*[12400]*/
855         /**
856          * @stable ICU 3.6
857          */
858         public static final int COUNTING_ROD_NUMERALS_ID = 154; /*[1D360]*/
859 
860         /**
861          * @stable ICU 4.0
862          */
863         public static final int SUNDANESE_ID = 155; /* [1B80] */
864 
865         /**
866          * @stable ICU 4.0
867          */
868         public static final int LEPCHA_ID = 156; /* [1C00] */
869 
870         /**
871          * @stable ICU 4.0
872          */
873         public static final int OL_CHIKI_ID = 157; /* [1C50] */
874 
875         /**
876          * @stable ICU 4.0
877          */
878         public static final int CYRILLIC_EXTENDED_A_ID = 158; /* [2DE0] */
879 
880         /**
881          * @stable ICU 4.0
882          */
883         public static final int VAI_ID = 159; /* [A500] */
884 
885         /**
886          * @stable ICU 4.0
887          */
888         public static final int CYRILLIC_EXTENDED_B_ID = 160; /* [A640] */
889 
890         /**
891          * @stable ICU 4.0
892          */
893         public static final int SAURASHTRA_ID = 161; /* [A880] */
894 
895         /**
896          * @stable ICU 4.0
897          */
898         public static final int KAYAH_LI_ID = 162; /* [A900] */
899 
900         /**
901          * @stable ICU 4.0
902          */
903         public static final int REJANG_ID = 163; /* [A930] */
904 
905         /**
906          * @stable ICU 4.0
907          */
908         public static final int CHAM_ID = 164; /* [AA00] */
909 
910         /**
911          * @stable ICU 4.0
912          */
913         public static final int ANCIENT_SYMBOLS_ID = 165; /* [10190] */
914 
915         /**
916          * @stable ICU 4.0
917          */
918         public static final int PHAISTOS_DISC_ID = 166; /* [101D0] */
919 
920         /**
921          * @stable ICU 4.0
922          */
923         public static final int LYCIAN_ID = 167; /* [10280] */
924 
925         /**
926          * @stable ICU 4.0
927          */
928         public static final int CARIAN_ID = 168; /* [102A0] */
929 
930         /**
931          * @stable ICU 4.0
932          */
933         public static final int LYDIAN_ID = 169; /* [10920] */
934 
935         /**
936          * @stable ICU 4.0
937          */
938         public static final int MAHJONG_TILES_ID = 170; /* [1F000] */
939 
940         /**
941          * @stable ICU 4.0
942          */
943         public static final int DOMINO_TILES_ID = 171; /* [1F030] */
944 
        /*
         * New blocks in Unicode 5.2.
         * Each constant below is the integer ID of one block; the bracketed
         * hexadecimal number in its trailing comment is the block's first code point.
         */

        /** @stable ICU 4.4 */
        public static final int SAMARITAN_ID = 172; /*[0800]*/
        /** @stable ICU 4.4 */
        public static final int UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_ID = 173; /*[18B0]*/
        /** @stable ICU 4.4 */
        public static final int TAI_THAM_ID = 174; /*[1A20]*/
        /** @stable ICU 4.4 */
        public static final int VEDIC_EXTENSIONS_ID = 175; /*[1CD0]*/
        /** @stable ICU 4.4 */
        public static final int LISU_ID = 176; /*[A4D0]*/
        /** @stable ICU 4.4 */
        public static final int BAMUM_ID = 177; /*[A6A0]*/
        /** @stable ICU 4.4 */
        public static final int COMMON_INDIC_NUMBER_FORMS_ID = 178; /*[A830]*/
        /** @stable ICU 4.4 */
        public static final int DEVANAGARI_EXTENDED_ID = 179; /*[A8E0]*/
        /** @stable ICU 4.4 */
        public static final int HANGUL_JAMO_EXTENDED_A_ID = 180; /*[A960]*/
        /** @stable ICU 4.4 */
        public static final int JAVANESE_ID = 181; /*[A980]*/
        /** @stable ICU 4.4 */
        public static final int MYANMAR_EXTENDED_A_ID = 182; /*[AA60]*/
        /** @stable ICU 4.4 */
        public static final int TAI_VIET_ID = 183; /*[AA80]*/
        /** @stable ICU 4.4 */
        public static final int MEETEI_MAYEK_ID = 184; /*[ABC0]*/
        /** @stable ICU 4.4 */
        public static final int HANGUL_JAMO_EXTENDED_B_ID = 185; /*[D7B0]*/
        /** @stable ICU 4.4 */
        public static final int IMPERIAL_ARAMAIC_ID = 186; /*[10840]*/
        /** @stable ICU 4.4 */
        public static final int OLD_SOUTH_ARABIAN_ID = 187; /*[10A60]*/
        /** @stable ICU 4.4 */
        public static final int AVESTAN_ID = 188; /*[10B00]*/
        /** @stable ICU 4.4 */
        public static final int INSCRIPTIONAL_PARTHIAN_ID = 189; /*[10B40]*/
        /** @stable ICU 4.4 */
        public static final int INSCRIPTIONAL_PAHLAVI_ID = 190; /*[10B60]*/
        /** @stable ICU 4.4 */
        public static final int OLD_TURKIC_ID = 191; /*[10C00]*/
        /** @stable ICU 4.4 */
        public static final int RUMI_NUMERAL_SYMBOLS_ID = 192; /*[10E60]*/
        /** @stable ICU 4.4 */
        public static final int KAITHI_ID = 193; /*[11080]*/
        /** @stable ICU 4.4 */
        public static final int EGYPTIAN_HIEROGLYPHS_ID = 194; /*[13000]*/
        /** @stable ICU 4.4 */
        public static final int ENCLOSED_ALPHANUMERIC_SUPPLEMENT_ID = 195; /*[1F100]*/
        /** @stable ICU 4.4 */
        public static final int ENCLOSED_IDEOGRAPHIC_SUPPLEMENT_ID = 196; /*[1F200]*/
        /** @stable ICU 4.4 */
        public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_ID = 197; /*[2A700]*/
999 
        /*
         * New blocks in Unicode 6.0.
         * Each constant below is the integer ID of one block; the bracketed
         * hexadecimal number in its trailing comment is the block's first code point.
         */

        /** @stable ICU 4.6 */
        public static final int MANDAIC_ID = 198; /*[0840]*/
        /** @stable ICU 4.6 */
        public static final int BATAK_ID = 199; /*[1BC0]*/
        /** @stable ICU 4.6 */
        public static final int ETHIOPIC_EXTENDED_A_ID = 200; /*[AB00]*/
        /** @stable ICU 4.6 */
        public static final int BRAHMI_ID = 201; /*[11000]*/
        /** @stable ICU 4.6 */
        public static final int BAMUM_SUPPLEMENT_ID = 202; /*[16800]*/
        /** @stable ICU 4.6 */
        public static final int KANA_SUPPLEMENT_ID = 203; /*[1B000]*/
        /** @stable ICU 4.6 */
        public static final int PLAYING_CARDS_ID = 204; /*[1F0A0]*/
        /** @stable ICU 4.6 */
        public static final int MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS_ID = 205; /*[1F300]*/
        /** @stable ICU 4.6 */
        public static final int EMOTICONS_ID = 206; /*[1F600]*/
        /** @stable ICU 4.6 */
        public static final int TRANSPORT_AND_MAP_SYMBOLS_ID = 207; /*[1F680]*/
        /** @stable ICU 4.6 */
        public static final int ALCHEMICAL_SYMBOLS_ID = 208; /*[1F700]*/
        /** @stable ICU 4.6 */
        public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_ID = 209; /*[2B740]*/
1026 
        /*
         * New blocks in Unicode 6.1.
         * Each constant below is the integer ID of one block; the bracketed
         * hexadecimal number in its trailing comment is the block's first code point.
         */

        /** @stable ICU 49 */
        public static final int ARABIC_EXTENDED_A_ID = 210; /*[08A0]*/
        /** @stable ICU 49 */
        public static final int ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS_ID = 211; /*[1EE00]*/
        /** @stable ICU 49 */
        public static final int CHAKMA_ID = 212; /*[11100]*/
        /** @stable ICU 49 */
        public static final int MEETEI_MAYEK_EXTENSIONS_ID = 213; /*[AAE0]*/
        /** @stable ICU 49 */
        public static final int MEROITIC_CURSIVE_ID = 214; /*[109A0]*/
        /** @stable ICU 49 */
        public static final int MEROITIC_HIEROGLYPHS_ID = 215; /*[10980]*/
        /** @stable ICU 49 */
        public static final int MIAO_ID = 216; /*[16F00]*/
        /** @stable ICU 49 */
        public static final int SHARADA_ID = 217; /*[11180]*/
        /** @stable ICU 49 */
        public static final int SORA_SOMPENG_ID = 218; /*[110D0]*/
        /** @stable ICU 49 */
        public static final int SUNDANESE_SUPPLEMENT_ID = 219; /*[1CC0]*/
        /** @stable ICU 49 */
        public static final int TAKRI_ID = 220; /*[11680]*/
1051 
        /*
         * New blocks in Unicode 7.0.
         * Each constant below is the integer ID of one block; the bracketed
         * hexadecimal number in its trailing comment is the block's first code point.
         */

        /** @stable ICU 54 */
        public static final int BASSA_VAH_ID = 221; /*[16AD0]*/
        /** @stable ICU 54 */
        public static final int CAUCASIAN_ALBANIAN_ID = 222; /*[10530]*/
        /** @stable ICU 54 */
        public static final int COPTIC_EPACT_NUMBERS_ID = 223; /*[102E0]*/
        /** @stable ICU 54 */
        public static final int COMBINING_DIACRITICAL_MARKS_EXTENDED_ID = 224; /*[1AB0]*/
        /** @stable ICU 54 */
        public static final int DUPLOYAN_ID = 225; /*[1BC00]*/
        /** @stable ICU 54 */
        public static final int ELBASAN_ID = 226; /*[10500]*/
        /** @stable ICU 54 */
        public static final int GEOMETRIC_SHAPES_EXTENDED_ID = 227; /*[1F780]*/
        /** @stable ICU 54 */
        public static final int GRANTHA_ID = 228; /*[11300]*/
        /** @stable ICU 54 */
        public static final int KHOJKI_ID = 229; /*[11200]*/
        /** @stable ICU 54 */
        public static final int KHUDAWADI_ID = 230; /*[112B0]*/
        /** @stable ICU 54 */
        public static final int LATIN_EXTENDED_E_ID = 231; /*[AB30]*/
        /** @stable ICU 54 */
        public static final int LINEAR_A_ID = 232; /*[10600]*/
        /** @stable ICU 54 */
        public static final int MAHAJANI_ID = 233; /*[11150]*/
        /** @stable ICU 54 */
        public static final int MANICHAEAN_ID = 234; /*[10AC0]*/
        /** @stable ICU 54 */
        public static final int MENDE_KIKAKUI_ID = 235; /*[1E800]*/
        /** @stable ICU 54 */
        public static final int MODI_ID = 236; /*[11600]*/
        /** @stable ICU 54 */
        public static final int MRO_ID = 237; /*[16A40]*/
        /** @stable ICU 54 */
        public static final int MYANMAR_EXTENDED_B_ID = 238; /*[A9E0]*/
        /** @stable ICU 54 */
        public static final int NABATAEAN_ID = 239; /*[10880]*/
        /** @stable ICU 54 */
        public static final int OLD_NORTH_ARABIAN_ID = 240; /*[10A80]*/
        /** @stable ICU 54 */
        public static final int OLD_PERMIC_ID = 241; /*[10350]*/
        /** @stable ICU 54 */
        public static final int ORNAMENTAL_DINGBATS_ID = 242; /*[1F650]*/
        /** @stable ICU 54 */
        public static final int PAHAWH_HMONG_ID = 243; /*[16B00]*/
        /** @stable ICU 54 */
        public static final int PALMYRENE_ID = 244; /*[10860]*/
        /** @stable ICU 54 */
        public static final int PAU_CIN_HAU_ID = 245; /*[11AC0]*/
        /** @stable ICU 54 */
        public static final int PSALTER_PAHLAVI_ID = 246; /*[10B80]*/
        /** @stable ICU 54 */
        public static final int SHORTHAND_FORMAT_CONTROLS_ID = 247; /*[1BCA0]*/
        /** @stable ICU 54 */
        public static final int SIDDHAM_ID = 248; /*[11580]*/
        /** @stable ICU 54 */
        public static final int SINHALA_ARCHAIC_NUMBERS_ID = 249; /*[111E0]*/
        /** @stable ICU 54 */
        public static final int SUPPLEMENTAL_ARROWS_C_ID = 250; /*[1F800]*/
        /** @stable ICU 54 */
        public static final int TIRHUTA_ID = 251; /*[11480]*/
        /** @stable ICU 54 */
        public static final int WARANG_CITI_ID = 252; /*[118A0]*/
1118 
1119         /**
1120          * @stable ICU 2.4
1121          */
1122         public static final int COUNT = 253;
1123 
1124         // blocks objects ---------------------------------------------------
1125 
1126         /**
1127          * Array of UnicodeBlocks, for easy access in getInstance(int)
1128          */
1129         private final static UnicodeBlock BLOCKS_[] = new UnicodeBlock[COUNT];
1130 
1131         /**
1132          * @stable ICU 2.6
1133          */
1134         public static final UnicodeBlock NO_BLOCK
1135         = new UnicodeBlock("NO_BLOCK", 0);
1136 
1137         /**
1138          * @stable ICU 2.4
1139          */
1140         public static final UnicodeBlock BASIC_LATIN
1141         = new UnicodeBlock("BASIC_LATIN", BASIC_LATIN_ID);
1142         /**
1143          * @stable ICU 2.4
1144          */
1145         public static final UnicodeBlock LATIN_1_SUPPLEMENT
1146         = new UnicodeBlock("LATIN_1_SUPPLEMENT", LATIN_1_SUPPLEMENT_ID);
1147         /**
1148          * @stable ICU 2.4
1149          */
1150         public static final UnicodeBlock LATIN_EXTENDED_A
1151         = new UnicodeBlock("LATIN_EXTENDED_A", LATIN_EXTENDED_A_ID);
1152         /**
1153          * @stable ICU 2.4
1154          */
1155         public static final UnicodeBlock LATIN_EXTENDED_B
1156         = new UnicodeBlock("LATIN_EXTENDED_B", LATIN_EXTENDED_B_ID);
1157         /**
1158          * @stable ICU 2.4
1159          */
1160         public static final UnicodeBlock IPA_EXTENSIONS
1161         = new UnicodeBlock("IPA_EXTENSIONS", IPA_EXTENSIONS_ID);
1162         /**
1163          * @stable ICU 2.4
1164          */
1165         public static final UnicodeBlock SPACING_MODIFIER_LETTERS
1166         = new UnicodeBlock("SPACING_MODIFIER_LETTERS", SPACING_MODIFIER_LETTERS_ID);
1167         /**
1168          * @stable ICU 2.4
1169          */
1170         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS
1171         = new UnicodeBlock("COMBINING_DIACRITICAL_MARKS", COMBINING_DIACRITICAL_MARKS_ID);
1172         /**
1173          * Unicode 3.2 renames this block to "Greek and Coptic".
1174          * @stable ICU 2.4
1175          */
1176         public static final UnicodeBlock GREEK
1177         = new UnicodeBlock("GREEK", GREEK_ID);
1178         /**
1179          * @stable ICU 2.4
1180          */
1181         public static final UnicodeBlock CYRILLIC
1182         = new UnicodeBlock("CYRILLIC", CYRILLIC_ID);
1183         /**
1184          * @stable ICU 2.4
1185          */
1186         public static final UnicodeBlock ARMENIAN
1187         = new UnicodeBlock("ARMENIAN", ARMENIAN_ID);
1188         /**
1189          * @stable ICU 2.4
1190          */
1191         public static final UnicodeBlock HEBREW
1192         = new UnicodeBlock("HEBREW", HEBREW_ID);
1193         /**
1194          * @stable ICU 2.4
1195          */
1196         public static final UnicodeBlock ARABIC
1197         = new UnicodeBlock("ARABIC", ARABIC_ID);
1198         /**
1199          * @stable ICU 2.4
1200          */
1201         public static final UnicodeBlock SYRIAC
1202         = new UnicodeBlock("SYRIAC", SYRIAC_ID);
1203         /**
1204          * @stable ICU 2.4
1205          */
1206         public static final UnicodeBlock THAANA
1207         = new UnicodeBlock("THAANA", THAANA_ID);
1208         /**
1209          * @stable ICU 2.4
1210          */
1211         public static final UnicodeBlock DEVANAGARI
1212         = new UnicodeBlock("DEVANAGARI", DEVANAGARI_ID);
1213         /**
1214          * @stable ICU 2.4
1215          */
1216         public static final UnicodeBlock BENGALI
1217         = new UnicodeBlock("BENGALI", BENGALI_ID);
1218         /**
1219          * @stable ICU 2.4
1220          */
1221         public static final UnicodeBlock GURMUKHI
1222         = new UnicodeBlock("GURMUKHI", GURMUKHI_ID);
1223         /**
1224          * @stable ICU 2.4
1225          */
1226         public static final UnicodeBlock GUJARATI
1227         = new UnicodeBlock("GUJARATI", GUJARATI_ID);
1228         /**
1229          * @stable ICU 2.4
1230          */
1231         public static final UnicodeBlock ORIYA
1232         = new UnicodeBlock("ORIYA", ORIYA_ID);
1233         /**
1234          * @stable ICU 2.4
1235          */
1236         public static final UnicodeBlock TAMIL
1237         = new UnicodeBlock("TAMIL", TAMIL_ID);
1238         /**
1239          * @stable ICU 2.4
1240          */
1241         public static final UnicodeBlock TELUGU
1242         = new UnicodeBlock("TELUGU", TELUGU_ID);
1243         /**
1244          * @stable ICU 2.4
1245          */
1246         public static final UnicodeBlock KANNADA
1247         = new UnicodeBlock("KANNADA", KANNADA_ID);
1248         /**
1249          * @stable ICU 2.4
1250          */
1251         public static final UnicodeBlock MALAYALAM
1252         = new UnicodeBlock("MALAYALAM", MALAYALAM_ID);
1253         /**
1254          * @stable ICU 2.4
1255          */
1256         public static final UnicodeBlock SINHALA
1257         = new UnicodeBlock("SINHALA", SINHALA_ID);
1258         /**
1259          * @stable ICU 2.4
1260          */
1261         public static final UnicodeBlock THAI
1262         = new UnicodeBlock("THAI", THAI_ID);
1263         /**
1264          * @stable ICU 2.4
1265          */
1266         public static final UnicodeBlock LAO
1267         = new UnicodeBlock("LAO", LAO_ID);
1268         /**
1269          * @stable ICU 2.4
1270          */
1271         public static final UnicodeBlock TIBETAN
1272         = new UnicodeBlock("TIBETAN", TIBETAN_ID);
1273         /**
1274          * @stable ICU 2.4
1275          */
1276         public static final UnicodeBlock MYANMAR
1277         = new UnicodeBlock("MYANMAR", MYANMAR_ID);
1278         /**
1279          * @stable ICU 2.4
1280          */
1281         public static final UnicodeBlock GEORGIAN
1282         = new UnicodeBlock("GEORGIAN", GEORGIAN_ID);
1283         /**
1284          * @stable ICU 2.4
1285          */
1286         public static final UnicodeBlock HANGUL_JAMO
1287         = new UnicodeBlock("HANGUL_JAMO", HANGUL_JAMO_ID);
1288         /**
1289          * @stable ICU 2.4
1290          */
1291         public static final UnicodeBlock ETHIOPIC
1292         = new UnicodeBlock("ETHIOPIC", ETHIOPIC_ID);
1293         /**
1294          * @stable ICU 2.4
1295          */
1296         public static final UnicodeBlock CHEROKEE
1297         = new UnicodeBlock("CHEROKEE", CHEROKEE_ID);
1298         /**
1299          * @stable ICU 2.4
1300          */
1301         public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS
1302         = new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS",
1303                 UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_ID);
1304         /**
1305          * @stable ICU 2.4
1306          */
1307         public static final UnicodeBlock OGHAM
1308         = new UnicodeBlock("OGHAM", OGHAM_ID);
1309         /**
1310          * @stable ICU 2.4
1311          */
1312         public static final UnicodeBlock RUNIC
1313         = new UnicodeBlock("RUNIC", RUNIC_ID);
1314         /**
1315          * @stable ICU 2.4
1316          */
1317         public static final UnicodeBlock KHMER
1318         = new UnicodeBlock("KHMER", KHMER_ID);
1319         /**
1320          * @stable ICU 2.4
1321          */
1322         public static final UnicodeBlock MONGOLIAN
1323         = new UnicodeBlock("MONGOLIAN", MONGOLIAN_ID);
1324         /**
1325          * @stable ICU 2.4
1326          */
1327         public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL
1328         = new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL", LATIN_EXTENDED_ADDITIONAL_ID);
1329         /**
1330          * @stable ICU 2.4
1331          */
1332         public static final UnicodeBlock GREEK_EXTENDED
1333         = new UnicodeBlock("GREEK_EXTENDED", GREEK_EXTENDED_ID);
1334         /**
1335          * @stable ICU 2.4
1336          */
1337         public static final UnicodeBlock GENERAL_PUNCTUATION
1338         = new UnicodeBlock("GENERAL_PUNCTUATION", GENERAL_PUNCTUATION_ID);
1339         /**
1340          * @stable ICU 2.4
1341          */
1342         public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS
1343         = new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS", SUPERSCRIPTS_AND_SUBSCRIPTS_ID);
1344         /**
1345          * @stable ICU 2.4
1346          */
1347         public static final UnicodeBlock CURRENCY_SYMBOLS
1348         = new UnicodeBlock("CURRENCY_SYMBOLS", CURRENCY_SYMBOLS_ID);
1349         /**
1350          * Unicode 3.2 renames this block to "Combining Diacritical Marks for
1351          * Symbols".
1352          * @stable ICU 2.4
1353          */
1354         public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS
1355         = new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS", COMBINING_MARKS_FOR_SYMBOLS_ID);
1356         /**
1357          * @stable ICU 2.4
1358          */
1359         public static final UnicodeBlock LETTERLIKE_SYMBOLS
1360         = new UnicodeBlock("LETTERLIKE_SYMBOLS", LETTERLIKE_SYMBOLS_ID);
1361         /**
1362          * @stable ICU 2.4
1363          */
1364         public static final UnicodeBlock NUMBER_FORMS
1365         = new UnicodeBlock("NUMBER_FORMS", NUMBER_FORMS_ID);
1366         /**
1367          * @stable ICU 2.4
1368          */
1369         public static final UnicodeBlock ARROWS
1370         = new UnicodeBlock("ARROWS", ARROWS_ID);
1371         /**
1372          * @stable ICU 2.4
1373          */
1374         public static final UnicodeBlock MATHEMATICAL_OPERATORS
1375         = new UnicodeBlock("MATHEMATICAL_OPERATORS", MATHEMATICAL_OPERATORS_ID);
1376         /**
1377          * @stable ICU 2.4
1378          */
1379         public static final UnicodeBlock MISCELLANEOUS_TECHNICAL
1380         = new UnicodeBlock("MISCELLANEOUS_TECHNICAL", MISCELLANEOUS_TECHNICAL_ID);
1381         /**
1382          * @stable ICU 2.4
1383          */
1384         public static final UnicodeBlock CONTROL_PICTURES
1385         = new UnicodeBlock("CONTROL_PICTURES", CONTROL_PICTURES_ID);
1386         /**
1387          * @stable ICU 2.4
1388          */
1389         public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION
1390         = new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION", OPTICAL_CHARACTER_RECOGNITION_ID);
1391         /**
1392          * @stable ICU 2.4
1393          */
1394         public static final UnicodeBlock ENCLOSED_ALPHANUMERICS
1395         = new UnicodeBlock("ENCLOSED_ALPHANUMERICS", ENCLOSED_ALPHANUMERICS_ID);
1396         /**
1397          * @stable ICU 2.4
1398          */
1399         public static final UnicodeBlock BOX_DRAWING
1400         = new UnicodeBlock("BOX_DRAWING", BOX_DRAWING_ID);
1401         /**
1402          * @stable ICU 2.4
1403          */
1404         public static final UnicodeBlock BLOCK_ELEMENTS
1405         = new UnicodeBlock("BLOCK_ELEMENTS", BLOCK_ELEMENTS_ID);
1406         /**
1407          * @stable ICU 2.4
1408          */
1409         public static final UnicodeBlock GEOMETRIC_SHAPES
1410         = new UnicodeBlock("GEOMETRIC_SHAPES", GEOMETRIC_SHAPES_ID);
1411         /**
1412          * @stable ICU 2.4
1413          */
1414         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS
1415         = new UnicodeBlock("MISCELLANEOUS_SYMBOLS", MISCELLANEOUS_SYMBOLS_ID);
1416         /**
1417          * @stable ICU 2.4
1418          */
1419         public static final UnicodeBlock DINGBATS
1420         = new UnicodeBlock("DINGBATS", DINGBATS_ID);
1421         /**
1422          * @stable ICU 2.4
1423          */
1424         public static final UnicodeBlock BRAILLE_PATTERNS
1425         = new UnicodeBlock("BRAILLE_PATTERNS", BRAILLE_PATTERNS_ID);
1426         /**
1427          * @stable ICU 2.4
1428          */
1429         public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT
1430         = new UnicodeBlock("CJK_RADICALS_SUPPLEMENT", CJK_RADICALS_SUPPLEMENT_ID);
1431         /**
1432          * @stable ICU 2.4
1433          */
1434         public static final UnicodeBlock KANGXI_RADICALS
1435         = new UnicodeBlock("KANGXI_RADICALS", KANGXI_RADICALS_ID);
1436         /**
1437          * @stable ICU 2.4
1438          */
1439         public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS
1440         = new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS",
1441                 IDEOGRAPHIC_DESCRIPTION_CHARACTERS_ID);
1442         /**
1443          * @stable ICU 2.4
1444          */
1445         public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION
1446         = new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION", CJK_SYMBOLS_AND_PUNCTUATION_ID);
1447         /**
1448          * @stable ICU 2.4
1449          */
1450         public static final UnicodeBlock HIRAGANA
1451         = new UnicodeBlock("HIRAGANA", HIRAGANA_ID);
1452         /**
1453          * @stable ICU 2.4
1454          */
1455         public static final UnicodeBlock KATAKANA
1456         = new UnicodeBlock("KATAKANA", KATAKANA_ID);
1457         /**
1458          * @stable ICU 2.4
1459          */
1460         public static final UnicodeBlock BOPOMOFO
1461         = new UnicodeBlock("BOPOMOFO", BOPOMOFO_ID);
1462         /**
1463          * @stable ICU 2.4
1464          */
1465         public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO
1466         = new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO", HANGUL_COMPATIBILITY_JAMO_ID);
1467         /**
1468          * @stable ICU 2.4
1469          */
1470         public static final UnicodeBlock KANBUN
1471         = new UnicodeBlock("KANBUN", KANBUN_ID);
1472         /**
1473          * @stable ICU 2.4
1474          */
1475         public static final UnicodeBlock BOPOMOFO_EXTENDED
1476         = new UnicodeBlock("BOPOMOFO_EXTENDED", BOPOMOFO_EXTENDED_ID);
1477         /**
1478          * @stable ICU 2.4
1479          */
1480         public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS
1481         = new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS",
1482                 ENCLOSED_CJK_LETTERS_AND_MONTHS_ID);
1483         /**
1484          * @stable ICU 2.4
1485          */
1486         public static final UnicodeBlock CJK_COMPATIBILITY
1487         = new UnicodeBlock("CJK_COMPATIBILITY", CJK_COMPATIBILITY_ID);
1488         /**
1489          * @stable ICU 2.4
1490          */
1491         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
1492         = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A",
1493                 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_ID);
1494         /**
1495          * @stable ICU 2.4
1496          */
1497         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS
1498         = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS", CJK_UNIFIED_IDEOGRAPHS_ID);
1499         /**
1500          * @stable ICU 2.4
1501          */
1502         public static final UnicodeBlock YI_SYLLABLES
1503         = new UnicodeBlock("YI_SYLLABLES", YI_SYLLABLES_ID);
1504         /**
1505          * @stable ICU 2.4
1506          */
1507         public static final UnicodeBlock YI_RADICALS
1508         = new UnicodeBlock("YI_RADICALS", YI_RADICALS_ID);
1509         /**
1510          * @stable ICU 2.4
1511          */
1512         public static final UnicodeBlock HANGUL_SYLLABLES
1513         = new UnicodeBlock("HANGUL_SYLLABLES", HANGUL_SYLLABLES_ID);
1514         /**
1515          * @stable ICU 2.4
1516          */
1517         public static final UnicodeBlock HIGH_SURROGATES
1518         = new UnicodeBlock("HIGH_SURROGATES", HIGH_SURROGATES_ID);
1519         /**
1520          * @stable ICU 2.4
1521          */
1522         public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES
1523         = new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES", HIGH_PRIVATE_USE_SURROGATES_ID);
1524         /**
1525          * @stable ICU 2.4
1526          */
1527         public static final UnicodeBlock LOW_SURROGATES
1528         = new UnicodeBlock("LOW_SURROGATES", LOW_SURROGATES_ID);
1529         /**
1530          * Same as public static final int PRIVATE_USE.
1531          * Until Unicode 3.1.1; the corresponding block name was "Private Use";
1532          * and multiple code point ranges had this block.
1533          * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area"
1534          * and adds separate blocks for the supplementary PUAs.
1535          * @stable ICU 2.4
1536          */
1537         public static final UnicodeBlock PRIVATE_USE_AREA
1538         = new UnicodeBlock("PRIVATE_USE_AREA",  78);
1539         /**
1540          * Same as public static final int PRIVATE_USE_AREA.
1541          * Until Unicode 3.1.1; the corresponding block name was "Private Use";
1542          * and multiple code point ranges had this block.
1543          * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area"
1544          * and adds separate blocks for the supplementary PUAs.
1545          * @stable ICU 2.4
1546          */
1547         public static final UnicodeBlock PRIVATE_USE
1548         = PRIVATE_USE_AREA;
1549         /**
1550          * @stable ICU 2.4
1551          */
1552         public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS
1553         = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS", CJK_COMPATIBILITY_IDEOGRAPHS_ID);
1554         /**
1555          * @stable ICU 2.4
1556          */
1557         public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS
1558         = new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS", ALPHABETIC_PRESENTATION_FORMS_ID);
1559         /**
1560          * @stable ICU 2.4
1561          */
1562         public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A
1563         = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A", ARABIC_PRESENTATION_FORMS_A_ID);
1564         /**
1565          * @stable ICU 2.4
1566          */
1567         public static final UnicodeBlock COMBINING_HALF_MARKS
1568         = new UnicodeBlock("COMBINING_HALF_MARKS", COMBINING_HALF_MARKS_ID);
1569         /**
1570          * @stable ICU 2.4
1571          */
1572         public static final UnicodeBlock CJK_COMPATIBILITY_FORMS
1573         = new UnicodeBlock("CJK_COMPATIBILITY_FORMS", CJK_COMPATIBILITY_FORMS_ID);
1574         /**
1575          * @stable ICU 2.4
1576          */
1577         public static final UnicodeBlock SMALL_FORM_VARIANTS
1578         = new UnicodeBlock("SMALL_FORM_VARIANTS", SMALL_FORM_VARIANTS_ID);
1579         /**
1580          * @stable ICU 2.4
1581          */
1582         public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B
1583         = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B", ARABIC_PRESENTATION_FORMS_B_ID);
1584         /**
1585          * @stable ICU 2.4
1586          */
1587         public static final UnicodeBlock SPECIALS
1588         = new UnicodeBlock("SPECIALS", SPECIALS_ID);
1589         /**
1590          * @stable ICU 2.4
1591          */
1592         public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS
1593         = new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS", HALFWIDTH_AND_FULLWIDTH_FORMS_ID);
1594         /**
1595          * @stable ICU 2.4
1596          */
1597         public static final UnicodeBlock OLD_ITALIC
1598         = new UnicodeBlock("OLD_ITALIC", OLD_ITALIC_ID);
1599         /**
1600          * @stable ICU 2.4
1601          */
1602         public static final UnicodeBlock GOTHIC
1603         = new UnicodeBlock("GOTHIC", GOTHIC_ID);
1604         /**
1605          * @stable ICU 2.4
1606          */
1607         public static final UnicodeBlock DESERET
1608         = new UnicodeBlock("DESERET", DESERET_ID);
1609         /**
1610          * @stable ICU 2.4
1611          */
1612         public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS
1613         = new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS", BYZANTINE_MUSICAL_SYMBOLS_ID);
1614         /**
1615          * @stable ICU 2.4
1616          */
1617         public static final UnicodeBlock MUSICAL_SYMBOLS
1618         = new UnicodeBlock("MUSICAL_SYMBOLS", MUSICAL_SYMBOLS_ID);
1619         /**
1620          * @stable ICU 2.4
1621          */
1622         public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS
1623         = new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS",
1624                 MATHEMATICAL_ALPHANUMERIC_SYMBOLS_ID);
1625         /**
1626          * @stable ICU 2.4
1627          */
1628         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B
1629         = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B",
1630                 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_ID);
1631         /**
1632          * @stable ICU 2.4
1633          */
1634         public static final UnicodeBlock
1635         CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT
1636         = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT",
1637                 CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_ID);
1638         /**
1639          * @stable ICU 2.4
1640          */
1641         public static final UnicodeBlock TAGS
1642         = new UnicodeBlock("TAGS", TAGS_ID);
1643 
1644         // New blocks in Unicode 3.2
1645 
1646         /**
1647          * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement".
1648          * @stable ICU 2.4
1649          */
1650         public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY
1651         = new UnicodeBlock("CYRILLIC_SUPPLEMENTARY", CYRILLIC_SUPPLEMENTARY_ID);
1652         /**
1653          * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement".
1654          * @stable ICU 3.0
1655          */
1656         public static final UnicodeBlock CYRILLIC_SUPPLEMENT
1657         = new UnicodeBlock("CYRILLIC_SUPPLEMENT", CYRILLIC_SUPPLEMENT_ID);
1658         /**
1659          * @stable ICU 2.4
1660          */
1661         public static final UnicodeBlock TAGALOG
1662         = new UnicodeBlock("TAGALOG", TAGALOG_ID);
1663         /**
1664          * @stable ICU 2.4
1665          */
1666         public static final UnicodeBlock HANUNOO
1667         = new UnicodeBlock("HANUNOO", HANUNOO_ID);
1668         /**
1669          * @stable ICU 2.4
1670          */
1671         public static final UnicodeBlock BUHID
1672         = new UnicodeBlock("BUHID", BUHID_ID);
1673         /**
1674          * @stable ICU 2.4
1675          */
1676         public static final UnicodeBlock TAGBANWA
1677         = new UnicodeBlock("TAGBANWA", TAGBANWA_ID);
1678         /**
1679          * @stable ICU 2.4
1680          */
1681         public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A
1682         = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A",
1683                 MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A_ID);
1684         /**
1685          * @stable ICU 2.4
1686          */
1687         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A
1688         = new UnicodeBlock("SUPPLEMENTAL_ARROWS_A", SUPPLEMENTAL_ARROWS_A_ID);
1689         /**
1690          * @stable ICU 2.4
1691          */
1692         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B
1693         = new UnicodeBlock("SUPPLEMENTAL_ARROWS_B", SUPPLEMENTAL_ARROWS_B_ID);
1694         /**
1695          * @stable ICU 2.4
1696          */
1697         public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B
1698         = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B",
1699                 MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B_ID);
1700         /**
1701          * @stable ICU 2.4
1702          */
1703         public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS
1704         = new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS",
1705                 SUPPLEMENTAL_MATHEMATICAL_OPERATORS_ID);
1706         /**
1707          * @stable ICU 2.4
1708          */
1709         public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS
1710         = new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS", KATAKANA_PHONETIC_EXTENSIONS_ID);
1711         /**
1712          * @stable ICU 2.4
1713          */
1714         public static final UnicodeBlock VARIATION_SELECTORS
1715         = new UnicodeBlock("VARIATION_SELECTORS", VARIATION_SELECTORS_ID);
1716         /**
1717          * @stable ICU 2.4
1718          */
1719         public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A
1720         = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A",
1721                 SUPPLEMENTARY_PRIVATE_USE_AREA_A_ID);
1722         /**
1723          * @stable ICU 2.4
1724          */
1725         public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B
1726         = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B",
1727                 SUPPLEMENTARY_PRIVATE_USE_AREA_B_ID);
1728 
1729         /**
1730          * @stable ICU 2.6
1731          */
1732         public static final UnicodeBlock LIMBU
1733         = new UnicodeBlock("LIMBU", LIMBU_ID);
1734         /**
1735          * @stable ICU 2.6
1736          */
1737         public static final UnicodeBlock TAI_LE
1738         = new UnicodeBlock("TAI_LE", TAI_LE_ID);
1739         /**
1740          * @stable ICU 2.6
1741          */
1742         public static final UnicodeBlock KHMER_SYMBOLS
1743         = new UnicodeBlock("KHMER_SYMBOLS", KHMER_SYMBOLS_ID);
1744 
1745         /**
1746          * @stable ICU 2.6
1747          */
1748         public static final UnicodeBlock PHONETIC_EXTENSIONS
1749         = new UnicodeBlock("PHONETIC_EXTENSIONS", PHONETIC_EXTENSIONS_ID);
1750 
1751         /**
1752          * @stable ICU 2.6
1753          */
1754         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS
1755         = new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS",
1756                 MISCELLANEOUS_SYMBOLS_AND_ARROWS_ID);
1757         /**
1758          * @stable ICU 2.6
1759          */
1760         public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS
1761         = new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS", YIJING_HEXAGRAM_SYMBOLS_ID);
1762         /**
1763          * @stable ICU 2.6
1764          */
1765         public static final UnicodeBlock LINEAR_B_SYLLABARY
1766         = new UnicodeBlock("LINEAR_B_SYLLABARY", LINEAR_B_SYLLABARY_ID);
1767         /**
1768          * @stable ICU 2.6
1769          */
1770         public static final UnicodeBlock LINEAR_B_IDEOGRAMS
1771         = new UnicodeBlock("LINEAR_B_IDEOGRAMS", LINEAR_B_IDEOGRAMS_ID);
1772         /**
1773          * @stable ICU 2.6
1774          */
1775         public static final UnicodeBlock AEGEAN_NUMBERS
1776         = new UnicodeBlock("AEGEAN_NUMBERS", AEGEAN_NUMBERS_ID);
1777         /**
1778          * @stable ICU 2.6
1779          */
1780         public static final UnicodeBlock UGARITIC
1781         = new UnicodeBlock("UGARITIC", UGARITIC_ID);
1782         /**
1783          * @stable ICU 2.6
1784          */
1785         public static final UnicodeBlock SHAVIAN
1786         = new UnicodeBlock("SHAVIAN", SHAVIAN_ID);
1787         /**
1788          * @stable ICU 2.6
1789          */
1790         public static final UnicodeBlock OSMANYA
1791         = new UnicodeBlock("OSMANYA", OSMANYA_ID);
1792         /**
1793          * @stable ICU 2.6
1794          */
1795         public static final UnicodeBlock CYPRIOT_SYLLABARY
1796         = new UnicodeBlock("CYPRIOT_SYLLABARY", CYPRIOT_SYLLABARY_ID);
1797         /**
1798          * @stable ICU 2.6
1799          */
1800         public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS
1801         = new UnicodeBlock("TAI_XUAN_JING_SYMBOLS", TAI_XUAN_JING_SYMBOLS_ID);
1802 
1803         /**
1804          * @stable ICU 2.6
1805          */
1806         public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT
1807         = new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT", VARIATION_SELECTORS_SUPPLEMENT_ID);
1808 
1809         /* New blocks in Unicode 4.1 */
1810 
1811         /**
1812          * @stable ICU 3.4
1813          */
1814         public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION =
1815                 new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION",
1816                         ANCIENT_GREEK_MUSICAL_NOTATION_ID); /*[1D200]*/
1817 
1818         /**
1819          * @stable ICU 3.4
1820          */
1821         public static final UnicodeBlock ANCIENT_GREEK_NUMBERS =
1822                 new UnicodeBlock("ANCIENT_GREEK_NUMBERS", ANCIENT_GREEK_NUMBERS_ID); /*[10140]*/
1823 
1824         /**
1825          * @stable ICU 3.4
1826          */
1827         public static final UnicodeBlock ARABIC_SUPPLEMENT =
1828                 new UnicodeBlock("ARABIC_SUPPLEMENT", ARABIC_SUPPLEMENT_ID); /*[0750]*/
1829 
1830         /**
1831          * @stable ICU 3.4
1832          */
1833         public static final UnicodeBlock BUGINESE =
1834                 new UnicodeBlock("BUGINESE", BUGINESE_ID); /*[1A00]*/
1835 
1836         /**
1837          * @stable ICU 3.4
1838          */
1839         public static final UnicodeBlock CJK_STROKES =
1840                 new UnicodeBlock("CJK_STROKES", CJK_STROKES_ID); /*[31C0]*/
1841 
1842         /**
1843          * @stable ICU 3.4
1844          */
1845         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT =
1846                 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT",
1847                         COMBINING_DIACRITICAL_MARKS_SUPPLEMENT_ID); /*[1DC0]*/
1848 
1849         /**
1850          * @stable ICU 3.4
1851          */
1852         public static final UnicodeBlock COPTIC = new UnicodeBlock("COPTIC", COPTIC_ID); /*[2C80]*/
1853 
1854         /**
1855          * @stable ICU 3.4
1856          */
1857         public static final UnicodeBlock ETHIOPIC_EXTENDED =
1858                 new UnicodeBlock("ETHIOPIC_EXTENDED", ETHIOPIC_EXTENDED_ID); /*[2D80]*/
1859 
1860         /**
1861          * @stable ICU 3.4
1862          */
1863         public static final UnicodeBlock ETHIOPIC_SUPPLEMENT =
1864                 new UnicodeBlock("ETHIOPIC_SUPPLEMENT", ETHIOPIC_SUPPLEMENT_ID); /*[1380]*/
1865 
1866         /**
1867          * @stable ICU 3.4
1868          */
1869         public static final UnicodeBlock GEORGIAN_SUPPLEMENT =
1870                 new UnicodeBlock("GEORGIAN_SUPPLEMENT", GEORGIAN_SUPPLEMENT_ID); /*[2D00]*/
1871 
1872         /**
1873          * @stable ICU 3.4
1874          */
1875         public static final UnicodeBlock GLAGOLITIC =
1876                 new UnicodeBlock("GLAGOLITIC", GLAGOLITIC_ID); /*[2C00]*/
1877 
1878         /**
1879          * @stable ICU 3.4
1880          */
1881         public static final UnicodeBlock KHAROSHTHI =
1882                 new UnicodeBlock("KHAROSHTHI", KHAROSHTHI_ID); /*[10A00]*/
1883 
1884         /**
1885          * @stable ICU 3.4
1886          */
1887         public static final UnicodeBlock MODIFIER_TONE_LETTERS =
1888                 new UnicodeBlock("MODIFIER_TONE_LETTERS", MODIFIER_TONE_LETTERS_ID); /*[A700]*/
1889 
1890         /**
1891          * @stable ICU 3.4
1892          */
1893         public static final UnicodeBlock NEW_TAI_LUE =
1894                 new UnicodeBlock("NEW_TAI_LUE", NEW_TAI_LUE_ID); /*[1980]*/
1895 
1896         /**
1897          * @stable ICU 3.4
1898          */
1899         public static final UnicodeBlock OLD_PERSIAN =
1900                 new UnicodeBlock("OLD_PERSIAN", OLD_PERSIAN_ID); /*[103A0]*/
1901 
1902         /**
1903          * @stable ICU 3.4
1904          */
1905         public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT =
1906                 new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT",
1907                         PHONETIC_EXTENSIONS_SUPPLEMENT_ID); /*[1D80]*/
1908 
1909         /**
1910          * @stable ICU 3.4
1911          */
1912         public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION =
1913                 new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION", SUPPLEMENTAL_PUNCTUATION_ID); /*[2E00]*/
1914 
1915         /**
1916          * @stable ICU 3.4
1917          */
1918         public static final UnicodeBlock SYLOTI_NAGRI =
1919                 new UnicodeBlock("SYLOTI_NAGRI", SYLOTI_NAGRI_ID); /*[A800]*/
1920 
1921         /**
1922          * @stable ICU 3.4
1923          */
1924         public static final UnicodeBlock TIFINAGH =
1925                 new UnicodeBlock("TIFINAGH", TIFINAGH_ID); /*[2D30]*/
1926 
1927         /**
1928          * @stable ICU 3.4
1929          */
1930         public static final UnicodeBlock VERTICAL_FORMS =
1931                 new UnicodeBlock("VERTICAL_FORMS", VERTICAL_FORMS_ID); /*[FE10]*/
1932 
1933         /**
1934          * @stable ICU 3.6
1935          */
1936         public static final UnicodeBlock NKO = new UnicodeBlock("NKO", NKO_ID); /*[07C0]*/
1937         /**
1938          * @stable ICU 3.6
1939          */
1940         public static final UnicodeBlock BALINESE =
1941                 new UnicodeBlock("BALINESE", BALINESE_ID); /*[1B00]*/
1942         /**
1943          * @stable ICU 3.6
1944          */
1945         public static final UnicodeBlock LATIN_EXTENDED_C =
1946                 new UnicodeBlock("LATIN_EXTENDED_C", LATIN_EXTENDED_C_ID); /*[2C60]*/
1947         /**
1948          * @stable ICU 3.6
1949          */
1950         public static final UnicodeBlock LATIN_EXTENDED_D =
1951                 new UnicodeBlock("LATIN_EXTENDED_D", LATIN_EXTENDED_D_ID); /*[A720]*/
1952         /**
1953          * @stable ICU 3.6
1954          */
1955         public static final UnicodeBlock PHAGS_PA =
1956                 new UnicodeBlock("PHAGS_PA", PHAGS_PA_ID); /*[A840]*/
1957         /**
1958          * @stable ICU 3.6
1959          */
1960         public static final UnicodeBlock PHOENICIAN =
1961                 new UnicodeBlock("PHOENICIAN", PHOENICIAN_ID); /*[10900]*/
1962         /**
1963          * @stable ICU 3.6
1964          */
1965         public static final UnicodeBlock CUNEIFORM =
1966                 new UnicodeBlock("CUNEIFORM", CUNEIFORM_ID); /*[12000]*/
1967         /**
1968          * @stable ICU 3.6
1969          */
1970         public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION =
1971                 new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION",
1972                         CUNEIFORM_NUMBERS_AND_PUNCTUATION_ID); /*[12400]*/
1973         /**
1974          * @stable ICU 3.6
1975          */
1976         public static final UnicodeBlock COUNTING_ROD_NUMERALS =
1977                 new UnicodeBlock("COUNTING_ROD_NUMERALS", COUNTING_ROD_NUMERALS_ID); /*[1D360]*/
1978 
1979         /**
1980          * @stable ICU 4.0
1981          */
1982         public static final UnicodeBlock SUNDANESE =
1983                 new UnicodeBlock("SUNDANESE", SUNDANESE_ID); /* [1B80] */
1984 
1985         /**
1986          * @stable ICU 4.0
1987          */
1988         public static final UnicodeBlock LEPCHA =
1989                 new UnicodeBlock("LEPCHA", LEPCHA_ID); /* [1C00] */
1990 
1991         /**
1992          * @stable ICU 4.0
1993          */
1994         public static final UnicodeBlock OL_CHIKI =
1995                 new UnicodeBlock("OL_CHIKI", OL_CHIKI_ID); /* [1C50] */
1996 
1997         /**
1998          * @stable ICU 4.0
1999          */
2000         public static final UnicodeBlock CYRILLIC_EXTENDED_A =
2001                 new UnicodeBlock("CYRILLIC_EXTENDED_A", CYRILLIC_EXTENDED_A_ID); /* [2DE0] */
2002 
2003         /**
2004          * @stable ICU 4.0
2005          */
2006         public static final UnicodeBlock VAI = new UnicodeBlock("VAI", VAI_ID); /* [A500] */
2007 
2008         /**
2009          * @stable ICU 4.0
2010          */
2011         public static final UnicodeBlock CYRILLIC_EXTENDED_B =
2012                 new UnicodeBlock("CYRILLIC_EXTENDED_B", CYRILLIC_EXTENDED_B_ID); /* [A640] */
2013 
2014         /**
2015          * @stable ICU 4.0
2016          */
2017         public static final UnicodeBlock SAURASHTRA =
2018                 new UnicodeBlock("SAURASHTRA", SAURASHTRA_ID); /* [A880] */
2019 
2020         /**
2021          * @stable ICU 4.0
2022          */
2023         public static final UnicodeBlock KAYAH_LI =
2024                 new UnicodeBlock("KAYAH_LI", KAYAH_LI_ID); /* [A900] */
2025 
2026         /**
2027          * @stable ICU 4.0
2028          */
2029         public static final UnicodeBlock REJANG =
2030                 new UnicodeBlock("REJANG", REJANG_ID); /* [A930] */
2031 
2032         /**
2033          * @stable ICU 4.0
2034          */
2035         public static final UnicodeBlock CHAM =
2036                 new UnicodeBlock("CHAM", CHAM_ID); /* [AA00] */
2037 
2038         /**
2039          * @stable ICU 4.0
2040          */
2041         public static final UnicodeBlock ANCIENT_SYMBOLS =
2042                 new UnicodeBlock("ANCIENT_SYMBOLS", ANCIENT_SYMBOLS_ID); /* [10190] */
2043 
2044         /**
2045          * @stable ICU 4.0
2046          */
2047         public static final UnicodeBlock PHAISTOS_DISC =
2048                 new UnicodeBlock("PHAISTOS_DISC", PHAISTOS_DISC_ID); /* [101D0] */
2049 
2050         /**
2051          * @stable ICU 4.0
2052          */
2053         public static final UnicodeBlock LYCIAN =
2054                 new UnicodeBlock("LYCIAN", LYCIAN_ID); /* [10280] */
2055 
2056         /**
2057          * @stable ICU 4.0
2058          */
2059         public static final UnicodeBlock CARIAN =
2060                 new UnicodeBlock("CARIAN", CARIAN_ID); /* [102A0] */
2061 
2062         /**
2063          * @stable ICU 4.0
2064          */
2065         public static final UnicodeBlock LYDIAN =
2066                 new UnicodeBlock("LYDIAN", LYDIAN_ID); /* [10920] */
2067 
2068         /**
2069          * @stable ICU 4.0
2070          */
2071         public static final UnicodeBlock MAHJONG_TILES =
2072                 new UnicodeBlock("MAHJONG_TILES", MAHJONG_TILES_ID); /* [1F000] */
2073 
2074         /**
2075          * @stable ICU 4.0
2076          */
2077         public static final UnicodeBlock DOMINO_TILES =
2078                 new UnicodeBlock("DOMINO_TILES", DOMINO_TILES_ID); /* [1F030] */
2079 
2080         /* New blocks in Unicode 5.2 */
2081 
2082         /** @stable ICU 4.4 */
2083         public static final UnicodeBlock SAMARITAN =
2084                 new UnicodeBlock("SAMARITAN", SAMARITAN_ID); /*[0800]*/
2085         /** @stable ICU 4.4 */
2086         public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED =
2087                 new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED",
2088                         UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_ID); /*[18B0]*/
2089         /** @stable ICU 4.4 */
2090         public static final UnicodeBlock TAI_THAM =
2091                 new UnicodeBlock("TAI_THAM", TAI_THAM_ID); /*[1A20]*/
2092         /** @stable ICU 4.4 */
2093         public static final UnicodeBlock VEDIC_EXTENSIONS =
2094                 new UnicodeBlock("VEDIC_EXTENSIONS", VEDIC_EXTENSIONS_ID); /*[1CD0]*/
2095         /** @stable ICU 4.4 */
2096         public static final UnicodeBlock LISU =
2097                 new UnicodeBlock("LISU", LISU_ID); /*[A4D0]*/
2098         /** @stable ICU 4.4 */
2099         public static final UnicodeBlock BAMUM =
2100                 new UnicodeBlock("BAMUM", BAMUM_ID); /*[A6A0]*/
2101         /** @stable ICU 4.4 */
2102         public static final UnicodeBlock COMMON_INDIC_NUMBER_FORMS =
2103                 new UnicodeBlock("COMMON_INDIC_NUMBER_FORMS", COMMON_INDIC_NUMBER_FORMS_ID); /*[A830]*/
2104         /** @stable ICU 4.4 */
2105         public static final UnicodeBlock DEVANAGARI_EXTENDED =
2106                 new UnicodeBlock("DEVANAGARI_EXTENDED", DEVANAGARI_EXTENDED_ID); /*[A8E0]*/
2107         /** @stable ICU 4.4 */
2108         public static final UnicodeBlock HANGUL_JAMO_EXTENDED_A =
2109                 new UnicodeBlock("HANGUL_JAMO_EXTENDED_A", HANGUL_JAMO_EXTENDED_A_ID); /*[A960]*/
2110         /** @stable ICU 4.4 */
2111         public static final UnicodeBlock JAVANESE =
2112                 new UnicodeBlock("JAVANESE", JAVANESE_ID); /*[A980]*/
2113         /** @stable ICU 4.4 */
2114         public static final UnicodeBlock MYANMAR_EXTENDED_A =
2115                 new UnicodeBlock("MYANMAR_EXTENDED_A", MYANMAR_EXTENDED_A_ID); /*[AA60]*/
2116         /** @stable ICU 4.4 */
2117         public static final UnicodeBlock TAI_VIET =
2118                 new UnicodeBlock("TAI_VIET", TAI_VIET_ID); /*[AA80]*/
2119         /** @stable ICU 4.4 */
2120         public static final UnicodeBlock MEETEI_MAYEK =
2121                 new UnicodeBlock("MEETEI_MAYEK", MEETEI_MAYEK_ID); /*[ABC0]*/
2122         /** @stable ICU 4.4 */
2123         public static final UnicodeBlock HANGUL_JAMO_EXTENDED_B =
2124                 new UnicodeBlock("HANGUL_JAMO_EXTENDED_B", HANGUL_JAMO_EXTENDED_B_ID); /*[D7B0]*/
2125         /** @stable ICU 4.4 */
2126         public static final UnicodeBlock IMPERIAL_ARAMAIC =
2127                 new UnicodeBlock("IMPERIAL_ARAMAIC", IMPERIAL_ARAMAIC_ID); /*[10840]*/
2128         /** @stable ICU 4.4 */
2129         public static final UnicodeBlock OLD_SOUTH_ARABIAN =
2130                 new UnicodeBlock("OLD_SOUTH_ARABIAN", OLD_SOUTH_ARABIAN_ID); /*[10A60]*/
2131         /** @stable ICU 4.4 */
2132         public static final UnicodeBlock AVESTAN =
2133                 new UnicodeBlock("AVESTAN", AVESTAN_ID); /*[10B00]*/
2134         /** @stable ICU 4.4 */
2135         public static final UnicodeBlock INSCRIPTIONAL_PARTHIAN =
2136                 new UnicodeBlock("INSCRIPTIONAL_PARTHIAN", INSCRIPTIONAL_PARTHIAN_ID); /*[10B40]*/
2137         /** @stable ICU 4.4 */
2138         public static final UnicodeBlock INSCRIPTIONAL_PAHLAVI =
2139                 new UnicodeBlock("INSCRIPTIONAL_PAHLAVI", INSCRIPTIONAL_PAHLAVI_ID); /*[10B60]*/
2140         /** @stable ICU 4.4 */
2141         public static final UnicodeBlock OLD_TURKIC =
2142                 new UnicodeBlock("OLD_TURKIC", OLD_TURKIC_ID); /*[10C00]*/
2143         /** @stable ICU 4.4 */
2144         public static final UnicodeBlock RUMI_NUMERAL_SYMBOLS =
2145                 new UnicodeBlock("RUMI_NUMERAL_SYMBOLS", RUMI_NUMERAL_SYMBOLS_ID); /*[10E60]*/
2146         /** @stable ICU 4.4 */
2147         public static final UnicodeBlock KAITHI =
2148                 new UnicodeBlock("KAITHI", KAITHI_ID); /*[11080]*/
2149         /** @stable ICU 4.4 */
2150         public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS =
2151                 new UnicodeBlock("EGYPTIAN_HIEROGLYPHS", EGYPTIAN_HIEROGLYPHS_ID); /*[13000]*/
2152         /** @stable ICU 4.4 */
2153         public static final UnicodeBlock ENCLOSED_ALPHANUMERIC_SUPPLEMENT =
2154                 new UnicodeBlock("ENCLOSED_ALPHANUMERIC_SUPPLEMENT",
2155                         ENCLOSED_ALPHANUMERIC_SUPPLEMENT_ID); /*[1F100]*/
2156         /** @stable ICU 4.4 */
2157         public static final UnicodeBlock ENCLOSED_IDEOGRAPHIC_SUPPLEMENT =
2158                 new UnicodeBlock("ENCLOSED_IDEOGRAPHIC_SUPPLEMENT",
2159                         ENCLOSED_IDEOGRAPHIC_SUPPLEMENT_ID); /*[1F200]*/
2160         /** @stable ICU 4.4 */
2161         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C =
2162                 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C",
2163                         CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_ID); /*[2A700]*/
2164 
2165         /* New blocks in Unicode 6.0 */
2166 
2167         /** @stable ICU 4.6 */
2168         public static final UnicodeBlock MANDAIC =
2169                 new UnicodeBlock("MANDAIC", MANDAIC_ID); /*[0840]*/
2170         /** @stable ICU 4.6 */
2171         public static final UnicodeBlock BATAK =
2172                 new UnicodeBlock("BATAK", BATAK_ID); /*[1BC0]*/
2173         /** @stable ICU 4.6 */
2174         public static final UnicodeBlock ETHIOPIC_EXTENDED_A =
2175                 new UnicodeBlock("ETHIOPIC_EXTENDED_A", ETHIOPIC_EXTENDED_A_ID); /*[AB00]*/
2176         /** @stable ICU 4.6 */
2177         public static final UnicodeBlock BRAHMI =
2178                 new UnicodeBlock("BRAHMI", BRAHMI_ID); /*[11000]*/
2179         /** @stable ICU 4.6 */
2180         public static final UnicodeBlock BAMUM_SUPPLEMENT =
2181                 new UnicodeBlock("BAMUM_SUPPLEMENT", BAMUM_SUPPLEMENT_ID); /*[16800]*/
2182         /** @stable ICU 4.6 */
2183         public static final UnicodeBlock KANA_SUPPLEMENT =
2184                 new UnicodeBlock("KANA_SUPPLEMENT", KANA_SUPPLEMENT_ID); /*[1B000]*/
2185         /** @stable ICU 4.6 */
2186         public static final UnicodeBlock PLAYING_CARDS =
2187                 new UnicodeBlock("PLAYING_CARDS", PLAYING_CARDS_ID); /*[1F0A0]*/
2188         /** @stable ICU 4.6 */
2189         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS =
2190                 new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS",
2191                         MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS_ID); /*[1F300]*/
2192         /** @stable ICU 4.6 */
2193         public static final UnicodeBlock EMOTICONS =
2194                 new UnicodeBlock("EMOTICONS", EMOTICONS_ID); /*[1F600]*/
2195         /** @stable ICU 4.6 */
2196         public static final UnicodeBlock TRANSPORT_AND_MAP_SYMBOLS =
2197                 new UnicodeBlock("TRANSPORT_AND_MAP_SYMBOLS", TRANSPORT_AND_MAP_SYMBOLS_ID); /*[1F680]*/
2198         /** @stable ICU 4.6 */
2199         public static final UnicodeBlock ALCHEMICAL_SYMBOLS =
2200                 new UnicodeBlock("ALCHEMICAL_SYMBOLS", ALCHEMICAL_SYMBOLS_ID); /*[1F700]*/
2201         /** @stable ICU 4.6 */
2202         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D =
2203                 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D",
2204                         CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_ID); /*[2B740]*/
2205 
2206         /* New blocks in Unicode 6.1 */
2207 
2208         /** @stable ICU 49 */
2209         public static final UnicodeBlock ARABIC_EXTENDED_A =
2210                 new UnicodeBlock("ARABIC_EXTENDED_A", ARABIC_EXTENDED_A_ID); /*[08A0]*/
2211         /** @stable ICU 49 */
2212         public static final UnicodeBlock ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS =
2213                 new UnicodeBlock("ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS", ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS_ID); /*[1EE00]*/
2214         /** @stable ICU 49 */
2215         public static final UnicodeBlock CHAKMA = new UnicodeBlock("CHAKMA", CHAKMA_ID); /*[11100]*/
2216         /** @stable ICU 49 */
2217         public static final UnicodeBlock MEETEI_MAYEK_EXTENSIONS =
2218                 new UnicodeBlock("MEETEI_MAYEK_EXTENSIONS", MEETEI_MAYEK_EXTENSIONS_ID); /*[AAE0]*/
2219         /** @stable ICU 49 */
2220         public static final UnicodeBlock MEROITIC_CURSIVE =
2221                 new UnicodeBlock("MEROITIC_CURSIVE", MEROITIC_CURSIVE_ID); /*[109A0]*/
2222         /** @stable ICU 49 */
2223         public static final UnicodeBlock MEROITIC_HIEROGLYPHS =
2224                 new UnicodeBlock("MEROITIC_HIEROGLYPHS", MEROITIC_HIEROGLYPHS_ID); /*[10980]*/
2225         /** @stable ICU 49 */
2226         public static final UnicodeBlock MIAO = new UnicodeBlock("MIAO", MIAO_ID); /*[16F00]*/
2227         /** @stable ICU 49 */
2228         public static final UnicodeBlock SHARADA = new UnicodeBlock("SHARADA", SHARADA_ID); /*[11180]*/
2229         /** @stable ICU 49 */
2230         public static final UnicodeBlock SORA_SOMPENG =
2231                 new UnicodeBlock("SORA_SOMPENG", SORA_SOMPENG_ID); /*[110D0]*/
2232         /** @stable ICU 49 */
2233         public static final UnicodeBlock SUNDANESE_SUPPLEMENT =
2234                 new UnicodeBlock("SUNDANESE_SUPPLEMENT", SUNDANESE_SUPPLEMENT_ID); /*[1CC0]*/
2235         /** @stable ICU 49 */
2236         public static final UnicodeBlock TAKRI = new UnicodeBlock("TAKRI", TAKRI_ID); /*[11680]*/
2237 
2238         /* New blocks in Unicode 7.0 */
2239 
2240         /** @stable ICU 54 */
2241         public static final UnicodeBlock BASSA_VAH = new UnicodeBlock("BASSA_VAH", BASSA_VAH_ID); /*[16AD0]*/
2242         /** @stable ICU 54 */
2243         public static final UnicodeBlock CAUCASIAN_ALBANIAN =
2244                 new UnicodeBlock("CAUCASIAN_ALBANIAN", CAUCASIAN_ALBANIAN_ID); /*[10530]*/
2245         /** @stable ICU 54 */
2246         public static final UnicodeBlock COPTIC_EPACT_NUMBERS =
2247                 new UnicodeBlock("COPTIC_EPACT_NUMBERS", COPTIC_EPACT_NUMBERS_ID); /*[102E0]*/
2248         /** @stable ICU 54 */
2249         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_EXTENDED =
2250                 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_EXTENDED", COMBINING_DIACRITICAL_MARKS_EXTENDED_ID); /*[1AB0]*/
2251         /** @stable ICU 54 */
2252         public static final UnicodeBlock DUPLOYAN = new UnicodeBlock("DUPLOYAN", DUPLOYAN_ID); /*[1BC00]*/
2253         /** @stable ICU 54 */
2254         public static final UnicodeBlock ELBASAN = new UnicodeBlock("ELBASAN", ELBASAN_ID); /*[10500]*/
2255         /** @stable ICU 54 */
2256         public static final UnicodeBlock GEOMETRIC_SHAPES_EXTENDED =
2257                 new UnicodeBlock("GEOMETRIC_SHAPES_EXTENDED", GEOMETRIC_SHAPES_EXTENDED_ID); /*[1F780]*/
2258         /** @stable ICU 54 */
2259         public static final UnicodeBlock GRANTHA = new UnicodeBlock("GRANTHA", GRANTHA_ID); /*[11300]*/
2260         /** @stable ICU 54 */
2261         public static final UnicodeBlock KHOJKI = new UnicodeBlock("KHOJKI", KHOJKI_ID); /*[11200]*/
2262         /** @stable ICU 54 */
2263         public static final UnicodeBlock KHUDAWADI = new UnicodeBlock("KHUDAWADI", KHUDAWADI_ID); /*[112B0]*/
2264         /** @stable ICU 54 */
2265         public static final UnicodeBlock LATIN_EXTENDED_E =
2266                 new UnicodeBlock("LATIN_EXTENDED_E", LATIN_EXTENDED_E_ID); /*[AB30]*/
2267         /** @stable ICU 54 */
2268         public static final UnicodeBlock LINEAR_A = new UnicodeBlock("LINEAR_A", LINEAR_A_ID); /*[10600]*/
2269         /** @stable ICU 54 */
2270         public static final UnicodeBlock MAHAJANI = new UnicodeBlock("MAHAJANI", MAHAJANI_ID); /*[11150]*/
2271         /** @stable ICU 54 */
2272         public static final UnicodeBlock MANICHAEAN = new UnicodeBlock("MANICHAEAN", MANICHAEAN_ID); /*[10AC0]*/
2273         /** @stable ICU 54 */
2274         public static final UnicodeBlock MENDE_KIKAKUI =
2275                 new UnicodeBlock("MENDE_KIKAKUI", MENDE_KIKAKUI_ID); /*[1E800]*/
2276         /** @stable ICU 54 */
2277         public static final UnicodeBlock MODI = new UnicodeBlock("MODI", MODI_ID); /*[11600]*/
2278         /** @stable ICU 54 */
2279         public static final UnicodeBlock MRO = new UnicodeBlock("MRO", MRO_ID); /*[16A40]*/
2280         /** @stable ICU 54 */
2281         public static final UnicodeBlock MYANMAR_EXTENDED_B =
2282                 new UnicodeBlock("MYANMAR_EXTENDED_B", MYANMAR_EXTENDED_B_ID); /*[A9E0]*/
2283         /** @stable ICU 54 */
2284         public static final UnicodeBlock NABATAEAN = new UnicodeBlock("NABATAEAN", NABATAEAN_ID); /*[10880]*/
2285         /** @stable ICU 54 */
2286         public static final UnicodeBlock OLD_NORTH_ARABIAN =
2287                 new UnicodeBlock("OLD_NORTH_ARABIAN", OLD_NORTH_ARABIAN_ID); /*[10A80]*/
2288         /** @stable ICU 54 */
2289         public static final UnicodeBlock OLD_PERMIC = new UnicodeBlock("OLD_PERMIC", OLD_PERMIC_ID); /*[10350]*/
2290         /** @stable ICU 54 */
2291         public static final UnicodeBlock ORNAMENTAL_DINGBATS =
2292                 new UnicodeBlock("ORNAMENTAL_DINGBATS", ORNAMENTAL_DINGBATS_ID); /*[1F650]*/
2293         /** @stable ICU 54 */
2294         public static final UnicodeBlock PAHAWH_HMONG = new UnicodeBlock("PAHAWH_HMONG", PAHAWH_HMONG_ID); /*[16B00]*/
2295         /** @stable ICU 54 */
2296         public static final UnicodeBlock PALMYRENE = new UnicodeBlock("PALMYRENE", PALMYRENE_ID); /*[10860]*/
2297         /** @stable ICU 54 */
2298         public static final UnicodeBlock PAU_CIN_HAU = new UnicodeBlock("PAU_CIN_HAU", PAU_CIN_HAU_ID); /*[11AC0]*/
2299         /** @stable ICU 54 */
2300         public static final UnicodeBlock PSALTER_PAHLAVI =
2301                 new UnicodeBlock("PSALTER_PAHLAVI", PSALTER_PAHLAVI_ID); /*[10B80]*/
2302         /** @stable ICU 54 */
2303         public static final UnicodeBlock SHORTHAND_FORMAT_CONTROLS =
2304                 new UnicodeBlock("SHORTHAND_FORMAT_CONTROLS", SHORTHAND_FORMAT_CONTROLS_ID); /*[1BCA0]*/
2305         /** @stable ICU 54 */
2306         public static final UnicodeBlock SIDDHAM = new UnicodeBlock("SIDDHAM", SIDDHAM_ID); /*[11580]*/
2307         /** @stable ICU 54 */
2308         public static final UnicodeBlock SINHALA_ARCHAIC_NUMBERS =
2309                 new UnicodeBlock("SINHALA_ARCHAIC_NUMBERS", SINHALA_ARCHAIC_NUMBERS_ID); /*[111E0]*/
2310         /** @stable ICU 54 */
2311         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_C =
2312                 new UnicodeBlock("SUPPLEMENTAL_ARROWS_C", SUPPLEMENTAL_ARROWS_C_ID); /*[1F800]*/
2313         /** @stable ICU 54 */
2314         public static final UnicodeBlock TIRHUTA = new UnicodeBlock("TIRHUTA", TIRHUTA_ID); /*[11480]*/
2315         /** @stable ICU 54 */
2316         public static final UnicodeBlock WARANG_CITI = new UnicodeBlock("WARANG_CITI", WARANG_CITI_ID); /*[118A0]*/
2317 
2318         /**
2319          * @stable ICU 2.4
2320          */
2321         public static final UnicodeBlock INVALID_CODE
2322         = new UnicodeBlock("INVALID_CODE", INVALID_CODE_ID);
2323 
2324         static {
2325             for (int blockId = 0; blockId < COUNT; ++blockId) {
2326                 if (BLOCKS_[blockId] == null) {
2327                     throw new java.lang.IllegalStateException(
2328                             "UnicodeBlock.BLOCKS_[" + blockId + "] not initialized");
2329                 }
2330             }
2331         }
2332 
2333         // public methods --------------------------------------------------
2334 
2335         /**
2336          * {@icu} Returns the only instance of the UnicodeBlock with the argument ID.
2337          * If no such ID exists, a INVALID_CODE UnicodeBlock will be returned.
2338          * @param id UnicodeBlock ID
2339          * @return the only instance of the UnicodeBlock with the argument ID
2340          *         if it exists, otherwise a INVALID_CODE UnicodeBlock will be
2341          *         returned.
2342          * @stable ICU 2.4
2343          */
getInstance(int id)2344         public static UnicodeBlock getInstance(int id)
2345         {
2346             if (id >= 0 && id < BLOCKS_.length) {
2347                 return BLOCKS_[id];
2348             }
2349             return INVALID_CODE;
2350         }
2351 
2352         /**
2353          * Returns the Unicode allocation block that contains the code point,
2354          * or null if the code point is not a member of a defined block.
2355          * @param ch code point to be tested
2356          * @return the Unicode allocation block that contains the code point
2357          * @stable ICU 2.4
2358          */
of(int ch)2359         public static UnicodeBlock of(int ch)
2360         {
2361             if (ch > MAX_VALUE) {
2362                 return INVALID_CODE;
2363             }
2364 
2365             return UnicodeBlock.getInstance(
2366                     UCharacterProperty.INSTANCE.getIntPropertyValue(ch, UProperty.BLOCK));
2367         }
2368 
2369         /**
2370          * Cover the JDK 1.5 API.  Return the Unicode block with the
2371          * given name. {@icunote} Unlike JDK 1.5, this only matches
2372          * against the official UCD name and the Java block name
2373          * (ignoring case).
2374          * @param blockName the name of the block to match
2375          * @return the UnicodeBlock with that name
2376          * @throws IllegalArgumentException if the blockName could not be matched
2377          * @stable ICU 3.0
2378          */
forName(String blockName)2379         public static final UnicodeBlock forName(String blockName) {
2380             Map<String, UnicodeBlock> m = null;
2381             if (mref != null) {
2382                 m = mref.get();
2383             }
2384             if (m == null) {
2385                 m = new HashMap<String, UnicodeBlock>(BLOCKS_.length);
2386                 for (int i = 0; i < BLOCKS_.length; ++i) {
2387                     UnicodeBlock b = BLOCKS_[i];
2388                     String name = trimBlockName(
2389                             getPropertyValueName(UProperty.BLOCK, b.getID(),
2390                                     UProperty.NameChoice.LONG));
2391                     m.put(name, b);
2392                 }
2393                 mref = new SoftReference<Map<String, UnicodeBlock>>(m);
2394             }
2395             UnicodeBlock b = m.get(trimBlockName(blockName));
2396             if (b == null) {
2397                 throw new IllegalArgumentException();
2398             }
2399             return b;
2400         }
2401         private static SoftReference<Map<String, UnicodeBlock>> mref;
2402 
trimBlockName(String name)2403         private static String trimBlockName(String name) {
2404             String upper = name.toUpperCase(Locale.ENGLISH);
2405             StringBuilder result = new StringBuilder(upper.length());
2406             for (int i = 0; i < upper.length(); i++) {
2407                 char c = upper.charAt(i);
2408                 if (c != ' ' && c != '_' && c != '-') {
2409                     result.append(c);
2410                 }
2411             }
2412             return result.toString();
2413         }
2414 
2415         /**
2416          * {icu} Returns the type ID of this Unicode block
2417          * @return integer type ID of this Unicode block
2418          * @stable ICU 2.4
2419          */
getID()2420         public int getID()
2421         {
2422             return m_id_;
2423         }
2424 
2425         // private data members ---------------------------------------------
2426 
2427         /**
2428          * Identification code for this UnicodeBlock
2429          */
2430         private int m_id_;
2431 
2432         // private constructor ----------------------------------------------
2433 
2434         /**
2435          * UnicodeBlock constructor
2436          * @param name name of this UnicodeBlock
2437          * @param id unique id of this UnicodeBlock
2438          * @exception NullPointerException if name is <code>null</code>
2439          */
UnicodeBlock(String name, int id)2440         private UnicodeBlock(String name, int id)
2441         {
2442             super(name);
2443             m_id_ = id;
2444             if (id >= 0) {
2445                 BLOCKS_[id] = this;
2446             }
2447         }
2448     }
2449 
2450     /**
2451      * East Asian Width constants.
2452      * @see UProperty#EAST_ASIAN_WIDTH
2453      * @see UCharacter#getIntPropertyValue
2454      * @stable ICU 2.4
2455      */
2456     public static interface EastAsianWidth
2457     {
2458         /**
2459          * @stable ICU 2.4
2460          */
2461         public static final int NEUTRAL = 0;
2462         /**
2463          * @stable ICU 2.4
2464          */
2465         public static final int AMBIGUOUS = 1;
2466         /**
2467          * @stable ICU 2.4
2468          */
2469         public static final int HALFWIDTH = 2;
2470         /**
2471          * @stable ICU 2.4
2472          */
2473         public static final int FULLWIDTH = 3;
2474         /**
2475          * @stable ICU 2.4
2476          */
2477         public static final int NARROW = 4;
2478         /**
2479          * @stable ICU 2.4
2480          */
2481         public static final int WIDE = 5;
2482         /**
2483          * @stable ICU 2.4
2484          */
2485         public static final int COUNT = 6;
2486     }
2487 
2488     /**
2489      * Decomposition Type constants.
2490      * @see UProperty#DECOMPOSITION_TYPE
2491      * @stable ICU 2.4
2492      */
2493     public static interface DecompositionType
2494     {
2495         /**
2496          * @stable ICU 2.4
2497          */
2498         public static final int NONE = 0;
2499         /**
2500          * @stable ICU 2.4
2501          */
2502         public static final int CANONICAL = 1;
2503         /**
2504          * @stable ICU 2.4
2505          */
2506         public static final int COMPAT = 2;
2507         /**
2508          * @stable ICU 2.4
2509          */
2510         public static final int CIRCLE = 3;
2511         /**
2512          * @stable ICU 2.4
2513          */
2514         public static final int FINAL = 4;
2515         /**
2516          * @stable ICU 2.4
2517          */
2518         public static final int FONT = 5;
2519         /**
2520          * @stable ICU 2.4
2521          */
2522         public static final int FRACTION = 6;
2523         /**
2524          * @stable ICU 2.4
2525          */
2526         public static final int INITIAL = 7;
2527         /**
2528          * @stable ICU 2.4
2529          */
2530         public static final int ISOLATED = 8;
2531         /**
2532          * @stable ICU 2.4
2533          */
2534         public static final int MEDIAL = 9;
2535         /**
2536          * @stable ICU 2.4
2537          */
2538         public static final int NARROW = 10;
2539         /**
2540          * @stable ICU 2.4
2541          */
2542         public static final int NOBREAK = 11;
2543         /**
2544          * @stable ICU 2.4
2545          */
2546         public static final int SMALL = 12;
2547         /**
2548          * @stable ICU 2.4
2549          */
2550         public static final int SQUARE = 13;
2551         /**
2552          * @stable ICU 2.4
2553          */
2554         public static final int SUB = 14;
2555         /**
2556          * @stable ICU 2.4
2557          */
2558         public static final int SUPER = 15;
2559         /**
2560          * @stable ICU 2.4
2561          */
2562         public static final int VERTICAL = 16;
2563         /**
2564          * @stable ICU 2.4
2565          */
2566         public static final int WIDE = 17;
2567         /**
2568          * @stable ICU 2.4
2569          */
2570         public static final int COUNT = 18;
2571     }
2572 
2573     /**
2574      * Joining Type constants.
2575      * @see UProperty#JOINING_TYPE
2576      * @stable ICU 2.4
2577      */
2578     public static interface JoiningType
2579     {
2580         /**
2581          * @stable ICU 2.4
2582          */
2583         public static final int NON_JOINING = 0;
2584         /**
2585          * @stable ICU 2.4
2586          */
2587         public static final int JOIN_CAUSING = 1;
2588         /**
2589          * @stable ICU 2.4
2590          */
2591         public static final int DUAL_JOINING = 2;
2592         /**
2593          * @stable ICU 2.4
2594          */
2595         public static final int LEFT_JOINING = 3;
2596         /**
2597          * @stable ICU 2.4
2598          */
2599         public static final int RIGHT_JOINING = 4;
2600         /**
2601          * @stable ICU 2.4
2602          */
2603         public static final int TRANSPARENT = 5;
2604         /**
2605          * @stable ICU 2.4
2606          */
2607         public static final int COUNT = 6;
2608     }
2609 
2610     /**
2611      * Joining Group constants.
2612      * @see UProperty#JOINING_GROUP
2613      * @stable ICU 2.4
2614      */
2615     public static interface JoiningGroup
2616     {
2617         /**
2618          * @stable ICU 2.4
2619          */
2620         public static final int NO_JOINING_GROUP = 0;
2621         /**
2622          * @stable ICU 2.4
2623          */
2624         public static final int AIN = 1;
2625         /**
2626          * @stable ICU 2.4
2627          */
2628         public static final int ALAPH = 2;
2629         /**
2630          * @stable ICU 2.4
2631          */
2632         public static final int ALEF = 3;
2633         /**
2634          * @stable ICU 2.4
2635          */
2636         public static final int BEH = 4;
2637         /**
2638          * @stable ICU 2.4
2639          */
2640         public static final int BETH = 5;
2641         /**
2642          * @stable ICU 2.4
2643          */
2644         public static final int DAL = 6;
2645         /**
2646          * @stable ICU 2.4
2647          */
2648         public static final int DALATH_RISH = 7;
2649         /**
2650          * @stable ICU 2.4
2651          */
2652         public static final int E = 8;
2653         /**
2654          * @stable ICU 2.4
2655          */
2656         public static final int FEH = 9;
2657         /**
2658          * @stable ICU 2.4
2659          */
2660         public static final int FINAL_SEMKATH = 10;
2661         /**
2662          * @stable ICU 2.4
2663          */
2664         public static final int GAF = 11;
2665         /**
2666          * @stable ICU 2.4
2667          */
2668         public static final int GAMAL = 12;
2669         /**
2670          * @stable ICU 2.4
2671          */
2672         public static final int HAH = 13;
2673         /** @stable ICU 4.6 */
2674         public static final int TEH_MARBUTA_GOAL = 14;
2675         /**
2676          * @stable ICU 2.4
2677          */
2678         public static final int HAMZA_ON_HEH_GOAL = TEH_MARBUTA_GOAL;
2679         /**
2680          * @stable ICU 2.4
2681          */
2682         public static final int HE = 15;
2683         /**
2684          * @stable ICU 2.4
2685          */
2686         public static final int HEH = 16;
2687         /**
2688          * @stable ICU 2.4
2689          */
2690         public static final int HEH_GOAL = 17;
2691         /**
2692          * @stable ICU 2.4
2693          */
2694         public static final int HETH = 18;
2695         /**
2696          * @stable ICU 2.4
2697          */
2698         public static final int KAF = 19;
2699         /**
2700          * @stable ICU 2.4
2701          */
2702         public static final int KAPH = 20;
2703         /**
2704          * @stable ICU 2.4
2705          */
2706         public static final int KNOTTED_HEH = 21;
2707         /**
2708          * @stable ICU 2.4
2709          */
2710         public static final int LAM = 22;
2711         /**
2712          * @stable ICU 2.4
2713          */
2714         public static final int LAMADH = 23;
2715         /**
2716          * @stable ICU 2.4
2717          */
2718         public static final int MEEM = 24;
2719         /**
2720          * @stable ICU 2.4
2721          */
2722         public static final int MIM = 25;
2723         /**
2724          * @stable ICU 2.4
2725          */
2726         public static final int NOON = 26;
2727         /**
2728          * @stable ICU 2.4
2729          */
2730         public static final int NUN = 27;
2731         /**
2732          * @stable ICU 2.4
2733          */
2734         public static final int PE = 28;
2735         /**
2736          * @stable ICU 2.4
2737          */
2738         public static final int QAF = 29;
2739         /**
2740          * @stable ICU 2.4
2741          */
2742         public static final int QAPH = 30;
2743         /**
2744          * @stable ICU 2.4
2745          */
2746         public static final int REH = 31;
2747         /**
2748          * @stable ICU 2.4
2749          */
2750         public static final int REVERSED_PE = 32;
2751         /**
2752          * @stable ICU 2.4
2753          */
2754         public static final int SAD = 33;
2755         /**
2756          * @stable ICU 2.4
2757          */
2758         public static final int SADHE = 34;
2759         /**
2760          * @stable ICU 2.4
2761          */
2762         public static final int SEEN = 35;
2763         /**
2764          * @stable ICU 2.4
2765          */
2766         public static final int SEMKATH = 36;
2767         /**
2768          * @stable ICU 2.4
2769          */
2770         public static final int SHIN = 37;
2771         /**
2772          * @stable ICU 2.4
2773          */
2774         public static final int SWASH_KAF = 38;
2775         /**
2776          * @stable ICU 2.4
2777          */
2778         public static final int SYRIAC_WAW = 39;
2779         /**
2780          * @stable ICU 2.4
2781          */
2782         public static final int TAH = 40;
2783         /**
2784          * @stable ICU 2.4
2785          */
2786         public static final int TAW = 41;
2787         /**
2788          * @stable ICU 2.4
2789          */
2790         public static final int TEH_MARBUTA = 42;
2791         /**
2792          * @stable ICU 2.4
2793          */
2794         public static final int TETH = 43;
2795         /**
2796          * @stable ICU 2.4
2797          */
2798         public static final int WAW = 44;
2799         /**
2800          * @stable ICU 2.4
2801          */
2802         public static final int YEH = 45;
2803         /**
2804          * @stable ICU 2.4
2805          */
2806         public static final int YEH_BARREE = 46;
2807         /**
2808          * @stable ICU 2.4
2809          */
2810         public static final int YEH_WITH_TAIL = 47;
2811         /**
2812          * @stable ICU 2.4
2813          */
2814         public static final int YUDH = 48;
2815         /**
2816          * @stable ICU 2.4
2817          */
2818         public static final int YUDH_HE = 49;
2819         /**
2820          * @stable ICU 2.4
2821          */
2822         public static final int ZAIN = 50;
2823         /**
2824          * @stable ICU 2.6
2825          */
2826         public static final int FE = 51;
2827         /**
2828          * @stable ICU 2.6
2829          */
2830         public static final int KHAPH = 52;
2831         /**
2832          * @stable ICU 2.6
2833          */
2834         public static final int ZHAIN = 53;
2835         /**
2836          * @stable ICU 4.0
2837          */
2838         public static final int BURUSHASKI_YEH_BARREE = 54;
2839         /** @stable ICU 4.4 */
2840         public static final int FARSI_YEH = 55;
2841         /** @stable ICU 4.4 */
2842         public static final int NYA = 56;
2843         /** @stable ICU 49 */
2844         public static final int ROHINGYA_YEH = 57;
2845 
2846         /** @stable ICU 54 */
2847         public static final int MANICHAEAN_ALEPH = 58;
2848         /** @stable ICU 54 */
2849         public static final int MANICHAEAN_AYIN = 59;
2850         /** @stable ICU 54 */
2851         public static final int MANICHAEAN_BETH = 60;
2852         /** @stable ICU 54 */
2853         public static final int MANICHAEAN_DALETH = 61;
2854         /** @stable ICU 54 */
2855         public static final int MANICHAEAN_DHAMEDH = 62;
2856         /** @stable ICU 54 */
2857         public static final int MANICHAEAN_FIVE = 63;
2858         /** @stable ICU 54 */
2859         public static final int MANICHAEAN_GIMEL = 64;
2860         /** @stable ICU 54 */
2861         public static final int MANICHAEAN_HETH = 65;
2862         /** @stable ICU 54 */
2863         public static final int MANICHAEAN_HUNDRED = 66;
2864         /** @stable ICU 54 */
2865         public static final int MANICHAEAN_KAPH = 67;
2866         /** @stable ICU 54 */
2867         public static final int MANICHAEAN_LAMEDH = 68;
2868         /** @stable ICU 54 */
2869         public static final int MANICHAEAN_MEM = 69;
2870         /** @stable ICU 54 */
2871         public static final int MANICHAEAN_NUN = 70;
2872         /** @stable ICU 54 */
2873         public static final int MANICHAEAN_ONE = 71;
2874         /** @stable ICU 54 */
2875         public static final int MANICHAEAN_PE = 72;
2876         /** @stable ICU 54 */
2877         public static final int MANICHAEAN_QOPH = 73;
2878         /** @stable ICU 54 */
2879         public static final int MANICHAEAN_RESH = 74;
2880         /** @stable ICU 54 */
2881         public static final int MANICHAEAN_SADHE = 75;
2882         /** @stable ICU 54 */
2883         public static final int MANICHAEAN_SAMEKH = 76;
2884         /** @stable ICU 54 */
2885         public static final int MANICHAEAN_TAW = 77;
2886         /** @stable ICU 54 */
2887         public static final int MANICHAEAN_TEN = 78;
2888         /** @stable ICU 54 */
2889         public static final int MANICHAEAN_TETH = 79;
2890         /** @stable ICU 54 */
2891         public static final int MANICHAEAN_THAMEDH = 80;
2892         /** @stable ICU 54 */
2893         public static final int MANICHAEAN_TWENTY = 81;
2894         /** @stable ICU 54 */
2895         public static final int MANICHAEAN_WAW = 82;
2896         /** @stable ICU 54 */
2897         public static final int MANICHAEAN_YODH = 83;
2898         /** @stable ICU 54 */
2899         public static final int MANICHAEAN_ZAYIN = 84;
2900         /** @stable ICU 54 */
2901         public static final int STRAIGHT_WAW = 85;
2902 
2903         /**
2904          * @stable ICU 2.4
2905          */
2906         public static final int COUNT = 86;
2907     }
2908 
2909     /**
2910      * Grapheme Cluster Break constants.
2911      * @see UProperty#GRAPHEME_CLUSTER_BREAK
2912      * @stable ICU 3.4
2913      */
2914     public static interface GraphemeClusterBreak {
2915         /**
2916          * @stable ICU 3.4
2917          */
2918         public static final int OTHER = 0;
2919         /**
2920          * @stable ICU 3.4
2921          */
2922         public static final int CONTROL = 1;
2923         /**
2924          * @stable ICU 3.4
2925          */
2926         public static final int CR = 2;
2927         /**
2928          * @stable ICU 3.4
2929          */
2930         public static final int EXTEND = 3;
2931         /**
2932          * @stable ICU 3.4
2933          */
2934         public static final int L = 4;
2935         /**
2936          * @stable ICU 3.4
2937          */
2938         public static final int LF = 5;
2939         /**
2940          * @stable ICU 3.4
2941          */
2942         public static final int LV = 6;
2943         /**
2944          * @stable ICU 3.4
2945          */
2946         public static final int LVT = 7;
2947         /**
2948          * @stable ICU 3.4
2949          */
2950         public static final int T = 8;
2951         /**
2952          * @stable ICU 3.4
2953          */
2954         public static final int V = 9;
2955         /**
2956          * @stable ICU 4.0
2957          */
2958         public static final int SPACING_MARK = 10;
2959         /**
2960          * @stable ICU 4.0
2961          */
2962         public static final int PREPEND = 11;
2963         /** @stable ICU 50 */
2964         public static final int REGIONAL_INDICATOR = 12;  /*[RI]*/ /* new in Unicode 6.2/ICU 50 */
2965         /**
2966          * @stable ICU 3.4
2967          */
2968         public static final int COUNT = 13;
2969     }
2970 
2971     /**
2972      * Word Break constants.
2973      * @see UProperty#WORD_BREAK
2974      * @stable ICU 3.4
2975      */
2976     public static interface WordBreak {
2977         /**
2978          * @stable ICU 3.8
2979          */
2980         public static final int OTHER = 0;
2981         /**
2982          * @stable ICU 3.8
2983          */
2984         public static final int ALETTER = 1;
2985         /**
2986          * @stable ICU 3.8
2987          */
2988         public static final int FORMAT = 2;
2989         /**
2990          * @stable ICU 3.8
2991          */
2992         public static final int KATAKANA = 3;
2993         /**
2994          * @stable ICU 3.8
2995          */
2996         public static final int MIDLETTER = 4;
2997         /**
2998          * @stable ICU 3.8
2999          */
3000         public static final int MIDNUM = 5;
3001         /**
3002          * @stable ICU 3.8
3003          */
3004         public static final int NUMERIC = 6;
3005         /**
3006          * @stable ICU 3.8
3007          */
3008         public static final int EXTENDNUMLET = 7;
3009         /**
3010          * @stable ICU 4.0
3011          */
3012         public static final int CR = 8;
3013         /**
3014          * @stable ICU 4.0
3015          */
3016         public static final int EXTEND = 9;
3017         /**
3018          * @stable ICU 4.0
3019          */
3020         public static final int LF = 10;
3021         /**
3022          * @stable ICU 4.0
3023          */
3024         public static final int MIDNUMLET = 11;
3025         /**
3026          * @stable ICU 4.0
3027          */
3028         public static final int NEWLINE = 12;
3029         /** @stable ICU 50 */
3030         public static final int REGIONAL_INDICATOR = 13;  /*[RI]*/ /* new in Unicode 6.2/ICU 50 */
3031         /** @stable ICU 52 */
3032         public static final int HEBREW_LETTER = 14;    /*[HL]*/ /* from here on: new in Unicode 6.3/ICU 52 */
3033         /** @stable ICU 52 */
3034         public static final int SINGLE_QUOTE = 15;     /*[SQ]*/
3035         /** @stable ICU 52 */
3036         public static final int DOUBLE_QUOTE = 16;     /*[DQ]*/
3037         /**
3038          * @stable ICU 4.0
3039          */
3040         public static final int COUNT = 17;
3041     }
3042 
3043     /**
3044      * Sentence Break constants.
3045      * @see UProperty#SENTENCE_BREAK
3046      * @stable ICU 3.4
3047      */
3048     public static interface SentenceBreak {
3049         /**
3050          * @stable ICU 3.8
3051          */
3052         public static final int OTHER = 0;
3053         /**
3054          * @stable ICU 3.8
3055          */
3056         public static final int ATERM = 1;
3057         /**
3058          * @stable ICU 3.8
3059          */
3060         public static final int CLOSE = 2;
3061         /**
3062          * @stable ICU 3.8
3063          */
3064         public static final int FORMAT = 3;
3065         /**
3066          * @stable ICU 3.8
3067          */
3068         public static final int LOWER = 4;
3069         /**
3070          * @stable ICU 3.8
3071          */
3072         public static final int NUMERIC = 5;
3073         /**
3074          * @stable ICU 3.8
3075          */
3076         public static final int OLETTER = 6;
3077         /**
3078          * @stable ICU 3.8
3079          */
3080         public static final int SEP = 7;
3081         /**
3082          * @stable ICU 3.8
3083          */
3084         public static final int SP = 8;
3085         /**
3086          * @stable ICU 3.8
3087          */
3088         public static final int STERM = 9;
3089         /**
3090          * @stable ICU 3.8
3091          */
3092         public static final int UPPER = 10;
3093         /**
3094          * @stable ICU 4.0
3095          */
3096         public static final int CR = 11;
3097         /**
3098          * @stable ICU 4.0
3099          */
3100         public static final int EXTEND = 12;
3101         /**
3102          * @stable ICU 4.0
3103          */
3104         public static final int LF = 13;
3105         /**
3106          * @stable ICU 4.0
3107          */
3108         public static final int SCONTINUE = 14;
3109         /**
3110          * @stable ICU 4.0
3111          */
3112         public static final int COUNT = 15;
3113     }
3114 
3115     /**
3116      * Line Break constants.
3117      * @see UProperty#LINE_BREAK
3118      * @stable ICU 2.4
3119      */
3120     public static interface LineBreak
3121     {
3122         /**
3123          * @stable ICU 2.4
3124          */
3125         public static final int UNKNOWN = 0;
3126         /**
3127          * @stable ICU 2.4
3128          */
3129         public static final int AMBIGUOUS = 1;
3130         /**
3131          * @stable ICU 2.4
3132          */
3133         public static final int ALPHABETIC = 2;
3134         /**
3135          * @stable ICU 2.4
3136          */
3137         public static final int BREAK_BOTH = 3;
3138         /**
3139          * @stable ICU 2.4
3140          */
3141         public static final int BREAK_AFTER = 4;
3142         /**
3143          * @stable ICU 2.4
3144          */
3145         public static final int BREAK_BEFORE = 5;
3146         /**
3147          * @stable ICU 2.4
3148          */
3149         public static final int MANDATORY_BREAK = 6;
3150         /**
3151          * @stable ICU 2.4
3152          */
3153         public static final int CONTINGENT_BREAK = 7;
3154         /**
3155          * @stable ICU 2.4
3156          */
3157         public static final int CLOSE_PUNCTUATION = 8;
3158         /**
3159          * @stable ICU 2.4
3160          */
3161         public static final int COMBINING_MARK = 9;
3162         /**
3163          * @stable ICU 2.4
3164          */
3165         public static final int CARRIAGE_RETURN = 10;
3166         /**
3167          * @stable ICU 2.4
3168          */
3169         public static final int EXCLAMATION = 11;
3170         /**
3171          * @stable ICU 2.4
3172          */
3173         public static final int GLUE = 12;
3174         /**
3175          * @stable ICU 2.4
3176          */
3177         public static final int HYPHEN = 13;
3178         /**
3179          * @stable ICU 2.4
3180          */
3181         public static final int IDEOGRAPHIC = 14;
3182         /**
3183          * @see #INSEPARABLE
3184          * @stable ICU 2.4
3185          */
3186         public static final int INSEPERABLE = 15;
3187         /**
3188          * Renamed from the misspelled "inseperable" in Unicode 4.0.1.
3189          * @stable ICU 3.0
3190          */
3191         public static final int INSEPARABLE = 15;
3192         /**
3193          * @stable ICU 2.4
3194          */
3195         public static final int INFIX_NUMERIC = 16;
3196         /**
3197          * @stable ICU 2.4
3198          */
3199         public static final int LINE_FEED = 17;
3200         /**
3201          * @stable ICU 2.4
3202          */
3203         public static final int NONSTARTER = 18;
3204         /**
3205          * @stable ICU 2.4
3206          */
3207         public static final int NUMERIC = 19;
3208         /**
3209          * @stable ICU 2.4
3210          */
3211         public static final int OPEN_PUNCTUATION = 20;
3212         /**
3213          * @stable ICU 2.4
3214          */
3215         public static final int POSTFIX_NUMERIC = 21;
3216         /**
3217          * @stable ICU 2.4
3218          */
3219         public static final int PREFIX_NUMERIC = 22;
3220         /**
3221          * @stable ICU 2.4
3222          */
3223         public static final int QUOTATION = 23;
3224         /**
3225          * @stable ICU 2.4
3226          */
3227         public static final int COMPLEX_CONTEXT = 24;
3228         /**
3229          * @stable ICU 2.4
3230          */
3231         public static final int SURROGATE = 25;
3232         /**
3233          * @stable ICU 2.4
3234          */
3235         public static final int SPACE = 26;
3236         /**
3237          * @stable ICU 2.4
3238          */
3239         public static final int BREAK_SYMBOLS = 27;
3240         /**
3241          * @stable ICU 2.4
3242          */
3243         public static final int ZWSPACE = 28;
3244         /**
3245          * @stable ICU 2.6
3246          */
3247         public static final int NEXT_LINE = 29;  /*[NL]*/ /* from here on: new in Unicode 4/ICU 2.6 */
3248         /**
3249          * @stable ICU 2.6
3250          */
3251         public static final int WORD_JOINER = 30;      /*[WJ]*/
3252         /**
3253          * @stable ICU 3.4
3254          */
3255         public static final int H2 = 31;  /* from here on: new in Unicode 4.1/ICU 3.4 */
3256         /**
3257          * @stable ICU 3.4
3258          */
3259         public static final int H3 = 32;
3260         /**
3261          * @stable ICU 3.4
3262          */
3263         public static final int JL = 33;
3264         /**
3265          * @stable ICU 3.4
3266          */
3267         public static final int JT = 34;
3268         /**
3269          * @stable ICU 3.4
3270          */
3271         public static final int JV = 35;
3272         /** @stable ICU 4.4 */
3273         public static final int CLOSE_PARENTHESIS = 36; /*[CP]*/ /* new in Unicode 5.2/ICU 4.4 */
3274         /** @stable ICU 49 */
3275         public static final int CONDITIONAL_JAPANESE_STARTER = 37;  /*[CJ]*/ /* new in Unicode 6.1/ICU 49 */
3276         /** @stable ICU 49 */
3277         public static final int HEBREW_LETTER = 38;  /*[HL]*/ /* new in Unicode 6.1/ICU 49 */
3278         /** @stable ICU 50 */
3279         public static final int REGIONAL_INDICATOR = 39;  /*[RI]*/ /* new in Unicode 6.2/ICU 50 */
3280         /**
3281          * @stable ICU 2.4
3282          */
3283         public static final int COUNT = 40;
3284     }
3285 
3286     /**
3287      * Numeric Type constants.
3288      * @see UProperty#NUMERIC_TYPE
3289      * @stable ICU 2.4
3290      */
3291     public static interface NumericType
3292     {
3293         /**
3294          * @stable ICU 2.4
3295          */
3296         public static final int NONE = 0;
3297         /**
3298          * @stable ICU 2.4
3299          */
3300         public static final int DECIMAL = 1;
3301         /**
3302          * @stable ICU 2.4
3303          */
3304         public static final int DIGIT = 2;
3305         /**
3306          * @stable ICU 2.4
3307          */
3308         public static final int NUMERIC = 3;
3309         /**
3310          * @stable ICU 2.4
3311          */
3312         public static final int COUNT = 4;
3313     }
3314 
3315     /**
3316      * Hangul Syllable Type constants.
3317      *
3318      * @see UProperty#HANGUL_SYLLABLE_TYPE
3319      * @stable ICU 2.6
3320      */
3321     public static interface HangulSyllableType
3322     {
3323         /**
3324          * @stable ICU 2.6
3325          */
3326         public static final int NOT_APPLICABLE      = 0;   /*[NA]*/ /*See note !!*/
3327         /**
3328          * @stable ICU 2.6
3329          */
3330         public static final int LEADING_JAMO        = 1;   /*[L]*/
3331         /**
3332          * @stable ICU 2.6
3333          */
3334         public static final int VOWEL_JAMO          = 2;   /*[V]*/
3335         /**
3336          * @stable ICU 2.6
3337          */
3338         public static final int TRAILING_JAMO       = 3;   /*[T]*/
3339         /**
3340          * @stable ICU 2.6
3341          */
3342         public static final int LV_SYLLABLE         = 4;   /*[LV]*/
3343         /**
3344          * @stable ICU 2.6
3345          */
3346         public static final int LVT_SYLLABLE        = 5;   /*[LVT]*/
3347         /**
3348          * @stable ICU 2.6
3349          */
3350         public static final int COUNT               = 6;
3351     }
3352 
3353     /**
3354      * Bidi Paired Bracket Type constants.
3355      *
3356      * @see UProperty#BIDI_PAIRED_BRACKET_TYPE
3357      * @stable ICU 52
3358      */
3359     public static interface BidiPairedBracketType {
3360         /**
3361          * Not a paired bracket.
3362          * @stable ICU 52
3363          */
3364         public static final int NONE = 0;
3365         /**
3366          * Open paired bracket.
3367          * @stable ICU 52
3368          */
3369         public static final int OPEN = 1;
3370         /**
3371          * Close paired bracket.
3372          * @stable ICU 52
3373          */
3374         public static final int CLOSE = 2;
3375         /**
3376          * @stable ICU 52
3377          */
3378         public static final int COUNT = 3;
3379     }
3380 
3381     // public data members -----------------------------------------------
3382 
3383     /**
3384      * The lowest Unicode code point value.
3385      * @stable ICU 2.1
3386      */
3387     public static final int MIN_VALUE = UTF16.CODEPOINT_MIN_VALUE;
3388 
3389     /**
3390      * The highest Unicode code point value (scalar value) according to the
3391      * Unicode Standard.
3392      * This is a 21-bit value (21 bits, rounded up).<br>
3393      * Up-to-date Unicode implementation of java.lang.Character.MAX_VALUE
3394      * @stable ICU 2.1
3395      */
3396     public static final int MAX_VALUE = UTF16.CODEPOINT_MAX_VALUE;
3397 
3398     /**
3399      * The minimum value for Supplementary code points
3400      * @stable ICU 2.1
3401      */
3402     public static final int SUPPLEMENTARY_MIN_VALUE =
3403             UTF16.SUPPLEMENTARY_MIN_VALUE;
3404 
3405     /**
3406      * Unicode value used when translating into Unicode encoding form and there
3407      * is no existing character.
3408      * @stable ICU 2.1
3409      */
3410     public static final int REPLACEMENT_CHAR = '\uFFFD';
3411 
3412     /**
3413      * Special value that is returned by getUnicodeNumericValue(int) when no
3414      * numeric value is defined for a code point.
3415      * @stable ICU 2.4
3416      * @see #getUnicodeNumericValue
3417      */
3418     public static final double NO_NUMERIC_VALUE = -123456789;
3419 
3420     /**
3421      * Compatibility constant for Java Character's MIN_RADIX.
3422      * @stable ICU 3.4
3423      */
3424     public static final int MIN_RADIX = java.lang.Character.MIN_RADIX;
3425 
3426     /**
3427      * Compatibility constant for Java Character's MAX_RADIX.
3428      * @stable ICU 3.4
3429      */
3430     public static final int MAX_RADIX = java.lang.Character.MAX_RADIX;
3431 
3432     /**
3433      * Do not lowercase non-initial parts of words when titlecasing.
3434      * Option bit for titlecasing APIs that take an options bit set.
3435      *
3436      * By default, titlecasing will titlecase the first cased character
3437      * of a word and lowercase all other characters.
3438      * With this option, the other characters will not be modified.
3439      *
3440      * @see #toTitleCase
3441      * @stable ICU 3.8
3442      */
3443     public static final int TITLECASE_NO_LOWERCASE = 0x100;
3444 
3445     /**
3446      * Do not adjust the titlecasing indexes from BreakIterator::next() indexes;
3447      * titlecase exactly the characters at breaks from the iterator.
3448      * Option bit for titlecasing APIs that take an options bit set.
3449      *
3450      * By default, titlecasing will take each break iterator index,
3451      * adjust it by looking for the next cased character, and titlecase that one.
3452      * Other characters are lowercased.
3453      *
3454      * This follows Unicode 4 & 5 section 3.13 Default Case Operations:
3455      *
3456      * R3  toTitlecase(X): Find the word boundaries based on Unicode Standard Annex
3457      * #29, "Text Boundaries." Between each pair of word boundaries, find the first
3458      * cased character F. If F exists, map F to default_title(F); then map each
3459      * subsequent character C to default_lower(C).
3460      *
3461      * @see #toTitleCase
3462      * @see #TITLECASE_NO_LOWERCASE
3463      * @stable ICU 3.8
3464      */
3465     public static final int TITLECASE_NO_BREAK_ADJUSTMENT = 0x200;
3466 
3467     // public methods ----------------------------------------------------
3468 
3469     /**
3470      * Returnss the numeric value of a decimal digit code point.
3471      * <br>This method observes the semantics of
3472      * <code>java.lang.Character.digit()</code>.  Note that this
3473      * will return positive values for code points for which isDigit
3474      * returns false, just like java.lang.Character.
3475      * <br><em>Semantic Change:</em> In release 1.3.1 and
3476      * prior, this did not treat the European letters as having a
3477      * digit value, and also treated numeric letters and other numbers as
3478      * digits.
3479      * This has been changed to conform to the java semantics.
3480      * <br>A code point is a valid digit if and only if:
3481      * <ul>
3482      *   <li>ch is a decimal digit or one of the european letters, and
3483      *   <li>the value of ch is less than the specified radix.
3484      * </ul>
3485      * @param ch the code point to query
3486      * @param radix the radix
3487      * @return the numeric value represented by the code point in the
3488      * specified radix, or -1 if the code point is not a decimal digit
3489      * or if its value is too large for the radix
3490      * @stable ICU 2.1
3491      */
digit(int ch, int radix)3492     public static int digit(int ch, int radix)
3493     {
3494         if (2 <= radix && radix <= 36) {
3495             int value = digit(ch);
3496             if (value < 0) {
3497                 // ch is not a decimal digit, try latin letters
3498                 value = UCharacterProperty.getEuropeanDigit(ch);
3499             }
3500             return (value < radix) ? value : -1;
3501         } else {
3502             return -1;  // invalid radix
3503         }
3504     }
3505 
3506     /**
3507      * Returnss the numeric value of a decimal digit code point.
3508      * <br>This is a convenience overload of <code>digit(int, int)</code>
3509      * that provides a decimal radix.
3510      * <br><em>Semantic Change:</em> In release 1.3.1 and prior, this
3511      * treated numeric letters and other numbers as digits.  This has
3512      * been changed to conform to the java semantics.
3513      * @param ch the code point to query
3514      * @return the numeric value represented by the code point,
3515      * or -1 if the code point is not a decimal digit or if its
3516      * value is too large for a decimal radix
3517      * @stable ICU 2.1
3518      */
digit(int ch)3519     public static int digit(int ch)
3520     {
3521         return UCharacterProperty.INSTANCE.digit(ch);
3522     }
3523 
3524     /**
3525      * Returns the numeric value of the code point as a nonnegative
3526      * integer.
3527      * <br>If the code point does not have a numeric value, then -1 is returned.
3528      * <br>
3529      * If the code point has a numeric value that cannot be represented as a
3530      * nonnegative integer (for example, a fractional value), then -2 is
3531      * returned.
3532      * @param ch the code point to query
3533      * @return the numeric value of the code point, or -1 if it has no numeric
3534      * value, or -2 if it has a numeric value that cannot be represented as a
3535      * nonnegative integer
3536      * @stable ICU 2.1
3537      */
getNumericValue(int ch)3538     public static int getNumericValue(int ch)
3539     {
3540         return UCharacterProperty.INSTANCE.getNumericValue(ch);
3541     }
3542 
3543     /**
3544      * {@icu} Returns the numeric value for a Unicode code point as defined in the
3545      * Unicode Character Database.</p>
3546      * <p>A "double" return type is necessary because some numeric values are
3547      * fractions, negative, or too large for int.</p>
3548      * <p>For characters without any numeric values in the Unicode Character
3549      * Database, this function will return NO_NUMERIC_VALUE.
3550      * Note: This is different from the Unicode Standard which specifies NaN as the default value.</p>
3551      * <p><em>API Change:</em> In release 2.2 and prior, this API has a
3552      * return type int and returns -1 when the argument ch does not have a
3553      * corresponding numeric value. This has been changed to synch with ICU4C
3554      * </p>
3555      * This corresponds to the ICU4C function u_getNumericValue.
3556      * @param ch Code point to get the numeric value for.
3557      * @return numeric value of ch, or NO_NUMERIC_VALUE if none is defined.
3558      * @stable ICU 2.4
3559      */
getUnicodeNumericValue(int ch)3560     public static double getUnicodeNumericValue(int ch)
3561     {
3562         return UCharacterProperty.INSTANCE.getUnicodeNumericValue(ch);
3563     }
3564 
3565     /**
3566      * Compatibility override of Java deprecated method.  This
3567      * method will always remain deprecated.
3568      * Same as java.lang.Character.isSpace().
3569      * @param ch the code point
3570      * @return true if the code point is a space character as
3571      * defined by java.lang.Character.isSpace.
3572      * @deprecated ICU 3.4 (Java)
3573      */
3574     @Deprecated
isSpace(int ch)3575     public static boolean isSpace(int ch) {
3576         return ch <= 0x20 &&
3577                 (ch == 0x20 || ch == 0x09 || ch == 0x0a || ch == 0x0c || ch == 0x0d);
3578     }
3579 
3580     /**
3581      * Returns a value indicating a code point's Unicode category.
3582      * Up-to-date Unicode implementation of java.lang.Character.getType()
3583      * except for the above mentioned code points that had their category
3584      * changed.<br>
3585      * Return results are constants from the interface
3586      * <a href=UCharacterCategory.html>UCharacterCategory</a><br>
3587      * <em>NOTE:</em> the UCharacterCategory values are <em>not</em> compatible with
3588      * those returned by java.lang.Character.getType.  UCharacterCategory values
3589      * match the ones used in ICU4C, while java.lang.Character type
3590      * values, though similar, skip the value 17.</p>
3591      * @param ch code point whose type is to be determined
3592      * @return category which is a value of UCharacterCategory
3593      * @stable ICU 2.1
3594      */
getType(int ch)3595     public static int getType(int ch)
3596     {
3597         return UCharacterProperty.INSTANCE.getType(ch);
3598     }
3599 
3600     /**
3601      * Determines if a code point has a defined meaning in the up-to-date
3602      * Unicode standard.
3603      * E.g. supplementary code points though allocated space are not defined in
3604      * Unicode yet.<br>
3605      * Up-to-date Unicode implementation of java.lang.Character.isDefined()
3606      * @param ch code point to be determined if it is defined in the most
3607      *        current version of Unicode
3608      * @return true if this code point is defined in unicode
3609      * @stable ICU 2.1
3610      */
isDefined(int ch)3611     public static boolean isDefined(int ch)
3612     {
3613         return getType(ch) != 0;
3614     }
3615 
3616     /**
3617      * Determines if a code point is a Java digit.
3618      * <br>This method observes the semantics of
3619      * <code>java.lang.Character.isDigit()</code>. It returns true for decimal
3620      * digits only.
3621      * <br><em>Semantic Change:</em> In release 1.3.1 and prior, this treated
3622      * numeric letters and other numbers as digits.
3623      * This has been changed to conform to the java semantics.
3624      * @param ch code point to query
3625      * @return true if this code point is a digit
3626      * @stable ICU 2.1
3627      */
isDigit(int ch)3628     public static boolean isDigit(int ch)
3629     {
3630         return getType(ch) == UCharacterCategory.DECIMAL_DIGIT_NUMBER;
3631     }
3632 
3633     /**
3634      * Determines if the specified code point is an ISO control character.
3635      * A code point is considered to be an ISO control character if it is in
3636      * the range &#92u0000 through &#92u001F or in the range &#92u007F through
3637      * &#92u009F.<br>
3638      * Up-to-date Unicode implementation of java.lang.Character.isISOControl()
3639      * @param ch code point to determine if it is an ISO control character
3640      * @return true if code point is a ISO control character
3641      * @stable ICU 2.1
3642      */
isISOControl(int ch)3643     public static boolean isISOControl(int ch)
3644     {
3645         return ch >= 0 && ch <= APPLICATION_PROGRAM_COMMAND_ &&
3646                 ((ch <= UNIT_SEPARATOR_) || (ch >= DELETE_));
3647     }
3648 
3649     /**
3650      * Determines if the specified code point is a letter.
3651      * Up-to-date Unicode implementation of java.lang.Character.isLetter()
3652      * @param ch code point to determine if it is a letter
3653      * @return true if code point is a letter
3654      * @stable ICU 2.1
3655      */
isLetter(int ch)3656     public static boolean isLetter(int ch)
3657     {
3658         // if props == 0, it will just fall through and return false
3659         return ((1 << getType(ch))
3660                 & ((1 << UCharacterCategory.UPPERCASE_LETTER)
3661                         | (1 << UCharacterCategory.LOWERCASE_LETTER)
3662                         | (1 << UCharacterCategory.TITLECASE_LETTER)
3663                         | (1 << UCharacterCategory.MODIFIER_LETTER)
3664                         | (1 << UCharacterCategory.OTHER_LETTER))) != 0;
3665     }
3666 
3667     /**
3668      * Determines if the specified code point is a letter or digit.
3669      * {@icunote} This method, unlike java.lang.Character does not regard the ascii
3670      * characters 'A' - 'Z' and 'a' - 'z' as digits.
3671      * @param ch code point to determine if it is a letter or a digit
3672      * @return true if code point is a letter or a digit
3673      * @stable ICU 2.1
3674      */
isLetterOrDigit(int ch)3675     public static boolean isLetterOrDigit(int ch)
3676     {
3677         return ((1 << getType(ch))
3678                 & ((1 << UCharacterCategory.UPPERCASE_LETTER)
3679                         | (1 << UCharacterCategory.LOWERCASE_LETTER)
3680                         | (1 << UCharacterCategory.TITLECASE_LETTER)
3681                         | (1 << UCharacterCategory.MODIFIER_LETTER)
3682                         | (1 << UCharacterCategory.OTHER_LETTER)
3683                         | (1 << UCharacterCategory.DECIMAL_DIGIT_NUMBER))) != 0;
3684     }
3685 
3686     /**
3687      * Compatibility override of Java deprecated method.  This
3688      * method will always remain deprecated.  Delegates to
3689      * java.lang.Character.isJavaIdentifierStart.
3690      * @param cp the code point
3691      * @return true if the code point can start a java identifier.
3692      * @deprecated ICU 3.4 (Java)
3693      */
3694     @Deprecated
isJavaLetter(int cp)3695     public static boolean isJavaLetter(int cp) {
3696         return isJavaIdentifierStart(cp);
3697     }
3698 
3699     /**
3700      * Compatibility override of Java deprecated method.  This
3701      * method will always remain deprecated.  Delegates to
3702      * java.lang.Character.isJavaIdentifierPart.
3703      * @param cp the code point
3704      * @return true if the code point can continue a java identifier.
3705      * @deprecated ICU 3.4 (Java)
3706      */
3707     @Deprecated
isJavaLetterOrDigit(int cp)3708     public static boolean isJavaLetterOrDigit(int cp) {
3709         return isJavaIdentifierPart(cp);
3710     }
3711 
3712     /**
3713      * Compatibility override of Java method, delegates to
3714      * java.lang.Character.isJavaIdentifierStart.
3715      * @param cp the code point
3716      * @return true if the code point can start a java identifier.
3717      * @stable ICU 3.4
3718      */
isJavaIdentifierStart(int cp)3719     public static boolean isJavaIdentifierStart(int cp) {
3720         // note, downcast to char for jdk 1.4 compatibility
3721         return java.lang.Character.isJavaIdentifierStart((char)cp);
3722     }
3723 
3724     /**
3725      * Compatibility override of Java method, delegates to
3726      * java.lang.Character.isJavaIdentifierPart.
3727      * @param cp the code point
3728      * @return true if the code point can continue a java identifier.
3729      * @stable ICU 3.4
3730      */
isJavaIdentifierPart(int cp)3731     public static boolean isJavaIdentifierPart(int cp) {
3732         // note, downcast to char for jdk 1.4 compatibility
3733         return java.lang.Character.isJavaIdentifierPart((char)cp);
3734     }
3735 
3736     /**
3737      * Determines if the specified code point is a lowercase character.
3738      * UnicodeData only contains case mappings for code points where they are
3739      * one-to-one mappings; it also omits information about context-sensitive
3740      * case mappings.<br> For more information about Unicode case mapping
3741      * please refer to the
3742      * <a href=http://www.unicode.org/unicode/reports/tr21/>Technical report
3743      * #21</a>.<br>
3744      * Up-to-date Unicode implementation of java.lang.Character.isLowerCase()
3745      * @param ch code point to determine if it is in lowercase
3746      * @return true if code point is a lowercase character
3747      * @stable ICU 2.1
3748      */
isLowerCase(int ch)3749     public static boolean isLowerCase(int ch)
3750     {
3751         // if props == 0, it will just fall through and return false
3752         return getType(ch) == UCharacterCategory.LOWERCASE_LETTER;
3753     }
3754 
3755     /**
3756      * Determines if the specified code point is a white space character.
3757      * A code point is considered to be an whitespace character if and only
3758      * if it satisfies one of the following criteria:
3759      * <ul>
3760      * <li> It is a Unicode Separator character (categories "Z" = "Zs" or "Zl" or "Zp"), but is not
3761      *      also a non-breaking space (&#92u00A0 or &#92u2007 or &#92u202F).
3762      * <li> It is &#92u0009, HORIZONTAL TABULATION.
3763      * <li> It is &#92u000A, LINE FEED.
3764      * <li> It is &#92u000B, VERTICAL TABULATION.
3765      * <li> It is &#92u000C, FORM FEED.
3766      * <li> It is &#92u000D, CARRIAGE RETURN.
3767      * <li> It is &#92u001C, FILE SEPARATOR.
3768      * <li> It is &#92u001D, GROUP SEPARATOR.
3769      * <li> It is &#92u001E, RECORD SEPARATOR.
3770      * <li> It is &#92u001F, UNIT SEPARATOR.
3771      * </ul>
3772      *
3773      * This API tries to sync with the semantics of Java's
3774      * java.lang.Character.isWhitespace(), but it may not return
3775      * the exact same results because of the Unicode version
3776      * difference.
3777      * <p>Note: Unicode 4.0.1 changed U+200B ZERO WIDTH SPACE from a Space Separator (Zs)
3778      * to a Format Control (Cf). Since then, isWhitespace(0x200b) returns false.
3779      * See http://www.unicode.org/versions/Unicode4.0.1/
3780      * @param ch code point to determine if it is a white space
3781      * @return true if the specified code point is a white space character
3782      * @stable ICU 2.1
3783      */
isWhitespace(int ch)3784     public static boolean isWhitespace(int ch)
3785     {
3786         // exclude no-break spaces
3787         // if props == 0, it will just fall through and return false
3788         return ((1 << getType(ch)) &
3789                 ((1 << UCharacterCategory.SPACE_SEPARATOR)
3790                         | (1 << UCharacterCategory.LINE_SEPARATOR)
3791                         | (1 << UCharacterCategory.PARAGRAPH_SEPARATOR))) != 0
3792                         && (ch != NO_BREAK_SPACE_) && (ch != FIGURE_SPACE_) && (ch != NARROW_NO_BREAK_SPACE_)
3793                         // TAB VT LF FF CR FS GS RS US NL are all control characters
3794                         // that are white spaces.
3795                         || (ch >= 0x9 && ch <= 0xd) || (ch >= 0x1c && ch <= 0x1f);
3796     }
3797 
3798     /**
3799      * Determines if the specified code point is a Unicode specified space
3800      * character, i.e. if code point is in the category Zs, Zl and Zp.
3801      * Up-to-date Unicode implementation of java.lang.Character.isSpaceChar().
3802      * @param ch code point to determine if it is a space
3803      * @return true if the specified code point is a space character
3804      * @stable ICU 2.1
3805      */
isSpaceChar(int ch)3806     public static boolean isSpaceChar(int ch)
3807     {
3808         // if props == 0, it will just fall through and return false
3809         return ((1 << getType(ch)) & ((1 << UCharacterCategory.SPACE_SEPARATOR)
3810                 | (1 << UCharacterCategory.LINE_SEPARATOR)
3811                 | (1 << UCharacterCategory.PARAGRAPH_SEPARATOR)))
3812                 != 0;
3813     }
3814 
3815     /**
3816      * Determines if the specified code point is a titlecase character.
3817      * UnicodeData only contains case mappings for code points where they are
3818      * one-to-one mappings; it also omits information about context-sensitive
3819      * case mappings.<br>
3820      * For more information about Unicode case mapping please refer to the
3821      * <a href=http://www.unicode.org/unicode/reports/tr21/>
3822      * Technical report #21</a>.<br>
3823      * Up-to-date Unicode implementation of java.lang.Character.isTitleCase().
3824      * @param ch code point to determine if it is in title case
3825      * @return true if the specified code point is a titlecase character
3826      * @stable ICU 2.1
3827      */
isTitleCase(int ch)3828     public static boolean isTitleCase(int ch)
3829     {
3830         // if props == 0, it will just fall through and return false
3831         return getType(ch) == UCharacterCategory.TITLECASE_LETTER;
3832     }
3833 
3834     /**
3835      * Determines if the specified code point may be any part of a Unicode
3836      * identifier other than the starting character.
3837      * A code point may be part of a Unicode identifier if and only if it is
3838      * one of the following:
3839      * <ul>
3840      * <li> Lu Uppercase letter
3841      * <li> Ll Lowercase letter
3842      * <li> Lt Titlecase letter
3843      * <li> Lm Modifier letter
3844      * <li> Lo Other letter
3845      * <li> Nl Letter number
3846      * <li> Pc Connecting punctuation character
3847      * <li> Nd decimal number
3848      * <li> Mc Spacing combining mark
3849      * <li> Mn Non-spacing mark
3850      * <li> Cf formatting code
3851      * </ul>
3852      * Up-to-date Unicode implementation of
3853      * java.lang.Character.isUnicodeIdentifierPart().<br>
3854      * See <a href=http://www.unicode.org/unicode/reports/tr8/>UTR #8</a>.
3855      * @param ch code point to determine if is can be part of a Unicode
3856      *        identifier
3857      * @return true if code point is any character belonging a unicode
3858      *         identifier suffix after the first character
3859      * @stable ICU 2.1
3860      */
isUnicodeIdentifierPart(int ch)3861     public static boolean isUnicodeIdentifierPart(int ch)
3862     {
3863         // if props == 0, it will just fall through and return false
3864         // cat == format
3865         return ((1 << getType(ch))
3866                 & ((1 << UCharacterCategory.UPPERCASE_LETTER)
3867                         | (1 << UCharacterCategory.LOWERCASE_LETTER)
3868                         | (1 << UCharacterCategory.TITLECASE_LETTER)
3869                         | (1 << UCharacterCategory.MODIFIER_LETTER)
3870                         | (1 << UCharacterCategory.OTHER_LETTER)
3871                         | (1 << UCharacterCategory.LETTER_NUMBER)
3872                         | (1 << UCharacterCategory.CONNECTOR_PUNCTUATION)
3873                         | (1 << UCharacterCategory.DECIMAL_DIGIT_NUMBER)
3874                         | (1 << UCharacterCategory.COMBINING_SPACING_MARK)
3875                         | (1 << UCharacterCategory.NON_SPACING_MARK))) != 0
3876                         || isIdentifierIgnorable(ch);
3877     }
3878 
3879     /**
3880      * Determines if the specified code point is permissible as the first
3881      * character in a Unicode identifier.
3882      * A code point may start a Unicode identifier if it is of type either
3883      * <ul>
3884      * <li> Lu Uppercase letter
3885      * <li> Ll Lowercase letter
3886      * <li> Lt Titlecase letter
3887      * <li> Lm Modifier letter
3888      * <li> Lo Other letter
3889      * <li> Nl Letter number
3890      * </ul>
3891      * Up-to-date Unicode implementation of
3892      * java.lang.Character.isUnicodeIdentifierStart().<br>
3893      * See <a href=http://www.unicode.org/unicode/reports/tr8/>UTR #8</a>.
3894      * @param ch code point to determine if it can start a Unicode identifier
3895      * @return true if code point is the first character belonging a unicode
3896      *              identifier
3897      * @stable ICU 2.1
3898      */
isUnicodeIdentifierStart(int ch)3899     public static boolean isUnicodeIdentifierStart(int ch)
3900     {
3901         /*int cat = getType(ch);*/
3902         // if props == 0, it will just fall through and return false
3903         return ((1 << getType(ch))
3904                 & ((1 << UCharacterCategory.UPPERCASE_LETTER)
3905                         | (1 << UCharacterCategory.LOWERCASE_LETTER)
3906                         | (1 << UCharacterCategory.TITLECASE_LETTER)
3907                         | (1 << UCharacterCategory.MODIFIER_LETTER)
3908                         | (1 << UCharacterCategory.OTHER_LETTER)
3909                         | (1 << UCharacterCategory.LETTER_NUMBER))) != 0;
3910     }
3911 
3912     /**
3913      * Determines if the specified code point should be regarded as an
3914      * ignorable character in a Java identifier.
3915      * A character is Java-identifier-ignorable if it has the general category
3916      * Cf Formatting Control, or it is a non-Java-whitespace ISO control:
3917      * U+0000..U+0008, U+000E..U+001B, U+007F..U+009F.<br>
3918      * Up-to-date Unicode implementation of
3919      * java.lang.Character.isIdentifierIgnorable().<br>
3920      * See <a href=http://www.unicode.org/unicode/reports/tr8/>UTR #8</a>.
3921      * <p>Note that Unicode just recommends to ignore Cf (format controls).
3922      * @param ch code point to be determined if it can be ignored in a Unicode
3923      *        identifier.
3924      * @return true if the code point is ignorable
3925      * @stable ICU 2.1
3926      */
isIdentifierIgnorable(int ch)3927     public static boolean isIdentifierIgnorable(int ch)
3928     {
3929         // see java.lang.Character.isIdentifierIgnorable() on range of
3930         // ignorable characters.
3931         if (ch <= 0x9f) {
3932             return isISOControl(ch)
3933                     && !((ch >= 0x9 && ch <= 0xd)
3934                             || (ch >= 0x1c && ch <= 0x1f));
3935         }
3936         return getType(ch) == UCharacterCategory.FORMAT;
3937     }
3938 
3939     /**
3940      * Determines if the specified code point is an uppercase character.
3941      * UnicodeData only contains case mappings for code point where they are
3942      * one-to-one mappings; it also omits information about context-sensitive
3943      * case mappings.<br>
3944      * For language specific case conversion behavior, use
3945      * toUpperCase(locale, str). <br>
3946      * For example, the case conversion for dot-less i and dotted I in Turkish,
3947      * or for final sigma in Greek.
3948      * For more information about Unicode case mapping please refer to the
3949      * <a href=http://www.unicode.org/unicode/reports/tr21/>
3950      * Technical report #21</a>.<br>
3951      * Up-to-date Unicode implementation of java.lang.Character.isUpperCase().
3952      * @param ch code point to determine if it is in uppercase
3953      * @return true if the code point is an uppercase character
3954      * @stable ICU 2.1
3955      */
isUpperCase(int ch)3956     public static boolean isUpperCase(int ch)
3957     {
3958         // if props == 0, it will just fall through and return false
3959         return getType(ch) == UCharacterCategory.UPPERCASE_LETTER;
3960     }
3961 
3962     /**
3963      * The given code point is mapped to its lowercase equivalent; if the code
3964      * point has no lowercase equivalent, the code point itself is returned.
3965      * Up-to-date Unicode implementation of java.lang.Character.toLowerCase()
3966      *
3967      * <p>This function only returns the simple, single-code point case mapping.
3968      * Full case mappings should be used whenever possible because they produce
3969      * better results by working on whole strings.
3970      * They take into account the string context and the language and can map
3971      * to a result string with a different length as appropriate.
3972      * Full case mappings are applied by the case mapping functions
3973      * that take String parameters rather than code points (int).
3974      * See also the User Guide chapter on C/POSIX migration:
3975      * http://www.icu-project.org/userguide/posix.html#case_mappings
3976      *
3977      * @param ch code point whose lowercase equivalent is to be retrieved
3978      * @return the lowercase equivalent code point
3979      * @stable ICU 2.1
3980      */
toLowerCase(int ch)3981     public static int toLowerCase(int ch) {
3982         return UCaseProps.INSTANCE.tolower(ch);
3983     }
3984 
3985     /**
3986      * Converts argument code point and returns a String object representing
3987      * the code point's value in UTF16 format.
3988      * The result is a string whose length is 1 for non-supplementary code
3989      * points, 2 otherwise.<br>
3990      * com.ibm.ibm.icu.UTF16 can be used to parse Strings generated by this
3991      * function.<br>
3992      * Up-to-date Unicode implementation of java.lang.Character.toString()
3993      * @param ch code point
3994      * @return string representation of the code point, null if code point is not
3995      *         defined in unicode
3996      * @stable ICU 2.1
3997      */
toString(int ch)3998     public static String toString(int ch)
3999     {
4000         if (ch < MIN_VALUE || ch > MAX_VALUE) {
4001             return null;
4002         }
4003 
4004         if (ch < SUPPLEMENTARY_MIN_VALUE) {
4005             return String.valueOf((char)ch);
4006         }
4007 
4008         StringBuilder result = new StringBuilder();
4009         result.append(UTF16.getLeadSurrogate(ch));
4010         result.append(UTF16.getTrailSurrogate(ch));
4011         return result.toString();
4012     }
4013 
4014     /**
4015      * Converts the code point argument to titlecase.
4016      * If no titlecase is available, the uppercase is returned. If no uppercase
4017      * is available, the code point itself is returned.
4018      * Up-to-date Unicode implementation of java.lang.Character.toTitleCase()
4019      *
4020      * <p>This function only returns the simple, single-code point case mapping.
4021      * Full case mappings should be used whenever possible because they produce
4022      * better results by working on whole strings.
4023      * They take into account the string context and the language and can map
4024      * to a result string with a different length as appropriate.
4025      * Full case mappings are applied by the case mapping functions
4026      * that take String parameters rather than code points (int).
4027      * See also the User Guide chapter on C/POSIX migration:
4028      * http://www.icu-project.org/userguide/posix.html#case_mappings
4029      *
4030      * @param ch code point  whose title case is to be retrieved
4031      * @return titlecase code point
4032      * @stable ICU 2.1
4033      */
toTitleCase(int ch)4034     public static int toTitleCase(int ch) {
4035         return UCaseProps.INSTANCE.totitle(ch);
4036     }
4037 
4038     /**
4039      * Converts the character argument to uppercase.
4040      * If no uppercase is available, the character itself is returned.
4041      * Up-to-date Unicode implementation of java.lang.Character.toUpperCase()
4042      *
4043      * <p>This function only returns the simple, single-code point case mapping.
4044      * Full case mappings should be used whenever possible because they produce
4045      * better results by working on whole strings.
4046      * They take into account the string context and the language and can map
4047      * to a result string with a different length as appropriate.
4048      * Full case mappings are applied by the case mapping functions
4049      * that take String parameters rather than code points (int).
4050      * See also the User Guide chapter on C/POSIX migration:
4051      * http://www.icu-project.org/userguide/posix.html#case_mappings
4052      *
4053      * @param ch code point whose uppercase is to be retrieved
4054      * @return uppercase code point
4055      * @stable ICU 2.1
4056      */
toUpperCase(int ch)4057     public static int toUpperCase(int ch) {
4058         return UCaseProps.INSTANCE.toupper(ch);
4059     }
4060 
4061     // extra methods not in java.lang.Character --------------------------
4062 
4063     /**
4064      * {@icu} Determines if the code point is a supplementary character.
     * A code point is a supplementary character if and only if it is greater
     * than or equal to <a href=#SUPPLEMENTARY_MIN_VALUE>SUPPLEMENTARY_MIN_VALUE</a>
     * and no greater than UCharacter.MAX_VALUE.
4067      * @param ch code point to be determined if it is in the supplementary
4068      *        plane
4069      * @return true if code point is a supplementary character
4070      * @stable ICU 2.1
4071      */
isSupplementary(int ch)4072     public static boolean isSupplementary(int ch)
4073     {
4074         return ch >= UCharacter.SUPPLEMENTARY_MIN_VALUE &&
4075                 ch <= UCharacter.MAX_VALUE;
4076     }
4077 
4078     /**
     * {@icu} Determines if the code point is in the Basic Multilingual Plane (BMP).
4080      * @param ch code point to be determined if it is not a supplementary
4081      *        character
4082      * @return true if code point is not a supplementary character
4083      * @stable ICU 2.1
4084      */
isBMP(int ch)4085     public static boolean isBMP(int ch)
4086     {
4087         return (ch >= 0 && ch <= LAST_CHAR_MASK_);
4088     }
4089 
4090     /**
4091      * {@icu} Determines whether the specified code point is a printable character
4092      * according to the Unicode standard.
4093      * @param ch code point to be determined if it is printable
4094      * @return true if the code point is a printable character
4095      * @stable ICU 2.1
4096      */
isPrintable(int ch)4097     public static boolean isPrintable(int ch)
4098     {
4099         int cat = getType(ch);
4100         // if props == 0, it will just fall through and return false
4101         return (cat != UCharacterCategory.UNASSIGNED &&
4102                 cat != UCharacterCategory.CONTROL &&
4103                 cat != UCharacterCategory.FORMAT &&
4104                 cat != UCharacterCategory.PRIVATE_USE &&
4105                 cat != UCharacterCategory.SURROGATE &&
4106                 cat != UCharacterCategory.GENERAL_OTHER_TYPES);
4107     }
4108 
4109     /**
4110      * {@icu} Determines whether the specified code point is of base form.
4111      * A code point of base form does not graphically combine with preceding
4112      * characters, and is neither a control nor a format character.
4113      * @param ch code point to be determined if it is of base form
4114      * @return true if the code point is of base form
4115      * @stable ICU 2.1
4116      */
isBaseForm(int ch)4117     public static boolean isBaseForm(int ch)
4118     {
4119         int cat = getType(ch);
4120         // if props == 0, it will just fall through and return false
4121         return cat == UCharacterCategory.DECIMAL_DIGIT_NUMBER ||
4122                 cat == UCharacterCategory.OTHER_NUMBER ||
4123                 cat == UCharacterCategory.LETTER_NUMBER ||
4124                 cat == UCharacterCategory.UPPERCASE_LETTER ||
4125                 cat == UCharacterCategory.LOWERCASE_LETTER ||
4126                 cat == UCharacterCategory.TITLECASE_LETTER ||
4127                 cat == UCharacterCategory.MODIFIER_LETTER ||
4128                 cat == UCharacterCategory.OTHER_LETTER ||
4129                 cat == UCharacterCategory.NON_SPACING_MARK ||
4130                 cat == UCharacterCategory.ENCLOSING_MARK ||
4131                 cat == UCharacterCategory.COMBINING_SPACING_MARK;
4132     }
4133 
4134     /**
     * {@icu} Returns the bidirectional property of a code point.
4136      * For example, 0x0041 (letter A) has the LEFT_TO_RIGHT directional
4137      * property.<br>
4138      * Result returned belongs to the interface
4139      * <a href=UCharacterDirection.html>UCharacterDirection</a>
     * @param ch the code point whose direction is to be determined
4141      * @return direction constant from UCharacterDirection.
4142      * @stable ICU 2.1
4143      */
getDirection(int ch)4144     public static int getDirection(int ch)
4145     {
4146         return UBiDiProps.INSTANCE.getClass(ch);
4147     }
4148 
4149     /**
4150      * Determines whether the code point has the "mirrored" property.
4151      * This property is set for characters that are commonly used in
4152      * Right-To-Left contexts and need to be displayed with a "mirrored"
4153      * glyph.
4154      * @param ch code point whose mirror is to be determined
4155      * @return true if the code point has the "mirrored" property
4156      * @stable ICU 2.1
4157      */
isMirrored(int ch)4158     public static boolean isMirrored(int ch)
4159     {
4160         return UBiDiProps.INSTANCE.isMirrored(ch);
4161     }
4162 
4163     /**
4164      * {@icu} Maps the specified code point to a "mirror-image" code point.
4165      * For code points with the "mirrored" property, implementations sometimes
4166      * need a "poor man's" mapping to another code point such that the default
4167      * glyph may serve as the mirror-image of the default glyph of the
4168      * specified code point.<br>
4169      * This is useful for text conversion to and from codepages with visual
4170      * order, and for displays without glyph selection capabilities.
4171      * @param ch code point whose mirror is to be retrieved
4172      * @return another code point that may serve as a mirror-image substitute,
4173      *         or ch itself if there is no such mapping or ch does not have the
4174      *         "mirrored" property
4175      * @stable ICU 2.1
4176      */
getMirror(int ch)4177     public static int getMirror(int ch)
4178     {
4179         return UBiDiProps.INSTANCE.getMirror(ch);
4180     }
4181 
4182     /**
4183      * {@icu} Maps the specified character to its paired bracket character.
4184      * For Bidi_Paired_Bracket_Type!=None, this is the same as getMirror(int).
4185      * Otherwise c itself is returned.
4186      * See http://www.unicode.org/reports/tr9/
4187      *
4188      * @param c the code point to be mapped
4189      * @return the paired bracket code point,
4190      *         or c itself if there is no such mapping
4191      *         (Bidi_Paired_Bracket_Type=None)
4192      *
4193      * @see UProperty#BIDI_PAIRED_BRACKET
4194      * @see UProperty#BIDI_PAIRED_BRACKET_TYPE
4195      * @see #getMirror(int)
4196      * @stable ICU 52
4197      */
getBidiPairedBracket(int c)4198     public static int getBidiPairedBracket(int c) {
4199         return UBiDiProps.INSTANCE.getPairedBracket(c);
4200     }
4201 
4202     /**
     * {@icu} Returns the combining class of the argument code point.
     * @param ch code point whose combining class is to be retrieved
     * @return the combining class of the code point
4206      * @stable ICU 2.1
4207      */
getCombiningClass(int ch)4208     public static int getCombiningClass(int ch)
4209     {
4210         return Normalizer2.getNFDInstance().getCombiningClass(ch);
4211     }
4212 
4213     /**
4214      * {@icu} A code point is illegal if and only if
4215      * <ul>
4216      * <li> Out of bounds, less than 0 or greater than UCharacter.MAX_VALUE
4217      * <li> A surrogate value, 0xD800 to 0xDFFF
4218      * <li> Not-a-character, having the form 0x xxFFFF or 0x xxFFFE
4219      * </ul>
4220      * Note: legal does not mean that it is assigned in this version of Unicode.
4221      * @param ch code point to determine if it is a legal code point by itself
4222      * @return true if and only if legal.
4223      * @stable ICU 2.1
4224      */
isLegal(int ch)4225     public static boolean isLegal(int ch)
4226     {
4227         if (ch < MIN_VALUE) {
4228             return false;
4229         }
4230         if (ch < UTF16.SURROGATE_MIN_VALUE) {
4231             return true;
4232         }
4233         if (ch <= UTF16.SURROGATE_MAX_VALUE) {
4234             return false;
4235         }
4236         if (UCharacterUtility.isNonCharacter(ch)) {
4237             return false;
4238         }
4239         return (ch <= MAX_VALUE);
4240     }
4241 
4242     /**
4243      * {@icu} A string is legal iff all its code points are legal.
4244      * A code point is illegal if and only if
4245      * <ul>
4246      * <li> Out of bounds, less than 0 or greater than UCharacter.MAX_VALUE
4247      * <li> A surrogate value, 0xD800 to 0xDFFF
4248      * <li> Not-a-character, having the form 0x xxFFFF or 0x xxFFFE
4249      * </ul>
4250      * Note: legal does not mean that it is assigned in this version of Unicode.
     * @param str containing code points to examine
4252      * @return true if and only if legal.
4253      * @stable ICU 2.1
4254      */
isLegal(String str)4255     public static boolean isLegal(String str)
4256     {
4257         int size = str.length();
4258         int codepoint;
4259         for (int i = 0; i < size; i ++)
4260         {
4261             codepoint = UTF16.charAt(str, i);
4262             if (!isLegal(codepoint)) {
4263                 return false;
4264             }
4265             if (isSupplementary(codepoint)) {
4266                 i ++;
4267             }
4268         }
4269         return true;
4270     }
4271 
4272     /**
4273      * {@icu} Returns the version of Unicode data used.
4274      * @return the unicode version number used
4275      * @stable ICU 2.1
4276      */
getUnicodeVersion()4277     public static VersionInfo getUnicodeVersion()
4278     {
4279         return UCharacterProperty.INSTANCE.m_unicodeVersion_;
4280     }
4281 
4282     /**
4283      * {@icu} Returns the most current Unicode name of the argument code point, or
4284      * null if the character is unassigned or outside the range
4285      * UCharacter.MIN_VALUE and UCharacter.MAX_VALUE or does not have a name.
4286      * <br>
4287      * Note calling any methods related to code point names, e.g. get*Name*()
4288      * incurs a one-time initialisation cost to construct the name tables.
4289      * @param ch the code point for which to get the name
4290      * @return most current Unicode name
4291      * @stable ICU 2.1
4292      */
getName(int ch)4293     public static String getName(int ch)
4294     {
4295         return UCharacterName.INSTANCE.getName(ch, UCharacterNameChoice.UNICODE_CHAR_NAME);
4296     }
4297 
4298     /**
4299      * {@icu} Returns the names for each of the characters in a string
4300      * @param s string to format
4301      * @param separator string to go between names
4302      * @return string of names
4303      * @stable ICU 3.8
4304      */
getName(String s, String separator)4305     public static String getName(String s, String separator) {
4306         if (s.length() == 1) { // handle common case
4307             return getName(s.charAt(0));
4308         }
4309         int cp;
4310         StringBuilder sb = new StringBuilder();
4311         for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) {
4312             cp = UTF16.charAt(s,i);
4313             if (i != 0) sb.append(separator);
4314             sb.append(UCharacter.getName(cp));
4315         }
4316         return sb.toString();
4317     }
4318 
4319     /**
4320      * {@icu} Returns null.
4321      * Used to return the Unicode_1_Name property value which was of little practical value.
4322      * @param ch the code point for which to get the name
4323      * @return null
4324      * @deprecated ICU 49
4325      */
4326     @Deprecated
getName1_0(int ch)4327     public static String getName1_0(int ch)
4328     {
4329         return null;
4330     }
4331 
4332     /**
     * {@icu} Returns a name for a valid codepoint. Unlike getName(int) and
     * getName1_0(int), this method will return a name even for codepoints that
     * are not assigned a name in UnicodeData.txt.
     * <p>
4337      * The names are returned in the following order.
4338      * <ul>
4339      * <li> Most current Unicode name if there is any
4340      * <li> Unicode 1.0 name if there is any
4341      * <li> Extended name in the form of
4342      *      "&lt;codepoint_type-codepoint_hex_digits&gt;". E.g., &lt;noncharacter-fffe&gt;
4343      * </ul>
4344      * Note calling any methods related to code point names, e.g. get*Name*()
4345      * incurs a one-time initialisation cost to construct the name tables.
4346      * @param ch the code point for which to get the name
4347      * @return a name for the argument codepoint
4348      * @stable ICU 2.6
4349      */
getExtendedName(int ch)4350     public static String getExtendedName(int ch) {
4351         return UCharacterName.INSTANCE.getName(ch, UCharacterNameChoice.EXTENDED_CHAR_NAME);
4352     }
4353 
4354     /**
4355      * {@icu} Returns the corrected name from NameAliases.txt if there is one.
4356      * Returns null if the character is unassigned or outside the range
4357      * UCharacter.MIN_VALUE and UCharacter.MAX_VALUE or does not have a name.
4358      * <br>
4359      * Note calling any methods related to code point names, e.g. get*Name*()
4360      * incurs a one-time initialisation cost to construct the name tables.
4361      * @param ch the code point for which to get the name alias
4362      * @return Unicode name alias, or null
4363      * @stable ICU 4.4
4364      */
getNameAlias(int ch)4365     public static String getNameAlias(int ch)
4366     {
4367         return UCharacterName.INSTANCE.getName(ch, UCharacterNameChoice.CHAR_NAME_ALIAS);
4368     }
4369 
4370     /**
4371      * {@icu} Returns null.
4372      * Used to return the ISO 10646 comment for a character.
4373      * The Unicode ISO_Comment property is deprecated and has no values.
4374      *
4375      * @param ch The code point for which to get the ISO comment.
4376      *           It must be the case that {@code 0 <= ch <= 0x10ffff}.
4377      * @return null
4378      * @deprecated ICU 49
4379      */
4380     @Deprecated
getISOComment(int ch)4381     public static String getISOComment(int ch)
4382     {
4383         return null;
4384     }
4385 
4386     /**
     * {@icu} <p>Finds a Unicode code point by its most current Unicode name and
     * returns its code point value. All Unicode names are in uppercase.</p>
4389      * Note calling any methods related to code point names, e.g. get*Name*()
4390      * incurs a one-time initialisation cost to construct the name tables.
4391      * @param name most current Unicode character name whose code point is to
4392      *        be returned
4393      * @return code point or -1 if name is not found
4394      * @stable ICU 2.1
4395      */
getCharFromName(String name)4396     public static int getCharFromName(String name){
4397         return UCharacterName.INSTANCE.getCharFromName(
4398                 UCharacterNameChoice.UNICODE_CHAR_NAME, name);
4399     }
4400 
4401     /**
4402      * {@icu} Returns -1.
4403      * <p>Used to find a Unicode character by its version 1.0 Unicode name and return
4404      * its code point value.</p>
4405      * @param name Unicode 1.0 code point name whose code point is to be
4406      *             returned
4407      * @return -1
4408      * @deprecated ICU 49
4409      * @see #getName1_0(int)
4410      */
4411     @Deprecated
getCharFromName1_0(String name)4412     public static int getCharFromName1_0(String name){
4413         return -1;
4414     }
4415 
4416     /**
     * {@icu} <p>Finds a Unicode character by any of its names and returns its code
     * point value. All Unicode names are in uppercase.
4419      * Extended names are all lowercase except for numbers and are contained
4420      * within angle brackets.</p>
4421      * The names are searched in the following order
4422      * <ul>
4423      * <li> Most current Unicode name if there is any
4424      * <li> Unicode 1.0 name if there is any
     * <li> Extended name in the form of
     *      "&lt;codepoint_type-codepoint_hex_digits&gt;". E.g. &lt;noncharacter-FFFE&gt;
4427      * </ul>
4428      * Note calling any methods related to code point names, e.g. get*Name*()
4429      * incurs a one-time initialisation cost to construct the name tables.
4430      * @param name codepoint name
4431      * @return code point associated with the name or -1 if the name is not
4432      *         found.
4433      * @stable ICU 2.6
4434      */
getCharFromExtendedName(String name)4435     public static int getCharFromExtendedName(String name){
4436         return UCharacterName.INSTANCE.getCharFromName(
4437                 UCharacterNameChoice.EXTENDED_CHAR_NAME, name);
4438     }
4439 
4440     /**
4441      * {@icu} <p>Find a Unicode character by its corrected name alias and return
4442      * its code point value. All Unicode names are in uppercase.</p>
4443      * Note calling any methods related to code point names, e.g. get*Name*()
4444      * incurs a one-time initialisation cost to construct the name tables.
4445      * @param name Unicode name alias whose code point is to be returned
4446      * @return code point or -1 if name is not found
4447      * @stable ICU 4.4
4448      */
getCharFromNameAlias(String name)4449     public static int getCharFromNameAlias(String name){
4450         return UCharacterName.INSTANCE.getCharFromName(UCharacterNameChoice.CHAR_NAME_ALIAS, name);
4451     }
4452 
4453     /**
4454      * {@icu} Return the Unicode name for a given property, as given in the
4455      * Unicode database file PropertyAliases.txt.  Most properties
4456      * have more than one name.  The nameChoice determines which one
4457      * is returned.
4458      *
4459      * In addition, this function maps the property
4460      * UProperty.GENERAL_CATEGORY_MASK to the synthetic names "gcm" /
4461      * "General_Category_Mask".  These names are not in
4462      * PropertyAliases.txt.
4463      *
4464      * @param property UProperty selector.
4465      *
4466      * @param nameChoice UProperty.NameChoice selector for which name
4467      * to get.  All properties have a long name.  Most have a short
4468      * name, but some do not.  Unicode allows for additional names; if
4469      * present these will be returned by UProperty.NameChoice.LONG + i,
4470      * where i=1, 2,...
4471      *
4472      * @return a name, or null if Unicode explicitly defines no name
4473      * ("n/a") for a given property/nameChoice.  If a given nameChoice
4474      * throws an exception, then all larger values of nameChoice will
4475      * throw an exception.  If null is returned for a given
4476      * nameChoice, then other nameChoice values may return non-null
4477      * results.
4478      *
4479      * @exception IllegalArgumentException thrown if property or
4480      * nameChoice are invalid.
4481      *
4482      * @see UProperty
4483      * @see UProperty.NameChoice
4484      * @stable ICU 2.4
4485      */
getPropertyName(int property, int nameChoice)4486     public static String getPropertyName(int property,
4487             int nameChoice) {
4488         return UPropertyAliases.INSTANCE.getPropertyName(property, nameChoice);
4489     }
4490 
4491     /**
4492      * {@icu} Return the UProperty selector for a given property name, as
4493      * specified in the Unicode database file PropertyAliases.txt.
4494      * Short, long, and any other variants are recognized.
4495      *
4496      * In addition, this function maps the synthetic names "gcm" /
4497      * "General_Category_Mask" to the property
4498      * UProperty.GENERAL_CATEGORY_MASK.  These names are not in
4499      * PropertyAliases.txt.
4500      *
4501      * @param propertyAlias the property name to be matched.  The name
4502      * is compared using "loose matching" as described in
4503      * PropertyAliases.txt.
4504      *
4505      * @return a UProperty enum.
4506      *
4507      * @exception IllegalArgumentException thrown if propertyAlias
4508      * is not recognized.
4509      *
4510      * @see UProperty
4511      * @stable ICU 2.4
4512      */
getPropertyEnum(CharSequence propertyAlias)4513     public static int getPropertyEnum(CharSequence propertyAlias) {
4514         int propEnum = UPropertyAliases.INSTANCE.getPropertyEnum(propertyAlias);
4515         if (propEnum == UProperty.UNDEFINED) {
4516             throw new IllegalIcuArgumentException("Invalid name: " + propertyAlias);
4517         }
4518         return propEnum;
4519     }
4520 
4521     /**
4522      * {@icu} Return the Unicode name for a given property value, as given in
4523      * the Unicode database file PropertyValueAliases.txt.  Most
4524      * values have more than one name.  The nameChoice determines
4525      * which one is returned.
4526      *
4527      * Note: Some of the names in PropertyValueAliases.txt can only be
4528      * retrieved using UProperty.GENERAL_CATEGORY_MASK, not
4529      * UProperty.GENERAL_CATEGORY.  These include: "C" / "Other", "L" /
4530      * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P"
4531      * / "Punctuation", "S" / "Symbol", and "Z" / "Separator".
4532      *
4533      * @param property UProperty selector constant.
4534      * UProperty.INT_START &lt;= property &lt; UProperty.INT_LIMIT or
4535      * UProperty.BINARY_START &lt;= property &lt; UProperty.BINARY_LIMIT or
     * UProperty.MASK_START &lt;= property &lt; UProperty.MASK_LIMIT.
4537      * If out of range, null is returned.
4538      *
4539      * @param value selector for a value for the given property.  In
4540      * general, valid values range from 0 up to some maximum.  There
4541      * are a few exceptions: (1.) UProperty.BLOCK values begin at the
4542      * non-zero value BASIC_LATIN.getID().  (2.)
4543      * UProperty.CANONICAL_COMBINING_CLASS values are not contiguous
4544      * and range from 0..240.  (3.)  UProperty.GENERAL_CATEGORY_MASK values
4545      * are mask values produced by left-shifting 1 by
4546      * UCharacter.getType().  This allows grouped categories such as
4547      * [:L:] to be represented.  Mask values are non-contiguous.
4548      *
4549      * @param nameChoice UProperty.NameChoice selector for which name
4550      * to get.  All values have a long name.  Most have a short name,
4551      * but some do not.  Unicode allows for additional names; if
4552      * present these will be returned by UProperty.NameChoice.LONG + i,
4553      * where i=1, 2,...
4554      *
4555      * @return a name, or null if Unicode explicitly defines no name
4556      * ("n/a") for a given property/value/nameChoice.  If a given
4557      * nameChoice throws an exception, then all larger values of
4558      * nameChoice will throw an exception.  If null is returned for a
4559      * given nameChoice, then other nameChoice values may return
4560      * non-null results.
4561      *
4562      * @exception IllegalArgumentException thrown if property, value,
4563      * or nameChoice are invalid.
4564      *
4565      * @see UProperty
4566      * @see UProperty.NameChoice
4567      * @stable ICU 2.4
4568      */
getPropertyValueName(int property, int value, int nameChoice)4569     public static String getPropertyValueName(int property,
4570             int value,
4571             int nameChoice)
4572     {
4573         if ((property == UProperty.CANONICAL_COMBINING_CLASS
4574                 || property == UProperty.LEAD_CANONICAL_COMBINING_CLASS
4575                 || property == UProperty.TRAIL_CANONICAL_COMBINING_CLASS)
4576                 && value >= UCharacter.getIntPropertyMinValue(
4577                         UProperty.CANONICAL_COMBINING_CLASS)
4578                         && value <= UCharacter.getIntPropertyMaxValue(
4579                                 UProperty.CANONICAL_COMBINING_CLASS)
4580                                 && nameChoice >= 0 && nameChoice < UProperty.NameChoice.COUNT) {
4581             // this is hard coded for the valid cc
4582             // because PropertyValueAliases.txt does not contain all of them
4583             try {
4584                 return UPropertyAliases.INSTANCE.getPropertyValueName(property, value,
4585                         nameChoice);
4586             }
4587             catch (IllegalArgumentException e) {
4588                 return null;
4589             }
4590         }
4591         return UPropertyAliases.INSTANCE.getPropertyValueName(property, value, nameChoice);
4592     }
4593 
4594     /**
4595      * {@icu} Return the property value integer for a given value name, as
4596      * specified in the Unicode database file PropertyValueAliases.txt.
4597      * Short, long, and any other variants are recognized.
4598      *
4599      * Note: Some of the names in PropertyValueAliases.txt will only be
4600      * recognized with UProperty.GENERAL_CATEGORY_MASK, not
4601      * UProperty.GENERAL_CATEGORY.  These include: "C" / "Other", "L" /
4602      * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P"
4603      * / "Punctuation", "S" / "Symbol", and "Z" / "Separator".
4604      *
4605      * @param property UProperty selector constant.
4606      * UProperty.INT_START &lt;= property &lt; UProperty.INT_LIMIT or
4607      * UProperty.BINARY_START &lt;= property &lt; UProperty.BINARY_LIMIT or
     * UProperty.MASK_START &lt;= property &lt; UProperty.MASK_LIMIT.
4609      * Only these properties can be enumerated.
4610      *
4611      * @param valueAlias the value name to be matched.  The name is
4612      * compared using "loose matching" as described in
4613      * PropertyValueAliases.txt.
4614      *
     * @return a value integer.  Note: UProperty.GENERAL_CATEGORY_MASK
     * values are mask values produced by left-shifting 1 by
     * UCharacter.getType().  This allows grouped categories such as
     * [:L:] to be represented.
4619      *
4620      * @see UProperty
4621      * @throws IllegalArgumentException if property is not a valid UProperty
4622      *         selector or valueAlias is not a value of this property
4623      * @stable ICU 2.4
4624      */
getPropertyValueEnum(int property, CharSequence valueAlias)4625     public static int getPropertyValueEnum(int property, CharSequence valueAlias) {
4626         int propEnum = UPropertyAliases.INSTANCE.getPropertyValueEnum(property, valueAlias);
4627         if (propEnum == UProperty.UNDEFINED) {
4628             throw new IllegalIcuArgumentException("Invalid name: " + valueAlias);
4629         }
4630         return propEnum;
4631     }
4632 
4633     /**
4634      * Same as {@link #getPropertyValueEnum(int, CharSequence)}, except doesn't throw exception. Instead, returns UProperty.UNDEFINED.
4635      * @param property  Same as {@link #getPropertyValueEnum(int, CharSequence)}
4636      * @param valueAlias    Same as {@link #getPropertyValueEnum(int, CharSequence)}
4637      * @return returns UProperty.UNDEFINED if the value is not valid, otherwise the value.
4638      * @internal
4639      * @deprecated This API is ICU internal only.
4640      */
4641     @Deprecated
getPropertyValueEnumNoThrow(int property, CharSequence valueAlias)4642     public static int getPropertyValueEnumNoThrow(int property, CharSequence valueAlias) {
4643         return UPropertyAliases.INSTANCE.getPropertyValueEnumNoThrow(property, valueAlias);
4644     }
4645 
4646 
4647     /**
4648      * {@icu} Returns a code point corresponding to the two UTF16 characters.
4649      * @param lead the lead char
4650      * @param trail the trail char
4651      * @return code point if surrogate characters are valid.
4652      * @exception IllegalArgumentException thrown when argument characters do
4653      *            not form a valid codepoint
4654      * @stable ICU 2.1
4655      */
getCodePoint(char lead, char trail)4656     public static int getCodePoint(char lead, char trail)
4657     {
4658         if (UTF16.isLeadSurrogate(lead) && UTF16.isTrailSurrogate(trail)) {
4659             return UCharacterProperty.getRawSupplementary(lead, trail);
4660         }
4661         throw new IllegalArgumentException("Illegal surrogate characters");
4662     }
4663 
4664     /**
4665      * {@icu} Returns the code point corresponding to the UTF16 character.
4666      * @param char16 the UTF16 character
4667      * @return code point if argument is a valid character.
4668      * @exception IllegalArgumentException thrown when char16 is not a valid
4669      *            codepoint
4670      * @stable ICU 2.1
4671      */
getCodePoint(char char16)4672     public static int getCodePoint(char char16)
4673     {
4674         if (UCharacter.isLegal(char16)) {
4675             return char16;
4676         }
4677         throw new IllegalArgumentException("Illegal codepoint");
4678     }
4679 
4680     /**
4681      * Implementation of UCaseProps.ContextIterator, iterates over a String.
4682      * See ustrcase.c/utf16_caseContextIterator().
4683      */
4684     private static class StringContextIterator implements UCaseProps.ContextIterator {
4685         /**
4686          * Constructor.
4687          * @param s String to iterate over.
4688          */
StringContextIterator(String s)4689         StringContextIterator(String s) {
4690             this.s=s;
4691             limit=s.length();
4692             cpStart=cpLimit=index=0;
4693             dir=0;
4694         }
4695 
4696         /**
4697          * Set the iteration limit for nextCaseMapCP() to an index within the string.
4698          * If the limit parameter is negative or past the string, then the
4699          * string length is restored as the iteration limit.
4700          *
4701          * This limit does not affect the next() function which always
4702          * iterates to the very end of the string.
4703          *
4704          * @param lim The iteration limit.
4705          */
setLimit(int lim)4706         public void setLimit(int lim) {
4707             if(0<=lim && lim<=s.length()) {
4708                 limit=lim;
4709             } else {
4710                 limit=s.length();
4711             }
4712         }
4713 
4714         /**
4715          * Move to the iteration limit without fetching code points up to there.
4716          */
moveToLimit()4717         public void moveToLimit() {
4718             cpStart=cpLimit=limit;
4719         }
4720 
4721         /**
4722          * Iterate forward through the string to fetch the next code point
4723          * to be case-mapped, and set the context indexes for it.
4724          * Performance optimization, to save on function calls and redundant
4725          * tests. Combines UTF16.charAt(), UTF16.getCharCount(), and setIndex().
4726          *
4727          * When the iteration limit is reached (and -1 is returned),
4728          * getCPStart() will be at the iteration limit.
4729          *
4730          * Iteration with next() does not affect the position for nextCaseMapCP().
4731          *
4732          * @return The next code point to be case-mapped, or <0 when the iteration is done.
4733          */
nextCaseMapCP()4734         public int nextCaseMapCP() {
4735             cpStart=cpLimit;
4736             if(cpLimit<limit) {
4737                 int c=s.charAt(cpLimit++);
4738                 if(UTF16.LEAD_SURROGATE_MIN_VALUE<=c || c<=UTF16.TRAIL_SURROGATE_MAX_VALUE) {
4739                     char c2;
4740                     if( c<=UTF16.LEAD_SURROGATE_MAX_VALUE && cpLimit<limit &&
4741                             UTF16.TRAIL_SURROGATE_MIN_VALUE<=(c2=s.charAt(cpLimit)) &&
4742                             c2<=UTF16.TRAIL_SURROGATE_MAX_VALUE
4743                             ) {
4744                         // supplementary code point
4745                         ++cpLimit;
4746                         c=UCharacterProperty.getRawSupplementary((char)c, c2);
4747                         // else unpaired surrogate code point
4748                     }
4749                     // else BMP code point
4750                 }
4751                 return c;
4752             } else {
4753                 return -1;
4754             }
4755         }
4756 
4757         /**
4758          * Returns the start of the code point that was last returned
4759          * by nextCaseMapCP().
4760          */
getCPStart()4761         public int getCPStart() {
4762             return cpStart;
4763         }
4764 
4765         /**
4766          * Returns the limit of the code point that was last returned
4767          * by nextCaseMapCP().
4768          */
getCPLimit()4769         public int getCPLimit() {
4770             return cpLimit;
4771         }
4772 
4773         // implement UCaseProps.ContextIterator
4774         // The following code is not used anywhere in this private class
reset(int direction)4775         public void reset(int direction) {
4776             if(direction>0) {
4777                 /* reset for forward iteration */
4778                 dir=1;
4779                 index=cpLimit;
4780             } else if(direction<0) {
4781                 /* reset for backward iteration */
4782                 dir=-1;
4783                 index=cpStart;
4784             } else {
4785                 // not a valid direction
4786                 dir=0;
4787                 index=0;
4788             }
4789         }
4790 
next()4791         public int next() {
4792             int c;
4793 
4794             if(dir>0 && index<s.length()) {
4795                 c=UTF16.charAt(s, index);
4796                 index+=UTF16.getCharCount(c);
4797                 return c;
4798             } else if(dir<0 && index>0) {
4799                 c=UTF16.charAt(s, index-1);
4800                 index-=UTF16.getCharCount(c);
4801                 return c;
4802             }
4803             return -1;
4804         }
4805 
4806         // variables
4807         protected String s;
4808         protected int index, limit, cpStart, cpLimit;
4809         protected int dir; // 0=initial state  >0=forward  <0=backward
4810     }
4811 
4812     /**
4813      * Returns the uppercase version of the argument string.
4814      * Casing is dependent on the default locale and context-sensitive.
4815      * @param str source string to be performed on
4816      * @return uppercase version of the argument string
4817      * @stable ICU 2.1
4818      */
toUpperCase(String str)4819     public static String toUpperCase(String str)
4820     {
4821         return toUpperCase(ULocale.getDefault(), str);
4822     }
4823 
4824     /**
4825      * Returns the lowercase version of the argument string.
4826      * Casing is dependent on the default locale and context-sensitive
4827      * @param str source string to be performed on
4828      * @return lowercase version of the argument string
4829      * @stable ICU 2.1
4830      */
toLowerCase(String str)4831     public static String toLowerCase(String str)
4832     {
4833         return toLowerCase(ULocale.getDefault(), str);
4834     }
4835 
4836     /**
4837      * <p>Returns the titlecase version of the argument string.</p>
4838      * <p>Position for titlecasing is determined by the argument break
4839      * iterator, hence the user can customize his break iterator for
4840      * a specialized titlecasing. In this case only the forward iteration
4841      * needs to be implemented.
4842      * If the break iterator passed in is null, the default Unicode algorithm
4843      * will be used to determine the titlecase positions.
4844      * </p>
4845      * <p>Only positions returned by the break iterator will be title cased,
4846      * character in between the positions will all be in lower case.</p>
4847      * <p>Casing is dependent on the default locale and context-sensitive</p>
4848      * @param str source string to be performed on
4849      * @param breakiter break iterator to determine the positions in which
4850      *        the character should be title cased.
4851      * @return lowercase version of the argument string
4852      * @stable ICU 2.6
4853      */
toTitleCase(String str, BreakIterator breakiter)4854     public static String toTitleCase(String str, BreakIterator breakiter)
4855     {
4856         return toTitleCase(ULocale.getDefault(), str, breakiter);
4857     }
4858 
4859     /**
4860      * Returns the uppercase version of the argument string.
4861      * Casing is dependent on the argument locale and context-sensitive.
4862      * @param locale which string is to be converted in
4863      * @param str source string to be performed on
4864      * @return uppercase version of the argument string
4865      * @stable ICU 2.1
4866      */
toUpperCase(Locale locale, String str)4867     public static String toUpperCase(Locale locale, String str)
4868     {
4869         return toUpperCase(ULocale.forLocale(locale), str);
4870     }
4871 
4872     /**
4873      * Returns the uppercase version of the argument string.
4874      * Casing is dependent on the argument locale and context-sensitive.
4875      * @param locale which string is to be converted in
4876      * @param str source string to be performed on
4877      * @return uppercase version of the argument string
4878      * @stable ICU 3.2
4879      */
toUpperCase(ULocale locale, String str)4880     public static String toUpperCase(ULocale locale, String str) {
4881         StringContextIterator iter = new StringContextIterator(str);
4882         StringBuilder result = new StringBuilder(str.length());
4883         int[] locCache = new int[1];
4884         int c;
4885 
4886         if (locale == null) {
4887             locale = ULocale.getDefault();
4888         }
4889         locCache[0]=0;
4890 
4891         while((c=iter.nextCaseMapCP())>=0) {
4892             c = UCaseProps.INSTANCE.toFullUpper(c, iter, result, locale, locCache);
4893 
4894             /* decode the result */
4895             if(c<0) {
4896                 /* (not) original code point */
4897                 c=~c;
4898             } else if(c<=UCaseProps.MAX_STRING_LENGTH) {
4899                 /* mapping already appended to result */
4900                 continue;
4901                 /* } else { append single-code point mapping */
4902             }
4903             result.appendCodePoint(c);
4904         }
4905         return result.toString();
4906     }
4907 
4908     /**
4909      * Returns the lowercase version of the argument string.
4910      * Casing is dependent on the argument locale and context-sensitive
4911      * @param locale which string is to be converted in
4912      * @param str source string to be performed on
4913      * @return lowercase version of the argument string
4914      * @stable ICU 2.1
4915      */
toLowerCase(Locale locale, String str)4916     public static String toLowerCase(Locale locale, String str)
4917     {
4918         return toLowerCase(ULocale.forLocale(locale), str);
4919     }
4920 
4921     /**
4922      * Returns the lowercase version of the argument string.
4923      * Casing is dependent on the argument locale and context-sensitive
4924      * @param locale which string is to be converted in
4925      * @param str source string to be performed on
4926      * @return lowercase version of the argument string
4927      * @stable ICU 3.2
4928      */
toLowerCase(ULocale locale, String str)4929     public static String toLowerCase(ULocale locale, String str) {
4930         StringContextIterator iter = new StringContextIterator(str);
4931         StringBuilder result = new StringBuilder(str.length());
4932         int[] locCache = new int[1];
4933         int c;
4934 
4935         if (locale == null) {
4936             locale = ULocale.getDefault();
4937         }
4938         locCache[0]=0;
4939 
4940         while((c=iter.nextCaseMapCP())>=0) {
4941             c = UCaseProps.INSTANCE.toFullLower(c, iter, result, locale, locCache);
4942 
4943             /* decode the result */
4944             if(c<0) {
4945                 /* (not) original code point */
4946                 c=~c;
4947             } else if(c<=UCaseProps.MAX_STRING_LENGTH) {
4948                 /* mapping already appended to result */
4949                 continue;
4950                 /* } else { append single-code point mapping */
4951             }
4952             result.appendCodePoint(c);
4953         }
4954         return result.toString();
4955     }
4956 
4957     /**
4958      * <p>Returns the titlecase version of the argument string.</p>
4959      * <p>Position for titlecasing is determined by the argument break
4960      * iterator, hence the user can customize his break iterator for
4961      * a specialized titlecasing. In this case only the forward iteration
4962      * needs to be implemented.
4963      * If the break iterator passed in is null, the default Unicode algorithm
4964      * will be used to determine the titlecase positions.
4965      * </p>
4966      * <p>Only positions returned by the break iterator will be title cased,
4967      * character in between the positions will all be in lower case.</p>
4968      * <p>Casing is dependent on the argument locale and context-sensitive</p>
4969      * @param locale which string is to be converted in
4970      * @param str source string to be performed on
4971      * @param breakiter break iterator to determine the positions in which
4972      *        the character should be title cased.
4973      * @return lowercase version of the argument string
4974      * @stable ICU 2.6
4975      */
toTitleCase(Locale locale, String str, BreakIterator breakiter)4976     public static String toTitleCase(Locale locale, String str,
4977             BreakIterator breakiter)
4978     {
4979         return toTitleCase(ULocale.forLocale(locale), str, breakiter);
4980     }
4981 
4982     /**
4983      * <p>Returns the titlecase version of the argument string.</p>
4984      * <p>Position for titlecasing is determined by the argument break
4985      * iterator, hence the user can customize his break iterator for
4986      * a specialized titlecasing. In this case only the forward iteration
4987      * needs to be implemented.
4988      * If the break iterator passed in is null, the default Unicode algorithm
4989      * will be used to determine the titlecase positions.
4990      * </p>
4991      * <p>Only positions returned by the break iterator will be title cased,
4992      * character in between the positions will all be in lower case.</p>
4993      * <p>Casing is dependent on the argument locale and context-sensitive</p>
4994      * @param locale which string is to be converted in
4995      * @param str source string to be performed on
4996      * @param titleIter break iterator to determine the positions in which
4997      *        the character should be title cased.
4998      * @return lowercase version of the argument string
4999      * @stable ICU 3.2
5000      */
toTitleCase(ULocale locale, String str, BreakIterator titleIter)5001     public static String toTitleCase(ULocale locale, String str,
5002             BreakIterator titleIter) {
5003         return toTitleCase(locale, str, titleIter, 0);
5004     }
5005 
    /**
     * <p>Returns the titlecase version of the argument string.</p>
     * <p>Position for titlecasing is determined by the argument break
     * iterator, hence the user can customize his break iterator for
     * a specialized titlecasing. In this case only the forward iteration
     * needs to be implemented.
     * If the break iterator passed in is null, the default Unicode algorithm
     * will be used to determine the titlecase positions.
     * </p>
     * <p>Only positions returned by the break iterator will be title cased,
     * character in between the positions will all be in lower case
     * (unless TITLECASE_NO_LOWERCASE is set, in which case they are copied
     * unchanged).</p>
     * <p>Casing is dependent on the argument locale and context-sensitive.
     * For the Dutch language ("nl"), a J/j directly following a titlecased
     * I at the start of a segment is emitted as uppercase J.</p>
     * <p>Note that setText(str) is called on the supplied break iterator.</p>
     * @param locale which string is to be converted in; the default locale
     *        is used if this is null
     * @param str source string to be performed on
     * @param titleIter break iterator to determine the positions in which
     *        the character should be title cased.
     * @param options bit set to modify the titlecasing operation
     * @return titlecase version of the argument string
     * @stable ICU 3.8
     * @see #TITLECASE_NO_LOWERCASE
     * @see #TITLECASE_NO_BREAK_ADJUSTMENT
     */
    public static String toTitleCase(ULocale locale, String str,
            BreakIterator titleIter,
            int options) {
        StringContextIterator iter = new StringContextIterator(str);
        StringBuilder result = new StringBuilder(str.length());
        int[] locCache = new int[1];
        int c, nc, srcLength = str.length();

        if (locale == null) {
            locale = ULocale.getDefault();
        }
        locCache[0]=0;

        /* fall back to word boundaries when the caller supplies no iterator */
        if(titleIter == null) {
            titleIter = BreakIterator.getWordInstance(locale);
        }
        titleIter.setText(str);

        int prev, titleStart, index;
        boolean isFirstIndex;
        boolean isDutch = locale.getLanguage().equals("nl");
        /* true until the Dutch IJ rule has fired once in the current segment */
        boolean FirstIJ = true;

        /* set up local variables */
        prev=0;
        isFirstIndex=true;

        /* titlecasing loop */
        while(prev<srcLength) {
            /* find next index where to titlecase */
            if(isFirstIndex) {
                isFirstIndex=false;
                index=titleIter.first();
            } else {
                index=titleIter.next();
            }
            /* treat the end of iteration or an out-of-range boundary as end of string */
            if(index==BreakIterator.DONE || index>srcLength) {
                index=srcLength;
            }

            /*
             * Unicode 4 & 5 section 3.13 Default Case Operations:
             *
             * R3  toTitlecase(X): Find the word boundaries based on Unicode Standard Annex
             * #29, "Text Boundaries." Between each pair of word boundaries, find the first
             * cased character F. If F exists, map F to default_title(F); then map each
             * subsequent character C to default_lower(C).
             *
             * In this implementation, segment [prev..index[ into 3 parts:
             * a) uncased characters (copy as-is) [prev..titleStart[
             * b) first case letter (titlecase)         [titleStart..titleLimit[
             * c) subsequent characters (lowercase)                 [titleLimit..index[
             */
            if(prev<index) {
                /* find and copy uncased characters [prev..titleStart[ */
                iter.setLimit(index);
                c=iter.nextCaseMapCP();
                if((options&TITLECASE_NO_BREAK_ADJUSTMENT)==0
                        && UCaseProps.NONE==UCaseProps.INSTANCE.getType(c)) {
                    /* skip forward to the first cased code point or the segment end */
                    while((c=iter.nextCaseMapCP())>=0
                            && UCaseProps.NONE==UCaseProps.INSTANCE.getType(c)) {}
                    titleStart=iter.getCPStart();
                    if(prev<titleStart) {
                        result.append(str, prev, titleStart);
                    }
                } else {
                    titleStart=prev;
                }

                if(titleStart<index) {
                    /* new segment: re-arm the Dutch IJ rule */
                    FirstIJ = true;
                    /* titlecase c which is from titleStart */
                    c = UCaseProps.INSTANCE.toFullTitle(c, iter, result, locale, locCache);

                    /* decode the result and lowercase up to index */
                    for(;;) {
                        if(c<0) {
                            /* (not) original code point */
                            c=~c;
                            result.appendCodePoint(c);
                        } else if(c<=UCaseProps.MAX_STRING_LENGTH) {
                            /* mapping already appended to result */
                        } else {
                            /* append single-code point mapping */
                            result.appendCodePoint(c);
                        }

                        if((options&TITLECASE_NO_LOWERCASE)!=0) {
                            /* Optionally just copy the rest of the word unchanged. */

                            int titleLimit=iter.getCPLimit();
                            if(titleLimit<index) {
                                // TODO: With Java 5, this would want to be
                                // result.append(str, titleLimit, index);
                                String appendStr = str.substring(titleLimit,index);
                                /* Special Case - Dutch IJ Titlecasing */
                                if ( isDutch && c == 0x0049 && appendStr.startsWith("j")) {
                                    appendStr = "J" + appendStr.substring(1);
                                }
                                result.append(appendStr);
                            }
                            /* the rest of the segment is consumed; continue with the next one */
                            iter.moveToLimit();
                            break;
                        } else if((nc=iter.nextCaseMapCP())>=0) {
                            /* Dutch: J/j right after a just-emitted I (0x49) becomes J, once per segment */
                            if (isDutch && (nc == 0x004A ||  nc == 0x006A)
                                    && (c == 0x0049) && (FirstIJ == true)) {
                                c = 0x004A; /* J */
                                FirstIJ = false;
                            } else {
                                /* Normal operation: Lowercase the rest of the word. */
                                c = UCaseProps.INSTANCE.toFullLower(nc, iter, result, locale,
                                        locCache);
                            }
                        } else {
                            /* segment limit reached */
                            break;
                        }
                    }
                }
            }

            prev=index;
        }
        return result.toString();
    }
5152 
5153 
    // General-category bit mask (decimal digits, other letters, modifier
    // letters). toTitleFirst() stops scanning and returns the string
    // unchanged when it meets a code point in one of these categories.
    private static final int BREAK_MASK =
            (1<<UCharacterCategory.DECIMAL_DIGIT_NUMBER)
            | (1<<UCharacterCategory.OTHER_LETTER)
            | (1<<UCharacterCategory.MODIFIER_LETTER);
5158 
5159     /**
5160      * Return a string with just the first word titlecased, for menus and UI, etc. This does not affect most of the string,
5161      * and sometimes has no effect at all; the original string is returned whenever casing
5162      * would not be appropriate for the first word (such as for CJK characters or initial numbers).
5163      * Initial non-letters are skipped in order to find the character to change.
5164      * Characters past the first affected are left untouched: see also TITLECASE_NO_LOWERCASE.
5165      * <p>Examples:
5166      * <table border='1'><tr><th>Source</th><th>Result</th><th>Locale</th></tr>
5167      * <tr><td>anglo-American locale</td><td>Anglo-American locale</td></tr>
5168      * <tr><td>“contact us”</td><td>“Contact us”</td></tr>
5169      * <tr><td>49ers win!</td><td>49ers win!</td></tr>
5170      * <tr><td>丰(abc)</td><td>丰(abc)</td></tr>
5171      * <tr><td>«ijs»</td><td>«Ijs»</td></tr>
5172      * <tr><td>«ijs»</td><td>«IJs»</td><td>nl-BE</td></tr>
5173      * <tr><td>«ijs»</td><td>«İjs»</td><td>tr-DE</td></tr>
5174      * </table>
5175      * @param locale the locale for accessing exceptional behavior (eg for tr).
5176      * @param str the source string to change
5177      * @return the modified string, or the original if no modifications were necessary.
5178      * @internal
5179      * @deprecated ICU internal only
5180      */
5181     @Deprecated
toTitleFirst(ULocale locale, String str)5182     public static String toTitleFirst(ULocale locale, String str) {
5183         int c = 0;
5184         for (int i = 0; i < str.length(); i += UCharacter.charCount(c)) {
5185             c = UCharacter.codePointAt(str, i);
5186             int propertyMask = UCharacter.getIntPropertyValue(c, UProperty.GENERAL_CATEGORY_MASK);
5187             if ((propertyMask & BREAK_MASK) != 0) { // handle "49ers", initial CJK
5188                 break;
5189             }
5190             if (UCaseProps.INSTANCE.getType(c) == UCaseProps.NONE) {
5191                 continue;
5192             }
5193 
5194             // we now have the first cased character
5195             // What we really want is something like:
5196             // String titled = UCharacter.toTitleCase(locale, str, i, outputCharsTaken);
5197             // That is, just give us the titlecased string, for the locale, at i and following,
5198             // and tell us how many characters are replaced.
5199             // The following won't work completely: it needs some more substantial changes to UCaseProps
5200 
5201             String substring = str.substring(i, i+UCharacter.charCount(c));
5202             String titled = UCharacter.toTitleCase(locale, substring, BreakIterator.getSentenceInstance(locale), 0);
5203 
5204             // skip if no change
5205             if (titled.codePointAt(0) == c) {
5206                 // Using 0 is safe, since any change in titling will not have first initial character
5207                 break;
5208             }
5209             StringBuilder result = new StringBuilder(str.length()).append(str, 0, i);
5210             int startOfSuffix;
5211 
5212             // handle dutch, but check first for 'i', since that's faster. Should be built into UCaseProps.
5213 
5214             if (c == 'i' && locale.getLanguage().equals("nl") && i < str.length() && str.charAt(i+1) == 'j') {
5215                 result.append("IJ");
5216                 startOfSuffix = 2;
5217             } else {
5218                 result.append(titled);
5219                 startOfSuffix = i + UCharacter.charCount(c);
5220             }
5221 
5222             // add the remainder, and return
5223             return result.append(str, startOfSuffix, str.length()).toString();
5224         }
5225         return str; // no change
5226     }
5227 
5228     /**
5229      * {@icu} <p>Returns the titlecase version of the argument string.</p>
5230      * <p>Position for titlecasing is determined by the argument break
5231      * iterator, hence the user can customize his break iterator for
5232      * a specialized titlecasing. In this case only the forward iteration
5233      * needs to be implemented.
5234      * If the break iterator passed in is null, the default Unicode algorithm
5235      * will be used to determine the titlecase positions.
5236      * </p>
5237      * <p>Only positions returned by the break iterator will be title cased,
5238      * character in between the positions will all be in lower case.</p>
5239      * <p>Casing is dependent on the argument locale and context-sensitive</p>
5240      * @param locale which string is to be converted in
5241      * @param str source string to be performed on
5242      * @param titleIter break iterator to determine the positions in which
5243      *        the character should be title cased.
5244      * @param options bit set to modify the titlecasing operation
5245      * @return lowercase version of the argument string
5246      * @see #TITLECASE_NO_LOWERCASE
5247      * @see #TITLECASE_NO_BREAK_ADJUSTMENT
5248      * @draft ICU 54
5249      * @provisional This API might change or be removed in a future release.
5250      */
toTitleCase(Locale locale, String str, BreakIterator titleIter, int options)5251     public static String toTitleCase(Locale locale, String str,
5252             BreakIterator titleIter,
5253             int options) {
5254         return toTitleCase(ULocale.forLocale(locale), str, titleIter, options);
5255     }
5256 
5257     /**
5258      * {@icu} The given character is mapped to its case folding equivalent according
5259      * to UnicodeData.txt and CaseFolding.txt; if the character has no case
5260      * folding equivalent, the character itself is returned.
5261      *
5262      * <p>This function only returns the simple, single-code point case mapping.
5263      * Full case mappings should be used whenever possible because they produce
5264      * better results by working on whole strings.
5265      * They can map to a result string with a different length as appropriate.
5266      * Full case mappings are applied by the case mapping functions
5267      * that take String parameters rather than code points (int).
5268      * See also the User Guide chapter on C/POSIX migration:
5269      * http://www.icu-project.org/userguide/posix.html#case_mappings
5270      *
5271      * @param ch             the character to be converted
5272      * @param defaultmapping Indicates whether the default mappings defined in
5273      *                       CaseFolding.txt are to be used, otherwise the
5274      *                       mappings for dotted I and dotless i marked with
5275      *                       'T' in CaseFolding.txt are included.
5276      * @return               the case folding equivalent of the character, if
5277      *                       any; otherwise the character itself.
5278      * @see                  #foldCase(String, boolean)
5279      * @stable ICU 2.1
5280      */
foldCase(int ch, boolean defaultmapping)5281     public static int foldCase(int ch, boolean defaultmapping) {
5282         return foldCase(ch, defaultmapping ? FOLD_CASE_DEFAULT : FOLD_CASE_EXCLUDE_SPECIAL_I);
5283     }
5284 
5285     /**
5286      * {@icu} The given string is mapped to its case folding equivalent according to
5287      * UnicodeData.txt and CaseFolding.txt; if any character has no case
5288      * folding equivalent, the character itself is returned.
5289      * "Full", multiple-code point case folding mappings are returned here.
5290      * For "simple" single-code point mappings use the API
5291      * foldCase(int ch, boolean defaultmapping).
5292      * @param str            the String to be converted
5293      * @param defaultmapping Indicates whether the default mappings defined in
5294      *                       CaseFolding.txt are to be used, otherwise the
5295      *                       mappings for dotted I and dotless i marked with
5296      *                       'T' in CaseFolding.txt are included.
5297      * @return               the case folding equivalent of the character, if
5298      *                       any; otherwise the character itself.
5299      * @see                  #foldCase(int, boolean)
5300      * @stable ICU 2.1
5301      */
foldCase(String str, boolean defaultmapping)5302     public static String foldCase(String str, boolean defaultmapping) {
5303         return foldCase(str, defaultmapping ? FOLD_CASE_DEFAULT : FOLD_CASE_EXCLUDE_SPECIAL_I);
5304     }
5305 
    /**
     * {@icu} Option value for case folding: use default mappings defined in
     * CaseFolding.txt.
     * This is the option the boolean foldCase overloads select when their
     * defaultmapping argument is true.
     * @stable ICU 2.6
     */
    public static final int FOLD_CASE_DEFAULT    =      0x0000;
    /**
     * {@icu} Option value for case folding:
     * Use the modified set of mappings provided in CaseFolding.txt to handle dotted I
     * and dotless i appropriately for Turkic languages (tr, az).
     * This is the option the boolean foldCase overloads select when their
     * defaultmapping argument is false.
     *
     * <p>Before Unicode 3.2, CaseFolding.txt contained mappings marked with 'I' that
     * were to be included for default mappings and
     * excluded for the Turkic-specific mappings.
     *
     * <p>The Unicode 3.2 CaseFolding.txt instead contains mappings marked with 'T' that
     * are to be excluded for default mappings and
     * included for the Turkic-specific mappings.
     *
     * @stable ICU 2.6
     */
    public static final int FOLD_CASE_EXCLUDE_SPECIAL_I = 0x0001;
5328 
5329     /**
5330      * {@icu} The given character is mapped to its case folding equivalent according
5331      * to UnicodeData.txt and CaseFolding.txt; if the character has no case
5332      * folding equivalent, the character itself is returned.
5333      *
5334      * <p>This function only returns the simple, single-code point case mapping.
5335      * Full case mappings should be used whenever possible because they produce
5336      * better results by working on whole strings.
5337      * They can map to a result string with a different length as appropriate.
5338      * Full case mappings are applied by the case mapping functions
5339      * that take String parameters rather than code points (int).
5340      * See also the User Guide chapter on C/POSIX migration:
5341      * http://www.icu-project.org/userguide/posix.html#case_mappings
5342      *
5343      * @param ch the character to be converted
5344      * @param options A bit set for special processing. Currently the recognised options
5345      * are FOLD_CASE_EXCLUDE_SPECIAL_I and FOLD_CASE_DEFAULT
5346      * @return the case folding equivalent of the character, if any; otherwise the
5347      * character itself.
5348      * @see #foldCase(String, boolean)
5349      * @stable ICU 2.6
5350      */
foldCase(int ch, int options)5351     public static int foldCase(int ch, int options) {
5352         return UCaseProps.INSTANCE.fold(ch, options);
5353     }
5354 
5355     /**
5356      * {@icu} The given string is mapped to its case folding equivalent according to
5357      * UnicodeData.txt and CaseFolding.txt; if any character has no case
5358      * folding equivalent, the character itself is returned.
5359      * "Full", multiple-code point case folding mappings are returned here.
5360      * For "simple" single-code point mappings use the API
5361      * foldCase(int ch, boolean defaultmapping).
5362      * @param str the String to be converted
5363      * @param options A bit set for special processing. Currently the recognised options
5364      *                are FOLD_CASE_EXCLUDE_SPECIAL_I and FOLD_CASE_DEFAULT
5365      * @return the case folding equivalent of the character, if any; otherwise the
5366      *         character itself.
5367      * @see #foldCase(int, boolean)
5368      * @stable ICU 2.6
5369      */
foldCase(String str, int options)5370     public static final String foldCase(String str, int options) {
5371         StringBuilder result = new StringBuilder(str.length());
5372         int c, i, length;
5373 
5374         length = str.length();
5375         for(i=0; i<length;) {
5376             c=UTF16.charAt(str, i);
5377             i+=UTF16.getCharCount(c);
5378             c = UCaseProps.INSTANCE.toFullFolding(c, result, options);
5379 
5380             /* decode the result */
5381             if(c<0) {
5382                 /* (not) original code point */
5383                 c=~c;
5384             } else if(c<=UCaseProps.MAX_STRING_LENGTH) {
5385                 /* mapping already appended to result */
5386                 continue;
5387                 /* } else { append single-code point mapping */
5388             }
5389             result.appendCodePoint(c);
5390         }
5391         return result.toString();
5392     }
5393 
5394     /**
5395      * {@icu} Returns the numeric value of a Han character.
5396      *
5397      * <p>This returns the value of Han 'numeric' code points,
5398      * including those for zero, ten, hundred, thousand, ten thousand,
5399      * and hundred million.
5400      * This includes both the standard and 'checkwriting'
5401      * characters, the 'big circle' zero character, and the standard
5402      * zero character.
5403      *
5404      * <p>Note: The Unicode Standard has numeric values for more
5405      * Han characters recognized by this method
5406      * (see {@link #getNumericValue(int)} and the UCD file DerivedNumericValues.txt),
5407      * and a {@link com.ibm.icu.text.NumberFormat} can be used with
5408      * a Chinese {@link com.ibm.icu.text.NumberingSystem}.
5409      *
5410      * @param ch code point to query
5411      * @return value if it is a Han 'numeric character,' otherwise return -1.
5412      * @stable ICU 2.4
5413      */
getHanNumericValue(int ch)5414     public static int getHanNumericValue(int ch)
5415     {
5416         switch(ch)
5417         {
5418         case IDEOGRAPHIC_NUMBER_ZERO_ :
5419         case CJK_IDEOGRAPH_COMPLEX_ZERO_ :
5420             return 0; // Han Zero
5421         case CJK_IDEOGRAPH_FIRST_ :
5422         case CJK_IDEOGRAPH_COMPLEX_ONE_ :
5423             return 1; // Han One
5424         case CJK_IDEOGRAPH_SECOND_ :
5425         case CJK_IDEOGRAPH_COMPLEX_TWO_ :
5426             return 2; // Han Two
5427         case CJK_IDEOGRAPH_THIRD_ :
5428         case CJK_IDEOGRAPH_COMPLEX_THREE_ :
5429             return 3; // Han Three
5430         case CJK_IDEOGRAPH_FOURTH_ :
5431         case CJK_IDEOGRAPH_COMPLEX_FOUR_ :
5432             return 4; // Han Four
5433         case CJK_IDEOGRAPH_FIFTH_ :
5434         case CJK_IDEOGRAPH_COMPLEX_FIVE_ :
5435             return 5; // Han Five
5436         case CJK_IDEOGRAPH_SIXTH_ :
5437         case CJK_IDEOGRAPH_COMPLEX_SIX_ :
5438             return 6; // Han Six
5439         case CJK_IDEOGRAPH_SEVENTH_ :
5440         case CJK_IDEOGRAPH_COMPLEX_SEVEN_ :
5441             return 7; // Han Seven
5442         case CJK_IDEOGRAPH_EIGHTH_ :
5443         case CJK_IDEOGRAPH_COMPLEX_EIGHT_ :
5444             return 8; // Han Eight
5445         case CJK_IDEOGRAPH_NINETH_ :
5446         case CJK_IDEOGRAPH_COMPLEX_NINE_ :
5447             return 9; // Han Nine
5448         case CJK_IDEOGRAPH_TEN_ :
5449         case CJK_IDEOGRAPH_COMPLEX_TEN_ :
5450             return 10;
5451         case CJK_IDEOGRAPH_HUNDRED_ :
5452         case CJK_IDEOGRAPH_COMPLEX_HUNDRED_ :
5453             return 100;
5454         case CJK_IDEOGRAPH_THOUSAND_ :
5455         case CJK_IDEOGRAPH_COMPLEX_THOUSAND_ :
5456             return 1000;
5457         case CJK_IDEOGRAPH_TEN_THOUSAND_ :
5458             return 10000;
5459         case CJK_IDEOGRAPH_HUNDRED_MILLION_ :
5460             return 100000000;
5461         }
5462         return -1; // no value
5463     }
5464 
5465     /**
5466      * {@icu} <p>Returns an iterator for character types, iterating over codepoints.</p>
5467      * Example of use:<br>
5468      * <pre>
5469      * RangeValueIterator iterator = UCharacter.getTypeIterator();
5470      * RangeValueIterator.Element element = new RangeValueIterator.Element();
5471      * while (iterator.next(element)) {
5472      *     System.out.println("Codepoint \\u" +
5473      *                        Integer.toHexString(element.start) +
5474      *                        " to codepoint \\u" +
5475      *                        Integer.toHexString(element.limit - 1) +
5476      *                        " has the character type " +
5477      *                        element.value);
5478      * }
5479      * </pre>
5480      * @return an iterator
5481      * @stable ICU 2.6
5482      */
getTypeIterator()5483     public static RangeValueIterator getTypeIterator()
5484     {
5485         return new UCharacterTypeIterator();
5486     }
5487 
5488     private static final class UCharacterTypeIterator implements RangeValueIterator {
UCharacterTypeIterator()5489         UCharacterTypeIterator() {
5490             reset();
5491         }
5492 
5493         // implements RangeValueIterator
next(Element element)5494         public boolean next(Element element) {
5495             if(trieIterator.hasNext() && !(range=trieIterator.next()).leadSurrogate) {
5496                 element.start=range.startCodePoint;
5497                 element.limit=range.endCodePoint+1;
5498                 element.value=range.value;
5499                 return true;
5500             } else {
5501                 return false;
5502             }
5503         }
5504 
5505         // implements RangeValueIterator
reset()5506         public void reset() {
5507             trieIterator=UCharacterProperty.INSTANCE.m_trie_.iterator(MASK_TYPE);
5508         }
5509 
5510         private Iterator<Trie2.Range> trieIterator;
5511         private Trie2.Range range;
5512 
5513         private static final class MaskType implements Trie2.ValueMapper {
5514             // Extracts the general category ("character type") from the trie value.
map(int value)5515             public int map(int value) {
5516                 return value & UCharacterProperty.TYPE_MASK;
5517             }
5518         }
5519         private static final MaskType MASK_TYPE=new MaskType();
5520     }
5521 
5522     /**
5523      * {@icu} <p>Returns an iterator for character names, iterating over codepoints.</p>
5524      * <p>This API only gets the iterator for the modern, most up-to-date
5525      * Unicode names. For older 1.0 Unicode names use get1_0NameIterator() or
5526      * for extended names use getExtendedNameIterator().</p>
5527      * Example of use:<br>
5528      * <pre>
5529      * ValueIterator iterator = UCharacter.getNameIterator();
5530      * ValueIterator.Element element = new ValueIterator.Element();
5531      * while (iterator.next(element)) {
5532      *     System.out.println("Codepoint \\u" +
5533      *                        Integer.toHexString(element.codepoint) +
5534      *                        " has the name " + (String)element.value);
5535      * }
5536      * </pre>
5537      * <p>The maximal range which the name iterator iterates is from
5538      * UCharacter.MIN_VALUE to UCharacter.MAX_VALUE.</p>
5539      * @return an iterator
5540      * @stable ICU 2.6
5541      */
getNameIterator()5542     public static ValueIterator getNameIterator(){
5543         return new UCharacterNameIterator(UCharacterName.INSTANCE,
5544                 UCharacterNameChoice.UNICODE_CHAR_NAME);
5545     }
5546 
5547     /**
5548      * {@icu} Returns an empty iterator.
5549      * <p>Used to return an iterator for the older 1.0 Unicode character names, iterating over codepoints.</p>
5550      * @return an empty iterator
5551      * @deprecated ICU 49
5552      * @see #getName1_0(int)
5553      */
5554     @Deprecated
getName1_0Iterator()5555     public static ValueIterator getName1_0Iterator(){
5556         return new DummyValueIterator();
5557     }
5558 
5559     private static final class DummyValueIterator implements ValueIterator {
next(Element element)5560         public boolean next(Element element) { return false; }
reset()5561         public void reset() {}
setRange(int start, int limit)5562         public void setRange(int start, int limit) {}
5563     }
5564 
5565     /**
5566      * {@icu} <p>Returns an iterator for character names, iterating over codepoints.</p>
5567      * <p>This API only gets the iterator for the extended names.
5568      * For modern, most up-to-date Unicode names use getNameIterator() or
5569      * for older 1.0 Unicode names use get1_0NameIterator().</p>
5570      * Example of use:<br>
5571      * <pre>
5572      * ValueIterator iterator = UCharacter.getExtendedNameIterator();
5573      * ValueIterator.Element element = new ValueIterator.Element();
5574      * while (iterator.next(element)) {
5575      *     System.out.println("Codepoint \\u" +
5576      *                        Integer.toHexString(element.codepoint) +
5577      *                        " has the name " + (String)element.value);
5578      * }
5579      * </pre>
5580      * <p>The maximal range which the name iterator iterates is from
5581      * @return an iterator
5582      * @stable ICU 2.6
5583      */
getExtendedNameIterator()5584     public static ValueIterator getExtendedNameIterator(){
5585         return new UCharacterNameIterator(UCharacterName.INSTANCE,
5586                 UCharacterNameChoice.EXTENDED_CHAR_NAME);
5587     }
5588 
5589     /**
5590      * {@icu} Returns the "age" of the code point.</p>
5591      * <p>The "age" is the Unicode version when the code point was first
5592      * designated (as a non-character or for Private Use) or assigned a
5593      * character.
5594      * <p>This can be useful to avoid emitting code points to receiving
5595      * processes that do not accept newer characters.</p>
5596      * <p>The data is from the UCD file DerivedAge.txt.</p>
5597      * @param ch The code point.
5598      * @return the Unicode version number
5599      * @stable ICU 2.6
5600      */
getAge(int ch)5601     public static VersionInfo getAge(int ch)
5602     {
5603         if (ch < MIN_VALUE || ch > MAX_VALUE) {
5604             throw new IllegalArgumentException("Codepoint out of bounds");
5605         }
5606         return UCharacterProperty.INSTANCE.getAge(ch);
5607     }
5608 
5609     /**
5610      * {@icu} <p>Check a binary Unicode property for a code point.</p>
5611      * <p>Unicode, especially in version 3.2, defines many more properties
5612      * than the original set in UnicodeData.txt.</p>
5613      * <p>This API is intended to reflect Unicode properties as defined in
5614      * the Unicode Character Database (UCD) and Unicode Technical Reports
5615      * (UTR).</p>
5616      * <p>For details about the properties see
5617      * <a href=http://www.unicode.org/>http://www.unicode.org/</a>.</p>
5618      * <p>For names of Unicode properties see the UCD file
5619      * PropertyAliases.txt.</p>
5620      * <p>This API does not check the validity of the codepoint.</p>
5621      * <p>Important: If ICU is built with UCD files from Unicode versions
5622      * below 3.2, then properties marked with "new" are not or
5623      * not fully available.</p>
5624      * @param ch code point to test.
5625      * @param property selector constant from com.ibm.icu.lang.UProperty,
5626      *        identifies which binary property to check.
5627      * @return true or false according to the binary Unicode property value
5628      *         for ch. Also false if property is out of bounds or if the
5629      *         Unicode version does not have data for the property at all, or
5630      *         not for this code point.
5631      * @see com.ibm.icu.lang.UProperty
5632      * @stable ICU 2.6
5633      */
hasBinaryProperty(int ch, int property)5634     public static boolean hasBinaryProperty(int ch, int property)
5635     {
5636         return UCharacterProperty.INSTANCE.hasBinaryProperty(ch, property);
5637     }
5638 
5639     /**
5640      * {@icu} <p>Check if a code point has the Alphabetic Unicode property.</p>
5641      * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.ALPHABETIC).</p>
5642      * <p>Different from UCharacter.isLetter(ch)!</p>
5643      * @stable ICU 2.6
5644      * @param ch codepoint to be tested
5645      */
isUAlphabetic(int ch)5646     public static boolean isUAlphabetic(int ch)
5647     {
5648         return hasBinaryProperty(ch, UProperty.ALPHABETIC);
5649     }
5650 
5651     /**
5652      * {@icu} <p>Check if a code point has the Lowercase Unicode property.</p>
5653      * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.LOWERCASE).</p>
5654      * <p>This is different from UCharacter.isLowerCase(ch)!</p>
5655      * @param ch codepoint to be tested
5656      * @stable ICU 2.6
5657      */
isULowercase(int ch)5658     public static boolean isULowercase(int ch)
5659     {
5660         return hasBinaryProperty(ch, UProperty.LOWERCASE);
5661     }
5662 
5663     /**
5664      * {@icu} <p>Check if a code point has the Uppercase Unicode property.</p>
5665      * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.UPPERCASE).</p>
5666      * <p>This is different from UCharacter.isUpperCase(ch)!</p>
5667      * @param ch codepoint to be tested
5668      * @stable ICU 2.6
5669      */
isUUppercase(int ch)5670     public static boolean isUUppercase(int ch)
5671     {
5672         return hasBinaryProperty(ch, UProperty.UPPERCASE);
5673     }
5674 
5675     /**
5676      * {@icu} <p>Check if a code point has the White_Space Unicode property.</p>
5677      * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.WHITE_SPACE).</p>
5678      * <p>This is different from both UCharacter.isSpace(ch) and
5679      * UCharacter.isWhitespace(ch)!</p>
5680      * @param ch codepoint to be tested
5681      * @stable ICU 2.6
5682      */
isUWhiteSpace(int ch)5683     public static boolean isUWhiteSpace(int ch)
5684     {
5685         return hasBinaryProperty(ch, UProperty.WHITE_SPACE);
5686     }
5687 
5688     /**
5689      * {@icu} <p>Returns the property value for an Unicode property type of a code point.
5690      * Also returns binary and mask property values.</p>
5691      * <p>Unicode, especially in version 3.2, defines many more properties than
5692      * the original set in UnicodeData.txt.</p>
5693      * <p>The properties APIs are intended to reflect Unicode properties as
5694      * defined in the Unicode Character Database (UCD) and Unicode Technical
5695      * Reports (UTR). For details about the properties see
5696      * http://www.unicode.org/.</p>
5697      * <p>For names of Unicode properties see the UCD file PropertyAliases.txt.
5698      * </p>
5699      * <pre>
5700      * Sample usage:
5701      * int ea = UCharacter.getIntPropertyValue(c, UProperty.EAST_ASIAN_WIDTH);
5702      * int ideo = UCharacter.getIntPropertyValue(c, UProperty.IDEOGRAPHIC);
5703      * boolean b = (ideo == 1) ? true : false;
5704      * </pre>
5705      * @param ch code point to test.
5706      * @param type UProperty selector constant, identifies which binary
5707      *        property to check. Must be
5708      *        UProperty.BINARY_START &lt;= type &lt; UProperty.BINARY_LIMIT or
5709      *        UProperty.INT_START &lt;= type &lt; UProperty.INT_LIMIT or
5710      *        UProperty.MASK_START &lt;= type &lt; UProperty.MASK_LIMIT.
5711      * @return numeric value that is directly the property value or,
5712      *         for enumerated properties, corresponds to the numeric value of
5713      *         the enumerated constant of the respective property value
5714      *         enumeration type (cast to enum type if necessary).
5715      *         Returns 0 or 1 (for false / true) for binary Unicode properties.
5716      *         Returns a bit-mask for mask properties.
5717      *         Returns 0 if 'type' is out of bounds or if the Unicode version
5718      *         does not have data for the property at all, or not for this code
5719      *         point.
5720      * @see UProperty
5721      * @see #hasBinaryProperty
5722      * @see #getIntPropertyMinValue
5723      * @see #getIntPropertyMaxValue
5724      * @see #getUnicodeVersion
5725      * @stable ICU 2.4
5726      */
getIntPropertyValue(int ch, int type)5727     public static int getIntPropertyValue(int ch, int type)
5728     {
5729         return UCharacterProperty.INSTANCE.getIntPropertyValue(ch, type);
5730     }
5731     /**
5732      * {@icu} Returns a string version of the property value.
5733      * @param propertyEnum The property enum value.
5734      * @param codepoint The codepoint value.
5735      * @param nameChoice The choice of the name.
5736      * @return value as string
5737      * @internal
5738      * @deprecated This API is ICU internal only.
5739      */
5740     @Deprecated
5741     ///CLOVER:OFF
getStringPropertyValue(int propertyEnum, int codepoint, int nameChoice)5742     public static String getStringPropertyValue(int propertyEnum, int codepoint, int nameChoice) {
5743         if ((propertyEnum >= UProperty.BINARY_START && propertyEnum < UProperty.BINARY_LIMIT) ||
5744                 (propertyEnum >= UProperty.INT_START && propertyEnum < UProperty.INT_LIMIT)) {
5745             return getPropertyValueName(propertyEnum, getIntPropertyValue(codepoint, propertyEnum),
5746                     nameChoice);
5747         }
5748         if (propertyEnum == UProperty.NUMERIC_VALUE) {
5749             return String.valueOf(getUnicodeNumericValue(codepoint));
5750         }
5751         // otherwise must be string property
5752         switch (propertyEnum) {
5753         case UProperty.AGE: return getAge(codepoint).toString();
5754         case UProperty.ISO_COMMENT: return getISOComment(codepoint);
5755         case UProperty.BIDI_MIRRORING_GLYPH: return UTF16.valueOf(getMirror(codepoint));
5756         case UProperty.CASE_FOLDING: return foldCase(UTF16.valueOf(codepoint), true);
5757         case UProperty.LOWERCASE_MAPPING: return toLowerCase(UTF16.valueOf(codepoint));
5758         case UProperty.NAME: return getName(codepoint);
5759         case UProperty.SIMPLE_CASE_FOLDING: return UTF16.valueOf(foldCase(codepoint,true));
5760         case UProperty.SIMPLE_LOWERCASE_MAPPING: return UTF16.valueOf(toLowerCase(codepoint));
5761         case UProperty.SIMPLE_TITLECASE_MAPPING: return UTF16.valueOf(toTitleCase(codepoint));
5762         case UProperty.SIMPLE_UPPERCASE_MAPPING: return UTF16.valueOf(toUpperCase(codepoint));
5763         case UProperty.TITLECASE_MAPPING: return toTitleCase(UTF16.valueOf(codepoint),null);
5764         case UProperty.UNICODE_1_NAME: return getName1_0(codepoint);
5765         case UProperty.UPPERCASE_MAPPING: return toUpperCase(UTF16.valueOf(codepoint));
5766         }
5767         throw new IllegalArgumentException("Illegal Property Enum");
5768     }
5769     ///CLOVER:ON
5770 
5771     /**
5772      * {@icu} Returns the minimum value for an integer/binary Unicode property type.
5773      * Can be used together with UCharacter.getIntPropertyMaxValue(int)
5774      * to allocate arrays of com.ibm.icu.text.UnicodeSet or similar.
5775      * @param type UProperty selector constant, identifies which binary
5776      *        property to check. Must be
5777      *        UProperty.BINARY_START &lt;= type &lt; UProperty.BINARY_LIMIT or
5778      *        UProperty.INT_START &lt;= type &lt; UProperty.INT_LIMIT.
5779      * @return Minimum value returned by UCharacter.getIntPropertyValue(int)
5780      *         for a Unicode property. 0 if the property
5781      *         selector 'type' is out of range.
5782      * @see UProperty
5783      * @see #hasBinaryProperty
5784      * @see #getUnicodeVersion
5785      * @see #getIntPropertyMaxValue
5786      * @see #getIntPropertyValue
5787      * @stable ICU 2.4
5788      */
getIntPropertyMinValue(int type)5789     public static int getIntPropertyMinValue(int type){
5790 
5791         return 0; // undefined; and: all other properties have a minimum value of 0
5792     }
5793 
5794 
5795     /**
5796      * {@icu} Returns the maximum value for an integer/binary Unicode property.
5797      * Can be used together with UCharacter.getIntPropertyMinValue(int)
5798      * to allocate arrays of com.ibm.icu.text.UnicodeSet or similar.
5799      * Examples for min/max values (for Unicode 3.2):
5800      * <ul>
5801      * <li> UProperty.BIDI_CLASS:    0/18
5802      * (UCharacterDirection.LEFT_TO_RIGHT/UCharacterDirection.BOUNDARY_NEUTRAL)
5803      * <li> UProperty.SCRIPT:        0/45 (UScript.COMMON/UScript.TAGBANWA)
5804      * <li> UProperty.IDEOGRAPHIC:   0/1  (false/true)
5805      * </ul>
5806      * For undefined UProperty constant values, min/max values will be 0/-1.
5807      * @param type UProperty selector constant, identifies which binary
5808      *        property to check. Must be
5809      *        UProperty.BINARY_START &lt;= type &lt; UProperty.BINARY_LIMIT or
5810      *        UProperty.INT_START &lt;= type &lt; UProperty.INT_LIMIT.
5811      * @return Maximum value returned by u_getIntPropertyValue for a Unicode
5812      *         property. &lt;= 0 if the property selector 'type' is out of range.
5813      * @see UProperty
5814      * @see #hasBinaryProperty
5815      * @see #getUnicodeVersion
5816      * @see #getIntPropertyMaxValue
5817      * @see #getIntPropertyValue
5818      * @stable ICU 2.4
5819      */
getIntPropertyMaxValue(int type)5820     public static int getIntPropertyMaxValue(int type)
5821     {
5822         return UCharacterProperty.INSTANCE.getIntPropertyMaxValue(type);
5823     }
5824 
5825     /**
5826      * Provide the java.lang.Character forDigit API, for convenience.
5827      * @stable ICU 3.0
5828      */
forDigit(int digit, int radix)5829     public static char forDigit(int digit, int radix) {
5830         return java.lang.Character.forDigit(digit, radix);
5831     }
5832 
5833     // JDK 1.5 API coverage
5834 
5835     /**
5836      * Cover the JDK 1.5 API, for convenience.
5837      * @see UTF16#LEAD_SURROGATE_MIN_VALUE
5838      * @stable ICU 3.0
5839      */
5840     public static final char MIN_HIGH_SURROGATE = UTF16.LEAD_SURROGATE_MIN_VALUE;
5841 
5842     /**
5843      * Cover the JDK 1.5 API, for convenience.
5844      * @see UTF16#LEAD_SURROGATE_MAX_VALUE
5845      * @stable ICU 3.0
5846      */
5847     public static final char MAX_HIGH_SURROGATE = UTF16.LEAD_SURROGATE_MAX_VALUE;
5848 
5849     /**
5850      * Cover the JDK 1.5 API, for convenience.
5851      * @see UTF16#TRAIL_SURROGATE_MIN_VALUE
5852      * @stable ICU 3.0
5853      */
5854     public static final char MIN_LOW_SURROGATE = UTF16.TRAIL_SURROGATE_MIN_VALUE;
5855 
5856     /**
5857      * Cover the JDK 1.5 API, for convenience.
5858      * @see UTF16#TRAIL_SURROGATE_MAX_VALUE
5859      * @stable ICU 3.0
5860      */
5861     public static final char MAX_LOW_SURROGATE = UTF16.TRAIL_SURROGATE_MAX_VALUE;
5862 
5863     /**
5864      * Cover the JDK 1.5 API, for convenience.
5865      * @see UTF16#SURROGATE_MIN_VALUE
5866      * @stable ICU 3.0
5867      */
5868     public static final char MIN_SURROGATE = UTF16.SURROGATE_MIN_VALUE;
5869 
5870     /**
5871      * Cover the JDK 1.5 API, for convenience.
5872      * @see UTF16#SURROGATE_MAX_VALUE
5873      * @stable ICU 3.0
5874      */
5875     public static final char MAX_SURROGATE = UTF16.SURROGATE_MAX_VALUE;
5876 
5877     /**
5878      * Cover the JDK 1.5 API, for convenience.
5879      * @see UTF16#SUPPLEMENTARY_MIN_VALUE
5880      * @stable ICU 3.0
5881      */
5882     public static final int  MIN_SUPPLEMENTARY_CODE_POINT = UTF16.SUPPLEMENTARY_MIN_VALUE;
5883 
5884     /**
5885      * Cover the JDK 1.5 API, for convenience.
5886      * @see UTF16#CODEPOINT_MAX_VALUE
5887      * @stable ICU 3.0
5888      */
5889     public static final int  MAX_CODE_POINT = UTF16.CODEPOINT_MAX_VALUE;
5890 
5891     /**
5892      * Cover the JDK 1.5 API, for convenience.
5893      * @see UTF16#CODEPOINT_MIN_VALUE
5894      * @stable ICU 3.0
5895      */
5896     public static final int  MIN_CODE_POINT = UTF16.CODEPOINT_MIN_VALUE;
5897 
5898     /**
5899      * Cover the JDK 1.5 API, for convenience.
5900      * @param cp the code point to check
5901      * @return true if cp is a valid code point
5902      * @stable ICU 3.0
5903      */
isValidCodePoint(int cp)5904     public static final boolean isValidCodePoint(int cp) {
5905         return cp >= 0 && cp <= MAX_CODE_POINT;
5906     }
5907 
5908     /**
5909      * Cover the JDK 1.5 API, for convenience.
5910      * @param cp the code point to check
5911      * @return true if cp is a supplementary code point
5912      * @stable ICU 3.0
5913      */
isSupplementaryCodePoint(int cp)5914     public static final boolean isSupplementaryCodePoint(int cp) {
5915         return cp >= UTF16.SUPPLEMENTARY_MIN_VALUE
5916                 && cp <= UTF16.CODEPOINT_MAX_VALUE;
5917     }
5918 
5919     /**
5920      * Cover the JDK 1.5 API, for convenience.
5921      * @param ch the char to check
5922      * @return true if ch is a high (lead) surrogate
5923      * @stable ICU 3.0
5924      */
isHighSurrogate(char ch)5925     public static boolean isHighSurrogate(char ch) {
5926         return ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE;
5927     }
5928 
5929     /**
5930      * Cover the JDK 1.5 API, for convenience.
5931      * @param ch the char to check
5932      * @return true if ch is a low (trail) surrogate
5933      * @stable ICU 3.0
5934      */
isLowSurrogate(char ch)5935     public static boolean isLowSurrogate(char ch) {
5936         return ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE;
5937     }
5938 
5939     /**
5940      * Cover the JDK 1.5 API, for convenience.  Return true if the chars
5941      * form a valid surrogate pair.
5942      * @param high the high (lead) char
5943      * @param low the low (trail) char
5944      * @return true if high, low form a surrogate pair
5945      * @stable ICU 3.0
5946      */
isSurrogatePair(char high, char low)5947     public static final boolean isSurrogatePair(char high, char low) {
5948         return isHighSurrogate(high) && isLowSurrogate(low);
5949     }
5950 
5951     /**
5952      * Cover the JDK 1.5 API, for convenience.  Return the number of chars needed
5953      * to represent the code point.  This does not check the
5954      * code point for validity.
5955      * @param cp the code point to check
5956      * @return the number of chars needed to represent the code point
5957      * @see UTF16#getCharCount
5958      * @stable ICU 3.0
5959      */
charCount(int cp)5960     public static int charCount(int cp) {
5961         return UTF16.getCharCount(cp);
5962     }
5963 
5964     /**
5965      * Cover the JDK 1.5 API, for convenience.  Return the code point represented by
5966      * the characters.  This does not check the surrogate pair for validity.
5967      * @param high the high (lead) surrogate
5968      * @param low the low (trail) surrogate
5969      * @return the code point formed by the surrogate pair
5970      * @stable ICU 3.0
5971      */
toCodePoint(char high, char low)5972     public static final int toCodePoint(char high, char low) {
5973         return UCharacterProperty.getRawSupplementary(high, low);
5974     }
5975 
5976     /**
5977      * Cover the JDK 1.5 API, for convenience.  Return the code point at index.
5978      * <br/><b>Note</b>: the semantics of this API is different from the related UTF16
5979      * API.  This examines only the characters at index and index+1.
5980      * @param seq the characters to check
5981      * @param index the index of the first or only char forming the code point
5982      * @return the code point at the index
5983      * @stable ICU 3.0
5984      */
codePointAt(CharSequence seq, int index)5985     public static final int codePointAt(CharSequence seq, int index) {
5986         char c1 = seq.charAt(index++);
5987         if (isHighSurrogate(c1)) {
5988             if (index < seq.length()) {
5989                 char c2 = seq.charAt(index);
5990                 if (isLowSurrogate(c2)) {
5991                     return toCodePoint(c1, c2);
5992                 }
5993             }
5994         }
5995         return c1;
5996     }
5997 
5998     /**
5999      * Cover the JDK 1.5 API, for convenience.  Return the code point at index.
6000      * <br/><b>Note</b>: the semantics of this API is different from the related UTF16
6001      * API.  This examines only the characters at index and index+1.
6002      * @param text the characters to check
6003      * @param index the index of the first or only char forming the code point
6004      * @return the code point at the index
6005      * @stable ICU 3.0
6006      */
codePointAt(char[] text, int index)6007     public static final int codePointAt(char[] text, int index) {
6008         char c1 = text[index++];
6009         if (isHighSurrogate(c1)) {
6010             if (index < text.length) {
6011                 char c2 = text[index];
6012                 if (isLowSurrogate(c2)) {
6013                     return toCodePoint(c1, c2);
6014                 }
6015             }
6016         }
6017         return c1;
6018     }
6019 
6020     /**
6021      * Cover the JDK 1.5 API, for convenience.  Return the code point at index.
6022      * <br/><b>Note</b>: the semantics of this API is different from the related UTF16
6023      * API.  This examines only the characters at index and index+1.
6024      * @param text the characters to check
6025      * @param index the index of the first or only char forming the code point
6026      * @param limit the limit of the valid text
6027      * @return the code point at the index
6028      * @stable ICU 3.0
6029      */
codePointAt(char[] text, int index, int limit)6030     public static final int codePointAt(char[] text, int index, int limit) {
6031         if (index >= limit || limit > text.length) {
6032             throw new IndexOutOfBoundsException();
6033         }
6034         char c1 = text[index++];
6035         if (isHighSurrogate(c1)) {
6036             if (index < limit) {
6037                 char c2 = text[index];
6038                 if (isLowSurrogate(c2)) {
6039                     return toCodePoint(c1, c2);
6040                 }
6041             }
6042         }
6043         return c1;
6044     }
6045 
6046     /**
6047      * Cover the JDK 1.5 API, for convenience.  Return the code point before index.
6048      * <br/><b>Note</b>: the semantics of this API is different from the related UTF16
6049      * API.  This examines only the characters at index-1 and index-2.
6050      * @param seq the characters to check
6051      * @param index the index after the last or only char forming the code point
6052      * @return the code point before the index
6053      * @stable ICU 3.0
6054      */
codePointBefore(CharSequence seq, int index)6055     public static final int codePointBefore(CharSequence seq, int index) {
6056         char c2 = seq.charAt(--index);
6057         if (isLowSurrogate(c2)) {
6058             if (index > 0) {
6059                 char c1 = seq.charAt(--index);
6060                 if (isHighSurrogate(c1)) {
6061                     return toCodePoint(c1, c2);
6062                 }
6063             }
6064         }
6065         return c2;
6066     }
6067 
6068     /**
6069      * Cover the JDK 1.5 API, for convenience.  Return the code point before index.
6070      * <br/><b>Note</b>: the semantics of this API is different from the related UTF16
6071      * API.  This examines only the characters at index-1 and index-2.
6072      * @param text the characters to check
6073      * @param index the index after the last or only char forming the code point
6074      * @return the code point before the index
6075      * @stable ICU 3.0
6076      */
codePointBefore(char[] text, int index)6077     public static final int codePointBefore(char[] text, int index) {
6078         char c2 = text[--index];
6079         if (isLowSurrogate(c2)) {
6080             if (index > 0) {
6081                 char c1 = text[--index];
6082                 if (isHighSurrogate(c1)) {
6083                     return toCodePoint(c1, c2);
6084                 }
6085             }
6086         }
6087         return c2;
6088     }
6089 
6090     /**
6091      * Cover the JDK 1.5 API, for convenience.  Return the code point before index.
6092      * <br/><b>Note</b>: the semantics of this API is different from the related UTF16
6093      * API.  This examines only the characters at index-1 and index-2.
6094      * @param text the characters to check
6095      * @param index the index after the last or only char forming the code point
6096      * @param limit the start of the valid text
6097      * @return the code point before the index
6098      * @stable ICU 3.0
6099      */
codePointBefore(char[] text, int index, int limit)6100     public static final int codePointBefore(char[] text, int index, int limit) {
6101         if (index <= limit || limit < 0) {
6102             throw new IndexOutOfBoundsException();
6103         }
6104         char c2 = text[--index];
6105         if (isLowSurrogate(c2)) {
6106             if (index > limit) {
6107                 char c1 = text[--index];
6108                 if (isHighSurrogate(c1)) {
6109                     return toCodePoint(c1, c2);
6110                 }
6111             }
6112         }
6113         return c2;
6114     }
6115 
6116     /**
6117      * Cover the JDK 1.5 API, for convenience.  Writes the chars representing the
6118      * code point into the destination at the given index.
6119      * @param cp the code point to convert
6120      * @param dst the destination array into which to put the char(s) representing the code point
6121      * @param dstIndex the index at which to put the first (or only) char
6122      * @return the count of the number of chars written (1 or 2)
6123      * @throws IllegalArgumentException if cp is not a valid code point
6124      * @stable ICU 3.0
6125      */
toChars(int cp, char[] dst, int dstIndex)6126     public static final int toChars(int cp, char[] dst, int dstIndex) {
6127         if (cp >= 0) {
6128             if (cp < MIN_SUPPLEMENTARY_CODE_POINT) {
6129                 dst[dstIndex] = (char)cp;
6130                 return 1;
6131             }
6132             if (cp <= MAX_CODE_POINT) {
6133                 dst[dstIndex] = UTF16.getLeadSurrogate(cp);
6134                 dst[dstIndex+1] = UTF16.getTrailSurrogate(cp);
6135                 return 2;
6136             }
6137         }
6138         throw new IllegalArgumentException();
6139     }
6140 
6141     /**
6142      * Cover the JDK 1.5 API, for convenience.  Returns a char array
6143      * representing the code point.
6144      * @param cp the code point to convert
6145      * @return an array containing the char(s) representing the code point
6146      * @throws IllegalArgumentException if cp is not a valid code point
6147      * @stable ICU 3.0
6148      */
toChars(int cp)6149     public static final char[] toChars(int cp) {
6150         if (cp >= 0) {
6151             if (cp < MIN_SUPPLEMENTARY_CODE_POINT) {
6152                 return new char[] { (char)cp };
6153             }
6154             if (cp <= MAX_CODE_POINT) {
6155                 return new char[] {
6156                         UTF16.getLeadSurrogate(cp),
6157                         UTF16.getTrailSurrogate(cp)
6158                 };
6159             }
6160         }
6161         throw new IllegalArgumentException();
6162     }
6163 
6164     /**
6165      * Cover the JDK API, for convenience.  Return a byte representing the directionality of
6166      * the character.
6167      *
6168      * {@icunote} Unlike the JDK, this returns DIRECTIONALITY_LEFT_TO_RIGHT for undefined
6169      * or out-of-bounds characters.
6170      *
6171      * {@icunote} The return value must be tested using the constants defined in {@link
6172      * UCharacterDirection} and its interface {@link
6173      * UCharacterEnums.ECharacterDirection} since the values are different from the ones
6174      * defined by <code>java.lang.Character</code>.
6175      * @param cp the code point to check
6176      * @return the directionality of the code point
6177      * @see #getDirection
6178      * @stable ICU 3.0
6179      */
getDirectionality(int cp)6180     public static byte getDirectionality(int cp)
6181     {
6182         return (byte)getDirection(cp);
6183     }
6184 
6185     /**
6186      * Cover the JDK API, for convenience.  Count the number of code points in the range of text.
6187      * @param text the characters to check
6188      * @param start the start of the range
6189      * @param limit the limit of the range
6190      * @return the number of code points in the range
6191      * @stable ICU 3.0
6192      */
codePointCount(CharSequence text, int start, int limit)6193     public static int codePointCount(CharSequence text, int start, int limit) {
6194         if (start < 0 || limit < start || limit > text.length()) {
6195             throw new IndexOutOfBoundsException("start (" + start +
6196                     ") or limit (" + limit +
6197                     ") invalid or out of range 0, " + text.length());
6198         }
6199 
6200         int len = limit - start;
6201         while (limit > start) {
6202             char ch = text.charAt(--limit);
6203             while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && limit > start) {
6204                 ch = text.charAt(--limit);
6205                 if (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE) {
6206                     --len;
6207                     break;
6208                 }
6209             }
6210         }
6211         return len;
6212     }
6213 
6214     /**
6215      * Cover the JDK API, for convenience.  Count the number of code points in the range of text.
6216      * @param text the characters to check
6217      * @param start the start of the range
6218      * @param limit the limit of the range
6219      * @return the number of code points in the range
6220      * @stable ICU 3.0
6221      */
codePointCount(char[] text, int start, int limit)6222     public static int codePointCount(char[] text, int start, int limit) {
6223         if (start < 0 || limit < start || limit > text.length) {
6224             throw new IndexOutOfBoundsException("start (" + start +
6225                     ") or limit (" + limit +
6226                     ") invalid or out of range 0, " + text.length);
6227         }
6228 
6229         int len = limit - start;
6230         while (limit > start) {
6231             char ch = text[--limit];
6232             while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && limit > start) {
6233                 ch = text[--limit];
6234                 if (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE) {
6235                     --len;
6236                     break;
6237                 }
6238             }
6239         }
6240         return len;
6241     }
6242 
6243     /**
6244      * Cover the JDK API, for convenience.  Adjust the char index by a code point offset.
6245      * @param text the characters to check
6246      * @param index the index to adjust
6247      * @param codePointOffset the number of code points by which to offset the index
6248      * @return the adjusted index
6249      * @stable ICU 3.0
6250      */
offsetByCodePoints(CharSequence text, int index, int codePointOffset)6251     public static int offsetByCodePoints(CharSequence text, int index, int codePointOffset) {
6252         if (index < 0 || index > text.length()) {
6253             throw new IndexOutOfBoundsException("index ( " + index +
6254                     ") out of range 0, " + text.length());
6255         }
6256 
6257         if (codePointOffset < 0) {
6258             while (++codePointOffset <= 0) {
6259                 char ch = text.charAt(--index);
6260                 while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && index > 0) {
6261                     ch = text.charAt(--index);
6262                     if (ch < MIN_HIGH_SURROGATE || ch > MAX_HIGH_SURROGATE) {
6263                         if (++codePointOffset > 0) {
6264                             return index+1;
6265                         }
6266                     }
6267                 }
6268             }
6269         } else {
6270             int limit = text.length();
6271             while (--codePointOffset >= 0) {
6272                 char ch = text.charAt(index++);
6273                 while (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE && index < limit) {
6274                     ch = text.charAt(index++);
6275                     if (ch < MIN_LOW_SURROGATE || ch > MAX_LOW_SURROGATE) {
6276                         if (--codePointOffset < 0) {
6277                             return index-1;
6278                         }
6279                     }
6280                 }
6281             }
6282         }
6283 
6284         return index;
6285     }
6286 
6287     /**
6288      * Cover the JDK API, for convenience.  Adjust the char index by a code point offset.
6289      * @param text the characters to check
6290      * @param start the start of the range to check
6291      * @param count the length of the range to check
6292      * @param index the index to adjust
6293      * @param codePointOffset the number of code points by which to offset the index
6294      * @return the adjusted index
6295      * @stable ICU 3.0
6296      */
offsetByCodePoints(char[] text, int start, int count, int index, int codePointOffset)6297     public static int offsetByCodePoints(char[] text, int start, int count, int index,
6298             int codePointOffset) {
6299         int limit = start + count;
6300         if (start < 0 || limit < start || limit > text.length || index < start || index > limit) {
6301             throw new IndexOutOfBoundsException("index ( " + index +
6302                     ") out of range " + start +
6303                     ", " + limit +
6304                     " in array 0, " + text.length);
6305         }
6306 
6307         if (codePointOffset < 0) {
6308             while (++codePointOffset <= 0) {
6309                 char ch = text[--index];
6310                 if (index < start) {
6311                     throw new IndexOutOfBoundsException("index ( " + index +
6312                             ") < start (" + start +
6313                             ")");
6314                 }
6315                 while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && index > start) {
6316                     ch = text[--index];
6317                     if (ch < MIN_HIGH_SURROGATE || ch > MAX_HIGH_SURROGATE) {
6318                         if (++codePointOffset > 0) {
6319                             return index+1;
6320                         }
6321                     }
6322                 }
6323             }
6324         } else {
6325             while (--codePointOffset >= 0) {
6326                 char ch = text[index++];
6327                 if (index > limit) {
6328                     throw new IndexOutOfBoundsException("index ( " + index +
6329                             ") > limit (" + limit +
6330                             ")");
6331                 }
6332                 while (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE && index < limit) {
6333                     ch = text[index++];
6334                     if (ch < MIN_LOW_SURROGATE || ch > MAX_LOW_SURROGATE) {
6335                         if (--codePointOffset < 0) {
6336                             return index-1;
6337                         }
6338                     }
6339                 }
6340             }
6341         }
6342 
6343         return index;
6344     }
6345 
    // private variables -------------------------------------------------

    /**
     * Mask that keeps the low 16 bits of a value
     */
    private static final int LAST_CHAR_MASK_ = 0xFFFF;

    //    /**
    //     * To get the last byte out from a data type
    //     */
    //    private static final int LAST_BYTE_MASK_ = 0xFF;
    //
    //    /**
    //     * Shift 16 bits
    //     */
    //    private static final int SHIFT_16_ = 16;
    //
    //    /**
    //     * Shift 24 bits
    //     */
    //    private static final int SHIFT_24_ = 24;
    //
    //    /**
    //     * Decimal radix
    //     */
    //    private static final int DECIMAL_RADIX_ = 10;

    /**
     * No-break space code point (U+00A0)
     */
    private static final int NO_BREAK_SPACE_ = 0xA0;

    /**
     * Figure space code point (U+2007)
     */
    private static final int FIGURE_SPACE_ = 0x2007;

    /**
     * Narrow no-break space code point (U+202F)
     */
    private static final int NARROW_NO_BREAK_SPACE_ = 0x202F;

    /**
     * Ideographic number zero code point (U+3007)
     */
    private static final int IDEOGRAPHIC_NUMBER_ZERO_ = 0x3007;

    /**
     * CJK Ideograph, first code point (U+4E00)
     */
    private static final int CJK_IDEOGRAPH_FIRST_ = 0x4e00;

    /**
     * CJK Ideograph, second code point (U+4E8C)
     */
    private static final int CJK_IDEOGRAPH_SECOND_ = 0x4e8c;

    /**
     * CJK Ideograph, third code point (U+4E09)
     */
    private static final int CJK_IDEOGRAPH_THIRD_ = 0x4e09;

    /**
     * CJK Ideograph, fourth code point (U+56DB)
     */
    private static final int CJK_IDEOGRAPH_FOURTH_ = 0x56db;

    /**
     * CJK Ideograph, fifth code point (U+4E94)
     */
    private static final int CJK_IDEOGRAPH_FIFTH_ = 0x4e94;

    /**
     * CJK Ideograph, sixth code point (U+516D)
     */
    private static final int CJK_IDEOGRAPH_SIXTH_ = 0x516d;

    /**
     * CJK Ideograph, seventh code point (U+4E03)
     */
    private static final int CJK_IDEOGRAPH_SEVENTH_ = 0x4e03;

    /**
     * CJK Ideograph, eighth code point (U+516B)
     */
    private static final int CJK_IDEOGRAPH_EIGHTH_ = 0x516b;

    /**
     * CJK Ideograph, ninth code point (U+4E5D)
     */
    private static final int CJK_IDEOGRAPH_NINETH_ = 0x4e5d;

    /**
     * Application Program Command code point (U+009F)
     */
    private static final int APPLICATION_PROGRAM_COMMAND_ = 0x009F;

    /**
     * Unit separator code point (U+001F)
     */
    private static final int UNIT_SEPARATOR_ = 0x001F;

    /**
     * Delete code point (U+007F)
     */
    private static final int DELETE_ = 0x007F;

    /**
     * Han digit characters
     */
    private static final int CJK_IDEOGRAPH_COMPLEX_ZERO_     = 0x96f6;
    private static final int CJK_IDEOGRAPH_COMPLEX_ONE_      = 0x58f9;
    private static final int CJK_IDEOGRAPH_COMPLEX_TWO_      = 0x8cb3;
    private static final int CJK_IDEOGRAPH_COMPLEX_THREE_    = 0x53c3;
    private static final int CJK_IDEOGRAPH_COMPLEX_FOUR_     = 0x8086;
    private static final int CJK_IDEOGRAPH_COMPLEX_FIVE_     = 0x4f0d;
    private static final int CJK_IDEOGRAPH_COMPLEX_SIX_      = 0x9678;
    private static final int CJK_IDEOGRAPH_COMPLEX_SEVEN_    = 0x67d2;
    private static final int CJK_IDEOGRAPH_COMPLEX_EIGHT_    = 0x634c;
    private static final int CJK_IDEOGRAPH_COMPLEX_NINE_     = 0x7396;
    private static final int CJK_IDEOGRAPH_TEN_              = 0x5341;
    private static final int CJK_IDEOGRAPH_COMPLEX_TEN_      = 0x62fe;
    private static final int CJK_IDEOGRAPH_HUNDRED_          = 0x767e;
    private static final int CJK_IDEOGRAPH_COMPLEX_HUNDRED_  = 0x4f70;
    private static final int CJK_IDEOGRAPH_THOUSAND_         = 0x5343;
    private static final int CJK_IDEOGRAPH_COMPLEX_THOUSAND_ = 0x4edf;
    // NOTE(review): the ideograph for ten thousand is presumably U+842C; 0x824c looks
    // like a digit transposition -- confirm against the Unihan data before changing.
    private static final int CJK_IDEOGRAPH_TEN_THOUSAND_     = 0x824c;
    private static final int CJK_IDEOGRAPH_HUNDRED_MILLION_  = 0x5104;
6474 
6475     // private constructor -----------------------------------------------
6476     ///CLOVER:OFF
6477     /**
6478      * Private constructor to prevent instantiation
6479      */
UCharacter()6480     private UCharacter()
6481     {
6482     }
6483     ///CLOVER:ON
6484 }
6485