1 /*
2  *  Licensed to the Apache Software Foundation (ASF) under one or more
3  *  contributor license agreements.  See the NOTICE file distributed with
4  *  this work for additional information regarding copyright ownership.
5  *  The ASF licenses this file to You under the Apache License, Version 2.0
6  *  (the "License"); you may not use this file except in compliance with
7  *  the License.  You may obtain a copy of the License at
8  *
9  *     http://www.apache.org/licenses/LICENSE-2.0
10  *
11  *  Unless required by applicable law or agreed to in writing, software
12  *  distributed under the License is distributed on an "AS IS" BASIS,
13  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  *  See the License for the specific language governing permissions and
15  *  limitations under the License.
16  */
17 
18 package java.lang;
19 
20 import java.io.Serializable;
21 import java.util.Arrays;
22 
23 /**
24  * The wrapper for the primitive type {@code char}. This class also provides a
25  * number of utility methods for working with characters.
26  *
27  * <p>Character data is kept up to date as Unicode evolves.
28  * See the <a href="../util/Locale.html#locale_data">Locale data</a> section of
29  * the {@code Locale} documentation for details of the Unicode versions implemented by current
30  * and historical Android releases.
31  *
32  * <p>The Unicode specification, character tables, and other information are available at
33  * <a href="http://www.unicode.org/">http://www.unicode.org/</a>.
34  *
35  * <p>Unicode characters are referred to as <i>code points</i>. The range of valid
36  * code points is U+0000 to U+10FFFF. The <i>Basic Multilingual Plane (BMP)</i>
37  * is the code point range U+0000 to U+FFFF. Characters above the BMP are
38  * referred to as <i>Supplementary Characters</i>. On the Java platform, UTF-16
39  * encoding and {@code char} pairs are used to represent code points in the
40  * supplementary range. A pair of {@code char} values that represent a
41  * supplementary character are made up of a <i>high surrogate</i> with a value
42  * range of 0xD800 to 0xDBFF and a <i>low surrogate</i> with a value range of
43  * 0xDC00 to 0xDFFF.
44  * <p>
45  * On the Java platform a {@code char} value represents either a single BMP code
46  * point or a UTF-16 unit that's part of a surrogate pair. The {@code int} type
47  * is used to represent all Unicode code points.
48  *
49  * <a name="unicode_categories"></a><h3>Unicode categories</h3>
50  * <p>Here's a list of the Unicode character categories and the corresponding Java constant,
51  * grouped semantically to provide a convenient overview. This table is also useful in
52  * conjunction with {@code \p} and {@code \P} in {@link java.util.regex.Pattern regular expressions}.
53  * <span class="datatable">
54  * <style type="text/css">
55  * .datatable td { padding-right: 20px; }
56  * </style>
57  * <p><table>
58  * <tr> <td> Cn </td> <td> Unassigned </td>  <td>{@link #UNASSIGNED}</td> </tr>
59  * <tr> <td> Cc </td> <td> Control </td>     <td>{@link #CONTROL}</td> </tr>
60  * <tr> <td> Cf </td> <td> Format </td>      <td>{@link #FORMAT}</td> </tr>
61  * <tr> <td> Co </td> <td> Private use </td> <td>{@link #PRIVATE_USE}</td> </tr>
62  * <tr> <td> Cs </td> <td> Surrogate </td>   <td>{@link #SURROGATE}</td> </tr>
63  * <tr> <td><br></td> </tr>
64  * <tr> <td> Lu </td> <td> Uppercase letter </td> <td>{@link #UPPERCASE_LETTER}</td> </tr>
65  * <tr> <td> Ll </td> <td> Lowercase letter </td> <td>{@link #LOWERCASE_LETTER}</td> </tr>
66  * <tr> <td> Lt </td> <td> Titlecase letter </td> <td>{@link #TITLECASE_LETTER}</td> </tr>
67  * <tr> <td> Lm </td> <td> Modifier letter </td>  <td>{@link #MODIFIER_LETTER}</td> </tr>
68  * <tr> <td> Lo </td> <td> Other letter </td>     <td>{@link #OTHER_LETTER}</td> </tr>
69  * <tr> <td><br></td> </tr>
70  * <tr> <td> Mn </td> <td> Non-spacing mark </td>       <td>{@link #NON_SPACING_MARK}</td> </tr>
71  * <tr> <td> Me </td> <td> Enclosing mark </td>         <td>{@link #ENCLOSING_MARK}</td> </tr>
72  * <tr> <td> Mc </td> <td> Combining spacing mark </td> <td>{@link #COMBINING_SPACING_MARK}</td> </tr>
73  * <tr> <td><br></td> </tr>
74  * <tr> <td> Nd </td> <td> Decimal digit number </td> <td>{@link #DECIMAL_DIGIT_NUMBER}</td> </tr>
75  * <tr> <td> Nl </td> <td> Letter number </td>        <td>{@link #LETTER_NUMBER}</td> </tr>
76  * <tr> <td> No </td> <td> Other number </td>         <td>{@link #OTHER_NUMBER}</td> </tr>
77  * <tr> <td><br></td> </tr>
78  * <tr> <td> Pd </td> <td> Dash punctuation </td>          <td>{@link #DASH_PUNCTUATION}</td> </tr>
79  * <tr> <td> Ps </td> <td> Start punctuation </td>         <td>{@link #START_PUNCTUATION}</td> </tr>
80  * <tr> <td> Pe </td> <td> End punctuation </td>           <td>{@link #END_PUNCTUATION}</td> </tr>
81  * <tr> <td> Pc </td> <td> Connector punctuation </td>     <td>{@link #CONNECTOR_PUNCTUATION}</td> </tr>
82  * <tr> <td> Pi </td> <td> Initial quote punctuation </td> <td>{@link #INITIAL_QUOTE_PUNCTUATION}</td> </tr>
83  * <tr> <td> Pf </td> <td> Final quote punctuation </td>   <td>{@link #FINAL_QUOTE_PUNCTUATION}</td> </tr>
84  * <tr> <td> Po </td> <td> Other punctuation </td>         <td>{@link #OTHER_PUNCTUATION}</td> </tr>
85  * <tr> <td><br></td> </tr>
86  * <tr> <td> Sm </td> <td> Math symbol </td>     <td>{@link #MATH_SYMBOL}</td> </tr>
87  * <tr> <td> Sc </td> <td> Currency symbol </td> <td>{@link #CURRENCY_SYMBOL}</td> </tr>
88  * <tr> <td> Sk </td> <td> Modifier symbol </td> <td>{@link #MODIFIER_SYMBOL}</td> </tr>
89  * <tr> <td> So </td> <td> Other symbol </td>    <td>{@link #OTHER_SYMBOL}</td> </tr>
90  * <tr> <td><br></td> </tr>
91  * <tr> <td> Zs </td> <td> Space separator </td>     <td>{@link #SPACE_SEPARATOR}</td> </tr>
92  * <tr> <td> Zl </td> <td> Line separator </td>      <td>{@link #LINE_SEPARATOR}</td> </tr>
93  * <tr> <td> Zp </td> <td> Paragraph separator </td> <td>{@link #PARAGRAPH_SEPARATOR}</td> </tr>
94  * </table>
95  * </span>
96  *
97  * @since 1.0
98  */
99 @FindBugsSuppressWarnings("DM_NUMBER_CTOR")
100 public final class Character implements Serializable, Comparable<Character> {
101     private static final long serialVersionUID = 3786198910865385080L; // stream version id for Java serialization
102 
103     private final char value; // the primitive char held by this wrapper instance
104 
105     /**
106      * The minimum {@code Character} value, U+0000.
107      */
108     public static final char MIN_VALUE = '\u0000';
109 
110     /**
111      * The maximum {@code Character} value, U+FFFF.
112      */
113     public static final char MAX_VALUE = '\uffff';
114 
115     /**
116      * The minimum radix (2) used for conversions between characters and integers.
117      */
118     public static final int MIN_RADIX = 2;
119 
120     /**
121      * The maximum radix (36) used for conversions between characters and integers.
122      */
123     public static final int MAX_RADIX = 36;
124 
125     /**
126      * The {@link Class} object that represents the primitive type {@code char}.
127      */
128     @SuppressWarnings("unchecked")
129     public static final Class<Character> TYPE
130             = (Class<Character>) char[].class.getComponentType();
131     // Note: Character.TYPE can't be set to "char.class", since *that* is defined to be
132     // "java.lang.Character.TYPE"; the value is taken from the component type of char[] instead.
133 
134     /**
135      * Unicode category constant Cn (Unassigned).
136      */
137     public static final byte UNASSIGNED = 0;
138 
139     /**
140      * Unicode category constant Lu (Uppercase letter).
141      */
142     public static final byte UPPERCASE_LETTER = 1;
143 
144     /**
145      * Unicode category constant Ll (Lowercase letter).
146      */
147     public static final byte LOWERCASE_LETTER = 2;
148 
149     /**
150      * Unicode category constant Lt (Titlecase letter).
151      */
152     public static final byte TITLECASE_LETTER = 3;
153 
154     /**
155      * Unicode category constant Lm (Modifier letter).
156      */
157     public static final byte MODIFIER_LETTER = 4;
158 
159     /**
160      * Unicode category constant Lo (Other letter).
161      */
162     public static final byte OTHER_LETTER = 5;
163 
164     /**
165      * Unicode category constant Mn (Non-spacing mark).
166      */
167     public static final byte NON_SPACING_MARK = 6;
168 
169     /**
170      * Unicode category constant Me (Enclosing mark).
171      */
172     public static final byte ENCLOSING_MARK = 7;
173 
174     /**
175      * Unicode category constant Mc (Combining spacing mark).
176      */
177     public static final byte COMBINING_SPACING_MARK = 8;
178 
179     /**
180      * Unicode category constant Nd (Decimal digit number).
181      */
182     public static final byte DECIMAL_DIGIT_NUMBER = 9;
183 
184     /**
185      * Unicode category constant Nl (Letter number).
186      */
187     public static final byte LETTER_NUMBER = 10;
188 
189     /**
190      * Unicode category constant No (Other number).
191      */
192     public static final byte OTHER_NUMBER = 11;
193 
194     /**
195      * Unicode category constant Zs (Space separator).
196      */
197     public static final byte SPACE_SEPARATOR = 12;
198 
199     /**
200      * Unicode category constant Zl (Line separator).
201      */
202     public static final byte LINE_SEPARATOR = 13;
203 
204     /**
205      * Unicode category constant Zp (Paragraph separator).
206      */
207     public static final byte PARAGRAPH_SEPARATOR = 14;
208 
209     /**
210      * Unicode category constant Cc (Control).
211      */
212     public static final byte CONTROL = 15;
213 
214     /**
215      * Unicode category constant Cf (Format).
216      */
217     public static final byte FORMAT = 16;
218 
219     /**
220      * Unicode category constant Co (Private use). None of the category constants in this list has the value 17.
221      */
222     public static final byte PRIVATE_USE = 18;
223 
224     /**
225      * Unicode category constant Cs (Surrogate).
226      */
227     public static final byte SURROGATE = 19;
228 
229     /**
230      * Unicode category constant Pd (Dash punctuation).
231      */
232     public static final byte DASH_PUNCTUATION = 20;
233 
234     /**
235      * Unicode category constant Ps (Start punctuation).
236      */
237     public static final byte START_PUNCTUATION = 21;
238 
239     /**
240      * Unicode category constant Pe (End punctuation).
241      */
242     public static final byte END_PUNCTUATION = 22;
243 
244     /**
245      * Unicode category constant Pc (Connector punctuation).
246      */
247     public static final byte CONNECTOR_PUNCTUATION = 23;
248 
249     /**
250      * Unicode category constant Po (Other punctuation).
251      */
252     public static final byte OTHER_PUNCTUATION = 24;
253 
254     /**
255      * Unicode category constant Sm (Math symbol).
256      */
257     public static final byte MATH_SYMBOL = 25;
258 
259     /**
260      * Unicode category constant Sc (Currency symbol).
261      */
262     public static final byte CURRENCY_SYMBOL = 26;
263 
264     /**
265      * Unicode category constant Sk (Modifier symbol).
266      */
267     public static final byte MODIFIER_SYMBOL = 27;
268 
269     /**
270      * Unicode category constant So (Other symbol).
271      */
272     public static final byte OTHER_SYMBOL = 28;
273 
274     /**
275      * Unicode category constant Pi (Initial quote punctuation).
276      *
277      * @since 1.4
278      */
279     public static final byte INITIAL_QUOTE_PUNCTUATION = 29;
280 
281     /**
282      * Unicode category constant Pf (Final quote punctuation).
283      *
284      * @since 1.4
285      */
286     public static final byte FINAL_QUOTE_PUNCTUATION = 30;
287 
288     /**
289      * Unicode bidirectional constant for an undefined directionality.
290      *
291      * @since 1.4
292      */
293     public static final byte DIRECTIONALITY_UNDEFINED = -1;
294 
295     /**
296      * Unicode bidirectional constant L (left-to-right).
297      *
298      * @since 1.4
299      */
300     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0;
301 
302     /**
303      * Unicode bidirectional constant R (right-to-left).
304      *
305      * @since 1.4
306      */
307     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1;
308 
309     /**
310      * Unicode bidirectional constant AL (right-to-left Arabic).
311      *
312      * @since 1.4
313      */
314     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2;
315 
316     /**
317      * Unicode bidirectional constant EN (European number).
318      *
319      * @since 1.4
320      */
321     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3;
322 
323     /**
324      * Unicode bidirectional constant ES (European number separator).
325      *
326      * @since 1.4
327      */
328     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4;
329 
330     /**
331      * Unicode bidirectional constant ET (European number terminator).
332      *
333      * @since 1.4
334      */
335     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5;
336 
337     /**
338      * Unicode bidirectional constant AN (Arabic number).
339      *
340      * @since 1.4
341      */
342     public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6;
343 
344     /**
345      * Unicode bidirectional constant CS (common number separator).
346      *
347      * @since 1.4
348      */
349     public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7;
350 
351     /**
352      * Unicode bidirectional constant NSM (non-spacing mark).
353      *
354      * @since 1.4
355      */
356     public static final byte DIRECTIONALITY_NONSPACING_MARK = 8;
357 
358     /**
359      * Unicode bidirectional constant BN (boundary neutral).
360      *
361      * @since 1.4
362      */
363     public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9;
364 
365     /**
366      * Unicode bidirectional constant B (paragraph separator).
367      *
368      * @since 1.4
369      */
370     public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10;
371 
372     /**
373      * Unicode bidirectional constant S (segment separator).
374      *
375      * @since 1.4
376      */
377     public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11;
378 
379     /**
380      * Unicode bidirectional constant WS (whitespace).
381      *
382      * @since 1.4
383      */
384     public static final byte DIRECTIONALITY_WHITESPACE = 12;
385 
386     /**
387      * Unicode bidirectional constant ON (other neutrals).
388      *
389      * @since 1.4
390      */
391     public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13;
392 
393     /**
394      * Unicode bidirectional constant LRE (left-to-right embedding).
395      *
396      * @since 1.4
397      */
398     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14;
399 
400     /**
401      * Unicode bidirectional constant LRO (left-to-right override).
402      *
403      * @since 1.4
404      */
405     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15;
406 
407     /**
408      * Unicode bidirectional constant RLE (right-to-left embedding).
409      *
410      * @since 1.4
411      */
412     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16;
413 
414     /**
415      * Unicode bidirectional constant RLO (right-to-left override).
416      *
417      * @since 1.4
418      */
419     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17;
420 
421     /**
422      * Unicode bidirectional constant PDF (pop directional format).
423      *
424      * @since 1.4
425      */
426     public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;
427 
428     /**
429      * The minimum value of a high surrogate or leading surrogate unit in UTF-16
430      * encoding, {@code '\uD800'}.
431      *
432      * @since 1.5
433      */
434     public static final char MIN_HIGH_SURROGATE = '\uD800';
435 
436     /**
437      * The maximum value of a high surrogate or leading surrogate unit in UTF-16
438      * encoding, {@code '\uDBFF'}.
439      *
440      * @since 1.5
441      */
442     public static final char MAX_HIGH_SURROGATE = '\uDBFF';
443 
444     /**
445      * The minimum value of a low surrogate or trailing surrogate unit in UTF-16
446      * encoding, {@code '\uDC00'}.
447      *
448      * @since 1.5
449      */
450     public static final char MIN_LOW_SURROGATE = '\uDC00';
451 
452     /**
453      * The maximum value of a low surrogate or trailing surrogate unit in UTF-16
454      * encoding, {@code '\uDFFF'}.
455      *
456      * @since 1.5
457      */
458     public static final char MAX_LOW_SURROGATE = '\uDFFF';
459 
460     /**
461      * The minimum value of a surrogate unit in UTF-16 encoding, {@code '\uD800'}; equal to {@link #MIN_HIGH_SURROGATE}.
462      *
463      * @since 1.5
464      */
465     public static final char MIN_SURROGATE = '\uD800';
466 
467     /**
468      * The maximum value of a surrogate unit in UTF-16 encoding, {@code '\uDFFF'}; equal to {@link #MAX_LOW_SURROGATE}.
469      *
470      * @since 1.5
471      */
472     public static final char MAX_SURROGATE = '\uDFFF';
473 
474     /**
475      * The minimum value of a supplementary code point, {@code U+010000}.
476      *
477      * @since 1.5
478      */
479     public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x10000;
480 
481     /**
482      * The minimum code point value, {@code U+0000}.
483      *
484      * @since 1.5
485      */
486     public static final int MIN_CODE_POINT = 0x000000;
487 
488     /**
489      * The maximum code point value, {@code U+10FFFF}.
490      *
491      * @since 1.5
492      */
493     public static final int MAX_CODE_POINT = 0x10FFFF;
494 
495     /**
496      * The number of bits required to represent a {@code Character} value
497      * in unsigned form.
498      *
499      * @since 1.5
500      */
501     public static final int SIZE = 16;
502 
503     private static final byte[] DIRECTIONALITY = new byte[] { // entry order differs from the constants' own values; NOTE(review): the code that indexes this table is not visible here — confirm what the index represents
504             DIRECTIONALITY_LEFT_TO_RIGHT, DIRECTIONALITY_RIGHT_TO_LEFT, // [0], [1]
505             DIRECTIONALITY_EUROPEAN_NUMBER, // [2]
506             DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR, // [3]
507             DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR, // [4]
508             DIRECTIONALITY_ARABIC_NUMBER, // [5]
509             DIRECTIONALITY_COMMON_NUMBER_SEPARATOR, // [6]
510             DIRECTIONALITY_PARAGRAPH_SEPARATOR, // [7]
511             DIRECTIONALITY_SEGMENT_SEPARATOR, DIRECTIONALITY_WHITESPACE, // [8], [9]
512             DIRECTIONALITY_OTHER_NEUTRALS, // [10]
513             DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING, // [11]
514             DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE, // [12]
515             DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC, // [13]
516             DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING, // [14]
517             DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE, // [15]
518             DIRECTIONALITY_POP_DIRECTIONAL_FORMAT, // [16]
519             DIRECTIONALITY_NONSPACING_MARK, DIRECTIONALITY_BOUNDARY_NEUTRAL }; // [17], [18]
520 
521     /*
522      * Represents a subset of the Unicode character set.
523      */
524     public static class Subset {
525         private final String name;
526 
527         /**
528          * Constructs a new {@code Subset}.
529          */
Subset(String name)530         protected Subset(String name) {
531             if (name == null) {
532                 throw new NullPointerException("name == null");
533             }
534             this.name = name;
535         }
536 
537         /**
538          * Compares this character subset for identity with the specified object.
539          */
equals(Object object)540         @Override public final boolean equals(Object object) {
541             return object == this;
542         }
543 
544         /**
545          * Returns this subset's hash code, which is the hash code computed by
546          *         {@link java.lang.Object#hashCode()}.
547          */
hashCode()548         @Override public final int hashCode() {
549             return super.hashCode();
550         }
551 
552         /**
553          * Returns this subset's name.
554          */
toString()555         @Override public final String toString() {
556             return name;
557         }
558     }
559 
560     /**
561      * Represents a block of Unicode characters. This class provides constants for various
562      * well-known blocks (but not all blocks) and methods for looking up a block
563      * by name {@link #forName} or by code point {@link #of}.
564      *
565      * @since 1.2
566      */
567     public static final class UnicodeBlock extends Subset {
568         /**
569          * The Surrogates Area Unicode block.
570          *
571          * @deprecated As of Java 5, this block has been replaced by
572          *             {@link #HIGH_SURROGATES},
573          *             {@link #HIGH_PRIVATE_USE_SURROGATES} and
574          *             {@link #LOW_SURROGATES}.
575          */
576         @Deprecated
577         public static final UnicodeBlock SURROGATES_AREA = new UnicodeBlock("SURROGATES_AREA");
578 
579         /** The Basic Latin Unicode block. */
580         public static final UnicodeBlock BASIC_LATIN = new UnicodeBlock("BASIC_LATIN");
581 
582         /** The Latin-1 Supplement Unicode block. */
583         public static final UnicodeBlock LATIN_1_SUPPLEMENT = new UnicodeBlock("LATIN_1_SUPPLEMENT");
584 
585         /** The Latin Extended-A Unicode block. */
586         public static final UnicodeBlock LATIN_EXTENDED_A = new UnicodeBlock("LATIN_EXTENDED_A");
587 
588         /** The Latin Extended-B Unicode block. */
589         public static final UnicodeBlock LATIN_EXTENDED_B = new UnicodeBlock("LATIN_EXTENDED_B");
590 
591         /** The IPA Extensions Unicode block. */
592         public static final UnicodeBlock IPA_EXTENSIONS = new UnicodeBlock("IPA_EXTENSIONS");
593 
594         /** The Spacing Modifier Letters Unicode block. */
595         public static final UnicodeBlock SPACING_MODIFIER_LETTERS = new UnicodeBlock("SPACING_MODIFIER_LETTERS");
596 
597         /** The Combining Diacritical Marks Unicode block. */
598         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS = new UnicodeBlock("COMBINING_DIACRITICAL_MARKS");
599 
600         /**
601          * The Greek and Coptic Unicode block. Previously referred to as Greek.
602          */
603         public static final UnicodeBlock GREEK = new UnicodeBlock("GREEK");
604 
605         /** The Cyrillic Unicode block. */
606         public static final UnicodeBlock CYRILLIC = new UnicodeBlock("CYRILLIC");
607 
608         /**
609          * The Cyrillic Supplement Unicode block. Previously referred to as Cyrillic Supplementary.
610          */
611         public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY = new UnicodeBlock("CYRILLIC_SUPPLEMENTARY");
612 
613         /** The Armenian Unicode block. */
614         public static final UnicodeBlock ARMENIAN = new UnicodeBlock("ARMENIAN");
615 
616         /** The Hebrew Unicode block. */
617         public static final UnicodeBlock HEBREW = new UnicodeBlock("HEBREW");
618 
619         /** The Arabic Unicode block. */
620         public static final UnicodeBlock ARABIC = new UnicodeBlock("ARABIC");
621 
622         /** The Syriac Unicode block. */
623         public static final UnicodeBlock SYRIAC = new UnicodeBlock("SYRIAC");
624 
625         /** The Thaana Unicode block. */
626         public static final UnicodeBlock THAANA = new UnicodeBlock("THAANA");
627 
628         /** The Devanagari Unicode block. */
629         public static final UnicodeBlock DEVANAGARI = new UnicodeBlock("DEVANAGARI");
630 
631         /** The Bengali Unicode block. */
632         public static final UnicodeBlock BENGALI = new UnicodeBlock("BENGALI");
633 
634         /** The Gurmukhi Unicode block. */
635         public static final UnicodeBlock GURMUKHI = new UnicodeBlock("GURMUKHI");
636 
637         /** The Gujarati Unicode block. */
638         public static final UnicodeBlock GUJARATI = new UnicodeBlock("GUJARATI");
639 
640         /** The Oriya Unicode block. */
641         public static final UnicodeBlock ORIYA = new UnicodeBlock("ORIYA");
642 
643         /** The Tamil Unicode block. */
644         public static final UnicodeBlock TAMIL = new UnicodeBlock("TAMIL");
645 
646         /** The Telugu Unicode block. */
647         public static final UnicodeBlock TELUGU = new UnicodeBlock("TELUGU");
648 
649         /** The Kannada Unicode block. */
650         public static final UnicodeBlock KANNADA = new UnicodeBlock("KANNADA");
651 
652         /** The Malayalam Unicode block. */
653         public static final UnicodeBlock MALAYALAM = new UnicodeBlock("MALAYALAM");
654 
655         /** The Sinhala Unicode block. */
656         public static final UnicodeBlock SINHALA = new UnicodeBlock("SINHALA");
657 
658         /** The Thai Unicode block. */
659         public static final UnicodeBlock THAI = new UnicodeBlock("THAI");
660 
661         /** The Lao Unicode block. */
662         public static final UnicodeBlock LAO = new UnicodeBlock("LAO");
663 
664         /** The Tibetan Unicode block. */
665         public static final UnicodeBlock TIBETAN = new UnicodeBlock("TIBETAN");
666 
667         /** The Myanmar Unicode block. */
668         public static final UnicodeBlock MYANMAR = new UnicodeBlock("MYANMAR");
669 
670         /** The Georgian Unicode block. */
671         public static final UnicodeBlock GEORGIAN = new UnicodeBlock("GEORGIAN");
672 
673         /** The Hangul Jamo Unicode block. */
674         public static final UnicodeBlock HANGUL_JAMO = new UnicodeBlock("HANGUL_JAMO");
675 
676         /** The Ethiopic Unicode block. */
677         public static final UnicodeBlock ETHIOPIC = new UnicodeBlock("ETHIOPIC");
678 
679         /** The Cherokee Unicode block. */
680         public static final UnicodeBlock CHEROKEE = new UnicodeBlock("CHEROKEE");
681 
682         /** The Unified Canadian Aboriginal Syllabics Unicode block. */
683         public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS = new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS");
684 
685         /** The Ogham Unicode block. */
686         public static final UnicodeBlock OGHAM = new UnicodeBlock("OGHAM");
687 
688         /** The Runic Unicode block. */
689         public static final UnicodeBlock RUNIC = new UnicodeBlock("RUNIC");
690 
691         /** The Tagalog Unicode block. */
692         public static final UnicodeBlock TAGALOG = new UnicodeBlock("TAGALOG");
693 
694         /** The Hanunoo Unicode block. */
695         public static final UnicodeBlock HANUNOO = new UnicodeBlock("HANUNOO");
696 
697         /** The Buhid Unicode block. */
698         public static final UnicodeBlock BUHID = new UnicodeBlock("BUHID");
699 
700         /** The Tagbanwa Unicode block. */
701         public static final UnicodeBlock TAGBANWA = new UnicodeBlock("TAGBANWA");
702 
703         /** The Khmer Unicode block. */
704         public static final UnicodeBlock KHMER = new UnicodeBlock("KHMER");
705 
706         /** The Mongolian Unicode block. */
707         public static final UnicodeBlock MONGOLIAN = new UnicodeBlock("MONGOLIAN");
708 
709         /** The Limbu Unicode block. */
710         public static final UnicodeBlock LIMBU = new UnicodeBlock("LIMBU");
711 
712         /** The Tai Le Unicode block. */
713         public static final UnicodeBlock TAI_LE = new UnicodeBlock("TAI_LE");
714 
715         /** The Khmer Symbols Unicode block. */
716         public static final UnicodeBlock KHMER_SYMBOLS = new UnicodeBlock("KHMER_SYMBOLS");
717 
718         /** The Phonetic Extensions Unicode block. */
719         public static final UnicodeBlock PHONETIC_EXTENSIONS = new UnicodeBlock("PHONETIC_EXTENSIONS");
720 
721         /** The Latin Extended Additional Unicode block. */
722         public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL = new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL");
723 
724         /** The Greek Extended Unicode block. */
725         public static final UnicodeBlock GREEK_EXTENDED = new UnicodeBlock("GREEK_EXTENDED");
726 
727         /** The General Punctuation Unicode block. */
728         public static final UnicodeBlock GENERAL_PUNCTUATION = new UnicodeBlock("GENERAL_PUNCTUATION");
729 
730         /** The Superscripts and Subscripts Unicode block. */
731         public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS = new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS");
732 
733         /** The Currency Symbols Unicode block. */
734         public static final UnicodeBlock CURRENCY_SYMBOLS = new UnicodeBlock("CURRENCY_SYMBOLS");
735 
736         /**
737          * The Combining Diacritical Marks for Symbols Unicode
738          * Block. Previously referred to as Combining Marks for
739          * Symbols.
740          */
741         public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS = new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS");
742 
743         /** The Letterlike Symbols Unicode block. */
744         public static final UnicodeBlock LETTERLIKE_SYMBOLS = new UnicodeBlock("LETTERLIKE_SYMBOLS");
745 
746         /** The Number Forms Unicode block. */
747         public static final UnicodeBlock NUMBER_FORMS = new UnicodeBlock("NUMBER_FORMS");
748 
749         /** The Arrows Unicode block. */
750         public static final UnicodeBlock ARROWS = new UnicodeBlock("ARROWS");
751 
752         /** The Mathematical Operators Unicode block. */
753         public static final UnicodeBlock MATHEMATICAL_OPERATORS = new UnicodeBlock("MATHEMATICAL_OPERATORS");
754 
755         /** The Miscellaneous Technical Unicode block. */
756         public static final UnicodeBlock MISCELLANEOUS_TECHNICAL = new UnicodeBlock("MISCELLANEOUS_TECHNICAL");
757 
758         /** The Control Pictures Unicode block. */
759         public static final UnicodeBlock CONTROL_PICTURES = new UnicodeBlock("CONTROL_PICTURES");
760 
761         /** The Optical Character Recognition Unicode block. */
762         public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION = new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION");
763 
764         /** The Enclosed Alphanumerics Unicode block. */
765         public static final UnicodeBlock ENCLOSED_ALPHANUMERICS = new UnicodeBlock("ENCLOSED_ALPHANUMERICS");
766 
767         /** The Box Drawing Unicode block. */
768         public static final UnicodeBlock BOX_DRAWING = new UnicodeBlock("BOX_DRAWING");
769 
770         /** The Block Elements Unicode block. */
771         public static final UnicodeBlock BLOCK_ELEMENTS = new UnicodeBlock("BLOCK_ELEMENTS");
772 
773         /** The Geometric Shapes Unicode block. */
774         public static final UnicodeBlock GEOMETRIC_SHAPES = new UnicodeBlock("GEOMETRIC_SHAPES");
775 
776         /** The Miscellaneous Symbols Unicode block. */
777         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS = new UnicodeBlock("MISCELLANEOUS_SYMBOLS");
778 
779         /** The Dingbats Unicode block. */
780         public static final UnicodeBlock DINGBATS = new UnicodeBlock("DINGBATS");
781 
782         /** The Miscellaneous Mathematical Symbols-A Unicode block. */
783         public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A");
784
785         /** The Supplemental Arrows-A Unicode block. */
786         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A = new UnicodeBlock("SUPPLEMENTAL_ARROWS_A");
787
788         /** The Braille Patterns Unicode block. */
789         public static final UnicodeBlock BRAILLE_PATTERNS = new UnicodeBlock("BRAILLE_PATTERNS");
790
791         /** The Supplemental Arrows-B Unicode block. */
792         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B = new UnicodeBlock("SUPPLEMENTAL_ARROWS_B");
793
794         /** The Miscellaneous Mathematical Symbols-B Unicode block. */
795         public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B");
796
797         /** The Supplemental Mathematical Operators Unicode block. */
798         public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS = new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS");
799
800         /** The Miscellaneous Symbols and Arrows Unicode block. */
801         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS = new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS");
802
803         /** The CJK Radicals Supplement Unicode block. */
804         public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT = new UnicodeBlock("CJK_RADICALS_SUPPLEMENT");
805
806         /** The Kangxi Radicals Unicode block. */
807         public static final UnicodeBlock KANGXI_RADICALS = new UnicodeBlock("KANGXI_RADICALS");
808
809         /** The Ideographic Description Characters Unicode block. */
810         public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS = new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS");
811
812         /** The CJK Symbols and Punctuation Unicode block. */
813         public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION = new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION");
814
815         /** The Hiragana Unicode block. */
816         public static final UnicodeBlock HIRAGANA = new UnicodeBlock("HIRAGANA");
817
818         /** The Katakana Unicode block. */
819         public static final UnicodeBlock KATAKANA = new UnicodeBlock("KATAKANA");
820
821         /** The Bopomofo Unicode block. */
822         public static final UnicodeBlock BOPOMOFO = new UnicodeBlock("BOPOMOFO");
823
824         /** The Hangul Compatibility Jamo Unicode block. */
825         public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO = new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO");
826
827         /** The Kanbun Unicode block. */
828         public static final UnicodeBlock KANBUN = new UnicodeBlock("KANBUN");
829
830         /** The Bopomofo Extended Unicode block. */
831         public static final UnicodeBlock BOPOMOFO_EXTENDED = new UnicodeBlock("BOPOMOFO_EXTENDED");
832
833         /** The Katakana Phonetic Extensions Unicode block. */
834         public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS = new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS");
835
836         /** The Enclosed CJK Letters and Months Unicode block. */
837         public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS = new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS");
838
839         /** The CJK Compatibility Unicode block. */
840         public static final UnicodeBlock CJK_COMPATIBILITY = new UnicodeBlock("CJK_COMPATIBILITY");
841
842         /** The CJK Unified Ideographs Extension A Unicode block. */
843         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A");
844
845         /** The Yijing Hexagram Symbols Unicode block. */
846         public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS = new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS");
847
848         /** The CJK Unified Ideographs Unicode block. */
849         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS");
850
851         /** The Yi Syllables Unicode block. */
852         public static final UnicodeBlock YI_SYLLABLES = new UnicodeBlock("YI_SYLLABLES");
853
854         /** The Yi Radicals Unicode block. */
855         public static final UnicodeBlock YI_RADICALS = new UnicodeBlock("YI_RADICALS");
856
857         /** The Hangul Syllables Unicode block. */
858         public static final UnicodeBlock HANGUL_SYLLABLES = new UnicodeBlock("HANGUL_SYLLABLES");
859
860         /**
861          * The High Surrogates Unicode block. This block represents
862          * code point values in the high surrogate range 0xD800 to 0xDB7F.
863          */
864         public static final UnicodeBlock HIGH_SURROGATES = new UnicodeBlock("HIGH_SURROGATES");
865
866         /**
867          * The High Private Use Surrogates Unicode block. This block
868          * represents code point values in the high surrogate range 0xDB80 to
869          * 0xDBFF.
870          */
871         public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES = new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES");
872
873         /**
874          * The Low Surrogates Unicode block. This block represents
875          * code point values in the low surrogate range 0xDC00 to 0xDFFF.
876          */
877         public static final UnicodeBlock LOW_SURROGATES = new UnicodeBlock("LOW_SURROGATES");
878
879         /** The Private Use Area Unicode block. */
880         public static final UnicodeBlock PRIVATE_USE_AREA = new UnicodeBlock("PRIVATE_USE_AREA");
881
882         /** The CJK Compatibility Ideographs Unicode block. */
883         public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS");
884
885         /** The Alphabetic Presentation Forms Unicode block. */
886         public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS = new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS");
887
888         /** The Arabic Presentation Forms-A Unicode block. */
889         public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A");
890
891         /** The Variation Selectors Unicode block. */
892         public static final UnicodeBlock VARIATION_SELECTORS = new UnicodeBlock("VARIATION_SELECTORS");
893
894         /** The Combining Half Marks Unicode block. */
895         public static final UnicodeBlock COMBINING_HALF_MARKS = new UnicodeBlock("COMBINING_HALF_MARKS");
896
897         /** The CJK Compatibility Forms Unicode block. */
898         public static final UnicodeBlock CJK_COMPATIBILITY_FORMS = new UnicodeBlock("CJK_COMPATIBILITY_FORMS");
899
900         /** The Small Form Variants Unicode block. */
901         public static final UnicodeBlock SMALL_FORM_VARIANTS = new UnicodeBlock("SMALL_FORM_VARIANTS");
902
903         /** The Arabic Presentation Forms-B Unicode block. */
904         public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B");
905
906         /** The Halfwidth and Fullwidth Forms Unicode block. */
907         public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS = new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS");
908
909         /** The Specials Unicode block. */
910         public static final UnicodeBlock SPECIALS = new UnicodeBlock("SPECIALS");
911
912         /** The Linear B Syllabary Unicode block. */
913         public static final UnicodeBlock LINEAR_B_SYLLABARY = new UnicodeBlock("LINEAR_B_SYLLABARY");
914
915         /** The Linear B Ideograms Unicode block. */
916         public static final UnicodeBlock LINEAR_B_IDEOGRAMS = new UnicodeBlock("LINEAR_B_IDEOGRAMS");
917
918         /** The Aegean Numbers Unicode block. */
919         public static final UnicodeBlock AEGEAN_NUMBERS = new UnicodeBlock("AEGEAN_NUMBERS");
920
921         /** The Old Italic Unicode block. */
922         public static final UnicodeBlock OLD_ITALIC = new UnicodeBlock("OLD_ITALIC");
923
924         /** The Gothic Unicode block. */
925         public static final UnicodeBlock GOTHIC = new UnicodeBlock("GOTHIC");
926
927         /** The Ugaritic Unicode block. */
928         public static final UnicodeBlock UGARITIC = new UnicodeBlock("UGARITIC");
929
930         /** The Deseret Unicode block. */
931         public static final UnicodeBlock DESERET = new UnicodeBlock("DESERET");
932
933         /** The Shavian Unicode block. */
934         public static final UnicodeBlock SHAVIAN = new UnicodeBlock("SHAVIAN");
935
936         /** The Osmanya Unicode block. */
937         public static final UnicodeBlock OSMANYA = new UnicodeBlock("OSMANYA");
938
939         /** The Cypriot Syllabary Unicode block. */
940         public static final UnicodeBlock CYPRIOT_SYLLABARY = new UnicodeBlock("CYPRIOT_SYLLABARY");
941
942         /** The Byzantine Musical Symbols Unicode block. */
943         public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS = new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS");
944
945         /** The Musical Symbols Unicode block. */
946         public static final UnicodeBlock MUSICAL_SYMBOLS = new UnicodeBlock("MUSICAL_SYMBOLS");
947
948         /** The Tai Xuan Jing Symbols Unicode block. */
949         public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS = new UnicodeBlock("TAI_XUAN_JING_SYMBOLS");
950
951         /** The Mathematical Alphanumeric Symbols Unicode block. */
952         public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS = new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS");
953
954         /** The CJK Unified Ideographs Extension B Unicode block. */
955         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B");
956
957         /** The CJK Compatibility Ideographs Supplement Unicode block. */
958         public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT");
959
960         /** The Tags Unicode block. */
961         public static final UnicodeBlock TAGS = new UnicodeBlock("TAGS");
962
963         /** The Variation Selectors Supplement Unicode block. */
964         public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT = new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT");
965
966         /** The Supplementary Private Use Area-A Unicode block. */
967         public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A");
968
969         /** The Supplementary Private Use Area-B Unicode block. */
970         public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B");
971 
972         // Unicode 4.1 blocks.
973
974         /** The Ancient Greek Musical Notation Unicode 4.1 block. */
975         public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION = new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION");
976
977         /** The Ancient Greek Numbers Unicode 4.1 block. */
978         public static final UnicodeBlock ANCIENT_GREEK_NUMBERS = new UnicodeBlock("ANCIENT_GREEK_NUMBERS");
979
980         /** The Arabic Supplement Unicode 4.1 block. */
981         public static final UnicodeBlock ARABIC_SUPPLEMENT = new UnicodeBlock("ARABIC_SUPPLEMENT");
982
983         /** The Buginese Unicode 4.1 block. */
984         public static final UnicodeBlock BUGINESE = new UnicodeBlock("BUGINESE");
985
986         /** The CJK Strokes Unicode 4.1 block. */
987         public static final UnicodeBlock CJK_STROKES = new UnicodeBlock("CJK_STROKES");
988
989         /** The Combining Diacritical Marks Supplement Unicode 4.1 block. */
990         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT = new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT");
991
992         /** The Coptic Unicode 4.1 block. */
993         public static final UnicodeBlock COPTIC = new UnicodeBlock("COPTIC");
994
995         /** The Ethiopic Extended Unicode 4.1 block. */
996         public static final UnicodeBlock ETHIOPIC_EXTENDED = new UnicodeBlock("ETHIOPIC_EXTENDED");
997
998         /** The Ethiopic Supplement Unicode 4.1 block. */
999         public static final UnicodeBlock ETHIOPIC_SUPPLEMENT = new UnicodeBlock("ETHIOPIC_SUPPLEMENT");
1000
1001         /** The Georgian Supplement Unicode 4.1 block. */
1002         public static final UnicodeBlock GEORGIAN_SUPPLEMENT = new UnicodeBlock("GEORGIAN_SUPPLEMENT");
1003
1004         /** The Glagolitic Unicode 4.1 block. */
1005         public static final UnicodeBlock GLAGOLITIC = new UnicodeBlock("GLAGOLITIC");
1006
1007         /** The Kharoshthi Unicode 4.1 block. */
1008         public static final UnicodeBlock KHAROSHTHI = new UnicodeBlock("KHAROSHTHI");
1009
1010         /** The Modifier Tone Letters Unicode 4.1 block. */
1011         public static final UnicodeBlock MODIFIER_TONE_LETTERS = new UnicodeBlock("MODIFIER_TONE_LETTERS");
1012
1013         /** The New Tai Lue Unicode 4.1 block. */
1014         public static final UnicodeBlock NEW_TAI_LUE = new UnicodeBlock("NEW_TAI_LUE");
1015
1016         /** The Old Persian Unicode 4.1 block. */
1017         public static final UnicodeBlock OLD_PERSIAN = new UnicodeBlock("OLD_PERSIAN");
1018
1019         /** The Phonetic Extensions Supplement Unicode 4.1 block. */
1020         public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT = new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT");
1021
1022         /** The Supplemental Punctuation Unicode 4.1 block. */
1023         public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION = new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION");
1024
1025         /** The Syloti Nagri Unicode 4.1 block. */
1026         public static final UnicodeBlock SYLOTI_NAGRI = new UnicodeBlock("SYLOTI_NAGRI");
1027
1028         /** The Tifinagh Unicode 4.1 block. */
1029         public static final UnicodeBlock TIFINAGH = new UnicodeBlock("TIFINAGH");
1030
1031         /** The Vertical Forms Unicode 4.1 block. */
1032         public static final UnicodeBlock VERTICAL_FORMS = new UnicodeBlock("VERTICAL_FORMS");
1033 
1034         // Unicode 5.0 blocks.
1035
1036         /** The NKo Unicode 5.0 block. */
1037         public static final UnicodeBlock NKO = new UnicodeBlock("NKO");
1038
1039         /** The Balinese Unicode 5.0 block. */
1040         public static final UnicodeBlock BALINESE = new UnicodeBlock("BALINESE");
1041
1042         /** The Latin Extended-C Unicode 5.0 block. */
1043         public static final UnicodeBlock LATIN_EXTENDED_C = new UnicodeBlock("LATIN_EXTENDED_C");
1044
1045         /** The Latin Extended-D Unicode 5.0 block. */
1046         public static final UnicodeBlock LATIN_EXTENDED_D = new UnicodeBlock("LATIN_EXTENDED_D");
1047
1048         /** The Phags-pa Unicode 5.0 block. */
1049         public static final UnicodeBlock PHAGS_PA = new UnicodeBlock("PHAGS_PA");
1050
1051         /** The Phoenician Unicode 5.0 block. */
1052         public static final UnicodeBlock PHOENICIAN = new UnicodeBlock("PHOENICIAN");
1053
1054         /** The Cuneiform Unicode 5.0 block. */
1055         public static final UnicodeBlock CUNEIFORM = new UnicodeBlock("CUNEIFORM");
1056
1057         /** The Cuneiform Numbers and Punctuation Unicode 5.0 block. */
1058         public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION = new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION");
1059
1060         /** The Counting Rod Numerals Unicode 5.0 block. */
1061         public static final UnicodeBlock COUNTING_ROD_NUMERALS = new UnicodeBlock("COUNTING_ROD_NUMERALS");
1062 
1063         // Unicode 5.1 blocks.
1064
1065         /** The Sundanese Unicode 5.1 block. */
1066         public static final UnicodeBlock SUNDANESE = new UnicodeBlock("SUNDANESE");
1067
1068         /** The Lepcha Unicode 5.1 block. */
1069         public static final UnicodeBlock LEPCHA = new UnicodeBlock("LEPCHA");
1070
1071         /** The Ol Chiki Unicode 5.1 block. */
1072         public static final UnicodeBlock OL_CHIKI = new UnicodeBlock("OL_CHIKI");
1073
1074         /** The Cyrillic Extended-A Unicode 5.1 block. */
1075         public static final UnicodeBlock CYRILLIC_EXTENDED_A = new UnicodeBlock("CYRILLIC_EXTENDED_A");
1076
1077         /** The Vai Unicode 5.1 block. */
1078         public static final UnicodeBlock VAI = new UnicodeBlock("VAI");
1079
1080         /** The Cyrillic Extended-B Unicode 5.1 block. */
1081         public static final UnicodeBlock CYRILLIC_EXTENDED_B = new UnicodeBlock("CYRILLIC_EXTENDED_B");
1082
1083         /** The Saurashtra Unicode 5.1 block. */
1084         public static final UnicodeBlock SAURASHTRA = new UnicodeBlock("SAURASHTRA");
1085
1086         /** The Kayah Li Unicode 5.1 block. */
1087         public static final UnicodeBlock KAYAH_LI = new UnicodeBlock("KAYAH_LI");
1088
1089         /** The Rejang Unicode 5.1 block. */
1090         public static final UnicodeBlock REJANG = new UnicodeBlock("REJANG");
1091
1092         /** The Cham Unicode 5.1 block. */
1093         public static final UnicodeBlock CHAM = new UnicodeBlock("CHAM");
1094
1095         /** The Ancient Symbols Unicode 5.1 block. */
1096         public static final UnicodeBlock ANCIENT_SYMBOLS = new UnicodeBlock("ANCIENT_SYMBOLS");
1097
1098         /** The Phaistos Disc Unicode 5.1 block. */
1099         public static final UnicodeBlock PHAISTOS_DISC = new UnicodeBlock("PHAISTOS_DISC");
1100
1101         /** The Lycian Unicode 5.1 block. */
1102         public static final UnicodeBlock LYCIAN = new UnicodeBlock("LYCIAN");
1103
1104         /** The Carian Unicode 5.1 block. */
1105         public static final UnicodeBlock CARIAN = new UnicodeBlock("CARIAN");
1106
1107         /** The Lydian Unicode 5.1 block. */
1108         public static final UnicodeBlock LYDIAN = new UnicodeBlock("LYDIAN");
1109
1110         /** The Mahjong Tiles Unicode 5.1 block. */
1111         public static final UnicodeBlock MAHJONG_TILES = new UnicodeBlock("MAHJONG_TILES");
1112
1113         /** The Domino Tiles Unicode 5.1 block. */
1114         public static final UnicodeBlock DOMINO_TILES = new UnicodeBlock("DOMINO_TILES");
1115 
1116         // Unicode 5.2 blocks.
1117
1118         /** The Samaritan Unicode 5.2 block. */
1119         public static final UnicodeBlock SAMARITAN = new UnicodeBlock("SAMARITAN");
1120
1121         /** The Unified Canadian Aboriginal Syllabics Extended Unicode 5.2 block. */
1122         public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED = new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED");
1123
1124         /** The Tai Tham Unicode 5.2 block. */
1125         public static final UnicodeBlock TAI_THAM = new UnicodeBlock("TAI_THAM");
1126
1127         /** The Vedic Extensions Unicode 5.2 block. */
1128         public static final UnicodeBlock VEDIC_EXTENSIONS = new UnicodeBlock("VEDIC_EXTENSIONS");
1129
1130         /** The Lisu Unicode 5.2 block. */
1131         public static final UnicodeBlock LISU = new UnicodeBlock("LISU");
1132
1133         /** The Bamum Unicode 5.2 block. */
1134         public static final UnicodeBlock BAMUM = new UnicodeBlock("BAMUM");
1135
1136         /** The Common Indic Number Forms Unicode 5.2 block. */
1137         public static final UnicodeBlock COMMON_INDIC_NUMBER_FORMS = new UnicodeBlock("COMMON_INDIC_NUMBER_FORMS");
1138
1139         /** The Devanagari Extended Unicode 5.2 block. */
1140         public static final UnicodeBlock DEVANAGARI_EXTENDED = new UnicodeBlock("DEVANAGARI_EXTENDED");
1141
1142         /** The Hangul Jamo Extended-A Unicode 5.2 block. */
1143         public static final UnicodeBlock HANGUL_JAMO_EXTENDED_A = new UnicodeBlock("HANGUL_JAMO_EXTENDED_A");
1144
1145         /** The Javanese Unicode 5.2 block. */
1146         public static final UnicodeBlock JAVANESE = new UnicodeBlock("JAVANESE");
1147
1148         /** The Myanmar Extended-A Unicode 5.2 block. */
1149         public static final UnicodeBlock MYANMAR_EXTENDED_A = new UnicodeBlock("MYANMAR_EXTENDED_A");
1150
1151         /** The Tai Viet Unicode 5.2 block. */
1152         public static final UnicodeBlock TAI_VIET = new UnicodeBlock("TAI_VIET");
1153
1154         /** The Meetei Mayek Unicode 5.2 block. */
1155         public static final UnicodeBlock MEETEI_MAYEK = new UnicodeBlock("MEETEI_MAYEK");
1156
1157         /** The Hangul Jamo Extended-B Unicode 5.2 block. */
1158         public static final UnicodeBlock HANGUL_JAMO_EXTENDED_B = new UnicodeBlock("HANGUL_JAMO_EXTENDED_B");
1159
1160         /** The Imperial Aramaic Unicode 5.2 block. */
1161         public static final UnicodeBlock IMPERIAL_ARAMAIC = new UnicodeBlock("IMPERIAL_ARAMAIC");
1162
1163         /** The Old South Arabian Unicode 5.2 block. */
1164         public static final UnicodeBlock OLD_SOUTH_ARABIAN = new UnicodeBlock("OLD_SOUTH_ARABIAN");
1165
1166         /** The Avestan Unicode 5.2 block. */
1167         public static final UnicodeBlock AVESTAN = new UnicodeBlock("AVESTAN");
1168
1169         /** The Inscriptional Parthian Unicode 5.2 block. */
1170         public static final UnicodeBlock INSCRIPTIONAL_PARTHIAN = new UnicodeBlock("INSCRIPTIONAL_PARTHIAN");
1171
1172         /** The Inscriptional Pahlavi Unicode 5.2 block. */
1173         public static final UnicodeBlock INSCRIPTIONAL_PAHLAVI = new UnicodeBlock("INSCRIPTIONAL_PAHLAVI");
1174
1175         /** The Old Turkic Unicode 5.2 block. */
1176         public static final UnicodeBlock OLD_TURKIC = new UnicodeBlock("OLD_TURKIC");
1177
1178         /** The Rumi Numeral Symbols Unicode 5.2 block. */
1179         public static final UnicodeBlock RUMI_NUMERAL_SYMBOLS = new UnicodeBlock("RUMI_NUMERAL_SYMBOLS");
1180
1181         /** The Kaithi Unicode 5.2 block. */
1182         public static final UnicodeBlock KAITHI = new UnicodeBlock("KAITHI");
1183
1184         /** The Egyptian Hieroglyphs Unicode 5.2 block. */
1185         public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS = new UnicodeBlock("EGYPTIAN_HIEROGLYPHS");
1186
1187         /** The Enclosed Alphanumeric Supplement Unicode 5.2 block. */
1188         public static final UnicodeBlock ENCLOSED_ALPHANUMERIC_SUPPLEMENT = new UnicodeBlock("ENCLOSED_ALPHANUMERIC_SUPPLEMENT");
1189
1190         /** The Enclosed Ideographic Supplement Unicode 5.2 block. */
1191         public static final UnicodeBlock ENCLOSED_IDEOGRAPHIC_SUPPLEMENT = new UnicodeBlock("ENCLOSED_IDEOGRAPHIC_SUPPLEMENT");
1192
1193         /** The CJK Unified Ideographs Extension C Unicode 5.2 block. */
1194         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C");
1195 
1196         // Unicode 6.0 blocks.
1197
1198         /** The Mandaic Unicode 6.0 block. */
1199         public static final UnicodeBlock MANDAIC = new UnicodeBlock("MANDAIC");
1200
1201         /** The Batak Unicode 6.0 block. */
1202         public static final UnicodeBlock BATAK = new UnicodeBlock("BATAK");
1203
1204         /** The Ethiopic Extended-A Unicode 6.0 block. */
1205         public static final UnicodeBlock ETHIOPIC_EXTENDED_A = new UnicodeBlock("ETHIOPIC_EXTENDED_A");
1206
1207         /** The Brahmi Unicode 6.0 block. */
1208         public static final UnicodeBlock BRAHMI = new UnicodeBlock("BRAHMI");
1209
1210         /** The Bamum Supplement Unicode 6.0 block. */
1211         public static final UnicodeBlock BAMUM_SUPPLEMENT = new UnicodeBlock("BAMUM_SUPPLEMENT");
1212
1213         /** The Kana Supplement Unicode 6.0 block. */
1214         public static final UnicodeBlock KANA_SUPPLEMENT = new UnicodeBlock("KANA_SUPPLEMENT");
1215
1216         /** The Playing Cards Unicode 6.0 block. */
1217         public static final UnicodeBlock PLAYING_CARDS = new UnicodeBlock("PLAYING_CARDS");
1218
1219         /** The Miscellaneous Symbols and Pictographs Unicode 6.0 block. */
1220         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS = new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS");
1221
1222         /** The Emoticons Unicode 6.0 block. */
1223         public static final UnicodeBlock EMOTICONS = new UnicodeBlock("EMOTICONS");
1224
1225         /** The Transport and Map Symbols Unicode 6.0 block. */
1226         public static final UnicodeBlock TRANSPORT_AND_MAP_SYMBOLS = new UnicodeBlock("TRANSPORT_AND_MAP_SYMBOLS");
1227
1228         /** The Alchemical Symbols Unicode 6.0 block. */
1229         public static final UnicodeBlock ALCHEMICAL_SYMBOLS = new UnicodeBlock("ALCHEMICAL_SYMBOLS");
1230
1231         /** The CJK Unified Ideographs Extension D Unicode 6.0 block. */
1232         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D");
1233 
1234         /*
1235          * All of the UnicodeBlocks above, in the icu4c UBlock enum order.
1236          */
1237         private static UnicodeBlock[] BLOCKS = new UnicodeBlock[] {
1238             null, // icu4c numbers blocks starting at 1, so index 0 should be null.
1239 
1240             UnicodeBlock.BASIC_LATIN,
1241             UnicodeBlock.LATIN_1_SUPPLEMENT,
1242             UnicodeBlock.LATIN_EXTENDED_A,
1243             UnicodeBlock.LATIN_EXTENDED_B,
1244             UnicodeBlock.IPA_EXTENSIONS,
1245             UnicodeBlock.SPACING_MODIFIER_LETTERS,
1246             UnicodeBlock.COMBINING_DIACRITICAL_MARKS,
1247             UnicodeBlock.GREEK,
1248             UnicodeBlock.CYRILLIC,
1249             UnicodeBlock.ARMENIAN,
1250             UnicodeBlock.HEBREW,
1251             UnicodeBlock.ARABIC,
1252             UnicodeBlock.SYRIAC,
1253             UnicodeBlock.THAANA,
1254             UnicodeBlock.DEVANAGARI,
1255             UnicodeBlock.BENGALI,
1256             UnicodeBlock.GURMUKHI,
1257             UnicodeBlock.GUJARATI,
1258             UnicodeBlock.ORIYA,
1259             UnicodeBlock.TAMIL,
1260             UnicodeBlock.TELUGU,
1261             UnicodeBlock.KANNADA,
1262             UnicodeBlock.MALAYALAM,
1263             UnicodeBlock.SINHALA,
1264             UnicodeBlock.THAI,
1265             UnicodeBlock.LAO,
1266             UnicodeBlock.TIBETAN,
1267             UnicodeBlock.MYANMAR,
1268             UnicodeBlock.GEORGIAN,
1269             UnicodeBlock.HANGUL_JAMO,
1270             UnicodeBlock.ETHIOPIC,
1271             UnicodeBlock.CHEROKEE,
1272             UnicodeBlock.UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,
1273             UnicodeBlock.OGHAM,
1274             UnicodeBlock.RUNIC,
1275             UnicodeBlock.KHMER,
1276             UnicodeBlock.MONGOLIAN,
1277             UnicodeBlock.LATIN_EXTENDED_ADDITIONAL,
1278             UnicodeBlock.GREEK_EXTENDED,
1279             UnicodeBlock.GENERAL_PUNCTUATION,
1280             UnicodeBlock.SUPERSCRIPTS_AND_SUBSCRIPTS,
1281             UnicodeBlock.CURRENCY_SYMBOLS,
1282             UnicodeBlock.COMBINING_MARKS_FOR_SYMBOLS,
1283             UnicodeBlock.LETTERLIKE_SYMBOLS,
1284             UnicodeBlock.NUMBER_FORMS,
1285             UnicodeBlock.ARROWS,
1286             UnicodeBlock.MATHEMATICAL_OPERATORS,
1287             UnicodeBlock.MISCELLANEOUS_TECHNICAL,
1288             UnicodeBlock.CONTROL_PICTURES,
1289             UnicodeBlock.OPTICAL_CHARACTER_RECOGNITION,
1290             UnicodeBlock.ENCLOSED_ALPHANUMERICS,
1291             UnicodeBlock.BOX_DRAWING,
1292             UnicodeBlock.BLOCK_ELEMENTS,
1293             UnicodeBlock.GEOMETRIC_SHAPES,
1294             UnicodeBlock.MISCELLANEOUS_SYMBOLS,
1295             UnicodeBlock.DINGBATS,
1296             UnicodeBlock.BRAILLE_PATTERNS,
1297             UnicodeBlock.CJK_RADICALS_SUPPLEMENT,
1298             UnicodeBlock.KANGXI_RADICALS,
1299             UnicodeBlock.IDEOGRAPHIC_DESCRIPTION_CHARACTERS,
1300             UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION,
1301             UnicodeBlock.HIRAGANA,
1302             UnicodeBlock.KATAKANA,
1303             UnicodeBlock.BOPOMOFO,
1304             UnicodeBlock.HANGUL_COMPATIBILITY_JAMO,
1305             UnicodeBlock.KANBUN,
1306             UnicodeBlock.BOPOMOFO_EXTENDED,
1307             UnicodeBlock.ENCLOSED_CJK_LETTERS_AND_MONTHS,
1308             UnicodeBlock.CJK_COMPATIBILITY,
1309             UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
1310             UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS,
1311             UnicodeBlock.YI_SYLLABLES,
1312             UnicodeBlock.YI_RADICALS,
1313             UnicodeBlock.HANGUL_SYLLABLES,
1314             UnicodeBlock.HIGH_SURROGATES,
1315             UnicodeBlock.HIGH_PRIVATE_USE_SURROGATES,
1316             UnicodeBlock.LOW_SURROGATES,
1317             UnicodeBlock.PRIVATE_USE_AREA,
1318             UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS,
1319             UnicodeBlock.ALPHABETIC_PRESENTATION_FORMS,
1320             UnicodeBlock.ARABIC_PRESENTATION_FORMS_A,
1321             UnicodeBlock.COMBINING_HALF_MARKS,
1322             UnicodeBlock.CJK_COMPATIBILITY_FORMS,
1323             UnicodeBlock.SMALL_FORM_VARIANTS,
1324             UnicodeBlock.ARABIC_PRESENTATION_FORMS_B,
1325             UnicodeBlock.SPECIALS,
1326             UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS,
1327 
1328             // Unicode 3.1.
1329             UnicodeBlock.OLD_ITALIC,
1330             UnicodeBlock.GOTHIC,
1331             UnicodeBlock.DESERET,
1332             UnicodeBlock.BYZANTINE_MUSICAL_SYMBOLS,
1333             UnicodeBlock.MUSICAL_SYMBOLS,
1334             UnicodeBlock.MATHEMATICAL_ALPHANUMERIC_SYMBOLS,
1335             UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B,
1336             UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT,
1337             UnicodeBlock.TAGS,
1338 
1339             // Unicode 3.2.
1340             UnicodeBlock.CYRILLIC_SUPPLEMENTARY,
1341             UnicodeBlock.TAGALOG,
1342             UnicodeBlock.HANUNOO,
1343             UnicodeBlock.BUHID,
1344             UnicodeBlock.TAGBANWA,
1345             UnicodeBlock.MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A,
1346             UnicodeBlock.SUPPLEMENTAL_ARROWS_A,
1347             UnicodeBlock.SUPPLEMENTAL_ARROWS_B,
1348             UnicodeBlock.MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B,
1349             UnicodeBlock.SUPPLEMENTAL_MATHEMATICAL_OPERATORS,
1350             UnicodeBlock.KATAKANA_PHONETIC_EXTENSIONS,
1351             UnicodeBlock.VARIATION_SELECTORS,
1352             UnicodeBlock.SUPPLEMENTARY_PRIVATE_USE_AREA_A,
1353             UnicodeBlock.SUPPLEMENTARY_PRIVATE_USE_AREA_B,
1354 
1355             // Unicode 4.0.
1356             UnicodeBlock.LIMBU,
1357             UnicodeBlock.TAI_LE,
1358             UnicodeBlock.KHMER_SYMBOLS,
1359             UnicodeBlock.PHONETIC_EXTENSIONS,
1360             UnicodeBlock.MISCELLANEOUS_SYMBOLS_AND_ARROWS,
1361             UnicodeBlock.YIJING_HEXAGRAM_SYMBOLS,
1362             UnicodeBlock.LINEAR_B_SYLLABARY,
1363             UnicodeBlock.LINEAR_B_IDEOGRAMS,
1364             UnicodeBlock.AEGEAN_NUMBERS,
1365             UnicodeBlock.UGARITIC,
1366             UnicodeBlock.SHAVIAN,
1367             UnicodeBlock.OSMANYA,
1368             UnicodeBlock.CYPRIOT_SYLLABARY,
1369             UnicodeBlock.TAI_XUAN_JING_SYMBOLS,
1370             UnicodeBlock.VARIATION_SELECTORS_SUPPLEMENT,
1371 
1372             // Unicode 4.1.
1373             UnicodeBlock.ANCIENT_GREEK_MUSICAL_NOTATION,
1374             UnicodeBlock.ANCIENT_GREEK_NUMBERS,
1375             UnicodeBlock.ARABIC_SUPPLEMENT,
1376             UnicodeBlock.BUGINESE,
1377             UnicodeBlock.CJK_STROKES,
1378             UnicodeBlock.COMBINING_DIACRITICAL_MARKS_SUPPLEMENT,
1379             UnicodeBlock.COPTIC,
1380             UnicodeBlock.ETHIOPIC_EXTENDED,
1381             UnicodeBlock.ETHIOPIC_SUPPLEMENT,
1382             UnicodeBlock.GEORGIAN_SUPPLEMENT,
1383             UnicodeBlock.GLAGOLITIC,
1384             UnicodeBlock.KHAROSHTHI,
1385             UnicodeBlock.MODIFIER_TONE_LETTERS,
1386             UnicodeBlock.NEW_TAI_LUE,
1387             UnicodeBlock.OLD_PERSIAN,
1388             UnicodeBlock.PHONETIC_EXTENSIONS_SUPPLEMENT,
1389             UnicodeBlock.SUPPLEMENTAL_PUNCTUATION,
1390             UnicodeBlock.SYLOTI_NAGRI,
1391             UnicodeBlock.TIFINAGH,
1392             UnicodeBlock.VERTICAL_FORMS,
1393 
1394             // Unicode 5.0.
1395             UnicodeBlock.NKO,
1396             UnicodeBlock.BALINESE,
1397             UnicodeBlock.LATIN_EXTENDED_C,
1398             UnicodeBlock.LATIN_EXTENDED_D,
1399             UnicodeBlock.PHAGS_PA,
1400             UnicodeBlock.PHOENICIAN,
1401             UnicodeBlock.CUNEIFORM,
1402             UnicodeBlock.CUNEIFORM_NUMBERS_AND_PUNCTUATION,
1403             UnicodeBlock.COUNTING_ROD_NUMERALS,
1404 
1405             // Unicode 5.1.
1406             UnicodeBlock.SUNDANESE,
1407             UnicodeBlock.LEPCHA,
1408             UnicodeBlock.OL_CHIKI,
1409             UnicodeBlock.CYRILLIC_EXTENDED_A,
1410             UnicodeBlock.VAI,
1411             UnicodeBlock.CYRILLIC_EXTENDED_B,
1412             UnicodeBlock.SAURASHTRA,
1413             UnicodeBlock.KAYAH_LI,
1414             UnicodeBlock.REJANG,
1415             UnicodeBlock.CHAM,
1416             UnicodeBlock.ANCIENT_SYMBOLS,
1417             UnicodeBlock.PHAISTOS_DISC,
1418             UnicodeBlock.LYCIAN,
1419             UnicodeBlock.CARIAN,
1420             UnicodeBlock.LYDIAN,
1421             UnicodeBlock.MAHJONG_TILES,
1422             UnicodeBlock.DOMINO_TILES,
1423 
1424             // Unicode 5.2.
1425             UnicodeBlock.SAMARITAN,
1426             UnicodeBlock.UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED,
1427             UnicodeBlock.TAI_THAM,
1428             UnicodeBlock.VEDIC_EXTENSIONS,
1429             UnicodeBlock.LISU,
1430             UnicodeBlock.BAMUM,
1431             UnicodeBlock.COMMON_INDIC_NUMBER_FORMS,
1432             UnicodeBlock.DEVANAGARI_EXTENDED,
1433             UnicodeBlock.HANGUL_JAMO_EXTENDED_A,
1434             UnicodeBlock.JAVANESE,
1435             UnicodeBlock.MYANMAR_EXTENDED_A,
1436             UnicodeBlock.TAI_VIET,
1437             UnicodeBlock.MEETEI_MAYEK,
1438             UnicodeBlock.HANGUL_JAMO_EXTENDED_B,
1439             UnicodeBlock.IMPERIAL_ARAMAIC,
1440             UnicodeBlock.OLD_SOUTH_ARABIAN,
1441             UnicodeBlock.AVESTAN,
1442             UnicodeBlock.INSCRIPTIONAL_PARTHIAN,
1443             UnicodeBlock.INSCRIPTIONAL_PAHLAVI,
1444             UnicodeBlock.OLD_TURKIC,
1445             UnicodeBlock.RUMI_NUMERAL_SYMBOLS,
1446             UnicodeBlock.KAITHI,
1447             UnicodeBlock.EGYPTIAN_HIEROGLYPHS,
1448             UnicodeBlock.ENCLOSED_ALPHANUMERIC_SUPPLEMENT,
1449             UnicodeBlock.ENCLOSED_IDEOGRAPHIC_SUPPLEMENT,
1450             UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C,
1451 
1452             // Unicode 6.0.
1453             UnicodeBlock.MANDAIC,
1454             UnicodeBlock.BATAK,
1455             UnicodeBlock.ETHIOPIC_EXTENDED_A,
1456             UnicodeBlock.BRAHMI,
1457             UnicodeBlock.BAMUM_SUPPLEMENT,
1458             UnicodeBlock.KANA_SUPPLEMENT,
1459             UnicodeBlock.PLAYING_CARDS,
1460             UnicodeBlock.MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS,
1461             UnicodeBlock.EMOTICONS,
1462             UnicodeBlock.TRANSPORT_AND_MAP_SYMBOLS,
1463             UnicodeBlock.ALCHEMICAL_SYMBOLS,
1464             UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D,
1465         };
1466 
1467         /**
1468          * Returns the Unicode block for the given block name, or null if there is no
1469          * such block.
1470          *
1471          * <p>Block names may be one of the following:
1472          * <ul>
1473          * <li>Canonical block name, as defined by the Unicode specification;
1474          * case-insensitive.</li>
1475          * <li>Canonical block name without any spaces, as defined by the
1476          * Unicode specification; case-insensitive.</li>
1477          * <li>A {@code UnicodeBlock} constant identifier. This is determined by
1478          * converting the canonical name to uppercase and replacing all spaces and hyphens
1479          * with underscores.</li>
1480          * </ul>
1481          *
1482          * @throws NullPointerException
1483          *             if {@code blockName == null}.
1484          * @throws IllegalArgumentException
1485          *             if {@code blockName} is not the name of any known block.
1486          * @since 1.5
1487          */
forName(String blockName)1488         public static UnicodeBlock forName(String blockName) {
1489             if (blockName == null) {
1490                 throw new NullPointerException("blockName == null");
1491             }
1492             int block = unicodeBlockForName(blockName);
1493             if (block == -1) {
1494                 throw new IllegalArgumentException("Unknown block: " + blockName);
1495             }
1496             return BLOCKS[block];
1497         }
1498 
1499         /**
1500          * Returns the Unicode block containing the given code point, or null if the
1501          * code point does not belong to any known block.
1502          */
of(char c)1503         public static UnicodeBlock of(char c) {
1504             return of((int) c);
1505         }
1506 
1507         /**
1508          * Returns the Unicode block containing the given code point, or null if the
1509          * code point does not belong to any known block.
1510          */
of(int codePoint)1511         public static UnicodeBlock of(int codePoint) {
1512             checkValidCodePoint(codePoint);
1513             int block = unicodeBlockForCodePoint(codePoint);
1514             if (block == -1 || block >= BLOCKS.length) {
1515                 return null;
1516             }
1517             return BLOCKS[block];
1518         }
1519 
        /**
         * Creates a block with the given name, which is passed unchanged to the
         * superclass constructor. The constructor is private, so instances can only
         * be created from within this class.
         */
        private UnicodeBlock(String blockName) {
            super(blockName);
        }
1523     }
1524 
    // Native lookup of a Unicode block by name. UnicodeBlock.forName treats a result of -1
    // as "no such block" and any other result as an index into UnicodeBlock.BLOCKS.
    private static native int unicodeBlockForName(String blockName);

    // Native lookup of the Unicode block containing codePoint. UnicodeBlock.of(int) treats
    // -1, or an index past the end of UnicodeBlock.BLOCKS, as "no known block".
    private static native int unicodeBlockForCodePoint(int codePoint);

    // Native lookup of a Unicode script by name.
    // NOTE(review): the parameter is called blockName but presumably carries a script name,
    // and the return-value convention is not visible here; confirm against the callers.
    private static native int unicodeScriptForName(String blockName);

    // Native lookup of the Unicode script for codePoint.
    // NOTE(review): the return-value convention is not visible here; confirm against the
    // callers.
    private static native int unicodeScriptForCodePoint(int codePoint);
1532 
1533 
    /**
     * Constructs a new {@code Character} with the specified primitive char
     * value. This always creates a new object; {@link #valueOf(char)} can
     * return a cached instance instead and is usually preferable.
     *
     * @param value
     *            the primitive char value to store in the new instance.
     */
    public Character(char value) {
        this.value = value;
    }
1544 
1545     /**
1546      * Gets the primitive value of this character.
1547      *
1548      * @return this object's primitive value.
1549      */
charValue()1550     public char charValue() {
1551         return value;
1552     }
1553 
checkValidCodePoint(int codePoint)1554     private static void checkValidCodePoint(int codePoint) {
1555         if (!isValidCodePoint(codePoint)) {
1556             throw new IllegalArgumentException("Invalid code point: " + codePoint);
1557         }
1558     }
1559 
1560     /**
1561      * Compares this object to the specified character object to determine their
1562      * relative order.
1563      *
1564      * @param c
1565      *            the character object to compare this object to.
1566      * @return {@code 0} if the value of this character and the value of
1567      *         {@code c} are equal; a positive value if the value of this
1568      *         character is greater than the value of {@code c}; a negative
1569      *         value if the value of this character is less than the value of
1570      *         {@code c}.
1571      * @see java.lang.Comparable
1572      * @since 1.2
1573      */
compareTo(Character c)1574     public int compareTo(Character c) {
1575         return compare(value, c.value);
1576     }
1577 
1578     /**
1579      * Compares two {@code char} values.
1580      * @return 0 if lhs = rhs, less than 0 if lhs &lt; rhs, and greater than 0 if lhs &gt; rhs.
1581      * @since 1.7
1582      */
compare(char lhs, char rhs)1583     public static int compare(char lhs, char rhs) {
1584         return lhs - rhs;
1585     }
1586 
1587     /**
1588      * Returns a {@code Character} instance for the {@code char} value passed.
1589      * <p>
1590      * If it is not necessary to get a new {@code Character} instance, it is
1591      * recommended to use this method instead of the constructor, since it
1592      * maintains a cache of instances which may result in better performance.
1593      *
1594      * @param c
1595      *            the char value for which to get a {@code Character} instance.
1596      * @return the {@code Character} instance for {@code c}.
1597      * @since 1.5
1598      */
valueOf(char c)1599     public static Character valueOf(char c) {
1600         return c < 128 ? SMALL_VALUES[c] : new Character(c);
1601     }
1602 
1603     /**
1604      * A cache of instances used by {@link #valueOf(char)} and auto-boxing
1605      */
1606     private static final Character[] SMALL_VALUES = new Character[128];
1607 
1608     static {
1609         for (int i = 0; i < 128; i++) {
1610             SMALL_VALUES[i] = new Character((char) i);
1611         }
1612     }
1613     /**
1614      * Indicates whether {@code codePoint} is a valid Unicode code point.
1615      *
1616      * @param codePoint
1617      *            the code point to test.
1618      * @return {@code true} if {@code codePoint} is a valid Unicode code point;
1619      *         {@code false} otherwise.
1620      * @since 1.5
1621      */
isValidCodePoint(int codePoint)1622     public static boolean isValidCodePoint(int codePoint) {
1623         return (MIN_CODE_POINT <= codePoint && MAX_CODE_POINT >= codePoint);
1624     }
1625 
1626     /**
1627      * Indicates whether {@code codePoint} is within the supplementary code
1628      * point range.
1629      *
1630      * @param codePoint
1631      *            the code point to test.
1632      * @return {@code true} if {@code codePoint} is within the supplementary
1633      *         code point range; {@code false} otherwise.
1634      * @since 1.5
1635      */
isSupplementaryCodePoint(int codePoint)1636     public static boolean isSupplementaryCodePoint(int codePoint) {
1637         return (MIN_SUPPLEMENTARY_CODE_POINT <= codePoint && MAX_CODE_POINT >= codePoint);
1638     }
1639 
1640     /**
1641      * Indicates whether {@code ch} is a high- (or leading-) surrogate code unit
1642      * that is used for representing supplementary characters in UTF-16
1643      * encoding.
1644      *
1645      * @param ch
1646      *            the character to test.
1647      * @return {@code true} if {@code ch} is a high-surrogate code unit;
1648      *         {@code false} otherwise.
1649      * @see #isLowSurrogate(char)
1650      * @since 1.5
1651      */
isHighSurrogate(char ch)1652     public static boolean isHighSurrogate(char ch) {
1653         return (MIN_HIGH_SURROGATE <= ch && MAX_HIGH_SURROGATE >= ch);
1654     }
1655 
1656     /**
1657      * Indicates whether {@code ch} is a low- (or trailing-) surrogate code unit
1658      * that is used for representing supplementary characters in UTF-16
1659      * encoding.
1660      *
1661      * @param ch
1662      *            the character to test.
1663      * @return {@code true} if {@code ch} is a low-surrogate code unit;
1664      *         {@code false} otherwise.
1665      * @see #isHighSurrogate(char)
1666      * @since 1.5
1667      */
isLowSurrogate(char ch)1668     public static boolean isLowSurrogate(char ch) {
1669         return (MIN_LOW_SURROGATE <= ch && MAX_LOW_SURROGATE >= ch);
1670     }
1671 
1672     /**
1673      * Returns true if the given character is a high or low surrogate.
1674      * @since 1.7
1675      */
isSurrogate(char ch)1676     public static boolean isSurrogate(char ch) {
1677         return ch >= MIN_SURROGATE && ch <= MAX_SURROGATE;
1678     }
1679 
1680     /**
1681      * Indicates whether the specified character pair is a valid surrogate pair.
1682      *
1683      * @param high
1684      *            the high surrogate unit to test.
1685      * @param low
1686      *            the low surrogate unit to test.
1687      * @return {@code true} if {@code high} is a high-surrogate code unit and
1688      *         {@code low} is a low-surrogate code unit; {@code false}
1689      *         otherwise.
1690      * @see #isHighSurrogate(char)
1691      * @see #isLowSurrogate(char)
1692      * @since 1.5
1693      */
isSurrogatePair(char high, char low)1694     public static boolean isSurrogatePair(char high, char low) {
1695         return (isHighSurrogate(high) && isLowSurrogate(low));
1696     }
1697 
1698     /**
1699      * Calculates the number of {@code char} values required to represent the
1700      * specified Unicode code point. This method checks if the {@code codePoint}
1701      * is greater than or equal to {@code 0x10000}, in which case {@code 2} is
1702      * returned, otherwise {@code 1}. To test if the code point is valid, use
1703      * the {@link #isValidCodePoint(int)} method.
1704      *
1705      * @param codePoint
1706      *            the code point for which to calculate the number of required
1707      *            chars.
1708      * @return {@code 2} if {@code codePoint >= 0x10000}; {@code 1} otherwise.
1709      * @see #isValidCodePoint(int)
1710      * @see #isSupplementaryCodePoint(int)
1711      * @since 1.5
1712      */
charCount(int codePoint)1713     public static int charCount(int codePoint) {
1714         return (codePoint >= 0x10000 ? 2 : 1);
1715     }
1716 
1717     /**
1718      * Converts a surrogate pair into a Unicode code point. This method assumes
1719      * that the pair are valid surrogates. If the pair are <i>not</i> valid
1720      * surrogates, then the result is indeterminate. The
1721      * {@link #isSurrogatePair(char, char)} method should be used prior to this
1722      * method to validate the pair.
1723      *
1724      * @param high
1725      *            the high surrogate unit.
1726      * @param low
1727      *            the low surrogate unit.
1728      * @return the Unicode code point corresponding to the surrogate unit pair.
1729      * @see #isSurrogatePair(char, char)
1730      * @since 1.5
1731      */
toCodePoint(char high, char low)1732     public static int toCodePoint(char high, char low) {
1733         // See RFC 2781, Section 2.2
1734         // http://www.ietf.org/rfc/rfc2781.txt
1735         int h = (high & 0x3FF) << 10;
1736         int l = low & 0x3FF;
1737         return (h | l) + 0x10000;
1738     }
1739 
1740     /**
1741      * Returns the code point at {@code index} in the specified sequence of
1742      * character units. If the unit at {@code index} is a high-surrogate unit,
1743      * {@code index + 1} is less than the length of the sequence and the unit at
1744      * {@code index + 1} is a low-surrogate unit, then the supplementary code
1745      * point represented by the pair is returned; otherwise the {@code char}
1746      * value at {@code index} is returned.
1747      *
1748      * @param seq
1749      *            the source sequence of {@code char} units.
1750      * @param index
1751      *            the position in {@code seq} from which to retrieve the code
1752      *            point.
1753      * @return the Unicode code point or {@code char} value at {@code index} in
1754      *         {@code seq}.
1755      * @throws NullPointerException
1756      *             if {@code seq} is {@code null}.
1757      * @throws IndexOutOfBoundsException
1758      *             if the {@code index} is negative or greater than or equal to
1759      *             the length of {@code seq}.
1760      * @since 1.5
1761      */
codePointAt(CharSequence seq, int index)1762     public static int codePointAt(CharSequence seq, int index) {
1763         if (seq == null) {
1764             throw new NullPointerException("seq == null");
1765         }
1766         int len = seq.length();
1767         if (index < 0 || index >= len) {
1768             throw new IndexOutOfBoundsException();
1769         }
1770 
1771         char high = seq.charAt(index++);
1772         if (index >= len) {
1773             return high;
1774         }
1775         char low = seq.charAt(index);
1776         if (isSurrogatePair(high, low)) {
1777             return toCodePoint(high, low);
1778         }
1779         return high;
1780     }
1781 
1782     /**
1783      * Returns the code point at {@code index} in the specified array of
1784      * character units. If the unit at {@code index} is a high-surrogate unit,
1785      * {@code index + 1} is less than the length of the array and the unit at
1786      * {@code index + 1} is a low-surrogate unit, then the supplementary code
1787      * point represented by the pair is returned; otherwise the {@code char}
1788      * value at {@code index} is returned.
1789      *
1790      * @param seq
1791      *            the source array of {@code char} units.
1792      * @param index
1793      *            the position in {@code seq} from which to retrieve the code
1794      *            point.
1795      * @return the Unicode code point or {@code char} value at {@code index} in
1796      *         {@code seq}.
1797      * @throws NullPointerException
1798      *             if {@code seq} is {@code null}.
1799      * @throws IndexOutOfBoundsException
1800      *             if the {@code index} is negative or greater than or equal to
1801      *             the length of {@code seq}.
1802      * @since 1.5
1803      */
codePointAt(char[] seq, int index)1804     public static int codePointAt(char[] seq, int index) {
1805         if (seq == null) {
1806             throw new NullPointerException("seq == null");
1807         }
1808         int len = seq.length;
1809         if (index < 0 || index >= len) {
1810             throw new IndexOutOfBoundsException();
1811         }
1812 
1813         char high = seq[index++];
1814         if (index >= len) {
1815             return high;
1816         }
1817         char low = seq[index];
1818         if (isSurrogatePair(high, low)) {
1819             return toCodePoint(high, low);
1820         }
1821         return high;
1822     }
1823 
1824     /**
1825      * Returns the code point at {@code index} in the specified array of
1826      * character units, where {@code index} has to be less than {@code limit}.
1827      * If the unit at {@code index} is a high-surrogate unit, {@code index + 1}
1828      * is less than {@code limit} and the unit at {@code index + 1} is a
1829      * low-surrogate unit, then the supplementary code point represented by the
1830      * pair is returned; otherwise the {@code char} value at {@code index} is
1831      * returned.
1832      *
1833      * @param seq
1834      *            the source array of {@code char} units.
1835      * @param index
1836      *            the position in {@code seq} from which to get the code point.
1837      * @param limit
1838      *            the index after the last unit in {@code seq} that can be used.
1839      * @return the Unicode code point or {@code char} value at {@code index} in
1840      *         {@code seq}.
1841      * @throws NullPointerException
1842      *             if {@code seq} is {@code null}.
1843      * @throws IndexOutOfBoundsException
1844      *             if {@code index < 0}, {@code index >= limit},
1845      *             {@code limit < 0} or if {@code limit} is greater than the
1846      *             length of {@code seq}.
1847      * @since 1.5
1848      */
codePointAt(char[] seq, int index, int limit)1849     public static int codePointAt(char[] seq, int index, int limit) {
1850         if (index < 0 || index >= limit || limit < 0 || limit > seq.length) {
1851             throw new IndexOutOfBoundsException();
1852         }
1853 
1854         char high = seq[index++];
1855         if (index >= limit) {
1856             return high;
1857         }
1858         char low = seq[index];
1859         if (isSurrogatePair(high, low)) {
1860             return toCodePoint(high, low);
1861         }
1862         return high;
1863     }
1864 
1865     /**
1866      * Returns the code point that precedes {@code index} in the specified
1867      * sequence of character units. If the unit at {@code index - 1} is a
1868      * low-surrogate unit, {@code index - 2} is not negative and the unit at
1869      * {@code index - 2} is a high-surrogate unit, then the supplementary code
1870      * point represented by the pair is returned; otherwise the {@code char}
1871      * value at {@code index - 1} is returned.
1872      *
1873      * @param seq
1874      *            the source sequence of {@code char} units.
1875      * @param index
1876      *            the position in {@code seq} following the code
1877      *            point that should be returned.
1878      * @return the Unicode code point or {@code char} value before {@code index}
1879      *         in {@code seq}.
1880      * @throws NullPointerException
1881      *             if {@code seq} is {@code null}.
1882      * @throws IndexOutOfBoundsException
1883      *             if the {@code index} is less than 1 or greater than the
1884      *             length of {@code seq}.
1885      * @since 1.5
1886      */
codePointBefore(CharSequence seq, int index)1887     public static int codePointBefore(CharSequence seq, int index) {
1888         if (seq == null) {
1889             throw new NullPointerException("seq == null");
1890         }
1891         int len = seq.length();
1892         if (index < 1 || index > len) {
1893             throw new IndexOutOfBoundsException();
1894         }
1895 
1896         char low = seq.charAt(--index);
1897         if (--index < 0) {
1898             return low;
1899         }
1900         char high = seq.charAt(index);
1901         if (isSurrogatePair(high, low)) {
1902             return toCodePoint(high, low);
1903         }
1904         return low;
1905     }
1906 
1907     /**
1908      * Returns the code point that precedes {@code index} in the specified
1909      * array of character units. If the unit at {@code index - 1} is a
1910      * low-surrogate unit, {@code index - 2} is not negative and the unit at
1911      * {@code index - 2} is a high-surrogate unit, then the supplementary code
1912      * point represented by the pair is returned; otherwise the {@code char}
1913      * value at {@code index - 1} is returned.
1914      *
1915      * @param seq
1916      *            the source array of {@code char} units.
1917      * @param index
1918      *            the position in {@code seq} following the code
1919      *            point that should be returned.
1920      * @return the Unicode code point or {@code char} value before {@code index}
1921      *         in {@code seq}.
1922      * @throws NullPointerException
1923      *             if {@code seq} is {@code null}.
1924      * @throws IndexOutOfBoundsException
1925      *             if the {@code index} is less than 1 or greater than the
1926      *             length of {@code seq}.
1927      * @since 1.5
1928      */
codePointBefore(char[] seq, int index)1929     public static int codePointBefore(char[] seq, int index) {
1930         if (seq == null) {
1931             throw new NullPointerException("seq == null");
1932         }
1933         int len = seq.length;
1934         if (index < 1 || index > len) {
1935             throw new IndexOutOfBoundsException();
1936         }
1937 
1938         char low = seq[--index];
1939         if (--index < 0) {
1940             return low;
1941         }
1942         char high = seq[index];
1943         if (isSurrogatePair(high, low)) {
1944             return toCodePoint(high, low);
1945         }
1946         return low;
1947     }
1948 
1949     /**
1950      * Returns the code point that precedes the {@code index} in the specified
1951      * array of character units and is not less than {@code start}. If the unit
1952      * at {@code index - 1} is a low-surrogate unit, {@code index - 2} is not
1953      * less than {@code start} and the unit at {@code index - 2} is a
1954      * high-surrogate unit, then the supplementary code point represented by the
1955      * pair is returned; otherwise the {@code char} value at {@code index - 1}
1956      * is returned.
1957      *
1958      * @param seq
1959      *            the source array of {@code char} units.
1960      * @param index
1961      *            the position in {@code seq} following the code point that
1962      *            should be returned.
1963      * @param start
1964      *            the index of the first element in {@code seq}.
1965      * @return the Unicode code point or {@code char} value before {@code index}
1966      *         in {@code seq}.
1967      * @throws NullPointerException
1968      *             if {@code seq} is {@code null}.
1969      * @throws IndexOutOfBoundsException
1970      *             if the {@code index <= start}, {@code start < 0},
1971      *             {@code index} is greater than the length of {@code seq}, or
1972      *             if {@code start} is equal or greater than the length of
1973      *             {@code seq}.
1974      * @since 1.5
1975      */
codePointBefore(char[] seq, int index, int start)1976     public static int codePointBefore(char[] seq, int index, int start) {
1977         if (seq == null) {
1978             throw new NullPointerException("seq == null");
1979         }
1980         int len = seq.length;
1981         if (index <= start || index > len || start < 0 || start >= len) {
1982             throw new IndexOutOfBoundsException();
1983         }
1984 
1985         char low = seq[--index];
1986         if (--index < start) {
1987             return low;
1988         }
1989         char high = seq[index];
1990         if (isSurrogatePair(high, low)) {
1991             return toCodePoint(high, low);
1992         }
1993         return low;
1994     }
1995 
1996     /**
1997      * Converts the specified Unicode code point into a UTF-16 encoded sequence
1998      * and copies the value(s) into the char array {@code dst}, starting at
1999      * index {@code dstIndex}.
2000      *
2001      * @param codePoint
2002      *            the Unicode code point to encode.
2003      * @param dst
2004      *            the destination array to copy the encoded value into.
2005      * @param dstIndex
2006      *            the index in {@code dst} from where to start copying.
2007      * @return the number of {@code char} value units copied into {@code dst}.
2008      * @throws IllegalArgumentException if {@code codePoint} is not a valid code point.
2009      * @throws NullPointerException
2010      *             if {@code dst} is {@code null}.
2011      * @throws IndexOutOfBoundsException
2012      *             if {@code dstIndex} is negative, greater than or equal to
2013      *             {@code dst.length} or equals {@code dst.length - 1} when
2014      *             {@code codePoint} is a
2015      *             {@link #isSupplementaryCodePoint(int) supplementary code point}.
2016      * @since 1.5
2017      */
toChars(int codePoint, char[] dst, int dstIndex)2018     public static int toChars(int codePoint, char[] dst, int dstIndex) {
2019         checkValidCodePoint(codePoint);
2020         if (dst == null) {
2021             throw new NullPointerException("dst == null");
2022         }
2023         if (dstIndex < 0 || dstIndex >= dst.length) {
2024             throw new IndexOutOfBoundsException();
2025         }
2026 
2027         if (isSupplementaryCodePoint(codePoint)) {
2028             if (dstIndex == dst.length - 1) {
2029                 throw new IndexOutOfBoundsException();
2030             }
2031             // See RFC 2781, Section 2.1
2032             // http://www.ietf.org/rfc/rfc2781.txt
2033             int cpPrime = codePoint - 0x10000;
2034             int high = 0xD800 | ((cpPrime >> 10) & 0x3FF);
2035             int low = 0xDC00 | (cpPrime & 0x3FF);
2036             dst[dstIndex] = (char) high;
2037             dst[dstIndex + 1] = (char) low;
2038             return 2;
2039         }
2040 
2041         dst[dstIndex] = (char) codePoint;
2042         return 1;
2043     }
2044 
2045     /**
2046      * Converts the specified Unicode code point into a UTF-16 encoded sequence
2047      * and returns it as a char array.
2048      *
2049      * @param codePoint
2050      *            the Unicode code point to encode.
2051      * @return the UTF-16 encoded char sequence. If {@code codePoint} is a
2052      *         {@link #isSupplementaryCodePoint(int) supplementary code point},
2053      *         then the returned array contains two characters, otherwise it
2054      *         contains just one character.
2055      * @throws IllegalArgumentException if {@code codePoint} is not a valid code point.
2056      * @since 1.5
2057      */
toChars(int codePoint)2058     public static char[] toChars(int codePoint) {
2059         checkValidCodePoint(codePoint);
2060         if (isSupplementaryCodePoint(codePoint)) {
2061             int cpPrime = codePoint - 0x10000;
2062             int high = 0xD800 | ((cpPrime >> 10) & 0x3FF);
2063             int low = 0xDC00 | (cpPrime & 0x3FF);
2064             return new char[] { (char) high, (char) low };
2065         }
2066         return new char[] { (char) codePoint };
2067     }
2068 
2069     /**
2070      * Counts the number of Unicode code points in the subsequence of the
2071      * specified character sequence, as delineated by {@code beginIndex} and
2072      * {@code endIndex}. Any surrogate values with missing pair values will be
2073      * counted as one code point.
2074      *
2075      * @param seq
2076      *            the {@code CharSequence} to look through.
2077      * @param beginIndex
2078      *            the inclusive index to begin counting at.
2079      * @param endIndex
2080      *            the exclusive index to stop counting at.
2081      * @return the number of Unicode code points.
2082      * @throws NullPointerException
2083      *             if {@code seq} is {@code null}.
2084      * @throws IndexOutOfBoundsException
2085      *             if {@code beginIndex < 0}, {@code beginIndex > endIndex} or
2086      *             if {@code endIndex} is greater than the length of {@code seq}.
2087      * @since 1.5
2088      */
codePointCount(CharSequence seq, int beginIndex, int endIndex)2089     public static int codePointCount(CharSequence seq, int beginIndex,
2090             int endIndex) {
2091         if (seq == null) {
2092             throw new NullPointerException("seq == null");
2093         }
2094         int len = seq.length();
2095         if (beginIndex < 0 || endIndex > len || beginIndex > endIndex) {
2096             throw new IndexOutOfBoundsException();
2097         }
2098 
2099         int result = 0;
2100         for (int i = beginIndex; i < endIndex; i++) {
2101             char c = seq.charAt(i);
2102             if (isHighSurrogate(c)) {
2103                 if (++i < endIndex) {
2104                     c = seq.charAt(i);
2105                     if (!isLowSurrogate(c)) {
2106                         result++;
2107                     }
2108                 }
2109             }
2110             result++;
2111         }
2112         return result;
2113     }
2114 
2115     /**
2116      * Counts the number of Unicode code points in the subsequence of the
2117      * specified char array, as delineated by {@code offset} and {@code count}.
2118      * Any surrogate values with missing pair values will be counted as one code
2119      * point.
2120      *
2121      * @param seq
2122      *            the char array to look through
2123      * @param offset
2124      *            the inclusive index to begin counting at.
2125      * @param count
2126      *            the number of {@code char} values to look through in
2127      *            {@code seq}.
2128      * @return the number of Unicode code points.
2129      * @throws NullPointerException
2130      *             if {@code seq} is {@code null}.
2131      * @throws IndexOutOfBoundsException
2132      *             if {@code offset < 0}, {@code count < 0} or if
2133      *             {@code offset + count} is greater than the length of
2134      *             {@code seq}.
2135      * @since 1.5
2136      */
codePointCount(char[] seq, int offset, int count)2137     public static int codePointCount(char[] seq, int offset, int count) {
2138         Arrays.checkOffsetAndCount(seq.length, offset, count);
2139         int endIndex = offset + count;
2140         int result = 0;
2141         for (int i = offset; i < endIndex; i++) {
2142             char c = seq[i];
2143             if (isHighSurrogate(c)) {
2144                 if (++i < endIndex) {
2145                     c = seq[i];
2146                     if (!isLowSurrogate(c)) {
2147                         result++;
2148                     }
2149                 }
2150             }
2151             result++;
2152         }
2153         return result;
2154     }
2155 
2156     /**
2157      * Determines the index in the specified character sequence that is offset
2158      * {@code codePointOffset} code points from {@code index}.
2159      *
2160      * @param seq
2161      *            the character sequence to find the index in.
2162      * @param index
2163      *            the start index in {@code seq}.
2164      * @param codePointOffset
2165      *            the number of code points to look backwards or forwards; may
2166      *            be a negative or positive value.
2167      * @return the index in {@code seq} that is {@code codePointOffset} code
2168      *         points away from {@code index}.
2169      * @throws NullPointerException
2170      *             if {@code seq} is {@code null}.
2171      * @throws IndexOutOfBoundsException
2172      *             if {@code index < 0}, {@code index} is greater than the
2173      *             length of {@code seq}, or if there are not enough values in
2174      *             {@code seq} to skip {@code codePointOffset} code points
2175      *             forwards or backwards (if {@code codePointOffset} is
2176      *             negative) from {@code index}.
2177      * @since 1.5
2178      */
offsetByCodePoints(CharSequence seq, int index, int codePointOffset)2179     public static int offsetByCodePoints(CharSequence seq, int index, int codePointOffset) {
2180         if (seq == null) {
2181             throw new NullPointerException("seq == null");
2182         }
2183         int len = seq.length();
2184         if (index < 0 || index > len) {
2185             throw new IndexOutOfBoundsException();
2186         }
2187 
2188         if (codePointOffset == 0) {
2189             return index;
2190         }
2191 
2192         if (codePointOffset > 0) {
2193             int codePoints = codePointOffset;
2194             int i = index;
2195             while (codePoints > 0) {
2196                 codePoints--;
2197                 if (i >= len) {
2198                     throw new IndexOutOfBoundsException();
2199                 }
2200                 if (isHighSurrogate(seq.charAt(i))) {
2201                     int next = i + 1;
2202                     if (next < len && isLowSurrogate(seq.charAt(next))) {
2203                         i++;
2204                     }
2205                 }
2206                 i++;
2207             }
2208             return i;
2209         }
2210 
2211         int codePoints = -codePointOffset;
2212         int i = index;
2213         while (codePoints > 0) {
2214             codePoints--;
2215             i--;
2216             if (i < 0) {
2217                 throw new IndexOutOfBoundsException();
2218             }
2219             if (isLowSurrogate(seq.charAt(i))) {
2220                 int prev = i - 1;
2221                 if (prev >= 0 && isHighSurrogate(seq.charAt(prev))) {
2222                     i--;
2223                 }
2224             }
2225         }
2226         return i;
2227     }
2228 
2229     /**
2230      * Determines the index in a subsequence of the specified character array
2231      * that is offset {@code codePointOffset} code points from {@code index}.
2232      * The subsequence is delineated by {@code start} and {@code count}.
2233      *
2234      * @param seq
2235      *            the character array to find the index in.
2236      * @param start
2237      *            the inclusive index that marks the beginning of the
2238      *            subsequence.
2239      * @param count
2240      *            the number of {@code char} values to include within the
2241      *            subsequence.
2242      * @param index
2243      *            the start index in the subsequence of the char array.
2244      * @param codePointOffset
2245      *            the number of code points to look backwards or forwards; may
2246      *            be a negative or positive value.
2247      * @return the index in {@code seq} that is {@code codePointOffset} code
2248      *         points away from {@code index}.
2249      * @throws NullPointerException
2250      *             if {@code seq} is {@code null}.
2251      * @throws IndexOutOfBoundsException
2252      *             if {@code start < 0}, {@code count < 0},
2253      *             {@code index < start}, {@code index > start + count},
2254      *             {@code start + count} is greater than the length of
2255      *             {@code seq}, or if there are not enough values in
2256      *             {@code seq} to skip {@code codePointOffset} code points
2257      *             forward or backward (if {@code codePointOffset} is
2258      *             negative) from {@code index}.
2259      * @since 1.5
2260      */
offsetByCodePoints(char[] seq, int start, int count, int index, int codePointOffset)2261     public static int offsetByCodePoints(char[] seq, int start, int count,
2262             int index, int codePointOffset) {
2263         Arrays.checkOffsetAndCount(seq.length, start, count);
2264         int end = start + count;
2265         if (index < start || index > end) {
2266             throw new IndexOutOfBoundsException();
2267         }
2268 
2269         if (codePointOffset == 0) {
2270             return index;
2271         }
2272 
2273         if (codePointOffset > 0) {
2274             int codePoints = codePointOffset;
2275             int i = index;
2276             while (codePoints > 0) {
2277                 codePoints--;
2278                 if (i >= end) {
2279                     throw new IndexOutOfBoundsException();
2280                 }
2281                 if (isHighSurrogate(seq[i])) {
2282                     int next = i + 1;
2283                     if (next < end && isLowSurrogate(seq[next])) {
2284                         i++;
2285                     }
2286                 }
2287                 i++;
2288             }
2289             return i;
2290         }
2291 
2292         int codePoints = -codePointOffset;
2293         int i = index;
2294         while (codePoints > 0) {
2295             codePoints--;
2296             i--;
2297             if (i < start) {
2298                 throw new IndexOutOfBoundsException();
2299             }
2300             if (isLowSurrogate(seq[i])) {
2301                 int prev = i - 1;
2302                 if (prev >= start && isHighSurrogate(seq[prev])) {
2303                     i--;
2304                 }
2305             }
2306         }
2307         return i;
2308     }
2309 
2310     /**
2311      * Convenience method to determine the value of the specified character
2312      * {@code c} in the supplied radix. The value of {@code radix} must be
2313      * between MIN_RADIX and MAX_RADIX.
2314      *
2315      * @param c
2316      *            the character to determine the value of.
2317      * @param radix
2318      *            the radix.
2319      * @return the value of {@code c} in {@code radix} if {@code radix} lies
2320      *         between {@link #MIN_RADIX} and {@link #MAX_RADIX}; -1 otherwise.
2321      */
digit(char c, int radix)2322     public static int digit(char c, int radix) {
2323         return digit((int) c, radix);
2324     }
2325 
2326     /**
2327      * Convenience method to determine the value of the character
2328      * {@code codePoint} in the supplied radix. The value of {@code radix} must
2329      * be between MIN_RADIX and MAX_RADIX.
2330      *
2331      * @param codePoint
2332      *            the character, including supplementary characters.
2333      * @param radix
2334      *            the radix.
2335      * @return if {@code radix} lies between {@link #MIN_RADIX} and
2336      *         {@link #MAX_RADIX} then the value of the character in the radix;
2337      *         -1 otherwise.
2338      */
digit(int codePoint, int radix)2339     public static int digit(int codePoint, int radix) {
2340         if (radix < MIN_RADIX || radix > MAX_RADIX) {
2341             return -1;
2342         }
2343         if (codePoint < 128) {
2344             // Optimized for ASCII
2345             int result = -1;
2346             if ('0' <= codePoint && codePoint <= '9') {
2347                 result = codePoint - '0';
2348             } else if ('a' <= codePoint && codePoint <= 'z') {
2349                 result = 10 + (codePoint - 'a');
2350             } else if ('A' <= codePoint && codePoint <= 'Z') {
2351                 result = 10 + (codePoint - 'A');
2352             }
2353             return result < radix ? result : -1;
2354         }
2355         return digitImpl(codePoint, radix);
2356     }
2357 
    /**
     * Native helper that {@link #digit(int, int)} delegates to for code points
     * of 128 and above, after the radix has been range-checked.
     */
    private static native int digitImpl(int codePoint, int radix);
2359 
2360     /**
2361      * Compares this object with the specified object and indicates if they are
2362      * equal. In order to be equal, {@code object} must be an instance of
2363      * {@code Character} and have the same char value as this object.
2364      *
2365      * @param object
2366      *            the object to compare this double with.
2367      * @return {@code true} if the specified object is equal to this
2368      *         {@code Character}; {@code false} otherwise.
2369      */
2370     @Override
equals(Object object)2371     public boolean equals(Object object) {
2372         return (object instanceof Character) && (((Character) object).value == value);
2373     }
2374 
2375     /**
2376      * Returns the character which represents the specified digit in the
2377      * specified radix. The {@code radix} must be between {@code MIN_RADIX} and
2378      * {@code MAX_RADIX} inclusive; {@code digit} must not be negative and
2379      * smaller than {@code radix}. If any of these conditions does not hold, 0
2380      * is returned.
2381      *
2382      * @param digit
2383      *            the integer value.
2384      * @param radix
2385      *            the radix.
2386      * @return the character which represents the {@code digit} in the
2387      *         {@code radix}.
2388      */
forDigit(int digit, int radix)2389     public static char forDigit(int digit, int radix) {
2390         if (MIN_RADIX <= radix && radix <= MAX_RADIX) {
2391             if (digit >= 0 && digit < radix) {
2392                 return (char) (digit < 10 ? digit + '0' : digit + 'a' - 10);
2393             }
2394         }
2395         return 0;
2396     }
2397 
2398     /**
2399      * Returns a human-readable name for the given code point,
2400      * or null if the code point is unassigned.
2401      *
2402      * <p>As a fallback mechanism this method returns strings consisting of the Unicode
2403      * block name (with underscores replaced by spaces), a single space, and the uppercase
2404      * hex value of the code point, using as few digits as necessary.
2405      *
2406      * <p>Examples:
2407      * <ul>
2408      * <li>{@code Character.getName(0)} returns "NULL".
2409      * <li>{@code Character.getName('e')} returns "LATIN SMALL LETTER E".
2410      * <li>{@code Character.getName('\u0666')} returns "ARABIC-INDIC DIGIT SIX".
2411      * <li>{@code Character.getName(0xe000)} returns "PRIVATE USE AREA E000".
2412      * </ul>
2413      *
2414      * <p>Note that the exact strings returned will vary from release to release.
2415      *
2416      * @throws IllegalArgumentException if {@code codePoint} is not a valid code point.
2417      * @since 1.7
2418      */
getName(int codePoint)2419     public static String getName(int codePoint) {
2420         checkValidCodePoint(codePoint);
2421         if (getType(codePoint) == Character.UNASSIGNED) {
2422             return null;
2423         }
2424         String result = getNameImpl(codePoint);
2425         if (result == null) {
2426             String blockName = Character.UnicodeBlock.of(codePoint).toString().replace('_', ' ');
2427             result = blockName + " " + IntegralToString.intToHexString(codePoint, true, 0);
2428         }
2429         return result;
2430     }
2431 
    /**
     * Native name lookup used by {@link #getName(int)}. A {@code null} result
     * makes {@code getName} fall back to a name built from the Unicode block
     * and the hex value of the code point.
     */
    private static native String getNameImpl(int codePoint);
2433 
2434     /**
2435      * Returns the numeric value of the specified Unicode character.
2436      * See {@link #getNumericValue(int)}.
2437      *
2438      * @param c the character
2439      * @return a non-negative numeric integer value if a numeric value for
2440      *         {@code c} exists, -1 if there is no numeric value for {@code c},
2441      *         -2 if the numeric value can not be represented as an integer.
2442      */
getNumericValue(char c)2443     public static int getNumericValue(char c) {
2444         return getNumericValue((int) c);
2445     }
2446 
2447     /**
2448      * Gets the numeric value of the specified Unicode code point. For example,
2449      * the code point '\u216B' stands for the Roman number XII, which has the
2450      * numeric value 12.
2451      *
2452      * <p>There are two points of divergence between this method and the Unicode
2453      * specification. This method treats the letters a-z (in both upper and lower
2454      * cases, and their full-width variants) as numbers from 10 to 35. The
2455      * Unicode specification also supports the idea of code points with non-integer
2456      * numeric values; this method does not (except to the extent of returning -2
2457      * for such code points).
2458      *
2459      * @param codePoint the code point
2460      * @return a non-negative numeric integer value if a numeric value for
2461      *         {@code codePoint} exists, -1 if there is no numeric value for
2462      *         {@code codePoint}, -2 if the numeric value can not be
2463      *         represented with an integer.
2464      */
getNumericValue(int codePoint)2465     public static int getNumericValue(int codePoint) {
2466         // This is both an optimization and papers over differences between Java and ICU.
2467         if (codePoint < 128) {
2468             if (codePoint >= '0' && codePoint <= '9') {
2469                 return codePoint - '0';
2470             }
2471             if (codePoint >= 'a' && codePoint <= 'z') {
2472                 return codePoint - ('a' - 10);
2473             }
2474             if (codePoint >= 'A' && codePoint <= 'Z') {
2475                 return codePoint - ('A' - 10);
2476             }
2477             return -1;
2478         }
2479         // Full-width uppercase A-Z.
2480         if (codePoint >= 0xff21 && codePoint <= 0xff3a) {
2481             return codePoint - 0xff17;
2482         }
2483         // Full-width lowercase a-z.
2484         if (codePoint >= 0xff41 && codePoint <= 0xff5a) {
2485             return codePoint - 0xff37;
2486         }
2487         return getNumericValueImpl(codePoint);
2488     }
2489 
    /**
     * Native helper that {@link #getNumericValue(int)} delegates to for code
     * points of 128 and above that are not full-width Latin letters.
     */
    private static native int getNumericValueImpl(int codePoint);
2491 
2492     /**
2493      * Gets the general Unicode category of the specified character.
2494      *
2495      * @param c
2496      *            the character to get the category of.
2497      * @return the Unicode category of {@code c}.
2498      */
getType(char c)2499     public static int getType(char c) {
2500         return getType((int) c);
2501     }
2502 
2503     /**
2504      * Gets the general Unicode category of the specified code point.
2505      *
2506      * @param codePoint
2507      *            the Unicode code point to get the category of.
2508      * @return the Unicode category of {@code codePoint}.
2509      */
getType(int codePoint)2510     public static int getType(int codePoint) {
2511         int type = getTypeImpl(codePoint);
2512         // The type values returned by ICU are not RI-compatible. The RI skips the value 17.
2513         if (type <= Character.FORMAT) {
2514             return type;
2515         }
2516         return (type + 1);
2517     }
2518 
    /**
     * Native helper behind {@link #getType(int)}, which adjusts the returned
     * value before exposing it to callers.
     */
    private static native int getTypeImpl(int codePoint);
2520 
2521     /**
2522      * Gets the Unicode directionality of the specified character.
2523      *
2524      * @param c
2525      *            the character to get the directionality of.
2526      * @return the Unicode directionality of {@code c}.
2527      */
getDirectionality(char c)2528     public static byte getDirectionality(char c) {
2529         return getDirectionality((int)c);
2530     }
2531 
2532     /**
2533      * Returns the Unicode directionality of the given code point.
2534      * This will be one of the {@code DIRECTIONALITY_} constants.
2535      * For characters whose directionality is undefined, or whose
2536      * directionality has no appropriate constant in this class,
2537      * {@code DIRECTIONALITY_UNDEFINED} is returned.
2538      */
getDirectionality(int codePoint)2539     public static byte getDirectionality(int codePoint) {
2540         if (getType(codePoint) == Character.UNASSIGNED) {
2541             return Character.DIRECTIONALITY_UNDEFINED;
2542         }
2543 
2544         byte directionality = getIcuDirectionality(codePoint);
2545         if (directionality >= 0 && directionality < DIRECTIONALITY.length) {
2546             return DIRECTIONALITY[directionality];
2547         }
2548         return Character.DIRECTIONALITY_UNDEFINED;
2549     }
2550 
    /**
     * Native lookup of the directionality value for a code point.
     * {@link #getDirectionality(int)} uses the result as an index into its
     * {@code DIRECTIONALITY} table when the value is in range.
     *
     * @hide - internal use only.
     */
    public static native byte getIcuDirectionality(int codePoint);
2555 
2556     /**
2557      * Indicates whether the specified character is mirrored.
2558      *
2559      * @param c
2560      *            the character to check.
2561      * @return {@code true} if {@code c} is mirrored; {@code false}
2562      *         otherwise.
2563      */
isMirrored(char c)2564     public static boolean isMirrored(char c) {
2565         return isMirrored((int) c);
2566     }
2567 
2568     /**
2569      * Indicates whether the specified code point is mirrored.
2570      *
2571      * @param codePoint
2572      *            the code point to check.
2573      * @return {@code true} if {@code codePoint} is mirrored, {@code false}
2574      *         otherwise.
2575      */
isMirrored(int codePoint)2576     public static boolean isMirrored(int codePoint) {
2577         return isMirroredImpl(codePoint);
2578     }
2579 
    /**
     * Native helper that supplies the result of {@link #isMirrored(int)}.
     */
    private static native boolean isMirroredImpl(int codePoint);
2581 
2582     @Override
hashCode()2583     public int hashCode() {
2584         return value;
2585     }
2586 
2587     /**
2588      * Returns the high surrogate for the given code point. The result is meaningless if
2589      * the given code point is not a supplementary character.
2590      * @since 1.7
2591      */
highSurrogate(int codePoint)2592     public static char highSurrogate(int codePoint) {
2593         return (char) ((codePoint >> 10) + 0xd7c0);
2594     }
2595 
2596     /**
2597      * Returns the low surrogate for the given code point. The result is meaningless if
2598      * the given code point is not a supplementary character.
2599      * @since 1.7
2600      */
lowSurrogate(int codePoint)2601     public static char lowSurrogate(int codePoint) {
2602         return (char) ((codePoint & 0x3ff) | 0xdc00);
2603     }
2604 
    /**
     * Returns true if the given code point is alphabetic. That is,
     * if it is in any of the Lu, Ll, Lt, Lm, Lo, Nl, or Other_Alphabetic categories.
     * This method is implemented natively.
     *
     * @param codePoint the code point to check.
     * @return {@code true} if {@code codePoint} is alphabetic; {@code false} otherwise.
     * @since 1.7
     */
    public static native boolean isAlphabetic(int codePoint);
2611 
2612     /**
2613      * Returns true if the given code point is in the Basic Multilingual Plane (BMP).
2614      * Such code points can be represented by a single {@code char}.
2615      * @since 1.7
2616      */
isBmpCodePoint(int codePoint)2617     public static boolean isBmpCodePoint(int codePoint) {
2618        return codePoint >= Character.MIN_VALUE && codePoint <= Character.MAX_VALUE;
2619     }
2620 
2621     /**
2622      * Indicates whether the specified character is defined in the Unicode
2623      * specification.
2624      *
2625      * @param c
2626      *            the character to check.
2627      * @return {@code true} if the general Unicode category of the character is
2628      *         not {@code UNASSIGNED}; {@code false} otherwise.
2629      */
isDefined(char c)2630     public static boolean isDefined(char c) {
2631         return isDefinedImpl(c);
2632     }
2633 
2634     /**
2635      * Indicates whether the specified code point is defined in the Unicode
2636      * specification.
2637      *
2638      * @param codePoint
2639      *            the code point to check.
2640      * @return {@code true} if the general Unicode category of the code point is
2641      *         not {@code UNASSIGNED}; {@code false} otherwise.
2642      */
isDefined(int codePoint)2643     public static boolean isDefined(int codePoint) {
2644         return isDefinedImpl(codePoint);
2645     }
2646 
    /**
     * Native helper that supplies the result of both {@code isDefined}
     * overloads.
     */
    private static native boolean isDefinedImpl(int codePoint);
2648 
2649     /**
2650      * Indicates whether the specified character is a digit.
2651      *
2652      * @param c
2653      *            the character to check.
2654      * @return {@code true} if {@code c} is a digit; {@code false}
2655      *         otherwise.
2656      */
isDigit(char c)2657     public static boolean isDigit(char c) {
2658         return isDigit((int) c);
2659     }
2660 
2661     /**
2662      * Indicates whether the specified code point is a digit.
2663      *
2664      * @param codePoint
2665      *            the code point to check.
2666      * @return {@code true} if {@code codePoint} is a digit; {@code false}
2667      *         otherwise.
2668      */
isDigit(int codePoint)2669     public static boolean isDigit(int codePoint) {
2670         // Optimized case for ASCII
2671         if ('0' <= codePoint && codePoint <= '9') {
2672             return true;
2673         }
2674         if (codePoint < 1632) {
2675             return false;
2676         }
2677         return isDigitImpl(codePoint);
2678     }
2679 
    /**
     * Native helper that {@link #isDigit(int)} delegates to for code points
     * of 1632 and above.
     */
    private static native boolean isDigitImpl(int codePoint);
2681 
2682     /**
2683      * Indicates whether the specified character is ignorable in a Java or
2684      * Unicode identifier.
2685      *
2686      * @param c
2687      *            the character to check.
2688      * @return {@code true} if {@code c} is ignorable; {@code false} otherwise.
2689      */
isIdentifierIgnorable(char c)2690     public static boolean isIdentifierIgnorable(char c) {
2691         return isIdentifierIgnorable((int) c);
2692     }
2693 
    /**
     * Returns true if the given code point is a CJKV ideographic character.
     * This method is implemented natively.
     *
     * @param codePoint the code point to check.
     * @return {@code true} if {@code codePoint} is ideographic; {@code false} otherwise.
     * @since 1.7
     */
    public static native boolean isIdeographic(int codePoint);
2699 
2700     /**
2701      * Indicates whether the specified code point is ignorable in a Java or
2702      * Unicode identifier.
2703      *
2704      * @param codePoint
2705      *            the code point to check.
2706      * @return {@code true} if {@code codePoint} is ignorable; {@code false}
2707      *         otherwise.
2708      */
isIdentifierIgnorable(int codePoint)2709     public static boolean isIdentifierIgnorable(int codePoint) {
2710         // This is both an optimization and papers over differences between Java and ICU.
2711         if (codePoint < 0x600) {
2712             return (codePoint >= 0 && codePoint <= 8) || (codePoint >= 0xe && codePoint <= 0x1b) ||
2713                     (codePoint >= 0x7f && codePoint <= 0x9f) || (codePoint == 0xad);
2714         }
2715         return isIdentifierIgnorableImpl(codePoint);
2716     }
2717 
    /**
     * Native helper that {@link #isIdentifierIgnorable(int)} delegates to for
     * code points of 0x600 and above.
     */
    private static native boolean isIdentifierIgnorableImpl(int codePoint);
2719 
2720     /**
2721      * Indicates whether the specified character is an ISO control character.
2722      *
2723      * @param c
2724      *            the character to check.
2725      * @return {@code true} if {@code c} is an ISO control character;
2726      *         {@code false} otherwise.
2727      */
isISOControl(char c)2728     public static boolean isISOControl(char c) {
2729         return isISOControl((int) c);
2730     }
2731 
2732     /**
2733      * Indicates whether the specified code point is an ISO control character.
2734      *
2735      * @param c
2736      *            the code point to check.
2737      * @return {@code true} if {@code c} is an ISO control character;
2738      *         {@code false} otherwise.
2739      */
isISOControl(int c)2740     public static boolean isISOControl(int c) {
2741         return (c >= 0 && c <= 0x1f) || (c >= 0x7f && c <= 0x9f);
2742     }
2743 
2744     /**
2745      * Indicates whether the specified character is a valid part of a Java
2746      * identifier other than the first character.
2747      *
2748      * @param c
2749      *            the character to check.
2750      * @return {@code true} if {@code c} is valid as part of a Java identifier;
2751      *         {@code false} otherwise.
2752      */
isJavaIdentifierPart(char c)2753     public static boolean isJavaIdentifierPart(char c) {
2754         return isJavaIdentifierPart((int) c);
2755     }
2756 
2757     /**
2758      * Indicates whether the specified code point is a valid part of a Java
2759      * identifier other than the first character.
2760      *
2761      * @param codePoint
2762      *            the code point to check.
2763      * @return {@code true} if {@code c} is valid as part of a Java identifier;
2764      *         {@code false} otherwise.
2765      */
isJavaIdentifierPart(int codePoint)2766     public static boolean isJavaIdentifierPart(int codePoint) {
2767         // Use precomputed bitmasks to optimize the ASCII range.
2768         if (codePoint < 64) {
2769             return (0x3ff00100fffc1ffL & (1L << codePoint)) != 0;
2770         } else if (codePoint < 128) {
2771             return (0x87fffffe87fffffeL & (1L << (codePoint - 64))) != 0;
2772         }
2773         int type = getType(codePoint);
2774         return (type >= UPPERCASE_LETTER && type <= OTHER_LETTER)
2775                 || type == CURRENCY_SYMBOL || type == CONNECTOR_PUNCTUATION
2776                 || (type >= DECIMAL_DIGIT_NUMBER && type <= LETTER_NUMBER)
2777                 || type == COMBINING_SPACING_MARK || type == NON_SPACING_MARK
2778                 || (codePoint >= 0 && codePoint <= 8) || (codePoint >= 0xe && codePoint <= 0x1b)
2779                 || (codePoint >= 0x7f && codePoint <= 0x9f) || type == FORMAT;
2780     }
2781 
2782     /**
2783      * Indicates whether the specified character is a valid first character for
2784      * a Java identifier.
2785      *
2786      * @param c
2787      *            the character to check.
2788      * @return {@code true} if {@code c} is a valid first character of a Java
2789      *         identifier; {@code false} otherwise.
2790      */
isJavaIdentifierStart(char c)2791     public static boolean isJavaIdentifierStart(char c) {
2792         return isJavaIdentifierStart((int) c);
2793     }
2794 
2795     /**
2796      * Indicates whether the specified code point is a valid first character for
2797      * a Java identifier.
2798      *
2799      * @param codePoint
2800      *            the code point to check.
2801      * @return {@code true} if {@code codePoint} is a valid start of a Java
2802      *         identifier; {@code false} otherwise.
2803      */
isJavaIdentifierStart(int codePoint)2804     public static boolean isJavaIdentifierStart(int codePoint) {
2805         // Use precomputed bitmasks to optimize the ASCII range.
2806         if (codePoint < 64) {
2807             return (codePoint == '$'); // There's only one character in this range.
2808         } else if (codePoint < 128) {
2809             return (0x7fffffe87fffffeL & (1L << (codePoint - 64))) != 0;
2810         }
2811         int type = getType(codePoint);
2812         return (type >= UPPERCASE_LETTER && type <= OTHER_LETTER) || type == CURRENCY_SYMBOL
2813                 || type == CONNECTOR_PUNCTUATION || type == LETTER_NUMBER;
2814     }
2815 
2816     /**
2817      * Indicates whether the specified character is a Java letter.
2818      *
2819      * @param c
2820      *            the character to check.
2821      * @return {@code true} if {@code c} is a Java letter; {@code false}
2822      *         otherwise.
2823      * @deprecated Use {@link #isJavaIdentifierStart(char)} instead.
2824      */
2825     @Deprecated
isJavaLetter(char c)2826     public static boolean isJavaLetter(char c) {
2827         return isJavaIdentifierStart(c);
2828     }
2829 
2830     /**
2831      * Indicates whether the specified character is a Java letter or digit
2832      * character.
2833      *
2834      * @param c
2835      *            the character to check.
2836      * @return {@code true} if {@code c} is a Java letter or digit;
2837      *         {@code false} otherwise.
2838      * @deprecated Use {@link #isJavaIdentifierPart(char)} instead.
2839      */
2840     @Deprecated
isJavaLetterOrDigit(char c)2841     public static boolean isJavaLetterOrDigit(char c) {
2842         return isJavaIdentifierPart(c);
2843     }
2844 
2845     /**
2846      * Indicates whether the specified character is a letter.
2847      *
2848      * @param c
2849      *            the character to check.
2850      * @return {@code true} if {@code c} is a letter; {@code false} otherwise.
2851      */
isLetter(char c)2852     public static boolean isLetter(char c) {
2853         return isLetter((int) c);
2854     }
2855 
2856     /**
2857      * Indicates whether the specified code point is a letter.
2858      *
2859      * @param codePoint
2860      *            the code point to check.
2861      * @return {@code true} if {@code codePoint} is a letter; {@code false}
2862      *         otherwise.
2863      */
isLetter(int codePoint)2864     public static boolean isLetter(int codePoint) {
2865         if (('A' <= codePoint && codePoint <= 'Z') || ('a' <= codePoint && codePoint <= 'z')) {
2866             return true;
2867         }
2868         if (codePoint < 128) {
2869             return false;
2870         }
2871         return isLetterImpl(codePoint);
2872     }
2873 
    /**
     * Native helper that {@link #isLetter(int)} delegates to for code points
     * of 128 and above.
     */
    private static native boolean isLetterImpl(int codePoint);
2875 
2876     /**
2877      * Indicates whether the specified character is a letter or a digit.
2878      *
2879      * @param c
2880      *            the character to check.
2881      * @return {@code true} if {@code c} is a letter or a digit; {@code false}
2882      *         otherwise.
2883      */
isLetterOrDigit(char c)2884     public static boolean isLetterOrDigit(char c) {
2885         return isLetterOrDigit((int) c);
2886     }
2887 
2888     /**
2889      * Indicates whether the specified code point is a letter or a digit.
2890      *
2891      * @param codePoint
2892      *            the code point to check.
2893      * @return {@code true} if {@code codePoint} is a letter or a digit;
2894      *         {@code false} otherwise.
2895      */
isLetterOrDigit(int codePoint)2896     public static boolean isLetterOrDigit(int codePoint) {
2897         // Optimized case for ASCII
2898         if (('A' <= codePoint && codePoint <= 'Z') || ('a' <= codePoint && codePoint <= 'z')) {
2899             return true;
2900         }
2901         if ('0' <= codePoint && codePoint <= '9') {
2902             return true;
2903         }
2904         if (codePoint < 128) {
2905             return false;
2906         }
2907         return isLetterOrDigitImpl(codePoint);
2908     }
2909 
    /**
     * Native helper that {@link #isLetterOrDigit(int)} delegates to for code
     * points of 128 and above.
     */
    private static native boolean isLetterOrDigitImpl(int codePoint);
2911 
2912     /**
2913      * Indicates whether the specified character is a lower case letter.
2914      *
2915      * @param c
2916      *            the character to check.
2917      * @return {@code true} if {@code c} is a lower case letter; {@code false}
2918      *         otherwise.
2919      */
isLowerCase(char c)2920     public static boolean isLowerCase(char c) {
2921         return isLowerCase((int) c);
2922     }
2923 
2924     /**
2925      * Indicates whether the specified code point is a lower case letter.
2926      *
2927      * @param codePoint
2928      *            the code point to check.
2929      * @return {@code true} if {@code codePoint} is a lower case letter;
2930      *         {@code false} otherwise.
2931      */
isLowerCase(int codePoint)2932     public static boolean isLowerCase(int codePoint) {
2933         // Optimized case for ASCII
2934         if ('a' <= codePoint && codePoint <= 'z') {
2935             return true;
2936         }
2937         if (codePoint < 128) {
2938             return false;
2939         }
2940         return isLowerCaseImpl(codePoint);
2941     }
2942 
isLowerCaseImpl(int codePoint)2943     private static native boolean isLowerCaseImpl(int codePoint);
2944 
2945     /**
2946      * Use {@link #isWhitespace(char)} instead.
2947      * @deprecated Use {@link #isWhitespace(char)} instead.
2948      */
2949     @Deprecated
isSpace(char c)2950     public static boolean isSpace(char c) {
2951         return c == '\n' || c == '\t' || c == '\f' || c == '\r' || c == ' ';
2952     }
2953 
2954     /**
2955      * See {@link #isSpaceChar(int)}.
2956      */
isSpaceChar(char c)2957     public static boolean isSpaceChar(char c) {
2958         return isSpaceChar((int) c);
2959     }
2960 
2961     /**
2962      * Returns true if the given code point is a Unicode space character.
2963      * The exact set of characters considered as whitespace varies with Unicode version.
2964      * Note that non-breaking spaces are considered whitespace.
     * Note also that ASCII line terminators such as {@code '\n'} and {@code '\r'} are not
     * considered space characters, although U+2028 and U+2029 are; see {@link #isWhitespace}
     * for an alternative.
2967      */
isSpaceChar(int codePoint)2968     public static boolean isSpaceChar(int codePoint) {
2969         // We don't just call into icu4c because of the JNI overhead. Ideally we'd fix that.
2970         // SPACE or NO-BREAK SPACE?
2971         if (codePoint == 0x20 || codePoint == 0xa0) {
2972             return true;
2973         }
2974         if (codePoint < 0x1000) {
2975             return false;
2976         }
2977         // OGHAM SPACE MARK or MONGOLIAN VOWEL SEPARATOR?
2978         if (codePoint == 0x1680 || codePoint == 0x180e) {
2979             return true;
2980         }
2981         if (codePoint < 0x2000) {
2982             return false;
2983         }
2984         if (codePoint <= 0xffff) {
2985             // Other whitespace from General Punctuation...
2986             return codePoint <= 0x200a || codePoint == 0x2028 || codePoint == 0x2029 || codePoint == 0x202f || codePoint == 0x205f ||
2987                 codePoint == 0x3000; // ...or CJK Symbols and Punctuation?
2988         }
2989         // Let icu4c worry about non-BMP code points.
2990         return isSpaceCharImpl(codePoint);
2991     }
2992 
isSpaceCharImpl(int codePoint)2993     private static native boolean isSpaceCharImpl(int codePoint);
2994 
2995     /**
2996      * Indicates whether the specified character is a titlecase character.
2997      *
2998      * @param c
2999      *            the character to check.
3000      * @return {@code true} if {@code c} is a titlecase character, {@code false}
3001      *         otherwise.
3002      */
isTitleCase(char c)3003     public static boolean isTitleCase(char c) {
3004         return isTitleCaseImpl(c);
3005     }
3006 
3007     /**
3008      * Indicates whether the specified code point is a titlecase character.
3009      *
3010      * @param codePoint
3011      *            the code point to check.
3012      * @return {@code true} if {@code codePoint} is a titlecase character,
3013      *         {@code false} otherwise.
3014      */
isTitleCase(int codePoint)3015     public static boolean isTitleCase(int codePoint) {
3016         return isTitleCaseImpl(codePoint);
3017     }
3018 
isTitleCaseImpl(int codePoint)3019     private static native boolean isTitleCaseImpl(int codePoint);
3020 
3021     /**
3022      * Indicates whether the specified character is valid as part of a Unicode
3023      * identifier other than the first character.
3024      *
3025      * @param c
3026      *            the character to check.
3027      * @return {@code true} if {@code c} is valid as part of a Unicode
3028      *         identifier; {@code false} otherwise.
3029      */
isUnicodeIdentifierPart(char c)3030     public static boolean isUnicodeIdentifierPart(char c) {
3031         return isUnicodeIdentifierPartImpl(c);
3032     }
3033 
3034     /**
3035      * Indicates whether the specified code point is valid as part of a Unicode
3036      * identifier other than the first character.
3037      *
3038      * @param codePoint
3039      *            the code point to check.
3040      * @return {@code true} if {@code codePoint} is valid as part of a Unicode
3041      *         identifier; {@code false} otherwise.
3042      */
isUnicodeIdentifierPart(int codePoint)3043     public static boolean isUnicodeIdentifierPart(int codePoint) {
3044         return isUnicodeIdentifierPartImpl(codePoint);
3045     }
3046 
isUnicodeIdentifierPartImpl(int codePoint)3047     private static native boolean isUnicodeIdentifierPartImpl(int codePoint);
3048 
3049     /**
3050      * Indicates whether the specified character is a valid initial character
3051      * for a Unicode identifier.
3052      *
3053      * @param c
3054      *            the character to check.
3055      * @return {@code true} if {@code c} is a valid first character for a
3056      *         Unicode identifier; {@code false} otherwise.
3057      */
isUnicodeIdentifierStart(char c)3058     public static boolean isUnicodeIdentifierStart(char c) {
3059         return isUnicodeIdentifierStartImpl(c);
3060     }
3061 
3062     /**
3063      * Indicates whether the specified code point is a valid initial character
3064      * for a Unicode identifier.
3065      *
3066      * @param codePoint
3067      *            the code point to check.
3068      * @return {@code true} if {@code codePoint} is a valid first character for
3069      *         a Unicode identifier; {@code false} otherwise.
3070      */
isUnicodeIdentifierStart(int codePoint)3071     public static boolean isUnicodeIdentifierStart(int codePoint) {
3072         return isUnicodeIdentifierStartImpl(codePoint);
3073     }
3074 
isUnicodeIdentifierStartImpl(int codePoint)3075     private static native boolean isUnicodeIdentifierStartImpl(int codePoint);
3076 
3077     /**
3078      * Indicates whether the specified character is an upper case letter.
3079      *
3080      * @param c
3081      *            the character to check.
     * @return {@code true} if {@code c} is an upper case letter; {@code false}
3083      *         otherwise.
3084      */
isUpperCase(char c)3085     public static boolean isUpperCase(char c) {
3086         return isUpperCase((int) c);
3087     }
3088 
3089     /**
3090      * Indicates whether the specified code point is an upper case letter.
3091      *
3092      * @param codePoint
3093      *            the code point to check.
     * @return {@code true} if {@code codePoint} is an upper case letter;
3095      *         {@code false} otherwise.
3096      */
isUpperCase(int codePoint)3097     public static boolean isUpperCase(int codePoint) {
3098         // Optimized case for ASCII
3099         if ('A' <= codePoint && codePoint <= 'Z') {
3100             return true;
3101         }
3102         if (codePoint < 128) {
3103             return false;
3104         }
3105         return isUpperCaseImpl(codePoint);
3106     }
3107 
isUpperCaseImpl(int codePoint)3108     private static native boolean isUpperCaseImpl(int codePoint);
3109 
3110     /**
3111      * See {@link #isWhitespace(int)}.
3112      */
isWhitespace(char c)3113     public static boolean isWhitespace(char c) {
3114         return isWhitespace((int) c);
3115     }
3116 
3117     /**
3118      * Returns true if the given code point is a Unicode whitespace character.
3119      * The exact set of characters considered as whitespace varies with Unicode version.
3120      * Note that non-breaking spaces are not considered whitespace.
3121      * Note also that line separators are considered whitespace; see {@link #isSpaceChar}
3122      * for an alternative.
3123      */
isWhitespace(int codePoint)3124     public static boolean isWhitespace(int codePoint) {
3125         // We don't just call into icu4c because of the JNI overhead. Ideally we'd fix that.
3126         // Any ASCII whitespace character?
3127         if ((codePoint >= 0x1c && codePoint <= 0x20) || (codePoint >= 0x09 && codePoint <= 0x0d)) {
3128             return true;
3129         }
3130         if (codePoint < 0x1000) {
3131             return false;
3132         }
3133         // OGHAM SPACE MARK or MONGOLIAN VOWEL SEPARATOR?
3134         if (codePoint == 0x1680 || codePoint == 0x180e) {
3135             return true;
3136         }
3137         if (codePoint < 0x2000) {
3138             return false;
3139         }
3140         // Exclude General Punctuation's non-breaking spaces (which includes FIGURE SPACE).
3141         if (codePoint == 0x2007 || codePoint == 0x202f) {
3142             return false;
3143         }
3144         if (codePoint <= 0xffff) {
3145             // Other whitespace from General Punctuation...
3146             return codePoint <= 0x200a || codePoint == 0x2028 || codePoint == 0x2029 || codePoint == 0x205f ||
3147                 codePoint == 0x3000; // ...or CJK Symbols and Punctuation?
3148         }
3149         // Let icu4c worry about non-BMP code points.
3150         return isWhitespaceImpl(codePoint);
3151     }
3152 
isWhitespaceImpl(int codePoint)3153     private static native boolean isWhitespaceImpl(int codePoint);
3154 
3155     /**
3156      * Reverses the order of the first and second byte in the specified
3157      * character.
3158      *
3159      * @param c
3160      *            the character to reverse.
3161      * @return the character with reordered bytes.
3162      */
reverseBytes(char c)3163     public static char reverseBytes(char c) {
3164         return (char)((c<<8) | (c>>8));
3165     }
3166 
3167     /**
3168      * Returns the lower case equivalent for the specified character if the
3169      * character is an upper case letter. Otherwise, the specified character is
3170      * returned unchanged.
3171      *
3172      * @param c
     *            the character to convert.
3174      * @return if {@code c} is an upper case character then its lower case
3175      *         counterpart, otherwise just {@code c}.
3176      */
toLowerCase(char c)3177     public static char toLowerCase(char c) {
3178         return (char) toLowerCase((int) c);
3179     }
3180 
3181     /**
3182      * Returns the lower case equivalent for the specified code point if it is
3183      * an upper case letter. Otherwise, the specified code point is returned
3184      * unchanged.
3185      *
3186      * @param codePoint
     *            the code point to convert.
3188      * @return if {@code codePoint} is an upper case character then its lower
3189      *         case counterpart, otherwise just {@code codePoint}.
3190      */
toLowerCase(int codePoint)3191     public static int toLowerCase(int codePoint) {
3192         // Optimized case for ASCII
3193         if ('A' <= codePoint && codePoint <= 'Z') {
3194             return (char) (codePoint + ('a' - 'A'));
3195         }
3196         if (codePoint < 192) {
3197             return codePoint;
3198         }
3199         return toLowerCaseImpl(codePoint);
3200     }
3201 
toLowerCaseImpl(int codePoint)3202     private static native int toLowerCaseImpl(int codePoint);
3203 
3204     @Override
toString()3205     public String toString() {
3206         return String.valueOf(value);
3207     }
3208 
3209     /**
3210      * Converts the specified character to its string representation.
3211      *
3212      * @param value
3213      *            the character to convert.
3214      * @return the character converted to a string.
3215      */
toString(char value)3216     public static String toString(char value) {
3217         return String.valueOf(value);
3218     }
3219 
3220     /**
3221      * Returns the title case equivalent for the specified character if it
3222      * exists. Otherwise, the specified character is returned unchanged.
3223      *
3224      * @param c
3225      *            the character to convert.
3226      * @return the title case equivalent of {@code c} if it exists, otherwise
3227      *         {@code c}.
3228      */
toTitleCase(char c)3229     public static char toTitleCase(char c) {
3230         return (char) toTitleCaseImpl(c);
3231     }
3232 
3233     /**
3234      * Returns the title case equivalent for the specified code point if it
3235      * exists. Otherwise, the specified code point is returned unchanged.
3236      *
3237      * @param codePoint
3238      *            the code point to convert.
3239      * @return the title case equivalent of {@code codePoint} if it exists,
3240      *         otherwise {@code codePoint}.
3241      */
toTitleCase(int codePoint)3242     public static int toTitleCase(int codePoint) {
3243         return toTitleCaseImpl(codePoint);
3244     }
3245 
toTitleCaseImpl(int codePoint)3246     private static native int toTitleCaseImpl(int codePoint);
3247 
3248     /**
3249      * Returns the upper case equivalent for the specified character if the
3250      * character is a lower case letter. Otherwise, the specified character is
3251      * returned unchanged.
3252      *
3253      * @param c
3254      *            the character to convert.
3255      * @return if {@code c} is a lower case character then its upper case
3256      *         counterpart, otherwise just {@code c}.
3257      */
toUpperCase(char c)3258     public static char toUpperCase(char c) {
3259         return (char) toUpperCase((int) c);
3260     }
3261 
3262     /**
3263      * Returns the upper case equivalent for the specified code point if the
3264      * code point is a lower case letter. Otherwise, the specified code point is
3265      * returned unchanged.
3266      *
3267      * @param codePoint
3268      *            the code point to convert.
3269      * @return if {@code codePoint} is a lower case character then its upper
3270      *         case counterpart, otherwise just {@code codePoint}.
3271      */
toUpperCase(int codePoint)3272     public static int toUpperCase(int codePoint) {
3273         // Optimized case for ASCII
3274         if ('a' <= codePoint && codePoint <= 'z') {
3275             return (char) (codePoint - ('a' - 'A'));
3276         }
3277         if (codePoint < 181) {
3278             return codePoint;
3279         }
3280         return toUpperCaseImpl(codePoint);
3281     }
3282 
toUpperCaseImpl(int codePoint)3283     private static native int toUpperCaseImpl(int codePoint);
3284 }
3285