1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18 package java.lang; 19 20 import java.io.Serializable; 21 import java.util.Arrays; 22 23 /** 24 * The wrapper for the primitive type {@code char}. This class also provides a 25 * number of utility methods for working with characters. 26 * 27 * <p>Character data is kept up to date as Unicode evolves. 28 * See the <a href="../util/Locale.html#locale_data">Locale data</a> section of 29 * the {@code Locale} documentation for details of the Unicode versions implemented by current 30 * and historical Android releases. 31 * 32 * <p>The Unicode specification, character tables, and other information are available at 33 * <a href="http://www.unicode.org/">http://www.unicode.org/</a>. 34 * 35 * <p>Unicode characters are referred to as <i>code points</i>. The range of valid 36 * code points is U+0000 to U+10FFFF. The <i>Basic Multilingual Plane (BMP)</i> 37 * is the code point range U+0000 to U+FFFF. Characters above the BMP are 38 * referred to as <i>Supplementary Characters</i>. On the Java platform, UTF-16 39 * encoding and {@code char} pairs are used to represent code points in the 40 * supplementary range. 
A pair of {@code char} values that represent a 41 * supplementary character are made up of a <i>high surrogate</i> with a value 42 * range of 0xD800 to 0xDBFF and a <i>low surrogate</i> with a value range of 43 * 0xDC00 to 0xDFFF. 44 * <p> 45 * On the Java platform a {@code char} value represents either a single BMP code 46 * point or a UTF-16 unit that's part of a surrogate pair. The {@code int} type 47 * is used to represent all Unicode code points. 48 * 49 * <a name="unicode_categories"></a><h3>Unicode categories</h3> 50 * <p>Here's a list of the Unicode character categories and the corresponding Java constant, 51 * grouped semantically to provide a convenient overview. This table is also useful in 52 * conjunction with {@code \p} and {@code \P} in {@link java.util.regex.Pattern regular expressions}. 53 * <span class="datatable"> 54 * <style type="text/css"> 55 * .datatable td { padding-right: 20px; } 56 * </style> 57 * <p><table> 58 * <tr> <td> Cn </td> <td> Unassigned </td> <td>{@link #UNASSIGNED}</td> </tr> 59 * <tr> <td> Cc </td> <td> Control </td> <td>{@link #CONTROL}</td> </tr> 60 * <tr> <td> Cf </td> <td> Format </td> <td>{@link #FORMAT}</td> </tr> 61 * <tr> <td> Co </td> <td> Private use </td> <td>{@link #PRIVATE_USE}</td> </tr> 62 * <tr> <td> Cs </td> <td> Surrogate </td> <td>{@link #SURROGATE}</td> </tr> 63 * <tr> <td><br></td> </tr> 64 * <tr> <td> Lu </td> <td> Uppercase letter </td> <td>{@link #UPPERCASE_LETTER}</td> </tr> 65 * <tr> <td> Ll </td> <td> Lowercase letter </td> <td>{@link #LOWERCASE_LETTER}</td> </tr> 66 * <tr> <td> Lt </td> <td> Titlecase letter </td> <td>{@link #TITLECASE_LETTER}</td> </tr> 67 * <tr> <td> Lm </td> <td> Modifier letter </td> <td>{@link #MODIFIER_LETTER}</td> </tr> 68 * <tr> <td> Lo </td> <td> Other letter </td> <td>{@link #OTHER_LETTER}</td> </tr> 69 * <tr> <td><br></td> </tr> 70 * <tr> <td> Mn </td> <td> Non-spacing mark </td> <td>{@link #NON_SPACING_MARK}</td> </tr> 71 * <tr> <td> Me </td> <td> Enclosing mark </td> 
<td>{@link #ENCLOSING_MARK}</td> </tr> 72 * <tr> <td> Mc </td> <td> Combining spacing mark </td> <td>{@link #COMBINING_SPACING_MARK}</td> </tr> 73 * <tr> <td><br></td> </tr> 74 * <tr> <td> Nd </td> <td> Decimal digit number </td> <td>{@link #DECIMAL_DIGIT_NUMBER}</td> </tr> 75 * <tr> <td> Nl </td> <td> Letter number </td> <td>{@link #LETTER_NUMBER}</td> </tr> 76 * <tr> <td> No </td> <td> Other number </td> <td>{@link #OTHER_NUMBER}</td> </tr> 77 * <tr> <td><br></td> </tr> 78 * <tr> <td> Pd </td> <td> Dash punctuation </td> <td>{@link #DASH_PUNCTUATION}</td> </tr> 79 * <tr> <td> Ps </td> <td> Start punctuation </td> <td>{@link #START_PUNCTUATION}</td> </tr> 80 * <tr> <td> Pe </td> <td> End punctuation </td> <td>{@link #END_PUNCTUATION}</td> </tr> 81 * <tr> <td> Pc </td> <td> Connector punctuation </td> <td>{@link #CONNECTOR_PUNCTUATION}</td> </tr> 82 * <tr> <td> Pi </td> <td> Initial quote punctuation </td> <td>{@link #INITIAL_QUOTE_PUNCTUATION}</td> </tr> 83 * <tr> <td> Pf </td> <td> Final quote punctuation </td> <td>{@link #FINAL_QUOTE_PUNCTUATION}</td> </tr> 84 * <tr> <td> Po </td> <td> Other punctuation </td> <td>{@link #OTHER_PUNCTUATION}</td> </tr> 85 * <tr> <td><br></td> </tr> 86 * <tr> <td> Sm </td> <td> Math symbol </td> <td>{@link #MATH_SYMBOL}</td> </tr> 87 * <tr> <td> Sc </td> <td> Currency symbol </td> <td>{@link #CURRENCY_SYMBOL}</td> </tr> 88 * <tr> <td> Sk </td> <td> Modifier symbol </td> <td>{@link #MODIFIER_SYMBOL}</td> </tr> 89 * <tr> <td> So </td> <td> Other symbol </td> <td>{@link #OTHER_SYMBOL}</td> </tr> 90 * <tr> <td><br></td> </tr> 91 * <tr> <td> Zs </td> <td> Space separator </td> <td>{@link #SPACE_SEPARATOR}</td> </tr> 92 * <tr> <td> Zl </td> <td> Line separator </td> <td>{@link #LINE_SEPARATOR}</td> </tr> 93 * <tr> <td> Zp </td> <td> Paragraph separator </td> <td>{@link #PARAGRAPH_SEPARATOR}</td> </tr> 94 * </table> 95 * </span> 96 * 97 * @since 1.0 98 */ 99 @FindBugsSuppressWarnings("DM_NUMBER_CTOR") 100 public final class Character 
implements Serializable, Comparable<Character> { 101 private static final long serialVersionUID = 3786198910865385080L; 102 103 private final char value; 104 105 /** 106 * The minimum {@code Character} value. 107 */ 108 public static final char MIN_VALUE = '\u0000'; 109 110 /** 111 * The maximum {@code Character} value. 112 */ 113 public static final char MAX_VALUE = '\uffff'; 114 115 /** 116 * The minimum radix used for conversions between characters and integers. 117 */ 118 public static final int MIN_RADIX = 2; 119 120 /** 121 * The maximum radix used for conversions between characters and integers. 122 */ 123 public static final int MAX_RADIX = 36; 124 125 /** 126 * The {@link Class} object that represents the primitive type {@code char}. 127 */ 128 @SuppressWarnings("unchecked") 129 public static final Class<Character> TYPE 130 = (Class<Character>) char[].class.getComponentType(); 131 // Note: Character.TYPE can't be set to "char.class", since *that* is 132 // defined to be "java.lang.Character.TYPE"; 133 134 /** 135 * Unicode category constant Cn. 136 */ 137 public static final byte UNASSIGNED = 0; 138 139 /** 140 * Unicode category constant Lu. 141 */ 142 public static final byte UPPERCASE_LETTER = 1; 143 144 /** 145 * Unicode category constant Ll. 146 */ 147 public static final byte LOWERCASE_LETTER = 2; 148 149 /** 150 * Unicode category constant Lt. 151 */ 152 public static final byte TITLECASE_LETTER = 3; 153 154 /** 155 * Unicode category constant Lm. 156 */ 157 public static final byte MODIFIER_LETTER = 4; 158 159 /** 160 * Unicode category constant Lo. 161 */ 162 public static final byte OTHER_LETTER = 5; 163 164 /** 165 * Unicode category constant Mn. 166 */ 167 public static final byte NON_SPACING_MARK = 6; 168 169 /** 170 * Unicode category constant Me. 171 */ 172 public static final byte ENCLOSING_MARK = 7; 173 174 /** 175 * Unicode category constant Mc. 
176 */ 177 public static final byte COMBINING_SPACING_MARK = 8; 178 179 /** 180 * Unicode category constant Nd. 181 */ 182 public static final byte DECIMAL_DIGIT_NUMBER = 9; 183 184 /** 185 * Unicode category constant Nl. 186 */ 187 public static final byte LETTER_NUMBER = 10; 188 189 /** 190 * Unicode category constant No. 191 */ 192 public static final byte OTHER_NUMBER = 11; 193 194 /** 195 * Unicode category constant Zs. 196 */ 197 public static final byte SPACE_SEPARATOR = 12; 198 199 /** 200 * Unicode category constant Zl. 201 */ 202 public static final byte LINE_SEPARATOR = 13; 203 204 /** 205 * Unicode category constant Zp. 206 */ 207 public static final byte PARAGRAPH_SEPARATOR = 14; 208 209 /** 210 * Unicode category constant Cc. 211 */ 212 public static final byte CONTROL = 15; 213 214 /** 215 * Unicode category constant Cf. 216 */ 217 public static final byte FORMAT = 16; 218 219 /** 220 * Unicode category constant Co. 221 */ 222 public static final byte PRIVATE_USE = 18; 223 224 /** 225 * Unicode category constant Cs. 226 */ 227 public static final byte SURROGATE = 19; 228 229 /** 230 * Unicode category constant Pd. 231 */ 232 public static final byte DASH_PUNCTUATION = 20; 233 234 /** 235 * Unicode category constant Ps. 236 */ 237 public static final byte START_PUNCTUATION = 21; 238 239 /** 240 * Unicode category constant Pe. 241 */ 242 public static final byte END_PUNCTUATION = 22; 243 244 /** 245 * Unicode category constant Pc. 246 */ 247 public static final byte CONNECTOR_PUNCTUATION = 23; 248 249 /** 250 * Unicode category constant Po. 251 */ 252 public static final byte OTHER_PUNCTUATION = 24; 253 254 /** 255 * Unicode category constant Sm. 256 */ 257 public static final byte MATH_SYMBOL = 25; 258 259 /** 260 * Unicode category constant Sc. 261 */ 262 public static final byte CURRENCY_SYMBOL = 26; 263 264 /** 265 * Unicode category constant Sk. 
266 */ 267 public static final byte MODIFIER_SYMBOL = 27; 268 269 /** 270 * Unicode category constant So. 271 */ 272 public static final byte OTHER_SYMBOL = 28; 273 274 /** 275 * Unicode category constant Pi. 276 * 277 * @since 1.4 278 */ 279 public static final byte INITIAL_QUOTE_PUNCTUATION = 29; 280 281 /** 282 * Unicode category constant Pf. 283 * 284 * @since 1.4 285 */ 286 public static final byte FINAL_QUOTE_PUNCTUATION = 30; 287 288 /** 289 * Unicode bidirectional constant. 290 * 291 * @since 1.4 292 */ 293 public static final byte DIRECTIONALITY_UNDEFINED = -1; 294 295 /** 296 * Unicode bidirectional constant L. 297 * 298 * @since 1.4 299 */ 300 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0; 301 302 /** 303 * Unicode bidirectional constant R. 304 * 305 * @since 1.4 306 */ 307 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1; 308 309 /** 310 * Unicode bidirectional constant AL. 311 * 312 * @since 1.4 313 */ 314 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2; 315 316 /** 317 * Unicode bidirectional constant EN. 318 * 319 * @since 1.4 320 */ 321 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3; 322 323 /** 324 * Unicode bidirectional constant ES. 325 * 326 * @since 1.4 327 */ 328 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4; 329 330 /** 331 * Unicode bidirectional constant ET. 332 * 333 * @since 1.4 334 */ 335 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5; 336 337 /** 338 * Unicode bidirectional constant AN. 339 * 340 * @since 1.4 341 */ 342 public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6; 343 344 /** 345 * Unicode bidirectional constant CS. 346 * 347 * @since 1.4 348 */ 349 public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7; 350 351 /** 352 * Unicode bidirectional constant NSM. 353 * 354 * @since 1.4 355 */ 356 public static final byte DIRECTIONALITY_NONSPACING_MARK = 8; 357 358 /** 359 * Unicode bidirectional constant BN. 
360 * 361 * @since 1.4 362 */ 363 public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9; 364 365 /** 366 * Unicode bidirectional constant B. 367 * 368 * @since 1.4 369 */ 370 public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10; 371 372 /** 373 * Unicode bidirectional constant S. 374 * 375 * @since 1.4 376 */ 377 public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11; 378 379 /** 380 * Unicode bidirectional constant WS. 381 * 382 * @since 1.4 383 */ 384 public static final byte DIRECTIONALITY_WHITESPACE = 12; 385 386 /** 387 * Unicode bidirectional constant ON. 388 * 389 * @since 1.4 390 */ 391 public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13; 392 393 /** 394 * Unicode bidirectional constant LRE. 395 * 396 * @since 1.4 397 */ 398 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14; 399 400 /** 401 * Unicode bidirectional constant LRO. 402 * 403 * @since 1.4 404 */ 405 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15; 406 407 /** 408 * Unicode bidirectional constant RLE. 409 * 410 * @since 1.4 411 */ 412 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16; 413 414 /** 415 * Unicode bidirectional constant RLO. 416 * 417 * @since 1.4 418 */ 419 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17; 420 421 /** 422 * Unicode bidirectional constant PDF. 423 * 424 * @since 1.4 425 */ 426 public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18; 427 428 /** 429 * The minimum value of a high surrogate or leading surrogate unit in UTF-16 430 * encoding, {@code '\uD800'}. 431 * 432 * @since 1.5 433 */ 434 public static final char MIN_HIGH_SURROGATE = '\uD800'; 435 436 /** 437 * The maximum value of a high surrogate or leading surrogate unit in UTF-16 438 * encoding, {@code '\uDBFF'}. 
439 * 440 * @since 1.5 441 */ 442 public static final char MAX_HIGH_SURROGATE = '\uDBFF'; 443 444 /** 445 * The minimum value of a low surrogate or trailing surrogate unit in UTF-16 446 * encoding, {@code '\uDC00'}. 447 * 448 * @since 1.5 449 */ 450 public static final char MIN_LOW_SURROGATE = '\uDC00'; 451 452 /** 453 * The maximum value of a low surrogate or trailing surrogate unit in UTF-16 454 * encoding, {@code '\uDFFF'}. 455 * 456 * @since 1.5 457 */ 458 public static final char MAX_LOW_SURROGATE = '\uDFFF'; 459 460 /** 461 * The minimum value of a surrogate unit in UTF-16 encoding, {@code '\uD800'}. 462 * 463 * @since 1.5 464 */ 465 public static final char MIN_SURROGATE = '\uD800'; 466 467 /** 468 * The maximum value of a surrogate unit in UTF-16 encoding, {@code '\uDFFF'}. 469 * 470 * @since 1.5 471 */ 472 public static final char MAX_SURROGATE = '\uDFFF'; 473 474 /** 475 * The minimum value of a supplementary code point, {@code U+010000}. 476 * 477 * @since 1.5 478 */ 479 public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x10000; 480 481 /** 482 * The minimum code point value, {@code U+0000}. 483 * 484 * @since 1.5 485 */ 486 public static final int MIN_CODE_POINT = 0x000000; 487 488 /** 489 * The maximum code point value, {@code U+10FFFF}. 490 * 491 * @since 1.5 492 */ 493 public static final int MAX_CODE_POINT = 0x10FFFF; 494 495 /** 496 * The number of bits required to represent a {@code Character} value 497 * unsigned form. 
498 * 499 * @since 1.5 500 */ 501 public static final int SIZE = 16; 502 503 private static final byte[] DIRECTIONALITY = new byte[] { 504 DIRECTIONALITY_LEFT_TO_RIGHT, DIRECTIONALITY_RIGHT_TO_LEFT, 505 DIRECTIONALITY_EUROPEAN_NUMBER, 506 DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR, 507 DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR, 508 DIRECTIONALITY_ARABIC_NUMBER, 509 DIRECTIONALITY_COMMON_NUMBER_SEPARATOR, 510 DIRECTIONALITY_PARAGRAPH_SEPARATOR, 511 DIRECTIONALITY_SEGMENT_SEPARATOR, DIRECTIONALITY_WHITESPACE, 512 DIRECTIONALITY_OTHER_NEUTRALS, 513 DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING, 514 DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE, 515 DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC, 516 DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING, 517 DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE, 518 DIRECTIONALITY_POP_DIRECTIONAL_FORMAT, 519 DIRECTIONALITY_NONSPACING_MARK, DIRECTIONALITY_BOUNDARY_NEUTRAL }; 520 521 /* 522 * Represents a subset of the Unicode character set. 523 */ 524 public static class Subset { 525 private final String name; 526 527 /** 528 * Constructs a new {@code Subset}. 529 */ Subset(String name)530 protected Subset(String name) { 531 if (name == null) { 532 throw new NullPointerException("name == null"); 533 } 534 this.name = name; 535 } 536 537 /** 538 * Compares this character subset for identity with the specified object. 539 */ equals(Object object)540 @Override public final boolean equals(Object object) { 541 return object == this; 542 } 543 544 /** 545 * Returns this subset's hash code, which is the hash code computed by 546 * {@link java.lang.Object#hashCode()}. 547 */ hashCode()548 @Override public final int hashCode() { 549 return super.hashCode(); 550 } 551 552 /** 553 * Returns this subset's name. 554 */ toString()555 @Override public final String toString() { 556 return name; 557 } 558 } 559 560 /** 561 * Represents a block of Unicode characters. 
This class provides constants for various 562 * well-known blocks (but not all blocks) and methods for looking up a block 563 * by name {@link #forName} or by code point {@link #of}. 564 * 565 * @since 1.2 566 */ 567 public static final class UnicodeBlock extends Subset { 568 /** 569 * The Surrogates Area Unicode block. 570 * 571 * @deprecated As of Java 5, this block has been replaced by 572 * {@link #HIGH_SURROGATES}, 573 * {@link #HIGH_PRIVATE_USE_SURROGATES} and 574 * {@link #LOW_SURROGATES}. 575 */ 576 @Deprecated 577 public static final UnicodeBlock SURROGATES_AREA = new UnicodeBlock("SURROGATES_AREA"); 578 579 /** The Basic Latin Unicode block. */ 580 public static final UnicodeBlock BASIC_LATIN = new UnicodeBlock("BASIC_LATIN"); 581 582 /** The Latin-1 Supplement Unicode block. */ 583 public static final UnicodeBlock LATIN_1_SUPPLEMENT = new UnicodeBlock("LATIN_1_SUPPLEMENT"); 584 585 /** The Latin Extended-A Unicode block. */ 586 public static final UnicodeBlock LATIN_EXTENDED_A = new UnicodeBlock("LATIN_EXTENDED_A"); 587 588 /** The Latin Extended-B Unicode block. */ 589 public static final UnicodeBlock LATIN_EXTENDED_B = new UnicodeBlock("LATIN_EXTENDED_B"); 590 591 /** The IPA Extensions Unicode block. */ 592 public static final UnicodeBlock IPA_EXTENSIONS = new UnicodeBlock("IPA_EXTENSIONS"); 593 594 /** The Spacing Modifier Letters Unicode block. */ 595 public static final UnicodeBlock SPACING_MODIFIER_LETTERS = new UnicodeBlock("SPACING_MODIFIER_LETTERS"); 596 597 /** The Combining Diacritical Marks Unicode block. */ 598 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS = new UnicodeBlock("COMBINING_DIACRITICAL_MARKS"); 599 600 /** 601 * The Greek and Coptic Unicode block. Previously referred to as Greek. 602 */ 603 public static final UnicodeBlock GREEK = new UnicodeBlock("GREEK"); 604 605 /** The Cyrillic Unicode block. 
*/ 606 public static final UnicodeBlock CYRILLIC = new UnicodeBlock("CYRILLIC"); 607 608 /** 609 * The Cyrillic Supplement Unicode block. Previously referred to as Cyrillic Supplementary. 610 */ 611 public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY = new UnicodeBlock("CYRILLIC_SUPPLEMENTARY"); 612 613 /** The Armenian Unicode block. */ 614 public static final UnicodeBlock ARMENIAN = new UnicodeBlock("ARMENIAN"); 615 616 /** The Hebrew Unicode block. */ 617 public static final UnicodeBlock HEBREW = new UnicodeBlock("HEBREW"); 618 619 /** The Arabic Unicode block. */ 620 public static final UnicodeBlock ARABIC = new UnicodeBlock("ARABIC"); 621 622 /** The Syriac Unicode block. */ 623 public static final UnicodeBlock SYRIAC = new UnicodeBlock("SYRIAC"); 624 625 /** The Thaana Unicode block. */ 626 public static final UnicodeBlock THAANA = new UnicodeBlock("THAANA"); 627 628 /** The Devanagari Unicode block. */ 629 public static final UnicodeBlock DEVANAGARI = new UnicodeBlock("DEVANAGARI"); 630 631 /** The Bengali Unicode block. */ 632 public static final UnicodeBlock BENGALI = new UnicodeBlock("BENGALI"); 633 634 /** The Gurmukhi Unicode block. */ 635 public static final UnicodeBlock GURMUKHI = new UnicodeBlock("GURMUKHI"); 636 637 /** The Gujarati Unicode block. */ 638 public static final UnicodeBlock GUJARATI = new UnicodeBlock("GUJARATI"); 639 640 /** The Oriya Unicode block. */ 641 public static final UnicodeBlock ORIYA = new UnicodeBlock("ORIYA"); 642 643 /** The Tamil Unicode block. */ 644 public static final UnicodeBlock TAMIL = new UnicodeBlock("TAMIL"); 645 646 /** The Telugu Unicode block. */ 647 public static final UnicodeBlock TELUGU = new UnicodeBlock("TELUGU"); 648 649 /** The Kannada Unicode block. */ 650 public static final UnicodeBlock KANNADA = new UnicodeBlock("KANNADA"); 651 652 /** The Malayalam Unicode block. */ 653 public static final UnicodeBlock MALAYALAM = new UnicodeBlock("MALAYALAM"); 654 655 /** The Sinhala Unicode block. 
*/ 656 public static final UnicodeBlock SINHALA = new UnicodeBlock("SINHALA"); 657 658 /** The Thai Unicode block. */ 659 public static final UnicodeBlock THAI = new UnicodeBlock("THAI"); 660 661 /** The Lao Unicode block. */ 662 public static final UnicodeBlock LAO = new UnicodeBlock("LAO"); 663 664 /** The Tibetan Unicode block. */ 665 public static final UnicodeBlock TIBETAN = new UnicodeBlock("TIBETAN"); 666 667 /** The Myanmar Unicode block. */ 668 public static final UnicodeBlock MYANMAR = new UnicodeBlock("MYANMAR"); 669 670 /** The Georgian Unicode block. */ 671 public static final UnicodeBlock GEORGIAN = new UnicodeBlock("GEORGIAN"); 672 673 /** The Hangul Jamo Unicode block. */ 674 public static final UnicodeBlock HANGUL_JAMO = new UnicodeBlock("HANGUL_JAMO"); 675 676 /** The Ethiopic Unicode block. */ 677 public static final UnicodeBlock ETHIOPIC = new UnicodeBlock("ETHIOPIC"); 678 679 /** The Cherokee Unicode block. */ 680 public static final UnicodeBlock CHEROKEE = new UnicodeBlock("CHEROKEE"); 681 682 /** The Unified Canadian Aboriginal Syllabics Unicode block. */ 683 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS = new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS"); 684 685 /** The Ogham Unicode block. */ 686 public static final UnicodeBlock OGHAM = new UnicodeBlock("OGHAM"); 687 688 /** The Runic Unicode block. */ 689 public static final UnicodeBlock RUNIC = new UnicodeBlock("RUNIC"); 690 691 /** The Tagalog Unicode block. */ 692 public static final UnicodeBlock TAGALOG = new UnicodeBlock("TAGALOG"); 693 694 /** The Hanunoo Unicode block. */ 695 public static final UnicodeBlock HANUNOO = new UnicodeBlock("HANUNOO"); 696 697 /** The Buhid Unicode block. */ 698 public static final UnicodeBlock BUHID = new UnicodeBlock("BUHID"); 699 700 /** The Tagbanwa Unicode block. */ 701 public static final UnicodeBlock TAGBANWA = new UnicodeBlock("TAGBANWA"); 702 703 /** The Khmer Unicode block. 
*/ 704 public static final UnicodeBlock KHMER = new UnicodeBlock("KHMER"); 705 706 /** The Mongolian Unicode block. */ 707 public static final UnicodeBlock MONGOLIAN = new UnicodeBlock("MONGOLIAN"); 708 709 /** The Limbu Unicode block. */ 710 public static final UnicodeBlock LIMBU = new UnicodeBlock("LIMBU"); 711 712 /** The Tai Le Unicode block. */ 713 public static final UnicodeBlock TAI_LE = new UnicodeBlock("TAI_LE"); 714 715 /** The Khmer Symbols Unicode block. */ 716 public static final UnicodeBlock KHMER_SYMBOLS = new UnicodeBlock("KHMER_SYMBOLS"); 717 718 /** The Phonetic Extensions Unicode block. */ 719 public static final UnicodeBlock PHONETIC_EXTENSIONS = new UnicodeBlock("PHONETIC_EXTENSIONS"); 720 721 /** The Latin Extended Additional Unicode block. */ 722 public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL = new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL"); 723 724 /** The Greek Extended Unicode block. */ 725 public static final UnicodeBlock GREEK_EXTENDED = new UnicodeBlock("GREEK_EXTENDED"); 726 727 /** The General Punctuation Unicode block. */ 728 public static final UnicodeBlock GENERAL_PUNCTUATION = new UnicodeBlock("GENERAL_PUNCTUATION"); 729 730 /** The Superscripts and Subscripts Unicode block. */ 731 public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS = new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS"); 732 733 /** The Currency Symbols Unicode block. */ 734 public static final UnicodeBlock CURRENCY_SYMBOLS = new UnicodeBlock("CURRENCY_SYMBOLS"); 735 736 /** 737 * The Combining Diacritical Marks for Symbols Unicode 738 * Block. Previously referred to as Combining Marks for 739 * Symbols. 740 */ 741 public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS = new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS"); 742 743 /** The Letterlike Symbols Unicode block. */ 744 public static final UnicodeBlock LETTERLIKE_SYMBOLS = new UnicodeBlock("LETTERLIKE_SYMBOLS"); 745 746 /** The Number Forms Unicode block. 
*/ 747 public static final UnicodeBlock NUMBER_FORMS = new UnicodeBlock("NUMBER_FORMS"); 748 749 /** The Arrows Unicode block. */ 750 public static final UnicodeBlock ARROWS = new UnicodeBlock("ARROWS"); 751 752 /** The Mathematical Operators Unicode block. */ 753 public static final UnicodeBlock MATHEMATICAL_OPERATORS = new UnicodeBlock("MATHEMATICAL_OPERATORS"); 754 755 /** The Miscellaneous Technical Unicode block. */ 756 public static final UnicodeBlock MISCELLANEOUS_TECHNICAL = new UnicodeBlock("MISCELLANEOUS_TECHNICAL"); 757 758 /** The Control Pictures Unicode block. */ 759 public static final UnicodeBlock CONTROL_PICTURES = new UnicodeBlock("CONTROL_PICTURES"); 760 761 /** The Optical Character Recognition Unicode block. */ 762 public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION = new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION"); 763 764 /** The Enclosed Alphanumerics Unicode block. */ 765 public static final UnicodeBlock ENCLOSED_ALPHANUMERICS = new UnicodeBlock("ENCLOSED_ALPHANUMERICS"); 766 767 /** The Box Drawing Unicode block. */ 768 public static final UnicodeBlock BOX_DRAWING = new UnicodeBlock("BOX_DRAWING"); 769 770 /** The Block Elements Unicode block. */ 771 public static final UnicodeBlock BLOCK_ELEMENTS = new UnicodeBlock("BLOCK_ELEMENTS"); 772 773 /** The Geometric Shapes Unicode block. */ 774 public static final UnicodeBlock GEOMETRIC_SHAPES = new UnicodeBlock("GEOMETRIC_SHAPES"); 775 776 /** The Miscellaneous Symbols Unicode block. */ 777 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS = new UnicodeBlock("MISCELLANEOUS_SYMBOLS"); 778 779 /** The Dingbats Unicode block. */ 780 public static final UnicodeBlock DINGBATS = new UnicodeBlock("DINGBATS"); 781 782 /** The Miscellaneous Mathematical Symbols-A Unicode block. */ 783 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A"); 784 785 /** The Supplemental Arrows-A Unicode block. 
*/ 786 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A = new UnicodeBlock("SUPPLEMENTAL_ARROWS_A"); 787 788 /** The Braille Patterns Unicode block. */ 789 public static final UnicodeBlock BRAILLE_PATTERNS = new UnicodeBlock("BRAILLE_PATTERNS"); 790 791 /** The Supplemental Arrows-B Unicode block. */ 792 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B = new UnicodeBlock("SUPPLEMENTAL_ARROWS_B"); 793 794 /** The Miscellaneous Mathematical Symbols-B Unicode block. */ 795 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B"); 796 797 /** The Supplemental Mathematical Operators Unicode block. */ 798 public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS = new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS"); 799 800 /** The Miscellaneous Symbols and Arrows Unicode block. */ 801 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS = new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS"); 802 803 /** The CJK Radicals Supplement Unicode block. */ 804 public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT = new UnicodeBlock("CJK_RADICALS_SUPPLEMENT"); 805 806 /** The Kangxi Radicals Unicode block. */ 807 public static final UnicodeBlock KANGXI_RADICALS = new UnicodeBlock("KANGXI_RADICALS"); 808 809 /** The Ideographic Description Characters Unicode block. */ 810 public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS = new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS"); 811 812 /** The CJK Symbols and Punctuation Unicode block. */ 813 public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION = new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION"); 814 815 /** The Hiragana Unicode block. */ 816 public static final UnicodeBlock HIRAGANA = new UnicodeBlock("HIRAGANA"); 817 818 /** The Katakana Unicode block. */ 819 public static final UnicodeBlock KATAKANA = new UnicodeBlock("KATAKANA"); 820 821 /** The Bopomofo Unicode block. 
*/ 822 public static final UnicodeBlock BOPOMOFO = new UnicodeBlock("BOPOMOFO"); 823 824 /** The Hangul Compatibility Jamo Unicode block. */ 825 public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO = new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO"); 826 827 /** The Kanbun Unicode block. */ 828 public static final UnicodeBlock KANBUN = new UnicodeBlock("KANBUN"); 829 830 /** The Bopomofo Extended Unicode block. */ 831 public static final UnicodeBlock BOPOMOFO_EXTENDED = new UnicodeBlock("BOPOMOFO_EXTENDED"); 832 833 /** The Katakana Phonetic Extensions Unicode block. */ 834 public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS = new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS"); 835 836 /** The Enclosed CJK Letters and Months Unicode block. */ 837 public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS = new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS"); 838 839 /** The CJK Compatibility Unicode block. */ 840 public static final UnicodeBlock CJK_COMPATIBILITY = new UnicodeBlock("CJK_COMPATIBILITY"); 841 842 /** The CJK Unified Ideographs Extension A Unicode block. */ 843 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A"); 844 845 /** The Yijing Hexagram Symbols Unicode block. */ 846 public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS = new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS"); 847 848 /** The CJK Unified Ideographs Unicode block. */ 849 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS"); 850 851 /** The Yi Syllables Unicode block. */ 852 public static final UnicodeBlock YI_SYLLABLES = new UnicodeBlock("YI_SYLLABLES"); 853 854 /** The Yi Radicals Unicode block. */ 855 public static final UnicodeBlock YI_RADICALS = new UnicodeBlock("YI_RADICALS"); 856 857 /** The Hangul Syllables Unicode block. 
*/ 858 public static final UnicodeBlock HANGUL_SYLLABLES = new UnicodeBlock("HANGUL_SYLLABLES"); 859 860 /** 861 * The High Surrogates Unicode block. This block represents 862 * code point values in the high surrogate range 0xD800 to 0xDB7F 863 */ 864 public static final UnicodeBlock HIGH_SURROGATES = new UnicodeBlock("HIGH_SURROGATES"); 865 866 /** 867 * The High Private Use Surrogates Unicode block. This block 868 * represents code point values in the high surrogate range 0xDB80 to 869 * 0xDBFF 870 */ 871 public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES = new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES"); 872 873 /** 874 * The Low Surrogates Unicode block. This block represents 875 * code point values in the low surrogate range 0xDC00 to 0xDFFF 876 */ 877 public static final UnicodeBlock LOW_SURROGATES = new UnicodeBlock("LOW_SURROGATES"); 878 879 /** The Private Use Area Unicode block. */ 880 public static final UnicodeBlock PRIVATE_USE_AREA = new UnicodeBlock("PRIVATE_USE_AREA"); 881 882 /** The CJK Compatibility Ideographs Unicode block. */ 883 public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS"); 884 885 /** The Alphabetic Presentation Forms Unicode block. */ 886 public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS = new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS"); 887 888 /** The Arabic Presentation Forms-A Unicode block. */ 889 public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A"); 890 891 /** The Variation Selectors Unicode block. */ 892 public static final UnicodeBlock VARIATION_SELECTORS = new UnicodeBlock("VARIATION_SELECTORS"); 893 894 /** The Combining Half Marks Unicode block. */ 895 public static final UnicodeBlock COMBINING_HALF_MARKS = new UnicodeBlock("COMBINING_HALF_MARKS"); 896 897 /** The CJK Compatibility Forms Unicode block. 
*/ 898 public static final UnicodeBlock CJK_COMPATIBILITY_FORMS = new UnicodeBlock("CJK_COMPATIBILITY_FORMS"); 899 900 /** The Small Form Variants Unicode block. */ 901 public static final UnicodeBlock SMALL_FORM_VARIANTS = new UnicodeBlock("SMALL_FORM_VARIANTS"); 902 903 /** The Arabic Presentation Forms-B Unicode block. */ 904 public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B"); 905 906 /** The Halfwidth and Fullwidth Forms Unicode block. */ 907 public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS = new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS"); 908 909 /** The Specials Unicode block. */ 910 public static final UnicodeBlock SPECIALS = new UnicodeBlock("SPECIALS"); 911 912 /** The Linear B Syllabary Unicode block. */ 913 public static final UnicodeBlock LINEAR_B_SYLLABARY = new UnicodeBlock("LINEAR_B_SYLLABARY"); 914 915 /** The Linear B Ideograms Unicode block. */ 916 public static final UnicodeBlock LINEAR_B_IDEOGRAMS = new UnicodeBlock("LINEAR_B_IDEOGRAMS"); 917 918 /** The Aegean Numbers Unicode block. */ 919 public static final UnicodeBlock AEGEAN_NUMBERS = new UnicodeBlock("AEGEAN_NUMBERS"); 920 921 /** The Old Italic Unicode block. */ 922 public static final UnicodeBlock OLD_ITALIC = new UnicodeBlock("OLD_ITALIC"); 923 924 /** The Gothic Unicode block. */ 925 public static final UnicodeBlock GOTHIC = new UnicodeBlock("GOTHIC"); 926 927 /** The Ugaritic Unicode block. */ 928 public static final UnicodeBlock UGARITIC = new UnicodeBlock("UGARITIC"); 929 930 /** The Deseret Unicode block. */ 931 public static final UnicodeBlock DESERET = new UnicodeBlock("DESERET"); 932 933 /** The Shavian Unicode block. */ 934 public static final UnicodeBlock SHAVIAN = new UnicodeBlock("SHAVIAN"); 935 936 /** The Osmanya Unicode block. */ 937 public static final UnicodeBlock OSMANYA = new UnicodeBlock("OSMANYA"); 938 939 /** The Cypriot Syllabary Unicode block. 
*/ 940 public static final UnicodeBlock CYPRIOT_SYLLABARY = new UnicodeBlock("CYPRIOT_SYLLABARY"); 941 942 /** The Byzantine Musical Symbols Unicode block. */ 943 public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS = new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS"); 944 945 /** The Musical Symbols Unicode block. */ 946 public static final UnicodeBlock MUSICAL_SYMBOLS = new UnicodeBlock("MUSICAL_SYMBOLS"); 947 948 /** The Tai Xuan Jing Symbols Unicode block. */ 949 public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS = new UnicodeBlock("TAI_XUAN_JING_SYMBOLS"); 950 951 /** The Mathematical Alphanumeric Symbols Unicode block. */ 952 public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS = new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS"); 953 954 /** The CJK Unified Ideographs Extension B Unicode block. */ 955 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B"); 956 957 /** The CJK Compatibility Ideographs Supplement Unicode block. */ 958 public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT"); 959 960 /** The Tags Unicode block. */ 961 public static final UnicodeBlock TAGS = new UnicodeBlock("TAGS"); 962 963 /** The Variation Selectors Supplement Unicode block. */ 964 public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT = new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT"); 965 966 /** The Supplementary Private Use Area-A Unicode block. */ 967 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A"); 968 969 /** The Supplementary Private Use Area-B Unicode block. */ 970 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B"); 971 972 // Unicode 4.1. 973 974 /** The Ancient Greek Musical Notation Unicode 4.1 block. 
*/ 975 public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION = new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION"); 976 977 /** The Ancient Greek Numbers Unicode 4.1 block. */ 978 public static final UnicodeBlock ANCIENT_GREEK_NUMBERS = new UnicodeBlock("ANCIENT_GREEK_NUMBERS"); 979 980 /** The Arabic Supplement Unicode 4.1 block. */ 981 public static final UnicodeBlock ARABIC_SUPPLEMENT = new UnicodeBlock("ARABIC_SUPPLEMENT"); 982 983 /** The Buginese Unicode 4.1 block. */ 984 public static final UnicodeBlock BUGINESE = new UnicodeBlock("BUGINESE"); 985 986 /** The CJK Strokes Unicode 4.1 block. */ 987 public static final UnicodeBlock CJK_STROKES = new UnicodeBlock("CJK_STROKES"); 988 989 /** The Combining Diacritical Marks Supplement Unicode 4.1 block. */ 990 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT = new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT"); 991 992 /** The Coptic Unicode 4.1 block. */ 993 public static final UnicodeBlock COPTIC = new UnicodeBlock("COPTIC"); 994 995 /** The Ethiopic Extended Unicode 4.1 block. */ 996 public static final UnicodeBlock ETHIOPIC_EXTENDED = new UnicodeBlock("ETHIOPIC_EXTENDED"); 997 998 /** The Ethiopic Supplement Unicode 4.1 block. */ 999 public static final UnicodeBlock ETHIOPIC_SUPPLEMENT = new UnicodeBlock("ETHIOPIC_SUPPLEMENT"); 1000 1001 /** The Georgian Supplement Unicode 4.1 block. */ 1002 public static final UnicodeBlock GEORGIAN_SUPPLEMENT = new UnicodeBlock("GEORGIAN_SUPPLEMENT"); 1003 1004 /** The Glagolitic Unicode 4.1 block. */ 1005 public static final UnicodeBlock GLAGOLITIC = new UnicodeBlock("GLAGOLITIC"); 1006 1007 /** The Kharoshthi Unicode 4.1 block. */ 1008 public static final UnicodeBlock KHAROSHTHI = new UnicodeBlock("KHAROSHTHI"); 1009 1010 /** The Modifier Tone Letters Unicode 4.1 block. */ 1011 public static final UnicodeBlock MODIFIER_TONE_LETTERS = new UnicodeBlock("MODIFIER_TONE_LETTERS"); 1012 1013 /** The New Tai Lue Unicode 4.1 block. 
*/ 1014 public static final UnicodeBlock NEW_TAI_LUE = new UnicodeBlock("NEW_TAI_LUE"); 1015 1016 /** The Old Persian Unicode 4.1 block. */ 1017 public static final UnicodeBlock OLD_PERSIAN = new UnicodeBlock("OLD_PERSIAN"); 1018 1019 /** The Phonetic Extensions Supplement Unicode 4.1 block. */ 1020 public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT = new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT"); 1021 1022 /** The Supplemental Punctuation Unicode 4.1 block. */ 1023 public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION = new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION"); 1024 1025 /** The Syloti Nagri Unicode 4.1 block. */ 1026 public static final UnicodeBlock SYLOTI_NAGRI = new UnicodeBlock("SYLOTI_NAGRI"); 1027 1028 /** The Tifinagh Unicode 4.1 block. */ 1029 public static final UnicodeBlock TIFINAGH = new UnicodeBlock("TIFINAGH"); 1030 1031 /** The Vertical Forms Unicode 4.1 block. */ 1032 public static final UnicodeBlock VERTICAL_FORMS = new UnicodeBlock("VERTICAL_FORMS"); 1033 1034 // Unicode 5.0. 1035 1036 /** The NKo Unicode 5.0 block. */ 1037 public static final UnicodeBlock NKO = new UnicodeBlock("NKO"); 1038 1039 /** The Balinese Unicode 5.0 block. */ 1040 public static final UnicodeBlock BALINESE = new UnicodeBlock("BALINESE"); 1041 1042 /** The Latin Extended-C Unicode 5.0 block. */ 1043 public static final UnicodeBlock LATIN_EXTENDED_C = new UnicodeBlock("LATIN_EXTENDED_C"); 1044 1045 /** The Latin Extended-D Unicode 5.0 block. */ 1046 public static final UnicodeBlock LATIN_EXTENDED_D = new UnicodeBlock("LATIN_EXTENDED_D"); 1047 1048 /** The Phags-pa Unicode 5.0 block. */ 1049 public static final UnicodeBlock PHAGS_PA = new UnicodeBlock("PHAGS_PA"); 1050 1051 /** The Phoenician Unicode 5.0 block. */ 1052 public static final UnicodeBlock PHOENICIAN = new UnicodeBlock("PHOENICIAN"); 1053 1054 /** The Cuneiform Unicode 5.0 block.
*/ 1055 public static final UnicodeBlock CUNEIFORM = new UnicodeBlock("CUNEIFORM"); 1056 1057 /** The Cuneiform Numbers And Punctuation Unicode 5.0 block. */ 1058 public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION = new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION"); 1059 1060 /** The Counting Rod Numerals Unicode 5.0 block. */ 1061 public static final UnicodeBlock COUNTING_ROD_NUMERALS = new UnicodeBlock("COUNTING_ROD_NUMERALS"); 1062 1063 // Unicode 5.1. 1064 1065 /** The Sundanese Unicode 5.1 block. */ 1066 public static final UnicodeBlock SUNDANESE = new UnicodeBlock("SUNDANESE"); 1067 1068 /** The Lepcha Unicode 5.1 block. */ 1069 public static final UnicodeBlock LEPCHA = new UnicodeBlock("LEPCHA"); 1070 1071 /** The Ol Chiki Unicode 5.1 block. */ 1072 public static final UnicodeBlock OL_CHIKI = new UnicodeBlock("OL_CHIKI"); 1073 1074 /** The Cyrillic Extended-A Unicode 5.1 block. */ 1075 public static final UnicodeBlock CYRILLIC_EXTENDED_A = new UnicodeBlock("CYRILLIC_EXTENDED_A"); 1076 1077 /** The Vai Unicode 5.1 block. */ 1078 public static final UnicodeBlock VAI = new UnicodeBlock("VAI"); 1079 1080 /** The Cyrillic Extended-B Unicode 5.1 block. */ 1081 public static final UnicodeBlock CYRILLIC_EXTENDED_B = new UnicodeBlock("CYRILLIC_EXTENDED_B"); 1082 1083 /** The Saurashtra Unicode 5.1 block. */ 1084 public static final UnicodeBlock SAURASHTRA = new UnicodeBlock("SAURASHTRA"); 1085 1086 /** The Kayah Li Unicode 5.1 block. */ 1087 public static final UnicodeBlock KAYAH_LI = new UnicodeBlock("KAYAH_LI"); 1088 1089 /** The Rejang Unicode 5.1 block. */ 1090 public static final UnicodeBlock REJANG = new UnicodeBlock("REJANG"); 1091 1092 /** The Cham Unicode 5.1 block. */ 1093 public static final UnicodeBlock CHAM = new UnicodeBlock("CHAM"); 1094 1095 /** The Ancient Symbols Unicode 5.1 block. */ 1096 public static final UnicodeBlock ANCIENT_SYMBOLS = new UnicodeBlock("ANCIENT_SYMBOLS"); 1097 1098 /** The Phaistos Disc Unicode 5.1 block.
*/ 1099 public static final UnicodeBlock PHAISTOS_DISC = new UnicodeBlock("PHAISTOS_DISC"); 1100 1101 /** The Lycian Unicode 5.1 block. */ 1102 public static final UnicodeBlock LYCIAN = new UnicodeBlock("LYCIAN"); 1103 1104 /** The Carian Unicode 5.1 block. */ 1105 public static final UnicodeBlock CARIAN = new UnicodeBlock("CARIAN"); 1106 1107 /** The Lydian Unicode 5.1 block. */ 1108 public static final UnicodeBlock LYDIAN = new UnicodeBlock("LYDIAN"); 1109 1110 /** The Mahjong Tiles Unicode 5.1 block. */ 1111 public static final UnicodeBlock MAHJONG_TILES = new UnicodeBlock("MAHJONG_TILES"); 1112 1113 /** The Domino Tiles Unicode 5.1 block. */ 1114 public static final UnicodeBlock DOMINO_TILES = new UnicodeBlock("DOMINO_TILES"); 1115 1116 // Unicode 5.2. 1117 1118 /** The Samaritan Unicode 5.2 block. */ 1119 public static final UnicodeBlock SAMARITAN = new UnicodeBlock("SAMARITAN"); 1120 1121 /** The Unified Canadian Aboriginal Syllabics Extended Unicode 5.2 block. */ 1122 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED = new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED"); 1123 1124 /** The Tai Tham Unicode 5.2 block. */ 1125 public static final UnicodeBlock TAI_THAM = new UnicodeBlock("TAI_THAM"); 1126 1127 /** The Vedic Extensions Unicode 5.2 block. */ 1128 public static final UnicodeBlock VEDIC_EXTENSIONS = new UnicodeBlock("VEDIC_EXTENSIONS"); 1129 1130 /** The Lisu Unicode 5.2 block. */ 1131 public static final UnicodeBlock LISU = new UnicodeBlock("LISU"); 1132 1133 /** The Bamum Unicode 5.2 block. */ 1134 public static final UnicodeBlock BAMUM = new UnicodeBlock("BAMUM"); 1135 1136 /** The Common Indic Number Forms Unicode 5.2 block. */ 1137 public static final UnicodeBlock COMMON_INDIC_NUMBER_FORMS = new UnicodeBlock("COMMON_INDIC_NUMBER_FORMS"); 1138 1139 /** The Devanagari Extended Unicode 5.2 block.
*/ 1140 public static final UnicodeBlock DEVANAGARI_EXTENDED = new UnicodeBlock("DEVANAGARI_EXTENDED"); 1141 1142 /** The Hangul Jamo Extended-A Unicode 5.2 block. */ 1143 public static final UnicodeBlock HANGUL_JAMO_EXTENDED_A = new UnicodeBlock("HANGUL_JAMO_EXTENDED_A"); 1144 1145 /** The Javanese Unicode 5.2 block. */ 1146 public static final UnicodeBlock JAVANESE = new UnicodeBlock("JAVANESE"); 1147 1148 /** The Myanmar Extended-A Unicode 5.2 block. */ 1149 public static final UnicodeBlock MYANMAR_EXTENDED_A = new UnicodeBlock("MYANMAR_EXTENDED_A"); 1150 1151 /** The Tai Viet Unicode 5.2 block. */ 1152 public static final UnicodeBlock TAI_VIET = new UnicodeBlock("TAI_VIET"); 1153 1154 /** The Meetei Mayek Unicode 5.2 block. */ 1155 public static final UnicodeBlock MEETEI_MAYEK = new UnicodeBlock("MEETEI_MAYEK"); 1156 1157 /** The Hangul Jamo Extended-B Unicode 5.2 block. */ 1158 public static final UnicodeBlock HANGUL_JAMO_EXTENDED_B = new UnicodeBlock("HANGUL_JAMO_EXTENDED_B"); 1159 1160 /** The Imperial Aramaic Unicode 5.2 block. */ 1161 public static final UnicodeBlock IMPERIAL_ARAMAIC = new UnicodeBlock("IMPERIAL_ARAMAIC"); 1162 1163 /** The Old South Arabian Unicode 5.2 block. */ 1164 public static final UnicodeBlock OLD_SOUTH_ARABIAN = new UnicodeBlock("OLD_SOUTH_ARABIAN"); 1165 1166 /** The Avestan Unicode 5.2 block. */ 1167 public static final UnicodeBlock AVESTAN = new UnicodeBlock("AVESTAN"); 1168 1169 /** The Inscriptional Parthian Unicode 5.2 block. */ 1170 public static final UnicodeBlock INSCRIPTIONAL_PARTHIAN = new UnicodeBlock("INSCRIPTIONAL_PARTHIAN"); 1171 1172 /** The Inscriptional Pahlavi Unicode 5.2 block. */ 1173 public static final UnicodeBlock INSCRIPTIONAL_PAHLAVI = new UnicodeBlock("INSCRIPTIONAL_PAHLAVI"); 1174 1175 /** The Old Turkic Unicode 5.2 block. */ 1176 public static final UnicodeBlock OLD_TURKIC = new UnicodeBlock("OLD_TURKIC"); 1177 1178 /** The Rumi Numeral Symbols Unicode 5.2 block.
*/ 1179 public static final UnicodeBlock RUMI_NUMERAL_SYMBOLS = new UnicodeBlock("RUMI_NUMERAL_SYMBOLS"); 1180 1181 /** The Kaithi Unicode 5.2 block. */ 1182 public static final UnicodeBlock KAITHI = new UnicodeBlock("KAITHI"); 1183 1184 /** The Egyptian Hieroglyphs Unicode 5.2 block. */ 1185 public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS = new UnicodeBlock("EGYPTIAN_HIEROGLYPHS"); 1186 1187 /** The Enclosed Alphanumeric Supplement Unicode 5.2 block. */ 1188 public static final UnicodeBlock ENCLOSED_ALPHANUMERIC_SUPPLEMENT = new UnicodeBlock("ENCLOSED_ALPHANUMERIC_SUPPLEMENT"); 1189 1190 /** The Enclosed Ideographic Supplement Unicode 5.2 block. */ 1191 public static final UnicodeBlock ENCLOSED_IDEOGRAPHIC_SUPPLEMENT = new UnicodeBlock("ENCLOSED_IDEOGRAPHIC_SUPPLEMENT"); 1192 1193 /** The CJK Unified Ideographs Unicode 5.2 block. */ 1194 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C"); 1195 1196 // Unicode 6.0. 1197 1198 /** The Mandaic Unicode 6.0 block. */ 1199 public static final UnicodeBlock MANDAIC = new UnicodeBlock("MANDAIC"); 1200 1201 /** The Batak Unicode 6.0 block. */ 1202 public static final UnicodeBlock BATAK = new UnicodeBlock("BATAK"); 1203 1204 /** The Ethiopic Extended-A Unicode 6.0 block. */ 1205 public static final UnicodeBlock ETHIOPIC_EXTENDED_A = new UnicodeBlock("ETHIOPIC_EXTENDED_A"); 1206 1207 /** The Brahmi Unicode 6.0 block. */ 1208 public static final UnicodeBlock BRAHMI = new UnicodeBlock("BRAHMI"); 1209 1210 /** The Bamum Supplement Unicode 6.0 block. */ 1211 public static final UnicodeBlock BAMUM_SUPPLEMENT = new UnicodeBlock("BAMUM_SUPPLEMENT"); 1212 1213 /** The Kana Supplement Unicode 6.0 block. */ 1214 public static final UnicodeBlock KANA_SUPPLEMENT = new UnicodeBlock("KANA_SUPPLEMENT"); 1215 1216 /** The Playing Cards Supplement Unicode 6.0 block. 
*/ 1217 public static final UnicodeBlock PLAYING_CARDS = new UnicodeBlock("PLAYING_CARDS"); 1218 1219 /** The Miscellaneous Symbols And Pictographs Supplement Unicode 6.0 block. */ 1220 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS = new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS"); 1221 1222 /** The Emoticons Unicode 6.0 block. */ 1223 public static final UnicodeBlock EMOTICONS = new UnicodeBlock("EMOTICONS"); 1224 1225 /** The Transport And Map Symbols Unicode 6.0 block. */ 1226 public static final UnicodeBlock TRANSPORT_AND_MAP_SYMBOLS = new UnicodeBlock("TRANSPORT_AND_MAP_SYMBOLS"); 1227 1228 /** The Alchemical Symbols Unicode 6.0 block. */ 1229 public static final UnicodeBlock ALCHEMICAL_SYMBOLS = new UnicodeBlock("ALCHEMICAL_SYMBOLS"); 1230 1231 /** The CJK Unified Ideographs Extension-D Unicode 6.0 block. */ 1232 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D"); 1233 1234 /* 1235 * All of the UnicodeBlocks above, in the icu4c UBlock enum order. 1236 */ 1237 private static UnicodeBlock[] BLOCKS = new UnicodeBlock[] { 1238 null, // icu4c numbers blocks starting at 1, so index 0 should be null. 
1239 1240 UnicodeBlock.BASIC_LATIN, 1241 UnicodeBlock.LATIN_1_SUPPLEMENT, 1242 UnicodeBlock.LATIN_EXTENDED_A, 1243 UnicodeBlock.LATIN_EXTENDED_B, 1244 UnicodeBlock.IPA_EXTENSIONS, 1245 UnicodeBlock.SPACING_MODIFIER_LETTERS, 1246 UnicodeBlock.COMBINING_DIACRITICAL_MARKS, 1247 UnicodeBlock.GREEK, 1248 UnicodeBlock.CYRILLIC, 1249 UnicodeBlock.ARMENIAN, 1250 UnicodeBlock.HEBREW, 1251 UnicodeBlock.ARABIC, 1252 UnicodeBlock.SYRIAC, 1253 UnicodeBlock.THAANA, 1254 UnicodeBlock.DEVANAGARI, 1255 UnicodeBlock.BENGALI, 1256 UnicodeBlock.GURMUKHI, 1257 UnicodeBlock.GUJARATI, 1258 UnicodeBlock.ORIYA, 1259 UnicodeBlock.TAMIL, 1260 UnicodeBlock.TELUGU, 1261 UnicodeBlock.KANNADA, 1262 UnicodeBlock.MALAYALAM, 1263 UnicodeBlock.SINHALA, 1264 UnicodeBlock.THAI, 1265 UnicodeBlock.LAO, 1266 UnicodeBlock.TIBETAN, 1267 UnicodeBlock.MYANMAR, 1268 UnicodeBlock.GEORGIAN, 1269 UnicodeBlock.HANGUL_JAMO, 1270 UnicodeBlock.ETHIOPIC, 1271 UnicodeBlock.CHEROKEE, 1272 UnicodeBlock.UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS, 1273 UnicodeBlock.OGHAM, 1274 UnicodeBlock.RUNIC, 1275 UnicodeBlock.KHMER, 1276 UnicodeBlock.MONGOLIAN, 1277 UnicodeBlock.LATIN_EXTENDED_ADDITIONAL, 1278 UnicodeBlock.GREEK_EXTENDED, 1279 UnicodeBlock.GENERAL_PUNCTUATION, 1280 UnicodeBlock.SUPERSCRIPTS_AND_SUBSCRIPTS, 1281 UnicodeBlock.CURRENCY_SYMBOLS, 1282 UnicodeBlock.COMBINING_MARKS_FOR_SYMBOLS, 1283 UnicodeBlock.LETTERLIKE_SYMBOLS, 1284 UnicodeBlock.NUMBER_FORMS, 1285 UnicodeBlock.ARROWS, 1286 UnicodeBlock.MATHEMATICAL_OPERATORS, 1287 UnicodeBlock.MISCELLANEOUS_TECHNICAL, 1288 UnicodeBlock.CONTROL_PICTURES, 1289 UnicodeBlock.OPTICAL_CHARACTER_RECOGNITION, 1290 UnicodeBlock.ENCLOSED_ALPHANUMERICS, 1291 UnicodeBlock.BOX_DRAWING, 1292 UnicodeBlock.BLOCK_ELEMENTS, 1293 UnicodeBlock.GEOMETRIC_SHAPES, 1294 UnicodeBlock.MISCELLANEOUS_SYMBOLS, 1295 UnicodeBlock.DINGBATS, 1296 UnicodeBlock.BRAILLE_PATTERNS, 1297 UnicodeBlock.CJK_RADICALS_SUPPLEMENT, 1298 UnicodeBlock.KANGXI_RADICALS, 1299 UnicodeBlock.IDEOGRAPHIC_DESCRIPTION_CHARACTERS, 
1300 UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION, 1301 UnicodeBlock.HIRAGANA, 1302 UnicodeBlock.KATAKANA, 1303 UnicodeBlock.BOPOMOFO, 1304 UnicodeBlock.HANGUL_COMPATIBILITY_JAMO, 1305 UnicodeBlock.KANBUN, 1306 UnicodeBlock.BOPOMOFO_EXTENDED, 1307 UnicodeBlock.ENCLOSED_CJK_LETTERS_AND_MONTHS, 1308 UnicodeBlock.CJK_COMPATIBILITY, 1309 UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A, 1310 UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS, 1311 UnicodeBlock.YI_SYLLABLES, 1312 UnicodeBlock.YI_RADICALS, 1313 UnicodeBlock.HANGUL_SYLLABLES, 1314 UnicodeBlock.HIGH_SURROGATES, 1315 UnicodeBlock.HIGH_PRIVATE_USE_SURROGATES, 1316 UnicodeBlock.LOW_SURROGATES, 1317 UnicodeBlock.PRIVATE_USE_AREA, 1318 UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS, 1319 UnicodeBlock.ALPHABETIC_PRESENTATION_FORMS, 1320 UnicodeBlock.ARABIC_PRESENTATION_FORMS_A, 1321 UnicodeBlock.COMBINING_HALF_MARKS, 1322 UnicodeBlock.CJK_COMPATIBILITY_FORMS, 1323 UnicodeBlock.SMALL_FORM_VARIANTS, 1324 UnicodeBlock.ARABIC_PRESENTATION_FORMS_B, 1325 UnicodeBlock.SPECIALS, 1326 UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS, 1327 1328 // Unicode 3.1. 1329 UnicodeBlock.OLD_ITALIC, 1330 UnicodeBlock.GOTHIC, 1331 UnicodeBlock.DESERET, 1332 UnicodeBlock.BYZANTINE_MUSICAL_SYMBOLS, 1333 UnicodeBlock.MUSICAL_SYMBOLS, 1334 UnicodeBlock.MATHEMATICAL_ALPHANUMERIC_SYMBOLS, 1335 UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B, 1336 UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT, 1337 UnicodeBlock.TAGS, 1338 1339 // Unicode 3.2. 
1340 UnicodeBlock.CYRILLIC_SUPPLEMENTARY, 1341 UnicodeBlock.TAGALOG, 1342 UnicodeBlock.HANUNOO, 1343 UnicodeBlock.BUHID, 1344 UnicodeBlock.TAGBANWA, 1345 UnicodeBlock.MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A, 1346 UnicodeBlock.SUPPLEMENTAL_ARROWS_A, 1347 UnicodeBlock.SUPPLEMENTAL_ARROWS_B, 1348 UnicodeBlock.MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B, 1349 UnicodeBlock.SUPPLEMENTAL_MATHEMATICAL_OPERATORS, 1350 UnicodeBlock.KATAKANA_PHONETIC_EXTENSIONS, 1351 UnicodeBlock.VARIATION_SELECTORS, 1352 UnicodeBlock.SUPPLEMENTARY_PRIVATE_USE_AREA_A, 1353 UnicodeBlock.SUPPLEMENTARY_PRIVATE_USE_AREA_B, 1354 1355 // Unicode 4.0. 1356 UnicodeBlock.LIMBU, 1357 UnicodeBlock.TAI_LE, 1358 UnicodeBlock.KHMER_SYMBOLS, 1359 UnicodeBlock.PHONETIC_EXTENSIONS, 1360 UnicodeBlock.MISCELLANEOUS_SYMBOLS_AND_ARROWS, 1361 UnicodeBlock.YIJING_HEXAGRAM_SYMBOLS, 1362 UnicodeBlock.LINEAR_B_SYLLABARY, 1363 UnicodeBlock.LINEAR_B_IDEOGRAMS, 1364 UnicodeBlock.AEGEAN_NUMBERS, 1365 UnicodeBlock.UGARITIC, 1366 UnicodeBlock.SHAVIAN, 1367 UnicodeBlock.OSMANYA, 1368 UnicodeBlock.CYPRIOT_SYLLABARY, 1369 UnicodeBlock.TAI_XUAN_JING_SYMBOLS, 1370 UnicodeBlock.VARIATION_SELECTORS_SUPPLEMENT, 1371 1372 // Unicode 4.1. 1373 UnicodeBlock.ANCIENT_GREEK_MUSICAL_NOTATION, 1374 UnicodeBlock.ANCIENT_GREEK_NUMBERS, 1375 UnicodeBlock.ARABIC_SUPPLEMENT, 1376 UnicodeBlock.BUGINESE, 1377 UnicodeBlock.CJK_STROKES, 1378 UnicodeBlock.COMBINING_DIACRITICAL_MARKS_SUPPLEMENT, 1379 UnicodeBlock.COPTIC, 1380 UnicodeBlock.ETHIOPIC_EXTENDED, 1381 UnicodeBlock.ETHIOPIC_SUPPLEMENT, 1382 UnicodeBlock.GEORGIAN_SUPPLEMENT, 1383 UnicodeBlock.GLAGOLITIC, 1384 UnicodeBlock.KHAROSHTHI, 1385 UnicodeBlock.MODIFIER_TONE_LETTERS, 1386 UnicodeBlock.NEW_TAI_LUE, 1387 UnicodeBlock.OLD_PERSIAN, 1388 UnicodeBlock.PHONETIC_EXTENSIONS_SUPPLEMENT, 1389 UnicodeBlock.SUPPLEMENTAL_PUNCTUATION, 1390 UnicodeBlock.SYLOTI_NAGRI, 1391 UnicodeBlock.TIFINAGH, 1392 UnicodeBlock.VERTICAL_FORMS, 1393 1394 // Unicode 5.0. 
1395 UnicodeBlock.NKO, 1396 UnicodeBlock.BALINESE, 1397 UnicodeBlock.LATIN_EXTENDED_C, 1398 UnicodeBlock.LATIN_EXTENDED_D, 1399 UnicodeBlock.PHAGS_PA, 1400 UnicodeBlock.PHOENICIAN, 1401 UnicodeBlock.CUNEIFORM, 1402 UnicodeBlock.CUNEIFORM_NUMBERS_AND_PUNCTUATION, 1403 UnicodeBlock.COUNTING_ROD_NUMERALS, 1404 1405 // Unicode 5.1. 1406 UnicodeBlock.SUNDANESE, 1407 UnicodeBlock.LEPCHA, 1408 UnicodeBlock.OL_CHIKI, 1409 UnicodeBlock.CYRILLIC_EXTENDED_A, 1410 UnicodeBlock.VAI, 1411 UnicodeBlock.CYRILLIC_EXTENDED_B, 1412 UnicodeBlock.SAURASHTRA, 1413 UnicodeBlock.KAYAH_LI, 1414 UnicodeBlock.REJANG, 1415 UnicodeBlock.CHAM, 1416 UnicodeBlock.ANCIENT_SYMBOLS, 1417 UnicodeBlock.PHAISTOS_DISC, 1418 UnicodeBlock.LYCIAN, 1419 UnicodeBlock.CARIAN, 1420 UnicodeBlock.LYDIAN, 1421 UnicodeBlock.MAHJONG_TILES, 1422 UnicodeBlock.DOMINO_TILES, 1423 1424 // Unicode 5.2. 1425 UnicodeBlock.SAMARITAN, 1426 UnicodeBlock.UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED, 1427 UnicodeBlock.TAI_THAM, 1428 UnicodeBlock.VEDIC_EXTENSIONS, 1429 UnicodeBlock.LISU, 1430 UnicodeBlock.BAMUM, 1431 UnicodeBlock.COMMON_INDIC_NUMBER_FORMS, 1432 UnicodeBlock.DEVANAGARI_EXTENDED, 1433 UnicodeBlock.HANGUL_JAMO_EXTENDED_A, 1434 UnicodeBlock.JAVANESE, 1435 UnicodeBlock.MYANMAR_EXTENDED_A, 1436 UnicodeBlock.TAI_VIET, 1437 UnicodeBlock.MEETEI_MAYEK, 1438 UnicodeBlock.HANGUL_JAMO_EXTENDED_B, 1439 UnicodeBlock.IMPERIAL_ARAMAIC, 1440 UnicodeBlock.OLD_SOUTH_ARABIAN, 1441 UnicodeBlock.AVESTAN, 1442 UnicodeBlock.INSCRIPTIONAL_PARTHIAN, 1443 UnicodeBlock.INSCRIPTIONAL_PAHLAVI, 1444 UnicodeBlock.OLD_TURKIC, 1445 UnicodeBlock.RUMI_NUMERAL_SYMBOLS, 1446 UnicodeBlock.KAITHI, 1447 UnicodeBlock.EGYPTIAN_HIEROGLYPHS, 1448 UnicodeBlock.ENCLOSED_ALPHANUMERIC_SUPPLEMENT, 1449 UnicodeBlock.ENCLOSED_IDEOGRAPHIC_SUPPLEMENT, 1450 UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C, 1451 1452 // Unicode 6.0. 
1453 UnicodeBlock.MANDAIC, 1454 UnicodeBlock.BATAK, 1455 UnicodeBlock.ETHIOPIC_EXTENDED_A, 1456 UnicodeBlock.BRAHMI, 1457 UnicodeBlock.BAMUM_SUPPLEMENT, 1458 UnicodeBlock.KANA_SUPPLEMENT, 1459 UnicodeBlock.PLAYING_CARDS, 1460 UnicodeBlock.MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS, 1461 UnicodeBlock.EMOTICONS, 1462 UnicodeBlock.TRANSPORT_AND_MAP_SYMBOLS, 1463 UnicodeBlock.ALCHEMICAL_SYMBOLS, 1464 UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D, 1465 }; 1466 1467 /** 1468 * Returns the Unicode block for the given block name, or null if there is no 1469 * such block. 1470 * 1471 * <p>Block names may be one of the following: 1472 * <ul> 1473 * <li>Canonical block name, as defined by the Unicode specification; 1474 * case-insensitive.</li> 1475 * <li>Canonical block name without any spaces, as defined by the 1476 * Unicode specification; case-insensitive.</li> 1477 * <li>A {@code UnicodeBlock} constant identifier. This is determined by 1478 * converting the canonical name to uppercase and replacing all spaces and hyphens 1479 * with underscores.</li> 1480 * </ul> 1481 * 1482 * @throws NullPointerException 1483 * if {@code blockName == null}. 1484 * @throws IllegalArgumentException 1485 * if {@code blockName} is not the name of any known block. 1486 * @since 1.5 1487 */ forName(String blockName)1488 public static UnicodeBlock forName(String blockName) { 1489 if (blockName == null) { 1490 throw new NullPointerException("blockName == null"); 1491 } 1492 int block = unicodeBlockForName(blockName); 1493 if (block == -1) { 1494 throw new IllegalArgumentException("Unknown block: " + blockName); 1495 } 1496 return BLOCKS[block]; 1497 } 1498 1499 /** 1500 * Returns the Unicode block containing the given code point, or null if the 1501 * code point does not belong to any known block. 
1502 */ of(char c)1503 public static UnicodeBlock of(char c) { 1504 return of((int) c); 1505 } 1506 1507 /** 1508 * Returns the Unicode block containing the given code point, or null if the 1509 * code point does not belong to any known block. 1510 */ of(int codePoint)1511 public static UnicodeBlock of(int codePoint) { 1512 checkValidCodePoint(codePoint); 1513 int block = unicodeBlockForCodePoint(codePoint); 1514 if (block == -1 || block >= BLOCKS.length) { 1515 return null; 1516 } 1517 return BLOCKS[block]; 1518 } 1519 UnicodeBlock(String blockName)1520 private UnicodeBlock(String blockName) { 1521 super(blockName); 1522 } 1523 } 1524 unicodeBlockForName(String blockName)1525 private static native int unicodeBlockForName(String blockName); 1526 unicodeBlockForCodePoint(int codePoint)1527 private static native int unicodeBlockForCodePoint(int codePoint); 1528 unicodeScriptForName(String blockName)1529 private static native int unicodeScriptForName(String blockName); 1530 unicodeScriptForCodePoint(int codePoint)1531 private static native int unicodeScriptForCodePoint(int codePoint); 1532 1533 1534 /** 1535 * Constructs a new {@code Character} with the specified primitive char 1536 * value. 1537 * 1538 * @param value 1539 * the primitive char value to store in the new instance. 1540 */ Character(char value)1541 public Character(char value) { 1542 this.value = value; 1543 } 1544 1545 /** 1546 * Gets the primitive value of this character. 1547 * 1548 * @return this object's primitive value. 1549 */ charValue()1550 public char charValue() { 1551 return value; 1552 } 1553 checkValidCodePoint(int codePoint)1554 private static void checkValidCodePoint(int codePoint) { 1555 if (!isValidCodePoint(codePoint)) { 1556 throw new IllegalArgumentException("Invalid code point: " + codePoint); 1557 } 1558 } 1559 1560 /** 1561 * Compares this object to the specified character object to determine their 1562 * relative order. 
1563 * 1564 * @param c 1565 * the character object to compare this object to. 1566 * @return {@code 0} if the value of this character and the value of 1567 * {@code c} are equal; a positive value if the value of this 1568 * character is greater than the value of {@code c}; a negative 1569 * value if the value of this character is less than the value of 1570 * {@code c}. 1571 * @see java.lang.Comparable 1572 * @since 1.2 1573 */ compareTo(Character c)1574 public int compareTo(Character c) { 1575 return compare(value, c.value); 1576 } 1577 1578 /** 1579 * Compares two {@code char} values. 1580 * @return 0 if lhs = rhs, less than 0 if lhs < rhs, and greater than 0 if lhs > rhs. 1581 * @since 1.7 1582 */ compare(char lhs, char rhs)1583 public static int compare(char lhs, char rhs) { 1584 return lhs - rhs; 1585 } 1586 1587 /** 1588 * Returns a {@code Character} instance for the {@code char} value passed. 1589 * <p> 1590 * If it is not necessary to get a new {@code Character} instance, it is 1591 * recommended to use this method instead of the constructor, since it 1592 * maintains a cache of instances which may result in better performance. 1593 * 1594 * @param c 1595 * the char value for which to get a {@code Character} instance. 1596 * @return the {@code Character} instance for {@code c}. 1597 * @since 1.5 1598 */ valueOf(char c)1599 public static Character valueOf(char c) { 1600 return c < 128 ? SMALL_VALUES[c] : new Character(c); 1601 } 1602 1603 /** 1604 * A cache of instances used by {@link #valueOf(char)} and auto-boxing 1605 */ 1606 private static final Character[] SMALL_VALUES = new Character[128]; 1607 1608 static { 1609 for (int i = 0; i < 128; i++) { 1610 SMALL_VALUES[i] = new Character((char) i); 1611 } 1612 } 1613 /** 1614 * Indicates whether {@code codePoint} is a valid Unicode code point. 1615 * 1616 * @param codePoint 1617 * the code point to test. 
1618 * @return {@code true} if {@code codePoint} is a valid Unicode code point; 1619 * {@code false} otherwise. 1620 * @since 1.5 1621 */ isValidCodePoint(int codePoint)1622 public static boolean isValidCodePoint(int codePoint) { 1623 return (MIN_CODE_POINT <= codePoint && MAX_CODE_POINT >= codePoint); 1624 } 1625 1626 /** 1627 * Indicates whether {@code codePoint} is within the supplementary code 1628 * point range. 1629 * 1630 * @param codePoint 1631 * the code point to test. 1632 * @return {@code true} if {@code codePoint} is within the supplementary 1633 * code point range; {@code false} otherwise. 1634 * @since 1.5 1635 */ isSupplementaryCodePoint(int codePoint)1636 public static boolean isSupplementaryCodePoint(int codePoint) { 1637 return (MIN_SUPPLEMENTARY_CODE_POINT <= codePoint && MAX_CODE_POINT >= codePoint); 1638 } 1639 1640 /** 1641 * Indicates whether {@code ch} is a high- (or leading-) surrogate code unit 1642 * that is used for representing supplementary characters in UTF-16 1643 * encoding. 1644 * 1645 * @param ch 1646 * the character to test. 1647 * @return {@code true} if {@code ch} is a high-surrogate code unit; 1648 * {@code false} otherwise. 1649 * @see #isLowSurrogate(char) 1650 * @since 1.5 1651 */ isHighSurrogate(char ch)1652 public static boolean isHighSurrogate(char ch) { 1653 return (MIN_HIGH_SURROGATE <= ch && MAX_HIGH_SURROGATE >= ch); 1654 } 1655 1656 /** 1657 * Indicates whether {@code ch} is a low- (or trailing-) surrogate code unit 1658 * that is used for representing supplementary characters in UTF-16 1659 * encoding. 1660 * 1661 * @param ch 1662 * the character to test. 1663 * @return {@code true} if {@code ch} is a low-surrogate code unit; 1664 * {@code false} otherwise. 
1665 * @see #isHighSurrogate(char) 1666 * @since 1.5 1667 */ isLowSurrogate(char ch)1668 public static boolean isLowSurrogate(char ch) { 1669 return (MIN_LOW_SURROGATE <= ch && MAX_LOW_SURROGATE >= ch); 1670 } 1671 1672 /** 1673 * Returns true if the given character is a high or low surrogate. 1674 * @since 1.7 1675 */ isSurrogate(char ch)1676 public static boolean isSurrogate(char ch) { 1677 return ch >= MIN_SURROGATE && ch <= MAX_SURROGATE; 1678 } 1679 1680 /** 1681 * Indicates whether the specified character pair is a valid surrogate pair. 1682 * 1683 * @param high 1684 * the high surrogate unit to test. 1685 * @param low 1686 * the low surrogate unit to test. 1687 * @return {@code true} if {@code high} is a high-surrogate code unit and 1688 * {@code low} is a low-surrogate code unit; {@code false} 1689 * otherwise. 1690 * @see #isHighSurrogate(char) 1691 * @see #isLowSurrogate(char) 1692 * @since 1.5 1693 */ isSurrogatePair(char high, char low)1694 public static boolean isSurrogatePair(char high, char low) { 1695 return (isHighSurrogate(high) && isLowSurrogate(low)); 1696 } 1697 1698 /** 1699 * Calculates the number of {@code char} values required to represent the 1700 * specified Unicode code point. This method checks if the {@code codePoint} 1701 * is greater than or equal to {@code 0x10000}, in which case {@code 2} is 1702 * returned, otherwise {@code 1}. To test if the code point is valid, use 1703 * the {@link #isValidCodePoint(int)} method. 1704 * 1705 * @param codePoint 1706 * the code point for which to calculate the number of required 1707 * chars. 1708 * @return {@code 2} if {@code codePoint >= 0x10000}; {@code 1} otherwise. 1709 * @see #isValidCodePoint(int) 1710 * @see #isSupplementaryCodePoint(int) 1711 * @since 1.5 1712 */ charCount(int codePoint)1713 public static int charCount(int codePoint) { 1714 return (codePoint >= 0x10000 ? 2 : 1); 1715 } 1716 1717 /** 1718 * Converts a surrogate pair into a Unicode code point. 
This method assumes 1719 * that the pair are valid surrogates. If the pair are <i>not</i> valid 1720 * surrogates, then the result is indeterminate. The 1721 * {@link #isSurrogatePair(char, char)} method should be used prior to this 1722 * method to validate the pair. 1723 * 1724 * @param high 1725 * the high surrogate unit. 1726 * @param low 1727 * the low surrogate unit. 1728 * @return the Unicode code point corresponding to the surrogate unit pair. 1729 * @see #isSurrogatePair(char, char) 1730 * @since 1.5 1731 */ toCodePoint(char high, char low)1732 public static int toCodePoint(char high, char low) { 1733 // See RFC 2781, Section 2.2 1734 // http://www.ietf.org/rfc/rfc2781.txt 1735 int h = (high & 0x3FF) << 10; 1736 int l = low & 0x3FF; 1737 return (h | l) + 0x10000; 1738 } 1739 1740 /** 1741 * Returns the code point at {@code index} in the specified sequence of 1742 * character units. If the unit at {@code index} is a high-surrogate unit, 1743 * {@code index + 1} is less than the length of the sequence and the unit at 1744 * {@code index + 1} is a low-surrogate unit, then the supplementary code 1745 * point represented by the pair is returned; otherwise the {@code char} 1746 * value at {@code index} is returned. 1747 * 1748 * @param seq 1749 * the source sequence of {@code char} units. 1750 * @param index 1751 * the position in {@code seq} from which to retrieve the code 1752 * point. 1753 * @return the Unicode code point or {@code char} value at {@code index} in 1754 * {@code seq}. 1755 * @throws NullPointerException 1756 * if {@code seq} is {@code null}. 1757 * @throws IndexOutOfBoundsException 1758 * if the {@code index} is negative or greater than or equal to 1759 * the length of {@code seq}. 
1760 * @since 1.5 1761 */ codePointAt(CharSequence seq, int index)1762 public static int codePointAt(CharSequence seq, int index) { 1763 if (seq == null) { 1764 throw new NullPointerException("seq == null"); 1765 } 1766 int len = seq.length(); 1767 if (index < 0 || index >= len) { 1768 throw new IndexOutOfBoundsException(); 1769 } 1770 1771 char high = seq.charAt(index++); 1772 if (index >= len) { 1773 return high; 1774 } 1775 char low = seq.charAt(index); 1776 if (isSurrogatePair(high, low)) { 1777 return toCodePoint(high, low); 1778 } 1779 return high; 1780 } 1781 1782 /** 1783 * Returns the code point at {@code index} in the specified array of 1784 * character units. If the unit at {@code index} is a high-surrogate unit, 1785 * {@code index + 1} is less than the length of the array and the unit at 1786 * {@code index + 1} is a low-surrogate unit, then the supplementary code 1787 * point represented by the pair is returned; otherwise the {@code char} 1788 * value at {@code index} is returned. 1789 * 1790 * @param seq 1791 * the source array of {@code char} units. 1792 * @param index 1793 * the position in {@code seq} from which to retrieve the code 1794 * point. 1795 * @return the Unicode code point or {@code char} value at {@code index} in 1796 * {@code seq}. 1797 * @throws NullPointerException 1798 * if {@code seq} is {@code null}. 1799 * @throws IndexOutOfBoundsException 1800 * if the {@code index} is negative or greater than or equal to 1801 * the length of {@code seq}. 
1802 * @since 1.5 1803 */ codePointAt(char[] seq, int index)1804 public static int codePointAt(char[] seq, int index) { 1805 if (seq == null) { 1806 throw new NullPointerException("seq == null"); 1807 } 1808 int len = seq.length; 1809 if (index < 0 || index >= len) { 1810 throw new IndexOutOfBoundsException(); 1811 } 1812 1813 char high = seq[index++]; 1814 if (index >= len) { 1815 return high; 1816 } 1817 char low = seq[index]; 1818 if (isSurrogatePair(high, low)) { 1819 return toCodePoint(high, low); 1820 } 1821 return high; 1822 } 1823 1824 /** 1825 * Returns the code point at {@code index} in the specified array of 1826 * character units, where {@code index} has to be less than {@code limit}. 1827 * If the unit at {@code index} is a high-surrogate unit, {@code index + 1} 1828 * is less than {@code limit} and the unit at {@code index + 1} is a 1829 * low-surrogate unit, then the supplementary code point represented by the 1830 * pair is returned; otherwise the {@code char} value at {@code index} is 1831 * returned. 1832 * 1833 * @param seq 1834 * the source array of {@code char} units. 1835 * @param index 1836 * the position in {@code seq} from which to get the code point. 1837 * @param limit 1838 * the index after the last unit in {@code seq} that can be used. 1839 * @return the Unicode code point or {@code char} value at {@code index} in 1840 * {@code seq}. 1841 * @throws NullPointerException 1842 * if {@code seq} is {@code null}. 1843 * @throws IndexOutOfBoundsException 1844 * if {@code index < 0}, {@code index >= limit}, 1845 * {@code limit < 0} or if {@code limit} is greater than the 1846 * length of {@code seq}. 
1847 * @since 1.5 1848 */ codePointAt(char[] seq, int index, int limit)1849 public static int codePointAt(char[] seq, int index, int limit) { 1850 if (index < 0 || index >= limit || limit < 0 || limit > seq.length) { 1851 throw new IndexOutOfBoundsException(); 1852 } 1853 1854 char high = seq[index++]; 1855 if (index >= limit) { 1856 return high; 1857 } 1858 char low = seq[index]; 1859 if (isSurrogatePair(high, low)) { 1860 return toCodePoint(high, low); 1861 } 1862 return high; 1863 } 1864 1865 /** 1866 * Returns the code point that precedes {@code index} in the specified 1867 * sequence of character units. If the unit at {@code index - 1} is a 1868 * low-surrogate unit, {@code index - 2} is not negative and the unit at 1869 * {@code index - 2} is a high-surrogate unit, then the supplementary code 1870 * point represented by the pair is returned; otherwise the {@code char} 1871 * value at {@code index - 1} is returned. 1872 * 1873 * @param seq 1874 * the source sequence of {@code char} units. 1875 * @param index 1876 * the position in {@code seq} following the code 1877 * point that should be returned. 1878 * @return the Unicode code point or {@code char} value before {@code index} 1879 * in {@code seq}. 1880 * @throws NullPointerException 1881 * if {@code seq} is {@code null}. 1882 * @throws IndexOutOfBoundsException 1883 * if the {@code index} is less than 1 or greater than the 1884 * length of {@code seq}. 
1885 * @since 1.5 1886 */ codePointBefore(CharSequence seq, int index)1887 public static int codePointBefore(CharSequence seq, int index) { 1888 if (seq == null) { 1889 throw new NullPointerException("seq == null"); 1890 } 1891 int len = seq.length(); 1892 if (index < 1 || index > len) { 1893 throw new IndexOutOfBoundsException(); 1894 } 1895 1896 char low = seq.charAt(--index); 1897 if (--index < 0) { 1898 return low; 1899 } 1900 char high = seq.charAt(index); 1901 if (isSurrogatePair(high, low)) { 1902 return toCodePoint(high, low); 1903 } 1904 return low; 1905 } 1906 1907 /** 1908 * Returns the code point that precedes {@code index} in the specified 1909 * array of character units. If the unit at {@code index - 1} is a 1910 * low-surrogate unit, {@code index - 2} is not negative and the unit at 1911 * {@code index - 2} is a high-surrogate unit, then the supplementary code 1912 * point represented by the pair is returned; otherwise the {@code char} 1913 * value at {@code index - 1} is returned. 1914 * 1915 * @param seq 1916 * the source array of {@code char} units. 1917 * @param index 1918 * the position in {@code seq} following the code 1919 * point that should be returned. 1920 * @return the Unicode code point or {@code char} value before {@code index} 1921 * in {@code seq}. 1922 * @throws NullPointerException 1923 * if {@code seq} is {@code null}. 1924 * @throws IndexOutOfBoundsException 1925 * if the {@code index} is less than 1 or greater than the 1926 * length of {@code seq}. 
1927 * @since 1.5 1928 */ codePointBefore(char[] seq, int index)1929 public static int codePointBefore(char[] seq, int index) { 1930 if (seq == null) { 1931 throw new NullPointerException("seq == null"); 1932 } 1933 int len = seq.length; 1934 if (index < 1 || index > len) { 1935 throw new IndexOutOfBoundsException(); 1936 } 1937 1938 char low = seq[--index]; 1939 if (--index < 0) { 1940 return low; 1941 } 1942 char high = seq[index]; 1943 if (isSurrogatePair(high, low)) { 1944 return toCodePoint(high, low); 1945 } 1946 return low; 1947 } 1948 1949 /** 1950 * Returns the code point that precedes the {@code index} in the specified 1951 * array of character units and is not less than {@code start}. If the unit 1952 * at {@code index - 1} is a low-surrogate unit, {@code index - 2} is not 1953 * less than {@code start} and the unit at {@code index - 2} is a 1954 * high-surrogate unit, then the supplementary code point represented by the 1955 * pair is returned; otherwise the {@code char} value at {@code index - 1} 1956 * is returned. 1957 * 1958 * @param seq 1959 * the source array of {@code char} units. 1960 * @param index 1961 * the position in {@code seq} following the code point that 1962 * should be returned. 1963 * @param start 1964 * the index of the first element in {@code seq}. 1965 * @return the Unicode code point or {@code char} value before {@code index} 1966 * in {@code seq}. 1967 * @throws NullPointerException 1968 * if {@code seq} is {@code null}. 1969 * @throws IndexOutOfBoundsException 1970 * if the {@code index <= start}, {@code start < 0}, 1971 * {@code index} is greater than the length of {@code seq}, or 1972 * if {@code start} is equal or greater than the length of 1973 * {@code seq}. 
1974 * @since 1.5 1975 */ codePointBefore(char[] seq, int index, int start)1976 public static int codePointBefore(char[] seq, int index, int start) { 1977 if (seq == null) { 1978 throw new NullPointerException("seq == null"); 1979 } 1980 int len = seq.length; 1981 if (index <= start || index > len || start < 0 || start >= len) { 1982 throw new IndexOutOfBoundsException(); 1983 } 1984 1985 char low = seq[--index]; 1986 if (--index < start) { 1987 return low; 1988 } 1989 char high = seq[index]; 1990 if (isSurrogatePair(high, low)) { 1991 return toCodePoint(high, low); 1992 } 1993 return low; 1994 } 1995 1996 /** 1997 * Converts the specified Unicode code point into a UTF-16 encoded sequence 1998 * and copies the value(s) into the char array {@code dst}, starting at 1999 * index {@code dstIndex}. 2000 * 2001 * @param codePoint 2002 * the Unicode code point to encode. 2003 * @param dst 2004 * the destination array to copy the encoded value into. 2005 * @param dstIndex 2006 * the index in {@code dst} from where to start copying. 2007 * @return the number of {@code char} value units copied into {@code dst}. 2008 * @throws IllegalArgumentException if {@code codePoint} is not a valid code point. 2009 * @throws NullPointerException 2010 * if {@code dst} is {@code null}. 2011 * @throws IndexOutOfBoundsException 2012 * if {@code dstIndex} is negative, greater than or equal to 2013 * {@code dst.length} or equals {@code dst.length - 1} when 2014 * {@code codePoint} is a 2015 * {@link #isSupplementaryCodePoint(int) supplementary code point}. 
2016 * @since 1.5 2017 */ toChars(int codePoint, char[] dst, int dstIndex)2018 public static int toChars(int codePoint, char[] dst, int dstIndex) { 2019 checkValidCodePoint(codePoint); 2020 if (dst == null) { 2021 throw new NullPointerException("dst == null"); 2022 } 2023 if (dstIndex < 0 || dstIndex >= dst.length) { 2024 throw new IndexOutOfBoundsException(); 2025 } 2026 2027 if (isSupplementaryCodePoint(codePoint)) { 2028 if (dstIndex == dst.length - 1) { 2029 throw new IndexOutOfBoundsException(); 2030 } 2031 // See RFC 2781, Section 2.1 2032 // http://www.ietf.org/rfc/rfc2781.txt 2033 int cpPrime = codePoint - 0x10000; 2034 int high = 0xD800 | ((cpPrime >> 10) & 0x3FF); 2035 int low = 0xDC00 | (cpPrime & 0x3FF); 2036 dst[dstIndex] = (char) high; 2037 dst[dstIndex + 1] = (char) low; 2038 return 2; 2039 } 2040 2041 dst[dstIndex] = (char) codePoint; 2042 return 1; 2043 } 2044 2045 /** 2046 * Converts the specified Unicode code point into a UTF-16 encoded sequence 2047 * and returns it as a char array. 2048 * 2049 * @param codePoint 2050 * the Unicode code point to encode. 2051 * @return the UTF-16 encoded char sequence. If {@code codePoint} is a 2052 * {@link #isSupplementaryCodePoint(int) supplementary code point}, 2053 * then the returned array contains two characters, otherwise it 2054 * contains just one character. 2055 * @throws IllegalArgumentException if {@code codePoint} is not a valid code point. 
2056 * @since 1.5 2057 */ toChars(int codePoint)2058 public static char[] toChars(int codePoint) { 2059 checkValidCodePoint(codePoint); 2060 if (isSupplementaryCodePoint(codePoint)) { 2061 int cpPrime = codePoint - 0x10000; 2062 int high = 0xD800 | ((cpPrime >> 10) & 0x3FF); 2063 int low = 0xDC00 | (cpPrime & 0x3FF); 2064 return new char[] { (char) high, (char) low }; 2065 } 2066 return new char[] { (char) codePoint }; 2067 } 2068 2069 /** 2070 * Counts the number of Unicode code points in the subsequence of the 2071 * specified character sequence, as delineated by {@code beginIndex} and 2072 * {@code endIndex}. Any surrogate values with missing pair values will be 2073 * counted as one code point. 2074 * 2075 * @param seq 2076 * the {@code CharSequence} to look through. 2077 * @param beginIndex 2078 * the inclusive index to begin counting at. 2079 * @param endIndex 2080 * the exclusive index to stop counting at. 2081 * @return the number of Unicode code points. 2082 * @throws NullPointerException 2083 * if {@code seq} is {@code null}. 2084 * @throws IndexOutOfBoundsException 2085 * if {@code beginIndex < 0}, {@code beginIndex > endIndex} or 2086 * if {@code endIndex} is greater than the length of {@code seq}. 
2087 * @since 1.5 2088 */ codePointCount(CharSequence seq, int beginIndex, int endIndex)2089 public static int codePointCount(CharSequence seq, int beginIndex, 2090 int endIndex) { 2091 if (seq == null) { 2092 throw new NullPointerException("seq == null"); 2093 } 2094 int len = seq.length(); 2095 if (beginIndex < 0 || endIndex > len || beginIndex > endIndex) { 2096 throw new IndexOutOfBoundsException(); 2097 } 2098 2099 int result = 0; 2100 for (int i = beginIndex; i < endIndex; i++) { 2101 char c = seq.charAt(i); 2102 if (isHighSurrogate(c)) { 2103 if (++i < endIndex) { 2104 c = seq.charAt(i); 2105 if (!isLowSurrogate(c)) { 2106 result++; 2107 } 2108 } 2109 } 2110 result++; 2111 } 2112 return result; 2113 } 2114 2115 /** 2116 * Counts the number of Unicode code points in the subsequence of the 2117 * specified char array, as delineated by {@code offset} and {@code count}. 2118 * Any surrogate values with missing pair values will be counted as one code 2119 * point. 2120 * 2121 * @param seq 2122 * the char array to look through 2123 * @param offset 2124 * the inclusive index to begin counting at. 2125 * @param count 2126 * the number of {@code char} values to look through in 2127 * {@code seq}. 2128 * @return the number of Unicode code points. 2129 * @throws NullPointerException 2130 * if {@code seq} is {@code null}. 2131 * @throws IndexOutOfBoundsException 2132 * if {@code offset < 0}, {@code count < 0} or if 2133 * {@code offset + count} is greater than the length of 2134 * {@code seq}. 
2135 * @since 1.5 2136 */ codePointCount(char[] seq, int offset, int count)2137 public static int codePointCount(char[] seq, int offset, int count) { 2138 Arrays.checkOffsetAndCount(seq.length, offset, count); 2139 int endIndex = offset + count; 2140 int result = 0; 2141 for (int i = offset; i < endIndex; i++) { 2142 char c = seq[i]; 2143 if (isHighSurrogate(c)) { 2144 if (++i < endIndex) { 2145 c = seq[i]; 2146 if (!isLowSurrogate(c)) { 2147 result++; 2148 } 2149 } 2150 } 2151 result++; 2152 } 2153 return result; 2154 } 2155 2156 /** 2157 * Determines the index in the specified character sequence that is offset 2158 * {@code codePointOffset} code points from {@code index}. 2159 * 2160 * @param seq 2161 * the character sequence to find the index in. 2162 * @param index 2163 * the start index in {@code seq}. 2164 * @param codePointOffset 2165 * the number of code points to look backwards or forwards; may 2166 * be a negative or positive value. 2167 * @return the index in {@code seq} that is {@code codePointOffset} code 2168 * points away from {@code index}. 2169 * @throws NullPointerException 2170 * if {@code seq} is {@code null}. 2171 * @throws IndexOutOfBoundsException 2172 * if {@code index < 0}, {@code index} is greater than the 2173 * length of {@code seq}, or if there are not enough values in 2174 * {@code seq} to skip {@code codePointOffset} code points 2175 * forwards or backwards (if {@code codePointOffset} is 2176 * negative) from {@code index}. 
2177 * @since 1.5 2178 */ offsetByCodePoints(CharSequence seq, int index, int codePointOffset)2179 public static int offsetByCodePoints(CharSequence seq, int index, int codePointOffset) { 2180 if (seq == null) { 2181 throw new NullPointerException("seq == null"); 2182 } 2183 int len = seq.length(); 2184 if (index < 0 || index > len) { 2185 throw new IndexOutOfBoundsException(); 2186 } 2187 2188 if (codePointOffset == 0) { 2189 return index; 2190 } 2191 2192 if (codePointOffset > 0) { 2193 int codePoints = codePointOffset; 2194 int i = index; 2195 while (codePoints > 0) { 2196 codePoints--; 2197 if (i >= len) { 2198 throw new IndexOutOfBoundsException(); 2199 } 2200 if (isHighSurrogate(seq.charAt(i))) { 2201 int next = i + 1; 2202 if (next < len && isLowSurrogate(seq.charAt(next))) { 2203 i++; 2204 } 2205 } 2206 i++; 2207 } 2208 return i; 2209 } 2210 2211 int codePoints = -codePointOffset; 2212 int i = index; 2213 while (codePoints > 0) { 2214 codePoints--; 2215 i--; 2216 if (i < 0) { 2217 throw new IndexOutOfBoundsException(); 2218 } 2219 if (isLowSurrogate(seq.charAt(i))) { 2220 int prev = i - 1; 2221 if (prev >= 0 && isHighSurrogate(seq.charAt(prev))) { 2222 i--; 2223 } 2224 } 2225 } 2226 return i; 2227 } 2228 2229 /** 2230 * Determines the index in a subsequence of the specified character array 2231 * that is offset {@code codePointOffset} code points from {@code index}. 2232 * The subsequence is delineated by {@code start} and {@code count}. 2233 * 2234 * @param seq 2235 * the character array to find the index in. 2236 * @param start 2237 * the inclusive index that marks the beginning of the 2238 * subsequence. 2239 * @param count 2240 * the number of {@code char} values to include within the 2241 * subsequence. 2242 * @param index 2243 * the start index in the subsequence of the char array. 2244 * @param codePointOffset 2245 * the number of code points to look backwards or forwards; may 2246 * be a negative or positive value. 
2247 * @return the index in {@code seq} that is {@code codePointOffset} code 2248 * points away from {@code index}. 2249 * @throws NullPointerException 2250 * if {@code seq} is {@code null}. 2251 * @throws IndexOutOfBoundsException 2252 * if {@code start < 0}, {@code count < 0}, 2253 * {@code index < start}, {@code index > start + count}, 2254 * {@code start + count} is greater than the length of 2255 * {@code seq}, or if there are not enough values in 2256 * {@code seq} to skip {@code codePointOffset} code points 2257 * forward or backward (if {@code codePointOffset} is 2258 * negative) from {@code index}. 2259 * @since 1.5 2260 */ offsetByCodePoints(char[] seq, int start, int count, int index, int codePointOffset)2261 public static int offsetByCodePoints(char[] seq, int start, int count, 2262 int index, int codePointOffset) { 2263 Arrays.checkOffsetAndCount(seq.length, start, count); 2264 int end = start + count; 2265 if (index < start || index > end) { 2266 throw new IndexOutOfBoundsException(); 2267 } 2268 2269 if (codePointOffset == 0) { 2270 return index; 2271 } 2272 2273 if (codePointOffset > 0) { 2274 int codePoints = codePointOffset; 2275 int i = index; 2276 while (codePoints > 0) { 2277 codePoints--; 2278 if (i >= end) { 2279 throw new IndexOutOfBoundsException(); 2280 } 2281 if (isHighSurrogate(seq[i])) { 2282 int next = i + 1; 2283 if (next < end && isLowSurrogate(seq[next])) { 2284 i++; 2285 } 2286 } 2287 i++; 2288 } 2289 return i; 2290 } 2291 2292 int codePoints = -codePointOffset; 2293 int i = index; 2294 while (codePoints > 0) { 2295 codePoints--; 2296 i--; 2297 if (i < start) { 2298 throw new IndexOutOfBoundsException(); 2299 } 2300 if (isLowSurrogate(seq[i])) { 2301 int prev = i - 1; 2302 if (prev >= start && isHighSurrogate(seq[prev])) { 2303 i--; 2304 } 2305 } 2306 } 2307 return i; 2308 } 2309 2310 /** 2311 * Convenience method to determine the value of the specified character 2312 * {@code c} in the supplied radix. 
The value of {@code radix} must be 2313 * between MIN_RADIX and MAX_RADIX. 2314 * 2315 * @param c 2316 * the character to determine the value of. 2317 * @param radix 2318 * the radix. 2319 * @return the value of {@code c} in {@code radix} if {@code radix} lies 2320 * between {@link #MIN_RADIX} and {@link #MAX_RADIX}; -1 otherwise. 2321 */ digit(char c, int radix)2322 public static int digit(char c, int radix) { 2323 return digit((int) c, radix); 2324 } 2325 2326 /** 2327 * Convenience method to determine the value of the character 2328 * {@code codePoint} in the supplied radix. The value of {@code radix} must 2329 * be between MIN_RADIX and MAX_RADIX. 2330 * 2331 * @param codePoint 2332 * the character, including supplementary characters. 2333 * @param radix 2334 * the radix. 2335 * @return if {@code radix} lies between {@link #MIN_RADIX} and 2336 * {@link #MAX_RADIX} then the value of the character in the radix; 2337 * -1 otherwise. 2338 */ digit(int codePoint, int radix)2339 public static int digit(int codePoint, int radix) { 2340 if (radix < MIN_RADIX || radix > MAX_RADIX) { 2341 return -1; 2342 } 2343 if (codePoint < 128) { 2344 // Optimized for ASCII 2345 int result = -1; 2346 if ('0' <= codePoint && codePoint <= '9') { 2347 result = codePoint - '0'; 2348 } else if ('a' <= codePoint && codePoint <= 'z') { 2349 result = 10 + (codePoint - 'a'); 2350 } else if ('A' <= codePoint && codePoint <= 'Z') { 2351 result = 10 + (codePoint - 'A'); 2352 } 2353 return result < radix ? result : -1; 2354 } 2355 return digitImpl(codePoint, radix); 2356 } 2357 digitImpl(int codePoint, int radix)2358 private static native int digitImpl(int codePoint, int radix); 2359 2360 /** 2361 * Compares this object with the specified object and indicates if they are 2362 * equal. In order to be equal, {@code object} must be an instance of 2363 * {@code Character} and have the same char value as this object. 2364 * 2365 * @param object 2366 * the object to compare this double with. 
2367 * @return {@code true} if the specified object is equal to this 2368 * {@code Character}; {@code false} otherwise. 2369 */ 2370 @Override equals(Object object)2371 public boolean equals(Object object) { 2372 return (object instanceof Character) && (((Character) object).value == value); 2373 } 2374 2375 /** 2376 * Returns the character which represents the specified digit in the 2377 * specified radix. The {@code radix} must be between {@code MIN_RADIX} and 2378 * {@code MAX_RADIX} inclusive; {@code digit} must not be negative and 2379 * smaller than {@code radix}. If any of these conditions does not hold, 0 2380 * is returned. 2381 * 2382 * @param digit 2383 * the integer value. 2384 * @param radix 2385 * the radix. 2386 * @return the character which represents the {@code digit} in the 2387 * {@code radix}. 2388 */ forDigit(int digit, int radix)2389 public static char forDigit(int digit, int radix) { 2390 if (MIN_RADIX <= radix && radix <= MAX_RADIX) { 2391 if (digit >= 0 && digit < radix) { 2392 return (char) (digit < 10 ? digit + '0' : digit + 'a' - 10); 2393 } 2394 } 2395 return 0; 2396 } 2397 2398 /** 2399 * Returns a human-readable name for the given code point, 2400 * or null if the code point is unassigned. 2401 * 2402 * <p>As a fallback mechanism this method returns strings consisting of the Unicode 2403 * block name (with underscores replaced by spaces), a single space, and the uppercase 2404 * hex value of the code point, using as few digits as necessary. 2405 * 2406 * <p>Examples: 2407 * <ul> 2408 * <li>{@code Character.getName(0)} returns "NULL". 2409 * <li>{@code Character.getName('e')} returns "LATIN SMALL LETTER E". 2410 * <li>{@code Character.getName('\u0666')} returns "ARABIC-INDIC DIGIT SIX". 2411 * <li>{@code Character.getName(0xe000)} returns "PRIVATE USE AREA E000". 2412 * </ul> 2413 * 2414 * <p>Note that the exact strings returned will vary from release to release. 
2415 * 2416 * @throws IllegalArgumentException if {@code codePoint} is not a valid code point. 2417 * @since 1.7 2418 */ getName(int codePoint)2419 public static String getName(int codePoint) { 2420 checkValidCodePoint(codePoint); 2421 if (getType(codePoint) == Character.UNASSIGNED) { 2422 return null; 2423 } 2424 String result = getNameImpl(codePoint); 2425 if (result == null) { 2426 String blockName = Character.UnicodeBlock.of(codePoint).toString().replace('_', ' '); 2427 result = blockName + " " + IntegralToString.intToHexString(codePoint, true, 0); 2428 } 2429 return result; 2430 } 2431 getNameImpl(int codePoint)2432 private static native String getNameImpl(int codePoint); 2433 2434 /** 2435 * Returns the numeric value of the specified Unicode character. 2436 * See {@link #getNumericValue(int)}. 2437 * 2438 * @param c the character 2439 * @return a non-negative numeric integer value if a numeric value for 2440 * {@code c} exists, -1 if there is no numeric value for {@code c}, 2441 * -2 if the numeric value can not be represented as an integer. 2442 */ getNumericValue(char c)2443 public static int getNumericValue(char c) { 2444 return getNumericValue((int) c); 2445 } 2446 2447 /** 2448 * Gets the numeric value of the specified Unicode code point. For example, 2449 * the code point '\u216B' stands for the Roman number XII, which has the 2450 * numeric value 12. 2451 * 2452 * <p>There are two points of divergence between this method and the Unicode 2453 * specification. This method treats the letters a-z (in both upper and lower 2454 * cases, and their full-width variants) as numbers from 10 to 35. The 2455 * Unicode specification also supports the idea of code points with non-integer 2456 * numeric values; this method does not (except to the extent of returning -2 2457 * for such code points). 
2458 * 2459 * @param codePoint the code point 2460 * @return a non-negative numeric integer value if a numeric value for 2461 * {@code codePoint} exists, -1 if there is no numeric value for 2462 * {@code codePoint}, -2 if the numeric value can not be 2463 * represented with an integer. 2464 */ getNumericValue(int codePoint)2465 public static int getNumericValue(int codePoint) { 2466 // This is both an optimization and papers over differences between Java and ICU. 2467 if (codePoint < 128) { 2468 if (codePoint >= '0' && codePoint <= '9') { 2469 return codePoint - '0'; 2470 } 2471 if (codePoint >= 'a' && codePoint <= 'z') { 2472 return codePoint - ('a' - 10); 2473 } 2474 if (codePoint >= 'A' && codePoint <= 'Z') { 2475 return codePoint - ('A' - 10); 2476 } 2477 return -1; 2478 } 2479 // Full-width uppercase A-Z. 2480 if (codePoint >= 0xff21 && codePoint <= 0xff3a) { 2481 return codePoint - 0xff17; 2482 } 2483 // Full-width lowercase a-z. 2484 if (codePoint >= 0xff41 && codePoint <= 0xff5a) { 2485 return codePoint - 0xff37; 2486 } 2487 return getNumericValueImpl(codePoint); 2488 } 2489 getNumericValueImpl(int codePoint)2490 private static native int getNumericValueImpl(int codePoint); 2491 2492 /** 2493 * Gets the general Unicode category of the specified character. 2494 * 2495 * @param c 2496 * the character to get the category of. 2497 * @return the Unicode category of {@code c}. 2498 */ getType(char c)2499 public static int getType(char c) { 2500 return getType((int) c); 2501 } 2502 2503 /** 2504 * Gets the general Unicode category of the specified code point. 2505 * 2506 * @param codePoint 2507 * the Unicode code point to get the category of. 2508 * @return the Unicode category of {@code codePoint}. 2509 */ getType(int codePoint)2510 public static int getType(int codePoint) { 2511 int type = getTypeImpl(codePoint); 2512 // The type values returned by ICU are not RI-compatible. The RI skips the value 17. 
2513 if (type <= Character.FORMAT) { 2514 return type; 2515 } 2516 return (type + 1); 2517 } 2518 getTypeImpl(int codePoint)2519 private static native int getTypeImpl(int codePoint); 2520 2521 /** 2522 * Gets the Unicode directionality of the specified character. 2523 * 2524 * @param c 2525 * the character to get the directionality of. 2526 * @return the Unicode directionality of {@code c}. 2527 */ getDirectionality(char c)2528 public static byte getDirectionality(char c) { 2529 return getDirectionality((int)c); 2530 } 2531 2532 /** 2533 * Returns the Unicode directionality of the given code point. 2534 * This will be one of the {@code DIRECTIONALITY_} constants. 2535 * For characters whose directionality is undefined, or whose 2536 * directionality has no appropriate constant in this class, 2537 * {@code DIRECTIONALITY_UNDEFINED} is returned. 2538 */ getDirectionality(int codePoint)2539 public static byte getDirectionality(int codePoint) { 2540 if (getType(codePoint) == Character.UNASSIGNED) { 2541 return Character.DIRECTIONALITY_UNDEFINED; 2542 } 2543 2544 byte directionality = getIcuDirectionality(codePoint); 2545 if (directionality >= 0 && directionality < DIRECTIONALITY.length) { 2546 return DIRECTIONALITY[directionality]; 2547 } 2548 return Character.DIRECTIONALITY_UNDEFINED; 2549 } 2550 2551 /** 2552 * @hide - internal use only. 2553 */ getIcuDirectionality(int codePoint)2554 public static native byte getIcuDirectionality(int codePoint); 2555 2556 /** 2557 * Indicates whether the specified character is mirrored. 2558 * 2559 * @param c 2560 * the character to check. 2561 * @return {@code true} if {@code c} is mirrored; {@code false} 2562 * otherwise. 2563 */ isMirrored(char c)2564 public static boolean isMirrored(char c) { 2565 return isMirrored((int) c); 2566 } 2567 2568 /** 2569 * Indicates whether the specified code point is mirrored. 2570 * 2571 * @param codePoint 2572 * the code point to check. 
2573 * @return {@code true} if {@code codePoint} is mirrored, {@code false} 2574 * otherwise. 2575 */ isMirrored(int codePoint)2576 public static boolean isMirrored(int codePoint) { 2577 return isMirroredImpl(codePoint); 2578 } 2579 isMirroredImpl(int codePoint)2580 private static native boolean isMirroredImpl(int codePoint); 2581 2582 @Override hashCode()2583 public int hashCode() { 2584 return value; 2585 } 2586 2587 /** 2588 * Returns the high surrogate for the given code point. The result is meaningless if 2589 * the given code point is not a supplementary character. 2590 * @since 1.7 2591 */ highSurrogate(int codePoint)2592 public static char highSurrogate(int codePoint) { 2593 return (char) ((codePoint >> 10) + 0xd7c0); 2594 } 2595 2596 /** 2597 * Returns the low surrogate for the given code point. The result is meaningless if 2598 * the given code point is not a supplementary character. 2599 * @since 1.7 2600 */ lowSurrogate(int codePoint)2601 public static char lowSurrogate(int codePoint) { 2602 return (char) ((codePoint & 0x3ff) | 0xdc00); 2603 } 2604 2605 /** 2606 * Returns true if the given code point is alphabetic. That is, 2607 * if it is in any of the Lu, Ll, Lt, Lm, Lo, Nl, or Other_Alphabetic categories. 2608 * @since 1.7 2609 */ isAlphabetic(int codePoint)2610 public static native boolean isAlphabetic(int codePoint); 2611 2612 /** 2613 * Returns true if the given code point is in the Basic Multilingual Plane (BMP). 2614 * Such code points can be represented by a single {@code char}. 2615 * @since 1.7 2616 */ isBmpCodePoint(int codePoint)2617 public static boolean isBmpCodePoint(int codePoint) { 2618 return codePoint >= Character.MIN_VALUE && codePoint <= Character.MAX_VALUE; 2619 } 2620 2621 /** 2622 * Indicates whether the specified character is defined in the Unicode 2623 * specification. 2624 * 2625 * @param c 2626 * the character to check. 
2627 * @return {@code true} if the general Unicode category of the character is 2628 * not {@code UNASSIGNED}; {@code false} otherwise. 2629 */ isDefined(char c)2630 public static boolean isDefined(char c) { 2631 return isDefinedImpl(c); 2632 } 2633 2634 /** 2635 * Indicates whether the specified code point is defined in the Unicode 2636 * specification. 2637 * 2638 * @param codePoint 2639 * the code point to check. 2640 * @return {@code true} if the general Unicode category of the code point is 2641 * not {@code UNASSIGNED}; {@code false} otherwise. 2642 */ isDefined(int codePoint)2643 public static boolean isDefined(int codePoint) { 2644 return isDefinedImpl(codePoint); 2645 } 2646 isDefinedImpl(int codePoint)2647 private static native boolean isDefinedImpl(int codePoint); 2648 2649 /** 2650 * Indicates whether the specified character is a digit. 2651 * 2652 * @param c 2653 * the character to check. 2654 * @return {@code true} if {@code c} is a digit; {@code false} 2655 * otherwise. 2656 */ isDigit(char c)2657 public static boolean isDigit(char c) { 2658 return isDigit((int) c); 2659 } 2660 2661 /** 2662 * Indicates whether the specified code point is a digit. 2663 * 2664 * @param codePoint 2665 * the code point to check. 2666 * @return {@code true} if {@code codePoint} is a digit; {@code false} 2667 * otherwise. 2668 */ isDigit(int codePoint)2669 public static boolean isDigit(int codePoint) { 2670 // Optimized case for ASCII 2671 if ('0' <= codePoint && codePoint <= '9') { 2672 return true; 2673 } 2674 if (codePoint < 1632) { 2675 return false; 2676 } 2677 return isDigitImpl(codePoint); 2678 } 2679 isDigitImpl(int codePoint)2680 private static native boolean isDigitImpl(int codePoint); 2681 2682 /** 2683 * Indicates whether the specified character is ignorable in a Java or 2684 * Unicode identifier. 2685 * 2686 * @param c 2687 * the character to check. 2688 * @return {@code true} if {@code c} is ignorable; {@code false} otherwise. 
2689 */ isIdentifierIgnorable(char c)2690 public static boolean isIdentifierIgnorable(char c) { 2691 return isIdentifierIgnorable((int) c); 2692 } 2693 2694 /** 2695 * Returns true if the given code point is a CJKV ideographic character. 2696 * @since 1.7 2697 */ isIdeographic(int codePoint)2698 public static native boolean isIdeographic(int codePoint); 2699 2700 /** 2701 * Indicates whether the specified code point is ignorable in a Java or 2702 * Unicode identifier. 2703 * 2704 * @param codePoint 2705 * the code point to check. 2706 * @return {@code true} if {@code codePoint} is ignorable; {@code false} 2707 * otherwise. 2708 */ isIdentifierIgnorable(int codePoint)2709 public static boolean isIdentifierIgnorable(int codePoint) { 2710 // This is both an optimization and papers over differences between Java and ICU. 2711 if (codePoint < 0x600) { 2712 return (codePoint >= 0 && codePoint <= 8) || (codePoint >= 0xe && codePoint <= 0x1b) || 2713 (codePoint >= 0x7f && codePoint <= 0x9f) || (codePoint == 0xad); 2714 } 2715 return isIdentifierIgnorableImpl(codePoint); 2716 } 2717 isIdentifierIgnorableImpl(int codePoint)2718 private static native boolean isIdentifierIgnorableImpl(int codePoint); 2719 2720 /** 2721 * Indicates whether the specified character is an ISO control character. 2722 * 2723 * @param c 2724 * the character to check. 2725 * @return {@code true} if {@code c} is an ISO control character; 2726 * {@code false} otherwise. 2727 */ isISOControl(char c)2728 public static boolean isISOControl(char c) { 2729 return isISOControl((int) c); 2730 } 2731 2732 /** 2733 * Indicates whether the specified code point is an ISO control character. 2734 * 2735 * @param c 2736 * the code point to check. 2737 * @return {@code true} if {@code c} is an ISO control character; 2738 * {@code false} otherwise. 
2739 */ isISOControl(int c)2740 public static boolean isISOControl(int c) { 2741 return (c >= 0 && c <= 0x1f) || (c >= 0x7f && c <= 0x9f); 2742 } 2743 2744 /** 2745 * Indicates whether the specified character is a valid part of a Java 2746 * identifier other than the first character. 2747 * 2748 * @param c 2749 * the character to check. 2750 * @return {@code true} if {@code c} is valid as part of a Java identifier; 2751 * {@code false} otherwise. 2752 */ isJavaIdentifierPart(char c)2753 public static boolean isJavaIdentifierPart(char c) { 2754 return isJavaIdentifierPart((int) c); 2755 } 2756 2757 /** 2758 * Indicates whether the specified code point is a valid part of a Java 2759 * identifier other than the first character. 2760 * 2761 * @param codePoint 2762 * the code point to check. 2763 * @return {@code true} if {@code c} is valid as part of a Java identifier; 2764 * {@code false} otherwise. 2765 */ isJavaIdentifierPart(int codePoint)2766 public static boolean isJavaIdentifierPart(int codePoint) { 2767 // Use precomputed bitmasks to optimize the ASCII range. 2768 if (codePoint < 64) { 2769 return (0x3ff00100fffc1ffL & (1L << codePoint)) != 0; 2770 } else if (codePoint < 128) { 2771 return (0x87fffffe87fffffeL & (1L << (codePoint - 64))) != 0; 2772 } 2773 int type = getType(codePoint); 2774 return (type >= UPPERCASE_LETTER && type <= OTHER_LETTER) 2775 || type == CURRENCY_SYMBOL || type == CONNECTOR_PUNCTUATION 2776 || (type >= DECIMAL_DIGIT_NUMBER && type <= LETTER_NUMBER) 2777 || type == COMBINING_SPACING_MARK || type == NON_SPACING_MARK 2778 || (codePoint >= 0 && codePoint <= 8) || (codePoint >= 0xe && codePoint <= 0x1b) 2779 || (codePoint >= 0x7f && codePoint <= 0x9f) || type == FORMAT; 2780 } 2781 2782 /** 2783 * Indicates whether the specified character is a valid first character for 2784 * a Java identifier. 2785 * 2786 * @param c 2787 * the character to check. 
2788 * @return {@code true} if {@code c} is a valid first character of a Java 2789 * identifier; {@code false} otherwise. 2790 */ isJavaIdentifierStart(char c)2791 public static boolean isJavaIdentifierStart(char c) { 2792 return isJavaIdentifierStart((int) c); 2793 } 2794 2795 /** 2796 * Indicates whether the specified code point is a valid first character for 2797 * a Java identifier. 2798 * 2799 * @param codePoint 2800 * the code point to check. 2801 * @return {@code true} if {@code codePoint} is a valid start of a Java 2802 * identifier; {@code false} otherwise. 2803 */ isJavaIdentifierStart(int codePoint)2804 public static boolean isJavaIdentifierStart(int codePoint) { 2805 // Use precomputed bitmasks to optimize the ASCII range. 2806 if (codePoint < 64) { 2807 return (codePoint == '$'); // There's only one character in this range. 2808 } else if (codePoint < 128) { 2809 return (0x7fffffe87fffffeL & (1L << (codePoint - 64))) != 0; 2810 } 2811 int type = getType(codePoint); 2812 return (type >= UPPERCASE_LETTER && type <= OTHER_LETTER) || type == CURRENCY_SYMBOL 2813 || type == CONNECTOR_PUNCTUATION || type == LETTER_NUMBER; 2814 } 2815 2816 /** 2817 * Indicates whether the specified character is a Java letter. 2818 * 2819 * @param c 2820 * the character to check. 2821 * @return {@code true} if {@code c} is a Java letter; {@code false} 2822 * otherwise. 2823 * @deprecated Use {@link #isJavaIdentifierStart(char)} instead. 2824 */ 2825 @Deprecated isJavaLetter(char c)2826 public static boolean isJavaLetter(char c) { 2827 return isJavaIdentifierStart(c); 2828 } 2829 2830 /** 2831 * Indicates whether the specified character is a Java letter or digit 2832 * character. 2833 * 2834 * @param c 2835 * the character to check. 2836 * @return {@code true} if {@code c} is a Java letter or digit; 2837 * {@code false} otherwise. 2838 * @deprecated Use {@link #isJavaIdentifierPart(char)} instead. 
2839 */ 2840 @Deprecated isJavaLetterOrDigit(char c)2841 public static boolean isJavaLetterOrDigit(char c) { 2842 return isJavaIdentifierPart(c); 2843 } 2844 2845 /** 2846 * Indicates whether the specified character is a letter. 2847 * 2848 * @param c 2849 * the character to check. 2850 * @return {@code true} if {@code c} is a letter; {@code false} otherwise. 2851 */ isLetter(char c)2852 public static boolean isLetter(char c) { 2853 return isLetter((int) c); 2854 } 2855 2856 /** 2857 * Indicates whether the specified code point is a letter. 2858 * 2859 * @param codePoint 2860 * the code point to check. 2861 * @return {@code true} if {@code codePoint} is a letter; {@code false} 2862 * otherwise. 2863 */ isLetter(int codePoint)2864 public static boolean isLetter(int codePoint) { 2865 if (('A' <= codePoint && codePoint <= 'Z') || ('a' <= codePoint && codePoint <= 'z')) { 2866 return true; 2867 } 2868 if (codePoint < 128) { 2869 return false; 2870 } 2871 return isLetterImpl(codePoint); 2872 } 2873 isLetterImpl(int codePoint)2874 private static native boolean isLetterImpl(int codePoint); 2875 2876 /** 2877 * Indicates whether the specified character is a letter or a digit. 2878 * 2879 * @param c 2880 * the character to check. 2881 * @return {@code true} if {@code c} is a letter or a digit; {@code false} 2882 * otherwise. 2883 */ isLetterOrDigit(char c)2884 public static boolean isLetterOrDigit(char c) { 2885 return isLetterOrDigit((int) c); 2886 } 2887 2888 /** 2889 * Indicates whether the specified code point is a letter or a digit. 2890 * 2891 * @param codePoint 2892 * the code point to check. 2893 * @return {@code true} if {@code codePoint} is a letter or a digit; 2894 * {@code false} otherwise. 
2895 */ isLetterOrDigit(int codePoint)2896 public static boolean isLetterOrDigit(int codePoint) { 2897 // Optimized case for ASCII 2898 if (('A' <= codePoint && codePoint <= 'Z') || ('a' <= codePoint && codePoint <= 'z')) { 2899 return true; 2900 } 2901 if ('0' <= codePoint && codePoint <= '9') { 2902 return true; 2903 } 2904 if (codePoint < 128) { 2905 return false; 2906 } 2907 return isLetterOrDigitImpl(codePoint); 2908 } 2909 isLetterOrDigitImpl(int codePoint)2910 private static native boolean isLetterOrDigitImpl(int codePoint); 2911 2912 /** 2913 * Indicates whether the specified character is a lower case letter. 2914 * 2915 * @param c 2916 * the character to check. 2917 * @return {@code true} if {@code c} is a lower case letter; {@code false} 2918 * otherwise. 2919 */ isLowerCase(char c)2920 public static boolean isLowerCase(char c) { 2921 return isLowerCase((int) c); 2922 } 2923 2924 /** 2925 * Indicates whether the specified code point is a lower case letter. 2926 * 2927 * @param codePoint 2928 * the code point to check. 2929 * @return {@code true} if {@code codePoint} is a lower case letter; 2930 * {@code false} otherwise. 2931 */ isLowerCase(int codePoint)2932 public static boolean isLowerCase(int codePoint) { 2933 // Optimized case for ASCII 2934 if ('a' <= codePoint && codePoint <= 'z') { 2935 return true; 2936 } 2937 if (codePoint < 128) { 2938 return false; 2939 } 2940 return isLowerCaseImpl(codePoint); 2941 } 2942 isLowerCaseImpl(int codePoint)2943 private static native boolean isLowerCaseImpl(int codePoint); 2944 2945 /** 2946 * Use {@link #isWhitespace(char)} instead. 2947 * @deprecated Use {@link #isWhitespace(char)} instead. 2948 */ 2949 @Deprecated isSpace(char c)2950 public static boolean isSpace(char c) { 2951 return c == '\n' || c == '\t' || c == '\f' || c == '\r' || c == ' '; 2952 } 2953 2954 /** 2955 * See {@link #isSpaceChar(int)}. 
2956 */ isSpaceChar(char c)2957 public static boolean isSpaceChar(char c) { 2958 return isSpaceChar((int) c); 2959 } 2960 2961 /** 2962 * Returns true if the given code point is a Unicode space character. 2963 * The exact set of characters considered as whitespace varies with Unicode version. 2964 * Note that non-breaking spaces are considered whitespace. 2965 * Note also that line separators are not considered whitespace; see {@link #isWhitespace} 2966 * for an alternative. 2967 */ isSpaceChar(int codePoint)2968 public static boolean isSpaceChar(int codePoint) { 2969 // We don't just call into icu4c because of the JNI overhead. Ideally we'd fix that. 2970 // SPACE or NO-BREAK SPACE? 2971 if (codePoint == 0x20 || codePoint == 0xa0) { 2972 return true; 2973 } 2974 if (codePoint < 0x1000) { 2975 return false; 2976 } 2977 // OGHAM SPACE MARK or MONGOLIAN VOWEL SEPARATOR? 2978 if (codePoint == 0x1680 || codePoint == 0x180e) { 2979 return true; 2980 } 2981 if (codePoint < 0x2000) { 2982 return false; 2983 } 2984 if (codePoint <= 0xffff) { 2985 // Other whitespace from General Punctuation... 2986 return codePoint <= 0x200a || codePoint == 0x2028 || codePoint == 0x2029 || codePoint == 0x202f || codePoint == 0x205f || 2987 codePoint == 0x3000; // ...or CJK Symbols and Punctuation? 2988 } 2989 // Let icu4c worry about non-BMP code points. 2990 return isSpaceCharImpl(codePoint); 2991 } 2992 isSpaceCharImpl(int codePoint)2993 private static native boolean isSpaceCharImpl(int codePoint); 2994 2995 /** 2996 * Indicates whether the specified character is a titlecase character. 2997 * 2998 * @param c 2999 * the character to check. 3000 * @return {@code true} if {@code c} is a titlecase character, {@code false} 3001 * otherwise. 3002 */ isTitleCase(char c)3003 public static boolean isTitleCase(char c) { 3004 return isTitleCaseImpl(c); 3005 } 3006 3007 /** 3008 * Indicates whether the specified code point is a titlecase character. 
3009 * 3010 * @param codePoint 3011 * the code point to check. 3012 * @return {@code true} if {@code codePoint} is a titlecase character, 3013 * {@code false} otherwise. 3014 */ isTitleCase(int codePoint)3015 public static boolean isTitleCase(int codePoint) { 3016 return isTitleCaseImpl(codePoint); 3017 } 3018 isTitleCaseImpl(int codePoint)3019 private static native boolean isTitleCaseImpl(int codePoint); 3020 3021 /** 3022 * Indicates whether the specified character is valid as part of a Unicode 3023 * identifier other than the first character. 3024 * 3025 * @param c 3026 * the character to check. 3027 * @return {@code true} if {@code c} is valid as part of a Unicode 3028 * identifier; {@code false} otherwise. 3029 */ isUnicodeIdentifierPart(char c)3030 public static boolean isUnicodeIdentifierPart(char c) { 3031 return isUnicodeIdentifierPartImpl(c); 3032 } 3033 3034 /** 3035 * Indicates whether the specified code point is valid as part of a Unicode 3036 * identifier other than the first character. 3037 * 3038 * @param codePoint 3039 * the code point to check. 3040 * @return {@code true} if {@code codePoint} is valid as part of a Unicode 3041 * identifier; {@code false} otherwise. 3042 */ isUnicodeIdentifierPart(int codePoint)3043 public static boolean isUnicodeIdentifierPart(int codePoint) { 3044 return isUnicodeIdentifierPartImpl(codePoint); 3045 } 3046 isUnicodeIdentifierPartImpl(int codePoint)3047 private static native boolean isUnicodeIdentifierPartImpl(int codePoint); 3048 3049 /** 3050 * Indicates whether the specified character is a valid initial character 3051 * for a Unicode identifier. 3052 * 3053 * @param c 3054 * the character to check. 3055 * @return {@code true} if {@code c} is a valid first character for a 3056 * Unicode identifier; {@code false} otherwise. 
3057 */ isUnicodeIdentifierStart(char c)3058 public static boolean isUnicodeIdentifierStart(char c) { 3059 return isUnicodeIdentifierStartImpl(c); 3060 } 3061 3062 /** 3063 * Indicates whether the specified code point is a valid initial character 3064 * for a Unicode identifier. 3065 * 3066 * @param codePoint 3067 * the code point to check. 3068 * @return {@code true} if {@code codePoint} is a valid first character for 3069 * a Unicode identifier; {@code false} otherwise. 3070 */ isUnicodeIdentifierStart(int codePoint)3071 public static boolean isUnicodeIdentifierStart(int codePoint) { 3072 return isUnicodeIdentifierStartImpl(codePoint); 3073 } 3074 isUnicodeIdentifierStartImpl(int codePoint)3075 private static native boolean isUnicodeIdentifierStartImpl(int codePoint); 3076 3077 /** 3078 * Indicates whether the specified character is an upper case letter. 3079 * 3080 * @param c 3081 * the character to check. 3082 * @return {@code true} if {@code c} is a upper case letter; {@code false} 3083 * otherwise. 3084 */ isUpperCase(char c)3085 public static boolean isUpperCase(char c) { 3086 return isUpperCase((int) c); 3087 } 3088 3089 /** 3090 * Indicates whether the specified code point is an upper case letter. 3091 * 3092 * @param codePoint 3093 * the code point to check. 3094 * @return {@code true} if {@code codePoint} is a upper case letter; 3095 * {@code false} otherwise. 3096 */ isUpperCase(int codePoint)3097 public static boolean isUpperCase(int codePoint) { 3098 // Optimized case for ASCII 3099 if ('A' <= codePoint && codePoint <= 'Z') { 3100 return true; 3101 } 3102 if (codePoint < 128) { 3103 return false; 3104 } 3105 return isUpperCaseImpl(codePoint); 3106 } 3107 isUpperCaseImpl(int codePoint)3108 private static native boolean isUpperCaseImpl(int codePoint); 3109 3110 /** 3111 * See {@link #isWhitespace(int)}. 
3112 */ isWhitespace(char c)3113 public static boolean isWhitespace(char c) { 3114 return isWhitespace((int) c); 3115 } 3116 3117 /** 3118 * Returns true if the given code point is a Unicode whitespace character. 3119 * The exact set of characters considered as whitespace varies with Unicode version. 3120 * Note that non-breaking spaces are not considered whitespace. 3121 * Note also that line separators are considered whitespace; see {@link #isSpaceChar} 3122 * for an alternative. 3123 */ isWhitespace(int codePoint)3124 public static boolean isWhitespace(int codePoint) { 3125 // We don't just call into icu4c because of the JNI overhead. Ideally we'd fix that. 3126 // Any ASCII whitespace character? 3127 if ((codePoint >= 0x1c && codePoint <= 0x20) || (codePoint >= 0x09 && codePoint <= 0x0d)) { 3128 return true; 3129 } 3130 if (codePoint < 0x1000) { 3131 return false; 3132 } 3133 // OGHAM SPACE MARK or MONGOLIAN VOWEL SEPARATOR? 3134 if (codePoint == 0x1680 || codePoint == 0x180e) { 3135 return true; 3136 } 3137 if (codePoint < 0x2000) { 3138 return false; 3139 } 3140 // Exclude General Punctuation's non-breaking spaces (which includes FIGURE SPACE). 3141 if (codePoint == 0x2007 || codePoint == 0x202f) { 3142 return false; 3143 } 3144 if (codePoint <= 0xffff) { 3145 // Other whitespace from General Punctuation... 3146 return codePoint <= 0x200a || codePoint == 0x2028 || codePoint == 0x2029 || codePoint == 0x205f || 3147 codePoint == 0x3000; // ...or CJK Symbols and Punctuation? 3148 } 3149 // Let icu4c worry about non-BMP code points. 3150 return isWhitespaceImpl(codePoint); 3151 } 3152 isWhitespaceImpl(int codePoint)3153 private static native boolean isWhitespaceImpl(int codePoint); 3154 3155 /** 3156 * Reverses the order of the first and second byte in the specified 3157 * character. 3158 * 3159 * @param c 3160 * the character to reverse. 3161 * @return the character with reordered bytes. 
3162 */ reverseBytes(char c)3163 public static char reverseBytes(char c) { 3164 return (char)((c<<8) | (c>>8)); 3165 } 3166 3167 /** 3168 * Returns the lower case equivalent for the specified character if the 3169 * character is an upper case letter. Otherwise, the specified character is 3170 * returned unchanged. 3171 * 3172 * @param c 3173 * the character 3174 * @return if {@code c} is an upper case character then its lower case 3175 * counterpart, otherwise just {@code c}. 3176 */ toLowerCase(char c)3177 public static char toLowerCase(char c) { 3178 return (char) toLowerCase((int) c); 3179 } 3180 3181 /** 3182 * Returns the lower case equivalent for the specified code point if it is 3183 * an upper case letter. Otherwise, the specified code point is returned 3184 * unchanged. 3185 * 3186 * @param codePoint 3187 * the code point to check. 3188 * @return if {@code codePoint} is an upper case character then its lower 3189 * case counterpart, otherwise just {@code codePoint}. 3190 */ toLowerCase(int codePoint)3191 public static int toLowerCase(int codePoint) { 3192 // Optimized case for ASCII 3193 if ('A' <= codePoint && codePoint <= 'Z') { 3194 return (char) (codePoint + ('a' - 'A')); 3195 } 3196 if (codePoint < 192) { 3197 return codePoint; 3198 } 3199 return toLowerCaseImpl(codePoint); 3200 } 3201 toLowerCaseImpl(int codePoint)3202 private static native int toLowerCaseImpl(int codePoint); 3203 3204 @Override toString()3205 public String toString() { 3206 return String.valueOf(value); 3207 } 3208 3209 /** 3210 * Converts the specified character to its string representation. 3211 * 3212 * @param value 3213 * the character to convert. 3214 * @return the character converted to a string. 3215 */ toString(char value)3216 public static String toString(char value) { 3217 return String.valueOf(value); 3218 } 3219 3220 /** 3221 * Returns the title case equivalent for the specified character if it 3222 * exists. Otherwise, the specified character is returned unchanged. 
3223 * 3224 * @param c 3225 * the character to convert. 3226 * @return the title case equivalent of {@code c} if it exists, otherwise 3227 * {@code c}. 3228 */ toTitleCase(char c)3229 public static char toTitleCase(char c) { 3230 return (char) toTitleCaseImpl(c); 3231 } 3232 3233 /** 3234 * Returns the title case equivalent for the specified code point if it 3235 * exists. Otherwise, the specified code point is returned unchanged. 3236 * 3237 * @param codePoint 3238 * the code point to convert. 3239 * @return the title case equivalent of {@code codePoint} if it exists, 3240 * otherwise {@code codePoint}. 3241 */ toTitleCase(int codePoint)3242 public static int toTitleCase(int codePoint) { 3243 return toTitleCaseImpl(codePoint); 3244 } 3245 toTitleCaseImpl(int codePoint)3246 private static native int toTitleCaseImpl(int codePoint); 3247 3248 /** 3249 * Returns the upper case equivalent for the specified character if the 3250 * character is a lower case letter. Otherwise, the specified character is 3251 * returned unchanged. 3252 * 3253 * @param c 3254 * the character to convert. 3255 * @return if {@code c} is a lower case character then its upper case 3256 * counterpart, otherwise just {@code c}. 3257 */ toUpperCase(char c)3258 public static char toUpperCase(char c) { 3259 return (char) toUpperCase((int) c); 3260 } 3261 3262 /** 3263 * Returns the upper case equivalent for the specified code point if the 3264 * code point is a lower case letter. Otherwise, the specified code point is 3265 * returned unchanged. 3266 * 3267 * @param codePoint 3268 * the code point to convert. 3269 * @return if {@code codePoint} is a lower case character then its upper 3270 * case counterpart, otherwise just {@code codePoint}. 
*/
    public static int toUpperCase(int codePoint) {
        // From 181 (0xb5) upwards the native implementation decides.
        if (codePoint >= 181) {
            return toUpperCaseImpl(codePoint);
        }
        if (codePoint >= 'a' && codePoint <= 'z') {
            // ASCII letters differ from their upper case forms by a fixed offset.
            return codePoint - ('a' - 'A');
        }
        // Nothing else below 181 is changed.
        return codePoint;
    }

    private static native int toUpperCaseImpl(int codePoint);
}