1 /* GENERATED SOURCE. DO NOT MODIFY. */ 2 /** 3 ******************************************************************************* 4 * Copyright (C) 1996-2016, International Business Machines Corporation and 5 * others. All Rights Reserved. 6 ******************************************************************************* 7 */ 8 9 package android.icu.lang; 10 11 import java.lang.ref.SoftReference; 12 import java.util.HashMap; 13 import java.util.Iterator; 14 import java.util.Locale; 15 import java.util.Map; 16 17 import android.icu.impl.IllegalIcuArgumentException; 18 import android.icu.impl.Trie2; 19 import android.icu.impl.UBiDiProps; 20 import android.icu.impl.UCaseProps; 21 import android.icu.impl.UCharacterName; 22 import android.icu.impl.UCharacterNameChoice; 23 import android.icu.impl.UCharacterProperty; 24 import android.icu.impl.UCharacterUtility; 25 import android.icu.impl.UPropertyAliases; 26 import android.icu.lang.UCharacterEnums.ECharacterCategory; 27 import android.icu.lang.UCharacterEnums.ECharacterDirection; 28 import android.icu.text.BreakIterator; 29 import android.icu.text.Normalizer2; 30 import android.icu.util.RangeValueIterator; 31 import android.icu.util.ULocale; 32 import android.icu.util.ValueIterator; 33 import android.icu.util.VersionInfo; 34 35 /** 36 * <strong>[icu enhancement]</strong> ICU's replacement for {@link java.lang.Character}. Methods, fields, and other functionality specific to ICU are labeled '<strong>[icu]</strong>'. 37 * 38 * <p>The UCharacter class provides extensions to the {@link java.lang.Character} class. 39 * These extensions provide support for more Unicode properties. 40 * Each ICU release supports the latest version of Unicode available at that time. 41 * 42 * <p>For some time before Java 5 added support for supplementary Unicode code points, 43 * The ICU UCharacter class and many other ICU classes already supported them. 
44 * Some UCharacter methods and constants were widened slightly differently than 45 * how the Character class methods and constants were widened later. 46 * In particular, {@link Character#MAX_VALUE} is still a char with the value U+FFFF, 47 * while the {@link UCharacter#MAX_VALUE} is an int with the value U+10FFFF. 48 * 49 * <p>Code points are represented in these APIs using ints. While it would be 50 * more convenient in Java to have a separate primitive datatype for them, 51 * ints suffice in the meantime. 52 * 53 * <p>Aside from the additions for UTF-16 support, and the updated Unicode 54 * properties, the main differences between UCharacter and Character are: 55 * <ul> 56 * <li> UCharacter is not designed to be a char wrapper and does not have 57 * APIs that involve management of that single char.<br> 58 * These include: 59 * <ul> 60 * <li> char charValue(), 61 * <li> int compareTo(java.lang.Character, java.lang.Character), etc. 62 * </ul> 63 * <li> UCharacter does not include Character APIs that are deprecated, nor 64 * does it include the Java-specific character information, such as 65 * boolean isJavaIdentifierPart(char ch). 66 * <li> Character maps characters 'A' - 'Z' and 'a' - 'z' to the numeric 67 * values '10' - '35'. UCharacter also does this in digit and 68 * getNumericValue, to adhere to the Java semantics of these 69 * methods. New methods unicodeDigit and 70 * getUnicodeNumericValue do not treat the above code points 71 * as having numeric values. This is a semantic change from ICU4J 1.3.1. 72 * </ul> 73 * <p> 74 * In addition to Java compatibility functions, which calculate derived properties, 75 * this API provides low-level access to the Unicode Character Database. 76 * </p> 77 * <p> 78 * Unicode assigns each code point (not just assigned characters) values for 79 * many properties. 80 * Most of them are simple boolean flags, or constants from a small enumerated list. 
81 * For some properties, values are strings or other relatively more complex types. 82 * </p> 83 * <p> 84 * For more information see 85 * <a href="http://www.unicode.org/ucd/">"About the Unicode Character Database"</a> 86 * (http://www.unicode.org/ucd/) 87 * and the <a href="http://www.icu-project.org/userguide/properties.html">ICU 88 * User Guide chapter on Properties</a> 89 * (http://www.icu-project.org/userguide/properties.html). 90 * </p> 91 * <p> 92 * There are also functions that provide easy migration from C/POSIX functions 93 * like isblank(). Their use is generally discouraged because the C/POSIX 94 * standards do not define their semantics beyond the ASCII range, which means 95 * that different implementations exhibit very different behavior. 96 * Instead, Unicode properties should be used directly. 97 * </p> 98 * <p> 99 * There are also only a few, broad C/POSIX character classes, and they tend 100 * to be used for conflicting purposes. For example, the "isalpha()" class 101 * is sometimes used to determine word boundaries, while a more sophisticated 102 * approach would at least distinguish initial letters from continuation 103 * characters (the latter including combining marks). 104 * (In ICU, BreakIterator is the most sophisticated API for word boundaries.) 105 * Another example: There is no "istitle()" class for titlecase characters. 106 * </p> 107 * <p> 108 * ICU 3.4 and later provide API access for all twelve C/POSIX character classes. 109 * ICU implements them according to the Standard Recommendations in 110 * Annex C: Compatibility Properties of UTS #18 Unicode Regular Expressions 111 * (http://www.unicode.org/reports/tr18/#Compatibility_Properties). 
112 * </p> 113 * <p> 114 * API access for C/POSIX character classes is as follows: 115 * <pre>{@code 116 * - alpha: isUAlphabetic(c) or hasBinaryProperty(c, UProperty.ALPHABETIC) 117 * - lower: isULowercase(c) or hasBinaryProperty(c, UProperty.LOWERCASE) 118 * - upper: isUUppercase(c) or hasBinaryProperty(c, UProperty.UPPERCASE) 119 * - punct: ((1<<getType(c)) & ((1<<DASH_PUNCTUATION)|(1<<START_PUNCTUATION)| 120 * (1<<END_PUNCTUATION)|(1<<CONNECTOR_PUNCTUATION)|(1<<OTHER_PUNCTUATION)| 121 * (1<<INITIAL_PUNCTUATION)|(1<<FINAL_PUNCTUATION)))!=0 122 * - digit: isDigit(c) or getType(c)==DECIMAL_DIGIT_NUMBER 123 * - xdigit: hasBinaryProperty(c, UProperty.POSIX_XDIGIT) 124 * - alnum: hasBinaryProperty(c, UProperty.POSIX_ALNUM) 125 * - space: isUWhiteSpace(c) or hasBinaryProperty(c, UProperty.WHITE_SPACE) 126 * - blank: hasBinaryProperty(c, UProperty.POSIX_BLANK) 127 * - cntrl: getType(c)==CONTROL 128 * - graph: hasBinaryProperty(c, UProperty.POSIX_GRAPH) 129 * - print: hasBinaryProperty(c, UProperty.POSIX_PRINT)}</pre> 130 * </p> 131 * <p> 132 * The C/POSIX character classes are also available in UnicodeSet patterns, 133 * using patterns like [:graph:] or \p{graph}. 134 * </p> 135 * 136 * <strong>[icu] Note:</strong> There are several ICU (and Java) whitespace functions. 137 * Comparison:<ul> 138 * <li> isUWhiteSpace=UCHAR_WHITE_SPACE: Unicode White_Space property; 139 * most of general categories "Z" (separators) + most whitespace ISO controls 140 * (including no-break spaces, but excluding IS1..IS4 and ZWSP) 141 * <li> isWhitespace: Java isWhitespace; Z + whitespace ISO controls but excluding no-break spaces 142 * <li> isSpaceChar: just Z (including no-break spaces)</ul> 143 * </p> 144 * <p> 145 * This class is not subclassable. 
146 * </p> 147 * @author Syn Wee Quek 148 * @see android.icu.lang.UCharacterEnums 149 */ 150 151 public final class UCharacter implements ECharacterCategory, ECharacterDirection 152 { 153 // public inner classes ---------------------------------------------- 154 155 /** 156 * <strong>[icu enhancement]</strong> ICU's replacement for {@link java.lang.Character.UnicodeBlock}. Methods, fields, and other functionality specific to ICU are labeled '<strong>[icu]</strong>'. 157 * 158 * A family of character subsets representing the character blocks in the 159 * Unicode specification, generated from Unicode Data file Blocks.txt. 160 * Character blocks generally define characters used for a specific script 161 * or purpose. A character is contained by at most one Unicode block. 162 * 163 * <strong>[icu] Note:</strong> All fields named XXX_ID are specific to ICU. 164 */ 165 public static final class UnicodeBlock extends Character.Subset 166 { 167 // block id corresponding to icu4c ----------------------------------- 168 169 /** 170 */ 171 public static final int INVALID_CODE_ID = -1; 172 /** 173 */ 174 public static final int BASIC_LATIN_ID = 1; 175 /** 176 */ 177 public static final int LATIN_1_SUPPLEMENT_ID = 2; 178 /** 179 */ 180 public static final int LATIN_EXTENDED_A_ID = 3; 181 /** 182 */ 183 public static final int LATIN_EXTENDED_B_ID = 4; 184 /** 185 */ 186 public static final int IPA_EXTENSIONS_ID = 5; 187 /** 188 */ 189 public static final int SPACING_MODIFIER_LETTERS_ID = 6; 190 /** 191 */ 192 public static final int COMBINING_DIACRITICAL_MARKS_ID = 7; 193 /** 194 * Unicode 3.2 renames this block to "Greek and Coptic". 
195 */ 196 public static final int GREEK_ID = 8; 197 /** 198 */ 199 public static final int CYRILLIC_ID = 9; 200 /** 201 */ 202 public static final int ARMENIAN_ID = 10; 203 /** 204 */ 205 public static final int HEBREW_ID = 11; 206 /** 207 */ 208 public static final int ARABIC_ID = 12; 209 /** 210 */ 211 public static final int SYRIAC_ID = 13; 212 /** 213 */ 214 public static final int THAANA_ID = 14; 215 /** 216 */ 217 public static final int DEVANAGARI_ID = 15; 218 /** 219 */ 220 public static final int BENGALI_ID = 16; 221 /** 222 */ 223 public static final int GURMUKHI_ID = 17; 224 /** 225 */ 226 public static final int GUJARATI_ID = 18; 227 /** 228 */ 229 public static final int ORIYA_ID = 19; 230 /** 231 */ 232 public static final int TAMIL_ID = 20; 233 /** 234 */ 235 public static final int TELUGU_ID = 21; 236 /** 237 */ 238 public static final int KANNADA_ID = 22; 239 /** 240 */ 241 public static final int MALAYALAM_ID = 23; 242 /** 243 */ 244 public static final int SINHALA_ID = 24; 245 /** 246 */ 247 public static final int THAI_ID = 25; 248 /** 249 */ 250 public static final int LAO_ID = 26; 251 /** 252 */ 253 public static final int TIBETAN_ID = 27; 254 /** 255 */ 256 public static final int MYANMAR_ID = 28; 257 /** 258 */ 259 public static final int GEORGIAN_ID = 29; 260 /** 261 */ 262 public static final int HANGUL_JAMO_ID = 30; 263 /** 264 */ 265 public static final int ETHIOPIC_ID = 31; 266 /** 267 */ 268 public static final int CHEROKEE_ID = 32; 269 /** 270 */ 271 public static final int UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_ID = 33; 272 /** 273 */ 274 public static final int OGHAM_ID = 34; 275 /** 276 */ 277 public static final int RUNIC_ID = 35; 278 /** 279 */ 280 public static final int KHMER_ID = 36; 281 /** 282 */ 283 public static final int MONGOLIAN_ID = 37; 284 /** 285 */ 286 public static final int LATIN_EXTENDED_ADDITIONAL_ID = 38; 287 /** 288 */ 289 public static final int GREEK_EXTENDED_ID = 39; 290 /** 291 */ 292 public static final 
int GENERAL_PUNCTUATION_ID = 40; 293 /** 294 */ 295 public static final int SUPERSCRIPTS_AND_SUBSCRIPTS_ID = 41; 296 /** 297 */ 298 public static final int CURRENCY_SYMBOLS_ID = 42; 299 /** 300 * Unicode 3.2 renames this block to "Combining Diacritical Marks for 301 * Symbols". 302 */ 303 public static final int COMBINING_MARKS_FOR_SYMBOLS_ID = 43; 304 /** 305 */ 306 public static final int LETTERLIKE_SYMBOLS_ID = 44; 307 /** 308 */ 309 public static final int NUMBER_FORMS_ID = 45; 310 /** 311 */ 312 public static final int ARROWS_ID = 46; 313 /** 314 */ 315 public static final int MATHEMATICAL_OPERATORS_ID = 47; 316 /** 317 */ 318 public static final int MISCELLANEOUS_TECHNICAL_ID = 48; 319 /** 320 */ 321 public static final int CONTROL_PICTURES_ID = 49; 322 /** 323 */ 324 public static final int OPTICAL_CHARACTER_RECOGNITION_ID = 50; 325 /** 326 */ 327 public static final int ENCLOSED_ALPHANUMERICS_ID = 51; 328 /** 329 */ 330 public static final int BOX_DRAWING_ID = 52; 331 /** 332 */ 333 public static final int BLOCK_ELEMENTS_ID = 53; 334 /** 335 */ 336 public static final int GEOMETRIC_SHAPES_ID = 54; 337 /** 338 */ 339 public static final int MISCELLANEOUS_SYMBOLS_ID = 55; 340 /** 341 */ 342 public static final int DINGBATS_ID = 56; 343 /** 344 */ 345 public static final int BRAILLE_PATTERNS_ID = 57; 346 /** 347 */ 348 public static final int CJK_RADICALS_SUPPLEMENT_ID = 58; 349 /** 350 */ 351 public static final int KANGXI_RADICALS_ID = 59; 352 /** 353 */ 354 public static final int IDEOGRAPHIC_DESCRIPTION_CHARACTERS_ID = 60; 355 /** 356 */ 357 public static final int CJK_SYMBOLS_AND_PUNCTUATION_ID = 61; 358 /** 359 */ 360 public static final int HIRAGANA_ID = 62; 361 /** 362 */ 363 public static final int KATAKANA_ID = 63; 364 /** 365 */ 366 public static final int BOPOMOFO_ID = 64; 367 /** 368 */ 369 public static final int HANGUL_COMPATIBILITY_JAMO_ID = 65; 370 /** 371 */ 372 public static final int KANBUN_ID = 66; 373 /** 374 */ 375 public static final 
int BOPOMOFO_EXTENDED_ID = 67; 376 /** 377 */ 378 public static final int ENCLOSED_CJK_LETTERS_AND_MONTHS_ID = 68; 379 /** 380 */ 381 public static final int CJK_COMPATIBILITY_ID = 69; 382 /** 383 */ 384 public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_ID = 70; 385 /** 386 */ 387 public static final int CJK_UNIFIED_IDEOGRAPHS_ID = 71; 388 /** 389 */ 390 public static final int YI_SYLLABLES_ID = 72; 391 /** 392 */ 393 public static final int YI_RADICALS_ID = 73; 394 /** 395 */ 396 public static final int HANGUL_SYLLABLES_ID = 74; 397 /** 398 */ 399 public static final int HIGH_SURROGATES_ID = 75; 400 /** 401 */ 402 public static final int HIGH_PRIVATE_USE_SURROGATES_ID = 76; 403 /** 404 */ 405 public static final int LOW_SURROGATES_ID = 77; 406 /** 407 * Same as public static final int PRIVATE_USE. 408 * Until Unicode 3.1.1; the corresponding block name was "Private Use"; 409 * and multiple code point ranges had this block. 410 * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" 411 * and adds separate blocks for the supplementary PUAs. 412 */ 413 public static final int PRIVATE_USE_AREA_ID = 78; 414 /** 415 * Same as public static final int PRIVATE_USE_AREA. 416 * Until Unicode 3.1.1; the corresponding block name was "Private Use"; 417 * and multiple code point ranges had this block. 418 * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" 419 * and adds separate blocks for the supplementary PUAs. 
420 */ 421 public static final int PRIVATE_USE_ID = PRIVATE_USE_AREA_ID; 422 /** 423 */ 424 public static final int CJK_COMPATIBILITY_IDEOGRAPHS_ID = 79; 425 /** 426 */ 427 public static final int ALPHABETIC_PRESENTATION_FORMS_ID = 80; 428 /** 429 */ 430 public static final int ARABIC_PRESENTATION_FORMS_A_ID = 81; 431 /** 432 */ 433 public static final int COMBINING_HALF_MARKS_ID = 82; 434 /** 435 */ 436 public static final int CJK_COMPATIBILITY_FORMS_ID = 83; 437 /** 438 */ 439 public static final int SMALL_FORM_VARIANTS_ID = 84; 440 /** 441 */ 442 public static final int ARABIC_PRESENTATION_FORMS_B_ID = 85; 443 /** 444 */ 445 public static final int SPECIALS_ID = 86; 446 /** 447 */ 448 public static final int HALFWIDTH_AND_FULLWIDTH_FORMS_ID = 87; 449 /** 450 */ 451 public static final int OLD_ITALIC_ID = 88; 452 /** 453 */ 454 public static final int GOTHIC_ID = 89; 455 /** 456 */ 457 public static final int DESERET_ID = 90; 458 /** 459 */ 460 public static final int BYZANTINE_MUSICAL_SYMBOLS_ID = 91; 461 /** 462 */ 463 public static final int MUSICAL_SYMBOLS_ID = 92; 464 /** 465 */ 466 public static final int MATHEMATICAL_ALPHANUMERIC_SYMBOLS_ID = 93; 467 /** 468 */ 469 public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_ID = 94; 470 /** 471 */ 472 public static final int 473 CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_ID = 95; 474 /** 475 */ 476 public static final int TAGS_ID = 96; 477 478 // New blocks in Unicode 3.2 479 480 /** 481 * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement". 482 */ 483 public static final int CYRILLIC_SUPPLEMENTARY_ID = 97; 484 /** 485 * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement". 
486 */ 487 488 public static final int CYRILLIC_SUPPLEMENT_ID = 97; 489 /** 490 */ 491 public static final int TAGALOG_ID = 98; 492 /** 493 */ 494 public static final int HANUNOO_ID = 99; 495 /** 496 */ 497 public static final int BUHID_ID = 100; 498 /** 499 */ 500 public static final int TAGBANWA_ID = 101; 501 /** 502 */ 503 public static final int MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A_ID = 102; 504 /** 505 */ 506 public static final int SUPPLEMENTAL_ARROWS_A_ID = 103; 507 /** 508 */ 509 public static final int SUPPLEMENTAL_ARROWS_B_ID = 104; 510 /** 511 */ 512 public static final int MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B_ID = 105; 513 /** 514 */ 515 public static final int SUPPLEMENTAL_MATHEMATICAL_OPERATORS_ID = 106; 516 /** 517 */ 518 public static final int KATAKANA_PHONETIC_EXTENSIONS_ID = 107; 519 /** 520 */ 521 public static final int VARIATION_SELECTORS_ID = 108; 522 /** 523 */ 524 public static final int SUPPLEMENTARY_PRIVATE_USE_AREA_A_ID = 109; 525 /** 526 */ 527 public static final int SUPPLEMENTARY_PRIVATE_USE_AREA_B_ID = 110; 528 529 /** 530 */ 531 public static final int LIMBU_ID = 111; /*[1900]*/ 532 /** 533 */ 534 public static final int TAI_LE_ID = 112; /*[1950]*/ 535 /** 536 */ 537 public static final int KHMER_SYMBOLS_ID = 113; /*[19E0]*/ 538 /** 539 */ 540 public static final int PHONETIC_EXTENSIONS_ID = 114; /*[1D00]*/ 541 /** 542 */ 543 public static final int MISCELLANEOUS_SYMBOLS_AND_ARROWS_ID = 115; /*[2B00]*/ 544 /** 545 */ 546 public static final int YIJING_HEXAGRAM_SYMBOLS_ID = 116; /*[4DC0]*/ 547 /** 548 */ 549 public static final int LINEAR_B_SYLLABARY_ID = 117; /*[10000]*/ 550 /** 551 */ 552 public static final int LINEAR_B_IDEOGRAMS_ID = 118; /*[10080]*/ 553 /** 554 */ 555 public static final int AEGEAN_NUMBERS_ID = 119; /*[10100]*/ 556 /** 557 */ 558 public static final int UGARITIC_ID = 120; /*[10380]*/ 559 /** 560 */ 561 public static final int SHAVIAN_ID = 121; /*[10450]*/ 562 /** 563 */ 564 public static final int OSMANYA_ID = 
122; /*[10480]*/ 565 /** 566 */ 567 public static final int CYPRIOT_SYLLABARY_ID = 123; /*[10800]*/ 568 /** 569 */ 570 public static final int TAI_XUAN_JING_SYMBOLS_ID = 124; /*[1D300]*/ 571 /** 572 */ 573 public static final int VARIATION_SELECTORS_SUPPLEMENT_ID = 125; /*[E0100]*/ 574 575 /* New blocks in Unicode 4.1 */ 576 577 /** 578 */ 579 public static final int ANCIENT_GREEK_MUSICAL_NOTATION_ID = 126; /*[1D200]*/ 580 581 /** 582 */ 583 public static final int ANCIENT_GREEK_NUMBERS_ID = 127; /*[10140]*/ 584 585 /** 586 */ 587 public static final int ARABIC_SUPPLEMENT_ID = 128; /*[0750]*/ 588 589 /** 590 */ 591 public static final int BUGINESE_ID = 129; /*[1A00]*/ 592 593 /** 594 */ 595 public static final int CJK_STROKES_ID = 130; /*[31C0]*/ 596 597 /** 598 */ 599 public static final int COMBINING_DIACRITICAL_MARKS_SUPPLEMENT_ID = 131; /*[1DC0]*/ 600 601 /** 602 */ 603 public static final int COPTIC_ID = 132; /*[2C80]*/ 604 605 /** 606 */ 607 public static final int ETHIOPIC_EXTENDED_ID = 133; /*[2D80]*/ 608 609 /** 610 */ 611 public static final int ETHIOPIC_SUPPLEMENT_ID = 134; /*[1380]*/ 612 613 /** 614 */ 615 public static final int GEORGIAN_SUPPLEMENT_ID = 135; /*[2D00]*/ 616 617 /** 618 */ 619 public static final int GLAGOLITIC_ID = 136; /*[2C00]*/ 620 621 /** 622 */ 623 public static final int KHAROSHTHI_ID = 137; /*[10A00]*/ 624 625 /** 626 */ 627 public static final int MODIFIER_TONE_LETTERS_ID = 138; /*[A700]*/ 628 629 /** 630 */ 631 public static final int NEW_TAI_LUE_ID = 139; /*[1980]*/ 632 633 /** 634 */ 635 public static final int OLD_PERSIAN_ID = 140; /*[103A0]*/ 636 637 /** 638 */ 639 public static final int PHONETIC_EXTENSIONS_SUPPLEMENT_ID = 141; /*[1D80]*/ 640 641 /** 642 */ 643 public static final int SUPPLEMENTAL_PUNCTUATION_ID = 142; /*[2E00]*/ 644 645 /** 646 */ 647 public static final int SYLOTI_NAGRI_ID = 143; /*[A800]*/ 648 649 /** 650 */ 651 public static final int TIFINAGH_ID = 144; /*[2D30]*/ 652 653 /** 654 */ 655 public static 
final int VERTICAL_FORMS_ID = 145; /*[FE10]*/ 656 657 /* New blocks in Unicode 5.0 */ 658 659 /** 660 */ 661 public static final int NKO_ID = 146; /*[07C0]*/ 662 /** 663 */ 664 public static final int BALINESE_ID = 147; /*[1B00]*/ 665 /** 666 */ 667 public static final int LATIN_EXTENDED_C_ID = 148; /*[2C60]*/ 668 /** 669 */ 670 public static final int LATIN_EXTENDED_D_ID = 149; /*[A720]*/ 671 /** 672 */ 673 public static final int PHAGS_PA_ID = 150; /*[A840]*/ 674 /** 675 */ 676 public static final int PHOENICIAN_ID = 151; /*[10900]*/ 677 /** 678 */ 679 public static final int CUNEIFORM_ID = 152; /*[12000]*/ 680 /** 681 */ 682 public static final int CUNEIFORM_NUMBERS_AND_PUNCTUATION_ID = 153; /*[12400]*/ 683 /** 684 */ 685 public static final int COUNTING_ROD_NUMERALS_ID = 154; /*[1D360]*/ 686 687 /** 688 */ 689 public static final int SUNDANESE_ID = 155; /* [1B80] */ 690 691 /** 692 */ 693 public static final int LEPCHA_ID = 156; /* [1C00] */ 694 695 /** 696 */ 697 public static final int OL_CHIKI_ID = 157; /* [1C50] */ 698 699 /** 700 */ 701 public static final int CYRILLIC_EXTENDED_A_ID = 158; /* [2DE0] */ 702 703 /** 704 */ 705 public static final int VAI_ID = 159; /* [A500] */ 706 707 /** 708 */ 709 public static final int CYRILLIC_EXTENDED_B_ID = 160; /* [A640] */ 710 711 /** 712 */ 713 public static final int SAURASHTRA_ID = 161; /* [A880] */ 714 715 /** 716 */ 717 public static final int KAYAH_LI_ID = 162; /* [A900] */ 718 719 /** 720 */ 721 public static final int REJANG_ID = 163; /* [A930] */ 722 723 /** 724 */ 725 public static final int CHAM_ID = 164; /* [AA00] */ 726 727 /** 728 */ 729 public static final int ANCIENT_SYMBOLS_ID = 165; /* [10190] */ 730 731 /** 732 */ 733 public static final int PHAISTOS_DISC_ID = 166; /* [101D0] */ 734 735 /** 736 */ 737 public static final int LYCIAN_ID = 167; /* [10280] */ 738 739 /** 740 */ 741 public static final int CARIAN_ID = 168; /* [102A0] */ 742 743 /** 744 */ 745 public static final int LYDIAN_ID = 169; /* 
[10920] */ 746 747 /** 748 */ 749 public static final int MAHJONG_TILES_ID = 170; /* [1F000] */ 750 751 /** 752 */ 753 public static final int DOMINO_TILES_ID = 171; /* [1F030] */ 754 755 /* New blocks in Unicode 5.2 */ 756 757 /***/ 758 public static final int SAMARITAN_ID = 172; /*[0800]*/ 759 /***/ 760 public static final int UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_ID = 173; /*[18B0]*/ 761 /***/ 762 public static final int TAI_THAM_ID = 174; /*[1A20]*/ 763 /***/ 764 public static final int VEDIC_EXTENSIONS_ID = 175; /*[1CD0]*/ 765 /***/ 766 public static final int LISU_ID = 176; /*[A4D0]*/ 767 /***/ 768 public static final int BAMUM_ID = 177; /*[A6A0]*/ 769 /***/ 770 public static final int COMMON_INDIC_NUMBER_FORMS_ID = 178; /*[A830]*/ 771 /***/ 772 public static final int DEVANAGARI_EXTENDED_ID = 179; /*[A8E0]*/ 773 /***/ 774 public static final int HANGUL_JAMO_EXTENDED_A_ID = 180; /*[A960]*/ 775 /***/ 776 public static final int JAVANESE_ID = 181; /*[A980]*/ 777 /***/ 778 public static final int MYANMAR_EXTENDED_A_ID = 182; /*[AA60]*/ 779 /***/ 780 public static final int TAI_VIET_ID = 183; /*[AA80]*/ 781 /***/ 782 public static final int MEETEI_MAYEK_ID = 184; /*[ABC0]*/ 783 /***/ 784 public static final int HANGUL_JAMO_EXTENDED_B_ID = 185; /*[D7B0]*/ 785 /***/ 786 public static final int IMPERIAL_ARAMAIC_ID = 186; /*[10840]*/ 787 /***/ 788 public static final int OLD_SOUTH_ARABIAN_ID = 187; /*[10A60]*/ 789 /***/ 790 public static final int AVESTAN_ID = 188; /*[10B00]*/ 791 /***/ 792 public static final int INSCRIPTIONAL_PARTHIAN_ID = 189; /*[10B40]*/ 793 /***/ 794 public static final int INSCRIPTIONAL_PAHLAVI_ID = 190; /*[10B60]*/ 795 /***/ 796 public static final int OLD_TURKIC_ID = 191; /*[10C00]*/ 797 /***/ 798 public static final int RUMI_NUMERAL_SYMBOLS_ID = 192; /*[10E60]*/ 799 /***/ 800 public static final int KAITHI_ID = 193; /*[11080]*/ 801 /***/ 802 public static final int EGYPTIAN_HIEROGLYPHS_ID = 194; /*[13000]*/ 803 /***/ 804 public 
static final int ENCLOSED_ALPHANUMERIC_SUPPLEMENT_ID = 195; /*[1F100]*/ 805 /***/ 806 public static final int ENCLOSED_IDEOGRAPHIC_SUPPLEMENT_ID = 196; /*[1F200]*/ 807 /***/ 808 public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_ID = 197; /*[2A700]*/ 809 810 /* New blocks in Unicode 6.0 */ 811 812 /***/ 813 public static final int MANDAIC_ID = 198; /*[0840]*/ 814 /***/ 815 public static final int BATAK_ID = 199; /*[1BC0]*/ 816 /***/ 817 public static final int ETHIOPIC_EXTENDED_A_ID = 200; /*[AB00]*/ 818 /***/ 819 public static final int BRAHMI_ID = 201; /*[11000]*/ 820 /***/ 821 public static final int BAMUM_SUPPLEMENT_ID = 202; /*[16800]*/ 822 /***/ 823 public static final int KANA_SUPPLEMENT_ID = 203; /*[1B000]*/ 824 /***/ 825 public static final int PLAYING_CARDS_ID = 204; /*[1F0A0]*/ 826 /***/ 827 public static final int MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS_ID = 205; /*[1F300]*/ 828 /***/ 829 public static final int EMOTICONS_ID = 206; /*[1F600]*/ 830 /***/ 831 public static final int TRANSPORT_AND_MAP_SYMBOLS_ID = 207; /*[1F680]*/ 832 /***/ 833 public static final int ALCHEMICAL_SYMBOLS_ID = 208; /*[1F700]*/ 834 /***/ 835 public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_ID = 209; /*[2B740]*/ 836 837 /* New blocks in Unicode 6.1 */ 838 839 /***/ 840 public static final int ARABIC_EXTENDED_A_ID = 210; /*[08A0]*/ 841 /***/ 842 public static final int ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS_ID = 211; /*[1EE00]*/ 843 /***/ 844 public static final int CHAKMA_ID = 212; /*[11100]*/ 845 /***/ 846 public static final int MEETEI_MAYEK_EXTENSIONS_ID = 213; /*[AAE0]*/ 847 /***/ 848 public static final int MEROITIC_CURSIVE_ID = 214; /*[109A0]*/ 849 /***/ 850 public static final int MEROITIC_HIEROGLYPHS_ID = 215; /*[10980]*/ 851 /***/ 852 public static final int MIAO_ID = 216; /*[16F00]*/ 853 /***/ 854 public static final int SHARADA_ID = 217; /*[11180]*/ 855 /***/ 856 public static final int SORA_SOMPENG_ID = 218; /*[110D0]*/ 857 /***/ 858 public static 
final int SUNDANESE_SUPPLEMENT_ID = 219; /*[1CC0]*/ 859 /***/ 860 public static final int TAKRI_ID = 220; /*[11680]*/ 861 862 /* New blocks in Unicode 7.0 */ 863 864 /***/ 865 public static final int BASSA_VAH_ID = 221; /*[16AD0]*/ 866 /***/ 867 public static final int CAUCASIAN_ALBANIAN_ID = 222; /*[10530]*/ 868 /***/ 869 public static final int COPTIC_EPACT_NUMBERS_ID = 223; /*[102E0]*/ 870 /***/ 871 public static final int COMBINING_DIACRITICAL_MARKS_EXTENDED_ID = 224; /*[1AB0]*/ 872 /***/ 873 public static final int DUPLOYAN_ID = 225; /*[1BC00]*/ 874 /***/ 875 public static final int ELBASAN_ID = 226; /*[10500]*/ 876 /***/ 877 public static final int GEOMETRIC_SHAPES_EXTENDED_ID = 227; /*[1F780]*/ 878 /***/ 879 public static final int GRANTHA_ID = 228; /*[11300]*/ 880 /***/ 881 public static final int KHOJKI_ID = 229; /*[11200]*/ 882 /***/ 883 public static final int KHUDAWADI_ID = 230; /*[112B0]*/ 884 /***/ 885 public static final int LATIN_EXTENDED_E_ID = 231; /*[AB30]*/ 886 /***/ 887 public static final int LINEAR_A_ID = 232; /*[10600]*/ 888 /***/ 889 public static final int MAHAJANI_ID = 233; /*[11150]*/ 890 /***/ 891 public static final int MANICHAEAN_ID = 234; /*[10AC0]*/ 892 /***/ 893 public static final int MENDE_KIKAKUI_ID = 235; /*[1E800]*/ 894 /***/ 895 public static final int MODI_ID = 236; /*[11600]*/ 896 /***/ 897 public static final int MRO_ID = 237; /*[16A40]*/ 898 /***/ 899 public static final int MYANMAR_EXTENDED_B_ID = 238; /*[A9E0]*/ 900 /***/ 901 public static final int NABATAEAN_ID = 239; /*[10880]*/ 902 /***/ 903 public static final int OLD_NORTH_ARABIAN_ID = 240; /*[10A80]*/ 904 /***/ 905 public static final int OLD_PERMIC_ID = 241; /*[10350]*/ 906 /***/ 907 public static final int ORNAMENTAL_DINGBATS_ID = 242; /*[1F650]*/ 908 /***/ 909 public static final int PAHAWH_HMONG_ID = 243; /*[16B00]*/ 910 /***/ 911 public static final int PALMYRENE_ID = 244; /*[10860]*/ 912 /***/ 913 public static final int PAU_CIN_HAU_ID = 245; /*[11AC0]*/ 914 
/***/ 915 public static final int PSALTER_PAHLAVI_ID = 246; /*[10B80]*/ 916 /***/ 917 public static final int SHORTHAND_FORMAT_CONTROLS_ID = 247; /*[1BCA0]*/ 918 /***/ 919 public static final int SIDDHAM_ID = 248; /*[11580]*/ 920 /***/ 921 public static final int SINHALA_ARCHAIC_NUMBERS_ID = 249; /*[111E0]*/ 922 /***/ 923 public static final int SUPPLEMENTAL_ARROWS_C_ID = 250; /*[1F800]*/ 924 /***/ 925 public static final int TIRHUTA_ID = 251; /*[11480]*/ 926 /***/ 927 public static final int WARANG_CITI_ID = 252; /*[118A0]*/ 928 929 /* New blocks in Unicode 8.0 */ 930 931 /***/ 932 public static final int AHOM_ID = 253; /*[11700]*/ 933 /***/ 934 public static final int ANATOLIAN_HIEROGLYPHS_ID = 254; /*[14400]*/ 935 /***/ 936 public static final int CHEROKEE_SUPPLEMENT_ID = 255; /*[AB70]*/ 937 /***/ 938 public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E_ID = 256; /*[2B820]*/ 939 /***/ 940 public static final int EARLY_DYNASTIC_CUNEIFORM_ID = 257; /*[12480]*/ 941 /***/ 942 public static final int HATRAN_ID = 258; /*[108E0]*/ 943 /***/ 944 public static final int MULTANI_ID = 259; /*[11280]*/ 945 /***/ 946 public static final int OLD_HUNGARIAN_ID = 260; /*[10C80]*/ 947 /***/ 948 public static final int SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS_ID = 261; /*[1F900]*/ 949 /***/ 950 public static final int SUTTON_SIGNWRITING_ID = 262; /*[1D800]*/ 951 952 /** 953 * @hide unsupported on Android 954 */ 955 public static final int COUNT = 263; 956 957 // blocks objects --------------------------------------------------- 958 959 /** 960 * Array of UnicodeBlocks, for easy access in getInstance(int) 961 */ 962 private final static UnicodeBlock BLOCKS_[] = new UnicodeBlock[COUNT]; 963 964 /** 965 */ 966 public static final UnicodeBlock NO_BLOCK 967 = new UnicodeBlock("NO_BLOCK", 0); 968 969 /** 970 */ 971 public static final UnicodeBlock BASIC_LATIN 972 = new UnicodeBlock("BASIC_LATIN", BASIC_LATIN_ID); 973 /** 974 */ 975 public static final UnicodeBlock LATIN_1_SUPPLEMENT 
976 = new UnicodeBlock("LATIN_1_SUPPLEMENT", LATIN_1_SUPPLEMENT_ID); 977 /** 978 */ 979 public static final UnicodeBlock LATIN_EXTENDED_A 980 = new UnicodeBlock("LATIN_EXTENDED_A", LATIN_EXTENDED_A_ID); 981 /** 982 */ 983 public static final UnicodeBlock LATIN_EXTENDED_B 984 = new UnicodeBlock("LATIN_EXTENDED_B", LATIN_EXTENDED_B_ID); 985 /** 986 */ 987 public static final UnicodeBlock IPA_EXTENSIONS 988 = new UnicodeBlock("IPA_EXTENSIONS", IPA_EXTENSIONS_ID); 989 /** 990 */ 991 public static final UnicodeBlock SPACING_MODIFIER_LETTERS 992 = new UnicodeBlock("SPACING_MODIFIER_LETTERS", SPACING_MODIFIER_LETTERS_ID); 993 /** 994 */ 995 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS 996 = new UnicodeBlock("COMBINING_DIACRITICAL_MARKS", COMBINING_DIACRITICAL_MARKS_ID); 997 /** 998 * Unicode 3.2 renames this block to "Greek and Coptic". 999 */ 1000 public static final UnicodeBlock GREEK 1001 = new UnicodeBlock("GREEK", GREEK_ID); 1002 /** 1003 */ 1004 public static final UnicodeBlock CYRILLIC 1005 = new UnicodeBlock("CYRILLIC", CYRILLIC_ID); 1006 /** 1007 */ 1008 public static final UnicodeBlock ARMENIAN 1009 = new UnicodeBlock("ARMENIAN", ARMENIAN_ID); 1010 /** 1011 */ 1012 public static final UnicodeBlock HEBREW 1013 = new UnicodeBlock("HEBREW", HEBREW_ID); 1014 /** 1015 */ 1016 public static final UnicodeBlock ARABIC 1017 = new UnicodeBlock("ARABIC", ARABIC_ID); 1018 /** 1019 */ 1020 public static final UnicodeBlock SYRIAC 1021 = new UnicodeBlock("SYRIAC", SYRIAC_ID); 1022 /** 1023 */ 1024 public static final UnicodeBlock THAANA 1025 = new UnicodeBlock("THAANA", THAANA_ID); 1026 /** 1027 */ 1028 public static final UnicodeBlock DEVANAGARI 1029 = new UnicodeBlock("DEVANAGARI", DEVANAGARI_ID); 1030 /** 1031 */ 1032 public static final UnicodeBlock BENGALI 1033 = new UnicodeBlock("BENGALI", BENGALI_ID); 1034 /** 1035 */ 1036 public static final UnicodeBlock GURMUKHI 1037 = new UnicodeBlock("GURMUKHI", GURMUKHI_ID); 1038 /** 1039 */ 1040 public static final 
UnicodeBlock GUJARATI 1041 = new UnicodeBlock("GUJARATI", GUJARATI_ID); 1042 /** 1043 */ 1044 public static final UnicodeBlock ORIYA 1045 = new UnicodeBlock("ORIYA", ORIYA_ID); 1046 /** 1047 */ 1048 public static final UnicodeBlock TAMIL 1049 = new UnicodeBlock("TAMIL", TAMIL_ID); 1050 /** 1051 */ 1052 public static final UnicodeBlock TELUGU 1053 = new UnicodeBlock("TELUGU", TELUGU_ID); 1054 /** 1055 */ 1056 public static final UnicodeBlock KANNADA 1057 = new UnicodeBlock("KANNADA", KANNADA_ID); 1058 /** 1059 */ 1060 public static final UnicodeBlock MALAYALAM 1061 = new UnicodeBlock("MALAYALAM", MALAYALAM_ID); 1062 /** 1063 */ 1064 public static final UnicodeBlock SINHALA 1065 = new UnicodeBlock("SINHALA", SINHALA_ID); 1066 /** 1067 */ 1068 public static final UnicodeBlock THAI 1069 = new UnicodeBlock("THAI", THAI_ID); 1070 /** 1071 */ 1072 public static final UnicodeBlock LAO 1073 = new UnicodeBlock("LAO", LAO_ID); 1074 /** 1075 */ 1076 public static final UnicodeBlock TIBETAN 1077 = new UnicodeBlock("TIBETAN", TIBETAN_ID); 1078 /** 1079 */ 1080 public static final UnicodeBlock MYANMAR 1081 = new UnicodeBlock("MYANMAR", MYANMAR_ID); 1082 /** 1083 */ 1084 public static final UnicodeBlock GEORGIAN 1085 = new UnicodeBlock("GEORGIAN", GEORGIAN_ID); 1086 /** 1087 */ 1088 public static final UnicodeBlock HANGUL_JAMO 1089 = new UnicodeBlock("HANGUL_JAMO", HANGUL_JAMO_ID); 1090 /** 1091 */ 1092 public static final UnicodeBlock ETHIOPIC 1093 = new UnicodeBlock("ETHIOPIC", ETHIOPIC_ID); 1094 /** 1095 */ 1096 public static final UnicodeBlock CHEROKEE 1097 = new UnicodeBlock("CHEROKEE", CHEROKEE_ID); 1098 /** 1099 */ 1100 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS 1101 = new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS", 1102 UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_ID); 1103 /** 1104 */ 1105 public static final UnicodeBlock OGHAM 1106 = new UnicodeBlock("OGHAM", OGHAM_ID); 1107 /** 1108 */ 1109 public static final UnicodeBlock RUNIC 1110 = new 
UnicodeBlock("RUNIC", RUNIC_ID); 1111 /** 1112 */ 1113 public static final UnicodeBlock KHMER 1114 = new UnicodeBlock("KHMER", KHMER_ID); 1115 /** 1116 */ 1117 public static final UnicodeBlock MONGOLIAN 1118 = new UnicodeBlock("MONGOLIAN", MONGOLIAN_ID); 1119 /** 1120 */ 1121 public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL 1122 = new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL", LATIN_EXTENDED_ADDITIONAL_ID); 1123 /** 1124 */ 1125 public static final UnicodeBlock GREEK_EXTENDED 1126 = new UnicodeBlock("GREEK_EXTENDED", GREEK_EXTENDED_ID); 1127 /** 1128 */ 1129 public static final UnicodeBlock GENERAL_PUNCTUATION 1130 = new UnicodeBlock("GENERAL_PUNCTUATION", GENERAL_PUNCTUATION_ID); 1131 /** 1132 */ 1133 public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS 1134 = new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS", SUPERSCRIPTS_AND_SUBSCRIPTS_ID); 1135 /** 1136 */ 1137 public static final UnicodeBlock CURRENCY_SYMBOLS 1138 = new UnicodeBlock("CURRENCY_SYMBOLS", CURRENCY_SYMBOLS_ID); 1139 /** 1140 * Unicode 3.2 renames this block to "Combining Diacritical Marks for 1141 * Symbols". 
1142 */ 1143 public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS 1144 = new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS", COMBINING_MARKS_FOR_SYMBOLS_ID); 1145 /** 1146 */ 1147 public static final UnicodeBlock LETTERLIKE_SYMBOLS 1148 = new UnicodeBlock("LETTERLIKE_SYMBOLS", LETTERLIKE_SYMBOLS_ID); 1149 /** 1150 */ 1151 public static final UnicodeBlock NUMBER_FORMS 1152 = new UnicodeBlock("NUMBER_FORMS", NUMBER_FORMS_ID); 1153 /** 1154 */ 1155 public static final UnicodeBlock ARROWS 1156 = new UnicodeBlock("ARROWS", ARROWS_ID); 1157 /** 1158 */ 1159 public static final UnicodeBlock MATHEMATICAL_OPERATORS 1160 = new UnicodeBlock("MATHEMATICAL_OPERATORS", MATHEMATICAL_OPERATORS_ID); 1161 /** 1162 */ 1163 public static final UnicodeBlock MISCELLANEOUS_TECHNICAL 1164 = new UnicodeBlock("MISCELLANEOUS_TECHNICAL", MISCELLANEOUS_TECHNICAL_ID); 1165 /** 1166 */ 1167 public static final UnicodeBlock CONTROL_PICTURES 1168 = new UnicodeBlock("CONTROL_PICTURES", CONTROL_PICTURES_ID); 1169 /** 1170 */ 1171 public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION 1172 = new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION", OPTICAL_CHARACTER_RECOGNITION_ID); 1173 /** 1174 */ 1175 public static final UnicodeBlock ENCLOSED_ALPHANUMERICS 1176 = new UnicodeBlock("ENCLOSED_ALPHANUMERICS", ENCLOSED_ALPHANUMERICS_ID); 1177 /** 1178 */ 1179 public static final UnicodeBlock BOX_DRAWING 1180 = new UnicodeBlock("BOX_DRAWING", BOX_DRAWING_ID); 1181 /** 1182 */ 1183 public static final UnicodeBlock BLOCK_ELEMENTS 1184 = new UnicodeBlock("BLOCK_ELEMENTS", BLOCK_ELEMENTS_ID); 1185 /** 1186 */ 1187 public static final UnicodeBlock GEOMETRIC_SHAPES 1188 = new UnicodeBlock("GEOMETRIC_SHAPES", GEOMETRIC_SHAPES_ID); 1189 /** 1190 */ 1191 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS 1192 = new UnicodeBlock("MISCELLANEOUS_SYMBOLS", MISCELLANEOUS_SYMBOLS_ID); 1193 /** 1194 */ 1195 public static final UnicodeBlock DINGBATS 1196 = new UnicodeBlock("DINGBATS", DINGBATS_ID); 1197 /** 1198 
*/ 1199 public static final UnicodeBlock BRAILLE_PATTERNS 1200 = new UnicodeBlock("BRAILLE_PATTERNS", BRAILLE_PATTERNS_ID); 1201 /** 1202 */ 1203 public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT 1204 = new UnicodeBlock("CJK_RADICALS_SUPPLEMENT", CJK_RADICALS_SUPPLEMENT_ID); 1205 /** 1206 */ 1207 public static final UnicodeBlock KANGXI_RADICALS 1208 = new UnicodeBlock("KANGXI_RADICALS", KANGXI_RADICALS_ID); 1209 /** 1210 */ 1211 public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS 1212 = new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS", 1213 IDEOGRAPHIC_DESCRIPTION_CHARACTERS_ID); 1214 /** 1215 */ 1216 public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION 1217 = new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION", CJK_SYMBOLS_AND_PUNCTUATION_ID); 1218 /** 1219 */ 1220 public static final UnicodeBlock HIRAGANA 1221 = new UnicodeBlock("HIRAGANA", HIRAGANA_ID); 1222 /** 1223 */ 1224 public static final UnicodeBlock KATAKANA 1225 = new UnicodeBlock("KATAKANA", KATAKANA_ID); 1226 /** 1227 */ 1228 public static final UnicodeBlock BOPOMOFO 1229 = new UnicodeBlock("BOPOMOFO", BOPOMOFO_ID); 1230 /** 1231 */ 1232 public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO 1233 = new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO", HANGUL_COMPATIBILITY_JAMO_ID); 1234 /** 1235 */ 1236 public static final UnicodeBlock KANBUN 1237 = new UnicodeBlock("KANBUN", KANBUN_ID); 1238 /** 1239 */ 1240 public static final UnicodeBlock BOPOMOFO_EXTENDED 1241 = new UnicodeBlock("BOPOMOFO_EXTENDED", BOPOMOFO_EXTENDED_ID); 1242 /** 1243 */ 1244 public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS 1245 = new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS", 1246 ENCLOSED_CJK_LETTERS_AND_MONTHS_ID); 1247 /** 1248 */ 1249 public static final UnicodeBlock CJK_COMPATIBILITY 1250 = new UnicodeBlock("CJK_COMPATIBILITY", CJK_COMPATIBILITY_ID); 1251 /** 1252 */ 1253 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A 1254 = new 
UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A", 1255 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_ID); 1256 /** 1257 */ 1258 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS 1259 = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS", CJK_UNIFIED_IDEOGRAPHS_ID); 1260 /** 1261 */ 1262 public static final UnicodeBlock YI_SYLLABLES 1263 = new UnicodeBlock("YI_SYLLABLES", YI_SYLLABLES_ID); 1264 /** 1265 */ 1266 public static final UnicodeBlock YI_RADICALS 1267 = new UnicodeBlock("YI_RADICALS", YI_RADICALS_ID); 1268 /** 1269 */ 1270 public static final UnicodeBlock HANGUL_SYLLABLES 1271 = new UnicodeBlock("HANGUL_SYLLABLES", HANGUL_SYLLABLES_ID); 1272 /** 1273 */ 1274 public static final UnicodeBlock HIGH_SURROGATES 1275 = new UnicodeBlock("HIGH_SURROGATES", HIGH_SURROGATES_ID); 1276 /** 1277 */ 1278 public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES 1279 = new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES", HIGH_PRIVATE_USE_SURROGATES_ID); 1280 /** 1281 */ 1282 public static final UnicodeBlock LOW_SURROGATES 1283 = new UnicodeBlock("LOW_SURROGATES", LOW_SURROGATES_ID); 1284 /** 1285 * Same as public static final int PRIVATE_USE. 1286 * Until Unicode 3.1.1; the corresponding block name was "Private Use"; 1287 * and multiple code point ranges had this block. 1288 * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" 1289 * and adds separate blocks for the supplementary PUAs. 1290 */ 1291 public static final UnicodeBlock PRIVATE_USE_AREA 1292 = new UnicodeBlock("PRIVATE_USE_AREA", 78); 1293 /** 1294 * Same as public static final int PRIVATE_USE_AREA. 1295 * Until Unicode 3.1.1; the corresponding block name was "Private Use"; 1296 * and multiple code point ranges had this block. 1297 * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" 1298 * and adds separate blocks for the supplementary PUAs. 
1299 */ 1300 public static final UnicodeBlock PRIVATE_USE 1301 = PRIVATE_USE_AREA; 1302 /** 1303 */ 1304 public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS 1305 = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS", CJK_COMPATIBILITY_IDEOGRAPHS_ID); 1306 /** 1307 */ 1308 public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS 1309 = new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS", ALPHABETIC_PRESENTATION_FORMS_ID); 1310 /** 1311 */ 1312 public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A 1313 = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A", ARABIC_PRESENTATION_FORMS_A_ID); 1314 /** 1315 */ 1316 public static final UnicodeBlock COMBINING_HALF_MARKS 1317 = new UnicodeBlock("COMBINING_HALF_MARKS", COMBINING_HALF_MARKS_ID); 1318 /** 1319 */ 1320 public static final UnicodeBlock CJK_COMPATIBILITY_FORMS 1321 = new UnicodeBlock("CJK_COMPATIBILITY_FORMS", CJK_COMPATIBILITY_FORMS_ID); 1322 /** 1323 */ 1324 public static final UnicodeBlock SMALL_FORM_VARIANTS 1325 = new UnicodeBlock("SMALL_FORM_VARIANTS", SMALL_FORM_VARIANTS_ID); 1326 /** 1327 */ 1328 public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B 1329 = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B", ARABIC_PRESENTATION_FORMS_B_ID); 1330 /** 1331 */ 1332 public static final UnicodeBlock SPECIALS 1333 = new UnicodeBlock("SPECIALS", SPECIALS_ID); 1334 /** 1335 */ 1336 public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS 1337 = new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS", HALFWIDTH_AND_FULLWIDTH_FORMS_ID); 1338 /** 1339 */ 1340 public static final UnicodeBlock OLD_ITALIC 1341 = new UnicodeBlock("OLD_ITALIC", OLD_ITALIC_ID); 1342 /** 1343 */ 1344 public static final UnicodeBlock GOTHIC 1345 = new UnicodeBlock("GOTHIC", GOTHIC_ID); 1346 /** 1347 */ 1348 public static final UnicodeBlock DESERET 1349 = new UnicodeBlock("DESERET", DESERET_ID); 1350 /** 1351 */ 1352 public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS 1353 = new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS", 
BYZANTINE_MUSICAL_SYMBOLS_ID); 1354 /** 1355 */ 1356 public static final UnicodeBlock MUSICAL_SYMBOLS 1357 = new UnicodeBlock("MUSICAL_SYMBOLS", MUSICAL_SYMBOLS_ID); 1358 /** 1359 */ 1360 public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS 1361 = new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS", 1362 MATHEMATICAL_ALPHANUMERIC_SYMBOLS_ID); 1363 /** 1364 */ 1365 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B 1366 = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B", 1367 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_ID); 1368 /** 1369 */ 1370 public static final UnicodeBlock 1371 CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT 1372 = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT", 1373 CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_ID); 1374 /** 1375 */ 1376 public static final UnicodeBlock TAGS 1377 = new UnicodeBlock("TAGS", TAGS_ID); 1378 1379 // New blocks in Unicode 3.2 1380 1381 /** 1382 * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement". 1383 */ 1384 public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY 1385 = new UnicodeBlock("CYRILLIC_SUPPLEMENTARY", CYRILLIC_SUPPLEMENTARY_ID); 1386 /** 1387 * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement". 
1388 */ 1389 public static final UnicodeBlock CYRILLIC_SUPPLEMENT 1390 = new UnicodeBlock("CYRILLIC_SUPPLEMENT", CYRILLIC_SUPPLEMENT_ID); 1391 /** 1392 */ 1393 public static final UnicodeBlock TAGALOG 1394 = new UnicodeBlock("TAGALOG", TAGALOG_ID); 1395 /** 1396 */ 1397 public static final UnicodeBlock HANUNOO 1398 = new UnicodeBlock("HANUNOO", HANUNOO_ID); 1399 /** 1400 */ 1401 public static final UnicodeBlock BUHID 1402 = new UnicodeBlock("BUHID", BUHID_ID); 1403 /** 1404 */ 1405 public static final UnicodeBlock TAGBANWA 1406 = new UnicodeBlock("TAGBANWA", TAGBANWA_ID); 1407 /** 1408 */ 1409 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A 1410 = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A", 1411 MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A_ID); 1412 /** 1413 */ 1414 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A 1415 = new UnicodeBlock("SUPPLEMENTAL_ARROWS_A", SUPPLEMENTAL_ARROWS_A_ID); 1416 /** 1417 */ 1418 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B 1419 = new UnicodeBlock("SUPPLEMENTAL_ARROWS_B", SUPPLEMENTAL_ARROWS_B_ID); 1420 /** 1421 */ 1422 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B 1423 = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B", 1424 MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B_ID); 1425 /** 1426 */ 1427 public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS 1428 = new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS", 1429 SUPPLEMENTAL_MATHEMATICAL_OPERATORS_ID); 1430 /** 1431 */ 1432 public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS 1433 = new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS", KATAKANA_PHONETIC_EXTENSIONS_ID); 1434 /** 1435 */ 1436 public static final UnicodeBlock VARIATION_SELECTORS 1437 = new UnicodeBlock("VARIATION_SELECTORS", VARIATION_SELECTORS_ID); 1438 /** 1439 */ 1440 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A 1441 = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A", 1442 
SUPPLEMENTARY_PRIVATE_USE_AREA_A_ID); 1443 /** 1444 */ 1445 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B 1446 = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B", 1447 SUPPLEMENTARY_PRIVATE_USE_AREA_B_ID); 1448 1449 /** 1450 */ 1451 public static final UnicodeBlock LIMBU 1452 = new UnicodeBlock("LIMBU", LIMBU_ID); 1453 /** 1454 */ 1455 public static final UnicodeBlock TAI_LE 1456 = new UnicodeBlock("TAI_LE", TAI_LE_ID); 1457 /** 1458 */ 1459 public static final UnicodeBlock KHMER_SYMBOLS 1460 = new UnicodeBlock("KHMER_SYMBOLS", KHMER_SYMBOLS_ID); 1461 1462 /** 1463 */ 1464 public static final UnicodeBlock PHONETIC_EXTENSIONS 1465 = new UnicodeBlock("PHONETIC_EXTENSIONS", PHONETIC_EXTENSIONS_ID); 1466 1467 /** 1468 */ 1469 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS 1470 = new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS", 1471 MISCELLANEOUS_SYMBOLS_AND_ARROWS_ID); 1472 /** 1473 */ 1474 public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS 1475 = new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS", YIJING_HEXAGRAM_SYMBOLS_ID); 1476 /** 1477 */ 1478 public static final UnicodeBlock LINEAR_B_SYLLABARY 1479 = new UnicodeBlock("LINEAR_B_SYLLABARY", LINEAR_B_SYLLABARY_ID); 1480 /** 1481 */ 1482 public static final UnicodeBlock LINEAR_B_IDEOGRAMS 1483 = new UnicodeBlock("LINEAR_B_IDEOGRAMS", LINEAR_B_IDEOGRAMS_ID); 1484 /** 1485 */ 1486 public static final UnicodeBlock AEGEAN_NUMBERS 1487 = new UnicodeBlock("AEGEAN_NUMBERS", AEGEAN_NUMBERS_ID); 1488 /** 1489 */ 1490 public static final UnicodeBlock UGARITIC 1491 = new UnicodeBlock("UGARITIC", UGARITIC_ID); 1492 /** 1493 */ 1494 public static final UnicodeBlock SHAVIAN 1495 = new UnicodeBlock("SHAVIAN", SHAVIAN_ID); 1496 /** 1497 */ 1498 public static final UnicodeBlock OSMANYA 1499 = new UnicodeBlock("OSMANYA", OSMANYA_ID); 1500 /** 1501 */ 1502 public static final UnicodeBlock CYPRIOT_SYLLABARY 1503 = new UnicodeBlock("CYPRIOT_SYLLABARY", CYPRIOT_SYLLABARY_ID); 1504 /** 1505 */ 
1506 public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS 1507 = new UnicodeBlock("TAI_XUAN_JING_SYMBOLS", TAI_XUAN_JING_SYMBOLS_ID); 1508 1509 /** 1510 */ 1511 public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT 1512 = new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT", VARIATION_SELECTORS_SUPPLEMENT_ID); 1513 1514 /* New blocks in Unicode 4.1 */ 1515 1516 /** 1517 */ 1518 public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION = 1519 new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION", 1520 ANCIENT_GREEK_MUSICAL_NOTATION_ID); /*[1D200]*/ 1521 1522 /** 1523 */ 1524 public static final UnicodeBlock ANCIENT_GREEK_NUMBERS = 1525 new UnicodeBlock("ANCIENT_GREEK_NUMBERS", ANCIENT_GREEK_NUMBERS_ID); /*[10140]*/ 1526 1527 /** 1528 */ 1529 public static final UnicodeBlock ARABIC_SUPPLEMENT = 1530 new UnicodeBlock("ARABIC_SUPPLEMENT", ARABIC_SUPPLEMENT_ID); /*[0750]*/ 1531 1532 /** 1533 */ 1534 public static final UnicodeBlock BUGINESE = 1535 new UnicodeBlock("BUGINESE", BUGINESE_ID); /*[1A00]*/ 1536 1537 /** 1538 */ 1539 public static final UnicodeBlock CJK_STROKES = 1540 new UnicodeBlock("CJK_STROKES", CJK_STROKES_ID); /*[31C0]*/ 1541 1542 /** 1543 */ 1544 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT = 1545 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT", 1546 COMBINING_DIACRITICAL_MARKS_SUPPLEMENT_ID); /*[1DC0]*/ 1547 1548 /** 1549 */ 1550 public static final UnicodeBlock COPTIC = new UnicodeBlock("COPTIC", COPTIC_ID); /*[2C80]*/ 1551 1552 /** 1553 */ 1554 public static final UnicodeBlock ETHIOPIC_EXTENDED = 1555 new UnicodeBlock("ETHIOPIC_EXTENDED", ETHIOPIC_EXTENDED_ID); /*[2D80]*/ 1556 1557 /** 1558 */ 1559 public static final UnicodeBlock ETHIOPIC_SUPPLEMENT = 1560 new UnicodeBlock("ETHIOPIC_SUPPLEMENT", ETHIOPIC_SUPPLEMENT_ID); /*[1380]*/ 1561 1562 /** 1563 */ 1564 public static final UnicodeBlock GEORGIAN_SUPPLEMENT = 1565 new UnicodeBlock("GEORGIAN_SUPPLEMENT", GEORGIAN_SUPPLEMENT_ID); /*[2D00]*/ 1566 1567 
/** 1568 */ 1569 public static final UnicodeBlock GLAGOLITIC = 1570 new UnicodeBlock("GLAGOLITIC", GLAGOLITIC_ID); /*[2C00]*/ 1571 1572 /** 1573 */ 1574 public static final UnicodeBlock KHAROSHTHI = 1575 new UnicodeBlock("KHAROSHTHI", KHAROSHTHI_ID); /*[10A00]*/ 1576 1577 /** 1578 */ 1579 public static final UnicodeBlock MODIFIER_TONE_LETTERS = 1580 new UnicodeBlock("MODIFIER_TONE_LETTERS", MODIFIER_TONE_LETTERS_ID); /*[A700]*/ 1581 1582 /** 1583 */ 1584 public static final UnicodeBlock NEW_TAI_LUE = 1585 new UnicodeBlock("NEW_TAI_LUE", NEW_TAI_LUE_ID); /*[1980]*/ 1586 1587 /** 1588 */ 1589 public static final UnicodeBlock OLD_PERSIAN = 1590 new UnicodeBlock("OLD_PERSIAN", OLD_PERSIAN_ID); /*[103A0]*/ 1591 1592 /** 1593 */ 1594 public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT = 1595 new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT", 1596 PHONETIC_EXTENSIONS_SUPPLEMENT_ID); /*[1D80]*/ 1597 1598 /** 1599 */ 1600 public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION = 1601 new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION", SUPPLEMENTAL_PUNCTUATION_ID); /*[2E00]*/ 1602 1603 /** 1604 */ 1605 public static final UnicodeBlock SYLOTI_NAGRI = 1606 new UnicodeBlock("SYLOTI_NAGRI", SYLOTI_NAGRI_ID); /*[A800]*/ 1607 1608 /** 1609 */ 1610 public static final UnicodeBlock TIFINAGH = 1611 new UnicodeBlock("TIFINAGH", TIFINAGH_ID); /*[2D30]*/ 1612 1613 /** 1614 */ 1615 public static final UnicodeBlock VERTICAL_FORMS = 1616 new UnicodeBlock("VERTICAL_FORMS", VERTICAL_FORMS_ID); /*[FE10]*/ 1617 1618 /** 1619 */ 1620 public static final UnicodeBlock NKO = new UnicodeBlock("NKO", NKO_ID); /*[07C0]*/ 1621 /** 1622 */ 1623 public static final UnicodeBlock BALINESE = 1624 new UnicodeBlock("BALINESE", BALINESE_ID); /*[1B00]*/ 1625 /** 1626 */ 1627 public static final UnicodeBlock LATIN_EXTENDED_C = 1628 new UnicodeBlock("LATIN_EXTENDED_C", LATIN_EXTENDED_C_ID); /*[2C60]*/ 1629 /** 1630 */ 1631 public static final UnicodeBlock LATIN_EXTENDED_D = 1632 new 
UnicodeBlock("LATIN_EXTENDED_D", LATIN_EXTENDED_D_ID); /*[A720]*/ 1633 /** 1634 */ 1635 public static final UnicodeBlock PHAGS_PA = 1636 new UnicodeBlock("PHAGS_PA", PHAGS_PA_ID); /*[A840]*/ 1637 /** 1638 */ 1639 public static final UnicodeBlock PHOENICIAN = 1640 new UnicodeBlock("PHOENICIAN", PHOENICIAN_ID); /*[10900]*/ 1641 /** 1642 */ 1643 public static final UnicodeBlock CUNEIFORM = 1644 new UnicodeBlock("CUNEIFORM", CUNEIFORM_ID); /*[12000]*/ 1645 /** 1646 */ 1647 public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION = 1648 new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION", 1649 CUNEIFORM_NUMBERS_AND_PUNCTUATION_ID); /*[12400]*/ 1650 /** 1651 */ 1652 public static final UnicodeBlock COUNTING_ROD_NUMERALS = 1653 new UnicodeBlock("COUNTING_ROD_NUMERALS", COUNTING_ROD_NUMERALS_ID); /*[1D360]*/ 1654 1655 /** 1656 */ 1657 public static final UnicodeBlock SUNDANESE = 1658 new UnicodeBlock("SUNDANESE", SUNDANESE_ID); /* [1B80] */ 1659 1660 /** 1661 */ 1662 public static final UnicodeBlock LEPCHA = 1663 new UnicodeBlock("LEPCHA", LEPCHA_ID); /* [1C00] */ 1664 1665 /** 1666 */ 1667 public static final UnicodeBlock OL_CHIKI = 1668 new UnicodeBlock("OL_CHIKI", OL_CHIKI_ID); /* [1C50] */ 1669 1670 /** 1671 */ 1672 public static final UnicodeBlock CYRILLIC_EXTENDED_A = 1673 new UnicodeBlock("CYRILLIC_EXTENDED_A", CYRILLIC_EXTENDED_A_ID); /* [2DE0] */ 1674 1675 /** 1676 */ 1677 public static final UnicodeBlock VAI = new UnicodeBlock("VAI", VAI_ID); /* [A500] */ 1678 1679 /** 1680 */ 1681 public static final UnicodeBlock CYRILLIC_EXTENDED_B = 1682 new UnicodeBlock("CYRILLIC_EXTENDED_B", CYRILLIC_EXTENDED_B_ID); /* [A640] */ 1683 1684 /** 1685 */ 1686 public static final UnicodeBlock SAURASHTRA = 1687 new UnicodeBlock("SAURASHTRA", SAURASHTRA_ID); /* [A880] */ 1688 1689 /** 1690 */ 1691 public static final UnicodeBlock KAYAH_LI = 1692 new UnicodeBlock("KAYAH_LI", KAYAH_LI_ID); /* [A900] */ 1693 1694 /** 1695 */ 1696 public static final UnicodeBlock REJANG = 
1697 new UnicodeBlock("REJANG", REJANG_ID); /* [A930] */ 1698 1699 /** 1700 */ 1701 public static final UnicodeBlock CHAM = 1702 new UnicodeBlock("CHAM", CHAM_ID); /* [AA00] */ 1703 1704 /** 1705 */ 1706 public static final UnicodeBlock ANCIENT_SYMBOLS = 1707 new UnicodeBlock("ANCIENT_SYMBOLS", ANCIENT_SYMBOLS_ID); /* [10190] */ 1708 1709 /** 1710 */ 1711 public static final UnicodeBlock PHAISTOS_DISC = 1712 new UnicodeBlock("PHAISTOS_DISC", PHAISTOS_DISC_ID); /* [101D0] */ 1713 1714 /** 1715 */ 1716 public static final UnicodeBlock LYCIAN = 1717 new UnicodeBlock("LYCIAN", LYCIAN_ID); /* [10280] */ 1718 1719 /** 1720 */ 1721 public static final UnicodeBlock CARIAN = 1722 new UnicodeBlock("CARIAN", CARIAN_ID); /* [102A0] */ 1723 1724 /** 1725 */ 1726 public static final UnicodeBlock LYDIAN = 1727 new UnicodeBlock("LYDIAN", LYDIAN_ID); /* [10920] */ 1728 1729 /** 1730 */ 1731 public static final UnicodeBlock MAHJONG_TILES = 1732 new UnicodeBlock("MAHJONG_TILES", MAHJONG_TILES_ID); /* [1F000] */ 1733 1734 /** 1735 */ 1736 public static final UnicodeBlock DOMINO_TILES = 1737 new UnicodeBlock("DOMINO_TILES", DOMINO_TILES_ID); /* [1F030] */ 1738 1739 /* New blocks in Unicode 5.2 */ 1740 1741 /***/ 1742 public static final UnicodeBlock SAMARITAN = 1743 new UnicodeBlock("SAMARITAN", SAMARITAN_ID); /*[0800]*/ 1744 /***/ 1745 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED = 1746 new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED", 1747 UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_ID); /*[18B0]*/ 1748 /***/ 1749 public static final UnicodeBlock TAI_THAM = 1750 new UnicodeBlock("TAI_THAM", TAI_THAM_ID); /*[1A20]*/ 1751 /***/ 1752 public static final UnicodeBlock VEDIC_EXTENSIONS = 1753 new UnicodeBlock("VEDIC_EXTENSIONS", VEDIC_EXTENSIONS_ID); /*[1CD0]*/ 1754 /***/ 1755 public static final UnicodeBlock LISU = 1756 new UnicodeBlock("LISU", LISU_ID); /*[A4D0]*/ 1757 /***/ 1758 public static final UnicodeBlock BAMUM = 1759 new 
UnicodeBlock("BAMUM", BAMUM_ID); /*[A6A0]*/ 1760 /***/ 1761 public static final UnicodeBlock COMMON_INDIC_NUMBER_FORMS = 1762 new UnicodeBlock("COMMON_INDIC_NUMBER_FORMS", COMMON_INDIC_NUMBER_FORMS_ID); /*[A830]*/ 1763 /***/ 1764 public static final UnicodeBlock DEVANAGARI_EXTENDED = 1765 new UnicodeBlock("DEVANAGARI_EXTENDED", DEVANAGARI_EXTENDED_ID); /*[A8E0]*/ 1766 /***/ 1767 public static final UnicodeBlock HANGUL_JAMO_EXTENDED_A = 1768 new UnicodeBlock("HANGUL_JAMO_EXTENDED_A", HANGUL_JAMO_EXTENDED_A_ID); /*[A960]*/ 1769 /***/ 1770 public static final UnicodeBlock JAVANESE = 1771 new UnicodeBlock("JAVANESE", JAVANESE_ID); /*[A980]*/ 1772 /***/ 1773 public static final UnicodeBlock MYANMAR_EXTENDED_A = 1774 new UnicodeBlock("MYANMAR_EXTENDED_A", MYANMAR_EXTENDED_A_ID); /*[AA60]*/ 1775 /***/ 1776 public static final UnicodeBlock TAI_VIET = 1777 new UnicodeBlock("TAI_VIET", TAI_VIET_ID); /*[AA80]*/ 1778 /***/ 1779 public static final UnicodeBlock MEETEI_MAYEK = 1780 new UnicodeBlock("MEETEI_MAYEK", MEETEI_MAYEK_ID); /*[ABC0]*/ 1781 /***/ 1782 public static final UnicodeBlock HANGUL_JAMO_EXTENDED_B = 1783 new UnicodeBlock("HANGUL_JAMO_EXTENDED_B", HANGUL_JAMO_EXTENDED_B_ID); /*[D7B0]*/ 1784 /***/ 1785 public static final UnicodeBlock IMPERIAL_ARAMAIC = 1786 new UnicodeBlock("IMPERIAL_ARAMAIC", IMPERIAL_ARAMAIC_ID); /*[10840]*/ 1787 /***/ 1788 public static final UnicodeBlock OLD_SOUTH_ARABIAN = 1789 new UnicodeBlock("OLD_SOUTH_ARABIAN", OLD_SOUTH_ARABIAN_ID); /*[10A60]*/ 1790 /***/ 1791 public static final UnicodeBlock AVESTAN = 1792 new UnicodeBlock("AVESTAN", AVESTAN_ID); /*[10B00]*/ 1793 /***/ 1794 public static final UnicodeBlock INSCRIPTIONAL_PARTHIAN = 1795 new UnicodeBlock("INSCRIPTIONAL_PARTHIAN", INSCRIPTIONAL_PARTHIAN_ID); /*[10B40]*/ 1796 /***/ 1797 public static final UnicodeBlock INSCRIPTIONAL_PAHLAVI = 1798 new UnicodeBlock("INSCRIPTIONAL_PAHLAVI", INSCRIPTIONAL_PAHLAVI_ID); /*[10B60]*/ 1799 /***/ 1800 public static final UnicodeBlock OLD_TURKIC = 
1801 new UnicodeBlock("OLD_TURKIC", OLD_TURKIC_ID); /*[10C00]*/ 1802 /***/ 1803 public static final UnicodeBlock RUMI_NUMERAL_SYMBOLS = 1804 new UnicodeBlock("RUMI_NUMERAL_SYMBOLS", RUMI_NUMERAL_SYMBOLS_ID); /*[10E60]*/ 1805 /***/ 1806 public static final UnicodeBlock KAITHI = 1807 new UnicodeBlock("KAITHI", KAITHI_ID); /*[11080]*/ 1808 /***/ 1809 public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS = 1810 new UnicodeBlock("EGYPTIAN_HIEROGLYPHS", EGYPTIAN_HIEROGLYPHS_ID); /*[13000]*/ 1811 /***/ 1812 public static final UnicodeBlock ENCLOSED_ALPHANUMERIC_SUPPLEMENT = 1813 new UnicodeBlock("ENCLOSED_ALPHANUMERIC_SUPPLEMENT", 1814 ENCLOSED_ALPHANUMERIC_SUPPLEMENT_ID); /*[1F100]*/ 1815 /***/ 1816 public static final UnicodeBlock ENCLOSED_IDEOGRAPHIC_SUPPLEMENT = 1817 new UnicodeBlock("ENCLOSED_IDEOGRAPHIC_SUPPLEMENT", 1818 ENCLOSED_IDEOGRAPHIC_SUPPLEMENT_ID); /*[1F200]*/ 1819 /***/ 1820 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C = 1821 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C", 1822 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_ID); /*[2A700]*/ 1823 1824 /* New blocks in Unicode 6.0 */ 1825 1826 /***/ 1827 public static final UnicodeBlock MANDAIC = 1828 new UnicodeBlock("MANDAIC", MANDAIC_ID); /*[0840]*/ 1829 /***/ 1830 public static final UnicodeBlock BATAK = 1831 new UnicodeBlock("BATAK", BATAK_ID); /*[1BC0]*/ 1832 /***/ 1833 public static final UnicodeBlock ETHIOPIC_EXTENDED_A = 1834 new UnicodeBlock("ETHIOPIC_EXTENDED_A", ETHIOPIC_EXTENDED_A_ID); /*[AB00]*/ 1835 /***/ 1836 public static final UnicodeBlock BRAHMI = 1837 new UnicodeBlock("BRAHMI", BRAHMI_ID); /*[11000]*/ 1838 /***/ 1839 public static final UnicodeBlock BAMUM_SUPPLEMENT = 1840 new UnicodeBlock("BAMUM_SUPPLEMENT", BAMUM_SUPPLEMENT_ID); /*[16800]*/ 1841 /***/ 1842 public static final UnicodeBlock KANA_SUPPLEMENT = 1843 new UnicodeBlock("KANA_SUPPLEMENT", KANA_SUPPLEMENT_ID); /*[1B000]*/ 1844 /***/ 1845 public static final UnicodeBlock PLAYING_CARDS = 1846 new 
UnicodeBlock("PLAYING_CARDS", PLAYING_CARDS_ID); /*[1F0A0]*/ 1847 /***/ 1848 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS = 1849 new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS", 1850 MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS_ID); /*[1F300]*/ 1851 /***/ 1852 public static final UnicodeBlock EMOTICONS = 1853 new UnicodeBlock("EMOTICONS", EMOTICONS_ID); /*[1F600]*/ 1854 /***/ 1855 public static final UnicodeBlock TRANSPORT_AND_MAP_SYMBOLS = 1856 new UnicodeBlock("TRANSPORT_AND_MAP_SYMBOLS", TRANSPORT_AND_MAP_SYMBOLS_ID); /*[1F680]*/ 1857 /***/ 1858 public static final UnicodeBlock ALCHEMICAL_SYMBOLS = 1859 new UnicodeBlock("ALCHEMICAL_SYMBOLS", ALCHEMICAL_SYMBOLS_ID); /*[1F700]*/ 1860 /***/ 1861 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D = 1862 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D", 1863 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_ID); /*[2B740]*/ 1864 1865 /* New blocks in Unicode 6.1 */ 1866 1867 /***/ 1868 public static final UnicodeBlock ARABIC_EXTENDED_A = 1869 new UnicodeBlock("ARABIC_EXTENDED_A", ARABIC_EXTENDED_A_ID); /*[08A0]*/ 1870 /***/ 1871 public static final UnicodeBlock ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS = 1872 new UnicodeBlock("ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS", ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS_ID); /*[1EE00]*/ 1873 /***/ 1874 public static final UnicodeBlock CHAKMA = new UnicodeBlock("CHAKMA", CHAKMA_ID); /*[11100]*/ 1875 /***/ 1876 public static final UnicodeBlock MEETEI_MAYEK_EXTENSIONS = 1877 new UnicodeBlock("MEETEI_MAYEK_EXTENSIONS", MEETEI_MAYEK_EXTENSIONS_ID); /*[AAE0]*/ 1878 /***/ 1879 public static final UnicodeBlock MEROITIC_CURSIVE = 1880 new UnicodeBlock("MEROITIC_CURSIVE", MEROITIC_CURSIVE_ID); /*[109A0]*/ 1881 /***/ 1882 public static final UnicodeBlock MEROITIC_HIEROGLYPHS = 1883 new UnicodeBlock("MEROITIC_HIEROGLYPHS", MEROITIC_HIEROGLYPHS_ID); /*[10980]*/ 1884 /***/ 1885 public static final UnicodeBlock MIAO = new UnicodeBlock("MIAO", MIAO_ID); /*[16F00]*/ 
1886 /***/ 1887 public static final UnicodeBlock SHARADA = new UnicodeBlock("SHARADA", SHARADA_ID); /*[11180]*/ 1888 /***/ 1889 public static final UnicodeBlock SORA_SOMPENG = 1890 new UnicodeBlock("SORA_SOMPENG", SORA_SOMPENG_ID); /*[110D0]*/ 1891 /***/ 1892 public static final UnicodeBlock SUNDANESE_SUPPLEMENT = 1893 new UnicodeBlock("SUNDANESE_SUPPLEMENT", SUNDANESE_SUPPLEMENT_ID); /*[1CC0]*/ 1894 /***/ 1895 public static final UnicodeBlock TAKRI = new UnicodeBlock("TAKRI", TAKRI_ID); /*[11680]*/ 1896 1897 /* New blocks in Unicode 7.0 */ 1898 1899 /***/ 1900 public static final UnicodeBlock BASSA_VAH = new UnicodeBlock("BASSA_VAH", BASSA_VAH_ID); /*[16AD0]*/ 1901 /***/ 1902 public static final UnicodeBlock CAUCASIAN_ALBANIAN = 1903 new UnicodeBlock("CAUCASIAN_ALBANIAN", CAUCASIAN_ALBANIAN_ID); /*[10530]*/ 1904 /***/ 1905 public static final UnicodeBlock COPTIC_EPACT_NUMBERS = 1906 new UnicodeBlock("COPTIC_EPACT_NUMBERS", COPTIC_EPACT_NUMBERS_ID); /*[102E0]*/ 1907 /***/ 1908 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_EXTENDED = 1909 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_EXTENDED", COMBINING_DIACRITICAL_MARKS_EXTENDED_ID); /*[1AB0]*/ 1910 /***/ 1911 public static final UnicodeBlock DUPLOYAN = new UnicodeBlock("DUPLOYAN", DUPLOYAN_ID); /*[1BC00]*/ 1912 /***/ 1913 public static final UnicodeBlock ELBASAN = new UnicodeBlock("ELBASAN", ELBASAN_ID); /*[10500]*/ 1914 /***/ 1915 public static final UnicodeBlock GEOMETRIC_SHAPES_EXTENDED = 1916 new UnicodeBlock("GEOMETRIC_SHAPES_EXTENDED", GEOMETRIC_SHAPES_EXTENDED_ID); /*[1F780]*/ 1917 /***/ 1918 public static final UnicodeBlock GRANTHA = new UnicodeBlock("GRANTHA", GRANTHA_ID); /*[11300]*/ 1919 /***/ 1920 public static final UnicodeBlock KHOJKI = new UnicodeBlock("KHOJKI", KHOJKI_ID); /*[11200]*/ 1921 /***/ 1922 public static final UnicodeBlock KHUDAWADI = new UnicodeBlock("KHUDAWADI", KHUDAWADI_ID); /*[112B0]*/ 1923 /***/ 1924 public static final UnicodeBlock LATIN_EXTENDED_E = 1925 new 
UnicodeBlock("LATIN_EXTENDED_E", LATIN_EXTENDED_E_ID); /*[AB30]*/ 1926 /***/ 1927 public static final UnicodeBlock LINEAR_A = new UnicodeBlock("LINEAR_A", LINEAR_A_ID); /*[10600]*/ 1928 /***/ 1929 public static final UnicodeBlock MAHAJANI = new UnicodeBlock("MAHAJANI", MAHAJANI_ID); /*[11150]*/ 1930 /***/ 1931 public static final UnicodeBlock MANICHAEAN = new UnicodeBlock("MANICHAEAN", MANICHAEAN_ID); /*[10AC0]*/ 1932 /***/ 1933 public static final UnicodeBlock MENDE_KIKAKUI = 1934 new UnicodeBlock("MENDE_KIKAKUI", MENDE_KIKAKUI_ID); /*[1E800]*/ 1935 /***/ 1936 public static final UnicodeBlock MODI = new UnicodeBlock("MODI", MODI_ID); /*[11600]*/ 1937 /***/ 1938 public static final UnicodeBlock MRO = new UnicodeBlock("MRO", MRO_ID); /*[16A40]*/ 1939 /***/ 1940 public static final UnicodeBlock MYANMAR_EXTENDED_B = 1941 new UnicodeBlock("MYANMAR_EXTENDED_B", MYANMAR_EXTENDED_B_ID); /*[A9E0]*/ 1942 /***/ 1943 public static final UnicodeBlock NABATAEAN = new UnicodeBlock("NABATAEAN", NABATAEAN_ID); /*[10880]*/ 1944 /***/ 1945 public static final UnicodeBlock OLD_NORTH_ARABIAN = 1946 new UnicodeBlock("OLD_NORTH_ARABIAN", OLD_NORTH_ARABIAN_ID); /*[10A80]*/ 1947 /***/ 1948 public static final UnicodeBlock OLD_PERMIC = new UnicodeBlock("OLD_PERMIC", OLD_PERMIC_ID); /*[10350]*/ 1949 /***/ 1950 public static final UnicodeBlock ORNAMENTAL_DINGBATS = 1951 new UnicodeBlock("ORNAMENTAL_DINGBATS", ORNAMENTAL_DINGBATS_ID); /*[1F650]*/ 1952 /***/ 1953 public static final UnicodeBlock PAHAWH_HMONG = new UnicodeBlock("PAHAWH_HMONG", PAHAWH_HMONG_ID); /*[16B00]*/ 1954 /***/ 1955 public static final UnicodeBlock PALMYRENE = new UnicodeBlock("PALMYRENE", PALMYRENE_ID); /*[10860]*/ 1956 /***/ 1957 public static final UnicodeBlock PAU_CIN_HAU = new UnicodeBlock("PAU_CIN_HAU", PAU_CIN_HAU_ID); /*[11AC0]*/ 1958 /***/ 1959 public static final UnicodeBlock PSALTER_PAHLAVI = 1960 new UnicodeBlock("PSALTER_PAHLAVI", PSALTER_PAHLAVI_ID); /*[10B80]*/ 1961 /***/ 1962 public static final 
UnicodeBlock SHORTHAND_FORMAT_CONTROLS = 1963 new UnicodeBlock("SHORTHAND_FORMAT_CONTROLS", SHORTHAND_FORMAT_CONTROLS_ID); /*[1BCA0]*/ 1964 /***/ 1965 public static final UnicodeBlock SIDDHAM = new UnicodeBlock("SIDDHAM", SIDDHAM_ID); /*[11580]*/ 1966 /***/ 1967 public static final UnicodeBlock SINHALA_ARCHAIC_NUMBERS = 1968 new UnicodeBlock("SINHALA_ARCHAIC_NUMBERS", SINHALA_ARCHAIC_NUMBERS_ID); /*[111E0]*/ 1969 /***/ 1970 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_C = 1971 new UnicodeBlock("SUPPLEMENTAL_ARROWS_C", SUPPLEMENTAL_ARROWS_C_ID); /*[1F800]*/ 1972 /***/ 1973 public static final UnicodeBlock TIRHUTA = new UnicodeBlock("TIRHUTA", TIRHUTA_ID); /*[11480]*/ 1974 /***/ 1975 public static final UnicodeBlock WARANG_CITI = new UnicodeBlock("WARANG_CITI", WARANG_CITI_ID); /*[118A0]*/ 1976 1977 /* New blocks in Unicode 8.0 */ 1978 1979 /***/ 1980 public static final UnicodeBlock AHOM = new UnicodeBlock("AHOM", AHOM_ID); /*[11700]*/ 1981 /***/ 1982 public static final UnicodeBlock ANATOLIAN_HIEROGLYPHS = 1983 new UnicodeBlock("ANATOLIAN_HIEROGLYPHS", ANATOLIAN_HIEROGLYPHS_ID); /*[14400]*/ 1984 /***/ 1985 public static final UnicodeBlock CHEROKEE_SUPPLEMENT = 1986 new UnicodeBlock("CHEROKEE_SUPPLEMENT", CHEROKEE_SUPPLEMENT_ID); /*[AB70]*/ 1987 /***/ 1988 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E = 1989 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E", 1990 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E_ID); /*[2B820]*/ 1991 /***/ 1992 public static final UnicodeBlock EARLY_DYNASTIC_CUNEIFORM = 1993 new UnicodeBlock("EARLY_DYNASTIC_CUNEIFORM", EARLY_DYNASTIC_CUNEIFORM_ID); /*[12480]*/ 1994 /***/ 1995 public static final UnicodeBlock HATRAN = new UnicodeBlock("HATRAN", HATRAN_ID); /*[108E0]*/ 1996 /***/ 1997 public static final UnicodeBlock MULTANI = new UnicodeBlock("MULTANI", MULTANI_ID); /*[11280]*/ 1998 /***/ 1999 public static final UnicodeBlock OLD_HUNGARIAN = 2000 new UnicodeBlock("OLD_HUNGARIAN", OLD_HUNGARIAN_ID); /*[10C80]*/ 2001 
/***/ 2002 public static final UnicodeBlock SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS = 2003 new UnicodeBlock("SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS", 2004 SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS_ID); /*[1F900]*/ 2005 /***/ 2006 public static final UnicodeBlock SUTTON_SIGNWRITING = 2007 new UnicodeBlock("SUTTON_SIGNWRITING", SUTTON_SIGNWRITING_ID); /*[1D800]*/ 2008 2009 /** 2010 */ 2011 public static final UnicodeBlock INVALID_CODE 2012 = new UnicodeBlock("INVALID_CODE", INVALID_CODE_ID); 2013 2014 static { 2015 for (int blockId = 0; blockId < COUNT; ++blockId) { 2016 if (BLOCKS_[blockId] == null) { 2017 throw new java.lang.IllegalStateException( 2018 "UnicodeBlock.BLOCKS_[" + blockId + "] not initialized"); 2019 } 2020 } 2021 } 2022 2023 // public methods -------------------------------------------------- 2024 2025 /** 2026 * <strong>[icu]</strong> Returns the only instance of the UnicodeBlock with the argument ID. 2027 * If no such ID exists, a INVALID_CODE UnicodeBlock will be returned. 2028 * @param id UnicodeBlock ID 2029 * @return the only instance of the UnicodeBlock with the argument ID 2030 * if it exists, otherwise a INVALID_CODE UnicodeBlock will be 2031 * returned. 2032 */ getInstance(int id)2033 public static UnicodeBlock getInstance(int id) 2034 { 2035 if (id >= 0 && id < BLOCKS_.length) { 2036 return BLOCKS_[id]; 2037 } 2038 return INVALID_CODE; 2039 } 2040 2041 /** 2042 * Returns the Unicode allocation block that contains the code point, 2043 * or null if the code point is not a member of a defined block. 
2044 * @param ch code point to be tested 2045 * @return the Unicode allocation block that contains the code point 2046 */ of(int ch)2047 public static UnicodeBlock of(int ch) 2048 { 2049 if (ch > MAX_VALUE) { 2050 return INVALID_CODE; 2051 } 2052 2053 return UnicodeBlock.getInstance( 2054 UCharacterProperty.INSTANCE.getIntPropertyValue(ch, UProperty.BLOCK)); 2055 } 2056 2057 /** 2058 * Alternative to the {@link java.lang.Character.UnicodeBlock#forName(String)} method. 2059 * Returns the Unicode block with the given name. <strong>[icu] Note:</strong> Unlike 2060 * {@link java.lang.Character.UnicodeBlock#forName(String)}, this only matches 2061 * against the official UCD name and the Java block name 2062 * (ignoring case). 2063 * @param blockName the name of the block to match 2064 * @return the UnicodeBlock with that name 2065 * @throws IllegalArgumentException if the blockName could not be matched 2066 */ forName(String blockName)2067 public static final UnicodeBlock forName(String blockName) { 2068 Map<String, UnicodeBlock> m = null; 2069 if (mref != null) { 2070 m = mref.get(); 2071 } 2072 if (m == null) { 2073 m = new HashMap<String, UnicodeBlock>(BLOCKS_.length); 2074 for (int i = 0; i < BLOCKS_.length; ++i) { 2075 UnicodeBlock b = BLOCKS_[i]; 2076 String name = trimBlockName( 2077 getPropertyValueName(UProperty.BLOCK, b.getID(), 2078 UProperty.NameChoice.LONG)); 2079 m.put(name, b); 2080 } 2081 mref = new SoftReference<Map<String, UnicodeBlock>>(m); 2082 } 2083 UnicodeBlock b = m.get(trimBlockName(blockName)); 2084 if (b == null) { 2085 throw new IllegalArgumentException(); 2086 } 2087 return b; 2088 } 2089 private static SoftReference<Map<String, UnicodeBlock>> mref; 2090 trimBlockName(String name)2091 private static String trimBlockName(String name) { 2092 String upper = name.toUpperCase(Locale.ENGLISH); 2093 StringBuilder result = new StringBuilder(upper.length()); 2094 for (int i = 0; i < upper.length(); i++) { 2095 char c = upper.charAt(i); 2096 if (c != 
' ' && c != '_' && c != '-') { 2097 result.append(c); 2098 } 2099 } 2100 return result.toString(); 2101 } 2102 2103 /** 2104 * {icu} Returns the type ID of this Unicode block 2105 * @return integer type ID of this Unicode block 2106 */ getID()2107 public int getID() 2108 { 2109 return m_id_; 2110 } 2111 2112 // private data members --------------------------------------------- 2113 2114 /** 2115 * Identification code for this UnicodeBlock 2116 */ 2117 private int m_id_; 2118 2119 // private constructor ---------------------------------------------- 2120 2121 /** 2122 * UnicodeBlock constructor 2123 * @param name name of this UnicodeBlock 2124 * @param id unique id of this UnicodeBlock 2125 * @exception NullPointerException if name is <code>null</code> 2126 */ UnicodeBlock(String name, int id)2127 private UnicodeBlock(String name, int id) 2128 { 2129 super(name); 2130 m_id_ = id; 2131 if (id >= 0) { 2132 BLOCKS_[id] = this; 2133 } 2134 } 2135 } 2136 2137 /** 2138 * East Asian Width constants. 2139 * @see UProperty#EAST_ASIAN_WIDTH 2140 * @see UCharacter#getIntPropertyValue 2141 */ 2142 public static interface EastAsianWidth 2143 { 2144 /** 2145 */ 2146 public static final int NEUTRAL = 0; 2147 /** 2148 */ 2149 public static final int AMBIGUOUS = 1; 2150 /** 2151 */ 2152 public static final int HALFWIDTH = 2; 2153 /** 2154 */ 2155 public static final int FULLWIDTH = 3; 2156 /** 2157 */ 2158 public static final int NARROW = 4; 2159 /** 2160 */ 2161 public static final int WIDE = 5; 2162 /** 2163 * @hide unsupported on Android 2164 */ 2165 public static final int COUNT = 6; 2166 } 2167 2168 /** 2169 * Decomposition Type constants. 
2170 * @see UProperty#DECOMPOSITION_TYPE 2171 */ 2172 public static interface DecompositionType 2173 { 2174 /** 2175 */ 2176 public static final int NONE = 0; 2177 /** 2178 */ 2179 public static final int CANONICAL = 1; 2180 /** 2181 */ 2182 public static final int COMPAT = 2; 2183 /** 2184 */ 2185 public static final int CIRCLE = 3; 2186 /** 2187 */ 2188 public static final int FINAL = 4; 2189 /** 2190 */ 2191 public static final int FONT = 5; 2192 /** 2193 */ 2194 public static final int FRACTION = 6; 2195 /** 2196 */ 2197 public static final int INITIAL = 7; 2198 /** 2199 */ 2200 public static final int ISOLATED = 8; 2201 /** 2202 */ 2203 public static final int MEDIAL = 9; 2204 /** 2205 */ 2206 public static final int NARROW = 10; 2207 /** 2208 */ 2209 public static final int NOBREAK = 11; 2210 /** 2211 */ 2212 public static final int SMALL = 12; 2213 /** 2214 */ 2215 public static final int SQUARE = 13; 2216 /** 2217 */ 2218 public static final int SUB = 14; 2219 /** 2220 */ 2221 public static final int SUPER = 15; 2222 /** 2223 */ 2224 public static final int VERTICAL = 16; 2225 /** 2226 */ 2227 public static final int WIDE = 17; 2228 /** 2229 * @hide unsupported on Android 2230 */ 2231 public static final int COUNT = 18; 2232 } 2233 2234 /** 2235 * Joining Type constants. 2236 * @see UProperty#JOINING_TYPE 2237 */ 2238 public static interface JoiningType 2239 { 2240 /** 2241 */ 2242 public static final int NON_JOINING = 0; 2243 /** 2244 */ 2245 public static final int JOIN_CAUSING = 1; 2246 /** 2247 */ 2248 public static final int DUAL_JOINING = 2; 2249 /** 2250 */ 2251 public static final int LEFT_JOINING = 3; 2252 /** 2253 */ 2254 public static final int RIGHT_JOINING = 4; 2255 /** 2256 */ 2257 public static final int TRANSPARENT = 5; 2258 /** 2259 * @hide unsupported on Android 2260 */ 2261 public static final int COUNT = 6; 2262 } 2263 2264 /** 2265 * Joining Group constants. 
2266 * @see UProperty#JOINING_GROUP 2267 */ 2268 public static interface JoiningGroup 2269 { 2270 /** 2271 */ 2272 public static final int NO_JOINING_GROUP = 0; 2273 /** 2274 */ 2275 public static final int AIN = 1; 2276 /** 2277 */ 2278 public static final int ALAPH = 2; 2279 /** 2280 */ 2281 public static final int ALEF = 3; 2282 /** 2283 */ 2284 public static final int BEH = 4; 2285 /** 2286 */ 2287 public static final int BETH = 5; 2288 /** 2289 */ 2290 public static final int DAL = 6; 2291 /** 2292 */ 2293 public static final int DALATH_RISH = 7; 2294 /** 2295 */ 2296 public static final int E = 8; 2297 /** 2298 */ 2299 public static final int FEH = 9; 2300 /** 2301 */ 2302 public static final int FINAL_SEMKATH = 10; 2303 /** 2304 */ 2305 public static final int GAF = 11; 2306 /** 2307 */ 2308 public static final int GAMAL = 12; 2309 /** 2310 */ 2311 public static final int HAH = 13; 2312 /***/ 2313 public static final int TEH_MARBUTA_GOAL = 14; 2314 /** 2315 */ 2316 public static final int HAMZA_ON_HEH_GOAL = TEH_MARBUTA_GOAL; 2317 /** 2318 */ 2319 public static final int HE = 15; 2320 /** 2321 */ 2322 public static final int HEH = 16; 2323 /** 2324 */ 2325 public static final int HEH_GOAL = 17; 2326 /** 2327 */ 2328 public static final int HETH = 18; 2329 /** 2330 */ 2331 public static final int KAF = 19; 2332 /** 2333 */ 2334 public static final int KAPH = 20; 2335 /** 2336 */ 2337 public static final int KNOTTED_HEH = 21; 2338 /** 2339 */ 2340 public static final int LAM = 22; 2341 /** 2342 */ 2343 public static final int LAMADH = 23; 2344 /** 2345 */ 2346 public static final int MEEM = 24; 2347 /** 2348 */ 2349 public static final int MIM = 25; 2350 /** 2351 */ 2352 public static final int NOON = 26; 2353 /** 2354 */ 2355 public static final int NUN = 27; 2356 /** 2357 */ 2358 public static final int PE = 28; 2359 /** 2360 */ 2361 public static final int QAF = 29; 2362 /** 2363 */ 2364 public static final int QAPH = 30; 2365 /** 2366 */ 2367 public static 
final int REH = 31; 2368 /** 2369 */ 2370 public static final int REVERSED_PE = 32; 2371 /** 2372 */ 2373 public static final int SAD = 33; 2374 /** 2375 */ 2376 public static final int SADHE = 34; 2377 /** 2378 */ 2379 public static final int SEEN = 35; 2380 /** 2381 */ 2382 public static final int SEMKATH = 36; 2383 /** 2384 */ 2385 public static final int SHIN = 37; 2386 /** 2387 */ 2388 public static final int SWASH_KAF = 38; 2389 /** 2390 */ 2391 public static final int SYRIAC_WAW = 39; 2392 /** 2393 */ 2394 public static final int TAH = 40; 2395 /** 2396 */ 2397 public static final int TAW = 41; 2398 /** 2399 */ 2400 public static final int TEH_MARBUTA = 42; 2401 /** 2402 */ 2403 public static final int TETH = 43; 2404 /** 2405 */ 2406 public static final int WAW = 44; 2407 /** 2408 */ 2409 public static final int YEH = 45; 2410 /** 2411 */ 2412 public static final int YEH_BARREE = 46; 2413 /** 2414 */ 2415 public static final int YEH_WITH_TAIL = 47; 2416 /** 2417 */ 2418 public static final int YUDH = 48; 2419 /** 2420 */ 2421 public static final int YUDH_HE = 49; 2422 /** 2423 */ 2424 public static final int ZAIN = 50; 2425 /** 2426 */ 2427 public static final int FE = 51; 2428 /** 2429 */ 2430 public static final int KHAPH = 52; 2431 /** 2432 */ 2433 public static final int ZHAIN = 53; 2434 /** 2435 */ 2436 public static final int BURUSHASKI_YEH_BARREE = 54; 2437 /***/ 2438 public static final int FARSI_YEH = 55; 2439 /***/ 2440 public static final int NYA = 56; 2441 /***/ 2442 public static final int ROHINGYA_YEH = 57; 2443 2444 /***/ 2445 public static final int MANICHAEAN_ALEPH = 58; 2446 /***/ 2447 public static final int MANICHAEAN_AYIN = 59; 2448 /***/ 2449 public static final int MANICHAEAN_BETH = 60; 2450 /***/ 2451 public static final int MANICHAEAN_DALETH = 61; 2452 /***/ 2453 public static final int MANICHAEAN_DHAMEDH = 62; 2454 /***/ 2455 public static final int MANICHAEAN_FIVE = 63; 2456 /***/ 2457 public static final int MANICHAEAN_GIMEL = 
64; 2458 /***/ 2459 public static final int MANICHAEAN_HETH = 65; 2460 /***/ 2461 public static final int MANICHAEAN_HUNDRED = 66; 2462 /***/ 2463 public static final int MANICHAEAN_KAPH = 67; 2464 /***/ 2465 public static final int MANICHAEAN_LAMEDH = 68; 2466 /***/ 2467 public static final int MANICHAEAN_MEM = 69; 2468 /***/ 2469 public static final int MANICHAEAN_NUN = 70; 2470 /***/ 2471 public static final int MANICHAEAN_ONE = 71; 2472 /***/ 2473 public static final int MANICHAEAN_PE = 72; 2474 /***/ 2475 public static final int MANICHAEAN_QOPH = 73; 2476 /***/ 2477 public static final int MANICHAEAN_RESH = 74; 2478 /***/ 2479 public static final int MANICHAEAN_SADHE = 75; 2480 /***/ 2481 public static final int MANICHAEAN_SAMEKH = 76; 2482 /***/ 2483 public static final int MANICHAEAN_TAW = 77; 2484 /***/ 2485 public static final int MANICHAEAN_TEN = 78; 2486 /***/ 2487 public static final int MANICHAEAN_TETH = 79; 2488 /***/ 2489 public static final int MANICHAEAN_THAMEDH = 80; 2490 /***/ 2491 public static final int MANICHAEAN_TWENTY = 81; 2492 /***/ 2493 public static final int MANICHAEAN_WAW = 82; 2494 /***/ 2495 public static final int MANICHAEAN_YODH = 83; 2496 /***/ 2497 public static final int MANICHAEAN_ZAYIN = 84; 2498 /***/ 2499 public static final int STRAIGHT_WAW = 85; 2500 2501 /** 2502 * @hide unsupported on Android 2503 */ 2504 public static final int COUNT = 86; 2505 } 2506 2507 /** 2508 * Grapheme Cluster Break constants. 
2509 * @see UProperty#GRAPHEME_CLUSTER_BREAK 2510 */ 2511 public static interface GraphemeClusterBreak { 2512 /** 2513 */ 2514 public static final int OTHER = 0; 2515 /** 2516 */ 2517 public static final int CONTROL = 1; 2518 /** 2519 */ 2520 public static final int CR = 2; 2521 /** 2522 */ 2523 public static final int EXTEND = 3; 2524 /** 2525 */ 2526 public static final int L = 4; 2527 /** 2528 */ 2529 public static final int LF = 5; 2530 /** 2531 */ 2532 public static final int LV = 6; 2533 /** 2534 */ 2535 public static final int LVT = 7; 2536 /** 2537 */ 2538 public static final int T = 8; 2539 /** 2540 */ 2541 public static final int V = 9; 2542 /** 2543 */ 2544 public static final int SPACING_MARK = 10; 2545 /** 2546 */ 2547 public static final int PREPEND = 11; 2548 /***/ 2549 public static final int REGIONAL_INDICATOR = 12; /*[RI]*/ /* new in Unicode 6.2/ICU 50 */ 2550 /** 2551 * @hide unsupported on Android 2552 */ 2553 public static final int COUNT = 13; 2554 } 2555 2556 /** 2557 * Word Break constants. 
2558 * @see UProperty#WORD_BREAK 2559 */ 2560 public static interface WordBreak { 2561 /** 2562 */ 2563 public static final int OTHER = 0; 2564 /** 2565 */ 2566 public static final int ALETTER = 1; 2567 /** 2568 */ 2569 public static final int FORMAT = 2; 2570 /** 2571 */ 2572 public static final int KATAKANA = 3; 2573 /** 2574 */ 2575 public static final int MIDLETTER = 4; 2576 /** 2577 */ 2578 public static final int MIDNUM = 5; 2579 /** 2580 */ 2581 public static final int NUMERIC = 6; 2582 /** 2583 */ 2584 public static final int EXTENDNUMLET = 7; 2585 /** 2586 */ 2587 public static final int CR = 8; 2588 /** 2589 */ 2590 public static final int EXTEND = 9; 2591 /** 2592 */ 2593 public static final int LF = 10; 2594 /** 2595 */ 2596 public static final int MIDNUMLET = 11; 2597 /** 2598 */ 2599 public static final int NEWLINE = 12; 2600 /***/ 2601 public static final int REGIONAL_INDICATOR = 13; /*[RI]*/ /* new in Unicode 6.2/ICU 50 */ 2602 /***/ 2603 public static final int HEBREW_LETTER = 14; /*[HL]*/ /* from here on: new in Unicode 6.3/ICU 52 */ 2604 /***/ 2605 public static final int SINGLE_QUOTE = 15; /*[SQ]*/ 2606 /***/ 2607 public static final int DOUBLE_QUOTE = 16; /*[DQ]*/ 2608 /** 2609 * @hide unsupported on Android 2610 */ 2611 public static final int COUNT = 17; 2612 } 2613 2614 /** 2615 * Sentence Break constants. 
2616 * @see UProperty#SENTENCE_BREAK 2617 */ 2618 public static interface SentenceBreak { 2619 /** 2620 */ 2621 public static final int OTHER = 0; 2622 /** 2623 */ 2624 public static final int ATERM = 1; 2625 /** 2626 */ 2627 public static final int CLOSE = 2; 2628 /** 2629 */ 2630 public static final int FORMAT = 3; 2631 /** 2632 */ 2633 public static final int LOWER = 4; 2634 /** 2635 */ 2636 public static final int NUMERIC = 5; 2637 /** 2638 */ 2639 public static final int OLETTER = 6; 2640 /** 2641 */ 2642 public static final int SEP = 7; 2643 /** 2644 */ 2645 public static final int SP = 8; 2646 /** 2647 */ 2648 public static final int STERM = 9; 2649 /** 2650 */ 2651 public static final int UPPER = 10; 2652 /** 2653 */ 2654 public static final int CR = 11; 2655 /** 2656 */ 2657 public static final int EXTEND = 12; 2658 /** 2659 */ 2660 public static final int LF = 13; 2661 /** 2662 */ 2663 public static final int SCONTINUE = 14; 2664 /** 2665 * @hide unsupported on Android 2666 */ 2667 public static final int COUNT = 15; 2668 } 2669 2670 /** 2671 * Line Break constants. 
2672 * @see UProperty#LINE_BREAK 2673 */ 2674 public static interface LineBreak 2675 { 2676 /** 2677 */ 2678 public static final int UNKNOWN = 0; 2679 /** 2680 */ 2681 public static final int AMBIGUOUS = 1; 2682 /** 2683 */ 2684 public static final int ALPHABETIC = 2; 2685 /** 2686 */ 2687 public static final int BREAK_BOTH = 3; 2688 /** 2689 */ 2690 public static final int BREAK_AFTER = 4; 2691 /** 2692 */ 2693 public static final int BREAK_BEFORE = 5; 2694 /** 2695 */ 2696 public static final int MANDATORY_BREAK = 6; 2697 /** 2698 */ 2699 public static final int CONTINGENT_BREAK = 7; 2700 /** 2701 */ 2702 public static final int CLOSE_PUNCTUATION = 8; 2703 /** 2704 */ 2705 public static final int COMBINING_MARK = 9; 2706 /** 2707 */ 2708 public static final int CARRIAGE_RETURN = 10; 2709 /** 2710 */ 2711 public static final int EXCLAMATION = 11; 2712 /** 2713 */ 2714 public static final int GLUE = 12; 2715 /** 2716 */ 2717 public static final int HYPHEN = 13; 2718 /** 2719 */ 2720 public static final int IDEOGRAPHIC = 14; 2721 /** 2722 * @see #INSEPARABLE 2723 */ 2724 public static final int INSEPERABLE = 15; 2725 /** 2726 * Renamed from the misspelled "inseperable" in Unicode 4.0.1. 
2727 */ 2728 public static final int INSEPARABLE = 15; 2729 /** 2730 */ 2731 public static final int INFIX_NUMERIC = 16; 2732 /** 2733 */ 2734 public static final int LINE_FEED = 17; 2735 /** 2736 */ 2737 public static final int NONSTARTER = 18; 2738 /** 2739 */ 2740 public static final int NUMERIC = 19; 2741 /** 2742 */ 2743 public static final int OPEN_PUNCTUATION = 20; 2744 /** 2745 */ 2746 public static final int POSTFIX_NUMERIC = 21; 2747 /** 2748 */ 2749 public static final int PREFIX_NUMERIC = 22; 2750 /** 2751 */ 2752 public static final int QUOTATION = 23; 2753 /** 2754 */ 2755 public static final int COMPLEX_CONTEXT = 24; 2756 /** 2757 */ 2758 public static final int SURROGATE = 25; 2759 /** 2760 */ 2761 public static final int SPACE = 26; 2762 /** 2763 */ 2764 public static final int BREAK_SYMBOLS = 27; 2765 /** 2766 */ 2767 public static final int ZWSPACE = 28; 2768 /** 2769 */ 2770 public static final int NEXT_LINE = 29; /*[NL]*/ /* from here on: new in Unicode 4/ICU 2.6 */ 2771 /** 2772 */ 2773 public static final int WORD_JOINER = 30; /*[WJ]*/ 2774 /** 2775 */ 2776 public static final int H2 = 31; /* from here on: new in Unicode 4.1/ICU 3.4 */ 2777 /** 2778 */ 2779 public static final int H3 = 32; 2780 /** 2781 */ 2782 public static final int JL = 33; 2783 /** 2784 */ 2785 public static final int JT = 34; 2786 /** 2787 */ 2788 public static final int JV = 35; 2789 /***/ 2790 public static final int CLOSE_PARENTHESIS = 36; /*[CP]*/ /* new in Unicode 5.2/ICU 4.4 */ 2791 /***/ 2792 public static final int CONDITIONAL_JAPANESE_STARTER = 37; /*[CJ]*/ /* new in Unicode 6.1/ICU 49 */ 2793 /***/ 2794 public static final int HEBREW_LETTER = 38; /*[HL]*/ /* new in Unicode 6.1/ICU 49 */ 2795 /***/ 2796 public static final int REGIONAL_INDICATOR = 39; /*[RI]*/ /* new in Unicode 6.2/ICU 50 */ 2797 /** 2798 * @hide unsupported on Android 2799 */ 2800 public static final int COUNT = 40; 2801 } 2802 2803 /** 2804 * Numeric Type constants. 
2805 * @see UProperty#NUMERIC_TYPE 2806 */ 2807 public static interface NumericType 2808 { 2809 /** 2810 */ 2811 public static final int NONE = 0; 2812 /** 2813 */ 2814 public static final int DECIMAL = 1; 2815 /** 2816 */ 2817 public static final int DIGIT = 2; 2818 /** 2819 */ 2820 public static final int NUMERIC = 3; 2821 /** 2822 * @hide unsupported on Android 2823 */ 2824 public static final int COUNT = 4; 2825 } 2826 2827 /** 2828 * Hangul Syllable Type constants. 2829 * 2830 * @see UProperty#HANGUL_SYLLABLE_TYPE 2831 */ 2832 public static interface HangulSyllableType 2833 { 2834 /** 2835 */ 2836 public static final int NOT_APPLICABLE = 0; /*[NA]*/ /*See note !!*/ 2837 /** 2838 */ 2839 public static final int LEADING_JAMO = 1; /*[L]*/ 2840 /** 2841 */ 2842 public static final int VOWEL_JAMO = 2; /*[V]*/ 2843 /** 2844 */ 2845 public static final int TRAILING_JAMO = 3; /*[T]*/ 2846 /** 2847 */ 2848 public static final int LV_SYLLABLE = 4; /*[LV]*/ 2849 /** 2850 */ 2851 public static final int LVT_SYLLABLE = 5; /*[LVT]*/ 2852 /** 2853 * @hide unsupported on Android 2854 */ 2855 public static final int COUNT = 6; 2856 } 2857 2858 /** 2859 * Bidi Paired Bracket Type constants. 2860 * 2861 * @see UProperty#BIDI_PAIRED_BRACKET_TYPE 2862 */ 2863 public static interface BidiPairedBracketType { 2864 /** 2865 * Not a paired bracket. 2866 */ 2867 public static final int NONE = 0; 2868 /** 2869 * Open paired bracket. 2870 */ 2871 public static final int OPEN = 1; 2872 /** 2873 * Close paired bracket. 2874 */ 2875 public static final int CLOSE = 2; 2876 /** 2877 * @hide unsupported on Android 2878 */ 2879 public static final int COUNT = 3; 2880 } 2881 2882 // public data members ----------------------------------------------- 2883 2884 /** 2885 * The lowest Unicode code point value, constant 0. 2886 * Same as {@link Character#MIN_CODE_POINT}, same integer value as {@link Character#MIN_VALUE}. 
2887 */ 2888 public static final int MIN_VALUE = Character.MIN_CODE_POINT; 2889 2890 /** 2891 * The highest Unicode code point value (scalar value), constant U+10FFFF (uses 21 bits). 2892 * Same as {@link Character#MAX_CODE_POINT}. 2893 * 2894 * <p>Up-to-date Unicode implementation of {@link Character#MAX_VALUE} 2895 * which is still a char with the value U+FFFF. 2896 */ 2897 public static final int MAX_VALUE = Character.MAX_CODE_POINT; 2898 2899 /** 2900 * The minimum value for Supplementary code points, constant U+10000. 2901 * Same as {@link Character#MIN_SUPPLEMENTARY_CODE_POINT}. 2902 */ 2903 public static final int SUPPLEMENTARY_MIN_VALUE = Character.MIN_SUPPLEMENTARY_CODE_POINT; 2904 2905 /** 2906 * Unicode value used when translating into Unicode encoding form and there 2907 * is no existing character. 2908 */ 2909 public static final int REPLACEMENT_CHAR = '\uFFFD'; 2910 2911 /** 2912 * Special value that is returned by getUnicodeNumericValue(int) when no 2913 * numeric value is defined for a code point. 2914 * @see #getUnicodeNumericValue 2915 */ 2916 public static final double NO_NUMERIC_VALUE = -123456789; 2917 2918 /** 2919 * Compatibility constant for Java Character's MIN_RADIX. 2920 */ 2921 public static final int MIN_RADIX = java.lang.Character.MIN_RADIX; 2922 2923 /** 2924 * Compatibility constant for Java Character's MAX_RADIX. 2925 */ 2926 public static final int MAX_RADIX = java.lang.Character.MAX_RADIX; 2927 2928 /** 2929 * Do not lowercase non-initial parts of words when titlecasing. 2930 * Option bit for titlecasing APIs that take an options bit set. 2931 * 2932 * By default, titlecasing will titlecase the first cased character 2933 * of a word and lowercase all other characters. 2934 * With this option, the other characters will not be modified. 
2935 * 2936 * @see #toTitleCase 2937 */ 2938 public static final int TITLECASE_NO_LOWERCASE = 0x100; 2939 2940 /** 2941 * Do not adjust the titlecasing indexes from BreakIterator::next() indexes; 2942 * titlecase exactly the characters at breaks from the iterator. 2943 * Option bit for titlecasing APIs that take an options bit set. 2944 * 2945 * By default, titlecasing will take each break iterator index, 2946 * adjust it by looking for the next cased character, and titlecase that one. 2947 * Other characters are lowercased. 2948 * 2949 * This follows Unicode 4 & 5 section 3.13 Default Case Operations: 2950 * 2951 * R3 toTitlecase(X): Find the word boundaries based on Unicode Standard Annex 2952 * #29, "Text Boundaries." Between each pair of word boundaries, find the first 2953 * cased character F. If F exists, map F to default_title(F); then map each 2954 * subsequent character C to default_lower(C). 2955 * 2956 * @see #toTitleCase 2957 * @see #TITLECASE_NO_LOWERCASE 2958 */ 2959 public static final int TITLECASE_NO_BREAK_ADJUSTMENT = 0x200; 2960 2961 // public methods ---------------------------------------------------- 2962 2963 /** 2964 * Returnss the numeric value of a decimal digit code point. 2965 * <br>This method observes the semantics of 2966 * <code>java.lang.Character.digit()</code>. Note that this 2967 * will return positive values for code points for which isDigit 2968 * returns false, just like java.lang.Character. 2969 * <br><em>Semantic Change:</em> In release 1.3.1 and 2970 * prior, this did not treat the European letters as having a 2971 * digit value, and also treated numeric letters and other numbers as 2972 * digits. 2973 * This has been changed to conform to the java semantics. 2974 * <br>A code point is a valid digit if and only if: 2975 * <ul> 2976 * <li>ch is a decimal digit or one of the european letters, and 2977 * <li>the value of ch is less than the specified radix. 
2978 * </ul> 2979 * @param ch the code point to query 2980 * @param radix the radix 2981 * @return the numeric value represented by the code point in the 2982 * specified radix, or -1 if the code point is not a decimal digit 2983 * or if its value is too large for the radix 2984 */ digit(int ch, int radix)2985 public static int digit(int ch, int radix) 2986 { 2987 if (2 <= radix && radix <= 36) { 2988 int value = digit(ch); 2989 if (value < 0) { 2990 // ch is not a decimal digit, try latin letters 2991 value = UCharacterProperty.getEuropeanDigit(ch); 2992 } 2993 return (value < radix) ? value : -1; 2994 } else { 2995 return -1; // invalid radix 2996 } 2997 } 2998 2999 /** 3000 * Returnss the numeric value of a decimal digit code point. 3001 * <br>This is a convenience overload of <code>digit(int, int)</code> 3002 * that provides a decimal radix. 3003 * <br><em>Semantic Change:</em> In release 1.3.1 and prior, this 3004 * treated numeric letters and other numbers as digits. This has 3005 * been changed to conform to the java semantics. 3006 * @param ch the code point to query 3007 * @return the numeric value represented by the code point, 3008 * or -1 if the code point is not a decimal digit or if its 3009 * value is too large for a decimal radix 3010 */ digit(int ch)3011 public static int digit(int ch) 3012 { 3013 return UCharacterProperty.INSTANCE.digit(ch); 3014 } 3015 3016 /** 3017 * Returns the numeric value of the code point as a nonnegative 3018 * integer. 3019 * <br>If the code point does not have a numeric value, then -1 is returned. 3020 * <br> 3021 * If the code point has a numeric value that cannot be represented as a 3022 * nonnegative integer (for example, a fractional value), then -2 is 3023 * returned. 
3024 * @param ch the code point to query 3025 * @return the numeric value of the code point, or -1 if it has no numeric 3026 * value, or -2 if it has a numeric value that cannot be represented as a 3027 * nonnegative integer 3028 */ getNumericValue(int ch)3029 public static int getNumericValue(int ch) 3030 { 3031 return UCharacterProperty.INSTANCE.getNumericValue(ch); 3032 } 3033 3034 /** 3035 * <strong>[icu]</strong> Returns the numeric value for a Unicode code point as defined in the 3036 * Unicode Character Database. 3037 * <p>A "double" return type is necessary because some numeric values are 3038 * fractions, negative, or too large for int. 3039 * <p>For characters without any numeric values in the Unicode Character 3040 * Database, this function will return NO_NUMERIC_VALUE. 3041 * Note: This is different from the Unicode Standard which specifies NaN as the default value. 3042 * <p><em>API Change:</em> In release 2.2 and prior, this API has a 3043 * return type int and returns -1 when the argument ch does not have a 3044 * corresponding numeric value. This has been changed to synch with ICU4C 3045 * 3046 * This corresponds to the ICU4C function u_getNumericValue. 3047 * @param ch Code point to get the numeric value for. 3048 * @return numeric value of ch, or NO_NUMERIC_VALUE if none is defined. 3049 */ getUnicodeNumericValue(int ch)3050 public static double getUnicodeNumericValue(int ch) 3051 { 3052 return UCharacterProperty.INSTANCE.getUnicodeNumericValue(ch); 3053 } 3054 3055 /** 3056 * Compatibility override of Java deprecated method. This 3057 * method will always remain deprecated. 3058 * Same as java.lang.Character.isSpace(). 3059 * @param ch the code point 3060 * @return true if the code point is a space character as 3061 * defined by java.lang.Character.isSpace. 
3062 * @deprecated ICU 3.4 (Java) 3063 * @hide original deprecated declaration 3064 */ 3065 @Deprecated isSpace(int ch)3066 public static boolean isSpace(int ch) { 3067 return ch <= 0x20 && 3068 (ch == 0x20 || ch == 0x09 || ch == 0x0a || ch == 0x0c || ch == 0x0d); 3069 } 3070 3071 /** 3072 * Returns a value indicating a code point's Unicode category. 3073 * Up-to-date Unicode implementation of java.lang.Character.getType() 3074 * except for the above mentioned code points that had their category 3075 * changed.<br> 3076 * Return results are constants from the interface 3077 * <a href=UCharacterCategory.html>UCharacterCategory</a><br> 3078 * <em>NOTE:</em> the UCharacterCategory values are <em>not</em> compatible with 3079 * those returned by java.lang.Character.getType. UCharacterCategory values 3080 * match the ones used in ICU4C, while java.lang.Character type 3081 * values, though similar, skip the value 17. 3082 * @param ch code point whose type is to be determined 3083 * @return category which is a value of UCharacterCategory 3084 */ getType(int ch)3085 public static int getType(int ch) 3086 { 3087 return UCharacterProperty.INSTANCE.getType(ch); 3088 } 3089 3090 /** 3091 * Determines if a code point has a defined meaning in the up-to-date 3092 * Unicode standard. 3093 * E.g. supplementary code points though allocated space are not defined in 3094 * Unicode yet.<br> 3095 * Up-to-date Unicode implementation of java.lang.Character.isDefined() 3096 * @param ch code point to be determined if it is defined in the most 3097 * current version of Unicode 3098 * @return true if this code point is defined in unicode 3099 */ isDefined(int ch)3100 public static boolean isDefined(int ch) 3101 { 3102 return getType(ch) != 0; 3103 } 3104 3105 /** 3106 * Determines if a code point is a Java digit. 3107 * <br>This method observes the semantics of 3108 * <code>java.lang.Character.isDigit()</code>. It returns true for decimal 3109 * digits only. 
3110 * <br><em>Semantic Change:</em> In release 1.3.1 and prior, this treated 3111 * numeric letters and other numbers as digits. 3112 * This has been changed to conform to the java semantics. 3113 * @param ch code point to query 3114 * @return true if this code point is a digit 3115 */ isDigit(int ch)3116 public static boolean isDigit(int ch) 3117 { 3118 return getType(ch) == UCharacterCategory.DECIMAL_DIGIT_NUMBER; 3119 } 3120 3121 /** 3122 * Determines if the specified code point is an ISO control character. 3123 * A code point is considered to be an ISO control character if it is in 3124 * the range \u0000 through \u001F or in the range \u007F through 3125 * \u009F.<br> 3126 * Up-to-date Unicode implementation of java.lang.Character.isISOControl() 3127 * @param ch code point to determine if it is an ISO control character 3128 * @return true if code point is a ISO control character 3129 */ isISOControl(int ch)3130 public static boolean isISOControl(int ch) 3131 { 3132 return ch >= 0 && ch <= APPLICATION_PROGRAM_COMMAND_ && 3133 ((ch <= UNIT_SEPARATOR_) || (ch >= DELETE_)); 3134 } 3135 3136 /** 3137 * Determines if the specified code point is a letter. 3138 * Up-to-date Unicode implementation of java.lang.Character.isLetter() 3139 * @param ch code point to determine if it is a letter 3140 * @return true if code point is a letter 3141 */ isLetter(int ch)3142 public static boolean isLetter(int ch) 3143 { 3144 // if props == 0, it will just fall through and return false 3145 return ((1 << getType(ch)) 3146 & ((1 << UCharacterCategory.UPPERCASE_LETTER) 3147 | (1 << UCharacterCategory.LOWERCASE_LETTER) 3148 | (1 << UCharacterCategory.TITLECASE_LETTER) 3149 | (1 << UCharacterCategory.MODIFIER_LETTER) 3150 | (1 << UCharacterCategory.OTHER_LETTER))) != 0; 3151 } 3152 3153 /** 3154 * Determines if the specified code point is a letter or digit. 
3155 * <strong>[icu] Note:</strong> This method, unlike java.lang.Character does not regard the ascii 3156 * characters 'A' - 'Z' and 'a' - 'z' as digits. 3157 * @param ch code point to determine if it is a letter or a digit 3158 * @return true if code point is a letter or a digit 3159 */ isLetterOrDigit(int ch)3160 public static boolean isLetterOrDigit(int ch) 3161 { 3162 return ((1 << getType(ch)) 3163 & ((1 << UCharacterCategory.UPPERCASE_LETTER) 3164 | (1 << UCharacterCategory.LOWERCASE_LETTER) 3165 | (1 << UCharacterCategory.TITLECASE_LETTER) 3166 | (1 << UCharacterCategory.MODIFIER_LETTER) 3167 | (1 << UCharacterCategory.OTHER_LETTER) 3168 | (1 << UCharacterCategory.DECIMAL_DIGIT_NUMBER))) != 0; 3169 } 3170 3171 /** 3172 * Compatibility override of Java deprecated method. This 3173 * method will always remain deprecated. Delegates to 3174 * java.lang.Character.isJavaIdentifierStart. 3175 * @param cp the code point 3176 * @return true if the code point can start a java identifier. 3177 * @deprecated ICU 3.4 (Java) 3178 * @hide original deprecated declaration 3179 */ 3180 @Deprecated isJavaLetter(int cp)3181 public static boolean isJavaLetter(int cp) { 3182 return isJavaIdentifierStart(cp); 3183 } 3184 3185 /** 3186 * Compatibility override of Java deprecated method. This 3187 * method will always remain deprecated. Delegates to 3188 * java.lang.Character.isJavaIdentifierPart. 3189 * @param cp the code point 3190 * @return true if the code point can continue a java identifier. 3191 * @deprecated ICU 3.4 (Java) 3192 * @hide original deprecated declaration 3193 */ 3194 @Deprecated isJavaLetterOrDigit(int cp)3195 public static boolean isJavaLetterOrDigit(int cp) { 3196 return isJavaIdentifierPart(cp); 3197 } 3198 3199 /** 3200 * Compatibility override of Java method, delegates to 3201 * java.lang.Character.isJavaIdentifierStart. 3202 * @param cp the code point 3203 * @return true if the code point can start a java identifier. 
3204 */ isJavaIdentifierStart(int cp)3205 public static boolean isJavaIdentifierStart(int cp) { 3206 // note, downcast to char for jdk 1.4 compatibility 3207 return java.lang.Character.isJavaIdentifierStart((char)cp); 3208 } 3209 3210 /** 3211 * Compatibility override of Java method, delegates to 3212 * java.lang.Character.isJavaIdentifierPart. 3213 * @param cp the code point 3214 * @return true if the code point can continue a java identifier. 3215 */ isJavaIdentifierPart(int cp)3216 public static boolean isJavaIdentifierPart(int cp) { 3217 // note, downcast to char for jdk 1.4 compatibility 3218 return java.lang.Character.isJavaIdentifierPart((char)cp); 3219 } 3220 3221 /** 3222 * Determines if the specified code point is a lowercase character. 3223 * UnicodeData only contains case mappings for code points where they are 3224 * one-to-one mappings; it also omits information about context-sensitive 3225 * case mappings.<br> For more information about Unicode case mapping 3226 * please refer to the 3227 * <a href=http://www.unicode.org/unicode/reports/tr21/>Technical report 3228 * #21</a>.<br> 3229 * Up-to-date Unicode implementation of java.lang.Character.isLowerCase() 3230 * @param ch code point to determine if it is in lowercase 3231 * @return true if code point is a lowercase character 3232 */ isLowerCase(int ch)3233 public static boolean isLowerCase(int ch) 3234 { 3235 // if props == 0, it will just fall through and return false 3236 return getType(ch) == UCharacterCategory.LOWERCASE_LETTER; 3237 } 3238 3239 /** 3240 * Determines if the specified code point is a white space character. 3241 * A code point is considered to be an whitespace character if and only 3242 * if it satisfies one of the following criteria: 3243 * <ul> 3244 * <li> It is a Unicode Separator character (categories "Z" = "Zs" or "Zl" or "Zp"), but is not 3245 * also a non-breaking space (\u00A0 or \u2007 or \u202F). 3246 * <li> It is \u0009, HORIZONTAL TABULATION. 
3247 * <li> It is \u000A, LINE FEED. 3248 * <li> It is \u000B, VERTICAL TABULATION. 3249 * <li> It is \u000C, FORM FEED. 3250 * <li> It is \u000D, CARRIAGE RETURN. 3251 * <li> It is \u001C, FILE SEPARATOR. 3252 * <li> It is \u001D, GROUP SEPARATOR. 3253 * <li> It is \u001E, RECORD SEPARATOR. 3254 * <li> It is \u001F, UNIT SEPARATOR. 3255 * </ul> 3256 * 3257 * This API tries to sync with the semantics of Java's 3258 * java.lang.Character.isWhitespace(), but it may not return 3259 * the exact same results because of the Unicode version 3260 * difference. 3261 * <p>Note: Unicode 4.0.1 changed U+200B ZERO WIDTH SPACE from a Space Separator (Zs) 3262 * to a Format Control (Cf). Since then, isWhitespace(0x200b) returns false. 3263 * See http://www.unicode.org/versions/Unicode4.0.1/ 3264 * @param ch code point to determine if it is a white space 3265 * @return true if the specified code point is a white space character 3266 */ isWhitespace(int ch)3267 public static boolean isWhitespace(int ch) 3268 { 3269 // exclude no-break spaces 3270 // if props == 0, it will just fall through and return false 3271 return ((1 << getType(ch)) & 3272 ((1 << UCharacterCategory.SPACE_SEPARATOR) 3273 | (1 << UCharacterCategory.LINE_SEPARATOR) 3274 | (1 << UCharacterCategory.PARAGRAPH_SEPARATOR))) != 0 3275 && (ch != NO_BREAK_SPACE_) && (ch != FIGURE_SPACE_) && (ch != NARROW_NO_BREAK_SPACE_) 3276 // TAB VT LF FF CR FS GS RS US NL are all control characters 3277 // that are white spaces. 3278 || (ch >= 0x9 && ch <= 0xd) || (ch >= 0x1c && ch <= 0x1f); 3279 } 3280 3281 /** 3282 * Determines if the specified code point is a Unicode specified space 3283 * character, i.e. if code point is in the category Zs, Zl and Zp. 3284 * Up-to-date Unicode implementation of java.lang.Character.isSpaceChar(). 
3285 * @param ch code point to determine if it is a space 3286 * @return true if the specified code point is a space character 3287 */ isSpaceChar(int ch)3288 public static boolean isSpaceChar(int ch) 3289 { 3290 // if props == 0, it will just fall through and return false 3291 return ((1 << getType(ch)) & ((1 << UCharacterCategory.SPACE_SEPARATOR) 3292 | (1 << UCharacterCategory.LINE_SEPARATOR) 3293 | (1 << UCharacterCategory.PARAGRAPH_SEPARATOR))) 3294 != 0; 3295 } 3296 3297 /** 3298 * Determines if the specified code point is a titlecase character. 3299 * UnicodeData only contains case mappings for code points where they are 3300 * one-to-one mappings; it also omits information about context-sensitive 3301 * case mappings.<br> 3302 * For more information about Unicode case mapping please refer to the 3303 * <a href=http://www.unicode.org/unicode/reports/tr21/> 3304 * Technical report #21</a>.<br> 3305 * Up-to-date Unicode implementation of java.lang.Character.isTitleCase(). 3306 * @param ch code point to determine if it is in title case 3307 * @return true if the specified code point is a titlecase character 3308 */ isTitleCase(int ch)3309 public static boolean isTitleCase(int ch) 3310 { 3311 // if props == 0, it will just fall through and return false 3312 return getType(ch) == UCharacterCategory.TITLECASE_LETTER; 3313 } 3314 3315 /** 3316 * Determines if the specified code point may be any part of a Unicode 3317 * identifier other than the starting character. 
3318 * A code point may be part of a Unicode identifier if and only if it is 3319 * one of the following: 3320 * <ul> 3321 * <li> Lu Uppercase letter 3322 * <li> Ll Lowercase letter 3323 * <li> Lt Titlecase letter 3324 * <li> Lm Modifier letter 3325 * <li> Lo Other letter 3326 * <li> Nl Letter number 3327 * <li> Pc Connecting punctuation character 3328 * <li> Nd decimal number 3329 * <li> Mc Spacing combining mark 3330 * <li> Mn Non-spacing mark 3331 * <li> Cf formatting code 3332 * </ul> 3333 * Up-to-date Unicode implementation of 3334 * java.lang.Character.isUnicodeIdentifierPart().<br> 3335 * See <a href=http://www.unicode.org/unicode/reports/tr8/>UTR #8</a>. 3336 * @param ch code point to determine if is can be part of a Unicode 3337 * identifier 3338 * @return true if code point is any character belonging a unicode 3339 * identifier suffix after the first character 3340 */ isUnicodeIdentifierPart(int ch)3341 public static boolean isUnicodeIdentifierPart(int ch) 3342 { 3343 // if props == 0, it will just fall through and return false 3344 // cat == format 3345 return ((1 << getType(ch)) 3346 & ((1 << UCharacterCategory.UPPERCASE_LETTER) 3347 | (1 << UCharacterCategory.LOWERCASE_LETTER) 3348 | (1 << UCharacterCategory.TITLECASE_LETTER) 3349 | (1 << UCharacterCategory.MODIFIER_LETTER) 3350 | (1 << UCharacterCategory.OTHER_LETTER) 3351 | (1 << UCharacterCategory.LETTER_NUMBER) 3352 | (1 << UCharacterCategory.CONNECTOR_PUNCTUATION) 3353 | (1 << UCharacterCategory.DECIMAL_DIGIT_NUMBER) 3354 | (1 << UCharacterCategory.COMBINING_SPACING_MARK) 3355 | (1 << UCharacterCategory.NON_SPACING_MARK))) != 0 3356 || isIdentifierIgnorable(ch); 3357 } 3358 3359 /** 3360 * Determines if the specified code point is permissible as the first 3361 * character in a Unicode identifier. 
3362 * A code point may start a Unicode identifier if it is of type either 3363 * <ul> 3364 * <li> Lu Uppercase letter 3365 * <li> Ll Lowercase letter 3366 * <li> Lt Titlecase letter 3367 * <li> Lm Modifier letter 3368 * <li> Lo Other letter 3369 * <li> Nl Letter number 3370 * </ul> 3371 * Up-to-date Unicode implementation of 3372 * java.lang.Character.isUnicodeIdentifierStart().<br> 3373 * See <a href=http://www.unicode.org/unicode/reports/tr8/>UTR #8</a>. 3374 * @param ch code point to determine if it can start a Unicode identifier 3375 * @return true if code point is the first character belonging a unicode 3376 * identifier 3377 */ isUnicodeIdentifierStart(int ch)3378 public static boolean isUnicodeIdentifierStart(int ch) 3379 { 3380 /*int cat = getType(ch);*/ 3381 // if props == 0, it will just fall through and return false 3382 return ((1 << getType(ch)) 3383 & ((1 << UCharacterCategory.UPPERCASE_LETTER) 3384 | (1 << UCharacterCategory.LOWERCASE_LETTER) 3385 | (1 << UCharacterCategory.TITLECASE_LETTER) 3386 | (1 << UCharacterCategory.MODIFIER_LETTER) 3387 | (1 << UCharacterCategory.OTHER_LETTER) 3388 | (1 << UCharacterCategory.LETTER_NUMBER))) != 0; 3389 } 3390 3391 /** 3392 * Determines if the specified code point should be regarded as an 3393 * ignorable character in a Java identifier. 3394 * A character is Java-identifier-ignorable if it has the general category 3395 * Cf Formatting Control, or it is a non-Java-whitespace ISO control: 3396 * U+0000..U+0008, U+000E..U+001B, U+007F..U+009F.<br> 3397 * Up-to-date Unicode implementation of 3398 * java.lang.Character.isIdentifierIgnorable().<br> 3399 * See <a href=http://www.unicode.org/unicode/reports/tr8/>UTR #8</a>. 3400 * <p>Note that Unicode just recommends to ignore Cf (format controls). 3401 * @param ch code point to be determined if it can be ignored in a Unicode 3402 * identifier. 
3403 * @return true if the code point is ignorable 3404 */ isIdentifierIgnorable(int ch)3405 public static boolean isIdentifierIgnorable(int ch) 3406 { 3407 // see java.lang.Character.isIdentifierIgnorable() on range of 3408 // ignorable characters. 3409 if (ch <= 0x9f) { 3410 return isISOControl(ch) 3411 && !((ch >= 0x9 && ch <= 0xd) 3412 || (ch >= 0x1c && ch <= 0x1f)); 3413 } 3414 return getType(ch) == UCharacterCategory.FORMAT; 3415 } 3416 3417 /** 3418 * Determines if the specified code point is an uppercase character. 3419 * UnicodeData only contains case mappings for code point where they are 3420 * one-to-one mappings; it also omits information about context-sensitive 3421 * case mappings.<br> 3422 * For language specific case conversion behavior, use 3423 * toUpperCase(locale, str). <br> 3424 * For example, the case conversion for dot-less i and dotted I in Turkish, 3425 * or for final sigma in Greek. 3426 * For more information about Unicode case mapping please refer to the 3427 * <a href=http://www.unicode.org/unicode/reports/tr21/> 3428 * Technical report #21</a>.<br> 3429 * Up-to-date Unicode implementation of java.lang.Character.isUpperCase(). 3430 * @param ch code point to determine if it is in uppercase 3431 * @return true if the code point is an uppercase character 3432 */ isUpperCase(int ch)3433 public static boolean isUpperCase(int ch) 3434 { 3435 // if props == 0, it will just fall through and return false 3436 return getType(ch) == UCharacterCategory.UPPERCASE_LETTER; 3437 } 3438 3439 /** 3440 * The given code point is mapped to its lowercase equivalent; if the code 3441 * point has no lowercase equivalent, the code point itself is returned. 3442 * Up-to-date Unicode implementation of java.lang.Character.toLowerCase() 3443 * 3444 * <p>This function only returns the simple, single-code point case mapping. 3445 * Full case mappings should be used whenever possible because they produce 3446 * better results by working on whole strings. 
3447 * They take into account the string context and the language and can map 3448 * to a result string with a different length as appropriate. 3449 * Full case mappings are applied by the case mapping functions 3450 * that take String parameters rather than code points (int). 3451 * See also the User Guide chapter on C/POSIX migration: 3452 * http://www.icu-project.org/userguide/posix.html#case_mappings 3453 * 3454 * @param ch code point whose lowercase equivalent is to be retrieved 3455 * @return the lowercase equivalent code point 3456 */ toLowerCase(int ch)3457 public static int toLowerCase(int ch) { 3458 return UCaseProps.INSTANCE.tolower(ch); 3459 } 3460 3461 /** 3462 * Converts argument code point and returns a String object representing 3463 * the code point's value in UTF-16 format. 3464 * The result is a string whose length is 1 for BMP code points, 2 for supplementary ones. 3465 * 3466 * <p>Up-to-date Unicode implementation of java.lang.Character.toString(). 3467 * 3468 * @param ch code point 3469 * @return string representation of the code point, null if code point is not 3470 * defined in unicode 3471 */ toString(int ch)3472 public static String toString(int ch) 3473 { 3474 if (ch < MIN_VALUE || ch > MAX_VALUE) { 3475 return null; 3476 } 3477 3478 if (ch < SUPPLEMENTARY_MIN_VALUE) { 3479 return String.valueOf((char)ch); 3480 } 3481 3482 return new String(Character.toChars(ch)); 3483 } 3484 3485 /** 3486 * Converts the code point argument to titlecase. 3487 * If no titlecase is available, the uppercase is returned. If no uppercase 3488 * is available, the code point itself is returned. 3489 * Up-to-date Unicode implementation of java.lang.Character.toTitleCase() 3490 * 3491 * <p>This function only returns the simple, single-code point case mapping. 3492 * Full case mappings should be used whenever possible because they produce 3493 * better results by working on whole strings. 
3494 * They take into account the string context and the language and can map 3495 * to a result string with a different length as appropriate. 3496 * Full case mappings are applied by the case mapping functions 3497 * that take String parameters rather than code points (int). 3498 * See also the User Guide chapter on C/POSIX migration: 3499 * http://www.icu-project.org/userguide/posix.html#case_mappings 3500 * 3501 * @param ch code point whose title case is to be retrieved 3502 * @return titlecase code point 3503 */ toTitleCase(int ch)3504 public static int toTitleCase(int ch) { 3505 return UCaseProps.INSTANCE.totitle(ch); 3506 } 3507 3508 /** 3509 * Converts the character argument to uppercase. 3510 * If no uppercase is available, the character itself is returned. 3511 * Up-to-date Unicode implementation of java.lang.Character.toUpperCase() 3512 * 3513 * <p>This function only returns the simple, single-code point case mapping. 3514 * Full case mappings should be used whenever possible because they produce 3515 * better results by working on whole strings. 3516 * They take into account the string context and the language and can map 3517 * to a result string with a different length as appropriate. 3518 * Full case mappings are applied by the case mapping functions 3519 * that take String parameters rather than code points (int). 3520 * See also the User Guide chapter on C/POSIX migration: 3521 * http://www.icu-project.org/userguide/posix.html#case_mappings 3522 * 3523 * @param ch code point whose uppercase is to be retrieved 3524 * @return uppercase code point 3525 */ toUpperCase(int ch)3526 public static int toUpperCase(int ch) { 3527 return UCaseProps.INSTANCE.toupper(ch); 3528 } 3529 3530 // extra methods not in java.lang.Character -------------------------- 3531 3532 /** 3533 * <strong>[icu]</strong> Determines if the code point is a supplementary character. 
3534 * A code point is a supplementary character if and only if it is greater 3535 * than <a href=#SUPPLEMENTARY_MIN_VALUE>SUPPLEMENTARY_MIN_VALUE</a> 3536 * @param ch code point to be determined if it is in the supplementary 3537 * plane 3538 * @return true if code point is a supplementary character 3539 */ isSupplementary(int ch)3540 public static boolean isSupplementary(int ch) 3541 { 3542 return ch >= UCharacter.SUPPLEMENTARY_MIN_VALUE && 3543 ch <= UCharacter.MAX_VALUE; 3544 } 3545 3546 /** 3547 * <strong>[icu]</strong> Determines if the code point is in the BMP plane. 3548 * @param ch code point to be determined if it is not a supplementary 3549 * character 3550 * @return true if code point is not a supplementary character 3551 */ isBMP(int ch)3552 public static boolean isBMP(int ch) 3553 { 3554 return (ch >= 0 && ch <= LAST_CHAR_MASK_); 3555 } 3556 3557 /** 3558 * <strong>[icu]</strong> Determines whether the specified code point is a printable character 3559 * according to the Unicode standard. 3560 * @param ch code point to be determined if it is printable 3561 * @return true if the code point is a printable character 3562 */ isPrintable(int ch)3563 public static boolean isPrintable(int ch) 3564 { 3565 int cat = getType(ch); 3566 // if props == 0, it will just fall through and return false 3567 return (cat != UCharacterCategory.UNASSIGNED && 3568 cat != UCharacterCategory.CONTROL && 3569 cat != UCharacterCategory.FORMAT && 3570 cat != UCharacterCategory.PRIVATE_USE && 3571 cat != UCharacterCategory.SURROGATE && 3572 cat != UCharacterCategory.GENERAL_OTHER_TYPES); 3573 } 3574 3575 /** 3576 * <strong>[icu]</strong> Determines whether the specified code point is of base form. 3577 * A code point of base form does not graphically combine with preceding 3578 * characters, and is neither a control nor a format character. 
3579 * @param ch code point to be determined if it is of base form 3580 * @return true if the code point is of base form 3581 */ isBaseForm(int ch)3582 public static boolean isBaseForm(int ch) 3583 { 3584 int cat = getType(ch); 3585 // if props == 0, it will just fall through and return false 3586 return cat == UCharacterCategory.DECIMAL_DIGIT_NUMBER || 3587 cat == UCharacterCategory.OTHER_NUMBER || 3588 cat == UCharacterCategory.LETTER_NUMBER || 3589 cat == UCharacterCategory.UPPERCASE_LETTER || 3590 cat == UCharacterCategory.LOWERCASE_LETTER || 3591 cat == UCharacterCategory.TITLECASE_LETTER || 3592 cat == UCharacterCategory.MODIFIER_LETTER || 3593 cat == UCharacterCategory.OTHER_LETTER || 3594 cat == UCharacterCategory.NON_SPACING_MARK || 3595 cat == UCharacterCategory.ENCLOSING_MARK || 3596 cat == UCharacterCategory.COMBINING_SPACING_MARK; 3597 } 3598 3599 /** 3600 * <strong>[icu]</strong> Returns the Bidirection property of a code point. 3601 * For example, 0x0041 (letter A) has the LEFT_TO_RIGHT directional 3602 * property.<br> 3603 * Result returned belongs to the interface 3604 * <a href=UCharacterDirection.html>UCharacterDirection</a> 3605 * @param ch the code point to be determined its direction 3606 * @return direction constant from UCharacterDirection. 3607 */ getDirection(int ch)3608 public static int getDirection(int ch) 3609 { 3610 return UBiDiProps.INSTANCE.getClass(ch); 3611 } 3612 3613 /** 3614 * Determines whether the code point has the "mirrored" property. 3615 * This property is set for characters that are commonly used in 3616 * Right-To-Left contexts and need to be displayed with a "mirrored" 3617 * glyph. 
3618 * @param ch code point whose mirror is to be determined 3619 * @return true if the code point has the "mirrored" property 3620 */ isMirrored(int ch)3621 public static boolean isMirrored(int ch) 3622 { 3623 return UBiDiProps.INSTANCE.isMirrored(ch); 3624 } 3625 3626 /** 3627 * <strong>[icu]</strong> Maps the specified code point to a "mirror-image" code point. 3628 * For code points with the "mirrored" property, implementations sometimes 3629 * need a "poor man's" mapping to another code point such that the default 3630 * glyph may serve as the mirror-image of the default glyph of the 3631 * specified code point.<br> 3632 * This is useful for text conversion to and from codepages with visual 3633 * order, and for displays without glyph selection capabilities. 3634 * @param ch code point whose mirror is to be retrieved 3635 * @return another code point that may serve as a mirror-image substitute, 3636 * or ch itself if there is no such mapping or ch does not have the 3637 * "mirrored" property 3638 */ getMirror(int ch)3639 public static int getMirror(int ch) 3640 { 3641 return UBiDiProps.INSTANCE.getMirror(ch); 3642 } 3643 3644 /** 3645 * <strong>[icu]</strong> Maps the specified character to its paired bracket character. 3646 * For Bidi_Paired_Bracket_Type!=None, this is the same as getMirror(int). 3647 * Otherwise c itself is returned. 
3648 * See http://www.unicode.org/reports/tr9/ 3649 * 3650 * @param c the code point to be mapped 3651 * @return the paired bracket code point, 3652 * or c itself if there is no such mapping 3653 * (Bidi_Paired_Bracket_Type=None) 3654 * 3655 * @see UProperty#BIDI_PAIRED_BRACKET 3656 * @see UProperty#BIDI_PAIRED_BRACKET_TYPE 3657 * @see #getMirror(int) 3658 */ getBidiPairedBracket(int c)3659 public static int getBidiPairedBracket(int c) { 3660 return UBiDiProps.INSTANCE.getPairedBracket(c); 3661 } 3662 3663 /** 3664 * <strong>[icu]</strong> Returns the combining class of the argument codepoint 3665 * @param ch code point whose combining is to be retrieved 3666 * @return the combining class of the codepoint 3667 */ getCombiningClass(int ch)3668 public static int getCombiningClass(int ch) 3669 { 3670 return Normalizer2.getNFDInstance().getCombiningClass(ch); 3671 } 3672 3673 /** 3674 * <strong>[icu]</strong> A code point is illegal if and only if 3675 * <ul> 3676 * <li> Out of bounds, less than 0 or greater than UCharacter.MAX_VALUE 3677 * <li> A surrogate value, 0xD800 to 0xDFFF 3678 * <li> Not-a-character, having the form 0x xxFFFF or 0x xxFFFE 3679 * </ul> 3680 * Note: legal does not mean that it is assigned in this version of Unicode. 3681 * @param ch code point to determine if it is a legal code point by itself 3682 * @return true if and only if legal. 3683 */ isLegal(int ch)3684 public static boolean isLegal(int ch) 3685 { 3686 if (ch < MIN_VALUE) { 3687 return false; 3688 } 3689 if (ch < Character.MIN_SURROGATE) { 3690 return true; 3691 } 3692 if (ch <= Character.MAX_SURROGATE) { 3693 return false; 3694 } 3695 if (UCharacterUtility.isNonCharacter(ch)) { 3696 return false; 3697 } 3698 return (ch <= MAX_VALUE); 3699 } 3700 3701 /** 3702 * <strong>[icu]</strong> A string is legal iff all its code points are legal. 
3703 * A code point is illegal if and only if 3704 * <ul> 3705 * <li> Out of bounds, less than 0 or greater than UCharacter.MAX_VALUE 3706 * <li> A surrogate value, 0xD800 to 0xDFFF 3707 * <li> Not-a-character, having the form 0x xxFFFF or 0x xxFFFE 3708 * </ul> 3709 * Note: legal does not mean that it is assigned in this version of Unicode. 3710 * @param str containing code points to examin 3711 * @return true if and only if legal. 3712 */ isLegal(String str)3713 public static boolean isLegal(String str) 3714 { 3715 int size = str.length(); 3716 int codepoint; 3717 for (int i = 0; i < size; i += Character.charCount(codepoint)) 3718 { 3719 codepoint = str.codePointAt(i); 3720 if (!isLegal(codepoint)) { 3721 return false; 3722 } 3723 } 3724 return true; 3725 } 3726 3727 /** 3728 * <strong>[icu]</strong> Returns the version of Unicode data used. 3729 * @return the unicode version number used 3730 */ getUnicodeVersion()3731 public static VersionInfo getUnicodeVersion() 3732 { 3733 return UCharacterProperty.INSTANCE.m_unicodeVersion_; 3734 } 3735 3736 /** 3737 * <strong>[icu]</strong> Returns the most current Unicode name of the argument code point, or 3738 * null if the character is unassigned or outside the range 3739 * UCharacter.MIN_VALUE and UCharacter.MAX_VALUE or does not have a name. 3740 * <br> 3741 * Note calling any methods related to code point names, e.g. get*Name*() 3742 * incurs a one-time initialisation cost to construct the name tables. 
3743 * @param ch the code point for which to get the name 3744 * @return most current Unicode name 3745 */ getName(int ch)3746 public static String getName(int ch) 3747 { 3748 return UCharacterName.INSTANCE.getName(ch, UCharacterNameChoice.UNICODE_CHAR_NAME); 3749 } 3750 3751 /** 3752 * <strong>[icu]</strong> Returns the names for each of the characters in a string 3753 * @param s string to format 3754 * @param separator string to go between names 3755 * @return string of names 3756 */ getName(String s, String separator)3757 public static String getName(String s, String separator) { 3758 if (s.length() == 1) { // handle common case 3759 return getName(s.charAt(0)); 3760 } 3761 int cp; 3762 StringBuilder sb = new StringBuilder(); 3763 for (int i = 0; i < s.length(); i += Character.charCount(cp)) { 3764 cp = s.codePointAt(i); 3765 if (i != 0) sb.append(separator); 3766 sb.append(UCharacter.getName(cp)); 3767 } 3768 return sb.toString(); 3769 } 3770 3771 /** 3772 * <strong>[icu]</strong> Returns null. 3773 * Used to return the Unicode_1_Name property value which was of little practical value. 3774 * @param ch the code point for which to get the name 3775 * @return null 3776 * @deprecated ICU 49 3777 * @hide original deprecated declaration 3778 */ 3779 @Deprecated getName1_0(int ch)3780 public static String getName1_0(int ch) 3781 { 3782 return null; 3783 } 3784 3785 /** 3786 * <strong>[icu]</strong> Returns a name for a valid codepoint. Unlike, getName(int) and 3787 * getName1_0(int), this method will return a name even for codepoints that 3788 * are not assigned a name in UnicodeData.txt. 3789 * 3790 * <p>The names are returned in the following order. 3791 * <ul> 3792 * <li> Most current Unicode name if there is any 3793 * <li> Unicode 1.0 name if there is any 3794 * <li> Extended name in the form of 3795 * "<codepoint_type-codepoint_hex_digits>". E.g., <noncharacter-fffe> 3796 * </ul> 3797 * Note calling any methods related to code point names, e.g. 
get*Name*() 3798 * incurs a one-time initialisation cost to construct the name tables. 3799 * @param ch the code point for which to get the name 3800 * @return a name for the argument codepoint 3801 */ getExtendedName(int ch)3802 public static String getExtendedName(int ch) { 3803 return UCharacterName.INSTANCE.getName(ch, UCharacterNameChoice.EXTENDED_CHAR_NAME); 3804 } 3805 3806 /** 3807 * <strong>[icu]</strong> Returns the corrected name from NameAliases.txt if there is one. 3808 * Returns null if the character is unassigned or outside the range 3809 * UCharacter.MIN_VALUE and UCharacter.MAX_VALUE or does not have a name. 3810 * <br> 3811 * Note calling any methods related to code point names, e.g. get*Name*() 3812 * incurs a one-time initialisation cost to construct the name tables. 3813 * @param ch the code point for which to get the name alias 3814 * @return Unicode name alias, or null 3815 */ getNameAlias(int ch)3816 public static String getNameAlias(int ch) 3817 { 3818 return UCharacterName.INSTANCE.getName(ch, UCharacterNameChoice.CHAR_NAME_ALIAS); 3819 } 3820 3821 /** 3822 * <strong>[icu]</strong> Returns null. 3823 * Used to return the ISO 10646 comment for a character. 3824 * The Unicode ISO_Comment property is deprecated and has no values. 3825 * 3826 * @param ch The code point for which to get the ISO comment. 3827 * It must be the case that {@code 0 <= ch <= 0x10ffff}. 3828 * @return null 3829 * @deprecated ICU 49 3830 * @hide original deprecated declaration 3831 */ 3832 @Deprecated getISOComment(int ch)3833 public static String getISOComment(int ch) 3834 { 3835 return null; 3836 } 3837 3838 /** 3839 * <strong>[icu]</strong> <p>Finds a Unicode code point by its most current Unicode name and 3840 * return its code point value. All Unicode names are in uppercase. 3841 * Note calling any methods related to code point names, e.g. get*Name*() 3842 * incurs a one-time initialisation cost to construct the name tables. 
3843 * @param name most current Unicode character name whose code point is to 3844 * be returned 3845 * @return code point or -1 if name is not found 3846 */ getCharFromName(String name)3847 public static int getCharFromName(String name){ 3848 return UCharacterName.INSTANCE.getCharFromName( 3849 UCharacterNameChoice.UNICODE_CHAR_NAME, name); 3850 } 3851 3852 /** 3853 * <strong>[icu]</strong> Returns -1. 3854 * <p>Used to find a Unicode character by its version 1.0 Unicode name and return 3855 * its code point value. 3856 * @param name Unicode 1.0 code point name whose code point is to be 3857 * returned 3858 * @return -1 3859 * @deprecated ICU 49 3860 * @see #getName1_0(int) 3861 * @hide original deprecated declaration 3862 */ 3863 @Deprecated getCharFromName1_0(String name)3864 public static int getCharFromName1_0(String name){ 3865 return -1; 3866 } 3867 3868 /** 3869 * <strong>[icu]</strong> <p>Find a Unicode character by either its name and return its code 3870 * point value. All Unicode names are in uppercase. 3871 * Extended names are all lowercase except for numbers and are contained 3872 * within angle brackets. 3873 * The names are searched in the following order 3874 * <ul> 3875 * <li> Most current Unicode name if there is any 3876 * <li> Unicode 1.0 name if there is any 3877 * <li> Extended name in the form of 3878 * "<codepoint_type-codepoint_hex_digits>". E.g. <noncharacter-FFFE> 3879 * </ul> 3880 * Note calling any methods related to code point names, e.g. get*Name*() 3881 * incurs a one-time initialisation cost to construct the name tables. 3882 * @param name codepoint name 3883 * @return code point associated with the name or -1 if the name is not 3884 * found. 
3885 */ getCharFromExtendedName(String name)3886 public static int getCharFromExtendedName(String name){ 3887 return UCharacterName.INSTANCE.getCharFromName( 3888 UCharacterNameChoice.EXTENDED_CHAR_NAME, name); 3889 } 3890 3891 /** 3892 * <strong>[icu]</strong> <p>Find a Unicode character by its corrected name alias and return 3893 * its code point value. All Unicode names are in uppercase. 3894 * Note calling any methods related to code point names, e.g. get*Name*() 3895 * incurs a one-time initialisation cost to construct the name tables. 3896 * @param name Unicode name alias whose code point is to be returned 3897 * @return code point or -1 if name is not found 3898 */ getCharFromNameAlias(String name)3899 public static int getCharFromNameAlias(String name){ 3900 return UCharacterName.INSTANCE.getCharFromName(UCharacterNameChoice.CHAR_NAME_ALIAS, name); 3901 } 3902 3903 /** 3904 * <strong>[icu]</strong> Return the Unicode name for a given property, as given in the 3905 * Unicode database file PropertyAliases.txt. Most properties 3906 * have more than one name. The nameChoice determines which one 3907 * is returned. 3908 * 3909 * In addition, this function maps the property 3910 * UProperty.GENERAL_CATEGORY_MASK to the synthetic names "gcm" / 3911 * "General_Category_Mask". These names are not in 3912 * PropertyAliases.txt. 3913 * 3914 * @param property UProperty selector. 3915 * 3916 * @param nameChoice UProperty.NameChoice selector for which name 3917 * to get. All properties have a long name. Most have a short 3918 * name, but some do not. Unicode allows for additional names; if 3919 * present these will be returned by UProperty.NameChoice.LONG + i, 3920 * where i=1, 2,... 3921 * 3922 * @return a name, or null if Unicode explicitly defines no name 3923 * ("n/a") for a given property/nameChoice. If a given nameChoice 3924 * throws an exception, then all larger values of nameChoice will 3925 * throw an exception. 
If null is returned for a given 3926 * nameChoice, then other nameChoice values may return non-null 3927 * results. 3928 * 3929 * @exception IllegalArgumentException thrown if property or 3930 * nameChoice are invalid. 3931 * 3932 * @see UProperty 3933 * @see UProperty.NameChoice 3934 */ getPropertyName(int property, int nameChoice)3935 public static String getPropertyName(int property, 3936 int nameChoice) { 3937 return UPropertyAliases.INSTANCE.getPropertyName(property, nameChoice); 3938 } 3939 3940 /** 3941 * <strong>[icu]</strong> Return the UProperty selector for a given property name, as 3942 * specified in the Unicode database file PropertyAliases.txt. 3943 * Short, long, and any other variants are recognized. 3944 * 3945 * In addition, this function maps the synthetic names "gcm" / 3946 * "General_Category_Mask" to the property 3947 * UProperty.GENERAL_CATEGORY_MASK. These names are not in 3948 * PropertyAliases.txt. 3949 * 3950 * @param propertyAlias the property name to be matched. The name 3951 * is compared using "loose matching" as described in 3952 * PropertyAliases.txt. 3953 * 3954 * @return a UProperty enum. 3955 * 3956 * @exception IllegalArgumentException thrown if propertyAlias 3957 * is not recognized. 3958 * 3959 * @see UProperty 3960 */ getPropertyEnum(CharSequence propertyAlias)3961 public static int getPropertyEnum(CharSequence propertyAlias) { 3962 int propEnum = UPropertyAliases.INSTANCE.getPropertyEnum(propertyAlias); 3963 if (propEnum == UProperty.UNDEFINED) { 3964 throw new IllegalIcuArgumentException("Invalid name: " + propertyAlias); 3965 } 3966 return propEnum; 3967 } 3968 3969 /** 3970 * <strong>[icu]</strong> Return the Unicode name for a given property value, as given in 3971 * the Unicode database file PropertyValueAliases.txt. Most 3972 * values have more than one name. The nameChoice determines 3973 * which one is returned. 
3974 * 3975 * Note: Some of the names in PropertyValueAliases.txt can only be 3976 * retrieved using UProperty.GENERAL_CATEGORY_MASK, not 3977 * UProperty.GENERAL_CATEGORY. These include: "C" / "Other", "L" / 3978 * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P" 3979 * / "Punctuation", "S" / "Symbol", and "Z" / "Separator". 3980 * 3981 * @param property UProperty selector constant. 3982 * UProperty.INT_START <= property < UProperty.INT_LIMIT or 3983 * UProperty.BINARY_START <= property < UProperty.BINARY_LIMIT or 3984 * UProperty.MASK_START < = property < UProperty.MASK_LIMIT. 3985 * If out of range, null is returned. 3986 * 3987 * @param value selector for a value for the given property. In 3988 * general, valid values range from 0 up to some maximum. There 3989 * are a few exceptions: (1.) UProperty.BLOCK values begin at the 3990 * non-zero value BASIC_LATIN.getID(). (2.) 3991 * UProperty.CANONICAL_COMBINING_CLASS values are not contiguous 3992 * and range from 0..240. (3.) UProperty.GENERAL_CATEGORY_MASK values 3993 * are mask values produced by left-shifting 1 by 3994 * UCharacter.getType(). This allows grouped categories such as 3995 * [:L:] to be represented. Mask values are non-contiguous. 3996 * 3997 * @param nameChoice UProperty.NameChoice selector for which name 3998 * to get. All values have a long name. Most have a short name, 3999 * but some do not. Unicode allows for additional names; if 4000 * present these will be returned by UProperty.NameChoice.LONG + i, 4001 * where i=1, 2,... 4002 * 4003 * @return a name, or null if Unicode explicitly defines no name 4004 * ("n/a") for a given property/value/nameChoice. If a given 4005 * nameChoice throws an exception, then all larger values of 4006 * nameChoice will throw an exception. If null is returned for a 4007 * given nameChoice, then other nameChoice values may return 4008 * non-null results. 
4009 * 4010 * @exception IllegalArgumentException thrown if property, value, 4011 * or nameChoice are invalid. 4012 * 4013 * @see UProperty 4014 * @see UProperty.NameChoice 4015 */ getPropertyValueName(int property, int value, int nameChoice)4016 public static String getPropertyValueName(int property, 4017 int value, 4018 int nameChoice) 4019 { 4020 if ((property == UProperty.CANONICAL_COMBINING_CLASS 4021 || property == UProperty.LEAD_CANONICAL_COMBINING_CLASS 4022 || property == UProperty.TRAIL_CANONICAL_COMBINING_CLASS) 4023 && value >= UCharacter.getIntPropertyMinValue( 4024 UProperty.CANONICAL_COMBINING_CLASS) 4025 && value <= UCharacter.getIntPropertyMaxValue( 4026 UProperty.CANONICAL_COMBINING_CLASS) 4027 && nameChoice >= 0 && nameChoice < UProperty.NameChoice.COUNT) { 4028 // this is hard coded for the valid cc 4029 // because PropertyValueAliases.txt does not contain all of them 4030 try { 4031 return UPropertyAliases.INSTANCE.getPropertyValueName(property, value, 4032 nameChoice); 4033 } 4034 catch (IllegalArgumentException e) { 4035 return null; 4036 } 4037 } 4038 return UPropertyAliases.INSTANCE.getPropertyValueName(property, value, nameChoice); 4039 } 4040 4041 /** 4042 * <strong>[icu]</strong> Return the property value integer for a given value name, as 4043 * specified in the Unicode database file PropertyValueAliases.txt. 4044 * Short, long, and any other variants are recognized. 4045 * 4046 * Note: Some of the names in PropertyValueAliases.txt will only be 4047 * recognized with UProperty.GENERAL_CATEGORY_MASK, not 4048 * UProperty.GENERAL_CATEGORY. These include: "C" / "Other", "L" / 4049 * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P" 4050 * / "Punctuation", "S" / "Symbol", and "Z" / "Separator". 4051 * 4052 * @param property UProperty selector constant. 
4053 * UProperty.INT_START <= property < UProperty.INT_LIMIT or 4054 * UProperty.BINARY_START <= property < UProperty.BINARY_LIMIT or 4055 * UProperty.MASK_START < = property < UProperty.MASK_LIMIT. 4056 * Only these properties can be enumerated. 4057 * 4058 * @param valueAlias the value name to be matched. The name is 4059 * compared using "loose matching" as described in 4060 * PropertyValueAliases.txt. 4061 * 4062 * @return a value integer. Note: UProperty.GENERAL_CATEGORY 4063 * values are mask values produced by left-shifting 1 by 4064 * UCharacter.getType(). This allows grouped categories such as 4065 * [:L:] to be represented. 4066 * 4067 * @see UProperty 4068 * @throws IllegalArgumentException if property is not a valid UProperty 4069 * selector or valueAlias is not a value of this property 4070 */ getPropertyValueEnum(int property, CharSequence valueAlias)4071 public static int getPropertyValueEnum(int property, CharSequence valueAlias) { 4072 int propEnum = UPropertyAliases.INSTANCE.getPropertyValueEnum(property, valueAlias); 4073 if (propEnum == UProperty.UNDEFINED) { 4074 throw new IllegalIcuArgumentException("Invalid name: " + valueAlias); 4075 } 4076 return propEnum; 4077 } 4078 4079 /** 4080 * Same as {@link #getPropertyValueEnum(int, CharSequence)}, except doesn't throw exception. Instead, returns UProperty.UNDEFINED. 4081 * @param property Same as {@link #getPropertyValueEnum(int, CharSequence)} 4082 * @param valueAlias Same as {@link #getPropertyValueEnum(int, CharSequence)} 4083 * @return returns UProperty.UNDEFINED if the value is not valid, otherwise the value. 4084 * @deprecated This API is ICU internal only. 
4085 * @hide original deprecated declaration 4086 * @hide draft / provisional / internal are hidden on Android 4087 */ 4088 @Deprecated getPropertyValueEnumNoThrow(int property, CharSequence valueAlias)4089 public static int getPropertyValueEnumNoThrow(int property, CharSequence valueAlias) { 4090 return UPropertyAliases.INSTANCE.getPropertyValueEnumNoThrow(property, valueAlias); 4091 } 4092 4093 4094 /** 4095 * <strong>[icu]</strong> Returns a code point corresponding to the two surrogate code units. 4096 * 4097 * @param lead the lead char 4098 * @param trail the trail char 4099 * @return code point if surrogate characters are valid. 4100 * @exception IllegalArgumentException thrown when the code units do 4101 * not form a valid code point 4102 */ getCodePoint(char lead, char trail)4103 public static int getCodePoint(char lead, char trail) 4104 { 4105 if (Character.isSurrogatePair(lead, trail)) { 4106 return Character.toCodePoint(lead, trail); 4107 } 4108 throw new IllegalArgumentException("Illegal surrogate characters"); 4109 } 4110 4111 /** 4112 * <strong>[icu]</strong> Returns the code point corresponding to the BMP code point. 4113 * 4114 * @param char16 the BMP code point 4115 * @return code point if argument is a valid character. 4116 * @exception IllegalArgumentException thrown when char16 is not a valid 4117 * code point 4118 */ getCodePoint(char char16)4119 public static int getCodePoint(char char16) 4120 { 4121 if (UCharacter.isLegal(char16)) { 4122 return char16; 4123 } 4124 throw new IllegalArgumentException("Illegal codepoint"); 4125 } 4126 4127 /** 4128 * Implementation of UCaseProps.ContextIterator, iterates over a String. 4129 * See ustrcase.c/utf16_caseContextIterator(). 4130 */ 4131 private static class StringContextIterator implements UCaseProps.ContextIterator { 4132 /** 4133 * Constructor. 4134 * @param s String to iterate over. 
4135 */ StringContextIterator(String s)4136 StringContextIterator(String s) { 4137 this.s=s; 4138 limit=s.length(); 4139 cpStart=cpLimit=index=0; 4140 dir=0; 4141 } 4142 4143 /** 4144 * Set the iteration limit for nextCaseMapCP() to an index within the string. 4145 * If the limit parameter is negative or past the string, then the 4146 * string length is restored as the iteration limit. 4147 * 4148 * <p>This limit does not affect the next() function which always 4149 * iterates to the very end of the string. 4150 * 4151 * @param lim The iteration limit. 4152 */ setLimit(int lim)4153 public void setLimit(int lim) { 4154 if(0<=lim && lim<=s.length()) { 4155 limit=lim; 4156 } else { 4157 limit=s.length(); 4158 } 4159 } 4160 4161 /** 4162 * Move to the iteration limit without fetching code points up to there. 4163 */ moveToLimit()4164 public void moveToLimit() { 4165 cpStart=cpLimit=limit; 4166 } 4167 4168 /** 4169 * Iterate forward through the string to fetch the next code point 4170 * to be case-mapped, and set the context indexes for it. 4171 * 4172 * <p>When the iteration limit is reached (and -1 is returned), 4173 * getCPStart() will be at the iteration limit. 4174 * 4175 * <p>Iteration with next() does not affect the position for nextCaseMapCP(). 4176 * 4177 * @return The next code point to be case-mapped, or <0 when the iteration is done. 4178 */ nextCaseMapCP()4179 public int nextCaseMapCP() { 4180 cpStart=cpLimit; 4181 if(cpLimit<limit) { 4182 int c=s.codePointAt(cpLimit); 4183 cpLimit+=Character.charCount(c); 4184 return c; 4185 } else { 4186 return -1; 4187 } 4188 } 4189 4190 /** 4191 * Returns the start of the code point that was last returned 4192 * by nextCaseMapCP(). 4193 */ getCPStart()4194 public int getCPStart() { 4195 return cpStart; 4196 } 4197 4198 /** 4199 * Returns the limit of the code point that was last returned 4200 * by nextCaseMapCP(). 
4201 */ getCPLimit()4202 public int getCPLimit() { 4203 return cpLimit; 4204 } 4205 4206 // implement UCaseProps.ContextIterator 4207 // The following code is not used anywhere in this private class reset(int direction)4208 public void reset(int direction) { 4209 if(direction>0) { 4210 /* reset for forward iteration */ 4211 dir=1; 4212 index=cpLimit; 4213 } else if(direction<0) { 4214 /* reset for backward iteration */ 4215 dir=-1; 4216 index=cpStart; 4217 } else { 4218 // not a valid direction 4219 dir=0; 4220 index=0; 4221 } 4222 } 4223 next()4224 public int next() { 4225 int c; 4226 4227 if(dir>0 && index<s.length()) { 4228 c=s.codePointAt(index); 4229 index+=Character.charCount(c); 4230 return c; 4231 } else if(dir<0 && index>0) { 4232 c=s.codePointBefore(index); 4233 index-=Character.charCount(c); 4234 return c; 4235 } 4236 return -1; 4237 } 4238 4239 // variables 4240 protected String s; 4241 protected int index, limit, cpStart, cpLimit; 4242 protected int dir; // 0=initial state >0=forward <0=backward 4243 } 4244 4245 /** 4246 * Returns the uppercase version of the argument string. 4247 * Casing is dependent on the default locale and context-sensitive. 4248 * @param str source string to be performed on 4249 * @return uppercase version of the argument string 4250 */ toUpperCase(String str)4251 public static String toUpperCase(String str) 4252 { 4253 return toUpperCase(ULocale.getDefault(), str); 4254 } 4255 4256 /** 4257 * Returns the lowercase version of the argument string. 4258 * Casing is dependent on the default locale and context-sensitive 4259 * @param str source string to be performed on 4260 * @return lowercase version of the argument string 4261 */ toLowerCase(String str)4262 public static String toLowerCase(String str) 4263 { 4264 return toLowerCase(ULocale.getDefault(), str); 4265 } 4266 4267 /** 4268 * <p>Returns the titlecase version of the argument string. 
4269 * <p>Position for titlecasing is determined by the argument break 4270 * iterator, hence the user can customize his break iterator for 4271 * a specialized titlecasing. In this case only the forward iteration 4272 * needs to be implemented. 4273 * If the break iterator passed in is null, the default Unicode algorithm 4274 * will be used to determine the titlecase positions. 4275 * 4276 * <p>Only positions returned by the break iterator will be title cased, 4277 * character in between the positions will all be in lower case. 4278 * <p>Casing is dependent on the default locale and context-sensitive 4279 * @param str source string to be performed on 4280 * @param breakiter break iterator to determine the positions in which 4281 * the character should be title cased. 4282 * @return lowercase version of the argument string 4283 */ toTitleCase(String str, BreakIterator breakiter)4284 public static String toTitleCase(String str, BreakIterator breakiter) 4285 { 4286 return toTitleCase(ULocale.getDefault(), str, breakiter); 4287 } 4288 4289 /** 4290 * Returns the uppercase version of the argument string. 4291 * Casing is dependent on the argument locale and context-sensitive. 4292 * @param locale which string is to be converted in 4293 * @param str source string to be performed on 4294 * @return uppercase version of the argument string 4295 */ toUpperCase(Locale locale, String str)4296 public static String toUpperCase(Locale locale, String str) 4297 { 4298 return toUpperCase(ULocale.forLocale(locale), str); 4299 } 4300 4301 /** 4302 * Returns the uppercase version of the argument string. 4303 * Casing is dependent on the argument locale and context-sensitive. 
4304 * @param locale which string is to be converted in 4305 * @param str source string to be performed on 4306 * @return uppercase version of the argument string 4307 */ toUpperCase(ULocale locale, String str)4308 public static String toUpperCase(ULocale locale, String str) { 4309 StringContextIterator iter = new StringContextIterator(str); 4310 StringBuilder result = new StringBuilder(str.length()); 4311 int[] locCache = new int[1]; 4312 int c; 4313 4314 if (locale == null) { 4315 locale = ULocale.getDefault(); 4316 } 4317 locCache[0]=0; 4318 4319 while((c=iter.nextCaseMapCP())>=0) { 4320 c = UCaseProps.INSTANCE.toFullUpper(c, iter, result, locale, locCache); 4321 4322 /* decode the result */ 4323 if(c<0) { 4324 /* (not) original code point */ 4325 c=~c; 4326 } else if(c<=UCaseProps.MAX_STRING_LENGTH) { 4327 /* mapping already appended to result */ 4328 continue; 4329 /* } else { append single-code point mapping */ 4330 } 4331 result.appendCodePoint(c); 4332 } 4333 return result.toString(); 4334 } 4335 4336 /** 4337 * Returns the lowercase version of the argument string. 4338 * Casing is dependent on the argument locale and context-sensitive 4339 * @param locale which string is to be converted in 4340 * @param str source string to be performed on 4341 * @return lowercase version of the argument string 4342 */ toLowerCase(Locale locale, String str)4343 public static String toLowerCase(Locale locale, String str) 4344 { 4345 return toLowerCase(ULocale.forLocale(locale), str); 4346 } 4347 4348 /** 4349 * Returns the lowercase version of the argument string. 
4350 * Casing is dependent on the argument locale and context-sensitive 4351 * @param locale which string is to be converted in 4352 * @param str source string to be performed on 4353 * @return lowercase version of the argument string 4354 */ toLowerCase(ULocale locale, String str)4355 public static String toLowerCase(ULocale locale, String str) { 4356 StringContextIterator iter = new StringContextIterator(str); 4357 StringBuilder result = new StringBuilder(str.length()); 4358 int[] locCache = new int[1]; 4359 int c; 4360 4361 if (locale == null) { 4362 locale = ULocale.getDefault(); 4363 } 4364 locCache[0]=0; 4365 4366 while((c=iter.nextCaseMapCP())>=0) { 4367 c = UCaseProps.INSTANCE.toFullLower(c, iter, result, locale, locCache); 4368 4369 /* decode the result */ 4370 if(c<0) { 4371 /* (not) original code point */ 4372 c=~c; 4373 } else if(c<=UCaseProps.MAX_STRING_LENGTH) { 4374 /* mapping already appended to result */ 4375 continue; 4376 /* } else { append single-code point mapping */ 4377 } 4378 result.appendCodePoint(c); 4379 } 4380 return result.toString(); 4381 } 4382 4383 /** 4384 * <p>Returns the titlecase version of the argument string. 4385 * <p>Position for titlecasing is determined by the argument break 4386 * iterator, hence the user can customize his break iterator for 4387 * a specialized titlecasing. In this case only the forward iteration 4388 * needs to be implemented. 4389 * If the break iterator passed in is null, the default Unicode algorithm 4390 * will be used to determine the titlecase positions. 4391 * 4392 * <p>Only positions returned by the break iterator will be title cased, 4393 * character in between the positions will all be in lower case. 
4394 * <p>Casing is dependent on the argument locale and context-sensitive 4395 * @param locale which string is to be converted in 4396 * @param str source string to be performed on 4397 * @param breakiter break iterator to determine the positions in which 4398 * the character should be title cased. 4399 * @return lowercase version of the argument string 4400 */ toTitleCase(Locale locale, String str, BreakIterator breakiter)4401 public static String toTitleCase(Locale locale, String str, 4402 BreakIterator breakiter) 4403 { 4404 return toTitleCase(ULocale.forLocale(locale), str, breakiter); 4405 } 4406 4407 /** 4408 * <p>Returns the titlecase version of the argument string. 4409 * <p>Position for titlecasing is determined by the argument break 4410 * iterator, hence the user can customize his break iterator for 4411 * a specialized titlecasing. In this case only the forward iteration 4412 * needs to be implemented. 4413 * If the break iterator passed in is null, the default Unicode algorithm 4414 * will be used to determine the titlecase positions. 4415 * 4416 * <p>Only positions returned by the break iterator will be title cased, 4417 * character in between the positions will all be in lower case. 4418 * <p>Casing is dependent on the argument locale and context-sensitive 4419 * @param locale which string is to be converted in 4420 * @param str source string to be performed on 4421 * @param titleIter break iterator to determine the positions in which 4422 * the character should be title cased. 4423 * @return lowercase version of the argument string 4424 */ toTitleCase(ULocale locale, String str, BreakIterator titleIter)4425 public static String toTitleCase(ULocale locale, String str, 4426 BreakIterator titleIter) { 4427 return toTitleCase(locale, str, titleIter, 0); 4428 } 4429 4430 /** 4431 * <p>Returns the titlecase version of the argument string. 
4432 * <p>Position for titlecasing is determined by the argument break 4433 * iterator, hence the user can customize his break iterator for 4434 * a specialized titlecasing. In this case only the forward iteration 4435 * needs to be implemented. 4436 * If the break iterator passed in is null, the default Unicode algorithm 4437 * will be used to determine the titlecase positions. 4438 * 4439 * <p>Only positions returned by the break iterator will be title cased, 4440 * character in between the positions will all be in lower case. 4441 * <p>Casing is dependent on the argument locale and context-sensitive 4442 * @param locale which string is to be converted in 4443 * @param str source string to be performed on 4444 * @param titleIter break iterator to determine the positions in which 4445 * the character should be title cased. 4446 * @param options bit set to modify the titlecasing operation 4447 * @return lowercase version of the argument string 4448 * @see #TITLECASE_NO_LOWERCASE 4449 * @see #TITLECASE_NO_BREAK_ADJUSTMENT 4450 */ toTitleCase(ULocale locale, String str, BreakIterator titleIter, int options)4451 public static String toTitleCase(ULocale locale, String str, 4452 BreakIterator titleIter, 4453 int options) { 4454 StringContextIterator iter = new StringContextIterator(str); 4455 StringBuilder result = new StringBuilder(str.length()); 4456 int[] locCache = new int[1]; 4457 int c, nc, srcLength = str.length(); 4458 4459 if (locale == null) { 4460 locale = ULocale.getDefault(); 4461 } 4462 locCache[0]=0; 4463 4464 if(titleIter == null) { 4465 titleIter = BreakIterator.getWordInstance(locale); 4466 } 4467 titleIter.setText(str); 4468 4469 int prev, titleStart, index; 4470 boolean isFirstIndex; 4471 boolean isDutch = locale.getLanguage().equals("nl"); 4472 boolean FirstIJ = true; 4473 4474 /* set up local variables */ 4475 prev=0; 4476 isFirstIndex=true; 4477 4478 /* titlecasing loop */ 4479 while(prev<srcLength) { 4480 /* find next index where to titlecase */ 
4481 if(isFirstIndex) { 4482 isFirstIndex=false; 4483 index=titleIter.first(); 4484 } else { 4485 index=titleIter.next(); 4486 } 4487 if(index==BreakIterator.DONE || index>srcLength) { 4488 index=srcLength; 4489 } 4490 4491 /* 4492 * Unicode 4 & 5 section 3.13 Default Case Operations: 4493 * 4494 * R3 toTitlecase(X): Find the word boundaries based on Unicode Standard Annex 4495 * #29, "Text Boundaries." Between each pair of word boundaries, find the first 4496 * cased character F. If F exists, map F to default_title(F); then map each 4497 * subsequent character C to default_lower(C). 4498 * 4499 * In this implementation, segment [prev..index[ into 3 parts: 4500 * a) uncased characters (copy as-is) [prev..titleStart[ 4501 * b) first case letter (titlecase) [titleStart..titleLimit[ 4502 * c) subsequent characters (lowercase) [titleLimit..index[ 4503 */ 4504 if(prev<index) { 4505 /* find and copy uncased characters [prev..titleStart[ */ 4506 iter.setLimit(index); 4507 c=iter.nextCaseMapCP(); 4508 if((options&TITLECASE_NO_BREAK_ADJUSTMENT)==0 4509 && UCaseProps.NONE==UCaseProps.INSTANCE.getType(c)) { 4510 while((c=iter.nextCaseMapCP())>=0 4511 && UCaseProps.NONE==UCaseProps.INSTANCE.getType(c)) {} 4512 titleStart=iter.getCPStart(); 4513 if(prev<titleStart) { 4514 result.append(str, prev, titleStart); 4515 } 4516 } else { 4517 titleStart=prev; 4518 } 4519 4520 if(titleStart<index) { 4521 FirstIJ = true; 4522 /* titlecase c which is from titleStart */ 4523 c = UCaseProps.INSTANCE.toFullTitle(c, iter, result, locale, locCache); 4524 4525 /* decode the result and lowercase up to index */ 4526 for(;;) { 4527 if(c<0) { 4528 /* (not) original code point */ 4529 c=~c; 4530 result.appendCodePoint(c); 4531 } else if(c<=UCaseProps.MAX_STRING_LENGTH) { 4532 /* mapping already appended to result */ 4533 } else { 4534 /* append single-code point mapping */ 4535 result.appendCodePoint(c); 4536 } 4537 4538 if((options&TITLECASE_NO_LOWERCASE)!=0) { 4539 /* Optionally just copy the rest 
of the word unchanged. */ 4540 4541 int titleLimit=iter.getCPLimit(); 4542 if(titleLimit<index) { 4543 /* Special Case - Dutch IJ Titlecasing */ 4544 if (isDutch && c == 0x0049 && str.charAt(titleLimit) == 'j') { 4545 result.append('J').append(str, titleLimit + 1, index); 4546 } else { 4547 result.append(str, titleLimit, index); 4548 } 4549 } 4550 iter.moveToLimit(); 4551 break; 4552 } else if((nc=iter.nextCaseMapCP())>=0) { 4553 if (isDutch && (nc == 0x004A || nc == 0x006A) 4554 && (c == 0x0049) && (FirstIJ == true)) { 4555 c = 0x004A; /* J */ 4556 FirstIJ = false; 4557 } else { 4558 /* Normal operation: Lowercase the rest of the word. */ 4559 c = UCaseProps.INSTANCE.toFullLower(nc, iter, result, locale, 4560 locCache); 4561 } 4562 } else { 4563 break; 4564 } 4565 } 4566 } 4567 } 4568 4569 prev=index; 4570 } 4571 return result.toString(); 4572 } 4573 4574 4575 private static final int BREAK_MASK = 4576 (1<<UCharacterCategory.DECIMAL_DIGIT_NUMBER) 4577 | (1<<UCharacterCategory.OTHER_LETTER) 4578 | (1<<UCharacterCategory.MODIFIER_LETTER); 4579 4580 /** 4581 * Return a string with just the first word titlecased, for menus and UI, etc. This does not affect most of the string, 4582 * and sometimes has no effect at all; the original string is returned whenever casing 4583 * would not be appropriate for the first word (such as for CJK characters or initial numbers). 4584 * Initial non-letters are skipped in order to find the character to change. 4585 * Characters past the first affected are left untouched: see also TITLECASE_NO_LOWERCASE. 
4586 * <p>Examples: 4587 * <table border='1'><tr><th>Source</th><th>Result</th><th>Locale</th></tr> 4588 * <tr><td>anglo-American locale</td><td>Anglo-American locale</td></tr> 4589 * <tr><td>“contact us”</td><td>“Contact us”</td></tr> 4590 * <tr><td>49ers win!</td><td>49ers win!</td></tr> 4591 * <tr><td>丰(abc)</td><td>丰(abc)</td></tr> 4592 * <tr><td>«ijs»</td><td>«Ijs»</td></tr> 4593 * <tr><td>«ijs»</td><td>«IJs»</td><td>nl-BE</td></tr> 4594 * <tr><td>«ijs»</td><td>«İjs»</td><td>tr-DE</td></tr> 4595 * </table> 4596 * @param locale the locale for accessing exceptional behavior (eg for tr). 4597 * @param str the source string to change 4598 * @return the modified string, or the original if no modifications were necessary. 4599 * @deprecated ICU internal only 4600 * @hide original deprecated declaration 4601 * @hide draft / provisional / internal are hidden on Android 4602 */ 4603 @Deprecated toTitleFirst(ULocale locale, String str)4604 public static String toTitleFirst(ULocale locale, String str) { 4605 int c = 0; 4606 for (int i = 0; i < str.length(); i += UCharacter.charCount(c)) { 4607 c = UCharacter.codePointAt(str, i); 4608 int propertyMask = UCharacter.getIntPropertyValue(c, UProperty.GENERAL_CATEGORY_MASK); 4609 if ((propertyMask & BREAK_MASK) != 0) { // handle "49ers", initial CJK 4610 break; 4611 } 4612 if (UCaseProps.INSTANCE.getType(c) == UCaseProps.NONE) { 4613 continue; 4614 } 4615 4616 // we now have the first cased character 4617 // What we really want is something like: 4618 // String titled = UCharacter.toTitleCase(locale, str, i, outputCharsTaken); 4619 // That is, just give us the titlecased string, for the locale, at i and following, 4620 // and tell us how many characters are replaced. 
4621 // The following won't work completely: it needs some more substantial changes to UCaseProps 4622 4623 String substring = str.substring(i, i+UCharacter.charCount(c)); 4624 String titled = UCharacter.toTitleCase(locale, substring, BreakIterator.getSentenceInstance(locale), 0); 4625 4626 // skip if no change 4627 if (titled.codePointAt(0) == c) { 4628 // Using 0 is safe, since any change in titling will not have first initial character 4629 break; 4630 } 4631 StringBuilder result = new StringBuilder(str.length()).append(str, 0, i); 4632 int startOfSuffix; 4633 4634 // handle dutch, but check first for 'i', since that's faster. Should be built into UCaseProps. 4635 4636 if (c == 'i' && locale.getLanguage().equals("nl") && i < str.length() && str.charAt(i+1) == 'j') { 4637 result.append("IJ"); 4638 startOfSuffix = 2; 4639 } else { 4640 result.append(titled); 4641 startOfSuffix = i + UCharacter.charCount(c); 4642 } 4643 4644 // add the remainder, and return 4645 return result.append(str, startOfSuffix, str.length()).toString(); 4646 } 4647 return str; // no change 4648 } 4649 4650 /** 4651 * <strong>[icu]</strong> <p>Returns the titlecase version of the argument string. 4652 * <p>Position for titlecasing is determined by the argument break 4653 * iterator, hence the user can customize his break iterator for 4654 * a specialized titlecasing. In this case only the forward iteration 4655 * needs to be implemented. 4656 * If the break iterator passed in is null, the default Unicode algorithm 4657 * will be used to determine the titlecase positions. 4658 * 4659 * <p>Only positions returned by the break iterator will be title cased, 4660 * character in between the positions will all be in lower case. 
4661 * <p>Casing is dependent on the argument locale and context-sensitive 4662 * @param locale which string is to be converted in 4663 * @param str source string to be performed on 4664 * @param titleIter break iterator to determine the positions in which 4665 * the character should be title cased. 4666 * @param options bit set to modify the titlecasing operation 4667 * @return lowercase version of the argument string 4668 * @see #TITLECASE_NO_LOWERCASE 4669 * @see #TITLECASE_NO_BREAK_ADJUSTMENT 4670 */ toTitleCase(Locale locale, String str, BreakIterator titleIter, int options)4671 public static String toTitleCase(Locale locale, String str, 4672 BreakIterator titleIter, 4673 int options) { 4674 return toTitleCase(ULocale.forLocale(locale), str, titleIter, options); 4675 } 4676 4677 /** 4678 * <strong>[icu]</strong> The given character is mapped to its case folding equivalent according 4679 * to UnicodeData.txt and CaseFolding.txt; if the character has no case 4680 * folding equivalent, the character itself is returned. 4681 * 4682 * <p>This function only returns the simple, single-code point case mapping. 4683 * Full case mappings should be used whenever possible because they produce 4684 * better results by working on whole strings. 4685 * They can map to a result string with a different length as appropriate. 4686 * Full case mappings are applied by the case mapping functions 4687 * that take String parameters rather than code points (int). 4688 * See also the User Guide chapter on C/POSIX migration: 4689 * http://www.icu-project.org/userguide/posix.html#case_mappings 4690 * 4691 * @param ch the character to be converted 4692 * @param defaultmapping Indicates whether the default mappings defined in 4693 * CaseFolding.txt are to be used, otherwise the 4694 * mappings for dotted I and dotless i marked with 4695 * 'T' in CaseFolding.txt are included. 4696 * @return the case folding equivalent of the character, if 4697 * any; otherwise the character itself. 
4698 * @see #foldCase(String, boolean) 4699 */ foldCase(int ch, boolean defaultmapping)4700 public static int foldCase(int ch, boolean defaultmapping) { 4701 return foldCase(ch, defaultmapping ? FOLD_CASE_DEFAULT : FOLD_CASE_EXCLUDE_SPECIAL_I); 4702 } 4703 4704 /** 4705 * <strong>[icu]</strong> The given string is mapped to its case folding equivalent according to 4706 * UnicodeData.txt and CaseFolding.txt; if any character has no case 4707 * folding equivalent, the character itself is returned. 4708 * "Full", multiple-code point case folding mappings are returned here. 4709 * For "simple" single-code point mappings use the API 4710 * foldCase(int ch, boolean defaultmapping). 4711 * @param str the String to be converted 4712 * @param defaultmapping Indicates whether the default mappings defined in 4713 * CaseFolding.txt are to be used, otherwise the 4714 * mappings for dotted I and dotless i marked with 4715 * 'T' in CaseFolding.txt are included. 4716 * @return the case folding equivalent of the character, if 4717 * any; otherwise the character itself. 4718 * @see #foldCase(int, boolean) 4719 */ foldCase(String str, boolean defaultmapping)4720 public static String foldCase(String str, boolean defaultmapping) { 4721 return foldCase(str, defaultmapping ? FOLD_CASE_DEFAULT : FOLD_CASE_EXCLUDE_SPECIAL_I); 4722 } 4723 4724 /** 4725 * <strong>[icu]</strong> Option value for case folding: use default mappings defined in 4726 * CaseFolding.txt. 4727 */ 4728 public static final int FOLD_CASE_DEFAULT = 0x0000; 4729 /** 4730 * <strong>[icu]</strong> Option value for case folding: 4731 * Use the modified set of mappings provided in CaseFolding.txt to handle dotted I 4732 * and dotless i appropriately for Turkic languages (tr, az). 4733 * 4734 * <p>Before Unicode 3.2, CaseFolding.txt contains mappings marked with 'I' that 4735 * are to be included for default mappings and 4736 * excluded for the Turkic-specific mappings. 
4737 * 4738 * <p>Unicode 3.2 CaseFolding.txt instead contains mappings marked with 'T' that 4739 * are to be excluded for default mappings and 4740 * included for the Turkic-specific mappings. 4741 */ 4742 public static final int FOLD_CASE_EXCLUDE_SPECIAL_I = 0x0001; 4743 4744 /** 4745 * <strong>[icu]</strong> The given character is mapped to its case folding equivalent according 4746 * to UnicodeData.txt and CaseFolding.txt; if the character has no case 4747 * folding equivalent, the character itself is returned. 4748 * 4749 * <p>This function only returns the simple, single-code point case mapping. 4750 * Full case mappings should be used whenever possible because they produce 4751 * better results by working on whole strings. 4752 * They can map to a result string with a different length as appropriate. 4753 * Full case mappings are applied by the case mapping functions 4754 * that take String parameters rather than code points (int). 4755 * See also the User Guide chapter on C/POSIX migration: 4756 * http://www.icu-project.org/userguide/posix.html#case_mappings 4757 * 4758 * @param ch the character to be converted 4759 * @param options A bit set for special processing. Currently the recognised options 4760 * are FOLD_CASE_EXCLUDE_SPECIAL_I and FOLD_CASE_DEFAULT 4761 * @return the case folding equivalent of the character, if any; otherwise the 4762 * character itself. 4763 * @see #foldCase(String, boolean) 4764 */ foldCase(int ch, int options)4765 public static int foldCase(int ch, int options) { 4766 return UCaseProps.INSTANCE.fold(ch, options); 4767 } 4768 4769 /** 4770 * <strong>[icu]</strong> The given string is mapped to its case folding equivalent according to 4771 * UnicodeData.txt and CaseFolding.txt; if any character has no case 4772 * folding equivalent, the character itself is returned. 4773 * "Full", multiple-code point case folding mappings are returned here. 
4774 * For "simple" single-code point mappings use the API 4775 * foldCase(int ch, boolean defaultmapping). 4776 * @param str the String to be converted 4777 * @param options A bit set for special processing. Currently the recognised options 4778 * are FOLD_CASE_EXCLUDE_SPECIAL_I and FOLD_CASE_DEFAULT 4779 * @return the case folding equivalent of the character, if any; otherwise the 4780 * character itself. 4781 * @see #foldCase(int, boolean) 4782 */ foldCase(String str, int options)4783 public static final String foldCase(String str, int options) { 4784 StringBuilder result = new StringBuilder(str.length()); 4785 int c, i, length; 4786 4787 length = str.length(); 4788 for(i=0; i<length;) { 4789 c=str.codePointAt(i); 4790 i+=Character.charCount(c); 4791 c = UCaseProps.INSTANCE.toFullFolding(c, result, options); 4792 4793 /* decode the result */ 4794 if(c<0) { 4795 /* (not) original code point */ 4796 c=~c; 4797 } else if(c<=UCaseProps.MAX_STRING_LENGTH) { 4798 /* mapping already appended to result */ 4799 continue; 4800 /* } else { append single-code point mapping */ 4801 } 4802 result.appendCodePoint(c); 4803 } 4804 return result.toString(); 4805 } 4806 4807 /** 4808 * <strong>[icu]</strong> Returns the numeric value of a Han character. 4809 * 4810 * <p>This returns the value of Han 'numeric' code points, 4811 * including those for zero, ten, hundred, thousand, ten thousand, 4812 * and hundred million. 4813 * This includes both the standard and 'checkwriting' 4814 * characters, the 'big circle' zero character, and the standard 4815 * zero character. 4816 * 4817 * <p>Note: The Unicode Standard has numeric values for more 4818 * Han characters recognized by this method 4819 * (see {@link #getNumericValue(int)} and the UCD file DerivedNumericValues.txt), 4820 * and a {@link android.icu.text.NumberFormat} can be used with 4821 * a Chinese {@link android.icu.text.NumberingSystem}. 
4822 * 4823 * @param ch code point to query 4824 * @return value if it is a Han 'numeric character,' otherwise return -1. 4825 */ getHanNumericValue(int ch)4826 public static int getHanNumericValue(int ch) 4827 { 4828 switch(ch) 4829 { 4830 case IDEOGRAPHIC_NUMBER_ZERO_ : 4831 case CJK_IDEOGRAPH_COMPLEX_ZERO_ : 4832 return 0; // Han Zero 4833 case CJK_IDEOGRAPH_FIRST_ : 4834 case CJK_IDEOGRAPH_COMPLEX_ONE_ : 4835 return 1; // Han One 4836 case CJK_IDEOGRAPH_SECOND_ : 4837 case CJK_IDEOGRAPH_COMPLEX_TWO_ : 4838 return 2; // Han Two 4839 case CJK_IDEOGRAPH_THIRD_ : 4840 case CJK_IDEOGRAPH_COMPLEX_THREE_ : 4841 return 3; // Han Three 4842 case CJK_IDEOGRAPH_FOURTH_ : 4843 case CJK_IDEOGRAPH_COMPLEX_FOUR_ : 4844 return 4; // Han Four 4845 case CJK_IDEOGRAPH_FIFTH_ : 4846 case CJK_IDEOGRAPH_COMPLEX_FIVE_ : 4847 return 5; // Han Five 4848 case CJK_IDEOGRAPH_SIXTH_ : 4849 case CJK_IDEOGRAPH_COMPLEX_SIX_ : 4850 return 6; // Han Six 4851 case CJK_IDEOGRAPH_SEVENTH_ : 4852 case CJK_IDEOGRAPH_COMPLEX_SEVEN_ : 4853 return 7; // Han Seven 4854 case CJK_IDEOGRAPH_EIGHTH_ : 4855 case CJK_IDEOGRAPH_COMPLEX_EIGHT_ : 4856 return 8; // Han Eight 4857 case CJK_IDEOGRAPH_NINETH_ : 4858 case CJK_IDEOGRAPH_COMPLEX_NINE_ : 4859 return 9; // Han Nine 4860 case CJK_IDEOGRAPH_TEN_ : 4861 case CJK_IDEOGRAPH_COMPLEX_TEN_ : 4862 return 10; 4863 case CJK_IDEOGRAPH_HUNDRED_ : 4864 case CJK_IDEOGRAPH_COMPLEX_HUNDRED_ : 4865 return 100; 4866 case CJK_IDEOGRAPH_THOUSAND_ : 4867 case CJK_IDEOGRAPH_COMPLEX_THOUSAND_ : 4868 return 1000; 4869 case CJK_IDEOGRAPH_TEN_THOUSAND_ : 4870 return 10000; 4871 case CJK_IDEOGRAPH_HUNDRED_MILLION_ : 4872 return 100000000; 4873 } 4874 return -1; // no value 4875 } 4876 4877 /** 4878 * <strong>[icu]</strong> <p>Returns an iterator for character types, iterating over codepoints. 
4879 * <p>Example of use:<br> 4880 * <pre> 4881 * RangeValueIterator iterator = UCharacter.getTypeIterator(); 4882 * RangeValueIterator.Element element = new RangeValueIterator.Element(); 4883 * while (iterator.next(element)) { 4884 * System.out.println("Codepoint \\u" + 4885 * Integer.toHexString(element.start) + 4886 * " to codepoint \\u" + 4887 * Integer.toHexString(element.limit - 1) + 4888 * " has the character type " + 4889 * element.value); 4890 * } 4891 * </pre> 4892 * @return an iterator 4893 */ getTypeIterator()4894 public static RangeValueIterator getTypeIterator() 4895 { 4896 return new UCharacterTypeIterator(); 4897 } 4898 4899 private static final class UCharacterTypeIterator implements RangeValueIterator { UCharacterTypeIterator()4900 UCharacterTypeIterator() { 4901 reset(); 4902 } 4903 4904 // implements RangeValueIterator next(Element element)4905 public boolean next(Element element) { 4906 if(trieIterator.hasNext() && !(range=trieIterator.next()).leadSurrogate) { 4907 element.start=range.startCodePoint; 4908 element.limit=range.endCodePoint+1; 4909 element.value=range.value; 4910 return true; 4911 } else { 4912 return false; 4913 } 4914 } 4915 4916 // implements RangeValueIterator reset()4917 public void reset() { 4918 trieIterator=UCharacterProperty.INSTANCE.m_trie_.iterator(MASK_TYPE); 4919 } 4920 4921 private Iterator<Trie2.Range> trieIterator; 4922 private Trie2.Range range; 4923 4924 private static final class MaskType implements Trie2.ValueMapper { 4925 // Extracts the general category ("character type") from the trie value. map(int value)4926 public int map(int value) { 4927 return value & UCharacterProperty.TYPE_MASK; 4928 } 4929 } 4930 private static final MaskType MASK_TYPE=new MaskType(); 4931 } 4932 4933 /** 4934 * <strong>[icu]</strong> <p>Returns an iterator for character names, iterating over codepoints. 4935 * <p>This API only gets the iterator for the modern, most up-to-date 4936 * Unicode names. 
For older 1.0 Unicode names use get1_0NameIterator() or 4937 * for extended names use getExtendedNameIterator(). 4938 * <p>Example of use:<br> 4939 * <pre> 4940 * ValueIterator iterator = UCharacter.getNameIterator(); 4941 * ValueIterator.Element element = new ValueIterator.Element(); 4942 * while (iterator.next(element)) { 4943 * System.out.println("Codepoint \\u" + 4944 * Integer.toHexString(element.codepoint) + 4945 * " has the name " + (String)element.value); 4946 * } 4947 * </pre> 4948 * <p>The maximal range which the name iterator iterates is from 4949 * UCharacter.MIN_VALUE to UCharacter.MAX_VALUE. 4950 * @return an iterator 4951 */ getNameIterator()4952 public static ValueIterator getNameIterator(){ 4953 return new UCharacterNameIterator(UCharacterName.INSTANCE, 4954 UCharacterNameChoice.UNICODE_CHAR_NAME); 4955 } 4956 4957 /** 4958 * <strong>[icu]</strong> Returns an empty iterator. 4959 * <p>Used to return an iterator for the older 1.0 Unicode character names, iterating over codepoints. 4960 * @return an empty iterator 4961 * @deprecated ICU 49 4962 * @see #getName1_0(int) 4963 * @hide original deprecated declaration 4964 */ 4965 @Deprecated getName1_0Iterator()4966 public static ValueIterator getName1_0Iterator(){ 4967 return new DummyValueIterator(); 4968 } 4969 4970 private static final class DummyValueIterator implements ValueIterator { next(Element element)4971 public boolean next(Element element) { return false; } reset()4972 public void reset() {} setRange(int start, int limit)4973 public void setRange(int start, int limit) {} 4974 } 4975 4976 /** 4977 * <strong>[icu]</strong> <p>Returns an iterator for character names, iterating over codepoints. 4978 * <p>This API only gets the iterator for the extended names. 4979 * For modern, most up-to-date Unicode names use getNameIterator() or 4980 * for older 1.0 Unicode names use get1_0NameIterator(). 
4981 * <p>Example of use:<br> 4982 * <pre> 4983 * ValueIterator iterator = UCharacter.getExtendedNameIterator(); 4984 * ValueIterator.Element element = new ValueIterator.Element(); 4985 * while (iterator.next(element)) { 4986 * System.out.println("Codepoint \\u" + 4987 * Integer.toHexString(element.codepoint) + 4988 * " has the name " + (String)element.value); 4989 * } 4990 * </pre> 4991 * <p>The maximal range which the name iterator iterates is from 4992 * @return an iterator 4993 */ getExtendedNameIterator()4994 public static ValueIterator getExtendedNameIterator(){ 4995 return new UCharacterNameIterator(UCharacterName.INSTANCE, 4996 UCharacterNameChoice.EXTENDED_CHAR_NAME); 4997 } 4998 4999 /** 5000 * <strong>[icu]</strong> Returns the "age" of the code point. 5001 * <p>The "age" is the Unicode version when the code point was first 5002 * designated (as a non-character or for Private Use) or assigned a 5003 * character. 5004 * <p>This can be useful to avoid emitting code points to receiving 5005 * processes that do not accept newer characters. 5006 * <p>The data is from the UCD file DerivedAge.txt. 5007 * @param ch The code point. 5008 * @return the Unicode version number 5009 */ getAge(int ch)5010 public static VersionInfo getAge(int ch) 5011 { 5012 if (ch < MIN_VALUE || ch > MAX_VALUE) { 5013 throw new IllegalArgumentException("Codepoint out of bounds"); 5014 } 5015 return UCharacterProperty.INSTANCE.getAge(ch); 5016 } 5017 5018 /** 5019 * <strong>[icu]</strong> <p>Check a binary Unicode property for a code point. 5020 * <p>Unicode, especially in version 3.2, defines many more properties 5021 * than the original set in UnicodeData.txt. 5022 * <p>This API is intended to reflect Unicode properties as defined in 5023 * the Unicode Character Database (UCD) and Unicode Technical Reports 5024 * (UTR). 5025 * <p>For details about the properties see 5026 * <a href=http://www.unicode.org/>http://www.unicode.org/</a>. 
5027 * <p>For names of Unicode properties see the UCD file 5028 * PropertyAliases.txt. 5029 * <p>This API does not check the validity of the codepoint. 5030 * <p>Important: If ICU is built with UCD files from Unicode versions 5031 * below 3.2, then properties marked with "new" are not or 5032 * not fully available. 5033 * @param ch code point to test. 5034 * @param property selector constant from android.icu.lang.UProperty, 5035 * identifies which binary property to check. 5036 * @return true or false according to the binary Unicode property value 5037 * for ch. Also false if property is out of bounds or if the 5038 * Unicode version does not have data for the property at all, or 5039 * not for this code point. 5040 * @see android.icu.lang.UProperty 5041 */ hasBinaryProperty(int ch, int property)5042 public static boolean hasBinaryProperty(int ch, int property) 5043 { 5044 return UCharacterProperty.INSTANCE.hasBinaryProperty(ch, property); 5045 } 5046 5047 /** 5048 * <strong>[icu]</strong> <p>Check if a code point has the Alphabetic Unicode property. 5049 * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.ALPHABETIC). 5050 * <p>Different from UCharacter.isLetter(ch)! 5051 * @param ch codepoint to be tested 5052 */ isUAlphabetic(int ch)5053 public static boolean isUAlphabetic(int ch) 5054 { 5055 return hasBinaryProperty(ch, UProperty.ALPHABETIC); 5056 } 5057 5058 /** 5059 * <strong>[icu]</strong> <p>Check if a code point has the Lowercase Unicode property. 5060 * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.LOWERCASE). 5061 * <p>This is different from UCharacter.isLowerCase(ch)! 5062 * @param ch codepoint to be tested 5063 */ isULowercase(int ch)5064 public static boolean isULowercase(int ch) 5065 { 5066 return hasBinaryProperty(ch, UProperty.LOWERCASE); 5067 } 5068 5069 /** 5070 * <strong>[icu]</strong> <p>Check if a code point has the Uppercase Unicode property. 5071 * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.UPPERCASE). 
5072 * <p>This is different from UCharacter.isUpperCase(ch)! 5073 * @param ch codepoint to be tested 5074 */ isUUppercase(int ch)5075 public static boolean isUUppercase(int ch) 5076 { 5077 return hasBinaryProperty(ch, UProperty.UPPERCASE); 5078 } 5079 5080 /** 5081 * <strong>[icu]</strong> <p>Check if a code point has the White_Space Unicode property. 5082 * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.WHITE_SPACE). 5083 * <p>This is different from both UCharacter.isSpace(ch) and 5084 * UCharacter.isWhitespace(ch)! 5085 * @param ch codepoint to be tested 5086 */ isUWhiteSpace(int ch)5087 public static boolean isUWhiteSpace(int ch) 5088 { 5089 return hasBinaryProperty(ch, UProperty.WHITE_SPACE); 5090 } 5091 5092 /** 5093 * <strong>[icu]</strong> <p>Returns the property value for an Unicode property type of a code point. 5094 * Also returns binary and mask property values. 5095 * <p>Unicode, especially in version 3.2, defines many more properties than 5096 * the original set in UnicodeData.txt. 5097 * <p>The properties APIs are intended to reflect Unicode properties as 5098 * defined in the Unicode Character Database (UCD) and Unicode Technical 5099 * Reports (UTR). For details about the properties see 5100 * http://www.unicode.org/. 5101 * <p>For names of Unicode properties see the UCD file PropertyAliases.txt. 5102 * 5103 * <pre> 5104 * Sample usage: 5105 * int ea = UCharacter.getIntPropertyValue(c, UProperty.EAST_ASIAN_WIDTH); 5106 * int ideo = UCharacter.getIntPropertyValue(c, UProperty.IDEOGRAPHIC); 5107 * boolean b = (ideo == 1) ? true : false; 5108 * </pre> 5109 * @param ch code point to test. 5110 * @param type UProperty selector constant, identifies which binary 5111 * property to check. Must be 5112 * UProperty.BINARY_START <= type < UProperty.BINARY_LIMIT or 5113 * UProperty.INT_START <= type < UProperty.INT_LIMIT or 5114 * UProperty.MASK_START <= type < UProperty.MASK_LIMIT. 
5115 * @return numeric value that is directly the property value or, 5116 * for enumerated properties, corresponds to the numeric value of 5117 * the enumerated constant of the respective property value 5118 * enumeration type (cast to enum type if necessary). 5119 * Returns 0 or 1 (for false / true) for binary Unicode properties. 5120 * Returns a bit-mask for mask properties. 5121 * Returns 0 if 'type' is out of bounds or if the Unicode version 5122 * does not have data for the property at all, or not for this code 5123 * point. 5124 * @see UProperty 5125 * @see #hasBinaryProperty 5126 * @see #getIntPropertyMinValue 5127 * @see #getIntPropertyMaxValue 5128 * @see #getUnicodeVersion 5129 */ getIntPropertyValue(int ch, int type)5130 public static int getIntPropertyValue(int ch, int type) 5131 { 5132 return UCharacterProperty.INSTANCE.getIntPropertyValue(ch, type); 5133 } 5134 /** 5135 * <strong>[icu]</strong> Returns a string version of the property value. 5136 * @param propertyEnum The property enum value. 5137 * @param codepoint The codepoint value. 5138 * @param nameChoice The choice of the name. 5139 * @return value as string 5140 * @deprecated This API is ICU internal only. 
5141 * @hide original deprecated declaration 5142 * @hide draft / provisional / internal are hidden on Android 5143 */ 5144 @Deprecated 5145 ///CLOVER:OFF getStringPropertyValue(int propertyEnum, int codepoint, int nameChoice)5146 public static String getStringPropertyValue(int propertyEnum, int codepoint, int nameChoice) { 5147 if ((propertyEnum >= UProperty.BINARY_START && propertyEnum < UProperty.BINARY_LIMIT) || 5148 (propertyEnum >= UProperty.INT_START && propertyEnum < UProperty.INT_LIMIT)) { 5149 return getPropertyValueName(propertyEnum, getIntPropertyValue(codepoint, propertyEnum), 5150 nameChoice); 5151 } 5152 if (propertyEnum == UProperty.NUMERIC_VALUE) { 5153 return String.valueOf(getUnicodeNumericValue(codepoint)); 5154 } 5155 // otherwise must be string property 5156 switch (propertyEnum) { 5157 case UProperty.AGE: return getAge(codepoint).toString(); 5158 case UProperty.ISO_COMMENT: return getISOComment(codepoint); 5159 case UProperty.BIDI_MIRRORING_GLYPH: return toString(getMirror(codepoint)); 5160 case UProperty.CASE_FOLDING: return toString(foldCase(codepoint, true)); 5161 case UProperty.LOWERCASE_MAPPING: return toString(toLowerCase(codepoint)); 5162 case UProperty.NAME: return getName(codepoint); 5163 case UProperty.SIMPLE_CASE_FOLDING: return toString(foldCase(codepoint, true)); 5164 case UProperty.SIMPLE_LOWERCASE_MAPPING: return toString(toLowerCase(codepoint)); 5165 case UProperty.SIMPLE_TITLECASE_MAPPING: return toString(toTitleCase(codepoint)); 5166 case UProperty.SIMPLE_UPPERCASE_MAPPING: return toString(toUpperCase(codepoint)); 5167 case UProperty.TITLECASE_MAPPING: return toString(toTitleCase(codepoint)); 5168 case UProperty.UNICODE_1_NAME: return getName1_0(codepoint); 5169 case UProperty.UPPERCASE_MAPPING: return toString(toUpperCase(codepoint)); 5170 } 5171 throw new IllegalArgumentException("Illegal Property Enum"); 5172 } 5173 ///CLOVER:ON 5174 5175 /** 5176 * <strong>[icu]</strong> Returns the minimum value for an integer/binary 
Unicode property type. 5177 * Can be used together with UCharacter.getIntPropertyMaxValue(int) 5178 * to allocate arrays of android.icu.text.UnicodeSet or similar. 5179 * @param type UProperty selector constant, identifies which binary 5180 * property to check. Must be 5181 * UProperty.BINARY_START <= type < UProperty.BINARY_LIMIT or 5182 * UProperty.INT_START <= type < UProperty.INT_LIMIT. 5183 * @return Minimum value returned by UCharacter.getIntPropertyValue(int) 5184 * for a Unicode property. 0 if the property 5185 * selector 'type' is out of range. 5186 * @see UProperty 5187 * @see #hasBinaryProperty 5188 * @see #getUnicodeVersion 5189 * @see #getIntPropertyMaxValue 5190 * @see #getIntPropertyValue 5191 */ getIntPropertyMinValue(int type)5192 public static int getIntPropertyMinValue(int type){ 5193 5194 return 0; // undefined; and: all other properties have a minimum value of 0 5195 } 5196 5197 5198 /** 5199 * <strong>[icu]</strong> Returns the maximum value for an integer/binary Unicode property. 5200 * Can be used together with UCharacter.getIntPropertyMinValue(int) 5201 * to allocate arrays of android.icu.text.UnicodeSet or similar. 5202 * Examples for min/max values (for Unicode 3.2): 5203 * <ul> 5204 * <li> UProperty.BIDI_CLASS: 0/18 5205 * (UCharacterDirection.LEFT_TO_RIGHT/UCharacterDirection.BOUNDARY_NEUTRAL) 5206 * <li> UProperty.SCRIPT: 0/45 (UScript.COMMON/UScript.TAGBANWA) 5207 * <li> UProperty.IDEOGRAPHIC: 0/1 (false/true) 5208 * </ul> 5209 * For undefined UProperty constant values, min/max values will be 0/-1. 5210 * @param type UProperty selector constant, identifies which binary 5211 * property to check. Must be 5212 * UProperty.BINARY_START <= type < UProperty.BINARY_LIMIT or 5213 * UProperty.INT_START <= type < UProperty.INT_LIMIT. 5214 * @return Maximum value returned by u_getIntPropertyValue for a Unicode 5215 * property. <= 0 if the property selector 'type' is out of range. 
5216 * @see UProperty 5217 * @see #hasBinaryProperty 5218 * @see #getUnicodeVersion 5219 * @see #getIntPropertyMaxValue 5220 * @see #getIntPropertyValue 5221 */ getIntPropertyMaxValue(int type)5222 public static int getIntPropertyMaxValue(int type) 5223 { 5224 return UCharacterProperty.INSTANCE.getIntPropertyMaxValue(type); 5225 } 5226 5227 /** 5228 * Provide the java.lang.Character forDigit API, for convenience. 5229 */ forDigit(int digit, int radix)5230 public static char forDigit(int digit, int radix) { 5231 return java.lang.Character.forDigit(digit, radix); 5232 } 5233 5234 // JDK 1.5 API coverage 5235 5236 /** 5237 * Constant U+D800, same as {@link Character#MIN_HIGH_SURROGATE}. 5238 */ 5239 public static final char MIN_HIGH_SURROGATE = Character.MIN_HIGH_SURROGATE; 5240 5241 /** 5242 * Constant U+DBFF, same as {@link Character#MAX_HIGH_SURROGATE}. 5243 */ 5244 public static final char MAX_HIGH_SURROGATE = Character.MAX_HIGH_SURROGATE; 5245 5246 /** 5247 * Constant U+DC00, same as {@link Character#MIN_LOW_SURROGATE}. 5248 */ 5249 public static final char MIN_LOW_SURROGATE = Character.MIN_LOW_SURROGATE; 5250 5251 /** 5252 * Constant U+DFFF, same as {@link Character#MAX_LOW_SURROGATE}. 5253 */ 5254 public static final char MAX_LOW_SURROGATE = Character.MAX_LOW_SURROGATE; 5255 5256 /** 5257 * Constant U+D800, same as {@link Character#MIN_SURROGATE}. 5258 */ 5259 public static final char MIN_SURROGATE = Character.MIN_SURROGATE; 5260 5261 /** 5262 * Constant U+DFFF, same as {@link Character#MAX_SURROGATE}. 5263 */ 5264 public static final char MAX_SURROGATE = Character.MAX_SURROGATE; 5265 5266 /** 5267 * Constant U+10000, same as {@link Character#MIN_SUPPLEMENTARY_CODE_POINT}. 5268 */ 5269 public static final int MIN_SUPPLEMENTARY_CODE_POINT = Character.MIN_SUPPLEMENTARY_CODE_POINT; 5270 5271 /** 5272 * Constant U+10FFFF, same as {@link Character#MAX_CODE_POINT}. 
5273 */ 5274 public static final int MAX_CODE_POINT = Character.MAX_CODE_POINT; 5275 5276 /** 5277 * Constant U+0000, same as {@link Character#MIN_CODE_POINT}. 5278 */ 5279 public static final int MIN_CODE_POINT = Character.MIN_CODE_POINT; 5280 5281 /** 5282 * Equivalent to {@link Character#isValidCodePoint}. 5283 * 5284 * @param cp the code point to check 5285 * @return true if cp is a valid code point 5286 */ isValidCodePoint(int cp)5287 public static final boolean isValidCodePoint(int cp) { 5288 return cp >= 0 && cp <= MAX_CODE_POINT; 5289 } 5290 5291 /** 5292 * Same as {@link Character#isSupplementaryCodePoint}. 5293 * 5294 * @param cp the code point to check 5295 * @return true if cp is a supplementary code point 5296 */ isSupplementaryCodePoint(int cp)5297 public static final boolean isSupplementaryCodePoint(int cp) { 5298 return Character.isSupplementaryCodePoint(cp); 5299 } 5300 5301 /** 5302 * Same as {@link Character#isHighSurrogate}. 5303 * 5304 * @param ch the char to check 5305 * @return true if ch is a high (lead) surrogate 5306 */ isHighSurrogate(char ch)5307 public static boolean isHighSurrogate(char ch) { 5308 return Character.isHighSurrogate(ch); 5309 } 5310 5311 /** 5312 * Same as {@link Character#isLowSurrogate}. 5313 * 5314 * @param ch the char to check 5315 * @return true if ch is a low (trail) surrogate 5316 */ isLowSurrogate(char ch)5317 public static boolean isLowSurrogate(char ch) { 5318 return Character.isLowSurrogate(ch); 5319 } 5320 5321 /** 5322 * Same as {@link Character#isSurrogatePair}. 5323 * 5324 * @param high the high (lead) char 5325 * @param low the low (trail) char 5326 * @return true if high, low form a surrogate pair 5327 */ isSurrogatePair(char high, char low)5328 public static final boolean isSurrogatePair(char high, char low) { 5329 return Character.isSurrogatePair(high, low); 5330 } 5331 5332 /** 5333 * Same as {@link Character#charCount}. 5334 * Returns the number of chars needed to represent the code point (1 or 2). 
5335 * This does not check the code point for validity. 5336 * 5337 * @param cp the code point to check 5338 * @return the number of chars needed to represent the code point 5339 */ charCount(int cp)5340 public static int charCount(int cp) { 5341 return Character.charCount(cp); 5342 } 5343 5344 /** 5345 * Same as {@link Character#toCodePoint}. 5346 * Returns the code point represented by the two surrogate code units. 5347 * This does not check the surrogate pair for validity. 5348 * 5349 * @param high the high (lead) surrogate 5350 * @param low the low (trail) surrogate 5351 * @return the code point formed by the surrogate pair 5352 */ toCodePoint(char high, char low)5353 public static final int toCodePoint(char high, char low) { 5354 return Character.toCodePoint(high, low); 5355 } 5356 5357 /** 5358 * Same as {@link Character#codePointAt(CharSequence, int)}. 5359 * Returns the code point at index. 5360 * This examines only the characters at index and index+1. 5361 * 5362 * @param seq the characters to check 5363 * @param index the index of the first or only char forming the code point 5364 * @return the code point at the index 5365 */ codePointAt(CharSequence seq, int index)5366 public static final int codePointAt(CharSequence seq, int index) { 5367 char c1 = seq.charAt(index++); 5368 if (isHighSurrogate(c1)) { 5369 if (index < seq.length()) { 5370 char c2 = seq.charAt(index); 5371 if (isLowSurrogate(c2)) { 5372 return toCodePoint(c1, c2); 5373 } 5374 } 5375 } 5376 return c1; 5377 } 5378 5379 /** 5380 * Same as {@link Character#codePointAt(char[], int)}. 5381 * Returns the code point at index. 5382 * This examines only the characters at index and index+1. 
5383 * 5384 * @param text the characters to check 5385 * @param index the index of the first or only char forming the code point 5386 * @return the code point at the index 5387 */ codePointAt(char[] text, int index)5388 public static final int codePointAt(char[] text, int index) { 5389 char c1 = text[index++]; 5390 if (isHighSurrogate(c1)) { 5391 if (index < text.length) { 5392 char c2 = text[index]; 5393 if (isLowSurrogate(c2)) { 5394 return toCodePoint(c1, c2); 5395 } 5396 } 5397 } 5398 return c1; 5399 } 5400 5401 /** 5402 * Same as {@link Character#codePointAt(char[], int, int)}. 5403 * Returns the code point at index. 5404 * This examines only the characters at index and index+1. 5405 * 5406 * @param text the characters to check 5407 * @param index the index of the first or only char forming the code point 5408 * @param limit the limit of the valid text 5409 * @return the code point at the index 5410 */ codePointAt(char[] text, int index, int limit)5411 public static final int codePointAt(char[] text, int index, int limit) { 5412 if (index >= limit || limit > text.length) { 5413 throw new IndexOutOfBoundsException(); 5414 } 5415 char c1 = text[index++]; 5416 if (isHighSurrogate(c1)) { 5417 if (index < limit) { 5418 char c2 = text[index]; 5419 if (isLowSurrogate(c2)) { 5420 return toCodePoint(c1, c2); 5421 } 5422 } 5423 } 5424 return c1; 5425 } 5426 5427 /** 5428 * Same as {@link Character#codePointBefore(CharSequence, int)}. 5429 * Return the code point before index. 5430 * This examines only the characters at index-1 and index-2. 
5431 * 5432 * @param seq the characters to check 5433 * @param index the index after the last or only char forming the code point 5434 * @return the code point before the index 5435 */ codePointBefore(CharSequence seq, int index)5436 public static final int codePointBefore(CharSequence seq, int index) { 5437 char c2 = seq.charAt(--index); 5438 if (isLowSurrogate(c2)) { 5439 if (index > 0) { 5440 char c1 = seq.charAt(--index); 5441 if (isHighSurrogate(c1)) { 5442 return toCodePoint(c1, c2); 5443 } 5444 } 5445 } 5446 return c2; 5447 } 5448 5449 /** 5450 * Same as {@link Character#codePointBefore(char[], int)}. 5451 * Returns the code point before index. 5452 * This examines only the characters at index-1 and index-2. 5453 * 5454 * @param text the characters to check 5455 * @param index the index after the last or only char forming the code point 5456 * @return the code point before the index 5457 */ codePointBefore(char[] text, int index)5458 public static final int codePointBefore(char[] text, int index) { 5459 char c2 = text[--index]; 5460 if (isLowSurrogate(c2)) { 5461 if (index > 0) { 5462 char c1 = text[--index]; 5463 if (isHighSurrogate(c1)) { 5464 return toCodePoint(c1, c2); 5465 } 5466 } 5467 } 5468 return c2; 5469 } 5470 5471 /** 5472 * Same as {@link Character#codePointBefore(char[], int, int)}. 5473 * Return the code point before index. 5474 * This examines only the characters at index-1 and index-2. 
5475 * 5476 * @param text the characters to check 5477 * @param index the index after the last or only char forming the code point 5478 * @param limit the start of the valid text 5479 * @return the code point before the index 5480 */ codePointBefore(char[] text, int index, int limit)5481 public static final int codePointBefore(char[] text, int index, int limit) { 5482 if (index <= limit || limit < 0) { 5483 throw new IndexOutOfBoundsException(); 5484 } 5485 char c2 = text[--index]; 5486 if (isLowSurrogate(c2)) { 5487 if (index > limit) { 5488 char c1 = text[--index]; 5489 if (isHighSurrogate(c1)) { 5490 return toCodePoint(c1, c2); 5491 } 5492 } 5493 } 5494 return c2; 5495 } 5496 5497 /** 5498 * Same as {@link Character#toChars(int, char[], int)}. 5499 * Writes the chars representing the 5500 * code point into the destination at the given index. 5501 * 5502 * @param cp the code point to convert 5503 * @param dst the destination array into which to put the char(s) representing the code point 5504 * @param dstIndex the index at which to put the first (or only) char 5505 * @return the count of the number of chars written (1 or 2) 5506 * @throws IllegalArgumentException if cp is not a valid code point 5507 */ toChars(int cp, char[] dst, int dstIndex)5508 public static final int toChars(int cp, char[] dst, int dstIndex) { 5509 return Character.toChars(cp, dst, dstIndex); 5510 } 5511 5512 /** 5513 * Same as {@link Character#toChars(int)}. 5514 * Returns a char array representing the code point. 5515 * 5516 * @param cp the code point to convert 5517 * @return an array containing the char(s) representing the code point 5518 * @throws IllegalArgumentException if cp is not a valid code point 5519 */ toChars(int cp)5520 public static final char[] toChars(int cp) { 5521 return Character.toChars(cp); 5522 } 5523 5524 /** 5525 * Equivalent to the {@link Character#getDirectionality(char)} method, for 5526 * convenience. 
Returns a byte representing the directionality of the 5527 * character. 5528 * 5529 * <strong>[icu] Note:</strong> Unlike {@link Character#getDirectionality(char)}, this returns 5530 * DIRECTIONALITY_LEFT_TO_RIGHT for undefined or out-of-bounds characters. 5531 * 5532 * <strong>[icu] Note:</strong> The return value must be tested using the constants defined in {@link 5533 * UCharacterDirection} and its interface {@link 5534 * UCharacterEnums.ECharacterDirection} since the values are different from the ones 5535 * defined by <code>java.lang.Character</code>. 5536 * @param cp the code point to check 5537 * @return the directionality of the code point 5538 * @see #getDirection 5539 */ getDirectionality(int cp)5540 public static byte getDirectionality(int cp) 5541 { 5542 return (byte)getDirection(cp); 5543 } 5544 5545 /** 5546 * Equivalent to the {@link Character#codePointCount(CharSequence, int, int)} 5547 * method, for convenience. Counts the number of code points in the range 5548 * of text. 
5549 * @param text the characters to check 5550 * @param start the start of the range 5551 * @param limit the limit of the range 5552 * @return the number of code points in the range 5553 */ codePointCount(CharSequence text, int start, int limit)5554 public static int codePointCount(CharSequence text, int start, int limit) { 5555 if (start < 0 || limit < start || limit > text.length()) { 5556 throw new IndexOutOfBoundsException("start (" + start + 5557 ") or limit (" + limit + 5558 ") invalid or out of range 0, " + text.length()); 5559 } 5560 5561 int len = limit - start; 5562 while (limit > start) { 5563 char ch = text.charAt(--limit); 5564 while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && limit > start) { 5565 ch = text.charAt(--limit); 5566 if (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE) { 5567 --len; 5568 break; 5569 } 5570 } 5571 } 5572 return len; 5573 } 5574 5575 /** 5576 * Equivalent to the {@link Character#codePointCount(char[], int, int)} method, for 5577 * convenience. Counts the number of code points in the range of text. 
5578 * @param text the characters to check 5579 * @param start the start of the range 5580 * @param limit the limit of the range 5581 * @return the number of code points in the range 5582 */ codePointCount(char[] text, int start, int limit)5583 public static int codePointCount(char[] text, int start, int limit) { 5584 if (start < 0 || limit < start || limit > text.length) { 5585 throw new IndexOutOfBoundsException("start (" + start + 5586 ") or limit (" + limit + 5587 ") invalid or out of range 0, " + text.length); 5588 } 5589 5590 int len = limit - start; 5591 while (limit > start) { 5592 char ch = text[--limit]; 5593 while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && limit > start) { 5594 ch = text[--limit]; 5595 if (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE) { 5596 --len; 5597 break; 5598 } 5599 } 5600 } 5601 return len; 5602 } 5603 5604 /** 5605 * Equivalent to the {@link Character#offsetByCodePoints(CharSequence, int, int)} 5606 * method, for convenience. Adjusts the char index by a code point offset. 
5607 * @param text the characters to check 5608 * @param index the index to adjust 5609 * @param codePointOffset the number of code points by which to offset the index 5610 * @return the adjusted index 5611 */ offsetByCodePoints(CharSequence text, int index, int codePointOffset)5612 public static int offsetByCodePoints(CharSequence text, int index, int codePointOffset) { 5613 if (index < 0 || index > text.length()) { 5614 throw new IndexOutOfBoundsException("index ( " + index + 5615 ") out of range 0, " + text.length()); 5616 } 5617 5618 if (codePointOffset < 0) { 5619 while (++codePointOffset <= 0) { 5620 char ch = text.charAt(--index); 5621 while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && index > 0) { 5622 ch = text.charAt(--index); 5623 if (ch < MIN_HIGH_SURROGATE || ch > MAX_HIGH_SURROGATE) { 5624 if (++codePointOffset > 0) { 5625 return index+1; 5626 } 5627 } 5628 } 5629 } 5630 } else { 5631 int limit = text.length(); 5632 while (--codePointOffset >= 0) { 5633 char ch = text.charAt(index++); 5634 while (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE && index < limit) { 5635 ch = text.charAt(index++); 5636 if (ch < MIN_LOW_SURROGATE || ch > MAX_LOW_SURROGATE) { 5637 if (--codePointOffset < 0) { 5638 return index-1; 5639 } 5640 } 5641 } 5642 } 5643 } 5644 5645 return index; 5646 } 5647 5648 /** 5649 * Equivalent to the 5650 * {@link Character#offsetByCodePoints(char[], int, int, int, int)} 5651 * method, for convenience. Adjusts the char index by a code point offset. 
5652 * @param text the characters to check 5653 * @param start the start of the range to check 5654 * @param count the length of the range to check 5655 * @param index the index to adjust 5656 * @param codePointOffset the number of code points by which to offset the index 5657 * @return the adjusted index 5658 */ offsetByCodePoints(char[] text, int start, int count, int index, int codePointOffset)5659 public static int offsetByCodePoints(char[] text, int start, int count, int index, 5660 int codePointOffset) { 5661 int limit = start + count; 5662 if (start < 0 || limit < start || limit > text.length || index < start || index > limit) { 5663 throw new IndexOutOfBoundsException("index ( " + index + 5664 ") out of range " + start + 5665 ", " + limit + 5666 " in array 0, " + text.length); 5667 } 5668 5669 if (codePointOffset < 0) { 5670 while (++codePointOffset <= 0) { 5671 char ch = text[--index]; 5672 if (index < start) { 5673 throw new IndexOutOfBoundsException("index ( " + index + 5674 ") < start (" + start + 5675 ")"); 5676 } 5677 while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && index > start) { 5678 ch = text[--index]; 5679 if (ch < MIN_HIGH_SURROGATE || ch > MAX_HIGH_SURROGATE) { 5680 if (++codePointOffset > 0) { 5681 return index+1; 5682 } 5683 } 5684 } 5685 } 5686 } else { 5687 while (--codePointOffset >= 0) { 5688 char ch = text[index++]; 5689 if (index > limit) { 5690 throw new IndexOutOfBoundsException("index ( " + index + 5691 ") > limit (" + limit + 5692 ")"); 5693 } 5694 while (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE && index < limit) { 5695 ch = text[index++]; 5696 if (ch < MIN_LOW_SURROGATE || ch > MAX_LOW_SURROGATE) { 5697 if (--codePointOffset < 0) { 5698 return index-1; 5699 } 5700 } 5701 } 5702 } 5703 } 5704 5705 return index; 5706 } 5707 5708 // private variables ------------------------------------------------- 5709 5710 /** 5711 * To get the last character out from a data type 5712 */ 5713 private static final int 
LAST_CHAR_MASK_ = 0xFFFF; 5714 5715 // /** 5716 // * To get the last byte out from a data type 5717 // */ 5718 // private static final int LAST_BYTE_MASK_ = 0xFF; 5719 // 5720 // /** 5721 // * Shift 16 bits 5722 // */ 5723 // private static final int SHIFT_16_ = 16; 5724 // 5725 // /** 5726 // * Shift 24 bits 5727 // */ 5728 // private static final int SHIFT_24_ = 24; 5729 // 5730 // /** 5731 // * Decimal radix 5732 // */ 5733 // private static final int DECIMAL_RADIX_ = 10; 5734 5735 /** 5736 * No break space code point 5737 */ 5738 private static final int NO_BREAK_SPACE_ = 0xA0; 5739 5740 /** 5741 * Figure space code point 5742 */ 5743 private static final int FIGURE_SPACE_ = 0x2007; 5744 5745 /** 5746 * Narrow no break space code point 5747 */ 5748 private static final int NARROW_NO_BREAK_SPACE_ = 0x202F; 5749 5750 /** 5751 * Ideographic number zero code point 5752 */ 5753 private static final int IDEOGRAPHIC_NUMBER_ZERO_ = 0x3007; 5754 5755 /** 5756 * CJK Ideograph, First code point 5757 */ 5758 private static final int CJK_IDEOGRAPH_FIRST_ = 0x4e00; 5759 5760 /** 5761 * CJK Ideograph, Second code point 5762 */ 5763 private static final int CJK_IDEOGRAPH_SECOND_ = 0x4e8c; 5764 5765 /** 5766 * CJK Ideograph, Third code point 5767 */ 5768 private static final int CJK_IDEOGRAPH_THIRD_ = 0x4e09; 5769 5770 /** 5771 * CJK Ideograph, Fourth code point 5772 */ 5773 private static final int CJK_IDEOGRAPH_FOURTH_ = 0x56db; 5774 5775 /** 5776 * CJK Ideograph, FIFTH code point 5777 */ 5778 private static final int CJK_IDEOGRAPH_FIFTH_ = 0x4e94; 5779 5780 /** 5781 * CJK Ideograph, Sixth code point 5782 */ 5783 private static final int CJK_IDEOGRAPH_SIXTH_ = 0x516d; 5784 5785 /** 5786 * CJK Ideograph, Seventh code point 5787 */ 5788 private static final int CJK_IDEOGRAPH_SEVENTH_ = 0x4e03; 5789 5790 /** 5791 * CJK Ideograph, Eighth code point 5792 */ 5793 private static final int CJK_IDEOGRAPH_EIGHTH_ = 0x516b; 5794 5795 /** 5796 * CJK Ideograph, Nineth code point 5797 */ 
private static final int CJK_IDEOGRAPH_NINETH_ = 0x4e5d;

/**
 * Application Program command code point (U+009F)
 */
private static final int APPLICATION_PROGRAM_COMMAND_ = 0x009F;

/**
 * Unit separator code point (U+001F)
 */
private static final int UNIT_SEPARATOR_ = 0x001F;

/**
 * Delete code point (U+007F)
 */
private static final int DELETE_ = 0x007F;

/**
 * Han digit characters: the "complex" numeral forms for zero through ten,
 * followed by the ideographs for ten, hundred, thousand, ten thousand and
 * hundred million.
 */
private static final int CJK_IDEOGRAPH_COMPLEX_ZERO_     = 0x96f6;
private static final int CJK_IDEOGRAPH_COMPLEX_ONE_      = 0x58f9;
private static final int CJK_IDEOGRAPH_COMPLEX_TWO_      = 0x8cb3;
private static final int CJK_IDEOGRAPH_COMPLEX_THREE_    = 0x53c3;
private static final int CJK_IDEOGRAPH_COMPLEX_FOUR_     = 0x8086;
private static final int CJK_IDEOGRAPH_COMPLEX_FIVE_     = 0x4f0d;
private static final int CJK_IDEOGRAPH_COMPLEX_SIX_      = 0x9678;
private static final int CJK_IDEOGRAPH_COMPLEX_SEVEN_    = 0x67d2;
private static final int CJK_IDEOGRAPH_COMPLEX_EIGHT_    = 0x634c;
private static final int CJK_IDEOGRAPH_COMPLEX_NINE_     = 0x7396;
private static final int CJK_IDEOGRAPH_TEN_              = 0x5341;
private static final int CJK_IDEOGRAPH_COMPLEX_TEN_      = 0x62fe;
private static final int CJK_IDEOGRAPH_HUNDRED_          = 0x767e;
private static final int CJK_IDEOGRAPH_COMPLEX_HUNDRED_  = 0x4f70;
private static final int CJK_IDEOGRAPH_THOUSAND_         = 0x5343;
private static final int CJK_IDEOGRAPH_COMPLEX_THOUSAND_ = 0x4edf;
// U+842C is the Han ideograph for ten thousand. The previous value, 0x824c,
// had two hex digits transposed and named an ideograph without numeric meaning.
private static final int CJK_IDEOGRAPH_TEN_THOUSAND_     = 0x842c;
private static final int CJK_IDEOGRAPH_HUNDRED_MILLION_  = 0x5104;

// private constructor -----------------------------------------------
///CLOVER:OFF
/**
 * Private constructor to prevent instantiation
 */
private UCharacter()
{
}
///CLOVER:ON
}