1 /** 2 ******************************************************************************* 3 * Copyright (C) 1996-2016, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 ******************************************************************************* 6 */ 7 8 package com.ibm.icu.lang; 9 10 import java.lang.ref.SoftReference; 11 import java.util.HashMap; 12 import java.util.Iterator; 13 import java.util.Locale; 14 import java.util.Map; 15 16 import com.ibm.icu.impl.IllegalIcuArgumentException; 17 import com.ibm.icu.impl.Trie2; 18 import com.ibm.icu.impl.UBiDiProps; 19 import com.ibm.icu.impl.UCaseProps; 20 import com.ibm.icu.impl.UCharacterName; 21 import com.ibm.icu.impl.UCharacterNameChoice; 22 import com.ibm.icu.impl.UCharacterProperty; 23 import com.ibm.icu.impl.UCharacterUtility; 24 import com.ibm.icu.impl.UPropertyAliases; 25 import com.ibm.icu.lang.UCharacterEnums.ECharacterCategory; 26 import com.ibm.icu.lang.UCharacterEnums.ECharacterDirection; 27 import com.ibm.icu.text.BreakIterator; 28 import com.ibm.icu.text.Normalizer2; 29 import com.ibm.icu.util.RangeValueIterator; 30 import com.ibm.icu.util.ULocale; 31 import com.ibm.icu.util.ValueIterator; 32 import com.ibm.icu.util.VersionInfo; 33 34 /** 35 * {@icuenhanced java.lang.Character}.{@icu _usage_} 36 * 37 * <p>The UCharacter class provides extensions to the {@link java.lang.Character} class. 38 * These extensions provide support for more Unicode properties. 39 * Each ICU release supports the latest version of Unicode available at that time. 40 * 41 * <p>For some time before Java 5 added support for supplementary Unicode code points, 42 * The ICU UCharacter class and many other ICU classes already supported them. 43 * Some UCharacter methods and constants were widened slightly differently than 44 * how the Character class methods and constants were widened later. 
45 * In particular, {@link Character#MAX_VALUE} is still a char with the value U+FFFF, 46 * while the {@link UCharacter#MAX_VALUE} is an int with the value U+10FFFF. 47 * 48 * <p>Code points are represented in these APIs using ints. While it would be 49 * more convenient in Java to have a separate primitive datatype for them, 50 * ints suffice in the meantime. 51 * 52 * <p>To use this class please add the jar file named icu4j.jar to the 53 * class path, since it contains data files which supply the information used 54 * by this file.<br> 55 * E.g. In Windows <br> 56 * <code>set CLASSPATH=%CLASSPATH%;$JAR_FILE_PATH/icu4j.jar</code>.<br> 57 * Otherwise, another method would be to copy the files uprops.dat and 58 * unames.icu from the icu4j source subdirectory 59 * <i>$ICU4J_SRC/src/com.ibm.icu.impl.data</i> to your class directory 60 * <i>$ICU4J_CLASS/com.ibm.icu.impl.data</i>. 61 * 62 * <p>Aside from the additions for UTF-16 support, and the updated Unicode 63 * properties, the main differences between UCharacter and Character are: 64 * <ul> 65 * <li> UCharacter is not designed to be a char wrapper and does not have 66 * APIs that involve management of that single char.<br> 67 * These include: 68 * <ul> 69 * <li> char charValue(), 70 * <li> int compareTo(java.lang.Character, java.lang.Character), etc. 71 * </ul> 72 * <li> UCharacter does not include Character APIs that are deprecated, nor 73 * does it include the Java-specific character information, such as 74 * boolean isJavaIdentifierPart(char ch). 75 * <li> Character maps characters 'A' - 'Z' and 'a' - 'z' to the numeric 76 * values '10' - '35'. UCharacter also does this in digit and 77 * getNumericValue, to adhere to the java semantics of these 78 * methods. New methods unicodeDigit, and 79 * getUnicodeNumericValue do not treat the above code points 80 * as having numeric values. This is a semantic change from ICU4J 1.3.1. 
81 * </ul> 82 * <p> 83 * Further detail on differences can be determined using the program 84 * <a href= 85 * "http://source.icu-project.org/repos/icu/icu4j/trunk/src/com/ibm/icu/dev/test/lang/UCharacterCompare.java"> 86 * com.ibm.icu.dev.test.lang.UCharacterCompare</a> 87 * <p> 88 * In addition to Java compatibility functions, which calculate derived properties, 89 * this API provides low-level access to the Unicode Character Database. 90 * <p> 91 * Unicode assigns each code point (not just assigned characters) values for 92 * many properties. 93 * Most of them are simple boolean flags, or constants from a small enumerated list. 94 * For some properties, values are strings or other relatively more complex types. 95 * <p> 96 * For more information see 97 * <a href="http://www.unicode.org/ucd/">"About the Unicode Character Database"</a> 98 * (http://www.unicode.org/ucd/) 99 * and the <a href="http://www.icu-project.org/userguide/properties.html">ICU 100 * User Guide chapter on Properties</a> 101 * (http://www.icu-project.org/userguide/properties.html). 102 * <p> 103 * There are also functions that provide easy migration from C/POSIX functions 104 * like isblank(). Their use is generally discouraged because the C/POSIX 105 * standards do not define their semantics beyond the ASCII range, which means 106 * that different implementations exhibit very different behavior. 107 * Instead, Unicode properties should be used directly. 108 * <p> 109 * There are also only a few, broad C/POSIX character classes, and they tend 110 * to be used for conflicting purposes. For example, the "isalpha()" class 111 * is sometimes used to determine word boundaries, while a more sophisticated 112 * approach would at least distinguish initial letters from continuation 113 * characters (the latter including combining marks). 114 * (In ICU, BreakIterator is the most sophisticated API for word boundaries.) 115 * Another example: There is no "istitle()" class for titlecase characters. 
116 * <p> 117 * ICU 3.4 and later provides API access for all twelve C/POSIX character classes. 118 * ICU implements them according to the Standard Recommendations in 119 * Annex C: Compatibility Properties of UTS #18 Unicode Regular Expressions 120 * (http://www.unicode.org/reports/tr18/#Compatibility_Properties). 121 * <p> 122 * API access for C/POSIX character classes is as follows: 123 * <pre>{@code 124 * - alpha: isUAlphabetic(c) or hasBinaryProperty(c, UProperty.ALPHABETIC) 125 * - lower: isULowercase(c) or hasBinaryProperty(c, UProperty.LOWERCASE) 126 * - upper: isUUppercase(c) or hasBinaryProperty(c, UProperty.UPPERCASE) 127 * - punct: ((1<<getType(c)) & ((1<<DASH_PUNCTUATION)|(1<<START_PUNCTUATION)| 128 * (1<<END_PUNCTUATION)|(1<<CONNECTOR_PUNCTUATION)|(1<<OTHER_PUNCTUATION)| 129 * (1<<INITIAL_PUNCTUATION)|(1<<FINAL_PUNCTUATION)))!=0 130 * - digit: isDigit(c) or getType(c)==DECIMAL_DIGIT_NUMBER 131 * - xdigit: hasBinaryProperty(c, UProperty.POSIX_XDIGIT) 132 * - alnum: hasBinaryProperty(c, UProperty.POSIX_ALNUM) 133 * - space: isUWhiteSpace(c) or hasBinaryProperty(c, UProperty.WHITE_SPACE) 134 * - blank: hasBinaryProperty(c, UProperty.POSIX_BLANK) 135 * - cntrl: getType(c)==CONTROL 136 * - graph: hasBinaryProperty(c, UProperty.POSIX_GRAPH) 137 * - print: hasBinaryProperty(c, UProperty.POSIX_PRINT)}</pre> 138 * <p> 139 * The C/POSIX character classes are also available in UnicodeSet patterns, 140 * using patterns like [:graph:] or \p{graph}. 141 * 142 * <p>{@icunote} There are several ICU (and Java) whitespace functions. 
143 * Comparison:<ul> 144 * <li> isUWhiteSpace=UCHAR_WHITE_SPACE: Unicode White_Space property; 145 * most of general categories "Z" (separators) + most whitespace ISO controls 146 * (including no-break spaces, but excluding IS1..IS4 and ZWSP) 147 * <li> isWhitespace: Java isWhitespace; Z + whitespace ISO controls but excluding no-break spaces 148 * <li> isSpaceChar: just Z (including no-break spaces)</ul> 149 * 150 * <p> 151 * This class is not subclassable. 152 * 153 * @author Syn Wee Quek 154 * @stable ICU 2.1 155 * @see com.ibm.icu.lang.UCharacterEnums 156 */ 157 158 public final class UCharacter implements ECharacterCategory, ECharacterDirection 159 { 160 // public inner classes ---------------------------------------------- 161 162 /** 163 * {@icuenhanced java.lang.Character.UnicodeBlock}.{@icu _usage_} 164 * 165 * A family of character subsets representing the character blocks in the 166 * Unicode specification, generated from Unicode Data file Blocks.txt. 167 * Character blocks generally define characters used for a specific script 168 * or purpose. A character is contained by at most one Unicode block. 169 * 170 * {@icunote} All fields named XXX_ID are specific to ICU. 
171 * 172 * @stable ICU 2.4 173 */ 174 public static final class UnicodeBlock extends Character.Subset 175 { 176 // block id corresponding to icu4c ----------------------------------- 177 178 /** 179 * @stable ICU 2.4 180 */ 181 public static final int INVALID_CODE_ID = -1; 182 /** 183 * @stable ICU 2.4 184 */ 185 public static final int BASIC_LATIN_ID = 1; 186 /** 187 * @stable ICU 2.4 188 */ 189 public static final int LATIN_1_SUPPLEMENT_ID = 2; 190 /** 191 * @stable ICU 2.4 192 */ 193 public static final int LATIN_EXTENDED_A_ID = 3; 194 /** 195 * @stable ICU 2.4 196 */ 197 public static final int LATIN_EXTENDED_B_ID = 4; 198 /** 199 * @stable ICU 2.4 200 */ 201 public static final int IPA_EXTENSIONS_ID = 5; 202 /** 203 * @stable ICU 2.4 204 */ 205 public static final int SPACING_MODIFIER_LETTERS_ID = 6; 206 /** 207 * @stable ICU 2.4 208 */ 209 public static final int COMBINING_DIACRITICAL_MARKS_ID = 7; 210 /** 211 * Unicode 3.2 renames this block to "Greek and Coptic". 212 * @stable ICU 2.4 213 */ 214 public static final int GREEK_ID = 8; 215 /** 216 * @stable ICU 2.4 217 */ 218 public static final int CYRILLIC_ID = 9; 219 /** 220 * @stable ICU 2.4 221 */ 222 public static final int ARMENIAN_ID = 10; 223 /** 224 * @stable ICU 2.4 225 */ 226 public static final int HEBREW_ID = 11; 227 /** 228 * @stable ICU 2.4 229 */ 230 public static final int ARABIC_ID = 12; 231 /** 232 * @stable ICU 2.4 233 */ 234 public static final int SYRIAC_ID = 13; 235 /** 236 * @stable ICU 2.4 237 */ 238 public static final int THAANA_ID = 14; 239 /** 240 * @stable ICU 2.4 241 */ 242 public static final int DEVANAGARI_ID = 15; 243 /** 244 * @stable ICU 2.4 245 */ 246 public static final int BENGALI_ID = 16; 247 /** 248 * @stable ICU 2.4 249 */ 250 public static final int GURMUKHI_ID = 17; 251 /** 252 * @stable ICU 2.4 253 */ 254 public static final int GUJARATI_ID = 18; 255 /** 256 * @stable ICU 2.4 257 */ 258 public static final int ORIYA_ID = 19; 259 /** 260 * @stable ICU 2.4 261 */ 
262 public static final int TAMIL_ID = 20; 263 /** 264 * @stable ICU 2.4 265 */ 266 public static final int TELUGU_ID = 21; 267 /** 268 * @stable ICU 2.4 269 */ 270 public static final int KANNADA_ID = 22; 271 /** 272 * @stable ICU 2.4 273 */ 274 public static final int MALAYALAM_ID = 23; 275 /** 276 * @stable ICU 2.4 277 */ 278 public static final int SINHALA_ID = 24; 279 /** 280 * @stable ICU 2.4 281 */ 282 public static final int THAI_ID = 25; 283 /** 284 * @stable ICU 2.4 285 */ 286 public static final int LAO_ID = 26; 287 /** 288 * @stable ICU 2.4 289 */ 290 public static final int TIBETAN_ID = 27; 291 /** 292 * @stable ICU 2.4 293 */ 294 public static final int MYANMAR_ID = 28; 295 /** 296 * @stable ICU 2.4 297 */ 298 public static final int GEORGIAN_ID = 29; 299 /** 300 * @stable ICU 2.4 301 */ 302 public static final int HANGUL_JAMO_ID = 30; 303 /** 304 * @stable ICU 2.4 305 */ 306 public static final int ETHIOPIC_ID = 31; 307 /** 308 * @stable ICU 2.4 309 */ 310 public static final int CHEROKEE_ID = 32; 311 /** 312 * @stable ICU 2.4 313 */ 314 public static final int UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_ID = 33; 315 /** 316 * @stable ICU 2.4 317 */ 318 public static final int OGHAM_ID = 34; 319 /** 320 * @stable ICU 2.4 321 */ 322 public static final int RUNIC_ID = 35; 323 /** 324 * @stable ICU 2.4 325 */ 326 public static final int KHMER_ID = 36; 327 /** 328 * @stable ICU 2.4 329 */ 330 public static final int MONGOLIAN_ID = 37; 331 /** 332 * @stable ICU 2.4 333 */ 334 public static final int LATIN_EXTENDED_ADDITIONAL_ID = 38; 335 /** 336 * @stable ICU 2.4 337 */ 338 public static final int GREEK_EXTENDED_ID = 39; 339 /** 340 * @stable ICU 2.4 341 */ 342 public static final int GENERAL_PUNCTUATION_ID = 40; 343 /** 344 * @stable ICU 2.4 345 */ 346 public static final int SUPERSCRIPTS_AND_SUBSCRIPTS_ID = 41; 347 /** 348 * @stable ICU 2.4 349 */ 350 public static final int CURRENCY_SYMBOLS_ID = 42; 351 /** 352 * Unicode 3.2 renames this block to "Combining 
Diacritical Marks for 353 * Symbols". 354 * @stable ICU 2.4 355 */ 356 public static final int COMBINING_MARKS_FOR_SYMBOLS_ID = 43; 357 /** 358 * @stable ICU 2.4 359 */ 360 public static final int LETTERLIKE_SYMBOLS_ID = 44; 361 /** 362 * @stable ICU 2.4 363 */ 364 public static final int NUMBER_FORMS_ID = 45; 365 /** 366 * @stable ICU 2.4 367 */ 368 public static final int ARROWS_ID = 46; 369 /** 370 * @stable ICU 2.4 371 */ 372 public static final int MATHEMATICAL_OPERATORS_ID = 47; 373 /** 374 * @stable ICU 2.4 375 */ 376 public static final int MISCELLANEOUS_TECHNICAL_ID = 48; 377 /** 378 * @stable ICU 2.4 379 */ 380 public static final int CONTROL_PICTURES_ID = 49; 381 /** 382 * @stable ICU 2.4 383 */ 384 public static final int OPTICAL_CHARACTER_RECOGNITION_ID = 50; 385 /** 386 * @stable ICU 2.4 387 */ 388 public static final int ENCLOSED_ALPHANUMERICS_ID = 51; 389 /** 390 * @stable ICU 2.4 391 */ 392 public static final int BOX_DRAWING_ID = 52; 393 /** 394 * @stable ICU 2.4 395 */ 396 public static final int BLOCK_ELEMENTS_ID = 53; 397 /** 398 * @stable ICU 2.4 399 */ 400 public static final int GEOMETRIC_SHAPES_ID = 54; 401 /** 402 * @stable ICU 2.4 403 */ 404 public static final int MISCELLANEOUS_SYMBOLS_ID = 55; 405 /** 406 * @stable ICU 2.4 407 */ 408 public static final int DINGBATS_ID = 56; 409 /** 410 * @stable ICU 2.4 411 */ 412 public static final int BRAILLE_PATTERNS_ID = 57; 413 /** 414 * @stable ICU 2.4 415 */ 416 public static final int CJK_RADICALS_SUPPLEMENT_ID = 58; 417 /** 418 * @stable ICU 2.4 419 */ 420 public static final int KANGXI_RADICALS_ID = 59; 421 /** 422 * @stable ICU 2.4 423 */ 424 public static final int IDEOGRAPHIC_DESCRIPTION_CHARACTERS_ID = 60; 425 /** 426 * @stable ICU 2.4 427 */ 428 public static final int CJK_SYMBOLS_AND_PUNCTUATION_ID = 61; 429 /** 430 * @stable ICU 2.4 431 */ 432 public static final int HIRAGANA_ID = 62; 433 /** 434 * @stable ICU 2.4 435 */ 436 public static final int KATAKANA_ID = 63; 437 /** 438 * 
@stable ICU 2.4 439 */ 440 public static final int BOPOMOFO_ID = 64; 441 /** 442 * @stable ICU 2.4 443 */ 444 public static final int HANGUL_COMPATIBILITY_JAMO_ID = 65; 445 /** 446 * @stable ICU 2.4 447 */ 448 public static final int KANBUN_ID = 66; 449 /** 450 * @stable ICU 2.4 451 */ 452 public static final int BOPOMOFO_EXTENDED_ID = 67; 453 /** 454 * @stable ICU 2.4 455 */ 456 public static final int ENCLOSED_CJK_LETTERS_AND_MONTHS_ID = 68; 457 /** 458 * @stable ICU 2.4 459 */ 460 public static final int CJK_COMPATIBILITY_ID = 69; 461 /** 462 * @stable ICU 2.4 463 */ 464 public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_ID = 70; 465 /** 466 * @stable ICU 2.4 467 */ 468 public static final int CJK_UNIFIED_IDEOGRAPHS_ID = 71; 469 /** 470 * @stable ICU 2.4 471 */ 472 public static final int YI_SYLLABLES_ID = 72; 473 /** 474 * @stable ICU 2.4 475 */ 476 public static final int YI_RADICALS_ID = 73; 477 /** 478 * @stable ICU 2.4 479 */ 480 public static final int HANGUL_SYLLABLES_ID = 74; 481 /** 482 * @stable ICU 2.4 483 */ 484 public static final int HIGH_SURROGATES_ID = 75; 485 /** 486 * @stable ICU 2.4 487 */ 488 public static final int HIGH_PRIVATE_USE_SURROGATES_ID = 76; 489 /** 490 * @stable ICU 2.4 491 */ 492 public static final int LOW_SURROGATES_ID = 77; 493 /** 494 * Same as public static final int PRIVATE_USE. 495 * Until Unicode 3.1.1; the corresponding block name was "Private Use"; 496 * and multiple code point ranges had this block. 497 * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" 498 * and adds separate blocks for the supplementary PUAs. 499 * @stable ICU 2.4 500 */ 501 public static final int PRIVATE_USE_AREA_ID = 78; 502 /** 503 * Same as public static final int PRIVATE_USE_AREA. 504 * Until Unicode 3.1.1; the corresponding block name was "Private Use"; 505 * and multiple code point ranges had this block. 
506 * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" 507 * and adds separate blocks for the supplementary PUAs. 508 * @stable ICU 2.4 509 */ 510 public static final int PRIVATE_USE_ID = PRIVATE_USE_AREA_ID; 511 /** 512 * @stable ICU 2.4 513 */ 514 public static final int CJK_COMPATIBILITY_IDEOGRAPHS_ID = 79; 515 /** 516 * @stable ICU 2.4 517 */ 518 public static final int ALPHABETIC_PRESENTATION_FORMS_ID = 80; 519 /** 520 * @stable ICU 2.4 521 */ 522 public static final int ARABIC_PRESENTATION_FORMS_A_ID = 81; 523 /** 524 * @stable ICU 2.4 525 */ 526 public static final int COMBINING_HALF_MARKS_ID = 82; 527 /** 528 * @stable ICU 2.4 529 */ 530 public static final int CJK_COMPATIBILITY_FORMS_ID = 83; 531 /** 532 * @stable ICU 2.4 533 */ 534 public static final int SMALL_FORM_VARIANTS_ID = 84; 535 /** 536 * @stable ICU 2.4 537 */ 538 public static final int ARABIC_PRESENTATION_FORMS_B_ID = 85; 539 /** 540 * @stable ICU 2.4 541 */ 542 public static final int SPECIALS_ID = 86; 543 /** 544 * @stable ICU 2.4 545 */ 546 public static final int HALFWIDTH_AND_FULLWIDTH_FORMS_ID = 87; 547 /** 548 * @stable ICU 2.4 549 */ 550 public static final int OLD_ITALIC_ID = 88; 551 /** 552 * @stable ICU 2.4 553 */ 554 public static final int GOTHIC_ID = 89; 555 /** 556 * @stable ICU 2.4 557 */ 558 public static final int DESERET_ID = 90; 559 /** 560 * @stable ICU 2.4 561 */ 562 public static final int BYZANTINE_MUSICAL_SYMBOLS_ID = 91; 563 /** 564 * @stable ICU 2.4 565 */ 566 public static final int MUSICAL_SYMBOLS_ID = 92; 567 /** 568 * @stable ICU 2.4 569 */ 570 public static final int MATHEMATICAL_ALPHANUMERIC_SYMBOLS_ID = 93; 571 /** 572 * @stable ICU 2.4 573 */ 574 public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_ID = 94; 575 /** 576 * @stable ICU 2.4 577 */ 578 public static final int 579 CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_ID = 95; 580 /** 581 * @stable ICU 2.4 582 */ 583 public static final int TAGS_ID = 96; 584 585 // New blocks in Unicode 
3.2 586 587 /** 588 * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement". 589 * @stable ICU 2.4 590 */ 591 public static final int CYRILLIC_SUPPLEMENTARY_ID = 97; 592 /** 593 * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement". 594 * @stable ICU 3.0 595 */ 596 597 public static final int CYRILLIC_SUPPLEMENT_ID = 97; 598 /** 599 * @stable ICU 2.4 600 */ 601 public static final int TAGALOG_ID = 98; 602 /** 603 * @stable ICU 2.4 604 */ 605 public static final int HANUNOO_ID = 99; 606 /** 607 * @stable ICU 2.4 608 */ 609 public static final int BUHID_ID = 100; 610 /** 611 * @stable ICU 2.4 612 */ 613 public static final int TAGBANWA_ID = 101; 614 /** 615 * @stable ICU 2.4 616 */ 617 public static final int MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A_ID = 102; 618 /** 619 * @stable ICU 2.4 620 */ 621 public static final int SUPPLEMENTAL_ARROWS_A_ID = 103; 622 /** 623 * @stable ICU 2.4 624 */ 625 public static final int SUPPLEMENTAL_ARROWS_B_ID = 104; 626 /** 627 * @stable ICU 2.4 628 */ 629 public static final int MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B_ID = 105; 630 /** 631 * @stable ICU 2.4 632 */ 633 public static final int SUPPLEMENTAL_MATHEMATICAL_OPERATORS_ID = 106; 634 /** 635 * @stable ICU 2.4 636 */ 637 public static final int KATAKANA_PHONETIC_EXTENSIONS_ID = 107; 638 /** 639 * @stable ICU 2.4 640 */ 641 public static final int VARIATION_SELECTORS_ID = 108; 642 /** 643 * @stable ICU 2.4 644 */ 645 public static final int SUPPLEMENTARY_PRIVATE_USE_AREA_A_ID = 109; 646 /** 647 * @stable ICU 2.4 648 */ 649 public static final int SUPPLEMENTARY_PRIVATE_USE_AREA_B_ID = 110; 650 651 /** 652 * @stable ICU 2.6 653 */ 654 public static final int LIMBU_ID = 111; /*[1900]*/ 655 /** 656 * @stable ICU 2.6 657 */ 658 public static final int TAI_LE_ID = 112; /*[1950]*/ 659 /** 660 * @stable ICU 2.6 661 */ 662 public static final int KHMER_SYMBOLS_ID = 113; /*[19E0]*/ 663 /** 664 * @stable ICU 2.6 665 */ 666 public static 
final int PHONETIC_EXTENSIONS_ID = 114; /*[1D00]*/ 667 /** 668 * @stable ICU 2.6 669 */ 670 public static final int MISCELLANEOUS_SYMBOLS_AND_ARROWS_ID = 115; /*[2B00]*/ 671 /** 672 * @stable ICU 2.6 673 */ 674 public static final int YIJING_HEXAGRAM_SYMBOLS_ID = 116; /*[4DC0]*/ 675 /** 676 * @stable ICU 2.6 677 */ 678 public static final int LINEAR_B_SYLLABARY_ID = 117; /*[10000]*/ 679 /** 680 * @stable ICU 2.6 681 */ 682 public static final int LINEAR_B_IDEOGRAMS_ID = 118; /*[10080]*/ 683 /** 684 * @stable ICU 2.6 685 */ 686 public static final int AEGEAN_NUMBERS_ID = 119; /*[10100]*/ 687 /** 688 * @stable ICU 2.6 689 */ 690 public static final int UGARITIC_ID = 120; /*[10380]*/ 691 /** 692 * @stable ICU 2.6 693 */ 694 public static final int SHAVIAN_ID = 121; /*[10450]*/ 695 /** 696 * @stable ICU 2.6 697 */ 698 public static final int OSMANYA_ID = 122; /*[10480]*/ 699 /** 700 * @stable ICU 2.6 701 */ 702 public static final int CYPRIOT_SYLLABARY_ID = 123; /*[10800]*/ 703 /** 704 * @stable ICU 2.6 705 */ 706 public static final int TAI_XUAN_JING_SYMBOLS_ID = 124; /*[1D300]*/ 707 /** 708 * @stable ICU 2.6 709 */ 710 public static final int VARIATION_SELECTORS_SUPPLEMENT_ID = 125; /*[E0100]*/ 711 712 /* New blocks in Unicode 4.1 */ 713 714 /** 715 * @stable ICU 3.4 716 */ 717 public static final int ANCIENT_GREEK_MUSICAL_NOTATION_ID = 126; /*[1D200]*/ 718 719 /** 720 * @stable ICU 3.4 721 */ 722 public static final int ANCIENT_GREEK_NUMBERS_ID = 127; /*[10140]*/ 723 724 /** 725 * @stable ICU 3.4 726 */ 727 public static final int ARABIC_SUPPLEMENT_ID = 128; /*[0750]*/ 728 729 /** 730 * @stable ICU 3.4 731 */ 732 public static final int BUGINESE_ID = 129; /*[1A00]*/ 733 734 /** 735 * @stable ICU 3.4 736 */ 737 public static final int CJK_STROKES_ID = 130; /*[31C0]*/ 738 739 /** 740 * @stable ICU 3.4 741 */ 742 public static final int COMBINING_DIACRITICAL_MARKS_SUPPLEMENT_ID = 131; /*[1DC0]*/ 743 744 /** 745 * @stable ICU 3.4 746 */ 747 public static final int 
COPTIC_ID = 132; /*[2C80]*/ 748 749 /** 750 * @stable ICU 3.4 751 */ 752 public static final int ETHIOPIC_EXTENDED_ID = 133; /*[2D80]*/ 753 754 /** 755 * @stable ICU 3.4 756 */ 757 public static final int ETHIOPIC_SUPPLEMENT_ID = 134; /*[1380]*/ 758 759 /** 760 * @stable ICU 3.4 761 */ 762 public static final int GEORGIAN_SUPPLEMENT_ID = 135; /*[2D00]*/ 763 764 /** 765 * @stable ICU 3.4 766 */ 767 public static final int GLAGOLITIC_ID = 136; /*[2C00]*/ 768 769 /** 770 * @stable ICU 3.4 771 */ 772 public static final int KHAROSHTHI_ID = 137; /*[10A00]*/ 773 774 /** 775 * @stable ICU 3.4 776 */ 777 public static final int MODIFIER_TONE_LETTERS_ID = 138; /*[A700]*/ 778 779 /** 780 * @stable ICU 3.4 781 */ 782 public static final int NEW_TAI_LUE_ID = 139; /*[1980]*/ 783 784 /** 785 * @stable ICU 3.4 786 */ 787 public static final int OLD_PERSIAN_ID = 140; /*[103A0]*/ 788 789 /** 790 * @stable ICU 3.4 791 */ 792 public static final int PHONETIC_EXTENSIONS_SUPPLEMENT_ID = 141; /*[1D80]*/ 793 794 /** 795 * @stable ICU 3.4 796 */ 797 public static final int SUPPLEMENTAL_PUNCTUATION_ID = 142; /*[2E00]*/ 798 799 /** 800 * @stable ICU 3.4 801 */ 802 public static final int SYLOTI_NAGRI_ID = 143; /*[A800]*/ 803 804 /** 805 * @stable ICU 3.4 806 */ 807 public static final int TIFINAGH_ID = 144; /*[2D30]*/ 808 809 /** 810 * @stable ICU 3.4 811 */ 812 public static final int VERTICAL_FORMS_ID = 145; /*[FE10]*/ 813 814 /* New blocks in Unicode 5.0 */ 815 816 /** 817 * @stable ICU 3.6 818 */ 819 public static final int NKO_ID = 146; /*[07C0]*/ 820 /** 821 * @stable ICU 3.6 822 */ 823 public static final int BALINESE_ID = 147; /*[1B00]*/ 824 /** 825 * @stable ICU 3.6 826 */ 827 public static final int LATIN_EXTENDED_C_ID = 148; /*[2C60]*/ 828 /** 829 * @stable ICU 3.6 830 */ 831 public static final int LATIN_EXTENDED_D_ID = 149; /*[A720]*/ 832 /** 833 * @stable ICU 3.6 834 */ 835 public static final int PHAGS_PA_ID = 150; /*[A840]*/ 836 /** 837 * @stable ICU 3.6 838 */ 839 public 
static final int PHOENICIAN_ID = 151; /*[10900]*/ 840 /** 841 * @stable ICU 3.6 842 */ 843 public static final int CUNEIFORM_ID = 152; /*[12000]*/ 844 /** 845 * @stable ICU 3.6 846 */ 847 public static final int CUNEIFORM_NUMBERS_AND_PUNCTUATION_ID = 153; /*[12400]*/ 848 /** 849 * @stable ICU 3.6 850 */ 851 public static final int COUNTING_ROD_NUMERALS_ID = 154; /*[1D360]*/ 852 853 /** 854 * @stable ICU 4.0 855 */ 856 public static final int SUNDANESE_ID = 155; /* [1B80] */ 857 858 /** 859 * @stable ICU 4.0 860 */ 861 public static final int LEPCHA_ID = 156; /* [1C00] */ 862 863 /** 864 * @stable ICU 4.0 865 */ 866 public static final int OL_CHIKI_ID = 157; /* [1C50] */ 867 868 /** 869 * @stable ICU 4.0 870 */ 871 public static final int CYRILLIC_EXTENDED_A_ID = 158; /* [2DE0] */ 872 873 /** 874 * @stable ICU 4.0 875 */ 876 public static final int VAI_ID = 159; /* [A500] */ 877 878 /** 879 * @stable ICU 4.0 880 */ 881 public static final int CYRILLIC_EXTENDED_B_ID = 160; /* [A640] */ 882 883 /** 884 * @stable ICU 4.0 885 */ 886 public static final int SAURASHTRA_ID = 161; /* [A880] */ 887 888 /** 889 * @stable ICU 4.0 890 */ 891 public static final int KAYAH_LI_ID = 162; /* [A900] */ 892 893 /** 894 * @stable ICU 4.0 895 */ 896 public static final int REJANG_ID = 163; /* [A930] */ 897 898 /** 899 * @stable ICU 4.0 900 */ 901 public static final int CHAM_ID = 164; /* [AA00] */ 902 903 /** 904 * @stable ICU 4.0 905 */ 906 public static final int ANCIENT_SYMBOLS_ID = 165; /* [10190] */ 907 908 /** 909 * @stable ICU 4.0 910 */ 911 public static final int PHAISTOS_DISC_ID = 166; /* [101D0] */ 912 913 /** 914 * @stable ICU 4.0 915 */ 916 public static final int LYCIAN_ID = 167; /* [10280] */ 917 918 /** 919 * @stable ICU 4.0 920 */ 921 public static final int CARIAN_ID = 168; /* [102A0] */ 922 923 /** 924 * @stable ICU 4.0 925 */ 926 public static final int LYDIAN_ID = 169; /* [10920] */ 927 928 /** 929 * @stable ICU 4.0 930 */ 931 public static final int 
MAHJONG_TILES_ID = 170; /* [1F000] */

/** @stable ICU 4.0 */
public static final int DOMINO_TILES_ID = 171; /* [1F030] */

/*
 * Integer block IDs, grouped by the Unicode version that introduced them.
 * NOTE(review): the bracketed hex value after each ID looks like the first
 * code point of the corresponding block; confirm against Blocks.txt.
 */

/* New blocks in Unicode 5.2 */

/** @stable ICU 4.4 */
public static final int SAMARITAN_ID = 172; /* [0800] */
/** @stable ICU 4.4 */
public static final int UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_ID = 173; /* [18B0] */
/** @stable ICU 4.4 */
public static final int TAI_THAM_ID = 174; /* [1A20] */
/** @stable ICU 4.4 */
public static final int VEDIC_EXTENSIONS_ID = 175; /* [1CD0] */
/** @stable ICU 4.4 */
public static final int LISU_ID = 176; /* [A4D0] */
/** @stable ICU 4.4 */
public static final int BAMUM_ID = 177; /* [A6A0] */
/** @stable ICU 4.4 */
public static final int COMMON_INDIC_NUMBER_FORMS_ID = 178; /* [A830] */
/** @stable ICU 4.4 */
public static final int DEVANAGARI_EXTENDED_ID = 179; /* [A8E0] */
/** @stable ICU 4.4 */
public static final int HANGUL_JAMO_EXTENDED_A_ID = 180; /* [A960] */
/** @stable ICU 4.4 */
public static final int JAVANESE_ID = 181; /* [A980] */
/** @stable ICU 4.4 */
public static final int MYANMAR_EXTENDED_A_ID = 182; /* [AA60] */
/** @stable ICU 4.4 */
public static final int TAI_VIET_ID = 183; /* [AA80] */
/** @stable ICU 4.4 */
public static final int MEETEI_MAYEK_ID = 184; /* [ABC0] */
/** @stable ICU 4.4 */
public static final int HANGUL_JAMO_EXTENDED_B_ID = 185; /* [D7B0] */
/** @stable ICU 4.4 */
public static final int IMPERIAL_ARAMAIC_ID = 186; /* [10840] */
/** @stable ICU 4.4 */
public static final int OLD_SOUTH_ARABIAN_ID = 187; /* [10A60] */
/** @stable ICU 4.4 */
public static final int AVESTAN_ID = 188; /* [10B00] */
/** @stable ICU 4.4 */
public static final int INSCRIPTIONAL_PARTHIAN_ID = 189; /* [10B40] */
/** @stable ICU 4.4 */
public static final int INSCRIPTIONAL_PAHLAVI_ID = 190; /* [10B60] */
/** @stable ICU 4.4 */
public static final int OLD_TURKIC_ID = 191; /* [10C00] */
/** @stable ICU 4.4 */
public static final int RUMI_NUMERAL_SYMBOLS_ID = 192; /* [10E60] */
/** @stable ICU 4.4 */
public static final int KAITHI_ID = 193; /* [11080] */
/** @stable ICU 4.4 */
public static final int EGYPTIAN_HIEROGLYPHS_ID = 194; /* [13000] */
/** @stable ICU 4.4 */
public static final int ENCLOSED_ALPHANUMERIC_SUPPLEMENT_ID = 195; /* [1F100] */
/** @stable ICU 4.4 */
public static final int ENCLOSED_IDEOGRAPHIC_SUPPLEMENT_ID = 196; /* [1F200] */
/** @stable ICU 4.4 */
public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_ID = 197; /* [2A700] */

/* New blocks in Unicode 6.0 */

/** @stable ICU 4.6 */
public static final int MANDAIC_ID = 198; /* [0840] */
/** @stable ICU 4.6 */
public static final int BATAK_ID = 199; /* [1BC0] */
/** @stable ICU 4.6 */
public static final int ETHIOPIC_EXTENDED_A_ID = 200; /* [AB00] */
/** @stable ICU 4.6 */
public static final int BRAHMI_ID = 201; /* [11000] */
/** @stable ICU 4.6 */
public static final int BAMUM_SUPPLEMENT_ID = 202; /* [16800] */
/** @stable ICU 4.6 */
public static final int KANA_SUPPLEMENT_ID = 203; /* [1B000] */
/** @stable ICU 4.6 */
public static final int PLAYING_CARDS_ID = 204; /* [1F0A0] */
/** @stable ICU 4.6 */
public static final int MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS_ID = 205; /* [1F300] */
/** @stable ICU 4.6 */
public static final int EMOTICONS_ID = 206; /* [1F600] */
/** @stable ICU 4.6 */
public static final int TRANSPORT_AND_MAP_SYMBOLS_ID = 207; /* [1F680] */
/** @stable ICU 4.6 */
public static final int ALCHEMICAL_SYMBOLS_ID = 208; /* [1F700] */
/** @stable ICU 4.6 */
public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_ID = 209; /* [2B740] */

/* New blocks in Unicode 6.1 */

/** @stable ICU 49 */
public static final int ARABIC_EXTENDED_A_ID = 210; /* [08A0] */
/** @stable ICU 49 */
public static final int ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS_ID = 211; /* [1EE00] */
/** @stable ICU 49 */
public static final int CHAKMA_ID = 212; /* [11100] */
/** @stable ICU 49 */
public static final int MEETEI_MAYEK_EXTENSIONS_ID = 213; /* [AAE0] */
/** @stable ICU 49 */
public static final int MEROITIC_CURSIVE_ID = 214; /* [109A0] */
/** @stable ICU 49 */
public static final int MEROITIC_HIEROGLYPHS_ID = 215; /* [10980] */
/** @stable ICU 49 */
public static final int MIAO_ID = 216; /* [16F00] */
/** @stable ICU 49 */
public static final int SHARADA_ID = 217; /* [11180] */
/** @stable ICU 49 */
public static final int SORA_SOMPENG_ID = 218; /* [110D0] */
/** @stable ICU 49 */
public static final int SUNDANESE_SUPPLEMENT_ID = 219; /* [1CC0] */
/** @stable ICU 49 */
public static final int TAKRI_ID = 220; /* [11680] */

/* New blocks in Unicode 7.0 */

/** @stable ICU 54 */
public static final int BASSA_VAH_ID = 221; /* [16AD0] */
/** @stable ICU 54 */
public static final int CAUCASIAN_ALBANIAN_ID = 222; /* [10530] */
/** @stable ICU 54 */
public static final int COPTIC_EPACT_NUMBERS_ID = 223; /* [102E0] */
/** @stable ICU 54 */
public static final int COMBINING_DIACRITICAL_MARKS_EXTENDED_ID = 224; /* [1AB0] */
/** @stable ICU 54 */
public static final int DUPLOYAN_ID = 225; /* [1BC00] */
/** @stable ICU 54 */
public static final int ELBASAN_ID = 226; /* [10500] */
/** @stable ICU 54 */
public static final int GEOMETRIC_SHAPES_EXTENDED_ID = 227; /* [1F780] */
/** @stable ICU 54 */
public static final int GRANTHA_ID = 228; /* [11300] */
/** @stable ICU 54 */
public static final int KHOJKI_ID = 229; /* [11200] */
/** @stable ICU 54 */
public static final int KHUDAWADI_ID = 230; /* [112B0] */
/** @stable ICU 54 */
public static final int LATIN_EXTENDED_E_ID = 231; /* [AB30] */
/** @stable ICU 54 */
public static final int LINEAR_A_ID = 232; /* [10600] */
/** @stable ICU 54 */
public static final int MAHAJANI_ID = 233; /* [11150] */
/** @stable ICU 54 */
public static final int MANICHAEAN_ID = 234; /* [10AC0] */
/** @stable ICU 54 */
public static final int MENDE_KIKAKUI_ID = 235; /* [1E800] */
/** @stable ICU 54 */
public static final int MODI_ID = 236; /* [11600] */
/** @stable ICU 54 */
public static final int MRO_ID = 237; /* [16A40] */
/** @stable ICU 54 */
public static final int MYANMAR_EXTENDED_B_ID = 238; /* [A9E0] */
/** @stable ICU 54 */
public static final int NABATAEAN_ID = 239; /* [10880] */
/** @stable ICU 54 */
public static final int OLD_NORTH_ARABIAN_ID = 240; /* [10A80] */
/** @stable ICU 54 */
public static final int OLD_PERMIC_ID = 241; /* [10350] */
/** @stable ICU 54 */
public static final int ORNAMENTAL_DINGBATS_ID = 242; /* [1F650] */
/** @stable ICU 54 */
public static final int PAHAWH_HMONG_ID = 243; /* [16B00] */
/** @stable ICU 54 */
public static final int PALMYRENE_ID = 244; /* [10860] */
/** @stable ICU 54 */
public static final int PAU_CIN_HAU_ID = 245; /* [11AC0] */
/** @stable ICU 54 */
public static final int PSALTER_PAHLAVI_ID = 246; /* [10B80] */
/** @stable ICU 54 */
public static final int SHORTHAND_FORMAT_CONTROLS_ID = 247; /* [1BCA0] */
/** @stable ICU 54 */
public static final int SIDDHAM_ID = 248; /* [11580] */
/** @stable ICU 54 */
public static final int SINHALA_ARCHAIC_NUMBERS_ID = 249; /* [111E0] */
/** @stable ICU 54 */
public static final int SUPPLEMENTAL_ARROWS_C_ID = 250; /* [1F800] */
/** @stable ICU 54 */
public static final int TIRHUTA_ID = 251; /* [11480] */
/** @stable ICU 54 */
public static final int WARANG_CITI_ID = 252; /* [118A0] */

/* New blocks in Unicode 8.0 */
1113 1114 /** @stable ICU 56 */ 1115 public static final int AHOM_ID = 253; /*[11700]*/ 1116 /** @stable ICU 56 */ 1117 public static final int ANATOLIAN_HIEROGLYPHS_ID = 254; /*[14400]*/ 1118 /** @stable ICU 56 */ 1119 public static final int CHEROKEE_SUPPLEMENT_ID = 255; /*[AB70]*/ 1120 /** @stable ICU 56 */ 1121 public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E_ID = 256; /*[2B820]*/ 1122 /** @stable ICU 56 */ 1123 public static final int EARLY_DYNASTIC_CUNEIFORM_ID = 257; /*[12480]*/ 1124 /** @stable ICU 56 */ 1125 public static final int HATRAN_ID = 258; /*[108E0]*/ 1126 /** @stable ICU 56 */ 1127 public static final int MULTANI_ID = 259; /*[11280]*/ 1128 /** @stable ICU 56 */ 1129 public static final int OLD_HUNGARIAN_ID = 260; /*[10C80]*/ 1130 /** @stable ICU 56 */ 1131 public static final int SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS_ID = 261; /*[1F900]*/ 1132 /** @stable ICU 56 */ 1133 public static final int SUTTON_SIGNWRITING_ID = 262; /*[1D800]*/ 1134 1135 /** 1136 * @stable ICU 2.4 1137 */ 1138 public static final int COUNT = 263; 1139 1140 // blocks objects --------------------------------------------------- 1141 1142 /** 1143 * Array of UnicodeBlocks, for easy access in getInstance(int) 1144 */ 1145 private final static UnicodeBlock BLOCKS_[] = new UnicodeBlock[COUNT]; 1146 1147 /** 1148 * @stable ICU 2.6 1149 */ 1150 public static final UnicodeBlock NO_BLOCK 1151 = new UnicodeBlock("NO_BLOCK", 0); 1152 1153 /** 1154 * @stable ICU 2.4 1155 */ 1156 public static final UnicodeBlock BASIC_LATIN 1157 = new UnicodeBlock("BASIC_LATIN", BASIC_LATIN_ID); 1158 /** 1159 * @stable ICU 2.4 1160 */ 1161 public static final UnicodeBlock LATIN_1_SUPPLEMENT 1162 = new UnicodeBlock("LATIN_1_SUPPLEMENT", LATIN_1_SUPPLEMENT_ID); 1163 /** 1164 * @stable ICU 2.4 1165 */ 1166 public static final UnicodeBlock LATIN_EXTENDED_A 1167 = new UnicodeBlock("LATIN_EXTENDED_A", LATIN_EXTENDED_A_ID); 1168 /** 1169 * @stable ICU 2.4 1170 */ 1171 public static final UnicodeBlock 
LATIN_EXTENDED_B 1172 = new UnicodeBlock("LATIN_EXTENDED_B", LATIN_EXTENDED_B_ID); 1173 /** 1174 * @stable ICU 2.4 1175 */ 1176 public static final UnicodeBlock IPA_EXTENSIONS 1177 = new UnicodeBlock("IPA_EXTENSIONS", IPA_EXTENSIONS_ID); 1178 /** 1179 * @stable ICU 2.4 1180 */ 1181 public static final UnicodeBlock SPACING_MODIFIER_LETTERS 1182 = new UnicodeBlock("SPACING_MODIFIER_LETTERS", SPACING_MODIFIER_LETTERS_ID); 1183 /** 1184 * @stable ICU 2.4 1185 */ 1186 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS 1187 = new UnicodeBlock("COMBINING_DIACRITICAL_MARKS", COMBINING_DIACRITICAL_MARKS_ID); 1188 /** 1189 * Unicode 3.2 renames this block to "Greek and Coptic". 1190 * @stable ICU 2.4 1191 */ 1192 public static final UnicodeBlock GREEK 1193 = new UnicodeBlock("GREEK", GREEK_ID); 1194 /** 1195 * @stable ICU 2.4 1196 */ 1197 public static final UnicodeBlock CYRILLIC 1198 = new UnicodeBlock("CYRILLIC", CYRILLIC_ID); 1199 /** 1200 * @stable ICU 2.4 1201 */ 1202 public static final UnicodeBlock ARMENIAN 1203 = new UnicodeBlock("ARMENIAN", ARMENIAN_ID); 1204 /** 1205 * @stable ICU 2.4 1206 */ 1207 public static final UnicodeBlock HEBREW 1208 = new UnicodeBlock("HEBREW", HEBREW_ID); 1209 /** 1210 * @stable ICU 2.4 1211 */ 1212 public static final UnicodeBlock ARABIC 1213 = new UnicodeBlock("ARABIC", ARABIC_ID); 1214 /** 1215 * @stable ICU 2.4 1216 */ 1217 public static final UnicodeBlock SYRIAC 1218 = new UnicodeBlock("SYRIAC", SYRIAC_ID); 1219 /** 1220 * @stable ICU 2.4 1221 */ 1222 public static final UnicodeBlock THAANA 1223 = new UnicodeBlock("THAANA", THAANA_ID); 1224 /** 1225 * @stable ICU 2.4 1226 */ 1227 public static final UnicodeBlock DEVANAGARI 1228 = new UnicodeBlock("DEVANAGARI", DEVANAGARI_ID); 1229 /** 1230 * @stable ICU 2.4 1231 */ 1232 public static final UnicodeBlock BENGALI 1233 = new UnicodeBlock("BENGALI", BENGALI_ID); 1234 /** 1235 * @stable ICU 2.4 1236 */ 1237 public static final UnicodeBlock GURMUKHI 1238 = new 
UnicodeBlock("GURMUKHI", GURMUKHI_ID); 1239 /** 1240 * @stable ICU 2.4 1241 */ 1242 public static final UnicodeBlock GUJARATI 1243 = new UnicodeBlock("GUJARATI", GUJARATI_ID); 1244 /** 1245 * @stable ICU 2.4 1246 */ 1247 public static final UnicodeBlock ORIYA 1248 = new UnicodeBlock("ORIYA", ORIYA_ID); 1249 /** 1250 * @stable ICU 2.4 1251 */ 1252 public static final UnicodeBlock TAMIL 1253 = new UnicodeBlock("TAMIL", TAMIL_ID); 1254 /** 1255 * @stable ICU 2.4 1256 */ 1257 public static final UnicodeBlock TELUGU 1258 = new UnicodeBlock("TELUGU", TELUGU_ID); 1259 /** 1260 * @stable ICU 2.4 1261 */ 1262 public static final UnicodeBlock KANNADA 1263 = new UnicodeBlock("KANNADA", KANNADA_ID); 1264 /** 1265 * @stable ICU 2.4 1266 */ 1267 public static final UnicodeBlock MALAYALAM 1268 = new UnicodeBlock("MALAYALAM", MALAYALAM_ID); 1269 /** 1270 * @stable ICU 2.4 1271 */ 1272 public static final UnicodeBlock SINHALA 1273 = new UnicodeBlock("SINHALA", SINHALA_ID); 1274 /** 1275 * @stable ICU 2.4 1276 */ 1277 public static final UnicodeBlock THAI 1278 = new UnicodeBlock("THAI", THAI_ID); 1279 /** 1280 * @stable ICU 2.4 1281 */ 1282 public static final UnicodeBlock LAO 1283 = new UnicodeBlock("LAO", LAO_ID); 1284 /** 1285 * @stable ICU 2.4 1286 */ 1287 public static final UnicodeBlock TIBETAN 1288 = new UnicodeBlock("TIBETAN", TIBETAN_ID); 1289 /** 1290 * @stable ICU 2.4 1291 */ 1292 public static final UnicodeBlock MYANMAR 1293 = new UnicodeBlock("MYANMAR", MYANMAR_ID); 1294 /** 1295 * @stable ICU 2.4 1296 */ 1297 public static final UnicodeBlock GEORGIAN 1298 = new UnicodeBlock("GEORGIAN", GEORGIAN_ID); 1299 /** 1300 * @stable ICU 2.4 1301 */ 1302 public static final UnicodeBlock HANGUL_JAMO 1303 = new UnicodeBlock("HANGUL_JAMO", HANGUL_JAMO_ID); 1304 /** 1305 * @stable ICU 2.4 1306 */ 1307 public static final UnicodeBlock ETHIOPIC 1308 = new UnicodeBlock("ETHIOPIC", ETHIOPIC_ID); 1309 /** 1310 * @stable ICU 2.4 1311 */ 1312 public static final UnicodeBlock CHEROKEE 1313 = 
new UnicodeBlock("CHEROKEE", CHEROKEE_ID); 1314 /** 1315 * @stable ICU 2.4 1316 */ 1317 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS 1318 = new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS", 1319 UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_ID); 1320 /** 1321 * @stable ICU 2.4 1322 */ 1323 public static final UnicodeBlock OGHAM 1324 = new UnicodeBlock("OGHAM", OGHAM_ID); 1325 /** 1326 * @stable ICU 2.4 1327 */ 1328 public static final UnicodeBlock RUNIC 1329 = new UnicodeBlock("RUNIC", RUNIC_ID); 1330 /** 1331 * @stable ICU 2.4 1332 */ 1333 public static final UnicodeBlock KHMER 1334 = new UnicodeBlock("KHMER", KHMER_ID); 1335 /** 1336 * @stable ICU 2.4 1337 */ 1338 public static final UnicodeBlock MONGOLIAN 1339 = new UnicodeBlock("MONGOLIAN", MONGOLIAN_ID); 1340 /** 1341 * @stable ICU 2.4 1342 */ 1343 public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL 1344 = new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL", LATIN_EXTENDED_ADDITIONAL_ID); 1345 /** 1346 * @stable ICU 2.4 1347 */ 1348 public static final UnicodeBlock GREEK_EXTENDED 1349 = new UnicodeBlock("GREEK_EXTENDED", GREEK_EXTENDED_ID); 1350 /** 1351 * @stable ICU 2.4 1352 */ 1353 public static final UnicodeBlock GENERAL_PUNCTUATION 1354 = new UnicodeBlock("GENERAL_PUNCTUATION", GENERAL_PUNCTUATION_ID); 1355 /** 1356 * @stable ICU 2.4 1357 */ 1358 public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS 1359 = new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS", SUPERSCRIPTS_AND_SUBSCRIPTS_ID); 1360 /** 1361 * @stable ICU 2.4 1362 */ 1363 public static final UnicodeBlock CURRENCY_SYMBOLS 1364 = new UnicodeBlock("CURRENCY_SYMBOLS", CURRENCY_SYMBOLS_ID); 1365 /** 1366 * Unicode 3.2 renames this block to "Combining Diacritical Marks for 1367 * Symbols". 
1368 * @stable ICU 2.4 1369 */ 1370 public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS 1371 = new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS", COMBINING_MARKS_FOR_SYMBOLS_ID); 1372 /** 1373 * @stable ICU 2.4 1374 */ 1375 public static final UnicodeBlock LETTERLIKE_SYMBOLS 1376 = new UnicodeBlock("LETTERLIKE_SYMBOLS", LETTERLIKE_SYMBOLS_ID); 1377 /** 1378 * @stable ICU 2.4 1379 */ 1380 public static final UnicodeBlock NUMBER_FORMS 1381 = new UnicodeBlock("NUMBER_FORMS", NUMBER_FORMS_ID); 1382 /** 1383 * @stable ICU 2.4 1384 */ 1385 public static final UnicodeBlock ARROWS 1386 = new UnicodeBlock("ARROWS", ARROWS_ID); 1387 /** 1388 * @stable ICU 2.4 1389 */ 1390 public static final UnicodeBlock MATHEMATICAL_OPERATORS 1391 = new UnicodeBlock("MATHEMATICAL_OPERATORS", MATHEMATICAL_OPERATORS_ID); 1392 /** 1393 * @stable ICU 2.4 1394 */ 1395 public static final UnicodeBlock MISCELLANEOUS_TECHNICAL 1396 = new UnicodeBlock("MISCELLANEOUS_TECHNICAL", MISCELLANEOUS_TECHNICAL_ID); 1397 /** 1398 * @stable ICU 2.4 1399 */ 1400 public static final UnicodeBlock CONTROL_PICTURES 1401 = new UnicodeBlock("CONTROL_PICTURES", CONTROL_PICTURES_ID); 1402 /** 1403 * @stable ICU 2.4 1404 */ 1405 public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION 1406 = new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION", OPTICAL_CHARACTER_RECOGNITION_ID); 1407 /** 1408 * @stable ICU 2.4 1409 */ 1410 public static final UnicodeBlock ENCLOSED_ALPHANUMERICS 1411 = new UnicodeBlock("ENCLOSED_ALPHANUMERICS", ENCLOSED_ALPHANUMERICS_ID); 1412 /** 1413 * @stable ICU 2.4 1414 */ 1415 public static final UnicodeBlock BOX_DRAWING 1416 = new UnicodeBlock("BOX_DRAWING", BOX_DRAWING_ID); 1417 /** 1418 * @stable ICU 2.4 1419 */ 1420 public static final UnicodeBlock BLOCK_ELEMENTS 1421 = new UnicodeBlock("BLOCK_ELEMENTS", BLOCK_ELEMENTS_ID); 1422 /** 1423 * @stable ICU 2.4 1424 */ 1425 public static final UnicodeBlock GEOMETRIC_SHAPES 1426 = new UnicodeBlock("GEOMETRIC_SHAPES", GEOMETRIC_SHAPES_ID); 1427 
/** 1428 * @stable ICU 2.4 1429 */ 1430 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS 1431 = new UnicodeBlock("MISCELLANEOUS_SYMBOLS", MISCELLANEOUS_SYMBOLS_ID); 1432 /** 1433 * @stable ICU 2.4 1434 */ 1435 public static final UnicodeBlock DINGBATS 1436 = new UnicodeBlock("DINGBATS", DINGBATS_ID); 1437 /** 1438 * @stable ICU 2.4 1439 */ 1440 public static final UnicodeBlock BRAILLE_PATTERNS 1441 = new UnicodeBlock("BRAILLE_PATTERNS", BRAILLE_PATTERNS_ID); 1442 /** 1443 * @stable ICU 2.4 1444 */ 1445 public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT 1446 = new UnicodeBlock("CJK_RADICALS_SUPPLEMENT", CJK_RADICALS_SUPPLEMENT_ID); 1447 /** 1448 * @stable ICU 2.4 1449 */ 1450 public static final UnicodeBlock KANGXI_RADICALS 1451 = new UnicodeBlock("KANGXI_RADICALS", KANGXI_RADICALS_ID); 1452 /** 1453 * @stable ICU 2.4 1454 */ 1455 public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS 1456 = new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS", 1457 IDEOGRAPHIC_DESCRIPTION_CHARACTERS_ID); 1458 /** 1459 * @stable ICU 2.4 1460 */ 1461 public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION 1462 = new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION", CJK_SYMBOLS_AND_PUNCTUATION_ID); 1463 /** 1464 * @stable ICU 2.4 1465 */ 1466 public static final UnicodeBlock HIRAGANA 1467 = new UnicodeBlock("HIRAGANA", HIRAGANA_ID); 1468 /** 1469 * @stable ICU 2.4 1470 */ 1471 public static final UnicodeBlock KATAKANA 1472 = new UnicodeBlock("KATAKANA", KATAKANA_ID); 1473 /** 1474 * @stable ICU 2.4 1475 */ 1476 public static final UnicodeBlock BOPOMOFO 1477 = new UnicodeBlock("BOPOMOFO", BOPOMOFO_ID); 1478 /** 1479 * @stable ICU 2.4 1480 */ 1481 public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO 1482 = new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO", HANGUL_COMPATIBILITY_JAMO_ID); 1483 /** 1484 * @stable ICU 2.4 1485 */ 1486 public static final UnicodeBlock KANBUN 1487 = new UnicodeBlock("KANBUN", KANBUN_ID); 1488 /** 1489 * @stable ICU 2.4 1490 */ 1491 
public static final UnicodeBlock BOPOMOFO_EXTENDED 1492 = new UnicodeBlock("BOPOMOFO_EXTENDED", BOPOMOFO_EXTENDED_ID); 1493 /** 1494 * @stable ICU 2.4 1495 */ 1496 public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS 1497 = new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS", 1498 ENCLOSED_CJK_LETTERS_AND_MONTHS_ID); 1499 /** 1500 * @stable ICU 2.4 1501 */ 1502 public static final UnicodeBlock CJK_COMPATIBILITY 1503 = new UnicodeBlock("CJK_COMPATIBILITY", CJK_COMPATIBILITY_ID); 1504 /** 1505 * @stable ICU 2.4 1506 */ 1507 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A 1508 = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A", 1509 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_ID); 1510 /** 1511 * @stable ICU 2.4 1512 */ 1513 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS 1514 = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS", CJK_UNIFIED_IDEOGRAPHS_ID); 1515 /** 1516 * @stable ICU 2.4 1517 */ 1518 public static final UnicodeBlock YI_SYLLABLES 1519 = new UnicodeBlock("YI_SYLLABLES", YI_SYLLABLES_ID); 1520 /** 1521 * @stable ICU 2.4 1522 */ 1523 public static final UnicodeBlock YI_RADICALS 1524 = new UnicodeBlock("YI_RADICALS", YI_RADICALS_ID); 1525 /** 1526 * @stable ICU 2.4 1527 */ 1528 public static final UnicodeBlock HANGUL_SYLLABLES 1529 = new UnicodeBlock("HANGUL_SYLLABLES", HANGUL_SYLLABLES_ID); 1530 /** 1531 * @stable ICU 2.4 1532 */ 1533 public static final UnicodeBlock HIGH_SURROGATES 1534 = new UnicodeBlock("HIGH_SURROGATES", HIGH_SURROGATES_ID); 1535 /** 1536 * @stable ICU 2.4 1537 */ 1538 public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES 1539 = new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES", HIGH_PRIVATE_USE_SURROGATES_ID); 1540 /** 1541 * @stable ICU 2.4 1542 */ 1543 public static final UnicodeBlock LOW_SURROGATES 1544 = new UnicodeBlock("LOW_SURROGATES", LOW_SURROGATES_ID); 1545 /** 1546 * Same as public static final int PRIVATE_USE. 
1547 * Until Unicode 3.1.1; the corresponding block name was "Private Use"; 1548 * and multiple code point ranges had this block. 1549 * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" 1550 * and adds separate blocks for the supplementary PUAs. 1551 * @stable ICU 2.4 1552 */ 1553 public static final UnicodeBlock PRIVATE_USE_AREA 1554 = new UnicodeBlock("PRIVATE_USE_AREA", 78); 1555 /** 1556 * Same as public static final int PRIVATE_USE_AREA. 1557 * Until Unicode 3.1.1; the corresponding block name was "Private Use"; 1558 * and multiple code point ranges had this block. 1559 * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" 1560 * and adds separate blocks for the supplementary PUAs. 1561 * @stable ICU 2.4 1562 */ 1563 public static final UnicodeBlock PRIVATE_USE 1564 = PRIVATE_USE_AREA; 1565 /** 1566 * @stable ICU 2.4 1567 */ 1568 public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS 1569 = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS", CJK_COMPATIBILITY_IDEOGRAPHS_ID); 1570 /** 1571 * @stable ICU 2.4 1572 */ 1573 public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS 1574 = new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS", ALPHABETIC_PRESENTATION_FORMS_ID); 1575 /** 1576 * @stable ICU 2.4 1577 */ 1578 public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A 1579 = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A", ARABIC_PRESENTATION_FORMS_A_ID); 1580 /** 1581 * @stable ICU 2.4 1582 */ 1583 public static final UnicodeBlock COMBINING_HALF_MARKS 1584 = new UnicodeBlock("COMBINING_HALF_MARKS", COMBINING_HALF_MARKS_ID); 1585 /** 1586 * @stable ICU 2.4 1587 */ 1588 public static final UnicodeBlock CJK_COMPATIBILITY_FORMS 1589 = new UnicodeBlock("CJK_COMPATIBILITY_FORMS", CJK_COMPATIBILITY_FORMS_ID); 1590 /** 1591 * @stable ICU 2.4 1592 */ 1593 public static final UnicodeBlock SMALL_FORM_VARIANTS 1594 = new UnicodeBlock("SMALL_FORM_VARIANTS", SMALL_FORM_VARIANTS_ID); 1595 /** 1596 * @stable ICU 2.4 1597 */ 1598 
public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B 1599 = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B", ARABIC_PRESENTATION_FORMS_B_ID); 1600 /** 1601 * @stable ICU 2.4 1602 */ 1603 public static final UnicodeBlock SPECIALS 1604 = new UnicodeBlock("SPECIALS", SPECIALS_ID); 1605 /** 1606 * @stable ICU 2.4 1607 */ 1608 public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS 1609 = new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS", HALFWIDTH_AND_FULLWIDTH_FORMS_ID); 1610 /** 1611 * @stable ICU 2.4 1612 */ 1613 public static final UnicodeBlock OLD_ITALIC 1614 = new UnicodeBlock("OLD_ITALIC", OLD_ITALIC_ID); 1615 /** 1616 * @stable ICU 2.4 1617 */ 1618 public static final UnicodeBlock GOTHIC 1619 = new UnicodeBlock("GOTHIC", GOTHIC_ID); 1620 /** 1621 * @stable ICU 2.4 1622 */ 1623 public static final UnicodeBlock DESERET 1624 = new UnicodeBlock("DESERET", DESERET_ID); 1625 /** 1626 * @stable ICU 2.4 1627 */ 1628 public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS 1629 = new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS", BYZANTINE_MUSICAL_SYMBOLS_ID); 1630 /** 1631 * @stable ICU 2.4 1632 */ 1633 public static final UnicodeBlock MUSICAL_SYMBOLS 1634 = new UnicodeBlock("MUSICAL_SYMBOLS", MUSICAL_SYMBOLS_ID); 1635 /** 1636 * @stable ICU 2.4 1637 */ 1638 public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS 1639 = new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS", 1640 MATHEMATICAL_ALPHANUMERIC_SYMBOLS_ID); 1641 /** 1642 * @stable ICU 2.4 1643 */ 1644 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B 1645 = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B", 1646 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_ID); 1647 /** 1648 * @stable ICU 2.4 1649 */ 1650 public static final UnicodeBlock 1651 CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT 1652 = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT", 1653 CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_ID); 1654 /** 1655 * @stable ICU 2.4 1656 */ 1657 public static final UnicodeBlock TAGS 
1658 = new UnicodeBlock("TAGS", TAGS_ID); 1659 1660 // New blocks in Unicode 3.2 1661 1662 /** 1663 * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement". 1664 * @stable ICU 2.4 1665 */ 1666 public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY 1667 = new UnicodeBlock("CYRILLIC_SUPPLEMENTARY", CYRILLIC_SUPPLEMENTARY_ID); 1668 /** 1669 * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement". 1670 * @stable ICU 3.0 1671 */ 1672 public static final UnicodeBlock CYRILLIC_SUPPLEMENT 1673 = new UnicodeBlock("CYRILLIC_SUPPLEMENT", CYRILLIC_SUPPLEMENT_ID); 1674 /** 1675 * @stable ICU 2.4 1676 */ 1677 public static final UnicodeBlock TAGALOG 1678 = new UnicodeBlock("TAGALOG", TAGALOG_ID); 1679 /** 1680 * @stable ICU 2.4 1681 */ 1682 public static final UnicodeBlock HANUNOO 1683 = new UnicodeBlock("HANUNOO", HANUNOO_ID); 1684 /** 1685 * @stable ICU 2.4 1686 */ 1687 public static final UnicodeBlock BUHID 1688 = new UnicodeBlock("BUHID", BUHID_ID); 1689 /** 1690 * @stable ICU 2.4 1691 */ 1692 public static final UnicodeBlock TAGBANWA 1693 = new UnicodeBlock("TAGBANWA", TAGBANWA_ID); 1694 /** 1695 * @stable ICU 2.4 1696 */ 1697 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A 1698 = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A", 1699 MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A_ID); 1700 /** 1701 * @stable ICU 2.4 1702 */ 1703 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A 1704 = new UnicodeBlock("SUPPLEMENTAL_ARROWS_A", SUPPLEMENTAL_ARROWS_A_ID); 1705 /** 1706 * @stable ICU 2.4 1707 */ 1708 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B 1709 = new UnicodeBlock("SUPPLEMENTAL_ARROWS_B", SUPPLEMENTAL_ARROWS_B_ID); 1710 /** 1711 * @stable ICU 2.4 1712 */ 1713 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B 1714 = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B", 1715 MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B_ID); 1716 /** 1717 * @stable ICU 2.4 1718 */ 1719 
public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS 1720 = new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS", 1721 SUPPLEMENTAL_MATHEMATICAL_OPERATORS_ID); 1722 /** 1723 * @stable ICU 2.4 1724 */ 1725 public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS 1726 = new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS", KATAKANA_PHONETIC_EXTENSIONS_ID); 1727 /** 1728 * @stable ICU 2.4 1729 */ 1730 public static final UnicodeBlock VARIATION_SELECTORS 1731 = new UnicodeBlock("VARIATION_SELECTORS", VARIATION_SELECTORS_ID); 1732 /** 1733 * @stable ICU 2.4 1734 */ 1735 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A 1736 = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A", 1737 SUPPLEMENTARY_PRIVATE_USE_AREA_A_ID); 1738 /** 1739 * @stable ICU 2.4 1740 */ 1741 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B 1742 = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B", 1743 SUPPLEMENTARY_PRIVATE_USE_AREA_B_ID); 1744 1745 /** 1746 * @stable ICU 2.6 1747 */ 1748 public static final UnicodeBlock LIMBU 1749 = new UnicodeBlock("LIMBU", LIMBU_ID); 1750 /** 1751 * @stable ICU 2.6 1752 */ 1753 public static final UnicodeBlock TAI_LE 1754 = new UnicodeBlock("TAI_LE", TAI_LE_ID); 1755 /** 1756 * @stable ICU 2.6 1757 */ 1758 public static final UnicodeBlock KHMER_SYMBOLS 1759 = new UnicodeBlock("KHMER_SYMBOLS", KHMER_SYMBOLS_ID); 1760 1761 /** 1762 * @stable ICU 2.6 1763 */ 1764 public static final UnicodeBlock PHONETIC_EXTENSIONS 1765 = new UnicodeBlock("PHONETIC_EXTENSIONS", PHONETIC_EXTENSIONS_ID); 1766 1767 /** 1768 * @stable ICU 2.6 1769 */ 1770 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS 1771 = new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS", 1772 MISCELLANEOUS_SYMBOLS_AND_ARROWS_ID); 1773 /** 1774 * @stable ICU 2.6 1775 */ 1776 public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS 1777 = new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS", YIJING_HEXAGRAM_SYMBOLS_ID); 1778 /** 1779 * @stable ICU 2.6 
1780 */ 1781 public static final UnicodeBlock LINEAR_B_SYLLABARY 1782 = new UnicodeBlock("LINEAR_B_SYLLABARY", LINEAR_B_SYLLABARY_ID); 1783 /** 1784 * @stable ICU 2.6 1785 */ 1786 public static final UnicodeBlock LINEAR_B_IDEOGRAMS 1787 = new UnicodeBlock("LINEAR_B_IDEOGRAMS", LINEAR_B_IDEOGRAMS_ID); 1788 /** 1789 * @stable ICU 2.6 1790 */ 1791 public static final UnicodeBlock AEGEAN_NUMBERS 1792 = new UnicodeBlock("AEGEAN_NUMBERS", AEGEAN_NUMBERS_ID); 1793 /** 1794 * @stable ICU 2.6 1795 */ 1796 public static final UnicodeBlock UGARITIC 1797 = new UnicodeBlock("UGARITIC", UGARITIC_ID); 1798 /** 1799 * @stable ICU 2.6 1800 */ 1801 public static final UnicodeBlock SHAVIAN 1802 = new UnicodeBlock("SHAVIAN", SHAVIAN_ID); 1803 /** 1804 * @stable ICU 2.6 1805 */ 1806 public static final UnicodeBlock OSMANYA 1807 = new UnicodeBlock("OSMANYA", OSMANYA_ID); 1808 /** 1809 * @stable ICU 2.6 1810 */ 1811 public static final UnicodeBlock CYPRIOT_SYLLABARY 1812 = new UnicodeBlock("CYPRIOT_SYLLABARY", CYPRIOT_SYLLABARY_ID); 1813 /** 1814 * @stable ICU 2.6 1815 */ 1816 public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS 1817 = new UnicodeBlock("TAI_XUAN_JING_SYMBOLS", TAI_XUAN_JING_SYMBOLS_ID); 1818 1819 /** 1820 * @stable ICU 2.6 1821 */ 1822 public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT 1823 = new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT", VARIATION_SELECTORS_SUPPLEMENT_ID); 1824 1825 /* New blocks in Unicode 4.1 */ 1826 1827 /** 1828 * @stable ICU 3.4 1829 */ 1830 public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION = 1831 new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION", 1832 ANCIENT_GREEK_MUSICAL_NOTATION_ID); /*[1D200]*/ 1833 1834 /** 1835 * @stable ICU 3.4 1836 */ 1837 public static final UnicodeBlock ANCIENT_GREEK_NUMBERS = 1838 new UnicodeBlock("ANCIENT_GREEK_NUMBERS", ANCIENT_GREEK_NUMBERS_ID); /*[10140]*/ 1839 1840 /** 1841 * @stable ICU 3.4 1842 */ 1843 public static final UnicodeBlock ARABIC_SUPPLEMENT = 1844 new 
UnicodeBlock("ARABIC_SUPPLEMENT", ARABIC_SUPPLEMENT_ID); /*[0750]*/ 1845 1846 /** 1847 * @stable ICU 3.4 1848 */ 1849 public static final UnicodeBlock BUGINESE = 1850 new UnicodeBlock("BUGINESE", BUGINESE_ID); /*[1A00]*/ 1851 1852 /** 1853 * @stable ICU 3.4 1854 */ 1855 public static final UnicodeBlock CJK_STROKES = 1856 new UnicodeBlock("CJK_STROKES", CJK_STROKES_ID); /*[31C0]*/ 1857 1858 /** 1859 * @stable ICU 3.4 1860 */ 1861 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT = 1862 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT", 1863 COMBINING_DIACRITICAL_MARKS_SUPPLEMENT_ID); /*[1DC0]*/ 1864 1865 /** 1866 * @stable ICU 3.4 1867 */ 1868 public static final UnicodeBlock COPTIC = new UnicodeBlock("COPTIC", COPTIC_ID); /*[2C80]*/ 1869 1870 /** 1871 * @stable ICU 3.4 1872 */ 1873 public static final UnicodeBlock ETHIOPIC_EXTENDED = 1874 new UnicodeBlock("ETHIOPIC_EXTENDED", ETHIOPIC_EXTENDED_ID); /*[2D80]*/ 1875 1876 /** 1877 * @stable ICU 3.4 1878 */ 1879 public static final UnicodeBlock ETHIOPIC_SUPPLEMENT = 1880 new UnicodeBlock("ETHIOPIC_SUPPLEMENT", ETHIOPIC_SUPPLEMENT_ID); /*[1380]*/ 1881 1882 /** 1883 * @stable ICU 3.4 1884 */ 1885 public static final UnicodeBlock GEORGIAN_SUPPLEMENT = 1886 new UnicodeBlock("GEORGIAN_SUPPLEMENT", GEORGIAN_SUPPLEMENT_ID); /*[2D00]*/ 1887 1888 /** 1889 * @stable ICU 3.4 1890 */ 1891 public static final UnicodeBlock GLAGOLITIC = 1892 new UnicodeBlock("GLAGOLITIC", GLAGOLITIC_ID); /*[2C00]*/ 1893 1894 /** 1895 * @stable ICU 3.4 1896 */ 1897 public static final UnicodeBlock KHAROSHTHI = 1898 new UnicodeBlock("KHAROSHTHI", KHAROSHTHI_ID); /*[10A00]*/ 1899 1900 /** 1901 * @stable ICU 3.4 1902 */ 1903 public static final UnicodeBlock MODIFIER_TONE_LETTERS = 1904 new UnicodeBlock("MODIFIER_TONE_LETTERS", MODIFIER_TONE_LETTERS_ID); /*[A700]*/ 1905 1906 /** 1907 * @stable ICU 3.4 1908 */ 1909 public static final UnicodeBlock NEW_TAI_LUE = 1910 new UnicodeBlock("NEW_TAI_LUE", NEW_TAI_LUE_ID); /*[1980]*/ 
1911 1912 /** 1913 * @stable ICU 3.4 1914 */ 1915 public static final UnicodeBlock OLD_PERSIAN = 1916 new UnicodeBlock("OLD_PERSIAN", OLD_PERSIAN_ID); /*[103A0]*/ 1917 1918 /** 1919 * @stable ICU 3.4 1920 */ 1921 public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT = 1922 new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT", 1923 PHONETIC_EXTENSIONS_SUPPLEMENT_ID); /*[1D80]*/ 1924 1925 /** 1926 * @stable ICU 3.4 1927 */ 1928 public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION = 1929 new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION", SUPPLEMENTAL_PUNCTUATION_ID); /*[2E00]*/ 1930 1931 /** 1932 * @stable ICU 3.4 1933 */ 1934 public static final UnicodeBlock SYLOTI_NAGRI = 1935 new UnicodeBlock("SYLOTI_NAGRI", SYLOTI_NAGRI_ID); /*[A800]*/ 1936 1937 /** 1938 * @stable ICU 3.4 1939 */ 1940 public static final UnicodeBlock TIFINAGH = 1941 new UnicodeBlock("TIFINAGH", TIFINAGH_ID); /*[2D30]*/ 1942 1943 /** 1944 * @stable ICU 3.4 1945 */ 1946 public static final UnicodeBlock VERTICAL_FORMS = 1947 new UnicodeBlock("VERTICAL_FORMS", VERTICAL_FORMS_ID); /*[FE10]*/ 1948 1949 /** 1950 * @stable ICU 3.6 1951 */ 1952 public static final UnicodeBlock NKO = new UnicodeBlock("NKO", NKO_ID); /*[07C0]*/ 1953 /** 1954 * @stable ICU 3.6 1955 */ 1956 public static final UnicodeBlock BALINESE = 1957 new UnicodeBlock("BALINESE", BALINESE_ID); /*[1B00]*/ 1958 /** 1959 * @stable ICU 3.6 1960 */ 1961 public static final UnicodeBlock LATIN_EXTENDED_C = 1962 new UnicodeBlock("LATIN_EXTENDED_C", LATIN_EXTENDED_C_ID); /*[2C60]*/ 1963 /** 1964 * @stable ICU 3.6 1965 */ 1966 public static final UnicodeBlock LATIN_EXTENDED_D = 1967 new UnicodeBlock("LATIN_EXTENDED_D", LATIN_EXTENDED_D_ID); /*[A720]*/ 1968 /** 1969 * @stable ICU 3.6 1970 */ 1971 public static final UnicodeBlock PHAGS_PA = 1972 new UnicodeBlock("PHAGS_PA", PHAGS_PA_ID); /*[A840]*/ 1973 /** 1974 * @stable ICU 3.6 1975 */ 1976 public static final UnicodeBlock PHOENICIAN = 1977 new UnicodeBlock("PHOENICIAN", PHOENICIAN_ID); 
/*[10900]*/ 1978 /** 1979 * @stable ICU 3.6 1980 */ 1981 public static final UnicodeBlock CUNEIFORM = 1982 new UnicodeBlock("CUNEIFORM", CUNEIFORM_ID); /*[12000]*/ 1983 /** 1984 * @stable ICU 3.6 1985 */ 1986 public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION = 1987 new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION", 1988 CUNEIFORM_NUMBERS_AND_PUNCTUATION_ID); /*[12400]*/ 1989 /** 1990 * @stable ICU 3.6 1991 */ 1992 public static final UnicodeBlock COUNTING_ROD_NUMERALS = 1993 new UnicodeBlock("COUNTING_ROD_NUMERALS", COUNTING_ROD_NUMERALS_ID); /*[1D360]*/ 1994 1995 /** 1996 * @stable ICU 4.0 1997 */ 1998 public static final UnicodeBlock SUNDANESE = 1999 new UnicodeBlock("SUNDANESE", SUNDANESE_ID); /* [1B80] */ 2000 2001 /** 2002 * @stable ICU 4.0 2003 */ 2004 public static final UnicodeBlock LEPCHA = 2005 new UnicodeBlock("LEPCHA", LEPCHA_ID); /* [1C00] */ 2006 2007 /** 2008 * @stable ICU 4.0 2009 */ 2010 public static final UnicodeBlock OL_CHIKI = 2011 new UnicodeBlock("OL_CHIKI", OL_CHIKI_ID); /* [1C50] */ 2012 2013 /** 2014 * @stable ICU 4.0 2015 */ 2016 public static final UnicodeBlock CYRILLIC_EXTENDED_A = 2017 new UnicodeBlock("CYRILLIC_EXTENDED_A", CYRILLIC_EXTENDED_A_ID); /* [2DE0] */ 2018 2019 /** 2020 * @stable ICU 4.0 2021 */ 2022 public static final UnicodeBlock VAI = new UnicodeBlock("VAI", VAI_ID); /* [A500] */ 2023 2024 /** 2025 * @stable ICU 4.0 2026 */ 2027 public static final UnicodeBlock CYRILLIC_EXTENDED_B = 2028 new UnicodeBlock("CYRILLIC_EXTENDED_B", CYRILLIC_EXTENDED_B_ID); /* [A640] */ 2029 2030 /** 2031 * @stable ICU 4.0 2032 */ 2033 public static final UnicodeBlock SAURASHTRA = 2034 new UnicodeBlock("SAURASHTRA", SAURASHTRA_ID); /* [A880] */ 2035 2036 /** 2037 * @stable ICU 4.0 2038 */ 2039 public static final UnicodeBlock KAYAH_LI = 2040 new UnicodeBlock("KAYAH_LI", KAYAH_LI_ID); /* [A900] */ 2041 2042 /** 2043 * @stable ICU 4.0 2044 */ 2045 public static final UnicodeBlock REJANG = 2046 new UnicodeBlock("REJANG", 
REJANG_ID); /* [A930] */ 2047 2048 /** 2049 * @stable ICU 4.0 2050 */ 2051 public static final UnicodeBlock CHAM = 2052 new UnicodeBlock("CHAM", CHAM_ID); /* [AA00] */ 2053 2054 /** 2055 * @stable ICU 4.0 2056 */ 2057 public static final UnicodeBlock ANCIENT_SYMBOLS = 2058 new UnicodeBlock("ANCIENT_SYMBOLS", ANCIENT_SYMBOLS_ID); /* [10190] */ 2059 2060 /** 2061 * @stable ICU 4.0 2062 */ 2063 public static final UnicodeBlock PHAISTOS_DISC = 2064 new UnicodeBlock("PHAISTOS_DISC", PHAISTOS_DISC_ID); /* [101D0] */ 2065 2066 /** 2067 * @stable ICU 4.0 2068 */ 2069 public static final UnicodeBlock LYCIAN = 2070 new UnicodeBlock("LYCIAN", LYCIAN_ID); /* [10280] */ 2071 2072 /** 2073 * @stable ICU 4.0 2074 */ 2075 public static final UnicodeBlock CARIAN = 2076 new UnicodeBlock("CARIAN", CARIAN_ID); /* [102A0] */ 2077 2078 /** 2079 * @stable ICU 4.0 2080 */ 2081 public static final UnicodeBlock LYDIAN = 2082 new UnicodeBlock("LYDIAN", LYDIAN_ID); /* [10920] */ 2083 2084 /** 2085 * @stable ICU 4.0 2086 */ 2087 public static final UnicodeBlock MAHJONG_TILES = 2088 new UnicodeBlock("MAHJONG_TILES", MAHJONG_TILES_ID); /* [1F000] */ 2089 2090 /** 2091 * @stable ICU 4.0 2092 */ 2093 public static final UnicodeBlock DOMINO_TILES = 2094 new UnicodeBlock("DOMINO_TILES", DOMINO_TILES_ID); /* [1F030] */ 2095 2096 /* New blocks in Unicode 5.2 */ 2097 2098 /** @stable ICU 4.4 */ 2099 public static final UnicodeBlock SAMARITAN = 2100 new UnicodeBlock("SAMARITAN", SAMARITAN_ID); /*[0800]*/ 2101 /** @stable ICU 4.4 */ 2102 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED = 2103 new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED", 2104 UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_ID); /*[18B0]*/ 2105 /** @stable ICU 4.4 */ 2106 public static final UnicodeBlock TAI_THAM = 2107 new UnicodeBlock("TAI_THAM", TAI_THAM_ID); /*[1A20]*/ 2108 /** @stable ICU 4.4 */ 2109 public static final UnicodeBlock VEDIC_EXTENSIONS = 2110 new 
UnicodeBlock("VEDIC_EXTENSIONS", VEDIC_EXTENSIONS_ID); /*[1CD0]*/ 2111 /** @stable ICU 4.4 */ 2112 public static final UnicodeBlock LISU = 2113 new UnicodeBlock("LISU", LISU_ID); /*[A4D0]*/ 2114 /** @stable ICU 4.4 */ 2115 public static final UnicodeBlock BAMUM = 2116 new UnicodeBlock("BAMUM", BAMUM_ID); /*[A6A0]*/ 2117 /** @stable ICU 4.4 */ 2118 public static final UnicodeBlock COMMON_INDIC_NUMBER_FORMS = 2119 new UnicodeBlock("COMMON_INDIC_NUMBER_FORMS", COMMON_INDIC_NUMBER_FORMS_ID); /*[A830]*/ 2120 /** @stable ICU 4.4 */ 2121 public static final UnicodeBlock DEVANAGARI_EXTENDED = 2122 new UnicodeBlock("DEVANAGARI_EXTENDED", DEVANAGARI_EXTENDED_ID); /*[A8E0]*/ 2123 /** @stable ICU 4.4 */ 2124 public static final UnicodeBlock HANGUL_JAMO_EXTENDED_A = 2125 new UnicodeBlock("HANGUL_JAMO_EXTENDED_A", HANGUL_JAMO_EXTENDED_A_ID); /*[A960]*/ 2126 /** @stable ICU 4.4 */ 2127 public static final UnicodeBlock JAVANESE = 2128 new UnicodeBlock("JAVANESE", JAVANESE_ID); /*[A980]*/ 2129 /** @stable ICU 4.4 */ 2130 public static final UnicodeBlock MYANMAR_EXTENDED_A = 2131 new UnicodeBlock("MYANMAR_EXTENDED_A", MYANMAR_EXTENDED_A_ID); /*[AA60]*/ 2132 /** @stable ICU 4.4 */ 2133 public static final UnicodeBlock TAI_VIET = 2134 new UnicodeBlock("TAI_VIET", TAI_VIET_ID); /*[AA80]*/ 2135 /** @stable ICU 4.4 */ 2136 public static final UnicodeBlock MEETEI_MAYEK = 2137 new UnicodeBlock("MEETEI_MAYEK", MEETEI_MAYEK_ID); /*[ABC0]*/ 2138 /** @stable ICU 4.4 */ 2139 public static final UnicodeBlock HANGUL_JAMO_EXTENDED_B = 2140 new UnicodeBlock("HANGUL_JAMO_EXTENDED_B", HANGUL_JAMO_EXTENDED_B_ID); /*[D7B0]*/ 2141 /** @stable ICU 4.4 */ 2142 public static final UnicodeBlock IMPERIAL_ARAMAIC = 2143 new UnicodeBlock("IMPERIAL_ARAMAIC", IMPERIAL_ARAMAIC_ID); /*[10840]*/ 2144 /** @stable ICU 4.4 */ 2145 public static final UnicodeBlock OLD_SOUTH_ARABIAN = 2146 new UnicodeBlock("OLD_SOUTH_ARABIAN", OLD_SOUTH_ARABIAN_ID); /*[10A60]*/ 2147 /** @stable ICU 4.4 */ 2148 public static final 
UnicodeBlock AVESTAN = 2149 new UnicodeBlock("AVESTAN", AVESTAN_ID); /*[10B00]*/ 2150 /** @stable ICU 4.4 */ 2151 public static final UnicodeBlock INSCRIPTIONAL_PARTHIAN = 2152 new UnicodeBlock("INSCRIPTIONAL_PARTHIAN", INSCRIPTIONAL_PARTHIAN_ID); /*[10B40]*/ 2153 /** @stable ICU 4.4 */ 2154 public static final UnicodeBlock INSCRIPTIONAL_PAHLAVI = 2155 new UnicodeBlock("INSCRIPTIONAL_PAHLAVI", INSCRIPTIONAL_PAHLAVI_ID); /*[10B60]*/ 2156 /** @stable ICU 4.4 */ 2157 public static final UnicodeBlock OLD_TURKIC = 2158 new UnicodeBlock("OLD_TURKIC", OLD_TURKIC_ID); /*[10C00]*/ 2159 /** @stable ICU 4.4 */ 2160 public static final UnicodeBlock RUMI_NUMERAL_SYMBOLS = 2161 new UnicodeBlock("RUMI_NUMERAL_SYMBOLS", RUMI_NUMERAL_SYMBOLS_ID); /*[10E60]*/ 2162 /** @stable ICU 4.4 */ 2163 public static final UnicodeBlock KAITHI = 2164 new UnicodeBlock("KAITHI", KAITHI_ID); /*[11080]*/ 2165 /** @stable ICU 4.4 */ 2166 public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS = 2167 new UnicodeBlock("EGYPTIAN_HIEROGLYPHS", EGYPTIAN_HIEROGLYPHS_ID); /*[13000]*/ 2168 /** @stable ICU 4.4 */ 2169 public static final UnicodeBlock ENCLOSED_ALPHANUMERIC_SUPPLEMENT = 2170 new UnicodeBlock("ENCLOSED_ALPHANUMERIC_SUPPLEMENT", 2171 ENCLOSED_ALPHANUMERIC_SUPPLEMENT_ID); /*[1F100]*/ 2172 /** @stable ICU 4.4 */ 2173 public static final UnicodeBlock ENCLOSED_IDEOGRAPHIC_SUPPLEMENT = 2174 new UnicodeBlock("ENCLOSED_IDEOGRAPHIC_SUPPLEMENT", 2175 ENCLOSED_IDEOGRAPHIC_SUPPLEMENT_ID); /*[1F200]*/ 2176 /** @stable ICU 4.4 */ 2177 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C = 2178 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C", 2179 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_ID); /*[2A700]*/ 2180 2181 /* New blocks in Unicode 6.0 */ 2182 2183 /** @stable ICU 4.6 */ 2184 public static final UnicodeBlock MANDAIC = 2185 new UnicodeBlock("MANDAIC", MANDAIC_ID); /*[0840]*/ 2186 /** @stable ICU 4.6 */ 2187 public static final UnicodeBlock BATAK = 2188 new UnicodeBlock("BATAK", BATAK_ID); 
/*[1BC0]*/ 2189 /** @stable ICU 4.6 */ 2190 public static final UnicodeBlock ETHIOPIC_EXTENDED_A = 2191 new UnicodeBlock("ETHIOPIC_EXTENDED_A", ETHIOPIC_EXTENDED_A_ID); /*[AB00]*/ 2192 /** @stable ICU 4.6 */ 2193 public static final UnicodeBlock BRAHMI = 2194 new UnicodeBlock("BRAHMI", BRAHMI_ID); /*[11000]*/ 2195 /** @stable ICU 4.6 */ 2196 public static final UnicodeBlock BAMUM_SUPPLEMENT = 2197 new UnicodeBlock("BAMUM_SUPPLEMENT", BAMUM_SUPPLEMENT_ID); /*[16800]*/ 2198 /** @stable ICU 4.6 */ 2199 public static final UnicodeBlock KANA_SUPPLEMENT = 2200 new UnicodeBlock("KANA_SUPPLEMENT", KANA_SUPPLEMENT_ID); /*[1B000]*/ 2201 /** @stable ICU 4.6 */ 2202 public static final UnicodeBlock PLAYING_CARDS = 2203 new UnicodeBlock("PLAYING_CARDS", PLAYING_CARDS_ID); /*[1F0A0]*/ 2204 /** @stable ICU 4.6 */ 2205 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS = 2206 new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS", 2207 MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS_ID); /*[1F300]*/ 2208 /** @stable ICU 4.6 */ 2209 public static final UnicodeBlock EMOTICONS = 2210 new UnicodeBlock("EMOTICONS", EMOTICONS_ID); /*[1F600]*/ 2211 /** @stable ICU 4.6 */ 2212 public static final UnicodeBlock TRANSPORT_AND_MAP_SYMBOLS = 2213 new UnicodeBlock("TRANSPORT_AND_MAP_SYMBOLS", TRANSPORT_AND_MAP_SYMBOLS_ID); /*[1F680]*/ 2214 /** @stable ICU 4.6 */ 2215 public static final UnicodeBlock ALCHEMICAL_SYMBOLS = 2216 new UnicodeBlock("ALCHEMICAL_SYMBOLS", ALCHEMICAL_SYMBOLS_ID); /*[1F700]*/ 2217 /** @stable ICU 4.6 */ 2218 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D = 2219 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D", 2220 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_ID); /*[2B740]*/ 2221 2222 /* New blocks in Unicode 6.1 */ 2223 2224 /** @stable ICU 49 */ 2225 public static final UnicodeBlock ARABIC_EXTENDED_A = 2226 new UnicodeBlock("ARABIC_EXTENDED_A", ARABIC_EXTENDED_A_ID); /*[08A0]*/ 2227 /** @stable ICU 49 */ 2228 public static final UnicodeBlock 
ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS = 2229 new UnicodeBlock("ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS", ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS_ID); /*[1EE00]*/ 2230 /** @stable ICU 49 */ 2231 public static final UnicodeBlock CHAKMA = new UnicodeBlock("CHAKMA", CHAKMA_ID); /*[11100]*/ 2232 /** @stable ICU 49 */ 2233 public static final UnicodeBlock MEETEI_MAYEK_EXTENSIONS = 2234 new UnicodeBlock("MEETEI_MAYEK_EXTENSIONS", MEETEI_MAYEK_EXTENSIONS_ID); /*[AAE0]*/ 2235 /** @stable ICU 49 */ 2236 public static final UnicodeBlock MEROITIC_CURSIVE = 2237 new UnicodeBlock("MEROITIC_CURSIVE", MEROITIC_CURSIVE_ID); /*[109A0]*/ 2238 /** @stable ICU 49 */ 2239 public static final UnicodeBlock MEROITIC_HIEROGLYPHS = 2240 new UnicodeBlock("MEROITIC_HIEROGLYPHS", MEROITIC_HIEROGLYPHS_ID); /*[10980]*/ 2241 /** @stable ICU 49 */ 2242 public static final UnicodeBlock MIAO = new UnicodeBlock("MIAO", MIAO_ID); /*[16F00]*/ 2243 /** @stable ICU 49 */ 2244 public static final UnicodeBlock SHARADA = new UnicodeBlock("SHARADA", SHARADA_ID); /*[11180]*/ 2245 /** @stable ICU 49 */ 2246 public static final UnicodeBlock SORA_SOMPENG = 2247 new UnicodeBlock("SORA_SOMPENG", SORA_SOMPENG_ID); /*[110D0]*/ 2248 /** @stable ICU 49 */ 2249 public static final UnicodeBlock SUNDANESE_SUPPLEMENT = 2250 new UnicodeBlock("SUNDANESE_SUPPLEMENT", SUNDANESE_SUPPLEMENT_ID); /*[1CC0]*/ 2251 /** @stable ICU 49 */ 2252 public static final UnicodeBlock TAKRI = new UnicodeBlock("TAKRI", TAKRI_ID); /*[11680]*/ 2253 2254 /* New blocks in Unicode 7.0 */ 2255 2256 /** @stable ICU 54 */ 2257 public static final UnicodeBlock BASSA_VAH = new UnicodeBlock("BASSA_VAH", BASSA_VAH_ID); /*[16AD0]*/ 2258 /** @stable ICU 54 */ 2259 public static final UnicodeBlock CAUCASIAN_ALBANIAN = 2260 new UnicodeBlock("CAUCASIAN_ALBANIAN", CAUCASIAN_ALBANIAN_ID); /*[10530]*/ 2261 /** @stable ICU 54 */ 2262 public static final UnicodeBlock COPTIC_EPACT_NUMBERS = 2263 new UnicodeBlock("COPTIC_EPACT_NUMBERS", COPTIC_EPACT_NUMBERS_ID); 
/*[102E0]*/ 2264 /** @stable ICU 54 */ 2265 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_EXTENDED = 2266 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_EXTENDED", COMBINING_DIACRITICAL_MARKS_EXTENDED_ID); /*[1AB0]*/ 2267 /** @stable ICU 54 */ 2268 public static final UnicodeBlock DUPLOYAN = new UnicodeBlock("DUPLOYAN", DUPLOYAN_ID); /*[1BC00]*/ 2269 /** @stable ICU 54 */ 2270 public static final UnicodeBlock ELBASAN = new UnicodeBlock("ELBASAN", ELBASAN_ID); /*[10500]*/ 2271 /** @stable ICU 54 */ 2272 public static final UnicodeBlock GEOMETRIC_SHAPES_EXTENDED = 2273 new UnicodeBlock("GEOMETRIC_SHAPES_EXTENDED", GEOMETRIC_SHAPES_EXTENDED_ID); /*[1F780]*/ 2274 /** @stable ICU 54 */ 2275 public static final UnicodeBlock GRANTHA = new UnicodeBlock("GRANTHA", GRANTHA_ID); /*[11300]*/ 2276 /** @stable ICU 54 */ 2277 public static final UnicodeBlock KHOJKI = new UnicodeBlock("KHOJKI", KHOJKI_ID); /*[11200]*/ 2278 /** @stable ICU 54 */ 2279 public static final UnicodeBlock KHUDAWADI = new UnicodeBlock("KHUDAWADI", KHUDAWADI_ID); /*[112B0]*/ 2280 /** @stable ICU 54 */ 2281 public static final UnicodeBlock LATIN_EXTENDED_E = 2282 new UnicodeBlock("LATIN_EXTENDED_E", LATIN_EXTENDED_E_ID); /*[AB30]*/ 2283 /** @stable ICU 54 */ 2284 public static final UnicodeBlock LINEAR_A = new UnicodeBlock("LINEAR_A", LINEAR_A_ID); /*[10600]*/ 2285 /** @stable ICU 54 */ 2286 public static final UnicodeBlock MAHAJANI = new UnicodeBlock("MAHAJANI", MAHAJANI_ID); /*[11150]*/ 2287 /** @stable ICU 54 */ 2288 public static final UnicodeBlock MANICHAEAN = new UnicodeBlock("MANICHAEAN", MANICHAEAN_ID); /*[10AC0]*/ 2289 /** @stable ICU 54 */ 2290 public static final UnicodeBlock MENDE_KIKAKUI = 2291 new UnicodeBlock("MENDE_KIKAKUI", MENDE_KIKAKUI_ID); /*[1E800]*/ 2292 /** @stable ICU 54 */ 2293 public static final UnicodeBlock MODI = new UnicodeBlock("MODI", MODI_ID); /*[11600]*/ 2294 /** @stable ICU 54 */ 2295 public static final UnicodeBlock MRO = new UnicodeBlock("MRO", MRO_ID); 
/*[16A40]*/ 2296 /** @stable ICU 54 */ 2297 public static final UnicodeBlock MYANMAR_EXTENDED_B = 2298 new UnicodeBlock("MYANMAR_EXTENDED_B", MYANMAR_EXTENDED_B_ID); /*[A9E0]*/ 2299 /** @stable ICU 54 */ 2300 public static final UnicodeBlock NABATAEAN = new UnicodeBlock("NABATAEAN", NABATAEAN_ID); /*[10880]*/ 2301 /** @stable ICU 54 */ 2302 public static final UnicodeBlock OLD_NORTH_ARABIAN = 2303 new UnicodeBlock("OLD_NORTH_ARABIAN", OLD_NORTH_ARABIAN_ID); /*[10A80]*/ 2304 /** @stable ICU 54 */ 2305 public static final UnicodeBlock OLD_PERMIC = new UnicodeBlock("OLD_PERMIC", OLD_PERMIC_ID); /*[10350]*/ 2306 /** @stable ICU 54 */ 2307 public static final UnicodeBlock ORNAMENTAL_DINGBATS = 2308 new UnicodeBlock("ORNAMENTAL_DINGBATS", ORNAMENTAL_DINGBATS_ID); /*[1F650]*/ 2309 /** @stable ICU 54 */ 2310 public static final UnicodeBlock PAHAWH_HMONG = new UnicodeBlock("PAHAWH_HMONG", PAHAWH_HMONG_ID); /*[16B00]*/ 2311 /** @stable ICU 54 */ 2312 public static final UnicodeBlock PALMYRENE = new UnicodeBlock("PALMYRENE", PALMYRENE_ID); /*[10860]*/ 2313 /** @stable ICU 54 */ 2314 public static final UnicodeBlock PAU_CIN_HAU = new UnicodeBlock("PAU_CIN_HAU", PAU_CIN_HAU_ID); /*[11AC0]*/ 2315 /** @stable ICU 54 */ 2316 public static final UnicodeBlock PSALTER_PAHLAVI = 2317 new UnicodeBlock("PSALTER_PAHLAVI", PSALTER_PAHLAVI_ID); /*[10B80]*/ 2318 /** @stable ICU 54 */ 2319 public static final UnicodeBlock SHORTHAND_FORMAT_CONTROLS = 2320 new UnicodeBlock("SHORTHAND_FORMAT_CONTROLS", SHORTHAND_FORMAT_CONTROLS_ID); /*[1BCA0]*/ 2321 /** @stable ICU 54 */ 2322 public static final UnicodeBlock SIDDHAM = new UnicodeBlock("SIDDHAM", SIDDHAM_ID); /*[11580]*/ 2323 /** @stable ICU 54 */ 2324 public static final UnicodeBlock SINHALA_ARCHAIC_NUMBERS = 2325 new UnicodeBlock("SINHALA_ARCHAIC_NUMBERS", SINHALA_ARCHAIC_NUMBERS_ID); /*[111E0]*/ 2326 /** @stable ICU 54 */ 2327 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_C = 2328 new UnicodeBlock("SUPPLEMENTAL_ARROWS_C", 
SUPPLEMENTAL_ARROWS_C_ID); /*[1F800]*/ 2329 /** @stable ICU 54 */ 2330 public static final UnicodeBlock TIRHUTA = new UnicodeBlock("TIRHUTA", TIRHUTA_ID); /*[11480]*/ 2331 /** @stable ICU 54 */ 2332 public static final UnicodeBlock WARANG_CITI = new UnicodeBlock("WARANG_CITI", WARANG_CITI_ID); /*[118A0]*/ 2333 2334 /* New blocks in Unicode 8.0 */ 2335 2336 /** @stable ICU 56 */ 2337 public static final UnicodeBlock AHOM = new UnicodeBlock("AHOM", AHOM_ID); /*[11700]*/ 2338 /** @stable ICU 56 */ 2339 public static final UnicodeBlock ANATOLIAN_HIEROGLYPHS = 2340 new UnicodeBlock("ANATOLIAN_HIEROGLYPHS", ANATOLIAN_HIEROGLYPHS_ID); /*[14400]*/ 2341 /** @stable ICU 56 */ 2342 public static final UnicodeBlock CHEROKEE_SUPPLEMENT = 2343 new UnicodeBlock("CHEROKEE_SUPPLEMENT", CHEROKEE_SUPPLEMENT_ID); /*[AB70]*/ 2344 /** @stable ICU 56 */ 2345 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E = 2346 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E", 2347 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E_ID); /*[2B820]*/ 2348 /** @stable ICU 56 */ 2349 public static final UnicodeBlock EARLY_DYNASTIC_CUNEIFORM = 2350 new UnicodeBlock("EARLY_DYNASTIC_CUNEIFORM", EARLY_DYNASTIC_CUNEIFORM_ID); /*[12480]*/ 2351 /** @stable ICU 56 */ 2352 public static final UnicodeBlock HATRAN = new UnicodeBlock("HATRAN", HATRAN_ID); /*[108E0]*/ 2353 /** @stable ICU 56 */ 2354 public static final UnicodeBlock MULTANI = new UnicodeBlock("MULTANI", MULTANI_ID); /*[11280]*/ 2355 /** @stable ICU 56 */ 2356 public static final UnicodeBlock OLD_HUNGARIAN = 2357 new UnicodeBlock("OLD_HUNGARIAN", OLD_HUNGARIAN_ID); /*[10C80]*/ 2358 /** @stable ICU 56 */ 2359 public static final UnicodeBlock SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS = 2360 new UnicodeBlock("SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS", 2361 SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS_ID); /*[1F900]*/ 2362 /** @stable ICU 56 */ 2363 public static final UnicodeBlock SUTTON_SIGNWRITING = 2364 new UnicodeBlock("SUTTON_SIGNWRITING", 
SUTTON_SIGNWRITING_ID); /*[1D800]*/ 2365 2366 /** 2367 * @stable ICU 2.4 2368 */ 2369 public static final UnicodeBlock INVALID_CODE 2370 = new UnicodeBlock("INVALID_CODE", INVALID_CODE_ID); 2371 2372 static { 2373 for (int blockId = 0; blockId < COUNT; ++blockId) { 2374 if (BLOCKS_[blockId] == null) { 2375 throw new java.lang.IllegalStateException( 2376 "UnicodeBlock.BLOCKS_[" + blockId + "] not initialized"); 2377 } 2378 } 2379 } 2380 2381 // public methods -------------------------------------------------- 2382 2383 /** 2384 * {@icu} Returns the only instance of the UnicodeBlock with the argument ID. 2385 * If no such ID exists, a INVALID_CODE UnicodeBlock will be returned. 2386 * @param id UnicodeBlock ID 2387 * @return the only instance of the UnicodeBlock with the argument ID 2388 * if it exists, otherwise a INVALID_CODE UnicodeBlock will be 2389 * returned. 2390 * @stable ICU 2.4 2391 */ getInstance(int id)2392 public static UnicodeBlock getInstance(int id) 2393 { 2394 if (id >= 0 && id < BLOCKS_.length) { 2395 return BLOCKS_[id]; 2396 } 2397 return INVALID_CODE; 2398 } 2399 2400 /** 2401 * Returns the Unicode allocation block that contains the code point, 2402 * or null if the code point is not a member of a defined block. 2403 * @param ch code point to be tested 2404 * @return the Unicode allocation block that contains the code point 2405 * @stable ICU 2.4 2406 */ of(int ch)2407 public static UnicodeBlock of(int ch) 2408 { 2409 if (ch > MAX_VALUE) { 2410 return INVALID_CODE; 2411 } 2412 2413 return UnicodeBlock.getInstance( 2414 UCharacterProperty.INSTANCE.getIntPropertyValue(ch, UProperty.BLOCK)); 2415 } 2416 2417 /** 2418 * Alternative to the {@link java.lang.Character.UnicodeBlock#forName(String)} method. 2419 * Returns the Unicode block with the given name. {@icunote} Unlike 2420 * {@link java.lang.Character.UnicodeBlock#forName(String)}, this only matches 2421 * against the official UCD name and the Java block name 2422 * (ignoring case). 
2423 * @param blockName the name of the block to match 2424 * @return the UnicodeBlock with that name 2425 * @throws IllegalArgumentException if the blockName could not be matched 2426 * @stable ICU 3.0 2427 */ forName(String blockName)2428 public static final UnicodeBlock forName(String blockName) { 2429 Map<String, UnicodeBlock> m = null; 2430 if (mref != null) { 2431 m = mref.get(); 2432 } 2433 if (m == null) { 2434 m = new HashMap<String, UnicodeBlock>(BLOCKS_.length); 2435 for (int i = 0; i < BLOCKS_.length; ++i) { 2436 UnicodeBlock b = BLOCKS_[i]; 2437 String name = trimBlockName( 2438 getPropertyValueName(UProperty.BLOCK, b.getID(), 2439 UProperty.NameChoice.LONG)); 2440 m.put(name, b); 2441 } 2442 mref = new SoftReference<Map<String, UnicodeBlock>>(m); 2443 } 2444 UnicodeBlock b = m.get(trimBlockName(blockName)); 2445 if (b == null) { 2446 throw new IllegalArgumentException(); 2447 } 2448 return b; 2449 } 2450 private static SoftReference<Map<String, UnicodeBlock>> mref; 2451 trimBlockName(String name)2452 private static String trimBlockName(String name) { 2453 String upper = name.toUpperCase(Locale.ENGLISH); 2454 StringBuilder result = new StringBuilder(upper.length()); 2455 for (int i = 0; i < upper.length(); i++) { 2456 char c = upper.charAt(i); 2457 if (c != ' ' && c != '_' && c != '-') { 2458 result.append(c); 2459 } 2460 } 2461 return result.toString(); 2462 } 2463 2464 /** 2465 * {icu} Returns the type ID of this Unicode block 2466 * @return integer type ID of this Unicode block 2467 * @stable ICU 2.4 2468 */ getID()2469 public int getID() 2470 { 2471 return m_id_; 2472 } 2473 2474 // private data members --------------------------------------------- 2475 2476 /** 2477 * Identification code for this UnicodeBlock 2478 */ 2479 private int m_id_; 2480 2481 // private constructor ---------------------------------------------- 2482 2483 /** 2484 * UnicodeBlock constructor 2485 * @param name name of this UnicodeBlock 2486 * @param id unique id of this 
UnicodeBlock 2487 * @exception NullPointerException if name is <code>null</code> 2488 */ UnicodeBlock(String name, int id)2489 private UnicodeBlock(String name, int id) 2490 { 2491 super(name); 2492 m_id_ = id; 2493 if (id >= 0) { 2494 BLOCKS_[id] = this; 2495 } 2496 } 2497 } 2498 2499 /** 2500 * East Asian Width constants. 2501 * @see UProperty#EAST_ASIAN_WIDTH 2502 * @see UCharacter#getIntPropertyValue 2503 * @stable ICU 2.4 2504 */ 2505 public static interface EastAsianWidth 2506 { 2507 /** 2508 * @stable ICU 2.4 2509 */ 2510 public static final int NEUTRAL = 0; 2511 /** 2512 * @stable ICU 2.4 2513 */ 2514 public static final int AMBIGUOUS = 1; 2515 /** 2516 * @stable ICU 2.4 2517 */ 2518 public static final int HALFWIDTH = 2; 2519 /** 2520 * @stable ICU 2.4 2521 */ 2522 public static final int FULLWIDTH = 3; 2523 /** 2524 * @stable ICU 2.4 2525 */ 2526 public static final int NARROW = 4; 2527 /** 2528 * @stable ICU 2.4 2529 */ 2530 public static final int WIDE = 5; 2531 /** 2532 * @stable ICU 2.4 2533 */ 2534 public static final int COUNT = 6; 2535 } 2536 2537 /** 2538 * Decomposition Type constants. 
* @see UProperty#DECOMPOSITION_TYPE
     * @stable ICU 2.4
     */
    public static interface DecompositionType
    {
        /** @stable ICU 2.4 */
        public static final int NONE = 0;
        /** @stable ICU 2.4 */
        public static final int CANONICAL = 1;
        /** @stable ICU 2.4 */
        public static final int COMPAT = 2;
        /** @stable ICU 2.4 */
        public static final int CIRCLE = 3;
        /** @stable ICU 2.4 */
        public static final int FINAL = 4;
        /** @stable ICU 2.4 */
        public static final int FONT = 5;
        /** @stable ICU 2.4 */
        public static final int FRACTION = 6;
        /** @stable ICU 2.4 */
        public static final int INITIAL = 7;
        /** @stable ICU 2.4 */
        public static final int ISOLATED = 8;
        /** @stable ICU 2.4 */
        public static final int MEDIAL = 9;
        /** @stable ICU 2.4 */
        public static final int NARROW = 10;
        /** @stable ICU 2.4 */
        public static final int NOBREAK = 11;
        /** @stable ICU 2.4 */
        public static final int SMALL = 12;
        /** @stable ICU 2.4 */
        public static final int SQUARE = 13;
        /** @stable ICU 2.4 */
        public static final int SUB = 14;
        /** @stable ICU 2.4 */
        public static final int SUPER = 15;
        /** @stable ICU 2.4 */
        public static final int VERTICAL = 16;
        /** @stable ICU 2.4 */
        public static final int WIDE = 17;
        /** @stable ICU 2.4 */
        public static final int COUNT = 18;
    }

    /**
     * Joining Type constants.
     * @see UProperty#JOINING_TYPE
     * @stable ICU 2.4
     */
    public static interface JoiningType
    {
        /** @stable ICU 2.4 */
        public static final int NON_JOINING = 0;
        /** @stable ICU 2.4 */
        public static final int JOIN_CAUSING = 1;
        /** @stable ICU 2.4 */
        public static final int DUAL_JOINING = 2;
        /** @stable ICU 2.4 */
        public static final int LEFT_JOINING = 3;
        /** @stable ICU 2.4 */
        public static final int RIGHT_JOINING = 4;
        /** @stable ICU 2.4 */
        public static final int TRANSPARENT = 5;
        /** @stable ICU 2.4 */
        public static final int COUNT = 6;
    }

    /**
     * Joining Group constants.
     * @see UProperty#JOINING_GROUP
     * @stable ICU 2.4
     */
    public static interface JoiningGroup
    {
        /** @stable ICU 2.4 */
        public static final int NO_JOINING_GROUP = 0;
        /** @stable ICU 2.4 */
        public static final int AIN = 1;
        /** @stable ICU 2.4 */
        public static final int ALAPH = 2;
        /** @stable ICU 2.4 */
        public static final int ALEF = 3;
        /** @stable ICU 2.4 */
        public static final int BEH = 4;
        /** @stable ICU 2.4 */
        public static final int BETH = 5;
        /** @stable ICU 2.4 */
        public static final int DAL = 6;
        /** @stable ICU 2.4 */
        public static final int DALATH_RISH = 7;
        /** @stable ICU 2.4 */
        public static final int E = 8;
        /** @stable ICU 2.4 */
        public static final int FEH = 9;
        /** @stable ICU 2.4 */
        public static final int FINAL_SEMKATH = 10;
        /** @stable ICU 2.4 */
        public static final int GAF = 11;
        /** @stable ICU 2.4 */
        public static final int GAMAL = 12;
        /** @stable ICU 2.4 */
        public static final int HAH = 13;
        /** @stable ICU 4.6 */
        public static final int TEH_MARBUTA_GOAL = 14;
        // Alias: shares the value of TEH_MARBUTA_GOAL rather than having its own number.
        /** @stable ICU 2.4 */
        public static final int HAMZA_ON_HEH_GOAL = TEH_MARBUTA_GOAL;
        /** @stable ICU 2.4 */
        public static final int HE = 15;
        /** @stable ICU 2.4 */
        public static final int HEH = 16;
        /** @stable ICU 2.4 */
        public static final int HEH_GOAL = 17;
        /** @stable ICU 2.4 */
        public static final int HETH = 18;
        /** @stable ICU 2.4 */
        public static final int KAF = 19;
        /** @stable ICU 2.4 */
        public static final int KAPH = 20;
        /** @stable ICU 2.4 */
        public static final int KNOTTED_HEH = 21;
        /** @stable ICU 2.4 */
        public static final int LAM = 22;
        /** @stable ICU 2.4 */
        public static final int LAMADH = 23;
        /** @stable ICU 2.4 */
        public static final int MEEM = 24;
        /** @stable ICU 2.4 */
        public static final int MIM = 25;
        /** @stable ICU 2.4 */
        public static final int NOON = 26;
        /** @stable ICU 2.4 */
        public static final int NUN = 27;
        /** @stable ICU 2.4 */
        public static final int PE = 28;
        /** @stable ICU 2.4 */
        public static final int QAF = 29;
        /** @stable ICU 2.4 */
        public static final int QAPH = 30;
        /** @stable ICU 2.4 */
        public static final int REH = 31;
        /** @stable ICU 2.4 */
        public static final int REVERSED_PE = 32;
        /** @stable ICU 2.4 */
        public static final int SAD = 33;
        /** @stable ICU 2.4 */
        public static final int SADHE = 34;
        /** @stable ICU 2.4 */
        public static final int SEEN = 35;
        /** @stable ICU 2.4 */
        public static final int SEMKATH = 36;
        /** @stable ICU 2.4 */
        public static final int SHIN = 37;
        /** @stable ICU 2.4 */
        public static final int SWASH_KAF = 38;
        /** @stable ICU 2.4 */
        public static final int SYRIAC_WAW = 39;
        /** @stable ICU 2.4 */
        public static final int TAH = 40;
        /** @stable ICU 2.4 */
        public static final int TAW = 41;
        /** @stable ICU 2.4 */
        public static final int TEH_MARBUTA = 42;
        /** @stable ICU 2.4 */
        public static final int TETH = 43;
        /** @stable ICU 2.4 */
        public static final int WAW = 44;
        /** @stable ICU 2.4 */
        public static final int YEH = 45;
        /** @stable ICU 2.4 */
        public static final int YEH_BARREE = 46;
        /** @stable ICU 2.4 */
        public static final int YEH_WITH_TAIL = 47;
        /** @stable ICU 2.4 */
        public static final int YUDH = 48;
        /** @stable ICU 2.4 */
        public static final int YUDH_HE = 49;
        /** @stable ICU 2.4 */
        public static final int ZAIN = 50;
        /** @stable ICU 2.6 */
        public static final int FE = 51;
        /** @stable ICU 2.6 */
        public static final int KHAPH = 52;
        /** @stable ICU 2.6 */
        public static final int ZHAIN = 53;
        /** @stable ICU 4.0 */
        public static final int BURUSHASKI_YEH_BARREE = 54;
        /** @stable ICU 4.4 */
        public static final int FARSI_YEH = 55;
        /** @stable ICU 4.4 */
        public static final int NYA = 56;
        /** @stable ICU 49 */
        public static final int ROHINGYA_YEH = 57;

        /** @stable ICU 54 */
        public static final int MANICHAEAN_ALEPH = 58;
        /** @stable ICU 54 */
        public static final int MANICHAEAN_AYIN = 59;
        /** @stable ICU 54 */
        public static final int MANICHAEAN_BETH = 60;
        /** @stable ICU 54 */
        public static final int MANICHAEAN_DALETH = 61;
        /** @stable ICU 54 */
        public static final int MANICHAEAN_DHAMEDH = 62;
        /** @stable ICU 54 */
        public static final int MANICHAEAN_FIVE = 63;
        /** @stable ICU 54 */
        public static final int MANICHAEAN_GIMEL = 64;
        /** @stable ICU 54 */
        public static final int MANICHAEAN_HETH = 65;
        /** @stable ICU 54 */
        public static final int MANICHAEAN_HUNDRED = 66;
        /** @stable ICU 54 */
        public static final int MANICHAEAN_KAPH = 67;
        /** @stable ICU 54 */
        public static final int MANICHAEAN_LAMEDH = 68;
        /** @stable ICU 54 */
        public static final int MANICHAEAN_MEM = 69;
        /** @stable ICU 54 */
        public static final int MANICHAEAN_NUN = 70;
        /** @stable ICU 54 */
        public static final int MANICHAEAN_ONE = 71;
        /** @stable ICU 54 */
        public static final int MANICHAEAN_PE = 72;
        /** @stable ICU 54 */
        public static final int MANICHAEAN_QOPH = 73;
        /** @stable ICU 54 */
        public static final int MANICHAEAN_RESH = 74;
        /** @stable ICU 54 */
        public static final int MANICHAEAN_SADHE = 75;
        /** @stable ICU 54 */
        public static final int MANICHAEAN_SAMEKH = 76;
        /** @stable ICU 54 */
        public static final int MANICHAEAN_TAW = 77;
        /** @stable ICU 54 */
        public static final int MANICHAEAN_TEN = 78;
        /** @stable ICU 54 */
        public static final int MANICHAEAN_TETH = 79;
        /** @stable ICU 54 */
        public static final int MANICHAEAN_THAMEDH = 80;
        /** @stable ICU 54 */
        public static final int MANICHAEAN_TWENTY = 81;
        /** @stable ICU 54 */
        public static final int MANICHAEAN_WAW = 82;
        /** @stable ICU 54 */
        public static final int MANICHAEAN_YODH = 83;
        /** @stable ICU 54 */
        public static final int MANICHAEAN_ZAYIN = 84;
        /** @stable ICU 54 */
        public static final int STRAIGHT_WAW = 85;

        /** @stable ICU 2.4 */
        public static final int COUNT = 86;
    }
/**
 * Grapheme Cluster Break constants.
 * The values are consecutive integers starting at 0.
 * @see UProperty#GRAPHEME_CLUSTER_BREAK
 * @stable ICU 3.4
 */
public static interface GraphemeClusterBreak {
    /** @stable ICU 3.4 */
    public static final int OTHER = 0;
    /** @stable ICU 3.4 */
    public static final int CONTROL = 1;
    /** @stable ICU 3.4 */
    public static final int CR = 2;
    /** @stable ICU 3.4 */
    public static final int EXTEND = 3;
    /** @stable ICU 3.4 */
    public static final int L = 4;
    /** @stable ICU 3.4 */
    public static final int LF = 5;
    /** @stable ICU 3.4 */
    public static final int LV = 6;
    /** @stable ICU 3.4 */
    public static final int LVT = 7;
    /** @stable ICU 3.4 */
    public static final int T = 8;
    /** @stable ICU 3.4 */
    public static final int V = 9;
    /** @stable ICU 4.0 */
    public static final int SPACING_MARK = 10;
    /** @stable ICU 4.0 */
    public static final int PREPEND = 11;
    /** @stable ICU 50 */
    public static final int REGIONAL_INDICATOR = 12;  /*[RI]*/ /* new in Unicode 6.2/ICU 50 */
    /**
     * One more than the largest Grapheme Cluster Break value declared above.
     * @stable ICU 3.4
     */
    public static final int COUNT = 13;
}

/**
 * Word Break constants.
* @see UProperty#WORD_BREAK
 * @stable ICU 3.4
 */
public static interface WordBreak {
    /** @stable ICU 3.8 */
    public static final int OTHER = 0;
    /** @stable ICU 3.8 */
    public static final int ALETTER = 1;
    /** @stable ICU 3.8 */
    public static final int FORMAT = 2;
    /** @stable ICU 3.8 */
    public static final int KATAKANA = 3;
    /** @stable ICU 3.8 */
    public static final int MIDLETTER = 4;
    /** @stable ICU 3.8 */
    public static final int MIDNUM = 5;
    /** @stable ICU 3.8 */
    public static final int NUMERIC = 6;
    /** @stable ICU 3.8 */
    public static final int EXTENDNUMLET = 7;
    /** @stable ICU 4.0 */
    public static final int CR = 8;
    /** @stable ICU 4.0 */
    public static final int EXTEND = 9;
    /** @stable ICU 4.0 */
    public static final int LF = 10;
    /** @stable ICU 4.0 */
    public static final int MIDNUMLET = 11;
    /** @stable ICU 4.0 */
    public static final int NEWLINE = 12;
    /** @stable ICU 50 */
    public static final int REGIONAL_INDICATOR = 13;  /*[RI]*/ /* new in Unicode 6.2/ICU 50 */
    /** @stable ICU 52 */
    public static final int HEBREW_LETTER = 14;  /*[HL]*/ /* from here on: new in Unicode 6.3/ICU 52 */
    /** @stable ICU 52 */
    public static final int SINGLE_QUOTE = 15;  /*[SQ]*/
    /** @stable ICU 52 */
    public static final int DOUBLE_QUOTE = 16;  /*[DQ]*/
    /**
     * One more than the largest Word Break value declared above.
     * @stable ICU 4.0
     */
    public static final int COUNT = 17;
}

/**
 * Sentence Break constants.
* @see UProperty#SENTENCE_BREAK
 * @stable ICU 3.4
 */
public static interface SentenceBreak {
    /** @stable ICU 3.8 */
    public static final int OTHER = 0;
    /** @stable ICU 3.8 */
    public static final int ATERM = 1;
    /** @stable ICU 3.8 */
    public static final int CLOSE = 2;
    /** @stable ICU 3.8 */
    public static final int FORMAT = 3;
    /** @stable ICU 3.8 */
    public static final int LOWER = 4;
    /** @stable ICU 3.8 */
    public static final int NUMERIC = 5;
    /** @stable ICU 3.8 */
    public static final int OLETTER = 6;
    /** @stable ICU 3.8 */
    public static final int SEP = 7;
    /** @stable ICU 3.8 */
    public static final int SP = 8;
    /** @stable ICU 3.8 */
    public static final int STERM = 9;
    /** @stable ICU 3.8 */
    public static final int UPPER = 10;
    /** @stable ICU 4.0 */
    public static final int CR = 11;
    /** @stable ICU 4.0 */
    public static final int EXTEND = 12;
    /** @stable ICU 4.0 */
    public static final int LF = 13;
    /** @stable ICU 4.0 */
    public static final int SCONTINUE = 14;
    /**
     * One more than the largest Sentence Break value declared above.
     * @stable ICU 4.0
     */
    public static final int COUNT = 15;
}

/**
 * Line Break constants.
* @see UProperty#LINE_BREAK
 * @stable ICU 2.4
 */
public static interface LineBreak
{
    /** @stable ICU 2.4 */
    public static final int UNKNOWN = 0;
    /** @stable ICU 2.4 */
    public static final int AMBIGUOUS = 1;
    /** @stable ICU 2.4 */
    public static final int ALPHABETIC = 2;
    /** @stable ICU 2.4 */
    public static final int BREAK_BOTH = 3;
    /** @stable ICU 2.4 */
    public static final int BREAK_AFTER = 4;
    /** @stable ICU 2.4 */
    public static final int BREAK_BEFORE = 5;
    /** @stable ICU 2.4 */
    public static final int MANDATORY_BREAK = 6;
    /** @stable ICU 2.4 */
    public static final int CONTINGENT_BREAK = 7;
    /** @stable ICU 2.4 */
    public static final int CLOSE_PUNCTUATION = 8;
    /** @stable ICU 2.4 */
    public static final int COMBINING_MARK = 9;
    /** @stable ICU 2.4 */
    public static final int CARRIAGE_RETURN = 10;
    /** @stable ICU 2.4 */
    public static final int EXCLAMATION = 11;
    /** @stable ICU 2.4 */
    public static final int GLUE = 12;
    /** @stable ICU 2.4 */
    public static final int HYPHEN = 13;
    /** @stable ICU 2.4 */
    public static final int IDEOGRAPHIC = 14;
    /**
     * Misspelled name; has the same value (15) as {@link #INSEPARABLE}.
     * @see #INSEPARABLE
     * @stable ICU 2.4
     */
    public static final int INSEPERABLE = 15;
    /**
     * Renamed from the misspelled "inseperable" in Unicode 4.0.1.
     * @stable ICU 3.0
     */
    public static final int INSEPARABLE = 15;
    /** @stable ICU 2.4 */
    public static final int INFIX_NUMERIC = 16;
    /** @stable ICU 2.4 */
    public static final int LINE_FEED = 17;
    /** @stable ICU 2.4 */
    public static final int NONSTARTER = 18;
    /** @stable ICU 2.4 */
    public static final int NUMERIC = 19;
    /** @stable ICU 2.4 */
    public static final int OPEN_PUNCTUATION = 20;
    /** @stable ICU 2.4 */
    public static final int POSTFIX_NUMERIC = 21;
    /** @stable ICU 2.4 */
    public static final int PREFIX_NUMERIC = 22;
    /** @stable ICU 2.4 */
    public static final int QUOTATION = 23;
    /** @stable ICU 2.4 */
    public static final int COMPLEX_CONTEXT = 24;
    /** @stable ICU 2.4 */
    public static final int SURROGATE = 25;
    /** @stable ICU 2.4 */
    public static final int SPACE = 26;
    /** @stable ICU 2.4 */
    public static final int BREAK_SYMBOLS = 27;
    /** @stable ICU 2.4 */
    public static final int ZWSPACE = 28;
    /** @stable ICU 2.6 */
    public static final int NEXT_LINE = 29;  /*[NL]*/ /* from here on: new in Unicode 4/ICU 2.6 */
    /** @stable ICU 2.6 */
    public static final int WORD_JOINER = 30;  /*[WJ]*/
    /** @stable ICU 3.4 */
    public static final int H2 = 31;  /* from here on: new in Unicode 4.1/ICU 3.4 */
    /** @stable ICU 3.4 */
    public static final int H3 = 32;
    /** @stable ICU 3.4 */
    public static final int JL = 33;
    /** @stable ICU 3.4 */
    public static final int JT = 34;
    /** @stable ICU 3.4 */
    public static final int JV = 35;
    /** @stable ICU 4.4 */
    public static final int CLOSE_PARENTHESIS = 36;  /*[CP]*/ /* new in Unicode 5.2/ICU 4.4 */
    /** @stable ICU 49 */
    public static final int CONDITIONAL_JAPANESE_STARTER = 37;  /*[CJ]*/ /* new in Unicode 6.1/ICU 49 */
    /** @stable ICU 49 */
    public static final int HEBREW_LETTER = 38;  /*[HL]*/ /* new in Unicode 6.1/ICU 49 */
    /** @stable ICU 50 */
    public static final int REGIONAL_INDICATOR = 39;  /*[RI]*/ /* new in Unicode 6.2/ICU 50 */
    /**
     * One more than the largest Line Break value declared above.
     * @stable ICU 2.4
     */
    public static final int COUNT = 40;
}

/**
 * Numeric Type constants.
 * @see UProperty#NUMERIC_TYPE
 * @stable ICU 2.4
 */
public static interface NumericType
{
    /** @stable ICU 2.4 */
    public static final int NONE = 0;
    /** @stable ICU 2.4 */
    public static final int DECIMAL = 1;
    /** @stable ICU 2.4 */
    public static final int DIGIT = 2;
    /** @stable ICU 2.4 */
    public static final int NUMERIC = 3;
    /**
     * One more than the largest Numeric Type value declared above.
     * @stable ICU 2.4
     */
    public static final int COUNT = 4;
}

/**
 * Hangul Syllable Type constants.
 * The bracketed tag after each constant is the short name noted by the original author.
 *
 * @see UProperty#HANGUL_SYLLABLE_TYPE
 * @stable ICU 2.6
 */
public static interface HangulSyllableType
{
    /** @stable ICU 2.6 */
    public static final int NOT_APPLICABLE = 0;  /*[NA]*/ /*See note !!*/
    /** @stable ICU 2.6 */
    public static final int LEADING_JAMO = 1;  /*[L]*/
    /** @stable ICU 2.6 */
    public static final int VOWEL_JAMO = 2;  /*[V]*/
    /** @stable ICU 2.6 */
    public static final int TRAILING_JAMO = 3;  /*[T]*/
    /** @stable ICU 2.6 */
    public static final int LV_SYLLABLE = 4;  /*[LV]*/
    /** @stable ICU 2.6 */
    public static final int LVT_SYLLABLE = 5;  /*[LVT]*/
    /**
     * One more than the largest Hangul Syllable Type value declared above.
     * @stable ICU 2.6
     */
    public static final int COUNT = 6;
}

/**
 * Bidi Paired Bracket Type constants.
*
 * @see UProperty#BIDI_PAIRED_BRACKET_TYPE
 * @stable ICU 52
 */
public static interface BidiPairedBracketType {
    /**
     * Not a paired bracket.
     * @stable ICU 52
     */
    public static final int NONE = 0;
    /**
     * Open paired bracket.
     * @stable ICU 52
     */
    public static final int OPEN = 1;
    /**
     * Close paired bracket.
     * @stable ICU 52
     */
    public static final int CLOSE = 2;
    /**
     * One more than the largest Bidi Paired Bracket Type value declared above.
     * @stable ICU 52
     */
    public static final int COUNT = 3;
}

// public data members -----------------------------------------------

/**
 * The lowest Unicode code point value, constant 0.
 * Same as {@link Character#MIN_CODE_POINT}, same integer value as {@link Character#MIN_VALUE}.
 *
 * @stable ICU 2.1
 */
public static final int MIN_VALUE = Character.MIN_CODE_POINT;

/**
 * The highest Unicode code point value (scalar value), constant U+10FFFF (uses 21 bits).
 * Same as {@link Character#MAX_CODE_POINT}.
 *
 * <p>Up-to-date Unicode implementation of {@link Character#MAX_VALUE}
 * which is still a char with the value U+FFFF.
 *
 * @stable ICU 2.1
 */
public static final int MAX_VALUE = Character.MAX_CODE_POINT;

/**
 * The minimum value for Supplementary code points, constant U+10000.
 * Same as {@link Character#MIN_SUPPLEMENTARY_CODE_POINT}.
 *
 * @stable ICU 2.1
 */
public static final int SUPPLEMENTARY_MIN_VALUE = Character.MIN_SUPPLEMENTARY_CODE_POINT;

/**
 * Unicode value used when translating into Unicode encoding form and there
 * is no existing character. The value is U+FFFD, stored as an int.
 * @stable ICU 2.1
 */
public static final int REPLACEMENT_CHAR = '\uFFFD';

/**
 * Special value that is returned by getUnicodeNumericValue(int) when no
 * numeric value is defined for a code point.
* @stable ICU 2.4
 * @see #getUnicodeNumericValue
 */
// The integer literal is widened to double by the declaration.
public static final double NO_NUMERIC_VALUE = -123456789;

/**
 * Compatibility constant for Java Character's MIN_RADIX.
 * @stable ICU 3.4
 */
public static final int MIN_RADIX = java.lang.Character.MIN_RADIX;

/**
 * Compatibility constant for Java Character's MAX_RADIX.
 * @stable ICU 3.4
 */
public static final int MAX_RADIX = java.lang.Character.MAX_RADIX;

/**
 * Do not lowercase non-initial parts of words when titlecasing.
 * Option bit for titlecasing APIs that take an options bit set.
 *
 * <p>By default, titlecasing will titlecase the first cased character
 * of a word and lowercase all other characters.
 * With this option, the other characters will not be modified.
 *
 * @see #toTitleCase
 * @stable ICU 3.8
 */
public static final int TITLECASE_NO_LOWERCASE = 0x100;

/**
 * Do not adjust the titlecasing indexes from BreakIterator::next() indexes;
 * titlecase exactly the characters at breaks from the iterator.
 * Option bit for titlecasing APIs that take an options bit set.
 *
 * By default, titlecasing will take each break iterator index,
 * adjust it by looking for the next cased character, and titlecase that one.
 * Other characters are lowercased.
 *
 * This follows Unicode 4 &amp; 5 section 3.13 Default Case Operations:
 *
 * R3 toTitlecase(X): Find the word boundaries based on Unicode Standard Annex
 * #29, "Text Boundaries." Between each pair of word boundaries, find the first
 * cased character F. If F exists, map F to default_title(F); then map each
 * subsequent character C to default_lower(C).
3514 * 3515 * @see #toTitleCase 3516 * @see #TITLECASE_NO_LOWERCASE 3517 * @stable ICU 3.8 3518 */ 3519 public static final int TITLECASE_NO_BREAK_ADJUSTMENT = 0x200; 3520 3521 // public methods ---------------------------------------------------- 3522 3523 /** 3524 * Returnss the numeric value of a decimal digit code point. 3525 * <br>This method observes the semantics of 3526 * <code>java.lang.Character.digit()</code>. Note that this 3527 * will return positive values for code points for which isDigit 3528 * returns false, just like java.lang.Character. 3529 * <br><em>Semantic Change:</em> In release 1.3.1 and 3530 * prior, this did not treat the European letters as having a 3531 * digit value, and also treated numeric letters and other numbers as 3532 * digits. 3533 * This has been changed to conform to the java semantics. 3534 * <br>A code point is a valid digit if and only if: 3535 * <ul> 3536 * <li>ch is a decimal digit or one of the european letters, and 3537 * <li>the value of ch is less than the specified radix. 3538 * </ul> 3539 * @param ch the code point to query 3540 * @param radix the radix 3541 * @return the numeric value represented by the code point in the 3542 * specified radix, or -1 if the code point is not a decimal digit 3543 * or if its value is too large for the radix 3544 * @stable ICU 2.1 3545 */ digit(int ch, int radix)3546 public static int digit(int ch, int radix) 3547 { 3548 if (2 <= radix && radix <= 36) { 3549 int value = digit(ch); 3550 if (value < 0) { 3551 // ch is not a decimal digit, try latin letters 3552 value = UCharacterProperty.getEuropeanDigit(ch); 3553 } 3554 return (value < radix) ? value : -1; 3555 } else { 3556 return -1; // invalid radix 3557 } 3558 } 3559 3560 /** 3561 * Returnss the numeric value of a decimal digit code point. 3562 * <br>This is a convenience overload of <code>digit(int, int)</code> 3563 * that provides a decimal radix. 
* <br><em>Semantic Change:</em> In release 1.3.1 and prior, this
 * treated numeric letters and other numbers as digits. This has
 * been changed to conform to the java semantics.
 * @param ch the code point to query
 * @return the numeric value represented by the code point,
 *         or -1 if the code point is not a decimal digit or if its
 *         value is too large for a decimal radix
 * @stable ICU 2.1
 */
public static int digit(int ch)
{
    // Delegates to the shared UCharacterProperty instance.
    return UCharacterProperty.INSTANCE.digit(ch);
}

/**
 * Returns the numeric value of the code point as a nonnegative
 * integer.
 * <br>If the code point does not have a numeric value, then -1 is returned.
 * <br>
 * If the code point has a numeric value that cannot be represented as a
 * nonnegative integer (for example, a fractional value), then -2 is
 * returned.
 * @param ch the code point to query
 * @return the numeric value of the code point, or -1 if it has no numeric
 *         value, or -2 if it has a numeric value that cannot be represented as a
 *         nonnegative integer
 * @stable ICU 2.1
 */
public static int getNumericValue(int ch)
{
    // Delegates to the shared UCharacterProperty instance.
    return UCharacterProperty.INSTANCE.getNumericValue(ch);
}

/**
 * {@icu} Returns the numeric value for a Unicode code point as defined in the
 * Unicode Character Database.
 * <p>A "double" return type is necessary because some numeric values are
 * fractions, negative, or too large for int.
 * <p>For characters without any numeric values in the Unicode Character
 * Database, this function will return NO_NUMERIC_VALUE.
 * Note: This is different from the Unicode Standard which specifies NaN as the default value.
 * <p><em>API Change:</em> In release 2.2 and prior, this API has a
 * return type int and returns -1 when the argument ch does not have a
 * corresponding numeric value.
This has been changed to synch with ICU4C 3608 * 3609 * This corresponds to the ICU4C function u_getNumericValue. 3610 * @param ch Code point to get the numeric value for. 3611 * @return numeric value of ch, or NO_NUMERIC_VALUE if none is defined. 3612 * @stable ICU 2.4 3613 */ getUnicodeNumericValue(int ch)3614 public static double getUnicodeNumericValue(int ch) 3615 { 3616 return UCharacterProperty.INSTANCE.getUnicodeNumericValue(ch); 3617 } 3618 3619 /** 3620 * Compatibility override of Java deprecated method. This 3621 * method will always remain deprecated. 3622 * Same as java.lang.Character.isSpace(). 3623 * @param ch the code point 3624 * @return true if the code point is a space character as 3625 * defined by java.lang.Character.isSpace. 3626 * @deprecated ICU 3.4 (Java) 3627 */ 3628 @Deprecated isSpace(int ch)3629 public static boolean isSpace(int ch) { 3630 return ch <= 0x20 && 3631 (ch == 0x20 || ch == 0x09 || ch == 0x0a || ch == 0x0c || ch == 0x0d); 3632 } 3633 3634 /** 3635 * Returns a value indicating a code point's Unicode category. 3636 * Up-to-date Unicode implementation of java.lang.Character.getType() 3637 * except for the above mentioned code points that had their category 3638 * changed.<br> 3639 * Return results are constants from the interface 3640 * <a href=UCharacterCategory.html>UCharacterCategory</a><br> 3641 * <em>NOTE:</em> the UCharacterCategory values are <em>not</em> compatible with 3642 * those returned by java.lang.Character.getType. UCharacterCategory values 3643 * match the ones used in ICU4C, while java.lang.Character type 3644 * values, though similar, skip the value 17. 
* @param ch code point whose type is to be determined
 * @return category which is a value of UCharacterCategory
 * @stable ICU 2.1
 */
public static int getType(int ch)
{
    // Delegates to the shared UCharacterProperty instance.
    return UCharacterProperty.INSTANCE.getType(ch);
}

/**
 * Determines if a code point has a defined meaning in the up-to-date
 * Unicode standard.
 * E.g. supplementary code points though allocated space are not defined in
 * Unicode yet.<br>
 * Up-to-date Unicode implementation of java.lang.Character.isDefined()
 * @param ch code point to be determined if it is defined in the most
 *        current version of Unicode
 * @return true if this code point is defined in unicode
 * @stable ICU 2.1
 */
public static boolean isDefined(int ch)
{
    // NOTE(review): 0 is presumably the "unassigned" category value of
    // UCharacterCategory - confirm against that interface.
    return getType(ch) != 0;
}

/**
 * Determines if a code point is a Java digit.
 * <br>This method observes the semantics of
 * <code>java.lang.Character.isDigit()</code>. It returns true for decimal
 * digits only.
 * <br><em>Semantic Change:</em> In release 1.3.1 and prior, this treated
 * numeric letters and other numbers as digits.
 * This has been changed to conform to the java semantics.
 * @param ch code point to query
 * @return true if this code point is a digit
 * @stable ICU 2.1
 */
public static boolean isDigit(int ch)
{
    // Only the DECIMAL_DIGIT_NUMBER category counts as a digit here.
    return getType(ch) == UCharacterCategory.DECIMAL_DIGIT_NUMBER;
}

/**
 * Determines if the specified code point is an ISO control character.
3689 * A code point is considered to be an ISO control character if it is in 3690 * the range \u0000 through \u001F or in the range \u007F through 3691 * \u009F.<br> 3692 * Up-to-date Unicode implementation of java.lang.Character.isISOControl() 3693 * @param ch code point to determine if it is an ISO control character 3694 * @return true if code point is a ISO control character 3695 * @stable ICU 2.1 3696 */ isISOControl(int ch)3697 public static boolean isISOControl(int ch) 3698 { 3699 return ch >= 0 && ch <= APPLICATION_PROGRAM_COMMAND_ && 3700 ((ch <= UNIT_SEPARATOR_) || (ch >= DELETE_)); 3701 } 3702 3703 /** 3704 * Determines if the specified code point is a letter. 3705 * Up-to-date Unicode implementation of java.lang.Character.isLetter() 3706 * @param ch code point to determine if it is a letter 3707 * @return true if code point is a letter 3708 * @stable ICU 2.1 3709 */ isLetter(int ch)3710 public static boolean isLetter(int ch) 3711 { 3712 // if props == 0, it will just fall through and return false 3713 return ((1 << getType(ch)) 3714 & ((1 << UCharacterCategory.UPPERCASE_LETTER) 3715 | (1 << UCharacterCategory.LOWERCASE_LETTER) 3716 | (1 << UCharacterCategory.TITLECASE_LETTER) 3717 | (1 << UCharacterCategory.MODIFIER_LETTER) 3718 | (1 << UCharacterCategory.OTHER_LETTER))) != 0; 3719 } 3720 3721 /** 3722 * Determines if the specified code point is a letter or digit. 3723 * {@icunote} This method, unlike java.lang.Character does not regard the ascii 3724 * characters 'A' - 'Z' and 'a' - 'z' as digits. 
3725 * @param ch code point to determine if it is a letter or a digit 3726 * @return true if code point is a letter or a digit 3727 * @stable ICU 2.1 3728 */ isLetterOrDigit(int ch)3729 public static boolean isLetterOrDigit(int ch) 3730 { 3731 return ((1 << getType(ch)) 3732 & ((1 << UCharacterCategory.UPPERCASE_LETTER) 3733 | (1 << UCharacterCategory.LOWERCASE_LETTER) 3734 | (1 << UCharacterCategory.TITLECASE_LETTER) 3735 | (1 << UCharacterCategory.MODIFIER_LETTER) 3736 | (1 << UCharacterCategory.OTHER_LETTER) 3737 | (1 << UCharacterCategory.DECIMAL_DIGIT_NUMBER))) != 0; 3738 } 3739 3740 /** 3741 * Compatibility override of Java deprecated method. This 3742 * method will always remain deprecated. Delegates to 3743 * java.lang.Character.isJavaIdentifierStart. 3744 * @param cp the code point 3745 * @return true if the code point can start a java identifier. 3746 * @deprecated ICU 3.4 (Java) 3747 */ 3748 @Deprecated isJavaLetter(int cp)3749 public static boolean isJavaLetter(int cp) { 3750 return isJavaIdentifierStart(cp); 3751 } 3752 3753 /** 3754 * Compatibility override of Java deprecated method. This 3755 * method will always remain deprecated. Delegates to 3756 * java.lang.Character.isJavaIdentifierPart. 3757 * @param cp the code point 3758 * @return true if the code point can continue a java identifier. 3759 * @deprecated ICU 3.4 (Java) 3760 */ 3761 @Deprecated isJavaLetterOrDigit(int cp)3762 public static boolean isJavaLetterOrDigit(int cp) { 3763 return isJavaIdentifierPart(cp); 3764 } 3765 3766 /** 3767 * Compatibility override of Java method, delegates to 3768 * java.lang.Character.isJavaIdentifierStart. 3769 * @param cp the code point 3770 * @return true if the code point can start a java identifier. 
3771 * @stable ICU 3.4 3772 */ isJavaIdentifierStart(int cp)3773 public static boolean isJavaIdentifierStart(int cp) { 3774 // note, downcast to char for jdk 1.4 compatibility 3775 return java.lang.Character.isJavaIdentifierStart((char)cp); 3776 } 3777 3778 /** 3779 * Compatibility override of Java method, delegates to 3780 * java.lang.Character.isJavaIdentifierPart. 3781 * @param cp the code point 3782 * @return true if the code point can continue a java identifier. 3783 * @stable ICU 3.4 3784 */ isJavaIdentifierPart(int cp)3785 public static boolean isJavaIdentifierPart(int cp) { 3786 // note, downcast to char for jdk 1.4 compatibility 3787 return java.lang.Character.isJavaIdentifierPart((char)cp); 3788 } 3789 3790 /** 3791 * Determines if the specified code point is a lowercase character. 3792 * UnicodeData only contains case mappings for code points where they are 3793 * one-to-one mappings; it also omits information about context-sensitive 3794 * case mappings.<br> For more information about Unicode case mapping 3795 * please refer to the 3796 * <a href=http://www.unicode.org/unicode/reports/tr21/>Technical report 3797 * #21</a>.<br> 3798 * Up-to-date Unicode implementation of java.lang.Character.isLowerCase() 3799 * @param ch code point to determine if it is in lowercase 3800 * @return true if code point is a lowercase character 3801 * @stable ICU 2.1 3802 */ isLowerCase(int ch)3803 public static boolean isLowerCase(int ch) 3804 { 3805 // if props == 0, it will just fall through and return false 3806 return getType(ch) == UCharacterCategory.LOWERCASE_LETTER; 3807 } 3808 3809 /** 3810 * Determines if the specified code point is a white space character. 3811 * A code point is considered to be an whitespace character if and only 3812 * if it satisfies one of the following criteria: 3813 * <ul> 3814 * <li> It is a Unicode Separator character (categories "Z" = "Zs" or "Zl" or "Zp"), but is not 3815 * also a non-breaking space (\u00A0 or \u2007 or \u202F). 
3816 * <li> It is \u0009, HORIZONTAL TABULATION. 3817 * <li> It is \u000A, LINE FEED. 3818 * <li> It is \u000B, VERTICAL TABULATION. 3819 * <li> It is \u000C, FORM FEED. 3820 * <li> It is \u000D, CARRIAGE RETURN. 3821 * <li> It is \u001C, FILE SEPARATOR. 3822 * <li> It is \u001D, GROUP SEPARATOR. 3823 * <li> It is \u001E, RECORD SEPARATOR. 3824 * <li> It is \u001F, UNIT SEPARATOR. 3825 * </ul> 3826 * 3827 * This API tries to sync with the semantics of Java's 3828 * java.lang.Character.isWhitespace(), but it may not return 3829 * the exact same results because of the Unicode version 3830 * difference. 3831 * <p>Note: Unicode 4.0.1 changed U+200B ZERO WIDTH SPACE from a Space Separator (Zs) 3832 * to a Format Control (Cf). Since then, isWhitespace(0x200b) returns false. 3833 * See http://www.unicode.org/versions/Unicode4.0.1/ 3834 * @param ch code point to determine if it is a white space 3835 * @return true if the specified code point is a white space character 3836 * @stable ICU 2.1 3837 */ isWhitespace(int ch)3838 public static boolean isWhitespace(int ch) 3839 { 3840 // exclude no-break spaces 3841 // if props == 0, it will just fall through and return false 3842 return ((1 << getType(ch)) & 3843 ((1 << UCharacterCategory.SPACE_SEPARATOR) 3844 | (1 << UCharacterCategory.LINE_SEPARATOR) 3845 | (1 << UCharacterCategory.PARAGRAPH_SEPARATOR))) != 0 3846 && (ch != NO_BREAK_SPACE_) && (ch != FIGURE_SPACE_) && (ch != NARROW_NO_BREAK_SPACE_) 3847 // TAB VT LF FF CR FS GS RS US NL are all control characters 3848 // that are white spaces. 3849 || (ch >= 0x9 && ch <= 0xd) || (ch >= 0x1c && ch <= 0x1f); 3850 } 3851 3852 /** 3853 * Determines if the specified code point is a Unicode specified space 3854 * character, i.e. if code point is in the category Zs, Zl and Zp. 3855 * Up-to-date Unicode implementation of java.lang.Character.isSpaceChar(). 
3856 * @param ch code point to determine if it is a space 3857 * @return true if the specified code point is a space character 3858 * @stable ICU 2.1 3859 */ isSpaceChar(int ch)3860 public static boolean isSpaceChar(int ch) 3861 { 3862 // if props == 0, it will just fall through and return false 3863 return ((1 << getType(ch)) & ((1 << UCharacterCategory.SPACE_SEPARATOR) 3864 | (1 << UCharacterCategory.LINE_SEPARATOR) 3865 | (1 << UCharacterCategory.PARAGRAPH_SEPARATOR))) 3866 != 0; 3867 } 3868 3869 /** 3870 * Determines if the specified code point is a titlecase character. 3871 * UnicodeData only contains case mappings for code points where they are 3872 * one-to-one mappings; it also omits information about context-sensitive 3873 * case mappings.<br> 3874 * For more information about Unicode case mapping please refer to the 3875 * <a href=http://www.unicode.org/unicode/reports/tr21/> 3876 * Technical report #21</a>.<br> 3877 * Up-to-date Unicode implementation of java.lang.Character.isTitleCase(). 3878 * @param ch code point to determine if it is in title case 3879 * @return true if the specified code point is a titlecase character 3880 * @stable ICU 2.1 3881 */ isTitleCase(int ch)3882 public static boolean isTitleCase(int ch) 3883 { 3884 // if props == 0, it will just fall through and return false 3885 return getType(ch) == UCharacterCategory.TITLECASE_LETTER; 3886 } 3887 3888 /** 3889 * Determines if the specified code point may be any part of a Unicode 3890 * identifier other than the starting character. 
3891 * A code point may be part of a Unicode identifier if and only if it is 3892 * one of the following: 3893 * <ul> 3894 * <li> Lu Uppercase letter 3895 * <li> Ll Lowercase letter 3896 * <li> Lt Titlecase letter 3897 * <li> Lm Modifier letter 3898 * <li> Lo Other letter 3899 * <li> Nl Letter number 3900 * <li> Pc Connecting punctuation character 3901 * <li> Nd decimal number 3902 * <li> Mc Spacing combining mark 3903 * <li> Mn Non-spacing mark 3904 * <li> Cf formatting code 3905 * </ul> 3906 * Up-to-date Unicode implementation of 3907 * java.lang.Character.isUnicodeIdentifierPart().<br> 3908 * See <a href=http://www.unicode.org/unicode/reports/tr8/>UTR #8</a>. 3909 * @param ch code point to determine if is can be part of a Unicode 3910 * identifier 3911 * @return true if code point is any character belonging a unicode 3912 * identifier suffix after the first character 3913 * @stable ICU 2.1 3914 */ isUnicodeIdentifierPart(int ch)3915 public static boolean isUnicodeIdentifierPart(int ch) 3916 { 3917 // if props == 0, it will just fall through and return false 3918 // cat == format 3919 return ((1 << getType(ch)) 3920 & ((1 << UCharacterCategory.UPPERCASE_LETTER) 3921 | (1 << UCharacterCategory.LOWERCASE_LETTER) 3922 | (1 << UCharacterCategory.TITLECASE_LETTER) 3923 | (1 << UCharacterCategory.MODIFIER_LETTER) 3924 | (1 << UCharacterCategory.OTHER_LETTER) 3925 | (1 << UCharacterCategory.LETTER_NUMBER) 3926 | (1 << UCharacterCategory.CONNECTOR_PUNCTUATION) 3927 | (1 << UCharacterCategory.DECIMAL_DIGIT_NUMBER) 3928 | (1 << UCharacterCategory.COMBINING_SPACING_MARK) 3929 | (1 << UCharacterCategory.NON_SPACING_MARK))) != 0 3930 || isIdentifierIgnorable(ch); 3931 } 3932 3933 /** 3934 * Determines if the specified code point is permissible as the first 3935 * character in a Unicode identifier. 
3936 * A code point may start a Unicode identifier if it is of type either 3937 * <ul> 3938 * <li> Lu Uppercase letter 3939 * <li> Ll Lowercase letter 3940 * <li> Lt Titlecase letter 3941 * <li> Lm Modifier letter 3942 * <li> Lo Other letter 3943 * <li> Nl Letter number 3944 * </ul> 3945 * Up-to-date Unicode implementation of 3946 * java.lang.Character.isUnicodeIdentifierStart().<br> 3947 * See <a href=http://www.unicode.org/unicode/reports/tr8/>UTR #8</a>. 3948 * @param ch code point to determine if it can start a Unicode identifier 3949 * @return true if code point is the first character belonging a unicode 3950 * identifier 3951 * @stable ICU 2.1 3952 */ isUnicodeIdentifierStart(int ch)3953 public static boolean isUnicodeIdentifierStart(int ch) 3954 { 3955 /*int cat = getType(ch);*/ 3956 // if props == 0, it will just fall through and return false 3957 return ((1 << getType(ch)) 3958 & ((1 << UCharacterCategory.UPPERCASE_LETTER) 3959 | (1 << UCharacterCategory.LOWERCASE_LETTER) 3960 | (1 << UCharacterCategory.TITLECASE_LETTER) 3961 | (1 << UCharacterCategory.MODIFIER_LETTER) 3962 | (1 << UCharacterCategory.OTHER_LETTER) 3963 | (1 << UCharacterCategory.LETTER_NUMBER))) != 0; 3964 } 3965 3966 /** 3967 * Determines if the specified code point should be regarded as an 3968 * ignorable character in a Java identifier. 3969 * A character is Java-identifier-ignorable if it has the general category 3970 * Cf Formatting Control, or it is a non-Java-whitespace ISO control: 3971 * U+0000..U+0008, U+000E..U+001B, U+007F..U+009F.<br> 3972 * Up-to-date Unicode implementation of 3973 * java.lang.Character.isIdentifierIgnorable().<br> 3974 * See <a href=http://www.unicode.org/unicode/reports/tr8/>UTR #8</a>. 3975 * <p>Note that Unicode just recommends to ignore Cf (format controls). 3976 * @param ch code point to be determined if it can be ignored in a Unicode 3977 * identifier. 
3978 * @return true if the code point is ignorable 3979 * @stable ICU 2.1 3980 */ isIdentifierIgnorable(int ch)3981 public static boolean isIdentifierIgnorable(int ch) 3982 { 3983 // see java.lang.Character.isIdentifierIgnorable() on range of 3984 // ignorable characters. 3985 if (ch <= 0x9f) { 3986 return isISOControl(ch) 3987 && !((ch >= 0x9 && ch <= 0xd) 3988 || (ch >= 0x1c && ch <= 0x1f)); 3989 } 3990 return getType(ch) == UCharacterCategory.FORMAT; 3991 } 3992 3993 /** 3994 * Determines if the specified code point is an uppercase character. 3995 * UnicodeData only contains case mappings for code point where they are 3996 * one-to-one mappings; it also omits information about context-sensitive 3997 * case mappings.<br> 3998 * For language specific case conversion behavior, use 3999 * toUpperCase(locale, str). <br> 4000 * For example, the case conversion for dot-less i and dotted I in Turkish, 4001 * or for final sigma in Greek. 4002 * For more information about Unicode case mapping please refer to the 4003 * <a href=http://www.unicode.org/unicode/reports/tr21/> 4004 * Technical report #21</a>.<br> 4005 * Up-to-date Unicode implementation of java.lang.Character.isUpperCase(). 4006 * @param ch code point to determine if it is in uppercase 4007 * @return true if the code point is an uppercase character 4008 * @stable ICU 2.1 4009 */ isUpperCase(int ch)4010 public static boolean isUpperCase(int ch) 4011 { 4012 // if props == 0, it will just fall through and return false 4013 return getType(ch) == UCharacterCategory.UPPERCASE_LETTER; 4014 } 4015 4016 /** 4017 * The given code point is mapped to its lowercase equivalent; if the code 4018 * point has no lowercase equivalent, the code point itself is returned. 4019 * Up-to-date Unicode implementation of java.lang.Character.toLowerCase() 4020 * 4021 * <p>This function only returns the simple, single-code point case mapping. 
4022 * Full case mappings should be used whenever possible because they produce 4023 * better results by working on whole strings. 4024 * They take into account the string context and the language and can map 4025 * to a result string with a different length as appropriate. 4026 * Full case mappings are applied by the case mapping functions 4027 * that take String parameters rather than code points (int). 4028 * See also the User Guide chapter on C/POSIX migration: 4029 * http://www.icu-project.org/userguide/posix.html#case_mappings 4030 * 4031 * @param ch code point whose lowercase equivalent is to be retrieved 4032 * @return the lowercase equivalent code point 4033 * @stable ICU 2.1 4034 */ toLowerCase(int ch)4035 public static int toLowerCase(int ch) { 4036 return UCaseProps.INSTANCE.tolower(ch); 4037 } 4038 4039 /** 4040 * Converts argument code point and returns a String object representing 4041 * the code point's value in UTF-16 format. 4042 * The result is a string whose length is 1 for BMP code points, 2 for supplementary ones. 4043 * 4044 * <p>Up-to-date Unicode implementation of java.lang.Character.toString(). 4045 * 4046 * @param ch code point 4047 * @return string representation of the code point, null if code point is not 4048 * defined in unicode 4049 * @stable ICU 2.1 4050 */ toString(int ch)4051 public static String toString(int ch) 4052 { 4053 if (ch < MIN_VALUE || ch > MAX_VALUE) { 4054 return null; 4055 } 4056 4057 if (ch < SUPPLEMENTARY_MIN_VALUE) { 4058 return String.valueOf((char)ch); 4059 } 4060 4061 return new String(Character.toChars(ch)); 4062 } 4063 4064 /** 4065 * Converts the code point argument to titlecase. 4066 * If no titlecase is available, the uppercase is returned. If no uppercase 4067 * is available, the code point itself is returned. 4068 * Up-to-date Unicode implementation of java.lang.Character.toTitleCase() 4069 * 4070 * <p>This function only returns the simple, single-code point case mapping. 
4071 * Full case mappings should be used whenever possible because they produce 4072 * better results by working on whole strings. 4073 * They take into account the string context and the language and can map 4074 * to a result string with a different length as appropriate. 4075 * Full case mappings are applied by the case mapping functions 4076 * that take String parameters rather than code points (int). 4077 * See also the User Guide chapter on C/POSIX migration: 4078 * http://www.icu-project.org/userguide/posix.html#case_mappings 4079 * 4080 * @param ch code point whose title case is to be retrieved 4081 * @return titlecase code point 4082 * @stable ICU 2.1 4083 */ toTitleCase(int ch)4084 public static int toTitleCase(int ch) { 4085 return UCaseProps.INSTANCE.totitle(ch); 4086 } 4087 4088 /** 4089 * Converts the character argument to uppercase. 4090 * If no uppercase is available, the character itself is returned. 4091 * Up-to-date Unicode implementation of java.lang.Character.toUpperCase() 4092 * 4093 * <p>This function only returns the simple, single-code point case mapping. 4094 * Full case mappings should be used whenever possible because they produce 4095 * better results by working on whole strings. 4096 * They take into account the string context and the language and can map 4097 * to a result string with a different length as appropriate. 4098 * Full case mappings are applied by the case mapping functions 4099 * that take String parameters rather than code points (int). 
4100 * See also the User Guide chapter on C/POSIX migration: 4101 * http://www.icu-project.org/userguide/posix.html#case_mappings 4102 * 4103 * @param ch code point whose uppercase is to be retrieved 4104 * @return uppercase code point 4105 * @stable ICU 2.1 4106 */ toUpperCase(int ch)4107 public static int toUpperCase(int ch) { 4108 return UCaseProps.INSTANCE.toupper(ch); 4109 } 4110 4111 // extra methods not in java.lang.Character -------------------------- 4112 4113 /** 4114 * {@icu} Determines if the code point is a supplementary character. 4115 * A code point is a supplementary character if and only if it is greater 4116 * than <a href=#SUPPLEMENTARY_MIN_VALUE>SUPPLEMENTARY_MIN_VALUE</a> 4117 * @param ch code point to be determined if it is in the supplementary 4118 * plane 4119 * @return true if code point is a supplementary character 4120 * @stable ICU 2.1 4121 */ isSupplementary(int ch)4122 public static boolean isSupplementary(int ch) 4123 { 4124 return ch >= UCharacter.SUPPLEMENTARY_MIN_VALUE && 4125 ch <= UCharacter.MAX_VALUE; 4126 } 4127 4128 /** 4129 * {@icu} Determines if the code point is in the BMP plane. 4130 * @param ch code point to be determined if it is not a supplementary 4131 * character 4132 * @return true if code point is not a supplementary character 4133 * @stable ICU 2.1 4134 */ isBMP(int ch)4135 public static boolean isBMP(int ch) 4136 { 4137 return (ch >= 0 && ch <= LAST_CHAR_MASK_); 4138 } 4139 4140 /** 4141 * {@icu} Determines whether the specified code point is a printable character 4142 * according to the Unicode standard. 
4143 * @param ch code point to be determined if it is printable 4144 * @return true if the code point is a printable character 4145 * @stable ICU 2.1 4146 */ isPrintable(int ch)4147 public static boolean isPrintable(int ch) 4148 { 4149 int cat = getType(ch); 4150 // if props == 0, it will just fall through and return false 4151 return (cat != UCharacterCategory.UNASSIGNED && 4152 cat != UCharacterCategory.CONTROL && 4153 cat != UCharacterCategory.FORMAT && 4154 cat != UCharacterCategory.PRIVATE_USE && 4155 cat != UCharacterCategory.SURROGATE && 4156 cat != UCharacterCategory.GENERAL_OTHER_TYPES); 4157 } 4158 4159 /** 4160 * {@icu} Determines whether the specified code point is of base form. 4161 * A code point of base form does not graphically combine with preceding 4162 * characters, and is neither a control nor a format character. 4163 * @param ch code point to be determined if it is of base form 4164 * @return true if the code point is of base form 4165 * @stable ICU 2.1 4166 */ isBaseForm(int ch)4167 public static boolean isBaseForm(int ch) 4168 { 4169 int cat = getType(ch); 4170 // if props == 0, it will just fall through and return false 4171 return cat == UCharacterCategory.DECIMAL_DIGIT_NUMBER || 4172 cat == UCharacterCategory.OTHER_NUMBER || 4173 cat == UCharacterCategory.LETTER_NUMBER || 4174 cat == UCharacterCategory.UPPERCASE_LETTER || 4175 cat == UCharacterCategory.LOWERCASE_LETTER || 4176 cat == UCharacterCategory.TITLECASE_LETTER || 4177 cat == UCharacterCategory.MODIFIER_LETTER || 4178 cat == UCharacterCategory.OTHER_LETTER || 4179 cat == UCharacterCategory.NON_SPACING_MARK || 4180 cat == UCharacterCategory.ENCLOSING_MARK || 4181 cat == UCharacterCategory.COMBINING_SPACING_MARK; 4182 } 4183 4184 /** 4185 * {@icu} Returns the Bidirection property of a code point. 
4186 * For example, 0x0041 (letter A) has the LEFT_TO_RIGHT directional 4187 * property.<br> 4188 * Result returned belongs to the interface 4189 * <a href=UCharacterDirection.html>UCharacterDirection</a> 4190 * @param ch the code point to be determined its direction 4191 * @return direction constant from UCharacterDirection. 4192 * @stable ICU 2.1 4193 */ getDirection(int ch)4194 public static int getDirection(int ch) 4195 { 4196 return UBiDiProps.INSTANCE.getClass(ch); 4197 } 4198 4199 /** 4200 * Determines whether the code point has the "mirrored" property. 4201 * This property is set for characters that are commonly used in 4202 * Right-To-Left contexts and need to be displayed with a "mirrored" 4203 * glyph. 4204 * @param ch code point whose mirror is to be determined 4205 * @return true if the code point has the "mirrored" property 4206 * @stable ICU 2.1 4207 */ isMirrored(int ch)4208 public static boolean isMirrored(int ch) 4209 { 4210 return UBiDiProps.INSTANCE.isMirrored(ch); 4211 } 4212 4213 /** 4214 * {@icu} Maps the specified code point to a "mirror-image" code point. 4215 * For code points with the "mirrored" property, implementations sometimes 4216 * need a "poor man's" mapping to another code point such that the default 4217 * glyph may serve as the mirror-image of the default glyph of the 4218 * specified code point.<br> 4219 * This is useful for text conversion to and from codepages with visual 4220 * order, and for displays without glyph selection capabilities. 4221 * @param ch code point whose mirror is to be retrieved 4222 * @return another code point that may serve as a mirror-image substitute, 4223 * or ch itself if there is no such mapping or ch does not have the 4224 * "mirrored" property 4225 * @stable ICU 2.1 4226 */ getMirror(int ch)4227 public static int getMirror(int ch) 4228 { 4229 return UBiDiProps.INSTANCE.getMirror(ch); 4230 } 4231 4232 /** 4233 * {@icu} Maps the specified character to its paired bracket character. 
4234 * For Bidi_Paired_Bracket_Type!=None, this is the same as getMirror(int). 4235 * Otherwise c itself is returned. 4236 * See http://www.unicode.org/reports/tr9/ 4237 * 4238 * @param c the code point to be mapped 4239 * @return the paired bracket code point, 4240 * or c itself if there is no such mapping 4241 * (Bidi_Paired_Bracket_Type=None) 4242 * 4243 * @see UProperty#BIDI_PAIRED_BRACKET 4244 * @see UProperty#BIDI_PAIRED_BRACKET_TYPE 4245 * @see #getMirror(int) 4246 * @stable ICU 52 4247 */ getBidiPairedBracket(int c)4248 public static int getBidiPairedBracket(int c) { 4249 return UBiDiProps.INSTANCE.getPairedBracket(c); 4250 } 4251 4252 /** 4253 * {@icu} Returns the combining class of the argument codepoint 4254 * @param ch code point whose combining is to be retrieved 4255 * @return the combining class of the codepoint 4256 * @stable ICU 2.1 4257 */ getCombiningClass(int ch)4258 public static int getCombiningClass(int ch) 4259 { 4260 return Normalizer2.getNFDInstance().getCombiningClass(ch); 4261 } 4262 4263 /** 4264 * {@icu} A code point is illegal if and only if 4265 * <ul> 4266 * <li> Out of bounds, less than 0 or greater than UCharacter.MAX_VALUE 4267 * <li> A surrogate value, 0xD800 to 0xDFFF 4268 * <li> Not-a-character, having the form 0x xxFFFF or 0x xxFFFE 4269 * </ul> 4270 * Note: legal does not mean that it is assigned in this version of Unicode. 4271 * @param ch code point to determine if it is a legal code point by itself 4272 * @return true if and only if legal. 4273 * @stable ICU 2.1 4274 */ isLegal(int ch)4275 public static boolean isLegal(int ch) 4276 { 4277 if (ch < MIN_VALUE) { 4278 return false; 4279 } 4280 if (ch < Character.MIN_SURROGATE) { 4281 return true; 4282 } 4283 if (ch <= Character.MAX_SURROGATE) { 4284 return false; 4285 } 4286 if (UCharacterUtility.isNonCharacter(ch)) { 4287 return false; 4288 } 4289 return (ch <= MAX_VALUE); 4290 } 4291 4292 /** 4293 * {@icu} A string is legal iff all its code points are legal. 
4294 * A code point is illegal if and only if 4295 * <ul> 4296 * <li> Out of bounds, less than 0 or greater than UCharacter.MAX_VALUE 4297 * <li> A surrogate value, 0xD800 to 0xDFFF 4298 * <li> Not-a-character, having the form 0x xxFFFF or 0x xxFFFE 4299 * </ul> 4300 * Note: legal does not mean that it is assigned in this version of Unicode. 4301 * @param str containing code points to examin 4302 * @return true if and only if legal. 4303 * @stable ICU 2.1 4304 */ isLegal(String str)4305 public static boolean isLegal(String str) 4306 { 4307 int size = str.length(); 4308 int codepoint; 4309 for (int i = 0; i < size; i += Character.charCount(codepoint)) 4310 { 4311 codepoint = str.codePointAt(i); 4312 if (!isLegal(codepoint)) { 4313 return false; 4314 } 4315 } 4316 return true; 4317 } 4318 4319 /** 4320 * {@icu} Returns the version of Unicode data used. 4321 * @return the unicode version number used 4322 * @stable ICU 2.1 4323 */ getUnicodeVersion()4324 public static VersionInfo getUnicodeVersion() 4325 { 4326 return UCharacterProperty.INSTANCE.m_unicodeVersion_; 4327 } 4328 4329 /** 4330 * {@icu} Returns the most current Unicode name of the argument code point, or 4331 * null if the character is unassigned or outside the range 4332 * UCharacter.MIN_VALUE and UCharacter.MAX_VALUE or does not have a name. 4333 * <br> 4334 * Note calling any methods related to code point names, e.g. get*Name*() 4335 * incurs a one-time initialisation cost to construct the name tables. 
4336 * @param ch the code point for which to get the name 4337 * @return most current Unicode name 4338 * @stable ICU 2.1 4339 */ getName(int ch)4340 public static String getName(int ch) 4341 { 4342 return UCharacterName.INSTANCE.getName(ch, UCharacterNameChoice.UNICODE_CHAR_NAME); 4343 } 4344 4345 /** 4346 * {@icu} Returns the names for each of the characters in a string 4347 * @param s string to format 4348 * @param separator string to go between names 4349 * @return string of names 4350 * @stable ICU 3.8 4351 */ getName(String s, String separator)4352 public static String getName(String s, String separator) { 4353 if (s.length() == 1) { // handle common case 4354 return getName(s.charAt(0)); 4355 } 4356 int cp; 4357 StringBuilder sb = new StringBuilder(); 4358 for (int i = 0; i < s.length(); i += Character.charCount(cp)) { 4359 cp = s.codePointAt(i); 4360 if (i != 0) sb.append(separator); 4361 sb.append(UCharacter.getName(cp)); 4362 } 4363 return sb.toString(); 4364 } 4365 4366 /** 4367 * {@icu} Returns null. 4368 * Used to return the Unicode_1_Name property value which was of little practical value. 4369 * @param ch the code point for which to get the name 4370 * @return null 4371 * @deprecated ICU 49 4372 */ 4373 @Deprecated getName1_0(int ch)4374 public static String getName1_0(int ch) 4375 { 4376 return null; 4377 } 4378 4379 /** 4380 * {@icu} Returns a name for a valid codepoint. Unlike, getName(int) and 4381 * getName1_0(int), this method will return a name even for codepoints that 4382 * are not assigned a name in UnicodeData.txt. 4383 * 4384 * <p>The names are returned in the following order. 4385 * <ul> 4386 * <li> Most current Unicode name if there is any 4387 * <li> Unicode 1.0 name if there is any 4388 * <li> Extended name in the form of 4389 * "<codepoint_type-codepoint_hex_digits>". E.g., <noncharacter-fffe> 4390 * </ul> 4391 * Note calling any methods related to code point names, e.g. 
get*Name*() 4392 * incurs a one-time initialisation cost to construct the name tables. 4393 * @param ch the code point for which to get the name 4394 * @return a name for the argument codepoint 4395 * @stable ICU 2.6 4396 */ getExtendedName(int ch)4397 public static String getExtendedName(int ch) { 4398 return UCharacterName.INSTANCE.getName(ch, UCharacterNameChoice.EXTENDED_CHAR_NAME); 4399 } 4400 4401 /** 4402 * {@icu} Returns the corrected name from NameAliases.txt if there is one. 4403 * Returns null if the character is unassigned or outside the range 4404 * UCharacter.MIN_VALUE and UCharacter.MAX_VALUE or does not have a name. 4405 * <br> 4406 * Note calling any methods related to code point names, e.g. get*Name*() 4407 * incurs a one-time initialisation cost to construct the name tables. 4408 * @param ch the code point for which to get the name alias 4409 * @return Unicode name alias, or null 4410 * @stable ICU 4.4 4411 */ getNameAlias(int ch)4412 public static String getNameAlias(int ch) 4413 { 4414 return UCharacterName.INSTANCE.getName(ch, UCharacterNameChoice.CHAR_NAME_ALIAS); 4415 } 4416 4417 /** 4418 * {@icu} Returns null. 4419 * Used to return the ISO 10646 comment for a character. 4420 * The Unicode ISO_Comment property is deprecated and has no values. 4421 * 4422 * @param ch The code point for which to get the ISO comment. 4423 * It must be the case that {@code 0 <= ch <= 0x10ffff}. 4424 * @return null 4425 * @deprecated ICU 49 4426 */ 4427 @Deprecated getISOComment(int ch)4428 public static String getISOComment(int ch) 4429 { 4430 return null; 4431 } 4432 4433 /** 4434 * {@icu} <p>Finds a Unicode code point by its most current Unicode name and 4435 * return its code point value. All Unicode names are in uppercase. 4436 * Note calling any methods related to code point names, e.g. get*Name*() 4437 * incurs a one-time initialisation cost to construct the name tables. 
4438 * @param name most current Unicode character name whose code point is to 4439 * be returned 4440 * @return code point or -1 if name is not found 4441 * @stable ICU 2.1 4442 */ getCharFromName(String name)4443 public static int getCharFromName(String name){ 4444 return UCharacterName.INSTANCE.getCharFromName( 4445 UCharacterNameChoice.UNICODE_CHAR_NAME, name); 4446 } 4447 4448 /** 4449 * {@icu} Returns -1. 4450 * <p>Used to find a Unicode character by its version 1.0 Unicode name and return 4451 * its code point value. 4452 * @param name Unicode 1.0 code point name whose code point is to be 4453 * returned 4454 * @return -1 4455 * @deprecated ICU 49 4456 * @see #getName1_0(int) 4457 */ 4458 @Deprecated getCharFromName1_0(String name)4459 public static int getCharFromName1_0(String name){ 4460 return -1; 4461 } 4462 4463 /** 4464 * {@icu} <p>Find a Unicode character by either its name and return its code 4465 * point value. All Unicode names are in uppercase. 4466 * Extended names are all lowercase except for numbers and are contained 4467 * within angle brackets. 4468 * The names are searched in the following order 4469 * <ul> 4470 * <li> Most current Unicode name if there is any 4471 * <li> Unicode 1.0 name if there is any 4472 * <li> Extended name in the form of 4473 * "<codepoint_type-codepoint_hex_digits>". E.g. <noncharacter-FFFE> 4474 * </ul> 4475 * Note calling any methods related to code point names, e.g. get*Name*() 4476 * incurs a one-time initialisation cost to construct the name tables. 4477 * @param name codepoint name 4478 * @return code point associated with the name or -1 if the name is not 4479 * found. 
4480 * @stable ICU 2.6 4481 */ getCharFromExtendedName(String name)4482 public static int getCharFromExtendedName(String name){ 4483 return UCharacterName.INSTANCE.getCharFromName( 4484 UCharacterNameChoice.EXTENDED_CHAR_NAME, name); 4485 } 4486 4487 /** 4488 * {@icu} <p>Find a Unicode character by its corrected name alias and return 4489 * its code point value. All Unicode names are in uppercase. 4490 * Note calling any methods related to code point names, e.g. get*Name*() 4491 * incurs a one-time initialisation cost to construct the name tables. 4492 * @param name Unicode name alias whose code point is to be returned 4493 * @return code point or -1 if name is not found 4494 * @stable ICU 4.4 4495 */ getCharFromNameAlias(String name)4496 public static int getCharFromNameAlias(String name){ 4497 return UCharacterName.INSTANCE.getCharFromName(UCharacterNameChoice.CHAR_NAME_ALIAS, name); 4498 } 4499 4500 /** 4501 * {@icu} Return the Unicode name for a given property, as given in the 4502 * Unicode database file PropertyAliases.txt. Most properties 4503 * have more than one name. The nameChoice determines which one 4504 * is returned. 4505 * 4506 * In addition, this function maps the property 4507 * UProperty.GENERAL_CATEGORY_MASK to the synthetic names "gcm" / 4508 * "General_Category_Mask". These names are not in 4509 * PropertyAliases.txt. 4510 * 4511 * @param property UProperty selector. 4512 * 4513 * @param nameChoice UProperty.NameChoice selector for which name 4514 * to get. All properties have a long name. Most have a short 4515 * name, but some do not. Unicode allows for additional names; if 4516 * present these will be returned by UProperty.NameChoice.LONG + i, 4517 * where i=1, 2,... 4518 * 4519 * @return a name, or null if Unicode explicitly defines no name 4520 * ("n/a") for a given property/nameChoice. If a given nameChoice 4521 * throws an exception, then all larger values of nameChoice will 4522 * throw an exception. 
If null is returned for a given 4523 * nameChoice, then other nameChoice values may return non-null 4524 * results. 4525 * 4526 * @exception IllegalArgumentException thrown if property or 4527 * nameChoice are invalid. 4528 * 4529 * @see UProperty 4530 * @see UProperty.NameChoice 4531 * @stable ICU 2.4 4532 */ getPropertyName(int property, int nameChoice)4533 public static String getPropertyName(int property, 4534 int nameChoice) { 4535 return UPropertyAliases.INSTANCE.getPropertyName(property, nameChoice); 4536 } 4537 4538 /** 4539 * {@icu} Return the UProperty selector for a given property name, as 4540 * specified in the Unicode database file PropertyAliases.txt. 4541 * Short, long, and any other variants are recognized. 4542 * 4543 * In addition, this function maps the synthetic names "gcm" / 4544 * "General_Category_Mask" to the property 4545 * UProperty.GENERAL_CATEGORY_MASK. These names are not in 4546 * PropertyAliases.txt. 4547 * 4548 * @param propertyAlias the property name to be matched. The name 4549 * is compared using "loose matching" as described in 4550 * PropertyAliases.txt. 4551 * 4552 * @return a UProperty enum. 4553 * 4554 * @exception IllegalArgumentException thrown if propertyAlias 4555 * is not recognized. 4556 * 4557 * @see UProperty 4558 * @stable ICU 2.4 4559 */ getPropertyEnum(CharSequence propertyAlias)4560 public static int getPropertyEnum(CharSequence propertyAlias) { 4561 int propEnum = UPropertyAliases.INSTANCE.getPropertyEnum(propertyAlias); 4562 if (propEnum == UProperty.UNDEFINED) { 4563 throw new IllegalIcuArgumentException("Invalid name: " + propertyAlias); 4564 } 4565 return propEnum; 4566 } 4567 4568 /** 4569 * {@icu} Return the Unicode name for a given property value, as given in 4570 * the Unicode database file PropertyValueAliases.txt. Most 4571 * values have more than one name. The nameChoice determines 4572 * which one is returned. 
4573 * 4574 * Note: Some of the names in PropertyValueAliases.txt can only be 4575 * retrieved using UProperty.GENERAL_CATEGORY_MASK, not 4576 * UProperty.GENERAL_CATEGORY. These include: "C" / "Other", "L" / 4577 * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P" 4578 * / "Punctuation", "S" / "Symbol", and "Z" / "Separator". 4579 * 4580 * @param property UProperty selector constant. 4581 * UProperty.INT_START <= property < UProperty.INT_LIMIT or 4582 * UProperty.BINARY_START <= property < UProperty.BINARY_LIMIT or 4583 * UProperty.MASK_START < = property < UProperty.MASK_LIMIT. 4584 * If out of range, null is returned. 4585 * 4586 * @param value selector for a value for the given property. In 4587 * general, valid values range from 0 up to some maximum. There 4588 * are a few exceptions: (1.) UProperty.BLOCK values begin at the 4589 * non-zero value BASIC_LATIN.getID(). (2.) 4590 * UProperty.CANONICAL_COMBINING_CLASS values are not contiguous 4591 * and range from 0..240. (3.) UProperty.GENERAL_CATEGORY_MASK values 4592 * are mask values produced by left-shifting 1 by 4593 * UCharacter.getType(). This allows grouped categories such as 4594 * [:L:] to be represented. Mask values are non-contiguous. 4595 * 4596 * @param nameChoice UProperty.NameChoice selector for which name 4597 * to get. All values have a long name. Most have a short name, 4598 * but some do not. Unicode allows for additional names; if 4599 * present these will be returned by UProperty.NameChoice.LONG + i, 4600 * where i=1, 2,... 4601 * 4602 * @return a name, or null if Unicode explicitly defines no name 4603 * ("n/a") for a given property/value/nameChoice. If a given 4604 * nameChoice throws an exception, then all larger values of 4605 * nameChoice will throw an exception. If null is returned for a 4606 * given nameChoice, then other nameChoice values may return 4607 * non-null results. 
4608 * 4609 * @exception IllegalArgumentException thrown if property, value, 4610 * or nameChoice are invalid. 4611 * 4612 * @see UProperty 4613 * @see UProperty.NameChoice 4614 * @stable ICU 2.4 4615 */ getPropertyValueName(int property, int value, int nameChoice)4616 public static String getPropertyValueName(int property, 4617 int value, 4618 int nameChoice) 4619 { 4620 if ((property == UProperty.CANONICAL_COMBINING_CLASS 4621 || property == UProperty.LEAD_CANONICAL_COMBINING_CLASS 4622 || property == UProperty.TRAIL_CANONICAL_COMBINING_CLASS) 4623 && value >= UCharacter.getIntPropertyMinValue( 4624 UProperty.CANONICAL_COMBINING_CLASS) 4625 && value <= UCharacter.getIntPropertyMaxValue( 4626 UProperty.CANONICAL_COMBINING_CLASS) 4627 && nameChoice >= 0 && nameChoice < UProperty.NameChoice.COUNT) { 4628 // this is hard coded for the valid cc 4629 // because PropertyValueAliases.txt does not contain all of them 4630 try { 4631 return UPropertyAliases.INSTANCE.getPropertyValueName(property, value, 4632 nameChoice); 4633 } 4634 catch (IllegalArgumentException e) { 4635 return null; 4636 } 4637 } 4638 return UPropertyAliases.INSTANCE.getPropertyValueName(property, value, nameChoice); 4639 } 4640 4641 /** 4642 * {@icu} Return the property value integer for a given value name, as 4643 * specified in the Unicode database file PropertyValueAliases.txt. 4644 * Short, long, and any other variants are recognized. 4645 * 4646 * Note: Some of the names in PropertyValueAliases.txt will only be 4647 * recognized with UProperty.GENERAL_CATEGORY_MASK, not 4648 * UProperty.GENERAL_CATEGORY. These include: "C" / "Other", "L" / 4649 * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P" 4650 * / "Punctuation", "S" / "Symbol", and "Z" / "Separator". 4651 * 4652 * @param property UProperty selector constant. 
4653 * UProperty.INT_START <= property < UProperty.INT_LIMIT or 4654 * UProperty.BINARY_START <= property < UProperty.BINARY_LIMIT or 4655 * UProperty.MASK_START < = property < UProperty.MASK_LIMIT. 4656 * Only these properties can be enumerated. 4657 * 4658 * @param valueAlias the value name to be matched. The name is 4659 * compared using "loose matching" as described in 4660 * PropertyValueAliases.txt. 4661 * 4662 * @return a value integer. Note: UProperty.GENERAL_CATEGORY 4663 * values are mask values produced by left-shifting 1 by 4664 * UCharacter.getType(). This allows grouped categories such as 4665 * [:L:] to be represented. 4666 * 4667 * @see UProperty 4668 * @throws IllegalArgumentException if property is not a valid UProperty 4669 * selector or valueAlias is not a value of this property 4670 * @stable ICU 2.4 4671 */ getPropertyValueEnum(int property, CharSequence valueAlias)4672 public static int getPropertyValueEnum(int property, CharSequence valueAlias) { 4673 int propEnum = UPropertyAliases.INSTANCE.getPropertyValueEnum(property, valueAlias); 4674 if (propEnum == UProperty.UNDEFINED) { 4675 throw new IllegalIcuArgumentException("Invalid name: " + valueAlias); 4676 } 4677 return propEnum; 4678 } 4679 4680 /** 4681 * Same as {@link #getPropertyValueEnum(int, CharSequence)}, except doesn't throw exception. Instead, returns UProperty.UNDEFINED. 4682 * @param property Same as {@link #getPropertyValueEnum(int, CharSequence)} 4683 * @param valueAlias Same as {@link #getPropertyValueEnum(int, CharSequence)} 4684 * @return returns UProperty.UNDEFINED if the value is not valid, otherwise the value. 4685 * @internal 4686 * @deprecated This API is ICU internal only. 
4687 */ 4688 @Deprecated getPropertyValueEnumNoThrow(int property, CharSequence valueAlias)4689 public static int getPropertyValueEnumNoThrow(int property, CharSequence valueAlias) { 4690 return UPropertyAliases.INSTANCE.getPropertyValueEnumNoThrow(property, valueAlias); 4691 } 4692 4693 4694 /** 4695 * {@icu} Returns a code point corresponding to the two surrogate code units. 4696 * 4697 * @param lead the lead char 4698 * @param trail the trail char 4699 * @return code point if surrogate characters are valid. 4700 * @exception IllegalArgumentException thrown when the code units do 4701 * not form a valid code point 4702 * @stable ICU 2.1 4703 */ getCodePoint(char lead, char trail)4704 public static int getCodePoint(char lead, char trail) 4705 { 4706 if (Character.isSurrogatePair(lead, trail)) { 4707 return Character.toCodePoint(lead, trail); 4708 } 4709 throw new IllegalArgumentException("Illegal surrogate characters"); 4710 } 4711 4712 /** 4713 * {@icu} Returns the code point corresponding to the BMP code point. 4714 * 4715 * @param char16 the BMP code point 4716 * @return code point if argument is a valid character. 4717 * @exception IllegalArgumentException thrown when char16 is not a valid 4718 * code point 4719 * @stable ICU 2.1 4720 */ getCodePoint(char char16)4721 public static int getCodePoint(char char16) 4722 { 4723 if (UCharacter.isLegal(char16)) { 4724 return char16; 4725 } 4726 throw new IllegalArgumentException("Illegal codepoint"); 4727 } 4728 4729 /** 4730 * Implementation of UCaseProps.ContextIterator, iterates over a String. 4731 * See ustrcase.c/utf16_caseContextIterator(). 4732 */ 4733 private static class StringContextIterator implements UCaseProps.ContextIterator { 4734 /** 4735 * Constructor. 4736 * @param s String to iterate over. 
4737 */ StringContextIterator(String s)4738 StringContextIterator(String s) { 4739 this.s=s; 4740 limit=s.length(); 4741 cpStart=cpLimit=index=0; 4742 dir=0; 4743 } 4744 4745 /** 4746 * Set the iteration limit for nextCaseMapCP() to an index within the string. 4747 * If the limit parameter is negative or past the string, then the 4748 * string length is restored as the iteration limit. 4749 * 4750 * <p>This limit does not affect the next() function which always 4751 * iterates to the very end of the string. 4752 * 4753 * @param lim The iteration limit. 4754 */ setLimit(int lim)4755 public void setLimit(int lim) { 4756 if(0<=lim && lim<=s.length()) { 4757 limit=lim; 4758 } else { 4759 limit=s.length(); 4760 } 4761 } 4762 4763 /** 4764 * Move to the iteration limit without fetching code points up to there. 4765 */ moveToLimit()4766 public void moveToLimit() { 4767 cpStart=cpLimit=limit; 4768 } 4769 4770 /** 4771 * Iterate forward through the string to fetch the next code point 4772 * to be case-mapped, and set the context indexes for it. 4773 * 4774 * <p>When the iteration limit is reached (and -1 is returned), 4775 * getCPStart() will be at the iteration limit. 4776 * 4777 * <p>Iteration with next() does not affect the position for nextCaseMapCP(). 4778 * 4779 * @return The next code point to be case-mapped, or <0 when the iteration is done. 4780 */ nextCaseMapCP()4781 public int nextCaseMapCP() { 4782 cpStart=cpLimit; 4783 if(cpLimit<limit) { 4784 int c=s.codePointAt(cpLimit); 4785 cpLimit+=Character.charCount(c); 4786 return c; 4787 } else { 4788 return -1; 4789 } 4790 } 4791 4792 /** 4793 * Returns the start of the code point that was last returned 4794 * by nextCaseMapCP(). 4795 */ getCPStart()4796 public int getCPStart() { 4797 return cpStart; 4798 } 4799 4800 /** 4801 * Returns the limit of the code point that was last returned 4802 * by nextCaseMapCP(). 
4803 */ getCPLimit()4804 public int getCPLimit() { 4805 return cpLimit; 4806 } 4807 4808 // implement UCaseProps.ContextIterator 4809 // The following code is not used anywhere in this private class reset(int direction)4810 public void reset(int direction) { 4811 if(direction>0) { 4812 /* reset for forward iteration */ 4813 dir=1; 4814 index=cpLimit; 4815 } else if(direction<0) { 4816 /* reset for backward iteration */ 4817 dir=-1; 4818 index=cpStart; 4819 } else { 4820 // not a valid direction 4821 dir=0; 4822 index=0; 4823 } 4824 } 4825 next()4826 public int next() { 4827 int c; 4828 4829 if(dir>0 && index<s.length()) { 4830 c=s.codePointAt(index); 4831 index+=Character.charCount(c); 4832 return c; 4833 } else if(dir<0 && index>0) { 4834 c=s.codePointBefore(index); 4835 index-=Character.charCount(c); 4836 return c; 4837 } 4838 return -1; 4839 } 4840 4841 // variables 4842 protected String s; 4843 protected int index, limit, cpStart, cpLimit; 4844 protected int dir; // 0=initial state >0=forward <0=backward 4845 } 4846 4847 /** 4848 * Returns the uppercase version of the argument string. 4849 * Casing is dependent on the default locale and context-sensitive. 4850 * @param str source string to be performed on 4851 * @return uppercase version of the argument string 4852 * @stable ICU 2.1 4853 */ toUpperCase(String str)4854 public static String toUpperCase(String str) 4855 { 4856 return toUpperCase(ULocale.getDefault(), str); 4857 } 4858 4859 /** 4860 * Returns the lowercase version of the argument string. 4861 * Casing is dependent on the default locale and context-sensitive 4862 * @param str source string to be performed on 4863 * @return lowercase version of the argument string 4864 * @stable ICU 2.1 4865 */ toLowerCase(String str)4866 public static String toLowerCase(String str) 4867 { 4868 return toLowerCase(ULocale.getDefault(), str); 4869 } 4870 4871 /** 4872 * <p>Returns the titlecase version of the argument string. 
4873 * <p>Position for titlecasing is determined by the argument break 4874 * iterator, hence the user can customize his break iterator for 4875 * a specialized titlecasing. In this case only the forward iteration 4876 * needs to be implemented. 4877 * If the break iterator passed in is null, the default Unicode algorithm 4878 * will be used to determine the titlecase positions. 4879 * 4880 * <p>Only positions returned by the break iterator will be title cased, 4881 * character in between the positions will all be in lower case. 4882 * <p>Casing is dependent on the default locale and context-sensitive 4883 * @param str source string to be performed on 4884 * @param breakiter break iterator to determine the positions in which 4885 * the character should be title cased. 4886 * @return lowercase version of the argument string 4887 * @stable ICU 2.6 4888 */ toTitleCase(String str, BreakIterator breakiter)4889 public static String toTitleCase(String str, BreakIterator breakiter) 4890 { 4891 return toTitleCase(ULocale.getDefault(), str, breakiter); 4892 } 4893 4894 /** 4895 * Returns the uppercase version of the argument string. 4896 * Casing is dependent on the argument locale and context-sensitive. 4897 * @param locale which string is to be converted in 4898 * @param str source string to be performed on 4899 * @return uppercase version of the argument string 4900 * @stable ICU 2.1 4901 */ toUpperCase(Locale locale, String str)4902 public static String toUpperCase(Locale locale, String str) 4903 { 4904 return toUpperCase(ULocale.forLocale(locale), str); 4905 } 4906 4907 /** 4908 * Returns the uppercase version of the argument string. 4909 * Casing is dependent on the argument locale and context-sensitive. 
4910 * @param locale which string is to be converted in 4911 * @param str source string to be performed on 4912 * @return uppercase version of the argument string 4913 * @stable ICU 3.2 4914 */ toUpperCase(ULocale locale, String str)4915 public static String toUpperCase(ULocale locale, String str) { 4916 StringContextIterator iter = new StringContextIterator(str); 4917 StringBuilder result = new StringBuilder(str.length()); 4918 int[] locCache = new int[1]; 4919 int c; 4920 4921 if (locale == null) { 4922 locale = ULocale.getDefault(); 4923 } 4924 locCache[0]=0; 4925 4926 while((c=iter.nextCaseMapCP())>=0) { 4927 c = UCaseProps.INSTANCE.toFullUpper(c, iter, result, locale, locCache); 4928 4929 /* decode the result */ 4930 if(c<0) { 4931 /* (not) original code point */ 4932 c=~c; 4933 } else if(c<=UCaseProps.MAX_STRING_LENGTH) { 4934 /* mapping already appended to result */ 4935 continue; 4936 /* } else { append single-code point mapping */ 4937 } 4938 result.appendCodePoint(c); 4939 } 4940 return result.toString(); 4941 } 4942 4943 /** 4944 * Returns the lowercase version of the argument string. 4945 * Casing is dependent on the argument locale and context-sensitive 4946 * @param locale which string is to be converted in 4947 * @param str source string to be performed on 4948 * @return lowercase version of the argument string 4949 * @stable ICU 2.1 4950 */ toLowerCase(Locale locale, String str)4951 public static String toLowerCase(Locale locale, String str) 4952 { 4953 return toLowerCase(ULocale.forLocale(locale), str); 4954 } 4955 4956 /** 4957 * Returns the lowercase version of the argument string. 
4958 * Casing is dependent on the argument locale and context-sensitive 4959 * @param locale which string is to be converted in 4960 * @param str source string to be performed on 4961 * @return lowercase version of the argument string 4962 * @stable ICU 3.2 4963 */ toLowerCase(ULocale locale, String str)4964 public static String toLowerCase(ULocale locale, String str) { 4965 StringContextIterator iter = new StringContextIterator(str); 4966 StringBuilder result = new StringBuilder(str.length()); 4967 int[] locCache = new int[1]; 4968 int c; 4969 4970 if (locale == null) { 4971 locale = ULocale.getDefault(); 4972 } 4973 locCache[0]=0; 4974 4975 while((c=iter.nextCaseMapCP())>=0) { 4976 c = UCaseProps.INSTANCE.toFullLower(c, iter, result, locale, locCache); 4977 4978 /* decode the result */ 4979 if(c<0) { 4980 /* (not) original code point */ 4981 c=~c; 4982 } else if(c<=UCaseProps.MAX_STRING_LENGTH) { 4983 /* mapping already appended to result */ 4984 continue; 4985 /* } else { append single-code point mapping */ 4986 } 4987 result.appendCodePoint(c); 4988 } 4989 return result.toString(); 4990 } 4991 4992 /** 4993 * <p>Returns the titlecase version of the argument string. 4994 * <p>Position for titlecasing is determined by the argument break 4995 * iterator, hence the user can customize his break iterator for 4996 * a specialized titlecasing. In this case only the forward iteration 4997 * needs to be implemented. 4998 * If the break iterator passed in is null, the default Unicode algorithm 4999 * will be used to determine the titlecase positions. 5000 * 5001 * <p>Only positions returned by the break iterator will be title cased, 5002 * character in between the positions will all be in lower case. 
5003 * <p>Casing is dependent on the argument locale and context-sensitive 5004 * @param locale which string is to be converted in 5005 * @param str source string to be performed on 5006 * @param breakiter break iterator to determine the positions in which 5007 * the character should be title cased. 5008 * @return lowercase version of the argument string 5009 * @stable ICU 2.6 5010 */ toTitleCase(Locale locale, String str, BreakIterator breakiter)5011 public static String toTitleCase(Locale locale, String str, 5012 BreakIterator breakiter) 5013 { 5014 return toTitleCase(ULocale.forLocale(locale), str, breakiter); 5015 } 5016 5017 /** 5018 * <p>Returns the titlecase version of the argument string. 5019 * <p>Position for titlecasing is determined by the argument break 5020 * iterator, hence the user can customize his break iterator for 5021 * a specialized titlecasing. In this case only the forward iteration 5022 * needs to be implemented. 5023 * If the break iterator passed in is null, the default Unicode algorithm 5024 * will be used to determine the titlecase positions. 5025 * 5026 * <p>Only positions returned by the break iterator will be title cased, 5027 * character in between the positions will all be in lower case. 5028 * <p>Casing is dependent on the argument locale and context-sensitive 5029 * @param locale which string is to be converted in 5030 * @param str source string to be performed on 5031 * @param titleIter break iterator to determine the positions in which 5032 * the character should be title cased. 5033 * @return lowercase version of the argument string 5034 * @stable ICU 3.2 5035 */ toTitleCase(ULocale locale, String str, BreakIterator titleIter)5036 public static String toTitleCase(ULocale locale, String str, 5037 BreakIterator titleIter) { 5038 return toTitleCase(locale, str, titleIter, 0); 5039 } 5040 5041 /** 5042 * <p>Returns the titlecase version of the argument string. 
     * <p>Position for titlecasing is determined by the argument break
     * iterator, hence the user can customize his break iterator for
     * a specialized titlecasing. In this case only the forward iteration
     * needs to be implemented.
     * If the break iterator passed in is null, the default Unicode algorithm
     * will be used to determine the titlecase positions.
     *
     * <p>Only positions returned by the break iterator will be title cased,
     * character in between the positions will all be in lower case.
     * <p>Casing is dependent on the argument locale and context-sensitive
     * @param locale which string is to be converted in; the default locale
     *        is used when this is null
     * @param str source string to be performed on
     * @param titleIter break iterator to determine the positions in which
     *        the character should be title cased; a word break iterator for
     *        the locale is used when this is null
     * @param options bit set to modify the titlecasing operation
     * @return titlecase version of the argument string
     * @stable ICU 3.8
     * @see #TITLECASE_NO_LOWERCASE
     * @see #TITLECASE_NO_BREAK_ADJUSTMENT
     */
    public static String toTitleCase(ULocale locale, String str,
            BreakIterator titleIter,
            int options) {
        StringContextIterator iter = new StringContextIterator(str);
        StringBuilder result = new StringBuilder(str.length());
        int[] locCache = new int[1];
        int c, nc, srcLength = str.length();

        if (locale == null) {
            locale = ULocale.getDefault();
        }
        locCache[0]=0;

        if(titleIter == null) {
            titleIter = BreakIterator.getWordInstance(locale);
        }
        titleIter.setText(str);

        int prev, titleStart, index;
        boolean isFirstIndex;
        boolean isDutch = locale.getLanguage().equals("nl");
        /* true until the J of a Dutch "IJ" has been uppercased in the current segment */
        boolean FirstIJ = true;

        /* set up local variables */
        prev=0;
        isFirstIndex=true;

        /* titlecasing loop: one pass per segment [prev..index[ between break positions */
        while(prev<srcLength) {
            /* find next index where to titlecase */
            if(isFirstIndex) {
                isFirstIndex=false;
                index=titleIter.first();
            } else {
                index=titleIter.next();
            }
            /* treat "no more breaks" or a break past the end as the end of the string */
            if(index==BreakIterator.DONE || index>srcLength) {
                index=srcLength;
            }

            /*
             * Unicode 4 & 5 section 3.13 Default Case Operations:
             *
             * R3  toTitlecase(X): Find the word boundaries based on Unicode Standard Annex
             * #29, "Text Boundaries." Between each pair of word boundaries, find the first
             * cased character F. If F exists, map F to default_title(F); then map each
             * subsequent character C to default_lower(C).
             *
             * In this implementation, segment [prev..index[ into 3 parts:
             * a) uncased characters (copy as-is) [prev..titleStart[
             * b) first case letter (titlecase)         [titleStart..titleLimit[
             * c) subsequent characters (lowercase)                 [titleLimit..index[
             */
            if(prev<index) {
                /* find and copy uncased characters [prev..titleStart[ */
                iter.setLimit(index);
                c=iter.nextCaseMapCP();
                /* unless break adjustment is disabled, skip leading uncased code points */
                if((options&TITLECASE_NO_BREAK_ADJUSTMENT)==0
                        && UCaseProps.NONE==UCaseProps.INSTANCE.getType(c)) {
                    /* empty body: the loop condition itself advances to the first cased code point */
                    while((c=iter.nextCaseMapCP())>=0
                            && UCaseProps.NONE==UCaseProps.INSTANCE.getType(c)) {}
                    titleStart=iter.getCPStart();
                    if(prev<titleStart) {
                        result.append(str, prev, titleStart);
                    }
                } else {
                    titleStart=prev;
                }

                /* titleStart==index means the whole segment was uncased and is already copied */
                if(titleStart<index) {
                    FirstIJ = true;
                    /* titlecase c which is from titleStart */
                    c = UCaseProps.INSTANCE.toFullTitle(c, iter, result, locale, locCache);

                    /* decode the result and lowercase up to index */
                    for(;;) {
                        if(c<0) {
                            /* (not) original code point */
                            c=~c;
                            result.appendCodePoint(c);
                        } else if(c<=UCaseProps.MAX_STRING_LENGTH) {
                            /* mapping already appended to result */
                        } else {
                            /* append single-code point mapping */
                            result.appendCodePoint(c);
                        }

                        if((options&TITLECASE_NO_LOWERCASE)!=0) {
                            /* Optionally just copy the rest of the word unchanged. */

                            int titleLimit=iter.getCPLimit();
                            if(titleLimit<index) {
                                /* Special Case - Dutch IJ Titlecasing */
                                /* only a lowercase 'j' needs replacing; the rest is copied as-is */
                                if (isDutch && c == 0x0049 && str.charAt(titleLimit) == 'j') {
                                    result.append('J').append(str, titleLimit + 1, index);
                                } else {
                                    result.append(str, titleLimit, index);
                                }
                            }
                            /* the remainder was copied directly, so skip the iterator past it */
                            iter.moveToLimit();
                            break;
                        } else if((nc=iter.nextCaseMapCP())>=0) {
                            /* Dutch: a J/j directly after a titlecased I (U+0049) becomes J, once per segment */
                            if (isDutch && (nc == 0x004A || nc == 0x006A)
                                    && (c == 0x0049) && (FirstIJ == true)) {
                                c = 0x004A; /* J */
                                FirstIJ = false;
                            } else {
                                /* Normal operation: Lowercase the rest of the word. */
                                c = UCaseProps.INSTANCE.toFullLower(nc, iter, result, locale,
                                        locCache);
                            }
                            /* c is decoded and appended at the top of the next pass */
                        } else {
                            /* iteration limit (index) reached: segment finished */
                            break;
                        }
                    }
                }
            }

            prev=index;
        }
        return result.toString();
    }


    /*
     * General-category mask (bits are 1 shifted by the UCharacterCategory value)
     * for decimal digits, "other" letters and modifier letters; toTitleFirst()
     * stops scanning at the first code point whose category is in this mask.
     */
    private static final int BREAK_MASK =
            (1<<UCharacterCategory.DECIMAL_DIGIT_NUMBER)
            | (1<<UCharacterCategory.OTHER_LETTER)
            | (1<<UCharacterCategory.MODIFIER_LETTER);

    /**
     * Return a string with just the first word titlecased, for menus and UI, etc. This does not affect most of the string,
     * and sometimes has no effect at all; the original string is returned whenever casing
     * would not be appropriate for the first word (such as for CJK characters or initial numbers).
     * Initial non-letters are skipped in order to find the character to change.
     * Characters past the first affected are left untouched: see also TITLECASE_NO_LOWERCASE.
5198 * <p>Examples: 5199 * <table border='1'><tr><th>Source</th><th>Result</th><th>Locale</th></tr> 5200 * <tr><td>anglo-American locale</td><td>Anglo-American locale</td></tr> 5201 * <tr><td>“contact us”</td><td>“Contact us”</td></tr> 5202 * <tr><td>49ers win!</td><td>49ers win!</td></tr> 5203 * <tr><td>丰(abc)</td><td>丰(abc)</td></tr> 5204 * <tr><td>«ijs»</td><td>«Ijs»</td></tr> 5205 * <tr><td>«ijs»</td><td>«IJs»</td><td>nl-BE</td></tr> 5206 * <tr><td>«ijs»</td><td>«İjs»</td><td>tr-DE</td></tr> 5207 * </table> 5208 * @param locale the locale for accessing exceptional behavior (eg for tr). 5209 * @param str the source string to change 5210 * @return the modified string, or the original if no modifications were necessary. 5211 * @internal 5212 * @deprecated ICU internal only 5213 */ 5214 @Deprecated toTitleFirst(ULocale locale, String str)5215 public static String toTitleFirst(ULocale locale, String str) { 5216 int c = 0; 5217 for (int i = 0; i < str.length(); i += UCharacter.charCount(c)) { 5218 c = UCharacter.codePointAt(str, i); 5219 int propertyMask = UCharacter.getIntPropertyValue(c, UProperty.GENERAL_CATEGORY_MASK); 5220 if ((propertyMask & BREAK_MASK) != 0) { // handle "49ers", initial CJK 5221 break; 5222 } 5223 if (UCaseProps.INSTANCE.getType(c) == UCaseProps.NONE) { 5224 continue; 5225 } 5226 5227 // we now have the first cased character 5228 // What we really want is something like: 5229 // String titled = UCharacter.toTitleCase(locale, str, i, outputCharsTaken); 5230 // That is, just give us the titlecased string, for the locale, at i and following, 5231 // and tell us how many characters are replaced. 
5232 // The following won't work completely: it needs some more substantial changes to UCaseProps 5233 5234 String substring = str.substring(i, i+UCharacter.charCount(c)); 5235 String titled = UCharacter.toTitleCase(locale, substring, BreakIterator.getSentenceInstance(locale), 0); 5236 5237 // skip if no change 5238 if (titled.codePointAt(0) == c) { 5239 // Using 0 is safe, since any change in titling will not have first initial character 5240 break; 5241 } 5242 StringBuilder result = new StringBuilder(str.length()).append(str, 0, i); 5243 int startOfSuffix; 5244 5245 // handle dutch, but check first for 'i', since that's faster. Should be built into UCaseProps. 5246 5247 if (c == 'i' && locale.getLanguage().equals("nl") && i < str.length() && str.charAt(i+1) == 'j') { 5248 result.append("IJ"); 5249 startOfSuffix = 2; 5250 } else { 5251 result.append(titled); 5252 startOfSuffix = i + UCharacter.charCount(c); 5253 } 5254 5255 // add the remainder, and return 5256 return result.append(str, startOfSuffix, str.length()).toString(); 5257 } 5258 return str; // no change 5259 } 5260 5261 /** 5262 * {@icu} <p>Returns the titlecase version of the argument string. 5263 * <p>Position for titlecasing is determined by the argument break 5264 * iterator, hence the user can customize his break iterator for 5265 * a specialized titlecasing. In this case only the forward iteration 5266 * needs to be implemented. 5267 * If the break iterator passed in is null, the default Unicode algorithm 5268 * will be used to determine the titlecase positions. 5269 * 5270 * <p>Only positions returned by the break iterator will be title cased, 5271 * character in between the positions will all be in lower case. 
5272 * <p>Casing is dependent on the argument locale and context-sensitive 5273 * @param locale which string is to be converted in 5274 * @param str source string to be performed on 5275 * @param titleIter break iterator to determine the positions in which 5276 * the character should be title cased. 5277 * @param options bit set to modify the titlecasing operation 5278 * @return lowercase version of the argument string 5279 * @see #TITLECASE_NO_LOWERCASE 5280 * @see #TITLECASE_NO_BREAK_ADJUSTMENT 5281 * @stable ICU 54 5282 */ toTitleCase(Locale locale, String str, BreakIterator titleIter, int options)5283 public static String toTitleCase(Locale locale, String str, 5284 BreakIterator titleIter, 5285 int options) { 5286 return toTitleCase(ULocale.forLocale(locale), str, titleIter, options); 5287 } 5288 5289 /** 5290 * {@icu} The given character is mapped to its case folding equivalent according 5291 * to UnicodeData.txt and CaseFolding.txt; if the character has no case 5292 * folding equivalent, the character itself is returned. 5293 * 5294 * <p>This function only returns the simple, single-code point case mapping. 5295 * Full case mappings should be used whenever possible because they produce 5296 * better results by working on whole strings. 5297 * They can map to a result string with a different length as appropriate. 5298 * Full case mappings are applied by the case mapping functions 5299 * that take String parameters rather than code points (int). 5300 * See also the User Guide chapter on C/POSIX migration: 5301 * http://www.icu-project.org/userguide/posix.html#case_mappings 5302 * 5303 * @param ch the character to be converted 5304 * @param defaultmapping Indicates whether the default mappings defined in 5305 * CaseFolding.txt are to be used, otherwise the 5306 * mappings for dotted I and dotless i marked with 5307 * 'T' in CaseFolding.txt are included. 5308 * @return the case folding equivalent of the character, if 5309 * any; otherwise the character itself. 
5310 * @see #foldCase(String, boolean) 5311 * @stable ICU 2.1 5312 */ foldCase(int ch, boolean defaultmapping)5313 public static int foldCase(int ch, boolean defaultmapping) { 5314 return foldCase(ch, defaultmapping ? FOLD_CASE_DEFAULT : FOLD_CASE_EXCLUDE_SPECIAL_I); 5315 } 5316 5317 /** 5318 * {@icu} The given string is mapped to its case folding equivalent according to 5319 * UnicodeData.txt and CaseFolding.txt; if any character has no case 5320 * folding equivalent, the character itself is returned. 5321 * "Full", multiple-code point case folding mappings are returned here. 5322 * For "simple" single-code point mappings use the API 5323 * foldCase(int ch, boolean defaultmapping). 5324 * @param str the String to be converted 5325 * @param defaultmapping Indicates whether the default mappings defined in 5326 * CaseFolding.txt are to be used, otherwise the 5327 * mappings for dotted I and dotless i marked with 5328 * 'T' in CaseFolding.txt are included. 5329 * @return the case folding equivalent of the character, if 5330 * any; otherwise the character itself. 5331 * @see #foldCase(int, boolean) 5332 * @stable ICU 2.1 5333 */ foldCase(String str, boolean defaultmapping)5334 public static String foldCase(String str, boolean defaultmapping) { 5335 return foldCase(str, defaultmapping ? FOLD_CASE_DEFAULT : FOLD_CASE_EXCLUDE_SPECIAL_I); 5336 } 5337 5338 /** 5339 * {@icu} Option value for case folding: use default mappings defined in 5340 * CaseFolding.txt. 5341 * @stable ICU 2.6 5342 */ 5343 public static final int FOLD_CASE_DEFAULT = 0x0000; 5344 /** 5345 * {@icu} Option value for case folding: 5346 * Use the modified set of mappings provided in CaseFolding.txt to handle dotted I 5347 * and dotless i appropriately for Turkic languages (tr, az). 5348 * 5349 * <p>Before Unicode 3.2, CaseFolding.txt contains mappings marked with 'I' that 5350 * are to be included for default mappings and 5351 * excluded for the Turkic-specific mappings. 
5352 * 5353 * <p>Unicode 3.2 CaseFolding.txt instead contains mappings marked with 'T' that 5354 * are to be excluded for default mappings and 5355 * included for the Turkic-specific mappings. 5356 * 5357 * @stable ICU 2.6 5358 */ 5359 public static final int FOLD_CASE_EXCLUDE_SPECIAL_I = 0x0001; 5360 5361 /** 5362 * {@icu} The given character is mapped to its case folding equivalent according 5363 * to UnicodeData.txt and CaseFolding.txt; if the character has no case 5364 * folding equivalent, the character itself is returned. 5365 * 5366 * <p>This function only returns the simple, single-code point case mapping. 5367 * Full case mappings should be used whenever possible because they produce 5368 * better results by working on whole strings. 5369 * They can map to a result string with a different length as appropriate. 5370 * Full case mappings are applied by the case mapping functions 5371 * that take String parameters rather than code points (int). 5372 * See also the User Guide chapter on C/POSIX migration: 5373 * http://www.icu-project.org/userguide/posix.html#case_mappings 5374 * 5375 * @param ch the character to be converted 5376 * @param options A bit set for special processing. Currently the recognised options 5377 * are FOLD_CASE_EXCLUDE_SPECIAL_I and FOLD_CASE_DEFAULT 5378 * @return the case folding equivalent of the character, if any; otherwise the 5379 * character itself. 5380 * @see #foldCase(String, boolean) 5381 * @stable ICU 2.6 5382 */ foldCase(int ch, int options)5383 public static int foldCase(int ch, int options) { 5384 return UCaseProps.INSTANCE.fold(ch, options); 5385 } 5386 5387 /** 5388 * {@icu} The given string is mapped to its case folding equivalent according to 5389 * UnicodeData.txt and CaseFolding.txt; if any character has no case 5390 * folding equivalent, the character itself is returned. 5391 * "Full", multiple-code point case folding mappings are returned here. 
5392 * For "simple" single-code point mappings use the API 5393 * foldCase(int ch, boolean defaultmapping). 5394 * @param str the String to be converted 5395 * @param options A bit set for special processing. Currently the recognised options 5396 * are FOLD_CASE_EXCLUDE_SPECIAL_I and FOLD_CASE_DEFAULT 5397 * @return the case folding equivalent of the character, if any; otherwise the 5398 * character itself. 5399 * @see #foldCase(int, boolean) 5400 * @stable ICU 2.6 5401 */ foldCase(String str, int options)5402 public static final String foldCase(String str, int options) { 5403 StringBuilder result = new StringBuilder(str.length()); 5404 int c, i, length; 5405 5406 length = str.length(); 5407 for(i=0; i<length;) { 5408 c=str.codePointAt(i); 5409 i+=Character.charCount(c); 5410 c = UCaseProps.INSTANCE.toFullFolding(c, result, options); 5411 5412 /* decode the result */ 5413 if(c<0) { 5414 /* (not) original code point */ 5415 c=~c; 5416 } else if(c<=UCaseProps.MAX_STRING_LENGTH) { 5417 /* mapping already appended to result */ 5418 continue; 5419 /* } else { append single-code point mapping */ 5420 } 5421 result.appendCodePoint(c); 5422 } 5423 return result.toString(); 5424 } 5425 5426 /** 5427 * {@icu} Returns the numeric value of a Han character. 5428 * 5429 * <p>This returns the value of Han 'numeric' code points, 5430 * including those for zero, ten, hundred, thousand, ten thousand, 5431 * and hundred million. 5432 * This includes both the standard and 'checkwriting' 5433 * characters, the 'big circle' zero character, and the standard 5434 * zero character. 5435 * 5436 * <p>Note: The Unicode Standard has numeric values for more 5437 * Han characters recognized by this method 5438 * (see {@link #getNumericValue(int)} and the UCD file DerivedNumericValues.txt), 5439 * and a {@link com.ibm.icu.text.NumberFormat} can be used with 5440 * a Chinese {@link com.ibm.icu.text.NumberingSystem}. 
5441 * 5442 * @param ch code point to query 5443 * @return value if it is a Han 'numeric character,' otherwise return -1. 5444 * @stable ICU 2.4 5445 */ getHanNumericValue(int ch)5446 public static int getHanNumericValue(int ch) 5447 { 5448 switch(ch) 5449 { 5450 case IDEOGRAPHIC_NUMBER_ZERO_ : 5451 case CJK_IDEOGRAPH_COMPLEX_ZERO_ : 5452 return 0; // Han Zero 5453 case CJK_IDEOGRAPH_FIRST_ : 5454 case CJK_IDEOGRAPH_COMPLEX_ONE_ : 5455 return 1; // Han One 5456 case CJK_IDEOGRAPH_SECOND_ : 5457 case CJK_IDEOGRAPH_COMPLEX_TWO_ : 5458 return 2; // Han Two 5459 case CJK_IDEOGRAPH_THIRD_ : 5460 case CJK_IDEOGRAPH_COMPLEX_THREE_ : 5461 return 3; // Han Three 5462 case CJK_IDEOGRAPH_FOURTH_ : 5463 case CJK_IDEOGRAPH_COMPLEX_FOUR_ : 5464 return 4; // Han Four 5465 case CJK_IDEOGRAPH_FIFTH_ : 5466 case CJK_IDEOGRAPH_COMPLEX_FIVE_ : 5467 return 5; // Han Five 5468 case CJK_IDEOGRAPH_SIXTH_ : 5469 case CJK_IDEOGRAPH_COMPLEX_SIX_ : 5470 return 6; // Han Six 5471 case CJK_IDEOGRAPH_SEVENTH_ : 5472 case CJK_IDEOGRAPH_COMPLEX_SEVEN_ : 5473 return 7; // Han Seven 5474 case CJK_IDEOGRAPH_EIGHTH_ : 5475 case CJK_IDEOGRAPH_COMPLEX_EIGHT_ : 5476 return 8; // Han Eight 5477 case CJK_IDEOGRAPH_NINETH_ : 5478 case CJK_IDEOGRAPH_COMPLEX_NINE_ : 5479 return 9; // Han Nine 5480 case CJK_IDEOGRAPH_TEN_ : 5481 case CJK_IDEOGRAPH_COMPLEX_TEN_ : 5482 return 10; 5483 case CJK_IDEOGRAPH_HUNDRED_ : 5484 case CJK_IDEOGRAPH_COMPLEX_HUNDRED_ : 5485 return 100; 5486 case CJK_IDEOGRAPH_THOUSAND_ : 5487 case CJK_IDEOGRAPH_COMPLEX_THOUSAND_ : 5488 return 1000; 5489 case CJK_IDEOGRAPH_TEN_THOUSAND_ : 5490 return 10000; 5491 case CJK_IDEOGRAPH_HUNDRED_MILLION_ : 5492 return 100000000; 5493 } 5494 return -1; // no value 5495 } 5496 5497 /** 5498 * {@icu} <p>Returns an iterator for character types, iterating over codepoints. 
5499 * <p>Example of use:<br> 5500 * <pre> 5501 * RangeValueIterator iterator = UCharacter.getTypeIterator(); 5502 * RangeValueIterator.Element element = new RangeValueIterator.Element(); 5503 * while (iterator.next(element)) { 5504 * System.out.println("Codepoint \\u" + 5505 * Integer.toHexString(element.start) + 5506 * " to codepoint \\u" + 5507 * Integer.toHexString(element.limit - 1) + 5508 * " has the character type " + 5509 * element.value); 5510 * } 5511 * </pre> 5512 * @return an iterator 5513 * @stable ICU 2.6 5514 */ getTypeIterator()5515 public static RangeValueIterator getTypeIterator() 5516 { 5517 return new UCharacterTypeIterator(); 5518 } 5519 5520 private static final class UCharacterTypeIterator implements RangeValueIterator { UCharacterTypeIterator()5521 UCharacterTypeIterator() { 5522 reset(); 5523 } 5524 5525 // implements RangeValueIterator next(Element element)5526 public boolean next(Element element) { 5527 if(trieIterator.hasNext() && !(range=trieIterator.next()).leadSurrogate) { 5528 element.start=range.startCodePoint; 5529 element.limit=range.endCodePoint+1; 5530 element.value=range.value; 5531 return true; 5532 } else { 5533 return false; 5534 } 5535 } 5536 5537 // implements RangeValueIterator reset()5538 public void reset() { 5539 trieIterator=UCharacterProperty.INSTANCE.m_trie_.iterator(MASK_TYPE); 5540 } 5541 5542 private Iterator<Trie2.Range> trieIterator; 5543 private Trie2.Range range; 5544 5545 private static final class MaskType implements Trie2.ValueMapper { 5546 // Extracts the general category ("character type") from the trie value. map(int value)5547 public int map(int value) { 5548 return value & UCharacterProperty.TYPE_MASK; 5549 } 5550 } 5551 private static final MaskType MASK_TYPE=new MaskType(); 5552 } 5553 5554 /** 5555 * {@icu} <p>Returns an iterator for character names, iterating over codepoints. 5556 * <p>This API only gets the iterator for the modern, most up-to-date 5557 * Unicode names. 
For older 1.0 Unicode names use get1_0NameIterator() or 5558 * for extended names use getExtendedNameIterator(). 5559 * <p>Example of use:<br> 5560 * <pre> 5561 * ValueIterator iterator = UCharacter.getNameIterator(); 5562 * ValueIterator.Element element = new ValueIterator.Element(); 5563 * while (iterator.next(element)) { 5564 * System.out.println("Codepoint \\u" + 5565 * Integer.toHexString(element.codepoint) + 5566 * " has the name " + (String)element.value); 5567 * } 5568 * </pre> 5569 * <p>The maximal range which the name iterator iterates is from 5570 * UCharacter.MIN_VALUE to UCharacter.MAX_VALUE. 5571 * @return an iterator 5572 * @stable ICU 2.6 5573 */ getNameIterator()5574 public static ValueIterator getNameIterator(){ 5575 return new UCharacterNameIterator(UCharacterName.INSTANCE, 5576 UCharacterNameChoice.UNICODE_CHAR_NAME); 5577 } 5578 5579 /** 5580 * {@icu} Returns an empty iterator. 5581 * <p>Used to return an iterator for the older 1.0 Unicode character names, iterating over codepoints. 5582 * @return an empty iterator 5583 * @deprecated ICU 49 5584 * @see #getName1_0(int) 5585 */ 5586 @Deprecated getName1_0Iterator()5587 public static ValueIterator getName1_0Iterator(){ 5588 return new DummyValueIterator(); 5589 } 5590 5591 private static final class DummyValueIterator implements ValueIterator { next(Element element)5592 public boolean next(Element element) { return false; } reset()5593 public void reset() {} setRange(int start, int limit)5594 public void setRange(int start, int limit) {} 5595 } 5596 5597 /** 5598 * {@icu} <p>Returns an iterator for character names, iterating over codepoints. 5599 * <p>This API only gets the iterator for the extended names. 5600 * For modern, most up-to-date Unicode names use getNameIterator() or 5601 * for older 1.0 Unicode names use get1_0NameIterator(). 
5602 * <p>Example of use:<br> 5603 * <pre> 5604 * ValueIterator iterator = UCharacter.getExtendedNameIterator(); 5605 * ValueIterator.Element element = new ValueIterator.Element(); 5606 * while (iterator.next(element)) { 5607 * System.out.println("Codepoint \\u" + 5608 * Integer.toHexString(element.codepoint) + 5609 * " has the name " + (String)element.value); 5610 * } 5611 * </pre> 5612 * <p>The maximal range which the name iterator iterates is from 5613 * @return an iterator 5614 * @stable ICU 2.6 5615 */ getExtendedNameIterator()5616 public static ValueIterator getExtendedNameIterator(){ 5617 return new UCharacterNameIterator(UCharacterName.INSTANCE, 5618 UCharacterNameChoice.EXTENDED_CHAR_NAME); 5619 } 5620 5621 /** 5622 * {@icu} Returns the "age" of the code point. 5623 * <p>The "age" is the Unicode version when the code point was first 5624 * designated (as a non-character or for Private Use) or assigned a 5625 * character. 5626 * <p>This can be useful to avoid emitting code points to receiving 5627 * processes that do not accept newer characters. 5628 * <p>The data is from the UCD file DerivedAge.txt. 5629 * @param ch The code point. 5630 * @return the Unicode version number 5631 * @stable ICU 2.6 5632 */ getAge(int ch)5633 public static VersionInfo getAge(int ch) 5634 { 5635 if (ch < MIN_VALUE || ch > MAX_VALUE) { 5636 throw new IllegalArgumentException("Codepoint out of bounds"); 5637 } 5638 return UCharacterProperty.INSTANCE.getAge(ch); 5639 } 5640 5641 /** 5642 * {@icu} <p>Check a binary Unicode property for a code point. 5643 * <p>Unicode, especially in version 3.2, defines many more properties 5644 * than the original set in UnicodeData.txt. 5645 * <p>This API is intended to reflect Unicode properties as defined in 5646 * the Unicode Character Database (UCD) and Unicode Technical Reports 5647 * (UTR). 5648 * <p>For details about the properties see 5649 * <a href=http://www.unicode.org/>http://www.unicode.org/</a>. 
5650 * <p>For names of Unicode properties see the UCD file 5651 * PropertyAliases.txt. 5652 * <p>This API does not check the validity of the codepoint. 5653 * <p>Important: If ICU is built with UCD files from Unicode versions 5654 * below 3.2, then properties marked with "new" are not or 5655 * not fully available. 5656 * @param ch code point to test. 5657 * @param property selector constant from com.ibm.icu.lang.UProperty, 5658 * identifies which binary property to check. 5659 * @return true or false according to the binary Unicode property value 5660 * for ch. Also false if property is out of bounds or if the 5661 * Unicode version does not have data for the property at all, or 5662 * not for this code point. 5663 * @see com.ibm.icu.lang.UProperty 5664 * @stable ICU 2.6 5665 */ hasBinaryProperty(int ch, int property)5666 public static boolean hasBinaryProperty(int ch, int property) 5667 { 5668 return UCharacterProperty.INSTANCE.hasBinaryProperty(ch, property); 5669 } 5670 5671 /** 5672 * {@icu} <p>Check if a code point has the Alphabetic Unicode property. 5673 * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.ALPHABETIC). 5674 * <p>Different from UCharacter.isLetter(ch)! 5675 * @stable ICU 2.6 5676 * @param ch codepoint to be tested 5677 */ isUAlphabetic(int ch)5678 public static boolean isUAlphabetic(int ch) 5679 { 5680 return hasBinaryProperty(ch, UProperty.ALPHABETIC); 5681 } 5682 5683 /** 5684 * {@icu} <p>Check if a code point has the Lowercase Unicode property. 5685 * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.LOWERCASE). 5686 * <p>This is different from UCharacter.isLowerCase(ch)! 5687 * @param ch codepoint to be tested 5688 * @stable ICU 2.6 5689 */ isULowercase(int ch)5690 public static boolean isULowercase(int ch) 5691 { 5692 return hasBinaryProperty(ch, UProperty.LOWERCASE); 5693 } 5694 5695 /** 5696 * {@icu} <p>Check if a code point has the Uppercase Unicode property. 
5697 * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.UPPERCASE). 5698 * <p>This is different from UCharacter.isUpperCase(ch)! 5699 * @param ch codepoint to be tested 5700 * @stable ICU 2.6 5701 */ isUUppercase(int ch)5702 public static boolean isUUppercase(int ch) 5703 { 5704 return hasBinaryProperty(ch, UProperty.UPPERCASE); 5705 } 5706 5707 /** 5708 * {@icu} <p>Check if a code point has the White_Space Unicode property. 5709 * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.WHITE_SPACE). 5710 * <p>This is different from both UCharacter.isSpace(ch) and 5711 * UCharacter.isWhitespace(ch)! 5712 * @param ch codepoint to be tested 5713 * @stable ICU 2.6 5714 */ isUWhiteSpace(int ch)5715 public static boolean isUWhiteSpace(int ch) 5716 { 5717 return hasBinaryProperty(ch, UProperty.WHITE_SPACE); 5718 } 5719 5720 /** 5721 * {@icu} <p>Returns the property value for an Unicode property type of a code point. 5722 * Also returns binary and mask property values. 5723 * <p>Unicode, especially in version 3.2, defines many more properties than 5724 * the original set in UnicodeData.txt. 5725 * <p>The properties APIs are intended to reflect Unicode properties as 5726 * defined in the Unicode Character Database (UCD) and Unicode Technical 5727 * Reports (UTR). For details about the properties see 5728 * http://www.unicode.org/. 5729 * <p>For names of Unicode properties see the UCD file PropertyAliases.txt. 5730 * 5731 * <pre> 5732 * Sample usage: 5733 * int ea = UCharacter.getIntPropertyValue(c, UProperty.EAST_ASIAN_WIDTH); 5734 * int ideo = UCharacter.getIntPropertyValue(c, UProperty.IDEOGRAPHIC); 5735 * boolean b = (ideo == 1) ? true : false; 5736 * </pre> 5737 * @param ch code point to test. 5738 * @param type UProperty selector constant, identifies which binary 5739 * property to check. 
Must be 5740 * UProperty.BINARY_START <= type < UProperty.BINARY_LIMIT or 5741 * UProperty.INT_START <= type < UProperty.INT_LIMIT or 5742 * UProperty.MASK_START <= type < UProperty.MASK_LIMIT. 5743 * @return numeric value that is directly the property value or, 5744 * for enumerated properties, corresponds to the numeric value of 5745 * the enumerated constant of the respective property value 5746 * enumeration type (cast to enum type if necessary). 5747 * Returns 0 or 1 (for false / true) for binary Unicode properties. 5748 * Returns a bit-mask for mask properties. 5749 * Returns 0 if 'type' is out of bounds or if the Unicode version 5750 * does not have data for the property at all, or not for this code 5751 * point. 5752 * @see UProperty 5753 * @see #hasBinaryProperty 5754 * @see #getIntPropertyMinValue 5755 * @see #getIntPropertyMaxValue 5756 * @see #getUnicodeVersion 5757 * @stable ICU 2.4 5758 */ getIntPropertyValue(int ch, int type)5759 public static int getIntPropertyValue(int ch, int type) 5760 { 5761 return UCharacterProperty.INSTANCE.getIntPropertyValue(ch, type); 5762 } 5763 /** 5764 * {@icu} Returns a string version of the property value. 5765 * @param propertyEnum The property enum value. 5766 * @param codepoint The codepoint value. 5767 * @param nameChoice The choice of the name. 5768 * @return value as string 5769 * @internal 5770 * @deprecated This API is ICU internal only. 
5771 */ 5772 @Deprecated 5773 ///CLOVER:OFF getStringPropertyValue(int propertyEnum, int codepoint, int nameChoice)5774 public static String getStringPropertyValue(int propertyEnum, int codepoint, int nameChoice) { 5775 if ((propertyEnum >= UProperty.BINARY_START && propertyEnum < UProperty.BINARY_LIMIT) || 5776 (propertyEnum >= UProperty.INT_START && propertyEnum < UProperty.INT_LIMIT)) { 5777 return getPropertyValueName(propertyEnum, getIntPropertyValue(codepoint, propertyEnum), 5778 nameChoice); 5779 } 5780 if (propertyEnum == UProperty.NUMERIC_VALUE) { 5781 return String.valueOf(getUnicodeNumericValue(codepoint)); 5782 } 5783 // otherwise must be string property 5784 switch (propertyEnum) { 5785 case UProperty.AGE: return getAge(codepoint).toString(); 5786 case UProperty.ISO_COMMENT: return getISOComment(codepoint); 5787 case UProperty.BIDI_MIRRORING_GLYPH: return toString(getMirror(codepoint)); 5788 case UProperty.CASE_FOLDING: return toString(foldCase(codepoint, true)); 5789 case UProperty.LOWERCASE_MAPPING: return toString(toLowerCase(codepoint)); 5790 case UProperty.NAME: return getName(codepoint); 5791 case UProperty.SIMPLE_CASE_FOLDING: return toString(foldCase(codepoint, true)); 5792 case UProperty.SIMPLE_LOWERCASE_MAPPING: return toString(toLowerCase(codepoint)); 5793 case UProperty.SIMPLE_TITLECASE_MAPPING: return toString(toTitleCase(codepoint)); 5794 case UProperty.SIMPLE_UPPERCASE_MAPPING: return toString(toUpperCase(codepoint)); 5795 case UProperty.TITLECASE_MAPPING: return toString(toTitleCase(codepoint)); 5796 case UProperty.UNICODE_1_NAME: return getName1_0(codepoint); 5797 case UProperty.UPPERCASE_MAPPING: return toString(toUpperCase(codepoint)); 5798 } 5799 throw new IllegalArgumentException("Illegal Property Enum"); 5800 } 5801 ///CLOVER:ON 5802 5803 /** 5804 * {@icu} Returns the minimum value for an integer/binary Unicode property type. 
5805 * Can be used together with UCharacter.getIntPropertyMaxValue(int) 5806 * to allocate arrays of com.ibm.icu.text.UnicodeSet or similar. 5807 * @param type UProperty selector constant, identifies which binary 5808 * property to check. Must be 5809 * UProperty.BINARY_START <= type < UProperty.BINARY_LIMIT or 5810 * UProperty.INT_START <= type < UProperty.INT_LIMIT. 5811 * @return Minimum value returned by UCharacter.getIntPropertyValue(int) 5812 * for a Unicode property. 0 if the property 5813 * selector 'type' is out of range. 5814 * @see UProperty 5815 * @see #hasBinaryProperty 5816 * @see #getUnicodeVersion 5817 * @see #getIntPropertyMaxValue 5818 * @see #getIntPropertyValue 5819 * @stable ICU 2.4 5820 */ getIntPropertyMinValue(int type)5821 public static int getIntPropertyMinValue(int type){ 5822 5823 return 0; // undefined; and: all other properties have a minimum value of 0 5824 } 5825 5826 5827 /** 5828 * {@icu} Returns the maximum value for an integer/binary Unicode property. 5829 * Can be used together with UCharacter.getIntPropertyMinValue(int) 5830 * to allocate arrays of com.ibm.icu.text.UnicodeSet or similar. 5831 * Examples for min/max values (for Unicode 3.2): 5832 * <ul> 5833 * <li> UProperty.BIDI_CLASS: 0/18 5834 * (UCharacterDirection.LEFT_TO_RIGHT/UCharacterDirection.BOUNDARY_NEUTRAL) 5835 * <li> UProperty.SCRIPT: 0/45 (UScript.COMMON/UScript.TAGBANWA) 5836 * <li> UProperty.IDEOGRAPHIC: 0/1 (false/true) 5837 * </ul> 5838 * For undefined UProperty constant values, min/max values will be 0/-1. 5839 * @param type UProperty selector constant, identifies which binary 5840 * property to check. Must be 5841 * UProperty.BINARY_START <= type < UProperty.BINARY_LIMIT or 5842 * UProperty.INT_START <= type < UProperty.INT_LIMIT. 5843 * @return Maximum value returned by u_getIntPropertyValue for a Unicode 5844 * property. <= 0 if the property selector 'type' is out of range. 
5845 * @see UProperty 5846 * @see #hasBinaryProperty 5847 * @see #getUnicodeVersion 5848 * @see #getIntPropertyMaxValue 5849 * @see #getIntPropertyValue 5850 * @stable ICU 2.4 5851 */ getIntPropertyMaxValue(int type)5852 public static int getIntPropertyMaxValue(int type) 5853 { 5854 return UCharacterProperty.INSTANCE.getIntPropertyMaxValue(type); 5855 } 5856 5857 /** 5858 * Provide the java.lang.Character forDigit API, for convenience. 5859 * @stable ICU 3.0 5860 */ forDigit(int digit, int radix)5861 public static char forDigit(int digit, int radix) { 5862 return java.lang.Character.forDigit(digit, radix); 5863 } 5864 5865 // JDK 1.5 API coverage 5866 5867 /** 5868 * Constant U+D800, same as {@link Character#MIN_HIGH_SURROGATE}. 5869 * 5870 * @stable ICU 3.0 5871 */ 5872 public static final char MIN_HIGH_SURROGATE = Character.MIN_HIGH_SURROGATE; 5873 5874 /** 5875 * Constant U+DBFF, same as {@link Character#MAX_HIGH_SURROGATE}. 5876 * 5877 * @stable ICU 3.0 5878 */ 5879 public static final char MAX_HIGH_SURROGATE = Character.MAX_HIGH_SURROGATE; 5880 5881 /** 5882 * Constant U+DC00, same as {@link Character#MIN_LOW_SURROGATE}. 5883 * 5884 * @stable ICU 3.0 5885 */ 5886 public static final char MIN_LOW_SURROGATE = Character.MIN_LOW_SURROGATE; 5887 5888 /** 5889 * Constant U+DFFF, same as {@link Character#MAX_LOW_SURROGATE}. 5890 * 5891 * @stable ICU 3.0 5892 */ 5893 public static final char MAX_LOW_SURROGATE = Character.MAX_LOW_SURROGATE; 5894 5895 /** 5896 * Constant U+D800, same as {@link Character#MIN_SURROGATE}. 5897 * 5898 * @stable ICU 3.0 5899 */ 5900 public static final char MIN_SURROGATE = Character.MIN_SURROGATE; 5901 5902 /** 5903 * Constant U+DFFF, same as {@link Character#MAX_SURROGATE}. 5904 * 5905 * @stable ICU 3.0 5906 */ 5907 public static final char MAX_SURROGATE = Character.MAX_SURROGATE; 5908 5909 /** 5910 * Constant U+10000, same as {@link Character#MIN_SUPPLEMENTARY_CODE_POINT}. 
5911 * 5912 * @stable ICU 3.0 5913 */ 5914 public static final int MIN_SUPPLEMENTARY_CODE_POINT = Character.MIN_SUPPLEMENTARY_CODE_POINT; 5915 5916 /** 5917 * Constant U+10FFFF, same as {@link Character#MAX_CODE_POINT}. 5918 * 5919 * @stable ICU 3.0 5920 */ 5921 public static final int MAX_CODE_POINT = Character.MAX_CODE_POINT; 5922 5923 /** 5924 * Constant U+0000, same as {@link Character#MIN_CODE_POINT}. 5925 * 5926 * @stable ICU 3.0 5927 */ 5928 public static final int MIN_CODE_POINT = Character.MIN_CODE_POINT; 5929 5930 /** 5931 * Equivalent to {@link Character#isValidCodePoint}. 5932 * 5933 * @param cp the code point to check 5934 * @return true if cp is a valid code point 5935 * @stable ICU 3.0 5936 */ isValidCodePoint(int cp)5937 public static final boolean isValidCodePoint(int cp) { 5938 return cp >= 0 && cp <= MAX_CODE_POINT; 5939 } 5940 5941 /** 5942 * Same as {@link Character#isSupplementaryCodePoint}. 5943 * 5944 * @param cp the code point to check 5945 * @return true if cp is a supplementary code point 5946 * @stable ICU 3.0 5947 */ isSupplementaryCodePoint(int cp)5948 public static final boolean isSupplementaryCodePoint(int cp) { 5949 return Character.isSupplementaryCodePoint(cp); 5950 } 5951 5952 /** 5953 * Same as {@link Character#isHighSurrogate}. 5954 * 5955 * @param ch the char to check 5956 * @return true if ch is a high (lead) surrogate 5957 * @stable ICU 3.0 5958 */ isHighSurrogate(char ch)5959 public static boolean isHighSurrogate(char ch) { 5960 return Character.isHighSurrogate(ch); 5961 } 5962 5963 /** 5964 * Same as {@link Character#isLowSurrogate}. 5965 * 5966 * @param ch the char to check 5967 * @return true if ch is a low (trail) surrogate 5968 * @stable ICU 3.0 5969 */ isLowSurrogate(char ch)5970 public static boolean isLowSurrogate(char ch) { 5971 return Character.isLowSurrogate(ch); 5972 } 5973 5974 /** 5975 * Same as {@link Character#isSurrogatePair}. 
5976 * 5977 * @param high the high (lead) char 5978 * @param low the low (trail) char 5979 * @return true if high, low form a surrogate pair 5980 * @stable ICU 3.0 5981 */ isSurrogatePair(char high, char low)5982 public static final boolean isSurrogatePair(char high, char low) { 5983 return Character.isSurrogatePair(high, low); 5984 } 5985 5986 /** 5987 * Same as {@link Character#charCount}. 5988 * Returns the number of chars needed to represent the code point (1 or 2). 5989 * This does not check the code point for validity. 5990 * 5991 * @param cp the code point to check 5992 * @return the number of chars needed to represent the code point 5993 * @stable ICU 3.0 5994 */ charCount(int cp)5995 public static int charCount(int cp) { 5996 return Character.charCount(cp); 5997 } 5998 5999 /** 6000 * Same as {@link Character#toCodePoint}. 6001 * Returns the code point represented by the two surrogate code units. 6002 * This does not check the surrogate pair for validity. 6003 * 6004 * @param high the high (lead) surrogate 6005 * @param low the low (trail) surrogate 6006 * @return the code point formed by the surrogate pair 6007 * @stable ICU 3.0 6008 */ toCodePoint(char high, char low)6009 public static final int toCodePoint(char high, char low) { 6010 return Character.toCodePoint(high, low); 6011 } 6012 6013 /** 6014 * Same as {@link Character#codePointAt(CharSequence, int)}. 6015 * Returns the code point at index. 6016 * This examines only the characters at index and index+1. 
6017 * 6018 * @param seq the characters to check 6019 * @param index the index of the first or only char forming the code point 6020 * @return the code point at the index 6021 * @stable ICU 3.0 6022 */ codePointAt(CharSequence seq, int index)6023 public static final int codePointAt(CharSequence seq, int index) { 6024 char c1 = seq.charAt(index++); 6025 if (isHighSurrogate(c1)) { 6026 if (index < seq.length()) { 6027 char c2 = seq.charAt(index); 6028 if (isLowSurrogate(c2)) { 6029 return toCodePoint(c1, c2); 6030 } 6031 } 6032 } 6033 return c1; 6034 } 6035 6036 /** 6037 * Same as {@link Character#codePointAt(char[], int)}. 6038 * Returns the code point at index. 6039 * This examines only the characters at index and index+1. 6040 * 6041 * @param text the characters to check 6042 * @param index the index of the first or only char forming the code point 6043 * @return the code point at the index 6044 * @stable ICU 3.0 6045 */ codePointAt(char[] text, int index)6046 public static final int codePointAt(char[] text, int index) { 6047 char c1 = text[index++]; 6048 if (isHighSurrogate(c1)) { 6049 if (index < text.length) { 6050 char c2 = text[index]; 6051 if (isLowSurrogate(c2)) { 6052 return toCodePoint(c1, c2); 6053 } 6054 } 6055 } 6056 return c1; 6057 } 6058 6059 /** 6060 * Same as {@link Character#codePointAt(char[], int, int)}. 6061 * Returns the code point at index. 6062 * This examines only the characters at index and index+1. 
6063 * 6064 * @param text the characters to check 6065 * @param index the index of the first or only char forming the code point 6066 * @param limit the limit of the valid text 6067 * @return the code point at the index 6068 * @stable ICU 3.0 6069 */ codePointAt(char[] text, int index, int limit)6070 public static final int codePointAt(char[] text, int index, int limit) { 6071 if (index >= limit || limit > text.length) { 6072 throw new IndexOutOfBoundsException(); 6073 } 6074 char c1 = text[index++]; 6075 if (isHighSurrogate(c1)) { 6076 if (index < limit) { 6077 char c2 = text[index]; 6078 if (isLowSurrogate(c2)) { 6079 return toCodePoint(c1, c2); 6080 } 6081 } 6082 } 6083 return c1; 6084 } 6085 6086 /** 6087 * Same as {@link Character#codePointBefore(CharSequence, int)}. 6088 * Return the code point before index. 6089 * This examines only the characters at index-1 and index-2. 6090 * 6091 * @param seq the characters to check 6092 * @param index the index after the last or only char forming the code point 6093 * @return the code point before the index 6094 * @stable ICU 3.0 6095 */ codePointBefore(CharSequence seq, int index)6096 public static final int codePointBefore(CharSequence seq, int index) { 6097 char c2 = seq.charAt(--index); 6098 if (isLowSurrogate(c2)) { 6099 if (index > 0) { 6100 char c1 = seq.charAt(--index); 6101 if (isHighSurrogate(c1)) { 6102 return toCodePoint(c1, c2); 6103 } 6104 } 6105 } 6106 return c2; 6107 } 6108 6109 /** 6110 * Same as {@link Character#codePointBefore(char[], int)}. 6111 * Returns the code point before index. 6112 * This examines only the characters at index-1 and index-2. 
6113 * 6114 * @param text the characters to check 6115 * @param index the index after the last or only char forming the code point 6116 * @return the code point before the index 6117 * @stable ICU 3.0 6118 */ codePointBefore(char[] text, int index)6119 public static final int codePointBefore(char[] text, int index) { 6120 char c2 = text[--index]; 6121 if (isLowSurrogate(c2)) { 6122 if (index > 0) { 6123 char c1 = text[--index]; 6124 if (isHighSurrogate(c1)) { 6125 return toCodePoint(c1, c2); 6126 } 6127 } 6128 } 6129 return c2; 6130 } 6131 6132 /** 6133 * Same as {@link Character#codePointBefore(char[], int, int)}. 6134 * Return the code point before index. 6135 * This examines only the characters at index-1 and index-2. 6136 * 6137 * @param text the characters to check 6138 * @param index the index after the last or only char forming the code point 6139 * @param limit the start of the valid text 6140 * @return the code point before the index 6141 * @stable ICU 3.0 6142 */ codePointBefore(char[] text, int index, int limit)6143 public static final int codePointBefore(char[] text, int index, int limit) { 6144 if (index <= limit || limit < 0) { 6145 throw new IndexOutOfBoundsException(); 6146 } 6147 char c2 = text[--index]; 6148 if (isLowSurrogate(c2)) { 6149 if (index > limit) { 6150 char c1 = text[--index]; 6151 if (isHighSurrogate(c1)) { 6152 return toCodePoint(c1, c2); 6153 } 6154 } 6155 } 6156 return c2; 6157 } 6158 6159 /** 6160 * Same as {@link Character#toChars(int, char[], int)}. 6161 * Writes the chars representing the 6162 * code point into the destination at the given index. 
6163 * 6164 * @param cp the code point to convert 6165 * @param dst the destination array into which to put the char(s) representing the code point 6166 * @param dstIndex the index at which to put the first (or only) char 6167 * @return the count of the number of chars written (1 or 2) 6168 * @throws IllegalArgumentException if cp is not a valid code point 6169 * @stable ICU 3.0 6170 */ toChars(int cp, char[] dst, int dstIndex)6171 public static final int toChars(int cp, char[] dst, int dstIndex) { 6172 return Character.toChars(cp, dst, dstIndex); 6173 } 6174 6175 /** 6176 * Same as {@link Character#toChars(int)}. 6177 * Returns a char array representing the code point. 6178 * 6179 * @param cp the code point to convert 6180 * @return an array containing the char(s) representing the code point 6181 * @throws IllegalArgumentException if cp is not a valid code point 6182 * @stable ICU 3.0 6183 */ toChars(int cp)6184 public static final char[] toChars(int cp) { 6185 return Character.toChars(cp); 6186 } 6187 6188 /** 6189 * Equivalent to the {@link Character#getDirectionality(char)} method, for 6190 * convenience. Returns a byte representing the directionality of the 6191 * character. 6192 * 6193 * {@icunote} Unlike {@link Character#getDirectionality(char)}, this returns 6194 * DIRECTIONALITY_LEFT_TO_RIGHT for undefined or out-of-bounds characters. 6195 * 6196 * {@icunote} The return value must be tested using the constants defined in {@link 6197 * UCharacterDirection} and its interface {@link 6198 * UCharacterEnums.ECharacterDirection} since the values are different from the ones 6199 * defined by <code>java.lang.Character</code>. 
6200 * @param cp the code point to check 6201 * @return the directionality of the code point 6202 * @see #getDirection 6203 * @stable ICU 3.0 6204 */ getDirectionality(int cp)6205 public static byte getDirectionality(int cp) 6206 { 6207 return (byte)getDirection(cp); 6208 } 6209 6210 /** 6211 * Equivalent to the {@link Character#codePointCount(CharSequence, int, int)} 6212 * method, for convenience. Counts the number of code points in the range 6213 * of text. 6214 * @param text the characters to check 6215 * @param start the start of the range 6216 * @param limit the limit of the range 6217 * @return the number of code points in the range 6218 * @stable ICU 3.0 6219 */ codePointCount(CharSequence text, int start, int limit)6220 public static int codePointCount(CharSequence text, int start, int limit) { 6221 if (start < 0 || limit < start || limit > text.length()) { 6222 throw new IndexOutOfBoundsException("start (" + start + 6223 ") or limit (" + limit + 6224 ") invalid or out of range 0, " + text.length()); 6225 } 6226 6227 int len = limit - start; 6228 while (limit > start) { 6229 char ch = text.charAt(--limit); 6230 while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && limit > start) { 6231 ch = text.charAt(--limit); 6232 if (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE) { 6233 --len; 6234 break; 6235 } 6236 } 6237 } 6238 return len; 6239 } 6240 6241 /** 6242 * Equivalent to the {@link Character#codePointCount(char[], int, int)} method, for 6243 * convenience. Counts the number of code points in the range of text. 
6244 * @param text the characters to check 6245 * @param start the start of the range 6246 * @param limit the limit of the range 6247 * @return the number of code points in the range 6248 * @stable ICU 3.0 6249 */ codePointCount(char[] text, int start, int limit)6250 public static int codePointCount(char[] text, int start, int limit) { 6251 if (start < 0 || limit < start || limit > text.length) { 6252 throw new IndexOutOfBoundsException("start (" + start + 6253 ") or limit (" + limit + 6254 ") invalid or out of range 0, " + text.length); 6255 } 6256 6257 int len = limit - start; 6258 while (limit > start) { 6259 char ch = text[--limit]; 6260 while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && limit > start) { 6261 ch = text[--limit]; 6262 if (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE) { 6263 --len; 6264 break; 6265 } 6266 } 6267 } 6268 return len; 6269 } 6270 6271 /** 6272 * Equivalent to the {@link Character#offsetByCodePoints(CharSequence, int, int)} 6273 * method, for convenience. Adjusts the char index by a code point offset. 
6274 * @param text the characters to check 6275 * @param index the index to adjust 6276 * @param codePointOffset the number of code points by which to offset the index 6277 * @return the adjusted index 6278 * @stable ICU 3.0 6279 */ offsetByCodePoints(CharSequence text, int index, int codePointOffset)6280 public static int offsetByCodePoints(CharSequence text, int index, int codePointOffset) { 6281 if (index < 0 || index > text.length()) { 6282 throw new IndexOutOfBoundsException("index ( " + index + 6283 ") out of range 0, " + text.length()); 6284 } 6285 6286 if (codePointOffset < 0) { 6287 while (++codePointOffset <= 0) { 6288 char ch = text.charAt(--index); 6289 while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && index > 0) { 6290 ch = text.charAt(--index); 6291 if (ch < MIN_HIGH_SURROGATE || ch > MAX_HIGH_SURROGATE) { 6292 if (++codePointOffset > 0) { 6293 return index+1; 6294 } 6295 } 6296 } 6297 } 6298 } else { 6299 int limit = text.length(); 6300 while (--codePointOffset >= 0) { 6301 char ch = text.charAt(index++); 6302 while (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE && index < limit) { 6303 ch = text.charAt(index++); 6304 if (ch < MIN_LOW_SURROGATE || ch > MAX_LOW_SURROGATE) { 6305 if (--codePointOffset < 0) { 6306 return index-1; 6307 } 6308 } 6309 } 6310 } 6311 } 6312 6313 return index; 6314 } 6315 6316 /** 6317 * Equivalent to the 6318 * {@link Character#offsetByCodePoints(char[], int, int, int, int)} 6319 * method, for convenience. Adjusts the char index by a code point offset. 
6320 * @param text the characters to check 6321 * @param start the start of the range to check 6322 * @param count the length of the range to check 6323 * @param index the index to adjust 6324 * @param codePointOffset the number of code points by which to offset the index 6325 * @return the adjusted index 6326 * @stable ICU 3.0 6327 */ offsetByCodePoints(char[] text, int start, int count, int index, int codePointOffset)6328 public static int offsetByCodePoints(char[] text, int start, int count, int index, 6329 int codePointOffset) { 6330 int limit = start + count; 6331 if (start < 0 || limit < start || limit > text.length || index < start || index > limit) { 6332 throw new IndexOutOfBoundsException("index ( " + index + 6333 ") out of range " + start + 6334 ", " + limit + 6335 " in array 0, " + text.length); 6336 } 6337 6338 if (codePointOffset < 0) { 6339 while (++codePointOffset <= 0) { 6340 char ch = text[--index]; 6341 if (index < start) { 6342 throw new IndexOutOfBoundsException("index ( " + index + 6343 ") < start (" + start + 6344 ")"); 6345 } 6346 while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && index > start) { 6347 ch = text[--index]; 6348 if (ch < MIN_HIGH_SURROGATE || ch > MAX_HIGH_SURROGATE) { 6349 if (++codePointOffset > 0) { 6350 return index+1; 6351 } 6352 } 6353 } 6354 } 6355 } else { 6356 while (--codePointOffset >= 0) { 6357 char ch = text[index++]; 6358 if (index > limit) { 6359 throw new IndexOutOfBoundsException("index ( " + index + 6360 ") > limit (" + limit + 6361 ")"); 6362 } 6363 while (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE && index < limit) { 6364 ch = text[index++]; 6365 if (ch < MIN_LOW_SURROGATE || ch > MAX_LOW_SURROGATE) { 6366 if (--codePointOffset < 0) { 6367 return index-1; 6368 } 6369 } 6370 } 6371 } 6372 } 6373 6374 return index; 6375 } 6376 6377 // private variables ------------------------------------------------- 6378 6379 /** 6380 * To get the last character out from a data type 6381 */ 6382 private 
static final int LAST_CHAR_MASK_ = 0xFFFF; 6383 6384 // /** 6385 // * To get the last byte out from a data type 6386 // */ 6387 // private static final int LAST_BYTE_MASK_ = 0xFF; 6388 // 6389 // /** 6390 // * Shift 16 bits 6391 // */ 6392 // private static final int SHIFT_16_ = 16; 6393 // 6394 // /** 6395 // * Shift 24 bits 6396 // */ 6397 // private static final int SHIFT_24_ = 24; 6398 // 6399 // /** 6400 // * Decimal radix 6401 // */ 6402 // private static final int DECIMAL_RADIX_ = 10; 6403 6404 /** 6405 * No break space code point 6406 */ 6407 private static final int NO_BREAK_SPACE_ = 0xA0; 6408 6409 /** 6410 * Figure space code point 6411 */ 6412 private static final int FIGURE_SPACE_ = 0x2007; 6413 6414 /** 6415 * Narrow no break space code point 6416 */ 6417 private static final int NARROW_NO_BREAK_SPACE_ = 0x202F; 6418 6419 /** 6420 * Ideographic number zero code point 6421 */ 6422 private static final int IDEOGRAPHIC_NUMBER_ZERO_ = 0x3007; 6423 6424 /** 6425 * CJK Ideograph, First code point 6426 */ 6427 private static final int CJK_IDEOGRAPH_FIRST_ = 0x4e00; 6428 6429 /** 6430 * CJK Ideograph, Second code point 6431 */ 6432 private static final int CJK_IDEOGRAPH_SECOND_ = 0x4e8c; 6433 6434 /** 6435 * CJK Ideograph, Third code point 6436 */ 6437 private static final int CJK_IDEOGRAPH_THIRD_ = 0x4e09; 6438 6439 /** 6440 * CJK Ideograph, Fourth code point 6441 */ 6442 private static final int CJK_IDEOGRAPH_FOURTH_ = 0x56db; 6443 6444 /** 6445 * CJK Ideograph, FIFTH code point 6446 */ 6447 private static final int CJK_IDEOGRAPH_FIFTH_ = 0x4e94; 6448 6449 /** 6450 * CJK Ideograph, Sixth code point 6451 */ 6452 private static final int CJK_IDEOGRAPH_SIXTH_ = 0x516d; 6453 6454 /** 6455 * CJK Ideograph, Seventh code point 6456 */ 6457 private static final int CJK_IDEOGRAPH_SEVENTH_ = 0x4e03; 6458 6459 /** 6460 * CJK Ideograph, Eighth code point 6461 */ 6462 private static final int CJK_IDEOGRAPH_EIGHTH_ = 0x516b; 6463 6464 /** 6465 * CJK Ideograph, Nineth 
code point 6466 */ 6467 private static final int CJK_IDEOGRAPH_NINETH_ = 0x4e5d; 6468 6469 /** 6470 * Application Program command code point 6471 */ 6472 private static final int APPLICATION_PROGRAM_COMMAND_ = 0x009F; 6473 6474 /** 6475 * Unit separator code point 6476 */ 6477 private static final int UNIT_SEPARATOR_ = 0x001F; 6478 6479 /** 6480 * Delete code point 6481 */ 6482 private static final int DELETE_ = 0x007F; 6483 6484 /** 6485 * Han digit characters 6486 */ 6487 private static final int CJK_IDEOGRAPH_COMPLEX_ZERO_ = 0x96f6; 6488 private static final int CJK_IDEOGRAPH_COMPLEX_ONE_ = 0x58f9; 6489 private static final int CJK_IDEOGRAPH_COMPLEX_TWO_ = 0x8cb3; 6490 private static final int CJK_IDEOGRAPH_COMPLEX_THREE_ = 0x53c3; 6491 private static final int CJK_IDEOGRAPH_COMPLEX_FOUR_ = 0x8086; 6492 private static final int CJK_IDEOGRAPH_COMPLEX_FIVE_ = 0x4f0d; 6493 private static final int CJK_IDEOGRAPH_COMPLEX_SIX_ = 0x9678; 6494 private static final int CJK_IDEOGRAPH_COMPLEX_SEVEN_ = 0x67d2; 6495 private static final int CJK_IDEOGRAPH_COMPLEX_EIGHT_ = 0x634c; 6496 private static final int CJK_IDEOGRAPH_COMPLEX_NINE_ = 0x7396; 6497 private static final int CJK_IDEOGRAPH_TEN_ = 0x5341; 6498 private static final int CJK_IDEOGRAPH_COMPLEX_TEN_ = 0x62fe; 6499 private static final int CJK_IDEOGRAPH_HUNDRED_ = 0x767e; 6500 private static final int CJK_IDEOGRAPH_COMPLEX_HUNDRED_ = 0x4f70; 6501 private static final int CJK_IDEOGRAPH_THOUSAND_ = 0x5343; 6502 private static final int CJK_IDEOGRAPH_COMPLEX_THOUSAND_ = 0x4edf; 6503 private static final int CJK_IDEOGRAPH_TEN_THOUSAND_ = 0x824c; 6504 private static final int CJK_IDEOGRAPH_HUNDRED_MILLION_ = 0x5104; 6505 6506 // private constructor ----------------------------------------------- 6507 ///CLOVER:OFF 6508 /** 6509 * Private constructor to prevent instantiation 6510 */ UCharacter()6511 private UCharacter() 6512 { 6513 } 6514 ///CLOVER:ON 6515 } 6516