1 /** 2 ******************************************************************************* 3 * Copyright (C) 1996-2015, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 ******************************************************************************* 6 */ 7 8 package com.ibm.icu.lang; 9 10 import java.lang.ref.SoftReference; 11 import java.util.HashMap; 12 import java.util.Iterator; 13 import java.util.Locale; 14 import java.util.Map; 15 16 import com.ibm.icu.impl.IllegalIcuArgumentException; 17 import com.ibm.icu.impl.Trie2; 18 import com.ibm.icu.impl.UBiDiProps; 19 import com.ibm.icu.impl.UCaseProps; 20 import com.ibm.icu.impl.UCharacterName; 21 import com.ibm.icu.impl.UCharacterNameChoice; 22 import com.ibm.icu.impl.UCharacterProperty; 23 import com.ibm.icu.impl.UCharacterUtility; 24 import com.ibm.icu.impl.UPropertyAliases; 25 import com.ibm.icu.lang.UCharacterEnums.ECharacterCategory; 26 import com.ibm.icu.lang.UCharacterEnums.ECharacterDirection; 27 import com.ibm.icu.text.BreakIterator; 28 import com.ibm.icu.text.Normalizer2; 29 import com.ibm.icu.text.UTF16; 30 import com.ibm.icu.util.RangeValueIterator; 31 import com.ibm.icu.util.ULocale; 32 import com.ibm.icu.util.ValueIterator; 33 import com.ibm.icu.util.VersionInfo; 34 35 /** 36 * {@icuenhanced java.lang.Character}.{@icu _usage_} 37 * 38 * <p>The UCharacter class provides extensions to the 39 * <a href="http://java.sun.com/j2se/1.5/docs/api/java/lang/Character.html"> 40 * java.lang.Character</a> class. These extensions provide support for 41 * more Unicode properties and together with the <a href=../text/UTF16.html>UTF16</a> 42 * class, provide support for supplementary characters (those with code 43 * points above U+FFFF). 44 * Each ICU release supports the latest version of Unicode available at that time. 45 * 46 * <p>Code points are represented in these API using ints. 
While it would be 47 * more convenient in Java to have a separate primitive datatype for them, 48 * ints suffice in the meantime. 49 * 50 * <p>To use this class please add the jar file icu4j.jar to the 51 * class path, since it contains data files which supply the information used 52 * by this file.<br> 53 * E.g. In Windows <br> 54 * <code>set CLASSPATH=%CLASSPATH%;$JAR_FILE_PATH/icu4j.jar</code>.<br> 55 * Otherwise, another method would be to copy the files uprops.dat and 56 * unames.icu from the icu4j source subdirectory 57 * <i>$ICU4J_SRC/src/com.ibm.icu.impl.data</i> to your class directory 58 * <i>$ICU4J_CLASS/com.ibm.icu.impl.data</i>. 59 * 60 * <p>Aside from the additions for UTF-16 support, and the updated Unicode 61 * properties, the main differences between UCharacter and Character are: 62 * <ul> 63 * <li> UCharacter is not designed to be a char wrapper and does not have 64 * APIs that involve management of that single char.<br> 65 * These include: 66 * <ul> 67 * <li> char charValue(), 68 * <li> int compareTo(java.lang.Character, java.lang.Character), etc. 69 * </ul> 70 * <li> UCharacter does not include Character APIs that are deprecated, nor 71 * does it include the Java-specific character information, such as 72 * boolean isJavaIdentifierPart(char ch). 73 * <li> Character maps characters 'A' - 'Z' and 'a' - 'z' to the numeric 74 * values '10' - '35'. UCharacter also does this in digit and 75 * getNumericValue, to adhere to the Java semantics of these 76 * methods. New methods unicodeDigit and 77 * getUnicodeNumericValue do not treat the above code points 78 * as having numeric values. This is a semantic change from ICU4J 1.3.1. 
79 * </ul> 80 * <p> 81 * Further details on the differences can be determined using the program 82 * <a href= 83 * "http://source.icu-project.org/repos/icu/icu4j/trunk/src/com/ibm/icu/dev/test/lang/UCharacterCompare.java"> 84 * com.ibm.icu.dev.test.lang.UCharacterCompare</a> 85 * </p> 86 * <p> 87 * In addition to Java compatibility functions, which calculate derived properties, 88 * this API provides low-level access to the Unicode Character Database. 89 * </p> 90 * <p> 91 * Unicode assigns each code point (not just assigned characters) values for 92 * many properties. 93 * Most of them are simple boolean flags, or constants from a small enumerated list. 94 * For some properties, values are strings or other relatively more complex types. 95 * </p> 96 * <p> 97 * For more information see 98 * <a href="http://www.unicode.org/ucd/">"About the Unicode Character Database"</a> 99 * (http://www.unicode.org/ucd/) 100 * and the <a href="http://www.icu-project.org/userguide/properties.html">ICU 101 * User Guide chapter on Properties</a> 102 * (http://www.icu-project.org/userguide/properties.html). 103 * </p> 104 * <p> 105 * There are also functions that provide easy migration from C/POSIX functions 106 * like isblank(). Their use is generally discouraged because the C/POSIX 107 * standards do not define their semantics beyond the ASCII range, which means 108 * that different implementations exhibit very different behavior. 109 * Instead, Unicode properties should be used directly. 110 * </p> 111 * <p> 112 * There are also only a few, broad C/POSIX character classes, and they tend 113 * to be used for conflicting purposes. For example, the "isalpha()" class 114 * is sometimes used to determine word boundaries, while a more sophisticated 115 * approach would at least distinguish initial letters from continuation 116 * characters (the latter including combining marks). 117 * (In ICU, BreakIterator is the most sophisticated API for word boundaries.) 
118 * Another example: There is no "istitle()" class for titlecase characters. 119 * </p> 120 * <p> 121 * ICU 3.4 and later provides API access for all twelve C/POSIX character classes. 122 * ICU implements them according to the Standard Recommendations in 123 * Annex C: Compatibility Properties of UTS #18 Unicode Regular Expressions 124 * (http://www.unicode.org/reports/tr18/#Compatibility_Properties). 125 * </p> 126 * <p> 127 * API access for C/POSIX character classes is as follows: 128 * <pre>{@code 129 * - alpha: isUAlphabetic(c) or hasBinaryProperty(c, UProperty.ALPHABETIC) 130 * - lower: isULowercase(c) or hasBinaryProperty(c, UProperty.LOWERCASE) 131 * - upper: isUUppercase(c) or hasBinaryProperty(c, UProperty.UPPERCASE) 132 * - punct: ((1<<getType(c)) & ((1<<DASH_PUNCTUATION)|(1<<START_PUNCTUATION)| 133 * (1<<END_PUNCTUATION)|(1<<CONNECTOR_PUNCTUATION)|(1<<OTHER_PUNCTUATION)| 134 * (1<<INITIAL_PUNCTUATION)|(1<<FINAL_PUNCTUATION)))!=0 135 * - digit: isDigit(c) or getType(c)==DECIMAL_DIGIT_NUMBER 136 * - xdigit: hasBinaryProperty(c, UProperty.POSIX_XDIGIT) 137 * - alnum: hasBinaryProperty(c, UProperty.POSIX_ALNUM) 138 * - space: isUWhiteSpace(c) or hasBinaryProperty(c, UProperty.WHITE_SPACE) 139 * - blank: hasBinaryProperty(c, UProperty.POSIX_BLANK) 140 * - cntrl: getType(c)==CONTROL 141 * - graph: hasBinaryProperty(c, UProperty.POSIX_GRAPH) 142 * - print: hasBinaryProperty(c, UProperty.POSIX_PRINT)}</pre> 143 * </p> 144 * <p> 145 * The C/POSIX character classes are also available in UnicodeSet patterns, 146 * using patterns like [:graph:] or \p{graph}. 147 * </p> 148 * 149 * {@icunote} There are several ICU (and Java) whitespace functions. 
150 * Comparison:<ul> 151 * <li> isUWhiteSpace=UCHAR_WHITE_SPACE: Unicode White_Space property; 152 * most of general categories "Z" (separators) + most whitespace ISO controls 153 * (including no-break spaces, but excluding IS1..IS4 and ZWSP) 154 * <li> isWhitespace: Java isWhitespace; Z + whitespace ISO controls but excluding no-break spaces 155 * <li> isSpaceChar: just Z (including no-break spaces)</ul> 156 * </p> 157 * <p> 158 * This class is not subclassable. 159 * </p> 160 * @author Syn Wee Quek 161 * @stable ICU 2.1 162 * @see com.ibm.icu.lang.UCharacterEnums 163 */ 164 165 public final class UCharacter implements ECharacterCategory, ECharacterDirection 166 { 167 // public inner classes ---------------------------------------------- 168 169 /** 170 * {@icuenhanced java.lang.Character.UnicodeBlock}.{@icu _usage_} 171 * 172 * A family of character subsets representing the character blocks in the 173 * Unicode specification, generated from Unicode Data file Blocks.txt. 174 * Character blocks generally define characters used for a specific script 175 * or purpose. A character is contained by at most one Unicode block. 176 * 177 * {@icunote} All fields named XXX_ID are specific to ICU. 
178 * 179 * @stable ICU 2.4 180 */ 181 public static final class UnicodeBlock extends Character.Subset 182 { 183 // block id corresponding to icu4c ----------------------------------- 184 185 /** 186 * @stable ICU 2.4 187 */ 188 public static final int INVALID_CODE_ID = -1; 189 /** 190 * @stable ICU 2.4 191 */ 192 public static final int BASIC_LATIN_ID = 1; 193 /** 194 * @stable ICU 2.4 195 */ 196 public static final int LATIN_1_SUPPLEMENT_ID = 2; 197 /** 198 * @stable ICU 2.4 199 */ 200 public static final int LATIN_EXTENDED_A_ID = 3; 201 /** 202 * @stable ICU 2.4 203 */ 204 public static final int LATIN_EXTENDED_B_ID = 4; 205 /** 206 * @stable ICU 2.4 207 */ 208 public static final int IPA_EXTENSIONS_ID = 5; 209 /** 210 * @stable ICU 2.4 211 */ 212 public static final int SPACING_MODIFIER_LETTERS_ID = 6; 213 /** 214 * @stable ICU 2.4 215 */ 216 public static final int COMBINING_DIACRITICAL_MARKS_ID = 7; 217 /** 218 * Unicode 3.2 renames this block to "Greek and Coptic". 219 * @stable ICU 2.4 220 */ 221 public static final int GREEK_ID = 8; 222 /** 223 * @stable ICU 2.4 224 */ 225 public static final int CYRILLIC_ID = 9; 226 /** 227 * @stable ICU 2.4 228 */ 229 public static final int ARMENIAN_ID = 10; 230 /** 231 * @stable ICU 2.4 232 */ 233 public static final int HEBREW_ID = 11; 234 /** 235 * @stable ICU 2.4 236 */ 237 public static final int ARABIC_ID = 12; 238 /** 239 * @stable ICU 2.4 240 */ 241 public static final int SYRIAC_ID = 13; 242 /** 243 * @stable ICU 2.4 244 */ 245 public static final int THAANA_ID = 14; 246 /** 247 * @stable ICU 2.4 248 */ 249 public static final int DEVANAGARI_ID = 15; 250 /** 251 * @stable ICU 2.4 252 */ 253 public static final int BENGALI_ID = 16; 254 /** 255 * @stable ICU 2.4 256 */ 257 public static final int GURMUKHI_ID = 17; 258 /** 259 * @stable ICU 2.4 260 */ 261 public static final int GUJARATI_ID = 18; 262 /** 263 * @stable ICU 2.4 264 */ 265 public static final int ORIYA_ID = 19; 266 /** 267 * @stable ICU 2.4 268 */ 
269 public static final int TAMIL_ID = 20; 270 /** 271 * @stable ICU 2.4 272 */ 273 public static final int TELUGU_ID = 21; 274 /** 275 * @stable ICU 2.4 276 */ 277 public static final int KANNADA_ID = 22; 278 /** 279 * @stable ICU 2.4 280 */ 281 public static final int MALAYALAM_ID = 23; 282 /** 283 * @stable ICU 2.4 284 */ 285 public static final int SINHALA_ID = 24; 286 /** 287 * @stable ICU 2.4 288 */ 289 public static final int THAI_ID = 25; 290 /** 291 * @stable ICU 2.4 292 */ 293 public static final int LAO_ID = 26; 294 /** 295 * @stable ICU 2.4 296 */ 297 public static final int TIBETAN_ID = 27; 298 /** 299 * @stable ICU 2.4 300 */ 301 public static final int MYANMAR_ID = 28; 302 /** 303 * @stable ICU 2.4 304 */ 305 public static final int GEORGIAN_ID = 29; 306 /** 307 * @stable ICU 2.4 308 */ 309 public static final int HANGUL_JAMO_ID = 30; 310 /** 311 * @stable ICU 2.4 312 */ 313 public static final int ETHIOPIC_ID = 31; 314 /** 315 * @stable ICU 2.4 316 */ 317 public static final int CHEROKEE_ID = 32; 318 /** 319 * @stable ICU 2.4 320 */ 321 public static final int UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_ID = 33; 322 /** 323 * @stable ICU 2.4 324 */ 325 public static final int OGHAM_ID = 34; 326 /** 327 * @stable ICU 2.4 328 */ 329 public static final int RUNIC_ID = 35; 330 /** 331 * @stable ICU 2.4 332 */ 333 public static final int KHMER_ID = 36; 334 /** 335 * @stable ICU 2.4 336 */ 337 public static final int MONGOLIAN_ID = 37; 338 /** 339 * @stable ICU 2.4 340 */ 341 public static final int LATIN_EXTENDED_ADDITIONAL_ID = 38; 342 /** 343 * @stable ICU 2.4 344 */ 345 public static final int GREEK_EXTENDED_ID = 39; 346 /** 347 * @stable ICU 2.4 348 */ 349 public static final int GENERAL_PUNCTUATION_ID = 40; 350 /** 351 * @stable ICU 2.4 352 */ 353 public static final int SUPERSCRIPTS_AND_SUBSCRIPTS_ID = 41; 354 /** 355 * @stable ICU 2.4 356 */ 357 public static final int CURRENCY_SYMBOLS_ID = 42; 358 /** 359 * Unicode 3.2 renames this block to "Combining 
Diacritical Marks for 360 * Symbols". 361 * @stable ICU 2.4 362 */ 363 public static final int COMBINING_MARKS_FOR_SYMBOLS_ID = 43; 364 /** 365 * @stable ICU 2.4 366 */ 367 public static final int LETTERLIKE_SYMBOLS_ID = 44; 368 /** 369 * @stable ICU 2.4 370 */ 371 public static final int NUMBER_FORMS_ID = 45; 372 /** 373 * @stable ICU 2.4 374 */ 375 public static final int ARROWS_ID = 46; 376 /** 377 * @stable ICU 2.4 378 */ 379 public static final int MATHEMATICAL_OPERATORS_ID = 47; 380 /** 381 * @stable ICU 2.4 382 */ 383 public static final int MISCELLANEOUS_TECHNICAL_ID = 48; 384 /** 385 * @stable ICU 2.4 386 */ 387 public static final int CONTROL_PICTURES_ID = 49; 388 /** 389 * @stable ICU 2.4 390 */ 391 public static final int OPTICAL_CHARACTER_RECOGNITION_ID = 50; 392 /** 393 * @stable ICU 2.4 394 */ 395 public static final int ENCLOSED_ALPHANUMERICS_ID = 51; 396 /** 397 * @stable ICU 2.4 398 */ 399 public static final int BOX_DRAWING_ID = 52; 400 /** 401 * @stable ICU 2.4 402 */ 403 public static final int BLOCK_ELEMENTS_ID = 53; 404 /** 405 * @stable ICU 2.4 406 */ 407 public static final int GEOMETRIC_SHAPES_ID = 54; 408 /** 409 * @stable ICU 2.4 410 */ 411 public static final int MISCELLANEOUS_SYMBOLS_ID = 55; 412 /** 413 * @stable ICU 2.4 414 */ 415 public static final int DINGBATS_ID = 56; 416 /** 417 * @stable ICU 2.4 418 */ 419 public static final int BRAILLE_PATTERNS_ID = 57; 420 /** 421 * @stable ICU 2.4 422 */ 423 public static final int CJK_RADICALS_SUPPLEMENT_ID = 58; 424 /** 425 * @stable ICU 2.4 426 */ 427 public static final int KANGXI_RADICALS_ID = 59; 428 /** 429 * @stable ICU 2.4 430 */ 431 public static final int IDEOGRAPHIC_DESCRIPTION_CHARACTERS_ID = 60; 432 /** 433 * @stable ICU 2.4 434 */ 435 public static final int CJK_SYMBOLS_AND_PUNCTUATION_ID = 61; 436 /** 437 * @stable ICU 2.4 438 */ 439 public static final int HIRAGANA_ID = 62; 440 /** 441 * @stable ICU 2.4 442 */ 443 public static final int KATAKANA_ID = 63; 444 /** 445 * 
@stable ICU 2.4 446 */ 447 public static final int BOPOMOFO_ID = 64; 448 /** 449 * @stable ICU 2.4 450 */ 451 public static final int HANGUL_COMPATIBILITY_JAMO_ID = 65; 452 /** 453 * @stable ICU 2.4 454 */ 455 public static final int KANBUN_ID = 66; 456 /** 457 * @stable ICU 2.4 458 */ 459 public static final int BOPOMOFO_EXTENDED_ID = 67; 460 /** 461 * @stable ICU 2.4 462 */ 463 public static final int ENCLOSED_CJK_LETTERS_AND_MONTHS_ID = 68; 464 /** 465 * @stable ICU 2.4 466 */ 467 public static final int CJK_COMPATIBILITY_ID = 69; 468 /** 469 * @stable ICU 2.4 470 */ 471 public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_ID = 70; 472 /** 473 * @stable ICU 2.4 474 */ 475 public static final int CJK_UNIFIED_IDEOGRAPHS_ID = 71; 476 /** 477 * @stable ICU 2.4 478 */ 479 public static final int YI_SYLLABLES_ID = 72; 480 /** 481 * @stable ICU 2.4 482 */ 483 public static final int YI_RADICALS_ID = 73; 484 /** 485 * @stable ICU 2.4 486 */ 487 public static final int HANGUL_SYLLABLES_ID = 74; 488 /** 489 * @stable ICU 2.4 490 */ 491 public static final int HIGH_SURROGATES_ID = 75; 492 /** 493 * @stable ICU 2.4 494 */ 495 public static final int HIGH_PRIVATE_USE_SURROGATES_ID = 76; 496 /** 497 * @stable ICU 2.4 498 */ 499 public static final int LOW_SURROGATES_ID = 77; 500 /** 501 * Same as public static final int PRIVATE_USE. 502 * Until Unicode 3.1.1; the corresponding block name was "Private Use"; 503 * and multiple code point ranges had this block. 504 * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" 505 * and adds separate blocks for the supplementary PUAs. 506 * @stable ICU 2.4 507 */ 508 public static final int PRIVATE_USE_AREA_ID = 78; 509 /** 510 * Same as public static final int PRIVATE_USE_AREA. 511 * Until Unicode 3.1.1; the corresponding block name was "Private Use"; 512 * and multiple code point ranges had this block. 
513 * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" 514 * and adds separate blocks for the supplementary PUAs. 515 * @stable ICU 2.4 516 */ 517 public static final int PRIVATE_USE_ID = PRIVATE_USE_AREA_ID; 518 /** 519 * @stable ICU 2.4 520 */ 521 public static final int CJK_COMPATIBILITY_IDEOGRAPHS_ID = 79; 522 /** 523 * @stable ICU 2.4 524 */ 525 public static final int ALPHABETIC_PRESENTATION_FORMS_ID = 80; 526 /** 527 * @stable ICU 2.4 528 */ 529 public static final int ARABIC_PRESENTATION_FORMS_A_ID = 81; 530 /** 531 * @stable ICU 2.4 532 */ 533 public static final int COMBINING_HALF_MARKS_ID = 82; 534 /** 535 * @stable ICU 2.4 536 */ 537 public static final int CJK_COMPATIBILITY_FORMS_ID = 83; 538 /** 539 * @stable ICU 2.4 540 */ 541 public static final int SMALL_FORM_VARIANTS_ID = 84; 542 /** 543 * @stable ICU 2.4 544 */ 545 public static final int ARABIC_PRESENTATION_FORMS_B_ID = 85; 546 /** 547 * @stable ICU 2.4 548 */ 549 public static final int SPECIALS_ID = 86; 550 /** 551 * @stable ICU 2.4 552 */ 553 public static final int HALFWIDTH_AND_FULLWIDTH_FORMS_ID = 87; 554 /** 555 * @stable ICU 2.4 556 */ 557 public static final int OLD_ITALIC_ID = 88; 558 /** 559 * @stable ICU 2.4 560 */ 561 public static final int GOTHIC_ID = 89; 562 /** 563 * @stable ICU 2.4 564 */ 565 public static final int DESERET_ID = 90; 566 /** 567 * @stable ICU 2.4 568 */ 569 public static final int BYZANTINE_MUSICAL_SYMBOLS_ID = 91; 570 /** 571 * @stable ICU 2.4 572 */ 573 public static final int MUSICAL_SYMBOLS_ID = 92; 574 /** 575 * @stable ICU 2.4 576 */ 577 public static final int MATHEMATICAL_ALPHANUMERIC_SYMBOLS_ID = 93; 578 /** 579 * @stable ICU 2.4 580 */ 581 public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_ID = 94; 582 /** 583 * @stable ICU 2.4 584 */ 585 public static final int 586 CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_ID = 95; 587 /** 588 * @stable ICU 2.4 589 */ 590 public static final int TAGS_ID = 96; 591 592 // New blocks in Unicode 
3.2 593 594 /** 595 * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement". 596 * @stable ICU 2.4 597 */ 598 public static final int CYRILLIC_SUPPLEMENTARY_ID = 97; 599 /** 600 * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement". 601 * @stable ICU 3.0 602 */ 603 604 public static final int CYRILLIC_SUPPLEMENT_ID = 97; 605 /** 606 * @stable ICU 2.4 607 */ 608 public static final int TAGALOG_ID = 98; 609 /** 610 * @stable ICU 2.4 611 */ 612 public static final int HANUNOO_ID = 99; 613 /** 614 * @stable ICU 2.4 615 */ 616 public static final int BUHID_ID = 100; 617 /** 618 * @stable ICU 2.4 619 */ 620 public static final int TAGBANWA_ID = 101; 621 /** 622 * @stable ICU 2.4 623 */ 624 public static final int MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A_ID = 102; 625 /** 626 * @stable ICU 2.4 627 */ 628 public static final int SUPPLEMENTAL_ARROWS_A_ID = 103; 629 /** 630 * @stable ICU 2.4 631 */ 632 public static final int SUPPLEMENTAL_ARROWS_B_ID = 104; 633 /** 634 * @stable ICU 2.4 635 */ 636 public static final int MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B_ID = 105; 637 /** 638 * @stable ICU 2.4 639 */ 640 public static final int SUPPLEMENTAL_MATHEMATICAL_OPERATORS_ID = 106; 641 /** 642 * @stable ICU 2.4 643 */ 644 public static final int KATAKANA_PHONETIC_EXTENSIONS_ID = 107; 645 /** 646 * @stable ICU 2.4 647 */ 648 public static final int VARIATION_SELECTORS_ID = 108; 649 /** 650 * @stable ICU 2.4 651 */ 652 public static final int SUPPLEMENTARY_PRIVATE_USE_AREA_A_ID = 109; 653 /** 654 * @stable ICU 2.4 655 */ 656 public static final int SUPPLEMENTARY_PRIVATE_USE_AREA_B_ID = 110; 657 658 /** 659 * @stable ICU 2.6 660 */ 661 public static final int LIMBU_ID = 111; /*[1900]*/ 662 /** 663 * @stable ICU 2.6 664 */ 665 public static final int TAI_LE_ID = 112; /*[1950]*/ 666 /** 667 * @stable ICU 2.6 668 */ 669 public static final int KHMER_SYMBOLS_ID = 113; /*[19E0]*/ 670 /** 671 * @stable ICU 2.6 672 */ 673 public static 
final int PHONETIC_EXTENSIONS_ID = 114; /*[1D00]*/ 674 /** 675 * @stable ICU 2.6 676 */ 677 public static final int MISCELLANEOUS_SYMBOLS_AND_ARROWS_ID = 115; /*[2B00]*/ 678 /** 679 * @stable ICU 2.6 680 */ 681 public static final int YIJING_HEXAGRAM_SYMBOLS_ID = 116; /*[4DC0]*/ 682 /** 683 * @stable ICU 2.6 684 */ 685 public static final int LINEAR_B_SYLLABARY_ID = 117; /*[10000]*/ 686 /** 687 * @stable ICU 2.6 688 */ 689 public static final int LINEAR_B_IDEOGRAMS_ID = 118; /*[10080]*/ 690 /** 691 * @stable ICU 2.6 692 */ 693 public static final int AEGEAN_NUMBERS_ID = 119; /*[10100]*/ 694 /** 695 * @stable ICU 2.6 696 */ 697 public static final int UGARITIC_ID = 120; /*[10380]*/ 698 /** 699 * @stable ICU 2.6 700 */ 701 public static final int SHAVIAN_ID = 121; /*[10450]*/ 702 /** 703 * @stable ICU 2.6 704 */ 705 public static final int OSMANYA_ID = 122; /*[10480]*/ 706 /** 707 * @stable ICU 2.6 708 */ 709 public static final int CYPRIOT_SYLLABARY_ID = 123; /*[10800]*/ 710 /** 711 * @stable ICU 2.6 712 */ 713 public static final int TAI_XUAN_JING_SYMBOLS_ID = 124; /*[1D300]*/ 714 /** 715 * @stable ICU 2.6 716 */ 717 public static final int VARIATION_SELECTORS_SUPPLEMENT_ID = 125; /*[E0100]*/ 718 719 /* New blocks in Unicode 4.1 */ 720 721 /** 722 * @stable ICU 3.4 723 */ 724 public static final int ANCIENT_GREEK_MUSICAL_NOTATION_ID = 126; /*[1D200]*/ 725 726 /** 727 * @stable ICU 3.4 728 */ 729 public static final int ANCIENT_GREEK_NUMBERS_ID = 127; /*[10140]*/ 730 731 /** 732 * @stable ICU 3.4 733 */ 734 public static final int ARABIC_SUPPLEMENT_ID = 128; /*[0750]*/ 735 736 /** 737 * @stable ICU 3.4 738 */ 739 public static final int BUGINESE_ID = 129; /*[1A00]*/ 740 741 /** 742 * @stable ICU 3.4 743 */ 744 public static final int CJK_STROKES_ID = 130; /*[31C0]*/ 745 746 /** 747 * @stable ICU 3.4 748 */ 749 public static final int COMBINING_DIACRITICAL_MARKS_SUPPLEMENT_ID = 131; /*[1DC0]*/ 750 751 /** 752 * @stable ICU 3.4 753 */ 754 public static final int 
COPTIC_ID = 132; /*[2C80]*/ 755 756 /** 757 * @stable ICU 3.4 758 */ 759 public static final int ETHIOPIC_EXTENDED_ID = 133; /*[2D80]*/ 760 761 /** 762 * @stable ICU 3.4 763 */ 764 public static final int ETHIOPIC_SUPPLEMENT_ID = 134; /*[1380]*/ 765 766 /** 767 * @stable ICU 3.4 768 */ 769 public static final int GEORGIAN_SUPPLEMENT_ID = 135; /*[2D00]*/ 770 771 /** 772 * @stable ICU 3.4 773 */ 774 public static final int GLAGOLITIC_ID = 136; /*[2C00]*/ 775 776 /** 777 * @stable ICU 3.4 778 */ 779 public static final int KHAROSHTHI_ID = 137; /*[10A00]*/ 780 781 /** 782 * @stable ICU 3.4 783 */ 784 public static final int MODIFIER_TONE_LETTERS_ID = 138; /*[A700]*/ 785 786 /** 787 * @stable ICU 3.4 788 */ 789 public static final int NEW_TAI_LUE_ID = 139; /*[1980]*/ 790 791 /** 792 * @stable ICU 3.4 793 */ 794 public static final int OLD_PERSIAN_ID = 140; /*[103A0]*/ 795 796 /** 797 * @stable ICU 3.4 798 */ 799 public static final int PHONETIC_EXTENSIONS_SUPPLEMENT_ID = 141; /*[1D80]*/ 800 801 /** 802 * @stable ICU 3.4 803 */ 804 public static final int SUPPLEMENTAL_PUNCTUATION_ID = 142; /*[2E00]*/ 805 806 /** 807 * @stable ICU 3.4 808 */ 809 public static final int SYLOTI_NAGRI_ID = 143; /*[A800]*/ 810 811 /** 812 * @stable ICU 3.4 813 */ 814 public static final int TIFINAGH_ID = 144; /*[2D30]*/ 815 816 /** 817 * @stable ICU 3.4 818 */ 819 public static final int VERTICAL_FORMS_ID = 145; /*[FE10]*/ 820 821 /* New blocks in Unicode 5.0 */ 822 823 /** 824 * @stable ICU 3.6 825 */ 826 public static final int NKO_ID = 146; /*[07C0]*/ 827 /** 828 * @stable ICU 3.6 829 */ 830 public static final int BALINESE_ID = 147; /*[1B00]*/ 831 /** 832 * @stable ICU 3.6 833 */ 834 public static final int LATIN_EXTENDED_C_ID = 148; /*[2C60]*/ 835 /** 836 * @stable ICU 3.6 837 */ 838 public static final int LATIN_EXTENDED_D_ID = 149; /*[A720]*/ 839 /** 840 * @stable ICU 3.6 841 */ 842 public static final int PHAGS_PA_ID = 150; /*[A840]*/ 843 /** 844 * @stable ICU 3.6 845 */ 846 public 
static final int PHOENICIAN_ID = 151; /*[10900]*/ 847 /** 848 * @stable ICU 3.6 849 */ 850 public static final int CUNEIFORM_ID = 152; /*[12000]*/ 851 /** 852 * @stable ICU 3.6 853 */ 854 public static final int CUNEIFORM_NUMBERS_AND_PUNCTUATION_ID = 153; /*[12400]*/ 855 /** 856 * @stable ICU 3.6 857 */ 858 public static final int COUNTING_ROD_NUMERALS_ID = 154; /*[1D360]*/ 859 860 /** 861 * @stable ICU 4.0 862 */ 863 public static final int SUNDANESE_ID = 155; /* [1B80] */ 864 865 /** 866 * @stable ICU 4.0 867 */ 868 public static final int LEPCHA_ID = 156; /* [1C00] */ 869 870 /** 871 * @stable ICU 4.0 872 */ 873 public static final int OL_CHIKI_ID = 157; /* [1C50] */ 874 875 /** 876 * @stable ICU 4.0 877 */ 878 public static final int CYRILLIC_EXTENDED_A_ID = 158; /* [2DE0] */ 879 880 /** 881 * @stable ICU 4.0 882 */ 883 public static final int VAI_ID = 159; /* [A500] */ 884 885 /** 886 * @stable ICU 4.0 887 */ 888 public static final int CYRILLIC_EXTENDED_B_ID = 160; /* [A640] */ 889 890 /** 891 * @stable ICU 4.0 892 */ 893 public static final int SAURASHTRA_ID = 161; /* [A880] */ 894 895 /** 896 * @stable ICU 4.0 897 */ 898 public static final int KAYAH_LI_ID = 162; /* [A900] */ 899 900 /** 901 * @stable ICU 4.0 902 */ 903 public static final int REJANG_ID = 163; /* [A930] */ 904 905 /** 906 * @stable ICU 4.0 907 */ 908 public static final int CHAM_ID = 164; /* [AA00] */ 909 910 /** 911 * @stable ICU 4.0 912 */ 913 public static final int ANCIENT_SYMBOLS_ID = 165; /* [10190] */ 914 915 /** 916 * @stable ICU 4.0 917 */ 918 public static final int PHAISTOS_DISC_ID = 166; /* [101D0] */ 919 920 /** 921 * @stable ICU 4.0 922 */ 923 public static final int LYCIAN_ID = 167; /* [10280] */ 924 925 /** 926 * @stable ICU 4.0 927 */ 928 public static final int CARIAN_ID = 168; /* [102A0] */ 929 930 /** 931 * @stable ICU 4.0 932 */ 933 public static final int LYDIAN_ID = 169; /* [10920] */ 934 935 /** 936 * @stable ICU 4.0 937 */ 938 public static final int 
MAHJONG_TILES_ID = 170; /* [1F000] */ 939 940 /** 941 * @stable ICU 4.0 942 */ 943 public static final int DOMINO_TILES_ID = 171; /* [1F030] */ 944 945 /* New blocks in Unicode 5.2 */ 946 947 /** @stable ICU 4.4 */ 948 public static final int SAMARITAN_ID = 172; /*[0800]*/ 949 /** @stable ICU 4.4 */ 950 public static final int UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_ID = 173; /*[18B0]*/ 951 /** @stable ICU 4.4 */ 952 public static final int TAI_THAM_ID = 174; /*[1A20]*/ 953 /** @stable ICU 4.4 */ 954 public static final int VEDIC_EXTENSIONS_ID = 175; /*[1CD0]*/ 955 /** @stable ICU 4.4 */ 956 public static final int LISU_ID = 176; /*[A4D0]*/ 957 /** @stable ICU 4.4 */ 958 public static final int BAMUM_ID = 177; /*[A6A0]*/ 959 /** @stable ICU 4.4 */ 960 public static final int COMMON_INDIC_NUMBER_FORMS_ID = 178; /*[A830]*/ 961 /** @stable ICU 4.4 */ 962 public static final int DEVANAGARI_EXTENDED_ID = 179; /*[A8E0]*/ 963 /** @stable ICU 4.4 */ 964 public static final int HANGUL_JAMO_EXTENDED_A_ID = 180; /*[A960]*/ 965 /** @stable ICU 4.4 */ 966 public static final int JAVANESE_ID = 181; /*[A980]*/ 967 /** @stable ICU 4.4 */ 968 public static final int MYANMAR_EXTENDED_A_ID = 182; /*[AA60]*/ 969 /** @stable ICU 4.4 */ 970 public static final int TAI_VIET_ID = 183; /*[AA80]*/ 971 /** @stable ICU 4.4 */ 972 public static final int MEETEI_MAYEK_ID = 184; /*[ABC0]*/ 973 /** @stable ICU 4.4 */ 974 public static final int HANGUL_JAMO_EXTENDED_B_ID = 185; /*[D7B0]*/ 975 /** @stable ICU 4.4 */ 976 public static final int IMPERIAL_ARAMAIC_ID = 186; /*[10840]*/ 977 /** @stable ICU 4.4 */ 978 public static final int OLD_SOUTH_ARABIAN_ID = 187; /*[10A60]*/ 979 /** @stable ICU 4.4 */ 980 public static final int AVESTAN_ID = 188; /*[10B00]*/ 981 /** @stable ICU 4.4 */ 982 public static final int INSCRIPTIONAL_PARTHIAN_ID = 189; /*[10B40]*/ 983 /** @stable ICU 4.4 */ 984 public static final int INSCRIPTIONAL_PAHLAVI_ID = 190; /*[10B60]*/ 985 /** @stable ICU 4.4 */ 986 public 
static final int OLD_TURKIC_ID = 191; /*[10C00]*/ 987 /** @stable ICU 4.4 */ 988 public static final int RUMI_NUMERAL_SYMBOLS_ID = 192; /*[10E60]*/ 989 /** @stable ICU 4.4 */ 990 public static final int KAITHI_ID = 193; /*[11080]*/ 991 /** @stable ICU 4.4 */ 992 public static final int EGYPTIAN_HIEROGLYPHS_ID = 194; /*[13000]*/ 993 /** @stable ICU 4.4 */ 994 public static final int ENCLOSED_ALPHANUMERIC_SUPPLEMENT_ID = 195; /*[1F100]*/ 995 /** @stable ICU 4.4 */ 996 public static final int ENCLOSED_IDEOGRAPHIC_SUPPLEMENT_ID = 196; /*[1F200]*/ 997 /** @stable ICU 4.4 */ 998 public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_ID = 197; /*[2A700]*/ 999 1000 /* New blocks in Unicode 6.0 */ 1001 1002 /** @stable ICU 4.6 */ 1003 public static final int MANDAIC_ID = 198; /*[0840]*/ 1004 /** @stable ICU 4.6 */ 1005 public static final int BATAK_ID = 199; /*[1BC0]*/ 1006 /** @stable ICU 4.6 */ 1007 public static final int ETHIOPIC_EXTENDED_A_ID = 200; /*[AB00]*/ 1008 /** @stable ICU 4.6 */ 1009 public static final int BRAHMI_ID = 201; /*[11000]*/ 1010 /** @stable ICU 4.6 */ 1011 public static final int BAMUM_SUPPLEMENT_ID = 202; /*[16800]*/ 1012 /** @stable ICU 4.6 */ 1013 public static final int KANA_SUPPLEMENT_ID = 203; /*[1B000]*/ 1014 /** @stable ICU 4.6 */ 1015 public static final int PLAYING_CARDS_ID = 204; /*[1F0A0]*/ 1016 /** @stable ICU 4.6 */ 1017 public static final int MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS_ID = 205; /*[1F300]*/ 1018 /** @stable ICU 4.6 */ 1019 public static final int EMOTICONS_ID = 206; /*[1F600]*/ 1020 /** @stable ICU 4.6 */ 1021 public static final int TRANSPORT_AND_MAP_SYMBOLS_ID = 207; /*[1F680]*/ 1022 /** @stable ICU 4.6 */ 1023 public static final int ALCHEMICAL_SYMBOLS_ID = 208; /*[1F700]*/ 1024 /** @stable ICU 4.6 */ 1025 public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_ID = 209; /*[2B740]*/ 1026 1027 /* New blocks in Unicode 6.1 */ 1028 1029 /** @stable ICU 49 */ 1030 public static final int ARABIC_EXTENDED_A_ID = 210; 
/*[08A0]*/ 1031 /** @stable ICU 49 */ 1032 public static final int ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS_ID = 211; /*[1EE00]*/ 1033 /** @stable ICU 49 */ 1034 public static final int CHAKMA_ID = 212; /*[11100]*/ 1035 /** @stable ICU 49 */ 1036 public static final int MEETEI_MAYEK_EXTENSIONS_ID = 213; /*[AAE0]*/ 1037 /** @stable ICU 49 */ 1038 public static final int MEROITIC_CURSIVE_ID = 214; /*[109A0]*/ 1039 /** @stable ICU 49 */ 1040 public static final int MEROITIC_HIEROGLYPHS_ID = 215; /*[10980]*/ 1041 /** @stable ICU 49 */ 1042 public static final int MIAO_ID = 216; /*[16F00]*/ 1043 /** @stable ICU 49 */ 1044 public static final int SHARADA_ID = 217; /*[11180]*/ 1045 /** @stable ICU 49 */ 1046 public static final int SORA_SOMPENG_ID = 218; /*[110D0]*/ 1047 /** @stable ICU 49 */ 1048 public static final int SUNDANESE_SUPPLEMENT_ID = 219; /*[1CC0]*/ 1049 /** @stable ICU 49 */ 1050 public static final int TAKRI_ID = 220; /*[11680]*/ 1051 1052 /* New blocks in Unicode 7.0 */ 1053 1054 /** @stable ICU 54 */ 1055 public static final int BASSA_VAH_ID = 221; /*[16AD0]*/ 1056 /** @stable ICU 54 */ 1057 public static final int CAUCASIAN_ALBANIAN_ID = 222; /*[10530]*/ 1058 /** @stable ICU 54 */ 1059 public static final int COPTIC_EPACT_NUMBERS_ID = 223; /*[102E0]*/ 1060 /** @stable ICU 54 */ 1061 public static final int COMBINING_DIACRITICAL_MARKS_EXTENDED_ID = 224; /*[1AB0]*/ 1062 /** @stable ICU 54 */ 1063 public static final int DUPLOYAN_ID = 225; /*[1BC00]*/ 1064 /** @stable ICU 54 */ 1065 public static final int ELBASAN_ID = 226; /*[10500]*/ 1066 /** @stable ICU 54 */ 1067 public static final int GEOMETRIC_SHAPES_EXTENDED_ID = 227; /*[1F780]*/ 1068 /** @stable ICU 54 */ 1069 public static final int GRANTHA_ID = 228; /*[11300]*/ 1070 /** @stable ICU 54 */ 1071 public static final int KHOJKI_ID = 229; /*[11200]*/ 1072 /** @stable ICU 54 */ 1073 public static final int KHUDAWADI_ID = 230; /*[112B0]*/ 1074 /** @stable ICU 54 */ 1075 public static final int 
LATIN_EXTENDED_E_ID = 231; /*[AB30]*/ 1076 /** @stable ICU 54 */ 1077 public static final int LINEAR_A_ID = 232; /*[10600]*/ 1078 /** @stable ICU 54 */ 1079 public static final int MAHAJANI_ID = 233; /*[11150]*/ 1080 /** @stable ICU 54 */ 1081 public static final int MANICHAEAN_ID = 234; /*[10AC0]*/ 1082 /** @stable ICU 54 */ 1083 public static final int MENDE_KIKAKUI_ID = 235; /*[1E800]*/ 1084 /** @stable ICU 54 */ 1085 public static final int MODI_ID = 236; /*[11600]*/ 1086 /** @stable ICU 54 */ 1087 public static final int MRO_ID = 237; /*[16A40]*/ 1088 /** @stable ICU 54 */ 1089 public static final int MYANMAR_EXTENDED_B_ID = 238; /*[A9E0]*/ 1090 /** @stable ICU 54 */ 1091 public static final int NABATAEAN_ID = 239; /*[10880]*/ 1092 /** @stable ICU 54 */ 1093 public static final int OLD_NORTH_ARABIAN_ID = 240; /*[10A80]*/ 1094 /** @stable ICU 54 */ 1095 public static final int OLD_PERMIC_ID = 241; /*[10350]*/ 1096 /** @stable ICU 54 */ 1097 public static final int ORNAMENTAL_DINGBATS_ID = 242; /*[1F650]*/ 1098 /** @stable ICU 54 */ 1099 public static final int PAHAWH_HMONG_ID = 243; /*[16B00]*/ 1100 /** @stable ICU 54 */ 1101 public static final int PALMYRENE_ID = 244; /*[10860]*/ 1102 /** @stable ICU 54 */ 1103 public static final int PAU_CIN_HAU_ID = 245; /*[11AC0]*/ 1104 /** @stable ICU 54 */ 1105 public static final int PSALTER_PAHLAVI_ID = 246; /*[10B80]*/ 1106 /** @stable ICU 54 */ 1107 public static final int SHORTHAND_FORMAT_CONTROLS_ID = 247; /*[1BCA0]*/ 1108 /** @stable ICU 54 */ 1109 public static final int SIDDHAM_ID = 248; /*[11580]*/ 1110 /** @stable ICU 54 */ 1111 public static final int SINHALA_ARCHAIC_NUMBERS_ID = 249; /*[111E0]*/ 1112 /** @stable ICU 54 */ 1113 public static final int SUPPLEMENTAL_ARROWS_C_ID = 250; /*[1F800]*/ 1114 /** @stable ICU 54 */ 1115 public static final int TIRHUTA_ID = 251; /*[11480]*/ 1116 /** @stable ICU 54 */ 1117 public static final int WARANG_CITI_ID = 252; /*[118A0]*/ 1118 1119 /** 1120 * @stable ICU 2.4 1121 
*/ 1122 public static final int COUNT = 253; 1123 1124 // blocks objects --------------------------------------------------- 1125 1126 /** 1127 * Array of UnicodeBlocks, for easy access in getInstance(int) 1128 */ 1129 private final static UnicodeBlock BLOCKS_[] = new UnicodeBlock[COUNT]; 1130 1131 /** 1132 * @stable ICU 2.6 1133 */ 1134 public static final UnicodeBlock NO_BLOCK 1135 = new UnicodeBlock("NO_BLOCK", 0); 1136 1137 /** 1138 * @stable ICU 2.4 1139 */ 1140 public static final UnicodeBlock BASIC_LATIN 1141 = new UnicodeBlock("BASIC_LATIN", BASIC_LATIN_ID); 1142 /** 1143 * @stable ICU 2.4 1144 */ 1145 public static final UnicodeBlock LATIN_1_SUPPLEMENT 1146 = new UnicodeBlock("LATIN_1_SUPPLEMENT", LATIN_1_SUPPLEMENT_ID); 1147 /** 1148 * @stable ICU 2.4 1149 */ 1150 public static final UnicodeBlock LATIN_EXTENDED_A 1151 = new UnicodeBlock("LATIN_EXTENDED_A", LATIN_EXTENDED_A_ID); 1152 /** 1153 * @stable ICU 2.4 1154 */ 1155 public static final UnicodeBlock LATIN_EXTENDED_B 1156 = new UnicodeBlock("LATIN_EXTENDED_B", LATIN_EXTENDED_B_ID); 1157 /** 1158 * @stable ICU 2.4 1159 */ 1160 public static final UnicodeBlock IPA_EXTENSIONS 1161 = new UnicodeBlock("IPA_EXTENSIONS", IPA_EXTENSIONS_ID); 1162 /** 1163 * @stable ICU 2.4 1164 */ 1165 public static final UnicodeBlock SPACING_MODIFIER_LETTERS 1166 = new UnicodeBlock("SPACING_MODIFIER_LETTERS", SPACING_MODIFIER_LETTERS_ID); 1167 /** 1168 * @stable ICU 2.4 1169 */ 1170 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS 1171 = new UnicodeBlock("COMBINING_DIACRITICAL_MARKS", COMBINING_DIACRITICAL_MARKS_ID); 1172 /** 1173 * Unicode 3.2 renames this block to "Greek and Coptic". 
1174 * @stable ICU 2.4 1175 */ 1176 public static final UnicodeBlock GREEK 1177 = new UnicodeBlock("GREEK", GREEK_ID); 1178 /** 1179 * @stable ICU 2.4 1180 */ 1181 public static final UnicodeBlock CYRILLIC 1182 = new UnicodeBlock("CYRILLIC", CYRILLIC_ID); 1183 /** 1184 * @stable ICU 2.4 1185 */ 1186 public static final UnicodeBlock ARMENIAN 1187 = new UnicodeBlock("ARMENIAN", ARMENIAN_ID); 1188 /** 1189 * @stable ICU 2.4 1190 */ 1191 public static final UnicodeBlock HEBREW 1192 = new UnicodeBlock("HEBREW", HEBREW_ID); 1193 /** 1194 * @stable ICU 2.4 1195 */ 1196 public static final UnicodeBlock ARABIC 1197 = new UnicodeBlock("ARABIC", ARABIC_ID); 1198 /** 1199 * @stable ICU 2.4 1200 */ 1201 public static final UnicodeBlock SYRIAC 1202 = new UnicodeBlock("SYRIAC", SYRIAC_ID); 1203 /** 1204 * @stable ICU 2.4 1205 */ 1206 public static final UnicodeBlock THAANA 1207 = new UnicodeBlock("THAANA", THAANA_ID); 1208 /** 1209 * @stable ICU 2.4 1210 */ 1211 public static final UnicodeBlock DEVANAGARI 1212 = new UnicodeBlock("DEVANAGARI", DEVANAGARI_ID); 1213 /** 1214 * @stable ICU 2.4 1215 */ 1216 public static final UnicodeBlock BENGALI 1217 = new UnicodeBlock("BENGALI", BENGALI_ID); 1218 /** 1219 * @stable ICU 2.4 1220 */ 1221 public static final UnicodeBlock GURMUKHI 1222 = new UnicodeBlock("GURMUKHI", GURMUKHI_ID); 1223 /** 1224 * @stable ICU 2.4 1225 */ 1226 public static final UnicodeBlock GUJARATI 1227 = new UnicodeBlock("GUJARATI", GUJARATI_ID); 1228 /** 1229 * @stable ICU 2.4 1230 */ 1231 public static final UnicodeBlock ORIYA 1232 = new UnicodeBlock("ORIYA", ORIYA_ID); 1233 /** 1234 * @stable ICU 2.4 1235 */ 1236 public static final UnicodeBlock TAMIL 1237 = new UnicodeBlock("TAMIL", TAMIL_ID); 1238 /** 1239 * @stable ICU 2.4 1240 */ 1241 public static final UnicodeBlock TELUGU 1242 = new UnicodeBlock("TELUGU", TELUGU_ID); 1243 /** 1244 * @stable ICU 2.4 1245 */ 1246 public static final UnicodeBlock KANNADA 1247 = new UnicodeBlock("KANNADA", KANNADA_ID); 1248 /** 
1249 * @stable ICU 2.4 1250 */ 1251 public static final UnicodeBlock MALAYALAM 1252 = new UnicodeBlock("MALAYALAM", MALAYALAM_ID); 1253 /** 1254 * @stable ICU 2.4 1255 */ 1256 public static final UnicodeBlock SINHALA 1257 = new UnicodeBlock("SINHALA", SINHALA_ID); 1258 /** 1259 * @stable ICU 2.4 1260 */ 1261 public static final UnicodeBlock THAI 1262 = new UnicodeBlock("THAI", THAI_ID); 1263 /** 1264 * @stable ICU 2.4 1265 */ 1266 public static final UnicodeBlock LAO 1267 = new UnicodeBlock("LAO", LAO_ID); 1268 /** 1269 * @stable ICU 2.4 1270 */ 1271 public static final UnicodeBlock TIBETAN 1272 = new UnicodeBlock("TIBETAN", TIBETAN_ID); 1273 /** 1274 * @stable ICU 2.4 1275 */ 1276 public static final UnicodeBlock MYANMAR 1277 = new UnicodeBlock("MYANMAR", MYANMAR_ID); 1278 /** 1279 * @stable ICU 2.4 1280 */ 1281 public static final UnicodeBlock GEORGIAN 1282 = new UnicodeBlock("GEORGIAN", GEORGIAN_ID); 1283 /** 1284 * @stable ICU 2.4 1285 */ 1286 public static final UnicodeBlock HANGUL_JAMO 1287 = new UnicodeBlock("HANGUL_JAMO", HANGUL_JAMO_ID); 1288 /** 1289 * @stable ICU 2.4 1290 */ 1291 public static final UnicodeBlock ETHIOPIC 1292 = new UnicodeBlock("ETHIOPIC", ETHIOPIC_ID); 1293 /** 1294 * @stable ICU 2.4 1295 */ 1296 public static final UnicodeBlock CHEROKEE 1297 = new UnicodeBlock("CHEROKEE", CHEROKEE_ID); 1298 /** 1299 * @stable ICU 2.4 1300 */ 1301 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS 1302 = new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS", 1303 UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_ID); 1304 /** 1305 * @stable ICU 2.4 1306 */ 1307 public static final UnicodeBlock OGHAM 1308 = new UnicodeBlock("OGHAM", OGHAM_ID); 1309 /** 1310 * @stable ICU 2.4 1311 */ 1312 public static final UnicodeBlock RUNIC 1313 = new UnicodeBlock("RUNIC", RUNIC_ID); 1314 /** 1315 * @stable ICU 2.4 1316 */ 1317 public static final UnicodeBlock KHMER 1318 = new UnicodeBlock("KHMER", KHMER_ID); 1319 /** 1320 * @stable ICU 2.4 1321 */ 1322 
public static final UnicodeBlock MONGOLIAN 1323 = new UnicodeBlock("MONGOLIAN", MONGOLIAN_ID); 1324 /** 1325 * @stable ICU 2.4 1326 */ 1327 public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL 1328 = new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL", LATIN_EXTENDED_ADDITIONAL_ID); 1329 /** 1330 * @stable ICU 2.4 1331 */ 1332 public static final UnicodeBlock GREEK_EXTENDED 1333 = new UnicodeBlock("GREEK_EXTENDED", GREEK_EXTENDED_ID); 1334 /** 1335 * @stable ICU 2.4 1336 */ 1337 public static final UnicodeBlock GENERAL_PUNCTUATION 1338 = new UnicodeBlock("GENERAL_PUNCTUATION", GENERAL_PUNCTUATION_ID); 1339 /** 1340 * @stable ICU 2.4 1341 */ 1342 public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS 1343 = new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS", SUPERSCRIPTS_AND_SUBSCRIPTS_ID); 1344 /** 1345 * @stable ICU 2.4 1346 */ 1347 public static final UnicodeBlock CURRENCY_SYMBOLS 1348 = new UnicodeBlock("CURRENCY_SYMBOLS", CURRENCY_SYMBOLS_ID); 1349 /** 1350 * Unicode 3.2 renames this block to "Combining Diacritical Marks for 1351 * Symbols". 
1352 * @stable ICU 2.4 1353 */ 1354 public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS 1355 = new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS", COMBINING_MARKS_FOR_SYMBOLS_ID); 1356 /** 1357 * @stable ICU 2.4 1358 */ 1359 public static final UnicodeBlock LETTERLIKE_SYMBOLS 1360 = new UnicodeBlock("LETTERLIKE_SYMBOLS", LETTERLIKE_SYMBOLS_ID); 1361 /** 1362 * @stable ICU 2.4 1363 */ 1364 public static final UnicodeBlock NUMBER_FORMS 1365 = new UnicodeBlock("NUMBER_FORMS", NUMBER_FORMS_ID); 1366 /** 1367 * @stable ICU 2.4 1368 */ 1369 public static final UnicodeBlock ARROWS 1370 = new UnicodeBlock("ARROWS", ARROWS_ID); 1371 /** 1372 * @stable ICU 2.4 1373 */ 1374 public static final UnicodeBlock MATHEMATICAL_OPERATORS 1375 = new UnicodeBlock("MATHEMATICAL_OPERATORS", MATHEMATICAL_OPERATORS_ID); 1376 /** 1377 * @stable ICU 2.4 1378 */ 1379 public static final UnicodeBlock MISCELLANEOUS_TECHNICAL 1380 = new UnicodeBlock("MISCELLANEOUS_TECHNICAL", MISCELLANEOUS_TECHNICAL_ID); 1381 /** 1382 * @stable ICU 2.4 1383 */ 1384 public static final UnicodeBlock CONTROL_PICTURES 1385 = new UnicodeBlock("CONTROL_PICTURES", CONTROL_PICTURES_ID); 1386 /** 1387 * @stable ICU 2.4 1388 */ 1389 public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION 1390 = new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION", OPTICAL_CHARACTER_RECOGNITION_ID); 1391 /** 1392 * @stable ICU 2.4 1393 */ 1394 public static final UnicodeBlock ENCLOSED_ALPHANUMERICS 1395 = new UnicodeBlock("ENCLOSED_ALPHANUMERICS", ENCLOSED_ALPHANUMERICS_ID); 1396 /** 1397 * @stable ICU 2.4 1398 */ 1399 public static final UnicodeBlock BOX_DRAWING 1400 = new UnicodeBlock("BOX_DRAWING", BOX_DRAWING_ID); 1401 /** 1402 * @stable ICU 2.4 1403 */ 1404 public static final UnicodeBlock BLOCK_ELEMENTS 1405 = new UnicodeBlock("BLOCK_ELEMENTS", BLOCK_ELEMENTS_ID); 1406 /** 1407 * @stable ICU 2.4 1408 */ 1409 public static final UnicodeBlock GEOMETRIC_SHAPES 1410 = new UnicodeBlock("GEOMETRIC_SHAPES", GEOMETRIC_SHAPES_ID); 1411 
/** 1412 * @stable ICU 2.4 1413 */ 1414 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS 1415 = new UnicodeBlock("MISCELLANEOUS_SYMBOLS", MISCELLANEOUS_SYMBOLS_ID); 1416 /** 1417 * @stable ICU 2.4 1418 */ 1419 public static final UnicodeBlock DINGBATS 1420 = new UnicodeBlock("DINGBATS", DINGBATS_ID); 1421 /** 1422 * @stable ICU 2.4 1423 */ 1424 public static final UnicodeBlock BRAILLE_PATTERNS 1425 = new UnicodeBlock("BRAILLE_PATTERNS", BRAILLE_PATTERNS_ID); 1426 /** 1427 * @stable ICU 2.4 1428 */ 1429 public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT 1430 = new UnicodeBlock("CJK_RADICALS_SUPPLEMENT", CJK_RADICALS_SUPPLEMENT_ID); 1431 /** 1432 * @stable ICU 2.4 1433 */ 1434 public static final UnicodeBlock KANGXI_RADICALS 1435 = new UnicodeBlock("KANGXI_RADICALS", KANGXI_RADICALS_ID); 1436 /** 1437 * @stable ICU 2.4 1438 */ 1439 public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS 1440 = new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS", 1441 IDEOGRAPHIC_DESCRIPTION_CHARACTERS_ID); 1442 /** 1443 * @stable ICU 2.4 1444 */ 1445 public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION 1446 = new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION", CJK_SYMBOLS_AND_PUNCTUATION_ID); 1447 /** 1448 * @stable ICU 2.4 1449 */ 1450 public static final UnicodeBlock HIRAGANA 1451 = new UnicodeBlock("HIRAGANA", HIRAGANA_ID); 1452 /** 1453 * @stable ICU 2.4 1454 */ 1455 public static final UnicodeBlock KATAKANA 1456 = new UnicodeBlock("KATAKANA", KATAKANA_ID); 1457 /** 1458 * @stable ICU 2.4 1459 */ 1460 public static final UnicodeBlock BOPOMOFO 1461 = new UnicodeBlock("BOPOMOFO", BOPOMOFO_ID); 1462 /** 1463 * @stable ICU 2.4 1464 */ 1465 public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO 1466 = new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO", HANGUL_COMPATIBILITY_JAMO_ID); 1467 /** 1468 * @stable ICU 2.4 1469 */ 1470 public static final UnicodeBlock KANBUN 1471 = new UnicodeBlock("KANBUN", KANBUN_ID); 1472 /** 1473 * @stable ICU 2.4 1474 */ 1475 
public static final UnicodeBlock BOPOMOFO_EXTENDED 1476 = new UnicodeBlock("BOPOMOFO_EXTENDED", BOPOMOFO_EXTENDED_ID); 1477 /** 1478 * @stable ICU 2.4 1479 */ 1480 public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS 1481 = new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS", 1482 ENCLOSED_CJK_LETTERS_AND_MONTHS_ID); 1483 /** 1484 * @stable ICU 2.4 1485 */ 1486 public static final UnicodeBlock CJK_COMPATIBILITY 1487 = new UnicodeBlock("CJK_COMPATIBILITY", CJK_COMPATIBILITY_ID); 1488 /** 1489 * @stable ICU 2.4 1490 */ 1491 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A 1492 = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A", 1493 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_ID); 1494 /** 1495 * @stable ICU 2.4 1496 */ 1497 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS 1498 = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS", CJK_UNIFIED_IDEOGRAPHS_ID); 1499 /** 1500 * @stable ICU 2.4 1501 */ 1502 public static final UnicodeBlock YI_SYLLABLES 1503 = new UnicodeBlock("YI_SYLLABLES", YI_SYLLABLES_ID); 1504 /** 1505 * @stable ICU 2.4 1506 */ 1507 public static final UnicodeBlock YI_RADICALS 1508 = new UnicodeBlock("YI_RADICALS", YI_RADICALS_ID); 1509 /** 1510 * @stable ICU 2.4 1511 */ 1512 public static final UnicodeBlock HANGUL_SYLLABLES 1513 = new UnicodeBlock("HANGUL_SYLLABLES", HANGUL_SYLLABLES_ID); 1514 /** 1515 * @stable ICU 2.4 1516 */ 1517 public static final UnicodeBlock HIGH_SURROGATES 1518 = new UnicodeBlock("HIGH_SURROGATES", HIGH_SURROGATES_ID); 1519 /** 1520 * @stable ICU 2.4 1521 */ 1522 public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES 1523 = new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES", HIGH_PRIVATE_USE_SURROGATES_ID); 1524 /** 1525 * @stable ICU 2.4 1526 */ 1527 public static final UnicodeBlock LOW_SURROGATES 1528 = new UnicodeBlock("LOW_SURROGATES", LOW_SURROGATES_ID); 1529 /** 1530 * Same as public static final int PRIVATE_USE. 
1531 * Until Unicode 3.1.1; the corresponding block name was "Private Use"; 1532 * and multiple code point ranges had this block. 1533 * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" 1534 * and adds separate blocks for the supplementary PUAs. 1535 * @stable ICU 2.4 1536 */ 1537 public static final UnicodeBlock PRIVATE_USE_AREA 1538 = new UnicodeBlock("PRIVATE_USE_AREA", 78); 1539 /** 1540 * Same as public static final int PRIVATE_USE_AREA. 1541 * Until Unicode 3.1.1; the corresponding block name was "Private Use"; 1542 * and multiple code point ranges had this block. 1543 * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" 1544 * and adds separate blocks for the supplementary PUAs. 1545 * @stable ICU 2.4 1546 */ 1547 public static final UnicodeBlock PRIVATE_USE 1548 = PRIVATE_USE_AREA; 1549 /** 1550 * @stable ICU 2.4 1551 */ 1552 public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS 1553 = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS", CJK_COMPATIBILITY_IDEOGRAPHS_ID); 1554 /** 1555 * @stable ICU 2.4 1556 */ 1557 public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS 1558 = new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS", ALPHABETIC_PRESENTATION_FORMS_ID); 1559 /** 1560 * @stable ICU 2.4 1561 */ 1562 public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A 1563 = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A", ARABIC_PRESENTATION_FORMS_A_ID); 1564 /** 1565 * @stable ICU 2.4 1566 */ 1567 public static final UnicodeBlock COMBINING_HALF_MARKS 1568 = new UnicodeBlock("COMBINING_HALF_MARKS", COMBINING_HALF_MARKS_ID); 1569 /** 1570 * @stable ICU 2.4 1571 */ 1572 public static final UnicodeBlock CJK_COMPATIBILITY_FORMS 1573 = new UnicodeBlock("CJK_COMPATIBILITY_FORMS", CJK_COMPATIBILITY_FORMS_ID); 1574 /** 1575 * @stable ICU 2.4 1576 */ 1577 public static final UnicodeBlock SMALL_FORM_VARIANTS 1578 = new UnicodeBlock("SMALL_FORM_VARIANTS", SMALL_FORM_VARIANTS_ID); 1579 /** 1580 * @stable ICU 2.4 1581 */ 1582 
public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B 1583 = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B", ARABIC_PRESENTATION_FORMS_B_ID); 1584 /** 1585 * @stable ICU 2.4 1586 */ 1587 public static final UnicodeBlock SPECIALS 1588 = new UnicodeBlock("SPECIALS", SPECIALS_ID); 1589 /** 1590 * @stable ICU 2.4 1591 */ 1592 public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS 1593 = new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS", HALFWIDTH_AND_FULLWIDTH_FORMS_ID); 1594 /** 1595 * @stable ICU 2.4 1596 */ 1597 public static final UnicodeBlock OLD_ITALIC 1598 = new UnicodeBlock("OLD_ITALIC", OLD_ITALIC_ID); 1599 /** 1600 * @stable ICU 2.4 1601 */ 1602 public static final UnicodeBlock GOTHIC 1603 = new UnicodeBlock("GOTHIC", GOTHIC_ID); 1604 /** 1605 * @stable ICU 2.4 1606 */ 1607 public static final UnicodeBlock DESERET 1608 = new UnicodeBlock("DESERET", DESERET_ID); 1609 /** 1610 * @stable ICU 2.4 1611 */ 1612 public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS 1613 = new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS", BYZANTINE_MUSICAL_SYMBOLS_ID); 1614 /** 1615 * @stable ICU 2.4 1616 */ 1617 public static final UnicodeBlock MUSICAL_SYMBOLS 1618 = new UnicodeBlock("MUSICAL_SYMBOLS", MUSICAL_SYMBOLS_ID); 1619 /** 1620 * @stable ICU 2.4 1621 */ 1622 public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS 1623 = new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS", 1624 MATHEMATICAL_ALPHANUMERIC_SYMBOLS_ID); 1625 /** 1626 * @stable ICU 2.4 1627 */ 1628 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B 1629 = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B", 1630 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_ID); 1631 /** 1632 * @stable ICU 2.4 1633 */ 1634 public static final UnicodeBlock 1635 CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT 1636 = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT", 1637 CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_ID); 1638 /** 1639 * @stable ICU 2.4 1640 */ 1641 public static final UnicodeBlock TAGS 
1642 = new UnicodeBlock("TAGS", TAGS_ID); 1643 1644 // New blocks in Unicode 3.2 1645 1646 /** 1647 * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement". 1648 * @stable ICU 2.4 1649 */ 1650 public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY 1651 = new UnicodeBlock("CYRILLIC_SUPPLEMENTARY", CYRILLIC_SUPPLEMENTARY_ID); 1652 /** 1653 * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement". 1654 * @stable ICU 3.0 1655 */ 1656 public static final UnicodeBlock CYRILLIC_SUPPLEMENT 1657 = new UnicodeBlock("CYRILLIC_SUPPLEMENT", CYRILLIC_SUPPLEMENT_ID); 1658 /** 1659 * @stable ICU 2.4 1660 */ 1661 public static final UnicodeBlock TAGALOG 1662 = new UnicodeBlock("TAGALOG", TAGALOG_ID); 1663 /** 1664 * @stable ICU 2.4 1665 */ 1666 public static final UnicodeBlock HANUNOO 1667 = new UnicodeBlock("HANUNOO", HANUNOO_ID); 1668 /** 1669 * @stable ICU 2.4 1670 */ 1671 public static final UnicodeBlock BUHID 1672 = new UnicodeBlock("BUHID", BUHID_ID); 1673 /** 1674 * @stable ICU 2.4 1675 */ 1676 public static final UnicodeBlock TAGBANWA 1677 = new UnicodeBlock("TAGBANWA", TAGBANWA_ID); 1678 /** 1679 * @stable ICU 2.4 1680 */ 1681 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A 1682 = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A", 1683 MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A_ID); 1684 /** 1685 * @stable ICU 2.4 1686 */ 1687 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A 1688 = new UnicodeBlock("SUPPLEMENTAL_ARROWS_A", SUPPLEMENTAL_ARROWS_A_ID); 1689 /** 1690 * @stable ICU 2.4 1691 */ 1692 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B 1693 = new UnicodeBlock("SUPPLEMENTAL_ARROWS_B", SUPPLEMENTAL_ARROWS_B_ID); 1694 /** 1695 * @stable ICU 2.4 1696 */ 1697 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B 1698 = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B", 1699 MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B_ID); 1700 /** 1701 * @stable ICU 2.4 1702 */ 1703 
public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS 1704 = new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS", 1705 SUPPLEMENTAL_MATHEMATICAL_OPERATORS_ID); 1706 /** 1707 * @stable ICU 2.4 1708 */ 1709 public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS 1710 = new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS", KATAKANA_PHONETIC_EXTENSIONS_ID); 1711 /** 1712 * @stable ICU 2.4 1713 */ 1714 public static final UnicodeBlock VARIATION_SELECTORS 1715 = new UnicodeBlock("VARIATION_SELECTORS", VARIATION_SELECTORS_ID); 1716 /** 1717 * @stable ICU 2.4 1718 */ 1719 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A 1720 = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A", 1721 SUPPLEMENTARY_PRIVATE_USE_AREA_A_ID); 1722 /** 1723 * @stable ICU 2.4 1724 */ 1725 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B 1726 = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B", 1727 SUPPLEMENTARY_PRIVATE_USE_AREA_B_ID); 1728 1729 /** 1730 * @stable ICU 2.6 1731 */ 1732 public static final UnicodeBlock LIMBU 1733 = new UnicodeBlock("LIMBU", LIMBU_ID); 1734 /** 1735 * @stable ICU 2.6 1736 */ 1737 public static final UnicodeBlock TAI_LE 1738 = new UnicodeBlock("TAI_LE", TAI_LE_ID); 1739 /** 1740 * @stable ICU 2.6 1741 */ 1742 public static final UnicodeBlock KHMER_SYMBOLS 1743 = new UnicodeBlock("KHMER_SYMBOLS", KHMER_SYMBOLS_ID); 1744 1745 /** 1746 * @stable ICU 2.6 1747 */ 1748 public static final UnicodeBlock PHONETIC_EXTENSIONS 1749 = new UnicodeBlock("PHONETIC_EXTENSIONS", PHONETIC_EXTENSIONS_ID); 1750 1751 /** 1752 * @stable ICU 2.6 1753 */ 1754 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS 1755 = new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS", 1756 MISCELLANEOUS_SYMBOLS_AND_ARROWS_ID); 1757 /** 1758 * @stable ICU 2.6 1759 */ 1760 public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS 1761 = new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS", YIJING_HEXAGRAM_SYMBOLS_ID); 1762 /** 1763 * @stable ICU 2.6 
1764 */ 1765 public static final UnicodeBlock LINEAR_B_SYLLABARY 1766 = new UnicodeBlock("LINEAR_B_SYLLABARY", LINEAR_B_SYLLABARY_ID); 1767 /** 1768 * @stable ICU 2.6 1769 */ 1770 public static final UnicodeBlock LINEAR_B_IDEOGRAMS 1771 = new UnicodeBlock("LINEAR_B_IDEOGRAMS", LINEAR_B_IDEOGRAMS_ID); 1772 /** 1773 * @stable ICU 2.6 1774 */ 1775 public static final UnicodeBlock AEGEAN_NUMBERS 1776 = new UnicodeBlock("AEGEAN_NUMBERS", AEGEAN_NUMBERS_ID); 1777 /** 1778 * @stable ICU 2.6 1779 */ 1780 public static final UnicodeBlock UGARITIC 1781 = new UnicodeBlock("UGARITIC", UGARITIC_ID); 1782 /** 1783 * @stable ICU 2.6 1784 */ 1785 public static final UnicodeBlock SHAVIAN 1786 = new UnicodeBlock("SHAVIAN", SHAVIAN_ID); 1787 /** 1788 * @stable ICU 2.6 1789 */ 1790 public static final UnicodeBlock OSMANYA 1791 = new UnicodeBlock("OSMANYA", OSMANYA_ID); 1792 /** 1793 * @stable ICU 2.6 1794 */ 1795 public static final UnicodeBlock CYPRIOT_SYLLABARY 1796 = new UnicodeBlock("CYPRIOT_SYLLABARY", CYPRIOT_SYLLABARY_ID); 1797 /** 1798 * @stable ICU 2.6 1799 */ 1800 public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS 1801 = new UnicodeBlock("TAI_XUAN_JING_SYMBOLS", TAI_XUAN_JING_SYMBOLS_ID); 1802 1803 /** 1804 * @stable ICU 2.6 1805 */ 1806 public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT 1807 = new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT", VARIATION_SELECTORS_SUPPLEMENT_ID); 1808 1809 /* New blocks in Unicode 4.1 */ 1810 1811 /** 1812 * @stable ICU 3.4 1813 */ 1814 public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION = 1815 new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION", 1816 ANCIENT_GREEK_MUSICAL_NOTATION_ID); /*[1D200]*/ 1817 1818 /** 1819 * @stable ICU 3.4 1820 */ 1821 public static final UnicodeBlock ANCIENT_GREEK_NUMBERS = 1822 new UnicodeBlock("ANCIENT_GREEK_NUMBERS", ANCIENT_GREEK_NUMBERS_ID); /*[10140]*/ 1823 1824 /** 1825 * @stable ICU 3.4 1826 */ 1827 public static final UnicodeBlock ARABIC_SUPPLEMENT = 1828 new 
UnicodeBlock("ARABIC_SUPPLEMENT", ARABIC_SUPPLEMENT_ID); /*[0750]*/ 1829 1830 /** 1831 * @stable ICU 3.4 1832 */ 1833 public static final UnicodeBlock BUGINESE = 1834 new UnicodeBlock("BUGINESE", BUGINESE_ID); /*[1A00]*/ 1835 1836 /** 1837 * @stable ICU 3.4 1838 */ 1839 public static final UnicodeBlock CJK_STROKES = 1840 new UnicodeBlock("CJK_STROKES", CJK_STROKES_ID); /*[31C0]*/ 1841 1842 /** 1843 * @stable ICU 3.4 1844 */ 1845 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT = 1846 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT", 1847 COMBINING_DIACRITICAL_MARKS_SUPPLEMENT_ID); /*[1DC0]*/ 1848 1849 /** 1850 * @stable ICU 3.4 1851 */ 1852 public static final UnicodeBlock COPTIC = new UnicodeBlock("COPTIC", COPTIC_ID); /*[2C80]*/ 1853 1854 /** 1855 * @stable ICU 3.4 1856 */ 1857 public static final UnicodeBlock ETHIOPIC_EXTENDED = 1858 new UnicodeBlock("ETHIOPIC_EXTENDED", ETHIOPIC_EXTENDED_ID); /*[2D80]*/ 1859 1860 /** 1861 * @stable ICU 3.4 1862 */ 1863 public static final UnicodeBlock ETHIOPIC_SUPPLEMENT = 1864 new UnicodeBlock("ETHIOPIC_SUPPLEMENT", ETHIOPIC_SUPPLEMENT_ID); /*[1380]*/ 1865 1866 /** 1867 * @stable ICU 3.4 1868 */ 1869 public static final UnicodeBlock GEORGIAN_SUPPLEMENT = 1870 new UnicodeBlock("GEORGIAN_SUPPLEMENT", GEORGIAN_SUPPLEMENT_ID); /*[2D00]*/ 1871 1872 /** 1873 * @stable ICU 3.4 1874 */ 1875 public static final UnicodeBlock GLAGOLITIC = 1876 new UnicodeBlock("GLAGOLITIC", GLAGOLITIC_ID); /*[2C00]*/ 1877 1878 /** 1879 * @stable ICU 3.4 1880 */ 1881 public static final UnicodeBlock KHAROSHTHI = 1882 new UnicodeBlock("KHAROSHTHI", KHAROSHTHI_ID); /*[10A00]*/ 1883 1884 /** 1885 * @stable ICU 3.4 1886 */ 1887 public static final UnicodeBlock MODIFIER_TONE_LETTERS = 1888 new UnicodeBlock("MODIFIER_TONE_LETTERS", MODIFIER_TONE_LETTERS_ID); /*[A700]*/ 1889 1890 /** 1891 * @stable ICU 3.4 1892 */ 1893 public static final UnicodeBlock NEW_TAI_LUE = 1894 new UnicodeBlock("NEW_TAI_LUE", NEW_TAI_LUE_ID); /*[1980]*/ 
1895 1896 /** 1897 * @stable ICU 3.4 1898 */ 1899 public static final UnicodeBlock OLD_PERSIAN = 1900 new UnicodeBlock("OLD_PERSIAN", OLD_PERSIAN_ID); /*[103A0]*/ 1901 1902 /** 1903 * @stable ICU 3.4 1904 */ 1905 public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT = 1906 new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT", 1907 PHONETIC_EXTENSIONS_SUPPLEMENT_ID); /*[1D80]*/ 1908 1909 /** 1910 * @stable ICU 3.4 1911 */ 1912 public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION = 1913 new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION", SUPPLEMENTAL_PUNCTUATION_ID); /*[2E00]*/ 1914 1915 /** 1916 * @stable ICU 3.4 1917 */ 1918 public static final UnicodeBlock SYLOTI_NAGRI = 1919 new UnicodeBlock("SYLOTI_NAGRI", SYLOTI_NAGRI_ID); /*[A800]*/ 1920 1921 /** 1922 * @stable ICU 3.4 1923 */ 1924 public static final UnicodeBlock TIFINAGH = 1925 new UnicodeBlock("TIFINAGH", TIFINAGH_ID); /*[2D30]*/ 1926 1927 /** 1928 * @stable ICU 3.4 1929 */ 1930 public static final UnicodeBlock VERTICAL_FORMS = 1931 new UnicodeBlock("VERTICAL_FORMS", VERTICAL_FORMS_ID); /*[FE10]*/ 1932 1933 /** 1934 * @stable ICU 3.6 1935 */ 1936 public static final UnicodeBlock NKO = new UnicodeBlock("NKO", NKO_ID); /*[07C0]*/ 1937 /** 1938 * @stable ICU 3.6 1939 */ 1940 public static final UnicodeBlock BALINESE = 1941 new UnicodeBlock("BALINESE", BALINESE_ID); /*[1B00]*/ 1942 /** 1943 * @stable ICU 3.6 1944 */ 1945 public static final UnicodeBlock LATIN_EXTENDED_C = 1946 new UnicodeBlock("LATIN_EXTENDED_C", LATIN_EXTENDED_C_ID); /*[2C60]*/ 1947 /** 1948 * @stable ICU 3.6 1949 */ 1950 public static final UnicodeBlock LATIN_EXTENDED_D = 1951 new UnicodeBlock("LATIN_EXTENDED_D", LATIN_EXTENDED_D_ID); /*[A720]*/ 1952 /** 1953 * @stable ICU 3.6 1954 */ 1955 public static final UnicodeBlock PHAGS_PA = 1956 new UnicodeBlock("PHAGS_PA", PHAGS_PA_ID); /*[A840]*/ 1957 /** 1958 * @stable ICU 3.6 1959 */ 1960 public static final UnicodeBlock PHOENICIAN = 1961 new UnicodeBlock("PHOENICIAN", PHOENICIAN_ID); 
/*[10900]*/ 1962 /** 1963 * @stable ICU 3.6 1964 */ 1965 public static final UnicodeBlock CUNEIFORM = 1966 new UnicodeBlock("CUNEIFORM", CUNEIFORM_ID); /*[12000]*/ 1967 /** 1968 * @stable ICU 3.6 1969 */ 1970 public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION = 1971 new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION", 1972 CUNEIFORM_NUMBERS_AND_PUNCTUATION_ID); /*[12400]*/ 1973 /** 1974 * @stable ICU 3.6 1975 */ 1976 public static final UnicodeBlock COUNTING_ROD_NUMERALS = 1977 new UnicodeBlock("COUNTING_ROD_NUMERALS", COUNTING_ROD_NUMERALS_ID); /*[1D360]*/ 1978 1979 /** 1980 * @stable ICU 4.0 1981 */ 1982 public static final UnicodeBlock SUNDANESE = 1983 new UnicodeBlock("SUNDANESE", SUNDANESE_ID); /* [1B80] */ 1984 1985 /** 1986 * @stable ICU 4.0 1987 */ 1988 public static final UnicodeBlock LEPCHA = 1989 new UnicodeBlock("LEPCHA", LEPCHA_ID); /* [1C00] */ 1990 1991 /** 1992 * @stable ICU 4.0 1993 */ 1994 public static final UnicodeBlock OL_CHIKI = 1995 new UnicodeBlock("OL_CHIKI", OL_CHIKI_ID); /* [1C50] */ 1996 1997 /** 1998 * @stable ICU 4.0 1999 */ 2000 public static final UnicodeBlock CYRILLIC_EXTENDED_A = 2001 new UnicodeBlock("CYRILLIC_EXTENDED_A", CYRILLIC_EXTENDED_A_ID); /* [2DE0] */ 2002 2003 /** 2004 * @stable ICU 4.0 2005 */ 2006 public static final UnicodeBlock VAI = new UnicodeBlock("VAI", VAI_ID); /* [A500] */ 2007 2008 /** 2009 * @stable ICU 4.0 2010 */ 2011 public static final UnicodeBlock CYRILLIC_EXTENDED_B = 2012 new UnicodeBlock("CYRILLIC_EXTENDED_B", CYRILLIC_EXTENDED_B_ID); /* [A640] */ 2013 2014 /** 2015 * @stable ICU 4.0 2016 */ 2017 public static final UnicodeBlock SAURASHTRA = 2018 new UnicodeBlock("SAURASHTRA", SAURASHTRA_ID); /* [A880] */ 2019 2020 /** 2021 * @stable ICU 4.0 2022 */ 2023 public static final UnicodeBlock KAYAH_LI = 2024 new UnicodeBlock("KAYAH_LI", KAYAH_LI_ID); /* [A900] */ 2025 2026 /** 2027 * @stable ICU 4.0 2028 */ 2029 public static final UnicodeBlock REJANG = 2030 new UnicodeBlock("REJANG", 
REJANG_ID); /* [A930] */ 2031 2032 /** 2033 * @stable ICU 4.0 2034 */ 2035 public static final UnicodeBlock CHAM = 2036 new UnicodeBlock("CHAM", CHAM_ID); /* [AA00] */ 2037 2038 /** 2039 * @stable ICU 4.0 2040 */ 2041 public static final UnicodeBlock ANCIENT_SYMBOLS = 2042 new UnicodeBlock("ANCIENT_SYMBOLS", ANCIENT_SYMBOLS_ID); /* [10190] */ 2043 2044 /** 2045 * @stable ICU 4.0 2046 */ 2047 public static final UnicodeBlock PHAISTOS_DISC = 2048 new UnicodeBlock("PHAISTOS_DISC", PHAISTOS_DISC_ID); /* [101D0] */ 2049 2050 /** 2051 * @stable ICU 4.0 2052 */ 2053 public static final UnicodeBlock LYCIAN = 2054 new UnicodeBlock("LYCIAN", LYCIAN_ID); /* [10280] */ 2055 2056 /** 2057 * @stable ICU 4.0 2058 */ 2059 public static final UnicodeBlock CARIAN = 2060 new UnicodeBlock("CARIAN", CARIAN_ID); /* [102A0] */ 2061 2062 /** 2063 * @stable ICU 4.0 2064 */ 2065 public static final UnicodeBlock LYDIAN = 2066 new UnicodeBlock("LYDIAN", LYDIAN_ID); /* [10920] */ 2067 2068 /** 2069 * @stable ICU 4.0 2070 */ 2071 public static final UnicodeBlock MAHJONG_TILES = 2072 new UnicodeBlock("MAHJONG_TILES", MAHJONG_TILES_ID); /* [1F000] */ 2073 2074 /** 2075 * @stable ICU 4.0 2076 */ 2077 public static final UnicodeBlock DOMINO_TILES = 2078 new UnicodeBlock("DOMINO_TILES", DOMINO_TILES_ID); /* [1F030] */ 2079 2080 /* New blocks in Unicode 5.2 */ 2081 2082 /** @stable ICU 4.4 */ 2083 public static final UnicodeBlock SAMARITAN = 2084 new UnicodeBlock("SAMARITAN", SAMARITAN_ID); /*[0800]*/ 2085 /** @stable ICU 4.4 */ 2086 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED = 2087 new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED", 2088 UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_ID); /*[18B0]*/ 2089 /** @stable ICU 4.4 */ 2090 public static final UnicodeBlock TAI_THAM = 2091 new UnicodeBlock("TAI_THAM", TAI_THAM_ID); /*[1A20]*/ 2092 /** @stable ICU 4.4 */ 2093 public static final UnicodeBlock VEDIC_EXTENSIONS = 2094 new 
UnicodeBlock("VEDIC_EXTENSIONS", VEDIC_EXTENSIONS_ID); /*[1CD0]*/ 2095 /** @stable ICU 4.4 */ 2096 public static final UnicodeBlock LISU = 2097 new UnicodeBlock("LISU", LISU_ID); /*[A4D0]*/ 2098 /** @stable ICU 4.4 */ 2099 public static final UnicodeBlock BAMUM = 2100 new UnicodeBlock("BAMUM", BAMUM_ID); /*[A6A0]*/ 2101 /** @stable ICU 4.4 */ 2102 public static final UnicodeBlock COMMON_INDIC_NUMBER_FORMS = 2103 new UnicodeBlock("COMMON_INDIC_NUMBER_FORMS", COMMON_INDIC_NUMBER_FORMS_ID); /*[A830]*/ 2104 /** @stable ICU 4.4 */ 2105 public static final UnicodeBlock DEVANAGARI_EXTENDED = 2106 new UnicodeBlock("DEVANAGARI_EXTENDED", DEVANAGARI_EXTENDED_ID); /*[A8E0]*/ 2107 /** @stable ICU 4.4 */ 2108 public static final UnicodeBlock HANGUL_JAMO_EXTENDED_A = 2109 new UnicodeBlock("HANGUL_JAMO_EXTENDED_A", HANGUL_JAMO_EXTENDED_A_ID); /*[A960]*/ 2110 /** @stable ICU 4.4 */ 2111 public static final UnicodeBlock JAVANESE = 2112 new UnicodeBlock("JAVANESE", JAVANESE_ID); /*[A980]*/ 2113 /** @stable ICU 4.4 */ 2114 public static final UnicodeBlock MYANMAR_EXTENDED_A = 2115 new UnicodeBlock("MYANMAR_EXTENDED_A", MYANMAR_EXTENDED_A_ID); /*[AA60]*/ 2116 /** @stable ICU 4.4 */ 2117 public static final UnicodeBlock TAI_VIET = 2118 new UnicodeBlock("TAI_VIET", TAI_VIET_ID); /*[AA80]*/ 2119 /** @stable ICU 4.4 */ 2120 public static final UnicodeBlock MEETEI_MAYEK = 2121 new UnicodeBlock("MEETEI_MAYEK", MEETEI_MAYEK_ID); /*[ABC0]*/ 2122 /** @stable ICU 4.4 */ 2123 public static final UnicodeBlock HANGUL_JAMO_EXTENDED_B = 2124 new UnicodeBlock("HANGUL_JAMO_EXTENDED_B", HANGUL_JAMO_EXTENDED_B_ID); /*[D7B0]*/ 2125 /** @stable ICU 4.4 */ 2126 public static final UnicodeBlock IMPERIAL_ARAMAIC = 2127 new UnicodeBlock("IMPERIAL_ARAMAIC", IMPERIAL_ARAMAIC_ID); /*[10840]*/ 2128 /** @stable ICU 4.4 */ 2129 public static final UnicodeBlock OLD_SOUTH_ARABIAN = 2130 new UnicodeBlock("OLD_SOUTH_ARABIAN", OLD_SOUTH_ARABIAN_ID); /*[10A60]*/ 2131 /** @stable ICU 4.4 */ 2132 public static final 
UnicodeBlock AVESTAN = 2133 new UnicodeBlock("AVESTAN", AVESTAN_ID); /*[10B00]*/ 2134 /** @stable ICU 4.4 */ 2135 public static final UnicodeBlock INSCRIPTIONAL_PARTHIAN = 2136 new UnicodeBlock("INSCRIPTIONAL_PARTHIAN", INSCRIPTIONAL_PARTHIAN_ID); /*[10B40]*/ 2137 /** @stable ICU 4.4 */ 2138 public static final UnicodeBlock INSCRIPTIONAL_PAHLAVI = 2139 new UnicodeBlock("INSCRIPTIONAL_PAHLAVI", INSCRIPTIONAL_PAHLAVI_ID); /*[10B60]*/ 2140 /** @stable ICU 4.4 */ 2141 public static final UnicodeBlock OLD_TURKIC = 2142 new UnicodeBlock("OLD_TURKIC", OLD_TURKIC_ID); /*[10C00]*/ 2143 /** @stable ICU 4.4 */ 2144 public static final UnicodeBlock RUMI_NUMERAL_SYMBOLS = 2145 new UnicodeBlock("RUMI_NUMERAL_SYMBOLS", RUMI_NUMERAL_SYMBOLS_ID); /*[10E60]*/ 2146 /** @stable ICU 4.4 */ 2147 public static final UnicodeBlock KAITHI = 2148 new UnicodeBlock("KAITHI", KAITHI_ID); /*[11080]*/ 2149 /** @stable ICU 4.4 */ 2150 public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS = 2151 new UnicodeBlock("EGYPTIAN_HIEROGLYPHS", EGYPTIAN_HIEROGLYPHS_ID); /*[13000]*/ 2152 /** @stable ICU 4.4 */ 2153 public static final UnicodeBlock ENCLOSED_ALPHANUMERIC_SUPPLEMENT = 2154 new UnicodeBlock("ENCLOSED_ALPHANUMERIC_SUPPLEMENT", 2155 ENCLOSED_ALPHANUMERIC_SUPPLEMENT_ID); /*[1F100]*/ 2156 /** @stable ICU 4.4 */ 2157 public static final UnicodeBlock ENCLOSED_IDEOGRAPHIC_SUPPLEMENT = 2158 new UnicodeBlock("ENCLOSED_IDEOGRAPHIC_SUPPLEMENT", 2159 ENCLOSED_IDEOGRAPHIC_SUPPLEMENT_ID); /*[1F200]*/ 2160 /** @stable ICU 4.4 */ 2161 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C = 2162 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C", 2163 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_ID); /*[2A700]*/ 2164 2165 /* New blocks in Unicode 6.0 */ 2166 2167 /** @stable ICU 4.6 */ 2168 public static final UnicodeBlock MANDAIC = 2169 new UnicodeBlock("MANDAIC", MANDAIC_ID); /*[0840]*/ 2170 /** @stable ICU 4.6 */ 2171 public static final UnicodeBlock BATAK = 2172 new UnicodeBlock("BATAK", BATAK_ID); 
/*[1BC0]*/ 2173 /** @stable ICU 4.6 */ 2174 public static final UnicodeBlock ETHIOPIC_EXTENDED_A = 2175 new UnicodeBlock("ETHIOPIC_EXTENDED_A", ETHIOPIC_EXTENDED_A_ID); /*[AB00]*/ 2176 /** @stable ICU 4.6 */ 2177 public static final UnicodeBlock BRAHMI = 2178 new UnicodeBlock("BRAHMI", BRAHMI_ID); /*[11000]*/ 2179 /** @stable ICU 4.6 */ 2180 public static final UnicodeBlock BAMUM_SUPPLEMENT = 2181 new UnicodeBlock("BAMUM_SUPPLEMENT", BAMUM_SUPPLEMENT_ID); /*[16800]*/ 2182 /** @stable ICU 4.6 */ 2183 public static final UnicodeBlock KANA_SUPPLEMENT = 2184 new UnicodeBlock("KANA_SUPPLEMENT", KANA_SUPPLEMENT_ID); /*[1B000]*/ 2185 /** @stable ICU 4.6 */ 2186 public static final UnicodeBlock PLAYING_CARDS = 2187 new UnicodeBlock("PLAYING_CARDS", PLAYING_CARDS_ID); /*[1F0A0]*/ 2188 /** @stable ICU 4.6 */ 2189 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS = 2190 new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS", 2191 MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS_ID); /*[1F300]*/ 2192 /** @stable ICU 4.6 */ 2193 public static final UnicodeBlock EMOTICONS = 2194 new UnicodeBlock("EMOTICONS", EMOTICONS_ID); /*[1F600]*/ 2195 /** @stable ICU 4.6 */ 2196 public static final UnicodeBlock TRANSPORT_AND_MAP_SYMBOLS = 2197 new UnicodeBlock("TRANSPORT_AND_MAP_SYMBOLS", TRANSPORT_AND_MAP_SYMBOLS_ID); /*[1F680]*/ 2198 /** @stable ICU 4.6 */ 2199 public static final UnicodeBlock ALCHEMICAL_SYMBOLS = 2200 new UnicodeBlock("ALCHEMICAL_SYMBOLS", ALCHEMICAL_SYMBOLS_ID); /*[1F700]*/ 2201 /** @stable ICU 4.6 */ 2202 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D = 2203 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D", 2204 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_ID); /*[2B740]*/ 2205 2206 /* New blocks in Unicode 6.1 */ 2207 2208 /** @stable ICU 49 */ 2209 public static final UnicodeBlock ARABIC_EXTENDED_A = 2210 new UnicodeBlock("ARABIC_EXTENDED_A", ARABIC_EXTENDED_A_ID); /*[08A0]*/ 2211 /** @stable ICU 49 */ 2212 public static final UnicodeBlock 
ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS = 2213 new UnicodeBlock("ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS", ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS_ID); /*[1EE00]*/ 2214 /** @stable ICU 49 */ 2215 public static final UnicodeBlock CHAKMA = new UnicodeBlock("CHAKMA", CHAKMA_ID); /*[11100]*/ 2216 /** @stable ICU 49 */ 2217 public static final UnicodeBlock MEETEI_MAYEK_EXTENSIONS = 2218 new UnicodeBlock("MEETEI_MAYEK_EXTENSIONS", MEETEI_MAYEK_EXTENSIONS_ID); /*[AAE0]*/ 2219 /** @stable ICU 49 */ 2220 public static final UnicodeBlock MEROITIC_CURSIVE = 2221 new UnicodeBlock("MEROITIC_CURSIVE", MEROITIC_CURSIVE_ID); /*[109A0]*/ 2222 /** @stable ICU 49 */ 2223 public static final UnicodeBlock MEROITIC_HIEROGLYPHS = 2224 new UnicodeBlock("MEROITIC_HIEROGLYPHS", MEROITIC_HIEROGLYPHS_ID); /*[10980]*/ 2225 /** @stable ICU 49 */ 2226 public static final UnicodeBlock MIAO = new UnicodeBlock("MIAO", MIAO_ID); /*[16F00]*/ 2227 /** @stable ICU 49 */ 2228 public static final UnicodeBlock SHARADA = new UnicodeBlock("SHARADA", SHARADA_ID); /*[11180]*/ 2229 /** @stable ICU 49 */ 2230 public static final UnicodeBlock SORA_SOMPENG = 2231 new UnicodeBlock("SORA_SOMPENG", SORA_SOMPENG_ID); /*[110D0]*/ 2232 /** @stable ICU 49 */ 2233 public static final UnicodeBlock SUNDANESE_SUPPLEMENT = 2234 new UnicodeBlock("SUNDANESE_SUPPLEMENT", SUNDANESE_SUPPLEMENT_ID); /*[1CC0]*/ 2235 /** @stable ICU 49 */ 2236 public static final UnicodeBlock TAKRI = new UnicodeBlock("TAKRI", TAKRI_ID); /*[11680]*/ 2237 2238 /* New blocks in Unicode 7.0 */ 2239 2240 /** @stable ICU 54 */ 2241 public static final UnicodeBlock BASSA_VAH = new UnicodeBlock("BASSA_VAH", BASSA_VAH_ID); /*[16AD0]*/ 2242 /** @stable ICU 54 */ 2243 public static final UnicodeBlock CAUCASIAN_ALBANIAN = 2244 new UnicodeBlock("CAUCASIAN_ALBANIAN", CAUCASIAN_ALBANIAN_ID); /*[10530]*/ 2245 /** @stable ICU 54 */ 2246 public static final UnicodeBlock COPTIC_EPACT_NUMBERS = 2247 new UnicodeBlock("COPTIC_EPACT_NUMBERS", COPTIC_EPACT_NUMBERS_ID); 
/*[102E0]*/ 2248 /** @stable ICU 54 */ 2249 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_EXTENDED = 2250 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_EXTENDED", COMBINING_DIACRITICAL_MARKS_EXTENDED_ID); /*[1AB0]*/ 2251 /** @stable ICU 54 */ 2252 public static final UnicodeBlock DUPLOYAN = new UnicodeBlock("DUPLOYAN", DUPLOYAN_ID); /*[1BC00]*/ 2253 /** @stable ICU 54 */ 2254 public static final UnicodeBlock ELBASAN = new UnicodeBlock("ELBASAN", ELBASAN_ID); /*[10500]*/ 2255 /** @stable ICU 54 */ 2256 public static final UnicodeBlock GEOMETRIC_SHAPES_EXTENDED = 2257 new UnicodeBlock("GEOMETRIC_SHAPES_EXTENDED", GEOMETRIC_SHAPES_EXTENDED_ID); /*[1F780]*/ 2258 /** @stable ICU 54 */ 2259 public static final UnicodeBlock GRANTHA = new UnicodeBlock("GRANTHA", GRANTHA_ID); /*[11300]*/ 2260 /** @stable ICU 54 */ 2261 public static final UnicodeBlock KHOJKI = new UnicodeBlock("KHOJKI", KHOJKI_ID); /*[11200]*/ 2262 /** @stable ICU 54 */ 2263 public static final UnicodeBlock KHUDAWADI = new UnicodeBlock("KHUDAWADI", KHUDAWADI_ID); /*[112B0]*/ 2264 /** @stable ICU 54 */ 2265 public static final UnicodeBlock LATIN_EXTENDED_E = 2266 new UnicodeBlock("LATIN_EXTENDED_E", LATIN_EXTENDED_E_ID); /*[AB30]*/ 2267 /** @stable ICU 54 */ 2268 public static final UnicodeBlock LINEAR_A = new UnicodeBlock("LINEAR_A", LINEAR_A_ID); /*[10600]*/ 2269 /** @stable ICU 54 */ 2270 public static final UnicodeBlock MAHAJANI = new UnicodeBlock("MAHAJANI", MAHAJANI_ID); /*[11150]*/ 2271 /** @stable ICU 54 */ 2272 public static final UnicodeBlock MANICHAEAN = new UnicodeBlock("MANICHAEAN", MANICHAEAN_ID); /*[10AC0]*/ 2273 /** @stable ICU 54 */ 2274 public static final UnicodeBlock MENDE_KIKAKUI = 2275 new UnicodeBlock("MENDE_KIKAKUI", MENDE_KIKAKUI_ID); /*[1E800]*/ 2276 /** @stable ICU 54 */ 2277 public static final UnicodeBlock MODI = new UnicodeBlock("MODI", MODI_ID); /*[11600]*/ 2278 /** @stable ICU 54 */ 2279 public static final UnicodeBlock MRO = new UnicodeBlock("MRO", MRO_ID); 
/*[16A40]*/ 2280 /** @stable ICU 54 */ 2281 public static final UnicodeBlock MYANMAR_EXTENDED_B = 2282 new UnicodeBlock("MYANMAR_EXTENDED_B", MYANMAR_EXTENDED_B_ID); /*[A9E0]*/ 2283 /** @stable ICU 54 */ 2284 public static final UnicodeBlock NABATAEAN = new UnicodeBlock("NABATAEAN", NABATAEAN_ID); /*[10880]*/ 2285 /** @stable ICU 54 */ 2286 public static final UnicodeBlock OLD_NORTH_ARABIAN = 2287 new UnicodeBlock("OLD_NORTH_ARABIAN", OLD_NORTH_ARABIAN_ID); /*[10A80]*/ 2288 /** @stable ICU 54 */ 2289 public static final UnicodeBlock OLD_PERMIC = new UnicodeBlock("OLD_PERMIC", OLD_PERMIC_ID); /*[10350]*/ 2290 /** @stable ICU 54 */ 2291 public static final UnicodeBlock ORNAMENTAL_DINGBATS = 2292 new UnicodeBlock("ORNAMENTAL_DINGBATS", ORNAMENTAL_DINGBATS_ID); /*[1F650]*/ 2293 /** @stable ICU 54 */ 2294 public static final UnicodeBlock PAHAWH_HMONG = new UnicodeBlock("PAHAWH_HMONG", PAHAWH_HMONG_ID); /*[16B00]*/ 2295 /** @stable ICU 54 */ 2296 public static final UnicodeBlock PALMYRENE = new UnicodeBlock("PALMYRENE", PALMYRENE_ID); /*[10860]*/ 2297 /** @stable ICU 54 */ 2298 public static final UnicodeBlock PAU_CIN_HAU = new UnicodeBlock("PAU_CIN_HAU", PAU_CIN_HAU_ID); /*[11AC0]*/ 2299 /** @stable ICU 54 */ 2300 public static final UnicodeBlock PSALTER_PAHLAVI = 2301 new UnicodeBlock("PSALTER_PAHLAVI", PSALTER_PAHLAVI_ID); /*[10B80]*/ 2302 /** @stable ICU 54 */ 2303 public static final UnicodeBlock SHORTHAND_FORMAT_CONTROLS = 2304 new UnicodeBlock("SHORTHAND_FORMAT_CONTROLS", SHORTHAND_FORMAT_CONTROLS_ID); /*[1BCA0]*/ 2305 /** @stable ICU 54 */ 2306 public static final UnicodeBlock SIDDHAM = new UnicodeBlock("SIDDHAM", SIDDHAM_ID); /*[11580]*/ 2307 /** @stable ICU 54 */ 2308 public static final UnicodeBlock SINHALA_ARCHAIC_NUMBERS = 2309 new UnicodeBlock("SINHALA_ARCHAIC_NUMBERS", SINHALA_ARCHAIC_NUMBERS_ID); /*[111E0]*/ 2310 /** @stable ICU 54 */ 2311 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_C = 2312 new UnicodeBlock("SUPPLEMENTAL_ARROWS_C", 
SUPPLEMENTAL_ARROWS_C_ID); /*[1F800]*/ 2313 /** @stable ICU 54 */ 2314 public static final UnicodeBlock TIRHUTA = new UnicodeBlock("TIRHUTA", TIRHUTA_ID); /*[11480]*/ 2315 /** @stable ICU 54 */ 2316 public static final UnicodeBlock WARANG_CITI = new UnicodeBlock("WARANG_CITI", WARANG_CITI_ID); /*[118A0]*/ 2317 2318 /** 2319 * @stable ICU 2.4 2320 */ 2321 public static final UnicodeBlock INVALID_CODE 2322 = new UnicodeBlock("INVALID_CODE", INVALID_CODE_ID); 2323 2324 static { 2325 for (int blockId = 0; blockId < COUNT; ++blockId) { 2326 if (BLOCKS_[blockId] == null) { 2327 throw new java.lang.IllegalStateException( 2328 "UnicodeBlock.BLOCKS_[" + blockId + "] not initialized"); 2329 } 2330 } 2331 } 2332 2333 // public methods -------------------------------------------------- 2334 2335 /** 2336 * {@icu} Returns the only instance of the UnicodeBlock with the argument ID. 2337 * If no such ID exists, a INVALID_CODE UnicodeBlock will be returned. 2338 * @param id UnicodeBlock ID 2339 * @return the only instance of the UnicodeBlock with the argument ID 2340 * if it exists, otherwise a INVALID_CODE UnicodeBlock will be 2341 * returned. 2342 * @stable ICU 2.4 2343 */ getInstance(int id)2344 public static UnicodeBlock getInstance(int id) 2345 { 2346 if (id >= 0 && id < BLOCKS_.length) { 2347 return BLOCKS_[id]; 2348 } 2349 return INVALID_CODE; 2350 } 2351 2352 /** 2353 * Returns the Unicode allocation block that contains the code point, 2354 * or null if the code point is not a member of a defined block. 2355 * @param ch code point to be tested 2356 * @return the Unicode allocation block that contains the code point 2357 * @stable ICU 2.4 2358 */ of(int ch)2359 public static UnicodeBlock of(int ch) 2360 { 2361 if (ch > MAX_VALUE) { 2362 return INVALID_CODE; 2363 } 2364 2365 return UnicodeBlock.getInstance( 2366 UCharacterProperty.INSTANCE.getIntPropertyValue(ch, UProperty.BLOCK)); 2367 } 2368 2369 /** 2370 * Cover the JDK 1.5 API. 
Return the Unicode block with the 2371 * given name. {@icunote} Unlike JDK 1.5, this only matches 2372 * against the official UCD name and the Java block name 2373 * (ignoring case). 2374 * @param blockName the name of the block to match 2375 * @return the UnicodeBlock with that name 2376 * @throws IllegalArgumentException if the blockName could not be matched 2377 * @stable ICU 3.0 2378 */ forName(String blockName)2379 public static final UnicodeBlock forName(String blockName) { 2380 Map<String, UnicodeBlock> m = null; 2381 if (mref != null) { 2382 m = mref.get(); 2383 } 2384 if (m == null) { 2385 m = new HashMap<String, UnicodeBlock>(BLOCKS_.length); 2386 for (int i = 0; i < BLOCKS_.length; ++i) { 2387 UnicodeBlock b = BLOCKS_[i]; 2388 String name = trimBlockName( 2389 getPropertyValueName(UProperty.BLOCK, b.getID(), 2390 UProperty.NameChoice.LONG)); 2391 m.put(name, b); 2392 } 2393 mref = new SoftReference<Map<String, UnicodeBlock>>(m); 2394 } 2395 UnicodeBlock b = m.get(trimBlockName(blockName)); 2396 if (b == null) { 2397 throw new IllegalArgumentException(); 2398 } 2399 return b; 2400 } 2401 private static SoftReference<Map<String, UnicodeBlock>> mref; 2402 trimBlockName(String name)2403 private static String trimBlockName(String name) { 2404 String upper = name.toUpperCase(Locale.ENGLISH); 2405 StringBuilder result = new StringBuilder(upper.length()); 2406 for (int i = 0; i < upper.length(); i++) { 2407 char c = upper.charAt(i); 2408 if (c != ' ' && c != '_' && c != '-') { 2409 result.append(c); 2410 } 2411 } 2412 return result.toString(); 2413 } 2414 2415 /** 2416 * {icu} Returns the type ID of this Unicode block 2417 * @return integer type ID of this Unicode block 2418 * @stable ICU 2.4 2419 */ getID()2420 public int getID() 2421 { 2422 return m_id_; 2423 } 2424 2425 // private data members --------------------------------------------- 2426 2427 /** 2428 * Identification code for this UnicodeBlock 2429 */ 2430 private int m_id_; 2431 2432 // private 
constructor ---------------------------------------------- 2433 2434 /** 2435 * UnicodeBlock constructor 2436 * @param name name of this UnicodeBlock 2437 * @param id unique id of this UnicodeBlock 2438 * @exception NullPointerException if name is <code>null</code> 2439 */ UnicodeBlock(String name, int id)2440 private UnicodeBlock(String name, int id) 2441 { 2442 super(name); 2443 m_id_ = id; 2444 if (id >= 0) { 2445 BLOCKS_[id] = this; 2446 } 2447 } 2448 } 2449 2450 /** 2451 * East Asian Width constants. 2452 * @see UProperty#EAST_ASIAN_WIDTH 2453 * @see UCharacter#getIntPropertyValue 2454 * @stable ICU 2.4 2455 */ 2456 public static interface EastAsianWidth 2457 { 2458 /** 2459 * @stable ICU 2.4 2460 */ 2461 public static final int NEUTRAL = 0; 2462 /** 2463 * @stable ICU 2.4 2464 */ 2465 public static final int AMBIGUOUS = 1; 2466 /** 2467 * @stable ICU 2.4 2468 */ 2469 public static final int HALFWIDTH = 2; 2470 /** 2471 * @stable ICU 2.4 2472 */ 2473 public static final int FULLWIDTH = 3; 2474 /** 2475 * @stable ICU 2.4 2476 */ 2477 public static final int NARROW = 4; 2478 /** 2479 * @stable ICU 2.4 2480 */ 2481 public static final int WIDE = 5; 2482 /** 2483 * @stable ICU 2.4 2484 */ 2485 public static final int COUNT = 6; 2486 } 2487 2488 /** 2489 * Decomposition Type constants. 
2490 * @see UProperty#DECOMPOSITION_TYPE 2491 * @stable ICU 2.4 2492 */ 2493 public static interface DecompositionType 2494 { 2495 /** 2496 * @stable ICU 2.4 2497 */ 2498 public static final int NONE = 0; 2499 /** 2500 * @stable ICU 2.4 2501 */ 2502 public static final int CANONICAL = 1; 2503 /** 2504 * @stable ICU 2.4 2505 */ 2506 public static final int COMPAT = 2; 2507 /** 2508 * @stable ICU 2.4 2509 */ 2510 public static final int CIRCLE = 3; 2511 /** 2512 * @stable ICU 2.4 2513 */ 2514 public static final int FINAL = 4; 2515 /** 2516 * @stable ICU 2.4 2517 */ 2518 public static final int FONT = 5; 2519 /** 2520 * @stable ICU 2.4 2521 */ 2522 public static final int FRACTION = 6; 2523 /** 2524 * @stable ICU 2.4 2525 */ 2526 public static final int INITIAL = 7; 2527 /** 2528 * @stable ICU 2.4 2529 */ 2530 public static final int ISOLATED = 8; 2531 /** 2532 * @stable ICU 2.4 2533 */ 2534 public static final int MEDIAL = 9; 2535 /** 2536 * @stable ICU 2.4 2537 */ 2538 public static final int NARROW = 10; 2539 /** 2540 * @stable ICU 2.4 2541 */ 2542 public static final int NOBREAK = 11; 2543 /** 2544 * @stable ICU 2.4 2545 */ 2546 public static final int SMALL = 12; 2547 /** 2548 * @stable ICU 2.4 2549 */ 2550 public static final int SQUARE = 13; 2551 /** 2552 * @stable ICU 2.4 2553 */ 2554 public static final int SUB = 14; 2555 /** 2556 * @stable ICU 2.4 2557 */ 2558 public static final int SUPER = 15; 2559 /** 2560 * @stable ICU 2.4 2561 */ 2562 public static final int VERTICAL = 16; 2563 /** 2564 * @stable ICU 2.4 2565 */ 2566 public static final int WIDE = 17; 2567 /** 2568 * @stable ICU 2.4 2569 */ 2570 public static final int COUNT = 18; 2571 } 2572 2573 /** 2574 * Joining Type constants. 
* @see UProperty#JOINING_TYPE
 * @stable ICU 2.4
 */
public interface JoiningType {
    // Fields of an interface are implicitly public, static and final.

    /** @stable ICU 2.4 */
    int NON_JOINING = 0;
    /** @stable ICU 2.4 */
    int JOIN_CAUSING = 1;
    /** @stable ICU 2.4 */
    int DUAL_JOINING = 2;
    /** @stable ICU 2.4 */
    int LEFT_JOINING = 3;
    /** @stable ICU 2.4 */
    int RIGHT_JOINING = 4;
    /** @stable ICU 2.4 */
    int TRANSPARENT = 5;
    /**
     * One more than the highest value defined in this interface.
     * @stable ICU 2.4
     */
    int COUNT = 6;
}

/**
 * Joining Group constants.
 * @see UProperty#JOINING_GROUP
 * @stable ICU 2.4
 */
public interface JoiningGroup {
    // Fields of an interface are implicitly public, static and final.

    /** @stable ICU 2.4 */
    int NO_JOINING_GROUP = 0;
    /** @stable ICU 2.4 */
    int AIN = 1;
    /** @stable ICU 2.4 */
    int ALAPH = 2;
    /** @stable ICU 2.4 */
    int ALEF = 3;
    /** @stable ICU 2.4 */
    int BEH = 4;
    /** @stable ICU 2.4 */
    int BETH = 5;
    /** @stable ICU 2.4 */
    int DAL = 6;
    /** @stable ICU 2.4 */
    int DALATH_RISH = 7;
    /** @stable ICU 2.4 */
    int E = 8;
    /** @stable ICU 2.4 */
    int FEH = 9;
    /** @stable ICU 2.4 */
    int FINAL_SEMKATH = 10;
    /** @stable ICU 2.4 */
    int GAF = 11;
    /** @stable ICU 2.4 */
    int GAMAL = 12;
    /** @stable ICU 2.4 */
    int HAH = 13;
    /** @stable ICU 4.6 */
    int TEH_MARBUTA_GOAL = 14;
    /**
     * Same value as {@link #TEH_MARBUTA_GOAL}.
     * @stable ICU 2.4
     */
    int HAMZA_ON_HEH_GOAL = TEH_MARBUTA_GOAL;
    /** @stable ICU 2.4 */
    int HE = 15;
    /** @stable ICU 2.4 */
    int HEH = 16;
    /** @stable ICU 2.4 */
    int HEH_GOAL = 17;
    /** @stable ICU 2.4 */
    int HETH = 18;
    /** @stable ICU 2.4 */
    int KAF = 19;
    /** @stable ICU 2.4 */
    int KAPH = 20;
    /** @stable ICU 2.4 */
    int KNOTTED_HEH = 21;
    /** @stable ICU 2.4 */
    int LAM = 22;
    /** @stable ICU 2.4 */
    int LAMADH = 23;
    /** @stable ICU 2.4 */
    int MEEM = 24;
    /** @stable ICU 2.4 */
    int MIM = 25;
    /** @stable ICU 2.4 */
    int NOON = 26;
    /** @stable ICU 2.4 */
    int NUN = 27;
    /** @stable ICU 2.4 */
    int PE = 28;
    /** @stable ICU 2.4 */
    int QAF = 29;
    /** @stable ICU 2.4 */
    int QAPH = 30;
    /** @stable ICU 2.4 */
    int REH = 31;
    /** @stable ICU 2.4 */
    int REVERSED_PE = 32;
    /** @stable ICU 2.4 */
    int SAD = 33;
    /** @stable ICU 2.4 */
    int SADHE = 34;
    /** @stable ICU 2.4 */
    int SEEN = 35;
    /** @stable ICU 2.4 */
    int SEMKATH = 36;
    /** @stable ICU 2.4 */
    int SHIN = 37;
    /** @stable ICU 2.4 */
    int SWASH_KAF = 38;
    /** @stable ICU 2.4 */
    int SYRIAC_WAW = 39;
    /** @stable ICU 2.4 */
    int TAH = 40;
    /** @stable ICU 2.4 */
    int TAW = 41;
    /** @stable ICU 2.4 */
    int TEH_MARBUTA = 42;
    /** @stable ICU 2.4 */
    int TETH = 43;
    /** @stable ICU 2.4 */
    int WAW = 44;
    /** @stable ICU 2.4 */
    int YEH = 45;
    /** @stable ICU 2.4 */
    int YEH_BARREE = 46;
    /** @stable ICU 2.4 */
    int YEH_WITH_TAIL = 47;
    /** @stable ICU 2.4 */
    int YUDH = 48;
    /** @stable ICU 2.4 */
    int YUDH_HE = 49;
    /** @stable ICU 2.4 */
    int ZAIN = 50;
    /** @stable ICU 2.6 */
    int FE = 51;
    /** @stable ICU 2.6 */
    int KHAPH = 52;
    /** @stable ICU 2.6 */
    int ZHAIN = 53;
    /** @stable ICU 4.0 */
    int BURUSHASKI_YEH_BARREE = 54;
    /** @stable ICU 4.4 */
    int FARSI_YEH = 55;
    /** @stable ICU 4.4 */
    int NYA = 56;
    /** @stable ICU 49 */
    int ROHINGYA_YEH = 57;

    /** @stable ICU 54 */
    int MANICHAEAN_ALEPH = 58;
    /** @stable ICU 54 */
    int MANICHAEAN_AYIN = 59;
    /** @stable ICU 54 */
    int MANICHAEAN_BETH = 60;
    /** @stable ICU 54 */
    int MANICHAEAN_DALETH = 61;
    /** @stable ICU 54 */
    int MANICHAEAN_DHAMEDH = 62;
    /** @stable ICU 54 */
    int MANICHAEAN_FIVE = 63;
    /** @stable ICU 54 */
    int MANICHAEAN_GIMEL = 64;
    /** @stable ICU 54 */
    int MANICHAEAN_HETH = 65;
    /** @stable ICU 54 */
    int MANICHAEAN_HUNDRED = 66;
    /** @stable ICU 54 */
    int MANICHAEAN_KAPH = 67;
    /** @stable ICU 54 */
    int MANICHAEAN_LAMEDH = 68;
    /** @stable ICU 54 */
    int MANICHAEAN_MEM = 69;
    /** @stable ICU 54 */
    int MANICHAEAN_NUN = 70;
    /** @stable ICU 54 */
    int MANICHAEAN_ONE = 71;
    /** @stable ICU 54 */
    int MANICHAEAN_PE = 72;
    /** @stable ICU 54 */
    int MANICHAEAN_QOPH = 73;
    /** @stable ICU 54 */
    int MANICHAEAN_RESH = 74;
    /** @stable ICU 54 */
    int MANICHAEAN_SADHE = 75;
    /** @stable ICU 54 */
    int MANICHAEAN_SAMEKH = 76;
    /** @stable ICU 54 */
    int MANICHAEAN_TAW = 77;
    /** @stable ICU 54 */
    int MANICHAEAN_TEN = 78;
    /** @stable ICU 54 */
    int MANICHAEAN_TETH = 79;
    /** @stable ICU 54 */
    int MANICHAEAN_THAMEDH = 80;
    /** @stable ICU 54 */
    int MANICHAEAN_TWENTY = 81;
    /** @stable ICU 54 */
    int MANICHAEAN_WAW = 82;
    /** @stable ICU 54 */
    int MANICHAEAN_YODH = 83;
    /** @stable ICU 54 */
    int MANICHAEAN_ZAYIN = 84;
    /** @stable ICU 54 */
    int STRAIGHT_WAW = 85;

    /**
     * One more than the highest value defined in this interface.
     * @stable ICU 2.4
     */
    int COUNT = 86;
}
2908 2909 /** 2910 * Grapheme Cluster Break constants. 2911 * @see UProperty#GRAPHEME_CLUSTER_BREAK 2912 * @stable ICU 3.4 2913 */ 2914 public static interface GraphemeClusterBreak { 2915 /** 2916 * @stable ICU 3.4 2917 */ 2918 public static final int OTHER = 0; 2919 /** 2920 * @stable ICU 3.4 2921 */ 2922 public static final int CONTROL = 1; 2923 /** 2924 * @stable ICU 3.4 2925 */ 2926 public static final int CR = 2; 2927 /** 2928 * @stable ICU 3.4 2929 */ 2930 public static final int EXTEND = 3; 2931 /** 2932 * @stable ICU 3.4 2933 */ 2934 public static final int L = 4; 2935 /** 2936 * @stable ICU 3.4 2937 */ 2938 public static final int LF = 5; 2939 /** 2940 * @stable ICU 3.4 2941 */ 2942 public static final int LV = 6; 2943 /** 2944 * @stable ICU 3.4 2945 */ 2946 public static final int LVT = 7; 2947 /** 2948 * @stable ICU 3.4 2949 */ 2950 public static final int T = 8; 2951 /** 2952 * @stable ICU 3.4 2953 */ 2954 public static final int V = 9; 2955 /** 2956 * @stable ICU 4.0 2957 */ 2958 public static final int SPACING_MARK = 10; 2959 /** 2960 * @stable ICU 4.0 2961 */ 2962 public static final int PREPEND = 11; 2963 /** @stable ICU 50 */ 2964 public static final int REGIONAL_INDICATOR = 12; /*[RI]*/ /* new in Unicode 6.2/ICU 50 */ 2965 /** 2966 * @stable ICU 3.4 2967 */ 2968 public static final int COUNT = 13; 2969 } 2970 2971 /** 2972 * Word Break constants. 
2973 * @see UProperty#WORD_BREAK 2974 * @stable ICU 3.4 2975 */ 2976 public static interface WordBreak { 2977 /** 2978 * @stable ICU 3.8 2979 */ 2980 public static final int OTHER = 0; 2981 /** 2982 * @stable ICU 3.8 2983 */ 2984 public static final int ALETTER = 1; 2985 /** 2986 * @stable ICU 3.8 2987 */ 2988 public static final int FORMAT = 2; 2989 /** 2990 * @stable ICU 3.8 2991 */ 2992 public static final int KATAKANA = 3; 2993 /** 2994 * @stable ICU 3.8 2995 */ 2996 public static final int MIDLETTER = 4; 2997 /** 2998 * @stable ICU 3.8 2999 */ 3000 public static final int MIDNUM = 5; 3001 /** 3002 * @stable ICU 3.8 3003 */ 3004 public static final int NUMERIC = 6; 3005 /** 3006 * @stable ICU 3.8 3007 */ 3008 public static final int EXTENDNUMLET = 7; 3009 /** 3010 * @stable ICU 4.0 3011 */ 3012 public static final int CR = 8; 3013 /** 3014 * @stable ICU 4.0 3015 */ 3016 public static final int EXTEND = 9; 3017 /** 3018 * @stable ICU 4.0 3019 */ 3020 public static final int LF = 10; 3021 /** 3022 * @stable ICU 4.0 3023 */ 3024 public static final int MIDNUMLET = 11; 3025 /** 3026 * @stable ICU 4.0 3027 */ 3028 public static final int NEWLINE = 12; 3029 /** @stable ICU 50 */ 3030 public static final int REGIONAL_INDICATOR = 13; /*[RI]*/ /* new in Unicode 6.2/ICU 50 */ 3031 /** @stable ICU 52 */ 3032 public static final int HEBREW_LETTER = 14; /*[HL]*/ /* from here on: new in Unicode 6.3/ICU 52 */ 3033 /** @stable ICU 52 */ 3034 public static final int SINGLE_QUOTE = 15; /*[SQ]*/ 3035 /** @stable ICU 52 */ 3036 public static final int DOUBLE_QUOTE = 16; /*[DQ]*/ 3037 /** 3038 * @stable ICU 4.0 3039 */ 3040 public static final int COUNT = 17; 3041 } 3042 3043 /** 3044 * Sentence Break constants. 
3045 * @see UProperty#SENTENCE_BREAK 3046 * @stable ICU 3.4 3047 */ 3048 public static interface SentenceBreak { 3049 /** 3050 * @stable ICU 3.8 3051 */ 3052 public static final int OTHER = 0; 3053 /** 3054 * @stable ICU 3.8 3055 */ 3056 public static final int ATERM = 1; 3057 /** 3058 * @stable ICU 3.8 3059 */ 3060 public static final int CLOSE = 2; 3061 /** 3062 * @stable ICU 3.8 3063 */ 3064 public static final int FORMAT = 3; 3065 /** 3066 * @stable ICU 3.8 3067 */ 3068 public static final int LOWER = 4; 3069 /** 3070 * @stable ICU 3.8 3071 */ 3072 public static final int NUMERIC = 5; 3073 /** 3074 * @stable ICU 3.8 3075 */ 3076 public static final int OLETTER = 6; 3077 /** 3078 * @stable ICU 3.8 3079 */ 3080 public static final int SEP = 7; 3081 /** 3082 * @stable ICU 3.8 3083 */ 3084 public static final int SP = 8; 3085 /** 3086 * @stable ICU 3.8 3087 */ 3088 public static final int STERM = 9; 3089 /** 3090 * @stable ICU 3.8 3091 */ 3092 public static final int UPPER = 10; 3093 /** 3094 * @stable ICU 4.0 3095 */ 3096 public static final int CR = 11; 3097 /** 3098 * @stable ICU 4.0 3099 */ 3100 public static final int EXTEND = 12; 3101 /** 3102 * @stable ICU 4.0 3103 */ 3104 public static final int LF = 13; 3105 /** 3106 * @stable ICU 4.0 3107 */ 3108 public static final int SCONTINUE = 14; 3109 /** 3110 * @stable ICU 4.0 3111 */ 3112 public static final int COUNT = 15; 3113 } 3114 3115 /** 3116 * Line Break constants. 
3117 * @see UProperty#LINE_BREAK 3118 * @stable ICU 2.4 3119 */ 3120 public static interface LineBreak 3121 { 3122 /** 3123 * @stable ICU 2.4 3124 */ 3125 public static final int UNKNOWN = 0; 3126 /** 3127 * @stable ICU 2.4 3128 */ 3129 public static final int AMBIGUOUS = 1; 3130 /** 3131 * @stable ICU 2.4 3132 */ 3133 public static final int ALPHABETIC = 2; 3134 /** 3135 * @stable ICU 2.4 3136 */ 3137 public static final int BREAK_BOTH = 3; 3138 /** 3139 * @stable ICU 2.4 3140 */ 3141 public static final int BREAK_AFTER = 4; 3142 /** 3143 * @stable ICU 2.4 3144 */ 3145 public static final int BREAK_BEFORE = 5; 3146 /** 3147 * @stable ICU 2.4 3148 */ 3149 public static final int MANDATORY_BREAK = 6; 3150 /** 3151 * @stable ICU 2.4 3152 */ 3153 public static final int CONTINGENT_BREAK = 7; 3154 /** 3155 * @stable ICU 2.4 3156 */ 3157 public static final int CLOSE_PUNCTUATION = 8; 3158 /** 3159 * @stable ICU 2.4 3160 */ 3161 public static final int COMBINING_MARK = 9; 3162 /** 3163 * @stable ICU 2.4 3164 */ 3165 public static final int CARRIAGE_RETURN = 10; 3166 /** 3167 * @stable ICU 2.4 3168 */ 3169 public static final int EXCLAMATION = 11; 3170 /** 3171 * @stable ICU 2.4 3172 */ 3173 public static final int GLUE = 12; 3174 /** 3175 * @stable ICU 2.4 3176 */ 3177 public static final int HYPHEN = 13; 3178 /** 3179 * @stable ICU 2.4 3180 */ 3181 public static final int IDEOGRAPHIC = 14; 3182 /** 3183 * @see #INSEPARABLE 3184 * @stable ICU 2.4 3185 */ 3186 public static final int INSEPERABLE = 15; 3187 /** 3188 * Renamed from the misspelled "inseperable" in Unicode 4.0.1. 
3189 * @stable ICU 3.0 3190 */ 3191 public static final int INSEPARABLE = 15; 3192 /** 3193 * @stable ICU 2.4 3194 */ 3195 public static final int INFIX_NUMERIC = 16; 3196 /** 3197 * @stable ICU 2.4 3198 */ 3199 public static final int LINE_FEED = 17; 3200 /** 3201 * @stable ICU 2.4 3202 */ 3203 public static final int NONSTARTER = 18; 3204 /** 3205 * @stable ICU 2.4 3206 */ 3207 public static final int NUMERIC = 19; 3208 /** 3209 * @stable ICU 2.4 3210 */ 3211 public static final int OPEN_PUNCTUATION = 20; 3212 /** 3213 * @stable ICU 2.4 3214 */ 3215 public static final int POSTFIX_NUMERIC = 21; 3216 /** 3217 * @stable ICU 2.4 3218 */ 3219 public static final int PREFIX_NUMERIC = 22; 3220 /** 3221 * @stable ICU 2.4 3222 */ 3223 public static final int QUOTATION = 23; 3224 /** 3225 * @stable ICU 2.4 3226 */ 3227 public static final int COMPLEX_CONTEXT = 24; 3228 /** 3229 * @stable ICU 2.4 3230 */ 3231 public static final int SURROGATE = 25; 3232 /** 3233 * @stable ICU 2.4 3234 */ 3235 public static final int SPACE = 26; 3236 /** 3237 * @stable ICU 2.4 3238 */ 3239 public static final int BREAK_SYMBOLS = 27; 3240 /** 3241 * @stable ICU 2.4 3242 */ 3243 public static final int ZWSPACE = 28; 3244 /** 3245 * @stable ICU 2.6 3246 */ 3247 public static final int NEXT_LINE = 29; /*[NL]*/ /* from here on: new in Unicode 4/ICU 2.6 */ 3248 /** 3249 * @stable ICU 2.6 3250 */ 3251 public static final int WORD_JOINER = 30; /*[WJ]*/ 3252 /** 3253 * @stable ICU 3.4 3254 */ 3255 public static final int H2 = 31; /* from here on: new in Unicode 4.1/ICU 3.4 */ 3256 /** 3257 * @stable ICU 3.4 3258 */ 3259 public static final int H3 = 32; 3260 /** 3261 * @stable ICU 3.4 3262 */ 3263 public static final int JL = 33; 3264 /** 3265 * @stable ICU 3.4 3266 */ 3267 public static final int JT = 34; 3268 /** 3269 * @stable ICU 3.4 3270 */ 3271 public static final int JV = 35; 3272 /** @stable ICU 4.4 */ 3273 public static final int CLOSE_PARENTHESIS = 36; /*[CP]*/ /* new in Unicode 5.2/ICU 4.4 
*/ 3274 /** @stable ICU 49 */ 3275 public static final int CONDITIONAL_JAPANESE_STARTER = 37; /*[CJ]*/ /* new in Unicode 6.1/ICU 49 */ 3276 /** @stable ICU 49 */ 3277 public static final int HEBREW_LETTER = 38; /*[HL]*/ /* new in Unicode 6.1/ICU 49 */ 3278 /** @stable ICU 50 */ 3279 public static final int REGIONAL_INDICATOR = 39; /*[RI]*/ /* new in Unicode 6.2/ICU 50 */ 3280 /** 3281 * @stable ICU 2.4 3282 */ 3283 public static final int COUNT = 40; 3284 } 3285 3286 /** 3287 * Numeric Type constants. 3288 * @see UProperty#NUMERIC_TYPE 3289 * @stable ICU 2.4 3290 */ 3291 public static interface NumericType 3292 { 3293 /** 3294 * @stable ICU 2.4 3295 */ 3296 public static final int NONE = 0; 3297 /** 3298 * @stable ICU 2.4 3299 */ 3300 public static final int DECIMAL = 1; 3301 /** 3302 * @stable ICU 2.4 3303 */ 3304 public static final int DIGIT = 2; 3305 /** 3306 * @stable ICU 2.4 3307 */ 3308 public static final int NUMERIC = 3; 3309 /** 3310 * @stable ICU 2.4 3311 */ 3312 public static final int COUNT = 4; 3313 } 3314 3315 /** 3316 * Hangul Syllable Type constants. 3317 * 3318 * @see UProperty#HANGUL_SYLLABLE_TYPE 3319 * @stable ICU 2.6 3320 */ 3321 public static interface HangulSyllableType 3322 { 3323 /** 3324 * @stable ICU 2.6 3325 */ 3326 public static final int NOT_APPLICABLE = 0; /*[NA]*/ /*See note !!*/ 3327 /** 3328 * @stable ICU 2.6 3329 */ 3330 public static final int LEADING_JAMO = 1; /*[L]*/ 3331 /** 3332 * @stable ICU 2.6 3333 */ 3334 public static final int VOWEL_JAMO = 2; /*[V]*/ 3335 /** 3336 * @stable ICU 2.6 3337 */ 3338 public static final int TRAILING_JAMO = 3; /*[T]*/ 3339 /** 3340 * @stable ICU 2.6 3341 */ 3342 public static final int LV_SYLLABLE = 4; /*[LV]*/ 3343 /** 3344 * @stable ICU 2.6 3345 */ 3346 public static final int LVT_SYLLABLE = 5; /*[LVT]*/ 3347 /** 3348 * @stable ICU 2.6 3349 */ 3350 public static final int COUNT = 6; 3351 } 3352 3353 /** 3354 * Bidi Paired Bracket Type constants. 
3355 * 3356 * @see UProperty#BIDI_PAIRED_BRACKET_TYPE 3357 * @stable ICU 52 3358 */ 3359 public static interface BidiPairedBracketType { 3360 /** 3361 * Not a paired bracket. 3362 * @stable ICU 52 3363 */ 3364 public static final int NONE = 0; 3365 /** 3366 * Open paired bracket. 3367 * @stable ICU 52 3368 */ 3369 public static final int OPEN = 1; 3370 /** 3371 * Close paired bracket. 3372 * @stable ICU 52 3373 */ 3374 public static final int CLOSE = 2; 3375 /** 3376 * @stable ICU 52 3377 */ 3378 public static final int COUNT = 3; 3379 } 3380 3381 // public data members ----------------------------------------------- 3382 3383 /** 3384 * The lowest Unicode code point value. 3385 * @stable ICU 2.1 3386 */ 3387 public static final int MIN_VALUE = UTF16.CODEPOINT_MIN_VALUE; 3388 3389 /** 3390 * The highest Unicode code point value (scalar value) according to the 3391 * Unicode Standard. 3392 * This is a 21-bit value (21 bits, rounded up).<br> 3393 * Up-to-date Unicode implementation of java.lang.Character.MAX_VALUE 3394 * @stable ICU 2.1 3395 */ 3396 public static final int MAX_VALUE = UTF16.CODEPOINT_MAX_VALUE; 3397 3398 /** 3399 * The minimum value for Supplementary code points 3400 * @stable ICU 2.1 3401 */ 3402 public static final int SUPPLEMENTARY_MIN_VALUE = 3403 UTF16.SUPPLEMENTARY_MIN_VALUE; 3404 3405 /** 3406 * Unicode value used when translating into Unicode encoding form and there 3407 * is no existing character. 3408 * @stable ICU 2.1 3409 */ 3410 public static final int REPLACEMENT_CHAR = '\uFFFD'; 3411 3412 /** 3413 * Special value that is returned by getUnicodeNumericValue(int) when no 3414 * numeric value is defined for a code point. 3415 * @stable ICU 2.4 3416 * @see #getUnicodeNumericValue 3417 */ 3418 public static final double NO_NUMERIC_VALUE = -123456789; 3419 3420 /** 3421 * Compatibility constant for Java Character's MIN_RADIX. 
3422 * @stable ICU 3.4 3423 */ 3424 public static final int MIN_RADIX = java.lang.Character.MIN_RADIX; 3425 3426 /** 3427 * Compatibility constant for Java Character's MAX_RADIX. 3428 * @stable ICU 3.4 3429 */ 3430 public static final int MAX_RADIX = java.lang.Character.MAX_RADIX; 3431 3432 /** 3433 * Do not lowercase non-initial parts of words when titlecasing. 3434 * Option bit for titlecasing APIs that take an options bit set. 3435 * 3436 * By default, titlecasing will titlecase the first cased character 3437 * of a word and lowercase all other characters. 3438 * With this option, the other characters will not be modified. 3439 * 3440 * @see #toTitleCase 3441 * @stable ICU 3.8 3442 */ 3443 public static final int TITLECASE_NO_LOWERCASE = 0x100; 3444 3445 /** 3446 * Do not adjust the titlecasing indexes from BreakIterator::next() indexes; 3447 * titlecase exactly the characters at breaks from the iterator. 3448 * Option bit for titlecasing APIs that take an options bit set. 3449 * 3450 * By default, titlecasing will take each break iterator index, 3451 * adjust it by looking for the next cased character, and titlecase that one. 3452 * Other characters are lowercased. 3453 * 3454 * This follows Unicode 4 & 5 section 3.13 Default Case Operations: 3455 * 3456 * R3 toTitlecase(X): Find the word boundaries based on Unicode Standard Annex 3457 * #29, "Text Boundaries." Between each pair of word boundaries, find the first 3458 * cased character F. If F exists, map F to default_title(F); then map each 3459 * subsequent character C to default_lower(C). 3460 * 3461 * @see #toTitleCase 3462 * @see #TITLECASE_NO_LOWERCASE 3463 * @stable ICU 3.8 3464 */ 3465 public static final int TITLECASE_NO_BREAK_ADJUSTMENT = 0x200; 3466 3467 // public methods ---------------------------------------------------- 3468 3469 /** 3470 * Returnss the numeric value of a decimal digit code point. 3471 * <br>This method observes the semantics of 3472 * <code>java.lang.Character.digit()</code>. 
Note that this 3473 * will return positive values for code points for which isDigit 3474 * returns false, just like java.lang.Character. 3475 * <br><em>Semantic Change:</em> In release 1.3.1 and 3476 * prior, this did not treat the European letters as having a 3477 * digit value, and also treated numeric letters and other numbers as 3478 * digits. 3479 * This has been changed to conform to the java semantics. 3480 * <br>A code point is a valid digit if and only if: 3481 * <ul> 3482 * <li>ch is a decimal digit or one of the european letters, and 3483 * <li>the value of ch is less than the specified radix. 3484 * </ul> 3485 * @param ch the code point to query 3486 * @param radix the radix 3487 * @return the numeric value represented by the code point in the 3488 * specified radix, or -1 if the code point is not a decimal digit 3489 * or if its value is too large for the radix 3490 * @stable ICU 2.1 3491 */ digit(int ch, int radix)3492 public static int digit(int ch, int radix) 3493 { 3494 if (2 <= radix && radix <= 36) { 3495 int value = digit(ch); 3496 if (value < 0) { 3497 // ch is not a decimal digit, try latin letters 3498 value = UCharacterProperty.getEuropeanDigit(ch); 3499 } 3500 return (value < radix) ? value : -1; 3501 } else { 3502 return -1; // invalid radix 3503 } 3504 } 3505 3506 /** 3507 * Returnss the numeric value of a decimal digit code point. 3508 * <br>This is a convenience overload of <code>digit(int, int)</code> 3509 * that provides a decimal radix. 3510 * <br><em>Semantic Change:</em> In release 1.3.1 and prior, this 3511 * treated numeric letters and other numbers as digits. This has 3512 * been changed to conform to the java semantics. 
3513 * @param ch the code point to query 3514 * @return the numeric value represented by the code point, 3515 * or -1 if the code point is not a decimal digit or if its 3516 * value is too large for a decimal radix 3517 * @stable ICU 2.1 3518 */ digit(int ch)3519 public static int digit(int ch) 3520 { 3521 return UCharacterProperty.INSTANCE.digit(ch); 3522 } 3523 3524 /** 3525 * Returns the numeric value of the code point as a nonnegative 3526 * integer. 3527 * <br>If the code point does not have a numeric value, then -1 is returned. 3528 * <br> 3529 * If the code point has a numeric value that cannot be represented as a 3530 * nonnegative integer (for example, a fractional value), then -2 is 3531 * returned. 3532 * @param ch the code point to query 3533 * @return the numeric value of the code point, or -1 if it has no numeric 3534 * value, or -2 if it has a numeric value that cannot be represented as a 3535 * nonnegative integer 3536 * @stable ICU 2.1 3537 */ getNumericValue(int ch)3538 public static int getNumericValue(int ch) 3539 { 3540 return UCharacterProperty.INSTANCE.getNumericValue(ch); 3541 } 3542 3543 /** 3544 * {@icu} Returns the numeric value for a Unicode code point as defined in the 3545 * Unicode Character Database.</p> 3546 * <p>A "double" return type is necessary because some numeric values are 3547 * fractions, negative, or too large for int.</p> 3548 * <p>For characters without any numeric values in the Unicode Character 3549 * Database, this function will return NO_NUMERIC_VALUE. 3550 * Note: This is different from the Unicode Standard which specifies NaN as the default value.</p> 3551 * <p><em>API Change:</em> In release 2.2 and prior, this API has a 3552 * return type int and returns -1 when the argument ch does not have a 3553 * corresponding numeric value. This has been changed to synch with ICU4C 3554 * </p> 3555 * This corresponds to the ICU4C function u_getNumericValue. 3556 * @param ch Code point to get the numeric value for. 
3557 * @return numeric value of ch, or NO_NUMERIC_VALUE if none is defined. 3558 * @stable ICU 2.4 3559 */ getUnicodeNumericValue(int ch)3560 public static double getUnicodeNumericValue(int ch) 3561 { 3562 return UCharacterProperty.INSTANCE.getUnicodeNumericValue(ch); 3563 } 3564 3565 /** 3566 * Compatibility override of Java deprecated method. This 3567 * method will always remain deprecated. 3568 * Same as java.lang.Character.isSpace(). 3569 * @param ch the code point 3570 * @return true if the code point is a space character as 3571 * defined by java.lang.Character.isSpace. 3572 * @deprecated ICU 3.4 (Java) 3573 */ 3574 @Deprecated isSpace(int ch)3575 public static boolean isSpace(int ch) { 3576 return ch <= 0x20 && 3577 (ch == 0x20 || ch == 0x09 || ch == 0x0a || ch == 0x0c || ch == 0x0d); 3578 } 3579 3580 /** 3581 * Returns a value indicating a code point's Unicode category. 3582 * Up-to-date Unicode implementation of java.lang.Character.getType() 3583 * except for the above mentioned code points that had their category 3584 * changed.<br> 3585 * Return results are constants from the interface 3586 * <a href=UCharacterCategory.html>UCharacterCategory</a><br> 3587 * <em>NOTE:</em> the UCharacterCategory values are <em>not</em> compatible with 3588 * those returned by java.lang.Character.getType. UCharacterCategory values 3589 * match the ones used in ICU4C, while java.lang.Character type 3590 * values, though similar, skip the value 17.</p> 3591 * @param ch code point whose type is to be determined 3592 * @return category which is a value of UCharacterCategory 3593 * @stable ICU 2.1 3594 */ getType(int ch)3595 public static int getType(int ch) 3596 { 3597 return UCharacterProperty.INSTANCE.getType(ch); 3598 } 3599 3600 /** 3601 * Determines if a code point has a defined meaning in the up-to-date 3602 * Unicode standard. 3603 * E.g. 
supplementary code points though allocated space are not defined in 3604 * Unicode yet.<br> 3605 * Up-to-date Unicode implementation of java.lang.Character.isDefined() 3606 * @param ch code point to be determined if it is defined in the most 3607 * current version of Unicode 3608 * @return true if this code point is defined in unicode 3609 * @stable ICU 2.1 3610 */ isDefined(int ch)3611 public static boolean isDefined(int ch) 3612 { 3613 return getType(ch) != 0; 3614 } 3615 3616 /** 3617 * Determines if a code point is a Java digit. 3618 * <br>This method observes the semantics of 3619 * <code>java.lang.Character.isDigit()</code>. It returns true for decimal 3620 * digits only. 3621 * <br><em>Semantic Change:</em> In release 1.3.1 and prior, this treated 3622 * numeric letters and other numbers as digits. 3623 * This has been changed to conform to the java semantics. 3624 * @param ch code point to query 3625 * @return true if this code point is a digit 3626 * @stable ICU 2.1 3627 */ isDigit(int ch)3628 public static boolean isDigit(int ch) 3629 { 3630 return getType(ch) == UCharacterCategory.DECIMAL_DIGIT_NUMBER; 3631 } 3632 3633 /** 3634 * Determines if the specified code point is an ISO control character. 3635 * A code point is considered to be an ISO control character if it is in 3636 * the range \u0000 through \u001F or in the range \u007F through 3637 * \u009F.<br> 3638 * Up-to-date Unicode implementation of java.lang.Character.isISOControl() 3639 * @param ch code point to determine if it is an ISO control character 3640 * @return true if code point is a ISO control character 3641 * @stable ICU 2.1 3642 */ isISOControl(int ch)3643 public static boolean isISOControl(int ch) 3644 { 3645 return ch >= 0 && ch <= APPLICATION_PROGRAM_COMMAND_ && 3646 ((ch <= UNIT_SEPARATOR_) || (ch >= DELETE_)); 3647 } 3648 3649 /** 3650 * Determines if the specified code point is a letter. 
3651 * Up-to-date Unicode implementation of java.lang.Character.isLetter() 3652 * @param ch code point to determine if it is a letter 3653 * @return true if code point is a letter 3654 * @stable ICU 2.1 3655 */ isLetter(int ch)3656 public static boolean isLetter(int ch) 3657 { 3658 // if props == 0, it will just fall through and return false 3659 return ((1 << getType(ch)) 3660 & ((1 << UCharacterCategory.UPPERCASE_LETTER) 3661 | (1 << UCharacterCategory.LOWERCASE_LETTER) 3662 | (1 << UCharacterCategory.TITLECASE_LETTER) 3663 | (1 << UCharacterCategory.MODIFIER_LETTER) 3664 | (1 << UCharacterCategory.OTHER_LETTER))) != 0; 3665 } 3666 3667 /** 3668 * Determines if the specified code point is a letter or digit. 3669 * {@icunote} This method, unlike java.lang.Character does not regard the ascii 3670 * characters 'A' - 'Z' and 'a' - 'z' as digits. 3671 * @param ch code point to determine if it is a letter or a digit 3672 * @return true if code point is a letter or a digit 3673 * @stable ICU 2.1 3674 */ isLetterOrDigit(int ch)3675 public static boolean isLetterOrDigit(int ch) 3676 { 3677 return ((1 << getType(ch)) 3678 & ((1 << UCharacterCategory.UPPERCASE_LETTER) 3679 | (1 << UCharacterCategory.LOWERCASE_LETTER) 3680 | (1 << UCharacterCategory.TITLECASE_LETTER) 3681 | (1 << UCharacterCategory.MODIFIER_LETTER) 3682 | (1 << UCharacterCategory.OTHER_LETTER) 3683 | (1 << UCharacterCategory.DECIMAL_DIGIT_NUMBER))) != 0; 3684 } 3685 3686 /** 3687 * Compatibility override of Java deprecated method. This 3688 * method will always remain deprecated. Delegates to 3689 * java.lang.Character.isJavaIdentifierStart. 3690 * @param cp the code point 3691 * @return true if the code point can start a java identifier. 3692 * @deprecated ICU 3.4 (Java) 3693 */ 3694 @Deprecated isJavaLetter(int cp)3695 public static boolean isJavaLetter(int cp) { 3696 return isJavaIdentifierStart(cp); 3697 } 3698 3699 /** 3700 * Compatibility override of Java deprecated method. 
This 3701 * method will always remain deprecated. Delegates to 3702 * java.lang.Character.isJavaIdentifierPart. 3703 * @param cp the code point 3704 * @return true if the code point can continue a java identifier. 3705 * @deprecated ICU 3.4 (Java) 3706 */ 3707 @Deprecated isJavaLetterOrDigit(int cp)3708 public static boolean isJavaLetterOrDigit(int cp) { 3709 return isJavaIdentifierPart(cp); 3710 } 3711 3712 /** 3713 * Compatibility override of Java method, delegates to 3714 * java.lang.Character.isJavaIdentifierStart. 3715 * @param cp the code point 3716 * @return true if the code point can start a java identifier. 3717 * @stable ICU 3.4 3718 */ isJavaIdentifierStart(int cp)3719 public static boolean isJavaIdentifierStart(int cp) { 3720 // note, downcast to char for jdk 1.4 compatibility 3721 return java.lang.Character.isJavaIdentifierStart((char)cp); 3722 } 3723 3724 /** 3725 * Compatibility override of Java method, delegates to 3726 * java.lang.Character.isJavaIdentifierPart. 3727 * @param cp the code point 3728 * @return true if the code point can continue a java identifier. 3729 * @stable ICU 3.4 3730 */ isJavaIdentifierPart(int cp)3731 public static boolean isJavaIdentifierPart(int cp) { 3732 // note, downcast to char for jdk 1.4 compatibility 3733 return java.lang.Character.isJavaIdentifierPart((char)cp); 3734 } 3735 3736 /** 3737 * Determines if the specified code point is a lowercase character. 
3738 * UnicodeData only contains case mappings for code points where they are 3739 * one-to-one mappings; it also omits information about context-sensitive 3740 * case mappings.<br> For more information about Unicode case mapping 3741 * please refer to the 3742 * <a href=http://www.unicode.org/unicode/reports/tr21/>Technical report 3743 * #21</a>.<br> 3744 * Up-to-date Unicode implementation of java.lang.Character.isLowerCase() 3745 * @param ch code point to determine if it is in lowercase 3746 * @return true if code point is a lowercase character 3747 * @stable ICU 2.1 3748 */ isLowerCase(int ch)3749 public static boolean isLowerCase(int ch) 3750 { 3751 // if props == 0, it will just fall through and return false 3752 return getType(ch) == UCharacterCategory.LOWERCASE_LETTER; 3753 } 3754 3755 /** 3756 * Determines if the specified code point is a white space character. 3757 * A code point is considered to be an whitespace character if and only 3758 * if it satisfies one of the following criteria: 3759 * <ul> 3760 * <li> It is a Unicode Separator character (categories "Z" = "Zs" or "Zl" or "Zp"), but is not 3761 * also a non-breaking space (\u00A0 or \u2007 or \u202F). 3762 * <li> It is \u0009, HORIZONTAL TABULATION. 3763 * <li> It is \u000A, LINE FEED. 3764 * <li> It is \u000B, VERTICAL TABULATION. 3765 * <li> It is \u000C, FORM FEED. 3766 * <li> It is \u000D, CARRIAGE RETURN. 3767 * <li> It is \u001C, FILE SEPARATOR. 3768 * <li> It is \u001D, GROUP SEPARATOR. 3769 * <li> It is \u001E, RECORD SEPARATOR. 3770 * <li> It is \u001F, UNIT SEPARATOR. 3771 * </ul> 3772 * 3773 * This API tries to sync with the semantics of Java's 3774 * java.lang.Character.isWhitespace(), but it may not return 3775 * the exact same results because of the Unicode version 3776 * difference. 3777 * <p>Note: Unicode 4.0.1 changed U+200B ZERO WIDTH SPACE from a Space Separator (Zs) 3778 * to a Format Control (Cf). Since then, isWhitespace(0x200b) returns false. 
3779 * See http://www.unicode.org/versions/Unicode4.0.1/ 3780 * @param ch code point to determine if it is a white space 3781 * @return true if the specified code point is a white space character 3782 * @stable ICU 2.1 3783 */ isWhitespace(int ch)3784 public static boolean isWhitespace(int ch) 3785 { 3786 // exclude no-break spaces 3787 // if props == 0, it will just fall through and return false 3788 return ((1 << getType(ch)) & 3789 ((1 << UCharacterCategory.SPACE_SEPARATOR) 3790 | (1 << UCharacterCategory.LINE_SEPARATOR) 3791 | (1 << UCharacterCategory.PARAGRAPH_SEPARATOR))) != 0 3792 && (ch != NO_BREAK_SPACE_) && (ch != FIGURE_SPACE_) && (ch != NARROW_NO_BREAK_SPACE_) 3793 // TAB VT LF FF CR FS GS RS US NL are all control characters 3794 // that are white spaces. 3795 || (ch >= 0x9 && ch <= 0xd) || (ch >= 0x1c && ch <= 0x1f); 3796 } 3797 3798 /** 3799 * Determines if the specified code point is a Unicode specified space 3800 * character, i.e. if code point is in the category Zs, Zl and Zp. 3801 * Up-to-date Unicode implementation of java.lang.Character.isSpaceChar(). 3802 * @param ch code point to determine if it is a space 3803 * @return true if the specified code point is a space character 3804 * @stable ICU 2.1 3805 */ isSpaceChar(int ch)3806 public static boolean isSpaceChar(int ch) 3807 { 3808 // if props == 0, it will just fall through and return false 3809 return ((1 << getType(ch)) & ((1 << UCharacterCategory.SPACE_SEPARATOR) 3810 | (1 << UCharacterCategory.LINE_SEPARATOR) 3811 | (1 << UCharacterCategory.PARAGRAPH_SEPARATOR))) 3812 != 0; 3813 } 3814 3815 /** 3816 * Determines if the specified code point is a titlecase character. 
3817 * UnicodeData only contains case mappings for code points where they are 3818 * one-to-one mappings; it also omits information about context-sensitive 3819 * case mappings.<br> 3820 * For more information about Unicode case mapping please refer to the 3821 * <a href=http://www.unicode.org/unicode/reports/tr21/> 3822 * Technical report #21</a>.<br> 3823 * Up-to-date Unicode implementation of java.lang.Character.isTitleCase(). 3824 * @param ch code point to determine if it is in title case 3825 * @return true if the specified code point is a titlecase character 3826 * @stable ICU 2.1 3827 */ isTitleCase(int ch)3828 public static boolean isTitleCase(int ch) 3829 { 3830 // if props == 0, it will just fall through and return false 3831 return getType(ch) == UCharacterCategory.TITLECASE_LETTER; 3832 } 3833 3834 /** 3835 * Determines if the specified code point may be any part of a Unicode 3836 * identifier other than the starting character. 3837 * A code point may be part of a Unicode identifier if and only if it is 3838 * one of the following: 3839 * <ul> 3840 * <li> Lu Uppercase letter 3841 * <li> Ll Lowercase letter 3842 * <li> Lt Titlecase letter 3843 * <li> Lm Modifier letter 3844 * <li> Lo Other letter 3845 * <li> Nl Letter number 3846 * <li> Pc Connecting punctuation character 3847 * <li> Nd decimal number 3848 * <li> Mc Spacing combining mark 3849 * <li> Mn Non-spacing mark 3850 * <li> Cf formatting code 3851 * </ul> 3852 * Up-to-date Unicode implementation of 3853 * java.lang.Character.isUnicodeIdentifierPart().<br> 3854 * See <a href=http://www.unicode.org/unicode/reports/tr8/>UTR #8</a>. 
3855 * @param ch code point to determine if is can be part of a Unicode 3856 * identifier 3857 * @return true if code point is any character belonging a unicode 3858 * identifier suffix after the first character 3859 * @stable ICU 2.1 3860 */ isUnicodeIdentifierPart(int ch)3861 public static boolean isUnicodeIdentifierPart(int ch) 3862 { 3863 // if props == 0, it will just fall through and return false 3864 // cat == format 3865 return ((1 << getType(ch)) 3866 & ((1 << UCharacterCategory.UPPERCASE_LETTER) 3867 | (1 << UCharacterCategory.LOWERCASE_LETTER) 3868 | (1 << UCharacterCategory.TITLECASE_LETTER) 3869 | (1 << UCharacterCategory.MODIFIER_LETTER) 3870 | (1 << UCharacterCategory.OTHER_LETTER) 3871 | (1 << UCharacterCategory.LETTER_NUMBER) 3872 | (1 << UCharacterCategory.CONNECTOR_PUNCTUATION) 3873 | (1 << UCharacterCategory.DECIMAL_DIGIT_NUMBER) 3874 | (1 << UCharacterCategory.COMBINING_SPACING_MARK) 3875 | (1 << UCharacterCategory.NON_SPACING_MARK))) != 0 3876 || isIdentifierIgnorable(ch); 3877 } 3878 3879 /** 3880 * Determines if the specified code point is permissible as the first 3881 * character in a Unicode identifier. 3882 * A code point may start a Unicode identifier if it is of type either 3883 * <ul> 3884 * <li> Lu Uppercase letter 3885 * <li> Ll Lowercase letter 3886 * <li> Lt Titlecase letter 3887 * <li> Lm Modifier letter 3888 * <li> Lo Other letter 3889 * <li> Nl Letter number 3890 * </ul> 3891 * Up-to-date Unicode implementation of 3892 * java.lang.Character.isUnicodeIdentifierStart().<br> 3893 * See <a href=http://www.unicode.org/unicode/reports/tr8/>UTR #8</a>. 
3894 * @param ch code point to determine if it can start a Unicode identifier 3895 * @return true if code point is the first character belonging a unicode 3896 * identifier 3897 * @stable ICU 2.1 3898 */ isUnicodeIdentifierStart(int ch)3899 public static boolean isUnicodeIdentifierStart(int ch) 3900 { 3901 /*int cat = getType(ch);*/ 3902 // if props == 0, it will just fall through and return false 3903 return ((1 << getType(ch)) 3904 & ((1 << UCharacterCategory.UPPERCASE_LETTER) 3905 | (1 << UCharacterCategory.LOWERCASE_LETTER) 3906 | (1 << UCharacterCategory.TITLECASE_LETTER) 3907 | (1 << UCharacterCategory.MODIFIER_LETTER) 3908 | (1 << UCharacterCategory.OTHER_LETTER) 3909 | (1 << UCharacterCategory.LETTER_NUMBER))) != 0; 3910 } 3911 3912 /** 3913 * Determines if the specified code point should be regarded as an 3914 * ignorable character in a Java identifier. 3915 * A character is Java-identifier-ignorable if it has the general category 3916 * Cf Formatting Control, or it is a non-Java-whitespace ISO control: 3917 * U+0000..U+0008, U+000E..U+001B, U+007F..U+009F.<br> 3918 * Up-to-date Unicode implementation of 3919 * java.lang.Character.isIdentifierIgnorable().<br> 3920 * See <a href=http://www.unicode.org/unicode/reports/tr8/>UTR #8</a>. 3921 * <p>Note that Unicode just recommends to ignore Cf (format controls). 3922 * @param ch code point to be determined if it can be ignored in a Unicode 3923 * identifier. 3924 * @return true if the code point is ignorable 3925 * @stable ICU 2.1 3926 */ isIdentifierIgnorable(int ch)3927 public static boolean isIdentifierIgnorable(int ch) 3928 { 3929 // see java.lang.Character.isIdentifierIgnorable() on range of 3930 // ignorable characters. 3931 if (ch <= 0x9f) { 3932 return isISOControl(ch) 3933 && !((ch >= 0x9 && ch <= 0xd) 3934 || (ch >= 0x1c && ch <= 0x1f)); 3935 } 3936 return getType(ch) == UCharacterCategory.FORMAT; 3937 } 3938 3939 /** 3940 * Determines if the specified code point is an uppercase character. 
3941 * UnicodeData only contains case mappings for code point where they are 3942 * one-to-one mappings; it also omits information about context-sensitive 3943 * case mappings.<br> 3944 * For language specific case conversion behavior, use 3945 * toUpperCase(locale, str). <br> 3946 * For example, the case conversion for dot-less i and dotted I in Turkish, 3947 * or for final sigma in Greek. 3948 * For more information about Unicode case mapping please refer to the 3949 * <a href=http://www.unicode.org/unicode/reports/tr21/> 3950 * Technical report #21</a>.<br> 3951 * Up-to-date Unicode implementation of java.lang.Character.isUpperCase(). 3952 * @param ch code point to determine if it is in uppercase 3953 * @return true if the code point is an uppercase character 3954 * @stable ICU 2.1 3955 */ isUpperCase(int ch)3956 public static boolean isUpperCase(int ch) 3957 { 3958 // if props == 0, it will just fall through and return false 3959 return getType(ch) == UCharacterCategory.UPPERCASE_LETTER; 3960 } 3961 3962 /** 3963 * The given code point is mapped to its lowercase equivalent; if the code 3964 * point has no lowercase equivalent, the code point itself is returned. 3965 * Up-to-date Unicode implementation of java.lang.Character.toLowerCase() 3966 * 3967 * <p>This function only returns the simple, single-code point case mapping. 3968 * Full case mappings should be used whenever possible because they produce 3969 * better results by working on whole strings. 3970 * They take into account the string context and the language and can map 3971 * to a result string with a different length as appropriate. 3972 * Full case mappings are applied by the case mapping functions 3973 * that take String parameters rather than code points (int). 
3974 * See also the User Guide chapter on C/POSIX migration: 3975 * http://www.icu-project.org/userguide/posix.html#case_mappings 3976 * 3977 * @param ch code point whose lowercase equivalent is to be retrieved 3978 * @return the lowercase equivalent code point 3979 * @stable ICU 2.1 3980 */ toLowerCase(int ch)3981 public static int toLowerCase(int ch) { 3982 return UCaseProps.INSTANCE.tolower(ch); 3983 } 3984 3985 /** 3986 * Converts argument code point and returns a String object representing 3987 * the code point's value in UTF16 format. 3988 * The result is a string whose length is 1 for non-supplementary code 3989 * points, 2 otherwise.<br> 3990 * com.ibm.ibm.icu.UTF16 can be used to parse Strings generated by this 3991 * function.<br> 3992 * Up-to-date Unicode implementation of java.lang.Character.toString() 3993 * @param ch code point 3994 * @return string representation of the code point, null if code point is not 3995 * defined in unicode 3996 * @stable ICU 2.1 3997 */ toString(int ch)3998 public static String toString(int ch) 3999 { 4000 if (ch < MIN_VALUE || ch > MAX_VALUE) { 4001 return null; 4002 } 4003 4004 if (ch < SUPPLEMENTARY_MIN_VALUE) { 4005 return String.valueOf((char)ch); 4006 } 4007 4008 StringBuilder result = new StringBuilder(); 4009 result.append(UTF16.getLeadSurrogate(ch)); 4010 result.append(UTF16.getTrailSurrogate(ch)); 4011 return result.toString(); 4012 } 4013 4014 /** 4015 * Converts the code point argument to titlecase. 4016 * If no titlecase is available, the uppercase is returned. If no uppercase 4017 * is available, the code point itself is returned. 4018 * Up-to-date Unicode implementation of java.lang.Character.toTitleCase() 4019 * 4020 * <p>This function only returns the simple, single-code point case mapping. 4021 * Full case mappings should be used whenever possible because they produce 4022 * better results by working on whole strings. 
4023 * They take into account the string context and the language and can map 4024 * to a result string with a different length as appropriate. 4025 * Full case mappings are applied by the case mapping functions 4026 * that take String parameters rather than code points (int). 4027 * See also the User Guide chapter on C/POSIX migration: 4028 * http://www.icu-project.org/userguide/posix.html#case_mappings 4029 * 4030 * @param ch code point whose title case is to be retrieved 4031 * @return titlecase code point 4032 * @stable ICU 2.1 4033 */ toTitleCase(int ch)4034 public static int toTitleCase(int ch) { 4035 return UCaseProps.INSTANCE.totitle(ch); 4036 } 4037 4038 /** 4039 * Converts the character argument to uppercase. 4040 * If no uppercase is available, the character itself is returned. 4041 * Up-to-date Unicode implementation of java.lang.Character.toUpperCase() 4042 * 4043 * <p>This function only returns the simple, single-code point case mapping. 4044 * Full case mappings should be used whenever possible because they produce 4045 * better results by working on whole strings. 4046 * They take into account the string context and the language and can map 4047 * to a result string with a different length as appropriate. 4048 * Full case mappings are applied by the case mapping functions 4049 * that take String parameters rather than code points (int). 4050 * See also the User Guide chapter on C/POSIX migration: 4051 * http://www.icu-project.org/userguide/posix.html#case_mappings 4052 * 4053 * @param ch code point whose uppercase is to be retrieved 4054 * @return uppercase code point 4055 * @stable ICU 2.1 4056 */ toUpperCase(int ch)4057 public static int toUpperCase(int ch) { 4058 return UCaseProps.INSTANCE.toupper(ch); 4059 } 4060 4061 // extra methods not in java.lang.Character -------------------------- 4062 4063 /** 4064 * {@icu} Determines if the code point is a supplementary character. 
4065 * A code point is a supplementary character if and only if it is greater 4066 * than <a href=#SUPPLEMENTARY_MIN_VALUE>SUPPLEMENTARY_MIN_VALUE</a> 4067 * @param ch code point to be determined if it is in the supplementary 4068 * plane 4069 * @return true if code point is a supplementary character 4070 * @stable ICU 2.1 4071 */ isSupplementary(int ch)4072 public static boolean isSupplementary(int ch) 4073 { 4074 return ch >= UCharacter.SUPPLEMENTARY_MIN_VALUE && 4075 ch <= UCharacter.MAX_VALUE; 4076 } 4077 4078 /** 4079 * {@icu} Determines if the code point is in the BMP plane. 4080 * @param ch code point to be determined if it is not a supplementary 4081 * character 4082 * @return true if code point is not a supplementary character 4083 * @stable ICU 2.1 4084 */ isBMP(int ch)4085 public static boolean isBMP(int ch) 4086 { 4087 return (ch >= 0 && ch <= LAST_CHAR_MASK_); 4088 } 4089 4090 /** 4091 * {@icu} Determines whether the specified code point is a printable character 4092 * according to the Unicode standard. 4093 * @param ch code point to be determined if it is printable 4094 * @return true if the code point is a printable character 4095 * @stable ICU 2.1 4096 */ isPrintable(int ch)4097 public static boolean isPrintable(int ch) 4098 { 4099 int cat = getType(ch); 4100 // if props == 0, it will just fall through and return false 4101 return (cat != UCharacterCategory.UNASSIGNED && 4102 cat != UCharacterCategory.CONTROL && 4103 cat != UCharacterCategory.FORMAT && 4104 cat != UCharacterCategory.PRIVATE_USE && 4105 cat != UCharacterCategory.SURROGATE && 4106 cat != UCharacterCategory.GENERAL_OTHER_TYPES); 4107 } 4108 4109 /** 4110 * {@icu} Determines whether the specified code point is of base form. 4111 * A code point of base form does not graphically combine with preceding 4112 * characters, and is neither a control nor a format character. 
4113 * @param ch code point to be determined if it is of base form 4114 * @return true if the code point is of base form 4115 * @stable ICU 2.1 4116 */ isBaseForm(int ch)4117 public static boolean isBaseForm(int ch) 4118 { 4119 int cat = getType(ch); 4120 // if props == 0, it will just fall through and return false 4121 return cat == UCharacterCategory.DECIMAL_DIGIT_NUMBER || 4122 cat == UCharacterCategory.OTHER_NUMBER || 4123 cat == UCharacterCategory.LETTER_NUMBER || 4124 cat == UCharacterCategory.UPPERCASE_LETTER || 4125 cat == UCharacterCategory.LOWERCASE_LETTER || 4126 cat == UCharacterCategory.TITLECASE_LETTER || 4127 cat == UCharacterCategory.MODIFIER_LETTER || 4128 cat == UCharacterCategory.OTHER_LETTER || 4129 cat == UCharacterCategory.NON_SPACING_MARK || 4130 cat == UCharacterCategory.ENCLOSING_MARK || 4131 cat == UCharacterCategory.COMBINING_SPACING_MARK; 4132 } 4133 4134 /** 4135 * {@icu} Returns the Bidirection property of a code point. 4136 * For example, 0x0041 (letter A) has the LEFT_TO_RIGHT directional 4137 * property.<br> 4138 * Result returned belongs to the interface 4139 * <a href=UCharacterDirection.html>UCharacterDirection</a> 4140 * @param ch the code point to be determined its direction 4141 * @return direction constant from UCharacterDirection. 4142 * @stable ICU 2.1 4143 */ getDirection(int ch)4144 public static int getDirection(int ch) 4145 { 4146 return UBiDiProps.INSTANCE.getClass(ch); 4147 } 4148 4149 /** 4150 * Determines whether the code point has the "mirrored" property. 4151 * This property is set for characters that are commonly used in 4152 * Right-To-Left contexts and need to be displayed with a "mirrored" 4153 * glyph. 
4154 * @param ch code point whose mirror is to be determined 4155 * @return true if the code point has the "mirrored" property 4156 * @stable ICU 2.1 4157 */ isMirrored(int ch)4158 public static boolean isMirrored(int ch) 4159 { 4160 return UBiDiProps.INSTANCE.isMirrored(ch); 4161 } 4162 4163 /** 4164 * {@icu} Maps the specified code point to a "mirror-image" code point. 4165 * For code points with the "mirrored" property, implementations sometimes 4166 * need a "poor man's" mapping to another code point such that the default 4167 * glyph may serve as the mirror-image of the default glyph of the 4168 * specified code point.<br> 4169 * This is useful for text conversion to and from codepages with visual 4170 * order, and for displays without glyph selection capabilities. 4171 * @param ch code point whose mirror is to be retrieved 4172 * @return another code point that may serve as a mirror-image substitute, 4173 * or ch itself if there is no such mapping or ch does not have the 4174 * "mirrored" property 4175 * @stable ICU 2.1 4176 */ getMirror(int ch)4177 public static int getMirror(int ch) 4178 { 4179 return UBiDiProps.INSTANCE.getMirror(ch); 4180 } 4181 4182 /** 4183 * {@icu} Maps the specified character to its paired bracket character. 4184 * For Bidi_Paired_Bracket_Type!=None, this is the same as getMirror(int). 4185 * Otherwise c itself is returned. 
4186 * See http://www.unicode.org/reports/tr9/ 4187 * 4188 * @param c the code point to be mapped 4189 * @return the paired bracket code point, 4190 * or c itself if there is no such mapping 4191 * (Bidi_Paired_Bracket_Type=None) 4192 * 4193 * @see UProperty#BIDI_PAIRED_BRACKET 4194 * @see UProperty#BIDI_PAIRED_BRACKET_TYPE 4195 * @see #getMirror(int) 4196 * @stable ICU 52 4197 */ getBidiPairedBracket(int c)4198 public static int getBidiPairedBracket(int c) { 4199 return UBiDiProps.INSTANCE.getPairedBracket(c); 4200 } 4201 4202 /** 4203 * {@icu} Returns the combining class of the argument codepoint 4204 * @param ch code point whose combining is to be retrieved 4205 * @return the combining class of the codepoint 4206 * @stable ICU 2.1 4207 */ getCombiningClass(int ch)4208 public static int getCombiningClass(int ch) 4209 { 4210 return Normalizer2.getNFDInstance().getCombiningClass(ch); 4211 } 4212 4213 /** 4214 * {@icu} A code point is illegal if and only if 4215 * <ul> 4216 * <li> Out of bounds, less than 0 or greater than UCharacter.MAX_VALUE 4217 * <li> A surrogate value, 0xD800 to 0xDFFF 4218 * <li> Not-a-character, having the form 0x xxFFFF or 0x xxFFFE 4219 * </ul> 4220 * Note: legal does not mean that it is assigned in this version of Unicode. 4221 * @param ch code point to determine if it is a legal code point by itself 4222 * @return true if and only if legal. 4223 * @stable ICU 2.1 4224 */ isLegal(int ch)4225 public static boolean isLegal(int ch) 4226 { 4227 if (ch < MIN_VALUE) { 4228 return false; 4229 } 4230 if (ch < UTF16.SURROGATE_MIN_VALUE) { 4231 return true; 4232 } 4233 if (ch <= UTF16.SURROGATE_MAX_VALUE) { 4234 return false; 4235 } 4236 if (UCharacterUtility.isNonCharacter(ch)) { 4237 return false; 4238 } 4239 return (ch <= MAX_VALUE); 4240 } 4241 4242 /** 4243 * {@icu} A string is legal iff all its code points are legal. 
4244 * A code point is illegal if and only if 4245 * <ul> 4246 * <li> Out of bounds, less than 0 or greater than UCharacter.MAX_VALUE 4247 * <li> A surrogate value, 0xD800 to 0xDFFF 4248 * <li> Not-a-character, having the form 0x xxFFFF or 0x xxFFFE 4249 * </ul> 4250 * Note: legal does not mean that it is assigned in this version of Unicode. 4251 * @param str containing code points to examin 4252 * @return true if and only if legal. 4253 * @stable ICU 2.1 4254 */ isLegal(String str)4255 public static boolean isLegal(String str) 4256 { 4257 int size = str.length(); 4258 int codepoint; 4259 for (int i = 0; i < size; i ++) 4260 { 4261 codepoint = UTF16.charAt(str, i); 4262 if (!isLegal(codepoint)) { 4263 return false; 4264 } 4265 if (isSupplementary(codepoint)) { 4266 i ++; 4267 } 4268 } 4269 return true; 4270 } 4271 4272 /** 4273 * {@icu} Returns the version of Unicode data used. 4274 * @return the unicode version number used 4275 * @stable ICU 2.1 4276 */ getUnicodeVersion()4277 public static VersionInfo getUnicodeVersion() 4278 { 4279 return UCharacterProperty.INSTANCE.m_unicodeVersion_; 4280 } 4281 4282 /** 4283 * {@icu} Returns the most current Unicode name of the argument code point, or 4284 * null if the character is unassigned or outside the range 4285 * UCharacter.MIN_VALUE and UCharacter.MAX_VALUE or does not have a name. 4286 * <br> 4287 * Note calling any methods related to code point names, e.g. get*Name*() 4288 * incurs a one-time initialisation cost to construct the name tables. 
4289 * @param ch the code point for which to get the name 4290 * @return most current Unicode name 4291 * @stable ICU 2.1 4292 */ getName(int ch)4293 public static String getName(int ch) 4294 { 4295 return UCharacterName.INSTANCE.getName(ch, UCharacterNameChoice.UNICODE_CHAR_NAME); 4296 } 4297 4298 /** 4299 * {@icu} Returns the names for each of the characters in a string 4300 * @param s string to format 4301 * @param separator string to go between names 4302 * @return string of names 4303 * @stable ICU 3.8 4304 */ getName(String s, String separator)4305 public static String getName(String s, String separator) { 4306 if (s.length() == 1) { // handle common case 4307 return getName(s.charAt(0)); 4308 } 4309 int cp; 4310 StringBuilder sb = new StringBuilder(); 4311 for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) { 4312 cp = UTF16.charAt(s,i); 4313 if (i != 0) sb.append(separator); 4314 sb.append(UCharacter.getName(cp)); 4315 } 4316 return sb.toString(); 4317 } 4318 4319 /** 4320 * {@icu} Returns null. 4321 * Used to return the Unicode_1_Name property value which was of little practical value. 4322 * @param ch the code point for which to get the name 4323 * @return null 4324 * @deprecated ICU 49 4325 */ 4326 @Deprecated getName1_0(int ch)4327 public static String getName1_0(int ch) 4328 { 4329 return null; 4330 } 4331 4332 /** 4333 * {@icu} Returns a name for a valid codepoint. Unlike, getName(int) and 4334 * getName1_0(int), this method will return a name even for codepoints that 4335 * are not assigned a name in UnicodeData.txt. 4336 * </p> 4337 * The names are returned in the following order. 4338 * <ul> 4339 * <li> Most current Unicode name if there is any 4340 * <li> Unicode 1.0 name if there is any 4341 * <li> Extended name in the form of 4342 * "<codepoint_type-codepoint_hex_digits>". E.g., <noncharacter-fffe> 4343 * </ul> 4344 * Note calling any methods related to code point names, e.g. 
get*Name*() 4345 * incurs a one-time initialisation cost to construct the name tables. 4346 * @param ch the code point for which to get the name 4347 * @return a name for the argument codepoint 4348 * @stable ICU 2.6 4349 */ getExtendedName(int ch)4350 public static String getExtendedName(int ch) { 4351 return UCharacterName.INSTANCE.getName(ch, UCharacterNameChoice.EXTENDED_CHAR_NAME); 4352 } 4353 4354 /** 4355 * {@icu} Returns the corrected name from NameAliases.txt if there is one. 4356 * Returns null if the character is unassigned or outside the range 4357 * UCharacter.MIN_VALUE and UCharacter.MAX_VALUE or does not have a name. 4358 * <br> 4359 * Note calling any methods related to code point names, e.g. get*Name*() 4360 * incurs a one-time initialisation cost to construct the name tables. 4361 * @param ch the code point for which to get the name alias 4362 * @return Unicode name alias, or null 4363 * @stable ICU 4.4 4364 */ getNameAlias(int ch)4365 public static String getNameAlias(int ch) 4366 { 4367 return UCharacterName.INSTANCE.getName(ch, UCharacterNameChoice.CHAR_NAME_ALIAS); 4368 } 4369 4370 /** 4371 * {@icu} Returns null. 4372 * Used to return the ISO 10646 comment for a character. 4373 * The Unicode ISO_Comment property is deprecated and has no values. 4374 * 4375 * @param ch The code point for which to get the ISO comment. 4376 * It must be the case that {@code 0 <= ch <= 0x10ffff}. 4377 * @return null 4378 * @deprecated ICU 49 4379 */ 4380 @Deprecated getISOComment(int ch)4381 public static String getISOComment(int ch) 4382 { 4383 return null; 4384 } 4385 4386 /** 4387 * {@icu} <p>Finds a Unicode code point by its most current Unicode name and 4388 * return its code point value. All Unicode names are in uppercase.</p> 4389 * Note calling any methods related to code point names, e.g. get*Name*() 4390 * incurs a one-time initialisation cost to construct the name tables. 
4391 * @param name most current Unicode character name whose code point is to 4392 * be returned 4393 * @return code point or -1 if name is not found 4394 * @stable ICU 2.1 4395 */ getCharFromName(String name)4396 public static int getCharFromName(String name){ 4397 return UCharacterName.INSTANCE.getCharFromName( 4398 UCharacterNameChoice.UNICODE_CHAR_NAME, name); 4399 } 4400 4401 /** 4402 * {@icu} Returns -1. 4403 * <p>Used to find a Unicode character by its version 1.0 Unicode name and return 4404 * its code point value.</p> 4405 * @param name Unicode 1.0 code point name whose code point is to be 4406 * returned 4407 * @return -1 4408 * @deprecated ICU 49 4409 * @see #getName1_0(int) 4410 */ 4411 @Deprecated getCharFromName1_0(String name)4412 public static int getCharFromName1_0(String name){ 4413 return -1; 4414 } 4415 4416 /** 4417 * {@icu} <p>Find a Unicode character by either its name and return its code 4418 * point value. All Unicode names are in uppercase. 4419 * Extended names are all lowercase except for numbers and are contained 4420 * within angle brackets.</p> 4421 * The names are searched in the following order 4422 * <ul> 4423 * <li> Most current Unicode name if there is any 4424 * <li> Unicode 1.0 name if there is any 4425 * <li> Extended name in the form of 4426 * "<codepoint_type-codepoint_hex_digits>". E.g. <noncharacter-FFFE> 4427 * </ul> 4428 * Note calling any methods related to code point names, e.g. get*Name*() 4429 * incurs a one-time initialisation cost to construct the name tables. 4430 * @param name codepoint name 4431 * @return code point associated with the name or -1 if the name is not 4432 * found. 
4433 * @stable ICU 2.6 4434 */ getCharFromExtendedName(String name)4435 public static int getCharFromExtendedName(String name){ 4436 return UCharacterName.INSTANCE.getCharFromName( 4437 UCharacterNameChoice.EXTENDED_CHAR_NAME, name); 4438 } 4439 4440 /** 4441 * {@icu} <p>Find a Unicode character by its corrected name alias and return 4442 * its code point value. All Unicode names are in uppercase.</p> 4443 * Note calling any methods related to code point names, e.g. get*Name*() 4444 * incurs a one-time initialisation cost to construct the name tables. 4445 * @param name Unicode name alias whose code point is to be returned 4446 * @return code point or -1 if name is not found 4447 * @stable ICU 4.4 4448 */ getCharFromNameAlias(String name)4449 public static int getCharFromNameAlias(String name){ 4450 return UCharacterName.INSTANCE.getCharFromName(UCharacterNameChoice.CHAR_NAME_ALIAS, name); 4451 } 4452 4453 /** 4454 * {@icu} Return the Unicode name for a given property, as given in the 4455 * Unicode database file PropertyAliases.txt. Most properties 4456 * have more than one name. The nameChoice determines which one 4457 * is returned. 4458 * 4459 * In addition, this function maps the property 4460 * UProperty.GENERAL_CATEGORY_MASK to the synthetic names "gcm" / 4461 * "General_Category_Mask". These names are not in 4462 * PropertyAliases.txt. 4463 * 4464 * @param property UProperty selector. 4465 * 4466 * @param nameChoice UProperty.NameChoice selector for which name 4467 * to get. All properties have a long name. Most have a short 4468 * name, but some do not. Unicode allows for additional names; if 4469 * present these will be returned by UProperty.NameChoice.LONG + i, 4470 * where i=1, 2,... 4471 * 4472 * @return a name, or null if Unicode explicitly defines no name 4473 * ("n/a") for a given property/nameChoice. If a given nameChoice 4474 * throws an exception, then all larger values of nameChoice will 4475 * throw an exception. 
If null is returned for a given 4476 * nameChoice, then other nameChoice values may return non-null 4477 * results. 4478 * 4479 * @exception IllegalArgumentException thrown if property or 4480 * nameChoice are invalid. 4481 * 4482 * @see UProperty 4483 * @see UProperty.NameChoice 4484 * @stable ICU 2.4 4485 */ getPropertyName(int property, int nameChoice)4486 public static String getPropertyName(int property, 4487 int nameChoice) { 4488 return UPropertyAliases.INSTANCE.getPropertyName(property, nameChoice); 4489 } 4490 4491 /** 4492 * {@icu} Return the UProperty selector for a given property name, as 4493 * specified in the Unicode database file PropertyAliases.txt. 4494 * Short, long, and any other variants are recognized. 4495 * 4496 * In addition, this function maps the synthetic names "gcm" / 4497 * "General_Category_Mask" to the property 4498 * UProperty.GENERAL_CATEGORY_MASK. These names are not in 4499 * PropertyAliases.txt. 4500 * 4501 * @param propertyAlias the property name to be matched. The name 4502 * is compared using "loose matching" as described in 4503 * PropertyAliases.txt. 4504 * 4505 * @return a UProperty enum. 4506 * 4507 * @exception IllegalArgumentException thrown if propertyAlias 4508 * is not recognized. 4509 * 4510 * @see UProperty 4511 * @stable ICU 2.4 4512 */ getPropertyEnum(CharSequence propertyAlias)4513 public static int getPropertyEnum(CharSequence propertyAlias) { 4514 int propEnum = UPropertyAliases.INSTANCE.getPropertyEnum(propertyAlias); 4515 if (propEnum == UProperty.UNDEFINED) { 4516 throw new IllegalIcuArgumentException("Invalid name: " + propertyAlias); 4517 } 4518 return propEnum; 4519 } 4520 4521 /** 4522 * {@icu} Return the Unicode name for a given property value, as given in 4523 * the Unicode database file PropertyValueAliases.txt. Most 4524 * values have more than one name. The nameChoice determines 4525 * which one is returned. 
4526 * 4527 * Note: Some of the names in PropertyValueAliases.txt can only be 4528 * retrieved using UProperty.GENERAL_CATEGORY_MASK, not 4529 * UProperty.GENERAL_CATEGORY. These include: "C" / "Other", "L" / 4530 * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P" 4531 * / "Punctuation", "S" / "Symbol", and "Z" / "Separator". 4532 * 4533 * @param property UProperty selector constant. 4534 * UProperty.INT_START <= property < UProperty.INT_LIMIT or 4535 * UProperty.BINARY_START <= property < UProperty.BINARY_LIMIT or 4536 * UProperty.MASK_START < = property < UProperty.MASK_LIMIT. 4537 * If out of range, null is returned. 4538 * 4539 * @param value selector for a value for the given property. In 4540 * general, valid values range from 0 up to some maximum. There 4541 * are a few exceptions: (1.) UProperty.BLOCK values begin at the 4542 * non-zero value BASIC_LATIN.getID(). (2.) 4543 * UProperty.CANONICAL_COMBINING_CLASS values are not contiguous 4544 * and range from 0..240. (3.) UProperty.GENERAL_CATEGORY_MASK values 4545 * are mask values produced by left-shifting 1 by 4546 * UCharacter.getType(). This allows grouped categories such as 4547 * [:L:] to be represented. Mask values are non-contiguous. 4548 * 4549 * @param nameChoice UProperty.NameChoice selector for which name 4550 * to get. All values have a long name. Most have a short name, 4551 * but some do not. Unicode allows for additional names; if 4552 * present these will be returned by UProperty.NameChoice.LONG + i, 4553 * where i=1, 2,... 4554 * 4555 * @return a name, or null if Unicode explicitly defines no name 4556 * ("n/a") for a given property/value/nameChoice. If a given 4557 * nameChoice throws an exception, then all larger values of 4558 * nameChoice will throw an exception. If null is returned for a 4559 * given nameChoice, then other nameChoice values may return 4560 * non-null results. 
4561 * 4562 * @exception IllegalArgumentException thrown if property, value, 4563 * or nameChoice are invalid. 4564 * 4565 * @see UProperty 4566 * @see UProperty.NameChoice 4567 * @stable ICU 2.4 4568 */ getPropertyValueName(int property, int value, int nameChoice)4569 public static String getPropertyValueName(int property, 4570 int value, 4571 int nameChoice) 4572 { 4573 if ((property == UProperty.CANONICAL_COMBINING_CLASS 4574 || property == UProperty.LEAD_CANONICAL_COMBINING_CLASS 4575 || property == UProperty.TRAIL_CANONICAL_COMBINING_CLASS) 4576 && value >= UCharacter.getIntPropertyMinValue( 4577 UProperty.CANONICAL_COMBINING_CLASS) 4578 && value <= UCharacter.getIntPropertyMaxValue( 4579 UProperty.CANONICAL_COMBINING_CLASS) 4580 && nameChoice >= 0 && nameChoice < UProperty.NameChoice.COUNT) { 4581 // this is hard coded for the valid cc 4582 // because PropertyValueAliases.txt does not contain all of them 4583 try { 4584 return UPropertyAliases.INSTANCE.getPropertyValueName(property, value, 4585 nameChoice); 4586 } 4587 catch (IllegalArgumentException e) { 4588 return null; 4589 } 4590 } 4591 return UPropertyAliases.INSTANCE.getPropertyValueName(property, value, nameChoice); 4592 } 4593 4594 /** 4595 * {@icu} Return the property value integer for a given value name, as 4596 * specified in the Unicode database file PropertyValueAliases.txt. 4597 * Short, long, and any other variants are recognized. 4598 * 4599 * Note: Some of the names in PropertyValueAliases.txt will only be 4600 * recognized with UProperty.GENERAL_CATEGORY_MASK, not 4601 * UProperty.GENERAL_CATEGORY. These include: "C" / "Other", "L" / 4602 * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P" 4603 * / "Punctuation", "S" / "Symbol", and "Z" / "Separator". 4604 * 4605 * @param property UProperty selector constant. 
4606 * UProperty.INT_START <= property < UProperty.INT_LIMIT or 4607 * UProperty.BINARY_START <= property < UProperty.BINARY_LIMIT or 4608 * UProperty.MASK_START < = property < UProperty.MASK_LIMIT. 4609 * Only these properties can be enumerated. 4610 * 4611 * @param valueAlias the value name to be matched. The name is 4612 * compared using "loose matching" as described in 4613 * PropertyValueAliases.txt. 4614 * 4615 * @return a value integer. Note: UProperty.GENERAL_CATEGORY 4616 * values are mask values produced by left-shifting 1 by 4617 * UCharacter.getType(). This allows grouped categories such as 4618 * [:L:] to be represented. 4619 * 4620 * @see UProperty 4621 * @throws IllegalArgumentException if property is not a valid UProperty 4622 * selector or valueAlias is not a value of this property 4623 * @stable ICU 2.4 4624 */ getPropertyValueEnum(int property, CharSequence valueAlias)4625 public static int getPropertyValueEnum(int property, CharSequence valueAlias) { 4626 int propEnum = UPropertyAliases.INSTANCE.getPropertyValueEnum(property, valueAlias); 4627 if (propEnum == UProperty.UNDEFINED) { 4628 throw new IllegalIcuArgumentException("Invalid name: " + valueAlias); 4629 } 4630 return propEnum; 4631 } 4632 4633 /** 4634 * Same as {@link #getPropertyValueEnum(int, CharSequence)}, except doesn't throw exception. Instead, returns UProperty.UNDEFINED. 4635 * @param property Same as {@link #getPropertyValueEnum(int, CharSequence)} 4636 * @param valueAlias Same as {@link #getPropertyValueEnum(int, CharSequence)} 4637 * @return returns UProperty.UNDEFINED if the value is not valid, otherwise the value. 4638 * @internal 4639 * @deprecated This API is ICU internal only. 
4640 */ 4641 @Deprecated getPropertyValueEnumNoThrow(int property, CharSequence valueAlias)4642 public static int getPropertyValueEnumNoThrow(int property, CharSequence valueAlias) { 4643 return UPropertyAliases.INSTANCE.getPropertyValueEnumNoThrow(property, valueAlias); 4644 } 4645 4646 4647 /** 4648 * {@icu} Returns a code point corresponding to the two UTF16 characters. 4649 * @param lead the lead char 4650 * @param trail the trail char 4651 * @return code point if surrogate characters are valid. 4652 * @exception IllegalArgumentException thrown when argument characters do 4653 * not form a valid codepoint 4654 * @stable ICU 2.1 4655 */ getCodePoint(char lead, char trail)4656 public static int getCodePoint(char lead, char trail) 4657 { 4658 if (UTF16.isLeadSurrogate(lead) && UTF16.isTrailSurrogate(trail)) { 4659 return UCharacterProperty.getRawSupplementary(lead, trail); 4660 } 4661 throw new IllegalArgumentException("Illegal surrogate characters"); 4662 } 4663 4664 /** 4665 * {@icu} Returns the code point corresponding to the UTF16 character. 4666 * @param char16 the UTF16 character 4667 * @return code point if argument is a valid character. 4668 * @exception IllegalArgumentException thrown when char16 is not a valid 4669 * codepoint 4670 * @stable ICU 2.1 4671 */ getCodePoint(char char16)4672 public static int getCodePoint(char char16) 4673 { 4674 if (UCharacter.isLegal(char16)) { 4675 return char16; 4676 } 4677 throw new IllegalArgumentException("Illegal codepoint"); 4678 } 4679 4680 /** 4681 * Implementation of UCaseProps.ContextIterator, iterates over a String. 4682 * See ustrcase.c/utf16_caseContextIterator(). 4683 */ 4684 private static class StringContextIterator implements UCaseProps.ContextIterator { 4685 /** 4686 * Constructor. 4687 * @param s String to iterate over. 
4688 */ StringContextIterator(String s)4689 StringContextIterator(String s) { 4690 this.s=s; 4691 limit=s.length(); 4692 cpStart=cpLimit=index=0; 4693 dir=0; 4694 } 4695 4696 /** 4697 * Set the iteration limit for nextCaseMapCP() to an index within the string. 4698 * If the limit parameter is negative or past the string, then the 4699 * string length is restored as the iteration limit. 4700 * 4701 * This limit does not affect the next() function which always 4702 * iterates to the very end of the string. 4703 * 4704 * @param lim The iteration limit. 4705 */ setLimit(int lim)4706 public void setLimit(int lim) { 4707 if(0<=lim && lim<=s.length()) { 4708 limit=lim; 4709 } else { 4710 limit=s.length(); 4711 } 4712 } 4713 4714 /** 4715 * Move to the iteration limit without fetching code points up to there. 4716 */ moveToLimit()4717 public void moveToLimit() { 4718 cpStart=cpLimit=limit; 4719 } 4720 4721 /** 4722 * Iterate forward through the string to fetch the next code point 4723 * to be case-mapped, and set the context indexes for it. 4724 * Performance optimization, to save on function calls and redundant 4725 * tests. Combines UTF16.charAt(), UTF16.getCharCount(), and setIndex(). 4726 * 4727 * When the iteration limit is reached (and -1 is returned), 4728 * getCPStart() will be at the iteration limit. 4729 * 4730 * Iteration with next() does not affect the position for nextCaseMapCP(). 4731 * 4732 * @return The next code point to be case-mapped, or <0 when the iteration is done. 
4733 */ nextCaseMapCP()4734 public int nextCaseMapCP() { 4735 cpStart=cpLimit; 4736 if(cpLimit<limit) { 4737 int c=s.charAt(cpLimit++); 4738 if(UTF16.LEAD_SURROGATE_MIN_VALUE<=c || c<=UTF16.TRAIL_SURROGATE_MAX_VALUE) { 4739 char c2; 4740 if( c<=UTF16.LEAD_SURROGATE_MAX_VALUE && cpLimit<limit && 4741 UTF16.TRAIL_SURROGATE_MIN_VALUE<=(c2=s.charAt(cpLimit)) && 4742 c2<=UTF16.TRAIL_SURROGATE_MAX_VALUE 4743 ) { 4744 // supplementary code point 4745 ++cpLimit; 4746 c=UCharacterProperty.getRawSupplementary((char)c, c2); 4747 // else unpaired surrogate code point 4748 } 4749 // else BMP code point 4750 } 4751 return c; 4752 } else { 4753 return -1; 4754 } 4755 } 4756 4757 /** 4758 * Returns the start of the code point that was last returned 4759 * by nextCaseMapCP(). 4760 */ getCPStart()4761 public int getCPStart() { 4762 return cpStart; 4763 } 4764 4765 /** 4766 * Returns the limit of the code point that was last returned 4767 * by nextCaseMapCP(). 4768 */ getCPLimit()4769 public int getCPLimit() { 4770 return cpLimit; 4771 } 4772 4773 // implement UCaseProps.ContextIterator 4774 // The following code is not used anywhere in this private class reset(int direction)4775 public void reset(int direction) { 4776 if(direction>0) { 4777 /* reset for forward iteration */ 4778 dir=1; 4779 index=cpLimit; 4780 } else if(direction<0) { 4781 /* reset for backward iteration */ 4782 dir=-1; 4783 index=cpStart; 4784 } else { 4785 // not a valid direction 4786 dir=0; 4787 index=0; 4788 } 4789 } 4790 next()4791 public int next() { 4792 int c; 4793 4794 if(dir>0 && index<s.length()) { 4795 c=UTF16.charAt(s, index); 4796 index+=UTF16.getCharCount(c); 4797 return c; 4798 } else if(dir<0 && index>0) { 4799 c=UTF16.charAt(s, index-1); 4800 index-=UTF16.getCharCount(c); 4801 return c; 4802 } 4803 return -1; 4804 } 4805 4806 // variables 4807 protected String s; 4808 protected int index, limit, cpStart, cpLimit; 4809 protected int dir; // 0=initial state >0=forward <0=backward 4810 } 4811 4812 
/** 4813 * Returns the uppercase version of the argument string. 4814 * Casing is dependent on the default locale and context-sensitive. 4815 * @param str source string to be performed on 4816 * @return uppercase version of the argument string 4817 * @stable ICU 2.1 4818 */ toUpperCase(String str)4819 public static String toUpperCase(String str) 4820 { 4821 return toUpperCase(ULocale.getDefault(), str); 4822 } 4823 4824 /** 4825 * Returns the lowercase version of the argument string. 4826 * Casing is dependent on the default locale and context-sensitive 4827 * @param str source string to be performed on 4828 * @return lowercase version of the argument string 4829 * @stable ICU 2.1 4830 */ toLowerCase(String str)4831 public static String toLowerCase(String str) 4832 { 4833 return toLowerCase(ULocale.getDefault(), str); 4834 } 4835 4836 /** 4837 * <p>Returns the titlecase version of the argument string.</p> 4838 * <p>Position for titlecasing is determined by the argument break 4839 * iterator, hence the user can customize his break iterator for 4840 * a specialized titlecasing. In this case only the forward iteration 4841 * needs to be implemented. 4842 * If the break iterator passed in is null, the default Unicode algorithm 4843 * will be used to determine the titlecase positions. 4844 * </p> 4845 * <p>Only positions returned by the break iterator will be title cased, 4846 * character in between the positions will all be in lower case.</p> 4847 * <p>Casing is dependent on the default locale and context-sensitive</p> 4848 * @param str source string to be performed on 4849 * @param breakiter break iterator to determine the positions in which 4850 * the character should be title cased. 
4851 * @return lowercase version of the argument string 4852 * @stable ICU 2.6 4853 */ toTitleCase(String str, BreakIterator breakiter)4854 public static String toTitleCase(String str, BreakIterator breakiter) 4855 { 4856 return toTitleCase(ULocale.getDefault(), str, breakiter); 4857 } 4858 4859 /** 4860 * Returns the uppercase version of the argument string. 4861 * Casing is dependent on the argument locale and context-sensitive. 4862 * @param locale which string is to be converted in 4863 * @param str source string to be performed on 4864 * @return uppercase version of the argument string 4865 * @stable ICU 2.1 4866 */ toUpperCase(Locale locale, String str)4867 public static String toUpperCase(Locale locale, String str) 4868 { 4869 return toUpperCase(ULocale.forLocale(locale), str); 4870 } 4871 4872 /** 4873 * Returns the uppercase version of the argument string. 4874 * Casing is dependent on the argument locale and context-sensitive. 4875 * @param locale which string is to be converted in 4876 * @param str source string to be performed on 4877 * @return uppercase version of the argument string 4878 * @stable ICU 3.2 4879 */ toUpperCase(ULocale locale, String str)4880 public static String toUpperCase(ULocale locale, String str) { 4881 StringContextIterator iter = new StringContextIterator(str); 4882 StringBuilder result = new StringBuilder(str.length()); 4883 int[] locCache = new int[1]; 4884 int c; 4885 4886 if (locale == null) { 4887 locale = ULocale.getDefault(); 4888 } 4889 locCache[0]=0; 4890 4891 while((c=iter.nextCaseMapCP())>=0) { 4892 c = UCaseProps.INSTANCE.toFullUpper(c, iter, result, locale, locCache); 4893 4894 /* decode the result */ 4895 if(c<0) { 4896 /* (not) original code point */ 4897 c=~c; 4898 } else if(c<=UCaseProps.MAX_STRING_LENGTH) { 4899 /* mapping already appended to result */ 4900 continue; 4901 /* } else { append single-code point mapping */ 4902 } 4903 result.appendCodePoint(c); 4904 } 4905 return result.toString(); 4906 } 4907 4908 
/** 4909 * Returns the lowercase version of the argument string. 4910 * Casing is dependent on the argument locale and context-sensitive 4911 * @param locale which string is to be converted in 4912 * @param str source string to be performed on 4913 * @return lowercase version of the argument string 4914 * @stable ICU 2.1 4915 */ toLowerCase(Locale locale, String str)4916 public static String toLowerCase(Locale locale, String str) 4917 { 4918 return toLowerCase(ULocale.forLocale(locale), str); 4919 } 4920 4921 /** 4922 * Returns the lowercase version of the argument string. 4923 * Casing is dependent on the argument locale and context-sensitive 4924 * @param locale which string is to be converted in 4925 * @param str source string to be performed on 4926 * @return lowercase version of the argument string 4927 * @stable ICU 3.2 4928 */ toLowerCase(ULocale locale, String str)4929 public static String toLowerCase(ULocale locale, String str) { 4930 StringContextIterator iter = new StringContextIterator(str); 4931 StringBuilder result = new StringBuilder(str.length()); 4932 int[] locCache = new int[1]; 4933 int c; 4934 4935 if (locale == null) { 4936 locale = ULocale.getDefault(); 4937 } 4938 locCache[0]=0; 4939 4940 while((c=iter.nextCaseMapCP())>=0) { 4941 c = UCaseProps.INSTANCE.toFullLower(c, iter, result, locale, locCache); 4942 4943 /* decode the result */ 4944 if(c<0) { 4945 /* (not) original code point */ 4946 c=~c; 4947 } else if(c<=UCaseProps.MAX_STRING_LENGTH) { 4948 /* mapping already appended to result */ 4949 continue; 4950 /* } else { append single-code point mapping */ 4951 } 4952 result.appendCodePoint(c); 4953 } 4954 return result.toString(); 4955 } 4956 4957 /** 4958 * <p>Returns the titlecase version of the argument string.</p> 4959 * <p>Position for titlecasing is determined by the argument break 4960 * iterator, hence the user can customize his break iterator for 4961 * a specialized titlecasing. 
In this case only the forward iteration 4962 * needs to be implemented. 4963 * If the break iterator passed in is null, the default Unicode algorithm 4964 * will be used to determine the titlecase positions. 4965 * </p> 4966 * <p>Only positions returned by the break iterator will be title cased, 4967 * character in between the positions will all be in lower case.</p> 4968 * <p>Casing is dependent on the argument locale and context-sensitive</p> 4969 * @param locale which string is to be converted in 4970 * @param str source string to be performed on 4971 * @param breakiter break iterator to determine the positions in which 4972 * the character should be title cased. 4973 * @return lowercase version of the argument string 4974 * @stable ICU 2.6 4975 */ toTitleCase(Locale locale, String str, BreakIterator breakiter)4976 public static String toTitleCase(Locale locale, String str, 4977 BreakIterator breakiter) 4978 { 4979 return toTitleCase(ULocale.forLocale(locale), str, breakiter); 4980 } 4981 4982 /** 4983 * <p>Returns the titlecase version of the argument string.</p> 4984 * <p>Position for titlecasing is determined by the argument break 4985 * iterator, hence the user can customize his break iterator for 4986 * a specialized titlecasing. In this case only the forward iteration 4987 * needs to be implemented. 4988 * If the break iterator passed in is null, the default Unicode algorithm 4989 * will be used to determine the titlecase positions. 4990 * </p> 4991 * <p>Only positions returned by the break iterator will be title cased, 4992 * character in between the positions will all be in lower case.</p> 4993 * <p>Casing is dependent on the argument locale and context-sensitive</p> 4994 * @param locale which string is to be converted in 4995 * @param str source string to be performed on 4996 * @param titleIter break iterator to determine the positions in which 4997 * the character should be title cased. 
4998 * @return lowercase version of the argument string 4999 * @stable ICU 3.2 5000 */ toTitleCase(ULocale locale, String str, BreakIterator titleIter)5001 public static String toTitleCase(ULocale locale, String str, 5002 BreakIterator titleIter) { 5003 return toTitleCase(locale, str, titleIter, 0); 5004 } 5005 5006 /** 5007 * <p>Returns the titlecase version of the argument string.</p> 5008 * <p>Position for titlecasing is determined by the argument break 5009 * iterator, hence the user can customize his break iterator for 5010 * a specialized titlecasing. In this case only the forward iteration 5011 * needs to be implemented. 5012 * If the break iterator passed in is null, the default Unicode algorithm 5013 * will be used to determine the titlecase positions. 5014 * </p> 5015 * <p>Only positions returned by the break iterator will be title cased, 5016 * character in between the positions will all be in lower case.</p> 5017 * <p>Casing is dependent on the argument locale and context-sensitive</p> 5018 * @param locale which string is to be converted in 5019 * @param str source string to be performed on 5020 * @param titleIter break iterator to determine the positions in which 5021 * the character should be title cased. 
5022 * @param options bit set to modify the titlecasing operation 5023 * @return lowercase version of the argument string 5024 * @stable ICU 3.8 5025 * @see #TITLECASE_NO_LOWERCASE 5026 * @see #TITLECASE_NO_BREAK_ADJUSTMENT 5027 */ toTitleCase(ULocale locale, String str, BreakIterator titleIter, int options)5028 public static String toTitleCase(ULocale locale, String str, 5029 BreakIterator titleIter, 5030 int options) { 5031 StringContextIterator iter = new StringContextIterator(str); 5032 StringBuilder result = new StringBuilder(str.length()); 5033 int[] locCache = new int[1]; 5034 int c, nc, srcLength = str.length(); 5035 5036 if (locale == null) { 5037 locale = ULocale.getDefault(); 5038 } 5039 locCache[0]=0; 5040 5041 if(titleIter == null) { 5042 titleIter = BreakIterator.getWordInstance(locale); 5043 } 5044 titleIter.setText(str); 5045 5046 int prev, titleStart, index; 5047 boolean isFirstIndex; 5048 boolean isDutch = locale.getLanguage().equals("nl"); 5049 boolean FirstIJ = true; 5050 5051 /* set up local variables */ 5052 prev=0; 5053 isFirstIndex=true; 5054 5055 /* titlecasing loop */ 5056 while(prev<srcLength) { 5057 /* find next index where to titlecase */ 5058 if(isFirstIndex) { 5059 isFirstIndex=false; 5060 index=titleIter.first(); 5061 } else { 5062 index=titleIter.next(); 5063 } 5064 if(index==BreakIterator.DONE || index>srcLength) { 5065 index=srcLength; 5066 } 5067 5068 /* 5069 * Unicode 4 & 5 section 3.13 Default Case Operations: 5070 * 5071 * R3 toTitlecase(X): Find the word boundaries based on Unicode Standard Annex 5072 * #29, "Text Boundaries." Between each pair of word boundaries, find the first 5073 * cased character F. If F exists, map F to default_title(F); then map each 5074 * subsequent character C to default_lower(C). 
5075 * 5076 * In this implementation, segment [prev..index[ into 3 parts: 5077 * a) uncased characters (copy as-is) [prev..titleStart[ 5078 * b) first case letter (titlecase) [titleStart..titleLimit[ 5079 * c) subsequent characters (lowercase) [titleLimit..index[ 5080 */ 5081 if(prev<index) { 5082 /* find and copy uncased characters [prev..titleStart[ */ 5083 iter.setLimit(index); 5084 c=iter.nextCaseMapCP(); 5085 if((options&TITLECASE_NO_BREAK_ADJUSTMENT)==0 5086 && UCaseProps.NONE==UCaseProps.INSTANCE.getType(c)) { 5087 while((c=iter.nextCaseMapCP())>=0 5088 && UCaseProps.NONE==UCaseProps.INSTANCE.getType(c)) {} 5089 titleStart=iter.getCPStart(); 5090 if(prev<titleStart) { 5091 result.append(str, prev, titleStart); 5092 } 5093 } else { 5094 titleStart=prev; 5095 } 5096 5097 if(titleStart<index) { 5098 FirstIJ = true; 5099 /* titlecase c which is from titleStart */ 5100 c = UCaseProps.INSTANCE.toFullTitle(c, iter, result, locale, locCache); 5101 5102 /* decode the result and lowercase up to index */ 5103 for(;;) { 5104 if(c<0) { 5105 /* (not) original code point */ 5106 c=~c; 5107 result.appendCodePoint(c); 5108 } else if(c<=UCaseProps.MAX_STRING_LENGTH) { 5109 /* mapping already appended to result */ 5110 } else { 5111 /* append single-code point mapping */ 5112 result.appendCodePoint(c); 5113 } 5114 5115 if((options&TITLECASE_NO_LOWERCASE)!=0) { 5116 /* Optionally just copy the rest of the word unchanged. 
*/ 5117 5118 int titleLimit=iter.getCPLimit(); 5119 if(titleLimit<index) { 5120 // TODO: With Java 5, this would want to be 5121 // result.append(str, titleLimit, index); 5122 String appendStr = str.substring(titleLimit,index); 5123 /* Special Case - Dutch IJ Titlecasing */ 5124 if ( isDutch && c == 0x0049 && appendStr.startsWith("j")) { 5125 appendStr = "J" + appendStr.substring(1); 5126 } 5127 result.append(appendStr); 5128 } 5129 iter.moveToLimit(); 5130 break; 5131 } else if((nc=iter.nextCaseMapCP())>=0) { 5132 if (isDutch && (nc == 0x004A || nc == 0x006A) 5133 && (c == 0x0049) && (FirstIJ == true)) { 5134 c = 0x004A; /* J */ 5135 FirstIJ = false; 5136 } else { 5137 /* Normal operation: Lowercase the rest of the word. */ 5138 c = UCaseProps.INSTANCE.toFullLower(nc, iter, result, locale, 5139 locCache); 5140 } 5141 } else { 5142 break; 5143 } 5144 } 5145 } 5146 } 5147 5148 prev=index; 5149 } 5150 return result.toString(); 5151 } 5152 5153 5154 private static final int BREAK_MASK = 5155 (1<<UCharacterCategory.DECIMAL_DIGIT_NUMBER) 5156 | (1<<UCharacterCategory.OTHER_LETTER) 5157 | (1<<UCharacterCategory.MODIFIER_LETTER); 5158 5159 /** 5160 * Return a string with just the first word titlecased, for menus and UI, etc. This does not affect most of the string, 5161 * and sometimes has no effect at all; the original string is returned whenever casing 5162 * would not be appropriate for the first word (such as for CJK characters or initial numbers). 5163 * Initial non-letters are skipped in order to find the character to change. 5164 * Characters past the first affected are left untouched: see also TITLECASE_NO_LOWERCASE. 
5165 * <p>Examples: 5166 * <table border='1'><tr><th>Source</th><th>Result</th><th>Locale</th></tr> 5167 * <tr><td>anglo-American locale</td><td>Anglo-American locale</td></tr> 5168 * <tr><td>“contact us”</td><td>“Contact us”</td></tr> 5169 * <tr><td>49ers win!</td><td>49ers win!</td></tr> 5170 * <tr><td>丰(abc)</td><td>丰(abc)</td></tr> 5171 * <tr><td>«ijs»</td><td>«Ijs»</td></tr> 5172 * <tr><td>«ijs»</td><td>«IJs»</td><td>nl-BE</td></tr> 5173 * <tr><td>«ijs»</td><td>«İjs»</td><td>tr-DE</td></tr> 5174 * </table> 5175 * @param locale the locale for accessing exceptional behavior (eg for tr). 5176 * @param str the source string to change 5177 * @return the modified string, or the original if no modifications were necessary. 5178 * @internal 5179 * @deprecated ICU internal only 5180 */ 5181 @Deprecated toTitleFirst(ULocale locale, String str)5182 public static String toTitleFirst(ULocale locale, String str) { 5183 int c = 0; 5184 for (int i = 0; i < str.length(); i += UCharacter.charCount(c)) { 5185 c = UCharacter.codePointAt(str, i); 5186 int propertyMask = UCharacter.getIntPropertyValue(c, UProperty.GENERAL_CATEGORY_MASK); 5187 if ((propertyMask & BREAK_MASK) != 0) { // handle "49ers", initial CJK 5188 break; 5189 } 5190 if (UCaseProps.INSTANCE.getType(c) == UCaseProps.NONE) { 5191 continue; 5192 } 5193 5194 // we now have the first cased character 5195 // What we really want is something like: 5196 // String titled = UCharacter.toTitleCase(locale, str, i, outputCharsTaken); 5197 // That is, just give us the titlecased string, for the locale, at i and following, 5198 // and tell us how many characters are replaced. 
5199 // The following won't work completely: it needs some more substantial changes to UCaseProps 5200 5201 String substring = str.substring(i, i+UCharacter.charCount(c)); 5202 String titled = UCharacter.toTitleCase(locale, substring, BreakIterator.getSentenceInstance(locale), 0); 5203 5204 // skip if no change 5205 if (titled.codePointAt(0) == c) { 5206 // Using 0 is safe, since any change in titling will not have first initial character 5207 break; 5208 } 5209 StringBuilder result = new StringBuilder(str.length()).append(str, 0, i); 5210 int startOfSuffix; 5211 5212 // handle dutch, but check first for 'i', since that's faster. Should be built into UCaseProps. 5213 5214 if (c == 'i' && locale.getLanguage().equals("nl") && i < str.length() && str.charAt(i+1) == 'j') { 5215 result.append("IJ"); 5216 startOfSuffix = 2; 5217 } else { 5218 result.append(titled); 5219 startOfSuffix = i + UCharacter.charCount(c); 5220 } 5221 5222 // add the remainder, and return 5223 return result.append(str, startOfSuffix, str.length()).toString(); 5224 } 5225 return str; // no change 5226 } 5227 5228 /** 5229 * {@icu} <p>Returns the titlecase version of the argument string.</p> 5230 * <p>Position for titlecasing is determined by the argument break 5231 * iterator, hence the user can customize his break iterator for 5232 * a specialized titlecasing. In this case only the forward iteration 5233 * needs to be implemented. 5234 * If the break iterator passed in is null, the default Unicode algorithm 5235 * will be used to determine the titlecase positions. 
5236 * </p> 5237 * <p>Only positions returned by the break iterator will be title cased, 5238 * character in between the positions will all be in lower case.</p> 5239 * <p>Casing is dependent on the argument locale and context-sensitive</p> 5240 * @param locale which string is to be converted in 5241 * @param str source string to be performed on 5242 * @param titleIter break iterator to determine the positions in which 5243 * the character should be title cased. 5244 * @param options bit set to modify the titlecasing operation 5245 * @return lowercase version of the argument string 5246 * @see #TITLECASE_NO_LOWERCASE 5247 * @see #TITLECASE_NO_BREAK_ADJUSTMENT 5248 * @draft ICU 54 5249 * @provisional This API might change or be removed in a future release. 5250 */ toTitleCase(Locale locale, String str, BreakIterator titleIter, int options)5251 public static String toTitleCase(Locale locale, String str, 5252 BreakIterator titleIter, 5253 int options) { 5254 return toTitleCase(ULocale.forLocale(locale), str, titleIter, options); 5255 } 5256 5257 /** 5258 * {@icu} The given character is mapped to its case folding equivalent according 5259 * to UnicodeData.txt and CaseFolding.txt; if the character has no case 5260 * folding equivalent, the character itself is returned. 5261 * 5262 * <p>This function only returns the simple, single-code point case mapping. 5263 * Full case mappings should be used whenever possible because they produce 5264 * better results by working on whole strings. 5265 * They can map to a result string with a different length as appropriate. 5266 * Full case mappings are applied by the case mapping functions 5267 * that take String parameters rather than code points (int). 
5268 * See also the User Guide chapter on C/POSIX migration: 5269 * http://www.icu-project.org/userguide/posix.html#case_mappings 5270 * 5271 * @param ch the character to be converted 5272 * @param defaultmapping Indicates whether the default mappings defined in 5273 * CaseFolding.txt are to be used, otherwise the 5274 * mappings for dotted I and dotless i marked with 5275 * 'T' in CaseFolding.txt are included. 5276 * @return the case folding equivalent of the character, if 5277 * any; otherwise the character itself. 5278 * @see #foldCase(String, boolean) 5279 * @stable ICU 2.1 5280 */ foldCase(int ch, boolean defaultmapping)5281 public static int foldCase(int ch, boolean defaultmapping) { 5282 return foldCase(ch, defaultmapping ? FOLD_CASE_DEFAULT : FOLD_CASE_EXCLUDE_SPECIAL_I); 5283 } 5284 5285 /** 5286 * {@icu} The given string is mapped to its case folding equivalent according to 5287 * UnicodeData.txt and CaseFolding.txt; if any character has no case 5288 * folding equivalent, the character itself is returned. 5289 * "Full", multiple-code point case folding mappings are returned here. 5290 * For "simple" single-code point mappings use the API 5291 * foldCase(int ch, boolean defaultmapping). 5292 * @param str the String to be converted 5293 * @param defaultmapping Indicates whether the default mappings defined in 5294 * CaseFolding.txt are to be used, otherwise the 5295 * mappings for dotted I and dotless i marked with 5296 * 'T' in CaseFolding.txt are included. 5297 * @return the case folding equivalent of the character, if 5298 * any; otherwise the character itself. 5299 * @see #foldCase(int, boolean) 5300 * @stable ICU 2.1 5301 */ foldCase(String str, boolean defaultmapping)5302 public static String foldCase(String str, boolean defaultmapping) { 5303 return foldCase(str, defaultmapping ? FOLD_CASE_DEFAULT : FOLD_CASE_EXCLUDE_SPECIAL_I); 5304 } 5305 5306 /** 5307 * {@icu} Option value for case folding: use default mappings defined in 5308 * CaseFolding.txt. 
5309 * @stable ICU 2.6 5310 */ 5311 public static final int FOLD_CASE_DEFAULT = 0x0000; 5312 /** 5313 * {@icu} Option value for case folding: 5314 * Use the modified set of mappings provided in CaseFolding.txt to handle dotted I 5315 * and dotless i appropriately for Turkic languages (tr, az). 5316 * 5317 * <p>Before Unicode 3.2, CaseFolding.txt contains mappings marked with 'I' that 5318 * are to be included for default mappings and 5319 * excluded for the Turkic-specific mappings. 5320 * 5321 * <p>Unicode 3.2 CaseFolding.txt instead contains mappings marked with 'T' that 5322 * are to be excluded for default mappings and 5323 * included for the Turkic-specific mappings. 5324 * 5325 * @stable ICU 2.6 5326 */ 5327 public static final int FOLD_CASE_EXCLUDE_SPECIAL_I = 0x0001; 5328 5329 /** 5330 * {@icu} The given character is mapped to its case folding equivalent according 5331 * to UnicodeData.txt and CaseFolding.txt; if the character has no case 5332 * folding equivalent, the character itself is returned. 5333 * 5334 * <p>This function only returns the simple, single-code point case mapping. 5335 * Full case mappings should be used whenever possible because they produce 5336 * better results by working on whole strings. 5337 * They can map to a result string with a different length as appropriate. 5338 * Full case mappings are applied by the case mapping functions 5339 * that take String parameters rather than code points (int). 5340 * See also the User Guide chapter on C/POSIX migration: 5341 * http://www.icu-project.org/userguide/posix.html#case_mappings 5342 * 5343 * @param ch the character to be converted 5344 * @param options A bit set for special processing. Currently the recognised options 5345 * are FOLD_CASE_EXCLUDE_SPECIAL_I and FOLD_CASE_DEFAULT 5346 * @return the case folding equivalent of the character, if any; otherwise the 5347 * character itself. 
5348 * @see #foldCase(String, boolean) 5349 * @stable ICU 2.6 5350 */ foldCase(int ch, int options)5351 public static int foldCase(int ch, int options) { 5352 return UCaseProps.INSTANCE.fold(ch, options); 5353 } 5354 5355 /** 5356 * {@icu} The given string is mapped to its case folding equivalent according to 5357 * UnicodeData.txt and CaseFolding.txt; if any character has no case 5358 * folding equivalent, the character itself is returned. 5359 * "Full", multiple-code point case folding mappings are returned here. 5360 * For "simple" single-code point mappings use the API 5361 * foldCase(int ch, boolean defaultmapping). 5362 * @param str the String to be converted 5363 * @param options A bit set for special processing. Currently the recognised options 5364 * are FOLD_CASE_EXCLUDE_SPECIAL_I and FOLD_CASE_DEFAULT 5365 * @return the case folding equivalent of the character, if any; otherwise the 5366 * character itself. 5367 * @see #foldCase(int, boolean) 5368 * @stable ICU 2.6 5369 */ foldCase(String str, int options)5370 public static final String foldCase(String str, int options) { 5371 StringBuilder result = new StringBuilder(str.length()); 5372 int c, i, length; 5373 5374 length = str.length(); 5375 for(i=0; i<length;) { 5376 c=UTF16.charAt(str, i); 5377 i+=UTF16.getCharCount(c); 5378 c = UCaseProps.INSTANCE.toFullFolding(c, result, options); 5379 5380 /* decode the result */ 5381 if(c<0) { 5382 /* (not) original code point */ 5383 c=~c; 5384 } else if(c<=UCaseProps.MAX_STRING_LENGTH) { 5385 /* mapping already appended to result */ 5386 continue; 5387 /* } else { append single-code point mapping */ 5388 } 5389 result.appendCodePoint(c); 5390 } 5391 return result.toString(); 5392 } 5393 5394 /** 5395 * {@icu} Returns the numeric value of a Han character. 5396 * 5397 * <p>This returns the value of Han 'numeric' code points, 5398 * including those for zero, ten, hundred, thousand, ten thousand, 5399 * and hundred million. 
5400 * This includes both the standard and 'checkwriting' 5401 * characters, the 'big circle' zero character, and the standard 5402 * zero character. 5403 * 5404 * <p>Note: The Unicode Standard has numeric values for more 5405 * Han characters recognized by this method 5406 * (see {@link #getNumericValue(int)} and the UCD file DerivedNumericValues.txt), 5407 * and a {@link com.ibm.icu.text.NumberFormat} can be used with 5408 * a Chinese {@link com.ibm.icu.text.NumberingSystem}. 5409 * 5410 * @param ch code point to query 5411 * @return value if it is a Han 'numeric character,' otherwise return -1. 5412 * @stable ICU 2.4 5413 */ getHanNumericValue(int ch)5414 public static int getHanNumericValue(int ch) 5415 { 5416 switch(ch) 5417 { 5418 case IDEOGRAPHIC_NUMBER_ZERO_ : 5419 case CJK_IDEOGRAPH_COMPLEX_ZERO_ : 5420 return 0; // Han Zero 5421 case CJK_IDEOGRAPH_FIRST_ : 5422 case CJK_IDEOGRAPH_COMPLEX_ONE_ : 5423 return 1; // Han One 5424 case CJK_IDEOGRAPH_SECOND_ : 5425 case CJK_IDEOGRAPH_COMPLEX_TWO_ : 5426 return 2; // Han Two 5427 case CJK_IDEOGRAPH_THIRD_ : 5428 case CJK_IDEOGRAPH_COMPLEX_THREE_ : 5429 return 3; // Han Three 5430 case CJK_IDEOGRAPH_FOURTH_ : 5431 case CJK_IDEOGRAPH_COMPLEX_FOUR_ : 5432 return 4; // Han Four 5433 case CJK_IDEOGRAPH_FIFTH_ : 5434 case CJK_IDEOGRAPH_COMPLEX_FIVE_ : 5435 return 5; // Han Five 5436 case CJK_IDEOGRAPH_SIXTH_ : 5437 case CJK_IDEOGRAPH_COMPLEX_SIX_ : 5438 return 6; // Han Six 5439 case CJK_IDEOGRAPH_SEVENTH_ : 5440 case CJK_IDEOGRAPH_COMPLEX_SEVEN_ : 5441 return 7; // Han Seven 5442 case CJK_IDEOGRAPH_EIGHTH_ : 5443 case CJK_IDEOGRAPH_COMPLEX_EIGHT_ : 5444 return 8; // Han Eight 5445 case CJK_IDEOGRAPH_NINETH_ : 5446 case CJK_IDEOGRAPH_COMPLEX_NINE_ : 5447 return 9; // Han Nine 5448 case CJK_IDEOGRAPH_TEN_ : 5449 case CJK_IDEOGRAPH_COMPLEX_TEN_ : 5450 return 10; 5451 case CJK_IDEOGRAPH_HUNDRED_ : 5452 case CJK_IDEOGRAPH_COMPLEX_HUNDRED_ : 5453 return 100; 5454 case CJK_IDEOGRAPH_THOUSAND_ : 5455 case 
CJK_IDEOGRAPH_COMPLEX_THOUSAND_ : 5456 return 1000; 5457 case CJK_IDEOGRAPH_TEN_THOUSAND_ : 5458 return 10000; 5459 case CJK_IDEOGRAPH_HUNDRED_MILLION_ : 5460 return 100000000; 5461 } 5462 return -1; // no value 5463 } 5464 5465 /** 5466 * {@icu} <p>Returns an iterator for character types, iterating over codepoints.</p> 5467 * Example of use:<br> 5468 * <pre> 5469 * RangeValueIterator iterator = UCharacter.getTypeIterator(); 5470 * RangeValueIterator.Element element = new RangeValueIterator.Element(); 5471 * while (iterator.next(element)) { 5472 * System.out.println("Codepoint \\u" + 5473 * Integer.toHexString(element.start) + 5474 * " to codepoint \\u" + 5475 * Integer.toHexString(element.limit - 1) + 5476 * " has the character type " + 5477 * element.value); 5478 * } 5479 * </pre> 5480 * @return an iterator 5481 * @stable ICU 2.6 5482 */ getTypeIterator()5483 public static RangeValueIterator getTypeIterator() 5484 { 5485 return new UCharacterTypeIterator(); 5486 } 5487 5488 private static final class UCharacterTypeIterator implements RangeValueIterator { UCharacterTypeIterator()5489 UCharacterTypeIterator() { 5490 reset(); 5491 } 5492 5493 // implements RangeValueIterator next(Element element)5494 public boolean next(Element element) { 5495 if(trieIterator.hasNext() && !(range=trieIterator.next()).leadSurrogate) { 5496 element.start=range.startCodePoint; 5497 element.limit=range.endCodePoint+1; 5498 element.value=range.value; 5499 return true; 5500 } else { 5501 return false; 5502 } 5503 } 5504 5505 // implements RangeValueIterator reset()5506 public void reset() { 5507 trieIterator=UCharacterProperty.INSTANCE.m_trie_.iterator(MASK_TYPE); 5508 } 5509 5510 private Iterator<Trie2.Range> trieIterator; 5511 private Trie2.Range range; 5512 5513 private static final class MaskType implements Trie2.ValueMapper { 5514 // Extracts the general category ("character type") from the trie value. 
map(int value)5515 public int map(int value) { 5516 return value & UCharacterProperty.TYPE_MASK; 5517 } 5518 } 5519 private static final MaskType MASK_TYPE=new MaskType(); 5520 } 5521 5522 /** 5523 * {@icu} <p>Returns an iterator for character names, iterating over codepoints.</p> 5524 * <p>This API only gets the iterator for the modern, most up-to-date 5525 * Unicode names. For older 1.0 Unicode names use get1_0NameIterator() or 5526 * for extended names use getExtendedNameIterator().</p> 5527 * Example of use:<br> 5528 * <pre> 5529 * ValueIterator iterator = UCharacter.getNameIterator(); 5530 * ValueIterator.Element element = new ValueIterator.Element(); 5531 * while (iterator.next(element)) { 5532 * System.out.println("Codepoint \\u" + 5533 * Integer.toHexString(element.codepoint) + 5534 * " has the name " + (String)element.value); 5535 * } 5536 * </pre> 5537 * <p>The maximal range which the name iterator iterates is from 5538 * UCharacter.MIN_VALUE to UCharacter.MAX_VALUE.</p> 5539 * @return an iterator 5540 * @stable ICU 2.6 5541 */ getNameIterator()5542 public static ValueIterator getNameIterator(){ 5543 return new UCharacterNameIterator(UCharacterName.INSTANCE, 5544 UCharacterNameChoice.UNICODE_CHAR_NAME); 5545 } 5546 5547 /** 5548 * {@icu} Returns an empty iterator. 
5549 * <p>Used to return an iterator for the older 1.0 Unicode character names, iterating over codepoints.</p> 5550 * @return an empty iterator 5551 * @deprecated ICU 49 5552 * @see #getName1_0(int) 5553 */ 5554 @Deprecated getName1_0Iterator()5555 public static ValueIterator getName1_0Iterator(){ 5556 return new DummyValueIterator(); 5557 } 5558 5559 private static final class DummyValueIterator implements ValueIterator { next(Element element)5560 public boolean next(Element element) { return false; } reset()5561 public void reset() {} setRange(int start, int limit)5562 public void setRange(int start, int limit) {} 5563 } 5564 5565 /** 5566 * {@icu} <p>Returns an iterator for character names, iterating over codepoints.</p> 5567 * <p>This API only gets the iterator for the extended names. 5568 * For modern, most up-to-date Unicode names use getNameIterator() or 5569 * for older 1.0 Unicode names use get1_0NameIterator().</p> 5570 * Example of use:<br> 5571 * <pre> 5572 * ValueIterator iterator = UCharacter.getExtendedNameIterator(); 5573 * ValueIterator.Element element = new ValueIterator.Element(); 5574 * while (iterator.next(element)) { 5575 * System.out.println("Codepoint \\u" + 5576 * Integer.toHexString(element.codepoint) + 5577 * " has the name " + (String)element.value); 5578 * } 5579 * </pre> 5580 * <p>The maximal range which the name iterator iterates is from 5581 * @return an iterator 5582 * @stable ICU 2.6 5583 */ getExtendedNameIterator()5584 public static ValueIterator getExtendedNameIterator(){ 5585 return new UCharacterNameIterator(UCharacterName.INSTANCE, 5586 UCharacterNameChoice.EXTENDED_CHAR_NAME); 5587 } 5588 5589 /** 5590 * {@icu} Returns the "age" of the code point.</p> 5591 * <p>The "age" is the Unicode version when the code point was first 5592 * designated (as a non-character or for Private Use) or assigned a 5593 * character. 
5594 * <p>This can be useful to avoid emitting code points to receiving 5595 * processes that do not accept newer characters.</p> 5596 * <p>The data is from the UCD file DerivedAge.txt.</p> 5597 * @param ch The code point. 5598 * @return the Unicode version number 5599 * @stable ICU 2.6 5600 */ getAge(int ch)5601 public static VersionInfo getAge(int ch) 5602 { 5603 if (ch < MIN_VALUE || ch > MAX_VALUE) { 5604 throw new IllegalArgumentException("Codepoint out of bounds"); 5605 } 5606 return UCharacterProperty.INSTANCE.getAge(ch); 5607 } 5608 5609 /** 5610 * {@icu} <p>Check a binary Unicode property for a code point.</p> 5611 * <p>Unicode, especially in version 3.2, defines many more properties 5612 * than the original set in UnicodeData.txt.</p> 5613 * <p>This API is intended to reflect Unicode properties as defined in 5614 * the Unicode Character Database (UCD) and Unicode Technical Reports 5615 * (UTR).</p> 5616 * <p>For details about the properties see 5617 * <a href=http://www.unicode.org/>http://www.unicode.org/</a>.</p> 5618 * <p>For names of Unicode properties see the UCD file 5619 * PropertyAliases.txt.</p> 5620 * <p>This API does not check the validity of the codepoint.</p> 5621 * <p>Important: If ICU is built with UCD files from Unicode versions 5622 * below 3.2, then properties marked with "new" are not or 5623 * not fully available.</p> 5624 * @param ch code point to test. 5625 * @param property selector constant from com.ibm.icu.lang.UProperty, 5626 * identifies which binary property to check. 5627 * @return true or false according to the binary Unicode property value 5628 * for ch. Also false if property is out of bounds or if the 5629 * Unicode version does not have data for the property at all, or 5630 * not for this code point. 
5631 * @see com.ibm.icu.lang.UProperty 5632 * @stable ICU 2.6 5633 */ hasBinaryProperty(int ch, int property)5634 public static boolean hasBinaryProperty(int ch, int property) 5635 { 5636 return UCharacterProperty.INSTANCE.hasBinaryProperty(ch, property); 5637 } 5638 5639 /** 5640 * {@icu} <p>Check if a code point has the Alphabetic Unicode property.</p> 5641 * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.ALPHABETIC).</p> 5642 * <p>Different from UCharacter.isLetter(ch)!</p> 5643 * @stable ICU 2.6 5644 * @param ch codepoint to be tested 5645 */ isUAlphabetic(int ch)5646 public static boolean isUAlphabetic(int ch) 5647 { 5648 return hasBinaryProperty(ch, UProperty.ALPHABETIC); 5649 } 5650 5651 /** 5652 * {@icu} <p>Check if a code point has the Lowercase Unicode property.</p> 5653 * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.LOWERCASE).</p> 5654 * <p>This is different from UCharacter.isLowerCase(ch)!</p> 5655 * @param ch codepoint to be tested 5656 * @stable ICU 2.6 5657 */ isULowercase(int ch)5658 public static boolean isULowercase(int ch) 5659 { 5660 return hasBinaryProperty(ch, UProperty.LOWERCASE); 5661 } 5662 5663 /** 5664 * {@icu} <p>Check if a code point has the Uppercase Unicode property.</p> 5665 * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.UPPERCASE).</p> 5666 * <p>This is different from UCharacter.isUpperCase(ch)!</p> 5667 * @param ch codepoint to be tested 5668 * @stable ICU 2.6 5669 */ isUUppercase(int ch)5670 public static boolean isUUppercase(int ch) 5671 { 5672 return hasBinaryProperty(ch, UProperty.UPPERCASE); 5673 } 5674 5675 /** 5676 * {@icu} <p>Check if a code point has the White_Space Unicode property.</p> 5677 * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.WHITE_SPACE).</p> 5678 * <p>This is different from both UCharacter.isSpace(ch) and 5679 * UCharacter.isWhitespace(ch)!</p> 5680 * @param ch codepoint to be tested 5681 * @stable ICU 2.6 5682 */ isUWhiteSpace(int ch)5683 public static boolean 
isUWhiteSpace(int ch) 5684 { 5685 return hasBinaryProperty(ch, UProperty.WHITE_SPACE); 5686 } 5687 5688 /** 5689 * {@icu} <p>Returns the property value for an Unicode property type of a code point. 5690 * Also returns binary and mask property values.</p> 5691 * <p>Unicode, especially in version 3.2, defines many more properties than 5692 * the original set in UnicodeData.txt.</p> 5693 * <p>The properties APIs are intended to reflect Unicode properties as 5694 * defined in the Unicode Character Database (UCD) and Unicode Technical 5695 * Reports (UTR). For details about the properties see 5696 * http://www.unicode.org/.</p> 5697 * <p>For names of Unicode properties see the UCD file PropertyAliases.txt. 5698 * </p> 5699 * <pre> 5700 * Sample usage: 5701 * int ea = UCharacter.getIntPropertyValue(c, UProperty.EAST_ASIAN_WIDTH); 5702 * int ideo = UCharacter.getIntPropertyValue(c, UProperty.IDEOGRAPHIC); 5703 * boolean b = (ideo == 1) ? true : false; 5704 * </pre> 5705 * @param ch code point to test. 5706 * @param type UProperty selector constant, identifies which binary 5707 * property to check. Must be 5708 * UProperty.BINARY_START <= type < UProperty.BINARY_LIMIT or 5709 * UProperty.INT_START <= type < UProperty.INT_LIMIT or 5710 * UProperty.MASK_START <= type < UProperty.MASK_LIMIT. 5711 * @return numeric value that is directly the property value or, 5712 * for enumerated properties, corresponds to the numeric value of 5713 * the enumerated constant of the respective property value 5714 * enumeration type (cast to enum type if necessary). 5715 * Returns 0 or 1 (for false / true) for binary Unicode properties. 5716 * Returns a bit-mask for mask properties. 5717 * Returns 0 if 'type' is out of bounds or if the Unicode version 5718 * does not have data for the property at all, or not for this code 5719 * point. 
5720 * @see UProperty 5721 * @see #hasBinaryProperty 5722 * @see #getIntPropertyMinValue 5723 * @see #getIntPropertyMaxValue 5724 * @see #getUnicodeVersion 5725 * @stable ICU 2.4 5726 */ getIntPropertyValue(int ch, int type)5727 public static int getIntPropertyValue(int ch, int type) 5728 { 5729 return UCharacterProperty.INSTANCE.getIntPropertyValue(ch, type); 5730 } 5731 /** 5732 * {@icu} Returns a string version of the property value. 5733 * @param propertyEnum The property enum value. 5734 * @param codepoint The codepoint value. 5735 * @param nameChoice The choice of the name. 5736 * @return value as string 5737 * @internal 5738 * @deprecated This API is ICU internal only. 5739 */ 5740 @Deprecated 5741 ///CLOVER:OFF getStringPropertyValue(int propertyEnum, int codepoint, int nameChoice)5742 public static String getStringPropertyValue(int propertyEnum, int codepoint, int nameChoice) { 5743 if ((propertyEnum >= UProperty.BINARY_START && propertyEnum < UProperty.BINARY_LIMIT) || 5744 (propertyEnum >= UProperty.INT_START && propertyEnum < UProperty.INT_LIMIT)) { 5745 return getPropertyValueName(propertyEnum, getIntPropertyValue(codepoint, propertyEnum), 5746 nameChoice); 5747 } 5748 if (propertyEnum == UProperty.NUMERIC_VALUE) { 5749 return String.valueOf(getUnicodeNumericValue(codepoint)); 5750 } 5751 // otherwise must be string property 5752 switch (propertyEnum) { 5753 case UProperty.AGE: return getAge(codepoint).toString(); 5754 case UProperty.ISO_COMMENT: return getISOComment(codepoint); 5755 case UProperty.BIDI_MIRRORING_GLYPH: return UTF16.valueOf(getMirror(codepoint)); 5756 case UProperty.CASE_FOLDING: return foldCase(UTF16.valueOf(codepoint), true); 5757 case UProperty.LOWERCASE_MAPPING: return toLowerCase(UTF16.valueOf(codepoint)); 5758 case UProperty.NAME: return getName(codepoint); 5759 case UProperty.SIMPLE_CASE_FOLDING: return UTF16.valueOf(foldCase(codepoint,true)); 5760 case UProperty.SIMPLE_LOWERCASE_MAPPING: return 
UTF16.valueOf(toLowerCase(codepoint)); 5761 case UProperty.SIMPLE_TITLECASE_MAPPING: return UTF16.valueOf(toTitleCase(codepoint)); 5762 case UProperty.SIMPLE_UPPERCASE_MAPPING: return UTF16.valueOf(toUpperCase(codepoint)); 5763 case UProperty.TITLECASE_MAPPING: return toTitleCase(UTF16.valueOf(codepoint),null); 5764 case UProperty.UNICODE_1_NAME: return getName1_0(codepoint); 5765 case UProperty.UPPERCASE_MAPPING: return toUpperCase(UTF16.valueOf(codepoint)); 5766 } 5767 throw new IllegalArgumentException("Illegal Property Enum"); 5768 } 5769 ///CLOVER:ON 5770 5771 /** 5772 * {@icu} Returns the minimum value for an integer/binary Unicode property type. 5773 * Can be used together with UCharacter.getIntPropertyMaxValue(int) 5774 * to allocate arrays of com.ibm.icu.text.UnicodeSet or similar. 5775 * @param type UProperty selector constant, identifies which binary 5776 * property to check. Must be 5777 * UProperty.BINARY_START <= type < UProperty.BINARY_LIMIT or 5778 * UProperty.INT_START <= type < UProperty.INT_LIMIT. 5779 * @return Minimum value returned by UCharacter.getIntPropertyValue(int) 5780 * for a Unicode property. 0 if the property 5781 * selector 'type' is out of range. 5782 * @see UProperty 5783 * @see #hasBinaryProperty 5784 * @see #getUnicodeVersion 5785 * @see #getIntPropertyMaxValue 5786 * @see #getIntPropertyValue 5787 * @stable ICU 2.4 5788 */ getIntPropertyMinValue(int type)5789 public static int getIntPropertyMinValue(int type){ 5790 5791 return 0; // undefined; and: all other properties have a minimum value of 0 5792 } 5793 5794 5795 /** 5796 * {@icu} Returns the maximum value for an integer/binary Unicode property. 5797 * Can be used together with UCharacter.getIntPropertyMinValue(int) 5798 * to allocate arrays of com.ibm.icu.text.UnicodeSet or similar. 
5799 * Examples for min/max values (for Unicode 3.2): 5800 * <ul> 5801 * <li> UProperty.BIDI_CLASS: 0/18 5802 * (UCharacterDirection.LEFT_TO_RIGHT/UCharacterDirection.BOUNDARY_NEUTRAL) 5803 * <li> UProperty.SCRIPT: 0/45 (UScript.COMMON/UScript.TAGBANWA) 5804 * <li> UProperty.IDEOGRAPHIC: 0/1 (false/true) 5805 * </ul> 5806 * For undefined UProperty constant values, min/max values will be 0/-1. 5807 * @param type UProperty selector constant, identifies which binary 5808 * property to check. Must be 5809 * UProperty.BINARY_START <= type < UProperty.BINARY_LIMIT or 5810 * UProperty.INT_START <= type < UProperty.INT_LIMIT. 5811 * @return Maximum value returned by u_getIntPropertyValue for a Unicode 5812 * property. <= 0 if the property selector 'type' is out of range. 5813 * @see UProperty 5814 * @see #hasBinaryProperty 5815 * @see #getUnicodeVersion 5816 * @see #getIntPropertyMaxValue 5817 * @see #getIntPropertyValue 5818 * @stable ICU 2.4 5819 */ getIntPropertyMaxValue(int type)5820 public static int getIntPropertyMaxValue(int type) 5821 { 5822 return UCharacterProperty.INSTANCE.getIntPropertyMaxValue(type); 5823 } 5824 5825 /** 5826 * Provide the java.lang.Character forDigit API, for convenience. 5827 * @stable ICU 3.0 5828 */ forDigit(int digit, int radix)5829 public static char forDigit(int digit, int radix) { 5830 return java.lang.Character.forDigit(digit, radix); 5831 } 5832 5833 // JDK 1.5 API coverage 5834 5835 /** 5836 * Cover the JDK 1.5 API, for convenience. 5837 * @see UTF16#LEAD_SURROGATE_MIN_VALUE 5838 * @stable ICU 3.0 5839 */ 5840 public static final char MIN_HIGH_SURROGATE = UTF16.LEAD_SURROGATE_MIN_VALUE; 5841 5842 /** 5843 * Cover the JDK 1.5 API, for convenience. 5844 * @see UTF16#LEAD_SURROGATE_MAX_VALUE 5845 * @stable ICU 3.0 5846 */ 5847 public static final char MAX_HIGH_SURROGATE = UTF16.LEAD_SURROGATE_MAX_VALUE; 5848 5849 /** 5850 * Cover the JDK 1.5 API, for convenience. 
5851 * @see UTF16#TRAIL_SURROGATE_MIN_VALUE 5852 * @stable ICU 3.0 5853 */ 5854 public static final char MIN_LOW_SURROGATE = UTF16.TRAIL_SURROGATE_MIN_VALUE; 5855 5856 /** 5857 * Cover the JDK 1.5 API, for convenience. 5858 * @see UTF16#TRAIL_SURROGATE_MAX_VALUE 5859 * @stable ICU 3.0 5860 */ 5861 public static final char MAX_LOW_SURROGATE = UTF16.TRAIL_SURROGATE_MAX_VALUE; 5862 5863 /** 5864 * Cover the JDK 1.5 API, for convenience. 5865 * @see UTF16#SURROGATE_MIN_VALUE 5866 * @stable ICU 3.0 5867 */ 5868 public static final char MIN_SURROGATE = UTF16.SURROGATE_MIN_VALUE; 5869 5870 /** 5871 * Cover the JDK 1.5 API, for convenience. 5872 * @see UTF16#SURROGATE_MAX_VALUE 5873 * @stable ICU 3.0 5874 */ 5875 public static final char MAX_SURROGATE = UTF16.SURROGATE_MAX_VALUE; 5876 5877 /** 5878 * Cover the JDK 1.5 API, for convenience. 5879 * @see UTF16#SUPPLEMENTARY_MIN_VALUE 5880 * @stable ICU 3.0 5881 */ 5882 public static final int MIN_SUPPLEMENTARY_CODE_POINT = UTF16.SUPPLEMENTARY_MIN_VALUE; 5883 5884 /** 5885 * Cover the JDK 1.5 API, for convenience. 5886 * @see UTF16#CODEPOINT_MAX_VALUE 5887 * @stable ICU 3.0 5888 */ 5889 public static final int MAX_CODE_POINT = UTF16.CODEPOINT_MAX_VALUE; 5890 5891 /** 5892 * Cover the JDK 1.5 API, for convenience. 5893 * @see UTF16#CODEPOINT_MIN_VALUE 5894 * @stable ICU 3.0 5895 */ 5896 public static final int MIN_CODE_POINT = UTF16.CODEPOINT_MIN_VALUE; 5897 5898 /** 5899 * Cover the JDK 1.5 API, for convenience. 5900 * @param cp the code point to check 5901 * @return true if cp is a valid code point 5902 * @stable ICU 3.0 5903 */ isValidCodePoint(int cp)5904 public static final boolean isValidCodePoint(int cp) { 5905 return cp >= 0 && cp <= MAX_CODE_POINT; 5906 } 5907 5908 /** 5909 * Cover the JDK 1.5 API, for convenience. 
5910 * @param cp the code point to check 5911 * @return true if cp is a supplementary code point 5912 * @stable ICU 3.0 5913 */ isSupplementaryCodePoint(int cp)5914 public static final boolean isSupplementaryCodePoint(int cp) { 5915 return cp >= UTF16.SUPPLEMENTARY_MIN_VALUE 5916 && cp <= UTF16.CODEPOINT_MAX_VALUE; 5917 } 5918 5919 /** 5920 * Cover the JDK 1.5 API, for convenience. 5921 * @param ch the char to check 5922 * @return true if ch is a high (lead) surrogate 5923 * @stable ICU 3.0 5924 */ isHighSurrogate(char ch)5925 public static boolean isHighSurrogate(char ch) { 5926 return ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE; 5927 } 5928 5929 /** 5930 * Cover the JDK 1.5 API, for convenience. 5931 * @param ch the char to check 5932 * @return true if ch is a low (trail) surrogate 5933 * @stable ICU 3.0 5934 */ isLowSurrogate(char ch)5935 public static boolean isLowSurrogate(char ch) { 5936 return ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE; 5937 } 5938 5939 /** 5940 * Cover the JDK 1.5 API, for convenience. Return true if the chars 5941 * form a valid surrogate pair. 5942 * @param high the high (lead) char 5943 * @param low the low (trail) char 5944 * @return true if high, low form a surrogate pair 5945 * @stable ICU 3.0 5946 */ isSurrogatePair(char high, char low)5947 public static final boolean isSurrogatePair(char high, char low) { 5948 return isHighSurrogate(high) && isLowSurrogate(low); 5949 } 5950 5951 /** 5952 * Cover the JDK 1.5 API, for convenience. Return the number of chars needed 5953 * to represent the code point. This does not check the 5954 * code point for validity. 5955 * @param cp the code point to check 5956 * @return the number of chars needed to represent the code point 5957 * @see UTF16#getCharCount 5958 * @stable ICU 3.0 5959 */ charCount(int cp)5960 public static int charCount(int cp) { 5961 return UTF16.getCharCount(cp); 5962 } 5963 5964 /** 5965 * Cover the JDK 1.5 API, for convenience. 
Return the code point represented by 5966 * the characters. This does not check the surrogate pair for validity. 5967 * @param high the high (lead) surrogate 5968 * @param low the low (trail) surrogate 5969 * @return the code point formed by the surrogate pair 5970 * @stable ICU 3.0 5971 */ toCodePoint(char high, char low)5972 public static final int toCodePoint(char high, char low) { 5973 return UCharacterProperty.getRawSupplementary(high, low); 5974 } 5975 5976 /** 5977 * Cover the JDK 1.5 API, for convenience. Return the code point at index. 5978 * <br/><b>Note</b>: the semantics of this API is different from the related UTF16 5979 * API. This examines only the characters at index and index+1. 5980 * @param seq the characters to check 5981 * @param index the index of the first or only char forming the code point 5982 * @return the code point at the index 5983 * @stable ICU 3.0 5984 */ codePointAt(CharSequence seq, int index)5985 public static final int codePointAt(CharSequence seq, int index) { 5986 char c1 = seq.charAt(index++); 5987 if (isHighSurrogate(c1)) { 5988 if (index < seq.length()) { 5989 char c2 = seq.charAt(index); 5990 if (isLowSurrogate(c2)) { 5991 return toCodePoint(c1, c2); 5992 } 5993 } 5994 } 5995 return c1; 5996 } 5997 5998 /** 5999 * Cover the JDK 1.5 API, for convenience. Return the code point at index. 6000 * <br/><b>Note</b>: the semantics of this API is different from the related UTF16 6001 * API. This examines only the characters at index and index+1. 
6002 * @param text the characters to check 6003 * @param index the index of the first or only char forming the code point 6004 * @return the code point at the index 6005 * @stable ICU 3.0 6006 */ codePointAt(char[] text, int index)6007 public static final int codePointAt(char[] text, int index) { 6008 char c1 = text[index++]; 6009 if (isHighSurrogate(c1)) { 6010 if (index < text.length) { 6011 char c2 = text[index]; 6012 if (isLowSurrogate(c2)) { 6013 return toCodePoint(c1, c2); 6014 } 6015 } 6016 } 6017 return c1; 6018 } 6019 6020 /** 6021 * Cover the JDK 1.5 API, for convenience. Return the code point at index. 6022 * <br/><b>Note</b>: the semantics of this API is different from the related UTF16 6023 * API. This examines only the characters at index and index+1. 6024 * @param text the characters to check 6025 * @param index the index of the first or only char forming the code point 6026 * @param limit the limit of the valid text 6027 * @return the code point at the index 6028 * @stable ICU 3.0 6029 */ codePointAt(char[] text, int index, int limit)6030 public static final int codePointAt(char[] text, int index, int limit) { 6031 if (index >= limit || limit > text.length) { 6032 throw new IndexOutOfBoundsException(); 6033 } 6034 char c1 = text[index++]; 6035 if (isHighSurrogate(c1)) { 6036 if (index < limit) { 6037 char c2 = text[index]; 6038 if (isLowSurrogate(c2)) { 6039 return toCodePoint(c1, c2); 6040 } 6041 } 6042 } 6043 return c1; 6044 } 6045 6046 /** 6047 * Cover the JDK 1.5 API, for convenience. Return the code point before index. 6048 * <br/><b>Note</b>: the semantics of this API is different from the related UTF16 6049 * API. This examines only the characters at index-1 and index-2. 
6050 * @param seq the characters to check 6051 * @param index the index after the last or only char forming the code point 6052 * @return the code point before the index 6053 * @stable ICU 3.0 6054 */ codePointBefore(CharSequence seq, int index)6055 public static final int codePointBefore(CharSequence seq, int index) { 6056 char c2 = seq.charAt(--index); 6057 if (isLowSurrogate(c2)) { 6058 if (index > 0) { 6059 char c1 = seq.charAt(--index); 6060 if (isHighSurrogate(c1)) { 6061 return toCodePoint(c1, c2); 6062 } 6063 } 6064 } 6065 return c2; 6066 } 6067 6068 /** 6069 * Cover the JDK 1.5 API, for convenience. Return the code point before index. 6070 * <br/><b>Note</b>: the semantics of this API is different from the related UTF16 6071 * API. This examines only the characters at index-1 and index-2. 6072 * @param text the characters to check 6073 * @param index the index after the last or only char forming the code point 6074 * @return the code point before the index 6075 * @stable ICU 3.0 6076 */ codePointBefore(char[] text, int index)6077 public static final int codePointBefore(char[] text, int index) { 6078 char c2 = text[--index]; 6079 if (isLowSurrogate(c2)) { 6080 if (index > 0) { 6081 char c1 = text[--index]; 6082 if (isHighSurrogate(c1)) { 6083 return toCodePoint(c1, c2); 6084 } 6085 } 6086 } 6087 return c2; 6088 } 6089 6090 /** 6091 * Cover the JDK 1.5 API, for convenience. Return the code point before index. 6092 * <br/><b>Note</b>: the semantics of this API is different from the related UTF16 6093 * API. This examines only the characters at index-1 and index-2. 
6094 * @param text the characters to check 6095 * @param index the index after the last or only char forming the code point 6096 * @param limit the start of the valid text 6097 * @return the code point before the index 6098 * @stable ICU 3.0 6099 */ codePointBefore(char[] text, int index, int limit)6100 public static final int codePointBefore(char[] text, int index, int limit) { 6101 if (index <= limit || limit < 0) { 6102 throw new IndexOutOfBoundsException(); 6103 } 6104 char c2 = text[--index]; 6105 if (isLowSurrogate(c2)) { 6106 if (index > limit) { 6107 char c1 = text[--index]; 6108 if (isHighSurrogate(c1)) { 6109 return toCodePoint(c1, c2); 6110 } 6111 } 6112 } 6113 return c2; 6114 } 6115 6116 /** 6117 * Cover the JDK 1.5 API, for convenience. Writes the chars representing the 6118 * code point into the destination at the given index. 6119 * @param cp the code point to convert 6120 * @param dst the destination array into which to put the char(s) representing the code point 6121 * @param dstIndex the index at which to put the first (or only) char 6122 * @return the count of the number of chars written (1 or 2) 6123 * @throws IllegalArgumentException if cp is not a valid code point 6124 * @stable ICU 3.0 6125 */ toChars(int cp, char[] dst, int dstIndex)6126 public static final int toChars(int cp, char[] dst, int dstIndex) { 6127 if (cp >= 0) { 6128 if (cp < MIN_SUPPLEMENTARY_CODE_POINT) { 6129 dst[dstIndex] = (char)cp; 6130 return 1; 6131 } 6132 if (cp <= MAX_CODE_POINT) { 6133 dst[dstIndex] = UTF16.getLeadSurrogate(cp); 6134 dst[dstIndex+1] = UTF16.getTrailSurrogate(cp); 6135 return 2; 6136 } 6137 } 6138 throw new IllegalArgumentException(); 6139 } 6140 6141 /** 6142 * Cover the JDK 1.5 API, for convenience. Returns a char array 6143 * representing the code point. 
6144 * @param cp the code point to convert 6145 * @return an array containing the char(s) representing the code point 6146 * @throws IllegalArgumentException if cp is not a valid code point 6147 * @stable ICU 3.0 6148 */ toChars(int cp)6149 public static final char[] toChars(int cp) { 6150 if (cp >= 0) { 6151 if (cp < MIN_SUPPLEMENTARY_CODE_POINT) { 6152 return new char[] { (char)cp }; 6153 } 6154 if (cp <= MAX_CODE_POINT) { 6155 return new char[] { 6156 UTF16.getLeadSurrogate(cp), 6157 UTF16.getTrailSurrogate(cp) 6158 }; 6159 } 6160 } 6161 throw new IllegalArgumentException(); 6162 } 6163 6164 /** 6165 * Cover the JDK API, for convenience. Return a byte representing the directionality of 6166 * the character. 6167 * 6168 * {@icunote} Unlike the JDK, this returns DIRECTIONALITY_LEFT_TO_RIGHT for undefined 6169 * or out-of-bounds characters. 6170 * 6171 * {@icunote} The return value must be tested using the constants defined in {@link 6172 * UCharacterDirection} and its interface {@link 6173 * UCharacterEnums.ECharacterDirection} since the values are different from the ones 6174 * defined by <code>java.lang.Character</code>. 6175 * @param cp the code point to check 6176 * @return the directionality of the code point 6177 * @see #getDirection 6178 * @stable ICU 3.0 6179 */ getDirectionality(int cp)6180 public static byte getDirectionality(int cp) 6181 { 6182 return (byte)getDirection(cp); 6183 } 6184 6185 /** 6186 * Cover the JDK API, for convenience. Count the number of code points in the range of text. 
6187 * @param text the characters to check 6188 * @param start the start of the range 6189 * @param limit the limit of the range 6190 * @return the number of code points in the range 6191 * @stable ICU 3.0 6192 */ codePointCount(CharSequence text, int start, int limit)6193 public static int codePointCount(CharSequence text, int start, int limit) { 6194 if (start < 0 || limit < start || limit > text.length()) { 6195 throw new IndexOutOfBoundsException("start (" + start + 6196 ") or limit (" + limit + 6197 ") invalid or out of range 0, " + text.length()); 6198 } 6199 6200 int len = limit - start; 6201 while (limit > start) { 6202 char ch = text.charAt(--limit); 6203 while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && limit > start) { 6204 ch = text.charAt(--limit); 6205 if (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE) { 6206 --len; 6207 break; 6208 } 6209 } 6210 } 6211 return len; 6212 } 6213 6214 /** 6215 * Cover the JDK API, for convenience. Count the number of code points in the range of text. 6216 * @param text the characters to check 6217 * @param start the start of the range 6218 * @param limit the limit of the range 6219 * @return the number of code points in the range 6220 * @stable ICU 3.0 6221 */ codePointCount(char[] text, int start, int limit)6222 public static int codePointCount(char[] text, int start, int limit) { 6223 if (start < 0 || limit < start || limit > text.length) { 6224 throw new IndexOutOfBoundsException("start (" + start + 6225 ") or limit (" + limit + 6226 ") invalid or out of range 0, " + text.length); 6227 } 6228 6229 int len = limit - start; 6230 while (limit > start) { 6231 char ch = text[--limit]; 6232 while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && limit > start) { 6233 ch = text[--limit]; 6234 if (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE) { 6235 --len; 6236 break; 6237 } 6238 } 6239 } 6240 return len; 6241 } 6242 6243 /** 6244 * Cover the JDK API, for convenience. 
Adjust the char index by a code point offset. 6245 * @param text the characters to check 6246 * @param index the index to adjust 6247 * @param codePointOffset the number of code points by which to offset the index 6248 * @return the adjusted index 6249 * @stable ICU 3.0 6250 */ offsetByCodePoints(CharSequence text, int index, int codePointOffset)6251 public static int offsetByCodePoints(CharSequence text, int index, int codePointOffset) { 6252 if (index < 0 || index > text.length()) { 6253 throw new IndexOutOfBoundsException("index ( " + index + 6254 ") out of range 0, " + text.length()); 6255 } 6256 6257 if (codePointOffset < 0) { 6258 while (++codePointOffset <= 0) { 6259 char ch = text.charAt(--index); 6260 while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && index > 0) { 6261 ch = text.charAt(--index); 6262 if (ch < MIN_HIGH_SURROGATE || ch > MAX_HIGH_SURROGATE) { 6263 if (++codePointOffset > 0) { 6264 return index+1; 6265 } 6266 } 6267 } 6268 } 6269 } else { 6270 int limit = text.length(); 6271 while (--codePointOffset >= 0) { 6272 char ch = text.charAt(index++); 6273 while (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE && index < limit) { 6274 ch = text.charAt(index++); 6275 if (ch < MIN_LOW_SURROGATE || ch > MAX_LOW_SURROGATE) { 6276 if (--codePointOffset < 0) { 6277 return index-1; 6278 } 6279 } 6280 } 6281 } 6282 } 6283 6284 return index; 6285 } 6286 6287 /** 6288 * Cover the JDK API, for convenience. Adjust the char index by a code point offset. 
6289 * @param text the characters to check 6290 * @param start the start of the range to check 6291 * @param count the length of the range to check 6292 * @param index the index to adjust 6293 * @param codePointOffset the number of code points by which to offset the index 6294 * @return the adjusted index 6295 * @stable ICU 3.0 6296 */ offsetByCodePoints(char[] text, int start, int count, int index, int codePointOffset)6297 public static int offsetByCodePoints(char[] text, int start, int count, int index, 6298 int codePointOffset) { 6299 int limit = start + count; 6300 if (start < 0 || limit < start || limit > text.length || index < start || index > limit) { 6301 throw new IndexOutOfBoundsException("index ( " + index + 6302 ") out of range " + start + 6303 ", " + limit + 6304 " in array 0, " + text.length); 6305 } 6306 6307 if (codePointOffset < 0) { 6308 while (++codePointOffset <= 0) { 6309 char ch = text[--index]; 6310 if (index < start) { 6311 throw new IndexOutOfBoundsException("index ( " + index + 6312 ") < start (" + start + 6313 ")"); 6314 } 6315 while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && index > start) { 6316 ch = text[--index]; 6317 if (ch < MIN_HIGH_SURROGATE || ch > MAX_HIGH_SURROGATE) { 6318 if (++codePointOffset > 0) { 6319 return index+1; 6320 } 6321 } 6322 } 6323 } 6324 } else { 6325 while (--codePointOffset >= 0) { 6326 char ch = text[index++]; 6327 if (index > limit) { 6328 throw new IndexOutOfBoundsException("index ( " + index + 6329 ") > limit (" + limit + 6330 ")"); 6331 } 6332 while (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE && index < limit) { 6333 ch = text[index++]; 6334 if (ch < MIN_LOW_SURROGATE || ch > MAX_LOW_SURROGATE) { 6335 if (--codePointOffset < 0) { 6336 return index-1; 6337 } 6338 } 6339 } 6340 } 6341 } 6342 6343 return index; 6344 } 6345 6346 // private variables ------------------------------------------------- 6347 6348 /** 6349 * To get the last character out from a data type 6350 */ 6351 private 
static final int LAST_CHAR_MASK_ = 0xFFFF; // mask that keeps only the low 16 bits of a value

// Commented-out constants kept from the original source:
// /**
// * To get the last byte out from a data type
// */
// private static final int LAST_BYTE_MASK_ = 0xFF;
//
// /**
// * Shift 16 bits
// */
// private static final int SHIFT_16_ = 16;
//
// /**
// * Shift 24 bits
// */
// private static final int SHIFT_24_ = 24;
//
// /**
// * Decimal radix
// */
// private static final int DECIMAL_RADIX_ = 10;

/**
 * No break space code point (U+00A0)
 */
private static final int NO_BREAK_SPACE_ = 0xA0;

/**
 * Figure space code point (U+2007)
 */
private static final int FIGURE_SPACE_ = 0x2007;

/**
 * Narrow no break space code point (U+202F)
 */
private static final int NARROW_NO_BREAK_SPACE_ = 0x202F;

/**
 * Ideographic number zero code point (U+3007)
 */
private static final int IDEOGRAPHIC_NUMBER_ZERO_ = 0x3007;

// NOTE(review): FIRST_ through NINETH_ below look like the ideographs for the numbers
// one through nine; the code that consumes them is outside this chunk - confirm there.

/**
 * CJK Ideograph, First code point
 */
private static final int CJK_IDEOGRAPH_FIRST_ = 0x4e00;

/**
 * CJK Ideograph, Second code point
 */
private static final int CJK_IDEOGRAPH_SECOND_ = 0x4e8c;

/**
 * CJK Ideograph, Third code point
 */
private static final int CJK_IDEOGRAPH_THIRD_ = 0x4e09;

/**
 * CJK Ideograph, Fourth code point
 */
private static final int CJK_IDEOGRAPH_FOURTH_ = 0x56db;

/**
 * CJK Ideograph, Fifth code point
 */
private static final int CJK_IDEOGRAPH_FIFTH_ = 0x4e94;

/**
 * CJK Ideograph, Sixth code point
 */
private static final int CJK_IDEOGRAPH_SIXTH_ = 0x516d;

/**
 * CJK Ideograph, Seventh code point
 */
private static final int CJK_IDEOGRAPH_SEVENTH_ = 0x4e03;

/**
 * CJK Ideograph, Eighth code point
 */
private static final int CJK_IDEOGRAPH_EIGHTH_ = 0x516b;

/**
 * CJK Ideograph, Ninth code point
 * (the identifier keeps its historical "NINETH" spelling)
 */
private static final int CJK_IDEOGRAPH_NINETH_ = 0x4e5d;

/**
 * Application Program command code point (U+009F)
 */
private static final int APPLICATION_PROGRAM_COMMAND_ = 0x009F;

/**
 * Unit separator code point (U+001F)
 */
private static final int UNIT_SEPARATOR_ = 0x001F;

/**
 * Delete code point (U+007F)
 */
private static final int DELETE_ = 0x007F;

/**
 * Han digit characters: code points named after the digits zero through nine
 * ("COMPLEX" forms) and after the values ten, hundred, thousand, ten thousand and
 * hundred million.
 */
private static final int CJK_IDEOGRAPH_COMPLEX_ZERO_ = 0x96f6;
private static final int CJK_IDEOGRAPH_COMPLEX_ONE_ = 0x58f9;
private static final int CJK_IDEOGRAPH_COMPLEX_TWO_ = 0x8cb3;
private static final int CJK_IDEOGRAPH_COMPLEX_THREE_ = 0x53c3;
private static final int CJK_IDEOGRAPH_COMPLEX_FOUR_ = 0x8086;
private static final int CJK_IDEOGRAPH_COMPLEX_FIVE_ = 0x4f0d;
private static final int CJK_IDEOGRAPH_COMPLEX_SIX_ = 0x9678;
private static final int CJK_IDEOGRAPH_COMPLEX_SEVEN_ = 0x67d2;
private static final int CJK_IDEOGRAPH_COMPLEX_EIGHT_ = 0x634c;
private static final int CJK_IDEOGRAPH_COMPLEX_NINE_ = 0x7396;
private static final int CJK_IDEOGRAPH_TEN_ = 0x5341;
private static final int CJK_IDEOGRAPH_COMPLEX_TEN_ = 0x62fe;
private static final int CJK_IDEOGRAPH_HUNDRED_ = 0x767e;
private static final int CJK_IDEOGRAPH_COMPLEX_HUNDRED_ = 0x4f70;
private static final int CJK_IDEOGRAPH_THOUSAND_ = 0x5343;
private static final int CJK_IDEOGRAPH_COMPLEX_THOUSAND_ = 0x4edf;
// NOTE(review): 0x824c differs from U+842C, the ideograph commonly used for "ten
// thousand" - this looks like transposed hex digits. Verify against the Unicode Unihan
// numeric data and the code that switches on this constant before changing it.
private static final int CJK_IDEOGRAPH_TEN_THOUSAND_ = 0x824c;
private static final int CJK_IDEOGRAPH_HUNDRED_MILLION_ = 0x5104;

// private constructor -----------------------------------------------
///CLOVER:OFF
/**
 * Private constructor to prevent instantiation
 */
private UCharacter()
{
}
///CLOVER:ON
}