1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html#License 3 /** 4 ******************************************************************************* 5 * Copyright (C) 1996-2016, International Business Machines Corporation and 6 * others. All Rights Reserved. 7 ******************************************************************************* 8 */ 9 10 package com.ibm.icu.lang; 11 12 import java.lang.ref.SoftReference; 13 import java.util.HashMap; 14 import java.util.Iterator; 15 import java.util.Locale; 16 import java.util.Map; 17 18 import com.ibm.icu.impl.CaseMapImpl; 19 import com.ibm.icu.impl.IllegalIcuArgumentException; 20 import com.ibm.icu.impl.Trie2; 21 import com.ibm.icu.impl.UBiDiProps; 22 import com.ibm.icu.impl.UCaseProps; 23 import com.ibm.icu.impl.UCharacterName; 24 import com.ibm.icu.impl.UCharacterNameChoice; 25 import com.ibm.icu.impl.UCharacterProperty; 26 import com.ibm.icu.impl.UCharacterUtility; 27 import com.ibm.icu.impl.UPropertyAliases; 28 import com.ibm.icu.lang.UCharacterEnums.ECharacterCategory; 29 import com.ibm.icu.lang.UCharacterEnums.ECharacterDirection; 30 import com.ibm.icu.text.BreakIterator; 31 import com.ibm.icu.text.Normalizer2; 32 import com.ibm.icu.util.RangeValueIterator; 33 import com.ibm.icu.util.ULocale; 34 import com.ibm.icu.util.ValueIterator; 35 import com.ibm.icu.util.VersionInfo; 36 37 /** 38 * {@icuenhanced java.lang.Character}.{@icu _usage_} 39 * 40 * <p>The UCharacter class provides extensions to the {@link java.lang.Character} class. 41 * These extensions provide support for more Unicode properties. 42 * Each ICU release supports the latest version of Unicode available at that time. 43 * 44 * <p>For some time before Java 5 added support for supplementary Unicode code points, 45 * The ICU UCharacter class and many other ICU classes already supported them. 
46 * Some UCharacter methods and constants were widened slightly differently than 47 * how the Character class methods and constants were widened later. 48 * In particular, {@link Character#MAX_VALUE} is still a char with the value U+FFFF, 49 * while the {@link UCharacter#MAX_VALUE} is an int with the value U+10FFFF. 50 * 51 * <p>Code points are represented in these APIs using ints. While it would be 52 * more convenient in Java to have a separate primitive datatype for them, 53 * ints suffice in the meantime. 54 * 55 * <p>To use this class please add the jar file name icu4j.jar to the 56 * class path, since it contains data files which supply the information used 57 * by this file.<br> 58 * E.g. in Windows <br> 59 * <code>set CLASSPATH=%CLASSPATH%;$JAR_FILE_PATH/icu4j.jar</code>.<br> 60 * Otherwise, another method would be to copy the files uprops.dat and 61 * unames.icu from the icu4j source subdirectory 62 * <i>$ICU4J_SRC/src/com.ibm.icu.impl.data</i> to your class directory 63 * <i>$ICU4J_CLASS/com.ibm.icu.impl.data</i>. 64 * 65 * <p>Aside from the additions for UTF-16 support, and the updated Unicode 66 * properties, the main differences between UCharacter and Character are: 67 * <ul> 68 * <li> UCharacter is not designed to be a char wrapper and does not have 69 * APIs that involve management of that single char.<br> 70 * These include: 71 * <ul> 72 * <li> char charValue(), 73 * <li> int compareTo(java.lang.Character, java.lang.Character), etc. 74 * </ul> 75 * <li> UCharacter does not include Character APIs that are deprecated, nor 76 * does it include the Java-specific character information, such as 77 * boolean isJavaIdentifierPart(char ch). 78 * <li> Character maps characters 'A' - 'Z' and 'a' - 'z' to the numeric 79 * values '10' - '35'. UCharacter also does this in digit and 80 * getNumericValue, to adhere to the Java semantics of these 81 * methods. 
New methods unicodeDigit and 82 * getUnicodeNumericValue do not treat the above code points 83 * as having numeric values. This is a semantic change from ICU4J 1.3.1. 84 * </ul> 85 * <p> 86 * Further detail on differences can be determined using the program 87 * <a href= 88 * "http://source.icu-project.org/repos/icu/icu4j/trunk/src/com/ibm/icu/dev/test/lang/UCharacterCompare.java"> 89 * com.ibm.icu.dev.test.lang.UCharacterCompare</a> 90 * <p> 91 * In addition to Java compatibility functions, which calculate derived properties, 92 * this API provides low-level access to the Unicode Character Database. 93 * <p> 94 * Unicode assigns each code point (not just assigned characters) values for 95 * many properties. 96 * Most of them are simple boolean flags, or constants from a small enumerated list. 97 * For some properties, values are strings or other relatively more complex types. 98 * <p> 99 * For more information see 100 * <a href="http://www.unicode.org/ucd/">"About the Unicode Character Database"</a> 101 * (http://www.unicode.org/ucd/) 102 * and the <a href="http://www.icu-project.org/userguide/properties.html">ICU 103 * User Guide chapter on Properties</a> 104 * (http://www.icu-project.org/userguide/properties.html). 105 * <p> 106 * There are also functions that provide easy migration from C/POSIX functions 107 * like isblank(). Their use is generally discouraged because the C/POSIX 108 * standards do not define their semantics beyond the ASCII range, which means 109 * that different implementations exhibit very different behavior. 110 * Instead, Unicode properties should be used directly. 111 * <p> 112 * There are also only a few, broad C/POSIX character classes, and they tend 113 * to be used for conflicting purposes. For example, the "isalpha()" class 114 * is sometimes used to determine word boundaries, while a more sophisticated 115 * approach would at least distinguish initial letters from continuation 116 * characters (the latter including combining marks). 
117 * (In ICU, BreakIterator is the most sophisticated API for word boundaries.) 118 * Another example: There is no "istitle()" class for titlecase characters. 119 * <p> 120 * ICU 3.4 and later provides API access for all twelve C/POSIX character classes. 121 * ICU implements them according to the Standard Recommendations in 122 * Annex C: Compatibility Properties of UTS #18 Unicode Regular Expressions 123 * (http://www.unicode.org/reports/tr18/#Compatibility_Properties). 124 * <p> 125 * API access for C/POSIX character classes is as follows: 126 * <pre>{@code 127 * - alpha: isUAlphabetic(c) or hasBinaryProperty(c, UProperty.ALPHABETIC) 128 * - lower: isULowercase(c) or hasBinaryProperty(c, UProperty.LOWERCASE) 129 * - upper: isUUppercase(c) or hasBinaryProperty(c, UProperty.UPPERCASE) 130 * - punct: ((1<<getType(c)) & ((1<<DASH_PUNCTUATION)|(1<<START_PUNCTUATION)| 131 * (1<<END_PUNCTUATION)|(1<<CONNECTOR_PUNCTUATION)|(1<<OTHER_PUNCTUATION)| 132 * (1<<INITIAL_PUNCTUATION)|(1<<FINAL_PUNCTUATION)))!=0 133 * - digit: isDigit(c) or getType(c)==DECIMAL_DIGIT_NUMBER 134 * - xdigit: hasBinaryProperty(c, UProperty.POSIX_XDIGIT) 135 * - alnum: hasBinaryProperty(c, UProperty.POSIX_ALNUM) 136 * - space: isUWhiteSpace(c) or hasBinaryProperty(c, UProperty.WHITE_SPACE) 137 * - blank: hasBinaryProperty(c, UProperty.POSIX_BLANK) 138 * - cntrl: getType(c)==CONTROL 139 * - graph: hasBinaryProperty(c, UProperty.POSIX_GRAPH) 140 * - print: hasBinaryProperty(c, UProperty.POSIX_PRINT)}</pre> 141 * <p> 142 * The C/POSIX character classes are also available in UnicodeSet patterns, 143 * using patterns like [:graph:] or \p{graph}. 144 * 145 * <p>{@icunote} There are several ICU (and Java) whitespace functions. 
146 * Comparison:<ul> 147 * <li> isUWhiteSpace=UCHAR_WHITE_SPACE: Unicode White_Space property; 148 * most of general categories "Z" (separators) + most whitespace ISO controls 149 * (including no-break spaces, but excluding IS1..IS4) 150 * <li> isWhitespace: Java isWhitespace; Z + whitespace ISO controls but excluding no-break spaces 151 * <li> isSpaceChar: just Z (including no-break spaces)</ul> 152 * 153 * <p> 154 * This class is not subclassable. 155 * 156 * @author Syn Wee Quek 157 * @stable ICU 2.1 158 * @see com.ibm.icu.lang.UCharacterEnums 159 */ 160 161 public final class UCharacter implements ECharacterCategory, ECharacterDirection 162 { 163 // public inner classes ---------------------------------------------- 164 165 /** 166 * {@icuenhanced java.lang.Character.UnicodeBlock}.{@icu _usage_} 167 * 168 * A family of character subsets representing the character blocks in the 169 * Unicode specification, generated from Unicode Data file Blocks.txt. 170 * Character blocks generally define characters used for a specific script 171 * or purpose. A character is contained by at most one Unicode block. 172 * 173 * {@icunote} All fields named XXX_ID are specific to ICU. 
174 * 175 * @stable ICU 2.4 176 */ 177 public static final class UnicodeBlock extends Character.Subset 178 { 179 // block id corresponding to icu4c ----------------------------------- 180 181 /** 182 * @stable ICU 2.4 183 */ 184 public static final int INVALID_CODE_ID = -1; 185 /** 186 * @stable ICU 2.4 187 */ 188 public static final int BASIC_LATIN_ID = 1; 189 /** 190 * @stable ICU 2.4 191 */ 192 public static final int LATIN_1_SUPPLEMENT_ID = 2; 193 /** 194 * @stable ICU 2.4 195 */ 196 public static final int LATIN_EXTENDED_A_ID = 3; 197 /** 198 * @stable ICU 2.4 199 */ 200 public static final int LATIN_EXTENDED_B_ID = 4; 201 /** 202 * @stable ICU 2.4 203 */ 204 public static final int IPA_EXTENSIONS_ID = 5; 205 /** 206 * @stable ICU 2.4 207 */ 208 public static final int SPACING_MODIFIER_LETTERS_ID = 6; 209 /** 210 * @stable ICU 2.4 211 */ 212 public static final int COMBINING_DIACRITICAL_MARKS_ID = 7; 213 /** 214 * Unicode 3.2 renames this block to "Greek and Coptic". 215 * @stable ICU 2.4 216 */ 217 public static final int GREEK_ID = 8; 218 /** 219 * @stable ICU 2.4 220 */ 221 public static final int CYRILLIC_ID = 9; 222 /** 223 * @stable ICU 2.4 224 */ 225 public static final int ARMENIAN_ID = 10; 226 /** 227 * @stable ICU 2.4 228 */ 229 public static final int HEBREW_ID = 11; 230 /** 231 * @stable ICU 2.4 232 */ 233 public static final int ARABIC_ID = 12; 234 /** 235 * @stable ICU 2.4 236 */ 237 public static final int SYRIAC_ID = 13; 238 /** 239 * @stable ICU 2.4 240 */ 241 public static final int THAANA_ID = 14; 242 /** 243 * @stable ICU 2.4 244 */ 245 public static final int DEVANAGARI_ID = 15; 246 /** 247 * @stable ICU 2.4 248 */ 249 public static final int BENGALI_ID = 16; 250 /** 251 * @stable ICU 2.4 252 */ 253 public static final int GURMUKHI_ID = 17; 254 /** 255 * @stable ICU 2.4 256 */ 257 public static final int GUJARATI_ID = 18; 258 /** 259 * @stable ICU 2.4 260 */ 261 public static final int ORIYA_ID = 19; 262 /** 263 * @stable ICU 2.4 264 */ 
265 public static final int TAMIL_ID = 20; 266 /** 267 * @stable ICU 2.4 268 */ 269 public static final int TELUGU_ID = 21; 270 /** 271 * @stable ICU 2.4 272 */ 273 public static final int KANNADA_ID = 22; 274 /** 275 * @stable ICU 2.4 276 */ 277 public static final int MALAYALAM_ID = 23; 278 /** 279 * @stable ICU 2.4 280 */ 281 public static final int SINHALA_ID = 24; 282 /** 283 * @stable ICU 2.4 284 */ 285 public static final int THAI_ID = 25; 286 /** 287 * @stable ICU 2.4 288 */ 289 public static final int LAO_ID = 26; 290 /** 291 * @stable ICU 2.4 292 */ 293 public static final int TIBETAN_ID = 27; 294 /** 295 * @stable ICU 2.4 296 */ 297 public static final int MYANMAR_ID = 28; 298 /** 299 * @stable ICU 2.4 300 */ 301 public static final int GEORGIAN_ID = 29; 302 /** 303 * @stable ICU 2.4 304 */ 305 public static final int HANGUL_JAMO_ID = 30; 306 /** 307 * @stable ICU 2.4 308 */ 309 public static final int ETHIOPIC_ID = 31; 310 /** 311 * @stable ICU 2.4 312 */ 313 public static final int CHEROKEE_ID = 32; 314 /** 315 * @stable ICU 2.4 316 */ 317 public static final int UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_ID = 33; 318 /** 319 * @stable ICU 2.4 320 */ 321 public static final int OGHAM_ID = 34; 322 /** 323 * @stable ICU 2.4 324 */ 325 public static final int RUNIC_ID = 35; 326 /** 327 * @stable ICU 2.4 328 */ 329 public static final int KHMER_ID = 36; 330 /** 331 * @stable ICU 2.4 332 */ 333 public static final int MONGOLIAN_ID = 37; 334 /** 335 * @stable ICU 2.4 336 */ 337 public static final int LATIN_EXTENDED_ADDITIONAL_ID = 38; 338 /** 339 * @stable ICU 2.4 340 */ 341 public static final int GREEK_EXTENDED_ID = 39; 342 /** 343 * @stable ICU 2.4 344 */ 345 public static final int GENERAL_PUNCTUATION_ID = 40; 346 /** 347 * @stable ICU 2.4 348 */ 349 public static final int SUPERSCRIPTS_AND_SUBSCRIPTS_ID = 41; 350 /** 351 * @stable ICU 2.4 352 */ 353 public static final int CURRENCY_SYMBOLS_ID = 42; 354 /** 355 * Unicode 3.2 renames this block to "Combining 
Diacritical Marks for 356 * Symbols". 357 * @stable ICU 2.4 358 */ 359 public static final int COMBINING_MARKS_FOR_SYMBOLS_ID = 43; 360 /** 361 * @stable ICU 2.4 362 */ 363 public static final int LETTERLIKE_SYMBOLS_ID = 44; 364 /** 365 * @stable ICU 2.4 366 */ 367 public static final int NUMBER_FORMS_ID = 45; 368 /** 369 * @stable ICU 2.4 370 */ 371 public static final int ARROWS_ID = 46; 372 /** 373 * @stable ICU 2.4 374 */ 375 public static final int MATHEMATICAL_OPERATORS_ID = 47; 376 /** 377 * @stable ICU 2.4 378 */ 379 public static final int MISCELLANEOUS_TECHNICAL_ID = 48; 380 /** 381 * @stable ICU 2.4 382 */ 383 public static final int CONTROL_PICTURES_ID = 49; 384 /** 385 * @stable ICU 2.4 386 */ 387 public static final int OPTICAL_CHARACTER_RECOGNITION_ID = 50; 388 /** 389 * @stable ICU 2.4 390 */ 391 public static final int ENCLOSED_ALPHANUMERICS_ID = 51; 392 /** 393 * @stable ICU 2.4 394 */ 395 public static final int BOX_DRAWING_ID = 52; 396 /** 397 * @stable ICU 2.4 398 */ 399 public static final int BLOCK_ELEMENTS_ID = 53; 400 /** 401 * @stable ICU 2.4 402 */ 403 public static final int GEOMETRIC_SHAPES_ID = 54; 404 /** 405 * @stable ICU 2.4 406 */ 407 public static final int MISCELLANEOUS_SYMBOLS_ID = 55; 408 /** 409 * @stable ICU 2.4 410 */ 411 public static final int DINGBATS_ID = 56; 412 /** 413 * @stable ICU 2.4 414 */ 415 public static final int BRAILLE_PATTERNS_ID = 57; 416 /** 417 * @stable ICU 2.4 418 */ 419 public static final int CJK_RADICALS_SUPPLEMENT_ID = 58; 420 /** 421 * @stable ICU 2.4 422 */ 423 public static final int KANGXI_RADICALS_ID = 59; 424 /** 425 * @stable ICU 2.4 426 */ 427 public static final int IDEOGRAPHIC_DESCRIPTION_CHARACTERS_ID = 60; 428 /** 429 * @stable ICU 2.4 430 */ 431 public static final int CJK_SYMBOLS_AND_PUNCTUATION_ID = 61; 432 /** 433 * @stable ICU 2.4 434 */ 435 public static final int HIRAGANA_ID = 62; 436 /** 437 * @stable ICU 2.4 438 */ 439 public static final int KATAKANA_ID = 63; 440 /** 441 * 
@stable ICU 2.4 442 */ 443 public static final int BOPOMOFO_ID = 64; 444 /** 445 * @stable ICU 2.4 446 */ 447 public static final int HANGUL_COMPATIBILITY_JAMO_ID = 65; 448 /** 449 * @stable ICU 2.4 450 */ 451 public static final int KANBUN_ID = 66; 452 /** 453 * @stable ICU 2.4 454 */ 455 public static final int BOPOMOFO_EXTENDED_ID = 67; 456 /** 457 * @stable ICU 2.4 458 */ 459 public static final int ENCLOSED_CJK_LETTERS_AND_MONTHS_ID = 68; 460 /** 461 * @stable ICU 2.4 462 */ 463 public static final int CJK_COMPATIBILITY_ID = 69; 464 /** 465 * @stable ICU 2.4 466 */ 467 public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_ID = 70; 468 /** 469 * @stable ICU 2.4 470 */ 471 public static final int CJK_UNIFIED_IDEOGRAPHS_ID = 71; 472 /** 473 * @stable ICU 2.4 474 */ 475 public static final int YI_SYLLABLES_ID = 72; 476 /** 477 * @stable ICU 2.4 478 */ 479 public static final int YI_RADICALS_ID = 73; 480 /** 481 * @stable ICU 2.4 482 */ 483 public static final int HANGUL_SYLLABLES_ID = 74; 484 /** 485 * @stable ICU 2.4 486 */ 487 public static final int HIGH_SURROGATES_ID = 75; 488 /** 489 * @stable ICU 2.4 490 */ 491 public static final int HIGH_PRIVATE_USE_SURROGATES_ID = 76; 492 /** 493 * @stable ICU 2.4 494 */ 495 public static final int LOW_SURROGATES_ID = 77; 496 /** 497 * Same as public static final int PRIVATE_USE. 498 * Until Unicode 3.1.1; the corresponding block name was "Private Use"; 499 * and multiple code point ranges had this block. 500 * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" 501 * and adds separate blocks for the supplementary PUAs. 502 * @stable ICU 2.4 503 */ 504 public static final int PRIVATE_USE_AREA_ID = 78; 505 /** 506 * Same as public static final int PRIVATE_USE_AREA. 507 * Until Unicode 3.1.1; the corresponding block name was "Private Use"; 508 * and multiple code point ranges had this block. 
509 * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" 510 * and adds separate blocks for the supplementary PUAs. 511 * @stable ICU 2.4 512 */ 513 public static final int PRIVATE_USE_ID = PRIVATE_USE_AREA_ID; 514 /** 515 * @stable ICU 2.4 516 */ 517 public static final int CJK_COMPATIBILITY_IDEOGRAPHS_ID = 79; 518 /** 519 * @stable ICU 2.4 520 */ 521 public static final int ALPHABETIC_PRESENTATION_FORMS_ID = 80; 522 /** 523 * @stable ICU 2.4 524 */ 525 public static final int ARABIC_PRESENTATION_FORMS_A_ID = 81; 526 /** 527 * @stable ICU 2.4 528 */ 529 public static final int COMBINING_HALF_MARKS_ID = 82; 530 /** 531 * @stable ICU 2.4 532 */ 533 public static final int CJK_COMPATIBILITY_FORMS_ID = 83; 534 /** 535 * @stable ICU 2.4 536 */ 537 public static final int SMALL_FORM_VARIANTS_ID = 84; 538 /** 539 * @stable ICU 2.4 540 */ 541 public static final int ARABIC_PRESENTATION_FORMS_B_ID = 85; 542 /** 543 * @stable ICU 2.4 544 */ 545 public static final int SPECIALS_ID = 86; 546 /** 547 * @stable ICU 2.4 548 */ 549 public static final int HALFWIDTH_AND_FULLWIDTH_FORMS_ID = 87; 550 /** 551 * @stable ICU 2.4 552 */ 553 public static final int OLD_ITALIC_ID = 88; 554 /** 555 * @stable ICU 2.4 556 */ 557 public static final int GOTHIC_ID = 89; 558 /** 559 * @stable ICU 2.4 560 */ 561 public static final int DESERET_ID = 90; 562 /** 563 * @stable ICU 2.4 564 */ 565 public static final int BYZANTINE_MUSICAL_SYMBOLS_ID = 91; 566 /** 567 * @stable ICU 2.4 568 */ 569 public static final int MUSICAL_SYMBOLS_ID = 92; 570 /** 571 * @stable ICU 2.4 572 */ 573 public static final int MATHEMATICAL_ALPHANUMERIC_SYMBOLS_ID = 93; 574 /** 575 * @stable ICU 2.4 576 */ 577 public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_ID = 94; 578 /** 579 * @stable ICU 2.4 580 */ 581 public static final int 582 CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_ID = 95; 583 /** 584 * @stable ICU 2.4 585 */ 586 public static final int TAGS_ID = 96; 587 588 // New blocks in Unicode 
3.2 589 590 /** 591 * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement". 592 * @stable ICU 2.4 593 */ 594 public static final int CYRILLIC_SUPPLEMENTARY_ID = 97; 595 /** 596 * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement". 597 * @stable ICU 3.0 598 */ 599 600 public static final int CYRILLIC_SUPPLEMENT_ID = 97; 601 /** 602 * @stable ICU 2.4 603 */ 604 public static final int TAGALOG_ID = 98; 605 /** 606 * @stable ICU 2.4 607 */ 608 public static final int HANUNOO_ID = 99; 609 /** 610 * @stable ICU 2.4 611 */ 612 public static final int BUHID_ID = 100; 613 /** 614 * @stable ICU 2.4 615 */ 616 public static final int TAGBANWA_ID = 101; 617 /** 618 * @stable ICU 2.4 619 */ 620 public static final int MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A_ID = 102; 621 /** 622 * @stable ICU 2.4 623 */ 624 public static final int SUPPLEMENTAL_ARROWS_A_ID = 103; 625 /** 626 * @stable ICU 2.4 627 */ 628 public static final int SUPPLEMENTAL_ARROWS_B_ID = 104; 629 /** 630 * @stable ICU 2.4 631 */ 632 public static final int MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B_ID = 105; 633 /** 634 * @stable ICU 2.4 635 */ 636 public static final int SUPPLEMENTAL_MATHEMATICAL_OPERATORS_ID = 106; 637 /** 638 * @stable ICU 2.4 639 */ 640 public static final int KATAKANA_PHONETIC_EXTENSIONS_ID = 107; 641 /** 642 * @stable ICU 2.4 643 */ 644 public static final int VARIATION_SELECTORS_ID = 108; 645 /** 646 * @stable ICU 2.4 647 */ 648 public static final int SUPPLEMENTARY_PRIVATE_USE_AREA_A_ID = 109; 649 /** 650 * @stable ICU 2.4 651 */ 652 public static final int SUPPLEMENTARY_PRIVATE_USE_AREA_B_ID = 110; 653 654 /** 655 * @stable ICU 2.6 656 */ 657 public static final int LIMBU_ID = 111; /*[1900]*/ 658 /** 659 * @stable ICU 2.6 660 */ 661 public static final int TAI_LE_ID = 112; /*[1950]*/ 662 /** 663 * @stable ICU 2.6 664 */ 665 public static final int KHMER_SYMBOLS_ID = 113; /*[19E0]*/ 666 /** 667 * @stable ICU 2.6 668 */ 669 public static 
final int PHONETIC_EXTENSIONS_ID = 114; /*[1D00]*/ 670 /** 671 * @stable ICU 2.6 672 */ 673 public static final int MISCELLANEOUS_SYMBOLS_AND_ARROWS_ID = 115; /*[2B00]*/ 674 /** 675 * @stable ICU 2.6 676 */ 677 public static final int YIJING_HEXAGRAM_SYMBOLS_ID = 116; /*[4DC0]*/ 678 /** 679 * @stable ICU 2.6 680 */ 681 public static final int LINEAR_B_SYLLABARY_ID = 117; /*[10000]*/ 682 /** 683 * @stable ICU 2.6 684 */ 685 public static final int LINEAR_B_IDEOGRAMS_ID = 118; /*[10080]*/ 686 /** 687 * @stable ICU 2.6 688 */ 689 public static final int AEGEAN_NUMBERS_ID = 119; /*[10100]*/ 690 /** 691 * @stable ICU 2.6 692 */ 693 public static final int UGARITIC_ID = 120; /*[10380]*/ 694 /** 695 * @stable ICU 2.6 696 */ 697 public static final int SHAVIAN_ID = 121; /*[10450]*/ 698 /** 699 * @stable ICU 2.6 700 */ 701 public static final int OSMANYA_ID = 122; /*[10480]*/ 702 /** 703 * @stable ICU 2.6 704 */ 705 public static final int CYPRIOT_SYLLABARY_ID = 123; /*[10800]*/ 706 /** 707 * @stable ICU 2.6 708 */ 709 public static final int TAI_XUAN_JING_SYMBOLS_ID = 124; /*[1D300]*/ 710 /** 711 * @stable ICU 2.6 712 */ 713 public static final int VARIATION_SELECTORS_SUPPLEMENT_ID = 125; /*[E0100]*/ 714 715 /* New blocks in Unicode 4.1 */ 716 717 /** 718 * @stable ICU 3.4 719 */ 720 public static final int ANCIENT_GREEK_MUSICAL_NOTATION_ID = 126; /*[1D200]*/ 721 722 /** 723 * @stable ICU 3.4 724 */ 725 public static final int ANCIENT_GREEK_NUMBERS_ID = 127; /*[10140]*/ 726 727 /** 728 * @stable ICU 3.4 729 */ 730 public static final int ARABIC_SUPPLEMENT_ID = 128; /*[0750]*/ 731 732 /** 733 * @stable ICU 3.4 734 */ 735 public static final int BUGINESE_ID = 129; /*[1A00]*/ 736 737 /** 738 * @stable ICU 3.4 739 */ 740 public static final int CJK_STROKES_ID = 130; /*[31C0]*/ 741 742 /** 743 * @stable ICU 3.4 744 */ 745 public static final int COMBINING_DIACRITICAL_MARKS_SUPPLEMENT_ID = 131; /*[1DC0]*/ 746 747 /** 748 * @stable ICU 3.4 749 */ 750 public static final int 
COPTIC_ID = 132; /*[2C80]*/ 751 752 /** 753 * @stable ICU 3.4 754 */ 755 public static final int ETHIOPIC_EXTENDED_ID = 133; /*[2D80]*/ 756 757 /** 758 * @stable ICU 3.4 759 */ 760 public static final int ETHIOPIC_SUPPLEMENT_ID = 134; /*[1380]*/ 761 762 /** 763 * @stable ICU 3.4 764 */ 765 public static final int GEORGIAN_SUPPLEMENT_ID = 135; /*[2D00]*/ 766 767 /** 768 * @stable ICU 3.4 769 */ 770 public static final int GLAGOLITIC_ID = 136; /*[2C00]*/ 771 772 /** 773 * @stable ICU 3.4 774 */ 775 public static final int KHAROSHTHI_ID = 137; /*[10A00]*/ 776 777 /** 778 * @stable ICU 3.4 779 */ 780 public static final int MODIFIER_TONE_LETTERS_ID = 138; /*[A700]*/ 781 782 /** 783 * @stable ICU 3.4 784 */ 785 public static final int NEW_TAI_LUE_ID = 139; /*[1980]*/ 786 787 /** 788 * @stable ICU 3.4 789 */ 790 public static final int OLD_PERSIAN_ID = 140; /*[103A0]*/ 791 792 /** 793 * @stable ICU 3.4 794 */ 795 public static final int PHONETIC_EXTENSIONS_SUPPLEMENT_ID = 141; /*[1D80]*/ 796 797 /** 798 * @stable ICU 3.4 799 */ 800 public static final int SUPPLEMENTAL_PUNCTUATION_ID = 142; /*[2E00]*/ 801 802 /** 803 * @stable ICU 3.4 804 */ 805 public static final int SYLOTI_NAGRI_ID = 143; /*[A800]*/ 806 807 /** 808 * @stable ICU 3.4 809 */ 810 public static final int TIFINAGH_ID = 144; /*[2D30]*/ 811 812 /** 813 * @stable ICU 3.4 814 */ 815 public static final int VERTICAL_FORMS_ID = 145; /*[FE10]*/ 816 817 /* New blocks in Unicode 5.0 */ 818 819 /** 820 * @stable ICU 3.6 821 */ 822 public static final int NKO_ID = 146; /*[07C0]*/ 823 /** 824 * @stable ICU 3.6 825 */ 826 public static final int BALINESE_ID = 147; /*[1B00]*/ 827 /** 828 * @stable ICU 3.6 829 */ 830 public static final int LATIN_EXTENDED_C_ID = 148; /*[2C60]*/ 831 /** 832 * @stable ICU 3.6 833 */ 834 public static final int LATIN_EXTENDED_D_ID = 149; /*[A720]*/ 835 /** 836 * @stable ICU 3.6 837 */ 838 public static final int PHAGS_PA_ID = 150; /*[A840]*/ 839 /** 840 * @stable ICU 3.6 841 */ 842 public 
static final int PHOENICIAN_ID = 151; /*[10900]*/ 843 /** 844 * @stable ICU 3.6 845 */ 846 public static final int CUNEIFORM_ID = 152; /*[12000]*/ 847 /** 848 * @stable ICU 3.6 849 */ 850 public static final int CUNEIFORM_NUMBERS_AND_PUNCTUATION_ID = 153; /*[12400]*/ 851 /** 852 * @stable ICU 3.6 853 */ 854 public static final int COUNTING_ROD_NUMERALS_ID = 154; /*[1D360]*/ 855 856 /** 857 * @stable ICU 4.0 858 */ 859 public static final int SUNDANESE_ID = 155; /* [1B80] */ 860 861 /** 862 * @stable ICU 4.0 863 */ 864 public static final int LEPCHA_ID = 156; /* [1C00] */ 865 866 /** 867 * @stable ICU 4.0 868 */ 869 public static final int OL_CHIKI_ID = 157; /* [1C50] */ 870 871 /** 872 * @stable ICU 4.0 873 */ 874 public static final int CYRILLIC_EXTENDED_A_ID = 158; /* [2DE0] */ 875 876 /** 877 * @stable ICU 4.0 878 */ 879 public static final int VAI_ID = 159; /* [A500] */ 880 881 /** 882 * @stable ICU 4.0 883 */ 884 public static final int CYRILLIC_EXTENDED_B_ID = 160; /* [A640] */ 885 886 /** 887 * @stable ICU 4.0 888 */ 889 public static final int SAURASHTRA_ID = 161; /* [A880] */ 890 891 /** 892 * @stable ICU 4.0 893 */ 894 public static final int KAYAH_LI_ID = 162; /* [A900] */ 895 896 /** 897 * @stable ICU 4.0 898 */ 899 public static final int REJANG_ID = 163; /* [A930] */ 900 901 /** 902 * @stable ICU 4.0 903 */ 904 public static final int CHAM_ID = 164; /* [AA00] */ 905 906 /** 907 * @stable ICU 4.0 908 */ 909 public static final int ANCIENT_SYMBOLS_ID = 165; /* [10190] */ 910 911 /** 912 * @stable ICU 4.0 913 */ 914 public static final int PHAISTOS_DISC_ID = 166; /* [101D0] */ 915 916 /** 917 * @stable ICU 4.0 918 */ 919 public static final int LYCIAN_ID = 167; /* [10280] */ 920 921 /** 922 * @stable ICU 4.0 923 */ 924 public static final int CARIAN_ID = 168; /* [102A0] */ 925 926 /** 927 * @stable ICU 4.0 928 */ 929 public static final int LYDIAN_ID = 169; /* [10920] */ 930 931 /** 932 * @stable ICU 4.0 933 */ 934 public static final int 
MAHJONG_TILES_ID = 170; /* [1F000] */

/** @stable ICU 4.0 */
public static final int DOMINO_TILES_ID = 171; // [1F030]

// ---- New blocks in Unicode 5.2 ----
// NOTE(review): the bracketed hex value trailing each constant presumably gives the
// block's first code point -- confirm against the Unicode Blocks.txt data.

/** @stable ICU 4.4 */
public static final int SAMARITAN_ID = 172; // [0800]
/** @stable ICU 4.4 */
public static final int UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_ID = 173; // [18B0]
/** @stable ICU 4.4 */
public static final int TAI_THAM_ID = 174; // [1A20]
/** @stable ICU 4.4 */
public static final int VEDIC_EXTENSIONS_ID = 175; // [1CD0]
/** @stable ICU 4.4 */
public static final int LISU_ID = 176; // [A4D0]
/** @stable ICU 4.4 */
public static final int BAMUM_ID = 177; // [A6A0]
/** @stable ICU 4.4 */
public static final int COMMON_INDIC_NUMBER_FORMS_ID = 178; // [A830]
/** @stable ICU 4.4 */
public static final int DEVANAGARI_EXTENDED_ID = 179; // [A8E0]
/** @stable ICU 4.4 */
public static final int HANGUL_JAMO_EXTENDED_A_ID = 180; // [A960]
/** @stable ICU 4.4 */
public static final int JAVANESE_ID = 181; // [A980]
/** @stable ICU 4.4 */
public static final int MYANMAR_EXTENDED_A_ID = 182; // [AA60]
/** @stable ICU 4.4 */
public static final int TAI_VIET_ID = 183; // [AA80]
/** @stable ICU 4.4 */
public static final int MEETEI_MAYEK_ID = 184; // [ABC0]
/** @stable ICU 4.4 */
public static final int HANGUL_JAMO_EXTENDED_B_ID = 185; // [D7B0]
/** @stable ICU 4.4 */
public static final int IMPERIAL_ARAMAIC_ID = 186; // [10840]
/** @stable ICU 4.4 */
public static final int OLD_SOUTH_ARABIAN_ID = 187; // [10A60]
/** @stable ICU 4.4 */
public static final int AVESTAN_ID = 188; // [10B00]
/** @stable ICU 4.4 */
public static final int INSCRIPTIONAL_PARTHIAN_ID = 189; // [10B40]
/** @stable ICU 4.4 */
public static final int INSCRIPTIONAL_PAHLAVI_ID = 190; // [10B60]
/** @stable ICU 4.4 */
public static final int OLD_TURKIC_ID = 191; // [10C00]
/** @stable ICU 4.4 */
public static final int RUMI_NUMERAL_SYMBOLS_ID = 192; // [10E60]
/** @stable ICU 4.4 */
public static final int KAITHI_ID = 193; // [11080]
/** @stable ICU 4.4 */
public static final int EGYPTIAN_HIEROGLYPHS_ID = 194; // [13000]
/** @stable ICU 4.4 */
public static final int ENCLOSED_ALPHANUMERIC_SUPPLEMENT_ID = 195; // [1F100]
/** @stable ICU 4.4 */
public static final int ENCLOSED_IDEOGRAPHIC_SUPPLEMENT_ID = 196; // [1F200]
/** @stable ICU 4.4 */
public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_ID = 197; // [2A700]

// ---- New blocks in Unicode 6.0 ----

/** @stable ICU 4.6 */
public static final int MANDAIC_ID = 198; // [0840]
/** @stable ICU 4.6 */
public static final int BATAK_ID = 199; // [1BC0]
/** @stable ICU 4.6 */
public static final int ETHIOPIC_EXTENDED_A_ID = 200; // [AB00]
/** @stable ICU 4.6 */
public static final int BRAHMI_ID = 201; // [11000]
/** @stable ICU 4.6 */
public static final int BAMUM_SUPPLEMENT_ID = 202; // [16800]
/** @stable ICU 4.6 */
public static final int KANA_SUPPLEMENT_ID = 203; // [1B000]
/** @stable ICU 4.6 */
public static final int PLAYING_CARDS_ID = 204; // [1F0A0]
/** @stable ICU 4.6 */
public static final int MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS_ID = 205; // [1F300]
/** @stable ICU 4.6 */
public static final int EMOTICONS_ID = 206; // [1F600]
/** @stable ICU 4.6 */
public static final int TRANSPORT_AND_MAP_SYMBOLS_ID = 207; // [1F680]
/** @stable ICU 4.6 */
public static final int ALCHEMICAL_SYMBOLS_ID = 208; // [1F700]
/** @stable ICU 4.6 */
public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_ID = 209; // [2B740]

// ---- New blocks in Unicode 6.1 ----

/** @stable ICU 49 */
public static final int ARABIC_EXTENDED_A_ID = 210; // [08A0]
/** @stable ICU 49 */
public static final int ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS_ID = 211; // [1EE00]
/** @stable ICU 49 */
public static final int CHAKMA_ID = 212; // [11100]
/** @stable ICU 49 */
public static final int MEETEI_MAYEK_EXTENSIONS_ID = 213; // [AAE0]
/** @stable ICU 49 */
public static final int MEROITIC_CURSIVE_ID = 214; // [109A0]
/** @stable ICU 49 */
public static final int MEROITIC_HIEROGLYPHS_ID = 215; // [10980]
/** @stable ICU 49 */
public static final int MIAO_ID = 216; // [16F00]
/** @stable ICU 49 */
public static final int SHARADA_ID = 217; // [11180]
/** @stable ICU 49 */
public static final int SORA_SOMPENG_ID = 218; // [110D0]
/** @stable ICU 49 */
public static final int SUNDANESE_SUPPLEMENT_ID = 219; // [1CC0]
/** @stable ICU 49 */
public static final int TAKRI_ID = 220; // [11680]

// ---- New blocks in Unicode 7.0 ----

/** @stable ICU 54 */
public static final int BASSA_VAH_ID = 221; // [16AD0]
/** @stable ICU 54 */
public static final int CAUCASIAN_ALBANIAN_ID = 222; // [10530]
/** @stable ICU 54 */
public static final int COPTIC_EPACT_NUMBERS_ID = 223; // [102E0]
/** @stable ICU 54 */
public static final int COMBINING_DIACRITICAL_MARKS_EXTENDED_ID = 224; // [1AB0]
/** @stable ICU 54 */
public static final int DUPLOYAN_ID = 225; // [1BC00]
/** @stable ICU 54 */
public static final int ELBASAN_ID = 226; // [10500]
/** @stable ICU 54 */
public static final int GEOMETRIC_SHAPES_EXTENDED_ID = 227; // [1F780]
/** @stable ICU 54 */
public static final int GRANTHA_ID = 228; // [11300]
/** @stable ICU 54 */
public static final int KHOJKI_ID = 229; // [11200]
/** @stable ICU 54 */
public static final int KHUDAWADI_ID = 230; // [112B0]
/** @stable ICU 54 */
public static final int LATIN_EXTENDED_E_ID = 231; // [AB30]
/** @stable ICU 54 */
public static final int LINEAR_A_ID = 232; // [10600]
/** @stable ICU 54 */
public static final int MAHAJANI_ID = 233; // [11150]
/** @stable ICU 54 */
public static final int MANICHAEAN_ID = 234; // [10AC0]
/** @stable ICU 54 */
public static final int MENDE_KIKAKUI_ID = 235; // [1E800]
/** @stable ICU 54 */
public static final int MODI_ID = 236; // [11600]
/** @stable ICU 54 */
public static final int MRO_ID = 237; // [16A40]
/** @stable ICU 54 */
public static final int MYANMAR_EXTENDED_B_ID = 238; // [A9E0]
/** @stable ICU 54 */
public static final int NABATAEAN_ID = 239; // [10880]
/** @stable ICU 54 */
public static final int OLD_NORTH_ARABIAN_ID = 240; // [10A80]
/** @stable ICU 54 */
public static final int OLD_PERMIC_ID = 241; // [10350]
/** @stable ICU 54 */
public static final int ORNAMENTAL_DINGBATS_ID = 242; // [1F650]
/** @stable ICU 54 */
public static final int PAHAWH_HMONG_ID = 243; // [16B00]
/** @stable ICU 54 */
public static final int PALMYRENE_ID = 244; // [10860]
/** @stable ICU 54 */
public static final int PAU_CIN_HAU_ID = 245; // [11AC0]
/** @stable ICU 54 */
public static final int PSALTER_PAHLAVI_ID = 246; // [10B80]
/** @stable ICU 54 */
public static final int SHORTHAND_FORMAT_CONTROLS_ID = 247; // [1BCA0]
/** @stable ICU 54 */
public static final int SIDDHAM_ID = 248; // [11580]
/** @stable ICU 54 */
public static final int SINHALA_ARCHAIC_NUMBERS_ID = 249; // [111E0]
/** @stable ICU 54 */
public static final int SUPPLEMENTAL_ARROWS_C_ID = 250; // [1F800]
/** @stable ICU 54 */
public static final int TIRHUTA_ID = 251; // [11480]
/** @stable ICU 54 */
public static final int WARANG_CITI_ID = 252; // [118A0]

// ---- New blocks in Unicode 8.0 ----
1116 1117 /** @stable ICU 56 */ 1118 public static final int AHOM_ID = 253; /*[11700]*/ 1119 /** @stable ICU 56 */ 1120 public static final int ANATOLIAN_HIEROGLYPHS_ID = 254; /*[14400]*/ 1121 /** @stable ICU 56 */ 1122 public static final int CHEROKEE_SUPPLEMENT_ID = 255; /*[AB70]*/ 1123 /** @stable ICU 56 */ 1124 public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E_ID = 256; /*[2B820]*/ 1125 /** @stable ICU 56 */ 1126 public static final int EARLY_DYNASTIC_CUNEIFORM_ID = 257; /*[12480]*/ 1127 /** @stable ICU 56 */ 1128 public static final int HATRAN_ID = 258; /*[108E0]*/ 1129 /** @stable ICU 56 */ 1130 public static final int MULTANI_ID = 259; /*[11280]*/ 1131 /** @stable ICU 56 */ 1132 public static final int OLD_HUNGARIAN_ID = 260; /*[10C80]*/ 1133 /** @stable ICU 56 */ 1134 public static final int SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS_ID = 261; /*[1F900]*/ 1135 /** @stable ICU 56 */ 1136 public static final int SUTTON_SIGNWRITING_ID = 262; /*[1D800]*/ 1137 1138 /* New blocks in Unicode 9.0 */ 1139 1140 /** @stable ICU 58 */ 1141 public static final int ADLAM_ID = 263; /*[1E900]*/ 1142 /** @stable ICU 58 */ 1143 public static final int BHAIKSUKI_ID = 264; /*[11C00]*/ 1144 /** @stable ICU 58 */ 1145 public static final int CYRILLIC_EXTENDED_C_ID = 265; /*[1C80]*/ 1146 /** @stable ICU 58 */ 1147 public static final int GLAGOLITIC_SUPPLEMENT_ID = 266; /*[1E000]*/ 1148 /** @stable ICU 58 */ 1149 public static final int IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION_ID = 267; /*[16FE0]*/ 1150 /** @stable ICU 58 */ 1151 public static final int MARCHEN_ID = 268; /*[11C70]*/ 1152 /** @stable ICU 58 */ 1153 public static final int MONGOLIAN_SUPPLEMENT_ID = 269; /*[11660]*/ 1154 /** @stable ICU 58 */ 1155 public static final int NEWA_ID = 270; /*[11400]*/ 1156 /** @stable ICU 58 */ 1157 public static final int OSAGE_ID = 271; /*[104B0]*/ 1158 /** @stable ICU 58 */ 1159 public static final int TANGUT_ID = 272; /*[17000]*/ 1160 /** @stable ICU 58 */ 1161 public static final int 
TANGUT_COMPONENTS_ID = 273; /*[18800]*/ 1162 1163 // New blocks in Unicode 10.0 1164 1165 /** @stable ICU 60 */ 1166 public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F_ID = 274; /*[2CEB0]*/ 1167 /** @stable ICU 60 */ 1168 public static final int KANA_EXTENDED_A_ID = 275; /*[1B100]*/ 1169 /** @stable ICU 60 */ 1170 public static final int MASARAM_GONDI_ID = 276; /*[11D00]*/ 1171 /** @stable ICU 60 */ 1172 public static final int NUSHU_ID = 277; /*[1B170]*/ 1173 /** @stable ICU 60 */ 1174 public static final int SOYOMBO_ID = 278; /*[11A50]*/ 1175 /** @stable ICU 60 */ 1176 public static final int SYRIAC_SUPPLEMENT_ID = 279; /*[0860]*/ 1177 /** @stable ICU 60 */ 1178 public static final int ZANABAZAR_SQUARE_ID = 280; /*[11A00]*/ 1179 1180 // New blocks in Unicode 11.0 1181 1182 /** @stable ICU 62 */ 1183 public static final int CHESS_SYMBOLS_ID = 281; /*[1FA00]*/ 1184 /** @stable ICU 62 */ 1185 public static final int DOGRA_ID = 282; /*[11800]*/ 1186 /** @stable ICU 62 */ 1187 public static final int GEORGIAN_EXTENDED_ID = 283; /*[1C90]*/ 1188 /** @stable ICU 62 */ 1189 public static final int GUNJALA_GONDI_ID = 284; /*[11D60]*/ 1190 /** @stable ICU 62 */ 1191 public static final int HANIFI_ROHINGYA_ID = 285; /*[10D00]*/ 1192 /** @stable ICU 62 */ 1193 public static final int INDIC_SIYAQ_NUMBERS_ID = 286; /*[1EC70]*/ 1194 /** @stable ICU 62 */ 1195 public static final int MAKASAR_ID = 287; /*[11EE0]*/ 1196 /** @stable ICU 62 */ 1197 public static final int MAYAN_NUMERALS_ID = 288; /*[1D2E0]*/ 1198 /** @stable ICU 62 */ 1199 public static final int MEDEFAIDRIN_ID = 289; /*[16E40]*/ 1200 /** @stable ICU 62 */ 1201 public static final int OLD_SOGDIAN_ID = 290; /*[10F00]*/ 1202 /** @stable ICU 62 */ 1203 public static final int SOGDIAN_ID = 291; /*[10F30]*/ 1204 1205 /** 1206 * One more than the highest normal UnicodeBlock value. 1207 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.BLOCK). 
1208 * 1209 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 1210 */ 1211 @Deprecated 1212 public static final int COUNT = 292; 1213 1214 // blocks objects --------------------------------------------------- 1215 1216 /** 1217 * Array of UnicodeBlocks, for easy access in getInstance(int) 1218 */ 1219 private final static UnicodeBlock BLOCKS_[] = new UnicodeBlock[COUNT]; 1220 1221 /** 1222 * @stable ICU 2.6 1223 */ 1224 public static final UnicodeBlock NO_BLOCK 1225 = new UnicodeBlock("NO_BLOCK", 0); 1226 1227 /** 1228 * @stable ICU 2.4 1229 */ 1230 public static final UnicodeBlock BASIC_LATIN 1231 = new UnicodeBlock("BASIC_LATIN", BASIC_LATIN_ID); 1232 /** 1233 * @stable ICU 2.4 1234 */ 1235 public static final UnicodeBlock LATIN_1_SUPPLEMENT 1236 = new UnicodeBlock("LATIN_1_SUPPLEMENT", LATIN_1_SUPPLEMENT_ID); 1237 /** 1238 * @stable ICU 2.4 1239 */ 1240 public static final UnicodeBlock LATIN_EXTENDED_A 1241 = new UnicodeBlock("LATIN_EXTENDED_A", LATIN_EXTENDED_A_ID); 1242 /** 1243 * @stable ICU 2.4 1244 */ 1245 public static final UnicodeBlock LATIN_EXTENDED_B 1246 = new UnicodeBlock("LATIN_EXTENDED_B", LATIN_EXTENDED_B_ID); 1247 /** 1248 * @stable ICU 2.4 1249 */ 1250 public static final UnicodeBlock IPA_EXTENSIONS 1251 = new UnicodeBlock("IPA_EXTENSIONS", IPA_EXTENSIONS_ID); 1252 /** 1253 * @stable ICU 2.4 1254 */ 1255 public static final UnicodeBlock SPACING_MODIFIER_LETTERS 1256 = new UnicodeBlock("SPACING_MODIFIER_LETTERS", SPACING_MODIFIER_LETTERS_ID); 1257 /** 1258 * @stable ICU 2.4 1259 */ 1260 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS 1261 = new UnicodeBlock("COMBINING_DIACRITICAL_MARKS", COMBINING_DIACRITICAL_MARKS_ID); 1262 /** 1263 * Unicode 3.2 renames this block to "Greek and Coptic". 
1264 * @stable ICU 2.4 1265 */ 1266 public static final UnicodeBlock GREEK 1267 = new UnicodeBlock("GREEK", GREEK_ID); 1268 /** 1269 * @stable ICU 2.4 1270 */ 1271 public static final UnicodeBlock CYRILLIC 1272 = new UnicodeBlock("CYRILLIC", CYRILLIC_ID); 1273 /** 1274 * @stable ICU 2.4 1275 */ 1276 public static final UnicodeBlock ARMENIAN 1277 = new UnicodeBlock("ARMENIAN", ARMENIAN_ID); 1278 /** 1279 * @stable ICU 2.4 1280 */ 1281 public static final UnicodeBlock HEBREW 1282 = new UnicodeBlock("HEBREW", HEBREW_ID); 1283 /** 1284 * @stable ICU 2.4 1285 */ 1286 public static final UnicodeBlock ARABIC 1287 = new UnicodeBlock("ARABIC", ARABIC_ID); 1288 /** 1289 * @stable ICU 2.4 1290 */ 1291 public static final UnicodeBlock SYRIAC 1292 = new UnicodeBlock("SYRIAC", SYRIAC_ID); 1293 /** 1294 * @stable ICU 2.4 1295 */ 1296 public static final UnicodeBlock THAANA 1297 = new UnicodeBlock("THAANA", THAANA_ID); 1298 /** 1299 * @stable ICU 2.4 1300 */ 1301 public static final UnicodeBlock DEVANAGARI 1302 = new UnicodeBlock("DEVANAGARI", DEVANAGARI_ID); 1303 /** 1304 * @stable ICU 2.4 1305 */ 1306 public static final UnicodeBlock BENGALI 1307 = new UnicodeBlock("BENGALI", BENGALI_ID); 1308 /** 1309 * @stable ICU 2.4 1310 */ 1311 public static final UnicodeBlock GURMUKHI 1312 = new UnicodeBlock("GURMUKHI", GURMUKHI_ID); 1313 /** 1314 * @stable ICU 2.4 1315 */ 1316 public static final UnicodeBlock GUJARATI 1317 = new UnicodeBlock("GUJARATI", GUJARATI_ID); 1318 /** 1319 * @stable ICU 2.4 1320 */ 1321 public static final UnicodeBlock ORIYA 1322 = new UnicodeBlock("ORIYA", ORIYA_ID); 1323 /** 1324 * @stable ICU 2.4 1325 */ 1326 public static final UnicodeBlock TAMIL 1327 = new UnicodeBlock("TAMIL", TAMIL_ID); 1328 /** 1329 * @stable ICU 2.4 1330 */ 1331 public static final UnicodeBlock TELUGU 1332 = new UnicodeBlock("TELUGU", TELUGU_ID); 1333 /** 1334 * @stable ICU 2.4 1335 */ 1336 public static final UnicodeBlock KANNADA 1337 = new UnicodeBlock("KANNADA", KANNADA_ID); 1338 /** 
1339 * @stable ICU 2.4 1340 */ 1341 public static final UnicodeBlock MALAYALAM 1342 = new UnicodeBlock("MALAYALAM", MALAYALAM_ID); 1343 /** 1344 * @stable ICU 2.4 1345 */ 1346 public static final UnicodeBlock SINHALA 1347 = new UnicodeBlock("SINHALA", SINHALA_ID); 1348 /** 1349 * @stable ICU 2.4 1350 */ 1351 public static final UnicodeBlock THAI 1352 = new UnicodeBlock("THAI", THAI_ID); 1353 /** 1354 * @stable ICU 2.4 1355 */ 1356 public static final UnicodeBlock LAO 1357 = new UnicodeBlock("LAO", LAO_ID); 1358 /** 1359 * @stable ICU 2.4 1360 */ 1361 public static final UnicodeBlock TIBETAN 1362 = new UnicodeBlock("TIBETAN", TIBETAN_ID); 1363 /** 1364 * @stable ICU 2.4 1365 */ 1366 public static final UnicodeBlock MYANMAR 1367 = new UnicodeBlock("MYANMAR", MYANMAR_ID); 1368 /** 1369 * @stable ICU 2.4 1370 */ 1371 public static final UnicodeBlock GEORGIAN 1372 = new UnicodeBlock("GEORGIAN", GEORGIAN_ID); 1373 /** 1374 * @stable ICU 2.4 1375 */ 1376 public static final UnicodeBlock HANGUL_JAMO 1377 = new UnicodeBlock("HANGUL_JAMO", HANGUL_JAMO_ID); 1378 /** 1379 * @stable ICU 2.4 1380 */ 1381 public static final UnicodeBlock ETHIOPIC 1382 = new UnicodeBlock("ETHIOPIC", ETHIOPIC_ID); 1383 /** 1384 * @stable ICU 2.4 1385 */ 1386 public static final UnicodeBlock CHEROKEE 1387 = new UnicodeBlock("CHEROKEE", CHEROKEE_ID); 1388 /** 1389 * @stable ICU 2.4 1390 */ 1391 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS 1392 = new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS", 1393 UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_ID); 1394 /** 1395 * @stable ICU 2.4 1396 */ 1397 public static final UnicodeBlock OGHAM 1398 = new UnicodeBlock("OGHAM", OGHAM_ID); 1399 /** 1400 * @stable ICU 2.4 1401 */ 1402 public static final UnicodeBlock RUNIC 1403 = new UnicodeBlock("RUNIC", RUNIC_ID); 1404 /** 1405 * @stable ICU 2.4 1406 */ 1407 public static final UnicodeBlock KHMER 1408 = new UnicodeBlock("KHMER", KHMER_ID); 1409 /** 1410 * @stable ICU 2.4 1411 */ 1412 
public static final UnicodeBlock MONGOLIAN 1413 = new UnicodeBlock("MONGOLIAN", MONGOLIAN_ID); 1414 /** 1415 * @stable ICU 2.4 1416 */ 1417 public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL 1418 = new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL", LATIN_EXTENDED_ADDITIONAL_ID); 1419 /** 1420 * @stable ICU 2.4 1421 */ 1422 public static final UnicodeBlock GREEK_EXTENDED 1423 = new UnicodeBlock("GREEK_EXTENDED", GREEK_EXTENDED_ID); 1424 /** 1425 * @stable ICU 2.4 1426 */ 1427 public static final UnicodeBlock GENERAL_PUNCTUATION 1428 = new UnicodeBlock("GENERAL_PUNCTUATION", GENERAL_PUNCTUATION_ID); 1429 /** 1430 * @stable ICU 2.4 1431 */ 1432 public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS 1433 = new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS", SUPERSCRIPTS_AND_SUBSCRIPTS_ID); 1434 /** 1435 * @stable ICU 2.4 1436 */ 1437 public static final UnicodeBlock CURRENCY_SYMBOLS 1438 = new UnicodeBlock("CURRENCY_SYMBOLS", CURRENCY_SYMBOLS_ID); 1439 /** 1440 * Unicode 3.2 renames this block to "Combining Diacritical Marks for 1441 * Symbols". 
1442 * @stable ICU 2.4 1443 */ 1444 public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS 1445 = new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS", COMBINING_MARKS_FOR_SYMBOLS_ID); 1446 /** 1447 * @stable ICU 2.4 1448 */ 1449 public static final UnicodeBlock LETTERLIKE_SYMBOLS 1450 = new UnicodeBlock("LETTERLIKE_SYMBOLS", LETTERLIKE_SYMBOLS_ID); 1451 /** 1452 * @stable ICU 2.4 1453 */ 1454 public static final UnicodeBlock NUMBER_FORMS 1455 = new UnicodeBlock("NUMBER_FORMS", NUMBER_FORMS_ID); 1456 /** 1457 * @stable ICU 2.4 1458 */ 1459 public static final UnicodeBlock ARROWS 1460 = new UnicodeBlock("ARROWS", ARROWS_ID); 1461 /** 1462 * @stable ICU 2.4 1463 */ 1464 public static final UnicodeBlock MATHEMATICAL_OPERATORS 1465 = new UnicodeBlock("MATHEMATICAL_OPERATORS", MATHEMATICAL_OPERATORS_ID); 1466 /** 1467 * @stable ICU 2.4 1468 */ 1469 public static final UnicodeBlock MISCELLANEOUS_TECHNICAL 1470 = new UnicodeBlock("MISCELLANEOUS_TECHNICAL", MISCELLANEOUS_TECHNICAL_ID); 1471 /** 1472 * @stable ICU 2.4 1473 */ 1474 public static final UnicodeBlock CONTROL_PICTURES 1475 = new UnicodeBlock("CONTROL_PICTURES", CONTROL_PICTURES_ID); 1476 /** 1477 * @stable ICU 2.4 1478 */ 1479 public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION 1480 = new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION", OPTICAL_CHARACTER_RECOGNITION_ID); 1481 /** 1482 * @stable ICU 2.4 1483 */ 1484 public static final UnicodeBlock ENCLOSED_ALPHANUMERICS 1485 = new UnicodeBlock("ENCLOSED_ALPHANUMERICS", ENCLOSED_ALPHANUMERICS_ID); 1486 /** 1487 * @stable ICU 2.4 1488 */ 1489 public static final UnicodeBlock BOX_DRAWING 1490 = new UnicodeBlock("BOX_DRAWING", BOX_DRAWING_ID); 1491 /** 1492 * @stable ICU 2.4 1493 */ 1494 public static final UnicodeBlock BLOCK_ELEMENTS 1495 = new UnicodeBlock("BLOCK_ELEMENTS", BLOCK_ELEMENTS_ID); 1496 /** 1497 * @stable ICU 2.4 1498 */ 1499 public static final UnicodeBlock GEOMETRIC_SHAPES 1500 = new UnicodeBlock("GEOMETRIC_SHAPES", GEOMETRIC_SHAPES_ID); 1501 
/** 1502 * @stable ICU 2.4 1503 */ 1504 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS 1505 = new UnicodeBlock("MISCELLANEOUS_SYMBOLS", MISCELLANEOUS_SYMBOLS_ID); 1506 /** 1507 * @stable ICU 2.4 1508 */ 1509 public static final UnicodeBlock DINGBATS 1510 = new UnicodeBlock("DINGBATS", DINGBATS_ID); 1511 /** 1512 * @stable ICU 2.4 1513 */ 1514 public static final UnicodeBlock BRAILLE_PATTERNS 1515 = new UnicodeBlock("BRAILLE_PATTERNS", BRAILLE_PATTERNS_ID); 1516 /** 1517 * @stable ICU 2.4 1518 */ 1519 public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT 1520 = new UnicodeBlock("CJK_RADICALS_SUPPLEMENT", CJK_RADICALS_SUPPLEMENT_ID); 1521 /** 1522 * @stable ICU 2.4 1523 */ 1524 public static final UnicodeBlock KANGXI_RADICALS 1525 = new UnicodeBlock("KANGXI_RADICALS", KANGXI_RADICALS_ID); 1526 /** 1527 * @stable ICU 2.4 1528 */ 1529 public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS 1530 = new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS", 1531 IDEOGRAPHIC_DESCRIPTION_CHARACTERS_ID); 1532 /** 1533 * @stable ICU 2.4 1534 */ 1535 public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION 1536 = new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION", CJK_SYMBOLS_AND_PUNCTUATION_ID); 1537 /** 1538 * @stable ICU 2.4 1539 */ 1540 public static final UnicodeBlock HIRAGANA 1541 = new UnicodeBlock("HIRAGANA", HIRAGANA_ID); 1542 /** 1543 * @stable ICU 2.4 1544 */ 1545 public static final UnicodeBlock KATAKANA 1546 = new UnicodeBlock("KATAKANA", KATAKANA_ID); 1547 /** 1548 * @stable ICU 2.4 1549 */ 1550 public static final UnicodeBlock BOPOMOFO 1551 = new UnicodeBlock("BOPOMOFO", BOPOMOFO_ID); 1552 /** 1553 * @stable ICU 2.4 1554 */ 1555 public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO 1556 = new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO", HANGUL_COMPATIBILITY_JAMO_ID); 1557 /** 1558 * @stable ICU 2.4 1559 */ 1560 public static final UnicodeBlock KANBUN 1561 = new UnicodeBlock("KANBUN", KANBUN_ID); 1562 /** 1563 * @stable ICU 2.4 1564 */ 1565 
public static final UnicodeBlock BOPOMOFO_EXTENDED 1566 = new UnicodeBlock("BOPOMOFO_EXTENDED", BOPOMOFO_EXTENDED_ID); 1567 /** 1568 * @stable ICU 2.4 1569 */ 1570 public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS 1571 = new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS", 1572 ENCLOSED_CJK_LETTERS_AND_MONTHS_ID); 1573 /** 1574 * @stable ICU 2.4 1575 */ 1576 public static final UnicodeBlock CJK_COMPATIBILITY 1577 = new UnicodeBlock("CJK_COMPATIBILITY", CJK_COMPATIBILITY_ID); 1578 /** 1579 * @stable ICU 2.4 1580 */ 1581 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A 1582 = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A", 1583 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_ID); 1584 /** 1585 * @stable ICU 2.4 1586 */ 1587 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS 1588 = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS", CJK_UNIFIED_IDEOGRAPHS_ID); 1589 /** 1590 * @stable ICU 2.4 1591 */ 1592 public static final UnicodeBlock YI_SYLLABLES 1593 = new UnicodeBlock("YI_SYLLABLES", YI_SYLLABLES_ID); 1594 /** 1595 * @stable ICU 2.4 1596 */ 1597 public static final UnicodeBlock YI_RADICALS 1598 = new UnicodeBlock("YI_RADICALS", YI_RADICALS_ID); 1599 /** 1600 * @stable ICU 2.4 1601 */ 1602 public static final UnicodeBlock HANGUL_SYLLABLES 1603 = new UnicodeBlock("HANGUL_SYLLABLES", HANGUL_SYLLABLES_ID); 1604 /** 1605 * @stable ICU 2.4 1606 */ 1607 public static final UnicodeBlock HIGH_SURROGATES 1608 = new UnicodeBlock("HIGH_SURROGATES", HIGH_SURROGATES_ID); 1609 /** 1610 * @stable ICU 2.4 1611 */ 1612 public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES 1613 = new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES", HIGH_PRIVATE_USE_SURROGATES_ID); 1614 /** 1615 * @stable ICU 2.4 1616 */ 1617 public static final UnicodeBlock LOW_SURROGATES 1618 = new UnicodeBlock("LOW_SURROGATES", LOW_SURROGATES_ID); 1619 /** 1620 * Same as public static final int PRIVATE_USE. 
1621 * Until Unicode 3.1.1; the corresponding block name was "Private Use"; 1622 * and multiple code point ranges had this block. 1623 * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" 1624 * and adds separate blocks for the supplementary PUAs. 1625 * @stable ICU 2.4 1626 */ 1627 public static final UnicodeBlock PRIVATE_USE_AREA 1628 = new UnicodeBlock("PRIVATE_USE_AREA", 78); 1629 /** 1630 * Same as public static final int PRIVATE_USE_AREA. 1631 * Until Unicode 3.1.1; the corresponding block name was "Private Use"; 1632 * and multiple code point ranges had this block. 1633 * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" 1634 * and adds separate blocks for the supplementary PUAs. 1635 * @stable ICU 2.4 1636 */ 1637 public static final UnicodeBlock PRIVATE_USE 1638 = PRIVATE_USE_AREA; 1639 /** 1640 * @stable ICU 2.4 1641 */ 1642 public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS 1643 = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS", CJK_COMPATIBILITY_IDEOGRAPHS_ID); 1644 /** 1645 * @stable ICU 2.4 1646 */ 1647 public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS 1648 = new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS", ALPHABETIC_PRESENTATION_FORMS_ID); 1649 /** 1650 * @stable ICU 2.4 1651 */ 1652 public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A 1653 = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A", ARABIC_PRESENTATION_FORMS_A_ID); 1654 /** 1655 * @stable ICU 2.4 1656 */ 1657 public static final UnicodeBlock COMBINING_HALF_MARKS 1658 = new UnicodeBlock("COMBINING_HALF_MARKS", COMBINING_HALF_MARKS_ID); 1659 /** 1660 * @stable ICU 2.4 1661 */ 1662 public static final UnicodeBlock CJK_COMPATIBILITY_FORMS 1663 = new UnicodeBlock("CJK_COMPATIBILITY_FORMS", CJK_COMPATIBILITY_FORMS_ID); 1664 /** 1665 * @stable ICU 2.4 1666 */ 1667 public static final UnicodeBlock SMALL_FORM_VARIANTS 1668 = new UnicodeBlock("SMALL_FORM_VARIANTS", SMALL_FORM_VARIANTS_ID); 1669 /** 1670 * @stable ICU 2.4 1671 */ 1672 
public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B 1673 = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B", ARABIC_PRESENTATION_FORMS_B_ID); 1674 /** 1675 * @stable ICU 2.4 1676 */ 1677 public static final UnicodeBlock SPECIALS 1678 = new UnicodeBlock("SPECIALS", SPECIALS_ID); 1679 /** 1680 * @stable ICU 2.4 1681 */ 1682 public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS 1683 = new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS", HALFWIDTH_AND_FULLWIDTH_FORMS_ID); 1684 /** 1685 * @stable ICU 2.4 1686 */ 1687 public static final UnicodeBlock OLD_ITALIC 1688 = new UnicodeBlock("OLD_ITALIC", OLD_ITALIC_ID); 1689 /** 1690 * @stable ICU 2.4 1691 */ 1692 public static final UnicodeBlock GOTHIC 1693 = new UnicodeBlock("GOTHIC", GOTHIC_ID); 1694 /** 1695 * @stable ICU 2.4 1696 */ 1697 public static final UnicodeBlock DESERET 1698 = new UnicodeBlock("DESERET", DESERET_ID); 1699 /** 1700 * @stable ICU 2.4 1701 */ 1702 public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS 1703 = new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS", BYZANTINE_MUSICAL_SYMBOLS_ID); 1704 /** 1705 * @stable ICU 2.4 1706 */ 1707 public static final UnicodeBlock MUSICAL_SYMBOLS 1708 = new UnicodeBlock("MUSICAL_SYMBOLS", MUSICAL_SYMBOLS_ID); 1709 /** 1710 * @stable ICU 2.4 1711 */ 1712 public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS 1713 = new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS", 1714 MATHEMATICAL_ALPHANUMERIC_SYMBOLS_ID); 1715 /** 1716 * @stable ICU 2.4 1717 */ 1718 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B 1719 = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B", 1720 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_ID); 1721 /** 1722 * @stable ICU 2.4 1723 */ 1724 public static final UnicodeBlock 1725 CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT 1726 = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT", 1727 CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_ID); 1728 /** 1729 * @stable ICU 2.4 1730 */ 1731 public static final UnicodeBlock TAGS 
1732 = new UnicodeBlock("TAGS", TAGS_ID); 1733 1734 // New blocks in Unicode 3.2 1735 1736 /** 1737 * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement". 1738 * @stable ICU 2.4 1739 */ 1740 public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY 1741 = new UnicodeBlock("CYRILLIC_SUPPLEMENTARY", CYRILLIC_SUPPLEMENTARY_ID); 1742 /** 1743 * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement". 1744 * @stable ICU 3.0 1745 */ 1746 public static final UnicodeBlock CYRILLIC_SUPPLEMENT 1747 = new UnicodeBlock("CYRILLIC_SUPPLEMENT", CYRILLIC_SUPPLEMENT_ID); 1748 /** 1749 * @stable ICU 2.4 1750 */ 1751 public static final UnicodeBlock TAGALOG 1752 = new UnicodeBlock("TAGALOG", TAGALOG_ID); 1753 /** 1754 * @stable ICU 2.4 1755 */ 1756 public static final UnicodeBlock HANUNOO 1757 = new UnicodeBlock("HANUNOO", HANUNOO_ID); 1758 /** 1759 * @stable ICU 2.4 1760 */ 1761 public static final UnicodeBlock BUHID 1762 = new UnicodeBlock("BUHID", BUHID_ID); 1763 /** 1764 * @stable ICU 2.4 1765 */ 1766 public static final UnicodeBlock TAGBANWA 1767 = new UnicodeBlock("TAGBANWA", TAGBANWA_ID); 1768 /** 1769 * @stable ICU 2.4 1770 */ 1771 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A 1772 = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A", 1773 MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A_ID); 1774 /** 1775 * @stable ICU 2.4 1776 */ 1777 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A 1778 = new UnicodeBlock("SUPPLEMENTAL_ARROWS_A", SUPPLEMENTAL_ARROWS_A_ID); 1779 /** 1780 * @stable ICU 2.4 1781 */ 1782 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B 1783 = new UnicodeBlock("SUPPLEMENTAL_ARROWS_B", SUPPLEMENTAL_ARROWS_B_ID); 1784 /** 1785 * @stable ICU 2.4 1786 */ 1787 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B 1788 = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B", 1789 MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B_ID); 1790 /** 1791 * @stable ICU 2.4 1792 */ 1793 
public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS 1794 = new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS", 1795 SUPPLEMENTAL_MATHEMATICAL_OPERATORS_ID); 1796 /** 1797 * @stable ICU 2.4 1798 */ 1799 public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS 1800 = new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS", KATAKANA_PHONETIC_EXTENSIONS_ID); 1801 /** 1802 * @stable ICU 2.4 1803 */ 1804 public static final UnicodeBlock VARIATION_SELECTORS 1805 = new UnicodeBlock("VARIATION_SELECTORS", VARIATION_SELECTORS_ID); 1806 /** 1807 * @stable ICU 2.4 1808 */ 1809 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A 1810 = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A", 1811 SUPPLEMENTARY_PRIVATE_USE_AREA_A_ID); 1812 /** 1813 * @stable ICU 2.4 1814 */ 1815 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B 1816 = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B", 1817 SUPPLEMENTARY_PRIVATE_USE_AREA_B_ID); 1818 1819 /** 1820 * @stable ICU 2.6 1821 */ 1822 public static final UnicodeBlock LIMBU 1823 = new UnicodeBlock("LIMBU", LIMBU_ID); 1824 /** 1825 * @stable ICU 2.6 1826 */ 1827 public static final UnicodeBlock TAI_LE 1828 = new UnicodeBlock("TAI_LE", TAI_LE_ID); 1829 /** 1830 * @stable ICU 2.6 1831 */ 1832 public static final UnicodeBlock KHMER_SYMBOLS 1833 = new UnicodeBlock("KHMER_SYMBOLS", KHMER_SYMBOLS_ID); 1834 1835 /** 1836 * @stable ICU 2.6 1837 */ 1838 public static final UnicodeBlock PHONETIC_EXTENSIONS 1839 = new UnicodeBlock("PHONETIC_EXTENSIONS", PHONETIC_EXTENSIONS_ID); 1840 1841 /** 1842 * @stable ICU 2.6 1843 */ 1844 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS 1845 = new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS", 1846 MISCELLANEOUS_SYMBOLS_AND_ARROWS_ID); 1847 /** 1848 * @stable ICU 2.6 1849 */ 1850 public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS 1851 = new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS", YIJING_HEXAGRAM_SYMBOLS_ID); 1852 /** 1853 * @stable ICU 2.6 
1854 */ 1855 public static final UnicodeBlock LINEAR_B_SYLLABARY 1856 = new UnicodeBlock("LINEAR_B_SYLLABARY", LINEAR_B_SYLLABARY_ID); 1857 /** 1858 * @stable ICU 2.6 1859 */ 1860 public static final UnicodeBlock LINEAR_B_IDEOGRAMS 1861 = new UnicodeBlock("LINEAR_B_IDEOGRAMS", LINEAR_B_IDEOGRAMS_ID); 1862 /** 1863 * @stable ICU 2.6 1864 */ 1865 public static final UnicodeBlock AEGEAN_NUMBERS 1866 = new UnicodeBlock("AEGEAN_NUMBERS", AEGEAN_NUMBERS_ID); 1867 /** 1868 * @stable ICU 2.6 1869 */ 1870 public static final UnicodeBlock UGARITIC 1871 = new UnicodeBlock("UGARITIC", UGARITIC_ID); 1872 /** 1873 * @stable ICU 2.6 1874 */ 1875 public static final UnicodeBlock SHAVIAN 1876 = new UnicodeBlock("SHAVIAN", SHAVIAN_ID); 1877 /** 1878 * @stable ICU 2.6 1879 */ 1880 public static final UnicodeBlock OSMANYA 1881 = new UnicodeBlock("OSMANYA", OSMANYA_ID); 1882 /** 1883 * @stable ICU 2.6 1884 */ 1885 public static final UnicodeBlock CYPRIOT_SYLLABARY 1886 = new UnicodeBlock("CYPRIOT_SYLLABARY", CYPRIOT_SYLLABARY_ID); 1887 /** 1888 * @stable ICU 2.6 1889 */ 1890 public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS 1891 = new UnicodeBlock("TAI_XUAN_JING_SYMBOLS", TAI_XUAN_JING_SYMBOLS_ID); 1892 1893 /** 1894 * @stable ICU 2.6 1895 */ 1896 public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT 1897 = new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT", VARIATION_SELECTORS_SUPPLEMENT_ID); 1898 1899 /* New blocks in Unicode 4.1 */ 1900 1901 /** 1902 * @stable ICU 3.4 1903 */ 1904 public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION = 1905 new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION", 1906 ANCIENT_GREEK_MUSICAL_NOTATION_ID); /*[1D200]*/ 1907 1908 /** 1909 * @stable ICU 3.4 1910 */ 1911 public static final UnicodeBlock ANCIENT_GREEK_NUMBERS = 1912 new UnicodeBlock("ANCIENT_GREEK_NUMBERS", ANCIENT_GREEK_NUMBERS_ID); /*[10140]*/ 1913 1914 /** 1915 * @stable ICU 3.4 1916 */ 1917 public static final UnicodeBlock ARABIC_SUPPLEMENT = 1918 new 
UnicodeBlock("ARABIC_SUPPLEMENT", ARABIC_SUPPLEMENT_ID); /*[0750]*/ 1919 1920 /** 1921 * @stable ICU 3.4 1922 */ 1923 public static final UnicodeBlock BUGINESE = 1924 new UnicodeBlock("BUGINESE", BUGINESE_ID); /*[1A00]*/ 1925 1926 /** 1927 * @stable ICU 3.4 1928 */ 1929 public static final UnicodeBlock CJK_STROKES = 1930 new UnicodeBlock("CJK_STROKES", CJK_STROKES_ID); /*[31C0]*/ 1931 1932 /** 1933 * @stable ICU 3.4 1934 */ 1935 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT = 1936 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT", 1937 COMBINING_DIACRITICAL_MARKS_SUPPLEMENT_ID); /*[1DC0]*/ 1938 1939 /** 1940 * @stable ICU 3.4 1941 */ 1942 public static final UnicodeBlock COPTIC = new UnicodeBlock("COPTIC", COPTIC_ID); /*[2C80]*/ 1943 1944 /** 1945 * @stable ICU 3.4 1946 */ 1947 public static final UnicodeBlock ETHIOPIC_EXTENDED = 1948 new UnicodeBlock("ETHIOPIC_EXTENDED", ETHIOPIC_EXTENDED_ID); /*[2D80]*/ 1949 1950 /** 1951 * @stable ICU 3.4 1952 */ 1953 public static final UnicodeBlock ETHIOPIC_SUPPLEMENT = 1954 new UnicodeBlock("ETHIOPIC_SUPPLEMENT", ETHIOPIC_SUPPLEMENT_ID); /*[1380]*/ 1955 1956 /** 1957 * @stable ICU 3.4 1958 */ 1959 public static final UnicodeBlock GEORGIAN_SUPPLEMENT = 1960 new UnicodeBlock("GEORGIAN_SUPPLEMENT", GEORGIAN_SUPPLEMENT_ID); /*[2D00]*/ 1961 1962 /** 1963 * @stable ICU 3.4 1964 */ 1965 public static final UnicodeBlock GLAGOLITIC = 1966 new UnicodeBlock("GLAGOLITIC", GLAGOLITIC_ID); /*[2C00]*/ 1967 1968 /** 1969 * @stable ICU 3.4 1970 */ 1971 public static final UnicodeBlock KHAROSHTHI = 1972 new UnicodeBlock("KHAROSHTHI", KHAROSHTHI_ID); /*[10A00]*/ 1973 1974 /** 1975 * @stable ICU 3.4 1976 */ 1977 public static final UnicodeBlock MODIFIER_TONE_LETTERS = 1978 new UnicodeBlock("MODIFIER_TONE_LETTERS", MODIFIER_TONE_LETTERS_ID); /*[A700]*/ 1979 1980 /** 1981 * @stable ICU 3.4 1982 */ 1983 public static final UnicodeBlock NEW_TAI_LUE = 1984 new UnicodeBlock("NEW_TAI_LUE", NEW_TAI_LUE_ID); /*[1980]*/ 
1985 1986 /** 1987 * @stable ICU 3.4 1988 */ 1989 public static final UnicodeBlock OLD_PERSIAN = 1990 new UnicodeBlock("OLD_PERSIAN", OLD_PERSIAN_ID); /*[103A0]*/ 1991 1992 /** 1993 * @stable ICU 3.4 1994 */ 1995 public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT = 1996 new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT", 1997 PHONETIC_EXTENSIONS_SUPPLEMENT_ID); /*[1D80]*/ 1998 1999 /** 2000 * @stable ICU 3.4 2001 */ 2002 public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION = 2003 new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION", SUPPLEMENTAL_PUNCTUATION_ID); /*[2E00]*/ 2004 2005 /** 2006 * @stable ICU 3.4 2007 */ 2008 public static final UnicodeBlock SYLOTI_NAGRI = 2009 new UnicodeBlock("SYLOTI_NAGRI", SYLOTI_NAGRI_ID); /*[A800]*/ 2010 2011 /** 2012 * @stable ICU 3.4 2013 */ 2014 public static final UnicodeBlock TIFINAGH = 2015 new UnicodeBlock("TIFINAGH", TIFINAGH_ID); /*[2D30]*/ 2016 2017 /** 2018 * @stable ICU 3.4 2019 */ 2020 public static final UnicodeBlock VERTICAL_FORMS = 2021 new UnicodeBlock("VERTICAL_FORMS", VERTICAL_FORMS_ID); /*[FE10]*/ 2022 2023 /** 2024 * @stable ICU 3.6 2025 */ 2026 public static final UnicodeBlock NKO = new UnicodeBlock("NKO", NKO_ID); /*[07C0]*/ 2027 /** 2028 * @stable ICU 3.6 2029 */ 2030 public static final UnicodeBlock BALINESE = 2031 new UnicodeBlock("BALINESE", BALINESE_ID); /*[1B00]*/ 2032 /** 2033 * @stable ICU 3.6 2034 */ 2035 public static final UnicodeBlock LATIN_EXTENDED_C = 2036 new UnicodeBlock("LATIN_EXTENDED_C", LATIN_EXTENDED_C_ID); /*[2C60]*/ 2037 /** 2038 * @stable ICU 3.6 2039 */ 2040 public static final UnicodeBlock LATIN_EXTENDED_D = 2041 new UnicodeBlock("LATIN_EXTENDED_D", LATIN_EXTENDED_D_ID); /*[A720]*/ 2042 /** 2043 * @stable ICU 3.6 2044 */ 2045 public static final UnicodeBlock PHAGS_PA = 2046 new UnicodeBlock("PHAGS_PA", PHAGS_PA_ID); /*[A840]*/ 2047 /** 2048 * @stable ICU 3.6 2049 */ 2050 public static final UnicodeBlock PHOENICIAN = 2051 new UnicodeBlock("PHOENICIAN", PHOENICIAN_ID); 
/*[10900]*/ 2052 /** 2053 * @stable ICU 3.6 2054 */ 2055 public static final UnicodeBlock CUNEIFORM = 2056 new UnicodeBlock("CUNEIFORM", CUNEIFORM_ID); /*[12000]*/ 2057 /** 2058 * @stable ICU 3.6 2059 */ 2060 public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION = 2061 new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION", 2062 CUNEIFORM_NUMBERS_AND_PUNCTUATION_ID); /*[12400]*/ 2063 /** 2064 * @stable ICU 3.6 2065 */ 2066 public static final UnicodeBlock COUNTING_ROD_NUMERALS = 2067 new UnicodeBlock("COUNTING_ROD_NUMERALS", COUNTING_ROD_NUMERALS_ID); /*[1D360]*/ 2068 2069 /** 2070 * @stable ICU 4.0 2071 */ 2072 public static final UnicodeBlock SUNDANESE = 2073 new UnicodeBlock("SUNDANESE", SUNDANESE_ID); /* [1B80] */ 2074 2075 /** 2076 * @stable ICU 4.0 2077 */ 2078 public static final UnicodeBlock LEPCHA = 2079 new UnicodeBlock("LEPCHA", LEPCHA_ID); /* [1C00] */ 2080 2081 /** 2082 * @stable ICU 4.0 2083 */ 2084 public static final UnicodeBlock OL_CHIKI = 2085 new UnicodeBlock("OL_CHIKI", OL_CHIKI_ID); /* [1C50] */ 2086 2087 /** 2088 * @stable ICU 4.0 2089 */ 2090 public static final UnicodeBlock CYRILLIC_EXTENDED_A = 2091 new UnicodeBlock("CYRILLIC_EXTENDED_A", CYRILLIC_EXTENDED_A_ID); /* [2DE0] */ 2092 2093 /** 2094 * @stable ICU 4.0 2095 */ 2096 public static final UnicodeBlock VAI = new UnicodeBlock("VAI", VAI_ID); /* [A500] */ 2097 2098 /** 2099 * @stable ICU 4.0 2100 */ 2101 public static final UnicodeBlock CYRILLIC_EXTENDED_B = 2102 new UnicodeBlock("CYRILLIC_EXTENDED_B", CYRILLIC_EXTENDED_B_ID); /* [A640] */ 2103 2104 /** 2105 * @stable ICU 4.0 2106 */ 2107 public static final UnicodeBlock SAURASHTRA = 2108 new UnicodeBlock("SAURASHTRA", SAURASHTRA_ID); /* [A880] */ 2109 2110 /** 2111 * @stable ICU 4.0 2112 */ 2113 public static final UnicodeBlock KAYAH_LI = 2114 new UnicodeBlock("KAYAH_LI", KAYAH_LI_ID); /* [A900] */ 2115 2116 /** 2117 * @stable ICU 4.0 2118 */ 2119 public static final UnicodeBlock REJANG = 2120 new UnicodeBlock("REJANG", 
REJANG_ID); /* [A930] */ 2121 2122 /** 2123 * @stable ICU 4.0 2124 */ 2125 public static final UnicodeBlock CHAM = 2126 new UnicodeBlock("CHAM", CHAM_ID); /* [AA00] */ 2127 2128 /** 2129 * @stable ICU 4.0 2130 */ 2131 public static final UnicodeBlock ANCIENT_SYMBOLS = 2132 new UnicodeBlock("ANCIENT_SYMBOLS", ANCIENT_SYMBOLS_ID); /* [10190] */ 2133 2134 /** 2135 * @stable ICU 4.0 2136 */ 2137 public static final UnicodeBlock PHAISTOS_DISC = 2138 new UnicodeBlock("PHAISTOS_DISC", PHAISTOS_DISC_ID); /* [101D0] */ 2139 2140 /** 2141 * @stable ICU 4.0 2142 */ 2143 public static final UnicodeBlock LYCIAN = 2144 new UnicodeBlock("LYCIAN", LYCIAN_ID); /* [10280] */ 2145 2146 /** 2147 * @stable ICU 4.0 2148 */ 2149 public static final UnicodeBlock CARIAN = 2150 new UnicodeBlock("CARIAN", CARIAN_ID); /* [102A0] */ 2151 2152 /** 2153 * @stable ICU 4.0 2154 */ 2155 public static final UnicodeBlock LYDIAN = 2156 new UnicodeBlock("LYDIAN", LYDIAN_ID); /* [10920] */ 2157 2158 /** 2159 * @stable ICU 4.0 2160 */ 2161 public static final UnicodeBlock MAHJONG_TILES = 2162 new UnicodeBlock("MAHJONG_TILES", MAHJONG_TILES_ID); /* [1F000] */ 2163 2164 /** 2165 * @stable ICU 4.0 2166 */ 2167 public static final UnicodeBlock DOMINO_TILES = 2168 new UnicodeBlock("DOMINO_TILES", DOMINO_TILES_ID); /* [1F030] */ 2169 2170 /* New blocks in Unicode 5.2 */ 2171 2172 /** @stable ICU 4.4 */ 2173 public static final UnicodeBlock SAMARITAN = 2174 new UnicodeBlock("SAMARITAN", SAMARITAN_ID); /*[0800]*/ 2175 /** @stable ICU 4.4 */ 2176 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED = 2177 new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED", 2178 UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_ID); /*[18B0]*/ 2179 /** @stable ICU 4.4 */ 2180 public static final UnicodeBlock TAI_THAM = 2181 new UnicodeBlock("TAI_THAM", TAI_THAM_ID); /*[1A20]*/ 2182 /** @stable ICU 4.4 */ 2183 public static final UnicodeBlock VEDIC_EXTENSIONS = 2184 new 
UnicodeBlock("VEDIC_EXTENSIONS", VEDIC_EXTENSIONS_ID); /*[1CD0]*/ 2185 /** @stable ICU 4.4 */ 2186 public static final UnicodeBlock LISU = 2187 new UnicodeBlock("LISU", LISU_ID); /*[A4D0]*/ 2188 /** @stable ICU 4.4 */ 2189 public static final UnicodeBlock BAMUM = 2190 new UnicodeBlock("BAMUM", BAMUM_ID); /*[A6A0]*/ 2191 /** @stable ICU 4.4 */ 2192 public static final UnicodeBlock COMMON_INDIC_NUMBER_FORMS = 2193 new UnicodeBlock("COMMON_INDIC_NUMBER_FORMS", COMMON_INDIC_NUMBER_FORMS_ID); /*[A830]*/ 2194 /** @stable ICU 4.4 */ 2195 public static final UnicodeBlock DEVANAGARI_EXTENDED = 2196 new UnicodeBlock("DEVANAGARI_EXTENDED", DEVANAGARI_EXTENDED_ID); /*[A8E0]*/ 2197 /** @stable ICU 4.4 */ 2198 public static final UnicodeBlock HANGUL_JAMO_EXTENDED_A = 2199 new UnicodeBlock("HANGUL_JAMO_EXTENDED_A", HANGUL_JAMO_EXTENDED_A_ID); /*[A960]*/ 2200 /** @stable ICU 4.4 */ 2201 public static final UnicodeBlock JAVANESE = 2202 new UnicodeBlock("JAVANESE", JAVANESE_ID); /*[A980]*/ 2203 /** @stable ICU 4.4 */ 2204 public static final UnicodeBlock MYANMAR_EXTENDED_A = 2205 new UnicodeBlock("MYANMAR_EXTENDED_A", MYANMAR_EXTENDED_A_ID); /*[AA60]*/ 2206 /** @stable ICU 4.4 */ 2207 public static final UnicodeBlock TAI_VIET = 2208 new UnicodeBlock("TAI_VIET", TAI_VIET_ID); /*[AA80]*/ 2209 /** @stable ICU 4.4 */ 2210 public static final UnicodeBlock MEETEI_MAYEK = 2211 new UnicodeBlock("MEETEI_MAYEK", MEETEI_MAYEK_ID); /*[ABC0]*/ 2212 /** @stable ICU 4.4 */ 2213 public static final UnicodeBlock HANGUL_JAMO_EXTENDED_B = 2214 new UnicodeBlock("HANGUL_JAMO_EXTENDED_B", HANGUL_JAMO_EXTENDED_B_ID); /*[D7B0]*/ 2215 /** @stable ICU 4.4 */ 2216 public static final UnicodeBlock IMPERIAL_ARAMAIC = 2217 new UnicodeBlock("IMPERIAL_ARAMAIC", IMPERIAL_ARAMAIC_ID); /*[10840]*/ 2218 /** @stable ICU 4.4 */ 2219 public static final UnicodeBlock OLD_SOUTH_ARABIAN = 2220 new UnicodeBlock("OLD_SOUTH_ARABIAN", OLD_SOUTH_ARABIAN_ID); /*[10A60]*/ 2221 /** @stable ICU 4.4 */ 2222 public static final 
UnicodeBlock AVESTAN = 2223 new UnicodeBlock("AVESTAN", AVESTAN_ID); /*[10B00]*/ 2224 /** @stable ICU 4.4 */ 2225 public static final UnicodeBlock INSCRIPTIONAL_PARTHIAN = 2226 new UnicodeBlock("INSCRIPTIONAL_PARTHIAN", INSCRIPTIONAL_PARTHIAN_ID); /*[10B40]*/ 2227 /** @stable ICU 4.4 */ 2228 public static final UnicodeBlock INSCRIPTIONAL_PAHLAVI = 2229 new UnicodeBlock("INSCRIPTIONAL_PAHLAVI", INSCRIPTIONAL_PAHLAVI_ID); /*[10B60]*/ 2230 /** @stable ICU 4.4 */ 2231 public static final UnicodeBlock OLD_TURKIC = 2232 new UnicodeBlock("OLD_TURKIC", OLD_TURKIC_ID); /*[10C00]*/ 2233 /** @stable ICU 4.4 */ 2234 public static final UnicodeBlock RUMI_NUMERAL_SYMBOLS = 2235 new UnicodeBlock("RUMI_NUMERAL_SYMBOLS", RUMI_NUMERAL_SYMBOLS_ID); /*[10E60]*/ 2236 /** @stable ICU 4.4 */ 2237 public static final UnicodeBlock KAITHI = 2238 new UnicodeBlock("KAITHI", KAITHI_ID); /*[11080]*/ 2239 /** @stable ICU 4.4 */ 2240 public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS = 2241 new UnicodeBlock("EGYPTIAN_HIEROGLYPHS", EGYPTIAN_HIEROGLYPHS_ID); /*[13000]*/ 2242 /** @stable ICU 4.4 */ 2243 public static final UnicodeBlock ENCLOSED_ALPHANUMERIC_SUPPLEMENT = 2244 new UnicodeBlock("ENCLOSED_ALPHANUMERIC_SUPPLEMENT", 2245 ENCLOSED_ALPHANUMERIC_SUPPLEMENT_ID); /*[1F100]*/ 2246 /** @stable ICU 4.4 */ 2247 public static final UnicodeBlock ENCLOSED_IDEOGRAPHIC_SUPPLEMENT = 2248 new UnicodeBlock("ENCLOSED_IDEOGRAPHIC_SUPPLEMENT", 2249 ENCLOSED_IDEOGRAPHIC_SUPPLEMENT_ID); /*[1F200]*/ 2250 /** @stable ICU 4.4 */ 2251 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C = 2252 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C", 2253 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_ID); /*[2A700]*/ 2254 2255 /* New blocks in Unicode 6.0 */ 2256 2257 /** @stable ICU 4.6 */ 2258 public static final UnicodeBlock MANDAIC = 2259 new UnicodeBlock("MANDAIC", MANDAIC_ID); /*[0840]*/ 2260 /** @stable ICU 4.6 */ 2261 public static final UnicodeBlock BATAK = 2262 new UnicodeBlock("BATAK", BATAK_ID); 
/*[1BC0]*/ 2263 /** @stable ICU 4.6 */ 2264 public static final UnicodeBlock ETHIOPIC_EXTENDED_A = 2265 new UnicodeBlock("ETHIOPIC_EXTENDED_A", ETHIOPIC_EXTENDED_A_ID); /*[AB00]*/ 2266 /** @stable ICU 4.6 */ 2267 public static final UnicodeBlock BRAHMI = 2268 new UnicodeBlock("BRAHMI", BRAHMI_ID); /*[11000]*/ 2269 /** @stable ICU 4.6 */ 2270 public static final UnicodeBlock BAMUM_SUPPLEMENT = 2271 new UnicodeBlock("BAMUM_SUPPLEMENT", BAMUM_SUPPLEMENT_ID); /*[16800]*/ 2272 /** @stable ICU 4.6 */ 2273 public static final UnicodeBlock KANA_SUPPLEMENT = 2274 new UnicodeBlock("KANA_SUPPLEMENT", KANA_SUPPLEMENT_ID); /*[1B000]*/ 2275 /** @stable ICU 4.6 */ 2276 public static final UnicodeBlock PLAYING_CARDS = 2277 new UnicodeBlock("PLAYING_CARDS", PLAYING_CARDS_ID); /*[1F0A0]*/ 2278 /** @stable ICU 4.6 */ 2279 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS = 2280 new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS", 2281 MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS_ID); /*[1F300]*/ 2282 /** @stable ICU 4.6 */ 2283 public static final UnicodeBlock EMOTICONS = 2284 new UnicodeBlock("EMOTICONS", EMOTICONS_ID); /*[1F600]*/ 2285 /** @stable ICU 4.6 */ 2286 public static final UnicodeBlock TRANSPORT_AND_MAP_SYMBOLS = 2287 new UnicodeBlock("TRANSPORT_AND_MAP_SYMBOLS", TRANSPORT_AND_MAP_SYMBOLS_ID); /*[1F680]*/ 2288 /** @stable ICU 4.6 */ 2289 public static final UnicodeBlock ALCHEMICAL_SYMBOLS = 2290 new UnicodeBlock("ALCHEMICAL_SYMBOLS", ALCHEMICAL_SYMBOLS_ID); /*[1F700]*/ 2291 /** @stable ICU 4.6 */ 2292 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D = 2293 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D", 2294 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_ID); /*[2B740]*/ 2295 2296 /* New blocks in Unicode 6.1 */ 2297 2298 /** @stable ICU 49 */ 2299 public static final UnicodeBlock ARABIC_EXTENDED_A = 2300 new UnicodeBlock("ARABIC_EXTENDED_A", ARABIC_EXTENDED_A_ID); /*[08A0]*/ 2301 /** @stable ICU 49 */ 2302 public static final UnicodeBlock 
ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS = 2303 new UnicodeBlock("ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS", ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS_ID); /*[1EE00]*/ 2304 /** @stable ICU 49 */ 2305 public static final UnicodeBlock CHAKMA = new UnicodeBlock("CHAKMA", CHAKMA_ID); /*[11100]*/ 2306 /** @stable ICU 49 */ 2307 public static final UnicodeBlock MEETEI_MAYEK_EXTENSIONS = 2308 new UnicodeBlock("MEETEI_MAYEK_EXTENSIONS", MEETEI_MAYEK_EXTENSIONS_ID); /*[AAE0]*/ 2309 /** @stable ICU 49 */ 2310 public static final UnicodeBlock MEROITIC_CURSIVE = 2311 new UnicodeBlock("MEROITIC_CURSIVE", MEROITIC_CURSIVE_ID); /*[109A0]*/ 2312 /** @stable ICU 49 */ 2313 public static final UnicodeBlock MEROITIC_HIEROGLYPHS = 2314 new UnicodeBlock("MEROITIC_HIEROGLYPHS", MEROITIC_HIEROGLYPHS_ID); /*[10980]*/ 2315 /** @stable ICU 49 */ 2316 public static final UnicodeBlock MIAO = new UnicodeBlock("MIAO", MIAO_ID); /*[16F00]*/ 2317 /** @stable ICU 49 */ 2318 public static final UnicodeBlock SHARADA = new UnicodeBlock("SHARADA", SHARADA_ID); /*[11180]*/ 2319 /** @stable ICU 49 */ 2320 public static final UnicodeBlock SORA_SOMPENG = 2321 new UnicodeBlock("SORA_SOMPENG", SORA_SOMPENG_ID); /*[110D0]*/ 2322 /** @stable ICU 49 */ 2323 public static final UnicodeBlock SUNDANESE_SUPPLEMENT = 2324 new UnicodeBlock("SUNDANESE_SUPPLEMENT", SUNDANESE_SUPPLEMENT_ID); /*[1CC0]*/ 2325 /** @stable ICU 49 */ 2326 public static final UnicodeBlock TAKRI = new UnicodeBlock("TAKRI", TAKRI_ID); /*[11680]*/ 2327 2328 /* New blocks in Unicode 7.0 */ 2329 2330 /** @stable ICU 54 */ 2331 public static final UnicodeBlock BASSA_VAH = new UnicodeBlock("BASSA_VAH", BASSA_VAH_ID); /*[16AD0]*/ 2332 /** @stable ICU 54 */ 2333 public static final UnicodeBlock CAUCASIAN_ALBANIAN = 2334 new UnicodeBlock("CAUCASIAN_ALBANIAN", CAUCASIAN_ALBANIAN_ID); /*[10530]*/ 2335 /** @stable ICU 54 */ 2336 public static final UnicodeBlock COPTIC_EPACT_NUMBERS = 2337 new UnicodeBlock("COPTIC_EPACT_NUMBERS", COPTIC_EPACT_NUMBERS_ID); 
/*[102E0]*/ 2338 /** @stable ICU 54 */ 2339 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_EXTENDED = 2340 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_EXTENDED", COMBINING_DIACRITICAL_MARKS_EXTENDED_ID); /*[1AB0]*/ 2341 /** @stable ICU 54 */ 2342 public static final UnicodeBlock DUPLOYAN = new UnicodeBlock("DUPLOYAN", DUPLOYAN_ID); /*[1BC00]*/ 2343 /** @stable ICU 54 */ 2344 public static final UnicodeBlock ELBASAN = new UnicodeBlock("ELBASAN", ELBASAN_ID); /*[10500]*/ 2345 /** @stable ICU 54 */ 2346 public static final UnicodeBlock GEOMETRIC_SHAPES_EXTENDED = 2347 new UnicodeBlock("GEOMETRIC_SHAPES_EXTENDED", GEOMETRIC_SHAPES_EXTENDED_ID); /*[1F780]*/ 2348 /** @stable ICU 54 */ 2349 public static final UnicodeBlock GRANTHA = new UnicodeBlock("GRANTHA", GRANTHA_ID); /*[11300]*/ 2350 /** @stable ICU 54 */ 2351 public static final UnicodeBlock KHOJKI = new UnicodeBlock("KHOJKI", KHOJKI_ID); /*[11200]*/ 2352 /** @stable ICU 54 */ 2353 public static final UnicodeBlock KHUDAWADI = new UnicodeBlock("KHUDAWADI", KHUDAWADI_ID); /*[112B0]*/ 2354 /** @stable ICU 54 */ 2355 public static final UnicodeBlock LATIN_EXTENDED_E = 2356 new UnicodeBlock("LATIN_EXTENDED_E", LATIN_EXTENDED_E_ID); /*[AB30]*/ 2357 /** @stable ICU 54 */ 2358 public static final UnicodeBlock LINEAR_A = new UnicodeBlock("LINEAR_A", LINEAR_A_ID); /*[10600]*/ 2359 /** @stable ICU 54 */ 2360 public static final UnicodeBlock MAHAJANI = new UnicodeBlock("MAHAJANI", MAHAJANI_ID); /*[11150]*/ 2361 /** @stable ICU 54 */ 2362 public static final UnicodeBlock MANICHAEAN = new UnicodeBlock("MANICHAEAN", MANICHAEAN_ID); /*[10AC0]*/ 2363 /** @stable ICU 54 */ 2364 public static final UnicodeBlock MENDE_KIKAKUI = 2365 new UnicodeBlock("MENDE_KIKAKUI", MENDE_KIKAKUI_ID); /*[1E800]*/ 2366 /** @stable ICU 54 */ 2367 public static final UnicodeBlock MODI = new UnicodeBlock("MODI", MODI_ID); /*[11600]*/ 2368 /** @stable ICU 54 */ 2369 public static final UnicodeBlock MRO = new UnicodeBlock("MRO", MRO_ID); 
/*[16A40]*/
/** @stable ICU 54 */
public static final UnicodeBlock MYANMAR_EXTENDED_B =
    new UnicodeBlock("MYANMAR_EXTENDED_B", MYANMAR_EXTENDED_B_ID); /*[A9E0]*/
/** @stable ICU 54 */
public static final UnicodeBlock NABATAEAN =
    new UnicodeBlock("NABATAEAN", NABATAEAN_ID); /*[10880]*/
/** @stable ICU 54 */
public static final UnicodeBlock OLD_NORTH_ARABIAN =
    new UnicodeBlock("OLD_NORTH_ARABIAN", OLD_NORTH_ARABIAN_ID); /*[10A80]*/
/** @stable ICU 54 */
public static final UnicodeBlock OLD_PERMIC =
    new UnicodeBlock("OLD_PERMIC", OLD_PERMIC_ID); /*[10350]*/
/** @stable ICU 54 */
public static final UnicodeBlock ORNAMENTAL_DINGBATS =
    new UnicodeBlock("ORNAMENTAL_DINGBATS", ORNAMENTAL_DINGBATS_ID); /*[1F650]*/
/** @stable ICU 54 */
public static final UnicodeBlock PAHAWH_HMONG =
    new UnicodeBlock("PAHAWH_HMONG", PAHAWH_HMONG_ID); /*[16B00]*/
/** @stable ICU 54 */
public static final UnicodeBlock PALMYRENE =
    new UnicodeBlock("PALMYRENE", PALMYRENE_ID); /*[10860]*/
/** @stable ICU 54 */
public static final UnicodeBlock PAU_CIN_HAU =
    new UnicodeBlock("PAU_CIN_HAU", PAU_CIN_HAU_ID); /*[11AC0]*/
/** @stable ICU 54 */
public static final UnicodeBlock PSALTER_PAHLAVI =
    new UnicodeBlock("PSALTER_PAHLAVI", PSALTER_PAHLAVI_ID); /*[10B80]*/
/** @stable ICU 54 */
public static final UnicodeBlock SHORTHAND_FORMAT_CONTROLS =
    new UnicodeBlock("SHORTHAND_FORMAT_CONTROLS", SHORTHAND_FORMAT_CONTROLS_ID); /*[1BCA0]*/
/** @stable ICU 54 */
public static final UnicodeBlock SIDDHAM = new UnicodeBlock("SIDDHAM", SIDDHAM_ID); /*[11580]*/
/** @stable ICU 54 */
public static final UnicodeBlock SINHALA_ARCHAIC_NUMBERS =
    new UnicodeBlock("SINHALA_ARCHAIC_NUMBERS", SINHALA_ARCHAIC_NUMBERS_ID); /*[111E0]*/
/** @stable ICU 54 */
public static final UnicodeBlock SUPPLEMENTAL_ARROWS_C =
    new UnicodeBlock("SUPPLEMENTAL_ARROWS_C", SUPPLEMENTAL_ARROWS_C_ID); /*[1F800]*/
/** @stable ICU 54 */
public static final UnicodeBlock TIRHUTA = new UnicodeBlock("TIRHUTA", TIRHUTA_ID); /*[11480]*/
/** @stable ICU 54 */
public static final UnicodeBlock WARANG_CITI =
    new UnicodeBlock("WARANG_CITI", WARANG_CITI_ID); /*[118A0]*/

/* New blocks in Unicode 8.0 */

/** @stable ICU 56 */
public static final UnicodeBlock AHOM = new UnicodeBlock("AHOM", AHOM_ID); /*[11700]*/
/** @stable ICU 56 */
public static final UnicodeBlock ANATOLIAN_HIEROGLYPHS =
    new UnicodeBlock("ANATOLIAN_HIEROGLYPHS", ANATOLIAN_HIEROGLYPHS_ID); /*[14400]*/
/** @stable ICU 56 */
public static final UnicodeBlock CHEROKEE_SUPPLEMENT =
    new UnicodeBlock("CHEROKEE_SUPPLEMENT", CHEROKEE_SUPPLEMENT_ID); /*[AB70]*/
/** @stable ICU 56 */
public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E =
    new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E",
        CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E_ID); /*[2B820]*/
/** @stable ICU 56 */
public static final UnicodeBlock EARLY_DYNASTIC_CUNEIFORM =
    new UnicodeBlock("EARLY_DYNASTIC_CUNEIFORM", EARLY_DYNASTIC_CUNEIFORM_ID); /*[12480]*/
/** @stable ICU 56 */
public static final UnicodeBlock HATRAN = new UnicodeBlock("HATRAN", HATRAN_ID); /*[108E0]*/
/** @stable ICU 56 */
public static final UnicodeBlock MULTANI = new UnicodeBlock("MULTANI", MULTANI_ID); /*[11280]*/
/** @stable ICU 56 */
public static final UnicodeBlock OLD_HUNGARIAN =
    new UnicodeBlock("OLD_HUNGARIAN", OLD_HUNGARIAN_ID); /*[10C80]*/
/** @stable ICU 56 */
public static final UnicodeBlock SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS =
    new UnicodeBlock("SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS",
        SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS_ID); /*[1F900]*/
/** @stable ICU 56 */
public static final UnicodeBlock SUTTON_SIGNWRITING =
    new UnicodeBlock("SUTTON_SIGNWRITING", SUTTON_SIGNWRITING_ID); /*[1D800]*/

/* New blocks in Unicode 9.0 */

/** @stable ICU 58 */
public static final UnicodeBlock ADLAM = new UnicodeBlock("ADLAM", ADLAM_ID); /*[1E900]*/
/** @stable ICU 58 */
public static final UnicodeBlock BHAIKSUKI =
    new UnicodeBlock("BHAIKSUKI", BHAIKSUKI_ID); /*[11C00]*/
/** @stable ICU 58 */
public static final UnicodeBlock CYRILLIC_EXTENDED_C =
    new UnicodeBlock("CYRILLIC_EXTENDED_C", CYRILLIC_EXTENDED_C_ID); /*[1C80]*/
/** @stable ICU 58 */
public static final UnicodeBlock GLAGOLITIC_SUPPLEMENT =
    new UnicodeBlock("GLAGOLITIC_SUPPLEMENT", GLAGOLITIC_SUPPLEMENT_ID); /*[1E000]*/
/** @stable ICU 58 */
public static final UnicodeBlock IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION =
    new UnicodeBlock("IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION",
        IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION_ID); /*[16FE0]*/
/** @stable ICU 58 */
public static final UnicodeBlock MARCHEN = new UnicodeBlock("MARCHEN", MARCHEN_ID); /*[11C70]*/
/** @stable ICU 58 */
public static final UnicodeBlock MONGOLIAN_SUPPLEMENT =
    new UnicodeBlock("MONGOLIAN_SUPPLEMENT", MONGOLIAN_SUPPLEMENT_ID); /*[11660]*/
/** @stable ICU 58 */
public static final UnicodeBlock NEWA = new UnicodeBlock("NEWA", NEWA_ID); /*[11400]*/
/** @stable ICU 58 */
public static final UnicodeBlock OSAGE = new UnicodeBlock("OSAGE", OSAGE_ID); /*[104B0]*/
/** @stable ICU 58 */
public static final UnicodeBlock TANGUT = new UnicodeBlock("TANGUT", TANGUT_ID); /*[17000]*/
/** @stable ICU 58 */
public static final UnicodeBlock TANGUT_COMPONENTS =
    new UnicodeBlock("TANGUT_COMPONENTS", TANGUT_COMPONENTS_ID); /*[18800]*/

// New blocks in Unicode 10.0

/** @stable ICU 60 */
public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F =
    new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F",
        CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F_ID); /*[2CEB0]*/
/** @stable ICU 60 */
public static final UnicodeBlock KANA_EXTENDED_A =
    new UnicodeBlock("KANA_EXTENDED_A", KANA_EXTENDED_A_ID); /*[1B100]*/
/** @stable ICU 60 */
public static final UnicodeBlock MASARAM_GONDI =
    new UnicodeBlock("MASARAM_GONDI", MASARAM_GONDI_ID); /*[11D00]*/
/** @stable ICU 60 */
public static final UnicodeBlock NUSHU = new UnicodeBlock("NUSHU", NUSHU_ID); /*[1B170]*/
/** @stable ICU 60 */
public static final UnicodeBlock SOYOMBO = new UnicodeBlock("SOYOMBO", SOYOMBO_ID); /*[11A50]*/
/** @stable ICU 60 */
public static final UnicodeBlock SYRIAC_SUPPLEMENT =
    new UnicodeBlock("SYRIAC_SUPPLEMENT", SYRIAC_SUPPLEMENT_ID); /*[0860]*/
/** @stable ICU 60 */
public static final UnicodeBlock ZANABAZAR_SQUARE =
    new UnicodeBlock("ZANABAZAR_SQUARE", ZANABAZAR_SQUARE_ID); /*[11A00]*/

// New blocks in Unicode 11.0

/** @stable ICU 62 */
public static final UnicodeBlock CHESS_SYMBOLS =
    new UnicodeBlock("CHESS_SYMBOLS", CHESS_SYMBOLS_ID); /*[1FA00]*/
/** @stable ICU 62 */
public static final UnicodeBlock DOGRA = new UnicodeBlock("DOGRA", DOGRA_ID); /*[11800]*/
/** @stable ICU 62 */
public static final UnicodeBlock GEORGIAN_EXTENDED =
    new UnicodeBlock("GEORGIAN_EXTENDED", GEORGIAN_EXTENDED_ID); /*[1C90]*/
/** @stable ICU 62 */
public static final UnicodeBlock GUNJALA_GONDI =
    new UnicodeBlock("GUNJALA_GONDI", GUNJALA_GONDI_ID); /*[11D60]*/
/** @stable ICU 62 */
public static final UnicodeBlock HANIFI_ROHINGYA =
    new UnicodeBlock("HANIFI_ROHINGYA", HANIFI_ROHINGYA_ID); /*[10D00]*/
/** @stable ICU 62 */
public static final UnicodeBlock INDIC_SIYAQ_NUMBERS =
    new UnicodeBlock("INDIC_SIYAQ_NUMBERS", INDIC_SIYAQ_NUMBERS_ID); /*[1EC70]*/
/** @stable ICU 62 */
public static final UnicodeBlock MAKASAR = new UnicodeBlock("MAKASAR", MAKASAR_ID); /*[11EE0]*/
/** @stable ICU 62 */
public static final UnicodeBlock MAYAN_NUMERALS =
    new UnicodeBlock("MAYAN_NUMERALS", MAYAN_NUMERALS_ID); /*[1D2E0]*/
/** @stable ICU 62 */
public static final UnicodeBlock MEDEFAIDRIN =
    new UnicodeBlock("MEDEFAIDRIN", MEDEFAIDRIN_ID); /*[16E40]*/
/** @stable ICU 62 */
public static final UnicodeBlock OLD_SOGDIAN =
    new UnicodeBlock("OLD_SOGDIAN", OLD_SOGDIAN_ID); /*[10F00]*/
/** @stable ICU 62 */
public static final UnicodeBlock SOGDIAN = new UnicodeBlock("SOGDIAN", SOGDIAN_ID); /*[10F30]*/

/**
 * @stable ICU 2.4
 */
public static final UnicodeBlock INVALID_CODE
    = new UnicodeBlock("INVALID_CODE", INVALID_CODE_ID);

static {
    // Fail fast during class initialization if any BLOCKS_ slot below COUNT is still null.
    for (int blockId = 0; blockId < COUNT; ++blockId) {
        if (BLOCKS_[blockId] == null) {
            throw new java.lang.IllegalStateException(
                    "UnicodeBlock.BLOCKS_[" + blockId + "] not initialized");
        }
    }
}

// public methods --------------------------------------------------

/**
 * {@icu} Returns the only instance of the UnicodeBlock with the argument ID.
 * If no such ID exists, an INVALID_CODE UnicodeBlock will be returned.
 * @param id UnicodeBlock ID
 * @return the only instance of the UnicodeBlock with the argument ID
 *         if it exists, otherwise an INVALID_CODE UnicodeBlock will be
 *         returned.
 * @stable ICU 2.4
 */
public static UnicodeBlock getInstance(int id)
{
    // IDs are used directly as indexes into BLOCKS_; anything outside the table is invalid.
    if (id >= 0 && id < BLOCKS_.length) {
        return BLOCKS_[id];
    }
    return INVALID_CODE;
}

/**
 * Returns the Unicode allocation block that contains the code point.
 * Code points greater than {@code MAX_VALUE} yield {@link #INVALID_CODE}; every
 * other value is resolved through {@link #getInstance(int)} using the code point's
 * {@code UProperty.BLOCK} property value.
 * @param ch code point to be tested
 * @return the Unicode allocation block that contains the code point
 * @stable ICU 2.4
 */
public static UnicodeBlock of(int ch)
{
    if (ch > MAX_VALUE) {
        return INVALID_CODE;
    }

    // NOTE(review): the earlier Javadoc promised null for code points outside any
    // defined block, but nothing visible here returns null explicitly (getInstance
    // falls back to INVALID_CODE) - confirm what the BLOCK property yields for such
    // code points, and for negative values, which are not rejected above.
    return UnicodeBlock.getInstance(
            UCharacterProperty.INSTANCE.getIntPropertyValue(ch, UProperty.BLOCK));
}

/**
 * Alternative to the {@link java.lang.Character.UnicodeBlock#forName(String)} method.
 * Returns the Unicode block with the given name. {@icunote} Unlike
 * {@link java.lang.Character.UnicodeBlock#forName(String)}, this only matches
 * against the official UCD name and the Java block name
 * (ignoring case).
 *
 * <p>Both the argument and the long {@code UProperty.BLOCK} value names are
 * normalized with {@code trimBlockName} before being compared. The lookup table
 * is built on first use and held through a {@link SoftReference}; it is rebuilt
 * whenever that reference has been cleared. No synchronization is performed, so
 * concurrent callers may each build their own table.
 *
 * @param blockName the name of the block to match
 * @return the UnicodeBlock with that name
 * @throws IllegalArgumentException if the blockName could not be matched
 * @stable ICU 3.0
 */
public static final UnicodeBlock forName(String blockName) {
    // Read the shared field once so the null check and get() act on the same reference.
    SoftReference<Map<String, UnicodeBlock>> ref = mref;
    Map<String, UnicodeBlock> m = (ref == null) ? null : ref.get();
    if (m == null) {
        m = new HashMap<>(BLOCKS_.length);
        for (UnicodeBlock block : BLOCKS_) {
            String name = trimBlockName(
                    getPropertyValueName(UProperty.BLOCK, block.getID(),
                            UProperty.NameChoice.LONG));
            m.put(name, block);
        }
        mref = new SoftReference<>(m);
    }
    UnicodeBlock b = m.get(trimBlockName(blockName));
    if (b == null) {
        // Name the rejected input so the caller can see what failed to match.
        throw new IllegalArgumentException("Unknown Unicode block name: " + blockName);
    }
    return b;
}
// Lazily built map from normalized block name to block, used only by forName.
private static SoftReference<Map<String, UnicodeBlock>> mref;

trimBlockName(String name)2611 private static String trimBlockName(String name) { 2612 String upper = name.toUpperCase(Locale.ENGLISH); 2613 StringBuilder result = new StringBuilder(upper.length()); 2614 for (int i = 0; i < upper.length(); i++) { 2615 char c = upper.charAt(i); 2616 if (c != ' ' && c != '_'
&& c != '-') { 2617 result.append(c); 2618 } 2619 } 2620 return result.toString(); 2621 } 2622 2623 /** 2624 * {icu} Returns the type ID of this Unicode block 2625 * @return integer type ID of this Unicode block 2626 * @stable ICU 2.4 2627 */ getID()2628 public int getID() 2629 { 2630 return m_id_; 2631 } 2632 2633 // private data members --------------------------------------------- 2634 2635 /** 2636 * Identification code for this UnicodeBlock 2637 */ 2638 private int m_id_; 2639 2640 // private constructor ---------------------------------------------- 2641 2642 /** 2643 * UnicodeBlock constructor 2644 * @param name name of this UnicodeBlock 2645 * @param id unique id of this UnicodeBlock 2646 * @exception NullPointerException if name is <code>null</code> 2647 */ UnicodeBlock(String name, int id)2648 private UnicodeBlock(String name, int id) 2649 { 2650 super(name); 2651 m_id_ = id; 2652 if (id >= 0) { 2653 BLOCKS_[id] = this; 2654 } 2655 } 2656 } 2657 2658 /** 2659 * East Asian Width constants. 2660 * @see UProperty#EAST_ASIAN_WIDTH 2661 * @see UCharacter#getIntPropertyValue 2662 * @stable ICU 2.4 2663 */ 2664 public static interface EastAsianWidth 2665 { 2666 /** 2667 * @stable ICU 2.4 2668 */ 2669 public static final int NEUTRAL = 0; 2670 /** 2671 * @stable ICU 2.4 2672 */ 2673 public static final int AMBIGUOUS = 1; 2674 /** 2675 * @stable ICU 2.4 2676 */ 2677 public static final int HALFWIDTH = 2; 2678 /** 2679 * @stable ICU 2.4 2680 */ 2681 public static final int FULLWIDTH = 3; 2682 /** 2683 * @stable ICU 2.4 2684 */ 2685 public static final int NARROW = 4; 2686 /** 2687 * @stable ICU 2.4 2688 */ 2689 public static final int WIDE = 5; 2690 /** 2691 * One more than the highest normal EastAsianWidth value. 2692 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.EAST_ASIAN_WIDTH). 2693 * 2694 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 
/**
 * Decomposition Type constants.
 * Fields of an interface are implicitly <code>public static final</code>,
 * so the modifiers are not repeated on each constant.
 * @see UProperty#DECOMPOSITION_TYPE
 * @stable ICU 2.4
 */
public interface DecompositionType {
    /** @stable ICU 2.4 */
    int NONE = 0;
    /** @stable ICU 2.4 */
    int CANONICAL = 1;
    /** @stable ICU 2.4 */
    int COMPAT = 2;
    /** @stable ICU 2.4 */
    int CIRCLE = 3;
    /** @stable ICU 2.4 */
    int FINAL = 4;
    /** @stable ICU 2.4 */
    int FONT = 5;
    /** @stable ICU 2.4 */
    int FRACTION = 6;
    /** @stable ICU 2.4 */
    int INITIAL = 7;
    /** @stable ICU 2.4 */
    int ISOLATED = 8;
    /** @stable ICU 2.4 */
    int MEDIAL = 9;
    /** @stable ICU 2.4 */
    int NARROW = 10;
    /** @stable ICU 2.4 */
    int NOBREAK = 11;
    /** @stable ICU 2.4 */
    int SMALL = 12;
    /** @stable ICU 2.4 */
    int SQUARE = 13;
    /** @stable ICU 2.4 */
    int SUB = 14;
    /** @stable ICU 2.4 */
    int SUPER = 15;
    /** @stable ICU 2.4 */
    int VERTICAL = 16;
    /** @stable ICU 2.4 */
    int WIDE = 17;
    /**
     * One more than the highest normal DecompositionType value.
     * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.DECOMPOSITION_TYPE).
     *
     * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
     */
    @Deprecated
    int COUNT = 18;
}
/**
 * Joining Group constants.
 * Fields of an interface are implicitly <code>public static final</code>,
 * so the modifiers are not repeated on each constant.
 * @see UProperty#JOINING_GROUP
 * @stable ICU 2.4
 */
public interface JoiningGroup {
    /** @stable ICU 2.4 */
    int NO_JOINING_GROUP = 0;
    /** @stable ICU 2.4 */
    int AIN = 1;
    /** @stable ICU 2.4 */
    int ALAPH = 2;
    /** @stable ICU 2.4 */
    int ALEF = 3;
    /** @stable ICU 2.4 */
    int BEH = 4;
    /** @stable ICU 2.4 */
    int BETH = 5;
    /** @stable ICU 2.4 */
    int DAL = 6;
    /** @stable ICU 2.4 */
    int DALATH_RISH = 7;
    /** @stable ICU 2.4 */
    int E = 8;
    /** @stable ICU 2.4 */
    int FEH = 9;
    /** @stable ICU 2.4 */
    int FINAL_SEMKATH = 10;
    /** @stable ICU 2.4 */
    int GAF = 11;
    /** @stable ICU 2.4 */
    int GAMAL = 12;
    /** @stable ICU 2.4 */
    int HAH = 13;
    /** @stable ICU 4.6 */
    int TEH_MARBUTA_GOAL = 14;
    /**
     * Same value as {@link #TEH_MARBUTA_GOAL}.
     * @stable ICU 2.4
     */
    int HAMZA_ON_HEH_GOAL = TEH_MARBUTA_GOAL;
    /** @stable ICU 2.4 */
    int HE = 15;
    /** @stable ICU 2.4 */
    int HEH = 16;
    /** @stable ICU 2.4 */
    int HEH_GOAL = 17;
    /** @stable ICU 2.4 */
    int HETH = 18;
    /** @stable ICU 2.4 */
    int KAF = 19;
    /** @stable ICU 2.4 */
    int KAPH = 20;
    /** @stable ICU 2.4 */
    int KNOTTED_HEH = 21;
    /** @stable ICU 2.4 */
    int LAM = 22;
    /** @stable ICU 2.4 */
    int LAMADH = 23;
    /** @stable ICU 2.4 */
    int MEEM = 24;
    /** @stable ICU 2.4 */
    int MIM = 25;
    /** @stable ICU 2.4 */
    int NOON = 26;
    /** @stable ICU 2.4 */
    int NUN = 27;
    /** @stable ICU 2.4 */
    int PE = 28;
    /** @stable ICU 2.4 */
    int QAF = 29;
    /** @stable ICU 2.4 */
    int QAPH = 30;
    /** @stable ICU 2.4 */
    int REH = 31;
    /** @stable ICU 2.4 */
    int REVERSED_PE = 32;
    /** @stable ICU 2.4 */
    int SAD = 33;
    /** @stable ICU 2.4 */
    int SADHE = 34;
    /** @stable ICU 2.4 */
    int SEEN = 35;
    /** @stable ICU 2.4 */
    int SEMKATH = 36;
    /** @stable ICU 2.4 */
    int SHIN = 37;
    /** @stable ICU 2.4 */
    int SWASH_KAF = 38;
    /** @stable ICU 2.4 */
    int SYRIAC_WAW = 39;
    /** @stable ICU 2.4 */
    int TAH = 40;
    /** @stable ICU 2.4 */
    int TAW = 41;
    /** @stable ICU 2.4 */
    int TEH_MARBUTA = 42;
    /** @stable ICU 2.4 */
    int TETH = 43;
    /** @stable ICU 2.4 */
    int WAW = 44;
    /** @stable ICU 2.4 */
    int YEH = 45;
    /** @stable ICU 2.4 */
    int YEH_BARREE = 46;
    /** @stable ICU 2.4 */
    int YEH_WITH_TAIL = 47;
    /** @stable ICU 2.4 */
    int YUDH = 48;
    /** @stable ICU 2.4 */
    int YUDH_HE = 49;
    /** @stable ICU 2.4 */
    int ZAIN = 50;
    /** @stable ICU 2.6 */
    int FE = 51;
    /** @stable ICU 2.6 */
    int KHAPH = 52;
    /** @stable ICU 2.6 */
    int ZHAIN = 53;
    /** @stable ICU 4.0 */
    int BURUSHASKI_YEH_BARREE = 54;
    /** @stable ICU 4.4 */
    int FARSI_YEH = 55;
    /** @stable ICU 4.4 */
    int NYA = 56;
    /** @stable ICU 49 */
    int ROHINGYA_YEH = 57;

    /** @stable ICU 54 */
    int MANICHAEAN_ALEPH = 58;
    /** @stable ICU 54 */
    int MANICHAEAN_AYIN = 59;
    /** @stable ICU 54 */
    int MANICHAEAN_BETH = 60;
    /** @stable ICU 54 */
    int MANICHAEAN_DALETH = 61;
    /** @stable ICU 54 */
    int MANICHAEAN_DHAMEDH = 62;
    /** @stable ICU 54 */
    int MANICHAEAN_FIVE = 63;
    /** @stable ICU 54 */
    int MANICHAEAN_GIMEL = 64;
    /** @stable ICU 54 */
    int MANICHAEAN_HETH = 65;
    /** @stable ICU 54 */
    int MANICHAEAN_HUNDRED = 66;
    /** @stable ICU 54 */
    int MANICHAEAN_KAPH = 67;
    /** @stable ICU 54 */
    int MANICHAEAN_LAMEDH = 68;
    /** @stable ICU 54 */
    int MANICHAEAN_MEM = 69;
    /** @stable ICU 54 */
    int MANICHAEAN_NUN = 70;
    /** @stable ICU 54 */
    int MANICHAEAN_ONE = 71;
    /** @stable ICU 54 */
    int MANICHAEAN_PE = 72;
    /** @stable ICU 54 */
    int MANICHAEAN_QOPH = 73;
    /** @stable ICU 54 */
    int MANICHAEAN_RESH = 74;
    /** @stable ICU 54 */
    int MANICHAEAN_SADHE = 75;
    /** @stable ICU 54 */
    int MANICHAEAN_SAMEKH = 76;
    /** @stable ICU 54 */
    int MANICHAEAN_TAW = 77;
    /** @stable ICU 54 */
    int MANICHAEAN_TEN = 78;
    /** @stable ICU 54 */
    int MANICHAEAN_TETH = 79;
    /** @stable ICU 54 */
    int MANICHAEAN_THAMEDH = 80;
    /** @stable ICU 54 */
    int MANICHAEAN_TWENTY = 81;
    /** @stable ICU 54 */
    int MANICHAEAN_WAW = 82;
    /** @stable ICU 54 */
    int MANICHAEAN_YODH = 83;
    /** @stable ICU 54 */
    int MANICHAEAN_ZAYIN = 84;
    /** @stable ICU 54 */
    int STRAIGHT_WAW = 85;

    /** @stable ICU 58 */
    int AFRICAN_FEH = 86;
    /** @stable ICU 58 */
    int AFRICAN_NOON = 87;
    /** @stable ICU 58 */
    int AFRICAN_QAF = 88;

    /** @stable ICU 60 */
    int MALAYALAM_BHA = 89;
    /** @stable ICU 60 */
    int MALAYALAM_JA = 90;
    /** @stable ICU 60 */
    int MALAYALAM_LLA = 91;
    /** @stable ICU 60 */
    int MALAYALAM_LLLA = 92;
    /** @stable ICU 60 */
    int MALAYALAM_NGA = 93;
    /** @stable ICU 60 */
    int MALAYALAM_NNA = 94;
    /** @stable ICU 60 */
    int MALAYALAM_NNNA = 95;
    /** @stable ICU 60 */
    int MALAYALAM_NYA = 96;
    /** @stable ICU 60 */
    int MALAYALAM_RA = 97;
    /** @stable ICU 60 */
    int MALAYALAM_SSA = 98;
    /** @stable ICU 60 */
    int MALAYALAM_TTA = 99;

    /** @stable ICU 62 */
    int HANIFI_ROHINGYA_KINNA_YA = 100;
    /** @stable ICU 62 */
    int HANIFI_ROHINGYA_PA = 101;

    /**
     * One more than the highest normal JoiningGroup value.
     * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.JOINING_GROUP).
     *
     * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
     */
    @Deprecated
    int COUNT = 102;
}
/**
 * Word Break constants.
 * Fields of an interface are implicitly <code>public static final</code>,
 * so the modifiers are not repeated on each constant.
 * @see UProperty#WORD_BREAK
 * @stable ICU 3.4
 */
public interface WordBreak {
    /** @stable ICU 3.8 */
    int OTHER = 0;
    /** @stable ICU 3.8 */
    int ALETTER = 1;
    /** @stable ICU 3.8 */
    int FORMAT = 2;
    /** @stable ICU 3.8 */
    int KATAKANA = 3;
    /** @stable ICU 3.8 */
    int MIDLETTER = 4;
    /** @stable ICU 3.8 */
    int MIDNUM = 5;
    /** @stable ICU 3.8 */
    int NUMERIC = 6;
    /** @stable ICU 3.8 */
    int EXTENDNUMLET = 7;
    /** @stable ICU 4.0 */
    int CR = 8;
    /** @stable ICU 4.0 */
    int EXTEND = 9;
    /** @stable ICU 4.0 */
    int LF = 10;
    /** @stable ICU 4.0 */
    int MIDNUMLET = 11;
    /** @stable ICU 4.0 */
    int NEWLINE = 12;
    /** @stable ICU 50 */
    int REGIONAL_INDICATOR = 13;   /*[RI]*/ /* new in Unicode 6.2/ICU 50 */
    /** @stable ICU 52 */
    int HEBREW_LETTER = 14;    /*[HL]*/ /* from here on: new in Unicode 6.3/ICU 52 */
    /** @stable ICU 52 */
    int SINGLE_QUOTE = 15;     /*[SQ]*/
    /** @stable ICU 52 */
    int DOUBLE_QUOTE = 16;     /*[DQ]*/
    /** @stable ICU 58 */
    int E_BASE = 17;           /*[EB]*/ /* from here on: new in Unicode 9.0/ICU 58 */
    /** @stable ICU 58 */
    int E_BASE_GAZ = 18;       /*[EBG]*/
    /** @stable ICU 58 */
    int E_MODIFIER = 19;       /*[EM]*/
    /** @stable ICU 58 */
    int GLUE_AFTER_ZWJ = 20;   /*[GAZ]*/
    /** @stable ICU 58 */
    int ZWJ = 21;              /*[ZWJ]*/
    /** @stable ICU 62 */
    int WSEGSPACE = 22;        /*[WSEGSPACE]*/
    /**
     * One more than the highest normal WordBreak value.
     * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.WORD_BREAK).
     *
     * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
     */
    @Deprecated
    int COUNT = 23;
}
/**
 * Sentence Break constants.
 * Fields of an interface are implicitly <code>public static final</code>,
 * so the modifiers are not repeated on each constant.
 * @see UProperty#SENTENCE_BREAK
 * @stable ICU 3.4
 */
public interface SentenceBreak {
    /** @stable ICU 3.8 */
    int OTHER = 0;
    /** @stable ICU 3.8 */
    int ATERM = 1;
    /** @stable ICU 3.8 */
    int CLOSE = 2;
    /** @stable ICU 3.8 */
    int FORMAT = 3;
    /** @stable ICU 3.8 */
    int LOWER = 4;
    /** @stable ICU 3.8 */
    int NUMERIC = 5;
    /** @stable ICU 3.8 */
    int OLETTER = 6;
    /** @stable ICU 3.8 */
    int SEP = 7;
    /** @stable ICU 3.8 */
    int SP = 8;
    /** @stable ICU 3.8 */
    int STERM = 9;
    /** @stable ICU 3.8 */
    int UPPER = 10;
    /** @stable ICU 4.0 */
    int CR = 11;
    /** @stable ICU 4.0 */
    int EXTEND = 12;
    /** @stable ICU 4.0 */
    int LF = 13;
    /** @stable ICU 4.0 */
    int SCONTINUE = 14;
    /**
     * One more than the highest normal SentenceBreak value.
     * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.SENTENCE_BREAK).
     *
     * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
     */
    @Deprecated
    int COUNT = 15;
}
/**
 * Line Break constants.
 * Fields of an interface are implicitly <code>public static final</code>,
 * so the modifiers are not repeated on each constant.
 * @see UProperty#LINE_BREAK
 * @stable ICU 2.4
 */
public interface LineBreak {
    /** @stable ICU 2.4 */
    int UNKNOWN = 0;
    /** @stable ICU 2.4 */
    int AMBIGUOUS = 1;
    /** @stable ICU 2.4 */
    int ALPHABETIC = 2;
    /** @stable ICU 2.4 */
    int BREAK_BOTH = 3;
    /** @stable ICU 2.4 */
    int BREAK_AFTER = 4;
    /** @stable ICU 2.4 */
    int BREAK_BEFORE = 5;
    /** @stable ICU 2.4 */
    int MANDATORY_BREAK = 6;
    /** @stable ICU 2.4 */
    int CONTINGENT_BREAK = 7;
    /** @stable ICU 2.4 */
    int CLOSE_PUNCTUATION = 8;
    /** @stable ICU 2.4 */
    int COMBINING_MARK = 9;
    /** @stable ICU 2.4 */
    int CARRIAGE_RETURN = 10;
    /** @stable ICU 2.4 */
    int EXCLAMATION = 11;
    /** @stable ICU 2.4 */
    int GLUE = 12;
    /** @stable ICU 2.4 */
    int HYPHEN = 13;
    /** @stable ICU 2.4 */
    int IDEOGRAPHIC = 14;
    /**
     * @see #INSEPARABLE
     * @stable ICU 2.4
     */
    int INSEPERABLE = 15;
    /**
     * Renamed from the misspelled "inseperable" in Unicode 4.0.1.
     * @stable ICU 3.0
     */
    int INSEPARABLE = 15;
    /** @stable ICU 2.4 */
    int INFIX_NUMERIC = 16;
    /** @stable ICU 2.4 */
    int LINE_FEED = 17;
    /** @stable ICU 2.4 */
    int NONSTARTER = 18;
    /** @stable ICU 2.4 */
    int NUMERIC = 19;
    /** @stable ICU 2.4 */
    int OPEN_PUNCTUATION = 20;
    /** @stable ICU 2.4 */
    int POSTFIX_NUMERIC = 21;
    /** @stable ICU 2.4 */
    int PREFIX_NUMERIC = 22;
    /** @stable ICU 2.4 */
    int QUOTATION = 23;
    /** @stable ICU 2.4 */
    int COMPLEX_CONTEXT = 24;
    /** @stable ICU 2.4 */
    int SURROGATE = 25;
    /** @stable ICU 2.4 */
    int SPACE = 26;
    /** @stable ICU 2.4 */
    int BREAK_SYMBOLS = 27;
    /** @stable ICU 2.4 */
    int ZWSPACE = 28;
    /** @stable ICU 2.6 */
    int NEXT_LINE = 29;       /*[NL]*/ /* from here on: new in Unicode 4/ICU 2.6 */
    /** @stable ICU 2.6 */
    int WORD_JOINER = 30;      /*[WJ]*/
    /** @stable ICU 3.4 */
    int H2 = 31;  /* from here on: new in Unicode 4.1/ICU 3.4 */
    /** @stable ICU 3.4 */
    int H3 = 32;
    /** @stable ICU 3.4 */
    int JL = 33;
    /** @stable ICU 3.4 */
    int JT = 34;
    /** @stable ICU 3.4 */
    int JV = 35;
    /** @stable ICU 4.4 */
    int CLOSE_PARENTHESIS = 36; /*[CP]*/ /* new in Unicode 5.2/ICU 4.4 */
    /** @stable ICU 49 */
    int CONDITIONAL_JAPANESE_STARTER = 37;  /*[CJ]*/ /* new in Unicode 6.1/ICU 49 */
    /** @stable ICU 49 */
    int HEBREW_LETTER = 38;  /*[HL]*/ /* new in Unicode 6.1/ICU 49 */
    /** @stable ICU 50 */
    int REGIONAL_INDICATOR = 39;  /*[RI]*/ /* new in Unicode 6.2/ICU 50 */
    /** @stable ICU 58 */
    int E_BASE = 40;  /*[EB]*/ /* from here on: new in Unicode 9.0/ICU 58 */
    /** @stable ICU 58 */
    int E_MODIFIER = 41;  /*[EM]*/
    /** @stable ICU 58 */
    int ZWJ = 42;  /*[ZWJ]*/
    /**
     * One more than the highest normal LineBreak value.
     * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.LINE_BREAK).
     *
     * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
     */
    @Deprecated
    int COUNT = 43;
}
/**
 * Hangul Syllable Type constants.
 * Fields of an interface are implicitly <code>public static final</code>,
 * so the modifiers are not repeated on each constant.
 *
 * @see UProperty#HANGUL_SYLLABLE_TYPE
 * @stable ICU 2.6
 */
public interface HangulSyllableType {
    /** @stable ICU 2.6 */
    int NOT_APPLICABLE = 0;   /*[NA]*/ /*See note !!*/
    /** @stable ICU 2.6 */
    int LEADING_JAMO = 1;   /*[L]*/
    /** @stable ICU 2.6 */
    int VOWEL_JAMO = 2;   /*[V]*/
    /** @stable ICU 2.6 */
    int TRAILING_JAMO = 3;   /*[T]*/
    /** @stable ICU 2.6 */
    int LV_SYLLABLE = 4;   /*[LV]*/
    /** @stable ICU 2.6 */
    int LVT_SYLLABLE = 5;   /*[LVT]*/
    /**
     * One more than the highest normal HangulSyllableType value.
     * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.HANGUL_SYLLABLE_TYPE).
     *
     * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
     */
    @Deprecated
    int COUNT = 6;
}
/**
 * Indic Positional Category constants.
 * Fields of an interface are implicitly <code>public static final</code>,
 * so the modifiers are not repeated on each constant.
 *
 * @see UProperty#INDIC_POSITIONAL_CATEGORY
 * @stable ICU 63
 */
public interface IndicPositionalCategory {
    /** @stable ICU 63 */
    int NA = 0;
    /** @stable ICU 63 */
    int BOTTOM = 1;
    /** @stable ICU 63 */
    int BOTTOM_AND_LEFT = 2;
    /** @stable ICU 63 */
    int BOTTOM_AND_RIGHT = 3;
    /** @stable ICU 63 */
    int LEFT = 4;
    /** @stable ICU 63 */
    int LEFT_AND_RIGHT = 5;
    /** @stable ICU 63 */
    int OVERSTRUCK = 6;
    /** @stable ICU 63 */
    int RIGHT = 7;
    /** @stable ICU 63 */
    int TOP = 8;
    /** @stable ICU 63 */
    int TOP_AND_BOTTOM = 9;
    /** @stable ICU 63 */
    int TOP_AND_BOTTOM_AND_RIGHT = 10;
    /** @stable ICU 63 */
    int TOP_AND_LEFT = 11;
    /** @stable ICU 63 */
    int TOP_AND_LEFT_AND_RIGHT = 12;
    /** @stable ICU 63 */
    int TOP_AND_RIGHT = 13;
    /** @stable ICU 63 */
    int VISUAL_ORDER_LEFT = 14;
}
3738 * 3739 * @see UProperty#INDIC_SYLLABIC_CATEGORY 3740 * @stable ICU 63 3741 */ 3742 public static interface IndicSyllabicCategory { 3743 /** @stable ICU 63 */ 3744 public static final int OTHER = 0; 3745 /** @stable ICU 63 */ 3746 public static final int AVAGRAHA = 1; 3747 /** @stable ICU 63 */ 3748 public static final int BINDU = 2; 3749 /** @stable ICU 63 */ 3750 public static final int BRAHMI_JOINING_NUMBER = 3; 3751 /** @stable ICU 63 */ 3752 public static final int CANTILLATION_MARK = 4; 3753 /** @stable ICU 63 */ 3754 public static final int CONSONANT = 5; 3755 /** @stable ICU 63 */ 3756 public static final int CONSONANT_DEAD = 6; 3757 /** @stable ICU 63 */ 3758 public static final int CONSONANT_FINAL = 7; 3759 /** @stable ICU 63 */ 3760 public static final int CONSONANT_HEAD_LETTER = 8; 3761 /** @stable ICU 63 */ 3762 public static final int CONSONANT_INITIAL_POSTFIXED = 9; 3763 /** @stable ICU 63 */ 3764 public static final int CONSONANT_KILLER = 10; 3765 /** @stable ICU 63 */ 3766 public static final int CONSONANT_MEDIAL = 11; 3767 /** @stable ICU 63 */ 3768 public static final int CONSONANT_PLACEHOLDER = 12; 3769 /** @stable ICU 63 */ 3770 public static final int CONSONANT_PRECEDING_REPHA = 13; 3771 /** @stable ICU 63 */ 3772 public static final int CONSONANT_PREFIXED = 14; 3773 /** @stable ICU 63 */ 3774 public static final int CONSONANT_SUBJOINED = 15; 3775 /** @stable ICU 63 */ 3776 public static final int CONSONANT_SUCCEEDING_REPHA = 16; 3777 /** @stable ICU 63 */ 3778 public static final int CONSONANT_WITH_STACKER = 17; 3779 /** @stable ICU 63 */ 3780 public static final int GEMINATION_MARK = 18; 3781 /** @stable ICU 63 */ 3782 public static final int INVISIBLE_STACKER = 19; 3783 /** @stable ICU 63 */ 3784 public static final int JOINER = 20; 3785 /** @stable ICU 63 */ 3786 public static final int MODIFYING_LETTER = 21; 3787 /** @stable ICU 63 */ 3788 public static final int NON_JOINER = 22; 3789 /** @stable ICU 63 */ 3790 public static final int 
NUKTA = 23; 3791 /** @stable ICU 63 */ 3792 public static final int NUMBER = 24; 3793 /** @stable ICU 63 */ 3794 public static final int NUMBER_JOINER = 25; 3795 /** @stable ICU 63 */ 3796 public static final int PURE_KILLER = 26; 3797 /** @stable ICU 63 */ 3798 public static final int REGISTER_SHIFTER = 27; 3799 /** @stable ICU 63 */ 3800 public static final int SYLLABLE_MODIFIER = 28; 3801 /** @stable ICU 63 */ 3802 public static final int TONE_LETTER = 29; 3803 /** @stable ICU 63 */ 3804 public static final int TONE_MARK = 30; 3805 /** @stable ICU 63 */ 3806 public static final int VIRAMA = 31; 3807 /** @stable ICU 63 */ 3808 public static final int VISARGA = 32; 3809 /** @stable ICU 63 */ 3810 public static final int VOWEL = 33; 3811 /** @stable ICU 63 */ 3812 public static final int VOWEL_DEPENDENT = 34; 3813 /** @stable ICU 63 */ 3814 public static final int VOWEL_INDEPENDENT = 35; 3815 } 3816 3817 /** 3818 * Vertical Orientation constants. 3819 * 3820 * @see UProperty#VERTICAL_ORIENTATION 3821 * @stable ICU 63 3822 */ 3823 public static interface VerticalOrientation { 3824 /** @stable ICU 63 */ 3825 public static final int ROTATED = 0; 3826 /** @stable ICU 63 */ 3827 public static final int TRANSFORMED_ROTATED = 1; 3828 /** @stable ICU 63 */ 3829 public static final int TRANSFORMED_UPRIGHT = 2; 3830 /** @stable ICU 63 */ 3831 public static final int UPRIGHT = 3; 3832 } 3833 3834 // public data members ----------------------------------------------- 3835 3836 /** 3837 * The lowest Unicode code point value, constant 0. 3838 * Same as {@link Character#MIN_CODE_POINT}, same integer value as {@link Character#MIN_VALUE}. 3839 * 3840 * @stable ICU 2.1 3841 */ 3842 public static final int MIN_VALUE = Character.MIN_CODE_POINT; 3843 3844 /** 3845 * The highest Unicode code point value (scalar value), constant U+10FFFF (uses 21 bits). 3846 * Same as {@link Character#MAX_CODE_POINT}. 
3847 * 3848 * <p>Up-to-date Unicode implementation of {@link Character#MAX_VALUE} 3849 * which is still a char with the value U+FFFF. 3850 * 3851 * @stable ICU 2.1 3852 */ 3853 public static final int MAX_VALUE = Character.MAX_CODE_POINT; 3854 3855 /** 3856 * The minimum value for Supplementary code points, constant U+10000. 3857 * Same as {@link Character#MIN_SUPPLEMENTARY_CODE_POINT}. 3858 * 3859 * @stable ICU 2.1 3860 */ 3861 public static final int SUPPLEMENTARY_MIN_VALUE = Character.MIN_SUPPLEMENTARY_CODE_POINT; 3862 3863 /** 3864 * Unicode value used when translating into Unicode encoding form and there 3865 * is no existing character. 3866 * @stable ICU 2.1 3867 */ 3868 public static final int REPLACEMENT_CHAR = '\uFFFD'; 3869 3870 /** 3871 * Special value that is returned by getUnicodeNumericValue(int) when no 3872 * numeric value is defined for a code point. 3873 * @stable ICU 2.4 3874 * @see #getUnicodeNumericValue 3875 */ 3876 public static final double NO_NUMERIC_VALUE = -123456789; 3877 3878 /** 3879 * Compatibility constant for Java Character's MIN_RADIX. 3880 * @stable ICU 3.4 3881 */ 3882 public static final int MIN_RADIX = java.lang.Character.MIN_RADIX; 3883 3884 /** 3885 * Compatibility constant for Java Character's MAX_RADIX. 3886 * @stable ICU 3.4 3887 */ 3888 public static final int MAX_RADIX = java.lang.Character.MAX_RADIX; 3889 3890 /** 3891 * Do not lowercase non-initial parts of words when titlecasing. 3892 * Option bit for titlecasing APIs that take an options bit set. 3893 * 3894 * By default, titlecasing will titlecase the first cased character 3895 * of a word and lowercase all other characters. 3896 * With this option, the other characters will not be modified. 
3897 * 3898 * @see #toTitleCase 3899 * @stable ICU 3.8 3900 */ 3901 public static final int TITLECASE_NO_LOWERCASE = 0x100; 3902 3903 /** 3904 * Do not adjust the titlecasing indexes from BreakIterator::next() indexes; 3905 * titlecase exactly the characters at breaks from the iterator. 3906 * Option bit for titlecasing APIs that take an options bit set. 3907 * 3908 * By default, titlecasing will take each break iterator index, 3909 * adjust it by looking for the next cased character, and titlecase that one. 3910 * Other characters are lowercased. 3911 * 3912 * This follows Unicode 4 & 5 section 3.13 Default Case Operations: 3913 * 3914 * R3 toTitlecase(X): Find the word boundaries based on Unicode Standard Annex 3915 * #29, "Text Boundaries." Between each pair of word boundaries, find the first 3916 * cased character F. If F exists, map F to default_title(F); then map each 3917 * subsequent character C to default_lower(C). 3918 * 3919 * @see #toTitleCase 3920 * @see #TITLECASE_NO_LOWERCASE 3921 * @stable ICU 3.8 3922 */ 3923 public static final int TITLECASE_NO_BREAK_ADJUSTMENT = 0x200; 3924 3925 // public methods ---------------------------------------------------- 3926 3927 /** 3928 * Returnss the numeric value of a decimal digit code point. 3929 * <br>This method observes the semantics of 3930 * <code>java.lang.Character.digit()</code>. Note that this 3931 * will return positive values for code points for which isDigit 3932 * returns false, just like java.lang.Character. 3933 * <br><em>Semantic Change:</em> In release 1.3.1 and 3934 * prior, this did not treat the European letters as having a 3935 * digit value, and also treated numeric letters and other numbers as 3936 * digits. 3937 * This has been changed to conform to the java semantics. 3938 * <br>A code point is a valid digit if and only if: 3939 * <ul> 3940 * <li>ch is a decimal digit or one of the european letters, and 3941 * <li>the value of ch is less than the specified radix. 
3942 * </ul> 3943 * @param ch the code point to query 3944 * @param radix the radix 3945 * @return the numeric value represented by the code point in the 3946 * specified radix, or -1 if the code point is not a decimal digit 3947 * or if its value is too large for the radix 3948 * @stable ICU 2.1 3949 */ digit(int ch, int radix)3950 public static int digit(int ch, int radix) 3951 { 3952 if (2 <= radix && radix <= 36) { 3953 int value = digit(ch); 3954 if (value < 0) { 3955 // ch is not a decimal digit, try latin letters 3956 value = UCharacterProperty.getEuropeanDigit(ch); 3957 } 3958 return (value < radix) ? value : -1; 3959 } else { 3960 return -1; // invalid radix 3961 } 3962 } 3963 3964 /** 3965 * Returnss the numeric value of a decimal digit code point. 3966 * <br>This is a convenience overload of <code>digit(int, int)</code> 3967 * that provides a decimal radix. 3968 * <br><em>Semantic Change:</em> In release 1.3.1 and prior, this 3969 * treated numeric letters and other numbers as digits. This has 3970 * been changed to conform to the java semantics. 3971 * @param ch the code point to query 3972 * @return the numeric value represented by the code point, 3973 * or -1 if the code point is not a decimal digit or if its 3974 * value is too large for a decimal radix 3975 * @stable ICU 2.1 3976 */ digit(int ch)3977 public static int digit(int ch) 3978 { 3979 return UCharacterProperty.INSTANCE.digit(ch); 3980 } 3981 3982 /** 3983 * Returns the numeric value of the code point as a nonnegative 3984 * integer. 3985 * <br>If the code point does not have a numeric value, then -1 is returned. 3986 * <br> 3987 * If the code point has a numeric value that cannot be represented as a 3988 * nonnegative integer (for example, a fractional value), then -2 is 3989 * returned. 
3990 * @param ch the code point to query 3991 * @return the numeric value of the code point, or -1 if it has no numeric 3992 * value, or -2 if it has a numeric value that cannot be represented as a 3993 * nonnegative integer 3994 * @stable ICU 2.1 3995 */ getNumericValue(int ch)3996 public static int getNumericValue(int ch) 3997 { 3998 return UCharacterProperty.INSTANCE.getNumericValue(ch); 3999 } 4000 4001 /** 4002 * {@icu} Returns the numeric value for a Unicode code point as defined in the 4003 * Unicode Character Database. 4004 * <p>A "double" return type is necessary because some numeric values are 4005 * fractions, negative, or too large for int. 4006 * <p>For characters without any numeric values in the Unicode Character 4007 * Database, this function will return NO_NUMERIC_VALUE. 4008 * Note: This is different from the Unicode Standard which specifies NaN as the default value. 4009 * <p><em>API Change:</em> In release 2.2 and prior, this API has a 4010 * return type int and returns -1 when the argument ch does not have a 4011 * corresponding numeric value. This has been changed to synch with ICU4C 4012 * 4013 * This corresponds to the ICU4C function u_getNumericValue. 4014 * @param ch Code point to get the numeric value for. 4015 * @return numeric value of ch, or NO_NUMERIC_VALUE if none is defined. 4016 * @stable ICU 2.4 4017 */ getUnicodeNumericValue(int ch)4018 public static double getUnicodeNumericValue(int ch) 4019 { 4020 return UCharacterProperty.INSTANCE.getUnicodeNumericValue(ch); 4021 } 4022 4023 /** 4024 * Compatibility override of Java deprecated method. This 4025 * method will always remain deprecated. 4026 * Same as java.lang.Character.isSpace(). 4027 * @param ch the code point 4028 * @return true if the code point is a space character as 4029 * defined by java.lang.Character.isSpace. 
4030 * @deprecated ICU 3.4 (Java) 4031 */ 4032 @Deprecated isSpace(int ch)4033 public static boolean isSpace(int ch) { 4034 return ch <= 0x20 && 4035 (ch == 0x20 || ch == 0x09 || ch == 0x0a || ch == 0x0c || ch == 0x0d); 4036 } 4037 4038 /** 4039 * Returns a value indicating a code point's Unicode category. 4040 * Up-to-date Unicode implementation of java.lang.Character.getType() 4041 * except for the above mentioned code points that had their category 4042 * changed.<br> 4043 * Return results are constants from the interface 4044 * <a href=UCharacterCategory.html>UCharacterCategory</a><br> 4045 * <em>NOTE:</em> the UCharacterCategory values are <em>not</em> compatible with 4046 * those returned by java.lang.Character.getType. UCharacterCategory values 4047 * match the ones used in ICU4C, while java.lang.Character type 4048 * values, though similar, skip the value 17. 4049 * @param ch code point whose type is to be determined 4050 * @return category which is a value of UCharacterCategory 4051 * @stable ICU 2.1 4052 */ getType(int ch)4053 public static int getType(int ch) 4054 { 4055 return UCharacterProperty.INSTANCE.getType(ch); 4056 } 4057 4058 /** 4059 * Determines if a code point has a defined meaning in the up-to-date 4060 * Unicode standard. 4061 * E.g. supplementary code points though allocated space are not defined in 4062 * Unicode yet.<br> 4063 * Up-to-date Unicode implementation of java.lang.Character.isDefined() 4064 * @param ch code point to be determined if it is defined in the most 4065 * current version of Unicode 4066 * @return true if this code point is defined in unicode 4067 * @stable ICU 2.1 4068 */ isDefined(int ch)4069 public static boolean isDefined(int ch) 4070 { 4071 return getType(ch) != 0; 4072 } 4073 4074 /** 4075 * Determines if a code point is a Java digit. 4076 * <br>This method observes the semantics of 4077 * <code>java.lang.Character.isDigit()</code>. It returns true for decimal 4078 * digits only. 
4079 * <br><em>Semantic Change:</em> In release 1.3.1 and prior, this treated 4080 * numeric letters and other numbers as digits. 4081 * This has been changed to conform to the java semantics. 4082 * @param ch code point to query 4083 * @return true if this code point is a digit 4084 * @stable ICU 2.1 4085 */ isDigit(int ch)4086 public static boolean isDigit(int ch) 4087 { 4088 return getType(ch) == UCharacterCategory.DECIMAL_DIGIT_NUMBER; 4089 } 4090 4091 /** 4092 * Determines if the specified code point is an ISO control character. 4093 * A code point is considered to be an ISO control character if it is in 4094 * the range \u0000 through \u001F or in the range \u007F through 4095 * \u009F.<br> 4096 * Up-to-date Unicode implementation of java.lang.Character.isISOControl() 4097 * @param ch code point to determine if it is an ISO control character 4098 * @return true if code point is a ISO control character 4099 * @stable ICU 2.1 4100 */ isISOControl(int ch)4101 public static boolean isISOControl(int ch) 4102 { 4103 return ch >= 0 && ch <= APPLICATION_PROGRAM_COMMAND_ && 4104 ((ch <= UNIT_SEPARATOR_) || (ch >= DELETE_)); 4105 } 4106 4107 /** 4108 * Determines if the specified code point is a letter. 4109 * Up-to-date Unicode implementation of java.lang.Character.isLetter() 4110 * @param ch code point to determine if it is a letter 4111 * @return true if code point is a letter 4112 * @stable ICU 2.1 4113 */ isLetter(int ch)4114 public static boolean isLetter(int ch) 4115 { 4116 // if props == 0, it will just fall through and return false 4117 return ((1 << getType(ch)) 4118 & ((1 << UCharacterCategory.UPPERCASE_LETTER) 4119 | (1 << UCharacterCategory.LOWERCASE_LETTER) 4120 | (1 << UCharacterCategory.TITLECASE_LETTER) 4121 | (1 << UCharacterCategory.MODIFIER_LETTER) 4122 | (1 << UCharacterCategory.OTHER_LETTER))) != 0; 4123 } 4124 4125 /** 4126 * Determines if the specified code point is a letter or digit. 
4127 * {@icunote} This method, unlike java.lang.Character does not regard the ascii 4128 * characters 'A' - 'Z' and 'a' - 'z' as digits. 4129 * @param ch code point to determine if it is a letter or a digit 4130 * @return true if code point is a letter or a digit 4131 * @stable ICU 2.1 4132 */ isLetterOrDigit(int ch)4133 public static boolean isLetterOrDigit(int ch) 4134 { 4135 return ((1 << getType(ch)) 4136 & ((1 << UCharacterCategory.UPPERCASE_LETTER) 4137 | (1 << UCharacterCategory.LOWERCASE_LETTER) 4138 | (1 << UCharacterCategory.TITLECASE_LETTER) 4139 | (1 << UCharacterCategory.MODIFIER_LETTER) 4140 | (1 << UCharacterCategory.OTHER_LETTER) 4141 | (1 << UCharacterCategory.DECIMAL_DIGIT_NUMBER))) != 0; 4142 } 4143 4144 /** 4145 * Compatibility override of Java deprecated method. This 4146 * method will always remain deprecated. Delegates to 4147 * java.lang.Character.isJavaIdentifierStart. 4148 * @param cp the code point 4149 * @return true if the code point can start a java identifier. 4150 * @deprecated ICU 3.4 (Java) 4151 */ 4152 @Deprecated isJavaLetter(int cp)4153 public static boolean isJavaLetter(int cp) { 4154 return isJavaIdentifierStart(cp); 4155 } 4156 4157 /** 4158 * Compatibility override of Java deprecated method. This 4159 * method will always remain deprecated. Delegates to 4160 * java.lang.Character.isJavaIdentifierPart. 4161 * @param cp the code point 4162 * @return true if the code point can continue a java identifier. 4163 * @deprecated ICU 3.4 (Java) 4164 */ 4165 @Deprecated isJavaLetterOrDigit(int cp)4166 public static boolean isJavaLetterOrDigit(int cp) { 4167 return isJavaIdentifierPart(cp); 4168 } 4169 4170 /** 4171 * Compatibility override of Java method, delegates to 4172 * java.lang.Character.isJavaIdentifierStart. 4173 * @param cp the code point 4174 * @return true if the code point can start a java identifier. 
4175 * @stable ICU 3.4 4176 */ isJavaIdentifierStart(int cp)4177 public static boolean isJavaIdentifierStart(int cp) { 4178 // note, downcast to char for jdk 1.4 compatibility 4179 return java.lang.Character.isJavaIdentifierStart((char)cp); 4180 } 4181 4182 /** 4183 * Compatibility override of Java method, delegates to 4184 * java.lang.Character.isJavaIdentifierPart. 4185 * @param cp the code point 4186 * @return true if the code point can continue a java identifier. 4187 * @stable ICU 3.4 4188 */ isJavaIdentifierPart(int cp)4189 public static boolean isJavaIdentifierPart(int cp) { 4190 // note, downcast to char for jdk 1.4 compatibility 4191 return java.lang.Character.isJavaIdentifierPart((char)cp); 4192 } 4193 4194 /** 4195 * Determines if the specified code point is a lowercase character. 4196 * UnicodeData only contains case mappings for code points where they are 4197 * one-to-one mappings; it also omits information about context-sensitive 4198 * case mappings.<br> For more information about Unicode case mapping 4199 * please refer to the 4200 * <a href=http://www.unicode.org/unicode/reports/tr21/>Technical report 4201 * #21</a>.<br> 4202 * Up-to-date Unicode implementation of java.lang.Character.isLowerCase() 4203 * @param ch code point to determine if it is in lowercase 4204 * @return true if code point is a lowercase character 4205 * @stable ICU 2.1 4206 */ isLowerCase(int ch)4207 public static boolean isLowerCase(int ch) 4208 { 4209 // if props == 0, it will just fall through and return false 4210 return getType(ch) == UCharacterCategory.LOWERCASE_LETTER; 4211 } 4212 4213 /** 4214 * Determines if the specified code point is a white space character. 4215 * A code point is considered to be an whitespace character if and only 4216 * if it satisfies one of the following criteria: 4217 * <ul> 4218 * <li> It is a Unicode Separator character (categories "Z" = "Zs" or "Zl" or "Zp"), but is not 4219 * also a non-breaking space (\u00A0 or \u2007 or \u202F). 
4220 * <li> It is \u0009, HORIZONTAL TABULATION. 4221 * <li> It is \u000A, LINE FEED. 4222 * <li> It is \u000B, VERTICAL TABULATION. 4223 * <li> It is \u000C, FORM FEED. 4224 * <li> It is \u000D, CARRIAGE RETURN. 4225 * <li> It is \u001C, FILE SEPARATOR. 4226 * <li> It is \u001D, GROUP SEPARATOR. 4227 * <li> It is \u001E, RECORD SEPARATOR. 4228 * <li> It is \u001F, UNIT SEPARATOR. 4229 * </ul> 4230 * 4231 * This API tries to sync with the semantics of Java's 4232 * java.lang.Character.isWhitespace(), but it may not return 4233 * the exact same results because of the Unicode version 4234 * difference. 4235 * <p>Note: Unicode 4.0.1 changed U+200B ZERO WIDTH SPACE from a Space Separator (Zs) 4236 * to a Format Control (Cf). Since then, isWhitespace(0x200b) returns false. 4237 * See http://www.unicode.org/versions/Unicode4.0.1/ 4238 * @param ch code point to determine if it is a white space 4239 * @return true if the specified code point is a white space character 4240 * @stable ICU 2.1 4241 */ isWhitespace(int ch)4242 public static boolean isWhitespace(int ch) 4243 { 4244 // exclude no-break spaces 4245 // if props == 0, it will just fall through and return false 4246 return ((1 << getType(ch)) & 4247 ((1 << UCharacterCategory.SPACE_SEPARATOR) 4248 | (1 << UCharacterCategory.LINE_SEPARATOR) 4249 | (1 << UCharacterCategory.PARAGRAPH_SEPARATOR))) != 0 4250 && (ch != NO_BREAK_SPACE_) && (ch != FIGURE_SPACE_) && (ch != NARROW_NO_BREAK_SPACE_) 4251 // TAB VT LF FF CR FS GS RS US NL are all control characters 4252 // that are white spaces. 4253 || (ch >= 0x9 && ch <= 0xd) || (ch >= 0x1c && ch <= 0x1f); 4254 } 4255 4256 /** 4257 * Determines if the specified code point is a Unicode specified space 4258 * character, i.e. if code point is in the category Zs, Zl and Zp. 4259 * Up-to-date Unicode implementation of java.lang.Character.isSpaceChar(). 
4260 * @param ch code point to determine if it is a space 4261 * @return true if the specified code point is a space character 4262 * @stable ICU 2.1 4263 */ isSpaceChar(int ch)4264 public static boolean isSpaceChar(int ch) 4265 { 4266 // if props == 0, it will just fall through and return false 4267 return ((1 << getType(ch)) & ((1 << UCharacterCategory.SPACE_SEPARATOR) 4268 | (1 << UCharacterCategory.LINE_SEPARATOR) 4269 | (1 << UCharacterCategory.PARAGRAPH_SEPARATOR))) 4270 != 0; 4271 } 4272 4273 /** 4274 * Determines if the specified code point is a titlecase character. 4275 * UnicodeData only contains case mappings for code points where they are 4276 * one-to-one mappings; it also omits information about context-sensitive 4277 * case mappings.<br> 4278 * For more information about Unicode case mapping please refer to the 4279 * <a href=http://www.unicode.org/unicode/reports/tr21/> 4280 * Technical report #21</a>.<br> 4281 * Up-to-date Unicode implementation of java.lang.Character.isTitleCase(). 4282 * @param ch code point to determine if it is in title case 4283 * @return true if the specified code point is a titlecase character 4284 * @stable ICU 2.1 4285 */ isTitleCase(int ch)4286 public static boolean isTitleCase(int ch) 4287 { 4288 // if props == 0, it will just fall through and return false 4289 return getType(ch) == UCharacterCategory.TITLECASE_LETTER; 4290 } 4291 4292 /** 4293 * Determines if the specified code point may be any part of a Unicode 4294 * identifier other than the starting character. 
4295 * A code point may be part of a Unicode identifier if and only if it is 4296 * one of the following: 4297 * <ul> 4298 * <li> Lu Uppercase letter 4299 * <li> Ll Lowercase letter 4300 * <li> Lt Titlecase letter 4301 * <li> Lm Modifier letter 4302 * <li> Lo Other letter 4303 * <li> Nl Letter number 4304 * <li> Pc Connecting punctuation character 4305 * <li> Nd decimal number 4306 * <li> Mc Spacing combining mark 4307 * <li> Mn Non-spacing mark 4308 * <li> Cf formatting code 4309 * </ul> 4310 * Up-to-date Unicode implementation of 4311 * java.lang.Character.isUnicodeIdentifierPart().<br> 4312 * See <a href=http://www.unicode.org/unicode/reports/tr8/>UTR #8</a>. 4313 * @param ch code point to determine if is can be part of a Unicode 4314 * identifier 4315 * @return true if code point is any character belonging a unicode 4316 * identifier suffix after the first character 4317 * @stable ICU 2.1 4318 */ isUnicodeIdentifierPart(int ch)4319 public static boolean isUnicodeIdentifierPart(int ch) 4320 { 4321 // if props == 0, it will just fall through and return false 4322 // cat == format 4323 return ((1 << getType(ch)) 4324 & ((1 << UCharacterCategory.UPPERCASE_LETTER) 4325 | (1 << UCharacterCategory.LOWERCASE_LETTER) 4326 | (1 << UCharacterCategory.TITLECASE_LETTER) 4327 | (1 << UCharacterCategory.MODIFIER_LETTER) 4328 | (1 << UCharacterCategory.OTHER_LETTER) 4329 | (1 << UCharacterCategory.LETTER_NUMBER) 4330 | (1 << UCharacterCategory.CONNECTOR_PUNCTUATION) 4331 | (1 << UCharacterCategory.DECIMAL_DIGIT_NUMBER) 4332 | (1 << UCharacterCategory.COMBINING_SPACING_MARK) 4333 | (1 << UCharacterCategory.NON_SPACING_MARK))) != 0 4334 || isIdentifierIgnorable(ch); 4335 } 4336 4337 /** 4338 * Determines if the specified code point is permissible as the first 4339 * character in a Unicode identifier. 
4340 * A code point may start a Unicode identifier if it is of type either 4341 * <ul> 4342 * <li> Lu Uppercase letter 4343 * <li> Ll Lowercase letter 4344 * <li> Lt Titlecase letter 4345 * <li> Lm Modifier letter 4346 * <li> Lo Other letter 4347 * <li> Nl Letter number 4348 * </ul> 4349 * Up-to-date Unicode implementation of 4350 * java.lang.Character.isUnicodeIdentifierStart().<br> 4351 * See <a href=http://www.unicode.org/unicode/reports/tr8/>UTR #8</a>. 4352 * @param ch code point to determine if it can start a Unicode identifier 4353 * @return true if code point is the first character belonging a unicode 4354 * identifier 4355 * @stable ICU 2.1 4356 */ isUnicodeIdentifierStart(int ch)4357 public static boolean isUnicodeIdentifierStart(int ch) 4358 { 4359 /*int cat = getType(ch);*/ 4360 // if props == 0, it will just fall through and return false 4361 return ((1 << getType(ch)) 4362 & ((1 << UCharacterCategory.UPPERCASE_LETTER) 4363 | (1 << UCharacterCategory.LOWERCASE_LETTER) 4364 | (1 << UCharacterCategory.TITLECASE_LETTER) 4365 | (1 << UCharacterCategory.MODIFIER_LETTER) 4366 | (1 << UCharacterCategory.OTHER_LETTER) 4367 | (1 << UCharacterCategory.LETTER_NUMBER))) != 0; 4368 } 4369 4370 /** 4371 * Determines if the specified code point should be regarded as an 4372 * ignorable character in a Java identifier. 4373 * A character is Java-identifier-ignorable if it has the general category 4374 * Cf Formatting Control, or it is a non-Java-whitespace ISO control: 4375 * U+0000..U+0008, U+000E..U+001B, U+007F..U+009F.<br> 4376 * Up-to-date Unicode implementation of 4377 * java.lang.Character.isIdentifierIgnorable().<br> 4378 * See <a href=http://www.unicode.org/unicode/reports/tr8/>UTR #8</a>. 4379 * <p>Note that Unicode just recommends to ignore Cf (format controls). 4380 * @param ch code point to be determined if it can be ignored in a Unicode 4381 * identifier. 
4382 * @return true if the code point is ignorable 4383 * @stable ICU 2.1 4384 */ isIdentifierIgnorable(int ch)4385 public static boolean isIdentifierIgnorable(int ch) 4386 { 4387 // see java.lang.Character.isIdentifierIgnorable() on range of 4388 // ignorable characters. 4389 if (ch <= 0x9f) { 4390 return isISOControl(ch) 4391 && !((ch >= 0x9 && ch <= 0xd) 4392 || (ch >= 0x1c && ch <= 0x1f)); 4393 } 4394 return getType(ch) == UCharacterCategory.FORMAT; 4395 } 4396 4397 /** 4398 * Determines if the specified code point is an uppercase character. 4399 * UnicodeData only contains case mappings for code point where they are 4400 * one-to-one mappings; it also omits information about context-sensitive 4401 * case mappings.<br> 4402 * For language specific case conversion behavior, use 4403 * toUpperCase(locale, str). <br> 4404 * For example, the case conversion for dot-less i and dotted I in Turkish, 4405 * or for final sigma in Greek. 4406 * For more information about Unicode case mapping please refer to the 4407 * <a href=http://www.unicode.org/unicode/reports/tr21/> 4408 * Technical report #21</a>.<br> 4409 * Up-to-date Unicode implementation of java.lang.Character.isUpperCase(). 4410 * @param ch code point to determine if it is in uppercase 4411 * @return true if the code point is an uppercase character 4412 * @stable ICU 2.1 4413 */ isUpperCase(int ch)4414 public static boolean isUpperCase(int ch) 4415 { 4416 // if props == 0, it will just fall through and return false 4417 return getType(ch) == UCharacterCategory.UPPERCASE_LETTER; 4418 } 4419 4420 /** 4421 * The given code point is mapped to its lowercase equivalent; if the code 4422 * point has no lowercase equivalent, the code point itself is returned. 4423 * Up-to-date Unicode implementation of java.lang.Character.toLowerCase() 4424 * 4425 * <p>This function only returns the simple, single-code point case mapping. 
4426 * Full case mappings should be used whenever possible because they produce 4427 * better results by working on whole strings. 4428 * They take into account the string context and the language and can map 4429 * to a result string with a different length as appropriate. 4430 * Full case mappings are applied by the case mapping functions 4431 * that take String parameters rather than code points (int). 4432 * See also the User Guide chapter on C/POSIX migration: 4433 * http://www.icu-project.org/userguide/posix.html#case_mappings 4434 * 4435 * @param ch code point whose lowercase equivalent is to be retrieved 4436 * @return the lowercase equivalent code point 4437 * @stable ICU 2.1 4438 */ toLowerCase(int ch)4439 public static int toLowerCase(int ch) { 4440 return UCaseProps.INSTANCE.tolower(ch); 4441 } 4442 4443 /** 4444 * Converts argument code point and returns a String object representing 4445 * the code point's value in UTF-16 format. 4446 * The result is a string whose length is 1 for BMP code points, 2 for supplementary ones. 4447 * 4448 * <p>Up-to-date Unicode implementation of java.lang.Character.toString(). 4449 * 4450 * @param ch code point 4451 * @return string representation of the code point, null if code point is not 4452 * defined in unicode 4453 * @stable ICU 2.1 4454 */ toString(int ch)4455 public static String toString(int ch) 4456 { 4457 if (ch < MIN_VALUE || ch > MAX_VALUE) { 4458 return null; 4459 } 4460 4461 if (ch < SUPPLEMENTARY_MIN_VALUE) { 4462 return String.valueOf((char)ch); 4463 } 4464 4465 return new String(Character.toChars(ch)); 4466 } 4467 4468 /** 4469 * Converts the code point argument to titlecase. 4470 * If no titlecase is available, the uppercase is returned. If no uppercase 4471 * is available, the code point itself is returned. 4472 * Up-to-date Unicode implementation of java.lang.Character.toTitleCase() 4473 * 4474 * <p>This function only returns the simple, single-code point case mapping. 
4475 * Full case mappings should be used whenever possible because they produce 4476 * better results by working on whole strings. 4477 * They take into account the string context and the language and can map 4478 * to a result string with a different length as appropriate. 4479 * Full case mappings are applied by the case mapping functions 4480 * that take String parameters rather than code points (int). 4481 * See also the User Guide chapter on C/POSIX migration: 4482 * http://www.icu-project.org/userguide/posix.html#case_mappings 4483 * 4484 * @param ch code point whose title case is to be retrieved 4485 * @return titlecase code point 4486 * @stable ICU 2.1 4487 */ toTitleCase(int ch)4488 public static int toTitleCase(int ch) { 4489 return UCaseProps.INSTANCE.totitle(ch); 4490 } 4491 4492 /** 4493 * Converts the character argument to uppercase. 4494 * If no uppercase is available, the character itself is returned. 4495 * Up-to-date Unicode implementation of java.lang.Character.toUpperCase() 4496 * 4497 * <p>This function only returns the simple, single-code point case mapping. 4498 * Full case mappings should be used whenever possible because they produce 4499 * better results by working on whole strings. 4500 * They take into account the string context and the language and can map 4501 * to a result string with a different length as appropriate. 4502 * Full case mappings are applied by the case mapping functions 4503 * that take String parameters rather than code points (int). 
4504 * See also the User Guide chapter on C/POSIX migration: 4505 * http://www.icu-project.org/userguide/posix.html#case_mappings 4506 * 4507 * @param ch code point whose uppercase is to be retrieved 4508 * @return uppercase code point 4509 * @stable ICU 2.1 4510 */ toUpperCase(int ch)4511 public static int toUpperCase(int ch) { 4512 return UCaseProps.INSTANCE.toupper(ch); 4513 } 4514 4515 // extra methods not in java.lang.Character -------------------------- 4516 4517 /** 4518 * {@icu} Determines if the code point is a supplementary character. 4519 * A code point is a supplementary character if and only if it is greater 4520 * than <a href=#SUPPLEMENTARY_MIN_VALUE>SUPPLEMENTARY_MIN_VALUE</a> 4521 * @param ch code point to be determined if it is in the supplementary 4522 * plane 4523 * @return true if code point is a supplementary character 4524 * @stable ICU 2.1 4525 */ isSupplementary(int ch)4526 public static boolean isSupplementary(int ch) 4527 { 4528 return ch >= UCharacter.SUPPLEMENTARY_MIN_VALUE && 4529 ch <= UCharacter.MAX_VALUE; 4530 } 4531 4532 /** 4533 * {@icu} Determines if the code point is in the BMP plane. 4534 * @param ch code point to be determined if it is not a supplementary 4535 * character 4536 * @return true if code point is not a supplementary character 4537 * @stable ICU 2.1 4538 */ isBMP(int ch)4539 public static boolean isBMP(int ch) 4540 { 4541 return (ch >= 0 && ch <= LAST_CHAR_MASK_); 4542 } 4543 4544 /** 4545 * {@icu} Determines whether the specified code point is a printable character 4546 * according to the Unicode standard. 
4547 * @param ch code point to be determined if it is printable 4548 * @return true if the code point is a printable character 4549 * @stable ICU 2.1 4550 */ isPrintable(int ch)4551 public static boolean isPrintable(int ch) 4552 { 4553 int cat = getType(ch); 4554 // if props == 0, it will just fall through and return false 4555 return (cat != UCharacterCategory.UNASSIGNED && 4556 cat != UCharacterCategory.CONTROL && 4557 cat != UCharacterCategory.FORMAT && 4558 cat != UCharacterCategory.PRIVATE_USE && 4559 cat != UCharacterCategory.SURROGATE && 4560 cat != UCharacterCategory.GENERAL_OTHER_TYPES); 4561 } 4562 4563 /** 4564 * {@icu} Determines whether the specified code point is of base form. 4565 * A code point of base form does not graphically combine with preceding 4566 * characters, and is neither a control nor a format character. 4567 * @param ch code point to be determined if it is of base form 4568 * @return true if the code point is of base form 4569 * @stable ICU 2.1 4570 */ isBaseForm(int ch)4571 public static boolean isBaseForm(int ch) 4572 { 4573 int cat = getType(ch); 4574 // if props == 0, it will just fall through and return false 4575 return cat == UCharacterCategory.DECIMAL_DIGIT_NUMBER || 4576 cat == UCharacterCategory.OTHER_NUMBER || 4577 cat == UCharacterCategory.LETTER_NUMBER || 4578 cat == UCharacterCategory.UPPERCASE_LETTER || 4579 cat == UCharacterCategory.LOWERCASE_LETTER || 4580 cat == UCharacterCategory.TITLECASE_LETTER || 4581 cat == UCharacterCategory.MODIFIER_LETTER || 4582 cat == UCharacterCategory.OTHER_LETTER || 4583 cat == UCharacterCategory.NON_SPACING_MARK || 4584 cat == UCharacterCategory.ENCLOSING_MARK || 4585 cat == UCharacterCategory.COMBINING_SPACING_MARK; 4586 } 4587 4588 /** 4589 * {@icu} Returns the Bidirection property of a code point. 
4590 * For example, 0x0041 (letter A) has the LEFT_TO_RIGHT directional 4591 * property.<br> 4592 * Result returned belongs to the interface 4593 * <a href=UCharacterDirection.html>UCharacterDirection</a> 4594 * @param ch the code point to be determined its direction 4595 * @return direction constant from UCharacterDirection. 4596 * @stable ICU 2.1 4597 */ getDirection(int ch)4598 public static int getDirection(int ch) 4599 { 4600 return UBiDiProps.INSTANCE.getClass(ch); 4601 } 4602 4603 /** 4604 * Determines whether the code point has the "mirrored" property. 4605 * This property is set for characters that are commonly used in 4606 * Right-To-Left contexts and need to be displayed with a "mirrored" 4607 * glyph. 4608 * @param ch code point whose mirror is to be determined 4609 * @return true if the code point has the "mirrored" property 4610 * @stable ICU 2.1 4611 */ isMirrored(int ch)4612 public static boolean isMirrored(int ch) 4613 { 4614 return UBiDiProps.INSTANCE.isMirrored(ch); 4615 } 4616 4617 /** 4618 * {@icu} Maps the specified code point to a "mirror-image" code point. 4619 * For code points with the "mirrored" property, implementations sometimes 4620 * need a "poor man's" mapping to another code point such that the default 4621 * glyph may serve as the mirror-image of the default glyph of the 4622 * specified code point.<br> 4623 * This is useful for text conversion to and from codepages with visual 4624 * order, and for displays without glyph selection capabilities. 4625 * @param ch code point whose mirror is to be retrieved 4626 * @return another code point that may serve as a mirror-image substitute, 4627 * or ch itself if there is no such mapping or ch does not have the 4628 * "mirrored" property 4629 * @stable ICU 2.1 4630 */ getMirror(int ch)4631 public static int getMirror(int ch) 4632 { 4633 return UBiDiProps.INSTANCE.getMirror(ch); 4634 } 4635 4636 /** 4637 * {@icu} Maps the specified character to its paired bracket character. 
4638 * For Bidi_Paired_Bracket_Type!=None, this is the same as getMirror(int). 4639 * Otherwise c itself is returned. 4640 * See http://www.unicode.org/reports/tr9/ 4641 * 4642 * @param c the code point to be mapped 4643 * @return the paired bracket code point, 4644 * or c itself if there is no such mapping 4645 * (Bidi_Paired_Bracket_Type=None) 4646 * 4647 * @see UProperty#BIDI_PAIRED_BRACKET 4648 * @see UProperty#BIDI_PAIRED_BRACKET_TYPE 4649 * @see #getMirror(int) 4650 * @stable ICU 52 4651 */ getBidiPairedBracket(int c)4652 public static int getBidiPairedBracket(int c) { 4653 return UBiDiProps.INSTANCE.getPairedBracket(c); 4654 } 4655 4656 /** 4657 * {@icu} Returns the combining class of the argument codepoint 4658 * @param ch code point whose combining is to be retrieved 4659 * @return the combining class of the codepoint 4660 * @stable ICU 2.1 4661 */ getCombiningClass(int ch)4662 public static int getCombiningClass(int ch) 4663 { 4664 return Normalizer2.getNFDInstance().getCombiningClass(ch); 4665 } 4666 4667 /** 4668 * {@icu} A code point is illegal if and only if 4669 * <ul> 4670 * <li> Out of bounds, less than 0 or greater than UCharacter.MAX_VALUE 4671 * <li> A surrogate value, 0xD800 to 0xDFFF 4672 * <li> Not-a-character, having the form 0x xxFFFF or 0x xxFFFE 4673 * </ul> 4674 * Note: legal does not mean that it is assigned in this version of Unicode. 4675 * @param ch code point to determine if it is a legal code point by itself 4676 * @return true if and only if legal. 4677 * @stable ICU 2.1 4678 */ isLegal(int ch)4679 public static boolean isLegal(int ch) 4680 { 4681 if (ch < MIN_VALUE) { 4682 return false; 4683 } 4684 if (ch < Character.MIN_SURROGATE) { 4685 return true; 4686 } 4687 if (ch <= Character.MAX_SURROGATE) { 4688 return false; 4689 } 4690 if (UCharacterUtility.isNonCharacter(ch)) { 4691 return false; 4692 } 4693 return (ch <= MAX_VALUE); 4694 } 4695 4696 /** 4697 * {@icu} A string is legal iff all its code points are legal. 
4698 * A code point is illegal if and only if 4699 * <ul> 4700 * <li> Out of bounds, less than 0 or greater than UCharacter.MAX_VALUE 4701 * <li> A surrogate value, 0xD800 to 0xDFFF 4702 * <li> Not-a-character, having the form 0x xxFFFF or 0x xxFFFE 4703 * </ul> 4704 * Note: legal does not mean that it is assigned in this version of Unicode. 4705 * @param str containing code points to examin 4706 * @return true if and only if legal. 4707 * @stable ICU 2.1 4708 */ isLegal(String str)4709 public static boolean isLegal(String str) 4710 { 4711 int size = str.length(); 4712 int codepoint; 4713 for (int i = 0; i < size; i += Character.charCount(codepoint)) 4714 { 4715 codepoint = str.codePointAt(i); 4716 if (!isLegal(codepoint)) { 4717 return false; 4718 } 4719 } 4720 return true; 4721 } 4722 4723 /** 4724 * {@icu} Returns the version of Unicode data used. 4725 * @return the unicode version number used 4726 * @stable ICU 2.1 4727 */ getUnicodeVersion()4728 public static VersionInfo getUnicodeVersion() 4729 { 4730 return UCharacterProperty.INSTANCE.m_unicodeVersion_; 4731 } 4732 4733 /** 4734 * {@icu} Returns the most current Unicode name of the argument code point, or 4735 * null if the character is unassigned or outside the range 4736 * UCharacter.MIN_VALUE and UCharacter.MAX_VALUE or does not have a name. 4737 * <br> 4738 * Note calling any methods related to code point names, e.g. get*Name*() 4739 * incurs a one-time initialisation cost to construct the name tables. 
4740 * @param ch the code point for which to get the name 4741 * @return most current Unicode name 4742 * @stable ICU 2.1 4743 */ getName(int ch)4744 public static String getName(int ch) 4745 { 4746 return UCharacterName.INSTANCE.getName(ch, UCharacterNameChoice.UNICODE_CHAR_NAME); 4747 } 4748 4749 /** 4750 * {@icu} Returns the names for each of the characters in a string 4751 * @param s string to format 4752 * @param separator string to go between names 4753 * @return string of names 4754 * @stable ICU 3.8 4755 */ getName(String s, String separator)4756 public static String getName(String s, String separator) { 4757 if (s.length() == 1) { // handle common case 4758 return getName(s.charAt(0)); 4759 } 4760 int cp; 4761 StringBuilder sb = new StringBuilder(); 4762 for (int i = 0; i < s.length(); i += Character.charCount(cp)) { 4763 cp = s.codePointAt(i); 4764 if (i != 0) sb.append(separator); 4765 sb.append(UCharacter.getName(cp)); 4766 } 4767 return sb.toString(); 4768 } 4769 4770 /** 4771 * {@icu} Returns null. 4772 * Used to return the Unicode_1_Name property value which was of little practical value. 4773 * @param ch the code point for which to get the name 4774 * @return null 4775 * @deprecated ICU 49 4776 */ 4777 @Deprecated getName1_0(int ch)4778 public static String getName1_0(int ch) 4779 { 4780 return null; 4781 } 4782 4783 /** 4784 * {@icu} Returns a name for a valid codepoint. Unlike, getName(int) and 4785 * getName1_0(int), this method will return a name even for codepoints that 4786 * are not assigned a name in UnicodeData.txt. 4787 * 4788 * <p>The names are returned in the following order. 4789 * <ul> 4790 * <li> Most current Unicode name if there is any 4791 * <li> Unicode 1.0 name if there is any 4792 * <li> Extended name in the form of 4793 * "<codepoint_type-codepoint_hex_digits>". E.g., <noncharacter-fffe> 4794 * </ul> 4795 * Note calling any methods related to code point names, e.g. 
get*Name*() 4796 * incurs a one-time initialisation cost to construct the name tables. 4797 * @param ch the code point for which to get the name 4798 * @return a name for the argument codepoint 4799 * @stable ICU 2.6 4800 */ getExtendedName(int ch)4801 public static String getExtendedName(int ch) { 4802 return UCharacterName.INSTANCE.getName(ch, UCharacterNameChoice.EXTENDED_CHAR_NAME); 4803 } 4804 4805 /** 4806 * {@icu} Returns the corrected name from NameAliases.txt if there is one. 4807 * Returns null if the character is unassigned or outside the range 4808 * UCharacter.MIN_VALUE and UCharacter.MAX_VALUE or does not have a name. 4809 * <br> 4810 * Note calling any methods related to code point names, e.g. get*Name*() 4811 * incurs a one-time initialisation cost to construct the name tables. 4812 * @param ch the code point for which to get the name alias 4813 * @return Unicode name alias, or null 4814 * @stable ICU 4.4 4815 */ getNameAlias(int ch)4816 public static String getNameAlias(int ch) 4817 { 4818 return UCharacterName.INSTANCE.getName(ch, UCharacterNameChoice.CHAR_NAME_ALIAS); 4819 } 4820 4821 /** 4822 * {@icu} Returns null. 4823 * Used to return the ISO 10646 comment for a character. 4824 * The Unicode ISO_Comment property is deprecated and has no values. 4825 * 4826 * @param ch The code point for which to get the ISO comment. 4827 * It must be the case that {@code 0 <= ch <= 0x10ffff}. 4828 * @return null 4829 * @deprecated ICU 49 4830 */ 4831 @Deprecated getISOComment(int ch)4832 public static String getISOComment(int ch) 4833 { 4834 return null; 4835 } 4836 4837 /** 4838 * {@icu} <p>Finds a Unicode code point by its most current Unicode name and 4839 * return its code point value. All Unicode names are in uppercase. 4840 * Note calling any methods related to code point names, e.g. get*Name*() 4841 * incurs a one-time initialisation cost to construct the name tables. 
4842 * @param name most current Unicode character name whose code point is to 4843 * be returned 4844 * @return code point or -1 if name is not found 4845 * @stable ICU 2.1 4846 */ getCharFromName(String name)4847 public static int getCharFromName(String name){ 4848 return UCharacterName.INSTANCE.getCharFromName( 4849 UCharacterNameChoice.UNICODE_CHAR_NAME, name); 4850 } 4851 4852 /** 4853 * {@icu} Returns -1. 4854 * <p>Used to find a Unicode character by its version 1.0 Unicode name and return 4855 * its code point value. 4856 * @param name Unicode 1.0 code point name whose code point is to be 4857 * returned 4858 * @return -1 4859 * @deprecated ICU 49 4860 * @see #getName1_0(int) 4861 */ 4862 @Deprecated getCharFromName1_0(String name)4863 public static int getCharFromName1_0(String name){ 4864 return -1; 4865 } 4866 4867 /** 4868 * {@icu} <p>Find a Unicode character by either its name and return its code 4869 * point value. All Unicode names are in uppercase. 4870 * Extended names are all lowercase except for numbers and are contained 4871 * within angle brackets. 4872 * The names are searched in the following order 4873 * <ul> 4874 * <li> Most current Unicode name if there is any 4875 * <li> Unicode 1.0 name if there is any 4876 * <li> Extended name in the form of 4877 * "<codepoint_type-codepoint_hex_digits>". E.g. <noncharacter-FFFE> 4878 * </ul> 4879 * Note calling any methods related to code point names, e.g. get*Name*() 4880 * incurs a one-time initialisation cost to construct the name tables. 4881 * @param name codepoint name 4882 * @return code point associated with the name or -1 if the name is not 4883 * found. 
4884 * @stable ICU 2.6 4885 */ getCharFromExtendedName(String name)4886 public static int getCharFromExtendedName(String name){ 4887 return UCharacterName.INSTANCE.getCharFromName( 4888 UCharacterNameChoice.EXTENDED_CHAR_NAME, name); 4889 } 4890 4891 /** 4892 * {@icu} <p>Find a Unicode character by its corrected name alias and return 4893 * its code point value. All Unicode names are in uppercase. 4894 * Note calling any methods related to code point names, e.g. get*Name*() 4895 * incurs a one-time initialisation cost to construct the name tables. 4896 * @param name Unicode name alias whose code point is to be returned 4897 * @return code point or -1 if name is not found 4898 * @stable ICU 4.4 4899 */ getCharFromNameAlias(String name)4900 public static int getCharFromNameAlias(String name){ 4901 return UCharacterName.INSTANCE.getCharFromName(UCharacterNameChoice.CHAR_NAME_ALIAS, name); 4902 } 4903 4904 /** 4905 * {@icu} Return the Unicode name for a given property, as given in the 4906 * Unicode database file PropertyAliases.txt. Most properties 4907 * have more than one name. The nameChoice determines which one 4908 * is returned. 4909 * 4910 * In addition, this function maps the property 4911 * UProperty.GENERAL_CATEGORY_MASK to the synthetic names "gcm" / 4912 * "General_Category_Mask". These names are not in 4913 * PropertyAliases.txt. 4914 * 4915 * @param property UProperty selector. 4916 * 4917 * @param nameChoice UProperty.NameChoice selector for which name 4918 * to get. All properties have a long name. Most have a short 4919 * name, but some do not. Unicode allows for additional names; if 4920 * present these will be returned by UProperty.NameChoice.LONG + i, 4921 * where i=1, 2,... 4922 * 4923 * @return a name, or null if Unicode explicitly defines no name 4924 * ("n/a") for a given property/nameChoice. If a given nameChoice 4925 * throws an exception, then all larger values of nameChoice will 4926 * throw an exception. 
If null is returned for a given 4927 * nameChoice, then other nameChoice values may return non-null 4928 * results. 4929 * 4930 * @exception IllegalArgumentException thrown if property or 4931 * nameChoice are invalid. 4932 * 4933 * @see UProperty 4934 * @see UProperty.NameChoice 4935 * @stable ICU 2.4 4936 */ getPropertyName(int property, int nameChoice)4937 public static String getPropertyName(int property, 4938 int nameChoice) { 4939 return UPropertyAliases.INSTANCE.getPropertyName(property, nameChoice); 4940 } 4941 4942 /** 4943 * {@icu} Return the UProperty selector for a given property name, as 4944 * specified in the Unicode database file PropertyAliases.txt. 4945 * Short, long, and any other variants are recognized. 4946 * 4947 * In addition, this function maps the synthetic names "gcm" / 4948 * "General_Category_Mask" to the property 4949 * UProperty.GENERAL_CATEGORY_MASK. These names are not in 4950 * PropertyAliases.txt. 4951 * 4952 * @param propertyAlias the property name to be matched. The name 4953 * is compared using "loose matching" as described in 4954 * PropertyAliases.txt. 4955 * 4956 * @return a UProperty enum. 4957 * 4958 * @exception IllegalArgumentException thrown if propertyAlias 4959 * is not recognized. 4960 * 4961 * @see UProperty 4962 * @stable ICU 2.4 4963 */ getPropertyEnum(CharSequence propertyAlias)4964 public static int getPropertyEnum(CharSequence propertyAlias) { 4965 int propEnum = UPropertyAliases.INSTANCE.getPropertyEnum(propertyAlias); 4966 if (propEnum == UProperty.UNDEFINED) { 4967 throw new IllegalIcuArgumentException("Invalid name: " + propertyAlias); 4968 } 4969 return propEnum; 4970 } 4971 4972 /** 4973 * {@icu} Return the Unicode name for a given property value, as given in 4974 * the Unicode database file PropertyValueAliases.txt. Most 4975 * values have more than one name. The nameChoice determines 4976 * which one is returned. 
4977 * 4978 * Note: Some of the names in PropertyValueAliases.txt can only be 4979 * retrieved using UProperty.GENERAL_CATEGORY_MASK, not 4980 * UProperty.GENERAL_CATEGORY. These include: "C" / "Other", "L" / 4981 * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P" 4982 * / "Punctuation", "S" / "Symbol", and "Z" / "Separator". 4983 * 4984 * @param property UProperty selector constant. 4985 * UProperty.INT_START <= property < UProperty.INT_LIMIT or 4986 * UProperty.BINARY_START <= property < UProperty.BINARY_LIMIT or 4987 * UProperty.MASK_START < = property < UProperty.MASK_LIMIT. 4988 * If out of range, null is returned. 4989 * 4990 * @param value selector for a value for the given property. In 4991 * general, valid values range from 0 up to some maximum. There 4992 * are a few exceptions: (1.) UProperty.BLOCK values begin at the 4993 * non-zero value BASIC_LATIN.getID(). (2.) 4994 * UProperty.CANONICAL_COMBINING_CLASS values are not contiguous 4995 * and range from 0..240. (3.) UProperty.GENERAL_CATEGORY_MASK values 4996 * are mask values produced by left-shifting 1 by 4997 * UCharacter.getType(). This allows grouped categories such as 4998 * [:L:] to be represented. Mask values are non-contiguous. 4999 * 5000 * @param nameChoice UProperty.NameChoice selector for which name 5001 * to get. All values have a long name. Most have a short name, 5002 * but some do not. Unicode allows for additional names; if 5003 * present these will be returned by UProperty.NameChoice.LONG + i, 5004 * where i=1, 2,... 5005 * 5006 * @return a name, or null if Unicode explicitly defines no name 5007 * ("n/a") for a given property/value/nameChoice. If a given 5008 * nameChoice throws an exception, then all larger values of 5009 * nameChoice will throw an exception. If null is returned for a 5010 * given nameChoice, then other nameChoice values may return 5011 * non-null results. 
5012 * 5013 * @exception IllegalArgumentException thrown if property, value, 5014 * or nameChoice are invalid. 5015 * 5016 * @see UProperty 5017 * @see UProperty.NameChoice 5018 * @stable ICU 2.4 5019 */ getPropertyValueName(int property, int value, int nameChoice)5020 public static String getPropertyValueName(int property, 5021 int value, 5022 int nameChoice) 5023 { 5024 if ((property == UProperty.CANONICAL_COMBINING_CLASS 5025 || property == UProperty.LEAD_CANONICAL_COMBINING_CLASS 5026 || property == UProperty.TRAIL_CANONICAL_COMBINING_CLASS) 5027 && value >= UCharacter.getIntPropertyMinValue( 5028 UProperty.CANONICAL_COMBINING_CLASS) 5029 && value <= UCharacter.getIntPropertyMaxValue( 5030 UProperty.CANONICAL_COMBINING_CLASS) 5031 && nameChoice >= 0 && nameChoice < UProperty.NameChoice.COUNT) { 5032 // this is hard coded for the valid cc 5033 // because PropertyValueAliases.txt does not contain all of them 5034 try { 5035 return UPropertyAliases.INSTANCE.getPropertyValueName(property, value, 5036 nameChoice); 5037 } 5038 catch (IllegalArgumentException e) { 5039 return null; 5040 } 5041 } 5042 return UPropertyAliases.INSTANCE.getPropertyValueName(property, value, nameChoice); 5043 } 5044 5045 /** 5046 * {@icu} Return the property value integer for a given value name, as 5047 * specified in the Unicode database file PropertyValueAliases.txt. 5048 * Short, long, and any other variants are recognized. 5049 * 5050 * Note: Some of the names in PropertyValueAliases.txt will only be 5051 * recognized with UProperty.GENERAL_CATEGORY_MASK, not 5052 * UProperty.GENERAL_CATEGORY. These include: "C" / "Other", "L" / 5053 * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P" 5054 * / "Punctuation", "S" / "Symbol", and "Z" / "Separator". 5055 * 5056 * @param property UProperty selector constant. 
5057 * UProperty.INT_START <= property < UProperty.INT_LIMIT or 5058 * UProperty.BINARY_START <= property < UProperty.BINARY_LIMIT or 5059 * UProperty.MASK_START < = property < UProperty.MASK_LIMIT. 5060 * Only these properties can be enumerated. 5061 * 5062 * @param valueAlias the value name to be matched. The name is 5063 * compared using "loose matching" as described in 5064 * PropertyValueAliases.txt. 5065 * 5066 * @return a value integer. Note: UProperty.GENERAL_CATEGORY 5067 * values are mask values produced by left-shifting 1 by 5068 * UCharacter.getType(). This allows grouped categories such as 5069 * [:L:] to be represented. 5070 * 5071 * @see UProperty 5072 * @throws IllegalArgumentException if property is not a valid UProperty 5073 * selector or valueAlias is not a value of this property 5074 * @stable ICU 2.4 5075 */ getPropertyValueEnum(int property, CharSequence valueAlias)5076 public static int getPropertyValueEnum(int property, CharSequence valueAlias) { 5077 int propEnum = UPropertyAliases.INSTANCE.getPropertyValueEnum(property, valueAlias); 5078 if (propEnum == UProperty.UNDEFINED) { 5079 throw new IllegalIcuArgumentException("Invalid name: " + valueAlias); 5080 } 5081 return propEnum; 5082 } 5083 5084 /** 5085 * Same as {@link #getPropertyValueEnum(int, CharSequence)}, except doesn't throw exception. Instead, returns UProperty.UNDEFINED. 5086 * @param property Same as {@link #getPropertyValueEnum(int, CharSequence)} 5087 * @param valueAlias Same as {@link #getPropertyValueEnum(int, CharSequence)} 5088 * @return returns UProperty.UNDEFINED if the value is not valid, otherwise the value. 5089 * @internal 5090 * @deprecated This API is ICU internal only. 
5091 */ 5092 @Deprecated getPropertyValueEnumNoThrow(int property, CharSequence valueAlias)5093 public static int getPropertyValueEnumNoThrow(int property, CharSequence valueAlias) { 5094 return UPropertyAliases.INSTANCE.getPropertyValueEnumNoThrow(property, valueAlias); 5095 } 5096 5097 5098 /** 5099 * {@icu} Returns a code point corresponding to the two surrogate code units. 5100 * 5101 * @param lead the lead char 5102 * @param trail the trail char 5103 * @return code point if surrogate characters are valid. 5104 * @exception IllegalArgumentException thrown when the code units do 5105 * not form a valid code point 5106 * @stable ICU 2.1 5107 */ getCodePoint(char lead, char trail)5108 public static int getCodePoint(char lead, char trail) 5109 { 5110 if (Character.isSurrogatePair(lead, trail)) { 5111 return Character.toCodePoint(lead, trail); 5112 } 5113 throw new IllegalArgumentException("Illegal surrogate characters"); 5114 } 5115 5116 /** 5117 * {@icu} Returns the code point corresponding to the BMP code point. 5118 * 5119 * @param char16 the BMP code point 5120 * @return code point if argument is a valid character. 5121 * @exception IllegalArgumentException thrown when char16 is not a valid 5122 * code point 5123 * @stable ICU 2.1 5124 */ getCodePoint(char char16)5125 public static int getCodePoint(char char16) 5126 { 5127 if (UCharacter.isLegal(char16)) { 5128 return char16; 5129 } 5130 throw new IllegalArgumentException("Illegal codepoint"); 5131 } 5132 5133 /** 5134 * Returns the uppercase version of the argument string. 5135 * Casing is dependent on the default locale and context-sensitive. 5136 * @param str source string to be performed on 5137 * @return uppercase version of the argument string 5138 * @stable ICU 2.1 5139 */ toUpperCase(String str)5140 public static String toUpperCase(String str) 5141 { 5142 return CaseMapImpl.toUpper(getDefaultCaseLocale(), 0, str); 5143 } 5144 5145 /** 5146 * Returns the lowercase version of the argument string. 
5147 * Casing is dependent on the default locale and context-sensitive 5148 * @param str source string to be performed on 5149 * @return lowercase version of the argument string 5150 * @stable ICU 2.1 5151 */ toLowerCase(String str)5152 public static String toLowerCase(String str) 5153 { 5154 return CaseMapImpl.toLower(getDefaultCaseLocale(), 0, str); 5155 } 5156 5157 /** 5158 * <p>Returns the titlecase version of the argument string. 5159 * <p>Position for titlecasing is determined by the argument break 5160 * iterator, hence the user can customize his break iterator for 5161 * a specialized titlecasing. In this case only the forward iteration 5162 * needs to be implemented. 5163 * If the break iterator passed in is null, the default Unicode algorithm 5164 * will be used to determine the titlecase positions. 5165 * 5166 * <p>Only positions returned by the break iterator will be title cased, 5167 * character in between the positions will all be in lower case. 5168 * <p>Casing is dependent on the default locale and context-sensitive 5169 * @param str source string to be performed on 5170 * @param breakiter break iterator to determine the positions in which 5171 * the character should be title cased. 
5172 * @return titlecase version of the argument string 5173 * @stable ICU 2.6 5174 */ toTitleCase(String str, BreakIterator breakiter)5175 public static String toTitleCase(String str, BreakIterator breakiter) 5176 { 5177 return toTitleCase(Locale.getDefault(), str, breakiter, 0); 5178 } 5179 getDefaultCaseLocale()5180 private static int getDefaultCaseLocale() { 5181 return UCaseProps.getCaseLocale(Locale.getDefault()); 5182 } 5183 getCaseLocale(Locale locale)5184 private static int getCaseLocale(Locale locale) { 5185 if (locale == null) { 5186 locale = Locale.getDefault(); 5187 } 5188 return UCaseProps.getCaseLocale(locale); 5189 } 5190 getCaseLocale(ULocale locale)5191 private static int getCaseLocale(ULocale locale) { 5192 if (locale == null) { 5193 locale = ULocale.getDefault(); 5194 } 5195 return UCaseProps.getCaseLocale(locale); 5196 } 5197 5198 /** 5199 * Returns the uppercase version of the argument string. 5200 * Casing is dependent on the argument locale and context-sensitive. 5201 * @param locale which string is to be converted in 5202 * @param str source string to be performed on 5203 * @return uppercase version of the argument string 5204 * @stable ICU 2.1 5205 */ toUpperCase(Locale locale, String str)5206 public static String toUpperCase(Locale locale, String str) 5207 { 5208 return CaseMapImpl.toUpper(getCaseLocale(locale), 0, str); 5209 } 5210 5211 /** 5212 * Returns the uppercase version of the argument string. 5213 * Casing is dependent on the argument locale and context-sensitive. 5214 * @param locale which string is to be converted in 5215 * @param str source string to be performed on 5216 * @return uppercase version of the argument string 5217 * @stable ICU 3.2 5218 */ toUpperCase(ULocale locale, String str)5219 public static String toUpperCase(ULocale locale, String str) { 5220 return CaseMapImpl.toUpper(getCaseLocale(locale), 0, str); 5221 } 5222 5223 /** 5224 * Returns the lowercase version of the argument string. 
5225 * Casing is dependent on the argument locale and context-sensitive 5226 * @param locale which string is to be converted in 5227 * @param str source string to be performed on 5228 * @return lowercase version of the argument string 5229 * @stable ICU 2.1 5230 */ toLowerCase(Locale locale, String str)5231 public static String toLowerCase(Locale locale, String str) 5232 { 5233 return CaseMapImpl.toLower(getCaseLocale(locale), 0, str); 5234 } 5235 5236 /** 5237 * Returns the lowercase version of the argument string. 5238 * Casing is dependent on the argument locale and context-sensitive 5239 * @param locale which string is to be converted in 5240 * @param str source string to be performed on 5241 * @return lowercase version of the argument string 5242 * @stable ICU 3.2 5243 */ toLowerCase(ULocale locale, String str)5244 public static String toLowerCase(ULocale locale, String str) { 5245 return CaseMapImpl.toLower(getCaseLocale(locale), 0, str); 5246 } 5247 5248 /** 5249 * <p>Returns the titlecase version of the argument string. 5250 * <p>Position for titlecasing is determined by the argument break 5251 * iterator, hence the user can customize his break iterator for 5252 * a specialized titlecasing. In this case only the forward iteration 5253 * needs to be implemented. 5254 * If the break iterator passed in is null, the default Unicode algorithm 5255 * will be used to determine the titlecase positions. 5256 * 5257 * <p>Only positions returned by the break iterator will be title cased, 5258 * character in between the positions will all be in lower case. 5259 * <p>Casing is dependent on the argument locale and context-sensitive 5260 * @param locale which string is to be converted in 5261 * @param str source string to be performed on 5262 * @param breakiter break iterator to determine the positions in which 5263 * the character should be title cased. 
5264 * @return titlecase version of the argument string 5265 * @stable ICU 2.6 5266 */ toTitleCase(Locale locale, String str, BreakIterator breakiter)5267 public static String toTitleCase(Locale locale, String str, 5268 BreakIterator breakiter) 5269 { 5270 return toTitleCase(locale, str, breakiter, 0); 5271 } 5272 5273 /** 5274 * <p>Returns the titlecase version of the argument string. 5275 * <p>Position for titlecasing is determined by the argument break 5276 * iterator, hence the user can customize his break iterator for 5277 * a specialized titlecasing. In this case only the forward iteration 5278 * needs to be implemented. 5279 * If the break iterator passed in is null, the default Unicode algorithm 5280 * will be used to determine the titlecase positions. 5281 * 5282 * <p>Only positions returned by the break iterator will be title cased, 5283 * character in between the positions will all be in lower case. 5284 * <p>Casing is dependent on the argument locale and context-sensitive 5285 * @param locale which string is to be converted in 5286 * @param str source string to be performed on 5287 * @param titleIter break iterator to determine the positions in which 5288 * the character should be title cased. 5289 * @return titlecase version of the argument string 5290 * @stable ICU 3.2 5291 */ toTitleCase(ULocale locale, String str, BreakIterator titleIter)5292 public static String toTitleCase(ULocale locale, String str, 5293 BreakIterator titleIter) { 5294 return toTitleCase(locale, str, titleIter, 0); 5295 } 5296 5297 /** 5298 * <p>Returns the titlecase version of the argument string. 5299 * <p>Position for titlecasing is determined by the argument break 5300 * iterator, hence the user can customize his break iterator for 5301 * a specialized titlecasing. In this case only the forward iteration 5302 * needs to be implemented. 5303 * If the break iterator passed in is null, the default Unicode algorithm 5304 * will be used to determine the titlecase positions. 
5305 * 5306 * <p>Only positions returned by the break iterator will be title cased, 5307 * character in between the positions will all be in lower case. 5308 * <p>Casing is dependent on the argument locale and context-sensitive 5309 * @param locale which string is to be converted in 5310 * @param str source string to be performed on 5311 * @param titleIter break iterator to determine the positions in which 5312 * the character should be title cased. 5313 * @param options bit set to modify the titlecasing operation 5314 * @return titlecase version of the argument string 5315 * @stable ICU 3.8 5316 * @see #TITLECASE_NO_LOWERCASE 5317 * @see #TITLECASE_NO_BREAK_ADJUSTMENT 5318 */ toTitleCase(ULocale locale, String str, BreakIterator titleIter, int options)5319 public static String toTitleCase(ULocale locale, String str, 5320 BreakIterator titleIter, int options) { 5321 if (titleIter == null && locale == null) { 5322 locale = ULocale.getDefault(); 5323 } 5324 titleIter = CaseMapImpl.getTitleBreakIterator(locale, options, titleIter); 5325 titleIter.setText(str); 5326 return CaseMapImpl.toTitle(getCaseLocale(locale), options, titleIter, str); 5327 } 5328 5329 /** 5330 * {@icu} <p>Returns the titlecase version of the argument string. 5331 * <p>Position for titlecasing is determined by the argument break 5332 * iterator, hence the user can customize his break iterator for 5333 * a specialized titlecasing. In this case only the forward iteration 5334 * needs to be implemented. 5335 * If the break iterator passed in is null, the default Unicode algorithm 5336 * will be used to determine the titlecase positions. 5337 * 5338 * <p>Only positions returned by the break iterator will be title cased, 5339 * character in between the positions will all be in lower case. 
5340 * <p>Casing is dependent on the argument locale and context-sensitive 5341 * @param locale which string is to be converted in 5342 * @param str source string to be performed on 5343 * @param titleIter break iterator to determine the positions in which 5344 * the character should be title cased. 5345 * @param options bit set to modify the titlecasing operation 5346 * @return titlecase version of the argument string 5347 * @see #TITLECASE_NO_LOWERCASE 5348 * @see #TITLECASE_NO_BREAK_ADJUSTMENT 5349 * @stable ICU 54 5350 */ toTitleCase(Locale locale, String str, BreakIterator titleIter, int options)5351 public static String toTitleCase(Locale locale, String str, 5352 BreakIterator titleIter, 5353 int options) { 5354 if (titleIter == null && locale == null) { 5355 locale = Locale.getDefault(); 5356 } 5357 titleIter = CaseMapImpl.getTitleBreakIterator(locale, options, titleIter); 5358 titleIter.setText(str); 5359 return CaseMapImpl.toTitle(getCaseLocale(locale), options, titleIter, str); 5360 } 5361 5362 /** 5363 * {@icu} The given character is mapped to its case folding equivalent according 5364 * to UnicodeData.txt and CaseFolding.txt; if the character has no case 5365 * folding equivalent, the character itself is returned. 5366 * 5367 * <p>This function only returns the simple, single-code point case mapping. 5368 * Full case mappings should be used whenever possible because they produce 5369 * better results by working on whole strings. 5370 * They can map to a result string with a different length as appropriate. 5371 * Full case mappings are applied by the case mapping functions 5372 * that take String parameters rather than code points (int). 
5373 * See also the User Guide chapter on C/POSIX migration: 5374 * http://www.icu-project.org/userguide/posix.html#case_mappings 5375 * 5376 * @param ch the character to be converted 5377 * @param defaultmapping Indicates whether the default mappings defined in 5378 * CaseFolding.txt are to be used, otherwise the 5379 * mappings for dotted I and dotless i marked with 5380 * 'T' in CaseFolding.txt are included. 5381 * @return the case folding equivalent of the character, if 5382 * any; otherwise the character itself. 5383 * @see #foldCase(String, boolean) 5384 * @stable ICU 2.1 5385 */ foldCase(int ch, boolean defaultmapping)5386 public static int foldCase(int ch, boolean defaultmapping) { 5387 return foldCase(ch, defaultmapping ? FOLD_CASE_DEFAULT : FOLD_CASE_EXCLUDE_SPECIAL_I); 5388 } 5389 5390 /** 5391 * {@icu} The given string is mapped to its case folding equivalent according to 5392 * UnicodeData.txt and CaseFolding.txt; if any character has no case 5393 * folding equivalent, the character itself is returned. 5394 * "Full", multiple-code point case folding mappings are returned here. 5395 * For "simple" single-code point mappings use the API 5396 * foldCase(int ch, boolean defaultmapping). 5397 * @param str the String to be converted 5398 * @param defaultmapping Indicates whether the default mappings defined in 5399 * CaseFolding.txt are to be used, otherwise the 5400 * mappings for dotted I and dotless i marked with 5401 * 'T' in CaseFolding.txt are included. 5402 * @return the case folding equivalent of the character, if 5403 * any; otherwise the character itself. 5404 * @see #foldCase(int, boolean) 5405 * @stable ICU 2.1 5406 */ foldCase(String str, boolean defaultmapping)5407 public static String foldCase(String str, boolean defaultmapping) { 5408 return foldCase(str, defaultmapping ? FOLD_CASE_DEFAULT : FOLD_CASE_EXCLUDE_SPECIAL_I); 5409 } 5410 5411 /** 5412 * {@icu} Option value for case folding: use default mappings defined in 5413 * CaseFolding.txt. 
5414 * @stable ICU 2.6 5415 */ 5416 public static final int FOLD_CASE_DEFAULT = 0x0000; 5417 /** 5418 * {@icu} Option value for case folding: 5419 * Use the modified set of mappings provided in CaseFolding.txt to handle dotted I 5420 * and dotless i appropriately for Turkic languages (tr, az). 5421 * 5422 * <p>Before Unicode 3.2, CaseFolding.txt contains mappings marked with 'I' that 5423 * are to be included for default mappings and 5424 * excluded for the Turkic-specific mappings. 5425 * 5426 * <p>Unicode 3.2 CaseFolding.txt instead contains mappings marked with 'T' that 5427 * are to be excluded for default mappings and 5428 * included for the Turkic-specific mappings. 5429 * 5430 * @stable ICU 2.6 5431 */ 5432 public static final int FOLD_CASE_EXCLUDE_SPECIAL_I = 0x0001; 5433 5434 /** 5435 * {@icu} The given character is mapped to its case folding equivalent according 5436 * to UnicodeData.txt and CaseFolding.txt; if the character has no case 5437 * folding equivalent, the character itself is returned. 5438 * 5439 * <p>This function only returns the simple, single-code point case mapping. 5440 * Full case mappings should be used whenever possible because they produce 5441 * better results by working on whole strings. 5442 * They can map to a result string with a different length as appropriate. 5443 * Full case mappings are applied by the case mapping functions 5444 * that take String parameters rather than code points (int). 5445 * See also the User Guide chapter on C/POSIX migration: 5446 * http://www.icu-project.org/userguide/posix.html#case_mappings 5447 * 5448 * @param ch the character to be converted 5449 * @param options A bit set for special processing. Currently the recognised options 5450 * are FOLD_CASE_EXCLUDE_SPECIAL_I and FOLD_CASE_DEFAULT 5451 * @return the case folding equivalent of the character, if any; otherwise the 5452 * character itself. 
5453 * @see #foldCase(String, boolean) 5454 * @stable ICU 2.6 5455 */ foldCase(int ch, int options)5456 public static int foldCase(int ch, int options) { 5457 return UCaseProps.INSTANCE.fold(ch, options); 5458 } 5459 5460 /** 5461 * {@icu} The given string is mapped to its case folding equivalent according to 5462 * UnicodeData.txt and CaseFolding.txt; if any character has no case 5463 * folding equivalent, the character itself is returned. 5464 * "Full", multiple-code point case folding mappings are returned here. 5465 * For "simple" single-code point mappings use the API 5466 * foldCase(int ch, boolean defaultmapping). 5467 * @param str the String to be converted 5468 * @param options A bit set for special processing. Currently the recognised options 5469 * are FOLD_CASE_EXCLUDE_SPECIAL_I and FOLD_CASE_DEFAULT 5470 * @return the case folding equivalent of the character, if any; otherwise the 5471 * character itself. 5472 * @see #foldCase(int, boolean) 5473 * @stable ICU 2.6 5474 */ foldCase(String str, int options)5475 public static final String foldCase(String str, int options) { 5476 return CaseMapImpl.fold(options, str); 5477 } 5478 5479 /** 5480 * {@icu} Returns the numeric value of a Han character. 5481 * 5482 * <p>This returns the value of Han 'numeric' code points, 5483 * including those for zero, ten, hundred, thousand, ten thousand, 5484 * and hundred million. 5485 * This includes both the standard and 'checkwriting' 5486 * characters, the 'big circle' zero character, and the standard 5487 * zero character. 5488 * 5489 * <p>Note: The Unicode Standard has numeric values for more 5490 * Han characters recognized by this method 5491 * (see {@link #getNumericValue(int)} and the UCD file DerivedNumericValues.txt), 5492 * and a {@link com.ibm.icu.text.NumberFormat} can be used with 5493 * a Chinese {@link com.ibm.icu.text.NumberingSystem}. 5494 * 5495 * @param ch code point to query 5496 * @return value if it is a Han 'numeric character,' otherwise return -1. 
5497 * @stable ICU 2.4 5498 */ getHanNumericValue(int ch)5499 public static int getHanNumericValue(int ch) 5500 { 5501 switch(ch) 5502 { 5503 case IDEOGRAPHIC_NUMBER_ZERO_ : 5504 case CJK_IDEOGRAPH_COMPLEX_ZERO_ : 5505 return 0; // Han Zero 5506 case CJK_IDEOGRAPH_FIRST_ : 5507 case CJK_IDEOGRAPH_COMPLEX_ONE_ : 5508 return 1; // Han One 5509 case CJK_IDEOGRAPH_SECOND_ : 5510 case CJK_IDEOGRAPH_COMPLEX_TWO_ : 5511 return 2; // Han Two 5512 case CJK_IDEOGRAPH_THIRD_ : 5513 case CJK_IDEOGRAPH_COMPLEX_THREE_ : 5514 return 3; // Han Three 5515 case CJK_IDEOGRAPH_FOURTH_ : 5516 case CJK_IDEOGRAPH_COMPLEX_FOUR_ : 5517 return 4; // Han Four 5518 case CJK_IDEOGRAPH_FIFTH_ : 5519 case CJK_IDEOGRAPH_COMPLEX_FIVE_ : 5520 return 5; // Han Five 5521 case CJK_IDEOGRAPH_SIXTH_ : 5522 case CJK_IDEOGRAPH_COMPLEX_SIX_ : 5523 return 6; // Han Six 5524 case CJK_IDEOGRAPH_SEVENTH_ : 5525 case CJK_IDEOGRAPH_COMPLEX_SEVEN_ : 5526 return 7; // Han Seven 5527 case CJK_IDEOGRAPH_EIGHTH_ : 5528 case CJK_IDEOGRAPH_COMPLEX_EIGHT_ : 5529 return 8; // Han Eight 5530 case CJK_IDEOGRAPH_NINETH_ : 5531 case CJK_IDEOGRAPH_COMPLEX_NINE_ : 5532 return 9; // Han Nine 5533 case CJK_IDEOGRAPH_TEN_ : 5534 case CJK_IDEOGRAPH_COMPLEX_TEN_ : 5535 return 10; 5536 case CJK_IDEOGRAPH_HUNDRED_ : 5537 case CJK_IDEOGRAPH_COMPLEX_HUNDRED_ : 5538 return 100; 5539 case CJK_IDEOGRAPH_THOUSAND_ : 5540 case CJK_IDEOGRAPH_COMPLEX_THOUSAND_ : 5541 return 1000; 5542 case CJK_IDEOGRAPH_TEN_THOUSAND_ : 5543 return 10000; 5544 case CJK_IDEOGRAPH_HUNDRED_MILLION_ : 5545 return 100000000; 5546 } 5547 return -1; // no value 5548 } 5549 5550 /** 5551 * {@icu} <p>Returns an iterator for character types, iterating over codepoints. 
5552 * <p>Example of use:<br> 5553 * <pre> 5554 * RangeValueIterator iterator = UCharacter.getTypeIterator(); 5555 * RangeValueIterator.Element element = new RangeValueIterator.Element(); 5556 * while (iterator.next(element)) { 5557 * System.out.println("Codepoint \\u" + 5558 * Integer.toHexString(element.start) + 5559 * " to codepoint \\u" + 5560 * Integer.toHexString(element.limit - 1) + 5561 * " has the character type " + 5562 * element.value); 5563 * } 5564 * </pre> 5565 * @return an iterator 5566 * @stable ICU 2.6 5567 */ getTypeIterator()5568 public static RangeValueIterator getTypeIterator() 5569 { 5570 return new UCharacterTypeIterator(); 5571 } 5572 5573 private static final class UCharacterTypeIterator implements RangeValueIterator { UCharacterTypeIterator()5574 UCharacterTypeIterator() { 5575 reset(); 5576 } 5577 5578 // implements RangeValueIterator 5579 @Override next(Element element)5580 public boolean next(Element element) { 5581 if(trieIterator.hasNext() && !(range=trieIterator.next()).leadSurrogate) { 5582 element.start=range.startCodePoint; 5583 element.limit=range.endCodePoint+1; 5584 element.value=range.value; 5585 return true; 5586 } else { 5587 return false; 5588 } 5589 } 5590 5591 // implements RangeValueIterator 5592 @Override reset()5593 public void reset() { 5594 trieIterator=UCharacterProperty.INSTANCE.m_trie_.iterator(MASK_TYPE); 5595 } 5596 5597 private Iterator<Trie2.Range> trieIterator; 5598 private Trie2.Range range; 5599 5600 private static final class MaskType implements Trie2.ValueMapper { 5601 // Extracts the general category ("character type") from the trie value. 5602 @Override map(int value)5603 public int map(int value) { 5604 return value & UCharacterProperty.TYPE_MASK; 5605 } 5606 } 5607 private static final MaskType MASK_TYPE=new MaskType(); 5608 } 5609 5610 /** 5611 * {@icu} <p>Returns an iterator for character names, iterating over codepoints. 
5612 * <p>This API only gets the iterator for the modern, most up-to-date 5613 * Unicode names. For older 1.0 Unicode names use get1_0NameIterator() or 5614 * for extended names use getExtendedNameIterator(). 5615 * <p>Example of use:<br> 5616 * <pre> 5617 * ValueIterator iterator = UCharacter.getNameIterator(); 5618 * ValueIterator.Element element = new ValueIterator.Element(); 5619 * while (iterator.next(element)) { 5620 * System.out.println("Codepoint \\u" + 5621 * Integer.toHexString(element.codepoint) + 5622 * " has the name " + (String)element.value); 5623 * } 5624 * </pre> 5625 * <p>The maximal range which the name iterator iterates is from 5626 * UCharacter.MIN_VALUE to UCharacter.MAX_VALUE. 5627 * @return an iterator 5628 * @stable ICU 2.6 5629 */ getNameIterator()5630 public static ValueIterator getNameIterator(){ 5631 return new UCharacterNameIterator(UCharacterName.INSTANCE, 5632 UCharacterNameChoice.UNICODE_CHAR_NAME); 5633 } 5634 5635 /** 5636 * {@icu} Returns an empty iterator. 5637 * <p>Used to return an iterator for the older 1.0 Unicode character names, iterating over codepoints. 5638 * @return an empty iterator 5639 * @deprecated ICU 49 5640 * @see #getName1_0(int) 5641 */ 5642 @Deprecated getName1_0Iterator()5643 public static ValueIterator getName1_0Iterator(){ 5644 return new DummyValueIterator(); 5645 } 5646 5647 private static final class DummyValueIterator implements ValueIterator { 5648 @Override next(Element element)5649 public boolean next(Element element) { return false; } 5650 @Override reset()5651 public void reset() {} 5652 @Override setRange(int start, int limit)5653 public void setRange(int start, int limit) {} 5654 } 5655 5656 /** 5657 * {@icu} <p>Returns an iterator for character names, iterating over codepoints. 5658 * <p>This API only gets the iterator for the extended names. 5659 * For modern, most up-to-date Unicode names use getNameIterator() or 5660 * for older 1.0 Unicode names use get1_0NameIterator(). 
5661 * <p>Example of use:<br> 5662 * <pre> 5663 * ValueIterator iterator = UCharacter.getExtendedNameIterator(); 5664 * ValueIterator.Element element = new ValueIterator.Element(); 5665 * while (iterator.next(element)) { 5666 * System.out.println("Codepoint \\u" + 5667 * Integer.toHexString(element.codepoint) + 5668 * " has the name " + (String)element.value); 5669 * } 5670 * </pre> 5671 * <p>The maximal range which the name iterator iterates is from 5672 * @return an iterator 5673 * @stable ICU 2.6 5674 */ getExtendedNameIterator()5675 public static ValueIterator getExtendedNameIterator(){ 5676 return new UCharacterNameIterator(UCharacterName.INSTANCE, 5677 UCharacterNameChoice.EXTENDED_CHAR_NAME); 5678 } 5679 5680 /** 5681 * {@icu} Returns the "age" of the code point. 5682 * <p>The "age" is the Unicode version when the code point was first 5683 * designated (as a non-character or for Private Use) or assigned a 5684 * character. 5685 * <p>This can be useful to avoid emitting code points to receiving 5686 * processes that do not accept newer characters. 5687 * <p>The data is from the UCD file DerivedAge.txt. 5688 * @param ch The code point. 5689 * @return the Unicode version number 5690 * @stable ICU 2.6 5691 */ getAge(int ch)5692 public static VersionInfo getAge(int ch) 5693 { 5694 if (ch < MIN_VALUE || ch > MAX_VALUE) { 5695 throw new IllegalArgumentException("Codepoint out of bounds"); 5696 } 5697 return UCharacterProperty.INSTANCE.getAge(ch); 5698 } 5699 5700 /** 5701 * {@icu} Check a binary Unicode property for a code point. 5702 * <p>Unicode, especially in version 3.2, defines many more properties 5703 * than the original set in UnicodeData.txt. 5704 * <p>This API is intended to reflect Unicode properties as defined in 5705 * the Unicode Character Database (UCD) and Unicode Technical Reports 5706 * (UTR). 5707 * <p>For details about the properties see 5708 * <a href=http://www.unicode.org/>http://www.unicode.org/</a>. 
5709 * <p>For names of Unicode properties see the UCD file 5710 * PropertyAliases.txt. 5711 * <p>This API does not check the validity of the codepoint. 5712 * <p>Important: If ICU is built with UCD files from Unicode versions 5713 * below 3.2, then properties marked with "new" are not or 5714 * not fully available. 5715 * @param ch code point to test. 5716 * @param property selector constant from com.ibm.icu.lang.UProperty, 5717 * identifies which binary property to check. 5718 * @return true or false according to the binary Unicode property value 5719 * for ch. Also false if property is out of bounds or if the 5720 * Unicode version does not have data for the property at all, or 5721 * not for this code point. 5722 * @see com.ibm.icu.lang.UProperty 5723 * @see CharacterProperties#getBinaryPropertySet(int) 5724 * @stable ICU 2.6 5725 */ hasBinaryProperty(int ch, int property)5726 public static boolean hasBinaryProperty(int ch, int property) 5727 { 5728 return UCharacterProperty.INSTANCE.hasBinaryProperty(ch, property); 5729 } 5730 5731 /** 5732 * {@icu} <p>Check if a code point has the Alphabetic Unicode property. 5733 * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.ALPHABETIC). 5734 * <p>Different from UCharacter.isLetter(ch)! 5735 * @stable ICU 2.6 5736 * @param ch codepoint to be tested 5737 */ isUAlphabetic(int ch)5738 public static boolean isUAlphabetic(int ch) 5739 { 5740 return hasBinaryProperty(ch, UProperty.ALPHABETIC); 5741 } 5742 5743 /** 5744 * {@icu} <p>Check if a code point has the Lowercase Unicode property. 5745 * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.LOWERCASE). 5746 * <p>This is different from UCharacter.isLowerCase(ch)! 5747 * @param ch codepoint to be tested 5748 * @stable ICU 2.6 5749 */ isULowercase(int ch)5750 public static boolean isULowercase(int ch) 5751 { 5752 return hasBinaryProperty(ch, UProperty.LOWERCASE); 5753 } 5754 5755 /** 5756 * {@icu} <p>Check if a code point has the Uppercase Unicode property. 
5757 * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.UPPERCASE). 5758 * <p>This is different from UCharacter.isUpperCase(ch)! 5759 * @param ch codepoint to be tested 5760 * @stable ICU 2.6 5761 */ isUUppercase(int ch)5762 public static boolean isUUppercase(int ch) 5763 { 5764 return hasBinaryProperty(ch, UProperty.UPPERCASE); 5765 } 5766 5767 /** 5768 * {@icu} <p>Check if a code point has the White_Space Unicode property. 5769 * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.WHITE_SPACE). 5770 * <p>This is different from both UCharacter.isSpace(ch) and 5771 * UCharacter.isWhitespace(ch)! 5772 * @param ch codepoint to be tested 5773 * @stable ICU 2.6 5774 */ isUWhiteSpace(int ch)5775 public static boolean isUWhiteSpace(int ch) 5776 { 5777 return hasBinaryProperty(ch, UProperty.WHITE_SPACE); 5778 } 5779 5780 /** 5781 * {@icu} Returns the property value for a Unicode property type of a code point. 5782 * Also returns binary and mask property values. 5783 * <p>Unicode, especially in version 3.2, defines many more properties than 5784 * the original set in UnicodeData.txt. 5785 * <p>The properties APIs are intended to reflect Unicode properties as 5786 * defined in the Unicode Character Database (UCD) and Unicode Technical 5787 * Reports (UTR). For details about the properties see 5788 * http://www.unicode.org/. 5789 * <p>For names of Unicode properties see the UCD file PropertyAliases.txt. 5790 * 5791 * <pre> 5792 * Sample usage: 5793 * int ea = UCharacter.getIntPropertyValue(c, UProperty.EAST_ASIAN_WIDTH); 5794 * int ideo = UCharacter.getIntPropertyValue(c, UProperty.IDEOGRAPHIC); 5795 * boolean b = (ideo == 1) ? true : false; 5796 * </pre> 5797 * @param ch code point to test. 5798 * @param type UProperty selector constant, identifies which binary 5799 * property to check. 
Must be 5800 * UProperty.BINARY_START <= type < UProperty.BINARY_LIMIT or 5801 * UProperty.INT_START <= type < UProperty.INT_LIMIT or 5802 * UProperty.MASK_START <= type < UProperty.MASK_LIMIT. 5803 * @return numeric value that is directly the property value or, 5804 * for enumerated properties, corresponds to the numeric value of 5805 * the enumerated constant of the respective property value type 5806 * ({@link ECharacterCategory}, {@link ECharacterDirection}, 5807 * {@link DecompositionType}, etc.). 5808 * Returns 0 or 1 (for false / true) for binary Unicode properties. 5809 * Returns a bit-mask for mask properties. 5810 * Returns 0 if 'type' is out of bounds or if the Unicode version 5811 * does not have data for the property at all, or not for this code 5812 * point. 5813 * @see UProperty 5814 * @see #hasBinaryProperty 5815 * @see #getIntPropertyMinValue 5816 * @see #getIntPropertyMaxValue 5817 * @see CharacterProperties#getIntPropertyMap(int) 5818 * @see #getUnicodeVersion 5819 * @stable ICU 2.4 5820 */ getIntPropertyValue(int ch, int type)5821 public static int getIntPropertyValue(int ch, int type) 5822 { 5823 return UCharacterProperty.INSTANCE.getIntPropertyValue(ch, type); 5824 } 5825 /** 5826 * {@icu} Returns a string version of the property value. 5827 * @param propertyEnum The property enum value. 5828 * @param codepoint The codepoint value. 5829 * @param nameChoice The choice of the name. 5830 * @return value as string 5831 * @internal 5832 * @deprecated This API is ICU internal only. 
5833 */ 5834 @Deprecated 5835 ///CLOVER:OFF getStringPropertyValue(int propertyEnum, int codepoint, int nameChoice)5836 public static String getStringPropertyValue(int propertyEnum, int codepoint, int nameChoice) { 5837 if ((propertyEnum >= UProperty.BINARY_START && propertyEnum < UProperty.BINARY_LIMIT) || 5838 (propertyEnum >= UProperty.INT_START && propertyEnum < UProperty.INT_LIMIT)) { 5839 return getPropertyValueName(propertyEnum, getIntPropertyValue(codepoint, propertyEnum), 5840 nameChoice); 5841 } 5842 if (propertyEnum == UProperty.NUMERIC_VALUE) { 5843 return String.valueOf(getUnicodeNumericValue(codepoint)); 5844 } 5845 // otherwise must be string property 5846 switch (propertyEnum) { 5847 case UProperty.AGE: return getAge(codepoint).toString(); 5848 case UProperty.ISO_COMMENT: return getISOComment(codepoint); 5849 case UProperty.BIDI_MIRRORING_GLYPH: return toString(getMirror(codepoint)); 5850 case UProperty.CASE_FOLDING: return toString(foldCase(codepoint, true)); 5851 case UProperty.LOWERCASE_MAPPING: return toString(toLowerCase(codepoint)); 5852 case UProperty.NAME: return getName(codepoint); 5853 case UProperty.SIMPLE_CASE_FOLDING: return toString(foldCase(codepoint, true)); 5854 case UProperty.SIMPLE_LOWERCASE_MAPPING: return toString(toLowerCase(codepoint)); 5855 case UProperty.SIMPLE_TITLECASE_MAPPING: return toString(toTitleCase(codepoint)); 5856 case UProperty.SIMPLE_UPPERCASE_MAPPING: return toString(toUpperCase(codepoint)); 5857 case UProperty.TITLECASE_MAPPING: return toString(toTitleCase(codepoint)); 5858 case UProperty.UNICODE_1_NAME: return getName1_0(codepoint); 5859 case UProperty.UPPERCASE_MAPPING: return toString(toUpperCase(codepoint)); 5860 } 5861 throw new IllegalArgumentException("Illegal Property Enum"); 5862 } 5863 ///CLOVER:ON 5864 5865 /** 5866 * {@icu} Returns the minimum value for an integer/binary Unicode property type. 
5867 * Can be used together with UCharacter.getIntPropertyMaxValue(int) 5868 * to allocate arrays of com.ibm.icu.text.UnicodeSet or similar. 5869 * @param type UProperty selector constant, identifies which binary 5870 * property to check. Must be 5871 * UProperty.BINARY_START <= type < UProperty.BINARY_LIMIT or 5872 * UProperty.INT_START <= type < UProperty.INT_LIMIT. 5873 * @return Minimum value returned by UCharacter.getIntPropertyValue(int) 5874 * for a Unicode property. 0 if the property 5875 * selector 'type' is out of range. 5876 * @see UProperty 5877 * @see #hasBinaryProperty 5878 * @see #getUnicodeVersion 5879 * @see #getIntPropertyMaxValue 5880 * @see #getIntPropertyValue 5881 * @stable ICU 2.4 5882 */ getIntPropertyMinValue(int type)5883 public static int getIntPropertyMinValue(int type){ 5884 5885 return 0; // undefined; and: all other properties have a minimum value of 0 5886 } 5887 5888 5889 /** 5890 * {@icu} Returns the maximum value for an integer/binary Unicode property. 5891 * Can be used together with UCharacter.getIntPropertyMinValue(int) 5892 * to allocate arrays of com.ibm.icu.text.UnicodeSet or similar. 5893 * Examples for min/max values (for Unicode 3.2): 5894 * <ul> 5895 * <li> UProperty.BIDI_CLASS: 0/18 5896 * (UCharacterDirection.LEFT_TO_RIGHT/UCharacterDirection.BOUNDARY_NEUTRAL) 5897 * <li> UProperty.SCRIPT: 0/45 (UScript.COMMON/UScript.TAGBANWA) 5898 * <li> UProperty.IDEOGRAPHIC: 0/1 (false/true) 5899 * </ul> 5900 * For undefined UProperty constant values, min/max values will be 0/-1. 5901 * @param type UProperty selector constant, identifies which binary 5902 * property to check. Must be 5903 * UProperty.BINARY_START <= type < UProperty.BINARY_LIMIT or 5904 * UProperty.INT_START <= type < UProperty.INT_LIMIT. 5905 * @return Maximum value returned by u_getIntPropertyValue for a Unicode 5906 * property. <= 0 if the property selector 'type' is out of range. 
5907 * @see UProperty 5908 * @see #hasBinaryProperty 5909 * @see #getUnicodeVersion 5910 * @see #getIntPropertyMaxValue 5911 * @see #getIntPropertyValue 5912 * @stable ICU 2.4 5913 */ getIntPropertyMaxValue(int type)5914 public static int getIntPropertyMaxValue(int type) 5915 { 5916 return UCharacterProperty.INSTANCE.getIntPropertyMaxValue(type); 5917 } 5918 5919 /** 5920 * Provide the java.lang.Character forDigit API, for convenience. 5921 * @stable ICU 3.0 5922 */ forDigit(int digit, int radix)5923 public static char forDigit(int digit, int radix) { 5924 return java.lang.Character.forDigit(digit, radix); 5925 } 5926 5927 // JDK 1.5 API coverage 5928 5929 /** 5930 * Constant U+D800, same as {@link Character#MIN_HIGH_SURROGATE}. 5931 * 5932 * @stable ICU 3.0 5933 */ 5934 public static final char MIN_HIGH_SURROGATE = Character.MIN_HIGH_SURROGATE; 5935 5936 /** 5937 * Constant U+DBFF, same as {@link Character#MAX_HIGH_SURROGATE}. 5938 * 5939 * @stable ICU 3.0 5940 */ 5941 public static final char MAX_HIGH_SURROGATE = Character.MAX_HIGH_SURROGATE; 5942 5943 /** 5944 * Constant U+DC00, same as {@link Character#MIN_LOW_SURROGATE}. 5945 * 5946 * @stable ICU 3.0 5947 */ 5948 public static final char MIN_LOW_SURROGATE = Character.MIN_LOW_SURROGATE; 5949 5950 /** 5951 * Constant U+DFFF, same as {@link Character#MAX_LOW_SURROGATE}. 5952 * 5953 * @stable ICU 3.0 5954 */ 5955 public static final char MAX_LOW_SURROGATE = Character.MAX_LOW_SURROGATE; 5956 5957 /** 5958 * Constant U+D800, same as {@link Character#MIN_SURROGATE}. 5959 * 5960 * @stable ICU 3.0 5961 */ 5962 public static final char MIN_SURROGATE = Character.MIN_SURROGATE; 5963 5964 /** 5965 * Constant U+DFFF, same as {@link Character#MAX_SURROGATE}. 5966 * 5967 * @stable ICU 3.0 5968 */ 5969 public static final char MAX_SURROGATE = Character.MAX_SURROGATE; 5970 5971 /** 5972 * Constant U+10000, same as {@link Character#MIN_SUPPLEMENTARY_CODE_POINT}. 
5973 * 5974 * @stable ICU 3.0 5975 */ 5976 public static final int MIN_SUPPLEMENTARY_CODE_POINT = Character.MIN_SUPPLEMENTARY_CODE_POINT; 5977 5978 /** 5979 * Constant U+10FFFF, same as {@link Character#MAX_CODE_POINT}. 5980 * 5981 * @stable ICU 3.0 5982 */ 5983 public static final int MAX_CODE_POINT = Character.MAX_CODE_POINT; 5984 5985 /** 5986 * Constant U+0000, same as {@link Character#MIN_CODE_POINT}. 5987 * 5988 * @stable ICU 3.0 5989 */ 5990 public static final int MIN_CODE_POINT = Character.MIN_CODE_POINT; 5991 5992 /** 5993 * Equivalent to {@link Character#isValidCodePoint}. 5994 * 5995 * @param cp the code point to check 5996 * @return true if cp is a valid code point 5997 * @stable ICU 3.0 5998 */ isValidCodePoint(int cp)5999 public static final boolean isValidCodePoint(int cp) { 6000 return cp >= 0 && cp <= MAX_CODE_POINT; 6001 } 6002 6003 /** 6004 * Same as {@link Character#isSupplementaryCodePoint}. 6005 * 6006 * @param cp the code point to check 6007 * @return true if cp is a supplementary code point 6008 * @stable ICU 3.0 6009 */ isSupplementaryCodePoint(int cp)6010 public static final boolean isSupplementaryCodePoint(int cp) { 6011 return Character.isSupplementaryCodePoint(cp); 6012 } 6013 6014 /** 6015 * Same as {@link Character#isHighSurrogate}. 6016 * 6017 * @param ch the char to check 6018 * @return true if ch is a high (lead) surrogate 6019 * @stable ICU 3.0 6020 */ isHighSurrogate(char ch)6021 public static boolean isHighSurrogate(char ch) { 6022 return Character.isHighSurrogate(ch); 6023 } 6024 6025 /** 6026 * Same as {@link Character#isLowSurrogate}. 6027 * 6028 * @param ch the char to check 6029 * @return true if ch is a low (trail) surrogate 6030 * @stable ICU 3.0 6031 */ isLowSurrogate(char ch)6032 public static boolean isLowSurrogate(char ch) { 6033 return Character.isLowSurrogate(ch); 6034 } 6035 6036 /** 6037 * Same as {@link Character#isSurrogatePair}. 
6038 * 6039 * @param high the high (lead) char 6040 * @param low the low (trail) char 6041 * @return true if high, low form a surrogate pair 6042 * @stable ICU 3.0 6043 */ isSurrogatePair(char high, char low)6044 public static final boolean isSurrogatePair(char high, char low) { 6045 return Character.isSurrogatePair(high, low); 6046 } 6047 6048 /** 6049 * Same as {@link Character#charCount}. 6050 * Returns the number of chars needed to represent the code point (1 or 2). 6051 * This does not check the code point for validity. 6052 * 6053 * @param cp the code point to check 6054 * @return the number of chars needed to represent the code point 6055 * @stable ICU 3.0 6056 */ charCount(int cp)6057 public static int charCount(int cp) { 6058 return Character.charCount(cp); 6059 } 6060 6061 /** 6062 * Same as {@link Character#toCodePoint}. 6063 * Returns the code point represented by the two surrogate code units. 6064 * This does not check the surrogate pair for validity. 6065 * 6066 * @param high the high (lead) surrogate 6067 * @param low the low (trail) surrogate 6068 * @return the code point formed by the surrogate pair 6069 * @stable ICU 3.0 6070 */ toCodePoint(char high, char low)6071 public static final int toCodePoint(char high, char low) { 6072 return Character.toCodePoint(high, low); 6073 } 6074 6075 /** 6076 * Same as {@link Character#codePointAt(CharSequence, int)}. 6077 * Returns the code point at index. 6078 * This examines only the characters at index and index+1. 
6079 * 6080 * @param seq the characters to check 6081 * @param index the index of the first or only char forming the code point 6082 * @return the code point at the index 6083 * @stable ICU 3.0 6084 */ codePointAt(CharSequence seq, int index)6085 public static final int codePointAt(CharSequence seq, int index) { 6086 char c1 = seq.charAt(index++); 6087 if (isHighSurrogate(c1)) { 6088 if (index < seq.length()) { 6089 char c2 = seq.charAt(index); 6090 if (isLowSurrogate(c2)) { 6091 return toCodePoint(c1, c2); 6092 } 6093 } 6094 } 6095 return c1; 6096 } 6097 6098 /** 6099 * Same as {@link Character#codePointAt(char[], int)}. 6100 * Returns the code point at index. 6101 * This examines only the characters at index and index+1. 6102 * 6103 * @param text the characters to check 6104 * @param index the index of the first or only char forming the code point 6105 * @return the code point at the index 6106 * @stable ICU 3.0 6107 */ codePointAt(char[] text, int index)6108 public static final int codePointAt(char[] text, int index) { 6109 char c1 = text[index++]; 6110 if (isHighSurrogate(c1)) { 6111 if (index < text.length) { 6112 char c2 = text[index]; 6113 if (isLowSurrogate(c2)) { 6114 return toCodePoint(c1, c2); 6115 } 6116 } 6117 } 6118 return c1; 6119 } 6120 6121 /** 6122 * Same as {@link Character#codePointAt(char[], int, int)}. 6123 * Returns the code point at index. 6124 * This examines only the characters at index and index+1. 
6125 * 6126 * @param text the characters to check 6127 * @param index the index of the first or only char forming the code point 6128 * @param limit the limit of the valid text 6129 * @return the code point at the index 6130 * @stable ICU 3.0 6131 */ codePointAt(char[] text, int index, int limit)6132 public static final int codePointAt(char[] text, int index, int limit) { 6133 if (index >= limit || limit > text.length) { 6134 throw new IndexOutOfBoundsException(); 6135 } 6136 char c1 = text[index++]; 6137 if (isHighSurrogate(c1)) { 6138 if (index < limit) { 6139 char c2 = text[index]; 6140 if (isLowSurrogate(c2)) { 6141 return toCodePoint(c1, c2); 6142 } 6143 } 6144 } 6145 return c1; 6146 } 6147 6148 /** 6149 * Same as {@link Character#codePointBefore(CharSequence, int)}. 6150 * Return the code point before index. 6151 * This examines only the characters at index-1 and index-2. 6152 * 6153 * @param seq the characters to check 6154 * @param index the index after the last or only char forming the code point 6155 * @return the code point before the index 6156 * @stable ICU 3.0 6157 */ codePointBefore(CharSequence seq, int index)6158 public static final int codePointBefore(CharSequence seq, int index) { 6159 char c2 = seq.charAt(--index); 6160 if (isLowSurrogate(c2)) { 6161 if (index > 0) { 6162 char c1 = seq.charAt(--index); 6163 if (isHighSurrogate(c1)) { 6164 return toCodePoint(c1, c2); 6165 } 6166 } 6167 } 6168 return c2; 6169 } 6170 6171 /** 6172 * Same as {@link Character#codePointBefore(char[], int)}. 6173 * Returns the code point before index. 6174 * This examines only the characters at index-1 and index-2. 
6175 * 6176 * @param text the characters to check 6177 * @param index the index after the last or only char forming the code point 6178 * @return the code point before the index 6179 * @stable ICU 3.0 6180 */ codePointBefore(char[] text, int index)6181 public static final int codePointBefore(char[] text, int index) { 6182 char c2 = text[--index]; 6183 if (isLowSurrogate(c2)) { 6184 if (index > 0) { 6185 char c1 = text[--index]; 6186 if (isHighSurrogate(c1)) { 6187 return toCodePoint(c1, c2); 6188 } 6189 } 6190 } 6191 return c2; 6192 } 6193 6194 /** 6195 * Same as {@link Character#codePointBefore(char[], int, int)}. 6196 * Return the code point before index. 6197 * This examines only the characters at index-1 and index-2. 6198 * 6199 * @param text the characters to check 6200 * @param index the index after the last or only char forming the code point 6201 * @param limit the start of the valid text 6202 * @return the code point before the index 6203 * @stable ICU 3.0 6204 */ codePointBefore(char[] text, int index, int limit)6205 public static final int codePointBefore(char[] text, int index, int limit) { 6206 if (index <= limit || limit < 0) { 6207 throw new IndexOutOfBoundsException(); 6208 } 6209 char c2 = text[--index]; 6210 if (isLowSurrogate(c2)) { 6211 if (index > limit) { 6212 char c1 = text[--index]; 6213 if (isHighSurrogate(c1)) { 6214 return toCodePoint(c1, c2); 6215 } 6216 } 6217 } 6218 return c2; 6219 } 6220 6221 /** 6222 * Same as {@link Character#toChars(int, char[], int)}. 6223 * Writes the chars representing the 6224 * code point into the destination at the given index. 
6225 * 6226 * @param cp the code point to convert 6227 * @param dst the destination array into which to put the char(s) representing the code point 6228 * @param dstIndex the index at which to put the first (or only) char 6229 * @return the count of the number of chars written (1 or 2) 6230 * @throws IllegalArgumentException if cp is not a valid code point 6231 * @stable ICU 3.0 6232 */ toChars(int cp, char[] dst, int dstIndex)6233 public static final int toChars(int cp, char[] dst, int dstIndex) { 6234 return Character.toChars(cp, dst, dstIndex); 6235 } 6236 6237 /** 6238 * Same as {@link Character#toChars(int)}. 6239 * Returns a char array representing the code point. 6240 * 6241 * @param cp the code point to convert 6242 * @return an array containing the char(s) representing the code point 6243 * @throws IllegalArgumentException if cp is not a valid code point 6244 * @stable ICU 3.0 6245 */ toChars(int cp)6246 public static final char[] toChars(int cp) { 6247 return Character.toChars(cp); 6248 } 6249 6250 /** 6251 * Equivalent to the {@link Character#getDirectionality(char)} method, for 6252 * convenience. Returns a byte representing the directionality of the 6253 * character. 6254 * 6255 * {@icunote} Unlike {@link Character#getDirectionality(char)}, this returns 6256 * DIRECTIONALITY_LEFT_TO_RIGHT for undefined or out-of-bounds characters. 6257 * 6258 * {@icunote} The return value must be tested using the constants defined in {@link 6259 * UCharacterDirection} and its interface {@link 6260 * UCharacterEnums.ECharacterDirection} since the values are different from the ones 6261 * defined by <code>java.lang.Character</code>. 
6262 * @param cp the code point to check 6263 * @return the directionality of the code point 6264 * @see #getDirection 6265 * @stable ICU 3.0 6266 */ getDirectionality(int cp)6267 public static byte getDirectionality(int cp) 6268 { 6269 return (byte)getDirection(cp); 6270 } 6271 6272 /** 6273 * Equivalent to the {@link Character#codePointCount(CharSequence, int, int)} 6274 * method, for convenience. Counts the number of code points in the range 6275 * of text. 6276 * @param text the characters to check 6277 * @param start the start of the range 6278 * @param limit the limit of the range 6279 * @return the number of code points in the range 6280 * @stable ICU 3.0 6281 */ codePointCount(CharSequence text, int start, int limit)6282 public static int codePointCount(CharSequence text, int start, int limit) { 6283 if (start < 0 || limit < start || limit > text.length()) { 6284 throw new IndexOutOfBoundsException("start (" + start + 6285 ") or limit (" + limit + 6286 ") invalid or out of range 0, " + text.length()); 6287 } 6288 6289 int len = limit - start; 6290 while (limit > start) { 6291 char ch = text.charAt(--limit); 6292 while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && limit > start) { 6293 ch = text.charAt(--limit); 6294 if (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE) { 6295 --len; 6296 break; 6297 } 6298 } 6299 } 6300 return len; 6301 } 6302 6303 /** 6304 * Equivalent to the {@link Character#codePointCount(char[], int, int)} method, for 6305 * convenience. Counts the number of code points in the range of text. 
6306 * @param text the characters to check 6307 * @param start the start of the range 6308 * @param limit the limit of the range 6309 * @return the number of code points in the range 6310 * @stable ICU 3.0 6311 */ codePointCount(char[] text, int start, int limit)6312 public static int codePointCount(char[] text, int start, int limit) { 6313 if (start < 0 || limit < start || limit > text.length) { 6314 throw new IndexOutOfBoundsException("start (" + start + 6315 ") or limit (" + limit + 6316 ") invalid or out of range 0, " + text.length); 6317 } 6318 6319 int len = limit - start; 6320 while (limit > start) { 6321 char ch = text[--limit]; 6322 while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && limit > start) { 6323 ch = text[--limit]; 6324 if (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE) { 6325 --len; 6326 break; 6327 } 6328 } 6329 } 6330 return len; 6331 } 6332 6333 /** 6334 * Equivalent to the {@link Character#offsetByCodePoints(CharSequence, int, int)} 6335 * method, for convenience. Adjusts the char index by a code point offset. 
6336 * @param text the characters to check 6337 * @param index the index to adjust 6338 * @param codePointOffset the number of code points by which to offset the index 6339 * @return the adjusted index 6340 * @stable ICU 3.0 6341 */ offsetByCodePoints(CharSequence text, int index, int codePointOffset)6342 public static int offsetByCodePoints(CharSequence text, int index, int codePointOffset) { 6343 if (index < 0 || index > text.length()) { 6344 throw new IndexOutOfBoundsException("index ( " + index + 6345 ") out of range 0, " + text.length()); 6346 } 6347 6348 if (codePointOffset < 0) { 6349 while (++codePointOffset <= 0) { 6350 char ch = text.charAt(--index); 6351 while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && index > 0) { 6352 ch = text.charAt(--index); 6353 if (ch < MIN_HIGH_SURROGATE || ch > MAX_HIGH_SURROGATE) { 6354 if (++codePointOffset > 0) { 6355 return index+1; 6356 } 6357 } 6358 } 6359 } 6360 } else { 6361 int limit = text.length(); 6362 while (--codePointOffset >= 0) { 6363 char ch = text.charAt(index++); 6364 while (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE && index < limit) { 6365 ch = text.charAt(index++); 6366 if (ch < MIN_LOW_SURROGATE || ch > MAX_LOW_SURROGATE) { 6367 if (--codePointOffset < 0) { 6368 return index-1; 6369 } 6370 } 6371 } 6372 } 6373 } 6374 6375 return index; 6376 } 6377 6378 /** 6379 * Equivalent to the 6380 * {@link Character#offsetByCodePoints(char[], int, int, int, int)} 6381 * method, for convenience. Adjusts the char index by a code point offset. 
6382 * @param text the characters to check 6383 * @param start the start of the range to check 6384 * @param count the length of the range to check 6385 * @param index the index to adjust 6386 * @param codePointOffset the number of code points by which to offset the index 6387 * @return the adjusted index 6388 * @stable ICU 3.0 6389 */ offsetByCodePoints(char[] text, int start, int count, int index, int codePointOffset)6390 public static int offsetByCodePoints(char[] text, int start, int count, int index, 6391 int codePointOffset) { 6392 int limit = start + count; 6393 if (start < 0 || limit < start || limit > text.length || index < start || index > limit) { 6394 throw new IndexOutOfBoundsException("index ( " + index + 6395 ") out of range " + start + 6396 ", " + limit + 6397 " in array 0, " + text.length); 6398 } 6399 6400 if (codePointOffset < 0) { 6401 while (++codePointOffset <= 0) { 6402 char ch = text[--index]; 6403 if (index < start) { 6404 throw new IndexOutOfBoundsException("index ( " + index + 6405 ") < start (" + start + 6406 ")"); 6407 } 6408 while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && index > start) { 6409 ch = text[--index]; 6410 if (ch < MIN_HIGH_SURROGATE || ch > MAX_HIGH_SURROGATE) { 6411 if (++codePointOffset > 0) { 6412 return index+1; 6413 } 6414 } 6415 } 6416 } 6417 } else { 6418 while (--codePointOffset >= 0) { 6419 char ch = text[index++]; 6420 if (index > limit) { 6421 throw new IndexOutOfBoundsException("index ( " + index + 6422 ") > limit (" + limit + 6423 ")"); 6424 } 6425 while (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE && index < limit) { 6426 ch = text[index++]; 6427 if (ch < MIN_LOW_SURROGATE || ch > MAX_LOW_SURROGATE) { 6428 if (--codePointOffset < 0) { 6429 return index-1; 6430 } 6431 } 6432 } 6433 } 6434 } 6435 6436 return index; 6437 } 6438 6439 // private variables ------------------------------------------------- 6440 6441 /** 6442 * To get the last character out from a data type 6443 */ 6444 private 
static final int LAST_CHAR_MASK_ = 0xFFFF;

//    /**
//     * To get the last byte out from a data type
//     */
//    private static final int LAST_BYTE_MASK_ = 0xFF;
//
//    /**
//     * Shift 16 bits
//     */
//    private static final int SHIFT_16_ = 16;
//
//    /**
//     * Shift 24 bits
//     */
//    private static final int SHIFT_24_ = 24;
//
//    /**
//     * Decimal radix
//     */
//    private static final int DECIMAL_RADIX_ = 10;

/**
 * No break space code point
 */
private static final int NO_BREAK_SPACE_ = 0xA0;

/**
 * Figure space code point
 */
private static final int FIGURE_SPACE_ = 0x2007;

/**
 * Narrow no break space code point
 */
private static final int NARROW_NO_BREAK_SPACE_ = 0x202F;

/**
 * Ideographic number zero code point
 */
private static final int IDEOGRAPHIC_NUMBER_ZERO_ = 0x3007;

/**
 * CJK Ideograph, First code point (the ideograph for one)
 */
private static final int CJK_IDEOGRAPH_FIRST_ = 0x4e00;

/**
 * CJK Ideograph, Second code point (the ideograph for two)
 */
private static final int CJK_IDEOGRAPH_SECOND_ = 0x4e8c;

/**
 * CJK Ideograph, Third code point (the ideograph for three)
 */
private static final int CJK_IDEOGRAPH_THIRD_ = 0x4e09;

/**
 * CJK Ideograph, Fourth code point (the ideograph for four)
 */
private static final int CJK_IDEOGRAPH_FOURTH_ = 0x56db;

/**
 * CJK Ideograph, Fifth code point (the ideograph for five)
 */
private static final int CJK_IDEOGRAPH_FIFTH_ = 0x4e94;

/**
 * CJK Ideograph, Sixth code point (the ideograph for six)
 */
private static final int CJK_IDEOGRAPH_SIXTH_ = 0x516d;

/**
 * CJK Ideograph, Seventh code point (the ideograph for seven)
 */
private static final int CJK_IDEOGRAPH_SEVENTH_ = 0x4e03;

/**
 * CJK Ideograph, Eighth code point (the ideograph for eight)
 */
private static final int CJK_IDEOGRAPH_EIGHTH_ = 0x516b;

/**
 * CJK Ideograph, Ninth code point (the ideograph for nine).
 * The historical spelling of the constant name is kept because it may be
 * referenced elsewhere in this class.
 */
private static final int CJK_IDEOGRAPH_NINETH_ = 0x4e5d;

/**
 * Application Program command code point
 */
private static final int APPLICATION_PROGRAM_COMMAND_ = 0x009F;

/**
 * Unit separator code point
 */
private static final int UNIT_SEPARATOR_ = 0x001F;

/**
 * Delete code point
 */
private static final int DELETE_ = 0x007F;

/**
 * Han digit characters
 */
private static final int CJK_IDEOGRAPH_COMPLEX_ZERO_     = 0x96f6;
private static final int CJK_IDEOGRAPH_COMPLEX_ONE_      = 0x58f9;
private static final int CJK_IDEOGRAPH_COMPLEX_TWO_      = 0x8cb3;
private static final int CJK_IDEOGRAPH_COMPLEX_THREE_    = 0x53c3;
private static final int CJK_IDEOGRAPH_COMPLEX_FOUR_     = 0x8086;
private static final int CJK_IDEOGRAPH_COMPLEX_FIVE_     = 0x4f0d;
private static final int CJK_IDEOGRAPH_COMPLEX_SIX_      = 0x9678;
private static final int CJK_IDEOGRAPH_COMPLEX_SEVEN_    = 0x67d2;
private static final int CJK_IDEOGRAPH_COMPLEX_EIGHT_    = 0x634c;
private static final int CJK_IDEOGRAPH_COMPLEX_NINE_     = 0x7396;
private static final int CJK_IDEOGRAPH_TEN_              = 0x5341;
private static final int CJK_IDEOGRAPH_COMPLEX_TEN_      = 0x62fe;
private static final int CJK_IDEOGRAPH_HUNDRED_          = 0x767e;
private static final int CJK_IDEOGRAPH_COMPLEX_HUNDRED_  = 0x4f70;
private static final int CJK_IDEOGRAPH_THOUSAND_         = 0x5343;
private static final int CJK_IDEOGRAPH_COMPLEX_THOUSAND_ = 0x4edf;
// U+842C is the ideograph for ten thousand. The value used to be 0x824c,
// a transposition that names an unrelated ideograph.
// NOTE(review): confirm against the Unihan numeric data before release.
private static final int CJK_IDEOGRAPH_TEN_THOUSAND_     = 0x842c;
private static final int CJK_IDEOGRAPH_HUNDRED_MILLION_  = 0x5104;

// private constructor -----------------------------------------------
///CLOVER:OFF
/**
 * Private constructor to prevent instantiation
 */
private UCharacter()
{
}
///CLOVER:ON
}