1 /* 2 * Copyright (c) 2002, 2021, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package java.lang; 27 28 import dalvik.annotation.optimization.FastNative; 29 // Android-removed: CDS is not used on Android. 
30 // import jdk.internal.misc.CDS; 31 import jdk.internal.vm.annotation.IntrinsicCandidate; 32 33 import java.util.Arrays; 34 import java.util.HashMap; 35 import java.util.Locale; 36 import java.util.Map; 37 38 import java.lang.constant.Constable; 39 import java.lang.constant.DynamicConstantDesc; 40 import java.util.Optional; 41 import static java.lang.constant.ConstantDescs.BSM_EXPLICIT_CAST; 42 import static java.lang.constant.ConstantDescs.CD_char; 43 import static java.lang.constant.ConstantDescs.CD_int; 44 import static java.lang.constant.ConstantDescs.DEFAULT_NAME; 45 46 // Android-changed: Remove reference to a specific unicode standard version 47 /** 48 * The {@code Character} class wraps a value of the primitive 49 * type {@code char} in an object. An object of class 50 * {@code Character} contains a single field whose type is 51 * {@code char}. 52 * <p> 53 * In addition, this class provides several methods for determining 54 * a character's category (lowercase letter, digit, etc.) and for converting 55 * characters from uppercase to lowercase and vice versa. 56 * <p> 57 * Character information is based on the Unicode Standard 58 * <p> 59 * The methods and data of class {@code Character} are defined by 60 * the information in the <i>UnicodeData</i> file that is part of the 61 * Unicode Character Database maintained by the Unicode 62 * Consortium. This file specifies various properties including name 63 * and general category for every defined Unicode code point or 64 * character range. 65 * <p> 66 * The file and its description are available from the Unicode Consortium at: 67 * <ul> 68 * <li><a href="http://www.unicode.org">http://www.unicode.org</a> 69 * </ul> 70 * 71 * <h2><a id="conformance">Unicode Conformance</a></h2> 72 * <p> 73 * The fields and methods of class {@code Character} are defined in terms 74 * of character information from the Unicode Standard, specifically the 75 * <i>UnicodeData</i> file that is part of the Unicode Character Database. 
76 * This file specifies properties including name and category for every 77 * assigned Unicode code point or character range. The file is available 78 * from the Unicode Consortium at 79 * <a href="http://www.unicode.org">http://www.unicode.org</a>. 80 * <p> 81 * Character information is based on the Unicode Standard, version 13.0. 82 * <p> 83 * The Java platform has supported different versions of the Unicode 84 * Standard over time. Upgrades to newer versions of the Unicode Standard 85 * occurred in the following Java releases, each indicating the new version: 86 * <table class="striped"> 87 * <caption style="display:none">Shows Java releases and supported Unicode versions</caption> 88 * <thead> 89 * <tr><th scope="col">Java release</th> 90 * <th scope="col">Unicode version</th></tr> 91 * </thead> 92 * <tbody> 93 * <tr><td>Java SE 15</td> 94 * <td>Unicode 13.0</td></tr> 95 * <tr><td>Java SE 13</td> 96 * <td>Unicode 12.1</td></tr> 97 * <tr><td>Java SE 12</td> 98 * <td>Unicode 11.0</td></tr> 99 * <tr><td>Java SE 11</td> 100 * <td>Unicode 10.0</td></tr> 101 * <tr><td>Java SE 9</td> 102 * <td>Unicode 8.0</td></tr> 103 * <tr><td>Java SE 8</td> 104 * <td>Unicode 6.2</td></tr> 105 * <tr><td>Java SE 7</td> 106 * <td>Unicode 6.0</td></tr> 107 * <tr><td>Java SE 5.0</td> 108 * <td>Unicode 4.0</td></tr> 109 * <tr><td>Java SE 1.4</td> 110 * <td>Unicode 3.0</td></tr> 111 * <tr><td>JDK 1.1</td> 112 * <td>Unicode 2.0</td></tr> 113 * <tr><td>JDK 1.0.2</td> 114 * <td>Unicode 1.1.5</td></tr> 115 * </tbody> 116 * </table> 117 * Variations from these base Unicode versions, such as recognized appendixes, 118 * are documented elsewhere. 119 * <h2><a id="unicode">Unicode Character Representations</a></h2> 120 * 121 * <p>The {@code char} data type (and therefore the value that a 122 * {@code Character} object encapsulates) are based on the 123 * original Unicode specification, which defined characters as 124 * fixed-width 16-bit entities. 
The Unicode Standard has since been 125 * changed to allow for characters whose representation requires more 126 * than 16 bits. The range of legal <em>code point</em>s is now 127 * U+0000 to U+10FFFF, known as <em>Unicode scalar value</em>. 128 * (Refer to the <a 129 * href="http://www.unicode.org/reports/tr27/#notation"><i> 130 * definition</i></a> of the U+<i>n</i> notation in the Unicode 131 * Standard.) 132 * 133 * <p><a id="BMP">The set of characters from U+0000 to U+FFFF</a> is 134 * sometimes referred to as the <em>Basic Multilingual Plane (BMP)</em>. 135 * <a id="supplementary">Characters</a> whose code points are greater 136 * than U+FFFF are called <em>supplementary character</em>s. The Java 137 * platform uses the UTF-16 representation in {@code char} arrays and 138 * in the {@code String} and {@code StringBuffer} classes. In 139 * this representation, supplementary characters are represented as a pair 140 * of {@code char} values, the first from the <em>high-surrogates</em> 141 * range, (\uD800-\uDBFF), the second from the 142 * <em>low-surrogates</em> range (\uDC00-\uDFFF). 143 * 144 * <p>A {@code char} value, therefore, represents Basic 145 * Multilingual Plane (BMP) code points, including the surrogate 146 * code points, or code units of the UTF-16 encoding. An 147 * {@code int} value represents all Unicode code points, 148 * including supplementary code points. The lower (least significant) 149 * 21 bits of {@code int} are used to represent Unicode code 150 * points and the upper (most significant) 11 bits must be zero. 151 * Unless otherwise specified, the behavior with respect to 152 * supplementary characters and surrogate {@code char} values is 153 * as follows: 154 * 155 * <ul> 156 * <li>The methods that only accept a {@code char} value cannot support 157 * supplementary characters. They treat {@code char} values from the 158 * surrogate ranges as undefined characters. 
For example, 159 * {@code Character.isLetter('\u005CuD840')} returns {@code false}, even though 160 * this specific value if followed by any low-surrogate value in a string 161 * would represent a letter. 162 * 163 * <li>The methods that accept an {@code int} value support all 164 * Unicode characters, including supplementary characters. For 165 * example, {@code Character.isLetter(0x2F81A)} returns 166 * {@code true} because the code point value represents a letter 167 * (a CJK ideograph). 168 * </ul> 169 * 170 * <p>In the Java SE API documentation, <em>Unicode code point</em> is 171 * used for character values in the range between U+0000 and U+10FFFF, 172 * and <em>Unicode code unit</em> is used for 16-bit 173 * {@code char} values that are code units of the <em>UTF-16</em> 174 * encoding. For more information on Unicode terminology, refer to the 175 * <a href="http://www.unicode.org/glossary/">Unicode Glossary</a>. 176 * 177 * <!-- Android-removed: paragraph on ValueBased 178 * <p>This is a <a href="{@docRoot}/java.base/java/lang/doc-files/ValueBased.html">value-based</a> 179 * class; programmers should treat instances that are 180 * {@linkplain #equals(Object) equal} as interchangeable and should not 181 * use instances for synchronization, or unpredictable behavior may 182 * occur. For example, in a future release, synchronization may fail. 183 * --> 184 * 185 * @author Lee Boynton 186 * @author Guy Steele 187 * @author Akira Tanaka 188 * @author Martin Buchholz 189 * @author Ulf Zibis 190 * @since 1.0 191 */ 192 @jdk.internal.ValueBased 193 public final 194 class Character implements java.io.Serializable, Comparable<Character>, Constable { 195 /** 196 * The minimum radix available for conversion to and from strings. 
197 * The constant value of this field is the smallest value permitted 198 * for the radix argument in radix-conversion methods such as the 199 * {@code digit} method, the {@code forDigit} method, and the 200 * {@code toString} method of class {@code Integer}. 201 * 202 * @see Character#digit(char, int) 203 * @see Character#forDigit(int, int) 204 * @see Integer#toString(int, int) 205 * @see Integer#valueOf(String) 206 */ 207 public static final int MIN_RADIX = 2; 208 209 /** 210 * The maximum radix available for conversion to and from strings. 211 * The constant value of this field is the largest value permitted 212 * for the radix argument in radix-conversion methods such as the 213 * {@code digit} method, the {@code forDigit} method, and the 214 * {@code toString} method of class {@code Integer}. 215 * 216 * @see Character#digit(char, int) 217 * @see Character#forDigit(int, int) 218 * @see Integer#toString(int, int) 219 * @see Integer#valueOf(String) 220 */ 221 public static final int MAX_RADIX = 36; 222 223 /** 224 * The constant value of this field is the smallest value of type 225 * {@code char}, {@code '\u005Cu0000'}. 226 * 227 * @since 1.0.2 228 */ 229 public static final char MIN_VALUE = '\u0000'; 230 231 /** 232 * The constant value of this field is the largest value of type 233 * {@code char}, {@code '\u005CuFFFF'}. 234 * 235 * @since 1.0.2 236 */ 237 public static final char MAX_VALUE = '\uFFFF'; 238 239 /** 240 * The {@code Class} instance representing the primitive type 241 * {@code char}. 242 * 243 * @since 1.1 244 */ 245 @SuppressWarnings("unchecked") 246 public static final Class<Character> TYPE = (Class<Character>) Class.getPrimitiveClass("char"); 247 248 /* 249 * Normative general types 250 */ 251 252 /* 253 * General character types 254 */ 255 256 /** 257 * General category "Cn" in the Unicode specification. 258 * @since 1.1 259 */ 260 public static final byte UNASSIGNED = 0; 261 262 /** 263 * General category "Lu" in the Unicode specification. 
264 * @since 1.1 265 */ 266 public static final byte UPPERCASE_LETTER = 1; 267 268 /** 269 * General category "Ll" in the Unicode specification. 270 * @since 1.1 271 */ 272 public static final byte LOWERCASE_LETTER = 2; 273 274 /** 275 * General category "Lt" in the Unicode specification. 276 * @since 1.1 277 */ 278 public static final byte TITLECASE_LETTER = 3; 279 280 /** 281 * General category "Lm" in the Unicode specification. 282 * @since 1.1 283 */ 284 public static final byte MODIFIER_LETTER = 4; 285 286 /** 287 * General category "Lo" in the Unicode specification. 288 * @since 1.1 289 */ 290 public static final byte OTHER_LETTER = 5; 291 292 /** 293 * General category "Mn" in the Unicode specification. 294 * @since 1.1 295 */ 296 public static final byte NON_SPACING_MARK = 6; 297 298 /** 299 * General category "Me" in the Unicode specification. 300 * @since 1.1 301 */ 302 public static final byte ENCLOSING_MARK = 7; 303 304 /** 305 * General category "Mc" in the Unicode specification. 306 * @since 1.1 307 */ 308 public static final byte COMBINING_SPACING_MARK = 8; 309 310 /** 311 * General category "Nd" in the Unicode specification. 312 * @since 1.1 313 */ 314 public static final byte DECIMAL_DIGIT_NUMBER = 9; 315 316 /** 317 * General category "Nl" in the Unicode specification. 318 * @since 1.1 319 */ 320 public static final byte LETTER_NUMBER = 10; 321 322 /** 323 * General category "No" in the Unicode specification. 324 * @since 1.1 325 */ 326 public static final byte OTHER_NUMBER = 11; 327 328 /** 329 * General category "Zs" in the Unicode specification. 330 * @since 1.1 331 */ 332 public static final byte SPACE_SEPARATOR = 12; 333 334 /** 335 * General category "Zl" in the Unicode specification. 336 * @since 1.1 337 */ 338 public static final byte LINE_SEPARATOR = 13; 339 340 /** 341 * General category "Zp" in the Unicode specification. 
342 * @since 1.1 343 */ 344 public static final byte PARAGRAPH_SEPARATOR = 14; 345 346 /** 347 * General category "Cc" in the Unicode specification. 348 * @since 1.1 349 */ 350 public static final byte CONTROL = 15; 351 352 /** 353 * General category "Cf" in the Unicode specification. 354 * @since 1.1 355 */ 356 public static final byte FORMAT = 16; 357 358 /** 359 * General category "Co" in the Unicode specification. 360 * @since 1.1 361 */ 362 public static final byte PRIVATE_USE = 18; 363 364 /** 365 * General category "Cs" in the Unicode specification. 366 * @since 1.1 367 */ 368 public static final byte SURROGATE = 19; 369 370 /** 371 * General category "Pd" in the Unicode specification. 372 * @since 1.1 373 */ 374 public static final byte DASH_PUNCTUATION = 20; 375 376 /** 377 * General category "Ps" in the Unicode specification. 378 * @since 1.1 379 */ 380 public static final byte START_PUNCTUATION = 21; 381 382 /** 383 * General category "Pe" in the Unicode specification. 384 * @since 1.1 385 */ 386 public static final byte END_PUNCTUATION = 22; 387 388 /** 389 * General category "Pc" in the Unicode specification. 390 * @since 1.1 391 */ 392 public static final byte CONNECTOR_PUNCTUATION = 23; 393 394 /** 395 * General category "Po" in the Unicode specification. 396 * @since 1.1 397 */ 398 public static final byte OTHER_PUNCTUATION = 24; 399 400 /** 401 * General category "Sm" in the Unicode specification. 402 * @since 1.1 403 */ 404 public static final byte MATH_SYMBOL = 25; 405 406 /** 407 * General category "Sc" in the Unicode specification. 408 * @since 1.1 409 */ 410 public static final byte CURRENCY_SYMBOL = 26; 411 412 /** 413 * General category "Sk" in the Unicode specification. 414 * @since 1.1 415 */ 416 public static final byte MODIFIER_SYMBOL = 27; 417 418 /** 419 * General category "So" in the Unicode specification. 
420 * @since 1.1 421 */ 422 public static final byte OTHER_SYMBOL = 28; 423 424 /** 425 * General category "Pi" in the Unicode specification. 426 * @since 1.4 427 */ 428 public static final byte INITIAL_QUOTE_PUNCTUATION = 29; 429 430 /** 431 * General category "Pf" in the Unicode specification. 432 * @since 1.4 433 */ 434 public static final byte FINAL_QUOTE_PUNCTUATION = 30; 435 436 /** 437 * Error flag. Use int (code point) to avoid confusion with U+FFFF. 438 */ 439 static final int ERROR = 0xFFFFFFFF; 440 441 442 /** 443 * Undefined bidirectional character type. Undefined {@code char} 444 * values have undefined directionality in the Unicode specification. 445 * @since 1.4 446 */ 447 public static final byte DIRECTIONALITY_UNDEFINED = -1; 448 449 /** 450 * Strong bidirectional character type "L" in the Unicode specification. 451 * @since 1.4 452 */ 453 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0; 454 455 /** 456 * Strong bidirectional character type "R" in the Unicode specification. 457 * @since 1.4 458 */ 459 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1; 460 461 /** 462 * Strong bidirectional character type "AL" in the Unicode specification. 463 * @since 1.4 464 */ 465 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2; 466 467 /** 468 * Weak bidirectional character type "EN" in the Unicode specification. 469 * @since 1.4 470 */ 471 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3; 472 473 /** 474 * Weak bidirectional character type "ES" in the Unicode specification. 475 * @since 1.4 476 */ 477 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4; 478 479 /** 480 * Weak bidirectional character type "ET" in the Unicode specification. 481 * @since 1.4 482 */ 483 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5; 484 485 /** 486 * Weak bidirectional character type "AN" in the Unicode specification. 
487 * @since 1.4 488 */ 489 public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6; 490 491 /** 492 * Weak bidirectional character type "CS" in the Unicode specification. 493 * @since 1.4 494 */ 495 public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7; 496 497 /** 498 * Weak bidirectional character type "NSM" in the Unicode specification. 499 * @since 1.4 500 */ 501 public static final byte DIRECTIONALITY_NONSPACING_MARK = 8; 502 503 /** 504 * Weak bidirectional character type "BN" in the Unicode specification. 505 * @since 1.4 506 */ 507 public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9; 508 509 /** 510 * Neutral bidirectional character type "B" in the Unicode specification. 511 * @since 1.4 512 */ 513 public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10; 514 515 /** 516 * Neutral bidirectional character type "S" in the Unicode specification. 517 * @since 1.4 518 */ 519 public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11; 520 521 /** 522 * Neutral bidirectional character type "WS" in the Unicode specification. 523 * @since 1.4 524 */ 525 public static final byte DIRECTIONALITY_WHITESPACE = 12; 526 527 /** 528 * Neutral bidirectional character type "ON" in the Unicode specification. 529 * @since 1.4 530 */ 531 public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13; 532 533 /** 534 * Strong bidirectional character type "LRE" in the Unicode specification. 535 * @since 1.4 536 */ 537 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14; 538 539 /** 540 * Strong bidirectional character type "LRO" in the Unicode specification. 541 * @since 1.4 542 */ 543 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15; 544 545 /** 546 * Strong bidirectional character type "RLE" in the Unicode specification. 547 * @since 1.4 548 */ 549 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16; 550 551 /** 552 * Strong bidirectional character type "RLO" in the Unicode specification. 
553 * @since 1.4 554 */ 555 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17; 556 557 /** 558 * Weak bidirectional character type "PDF" in the Unicode specification. 559 * @since 1.4 560 */ 561 public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18; 562 563 /** 564 * Weak bidirectional character type "LRI" in the Unicode specification. 565 * @since 9 566 */ 567 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE = 19; 568 569 /** 570 * Weak bidirectional character type "RLI" in the Unicode specification. 571 * @since 9 572 */ 573 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE = 20; 574 575 /** 576 * Weak bidirectional character type "FSI" in the Unicode specification. 577 * @since 9 578 */ 579 public static final byte DIRECTIONALITY_FIRST_STRONG_ISOLATE = 21; 580 581 /** 582 * Weak bidirectional character type "PDI" in the Unicode specification. 583 * @since 9 584 */ 585 public static final byte DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE = 22; 586 587 /** 588 * The minimum value of a 589 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit"> 590 * Unicode high-surrogate code unit</a> 591 * in the UTF-16 encoding, constant {@code '\u005CuD800'}. 592 * A high-surrogate is also known as a <i>leading-surrogate</i>. 593 * 594 * @since 1.5 595 */ 596 public static final char MIN_HIGH_SURROGATE = '\uD800'; 597 598 /** 599 * The maximum value of a 600 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit"> 601 * Unicode high-surrogate code unit</a> 602 * in the UTF-16 encoding, constant {@code '\u005CuDBFF'}. 603 * A high-surrogate is also known as a <i>leading-surrogate</i>. 604 * 605 * @since 1.5 606 */ 607 public static final char MAX_HIGH_SURROGATE = '\uDBFF'; 608 609 /** 610 * The minimum value of a 611 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit"> 612 * Unicode low-surrogate code unit</a> 613 * in the UTF-16 encoding, constant {@code '\u005CuDC00'}. 
614 * A low-surrogate is also known as a <i>trailing-surrogate</i>. 615 * 616 * @since 1.5 617 */ 618 public static final char MIN_LOW_SURROGATE = '\uDC00'; 619 620 /** 621 * The maximum value of a 622 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit"> 623 * Unicode low-surrogate code unit</a> 624 * in the UTF-16 encoding, constant {@code '\u005CuDFFF'}. 625 * A low-surrogate is also known as a <i>trailing-surrogate</i>. 626 * 627 * @since 1.5 628 */ 629 public static final char MAX_LOW_SURROGATE = '\uDFFF'; 630 631 /** 632 * The minimum value of a Unicode surrogate code unit in the 633 * UTF-16 encoding, constant {@code '\u005CuD800'}. 634 * 635 * @since 1.5 636 */ 637 public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE; 638 639 /** 640 * The maximum value of a Unicode surrogate code unit in the 641 * UTF-16 encoding, constant {@code '\u005CuDFFF'}. 642 * 643 * @since 1.5 644 */ 645 public static final char MAX_SURROGATE = MAX_LOW_SURROGATE; 646 647 /** 648 * The minimum value of a 649 * <a href="http://www.unicode.org/glossary/#supplementary_code_point"> 650 * Unicode supplementary code point</a>, constant {@code U+10000}. 651 * 652 * @since 1.5 653 */ 654 public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x010000; 655 656 /** 657 * The minimum value of a 658 * <a href="http://www.unicode.org/glossary/#code_point"> 659 * Unicode code point</a>, constant {@code U+0000}. 660 * 661 * @since 1.5 662 */ 663 public static final int MIN_CODE_POINT = 0x000000; 664 665 /** 666 * The maximum value of a 667 * <a href="http://www.unicode.org/glossary/#code_point"> 668 * Unicode code point</a>, constant {@code U+10FFFF}. 669 * 670 * @since 1.5 671 */ 672 public static final int MAX_CODE_POINT = 0X10FFFF; 673 674 // BEGIN Android-added: Use ICU. 675 // The indices in int[] DIRECTIONALITY are based on icu4c's u_charDirection(), 676 // accessed via getDirectionalityImpl(), implemented in Character.cpp. 
677 private static final byte[] DIRECTIONALITY = new byte[] { 678 DIRECTIONALITY_LEFT_TO_RIGHT, DIRECTIONALITY_RIGHT_TO_LEFT, 679 DIRECTIONALITY_EUROPEAN_NUMBER, 680 DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR, 681 DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR, 682 DIRECTIONALITY_ARABIC_NUMBER, 683 DIRECTIONALITY_COMMON_NUMBER_SEPARATOR, 684 DIRECTIONALITY_PARAGRAPH_SEPARATOR, 685 DIRECTIONALITY_SEGMENT_SEPARATOR, DIRECTIONALITY_WHITESPACE, 686 DIRECTIONALITY_OTHER_NEUTRALS, 687 DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING, 688 DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE, 689 DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC, 690 DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING, 691 DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE, 692 DIRECTIONALITY_POP_DIRECTIONAL_FORMAT, 693 DIRECTIONALITY_NONSPACING_MARK, DIRECTIONALITY_BOUNDARY_NEUTRAL }; 694 // END Android-added: Use ICU. 695 696 /** 697 * Returns an {@link Optional} containing the nominal descriptor for this 698 * instance. 699 * 700 * @return an {@link Optional} describing the {@linkplain Character} instance 701 * @since 15 702 * @hide 703 */ 704 @Override describeConstable()705 public Optional<DynamicConstantDesc<Character>> describeConstable() { 706 return Optional.of(DynamicConstantDesc.ofNamed(BSM_EXPLICIT_CAST, DEFAULT_NAME, CD_char, (int) value)); 707 } 708 709 /** 710 * Instances of this class represent particular subsets of the Unicode 711 * character set. The only family of subsets defined in the 712 * {@code Character} class is {@link Character.UnicodeBlock}. 713 * Other portions of the Java API may define other subsets for their 714 * own purposes. 715 * 716 * @since 1.2 717 */ 718 public static class Subset { 719 720 private String name; 721 722 /** 723 * Constructs a new {@code Subset} instance. 
724 * 725 * @param name The name of this subset 726 * @throws NullPointerException if name is {@code null} 727 */ Subset(String name)728 protected Subset(String name) { 729 if (name == null) { 730 throw new NullPointerException("name"); 731 } 732 this.name = name; 733 } 734 735 /** 736 * Compares two {@code Subset} objects for equality. 737 * This method returns {@code true} if and only if 738 * {@code this} and the argument refer to the same 739 * object; since this method is {@code final}, this 740 * guarantee holds for all subclasses. 741 */ equals(Object obj)742 public final boolean equals(Object obj) { 743 return (this == obj); 744 } 745 746 /** 747 * Returns the standard hash code as defined by the 748 * {@link Object#hashCode} method. This method 749 * is {@code final} in order to ensure that the 750 * {@code equals} and {@code hashCode} methods will 751 * be consistent in all subclasses. 752 */ hashCode()753 public final int hashCode() { 754 return super.hashCode(); 755 } 756 757 /** 758 * Returns the name of this subset. 759 */ toString()760 public final String toString() { 761 return name; 762 } 763 } 764 765 // See http://www.unicode.org/Public/UNIDATA/Blocks.txt 766 // for the latest specification of Unicode Blocks. 767 768 /** 769 * A family of character subsets representing the character blocks in the 770 * Unicode specification. Character blocks generally define characters 771 * used for a specific script or purpose. A character is contained by 772 * at most one Unicode block. 773 * 774 * @since 1.2 775 */ 776 public static final class UnicodeBlock extends Subset { 777 /** 778 * 684 - the expected number of entities 779 * 0.75 - the default load factor of HashMap 780 */ 781 private static final int NUM_ENTITIES = 684; 782 private static Map<String, UnicodeBlock> map = 783 new HashMap<>((int)(NUM_ENTITIES / 0.75f + 1.0f)); 784 785 /** 786 * Creates a UnicodeBlock with the given identifier name. 787 * This name must be the same as the block identifier. 
788 */ UnicodeBlock(String idName)789 private UnicodeBlock(String idName) { 790 super(idName); 791 map.put(idName, this); 792 } 793 794 // BEGIN Android-added: ICU consistency: Don't map deprecated SURROGATES_AREA. b/26140229 795 // Add a (String, boolean) constructor for use by SURROGATES_AREA. UnicodeBlock(String idName, boolean isMap)796 private UnicodeBlock(String idName, boolean isMap) { 797 super(idName); 798 if (isMap) { 799 map.put(idName, this); 800 } 801 } 802 // END Android-added: ICU consistency: Don't map deprecated SURROGATES_AREA. b/26140229 803 804 /** 805 * Creates a UnicodeBlock with the given identifier name and 806 * alias name. 807 */ UnicodeBlock(String idName, String alias)808 private UnicodeBlock(String idName, String alias) { 809 this(idName); 810 map.put(alias, this); 811 } 812 813 /** 814 * Creates a UnicodeBlock with the given identifier name and 815 * alias names. 816 */ UnicodeBlock(String idName, String... aliases)817 private UnicodeBlock(String idName, String... aliases) { 818 this(idName); 819 for (String alias : aliases) 820 map.put(alias, this); 821 } 822 823 /** 824 * Constant for the "Basic Latin" Unicode character block. 825 * @since 1.2 826 */ 827 public static final UnicodeBlock BASIC_LATIN = 828 new UnicodeBlock("BASIC_LATIN", 829 "BASIC LATIN", 830 "BASICLATIN"); 831 832 /** 833 * Constant for the "Latin-1 Supplement" Unicode character block. 834 * @since 1.2 835 */ 836 public static final UnicodeBlock LATIN_1_SUPPLEMENT = 837 new UnicodeBlock("LATIN_1_SUPPLEMENT", 838 "LATIN-1 SUPPLEMENT", 839 "LATIN-1SUPPLEMENT"); 840 841 /** 842 * Constant for the "Latin Extended-A" Unicode character block. 843 * @since 1.2 844 */ 845 public static final UnicodeBlock LATIN_EXTENDED_A = 846 new UnicodeBlock("LATIN_EXTENDED_A", 847 "LATIN EXTENDED-A", 848 "LATINEXTENDED-A"); 849 850 /** 851 * Constant for the "Latin Extended-B" Unicode character block. 
852 * @since 1.2 853 */ 854 public static final UnicodeBlock LATIN_EXTENDED_B = 855 new UnicodeBlock("LATIN_EXTENDED_B", 856 "LATIN EXTENDED-B", 857 "LATINEXTENDED-B"); 858 859 /** 860 * Constant for the "IPA Extensions" Unicode character block. 861 * @since 1.2 862 */ 863 public static final UnicodeBlock IPA_EXTENSIONS = 864 new UnicodeBlock("IPA_EXTENSIONS", 865 "IPA EXTENSIONS", 866 "IPAEXTENSIONS"); 867 868 /** 869 * Constant for the "Spacing Modifier Letters" Unicode character block. 870 * @since 1.2 871 */ 872 public static final UnicodeBlock SPACING_MODIFIER_LETTERS = 873 new UnicodeBlock("SPACING_MODIFIER_LETTERS", 874 "SPACING MODIFIER LETTERS", 875 "SPACINGMODIFIERLETTERS"); 876 877 /** 878 * Constant for the "Combining Diacritical Marks" Unicode character block. 879 * @since 1.2 880 */ 881 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS = 882 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS", 883 "COMBINING DIACRITICAL MARKS", 884 "COMBININGDIACRITICALMARKS"); 885 886 /** 887 * Constant for the "Greek and Coptic" Unicode character block. 888 * <p> 889 * This block was previously known as the "Greek" block. 890 * 891 * @since 1.2 892 */ 893 public static final UnicodeBlock GREEK = 894 new UnicodeBlock("GREEK", 895 "GREEK AND COPTIC", 896 "GREEKANDCOPTIC"); 897 898 /** 899 * Constant for the "Cyrillic" Unicode character block. 900 * @since 1.2 901 */ 902 public static final UnicodeBlock CYRILLIC = 903 new UnicodeBlock("CYRILLIC"); 904 905 /** 906 * Constant for the "Armenian" Unicode character block. 907 * @since 1.2 908 */ 909 public static final UnicodeBlock ARMENIAN = 910 new UnicodeBlock("ARMENIAN"); 911 912 /** 913 * Constant for the "Hebrew" Unicode character block. 914 * @since 1.2 915 */ 916 public static final UnicodeBlock HEBREW = 917 new UnicodeBlock("HEBREW"); 918 919 /** 920 * Constant for the "Arabic" Unicode character block. 
921 * @since 1.2 922 */ 923 public static final UnicodeBlock ARABIC = 924 new UnicodeBlock("ARABIC"); 925 926 /** 927 * Constant for the "Devanagari" Unicode character block. 928 * @since 1.2 929 */ 930 public static final UnicodeBlock DEVANAGARI = 931 new UnicodeBlock("DEVANAGARI"); 932 933 /** 934 * Constant for the "Bengali" Unicode character block. 935 * @since 1.2 936 */ 937 public static final UnicodeBlock BENGALI = 938 new UnicodeBlock("BENGALI"); 939 940 /** 941 * Constant for the "Gurmukhi" Unicode character block. 942 * @since 1.2 943 */ 944 public static final UnicodeBlock GURMUKHI = 945 new UnicodeBlock("GURMUKHI"); 946 947 /** 948 * Constant for the "Gujarati" Unicode character block. 949 * @since 1.2 950 */ 951 public static final UnicodeBlock GUJARATI = 952 new UnicodeBlock("GUJARATI"); 953 954 /** 955 * Constant for the "Oriya" Unicode character block. 956 * @since 1.2 957 */ 958 public static final UnicodeBlock ORIYA = 959 new UnicodeBlock("ORIYA"); 960 961 /** 962 * Constant for the "Tamil" Unicode character block. 963 * @since 1.2 964 */ 965 public static final UnicodeBlock TAMIL = 966 new UnicodeBlock("TAMIL"); 967 968 /** 969 * Constant for the "Telugu" Unicode character block. 970 * @since 1.2 971 */ 972 public static final UnicodeBlock TELUGU = 973 new UnicodeBlock("TELUGU"); 974 975 /** 976 * Constant for the "Kannada" Unicode character block. 977 * @since 1.2 978 */ 979 public static final UnicodeBlock KANNADA = 980 new UnicodeBlock("KANNADA"); 981 982 /** 983 * Constant for the "Malayalam" Unicode character block. 984 * @since 1.2 985 */ 986 public static final UnicodeBlock MALAYALAM = 987 new UnicodeBlock("MALAYALAM"); 988 989 /** 990 * Constant for the "Thai" Unicode character block. 991 * @since 1.2 992 */ 993 public static final UnicodeBlock THAI = 994 new UnicodeBlock("THAI"); 995 996 /** 997 * Constant for the "Lao" Unicode character block. 
998 * @since 1.2 999 */ 1000 public static final UnicodeBlock LAO = 1001 new UnicodeBlock("LAO"); 1002 1003 /** 1004 * Constant for the "Tibetan" Unicode character block. 1005 * @since 1.2 1006 */ 1007 public static final UnicodeBlock TIBETAN = 1008 new UnicodeBlock("TIBETAN"); 1009 1010 /** 1011 * Constant for the "Georgian" Unicode character block. 1012 * @since 1.2 1013 */ 1014 public static final UnicodeBlock GEORGIAN = 1015 new UnicodeBlock("GEORGIAN"); 1016 1017 /** 1018 * Constant for the "Hangul Jamo" Unicode character block. 1019 * @since 1.2 1020 */ 1021 public static final UnicodeBlock HANGUL_JAMO = 1022 new UnicodeBlock("HANGUL_JAMO", 1023 "HANGUL JAMO", 1024 "HANGULJAMO"); 1025 1026 /** 1027 * Constant for the "Latin Extended Additional" Unicode character block. 1028 * @since 1.2 1029 */ 1030 public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL = 1031 new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL", 1032 "LATIN EXTENDED ADDITIONAL", 1033 "LATINEXTENDEDADDITIONAL"); 1034 1035 /** 1036 * Constant for the "Greek Extended" Unicode character block. 1037 * @since 1.2 1038 */ 1039 public static final UnicodeBlock GREEK_EXTENDED = 1040 new UnicodeBlock("GREEK_EXTENDED", 1041 "GREEK EXTENDED", 1042 "GREEKEXTENDED"); 1043 1044 /** 1045 * Constant for the "General Punctuation" Unicode character block. 1046 * @since 1.2 1047 */ 1048 public static final UnicodeBlock GENERAL_PUNCTUATION = 1049 new UnicodeBlock("GENERAL_PUNCTUATION", 1050 "GENERAL PUNCTUATION", 1051 "GENERALPUNCTUATION"); 1052 1053 /** 1054 * Constant for the "Superscripts and Subscripts" Unicode character 1055 * block. 1056 * @since 1.2 1057 */ 1058 public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS = 1059 new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS", 1060 "SUPERSCRIPTS AND SUBSCRIPTS", 1061 "SUPERSCRIPTSANDSUBSCRIPTS"); 1062 1063 /** 1064 * Constant for the "Currency Symbols" Unicode character block. 
1065 * @since 1.2 1066 */ 1067 public static final UnicodeBlock CURRENCY_SYMBOLS = 1068 new UnicodeBlock("CURRENCY_SYMBOLS", 1069 "CURRENCY SYMBOLS", 1070 "CURRENCYSYMBOLS"); 1071 1072 /** 1073 * Constant for the "Combining Diacritical Marks for Symbols" Unicode 1074 * character block. 1075 * <p> 1076 * This block was previously known as "Combining Marks for Symbols". 1077 * @since 1.2 1078 */ 1079 public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS = 1080 new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS", 1081 "COMBINING DIACRITICAL MARKS FOR SYMBOLS", 1082 "COMBININGDIACRITICALMARKSFORSYMBOLS", 1083 "COMBINING MARKS FOR SYMBOLS", 1084 "COMBININGMARKSFORSYMBOLS"); 1085 1086 /** 1087 * Constant for the "Letterlike Symbols" Unicode character block. 1088 * @since 1.2 1089 */ 1090 public static final UnicodeBlock LETTERLIKE_SYMBOLS = 1091 new UnicodeBlock("LETTERLIKE_SYMBOLS", 1092 "LETTERLIKE SYMBOLS", 1093 "LETTERLIKESYMBOLS"); 1094 1095 /** 1096 * Constant for the "Number Forms" Unicode character block. 1097 * @since 1.2 1098 */ 1099 public static final UnicodeBlock NUMBER_FORMS = 1100 new UnicodeBlock("NUMBER_FORMS", 1101 "NUMBER FORMS", 1102 "NUMBERFORMS"); 1103 1104 /** 1105 * Constant for the "Arrows" Unicode character block. 1106 * @since 1.2 1107 */ 1108 public static final UnicodeBlock ARROWS = 1109 new UnicodeBlock("ARROWS"); 1110 1111 /** 1112 * Constant for the "Mathematical Operators" Unicode character block. 1113 * @since 1.2 1114 */ 1115 public static final UnicodeBlock MATHEMATICAL_OPERATORS = 1116 new UnicodeBlock("MATHEMATICAL_OPERATORS", 1117 "MATHEMATICAL OPERATORS", 1118 "MATHEMATICALOPERATORS"); 1119 1120 /** 1121 * Constant for the "Miscellaneous Technical" Unicode character block. 
1122 * @since 1.2 1123 */ 1124 public static final UnicodeBlock MISCELLANEOUS_TECHNICAL = 1125 new UnicodeBlock("MISCELLANEOUS_TECHNICAL", 1126 "MISCELLANEOUS TECHNICAL", 1127 "MISCELLANEOUSTECHNICAL"); 1128 1129 /** 1130 * Constant for the "Control Pictures" Unicode character block. 1131 * @since 1.2 1132 */ 1133 public static final UnicodeBlock CONTROL_PICTURES = 1134 new UnicodeBlock("CONTROL_PICTURES", 1135 "CONTROL PICTURES", 1136 "CONTROLPICTURES"); 1137 1138 /** 1139 * Constant for the "Optical Character Recognition" Unicode character block. 1140 * @since 1.2 1141 */ 1142 public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION = 1143 new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION", 1144 "OPTICAL CHARACTER RECOGNITION", 1145 "OPTICALCHARACTERRECOGNITION"); 1146 1147 /** 1148 * Constant for the "Enclosed Alphanumerics" Unicode character block. 1149 * @since 1.2 1150 */ 1151 public static final UnicodeBlock ENCLOSED_ALPHANUMERICS = 1152 new UnicodeBlock("ENCLOSED_ALPHANUMERICS", 1153 "ENCLOSED ALPHANUMERICS", 1154 "ENCLOSEDALPHANUMERICS"); 1155 1156 /** 1157 * Constant for the "Box Drawing" Unicode character block. 1158 * @since 1.2 1159 */ 1160 public static final UnicodeBlock BOX_DRAWING = 1161 new UnicodeBlock("BOX_DRAWING", 1162 "BOX DRAWING", 1163 "BOXDRAWING"); 1164 1165 /** 1166 * Constant for the "Block Elements" Unicode character block. 1167 * @since 1.2 1168 */ 1169 public static final UnicodeBlock BLOCK_ELEMENTS = 1170 new UnicodeBlock("BLOCK_ELEMENTS", 1171 "BLOCK ELEMENTS", 1172 "BLOCKELEMENTS"); 1173 1174 /** 1175 * Constant for the "Geometric Shapes" Unicode character block. 1176 * @since 1.2 1177 */ 1178 public static final UnicodeBlock GEOMETRIC_SHAPES = 1179 new UnicodeBlock("GEOMETRIC_SHAPES", 1180 "GEOMETRIC SHAPES", 1181 "GEOMETRICSHAPES"); 1182 1183 /** 1184 * Constant for the "Miscellaneous Symbols" Unicode character block. 
1185 * @since 1.2 1186 */ 1187 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS = 1188 new UnicodeBlock("MISCELLANEOUS_SYMBOLS", 1189 "MISCELLANEOUS SYMBOLS", 1190 "MISCELLANEOUSSYMBOLS"); 1191 1192 /** 1193 * Constant for the "Dingbats" Unicode character block. 1194 * @since 1.2 1195 */ 1196 public static final UnicodeBlock DINGBATS = 1197 new UnicodeBlock("DINGBATS"); 1198 1199 /** 1200 * Constant for the "CJK Symbols and Punctuation" Unicode character block. 1201 * @since 1.2 1202 */ 1203 public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION = 1204 new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION", 1205 "CJK SYMBOLS AND PUNCTUATION", 1206 "CJKSYMBOLSANDPUNCTUATION"); 1207 1208 /** 1209 * Constant for the "Hiragana" Unicode character block. 1210 * @since 1.2 1211 */ 1212 public static final UnicodeBlock HIRAGANA = 1213 new UnicodeBlock("HIRAGANA"); 1214 1215 /** 1216 * Constant for the "Katakana" Unicode character block. 1217 * @since 1.2 1218 */ 1219 public static final UnicodeBlock KATAKANA = 1220 new UnicodeBlock("KATAKANA"); 1221 1222 /** 1223 * Constant for the "Bopomofo" Unicode character block. 1224 * @since 1.2 1225 */ 1226 public static final UnicodeBlock BOPOMOFO = 1227 new UnicodeBlock("BOPOMOFO"); 1228 1229 /** 1230 * Constant for the "Hangul Compatibility Jamo" Unicode character block. 1231 * @since 1.2 1232 */ 1233 public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO = 1234 new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO", 1235 "HANGUL COMPATIBILITY JAMO", 1236 "HANGULCOMPATIBILITYJAMO"); 1237 1238 /** 1239 * Constant for the "Kanbun" Unicode character block. 1240 * @since 1.2 1241 */ 1242 public static final UnicodeBlock KANBUN = 1243 new UnicodeBlock("KANBUN"); 1244 1245 /** 1246 * Constant for the "Enclosed CJK Letters and Months" Unicode character block. 
1247 * @since 1.2 1248 */ 1249 public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS = 1250 new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS", 1251 "ENCLOSED CJK LETTERS AND MONTHS", 1252 "ENCLOSEDCJKLETTERSANDMONTHS"); 1253 1254 /** 1255 * Constant for the "CJK Compatibility" Unicode character block. 1256 * @since 1.2 1257 */ 1258 public static final UnicodeBlock CJK_COMPATIBILITY = 1259 new UnicodeBlock("CJK_COMPATIBILITY", 1260 "CJK COMPATIBILITY", 1261 "CJKCOMPATIBILITY"); 1262 1263 /** 1264 * Constant for the "CJK Unified Ideographs" Unicode character block. 1265 * @since 1.2 1266 */ 1267 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS = 1268 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS", 1269 "CJK UNIFIED IDEOGRAPHS", 1270 "CJKUNIFIEDIDEOGRAPHS"); 1271 1272 /** 1273 * Constant for the "Hangul Syllables" Unicode character block. 1274 * @since 1.2 1275 */ 1276 public static final UnicodeBlock HANGUL_SYLLABLES = 1277 new UnicodeBlock("HANGUL_SYLLABLES", 1278 "HANGUL SYLLABLES", 1279 "HANGULSYLLABLES"); 1280 1281 /** 1282 * Constant for the "Private Use Area" Unicode character block. 1283 * @since 1.2 1284 */ 1285 public static final UnicodeBlock PRIVATE_USE_AREA = 1286 new UnicodeBlock("PRIVATE_USE_AREA", 1287 "PRIVATE USE AREA", 1288 "PRIVATEUSEAREA"); 1289 1290 /** 1291 * Constant for the "CJK Compatibility Ideographs" Unicode character 1292 * block. 1293 * @since 1.2 1294 */ 1295 public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS = 1296 new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS", 1297 "CJK COMPATIBILITY IDEOGRAPHS", 1298 "CJKCOMPATIBILITYIDEOGRAPHS"); 1299 1300 /** 1301 * Constant for the "Alphabetic Presentation Forms" Unicode character block. 
1302 * @since 1.2 1303 */ 1304 public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS = 1305 new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS", 1306 "ALPHABETIC PRESENTATION FORMS", 1307 "ALPHABETICPRESENTATIONFORMS"); 1308 1309 /** 1310 * Constant for the "Arabic Presentation Forms-A" Unicode character 1311 * block. 1312 * @since 1.2 1313 */ 1314 public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A = 1315 new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A", 1316 "ARABIC PRESENTATION FORMS-A", 1317 "ARABICPRESENTATIONFORMS-A"); 1318 1319 /** 1320 * Constant for the "Combining Half Marks" Unicode character block. 1321 * @since 1.2 1322 */ 1323 public static final UnicodeBlock COMBINING_HALF_MARKS = 1324 new UnicodeBlock("COMBINING_HALF_MARKS", 1325 "COMBINING HALF MARKS", 1326 "COMBININGHALFMARKS"); 1327 1328 /** 1329 * Constant for the "CJK Compatibility Forms" Unicode character block. 1330 * @since 1.2 1331 */ 1332 public static final UnicodeBlock CJK_COMPATIBILITY_FORMS = 1333 new UnicodeBlock("CJK_COMPATIBILITY_FORMS", 1334 "CJK COMPATIBILITY FORMS", 1335 "CJKCOMPATIBILITYFORMS"); 1336 1337 /** 1338 * Constant for the "Small Form Variants" Unicode character block. 1339 * @since 1.2 1340 */ 1341 public static final UnicodeBlock SMALL_FORM_VARIANTS = 1342 new UnicodeBlock("SMALL_FORM_VARIANTS", 1343 "SMALL FORM VARIANTS", 1344 "SMALLFORMVARIANTS"); 1345 1346 /** 1347 * Constant for the "Arabic Presentation Forms-B" Unicode character block. 1348 * @since 1.2 1349 */ 1350 public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B = 1351 new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B", 1352 "ARABIC PRESENTATION FORMS-B", 1353 "ARABICPRESENTATIONFORMS-B"); 1354 1355 /** 1356 * Constant for the "Halfwidth and Fullwidth Forms" Unicode character 1357 * block. 
1358 * @since 1.2 1359 */ 1360 public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS = 1361 new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS", 1362 "HALFWIDTH AND FULLWIDTH FORMS", 1363 "HALFWIDTHANDFULLWIDTHFORMS"); 1364 1365 /** 1366 * Constant for the "Specials" Unicode character block. 1367 * @since 1.2 1368 */ 1369 public static final UnicodeBlock SPECIALS = 1370 new UnicodeBlock("SPECIALS"); 1371 1372 /** 1373 * @deprecated 1374 * Instead of {@code SURROGATES_AREA}, use {@link #HIGH_SURROGATES}, 1375 * {@link #HIGH_PRIVATE_USE_SURROGATES}, and {@link #LOW_SURROGATES}. 1376 * These constants match the block definitions of the Unicode Standard. 1377 * The {@link #of(char)} and {@link #of(int)} methods return the 1378 * standard constants. 1379 */ 1380 @Deprecated(since="1.5") 1381 public static final UnicodeBlock SURROGATES_AREA = 1382 // Android-changed: ICU consistency: Don't map deprecated SURROGATES_AREA. b/26140229 1383 // new UnicodeBlock("SURROGATES_AREA"); 1384 new UnicodeBlock("SURROGATES_AREA", false); 1385 1386 /** 1387 * Constant for the "Syriac" Unicode character block. 1388 * @since 1.4 1389 */ 1390 public static final UnicodeBlock SYRIAC = 1391 new UnicodeBlock("SYRIAC"); 1392 1393 /** 1394 * Constant for the "Thaana" Unicode character block. 1395 * @since 1.4 1396 */ 1397 public static final UnicodeBlock THAANA = 1398 new UnicodeBlock("THAANA"); 1399 1400 /** 1401 * Constant for the "Sinhala" Unicode character block. 1402 * @since 1.4 1403 */ 1404 public static final UnicodeBlock SINHALA = 1405 new UnicodeBlock("SINHALA"); 1406 1407 /** 1408 * Constant for the "Myanmar" Unicode character block. 1409 * @since 1.4 1410 */ 1411 public static final UnicodeBlock MYANMAR = 1412 new UnicodeBlock("MYANMAR"); 1413 1414 /** 1415 * Constant for the "Ethiopic" Unicode character block. 
1416 * @since 1.4 1417 */ 1418 public static final UnicodeBlock ETHIOPIC = 1419 new UnicodeBlock("ETHIOPIC"); 1420 1421 /** 1422 * Constant for the "Cherokee" Unicode character block. 1423 * @since 1.4 1424 */ 1425 public static final UnicodeBlock CHEROKEE = 1426 new UnicodeBlock("CHEROKEE"); 1427 1428 /** 1429 * Constant for the "Unified Canadian Aboriginal Syllabics" Unicode character block. 1430 * @since 1.4 1431 */ 1432 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS = 1433 new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS", 1434 "UNIFIED CANADIAN ABORIGINAL SYLLABICS", 1435 "UNIFIEDCANADIANABORIGINALSYLLABICS"); 1436 1437 /** 1438 * Constant for the "Ogham" Unicode character block. 1439 * @since 1.4 1440 */ 1441 public static final UnicodeBlock OGHAM = 1442 new UnicodeBlock("OGHAM"); 1443 1444 /** 1445 * Constant for the "Runic" Unicode character block. 1446 * @since 1.4 1447 */ 1448 public static final UnicodeBlock RUNIC = 1449 new UnicodeBlock("RUNIC"); 1450 1451 /** 1452 * Constant for the "Khmer" Unicode character block. 1453 * @since 1.4 1454 */ 1455 public static final UnicodeBlock KHMER = 1456 new UnicodeBlock("KHMER"); 1457 1458 /** 1459 * Constant for the "Mongolian" Unicode character block. 1460 * @since 1.4 1461 */ 1462 public static final UnicodeBlock MONGOLIAN = 1463 new UnicodeBlock("MONGOLIAN"); 1464 1465 /** 1466 * Constant for the "Braille Patterns" Unicode character block. 1467 * @since 1.4 1468 */ 1469 public static final UnicodeBlock BRAILLE_PATTERNS = 1470 new UnicodeBlock("BRAILLE_PATTERNS", 1471 "BRAILLE PATTERNS", 1472 "BRAILLEPATTERNS"); 1473 1474 /** 1475 * Constant for the "CJK Radicals Supplement" Unicode character block. 1476 * @since 1.4 1477 */ 1478 public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT = 1479 new UnicodeBlock("CJK_RADICALS_SUPPLEMENT", 1480 "CJK RADICALS SUPPLEMENT", 1481 "CJKRADICALSSUPPLEMENT"); 1482 1483 /** 1484 * Constant for the "Kangxi Radicals" Unicode character block. 
1485 * @since 1.4 1486 */ 1487 public static final UnicodeBlock KANGXI_RADICALS = 1488 new UnicodeBlock("KANGXI_RADICALS", 1489 "KANGXI RADICALS", 1490 "KANGXIRADICALS"); 1491 1492 /** 1493 * Constant for the "Ideographic Description Characters" Unicode character block. 1494 * @since 1.4 1495 */ 1496 public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS = 1497 new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS", 1498 "IDEOGRAPHIC DESCRIPTION CHARACTERS", 1499 "IDEOGRAPHICDESCRIPTIONCHARACTERS"); 1500 1501 /** 1502 * Constant for the "Bopomofo Extended" Unicode character block. 1503 * @since 1.4 1504 */ 1505 public static final UnicodeBlock BOPOMOFO_EXTENDED = 1506 new UnicodeBlock("BOPOMOFO_EXTENDED", 1507 "BOPOMOFO EXTENDED", 1508 "BOPOMOFOEXTENDED"); 1509 1510 /** 1511 * Constant for the "CJK Unified Ideographs Extension A" Unicode character block. 1512 * @since 1.4 1513 */ 1514 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A = 1515 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A", 1516 "CJK UNIFIED IDEOGRAPHS EXTENSION A", 1517 "CJKUNIFIEDIDEOGRAPHSEXTENSIONA"); 1518 1519 /** 1520 * Constant for the "Yi Syllables" Unicode character block. 1521 * @since 1.4 1522 */ 1523 public static final UnicodeBlock YI_SYLLABLES = 1524 new UnicodeBlock("YI_SYLLABLES", 1525 "YI SYLLABLES", 1526 "YISYLLABLES"); 1527 1528 /** 1529 * Constant for the "Yi Radicals" Unicode character block. 1530 * @since 1.4 1531 */ 1532 public static final UnicodeBlock YI_RADICALS = 1533 new UnicodeBlock("YI_RADICALS", 1534 "YI RADICALS", 1535 "YIRADICALS"); 1536 1537 /** 1538 * Constant for the "Cyrillic Supplement" Unicode character block. 1539 * This block was previously known as the "Cyrillic Supplementary" block. 
1540 * @since 1.5 1541 */ 1542 public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY = 1543 new UnicodeBlock("CYRILLIC_SUPPLEMENTARY", 1544 "CYRILLIC SUPPLEMENTARY", 1545 "CYRILLICSUPPLEMENTARY", 1546 "CYRILLIC SUPPLEMENT", 1547 "CYRILLICSUPPLEMENT"); 1548 1549 /** 1550 * Constant for the "Tagalog" Unicode character block. 1551 * @since 1.5 1552 */ 1553 public static final UnicodeBlock TAGALOG = 1554 new UnicodeBlock("TAGALOG"); 1555 1556 /** 1557 * Constant for the "Hanunoo" Unicode character block. 1558 * @since 1.5 1559 */ 1560 public static final UnicodeBlock HANUNOO = 1561 new UnicodeBlock("HANUNOO"); 1562 1563 /** 1564 * Constant for the "Buhid" Unicode character block. 1565 * @since 1.5 1566 */ 1567 public static final UnicodeBlock BUHID = 1568 new UnicodeBlock("BUHID"); 1569 1570 /** 1571 * Constant for the "Tagbanwa" Unicode character block. 1572 * @since 1.5 1573 */ 1574 public static final UnicodeBlock TAGBANWA = 1575 new UnicodeBlock("TAGBANWA"); 1576 1577 /** 1578 * Constant for the "Limbu" Unicode character block. 1579 * @since 1.5 1580 */ 1581 public static final UnicodeBlock LIMBU = 1582 new UnicodeBlock("LIMBU"); 1583 1584 /** 1585 * Constant for the "Tai Le" Unicode character block. 1586 * @since 1.5 1587 */ 1588 public static final UnicodeBlock TAI_LE = 1589 new UnicodeBlock("TAI_LE", 1590 "TAI LE", 1591 "TAILE"); 1592 1593 /** 1594 * Constant for the "Khmer Symbols" Unicode character block. 1595 * @since 1.5 1596 */ 1597 public static final UnicodeBlock KHMER_SYMBOLS = 1598 new UnicodeBlock("KHMER_SYMBOLS", 1599 "KHMER SYMBOLS", 1600 "KHMERSYMBOLS"); 1601 1602 /** 1603 * Constant for the "Phonetic Extensions" Unicode character block. 1604 * @since 1.5 1605 */ 1606 public static final UnicodeBlock PHONETIC_EXTENSIONS = 1607 new UnicodeBlock("PHONETIC_EXTENSIONS", 1608 "PHONETIC EXTENSIONS", 1609 "PHONETICEXTENSIONS"); 1610 1611 /** 1612 * Constant for the "Miscellaneous Mathematical Symbols-A" Unicode character block. 
1613 * @since 1.5 1614 */ 1615 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A = 1616 new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A", 1617 "MISCELLANEOUS MATHEMATICAL SYMBOLS-A", 1618 "MISCELLANEOUSMATHEMATICALSYMBOLS-A"); 1619 1620 /** 1621 * Constant for the "Supplemental Arrows-A" Unicode character block. 1622 * @since 1.5 1623 */ 1624 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A = 1625 new UnicodeBlock("SUPPLEMENTAL_ARROWS_A", 1626 "SUPPLEMENTAL ARROWS-A", 1627 "SUPPLEMENTALARROWS-A"); 1628 1629 /** 1630 * Constant for the "Supplemental Arrows-B" Unicode character block. 1631 * @since 1.5 1632 */ 1633 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B = 1634 new UnicodeBlock("SUPPLEMENTAL_ARROWS_B", 1635 "SUPPLEMENTAL ARROWS-B", 1636 "SUPPLEMENTALARROWS-B"); 1637 1638 /** 1639 * Constant for the "Miscellaneous Mathematical Symbols-B" Unicode 1640 * character block. 1641 * @since 1.5 1642 */ 1643 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B = 1644 new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B", 1645 "MISCELLANEOUS MATHEMATICAL SYMBOLS-B", 1646 "MISCELLANEOUSMATHEMATICALSYMBOLS-B"); 1647 1648 /** 1649 * Constant for the "Supplemental Mathematical Operators" Unicode 1650 * character block. 1651 * @since 1.5 1652 */ 1653 public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS = 1654 new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS", 1655 "SUPPLEMENTAL MATHEMATICAL OPERATORS", 1656 "SUPPLEMENTALMATHEMATICALOPERATORS"); 1657 1658 /** 1659 * Constant for the "Miscellaneous Symbols and Arrows" Unicode character 1660 * block. 1661 * @since 1.5 1662 */ 1663 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS = 1664 new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS", 1665 "MISCELLANEOUS SYMBOLS AND ARROWS", 1666 "MISCELLANEOUSSYMBOLSANDARROWS"); 1667 1668 /** 1669 * Constant for the "Katakana Phonetic Extensions" Unicode character 1670 * block. 
1671 * @since 1.5 1672 */ 1673 public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS = 1674 new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS", 1675 "KATAKANA PHONETIC EXTENSIONS", 1676 "KATAKANAPHONETICEXTENSIONS"); 1677 1678 /** 1679 * Constant for the "Yijing Hexagram Symbols" Unicode character block. 1680 * @since 1.5 1681 */ 1682 public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS = 1683 new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS", 1684 "YIJING HEXAGRAM SYMBOLS", 1685 "YIJINGHEXAGRAMSYMBOLS"); 1686 1687 /** 1688 * Constant for the "Variation Selectors" Unicode character block. 1689 * @since 1.5 1690 */ 1691 public static final UnicodeBlock VARIATION_SELECTORS = 1692 new UnicodeBlock("VARIATION_SELECTORS", 1693 "VARIATION SELECTORS", 1694 "VARIATIONSELECTORS"); 1695 1696 /** 1697 * Constant for the "Linear B Syllabary" Unicode character block. 1698 * @since 1.5 1699 */ 1700 public static final UnicodeBlock LINEAR_B_SYLLABARY = 1701 new UnicodeBlock("LINEAR_B_SYLLABARY", 1702 "LINEAR B SYLLABARY", 1703 "LINEARBSYLLABARY"); 1704 1705 /** 1706 * Constant for the "Linear B Ideograms" Unicode character block. 1707 * @since 1.5 1708 */ 1709 public static final UnicodeBlock LINEAR_B_IDEOGRAMS = 1710 new UnicodeBlock("LINEAR_B_IDEOGRAMS", 1711 "LINEAR B IDEOGRAMS", 1712 "LINEARBIDEOGRAMS"); 1713 1714 /** 1715 * Constant for the "Aegean Numbers" Unicode character block. 1716 * @since 1.5 1717 */ 1718 public static final UnicodeBlock AEGEAN_NUMBERS = 1719 new UnicodeBlock("AEGEAN_NUMBERS", 1720 "AEGEAN NUMBERS", 1721 "AEGEANNUMBERS"); 1722 1723 /** 1724 * Constant for the "Old Italic" Unicode character block. 1725 * @since 1.5 1726 */ 1727 public static final UnicodeBlock OLD_ITALIC = 1728 new UnicodeBlock("OLD_ITALIC", 1729 "OLD ITALIC", 1730 "OLDITALIC"); 1731 1732 /** 1733 * Constant for the "Gothic" Unicode character block. 
1734 * @since 1.5 1735 */ 1736 public static final UnicodeBlock GOTHIC = 1737 new UnicodeBlock("GOTHIC"); 1738 1739 /** 1740 * Constant for the "Ugaritic" Unicode character block. 1741 * @since 1.5 1742 */ 1743 public static final UnicodeBlock UGARITIC = 1744 new UnicodeBlock("UGARITIC"); 1745 1746 /** 1747 * Constant for the "Deseret" Unicode character block. 1748 * @since 1.5 1749 */ 1750 public static final UnicodeBlock DESERET = 1751 new UnicodeBlock("DESERET"); 1752 1753 /** 1754 * Constant for the "Shavian" Unicode character block. 1755 * @since 1.5 1756 */ 1757 public static final UnicodeBlock SHAVIAN = 1758 new UnicodeBlock("SHAVIAN"); 1759 1760 /** 1761 * Constant for the "Osmanya" Unicode character block. 1762 * @since 1.5 1763 */ 1764 public static final UnicodeBlock OSMANYA = 1765 new UnicodeBlock("OSMANYA"); 1766 1767 /** 1768 * Constant for the "Cypriot Syllabary" Unicode character block. 1769 * @since 1.5 1770 */ 1771 public static final UnicodeBlock CYPRIOT_SYLLABARY = 1772 new UnicodeBlock("CYPRIOT_SYLLABARY", 1773 "CYPRIOT SYLLABARY", 1774 "CYPRIOTSYLLABARY"); 1775 1776 /** 1777 * Constant for the "Byzantine Musical Symbols" Unicode character block. 1778 * @since 1.5 1779 */ 1780 public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS = 1781 new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS", 1782 "BYZANTINE MUSICAL SYMBOLS", 1783 "BYZANTINEMUSICALSYMBOLS"); 1784 1785 /** 1786 * Constant for the "Musical Symbols" Unicode character block. 1787 * @since 1.5 1788 */ 1789 public static final UnicodeBlock MUSICAL_SYMBOLS = 1790 new UnicodeBlock("MUSICAL_SYMBOLS", 1791 "MUSICAL SYMBOLS", 1792 "MUSICALSYMBOLS"); 1793 1794 /** 1795 * Constant for the "Tai Xuan Jing Symbols" Unicode character block. 
1796 * @since 1.5 1797 */ 1798 public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS = 1799 new UnicodeBlock("TAI_XUAN_JING_SYMBOLS", 1800 "TAI XUAN JING SYMBOLS", 1801 "TAIXUANJINGSYMBOLS"); 1802 1803 /** 1804 * Constant for the "Mathematical Alphanumeric Symbols" Unicode 1805 * character block. 1806 * @since 1.5 1807 */ 1808 public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS = 1809 new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS", 1810 "MATHEMATICAL ALPHANUMERIC SYMBOLS", 1811 "MATHEMATICALALPHANUMERICSYMBOLS"); 1812 1813 /** 1814 * Constant for the "CJK Unified Ideographs Extension B" Unicode 1815 * character block. 1816 * @since 1.5 1817 */ 1818 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B = 1819 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B", 1820 "CJK UNIFIED IDEOGRAPHS EXTENSION B", 1821 "CJKUNIFIEDIDEOGRAPHSEXTENSIONB"); 1822 1823 /** 1824 * Constant for the "CJK Compatibility Ideographs Supplement" Unicode character block. 1825 * @since 1.5 1826 */ 1827 public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = 1828 new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT", 1829 "CJK COMPATIBILITY IDEOGRAPHS SUPPLEMENT", 1830 "CJKCOMPATIBILITYIDEOGRAPHSSUPPLEMENT"); 1831 1832 /** 1833 * Constant for the "Tags" Unicode character block. 1834 * @since 1.5 1835 */ 1836 public static final UnicodeBlock TAGS = 1837 new UnicodeBlock("TAGS"); 1838 1839 /** 1840 * Constant for the "Variation Selectors Supplement" Unicode character 1841 * block. 1842 * @since 1.5 1843 */ 1844 public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT = 1845 new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT", 1846 "VARIATION SELECTORS SUPPLEMENT", 1847 "VARIATIONSELECTORSSUPPLEMENT"); 1848 1849 /** 1850 * Constant for the "Supplementary Private Use Area-A" Unicode character 1851 * block. 
1852 * @since 1.5 1853 */ 1854 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A = 1855 new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A", 1856 "SUPPLEMENTARY PRIVATE USE AREA-A", 1857 "SUPPLEMENTARYPRIVATEUSEAREA-A"); 1858 1859 /** 1860 * Constant for the "Supplementary Private Use Area-B" Unicode character 1861 * block. 1862 * @since 1.5 1863 */ 1864 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B = 1865 new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B", 1866 "SUPPLEMENTARY PRIVATE USE AREA-B", 1867 "SUPPLEMENTARYPRIVATEUSEAREA-B"); 1868 1869 /** 1870 * Constant for the "High Surrogates" Unicode character block. 1871 * This block represents codepoint values in the high surrogate 1872 * range: U+D800 through U+DB7F 1873 * 1874 * @since 1.5 1875 */ 1876 public static final UnicodeBlock HIGH_SURROGATES = 1877 new UnicodeBlock("HIGH_SURROGATES", 1878 "HIGH SURROGATES", 1879 "HIGHSURROGATES"); 1880 1881 /** 1882 * Constant for the "High Private Use Surrogates" Unicode character 1883 * block. 1884 * This block represents codepoint values in the private use high 1885 * surrogate range: U+DB80 through U+DBFF 1886 * 1887 * @since 1.5 1888 */ 1889 public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES = 1890 new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES", 1891 "HIGH PRIVATE USE SURROGATES", 1892 "HIGHPRIVATEUSESURROGATES"); 1893 1894 /** 1895 * Constant for the "Low Surrogates" Unicode character block. 1896 * This block represents codepoint values in the low surrogate 1897 * range: U+DC00 through U+DFFF 1898 * 1899 * @since 1.5 1900 */ 1901 public static final UnicodeBlock LOW_SURROGATES = 1902 new UnicodeBlock("LOW_SURROGATES", 1903 "LOW SURROGATES", 1904 "LOWSURROGATES"); 1905 1906 /** 1907 * Constant for the "Arabic Supplement" Unicode character block. 
1908 * @since 1.7 1909 */ 1910 public static final UnicodeBlock ARABIC_SUPPLEMENT = 1911 new UnicodeBlock("ARABIC_SUPPLEMENT", 1912 "ARABIC SUPPLEMENT", 1913 "ARABICSUPPLEMENT"); 1914 1915 /** 1916 * Constant for the "NKo" Unicode character block. 1917 * @since 1.7 1918 */ 1919 public static final UnicodeBlock NKO = 1920 new UnicodeBlock("NKO"); 1921 1922 /** 1923 * Constant for the "Samaritan" Unicode character block. 1924 * @since 1.7 1925 */ 1926 public static final UnicodeBlock SAMARITAN = 1927 new UnicodeBlock("SAMARITAN"); 1928 1929 /** 1930 * Constant for the "Mandaic" Unicode character block. 1931 * @since 1.7 1932 */ 1933 public static final UnicodeBlock MANDAIC = 1934 new UnicodeBlock("MANDAIC"); 1935 1936 /** 1937 * Constant for the "Ethiopic Supplement" Unicode character block. 1938 * @since 1.7 1939 */ 1940 public static final UnicodeBlock ETHIOPIC_SUPPLEMENT = 1941 new UnicodeBlock("ETHIOPIC_SUPPLEMENT", 1942 "ETHIOPIC SUPPLEMENT", 1943 "ETHIOPICSUPPLEMENT"); 1944 1945 /** 1946 * Constant for the "Unified Canadian Aboriginal Syllabics Extended" 1947 * Unicode character block. 1948 * @since 1.7 1949 */ 1950 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED = 1951 new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED", 1952 "UNIFIED CANADIAN ABORIGINAL SYLLABICS EXTENDED", 1953 "UNIFIEDCANADIANABORIGINALSYLLABICSEXTENDED"); 1954 1955 /** 1956 * Constant for the "New Tai Lue" Unicode character block. 1957 * @since 1.7 1958 */ 1959 public static final UnicodeBlock NEW_TAI_LUE = 1960 new UnicodeBlock("NEW_TAI_LUE", 1961 "NEW TAI LUE", 1962 "NEWTAILUE"); 1963 1964 /** 1965 * Constant for the "Buginese" Unicode character block. 1966 * @since 1.7 1967 */ 1968 public static final UnicodeBlock BUGINESE = 1969 new UnicodeBlock("BUGINESE"); 1970 1971 /** 1972 * Constant for the "Tai Tham" Unicode character block. 
1973 * @since 1.7 1974 */ 1975 public static final UnicodeBlock TAI_THAM = 1976 new UnicodeBlock("TAI_THAM", 1977 "TAI THAM", 1978 "TAITHAM"); 1979 1980 /** 1981 * Constant for the "Balinese" Unicode character block. 1982 * @since 1.7 1983 */ 1984 public static final UnicodeBlock BALINESE = 1985 new UnicodeBlock("BALINESE"); 1986 1987 /** 1988 * Constant for the "Sundanese" Unicode character block. 1989 * @since 1.7 1990 */ 1991 public static final UnicodeBlock SUNDANESE = 1992 new UnicodeBlock("SUNDANESE"); 1993 1994 /** 1995 * Constant for the "Batak" Unicode character block. 1996 * @since 1.7 1997 */ 1998 public static final UnicodeBlock BATAK = 1999 new UnicodeBlock("BATAK"); 2000 2001 /** 2002 * Constant for the "Lepcha" Unicode character block. 2003 * @since 1.7 2004 */ 2005 public static final UnicodeBlock LEPCHA = 2006 new UnicodeBlock("LEPCHA"); 2007 2008 /** 2009 * Constant for the "Ol Chiki" Unicode character block. 2010 * @since 1.7 2011 */ 2012 public static final UnicodeBlock OL_CHIKI = 2013 new UnicodeBlock("OL_CHIKI", 2014 "OL CHIKI", 2015 "OLCHIKI"); 2016 2017 /** 2018 * Constant for the "Vedic Extensions" Unicode character block. 2019 * @since 1.7 2020 */ 2021 public static final UnicodeBlock VEDIC_EXTENSIONS = 2022 new UnicodeBlock("VEDIC_EXTENSIONS", 2023 "VEDIC EXTENSIONS", 2024 "VEDICEXTENSIONS"); 2025 2026 /** 2027 * Constant for the "Phonetic Extensions Supplement" Unicode character 2028 * block. 2029 * @since 1.7 2030 */ 2031 public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT = 2032 new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT", 2033 "PHONETIC EXTENSIONS SUPPLEMENT", 2034 "PHONETICEXTENSIONSSUPPLEMENT"); 2035 2036 /** 2037 * Constant for the "Combining Diacritical Marks Supplement" Unicode 2038 * character block. 
2039 * @since 1.7 2040 */ 2041 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT = 2042 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT", 2043 "COMBINING DIACRITICAL MARKS SUPPLEMENT", 2044 "COMBININGDIACRITICALMARKSSUPPLEMENT"); 2045 2046 /** 2047 * Constant for the "Glagolitic" Unicode character block. 2048 * @since 1.7 2049 */ 2050 public static final UnicodeBlock GLAGOLITIC = 2051 new UnicodeBlock("GLAGOLITIC"); 2052 2053 /** 2054 * Constant for the "Latin Extended-C" Unicode character block. 2055 * @since 1.7 2056 */ 2057 public static final UnicodeBlock LATIN_EXTENDED_C = 2058 new UnicodeBlock("LATIN_EXTENDED_C", 2059 "LATIN EXTENDED-C", 2060 "LATINEXTENDED-C"); 2061 2062 /** 2063 * Constant for the "Coptic" Unicode character block. 2064 * @since 1.7 2065 */ 2066 public static final UnicodeBlock COPTIC = 2067 new UnicodeBlock("COPTIC"); 2068 2069 /** 2070 * Constant for the "Georgian Supplement" Unicode character block. 2071 * @since 1.7 2072 */ 2073 public static final UnicodeBlock GEORGIAN_SUPPLEMENT = 2074 new UnicodeBlock("GEORGIAN_SUPPLEMENT", 2075 "GEORGIAN SUPPLEMENT", 2076 "GEORGIANSUPPLEMENT"); 2077 2078 /** 2079 * Constant for the "Tifinagh" Unicode character block. 2080 * @since 1.7 2081 */ 2082 public static final UnicodeBlock TIFINAGH = 2083 new UnicodeBlock("TIFINAGH"); 2084 2085 /** 2086 * Constant for the "Ethiopic Extended" Unicode character block. 2087 * @since 1.7 2088 */ 2089 public static final UnicodeBlock ETHIOPIC_EXTENDED = 2090 new UnicodeBlock("ETHIOPIC_EXTENDED", 2091 "ETHIOPIC EXTENDED", 2092 "ETHIOPICEXTENDED"); 2093 2094 /** 2095 * Constant for the "Cyrillic Extended-A" Unicode character block. 2096 * @since 1.7 2097 */ 2098 public static final UnicodeBlock CYRILLIC_EXTENDED_A = 2099 new UnicodeBlock("CYRILLIC_EXTENDED_A", 2100 "CYRILLIC EXTENDED-A", 2101 "CYRILLICEXTENDED-A"); 2102 2103 /** 2104 * Constant for the "Supplemental Punctuation" Unicode character block. 
2105 * @since 1.7 2106 */ 2107 public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION = 2108 new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION", 2109 "SUPPLEMENTAL PUNCTUATION", 2110 "SUPPLEMENTALPUNCTUATION"); 2111 2112 /** 2113 * Constant for the "CJK Strokes" Unicode character block. 2114 * @since 1.7 2115 */ 2116 public static final UnicodeBlock CJK_STROKES = 2117 new UnicodeBlock("CJK_STROKES", 2118 "CJK STROKES", 2119 "CJKSTROKES"); 2120 2121 /** 2122 * Constant for the "Lisu" Unicode character block. 2123 * @since 1.7 2124 */ 2125 public static final UnicodeBlock LISU = 2126 new UnicodeBlock("LISU"); 2127 2128 /** 2129 * Constant for the "Vai" Unicode character block. 2130 * @since 1.7 2131 */ 2132 public static final UnicodeBlock VAI = 2133 new UnicodeBlock("VAI"); 2134 2135 /** 2136 * Constant for the "Cyrillic Extended-B" Unicode character block. 2137 * @since 1.7 2138 */ 2139 public static final UnicodeBlock CYRILLIC_EXTENDED_B = 2140 new UnicodeBlock("CYRILLIC_EXTENDED_B", 2141 "CYRILLIC EXTENDED-B", 2142 "CYRILLICEXTENDED-B"); 2143 2144 /** 2145 * Constant for the "Bamum" Unicode character block. 2146 * @since 1.7 2147 */ 2148 public static final UnicodeBlock BAMUM = 2149 new UnicodeBlock("BAMUM"); 2150 2151 /** 2152 * Constant for the "Modifier Tone Letters" Unicode character block. 2153 * @since 1.7 2154 */ 2155 public static final UnicodeBlock MODIFIER_TONE_LETTERS = 2156 new UnicodeBlock("MODIFIER_TONE_LETTERS", 2157 "MODIFIER TONE LETTERS", 2158 "MODIFIERTONELETTERS"); 2159 2160 /** 2161 * Constant for the "Latin Extended-D" Unicode character block. 2162 * @since 1.7 2163 */ 2164 public static final UnicodeBlock LATIN_EXTENDED_D = 2165 new UnicodeBlock("LATIN_EXTENDED_D", 2166 "LATIN EXTENDED-D", 2167 "LATINEXTENDED-D"); 2168 2169 /** 2170 * Constant for the "Syloti Nagri" Unicode character block. 
2171 * @since 1.7 2172 */ 2173 public static final UnicodeBlock SYLOTI_NAGRI = 2174 new UnicodeBlock("SYLOTI_NAGRI", 2175 "SYLOTI NAGRI", 2176 "SYLOTINAGRI"); 2177 2178 /** 2179 * Constant for the "Common Indic Number Forms" Unicode character block. 2180 * @since 1.7 2181 */ 2182 public static final UnicodeBlock COMMON_INDIC_NUMBER_FORMS = 2183 new UnicodeBlock("COMMON_INDIC_NUMBER_FORMS", 2184 "COMMON INDIC NUMBER FORMS", 2185 "COMMONINDICNUMBERFORMS"); 2186 2187 /** 2188 * Constant for the "Phags-pa" Unicode character block. 2189 * @since 1.7 2190 */ 2191 public static final UnicodeBlock PHAGS_PA = 2192 new UnicodeBlock("PHAGS_PA", 2193 "PHAGS-PA"); 2194 2195 /** 2196 * Constant for the "Saurashtra" Unicode character block. 2197 * @since 1.7 2198 */ 2199 public static final UnicodeBlock SAURASHTRA = 2200 new UnicodeBlock("SAURASHTRA"); 2201 2202 /** 2203 * Constant for the "Devanagari Extended" Unicode character block. 2204 * @since 1.7 2205 */ 2206 public static final UnicodeBlock DEVANAGARI_EXTENDED = 2207 new UnicodeBlock("DEVANAGARI_EXTENDED", 2208 "DEVANAGARI EXTENDED", 2209 "DEVANAGARIEXTENDED"); 2210 2211 /** 2212 * Constant for the "Kayah Li" Unicode character block. 2213 * @since 1.7 2214 */ 2215 public static final UnicodeBlock KAYAH_LI = 2216 new UnicodeBlock("KAYAH_LI", 2217 "KAYAH LI", 2218 "KAYAHLI"); 2219 2220 /** 2221 * Constant for the "Rejang" Unicode character block. 2222 * @since 1.7 2223 */ 2224 public static final UnicodeBlock REJANG = 2225 new UnicodeBlock("REJANG"); 2226 2227 /** 2228 * Constant for the "Hangul Jamo Extended-A" Unicode character block. 2229 * @since 1.7 2230 */ 2231 public static final UnicodeBlock HANGUL_JAMO_EXTENDED_A = 2232 new UnicodeBlock("HANGUL_JAMO_EXTENDED_A", 2233 "HANGUL JAMO EXTENDED-A", 2234 "HANGULJAMOEXTENDED-A"); 2235 2236 /** 2237 * Constant for the "Javanese" Unicode character block. 
2238 * @since 1.7 2239 */ 2240 public static final UnicodeBlock JAVANESE = 2241 new UnicodeBlock("JAVANESE"); 2242 2243 /** 2244 * Constant for the "Cham" Unicode character block. 2245 * @since 1.7 2246 */ 2247 public static final UnicodeBlock CHAM = 2248 new UnicodeBlock("CHAM"); 2249 2250 /** 2251 * Constant for the "Myanmar Extended-A" Unicode character block. 2252 * @since 1.7 2253 */ 2254 public static final UnicodeBlock MYANMAR_EXTENDED_A = 2255 new UnicodeBlock("MYANMAR_EXTENDED_A", 2256 "MYANMAR EXTENDED-A", 2257 "MYANMAREXTENDED-A"); 2258 2259 /** 2260 * Constant for the "Tai Viet" Unicode character block. 2261 * @since 1.7 2262 */ 2263 public static final UnicodeBlock TAI_VIET = 2264 new UnicodeBlock("TAI_VIET", 2265 "TAI VIET", 2266 "TAIVIET"); 2267 2268 /** 2269 * Constant for the "Ethiopic Extended-A" Unicode character block. 2270 * @since 1.7 2271 */ 2272 public static final UnicodeBlock ETHIOPIC_EXTENDED_A = 2273 new UnicodeBlock("ETHIOPIC_EXTENDED_A", 2274 "ETHIOPIC EXTENDED-A", 2275 "ETHIOPICEXTENDED-A"); 2276 2277 /** 2278 * Constant for the "Meetei Mayek" Unicode character block. 2279 * @since 1.7 2280 */ 2281 public static final UnicodeBlock MEETEI_MAYEK = 2282 new UnicodeBlock("MEETEI_MAYEK", 2283 "MEETEI MAYEK", 2284 "MEETEIMAYEK"); 2285 2286 /** 2287 * Constant for the "Hangul Jamo Extended-B" Unicode character block. 2288 * @since 1.7 2289 */ 2290 public static final UnicodeBlock HANGUL_JAMO_EXTENDED_B = 2291 new UnicodeBlock("HANGUL_JAMO_EXTENDED_B", 2292 "HANGUL JAMO EXTENDED-B", 2293 "HANGULJAMOEXTENDED-B"); 2294 2295 /** 2296 * Constant for the "Vertical Forms" Unicode character block. 2297 * @since 1.7 2298 */ 2299 public static final UnicodeBlock VERTICAL_FORMS = 2300 new UnicodeBlock("VERTICAL_FORMS", 2301 "VERTICAL FORMS", 2302 "VERTICALFORMS"); 2303 2304 /** 2305 * Constant for the "Ancient Greek Numbers" Unicode character block. 
2306 * @since 1.7 2307 */ 2308 public static final UnicodeBlock ANCIENT_GREEK_NUMBERS = 2309 new UnicodeBlock("ANCIENT_GREEK_NUMBERS", 2310 "ANCIENT GREEK NUMBERS", 2311 "ANCIENTGREEKNUMBERS"); 2312 2313 /** 2314 * Constant for the "Ancient Symbols" Unicode character block. 2315 * @since 1.7 2316 */ 2317 public static final UnicodeBlock ANCIENT_SYMBOLS = 2318 new UnicodeBlock("ANCIENT_SYMBOLS", 2319 "ANCIENT SYMBOLS", 2320 "ANCIENTSYMBOLS"); 2321 2322 /** 2323 * Constant for the "Phaistos Disc" Unicode character block. 2324 * @since 1.7 2325 */ 2326 public static final UnicodeBlock PHAISTOS_DISC = 2327 new UnicodeBlock("PHAISTOS_DISC", 2328 "PHAISTOS DISC", 2329 "PHAISTOSDISC"); 2330 2331 /** 2332 * Constant for the "Lycian" Unicode character block. 2333 * @since 1.7 2334 */ 2335 public static final UnicodeBlock LYCIAN = 2336 new UnicodeBlock("LYCIAN"); 2337 2338 /** 2339 * Constant for the "Carian" Unicode character block. 2340 * @since 1.7 2341 */ 2342 public static final UnicodeBlock CARIAN = 2343 new UnicodeBlock("CARIAN"); 2344 2345 /** 2346 * Constant for the "Old Persian" Unicode character block. 2347 * @since 1.7 2348 */ 2349 public static final UnicodeBlock OLD_PERSIAN = 2350 new UnicodeBlock("OLD_PERSIAN", 2351 "OLD PERSIAN", 2352 "OLDPERSIAN"); 2353 2354 /** 2355 * Constant for the "Imperial Aramaic" Unicode character block. 2356 * @since 1.7 2357 */ 2358 public static final UnicodeBlock IMPERIAL_ARAMAIC = 2359 new UnicodeBlock("IMPERIAL_ARAMAIC", 2360 "IMPERIAL ARAMAIC", 2361 "IMPERIALARAMAIC"); 2362 2363 /** 2364 * Constant for the "Phoenician" Unicode character block. 2365 * @since 1.7 2366 */ 2367 public static final UnicodeBlock PHOENICIAN = 2368 new UnicodeBlock("PHOENICIAN"); 2369 2370 /** 2371 * Constant for the "Lydian" Unicode character block. 2372 * @since 1.7 2373 */ 2374 public static final UnicodeBlock LYDIAN = 2375 new UnicodeBlock("LYDIAN"); 2376 2377 /** 2378 * Constant for the "Kharoshthi" Unicode character block. 
2379 * @since 1.7 2380 */ 2381 public static final UnicodeBlock KHAROSHTHI = 2382 new UnicodeBlock("KHAROSHTHI"); 2383 2384 /** 2385 * Constant for the "Old South Arabian" Unicode character block. 2386 * @since 1.7 2387 */ 2388 public static final UnicodeBlock OLD_SOUTH_ARABIAN = 2389 new UnicodeBlock("OLD_SOUTH_ARABIAN", 2390 "OLD SOUTH ARABIAN", 2391 "OLDSOUTHARABIAN"); 2392 2393 /** 2394 * Constant for the "Avestan" Unicode character block. 2395 * @since 1.7 2396 */ 2397 public static final UnicodeBlock AVESTAN = 2398 new UnicodeBlock("AVESTAN"); 2399 2400 /** 2401 * Constant for the "Inscriptional Parthian" Unicode character block. 2402 * @since 1.7 2403 */ 2404 public static final UnicodeBlock INSCRIPTIONAL_PARTHIAN = 2405 new UnicodeBlock("INSCRIPTIONAL_PARTHIAN", 2406 "INSCRIPTIONAL PARTHIAN", 2407 "INSCRIPTIONALPARTHIAN"); 2408 2409 /** 2410 * Constant for the "Inscriptional Pahlavi" Unicode character block. 2411 * @since 1.7 2412 */ 2413 public static final UnicodeBlock INSCRIPTIONAL_PAHLAVI = 2414 new UnicodeBlock("INSCRIPTIONAL_PAHLAVI", 2415 "INSCRIPTIONAL PAHLAVI", 2416 "INSCRIPTIONALPAHLAVI"); 2417 2418 /** 2419 * Constant for the "Old Turkic" Unicode character block. 2420 * @since 1.7 2421 */ 2422 public static final UnicodeBlock OLD_TURKIC = 2423 new UnicodeBlock("OLD_TURKIC", 2424 "OLD TURKIC", 2425 "OLDTURKIC"); 2426 2427 /** 2428 * Constant for the "Rumi Numeral Symbols" Unicode character block. 2429 * @since 1.7 2430 */ 2431 public static final UnicodeBlock RUMI_NUMERAL_SYMBOLS = 2432 new UnicodeBlock("RUMI_NUMERAL_SYMBOLS", 2433 "RUMI NUMERAL SYMBOLS", 2434 "RUMINUMERALSYMBOLS"); 2435 2436 /** 2437 * Constant for the "Brahmi" Unicode character block. 2438 * @since 1.7 2439 */ 2440 public static final UnicodeBlock BRAHMI = 2441 new UnicodeBlock("BRAHMI"); 2442 2443 /** 2444 * Constant for the "Kaithi" Unicode character block. 
2445 * @since 1.7 2446 */ 2447 public static final UnicodeBlock KAITHI = 2448 new UnicodeBlock("KAITHI"); 2449 2450 /** 2451 * Constant for the "Cuneiform" Unicode character block. 2452 * @since 1.7 2453 */ 2454 public static final UnicodeBlock CUNEIFORM = 2455 new UnicodeBlock("CUNEIFORM"); 2456 2457 /** 2458 * Constant for the "Cuneiform Numbers and Punctuation" Unicode 2459 * character block. 2460 * @since 1.7 2461 */ 2462 public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION = 2463 new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION", 2464 "CUNEIFORM NUMBERS AND PUNCTUATION", 2465 "CUNEIFORMNUMBERSANDPUNCTUATION"); 2466 2467 /** 2468 * Constant for the "Egyptian Hieroglyphs" Unicode character block. 2469 * @since 1.7 2470 */ 2471 public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS = 2472 new UnicodeBlock("EGYPTIAN_HIEROGLYPHS", 2473 "EGYPTIAN HIEROGLYPHS", 2474 "EGYPTIANHIEROGLYPHS"); 2475 2476 /** 2477 * Constant for the "Bamum Supplement" Unicode character block. 2478 * @since 1.7 2479 */ 2480 public static final UnicodeBlock BAMUM_SUPPLEMENT = 2481 new UnicodeBlock("BAMUM_SUPPLEMENT", 2482 "BAMUM SUPPLEMENT", 2483 "BAMUMSUPPLEMENT"); 2484 2485 /** 2486 * Constant for the "Kana Supplement" Unicode character block. 2487 * @since 1.7 2488 */ 2489 public static final UnicodeBlock KANA_SUPPLEMENT = 2490 new UnicodeBlock("KANA_SUPPLEMENT", 2491 "KANA SUPPLEMENT", 2492 "KANASUPPLEMENT"); 2493 2494 /** 2495 * Constant for the "Ancient Greek Musical Notation" Unicode character 2496 * block. 2497 * @since 1.7 2498 */ 2499 public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION = 2500 new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION", 2501 "ANCIENT GREEK MUSICAL NOTATION", 2502 "ANCIENTGREEKMUSICALNOTATION"); 2503 2504 /** 2505 * Constant for the "Counting Rod Numerals" Unicode character block. 
2506 * @since 1.7 2507 */ 2508 public static final UnicodeBlock COUNTING_ROD_NUMERALS = 2509 new UnicodeBlock("COUNTING_ROD_NUMERALS", 2510 "COUNTING ROD NUMERALS", 2511 "COUNTINGRODNUMERALS"); 2512 2513 /** 2514 * Constant for the "Mahjong Tiles" Unicode character block. 2515 * @since 1.7 2516 */ 2517 public static final UnicodeBlock MAHJONG_TILES = 2518 new UnicodeBlock("MAHJONG_TILES", 2519 "MAHJONG TILES", 2520 "MAHJONGTILES"); 2521 2522 /** 2523 * Constant for the "Domino Tiles" Unicode character block. 2524 * @since 1.7 2525 */ 2526 public static final UnicodeBlock DOMINO_TILES = 2527 new UnicodeBlock("DOMINO_TILES", 2528 "DOMINO TILES", 2529 "DOMINOTILES"); 2530 2531 /** 2532 * Constant for the "Playing Cards" Unicode character block. 2533 * @since 1.7 2534 */ 2535 public static final UnicodeBlock PLAYING_CARDS = 2536 new UnicodeBlock("PLAYING_CARDS", 2537 "PLAYING CARDS", 2538 "PLAYINGCARDS"); 2539 2540 /** 2541 * Constant for the "Enclosed Alphanumeric Supplement" Unicode character 2542 * block. 2543 * @since 1.7 2544 */ 2545 public static final UnicodeBlock ENCLOSED_ALPHANUMERIC_SUPPLEMENT = 2546 new UnicodeBlock("ENCLOSED_ALPHANUMERIC_SUPPLEMENT", 2547 "ENCLOSED ALPHANUMERIC SUPPLEMENT", 2548 "ENCLOSEDALPHANUMERICSUPPLEMENT"); 2549 2550 /** 2551 * Constant for the "Enclosed Ideographic Supplement" Unicode character 2552 * block. 2553 * @since 1.7 2554 */ 2555 public static final UnicodeBlock ENCLOSED_IDEOGRAPHIC_SUPPLEMENT = 2556 new UnicodeBlock("ENCLOSED_IDEOGRAPHIC_SUPPLEMENT", 2557 "ENCLOSED IDEOGRAPHIC SUPPLEMENT", 2558 "ENCLOSEDIDEOGRAPHICSUPPLEMENT"); 2559 2560 /** 2561 * Constant for the "Miscellaneous Symbols And Pictographs" Unicode 2562 * character block. 
2563 * @since 1.7 2564 */ 2565 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS = 2566 new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS", 2567 "MISCELLANEOUS SYMBOLS AND PICTOGRAPHS", 2568 "MISCELLANEOUSSYMBOLSANDPICTOGRAPHS"); 2569 2570 /** 2571 * Constant for the "Emoticons" Unicode character block. 2572 * @since 1.7 2573 */ 2574 public static final UnicodeBlock EMOTICONS = 2575 new UnicodeBlock("EMOTICONS"); 2576 2577 /** 2578 * Constant for the "Transport And Map Symbols" Unicode character block. 2579 * @since 1.7 2580 */ 2581 public static final UnicodeBlock TRANSPORT_AND_MAP_SYMBOLS = 2582 new UnicodeBlock("TRANSPORT_AND_MAP_SYMBOLS", 2583 "TRANSPORT AND MAP SYMBOLS", 2584 "TRANSPORTANDMAPSYMBOLS"); 2585 2586 /** 2587 * Constant for the "Alchemical Symbols" Unicode character block. 2588 * @since 1.7 2589 */ 2590 public static final UnicodeBlock ALCHEMICAL_SYMBOLS = 2591 new UnicodeBlock("ALCHEMICAL_SYMBOLS", 2592 "ALCHEMICAL SYMBOLS", 2593 "ALCHEMICALSYMBOLS"); 2594 2595 /** 2596 * Constant for the "CJK Unified Ideographs Extension C" Unicode 2597 * character block. 2598 * @since 1.7 2599 */ 2600 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C = 2601 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C", 2602 "CJK UNIFIED IDEOGRAPHS EXTENSION C", 2603 "CJKUNIFIEDIDEOGRAPHSEXTENSIONC"); 2604 2605 /** 2606 * Constant for the "CJK Unified Ideographs Extension D" Unicode 2607 * character block. 2608 * @since 1.7 2609 */ 2610 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D = 2611 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D", 2612 "CJK UNIFIED IDEOGRAPHS EXTENSION D", 2613 "CJKUNIFIEDIDEOGRAPHSEXTENSIOND"); 2614 2615 /** 2616 * Constant for the "Arabic Extended-A" Unicode character block. 
2617 * @since 1.8 2618 */ 2619 public static final UnicodeBlock ARABIC_EXTENDED_A = 2620 new UnicodeBlock("ARABIC_EXTENDED_A", 2621 "ARABIC EXTENDED-A", 2622 "ARABICEXTENDED-A"); 2623 2624 /** 2625 * Constant for the "Sundanese Supplement" Unicode character block. 2626 * @since 1.8 2627 */ 2628 public static final UnicodeBlock SUNDANESE_SUPPLEMENT = 2629 new UnicodeBlock("SUNDANESE_SUPPLEMENT", 2630 "SUNDANESE SUPPLEMENT", 2631 "SUNDANESESUPPLEMENT"); 2632 2633 /** 2634 * Constant for the "Meetei Mayek Extensions" Unicode character block. 2635 * @since 1.8 2636 */ 2637 public static final UnicodeBlock MEETEI_MAYEK_EXTENSIONS = 2638 new UnicodeBlock("MEETEI_MAYEK_EXTENSIONS", 2639 "MEETEI MAYEK EXTENSIONS", 2640 "MEETEIMAYEKEXTENSIONS"); 2641 2642 /** 2643 * Constant for the "Meroitic Hieroglyphs" Unicode character block. 2644 * @since 1.8 2645 */ 2646 public static final UnicodeBlock MEROITIC_HIEROGLYPHS = 2647 new UnicodeBlock("MEROITIC_HIEROGLYPHS", 2648 "MEROITIC HIEROGLYPHS", 2649 "MEROITICHIEROGLYPHS"); 2650 2651 /** 2652 * Constant for the "Meroitic Cursive" Unicode character block. 2653 * @since 1.8 2654 */ 2655 public static final UnicodeBlock MEROITIC_CURSIVE = 2656 new UnicodeBlock("MEROITIC_CURSIVE", 2657 "MEROITIC CURSIVE", 2658 "MEROITICCURSIVE"); 2659 2660 /** 2661 * Constant for the "Sora Sompeng" Unicode character block. 2662 * @since 1.8 2663 */ 2664 public static final UnicodeBlock SORA_SOMPENG = 2665 new UnicodeBlock("SORA_SOMPENG", 2666 "SORA SOMPENG", 2667 "SORASOMPENG"); 2668 2669 /** 2670 * Constant for the "Chakma" Unicode character block. 2671 * @since 1.8 2672 */ 2673 public static final UnicodeBlock CHAKMA = 2674 new UnicodeBlock("CHAKMA"); 2675 2676 /** 2677 * Constant for the "Sharada" Unicode character block. 2678 * @since 1.8 2679 */ 2680 public static final UnicodeBlock SHARADA = 2681 new UnicodeBlock("SHARADA"); 2682 2683 /** 2684 * Constant for the "Takri" Unicode character block. 
2685 * @since 1.8 2686 */ 2687 public static final UnicodeBlock TAKRI = 2688 new UnicodeBlock("TAKRI"); 2689 2690 /** 2691 * Constant for the "Miao" Unicode character block. 2692 * @since 1.8 2693 */ 2694 public static final UnicodeBlock MIAO = 2695 new UnicodeBlock("MIAO"); 2696 2697 /** 2698 * Constant for the "Arabic Mathematical Alphabetic Symbols" Unicode 2699 * character block. 2700 * @since 1.8 2701 */ 2702 public static final UnicodeBlock ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS = 2703 new UnicodeBlock("ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS", 2704 "ARABIC MATHEMATICAL ALPHABETIC SYMBOLS", 2705 "ARABICMATHEMATICALALPHABETICSYMBOLS"); 2706 2707 /** 2708 * Constant for the "Combining Diacritical Marks Extended" Unicode 2709 * character block. 2710 * @since 9 2711 */ 2712 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_EXTENDED = 2713 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_EXTENDED", 2714 "COMBINING DIACRITICAL MARKS EXTENDED", 2715 "COMBININGDIACRITICALMARKSEXTENDED"); 2716 2717 /** 2718 * Constant for the "Myanmar Extended-B" Unicode character block. 2719 * @since 9 2720 */ 2721 public static final UnicodeBlock MYANMAR_EXTENDED_B = 2722 new UnicodeBlock("MYANMAR_EXTENDED_B", 2723 "MYANMAR EXTENDED-B", 2724 "MYANMAREXTENDED-B"); 2725 2726 /** 2727 * Constant for the "Latin Extended-E" Unicode character block. 2728 * @since 9 2729 */ 2730 public static final UnicodeBlock LATIN_EXTENDED_E = 2731 new UnicodeBlock("LATIN_EXTENDED_E", 2732 "LATIN EXTENDED-E", 2733 "LATINEXTENDED-E"); 2734 2735 /** 2736 * Constant for the "Coptic Epact Numbers" Unicode character block. 2737 * @since 9 2738 */ 2739 public static final UnicodeBlock COPTIC_EPACT_NUMBERS = 2740 new UnicodeBlock("COPTIC_EPACT_NUMBERS", 2741 "COPTIC EPACT NUMBERS", 2742 "COPTICEPACTNUMBERS"); 2743 2744 /** 2745 * Constant for the "Old Permic" Unicode character block. 
2746 * @since 9 2747 */ 2748 public static final UnicodeBlock OLD_PERMIC = 2749 new UnicodeBlock("OLD_PERMIC", 2750 "OLD PERMIC", 2751 "OLDPERMIC"); 2752 2753 /** 2754 * Constant for the "Elbasan" Unicode character block. 2755 * @since 9 2756 */ 2757 public static final UnicodeBlock ELBASAN = 2758 new UnicodeBlock("ELBASAN"); 2759 2760 /** 2761 * Constant for the "Caucasian Albanian" Unicode character block. 2762 * @since 9 2763 */ 2764 public static final UnicodeBlock CAUCASIAN_ALBANIAN = 2765 new UnicodeBlock("CAUCASIAN_ALBANIAN", 2766 "CAUCASIAN ALBANIAN", 2767 "CAUCASIANALBANIAN"); 2768 2769 /** 2770 * Constant for the "Linear A" Unicode character block. 2771 * @since 9 2772 */ 2773 public static final UnicodeBlock LINEAR_A = 2774 new UnicodeBlock("LINEAR_A", 2775 "LINEAR A", 2776 "LINEARA"); 2777 2778 /** 2779 * Constant for the "Palmyrene" Unicode character block. 2780 * @since 9 2781 */ 2782 public static final UnicodeBlock PALMYRENE = 2783 new UnicodeBlock("PALMYRENE"); 2784 2785 /** 2786 * Constant for the "Nabataean" Unicode character block. 2787 * @since 9 2788 */ 2789 public static final UnicodeBlock NABATAEAN = 2790 new UnicodeBlock("NABATAEAN"); 2791 2792 /** 2793 * Constant for the "Old North Arabian" Unicode character block. 2794 * @since 9 2795 */ 2796 public static final UnicodeBlock OLD_NORTH_ARABIAN = 2797 new UnicodeBlock("OLD_NORTH_ARABIAN", 2798 "OLD NORTH ARABIAN", 2799 "OLDNORTHARABIAN"); 2800 2801 /** 2802 * Constant for the "Manichaean" Unicode character block. 2803 * @since 9 2804 */ 2805 public static final UnicodeBlock MANICHAEAN = 2806 new UnicodeBlock("MANICHAEAN"); 2807 2808 /** 2809 * Constant for the "Psalter Pahlavi" Unicode character block. 2810 * @since 9 2811 */ 2812 public static final UnicodeBlock PSALTER_PAHLAVI = 2813 new UnicodeBlock("PSALTER_PAHLAVI", 2814 "PSALTER PAHLAVI", 2815 "PSALTERPAHLAVI"); 2816 2817 /** 2818 * Constant for the "Mahajani" Unicode character block. 
2819 * @since 9 2820 */ 2821 public static final UnicodeBlock MAHAJANI = 2822 new UnicodeBlock("MAHAJANI"); 2823 2824 /** 2825 * Constant for the "Sinhala Archaic Numbers" Unicode character block. 2826 * @since 9 2827 */ 2828 public static final UnicodeBlock SINHALA_ARCHAIC_NUMBERS = 2829 new UnicodeBlock("SINHALA_ARCHAIC_NUMBERS", 2830 "SINHALA ARCHAIC NUMBERS", 2831 "SINHALAARCHAICNUMBERS"); 2832 2833 /** 2834 * Constant for the "Khojki" Unicode character block. 2835 * @since 9 2836 */ 2837 public static final UnicodeBlock KHOJKI = 2838 new UnicodeBlock("KHOJKI"); 2839 2840 /** 2841 * Constant for the "Khudawadi" Unicode character block. 2842 * @since 9 2843 */ 2844 public static final UnicodeBlock KHUDAWADI = 2845 new UnicodeBlock("KHUDAWADI"); 2846 2847 /** 2848 * Constant for the "Grantha" Unicode character block. 2849 * @since 9 2850 */ 2851 public static final UnicodeBlock GRANTHA = 2852 new UnicodeBlock("GRANTHA"); 2853 2854 /** 2855 * Constant for the "Tirhuta" Unicode character block. 2856 * @since 9 2857 */ 2858 public static final UnicodeBlock TIRHUTA = 2859 new UnicodeBlock("TIRHUTA"); 2860 2861 /** 2862 * Constant for the "Siddham" Unicode character block. 2863 * @since 9 2864 */ 2865 public static final UnicodeBlock SIDDHAM = 2866 new UnicodeBlock("SIDDHAM"); 2867 2868 /** 2869 * Constant for the "Modi" Unicode character block. 2870 * @since 9 2871 */ 2872 public static final UnicodeBlock MODI = 2873 new UnicodeBlock("MODI"); 2874 2875 /** 2876 * Constant for the "Warang Citi" Unicode character block. 2877 * @since 9 2878 */ 2879 public static final UnicodeBlock WARANG_CITI = 2880 new UnicodeBlock("WARANG_CITI", 2881 "WARANG CITI", 2882 "WARANGCITI"); 2883 2884 /** 2885 * Constant for the "Pau Cin Hau" Unicode character block. 2886 * @since 9 2887 */ 2888 public static final UnicodeBlock PAU_CIN_HAU = 2889 new UnicodeBlock("PAU_CIN_HAU", 2890 "PAU CIN HAU", 2891 "PAUCINHAU"); 2892 2893 /** 2894 * Constant for the "Mro" Unicode character block. 
2895 * @since 9 2896 */ 2897 public static final UnicodeBlock MRO = 2898 new UnicodeBlock("MRO"); 2899 2900 /** 2901 * Constant for the "Bassa Vah" Unicode character block. 2902 * @since 9 2903 */ 2904 public static final UnicodeBlock BASSA_VAH = 2905 new UnicodeBlock("BASSA_VAH", 2906 "BASSA VAH", 2907 "BASSAVAH"); 2908 2909 /** 2910 * Constant for the "Pahawh Hmong" Unicode character block. 2911 * @since 9 2912 */ 2913 public static final UnicodeBlock PAHAWH_HMONG = 2914 new UnicodeBlock("PAHAWH_HMONG", 2915 "PAHAWH HMONG", 2916 "PAHAWHHMONG"); 2917 2918 /** 2919 * Constant for the "Duployan" Unicode character block. 2920 * @since 9 2921 */ 2922 public static final UnicodeBlock DUPLOYAN = 2923 new UnicodeBlock("DUPLOYAN"); 2924 2925 /** 2926 * Constant for the "Shorthand Format Controls" Unicode character block. 2927 * @since 9 2928 */ 2929 public static final UnicodeBlock SHORTHAND_FORMAT_CONTROLS = 2930 new UnicodeBlock("SHORTHAND_FORMAT_CONTROLS", 2931 "SHORTHAND FORMAT CONTROLS", 2932 "SHORTHANDFORMATCONTROLS"); 2933 2934 /** 2935 * Constant for the "Mende Kikakui" Unicode character block. 2936 * @since 9 2937 */ 2938 public static final UnicodeBlock MENDE_KIKAKUI = 2939 new UnicodeBlock("MENDE_KIKAKUI", 2940 "MENDE KIKAKUI", 2941 "MENDEKIKAKUI"); 2942 2943 /** 2944 * Constant for the "Ornamental Dingbats" Unicode character block. 2945 * @since 9 2946 */ 2947 public static final UnicodeBlock ORNAMENTAL_DINGBATS = 2948 new UnicodeBlock("ORNAMENTAL_DINGBATS", 2949 "ORNAMENTAL DINGBATS", 2950 "ORNAMENTALDINGBATS"); 2951 2952 /** 2953 * Constant for the "Geometric Shapes Extended" Unicode character block. 2954 * @since 9 2955 */ 2956 public static final UnicodeBlock GEOMETRIC_SHAPES_EXTENDED = 2957 new UnicodeBlock("GEOMETRIC_SHAPES_EXTENDED", 2958 "GEOMETRIC SHAPES EXTENDED", 2959 "GEOMETRICSHAPESEXTENDED"); 2960 2961 /** 2962 * Constant for the "Supplemental Arrows-C" Unicode character block. 
2963 * @since 9 2964 */ 2965 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_C = 2966 new UnicodeBlock("SUPPLEMENTAL_ARROWS_C", 2967 "SUPPLEMENTAL ARROWS-C", 2968 "SUPPLEMENTALARROWS-C"); 2969 2970 /** 2971 * Constant for the "Cherokee Supplement" Unicode character block. 2972 * @since 9 2973 */ 2974 public static final UnicodeBlock CHEROKEE_SUPPLEMENT = 2975 new UnicodeBlock("CHEROKEE_SUPPLEMENT", 2976 "CHEROKEE SUPPLEMENT", 2977 "CHEROKEESUPPLEMENT"); 2978 2979 /** 2980 * Constant for the "Hatran" Unicode character block. 2981 * @since 9 2982 */ 2983 public static final UnicodeBlock HATRAN = 2984 new UnicodeBlock("HATRAN"); 2985 2986 /** 2987 * Constant for the "Old Hungarian" Unicode character block. 2988 * @since 9 2989 */ 2990 public static final UnicodeBlock OLD_HUNGARIAN = 2991 new UnicodeBlock("OLD_HUNGARIAN", 2992 "OLD HUNGARIAN", 2993 "OLDHUNGARIAN"); 2994 2995 /** 2996 * Constant for the "Multani" Unicode character block. 2997 * @since 9 2998 */ 2999 public static final UnicodeBlock MULTANI = 3000 new UnicodeBlock("MULTANI"); 3001 3002 /** 3003 * Constant for the "Ahom" Unicode character block. 3004 * @since 9 3005 */ 3006 public static final UnicodeBlock AHOM = 3007 new UnicodeBlock("AHOM"); 3008 3009 /** 3010 * Constant for the "Early Dynastic Cuneiform" Unicode character block. 3011 * @since 9 3012 */ 3013 public static final UnicodeBlock EARLY_DYNASTIC_CUNEIFORM = 3014 new UnicodeBlock("EARLY_DYNASTIC_CUNEIFORM", 3015 "EARLY DYNASTIC CUNEIFORM", 3016 "EARLYDYNASTICCUNEIFORM"); 3017 3018 /** 3019 * Constant for the "Anatolian Hieroglyphs" Unicode character block. 3020 * @since 9 3021 */ 3022 public static final UnicodeBlock ANATOLIAN_HIEROGLYPHS = 3023 new UnicodeBlock("ANATOLIAN_HIEROGLYPHS", 3024 "ANATOLIAN HIEROGLYPHS", 3025 "ANATOLIANHIEROGLYPHS"); 3026 3027 /** 3028 * Constant for the "Sutton SignWriting" Unicode character block. 
3029 * @since 9 3030 */ 3031 public static final UnicodeBlock SUTTON_SIGNWRITING = 3032 new UnicodeBlock("SUTTON_SIGNWRITING", 3033 "SUTTON SIGNWRITING", 3034 "SUTTONSIGNWRITING"); 3035 3036 /** 3037 * Constant for the "Supplemental Symbols and Pictographs" Unicode 3038 * character block. 3039 * @since 9 3040 */ 3041 public static final UnicodeBlock SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS = 3042 new UnicodeBlock("SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS", 3043 "SUPPLEMENTAL SYMBOLS AND PICTOGRAPHS", 3044 "SUPPLEMENTALSYMBOLSANDPICTOGRAPHS"); 3045 3046 /** 3047 * Constant for the "CJK Unified Ideographs Extension E" Unicode 3048 * character block. 3049 * @since 9 3050 */ 3051 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E = 3052 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E", 3053 "CJK UNIFIED IDEOGRAPHS EXTENSION E", 3054 "CJKUNIFIEDIDEOGRAPHSEXTENSIONE"); 3055 3056 /** 3057 * Constant for the "Syriac Supplement" Unicode 3058 * character block. 3059 * @since 11 3060 */ 3061 public static final UnicodeBlock SYRIAC_SUPPLEMENT = 3062 new UnicodeBlock("SYRIAC_SUPPLEMENT", 3063 "SYRIAC SUPPLEMENT", 3064 "SYRIACSUPPLEMENT"); 3065 3066 /** 3067 * Constant for the "Cyrillic Extended-C" Unicode 3068 * character block. 3069 * @since 11 3070 */ 3071 public static final UnicodeBlock CYRILLIC_EXTENDED_C = 3072 new UnicodeBlock("CYRILLIC_EXTENDED_C", 3073 "CYRILLIC EXTENDED-C", 3074 "CYRILLICEXTENDED-C"); 3075 3076 /** 3077 * Constant for the "Osage" Unicode 3078 * character block. 3079 * @since 11 3080 */ 3081 public static final UnicodeBlock OSAGE = 3082 new UnicodeBlock("OSAGE"); 3083 3084 /** 3085 * Constant for the "Newa" Unicode 3086 * character block. 3087 * @since 11 3088 */ 3089 public static final UnicodeBlock NEWA = 3090 new UnicodeBlock("NEWA"); 3091 3092 /** 3093 * Constant for the "Mongolian Supplement" Unicode 3094 * character block. 
3095 * @since 11 3096 */ 3097 public static final UnicodeBlock MONGOLIAN_SUPPLEMENT = 3098 new UnicodeBlock("MONGOLIAN_SUPPLEMENT", 3099 "MONGOLIAN SUPPLEMENT", 3100 "MONGOLIANSUPPLEMENT"); 3101 3102 /** 3103 * Constant for the "Marchen" Unicode 3104 * character block. 3105 * @since 11 3106 */ 3107 public static final UnicodeBlock MARCHEN = 3108 new UnicodeBlock("MARCHEN"); 3109 3110 /** 3111 * Constant for the "Ideographic Symbols and Punctuation" Unicode 3112 * character block. 3113 * @since 11 3114 */ 3115 public static final UnicodeBlock IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION = 3116 new UnicodeBlock("IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION", 3117 "IDEOGRAPHIC SYMBOLS AND PUNCTUATION", 3118 "IDEOGRAPHICSYMBOLSANDPUNCTUATION"); 3119 3120 /** 3121 * Constant for the "Tangut" Unicode 3122 * character block. 3123 * @since 11 3124 */ 3125 public static final UnicodeBlock TANGUT = 3126 new UnicodeBlock("TANGUT"); 3127 3128 /** 3129 * Constant for the "Tangut Components" Unicode 3130 * character block. 3131 * @since 11 3132 */ 3133 public static final UnicodeBlock TANGUT_COMPONENTS = 3134 new UnicodeBlock("TANGUT_COMPONENTS", 3135 "TANGUT COMPONENTS", 3136 "TANGUTCOMPONENTS"); 3137 3138 /** 3139 * Constant for the "Kana Extended-A" Unicode 3140 * character block. 3141 * @since 11 3142 */ 3143 public static final UnicodeBlock KANA_EXTENDED_A = 3144 new UnicodeBlock("KANA_EXTENDED_A", 3145 "KANA EXTENDED-A", 3146 "KANAEXTENDED-A"); 3147 /** 3148 * Constant for the "Glagolitic Supplement" Unicode 3149 * character block. 3150 * @since 11 3151 */ 3152 public static final UnicodeBlock GLAGOLITIC_SUPPLEMENT = 3153 new UnicodeBlock("GLAGOLITIC_SUPPLEMENT", 3154 "GLAGOLITIC SUPPLEMENT", 3155 "GLAGOLITICSUPPLEMENT"); 3156 /** 3157 * Constant for the "Adlam" Unicode 3158 * character block. 3159 * @since 11 3160 */ 3161 public static final UnicodeBlock ADLAM = 3162 new UnicodeBlock("ADLAM"); 3163 3164 /** 3165 * Constant for the "Masaram Gondi" Unicode 3166 * character block. 
3167 * @since 11 3168 */ 3169 public static final UnicodeBlock MASARAM_GONDI = 3170 new UnicodeBlock("MASARAM_GONDI", 3171 "MASARAM GONDI", 3172 "MASARAMGONDI"); 3173 3174 /** 3175 * Constant for the "Zanabazar Square" Unicode 3176 * character block. 3177 * @since 11 3178 */ 3179 public static final UnicodeBlock ZANABAZAR_SQUARE = 3180 new UnicodeBlock("ZANABAZAR_SQUARE", 3181 "ZANABAZAR SQUARE", 3182 "ZANABAZARSQUARE"); 3183 3184 /** 3185 * Constant for the "Nushu" Unicode 3186 * character block. 3187 * @since 11 3188 */ 3189 public static final UnicodeBlock NUSHU = 3190 new UnicodeBlock("NUSHU"); 3191 3192 /** 3193 * Constant for the "Soyombo" Unicode 3194 * character block. 3195 * @since 11 3196 */ 3197 public static final UnicodeBlock SOYOMBO = 3198 new UnicodeBlock("SOYOMBO"); 3199 3200 /** 3201 * Constant for the "Bhaiksuki" Unicode 3202 * character block. 3203 * @since 11 3204 */ 3205 public static final UnicodeBlock BHAIKSUKI = 3206 new UnicodeBlock("BHAIKSUKI"); 3207 3208 /** 3209 * Constant for the "CJK Unified Ideographs Extension F" Unicode 3210 * character block. 3211 * @since 11 3212 */ 3213 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F = 3214 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F", 3215 "CJK UNIFIED IDEOGRAPHS EXTENSION F", 3216 "CJKUNIFIEDIDEOGRAPHSEXTENSIONF"); 3217 /** 3218 * Constant for the "Georgian Extended" Unicode 3219 * character block. 3220 * @since 12 3221 */ 3222 public static final UnicodeBlock GEORGIAN_EXTENDED = 3223 new UnicodeBlock("GEORGIAN_EXTENDED", 3224 "GEORGIAN EXTENDED", 3225 "GEORGIANEXTENDED"); 3226 3227 /** 3228 * Constant for the "Hanifi Rohingya" Unicode 3229 * character block. 3230 * @since 12 3231 */ 3232 public static final UnicodeBlock HANIFI_ROHINGYA = 3233 new UnicodeBlock("HANIFI_ROHINGYA", 3234 "HANIFI ROHINGYA", 3235 "HANIFIROHINGYA"); 3236 3237 /** 3238 * Constant for the "Old Sogdian" Unicode 3239 * character block. 
3240 * @since 12 3241 */ 3242 public static final UnicodeBlock OLD_SOGDIAN = 3243 new UnicodeBlock("OLD_SOGDIAN", 3244 "OLD SOGDIAN", 3245 "OLDSOGDIAN"); 3246 3247 /** 3248 * Constant for the "Sogdian" Unicode 3249 * character block. 3250 * @since 12 3251 */ 3252 public static final UnicodeBlock SOGDIAN = 3253 new UnicodeBlock("SOGDIAN"); 3254 3255 /** 3256 * Constant for the "Dogra" Unicode 3257 * character block. 3258 * @since 12 3259 */ 3260 public static final UnicodeBlock DOGRA = 3261 new UnicodeBlock("DOGRA"); 3262 3263 /** 3264 * Constant for the "Gunjala Gondi" Unicode 3265 * character block. 3266 * @since 12 3267 */ 3268 public static final UnicodeBlock GUNJALA_GONDI = 3269 new UnicodeBlock("GUNJALA_GONDI", 3270 "GUNJALA GONDI", 3271 "GUNJALAGONDI"); 3272 3273 /** 3274 * Constant for the "Makasar" Unicode 3275 * character block. 3276 * @since 12 3277 */ 3278 public static final UnicodeBlock MAKASAR = 3279 new UnicodeBlock("MAKASAR"); 3280 3281 /** 3282 * Constant for the "Medefaidrin" Unicode 3283 * character block. 3284 * @since 12 3285 */ 3286 public static final UnicodeBlock MEDEFAIDRIN = 3287 new UnicodeBlock("MEDEFAIDRIN"); 3288 3289 /** 3290 * Constant for the "Mayan Numerals" Unicode 3291 * character block. 3292 * @since 12 3293 */ 3294 public static final UnicodeBlock MAYAN_NUMERALS = 3295 new UnicodeBlock("MAYAN_NUMERALS", 3296 "MAYAN NUMERALS", 3297 "MAYANNUMERALS"); 3298 3299 /** 3300 * Constant for the "Indic Siyaq Numbers" Unicode 3301 * character block. 3302 * @since 12 3303 */ 3304 public static final UnicodeBlock INDIC_SIYAQ_NUMBERS = 3305 new UnicodeBlock("INDIC_SIYAQ_NUMBERS", 3306 "INDIC SIYAQ NUMBERS", 3307 "INDICSIYAQNUMBERS"); 3308 3309 /** 3310 * Constant for the "Chess Symbols" Unicode 3311 * character block. 
3312 * @since 12 3313 */ 3314 public static final UnicodeBlock CHESS_SYMBOLS = 3315 new UnicodeBlock("CHESS_SYMBOLS", 3316 "CHESS SYMBOLS", 3317 "CHESSSYMBOLS"); 3318 3319 /** 3320 * Constant for the "Elymaic" Unicode 3321 * character block. 3322 * @since 13 3323 */ 3324 public static final UnicodeBlock ELYMAIC = 3325 new UnicodeBlock("ELYMAIC"); 3326 3327 /** 3328 * Constant for the "Nandinagari" Unicode 3329 * character block. 3330 * @since 13 3331 */ 3332 public static final UnicodeBlock NANDINAGARI = 3333 new UnicodeBlock("NANDINAGARI"); 3334 3335 /** 3336 * Constant for the "Tamil Supplement" Unicode 3337 * character block. 3338 * @since 13 3339 */ 3340 public static final UnicodeBlock TAMIL_SUPPLEMENT = 3341 new UnicodeBlock("TAMIL_SUPPLEMENT", 3342 "TAMIL SUPPLEMENT", 3343 "TAMILSUPPLEMENT"); 3344 3345 /** 3346 * Constant for the "Egyptian Hieroglyph Format Controls" Unicode 3347 * character block. 3348 * @since 13 3349 */ 3350 public static final UnicodeBlock EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS = 3351 new UnicodeBlock("EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS", 3352 "EGYPTIAN HIEROGLYPH FORMAT CONTROLS", 3353 "EGYPTIANHIEROGLYPHFORMATCONTROLS"); 3354 3355 /** 3356 * Constant for the "Small Kana Extension" Unicode 3357 * character block. 3358 * @since 13 3359 */ 3360 public static final UnicodeBlock SMALL_KANA_EXTENSION = 3361 new UnicodeBlock("SMALL_KANA_EXTENSION", 3362 "SMALL KANA EXTENSION", 3363 "SMALLKANAEXTENSION"); 3364 3365 /** 3366 * Constant for the "Nyiakeng Puachue Hmong" Unicode 3367 * character block. 3368 * @since 13 3369 */ 3370 public static final UnicodeBlock NYIAKENG_PUACHUE_HMONG = 3371 new UnicodeBlock("NYIAKENG_PUACHUE_HMONG", 3372 "NYIAKENG PUACHUE HMONG", 3373 "NYIAKENGPUACHUEHMONG"); 3374 3375 /** 3376 * Constant for the "Wancho" Unicode 3377 * character block. 
3378 * @since 13 3379 */ 3380 public static final UnicodeBlock WANCHO = 3381 new UnicodeBlock("WANCHO"); 3382 3383 /** 3384 * Constant for the "Ottoman Siyaq Numbers" Unicode 3385 * character block. 3386 * @since 13 3387 */ 3388 public static final UnicodeBlock OTTOMAN_SIYAQ_NUMBERS = 3389 new UnicodeBlock("OTTOMAN_SIYAQ_NUMBERS", 3390 "OTTOMAN SIYAQ NUMBERS", 3391 "OTTOMANSIYAQNUMBERS"); 3392 3393 /** 3394 * Constant for the "Symbols and Pictographs Extended-A" Unicode 3395 * character block. 3396 * @since 13 3397 */ 3398 public static final UnicodeBlock SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A = 3399 new UnicodeBlock("SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A", 3400 "SYMBOLS AND PICTOGRAPHS EXTENDED-A", 3401 "SYMBOLSANDPICTOGRAPHSEXTENDED-A"); 3402 3403 /** 3404 * Constant for the "Yezidi" Unicode 3405 * character block. 3406 * @since 15 3407 */ 3408 public static final UnicodeBlock YEZIDI = 3409 new UnicodeBlock("YEZIDI"); 3410 3411 /** 3412 * Constant for the "Chorasmian" Unicode 3413 * character block. 3414 * @since 15 3415 */ 3416 public static final UnicodeBlock CHORASMIAN = 3417 new UnicodeBlock("CHORASMIAN"); 3418 3419 /** 3420 * Constant for the "Dives Akuru" Unicode 3421 * character block. 3422 * @since 15 3423 */ 3424 public static final UnicodeBlock DIVES_AKURU = 3425 new UnicodeBlock("DIVES_AKURU", 3426 "DIVES AKURU", 3427 "DIVESAKURU"); 3428 3429 /** 3430 * Constant for the "Lisu Supplement" Unicode 3431 * character block. 3432 * @since 15 3433 */ 3434 public static final UnicodeBlock LISU_SUPPLEMENT = 3435 new UnicodeBlock("LISU_SUPPLEMENT", 3436 "LISU SUPPLEMENT", 3437 "LISUSUPPLEMENT"); 3438 3439 /** 3440 * Constant for the "Khitan Small Script" Unicode 3441 * character block. 3442 * @since 15 3443 */ 3444 public static final UnicodeBlock KHITAN_SMALL_SCRIPT = 3445 new UnicodeBlock("KHITAN_SMALL_SCRIPT", 3446 "KHITAN SMALL SCRIPT", 3447 "KHITANSMALLSCRIPT"); 3448 3449 /** 3450 * Constant for the "Tangut Supplement" Unicode 3451 * character block. 
* @since 15
         */
        public static final UnicodeBlock TANGUT_SUPPLEMENT =
            new UnicodeBlock("TANGUT_SUPPLEMENT",
                             "TANGUT SUPPLEMENT",
                             "TANGUTSUPPLEMENT");

        /**
         * Constant for the "Symbols for Legacy Computing" Unicode
         * character block.
         * @since 15
         */
        public static final UnicodeBlock SYMBOLS_FOR_LEGACY_COMPUTING =
            new UnicodeBlock("SYMBOLS_FOR_LEGACY_COMPUTING",
                             "SYMBOLS FOR LEGACY COMPUTING",
                             "SYMBOLSFORLEGACYCOMPUTING");

        /**
         * Constant for the "CJK Unified Ideographs Extension G" Unicode
         * character block.
         * @since 15
         */
        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G =
            new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G",
                             "CJK UNIFIED IDEOGRAPHS EXTENSION G",
                             "CJKUNIFIEDIDEOGRAPHSEXTENSIONG");

        /**
         * First code point of every range known to {@link #of(int)}, listed in
         * ascending order. The range that starts at {@code blockStarts[i]} is
         * described by {@code blocks[i]}; {@link #of(int)} binary-searches this
         * array and returns the entry of {@code blocks} at the index it finds.
         * Rows commented "unassigned" start a gap that belongs to no block.
         * The two arrays must be kept index-aligned when rows are added.
         */
        private static final int[] blockStarts = {
            0x0000, // 0000..007F; Basic Latin
            0x0080, // 0080..00FF; Latin-1 Supplement
            0x0100, // 0100..017F; Latin Extended-A
            0x0180, // 0180..024F; Latin Extended-B
            0x0250, // 0250..02AF; IPA Extensions
            0x02B0, // 02B0..02FF; Spacing Modifier Letters
            0x0300, // 0300..036F; Combining Diacritical Marks
            0x0370, // 0370..03FF; Greek and Coptic
            0x0400, // 0400..04FF; Cyrillic
            0x0500, // 0500..052F; Cyrillic Supplement
            0x0530, // 0530..058F; Armenian
            0x0590, // 0590..05FF; Hebrew
            0x0600, // 0600..06FF; Arabic
            0x0700, // 0700..074F; Syriac
            0x0750, // 0750..077F; Arabic Supplement
            0x0780, // 0780..07BF; Thaana
            0x07C0, // 07C0..07FF; NKo
            0x0800, // 0800..083F; Samaritan
            0x0840, // 0840..085F; Mandaic
            0x0860, // 0860..086F; Syriac Supplement
            0x0870, // unassigned
            0x08A0, // 08A0..08FF; Arabic Extended-A
            0x0900, // 0900..097F; Devanagari
            0x0980, // 0980..09FF; Bengali
            0x0A00, // 0A00..0A7F; Gurmukhi
            0x0A80, // 0A80..0AFF; Gujarati
            0x0B00, // 0B00..0B7F; Oriya
0x0B80, // 0B80..0BFF; Tamil 3508 0x0C00, // 0C00..0C7F; Telugu 3509 0x0C80, // 0C80..0CFF; Kannada 3510 0x0D00, // 0D00..0D7F; Malayalam 3511 0x0D80, // 0D80..0DFF; Sinhala 3512 0x0E00, // 0E00..0E7F; Thai 3513 0x0E80, // 0E80..0EFF; Lao 3514 0x0F00, // 0F00..0FFF; Tibetan 3515 0x1000, // 1000..109F; Myanmar 3516 0x10A0, // 10A0..10FF; Georgian 3517 0x1100, // 1100..11FF; Hangul Jamo 3518 0x1200, // 1200..137F; Ethiopic 3519 0x1380, // 1380..139F; Ethiopic Supplement 3520 0x13A0, // 13A0..13FF; Cherokee 3521 0x1400, // 1400..167F; Unified Canadian Aboriginal Syllabics 3522 0x1680, // 1680..169F; Ogham 3523 0x16A0, // 16A0..16FF; Runic 3524 0x1700, // 1700..171F; Tagalog 3525 0x1720, // 1720..173F; Hanunoo 3526 0x1740, // 1740..175F; Buhid 3527 0x1760, // 1760..177F; Tagbanwa 3528 0x1780, // 1780..17FF; Khmer 3529 0x1800, // 1800..18AF; Mongolian 3530 0x18B0, // 18B0..18FF; Unified Canadian Aboriginal Syllabics Extended 3531 0x1900, // 1900..194F; Limbu 3532 0x1950, // 1950..197F; Tai Le 3533 0x1980, // 1980..19DF; New Tai Lue 3534 0x19E0, // 19E0..19FF; Khmer Symbols 3535 0x1A00, // 1A00..1A1F; Buginese 3536 0x1A20, // 1A20..1AAF; Tai Tham 3537 0x1AB0, // 1AB0..1AFF; Combining Diacritical Marks Extended 3538 0x1B00, // 1B00..1B7F; Balinese 3539 0x1B80, // 1B80..1BBF; Sundanese 3540 0x1BC0, // 1BC0..1BFF; Batak 3541 0x1C00, // 1C00..1C4F; Lepcha 3542 0x1C50, // 1C50..1C7F; Ol Chiki 3543 0x1C80, // 1C80..1C8F; Cyrillic Extended-C 3544 0x1C90, // 1C90..1CBF; Georgian Extended 3545 0x1CC0, // 1CC0..1CCF; Sundanese Supplement 3546 0x1CD0, // 1CD0..1CFF; Vedic Extensions 3547 0x1D00, // 1D00..1D7F; Phonetic Extensions 3548 0x1D80, // 1D80..1DBF; Phonetic Extensions Supplement 3549 0x1DC0, // 1DC0..1DFF; Combining Diacritical Marks Supplement 3550 0x1E00, // 1E00..1EFF; Latin Extended Additional 3551 0x1F00, // 1F00..1FFF; Greek Extended 3552 0x2000, // 2000..206F; General Punctuation 3553 0x2070, // 2070..209F; Superscripts and Subscripts 3554 0x20A0, // 20A0..20CF; 
Currency Symbols 3555 0x20D0, // 20D0..20FF; Combining Diacritical Marks for Symbols 3556 0x2100, // 2100..214F; Letterlike Symbols 3557 0x2150, // 2150..218F; Number Forms 3558 0x2190, // 2190..21FF; Arrows 3559 0x2200, // 2200..22FF; Mathematical Operators 3560 0x2300, // 2300..23FF; Miscellaneous Technical 3561 0x2400, // 2400..243F; Control Pictures 3562 0x2440, // 2440..245F; Optical Character Recognition 3563 0x2460, // 2460..24FF; Enclosed Alphanumerics 3564 0x2500, // 2500..257F; Box Drawing 3565 0x2580, // 2580..259F; Block Elements 3566 0x25A0, // 25A0..25FF; Geometric Shapes 3567 0x2600, // 2600..26FF; Miscellaneous Symbols 3568 0x2700, // 2700..27BF; Dingbats 3569 0x27C0, // 27C0..27EF; Miscellaneous Mathematical Symbols-A 3570 0x27F0, // 27F0..27FF; Supplemental Arrows-A 3571 0x2800, // 2800..28FF; Braille Patterns 3572 0x2900, // 2900..297F; Supplemental Arrows-B 3573 0x2980, // 2980..29FF; Miscellaneous Mathematical Symbols-B 3574 0x2A00, // 2A00..2AFF; Supplemental Mathematical Operators 3575 0x2B00, // 2B00..2BFF; Miscellaneous Symbols and Arrows 3576 0x2C00, // 2C00..2C5F; Glagolitic 3577 0x2C60, // 2C60..2C7F; Latin Extended-C 3578 0x2C80, // 2C80..2CFF; Coptic 3579 0x2D00, // 2D00..2D2F; Georgian Supplement 3580 0x2D30, // 2D30..2D7F; Tifinagh 3581 0x2D80, // 2D80..2DDF; Ethiopic Extended 3582 0x2DE0, // 2DE0..2DFF; Cyrillic Extended-A 3583 0x2E00, // 2E00..2E7F; Supplemental Punctuation 3584 0x2E80, // 2E80..2EFF; CJK Radicals Supplement 3585 0x2F00, // 2F00..2FDF; Kangxi Radicals 3586 0x2FE0, // unassigned 3587 0x2FF0, // 2FF0..2FFF; Ideographic Description Characters 3588 0x3000, // 3000..303F; CJK Symbols and Punctuation 3589 0x3040, // 3040..309F; Hiragana 3590 0x30A0, // 30A0..30FF; Katakana 3591 0x3100, // 3100..312F; Bopomofo 3592 0x3130, // 3130..318F; Hangul Compatibility Jamo 3593 0x3190, // 3190..319F; Kanbun 3594 0x31A0, // 31A0..31BF; Bopomofo Extended 3595 0x31C0, // 31C0..31EF; CJK Strokes 3596 0x31F0, // 31F0..31FF; Katakana 
Phonetic Extensions 3597 0x3200, // 3200..32FF; Enclosed CJK Letters and Months 3598 0x3300, // 3300..33FF; CJK Compatibility 3599 0x3400, // 3400..4DBF; CJK Unified Ideographs Extension A 3600 0x4DC0, // 4DC0..4DFF; Yijing Hexagram Symbols 3601 0x4E00, // 4E00..9FFF; CJK Unified Ideographs 3602 0xA000, // A000..A48F; Yi Syllables 3603 0xA490, // A490..A4CF; Yi Radicals 3604 0xA4D0, // A4D0..A4FF; Lisu 3605 0xA500, // A500..A63F; Vai 3606 0xA640, // A640..A69F; Cyrillic Extended-B 3607 0xA6A0, // A6A0..A6FF; Bamum 3608 0xA700, // A700..A71F; Modifier Tone Letters 3609 0xA720, // A720..A7FF; Latin Extended-D 3610 0xA800, // A800..A82F; Syloti Nagri 3611 0xA830, // A830..A83F; Common Indic Number Forms 3612 0xA840, // A840..A87F; Phags-pa 3613 0xA880, // A880..A8DF; Saurashtra 3614 0xA8E0, // A8E0..A8FF; Devanagari Extended 3615 0xA900, // A900..A92F; Kayah Li 3616 0xA930, // A930..A95F; Rejang 3617 0xA960, // A960..A97F; Hangul Jamo Extended-A 3618 0xA980, // A980..A9DF; Javanese 3619 0xA9E0, // A9E0..A9FF; Myanmar Extended-B 3620 0xAA00, // AA00..AA5F; Cham 3621 0xAA60, // AA60..AA7F; Myanmar Extended-A 3622 0xAA80, // AA80..AADF; Tai Viet 3623 0xAAE0, // AAE0..AAFF; Meetei Mayek Extensions 3624 0xAB00, // AB00..AB2F; Ethiopic Extended-A 3625 0xAB30, // AB30..AB6F; Latin Extended-E 3626 0xAB70, // AB70..ABBF; Cherokee Supplement 3627 0xABC0, // ABC0..ABFF; Meetei Mayek 3628 0xAC00, // AC00..D7AF; Hangul Syllables 3629 0xD7B0, // D7B0..D7FF; Hangul Jamo Extended-B 3630 0xD800, // D800..DB7F; High Surrogates 3631 0xDB80, // DB80..DBFF; High Private Use Surrogates 3632 0xDC00, // DC00..DFFF; Low Surrogates 3633 0xE000, // E000..F8FF; Private Use Area 3634 0xF900, // F900..FAFF; CJK Compatibility Ideographs 3635 0xFB00, // FB00..FB4F; Alphabetic Presentation Forms 3636 0xFB50, // FB50..FDFF; Arabic Presentation Forms-A 3637 0xFE00, // FE00..FE0F; Variation Selectors 3638 0xFE10, // FE10..FE1F; Vertical Forms 3639 0xFE20, // FE20..FE2F; Combining Half Marks 3640 0xFE30, 
// FE30..FE4F; CJK Compatibility Forms 3641 0xFE50, // FE50..FE6F; Small Form Variants 3642 0xFE70, // FE70..FEFF; Arabic Presentation Forms-B 3643 0xFF00, // FF00..FFEF; Halfwidth and Fullwidth Forms 3644 0xFFF0, // FFF0..FFFF; Specials 3645 0x10000, // 10000..1007F; Linear B Syllabary 3646 0x10080, // 10080..100FF; Linear B Ideograms 3647 0x10100, // 10100..1013F; Aegean Numbers 3648 0x10140, // 10140..1018F; Ancient Greek Numbers 3649 0x10190, // 10190..101CF; Ancient Symbols 3650 0x101D0, // 101D0..101FF; Phaistos Disc 3651 0x10200, // unassigned 3652 0x10280, // 10280..1029F; Lycian 3653 0x102A0, // 102A0..102DF; Carian 3654 0x102E0, // 102E0..102FF; Coptic Epact Numbers 3655 0x10300, // 10300..1032F; Old Italic 3656 0x10330, // 10330..1034F; Gothic 3657 0x10350, // 10350..1037F; Old Permic 3658 0x10380, // 10380..1039F; Ugaritic 3659 0x103A0, // 103A0..103DF; Old Persian 3660 0x103E0, // unassigned 3661 0x10400, // 10400..1044F; Deseret 3662 0x10450, // 10450..1047F; Shavian 3663 0x10480, // 10480..104AF; Osmanya 3664 0x104B0, // 104B0..104FF; Osage 3665 0x10500, // 10500..1052F; Elbasan 3666 0x10530, // 10530..1056F; Caucasian Albanian 3667 0x10570, // unassigned 3668 0x10600, // 10600..1077F; Linear A 3669 0x10780, // unassigned 3670 0x10800, // 10800..1083F; Cypriot Syllabary 3671 0x10840, // 10840..1085F; Imperial Aramaic 3672 0x10860, // 10860..1087F; Palmyrene 3673 0x10880, // 10880..108AF; Nabataean 3674 0x108B0, // unassigned 3675 0x108E0, // 108E0..108FF; Hatran 3676 0x10900, // 10900..1091F; Phoenician 3677 0x10920, // 10920..1093F; Lydian 3678 0x10940, // unassigned 3679 0x10980, // 10980..1099F; Meroitic Hieroglyphs 3680 0x109A0, // 109A0..109FF; Meroitic Cursive 3681 0x10A00, // 10A00..10A5F; Kharoshthi 3682 0x10A60, // 10A60..10A7F; Old South Arabian 3683 0x10A80, // 10A80..10A9F; Old North Arabian 3684 0x10AA0, // unassigned 3685 0x10AC0, // 10AC0..10AFF; Manichaean 3686 0x10B00, // 10B00..10B3F; Avestan 3687 0x10B40, // 10B40..10B5F; 
Inscriptional Parthian 3688 0x10B60, // 10B60..10B7F; Inscriptional Pahlavi 3689 0x10B80, // 10B80..10BAF; Psalter Pahlavi 3690 0x10BB0, // unassigned 3691 0x10C00, // 10C00..10C4F; Old Turkic 3692 0x10C50, // unassigned 3693 0x10C80, // 10C80..10CFF; Old Hungarian 3694 0x10D00, // 10D00..10D3F; Hanifi Rohingya 3695 0x10D40, // unassigned 3696 0x10E60, // 10E60..10E7F; Rumi Numeral Symbols 3697 0x10E80, // 10E80..10EBF; Yezidi 3698 0x10EC0, // unassigned 3699 0x10F00, // 10F00..10F2F; Old Sogdian 3700 0x10F30, // 10F30..10F6F; Sogdian 3701 0x10F70, // unassigned 3702 0x10FB0, // 10FB0..10FDF; Chorasmian 3703 0x10FE0, // 10FE0..10FFF; Elymaic 3704 0x11000, // 11000..1107F; Brahmi 3705 0x11080, // 11080..110CF; Kaithi 3706 0x110D0, // 110D0..110FF; Sora Sompeng 3707 0x11100, // 11100..1114F; Chakma 3708 0x11150, // 11150..1117F; Mahajani 3709 0x11180, // 11180..111DF; Sharada 3710 0x111E0, // 111E0..111FF; Sinhala Archaic Numbers 3711 0x11200, // 11200..1124F; Khojki 3712 0x11250, // unassigned 3713 0x11280, // 11280..112AF; Multani 3714 0x112B0, // 112B0..112FF; Khudawadi 3715 0x11300, // 11300..1137F; Grantha 3716 0x11380, // unassigned 3717 0x11400, // 11400..1147F; Newa 3718 0x11480, // 11480..114DF; Tirhuta 3719 0x114E0, // unassigned 3720 0x11580, // 11580..115FF; Siddham 3721 0x11600, // 11600..1165F; Modi 3722 0x11660, // 11660..1167F; Mongolian Supplement 3723 0x11680, // 11680..116CF; Takri 3724 0x116D0, // unassigned 3725 0x11700, // 11700..1173F; Ahom 3726 0x11740, // unassigned 3727 0x11800, // 11800..1184F; Dogra 3728 0x11850, // unassigned 3729 0x118A0, // 118A0..118FF; Warang Citi 3730 0x11900, // 11900..1195F; Dives Akuru 3731 0x11960, // unassigned 3732 0x119A0, // 119A0..119FF; Nandinagari 3733 0x11A00, // 11A00..11A4F; Zanabazar Square 3734 0x11A50, // 11A50..11AAF; Soyombo 3735 0x11AB0, // unassigned 3736 0x11AC0, // 11AC0..11AFF; Pau Cin Hau 3737 0x11B00, // unassigned 3738 0x11C00, // 11C00..11C6F; Bhaiksuki 3739 0x11C70, // 11C70..11CBF; 
Marchen 3740 0x11CC0, // unassigned 3741 0x11D00, // 11D00..11D5F; Masaram Gondi 3742 0x11D60, // 11D60..11DAF; Gunjala Gondi 3743 0x11DB0, // unassigned 3744 0x11EE0, // 11EE0..11EFF; Makasar 3745 0x11F00, // unassigned 3746 0x11FB0, // 11FB0..11FBF; Lisu Supplement 3747 0x11FC0, // 11FC0..11FFF; Tamil Supplement 3748 0x12000, // 12000..123FF; Cuneiform 3749 0x12400, // 12400..1247F; Cuneiform Numbers and Punctuation 3750 0x12480, // 12480..1254F; Early Dynastic Cuneiform 3751 0x12550, // unassigned 3752 0x13000, // 13000..1342F; Egyptian Hieroglyphs 3753 0x13430, // 13430..1343F; Egyptian Hieroglyph Format Controls 3754 0x13440, // unassigned 3755 0x14400, // 14400..1467F; Anatolian Hieroglyphs 3756 0x14680, // unassigned 3757 0x16800, // 16800..16A3F; Bamum Supplement 3758 0x16A40, // 16A40..16A6F; Mro 3759 0x16A70, // unassigned 3760 0x16AD0, // 16AD0..16AFF; Bassa Vah 3761 0x16B00, // 16B00..16B8F; Pahawh Hmong 3762 0x16B90, // unassigned 3763 0x16E40, // 16E40..16E9F; Medefaidrin 3764 0x16EA0, // unassigned 3765 0x16F00, // 16F00..16F9F; Miao 3766 0x16FA0, // unassigned 3767 0x16FE0, // 16FE0..16FFF; Ideographic Symbols and Punctuation 3768 0x17000, // 17000..187FF; Tangut 3769 0x18800, // 18800..18AFF; Tangut Components 3770 0x18B00, // 18B00..18CFF; Khitan Small Script 3771 0x18D00, // 18D00..18D8F; Tangut Supplement 3772 0x18D90, // unassigned 3773 0x1B000, // 1B000..1B0FF; Kana Supplement 3774 0x1B100, // 1B100..1B12F; Kana Extended-A 3775 0x1B130, // 1B130..1B16F; Small Kana Extension 3776 0x1B170, // 1B170..1B2FF; Nushu 3777 0x1B300, // unassigned 3778 0x1BC00, // 1BC00..1BC9F; Duployan 3779 0x1BCA0, // 1BCA0..1BCAF; Shorthand Format Controls 3780 0x1BCB0, // unassigned 3781 0x1D000, // 1D000..1D0FF; Byzantine Musical Symbols 3782 0x1D100, // 1D100..1D1FF; Musical Symbols 3783 0x1D200, // 1D200..1D24F; Ancient Greek Musical Notation 3784 0x1D250, // unassigned 3785 0x1D2E0, // 1D2E0..1D2FF; Mayan Numerals 3786 0x1D300, // 1D300..1D35F; Tai Xuan Jing 
Symbols 3787 0x1D360, // 1D360..1D37F; Counting Rod Numerals 3788 0x1D380, // unassigned 3789 0x1D400, // 1D400..1D7FF; Mathematical Alphanumeric Symbols 3790 0x1D800, // 1D800..1DAAF; Sutton SignWriting 3791 0x1DAB0, // unassigned 3792 0x1E000, // 1E000..1E02F; Glagolitic Supplement 3793 0x1E030, // unassigned 3794 0x1E100, // 1E100..1E14F; Nyiakeng Puachue Hmong 3795 0x1E150, // unassigned 3796 0x1E2C0, // 1E2C0..1E2FF; Wancho 3797 0x1E300, // unassigned 3798 0x1E800, // 1E800..1E8DF; Mende Kikakui 3799 0x1E8E0, // unassigned 3800 0x1E900, // 1E900..1E95F; Adlam 3801 0x1E960, // unassigned 3802 0x1EC70, // 1EC70..1ECBF; Indic Siyaq Numbers 3803 0x1ECC0, // unassigned 3804 0x1ED00, // 1ED00..1ED4F; Ottoman Siyaq Numbers 3805 0x1ED50, // unassigned 3806 0x1EE00, // 1EE00..1EEFF; Arabic Mathematical Alphabetic Symbols 3807 0x1EF00, // unassigned 3808 0x1F000, // 1F000..1F02F; Mahjong Tiles 3809 0x1F030, // 1F030..1F09F; Domino Tiles 3810 0x1F0A0, // 1F0A0..1F0FF; Playing Cards 3811 0x1F100, // 1F100..1F1FF; Enclosed Alphanumeric Supplement 3812 0x1F200, // 1F200..1F2FF; Enclosed Ideographic Supplement 3813 0x1F300, // 1F300..1F5FF; Miscellaneous Symbols and Pictographs 3814 0x1F600, // 1F600..1F64F; Emoticons 3815 0x1F650, // 1F650..1F67F; Ornamental Dingbats 3816 0x1F680, // 1F680..1F6FF; Transport and Map Symbols 3817 0x1F700, // 1F700..1F77F; Alchemical Symbols 3818 0x1F780, // 1F780..1F7FF; Geometric Shapes Extended 3819 0x1F800, // 1F800..1F8FF; Supplemental Arrows-C 3820 0x1F900, // 1F900..1F9FF; Supplemental Symbols and Pictographs 3821 0x1FA00, // 1FA00..1FA6F; Chess Symbols 3822 0x1FA70, // 1FA70..1FAFF; Symbols and Pictographs Extended-A 3823 0x1FB00, // 1FB00..1FBFF; Symbols for Legacy Computing 3824 0x1FC00, // unassigned 3825 0x20000, // 20000..2A6DF; CJK Unified Ideographs Extension B 3826 0x2A6E0, // unassigned 3827 0x2A700, // 2A700..2B73F; CJK Unified Ideographs Extension C 3828 0x2B740, // 2B740..2B81F; CJK Unified Ideographs Extension D 3829 
0x2B820, // 2B820..2CEAF; CJK Unified Ideographs Extension E
            0x2CEB0, // 2CEB0..2EBEF; CJK Unified Ideographs Extension F
            0x2EBF0, // unassigned
            0x2F800, // 2F800..2FA1F; CJK Compatibility Ideographs Supplement
            0x2FA20, // unassigned
            0x30000, // 30000..3134F; CJK Unified Ideographs Extension G
            0x31350, // unassigned
            0xE0000, // E0000..E007F; Tags
            0xE0080, // unassigned
            0xE0100, // E0100..E01EF; Variation Selectors Supplement
            0xE01F0, // unassigned
            0xF0000, // F0000..FFFFF; Supplementary Private Use Area-A
            0x100000, // 100000..10FFFF; Supplementary Private Use Area-B
        };

        /**
         * Block constant for each row of {@code blockStarts}, index for index.
         * A {@code null} entry pairs with a {@code blockStarts} row commented
         * "unassigned"; {@link #of(int)} returns that {@code null} for any code
         * point that falls in such a gap.
         */
        private static final UnicodeBlock[] blocks = {
            BASIC_LATIN,
            LATIN_1_SUPPLEMENT,
            LATIN_EXTENDED_A,
            LATIN_EXTENDED_B,
            IPA_EXTENSIONS,
            SPACING_MODIFIER_LETTERS,
            COMBINING_DIACRITICAL_MARKS,
            GREEK,
            CYRILLIC,
            CYRILLIC_SUPPLEMENTARY,
            ARMENIAN,
            HEBREW,
            ARABIC,
            SYRIAC,
            ARABIC_SUPPLEMENT,
            THAANA,
            NKO,
            SAMARITAN,
            MANDAIC,
            SYRIAC_SUPPLEMENT,
            null,
            ARABIC_EXTENDED_A,
            DEVANAGARI,
            BENGALI,
            GURMUKHI,
            GUJARATI,
            ORIYA,
            TAMIL,
            TELUGU,
            KANNADA,
            MALAYALAM,
            SINHALA,
            THAI,
            LAO,
            TIBETAN,
            MYANMAR,
            GEORGIAN,
            HANGUL_JAMO,
            ETHIOPIC,
            ETHIOPIC_SUPPLEMENT,
            CHEROKEE,
            UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,
            OGHAM,
            RUNIC,
            TAGALOG,
            HANUNOO,
            BUHID,
            TAGBANWA,
            KHMER,
            MONGOLIAN,
            UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED,
            LIMBU,
            TAI_LE,
            NEW_TAI_LUE,
            KHMER_SYMBOLS,
            BUGINESE,
            TAI_THAM,
            COMBINING_DIACRITICAL_MARKS_EXTENDED,
            BALINESE,
            SUNDANESE,
            BATAK,
            LEPCHA,
            OL_CHIKI,
            CYRILLIC_EXTENDED_C,
            GEORGIAN_EXTENDED,
            SUNDANESE_SUPPLEMENT,
            VEDIC_EXTENSIONS,
            PHONETIC_EXTENSIONS,
            PHONETIC_EXTENSIONS_SUPPLEMENT,
COMBINING_DIACRITICAL_MARKS_SUPPLEMENT, 3915 LATIN_EXTENDED_ADDITIONAL, 3916 GREEK_EXTENDED, 3917 GENERAL_PUNCTUATION, 3918 SUPERSCRIPTS_AND_SUBSCRIPTS, 3919 CURRENCY_SYMBOLS, 3920 COMBINING_MARKS_FOR_SYMBOLS, 3921 LETTERLIKE_SYMBOLS, 3922 NUMBER_FORMS, 3923 ARROWS, 3924 MATHEMATICAL_OPERATORS, 3925 MISCELLANEOUS_TECHNICAL, 3926 CONTROL_PICTURES, 3927 OPTICAL_CHARACTER_RECOGNITION, 3928 ENCLOSED_ALPHANUMERICS, 3929 BOX_DRAWING, 3930 BLOCK_ELEMENTS, 3931 GEOMETRIC_SHAPES, 3932 MISCELLANEOUS_SYMBOLS, 3933 DINGBATS, 3934 MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A, 3935 SUPPLEMENTAL_ARROWS_A, 3936 BRAILLE_PATTERNS, 3937 SUPPLEMENTAL_ARROWS_B, 3938 MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B, 3939 SUPPLEMENTAL_MATHEMATICAL_OPERATORS, 3940 MISCELLANEOUS_SYMBOLS_AND_ARROWS, 3941 GLAGOLITIC, 3942 LATIN_EXTENDED_C, 3943 COPTIC, 3944 GEORGIAN_SUPPLEMENT, 3945 TIFINAGH, 3946 ETHIOPIC_EXTENDED, 3947 CYRILLIC_EXTENDED_A, 3948 SUPPLEMENTAL_PUNCTUATION, 3949 CJK_RADICALS_SUPPLEMENT, 3950 KANGXI_RADICALS, 3951 null, 3952 IDEOGRAPHIC_DESCRIPTION_CHARACTERS, 3953 CJK_SYMBOLS_AND_PUNCTUATION, 3954 HIRAGANA, 3955 KATAKANA, 3956 BOPOMOFO, 3957 HANGUL_COMPATIBILITY_JAMO, 3958 KANBUN, 3959 BOPOMOFO_EXTENDED, 3960 CJK_STROKES, 3961 KATAKANA_PHONETIC_EXTENSIONS, 3962 ENCLOSED_CJK_LETTERS_AND_MONTHS, 3963 CJK_COMPATIBILITY, 3964 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A, 3965 YIJING_HEXAGRAM_SYMBOLS, 3966 CJK_UNIFIED_IDEOGRAPHS, 3967 YI_SYLLABLES, 3968 YI_RADICALS, 3969 LISU, 3970 VAI, 3971 CYRILLIC_EXTENDED_B, 3972 BAMUM, 3973 MODIFIER_TONE_LETTERS, 3974 LATIN_EXTENDED_D, 3975 SYLOTI_NAGRI, 3976 COMMON_INDIC_NUMBER_FORMS, 3977 PHAGS_PA, 3978 SAURASHTRA, 3979 DEVANAGARI_EXTENDED, 3980 KAYAH_LI, 3981 REJANG, 3982 HANGUL_JAMO_EXTENDED_A, 3983 JAVANESE, 3984 MYANMAR_EXTENDED_B, 3985 CHAM, 3986 MYANMAR_EXTENDED_A, 3987 TAI_VIET, 3988 MEETEI_MAYEK_EXTENSIONS, 3989 ETHIOPIC_EXTENDED_A, 3990 LATIN_EXTENDED_E, 3991 CHEROKEE_SUPPLEMENT, 3992 MEETEI_MAYEK, 3993 HANGUL_SYLLABLES, 3994 HANGUL_JAMO_EXTENDED_B, 3995 
HIGH_SURROGATES, 3996 HIGH_PRIVATE_USE_SURROGATES, 3997 LOW_SURROGATES, 3998 PRIVATE_USE_AREA, 3999 CJK_COMPATIBILITY_IDEOGRAPHS, 4000 ALPHABETIC_PRESENTATION_FORMS, 4001 ARABIC_PRESENTATION_FORMS_A, 4002 VARIATION_SELECTORS, 4003 VERTICAL_FORMS, 4004 COMBINING_HALF_MARKS, 4005 CJK_COMPATIBILITY_FORMS, 4006 SMALL_FORM_VARIANTS, 4007 ARABIC_PRESENTATION_FORMS_B, 4008 HALFWIDTH_AND_FULLWIDTH_FORMS, 4009 SPECIALS, 4010 LINEAR_B_SYLLABARY, 4011 LINEAR_B_IDEOGRAMS, 4012 AEGEAN_NUMBERS, 4013 ANCIENT_GREEK_NUMBERS, 4014 ANCIENT_SYMBOLS, 4015 PHAISTOS_DISC, 4016 null, 4017 LYCIAN, 4018 CARIAN, 4019 COPTIC_EPACT_NUMBERS, 4020 OLD_ITALIC, 4021 GOTHIC, 4022 OLD_PERMIC, 4023 UGARITIC, 4024 OLD_PERSIAN, 4025 null, 4026 DESERET, 4027 SHAVIAN, 4028 OSMANYA, 4029 OSAGE, 4030 ELBASAN, 4031 CAUCASIAN_ALBANIAN, 4032 null, 4033 LINEAR_A, 4034 null, 4035 CYPRIOT_SYLLABARY, 4036 IMPERIAL_ARAMAIC, 4037 PALMYRENE, 4038 NABATAEAN, 4039 null, 4040 HATRAN, 4041 PHOENICIAN, 4042 LYDIAN, 4043 null, 4044 MEROITIC_HIEROGLYPHS, 4045 MEROITIC_CURSIVE, 4046 KHAROSHTHI, 4047 OLD_SOUTH_ARABIAN, 4048 OLD_NORTH_ARABIAN, 4049 null, 4050 MANICHAEAN, 4051 AVESTAN, 4052 INSCRIPTIONAL_PARTHIAN, 4053 INSCRIPTIONAL_PAHLAVI, 4054 PSALTER_PAHLAVI, 4055 null, 4056 OLD_TURKIC, 4057 null, 4058 OLD_HUNGARIAN, 4059 HANIFI_ROHINGYA, 4060 null, 4061 RUMI_NUMERAL_SYMBOLS, 4062 YEZIDI, 4063 null, 4064 OLD_SOGDIAN, 4065 SOGDIAN, 4066 null, 4067 CHORASMIAN, 4068 ELYMAIC, 4069 BRAHMI, 4070 KAITHI, 4071 SORA_SOMPENG, 4072 CHAKMA, 4073 MAHAJANI, 4074 SHARADA, 4075 SINHALA_ARCHAIC_NUMBERS, 4076 KHOJKI, 4077 null, 4078 MULTANI, 4079 KHUDAWADI, 4080 GRANTHA, 4081 null, 4082 NEWA, 4083 TIRHUTA, 4084 null, 4085 SIDDHAM, 4086 MODI, 4087 MONGOLIAN_SUPPLEMENT, 4088 TAKRI, 4089 null, 4090 AHOM, 4091 null, 4092 DOGRA, 4093 null, 4094 WARANG_CITI, 4095 DIVES_AKURU, 4096 null, 4097 NANDINAGARI, 4098 ZANABAZAR_SQUARE, 4099 SOYOMBO, 4100 null, 4101 PAU_CIN_HAU, 4102 null, 4103 BHAIKSUKI, 4104 MARCHEN, 4105 null, 4106 MASARAM_GONDI, 4107 
GUNJALA_GONDI, 4108 null, 4109 MAKASAR, 4110 null, 4111 LISU_SUPPLEMENT, 4112 TAMIL_SUPPLEMENT, 4113 CUNEIFORM, 4114 CUNEIFORM_NUMBERS_AND_PUNCTUATION, 4115 EARLY_DYNASTIC_CUNEIFORM, 4116 null, 4117 EGYPTIAN_HIEROGLYPHS, 4118 EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS, 4119 null, 4120 ANATOLIAN_HIEROGLYPHS, 4121 null, 4122 BAMUM_SUPPLEMENT, 4123 MRO, 4124 null, 4125 BASSA_VAH, 4126 PAHAWH_HMONG, 4127 null, 4128 MEDEFAIDRIN, 4129 null, 4130 MIAO, 4131 null, 4132 IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION, 4133 TANGUT, 4134 TANGUT_COMPONENTS, 4135 KHITAN_SMALL_SCRIPT, 4136 TANGUT_SUPPLEMENT, 4137 null, 4138 KANA_SUPPLEMENT, 4139 KANA_EXTENDED_A, 4140 SMALL_KANA_EXTENSION, 4141 NUSHU, 4142 null, 4143 DUPLOYAN, 4144 SHORTHAND_FORMAT_CONTROLS, 4145 null, 4146 BYZANTINE_MUSICAL_SYMBOLS, 4147 MUSICAL_SYMBOLS, 4148 ANCIENT_GREEK_MUSICAL_NOTATION, 4149 null, 4150 MAYAN_NUMERALS, 4151 TAI_XUAN_JING_SYMBOLS, 4152 COUNTING_ROD_NUMERALS, 4153 null, 4154 MATHEMATICAL_ALPHANUMERIC_SYMBOLS, 4155 SUTTON_SIGNWRITING, 4156 null, 4157 GLAGOLITIC_SUPPLEMENT, 4158 null, 4159 NYIAKENG_PUACHUE_HMONG, 4160 null, 4161 WANCHO, 4162 null, 4163 MENDE_KIKAKUI, 4164 null, 4165 ADLAM, 4166 null, 4167 INDIC_SIYAQ_NUMBERS, 4168 null, 4169 OTTOMAN_SIYAQ_NUMBERS, 4170 null, 4171 ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS, 4172 null, 4173 MAHJONG_TILES, 4174 DOMINO_TILES, 4175 PLAYING_CARDS, 4176 ENCLOSED_ALPHANUMERIC_SUPPLEMENT, 4177 ENCLOSED_IDEOGRAPHIC_SUPPLEMENT, 4178 MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS, 4179 EMOTICONS, 4180 ORNAMENTAL_DINGBATS, 4181 TRANSPORT_AND_MAP_SYMBOLS, 4182 ALCHEMICAL_SYMBOLS, 4183 GEOMETRIC_SHAPES_EXTENDED, 4184 SUPPLEMENTAL_ARROWS_C, 4185 SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS, 4186 CHESS_SYMBOLS, 4187 SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A, 4188 SYMBOLS_FOR_LEGACY_COMPUTING, 4189 null, 4190 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B, 4191 null, 4192 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C, 4193 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D, 4194 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E, 4195 
CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F, 4196 null, 4197 CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT, 4198 null, 4199 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G, 4200 null, 4201 TAGS, 4202 null, 4203 VARIATION_SELECTORS_SUPPLEMENT, 4204 null, 4205 SUPPLEMENTARY_PRIVATE_USE_AREA_A, 4206 SUPPLEMENTARY_PRIVATE_USE_AREA_B, 4207 }; 4208 4209 4210 /** 4211 * Returns the object representing the Unicode block containing the 4212 * given character, or {@code null} if the character is not a 4213 * member of a defined block. 4214 * 4215 * <p><b>Note:</b> This method cannot handle 4216 * <a href="Character.html#supplementary"> supplementary 4217 * characters</a>. To support all Unicode characters, including 4218 * supplementary characters, use the {@link #of(int)} method. 4219 * 4220 * @param c The character in question 4221 * @return The {@code UnicodeBlock} instance representing the 4222 * Unicode block of which this character is a member, or 4223 * {@code null} if the character is not a member of any 4224 * Unicode block 4225 */ of(char c)4226 public static UnicodeBlock of(char c) { 4227 return of((int)c); 4228 } 4229 4230 /** 4231 * Returns the object representing the Unicode block 4232 * containing the given character (Unicode code point), or 4233 * {@code null} if the character is not a member of a 4234 * defined block. 4235 * 4236 * @param codePoint the character (Unicode code point) in question. 4237 * @return The {@code UnicodeBlock} instance representing the 4238 * Unicode block of which this character is a member, or 4239 * {@code null} if the character is not a member of any 4240 * Unicode block 4241 * @throws IllegalArgumentException if the specified 4242 * {@code codePoint} is an invalid Unicode code point. 
4243 * @see Character#isValidCodePoint(int) 4244 * @since 1.5 4245 */ of(int codePoint)4246 public static UnicodeBlock of(int codePoint) { 4247 if (!isValidCodePoint(codePoint)) { 4248 throw new IllegalArgumentException( 4249 String.format("Not a valid Unicode code point: 0x%X", codePoint)); 4250 } 4251 4252 int top, bottom, current; 4253 bottom = 0; 4254 top = blockStarts.length; 4255 current = top/2; 4256 4257 // invariant: top > current >= bottom && codePoint >= unicodeBlockStarts[bottom] 4258 while (top - bottom > 1) { 4259 if (codePoint >= blockStarts[current]) { 4260 bottom = current; 4261 } else { 4262 top = current; 4263 } 4264 current = (top + bottom) / 2; 4265 } 4266 return blocks[current]; 4267 } 4268 4269 /** 4270 * Returns the UnicodeBlock with the given name. Block 4271 * names are determined by The Unicode Standard. The file 4272 * {@code Blocks-<version>.txt} defines blocks for a particular 4273 * version of the standard. The {@link Character} class specifies 4274 * the version of the standard that it supports. 4275 * <p> 4276 * This method accepts block names in the following forms: 4277 * <ol> 4278 * <li> Canonical block names as defined by the Unicode Standard. 4279 * For example, the standard defines a "Basic Latin" block. Therefore, this 4280 * method accepts "Basic Latin" as a valid block name. The documentation of 4281 * each UnicodeBlock provides the canonical name. 4282 * <li>Canonical block names with all spaces removed. For example, "BasicLatin" 4283 * is a valid block name for the "Basic Latin" block. 4284 * <li>The text representation of each constant UnicodeBlock identifier. 4285 * For example, this method will return the {@link #BASIC_LATIN} block if 4286 * provided with the "BASIC_LATIN" name. This form replaces all spaces and 4287 * hyphens in the canonical name with underscores. 4288 * </ol> 4289 * Finally, character case is ignored for all of the valid block name forms. 
4290 * For example, "BASIC_LATIN" and "basic_latin" are both valid block names. 4291 * The en_US locale's case mapping rules are used to provide case-insensitive 4292 * string comparisons for block name validation. 4293 * <p> 4294 * If the Unicode Standard changes block names, both the previous and 4295 * current names will be accepted. 4296 * 4297 * @param blockName A {@code UnicodeBlock} name. 4298 * @return The {@code UnicodeBlock} instance identified 4299 * by {@code blockName} 4300 * @throws IllegalArgumentException if {@code blockName} is an 4301 * invalid name 4302 * @throws NullPointerException if {@code blockName} is null 4303 * @since 1.5 4304 */ forName(String blockName)4305 public static final UnicodeBlock forName(String blockName) { 4306 UnicodeBlock block = map.get(blockName.toUpperCase(Locale.US)); 4307 if (block == null) { 4308 throw new IllegalArgumentException("Not a valid block name: " 4309 + blockName); 4310 } 4311 return block; 4312 } 4313 } 4314 4315 4316 /** 4317 * A family of character subsets representing the character scripts 4318 * defined in the <a href="http://www.unicode.org/reports/tr24/"> 4319 * <i>Unicode Standard Annex #24: Script Names</i></a>. Every Unicode 4320 * character is assigned to a single Unicode script, either a specific 4321 * script, such as {@link Character.UnicodeScript#LATIN Latin}, or 4322 * one of the following three special values, 4323 * {@link Character.UnicodeScript#INHERITED Inherited}, 4324 * {@link Character.UnicodeScript#COMMON Common} or 4325 * {@link Character.UnicodeScript#UNKNOWN Unknown}. 4326 * 4327 * @since 1.7 4328 */ 4329 public static enum UnicodeScript { 4330 /** 4331 * Unicode script "Common". 4332 */ 4333 COMMON, 4334 4335 /** 4336 * Unicode script "Latin". 4337 */ 4338 LATIN, 4339 4340 /** 4341 * Unicode script "Greek". 4342 */ 4343 GREEK, 4344 4345 /** 4346 * Unicode script "Cyrillic". 4347 */ 4348 CYRILLIC, 4349 4350 /** 4351 * Unicode script "Armenian". 
4352 */ 4353 ARMENIAN, 4354 4355 /** 4356 * Unicode script "Hebrew". 4357 */ 4358 HEBREW, 4359 4360 /** 4361 * Unicode script "Arabic". 4362 */ 4363 ARABIC, 4364 4365 /** 4366 * Unicode script "Syriac". 4367 */ 4368 SYRIAC, 4369 4370 /** 4371 * Unicode script "Thaana". 4372 */ 4373 THAANA, 4374 4375 /** 4376 * Unicode script "Devanagari". 4377 */ 4378 DEVANAGARI, 4379 4380 /** 4381 * Unicode script "Bengali". 4382 */ 4383 BENGALI, 4384 4385 /** 4386 * Unicode script "Gurmukhi". 4387 */ 4388 GURMUKHI, 4389 4390 /** 4391 * Unicode script "Gujarati". 4392 */ 4393 GUJARATI, 4394 4395 /** 4396 * Unicode script "Oriya". 4397 */ 4398 ORIYA, 4399 4400 /** 4401 * Unicode script "Tamil". 4402 */ 4403 TAMIL, 4404 4405 /** 4406 * Unicode script "Telugu". 4407 */ 4408 TELUGU, 4409 4410 /** 4411 * Unicode script "Kannada". 4412 */ 4413 KANNADA, 4414 4415 /** 4416 * Unicode script "Malayalam". 4417 */ 4418 MALAYALAM, 4419 4420 /** 4421 * Unicode script "Sinhala". 4422 */ 4423 SINHALA, 4424 4425 /** 4426 * Unicode script "Thai". 4427 */ 4428 THAI, 4429 4430 /** 4431 * Unicode script "Lao". 4432 */ 4433 LAO, 4434 4435 /** 4436 * Unicode script "Tibetan". 4437 */ 4438 TIBETAN, 4439 4440 /** 4441 * Unicode script "Myanmar". 4442 */ 4443 MYANMAR, 4444 4445 /** 4446 * Unicode script "Georgian". 4447 */ 4448 GEORGIAN, 4449 4450 /** 4451 * Unicode script "Hangul". 4452 */ 4453 HANGUL, 4454 4455 /** 4456 * Unicode script "Ethiopic". 4457 */ 4458 ETHIOPIC, 4459 4460 /** 4461 * Unicode script "Cherokee". 4462 */ 4463 CHEROKEE, 4464 4465 /** 4466 * Unicode script "Canadian_Aboriginal". 4467 */ 4468 CANADIAN_ABORIGINAL, 4469 4470 /** 4471 * Unicode script "Ogham". 4472 */ 4473 OGHAM, 4474 4475 /** 4476 * Unicode script "Runic". 4477 */ 4478 RUNIC, 4479 4480 /** 4481 * Unicode script "Khmer". 4482 */ 4483 KHMER, 4484 4485 /** 4486 * Unicode script "Mongolian". 4487 */ 4488 MONGOLIAN, 4489 4490 /** 4491 * Unicode script "Hiragana". 
4492 */ 4493 HIRAGANA, 4494 4495 /** 4496 * Unicode script "Katakana". 4497 */ 4498 KATAKANA, 4499 4500 /** 4501 * Unicode script "Bopomofo". 4502 */ 4503 BOPOMOFO, 4504 4505 /** 4506 * Unicode script "Han". 4507 */ 4508 HAN, 4509 4510 /** 4511 * Unicode script "Yi". 4512 */ 4513 YI, 4514 4515 /** 4516 * Unicode script "Old_Italic". 4517 */ 4518 OLD_ITALIC, 4519 4520 /** 4521 * Unicode script "Gothic". 4522 */ 4523 GOTHIC, 4524 4525 /** 4526 * Unicode script "Deseret". 4527 */ 4528 DESERET, 4529 4530 /** 4531 * Unicode script "Inherited". 4532 */ 4533 INHERITED, 4534 4535 /** 4536 * Unicode script "Tagalog". 4537 */ 4538 TAGALOG, 4539 4540 /** 4541 * Unicode script "Hanunoo". 4542 */ 4543 HANUNOO, 4544 4545 /** 4546 * Unicode script "Buhid". 4547 */ 4548 BUHID, 4549 4550 /** 4551 * Unicode script "Tagbanwa". 4552 */ 4553 TAGBANWA, 4554 4555 /** 4556 * Unicode script "Limbu". 4557 */ 4558 LIMBU, 4559 4560 /** 4561 * Unicode script "Tai_Le". 4562 */ 4563 TAI_LE, 4564 4565 /** 4566 * Unicode script "Linear_B". 4567 */ 4568 LINEAR_B, 4569 4570 /** 4571 * Unicode script "Ugaritic". 4572 */ 4573 UGARITIC, 4574 4575 /** 4576 * Unicode script "Shavian". 4577 */ 4578 SHAVIAN, 4579 4580 /** 4581 * Unicode script "Osmanya". 4582 */ 4583 OSMANYA, 4584 4585 /** 4586 * Unicode script "Cypriot". 4587 */ 4588 CYPRIOT, 4589 4590 /** 4591 * Unicode script "Braille". 4592 */ 4593 BRAILLE, 4594 4595 /** 4596 * Unicode script "Buginese". 4597 */ 4598 BUGINESE, 4599 4600 /** 4601 * Unicode script "Coptic". 4602 */ 4603 COPTIC, 4604 4605 /** 4606 * Unicode script "New_Tai_Lue". 4607 */ 4608 NEW_TAI_LUE, 4609 4610 /** 4611 * Unicode script "Glagolitic". 4612 */ 4613 GLAGOLITIC, 4614 4615 /** 4616 * Unicode script "Tifinagh". 4617 */ 4618 TIFINAGH, 4619 4620 /** 4621 * Unicode script "Syloti_Nagri". 4622 */ 4623 SYLOTI_NAGRI, 4624 4625 /** 4626 * Unicode script "Old_Persian". 4627 */ 4628 OLD_PERSIAN, 4629 4630 /** 4631 * Unicode script "Kharoshthi". 
4632 */ 4633 KHAROSHTHI, 4634 4635 /** 4636 * Unicode script "Balinese". 4637 */ 4638 BALINESE, 4639 4640 /** 4641 * Unicode script "Cuneiform". 4642 */ 4643 CUNEIFORM, 4644 4645 /** 4646 * Unicode script "Phoenician". 4647 */ 4648 PHOENICIAN, 4649 4650 /** 4651 * Unicode script "Phags_Pa". 4652 */ 4653 PHAGS_PA, 4654 4655 /** 4656 * Unicode script "Nko". 4657 */ 4658 NKO, 4659 4660 /** 4661 * Unicode script "Sundanese". 4662 */ 4663 SUNDANESE, 4664 4665 /** 4666 * Unicode script "Batak". 4667 */ 4668 BATAK, 4669 4670 /** 4671 * Unicode script "Lepcha". 4672 */ 4673 LEPCHA, 4674 4675 /** 4676 * Unicode script "Ol_Chiki". 4677 */ 4678 OL_CHIKI, 4679 4680 /** 4681 * Unicode script "Vai". 4682 */ 4683 VAI, 4684 4685 /** 4686 * Unicode script "Saurashtra". 4687 */ 4688 SAURASHTRA, 4689 4690 /** 4691 * Unicode script "Kayah_Li". 4692 */ 4693 KAYAH_LI, 4694 4695 /** 4696 * Unicode script "Rejang". 4697 */ 4698 REJANG, 4699 4700 /** 4701 * Unicode script "Lycian". 4702 */ 4703 LYCIAN, 4704 4705 /** 4706 * Unicode script "Carian". 4707 */ 4708 CARIAN, 4709 4710 /** 4711 * Unicode script "Lydian". 4712 */ 4713 LYDIAN, 4714 4715 /** 4716 * Unicode script "Cham". 4717 */ 4718 CHAM, 4719 4720 /** 4721 * Unicode script "Tai_Tham". 4722 */ 4723 TAI_THAM, 4724 4725 /** 4726 * Unicode script "Tai_Viet". 4727 */ 4728 TAI_VIET, 4729 4730 /** 4731 * Unicode script "Avestan". 4732 */ 4733 AVESTAN, 4734 4735 /** 4736 * Unicode script "Egyptian_Hieroglyphs". 4737 */ 4738 EGYPTIAN_HIEROGLYPHS, 4739 4740 /** 4741 * Unicode script "Samaritan". 4742 */ 4743 SAMARITAN, 4744 4745 /** 4746 * Unicode script "Mandaic". 4747 */ 4748 MANDAIC, 4749 4750 /** 4751 * Unicode script "Lisu". 4752 */ 4753 LISU, 4754 4755 /** 4756 * Unicode script "Bamum". 4757 */ 4758 BAMUM, 4759 4760 /** 4761 * Unicode script "Javanese". 4762 */ 4763 JAVANESE, 4764 4765 /** 4766 * Unicode script "Meetei_Mayek". 4767 */ 4768 MEETEI_MAYEK, 4769 4770 /** 4771 * Unicode script "Imperial_Aramaic". 
4772 */ 4773 IMPERIAL_ARAMAIC, 4774 4775 /** 4776 * Unicode script "Old_South_Arabian". 4777 */ 4778 OLD_SOUTH_ARABIAN, 4779 4780 /** 4781 * Unicode script "Inscriptional_Parthian". 4782 */ 4783 INSCRIPTIONAL_PARTHIAN, 4784 4785 /** 4786 * Unicode script "Inscriptional_Pahlavi". 4787 */ 4788 INSCRIPTIONAL_PAHLAVI, 4789 4790 /** 4791 * Unicode script "Old_Turkic". 4792 */ 4793 OLD_TURKIC, 4794 4795 /** 4796 * Unicode script "Brahmi". 4797 */ 4798 BRAHMI, 4799 4800 /** 4801 * Unicode script "Kaithi". 4802 */ 4803 KAITHI, 4804 4805 /** 4806 * Unicode script "Meroitic Hieroglyphs". 4807 * @since 1.8 4808 */ 4809 MEROITIC_HIEROGLYPHS, 4810 4811 /** 4812 * Unicode script "Meroitic Cursive". 4813 * @since 1.8 4814 */ 4815 MEROITIC_CURSIVE, 4816 4817 /** 4818 * Unicode script "Sora Sompeng". 4819 * @since 1.8 4820 */ 4821 SORA_SOMPENG, 4822 4823 /** 4824 * Unicode script "Chakma". 4825 * @since 1.8 4826 */ 4827 CHAKMA, 4828 4829 /** 4830 * Unicode script "Sharada". 4831 * @since 1.8 4832 */ 4833 SHARADA, 4834 4835 /** 4836 * Unicode script "Takri". 4837 * @since 1.8 4838 */ 4839 TAKRI, 4840 4841 /** 4842 * Unicode script "Miao". 4843 * @since 1.8 4844 */ 4845 MIAO, 4846 4847 /** 4848 * Unicode script "Caucasian Albanian". 4849 * @since 9 4850 */ 4851 CAUCASIAN_ALBANIAN, 4852 4853 /** 4854 * Unicode script "Bassa Vah". 4855 * @since 9 4856 */ 4857 BASSA_VAH, 4858 4859 /** 4860 * Unicode script "Duployan". 4861 * @since 9 4862 */ 4863 DUPLOYAN, 4864 4865 /** 4866 * Unicode script "Elbasan". 4867 * @since 9 4868 */ 4869 ELBASAN, 4870 4871 /** 4872 * Unicode script "Grantha". 4873 * @since 9 4874 */ 4875 GRANTHA, 4876 4877 /** 4878 * Unicode script "Pahawh Hmong". 4879 * @since 9 4880 */ 4881 PAHAWH_HMONG, 4882 4883 /** 4884 * Unicode script "Khojki". 4885 * @since 9 4886 */ 4887 KHOJKI, 4888 4889 /** 4890 * Unicode script "Linear A". 4891 * @since 9 4892 */ 4893 LINEAR_A, 4894 4895 /** 4896 * Unicode script "Mahajani". 
4897 * @since 9 4898 */ 4899 MAHAJANI, 4900 4901 /** 4902 * Unicode script "Manichaean". 4903 * @since 9 4904 */ 4905 MANICHAEAN, 4906 4907 /** 4908 * Unicode script "Mende Kikakui". 4909 * @since 9 4910 */ 4911 MENDE_KIKAKUI, 4912 4913 /** 4914 * Unicode script "Modi". 4915 * @since 9 4916 */ 4917 MODI, 4918 4919 /** 4920 * Unicode script "Mro". 4921 * @since 9 4922 */ 4923 MRO, 4924 4925 /** 4926 * Unicode script "Old North Arabian". 4927 * @since 9 4928 */ 4929 OLD_NORTH_ARABIAN, 4930 4931 /** 4932 * Unicode script "Nabataean". 4933 * @since 9 4934 */ 4935 NABATAEAN, 4936 4937 /** 4938 * Unicode script "Palmyrene". 4939 * @since 9 4940 */ 4941 PALMYRENE, 4942 4943 /** 4944 * Unicode script "Pau Cin Hau". 4945 * @since 9 4946 */ 4947 PAU_CIN_HAU, 4948 4949 /** 4950 * Unicode script "Old Permic". 4951 * @since 9 4952 */ 4953 OLD_PERMIC, 4954 4955 /** 4956 * Unicode script "Psalter Pahlavi". 4957 * @since 9 4958 */ 4959 PSALTER_PAHLAVI, 4960 4961 /** 4962 * Unicode script "Siddham". 4963 * @since 9 4964 */ 4965 SIDDHAM, 4966 4967 /** 4968 * Unicode script "Khudawadi". 4969 * @since 9 4970 */ 4971 KHUDAWADI, 4972 4973 /** 4974 * Unicode script "Tirhuta". 4975 * @since 9 4976 */ 4977 TIRHUTA, 4978 4979 /** 4980 * Unicode script "Warang Citi". 4981 * @since 9 4982 */ 4983 WARANG_CITI, 4984 4985 /** 4986 * Unicode script "Ahom". 4987 * @since 9 4988 */ 4989 AHOM, 4990 4991 /** 4992 * Unicode script "Anatolian Hieroglyphs". 4993 * @since 9 4994 */ 4995 ANATOLIAN_HIEROGLYPHS, 4996 4997 /** 4998 * Unicode script "Hatran". 4999 * @since 9 5000 */ 5001 HATRAN, 5002 5003 /** 5004 * Unicode script "Multani". 5005 * @since 9 5006 */ 5007 MULTANI, 5008 5009 /** 5010 * Unicode script "Old Hungarian". 5011 * @since 9 5012 */ 5013 OLD_HUNGARIAN, 5014 5015 /** 5016 * Unicode script "SignWriting". 5017 * @since 9 5018 */ 5019 SIGNWRITING, 5020 5021 /** 5022 * Unicode script "Adlam". 5023 * @since 11 5024 */ 5025 ADLAM, 5026 5027 /** 5028 * Unicode script "Bhaiksuki". 
5029 * @since 11 5030 */ 5031 BHAIKSUKI, 5032 5033 /** 5034 * Unicode script "Marchen". 5035 * @since 11 5036 */ 5037 MARCHEN, 5038 5039 /** 5040 * Unicode script "Newa". 5041 * @since 11 5042 */ 5043 NEWA, 5044 5045 /** 5046 * Unicode script "Osage". 5047 * @since 11 5048 */ 5049 OSAGE, 5050 5051 /** 5052 * Unicode script "Tangut". 5053 * @since 11 5054 */ 5055 TANGUT, 5056 5057 /** 5058 * Unicode script "Masaram Gondi". 5059 * @since 11 5060 */ 5061 MASARAM_GONDI, 5062 5063 /** 5064 * Unicode script "Nushu". 5065 * @since 11 5066 */ 5067 NUSHU, 5068 5069 /** 5070 * Unicode script "Soyombo". 5071 * @since 11 5072 */ 5073 SOYOMBO, 5074 5075 /** 5076 * Unicode script "Zanabazar Square". 5077 * @since 11 5078 */ 5079 ZANABAZAR_SQUARE, 5080 5081 /** 5082 * Unicode script "Hanifi Rohingya". 5083 * @since 12 5084 */ 5085 HANIFI_ROHINGYA, 5086 5087 /** 5088 * Unicode script "Old Sogdian". 5089 * @since 12 5090 */ 5091 OLD_SOGDIAN, 5092 5093 /** 5094 * Unicode script "Sogdian". 5095 * @since 12 5096 */ 5097 SOGDIAN, 5098 5099 /** 5100 * Unicode script "Dogra". 5101 * @since 12 5102 */ 5103 DOGRA, 5104 5105 /** 5106 * Unicode script "Gunjala Gondi". 5107 * @since 12 5108 */ 5109 GUNJALA_GONDI, 5110 5111 /** 5112 * Unicode script "Makasar". 5113 * @since 12 5114 */ 5115 MAKASAR, 5116 5117 /** 5118 * Unicode script "Medefaidrin". 5119 * @since 12 5120 */ 5121 MEDEFAIDRIN, 5122 5123 /** 5124 * Unicode script "Elymaic". 5125 * @since 13 5126 */ 5127 ELYMAIC, 5128 5129 /** 5130 * Unicode script "Nandinagari". 5131 * @since 13 5132 */ 5133 NANDINAGARI, 5134 5135 /** 5136 * Unicode script "Nyiakeng Puachue Hmong". 5137 * @since 13 5138 */ 5139 NYIAKENG_PUACHUE_HMONG, 5140 5141 /** 5142 * Unicode script "Wancho". 5143 * @since 13 5144 */ 5145 WANCHO, 5146 5147 /** 5148 * Unicode script "Yezidi". 5149 * @since 15 5150 */ 5151 YEZIDI, 5152 5153 /** 5154 * Unicode script "Chorasmian". 5155 * @since 15 5156 */ 5157 CHORASMIAN, 5158 5159 /** 5160 * Unicode script "Dives Akuru". 
5161 * @since 15 5162 */ 5163 DIVES_AKURU, 5164 5165 /** 5166 * Unicode script "Khitan Small Script". 5167 * @since 15 5168 */ 5169 KHITAN_SMALL_SCRIPT, 5170 5171 /** 5172 * Unicode script "Unknown". 5173 */ 5174 UNKNOWN; 5175 5176 private static final int[] scriptStarts = { 5177 0x0000, // 0000..0040; COMMON 5178 0x0041, // 0041..005A; LATIN 5179 0x005B, // 005B..0060; COMMON 5180 0x0061, // 0061..007A; LATIN 5181 0x007B, // 007B..00A9; COMMON 5182 0x00AA, // 00AA ; LATIN 5183 0x00AB, // 00AB..00B9; COMMON 5184 0x00BA, // 00BA ; LATIN 5185 0x00BB, // 00BB..00BF; COMMON 5186 0x00C0, // 00C0..00D6; LATIN 5187 0x00D7, // 00D7 ; COMMON 5188 0x00D8, // 00D8..00F6; LATIN 5189 0x00F7, // 00F7 ; COMMON 5190 0x00F8, // 00F8..02B8; LATIN 5191 0x02B9, // 02B9..02DF; COMMON 5192 0x02E0, // 02E0..02E4; LATIN 5193 0x02E5, // 02E5..02E9; COMMON 5194 0x02EA, // 02EA..02EB; BOPOMOFO 5195 0x02EC, // 02EC..02FF; COMMON 5196 0x0300, // 0300..036F; INHERITED 5197 0x0370, // 0370..0373; GREEK 5198 0x0374, // 0374 ; COMMON 5199 0x0375, // 0375..0377; GREEK 5200 0x0378, // 0378..0379; UNKNOWN 5201 0x037A, // 037A..037D; GREEK 5202 0x037E, // 037E ; COMMON 5203 0x037F, // 037F ; GREEK 5204 0x0380, // 0380..0383; UNKNOWN 5205 0x0384, // 0384 ; GREEK 5206 0x0385, // 0385 ; COMMON 5207 0x0386, // 0386 ; GREEK 5208 0x0387, // 0387 ; COMMON 5209 0x0388, // 0388..038A; GREEK 5210 0x038B, // 038B ; UNKNOWN 5211 0x038C, // 038C ; GREEK 5212 0x038D, // 038D ; UNKNOWN 5213 0x038E, // 038E..03A1; GREEK 5214 0x03A2, // 03A2 ; UNKNOWN 5215 0x03A3, // 03A3..03E1; GREEK 5216 0x03E2, // 03E2..03EF; COPTIC 5217 0x03F0, // 03F0..03FF; GREEK 5218 0x0400, // 0400..0484; CYRILLIC 5219 0x0485, // 0485..0486; INHERITED 5220 0x0487, // 0487..052F; CYRILLIC 5221 0x0530, // 0530 ; UNKNOWN 5222 0x0531, // 0531..0556; ARMENIAN 5223 0x0557, // 0557..0558; UNKNOWN 5224 0x0559, // 0559..058A; ARMENIAN 5225 0x058B, // 058B..058C; UNKNOWN 5226 0x058D, // 058D..058F; ARMENIAN 5227 0x0590, // 0590 ; UNKNOWN 5228 0x0591, // 
0591..05C7; HEBREW 5229 0x05C8, // 05C8..05CF; UNKNOWN 5230 0x05D0, // 05D0..05EA; HEBREW 5231 0x05EB, // 05EB..05EE; UNKNOWN 5232 0x05EF, // 05EF..05F4; HEBREW 5233 0x05F5, // 05F5..05FF; UNKNOWN 5234 0x0600, // 0600..0604; ARABIC 5235 0x0605, // 0605 ; COMMON 5236 0x0606, // 0606..060B; ARABIC 5237 0x060C, // 060C ; COMMON 5238 0x060D, // 060D..061A; ARABIC 5239 0x061B, // 061B ; COMMON 5240 0x061C, // 061C ; ARABIC 5241 0x061D, // 061D ; UNKNOWN 5242 0x061E, // 061E ; ARABIC 5243 0x061F, // 061F ; COMMON 5244 0x0620, // 0620..063F; ARABIC 5245 0x0640, // 0640 ; COMMON 5246 0x0641, // 0641..064A; ARABIC 5247 0x064B, // 064B..0655; INHERITED 5248 0x0656, // 0656..066F; ARABIC 5249 0x0670, // 0670 ; INHERITED 5250 0x0671, // 0671..06DC; ARABIC 5251 0x06DD, // 06DD ; COMMON 5252 0x06DE, // 06DE..06FF; ARABIC 5253 0x0700, // 0700..070D; SYRIAC 5254 0x070E, // 070E ; UNKNOWN 5255 0x070F, // 070F..074A; SYRIAC 5256 0x074B, // 074B..074C; UNKNOWN 5257 0x074D, // 074D..074F; SYRIAC 5258 0x0750, // 0750..077F; ARABIC 5259 0x0780, // 0780..07B1; THAANA 5260 0x07B2, // 07B2..07BF; UNKNOWN 5261 0x07C0, // 07C0..07FA; NKO 5262 0x07FB, // 07FB..07FC; UNKNOWN 5263 0x07FD, // 07FD..07FF; NKO 5264 0x0800, // 0800..082D; SAMARITAN 5265 0x082E, // 082E..082F; UNKNOWN 5266 0x0830, // 0830..083E; SAMARITAN 5267 0x083F, // 083F ; UNKNOWN 5268 0x0840, // 0840..085B; MANDAIC 5269 0x085C, // 085C..085D; UNKNOWN 5270 0x085E, // 085E ; MANDAIC 5271 0x085F, // 085F ; UNKNOWN 5272 0x0860, // 0860..086A; SYRIAC 5273 0x086B, // 086B..089F; UNKNOWN 5274 0x08A0, // 08A0..08B4; ARABIC 5275 0x08B5, // 08B5 ; UNKNOWN 5276 0x08B6, // 08B6..08C7; ARABIC 5277 0x08C8, // 08C8..08D2; UNKNOWN 5278 0x08D3, // 08D3..08E1; ARABIC 5279 0x08E2, // 08E2 ; COMMON 5280 0x08E3, // 08E3..08FF; ARABIC 5281 0x0900, // 0900..0950; DEVANAGARI 5282 0x0951, // 0951..0954; INHERITED 5283 0x0955, // 0955..0963; DEVANAGARI 5284 0x0964, // 0964..0965; COMMON 5285 0x0966, // 0966..097F; DEVANAGARI 5286 0x0980, // 0980..0983; 
BENGALI 5287 0x0984, // 0984 ; UNKNOWN 5288 0x0985, // 0985..098C; BENGALI 5289 0x098D, // 098D..098E; UNKNOWN 5290 0x098F, // 098F..0990; BENGALI 5291 0x0991, // 0991..0992; UNKNOWN 5292 0x0993, // 0993..09A8; BENGALI 5293 0x09A9, // 09A9 ; UNKNOWN 5294 0x09AA, // 09AA..09B0; BENGALI 5295 0x09B1, // 09B1 ; UNKNOWN 5296 0x09B2, // 09B2 ; BENGALI 5297 0x09B3, // 09B3..09B5; UNKNOWN 5298 0x09B6, // 09B6..09B9; BENGALI 5299 0x09BA, // 09BA..09BB; UNKNOWN 5300 0x09BC, // 09BC..09C4; BENGALI 5301 0x09C5, // 09C5..09C6; UNKNOWN 5302 0x09C7, // 09C7..09C8; BENGALI 5303 0x09C9, // 09C9..09CA; UNKNOWN 5304 0x09CB, // 09CB..09CE; BENGALI 5305 0x09CF, // 09CF..09D6; UNKNOWN 5306 0x09D7, // 09D7 ; BENGALI 5307 0x09D8, // 09D8..09DB; UNKNOWN 5308 0x09DC, // 09DC..09DD; BENGALI 5309 0x09DE, // 09DE ; UNKNOWN 5310 0x09DF, // 09DF..09E3; BENGALI 5311 0x09E4, // 09E4..09E5; UNKNOWN 5312 0x09E6, // 09E6..09FE; BENGALI 5313 0x09FF, // 09FF..0A00; UNKNOWN 5314 0x0A01, // 0A01..0A03; GURMUKHI 5315 0x0A04, // 0A04 ; UNKNOWN 5316 0x0A05, // 0A05..0A0A; GURMUKHI 5317 0x0A0B, // 0A0B..0A0E; UNKNOWN 5318 0x0A0F, // 0A0F..0A10; GURMUKHI 5319 0x0A11, // 0A11..0A12; UNKNOWN 5320 0x0A13, // 0A13..0A28; GURMUKHI 5321 0x0A29, // 0A29 ; UNKNOWN 5322 0x0A2A, // 0A2A..0A30; GURMUKHI 5323 0x0A31, // 0A31 ; UNKNOWN 5324 0x0A32, // 0A32..0A33; GURMUKHI 5325 0x0A34, // 0A34 ; UNKNOWN 5326 0x0A35, // 0A35..0A36; GURMUKHI 5327 0x0A37, // 0A37 ; UNKNOWN 5328 0x0A38, // 0A38..0A39; GURMUKHI 5329 0x0A3A, // 0A3A..0A3B; UNKNOWN 5330 0x0A3C, // 0A3C ; GURMUKHI 5331 0x0A3D, // 0A3D ; UNKNOWN 5332 0x0A3E, // 0A3E..0A42; GURMUKHI 5333 0x0A43, // 0A43..0A46; UNKNOWN 5334 0x0A47, // 0A47..0A48; GURMUKHI 5335 0x0A49, // 0A49..0A4A; UNKNOWN 5336 0x0A4B, // 0A4B..0A4D; GURMUKHI 5337 0x0A4E, // 0A4E..0A50; UNKNOWN 5338 0x0A51, // 0A51 ; GURMUKHI 5339 0x0A52, // 0A52..0A58; UNKNOWN 5340 0x0A59, // 0A59..0A5C; GURMUKHI 5341 0x0A5D, // 0A5D ; UNKNOWN 5342 0x0A5E, // 0A5E ; GURMUKHI 5343 0x0A5F, // 0A5F..0A65; UNKNOWN 5344 
0x0A66, // 0A66..0A76; GURMUKHI 5345 0x0A77, // 0A77..0A80; UNKNOWN 5346 0x0A81, // 0A81..0A83; GUJARATI 5347 0x0A84, // 0A84 ; UNKNOWN 5348 0x0A85, // 0A85..0A8D; GUJARATI 5349 0x0A8E, // 0A8E ; UNKNOWN 5350 0x0A8F, // 0A8F..0A91; GUJARATI 5351 0x0A92, // 0A92 ; UNKNOWN 5352 0x0A93, // 0A93..0AA8; GUJARATI 5353 0x0AA9, // 0AA9 ; UNKNOWN 5354 0x0AAA, // 0AAA..0AB0; GUJARATI 5355 0x0AB1, // 0AB1 ; UNKNOWN 5356 0x0AB2, // 0AB2..0AB3; GUJARATI 5357 0x0AB4, // 0AB4 ; UNKNOWN 5358 0x0AB5, // 0AB5..0AB9; GUJARATI 5359 0x0ABA, // 0ABA..0ABB; UNKNOWN 5360 0x0ABC, // 0ABC..0AC5; GUJARATI 5361 0x0AC6, // 0AC6 ; UNKNOWN 5362 0x0AC7, // 0AC7..0AC9; GUJARATI 5363 0x0ACA, // 0ACA ; UNKNOWN 5364 0x0ACB, // 0ACB..0ACD; GUJARATI 5365 0x0ACE, // 0ACE..0ACF; UNKNOWN 5366 0x0AD0, // 0AD0 ; GUJARATI 5367 0x0AD1, // 0AD1..0ADF; UNKNOWN 5368 0x0AE0, // 0AE0..0AE3; GUJARATI 5369 0x0AE4, // 0AE4..0AE5; UNKNOWN 5370 0x0AE6, // 0AE6..0AF1; GUJARATI 5371 0x0AF2, // 0AF2..0AF8; UNKNOWN 5372 0x0AF9, // 0AF9..0AFF; GUJARATI 5373 0x0B00, // 0B00 ; UNKNOWN 5374 0x0B01, // 0B01..0B03; ORIYA 5375 0x0B04, // 0B04 ; UNKNOWN 5376 0x0B05, // 0B05..0B0C; ORIYA 5377 0x0B0D, // 0B0D..0B0E; UNKNOWN 5378 0x0B0F, // 0B0F..0B10; ORIYA 5379 0x0B11, // 0B11..0B12; UNKNOWN 5380 0x0B13, // 0B13..0B28; ORIYA 5381 0x0B29, // 0B29 ; UNKNOWN 5382 0x0B2A, // 0B2A..0B30; ORIYA 5383 0x0B31, // 0B31 ; UNKNOWN 5384 0x0B32, // 0B32..0B33; ORIYA 5385 0x0B34, // 0B34 ; UNKNOWN 5386 0x0B35, // 0B35..0B39; ORIYA 5387 0x0B3A, // 0B3A..0B3B; UNKNOWN 5388 0x0B3C, // 0B3C..0B44; ORIYA 5389 0x0B45, // 0B45..0B46; UNKNOWN 5390 0x0B47, // 0B47..0B48; ORIYA 5391 0x0B49, // 0B49..0B4A; UNKNOWN 5392 0x0B4B, // 0B4B..0B4D; ORIYA 5393 0x0B4E, // 0B4E..0B54; UNKNOWN 5394 0x0B55, // 0B55..0B57; ORIYA 5395 0x0B58, // 0B58..0B5B; UNKNOWN 5396 0x0B5C, // 0B5C..0B5D; ORIYA 5397 0x0B5E, // 0B5E ; UNKNOWN 5398 0x0B5F, // 0B5F..0B63; ORIYA 5399 0x0B64, // 0B64..0B65; UNKNOWN 5400 0x0B66, // 0B66..0B77; ORIYA 5401 0x0B78, // 0B78..0B81; UNKNOWN 5402 
0x0B82, // 0B82..0B83; TAMIL 5403 0x0B84, // 0B84 ; UNKNOWN 5404 0x0B85, // 0B85..0B8A; TAMIL 5405 0x0B8B, // 0B8B..0B8D; UNKNOWN 5406 0x0B8E, // 0B8E..0B90; TAMIL 5407 0x0B91, // 0B91 ; UNKNOWN 5408 0x0B92, // 0B92..0B95; TAMIL 5409 0x0B96, // 0B96..0B98; UNKNOWN 5410 0x0B99, // 0B99..0B9A; TAMIL 5411 0x0B9B, // 0B9B ; UNKNOWN 5412 0x0B9C, // 0B9C ; TAMIL 5413 0x0B9D, // 0B9D ; UNKNOWN 5414 0x0B9E, // 0B9E..0B9F; TAMIL 5415 0x0BA0, // 0BA0..0BA2; UNKNOWN 5416 0x0BA3, // 0BA3..0BA4; TAMIL 5417 0x0BA5, // 0BA5..0BA7; UNKNOWN 5418 0x0BA8, // 0BA8..0BAA; TAMIL 5419 0x0BAB, // 0BAB..0BAD; UNKNOWN 5420 0x0BAE, // 0BAE..0BB9; TAMIL 5421 0x0BBA, // 0BBA..0BBD; UNKNOWN 5422 0x0BBE, // 0BBE..0BC2; TAMIL 5423 0x0BC3, // 0BC3..0BC5; UNKNOWN 5424 0x0BC6, // 0BC6..0BC8; TAMIL 5425 0x0BC9, // 0BC9 ; UNKNOWN 5426 0x0BCA, // 0BCA..0BCD; TAMIL 5427 0x0BCE, // 0BCE..0BCF; UNKNOWN 5428 0x0BD0, // 0BD0 ; TAMIL 5429 0x0BD1, // 0BD1..0BD6; UNKNOWN 5430 0x0BD7, // 0BD7 ; TAMIL 5431 0x0BD8, // 0BD8..0BE5; UNKNOWN 5432 0x0BE6, // 0BE6..0BFA; TAMIL 5433 0x0BFB, // 0BFB..0BFF; UNKNOWN 5434 0x0C00, // 0C00..0C0C; TELUGU 5435 0x0C0D, // 0C0D ; UNKNOWN 5436 0x0C0E, // 0C0E..0C10; TELUGU 5437 0x0C11, // 0C11 ; UNKNOWN 5438 0x0C12, // 0C12..0C28; TELUGU 5439 0x0C29, // 0C29 ; UNKNOWN 5440 0x0C2A, // 0C2A..0C39; TELUGU 5441 0x0C3A, // 0C3A..0C3C; UNKNOWN 5442 0x0C3D, // 0C3D..0C44; TELUGU 5443 0x0C45, // 0C45 ; UNKNOWN 5444 0x0C46, // 0C46..0C48; TELUGU 5445 0x0C49, // 0C49 ; UNKNOWN 5446 0x0C4A, // 0C4A..0C4D; TELUGU 5447 0x0C4E, // 0C4E..0C54; UNKNOWN 5448 0x0C55, // 0C55..0C56; TELUGU 5449 0x0C57, // 0C57 ; UNKNOWN 5450 0x0C58, // 0C58..0C5A; TELUGU 5451 0x0C5B, // 0C5B..0C5F; UNKNOWN 5452 0x0C60, // 0C60..0C63; TELUGU 5453 0x0C64, // 0C64..0C65; UNKNOWN 5454 0x0C66, // 0C66..0C6F; TELUGU 5455 0x0C70, // 0C70..0C76; UNKNOWN 5456 0x0C77, // 0C77..0C7F; TELUGU 5457 0x0C80, // 0C80..0C8C; KANNADA 5458 0x0C8D, // 0C8D ; UNKNOWN 5459 0x0C8E, // 0C8E..0C90; KANNADA 5460 0x0C91, // 0C91 ; UNKNOWN 5461 
0x0C92, // 0C92..0CA8; KANNADA 5462 0x0CA9, // 0CA9 ; UNKNOWN 5463 0x0CAA, // 0CAA..0CB3; KANNADA 5464 0x0CB4, // 0CB4 ; UNKNOWN 5465 0x0CB5, // 0CB5..0CB9; KANNADA 5466 0x0CBA, // 0CBA..0CBB; UNKNOWN 5467 0x0CBC, // 0CBC..0CC4; KANNADA 5468 0x0CC5, // 0CC5 ; UNKNOWN 5469 0x0CC6, // 0CC6..0CC8; KANNADA 5470 0x0CC9, // 0CC9 ; UNKNOWN 5471 0x0CCA, // 0CCA..0CCD; KANNADA 5472 0x0CCE, // 0CCE..0CD4; UNKNOWN 5473 0x0CD5, // 0CD5..0CD6; KANNADA 5474 0x0CD7, // 0CD7..0CDD; UNKNOWN 5475 0x0CDE, // 0CDE ; KANNADA 5476 0x0CDF, // 0CDF ; UNKNOWN 5477 0x0CE0, // 0CE0..0CE3; KANNADA 5478 0x0CE4, // 0CE4..0CE5; UNKNOWN 5479 0x0CE6, // 0CE6..0CEF; KANNADA 5480 0x0CF0, // 0CF0 ; UNKNOWN 5481 0x0CF1, // 0CF1..0CF2; KANNADA 5482 0x0CF3, // 0CF3..0CFF; UNKNOWN 5483 0x0D00, // 0D00..0D0C; MALAYALAM 5484 0x0D0D, // 0D0D ; UNKNOWN 5485 0x0D0E, // 0D0E..0D10; MALAYALAM 5486 0x0D11, // 0D11 ; UNKNOWN 5487 0x0D12, // 0D12..0D44; MALAYALAM 5488 0x0D45, // 0D45 ; UNKNOWN 5489 0x0D46, // 0D46..0D48; MALAYALAM 5490 0x0D49, // 0D49 ; UNKNOWN 5491 0x0D4A, // 0D4A..0D4F; MALAYALAM 5492 0x0D50, // 0D50..0D53; UNKNOWN 5493 0x0D54, // 0D54..0D63; MALAYALAM 5494 0x0D64, // 0D64..0D65; UNKNOWN 5495 0x0D66, // 0D66..0D7F; MALAYALAM 5496 0x0D80, // 0D80 ; UNKNOWN 5497 0x0D81, // 0D81..0D83; SINHALA 5498 0x0D84, // 0D84 ; UNKNOWN 5499 0x0D85, // 0D85..0D96; SINHALA 5500 0x0D97, // 0D97..0D99; UNKNOWN 5501 0x0D9A, // 0D9A..0DB1; SINHALA 5502 0x0DB2, // 0DB2 ; UNKNOWN 5503 0x0DB3, // 0DB3..0DBB; SINHALA 5504 0x0DBC, // 0DBC ; UNKNOWN 5505 0x0DBD, // 0DBD ; SINHALA 5506 0x0DBE, // 0DBE..0DBF; UNKNOWN 5507 0x0DC0, // 0DC0..0DC6; SINHALA 5508 0x0DC7, // 0DC7..0DC9; UNKNOWN 5509 0x0DCA, // 0DCA ; SINHALA 5510 0x0DCB, // 0DCB..0DCE; UNKNOWN 5511 0x0DCF, // 0DCF..0DD4; SINHALA 5512 0x0DD5, // 0DD5 ; UNKNOWN 5513 0x0DD6, // 0DD6 ; SINHALA 5514 0x0DD7, // 0DD7 ; UNKNOWN 5515 0x0DD8, // 0DD8..0DDF; SINHALA 5516 0x0DE0, // 0DE0..0DE5; UNKNOWN 5517 0x0DE6, // 0DE6..0DEF; SINHALA 5518 0x0DF0, // 0DF0..0DF1; UNKNOWN 
5519 0x0DF2, // 0DF2..0DF4; SINHALA 5520 0x0DF5, // 0DF5..0E00; UNKNOWN 5521 0x0E01, // 0E01..0E3A; THAI 5522 0x0E3B, // 0E3B..0E3E; UNKNOWN 5523 0x0E3F, // 0E3F ; COMMON 5524 0x0E40, // 0E40..0E5B; THAI 5525 0x0E5C, // 0E5C..0E80; UNKNOWN 5526 0x0E81, // 0E81..0E82; LAO 5527 0x0E83, // 0E83 ; UNKNOWN 5528 0x0E84, // 0E84 ; LAO 5529 0x0E85, // 0E85 ; UNKNOWN 5530 0x0E86, // 0E86..0E8A; LAO 5531 0x0E8B, // 0E8B ; UNKNOWN 5532 0x0E8C, // 0E8C..0EA3; LAO 5533 0x0EA4, // 0EA4 ; UNKNOWN 5534 0x0EA5, // 0EA5 ; LAO 5535 0x0EA6, // 0EA6 ; UNKNOWN 5536 0x0EA7, // 0EA7..0EBD; LAO 5537 0x0EBE, // 0EBE..0EBF; UNKNOWN 5538 0x0EC0, // 0EC0..0EC4; LAO 5539 0x0EC5, // 0EC5 ; UNKNOWN 5540 0x0EC6, // 0EC6 ; LAO 5541 0x0EC7, // 0EC7 ; UNKNOWN 5542 0x0EC8, // 0EC8..0ECD; LAO 5543 0x0ECE, // 0ECE..0ECF; UNKNOWN 5544 0x0ED0, // 0ED0..0ED9; LAO 5545 0x0EDA, // 0EDA..0EDB; UNKNOWN 5546 0x0EDC, // 0EDC..0EDF; LAO 5547 0x0EE0, // 0EE0..0EFF; UNKNOWN 5548 0x0F00, // 0F00..0F47; TIBETAN 5549 0x0F48, // 0F48 ; UNKNOWN 5550 0x0F49, // 0F49..0F6C; TIBETAN 5551 0x0F6D, // 0F6D..0F70; UNKNOWN 5552 0x0F71, // 0F71..0F97; TIBETAN 5553 0x0F98, // 0F98 ; UNKNOWN 5554 0x0F99, // 0F99..0FBC; TIBETAN 5555 0x0FBD, // 0FBD ; UNKNOWN 5556 0x0FBE, // 0FBE..0FCC; TIBETAN 5557 0x0FCD, // 0FCD ; UNKNOWN 5558 0x0FCE, // 0FCE..0FD4; TIBETAN 5559 0x0FD5, // 0FD5..0FD8; COMMON 5560 0x0FD9, // 0FD9..0FDA; TIBETAN 5561 0x0FDB, // 0FDB..0FFF; UNKNOWN 5562 0x1000, // 1000..109F; MYANMAR 5563 0x10A0, // 10A0..10C5; GEORGIAN 5564 0x10C6, // 10C6 ; UNKNOWN 5565 0x10C7, // 10C7 ; GEORGIAN 5566 0x10C8, // 10C8..10CC; UNKNOWN 5567 0x10CD, // 10CD ; GEORGIAN 5568 0x10CE, // 10CE..10CF; UNKNOWN 5569 0x10D0, // 10D0..10FA; GEORGIAN 5570 0x10FB, // 10FB ; COMMON 5571 0x10FC, // 10FC..10FF; GEORGIAN 5572 0x1100, // 1100..11FF; HANGUL 5573 0x1200, // 1200..1248; ETHIOPIC 5574 0x1249, // 1249 ; UNKNOWN 5575 0x124A, // 124A..124D; ETHIOPIC 5576 0x124E, // 124E..124F; UNKNOWN 5577 0x1250, // 1250..1256; ETHIOPIC 5578 0x1257, // 1257 
; UNKNOWN 5579 0x1258, // 1258 ; ETHIOPIC 5580 0x1259, // 1259 ; UNKNOWN 5581 0x125A, // 125A..125D; ETHIOPIC 5582 0x125E, // 125E..125F; UNKNOWN 5583 0x1260, // 1260..1288; ETHIOPIC 5584 0x1289, // 1289 ; UNKNOWN 5585 0x128A, // 128A..128D; ETHIOPIC 5586 0x128E, // 128E..128F; UNKNOWN 5587 0x1290, // 1290..12B0; ETHIOPIC 5588 0x12B1, // 12B1 ; UNKNOWN 5589 0x12B2, // 12B2..12B5; ETHIOPIC 5590 0x12B6, // 12B6..12B7; UNKNOWN 5591 0x12B8, // 12B8..12BE; ETHIOPIC 5592 0x12BF, // 12BF ; UNKNOWN 5593 0x12C0, // 12C0 ; ETHIOPIC 5594 0x12C1, // 12C1 ; UNKNOWN 5595 0x12C2, // 12C2..12C5; ETHIOPIC 5596 0x12C6, // 12C6..12C7; UNKNOWN 5597 0x12C8, // 12C8..12D6; ETHIOPIC 5598 0x12D7, // 12D7 ; UNKNOWN 5599 0x12D8, // 12D8..1310; ETHIOPIC 5600 0x1311, // 1311 ; UNKNOWN 5601 0x1312, // 1312..1315; ETHIOPIC 5602 0x1316, // 1316..1317; UNKNOWN 5603 0x1318, // 1318..135A; ETHIOPIC 5604 0x135B, // 135B..135C; UNKNOWN 5605 0x135D, // 135D..137C; ETHIOPIC 5606 0x137D, // 137D..137F; UNKNOWN 5607 0x1380, // 1380..1399; ETHIOPIC 5608 0x139A, // 139A..139F; UNKNOWN 5609 0x13A0, // 13A0..13F5; CHEROKEE 5610 0x13F6, // 13F6..13F7; UNKNOWN 5611 0x13F8, // 13F8..13FD; CHEROKEE 5612 0x13FE, // 13FE..13FF; UNKNOWN 5613 0x1400, // 1400..167F; CANADIAN_ABORIGINAL 5614 0x1680, // 1680..169C; OGHAM 5615 0x169D, // 169D..169F; UNKNOWN 5616 0x16A0, // 16A0..16EA; RUNIC 5617 0x16EB, // 16EB..16ED; COMMON 5618 0x16EE, // 16EE..16F8; RUNIC 5619 0x16F9, // 16F9..16FF; UNKNOWN 5620 0x1700, // 1700..170C; TAGALOG 5621 0x170D, // 170D ; UNKNOWN 5622 0x170E, // 170E..1714; TAGALOG 5623 0x1715, // 1715..171F; UNKNOWN 5624 0x1720, // 1720..1734; HANUNOO 5625 0x1735, // 1735..1736; COMMON 5626 0x1737, // 1737..173F; UNKNOWN 5627 0x1740, // 1740..1753; BUHID 5628 0x1754, // 1754..175F; UNKNOWN 5629 0x1760, // 1760..176C; TAGBANWA 5630 0x176D, // 176D ; UNKNOWN 5631 0x176E, // 176E..1770; TAGBANWA 5632 0x1771, // 1771 ; UNKNOWN 5633 0x1772, // 1772..1773; TAGBANWA 5634 0x1774, // 1774..177F; UNKNOWN 5635 
0x1780, // 1780..17DD; KHMER 5636 0x17DE, // 17DE..17DF; UNKNOWN 5637 0x17E0, // 17E0..17E9; KHMER 5638 0x17EA, // 17EA..17EF; UNKNOWN 5639 0x17F0, // 17F0..17F9; KHMER 5640 0x17FA, // 17FA..17FF; UNKNOWN 5641 0x1800, // 1800..1801; MONGOLIAN 5642 0x1802, // 1802..1803; COMMON 5643 0x1804, // 1804 ; MONGOLIAN 5644 0x1805, // 1805 ; COMMON 5645 0x1806, // 1806..180E; MONGOLIAN 5646 0x180F, // 180F ; UNKNOWN 5647 0x1810, // 1810..1819; MONGOLIAN 5648 0x181A, // 181A..181F; UNKNOWN 5649 0x1820, // 1820..1878; MONGOLIAN 5650 0x1879, // 1879..187F; UNKNOWN 5651 0x1880, // 1880..18AA; MONGOLIAN 5652 0x18AB, // 18AB..18AF; UNKNOWN 5653 0x18B0, // 18B0..18F5; CANADIAN_ABORIGINAL 5654 0x18F6, // 18F6..18FF; UNKNOWN 5655 0x1900, // 1900..191E; LIMBU 5656 0x191F, // 191F ; UNKNOWN 5657 0x1920, // 1920..192B; LIMBU 5658 0x192C, // 192C..192F; UNKNOWN 5659 0x1930, // 1930..193B; LIMBU 5660 0x193C, // 193C..193F; UNKNOWN 5661 0x1940, // 1940 ; LIMBU 5662 0x1941, // 1941..1943; UNKNOWN 5663 0x1944, // 1944..194F; LIMBU 5664 0x1950, // 1950..196D; TAI_LE 5665 0x196E, // 196E..196F; UNKNOWN 5666 0x1970, // 1970..1974; TAI_LE 5667 0x1975, // 1975..197F; UNKNOWN 5668 0x1980, // 1980..19AB; NEW_TAI_LUE 5669 0x19AC, // 19AC..19AF; UNKNOWN 5670 0x19B0, // 19B0..19C9; NEW_TAI_LUE 5671 0x19CA, // 19CA..19CF; UNKNOWN 5672 0x19D0, // 19D0..19DA; NEW_TAI_LUE 5673 0x19DB, // 19DB..19DD; UNKNOWN 5674 0x19DE, // 19DE..19DF; NEW_TAI_LUE 5675 0x19E0, // 19E0..19FF; KHMER 5676 0x1A00, // 1A00..1A1B; BUGINESE 5677 0x1A1C, // 1A1C..1A1D; UNKNOWN 5678 0x1A1E, // 1A1E..1A1F; BUGINESE 5679 0x1A20, // 1A20..1A5E; TAI_THAM 5680 0x1A5F, // 1A5F ; UNKNOWN 5681 0x1A60, // 1A60..1A7C; TAI_THAM 5682 0x1A7D, // 1A7D..1A7E; UNKNOWN 5683 0x1A7F, // 1A7F..1A89; TAI_THAM 5684 0x1A8A, // 1A8A..1A8F; UNKNOWN 5685 0x1A90, // 1A90..1A99; TAI_THAM 5686 0x1A9A, // 1A9A..1A9F; UNKNOWN 5687 0x1AA0, // 1AA0..1AAD; TAI_THAM 5688 0x1AAE, // 1AAE..1AAF; UNKNOWN 5689 0x1AB0, // 1AB0..1AC0; INHERITED 5690 0x1AC1, // 1AC1..1AFF; 
UNKNOWN 5691 0x1B00, // 1B00..1B4B; BALINESE 5692 0x1B4C, // 1B4C..1B4F; UNKNOWN 5693 0x1B50, // 1B50..1B7C; BALINESE 5694 0x1B7D, // 1B7D..1B7F; UNKNOWN 5695 0x1B80, // 1B80..1BBF; SUNDANESE 5696 0x1BC0, // 1BC0..1BF3; BATAK 5697 0x1BF4, // 1BF4..1BFB; UNKNOWN 5698 0x1BFC, // 1BFC..1BFF; BATAK 5699 0x1C00, // 1C00..1C37; LEPCHA 5700 0x1C38, // 1C38..1C3A; UNKNOWN 5701 0x1C3B, // 1C3B..1C49; LEPCHA 5702 0x1C4A, // 1C4A..1C4C; UNKNOWN 5703 0x1C4D, // 1C4D..1C4F; LEPCHA 5704 0x1C50, // 1C50..1C7F; OL_CHIKI 5705 0x1C80, // 1C80..1C88; CYRILLIC 5706 0x1C89, // 1C89..1C8F; UNKNOWN 5707 0x1C90, // 1C90..1CBA; GEORGIAN 5708 0x1CBB, // 1CBB..1CBC; UNKNOWN 5709 0x1CBD, // 1CBD..1CBF; GEORGIAN 5710 0x1CC0, // 1CC0..1CC7; SUNDANESE 5711 0x1CC8, // 1CC8..1CCF; UNKNOWN 5712 0x1CD0, // 1CD0..1CD2; INHERITED 5713 0x1CD3, // 1CD3 ; COMMON 5714 0x1CD4, // 1CD4..1CE0; INHERITED 5715 0x1CE1, // 1CE1 ; COMMON 5716 0x1CE2, // 1CE2..1CE8; INHERITED 5717 0x1CE9, // 1CE9..1CEC; COMMON 5718 0x1CED, // 1CED ; INHERITED 5719 0x1CEE, // 1CEE..1CF3; COMMON 5720 0x1CF4, // 1CF4 ; INHERITED 5721 0x1CF5, // 1CF5..1CF7; COMMON 5722 0x1CF8, // 1CF8..1CF9; INHERITED 5723 0x1CFA, // 1CFA ; COMMON 5724 0x1CFB, // 1CFB..1CFF; UNKNOWN 5725 0x1D00, // 1D00..1D25; LATIN 5726 0x1D26, // 1D26..1D2A; GREEK 5727 0x1D2B, // 1D2B ; CYRILLIC 5728 0x1D2C, // 1D2C..1D5C; LATIN 5729 0x1D5D, // 1D5D..1D61; GREEK 5730 0x1D62, // 1D62..1D65; LATIN 5731 0x1D66, // 1D66..1D6A; GREEK 5732 0x1D6B, // 1D6B..1D77; LATIN 5733 0x1D78, // 1D78 ; CYRILLIC 5734 0x1D79, // 1D79..1DBE; LATIN 5735 0x1DBF, // 1DBF ; GREEK 5736 0x1DC0, // 1DC0..1DF9; INHERITED 5737 0x1DFA, // 1DFA ; UNKNOWN 5738 0x1DFB, // 1DFB..1DFF; INHERITED 5739 0x1E00, // 1E00..1EFF; LATIN 5740 0x1F00, // 1F00..1F15; GREEK 5741 0x1F16, // 1F16..1F17; UNKNOWN 5742 0x1F18, // 1F18..1F1D; GREEK 5743 0x1F1E, // 1F1E..1F1F; UNKNOWN 5744 0x1F20, // 1F20..1F45; GREEK 5745 0x1F46, // 1F46..1F47; UNKNOWN 5746 0x1F48, // 1F48..1F4D; GREEK 5747 0x1F4E, // 1F4E..1F4F; 
UNKNOWN 5748 0x1F50, // 1F50..1F57; GREEK 5749 0x1F58, // 1F58 ; UNKNOWN 5750 0x1F59, // 1F59 ; GREEK 5751 0x1F5A, // 1F5A ; UNKNOWN 5752 0x1F5B, // 1F5B ; GREEK 5753 0x1F5C, // 1F5C ; UNKNOWN 5754 0x1F5D, // 1F5D ; GREEK 5755 0x1F5E, // 1F5E ; UNKNOWN 5756 0x1F5F, // 1F5F..1F7D; GREEK 5757 0x1F7E, // 1F7E..1F7F; UNKNOWN 5758 0x1F80, // 1F80..1FB4; GREEK 5759 0x1FB5, // 1FB5 ; UNKNOWN 5760 0x1FB6, // 1FB6..1FC4; GREEK 5761 0x1FC5, // 1FC5 ; UNKNOWN 5762 0x1FC6, // 1FC6..1FD3; GREEK 5763 0x1FD4, // 1FD4..1FD5; UNKNOWN 5764 0x1FD6, // 1FD6..1FDB; GREEK 5765 0x1FDC, // 1FDC ; UNKNOWN 5766 0x1FDD, // 1FDD..1FEF; GREEK 5767 0x1FF0, // 1FF0..1FF1; UNKNOWN 5768 0x1FF2, // 1FF2..1FF4; GREEK 5769 0x1FF5, // 1FF5 ; UNKNOWN 5770 0x1FF6, // 1FF6..1FFE; GREEK 5771 0x1FFF, // 1FFF ; UNKNOWN 5772 0x2000, // 2000..200B; COMMON 5773 0x200C, // 200C..200D; INHERITED 5774 0x200E, // 200E..2064; COMMON 5775 0x2065, // 2065 ; UNKNOWN 5776 0x2066, // 2066..2070; COMMON 5777 0x2071, // 2071 ; LATIN 5778 0x2072, // 2072..2073; UNKNOWN 5779 0x2074, // 2074..207E; COMMON 5780 0x207F, // 207F ; LATIN 5781 0x2080, // 2080..208E; COMMON 5782 0x208F, // 208F ; UNKNOWN 5783 0x2090, // 2090..209C; LATIN 5784 0x209D, // 209D..209F; UNKNOWN 5785 0x20A0, // 20A0..20BF; COMMON 5786 0x20C0, // 20C0..20CF; UNKNOWN 5787 0x20D0, // 20D0..20F0; INHERITED 5788 0x20F1, // 20F1..20FF; UNKNOWN 5789 0x2100, // 2100..2125; COMMON 5790 0x2126, // 2126 ; GREEK 5791 0x2127, // 2127..2129; COMMON 5792 0x212A, // 212A..212B; LATIN 5793 0x212C, // 212C..2131; COMMON 5794 0x2132, // 2132 ; LATIN 5795 0x2133, // 2133..214D; COMMON 5796 0x214E, // 214E ; LATIN 5797 0x214F, // 214F..215F; COMMON 5798 0x2160, // 2160..2188; LATIN 5799 0x2189, // 2189..218B; COMMON 5800 0x218C, // 218C..218F; UNKNOWN 5801 0x2190, // 2190..2426; COMMON 5802 0x2427, // 2427..243F; UNKNOWN 5803 0x2440, // 2440..244A; COMMON 5804 0x244B, // 244B..245F; UNKNOWN 5805 0x2460, // 2460..27FF; COMMON 5806 0x2800, // 2800..28FF; BRAILLE 5807 0x2900, 
// 2900..2B73; COMMON 5808 0x2B74, // 2B74..2B75; UNKNOWN 5809 0x2B76, // 2B76..2B95; COMMON 5810 0x2B96, // 2B96 ; UNKNOWN 5811 0x2B97, // 2B97..2BFF; COMMON 5812 0x2C00, // 2C00..2C2E; GLAGOLITIC 5813 0x2C2F, // 2C2F ; UNKNOWN 5814 0x2C30, // 2C30..2C5E; GLAGOLITIC 5815 0x2C5F, // 2C5F ; UNKNOWN 5816 0x2C60, // 2C60..2C7F; LATIN 5817 0x2C80, // 2C80..2CF3; COPTIC 5818 0x2CF4, // 2CF4..2CF8; UNKNOWN 5819 0x2CF9, // 2CF9..2CFF; COPTIC 5820 0x2D00, // 2D00..2D25; GEORGIAN 5821 0x2D26, // 2D26 ; UNKNOWN 5822 0x2D27, // 2D27 ; GEORGIAN 5823 0x2D28, // 2D28..2D2C; UNKNOWN 5824 0x2D2D, // 2D2D ; GEORGIAN 5825 0x2D2E, // 2D2E..2D2F; UNKNOWN 5826 0x2D30, // 2D30..2D67; TIFINAGH 5827 0x2D68, // 2D68..2D6E; UNKNOWN 5828 0x2D6F, // 2D6F..2D70; TIFINAGH 5829 0x2D71, // 2D71..2D7E; UNKNOWN 5830 0x2D7F, // 2D7F ; TIFINAGH 5831 0x2D80, // 2D80..2D96; ETHIOPIC 5832 0x2D97, // 2D97..2D9F; UNKNOWN 5833 0x2DA0, // 2DA0..2DA6; ETHIOPIC 5834 0x2DA7, // 2DA7 ; UNKNOWN 5835 0x2DA8, // 2DA8..2DAE; ETHIOPIC 5836 0x2DAF, // 2DAF ; UNKNOWN 5837 0x2DB0, // 2DB0..2DB6; ETHIOPIC 5838 0x2DB7, // 2DB7 ; UNKNOWN 5839 0x2DB8, // 2DB8..2DBE; ETHIOPIC 5840 0x2DBF, // 2DBF ; UNKNOWN 5841 0x2DC0, // 2DC0..2DC6; ETHIOPIC 5842 0x2DC7, // 2DC7 ; UNKNOWN 5843 0x2DC8, // 2DC8..2DCE; ETHIOPIC 5844 0x2DCF, // 2DCF ; UNKNOWN 5845 0x2DD0, // 2DD0..2DD6; ETHIOPIC 5846 0x2DD7, // 2DD7 ; UNKNOWN 5847 0x2DD8, // 2DD8..2DDE; ETHIOPIC 5848 0x2DDF, // 2DDF ; UNKNOWN 5849 0x2DE0, // 2DE0..2DFF; CYRILLIC 5850 0x2E00, // 2E00..2E52; COMMON 5851 0x2E53, // 2E53..2E7F; UNKNOWN 5852 0x2E80, // 2E80..2E99; HAN 5853 0x2E9A, // 2E9A ; UNKNOWN 5854 0x2E9B, // 2E9B..2EF3; HAN 5855 0x2EF4, // 2EF4..2EFF; UNKNOWN 5856 0x2F00, // 2F00..2FD5; HAN 5857 0x2FD6, // 2FD6..2FEF; UNKNOWN 5858 0x2FF0, // 2FF0..2FFB; COMMON 5859 0x2FFC, // 2FFC..2FFF; UNKNOWN 5860 0x3000, // 3000..3004; COMMON 5861 0x3005, // 3005 ; HAN 5862 0x3006, // 3006 ; COMMON 5863 0x3007, // 3007 ; HAN 5864 0x3008, // 3008..3020; COMMON 5865 0x3021, // 3021..3029; 
HAN 5866 0x302A, // 302A..302D; INHERITED 5867 0x302E, // 302E..302F; HANGUL 5868 0x3030, // 3030..3037; COMMON 5869 0x3038, // 3038..303B; HAN 5870 0x303C, // 303C..303F; COMMON 5871 0x3040, // 3040 ; UNKNOWN 5872 0x3041, // 3041..3096; HIRAGANA 5873 0x3097, // 3097..3098; UNKNOWN 5874 0x3099, // 3099..309A; INHERITED 5875 0x309B, // 309B..309C; COMMON 5876 0x309D, // 309D..309F; HIRAGANA 5877 0x30A0, // 30A0 ; COMMON 5878 0x30A1, // 30A1..30FA; KATAKANA 5879 0x30FB, // 30FB..30FC; COMMON 5880 0x30FD, // 30FD..30FF; KATAKANA 5881 0x3100, // 3100..3104; UNKNOWN 5882 0x3105, // 3105..312F; BOPOMOFO 5883 0x3130, // 3130 ; UNKNOWN 5884 0x3131, // 3131..318E; HANGUL 5885 0x318F, // 318F ; UNKNOWN 5886 0x3190, // 3190..319F; COMMON 5887 0x31A0, // 31A0..31BF; BOPOMOFO 5888 0x31C0, // 31C0..31E3; COMMON 5889 0x31E4, // 31E4..31EF; UNKNOWN 5890 0x31F0, // 31F0..31FF; KATAKANA 5891 0x3200, // 3200..321E; HANGUL 5892 0x321F, // 321F ; UNKNOWN 5893 0x3220, // 3220..325F; COMMON 5894 0x3260, // 3260..327E; HANGUL 5895 0x327F, // 327F..32CF; COMMON 5896 0x32D0, // 32D0..32FE; KATAKANA 5897 0x32FF, // 32FF ; COMMON 5898 0x3300, // 3300..3357; KATAKANA 5899 0x3358, // 3358..33FF; COMMON 5900 0x3400, // 3400..4DBF; HAN 5901 0x4DC0, // 4DC0..4DFF; COMMON 5902 0x4E00, // 4E00..9FFC; HAN 5903 0x9FFD, // 9FFD..9FFF; UNKNOWN 5904 0xA000, // A000..A48C; YI 5905 0xA48D, // A48D..A48F; UNKNOWN 5906 0xA490, // A490..A4C6; YI 5907 0xA4C7, // A4C7..A4CF; UNKNOWN 5908 0xA4D0, // A4D0..A4FF; LISU 5909 0xA500, // A500..A62B; VAI 5910 0xA62C, // A62C..A63F; UNKNOWN 5911 0xA640, // A640..A69F; CYRILLIC 5912 0xA6A0, // A6A0..A6F7; BAMUM 5913 0xA6F8, // A6F8..A6FF; UNKNOWN 5914 0xA700, // A700..A721; COMMON 5915 0xA722, // A722..A787; LATIN 5916 0xA788, // A788..A78A; COMMON 5917 0xA78B, // A78B..A7BF; LATIN 5918 0xA7C0, // A7C0..A7C1; UNKNOWN 5919 0xA7C2, // A7C2..A7CA; LATIN 5920 0xA7CB, // A7CB..A7F4; UNKNOWN 5921 0xA7F5, // A7F5..A7FF; LATIN 5922 0xA800, // A800..A82C; SYLOTI_NAGRI 5923 
0xA82D, // A82D..A82F; UNKNOWN 5924 0xA830, // A830..A839; COMMON 5925 0xA83A, // A83A..A83F; UNKNOWN 5926 0xA840, // A840..A877; PHAGS_PA 5927 0xA878, // A878..A87F; UNKNOWN 5928 0xA880, // A880..A8C5; SAURASHTRA 5929 0xA8C6, // A8C6..A8CD; UNKNOWN 5930 0xA8CE, // A8CE..A8D9; SAURASHTRA 5931 0xA8DA, // A8DA..A8DF; UNKNOWN 5932 0xA8E0, // A8E0..A8FF; DEVANAGARI 5933 0xA900, // A900..A92D; KAYAH_LI 5934 0xA92E, // A92E ; COMMON 5935 0xA92F, // A92F ; KAYAH_LI 5936 0xA930, // A930..A953; REJANG 5937 0xA954, // A954..A95E; UNKNOWN 5938 0xA95F, // A95F ; REJANG 5939 0xA960, // A960..A97C; HANGUL 5940 0xA97D, // A97D..A97F; UNKNOWN 5941 0xA980, // A980..A9CD; JAVANESE 5942 0xA9CE, // A9CE ; UNKNOWN 5943 0xA9CF, // A9CF ; COMMON 5944 0xA9D0, // A9D0..A9D9; JAVANESE 5945 0xA9DA, // A9DA..A9DD; UNKNOWN 5946 0xA9DE, // A9DE..A9DF; JAVANESE 5947 0xA9E0, // A9E0..A9FE; MYANMAR 5948 0xA9FF, // A9FF ; UNKNOWN 5949 0xAA00, // AA00..AA36; CHAM 5950 0xAA37, // AA37..AA3F; UNKNOWN 5951 0xAA40, // AA40..AA4D; CHAM 5952 0xAA4E, // AA4E..AA4F; UNKNOWN 5953 0xAA50, // AA50..AA59; CHAM 5954 0xAA5A, // AA5A..AA5B; UNKNOWN 5955 0xAA5C, // AA5C..AA5F; CHAM 5956 0xAA60, // AA60..AA7F; MYANMAR 5957 0xAA80, // AA80..AAC2; TAI_VIET 5958 0xAAC3, // AAC3..AADA; UNKNOWN 5959 0xAADB, // AADB..AADF; TAI_VIET 5960 0xAAE0, // AAE0..AAF6; MEETEI_MAYEK 5961 0xAAF7, // AAF7..AB00; UNKNOWN 5962 0xAB01, // AB01..AB06; ETHIOPIC 5963 0xAB07, // AB07..AB08; UNKNOWN 5964 0xAB09, // AB09..AB0E; ETHIOPIC 5965 0xAB0F, // AB0F..AB10; UNKNOWN 5966 0xAB11, // AB11..AB16; ETHIOPIC 5967 0xAB17, // AB17..AB1F; UNKNOWN 5968 0xAB20, // AB20..AB26; ETHIOPIC 5969 0xAB27, // AB27 ; UNKNOWN 5970 0xAB28, // AB28..AB2E; ETHIOPIC 5971 0xAB2F, // AB2F ; UNKNOWN 5972 0xAB30, // AB30..AB5A; LATIN 5973 0xAB5B, // AB5B ; COMMON 5974 0xAB5C, // AB5C..AB64; LATIN 5975 0xAB65, // AB65 ; GREEK 5976 0xAB66, // AB66..AB69; LATIN 5977 0xAB6A, // AB6A..AB6B; COMMON 5978 0xAB6C, // AB6C..AB6F; UNKNOWN 5979 0xAB70, // AB70..ABBF; CHEROKEE 
5980 0xABC0, // ABC0..ABED; MEETEI_MAYEK 5981 0xABEE, // ABEE..ABEF; UNKNOWN 5982 0xABF0, // ABF0..ABF9; MEETEI_MAYEK 5983 0xABFA, // ABFA..ABFF; UNKNOWN 5984 0xAC00, // AC00..D7A3; HANGUL 5985 0xD7A4, // D7A4..D7AF; UNKNOWN 5986 0xD7B0, // D7B0..D7C6; HANGUL 5987 0xD7C7, // D7C7..D7CA; UNKNOWN 5988 0xD7CB, // D7CB..D7FB; HANGUL 5989 0xD7FC, // D7FC..F8FF; UNKNOWN 5990 0xF900, // F900..FA6D; HAN 5991 0xFA6E, // FA6E..FA6F; UNKNOWN 5992 0xFA70, // FA70..FAD9; HAN 5993 0xFADA, // FADA..FAFF; UNKNOWN 5994 0xFB00, // FB00..FB06; LATIN 5995 0xFB07, // FB07..FB12; UNKNOWN 5996 0xFB13, // FB13..FB17; ARMENIAN 5997 0xFB18, // FB18..FB1C; UNKNOWN 5998 0xFB1D, // FB1D..FB36; HEBREW 5999 0xFB37, // FB37 ; UNKNOWN 6000 0xFB38, // FB38..FB3C; HEBREW 6001 0xFB3D, // FB3D ; UNKNOWN 6002 0xFB3E, // FB3E ; HEBREW 6003 0xFB3F, // FB3F ; UNKNOWN 6004 0xFB40, // FB40..FB41; HEBREW 6005 0xFB42, // FB42 ; UNKNOWN 6006 0xFB43, // FB43..FB44; HEBREW 6007 0xFB45, // FB45 ; UNKNOWN 6008 0xFB46, // FB46..FB4F; HEBREW 6009 0xFB50, // FB50..FBC1; ARABIC 6010 0xFBC2, // FBC2..FBD2; UNKNOWN 6011 0xFBD3, // FBD3..FD3D; ARABIC 6012 0xFD3E, // FD3E..FD3F; COMMON 6013 0xFD40, // FD40..FD4F; UNKNOWN 6014 0xFD50, // FD50..FD8F; ARABIC 6015 0xFD90, // FD90..FD91; UNKNOWN 6016 0xFD92, // FD92..FDC7; ARABIC 6017 0xFDC8, // FDC8..FDEF; UNKNOWN 6018 0xFDF0, // FDF0..FDFD; ARABIC 6019 0xFDFE, // FDFE..FDFF; UNKNOWN 6020 0xFE00, // FE00..FE0F; INHERITED 6021 0xFE10, // FE10..FE19; COMMON 6022 0xFE1A, // FE1A..FE1F; UNKNOWN 6023 0xFE20, // FE20..FE2D; INHERITED 6024 0xFE2E, // FE2E..FE2F; CYRILLIC 6025 0xFE30, // FE30..FE52; COMMON 6026 0xFE53, // FE53 ; UNKNOWN 6027 0xFE54, // FE54..FE66; COMMON 6028 0xFE67, // FE67 ; UNKNOWN 6029 0xFE68, // FE68..FE6B; COMMON 6030 0xFE6C, // FE6C..FE6F; UNKNOWN 6031 0xFE70, // FE70..FE74; ARABIC 6032 0xFE75, // FE75 ; UNKNOWN 6033 0xFE76, // FE76..FEFC; ARABIC 6034 0xFEFD, // FEFD..FEFE; UNKNOWN 6035 0xFEFF, // FEFF ; COMMON 6036 0xFF00, // FF00 ; UNKNOWN 6037 0xFF01, // 
FF01..FF20; COMMON 6038 0xFF21, // FF21..FF3A; LATIN 6039 0xFF3B, // FF3B..FF40; COMMON 6040 0xFF41, // FF41..FF5A; LATIN 6041 0xFF5B, // FF5B..FF65; COMMON 6042 0xFF66, // FF66..FF6F; KATAKANA 6043 0xFF70, // FF70 ; COMMON 6044 0xFF71, // FF71..FF9D; KATAKANA 6045 0xFF9E, // FF9E..FF9F; COMMON 6046 0xFFA0, // FFA0..FFBE; HANGUL 6047 0xFFBF, // FFBF..FFC1; UNKNOWN 6048 0xFFC2, // FFC2..FFC7; HANGUL 6049 0xFFC8, // FFC8..FFC9; UNKNOWN 6050 0xFFCA, // FFCA..FFCF; HANGUL 6051 0xFFD0, // FFD0..FFD1; UNKNOWN 6052 0xFFD2, // FFD2..FFD7; HANGUL 6053 0xFFD8, // FFD8..FFD9; UNKNOWN 6054 0xFFDA, // FFDA..FFDC; HANGUL 6055 0xFFDD, // FFDD..FFDF; UNKNOWN 6056 0xFFE0, // FFE0..FFE6; COMMON 6057 0xFFE7, // FFE7 ; UNKNOWN 6058 0xFFE8, // FFE8..FFEE; COMMON 6059 0xFFEF, // FFEF..FFF8; UNKNOWN 6060 0xFFF9, // FFF9..FFFD; COMMON 6061 0xFFFE, // FFFE..FFFF; UNKNOWN 6062 0x10000, // 10000..1000B; LINEAR_B 6063 0x1000C, // 1000C ; UNKNOWN 6064 0x1000D, // 1000D..10026; LINEAR_B 6065 0x10027, // 10027 ; UNKNOWN 6066 0x10028, // 10028..1003A; LINEAR_B 6067 0x1003B, // 1003B ; UNKNOWN 6068 0x1003C, // 1003C..1003D; LINEAR_B 6069 0x1003E, // 1003E ; UNKNOWN 6070 0x1003F, // 1003F..1004D; LINEAR_B 6071 0x1004E, // 1004E..1004F; UNKNOWN 6072 0x10050, // 10050..1005D; LINEAR_B 6073 0x1005E, // 1005E..1007F; UNKNOWN 6074 0x10080, // 10080..100FA; LINEAR_B 6075 0x100FB, // 100FB..100FF; UNKNOWN 6076 0x10100, // 10100..10102; COMMON 6077 0x10103, // 10103..10106; UNKNOWN 6078 0x10107, // 10107..10133; COMMON 6079 0x10134, // 10134..10136; UNKNOWN 6080 0x10137, // 10137..1013F; COMMON 6081 0x10140, // 10140..1018E; GREEK 6082 0x1018F, // 1018F ; UNKNOWN 6083 0x10190, // 10190..1019C; COMMON 6084 0x1019D, // 1019D..1019F; UNKNOWN 6085 0x101A0, // 101A0 ; GREEK 6086 0x101A1, // 101A1..101CF; UNKNOWN 6087 0x101D0, // 101D0..101FC; COMMON 6088 0x101FD, // 101FD ; INHERITED 6089 0x101FE, // 101FE..1027F; UNKNOWN 6090 0x10280, // 10280..1029C; LYCIAN 6091 0x1029D, // 1029D..1029F; UNKNOWN 6092 0x102A0, 
// 102A0..102D0; CARIAN 6093 0x102D1, // 102D1..102DF; UNKNOWN 6094 0x102E0, // 102E0 ; INHERITED 6095 0x102E1, // 102E1..102FB; COMMON 6096 0x102FC, // 102FC..102FF; UNKNOWN 6097 0x10300, // 10300..10323; OLD_ITALIC 6098 0x10324, // 10324..1032C; UNKNOWN 6099 0x1032D, // 1032D..1032F; OLD_ITALIC 6100 0x10330, // 10330..1034A; GOTHIC 6101 0x1034B, // 1034B..1034F; UNKNOWN 6102 0x10350, // 10350..1037A; OLD_PERMIC 6103 0x1037B, // 1037B..1037F; UNKNOWN 6104 0x10380, // 10380..1039D; UGARITIC 6105 0x1039E, // 1039E ; UNKNOWN 6106 0x1039F, // 1039F ; UGARITIC 6107 0x103A0, // 103A0..103C3; OLD_PERSIAN 6108 0x103C4, // 103C4..103C7; UNKNOWN 6109 0x103C8, // 103C8..103D5; OLD_PERSIAN 6110 0x103D6, // 103D6..103FF; UNKNOWN 6111 0x10400, // 10400..1044F; DESERET 6112 0x10450, // 10450..1047F; SHAVIAN 6113 0x10480, // 10480..1049D; OSMANYA 6114 0x1049E, // 1049E..1049F; UNKNOWN 6115 0x104A0, // 104A0..104A9; OSMANYA 6116 0x104AA, // 104AA..104AF; UNKNOWN 6117 0x104B0, // 104B0..104D3; OSAGE 6118 0x104D4, // 104D4..104D7; UNKNOWN 6119 0x104D8, // 104D8..104FB; OSAGE 6120 0x104FC, // 104FC..104FF; UNKNOWN 6121 0x10500, // 10500..10527; ELBASAN 6122 0x10528, // 10528..1052F; UNKNOWN 6123 0x10530, // 10530..10563; CAUCASIAN_ALBANIAN 6124 0x10564, // 10564..1056E; UNKNOWN 6125 0x1056F, // 1056F ; CAUCASIAN_ALBANIAN 6126 0x10570, // 10570..105FF; UNKNOWN 6127 0x10600, // 10600..10736; LINEAR_A 6128 0x10737, // 10737..1073F; UNKNOWN 6129 0x10740, // 10740..10755; LINEAR_A 6130 0x10756, // 10756..1075F; UNKNOWN 6131 0x10760, // 10760..10767; LINEAR_A 6132 0x10768, // 10768..107FF; UNKNOWN 6133 0x10800, // 10800..10805; CYPRIOT 6134 0x10806, // 10806..10807; UNKNOWN 6135 0x10808, // 10808 ; CYPRIOT 6136 0x10809, // 10809 ; UNKNOWN 6137 0x1080A, // 1080A..10835; CYPRIOT 6138 0x10836, // 10836 ; UNKNOWN 6139 0x10837, // 10837..10838; CYPRIOT 6140 0x10839, // 10839..1083B; UNKNOWN 6141 0x1083C, // 1083C ; CYPRIOT 6142 0x1083D, // 1083D..1083E; UNKNOWN 6143 0x1083F, // 1083F ; CYPRIOT 
6144 0x10840, // 10840..10855; IMPERIAL_ARAMAIC 6145 0x10856, // 10856 ; UNKNOWN 6146 0x10857, // 10857..1085F; IMPERIAL_ARAMAIC 6147 0x10860, // 10860..1087F; PALMYRENE 6148 0x10880, // 10880..1089E; NABATAEAN 6149 0x1089F, // 1089F..108A6; UNKNOWN 6150 0x108A7, // 108A7..108AF; NABATAEAN 6151 0x108B0, // 108B0..108DF; UNKNOWN 6152 0x108E0, // 108E0..108F2; HATRAN 6153 0x108F3, // 108F3 ; UNKNOWN 6154 0x108F4, // 108F4..108F5; HATRAN 6155 0x108F6, // 108F6..108FA; UNKNOWN 6156 0x108FB, // 108FB..108FF; HATRAN 6157 0x10900, // 10900..1091B; PHOENICIAN 6158 0x1091C, // 1091C..1091E; UNKNOWN 6159 0x1091F, // 1091F ; PHOENICIAN 6160 0x10920, // 10920..10939; LYDIAN 6161 0x1093A, // 1093A..1093E; UNKNOWN 6162 0x1093F, // 1093F ; LYDIAN 6163 0x10940, // 10940..1097F; UNKNOWN 6164 0x10980, // 10980..1099F; MEROITIC_HIEROGLYPHS 6165 0x109A0, // 109A0..109B7; MEROITIC_CURSIVE 6166 0x109B8, // 109B8..109BB; UNKNOWN 6167 0x109BC, // 109BC..109CF; MEROITIC_CURSIVE 6168 0x109D0, // 109D0..109D1; UNKNOWN 6169 0x109D2, // 109D2..109FF; MEROITIC_CURSIVE 6170 0x10A00, // 10A00..10A03; KHAROSHTHI 6171 0x10A04, // 10A04 ; UNKNOWN 6172 0x10A05, // 10A05..10A06; KHAROSHTHI 6173 0x10A07, // 10A07..10A0B; UNKNOWN 6174 0x10A0C, // 10A0C..10A13; KHAROSHTHI 6175 0x10A14, // 10A14 ; UNKNOWN 6176 0x10A15, // 10A15..10A17; KHAROSHTHI 6177 0x10A18, // 10A18 ; UNKNOWN 6178 0x10A19, // 10A19..10A35; KHAROSHTHI 6179 0x10A36, // 10A36..10A37; UNKNOWN 6180 0x10A38, // 10A38..10A3A; KHAROSHTHI 6181 0x10A3B, // 10A3B..10A3E; UNKNOWN 6182 0x10A3F, // 10A3F..10A48; KHAROSHTHI 6183 0x10A49, // 10A49..10A4F; UNKNOWN 6184 0x10A50, // 10A50..10A58; KHAROSHTHI 6185 0x10A59, // 10A59..10A5F; UNKNOWN 6186 0x10A60, // 10A60..10A7F; OLD_SOUTH_ARABIAN 6187 0x10A80, // 10A80..10A9F; OLD_NORTH_ARABIAN 6188 0x10AA0, // 10AA0..10ABF; UNKNOWN 6189 0x10AC0, // 10AC0..10AE6; MANICHAEAN 6190 0x10AE7, // 10AE7..10AEA; UNKNOWN 6191 0x10AEB, // 10AEB..10AF6; MANICHAEAN 6192 0x10AF7, // 10AF7..10AFF; UNKNOWN 6193 0x10B00, 
// 10B00..10B35; AVESTAN 6194 0x10B36, // 10B36..10B38; UNKNOWN 6195 0x10B39, // 10B39..10B3F; AVESTAN 6196 0x10B40, // 10B40..10B55; INSCRIPTIONAL_PARTHIAN 6197 0x10B56, // 10B56..10B57; UNKNOWN 6198 0x10B58, // 10B58..10B5F; INSCRIPTIONAL_PARTHIAN 6199 0x10B60, // 10B60..10B72; INSCRIPTIONAL_PAHLAVI 6200 0x10B73, // 10B73..10B77; UNKNOWN 6201 0x10B78, // 10B78..10B7F; INSCRIPTIONAL_PAHLAVI 6202 0x10B80, // 10B80..10B91; PSALTER_PAHLAVI 6203 0x10B92, // 10B92..10B98; UNKNOWN 6204 0x10B99, // 10B99..10B9C; PSALTER_PAHLAVI 6205 0x10B9D, // 10B9D..10BA8; UNKNOWN 6206 0x10BA9, // 10BA9..10BAF; PSALTER_PAHLAVI 6207 0x10BB0, // 10BB0..10BFF; UNKNOWN 6208 0x10C00, // 10C00..10C48; OLD_TURKIC 6209 0x10C49, // 10C49..10C7F; UNKNOWN 6210 0x10C80, // 10C80..10CB2; OLD_HUNGARIAN 6211 0x10CB3, // 10CB3..10CBF; UNKNOWN 6212 0x10CC0, // 10CC0..10CF2; OLD_HUNGARIAN 6213 0x10CF3, // 10CF3..10CF9; UNKNOWN 6214 0x10CFA, // 10CFA..10CFF; OLD_HUNGARIAN 6215 0x10D00, // 10D00..10D27; HANIFI_ROHINGYA 6216 0x10D28, // 10D28..10D2F; UNKNOWN 6217 0x10D30, // 10D30..10D39; HANIFI_ROHINGYA 6218 0x10D3A, // 10D3A..10E5F; UNKNOWN 6219 0x10E60, // 10E60..10E7E; ARABIC 6220 0x10E7F, // 10E7F ; UNKNOWN 6221 0x10E80, // 10E80..10EA9; YEZIDI 6222 0x10EAA, // 10EAA ; UNKNOWN 6223 0x10EAB, // 10EAB..10EAD; YEZIDI 6224 0x10EAE, // 10EAE..10EAF; UNKNOWN 6225 0x10EB0, // 10EB0..10EB1; YEZIDI 6226 0x10EB2, // 10EB2..10EFF; UNKNOWN 6227 0x10F00, // 10F00..10F27; OLD_SOGDIAN 6228 0x10F28, // 10F28..10F2F; UNKNOWN 6229 0x10F30, // 10F30..10F59; SOGDIAN 6230 0x10F5A, // 10F5A..10FAF; UNKNOWN 6231 0x10FB0, // 10FB0..10FCB; CHORASMIAN 6232 0x10FCC, // 10FCC..10FDF; UNKNOWN 6233 0x10FE0, // 10FE0..10FF6; ELYMAIC 6234 0x10FF7, // 10FF7..10FFF; UNKNOWN 6235 0x11000, // 11000..1104D; BRAHMI 6236 0x1104E, // 1104E..11051; UNKNOWN 6237 0x11052, // 11052..1106F; BRAHMI 6238 0x11070, // 11070..1107E; UNKNOWN 6239 0x1107F, // 1107F ; BRAHMI 6240 0x11080, // 11080..110C1; KAITHI 6241 0x110C2, // 110C2..110CC; UNKNOWN 
6242 0x110CD, // 110CD ; KAITHI 6243 0x110CE, // 110CE..110CF; UNKNOWN 6244 0x110D0, // 110D0..110E8; SORA_SOMPENG 6245 0x110E9, // 110E9..110EF; UNKNOWN 6246 0x110F0, // 110F0..110F9; SORA_SOMPENG 6247 0x110FA, // 110FA..110FF; UNKNOWN 6248 0x11100, // 11100..11134; CHAKMA 6249 0x11135, // 11135 ; UNKNOWN 6250 0x11136, // 11136..11147; CHAKMA 6251 0x11148, // 11148..1114F; UNKNOWN 6252 0x11150, // 11150..11176; MAHAJANI 6253 0x11177, // 11177..1117F; UNKNOWN 6254 0x11180, // 11180..111DF; SHARADA 6255 0x111E0, // 111E0 ; UNKNOWN 6256 0x111E1, // 111E1..111F4; SINHALA 6257 0x111F5, // 111F5..111FF; UNKNOWN 6258 0x11200, // 11200..11211; KHOJKI 6259 0x11212, // 11212 ; UNKNOWN 6260 0x11213, // 11213..1123E; KHOJKI 6261 0x1123F, // 1123F..1127F; UNKNOWN 6262 0x11280, // 11280..11286; MULTANI 6263 0x11287, // 11287 ; UNKNOWN 6264 0x11288, // 11288 ; MULTANI 6265 0x11289, // 11289 ; UNKNOWN 6266 0x1128A, // 1128A..1128D; MULTANI 6267 0x1128E, // 1128E ; UNKNOWN 6268 0x1128F, // 1128F..1129D; MULTANI 6269 0x1129E, // 1129E ; UNKNOWN 6270 0x1129F, // 1129F..112A9; MULTANI 6271 0x112AA, // 112AA..112AF; UNKNOWN 6272 0x112B0, // 112B0..112EA; KHUDAWADI 6273 0x112EB, // 112EB..112EF; UNKNOWN 6274 0x112F0, // 112F0..112F9; KHUDAWADI 6275 0x112FA, // 112FA..112FF; UNKNOWN 6276 0x11300, // 11300..11303; GRANTHA 6277 0x11304, // 11304 ; UNKNOWN 6278 0x11305, // 11305..1130C; GRANTHA 6279 0x1130D, // 1130D..1130E; UNKNOWN 6280 0x1130F, // 1130F..11310; GRANTHA 6281 0x11311, // 11311..11312; UNKNOWN 6282 0x11313, // 11313..11328; GRANTHA 6283 0x11329, // 11329 ; UNKNOWN 6284 0x1132A, // 1132A..11330; GRANTHA 6285 0x11331, // 11331 ; UNKNOWN 6286 0x11332, // 11332..11333; GRANTHA 6287 0x11334, // 11334 ; UNKNOWN 6288 0x11335, // 11335..11339; GRANTHA 6289 0x1133A, // 1133A ; UNKNOWN 6290 0x1133B, // 1133B ; INHERITED 6291 0x1133C, // 1133C..11344; GRANTHA 6292 0x11345, // 11345..11346; UNKNOWN 6293 0x11347, // 11347..11348; GRANTHA 6294 0x11349, // 11349..1134A; UNKNOWN 6295 
0x1134B, // 1134B..1134D; GRANTHA 6296 0x1134E, // 1134E..1134F; UNKNOWN 6297 0x11350, // 11350 ; GRANTHA 6298 0x11351, // 11351..11356; UNKNOWN 6299 0x11357, // 11357 ; GRANTHA 6300 0x11358, // 11358..1135C; UNKNOWN 6301 0x1135D, // 1135D..11363; GRANTHA 6302 0x11364, // 11364..11365; UNKNOWN 6303 0x11366, // 11366..1136C; GRANTHA 6304 0x1136D, // 1136D..1136F; UNKNOWN 6305 0x11370, // 11370..11374; GRANTHA 6306 0x11375, // 11375..113FF; UNKNOWN 6307 0x11400, // 11400..1145B; NEWA 6308 0x1145C, // 1145C ; UNKNOWN 6309 0x1145D, // 1145D..11461; NEWA 6310 0x11462, // 11462..1147F; UNKNOWN 6311 0x11480, // 11480..114C7; TIRHUTA 6312 0x114C8, // 114C8..114CF; UNKNOWN 6313 0x114D0, // 114D0..114D9; TIRHUTA 6314 0x114DA, // 114DA..1157F; UNKNOWN 6315 0x11580, // 11580..115B5; SIDDHAM 6316 0x115B6, // 115B6..115B7; UNKNOWN 6317 0x115B8, // 115B8..115DD; SIDDHAM 6318 0x115DE, // 115DE..115FF; UNKNOWN 6319 0x11600, // 11600..11644; MODI 6320 0x11645, // 11645..1164F; UNKNOWN 6321 0x11650, // 11650..11659; MODI 6322 0x1165A, // 1165A..1165F; UNKNOWN 6323 0x11660, // 11660..1166C; MONGOLIAN 6324 0x1166D, // 1166D..1167F; UNKNOWN 6325 0x11680, // 11680..116B8; TAKRI 6326 0x116B9, // 116B9..116BF; UNKNOWN 6327 0x116C0, // 116C0..116C9; TAKRI 6328 0x116CA, // 116CA..116FF; UNKNOWN 6329 0x11700, // 11700..1171A; AHOM 6330 0x1171B, // 1171B..1171C; UNKNOWN 6331 0x1171D, // 1171D..1172B; AHOM 6332 0x1172C, // 1172C..1172F; UNKNOWN 6333 0x11730, // 11730..1173F; AHOM 6334 0x11740, // 11740..117FF; UNKNOWN 6335 0x11800, // 11800..1183B; DOGRA 6336 0x1183C, // 1183C..1189F; UNKNOWN 6337 0x118A0, // 118A0..118F2; WARANG_CITI 6338 0x118F3, // 118F3..118FE; UNKNOWN 6339 0x118FF, // 118FF ; WARANG_CITI 6340 0x11900, // 11900..11906; DIVES_AKURU 6341 0x11907, // 11907..11908; UNKNOWN 6342 0x11909, // 11909 ; DIVES_AKURU 6343 0x1190A, // 1190A..1190B; UNKNOWN 6344 0x1190C, // 1190C..11913; DIVES_AKURU 6345 0x11914, // 11914 ; UNKNOWN 6346 0x11915, // 11915..11916; DIVES_AKURU 6347 0x11917, 
// 11917 ; UNKNOWN 6348 0x11918, // 11918..11935; DIVES_AKURU 6349 0x11936, // 11936 ; UNKNOWN 6350 0x11937, // 11937..11938; DIVES_AKURU 6351 0x11939, // 11939..1193A; UNKNOWN 6352 0x1193B, // 1193B..11946; DIVES_AKURU 6353 0x11947, // 11947..1194F; UNKNOWN 6354 0x11950, // 11950..11959; DIVES_AKURU 6355 0x1195A, // 1195A..1199F; UNKNOWN 6356 0x119A0, // 119A0..119A7; NANDINAGARI 6357 0x119A8, // 119A8..119A9; UNKNOWN 6358 0x119AA, // 119AA..119D7; NANDINAGARI 6359 0x119D8, // 119D8..119D9; UNKNOWN 6360 0x119DA, // 119DA..119E4; NANDINAGARI 6361 0x119E5, // 119E5..119FF; UNKNOWN 6362 0x11A00, // 11A00..11A47; ZANABAZAR_SQUARE 6363 0x11A48, // 11A48..11A4F; UNKNOWN 6364 0x11A50, // 11A50..11AA2; SOYOMBO 6365 0x11AA3, // 11AA3..11ABF; UNKNOWN 6366 0x11AC0, // 11AC0..11AF8; PAU_CIN_HAU 6367 0x11AF9, // 11AF9..11BFF; UNKNOWN 6368 0x11C00, // 11C00..11C08; BHAIKSUKI 6369 0x11C09, // 11C09 ; UNKNOWN 6370 0x11C0A, // 11C0A..11C36; BHAIKSUKI 6371 0x11C37, // 11C37 ; UNKNOWN 6372 0x11C38, // 11C38..11C45; BHAIKSUKI 6373 0x11C46, // 11C46..11C4F; UNKNOWN 6374 0x11C50, // 11C50..11C6C; BHAIKSUKI 6375 0x11C6D, // 11C6D..11C6F; UNKNOWN 6376 0x11C70, // 11C70..11C8F; MARCHEN 6377 0x11C90, // 11C90..11C91; UNKNOWN 6378 0x11C92, // 11C92..11CA7; MARCHEN 6379 0x11CA8, // 11CA8 ; UNKNOWN 6380 0x11CA9, // 11CA9..11CB6; MARCHEN 6381 0x11CB7, // 11CB7..11CFF; UNKNOWN 6382 0x11D00, // 11D00..11D06; MASARAM_GONDI 6383 0x11D07, // 11D07 ; UNKNOWN 6384 0x11D08, // 11D08..11D09; MASARAM_GONDI 6385 0x11D0A, // 11D0A ; UNKNOWN 6386 0x11D0B, // 11D0B..11D36; MASARAM_GONDI 6387 0x11D37, // 11D37..11D39; UNKNOWN 6388 0x11D3A, // 11D3A ; MASARAM_GONDI 6389 0x11D3B, // 11D3B ; UNKNOWN 6390 0x11D3C, // 11D3C..11D3D; MASARAM_GONDI 6391 0x11D3E, // 11D3E ; UNKNOWN 6392 0x11D3F, // 11D3F..11D47; MASARAM_GONDI 6393 0x11D48, // 11D48..11D4F; UNKNOWN 6394 0x11D50, // 11D50..11D59; MASARAM_GONDI 6395 0x11D5A, // 11D5A..11D5F; UNKNOWN 6396 0x11D60, // 11D60..11D65; GUNJALA_GONDI 6397 0x11D66, // 11D66 ; 
UNKNOWN 6398 0x11D67, // 11D67..11D68; GUNJALA_GONDI 6399 0x11D69, // 11D69 ; UNKNOWN 6400 0x11D6A, // 11D6A..11D8E; GUNJALA_GONDI 6401 0x11D8F, // 11D8F ; UNKNOWN 6402 0x11D90, // 11D90..11D91; GUNJALA_GONDI 6403 0x11D92, // 11D92 ; UNKNOWN 6404 0x11D93, // 11D93..11D98; GUNJALA_GONDI 6405 0x11D99, // 11D99..11D9F; UNKNOWN 6406 0x11DA0, // 11DA0..11DA9; GUNJALA_GONDI 6407 0x11DAA, // 11DAA..11EDF; UNKNOWN 6408 0x11EE0, // 11EE0..11EF8; MAKASAR 6409 0x11EF9, // 11EF9..11FAF; UNKNOWN 6410 0x11FB0, // 11FB0 ; LISU 6411 0x11FB1, // 11FB1..11FBF; UNKNOWN 6412 0x11FC0, // 11FC0..11FF1; TAMIL 6413 0x11FF2, // 11FF2..11FFE; UNKNOWN 6414 0x11FFF, // 11FFF ; TAMIL 6415 0x12000, // 12000..12399; CUNEIFORM 6416 0x1239A, // 1239A..123FF; UNKNOWN 6417 0x12400, // 12400..1246E; CUNEIFORM 6418 0x1246F, // 1246F ; UNKNOWN 6419 0x12470, // 12470..12474; CUNEIFORM 6420 0x12475, // 12475..1247F; UNKNOWN 6421 0x12480, // 12480..12543; CUNEIFORM 6422 0x12544, // 12544..12FFF; UNKNOWN 6423 0x13000, // 13000..1342E; EGYPTIAN_HIEROGLYPHS 6424 0x1342F, // 1342F ; UNKNOWN 6425 0x13430, // 13430..13438; EGYPTIAN_HIEROGLYPHS 6426 0x13439, // 13439..143FF; UNKNOWN 6427 0x14400, // 14400..14646; ANATOLIAN_HIEROGLYPHS 6428 0x14647, // 14647..167FF; UNKNOWN 6429 0x16800, // 16800..16A38; BAMUM 6430 0x16A39, // 16A39..16A3F; UNKNOWN 6431 0x16A40, // 16A40..16A5E; MRO 6432 0x16A5F, // 16A5F ; UNKNOWN 6433 0x16A60, // 16A60..16A69; MRO 6434 0x16A6A, // 16A6A..16A6D; UNKNOWN 6435 0x16A6E, // 16A6E..16A6F; MRO 6436 0x16A70, // 16A70..16ACF; UNKNOWN 6437 0x16AD0, // 16AD0..16AED; BASSA_VAH 6438 0x16AEE, // 16AEE..16AEF; UNKNOWN 6439 0x16AF0, // 16AF0..16AF5; BASSA_VAH 6440 0x16AF6, // 16AF6..16AFF; UNKNOWN 6441 0x16B00, // 16B00..16B45; PAHAWH_HMONG 6442 0x16B46, // 16B46..16B4F; UNKNOWN 6443 0x16B50, // 16B50..16B59; PAHAWH_HMONG 6444 0x16B5A, // 16B5A ; UNKNOWN 6445 0x16B5B, // 16B5B..16B61; PAHAWH_HMONG 6446 0x16B62, // 16B62 ; UNKNOWN 6447 0x16B63, // 16B63..16B77; PAHAWH_HMONG 6448 0x16B78, // 
16B78..16B7C; UNKNOWN 6449 0x16B7D, // 16B7D..16B8F; PAHAWH_HMONG 6450 0x16B90, // 16B90..16E3F; UNKNOWN 6451 0x16E40, // 16E40..16E9A; MEDEFAIDRIN 6452 0x16E9B, // 16E9B..16EFF; UNKNOWN 6453 0x16F00, // 16F00..16F4A; MIAO 6454 0x16F4B, // 16F4B..16F4E; UNKNOWN 6455 0x16F4F, // 16F4F..16F87; MIAO 6456 0x16F88, // 16F88..16F8E; UNKNOWN 6457 0x16F8F, // 16F8F..16F9F; MIAO 6458 0x16FA0, // 16FA0..16FDF; UNKNOWN 6459 0x16FE0, // 16FE0 ; TANGUT 6460 0x16FE1, // 16FE1 ; NUSHU 6461 0x16FE2, // 16FE2..16FE3; COMMON 6462 0x16FE4, // 16FE4 ; KHITAN_SMALL_SCRIPT 6463 0x16FE5, // 16FE5..16FEF; UNKNOWN 6464 0x16FF0, // 16FF0..16FF1; HAN 6465 0x16FF2, // 16FF2..16FFF; UNKNOWN 6466 0x17000, // 17000..187F7; TANGUT 6467 0x187F8, // 187F8..187FF; UNKNOWN 6468 0x18800, // 18800..18AFF; TANGUT 6469 0x18B00, // 18B00..18CD5; KHITAN_SMALL_SCRIPT 6470 0x18CD6, // 18CD6..18CFF; UNKNOWN 6471 0x18D00, // 18D00..18D08; TANGUT 6472 0x18D09, // 18D09..1AFFF; UNKNOWN 6473 0x1B000, // 1B000 ; KATAKANA 6474 0x1B001, // 1B001..1B11E; HIRAGANA 6475 0x1B11F, // 1B11F..1B14F; UNKNOWN 6476 0x1B150, // 1B150..1B152; HIRAGANA 6477 0x1B153, // 1B153..1B163; UNKNOWN 6478 0x1B164, // 1B164..1B167; KATAKANA 6479 0x1B168, // 1B168..1B16F; UNKNOWN 6480 0x1B170, // 1B170..1B2FB; NUSHU 6481 0x1B2FC, // 1B2FC..1BBFF; UNKNOWN 6482 0x1BC00, // 1BC00..1BC6A; DUPLOYAN 6483 0x1BC6B, // 1BC6B..1BC6F; UNKNOWN 6484 0x1BC70, // 1BC70..1BC7C; DUPLOYAN 6485 0x1BC7D, // 1BC7D..1BC7F; UNKNOWN 6486 0x1BC80, // 1BC80..1BC88; DUPLOYAN 6487 0x1BC89, // 1BC89..1BC8F; UNKNOWN 6488 0x1BC90, // 1BC90..1BC99; DUPLOYAN 6489 0x1BC9A, // 1BC9A..1BC9B; UNKNOWN 6490 0x1BC9C, // 1BC9C..1BC9F; DUPLOYAN 6491 0x1BCA0, // 1BCA0..1BCA3; COMMON 6492 0x1BCA4, // 1BCA4..1CFFF; UNKNOWN 6493 0x1D000, // 1D000..1D0F5; COMMON 6494 0x1D0F6, // 1D0F6..1D0FF; UNKNOWN 6495 0x1D100, // 1D100..1D126; COMMON 6496 0x1D127, // 1D127..1D128; UNKNOWN 6497 0x1D129, // 1D129..1D166; COMMON 6498 0x1D167, // 1D167..1D169; INHERITED 6499 0x1D16A, // 1D16A..1D17A; 
COMMON 6500 0x1D17B, // 1D17B..1D182; INHERITED 6501 0x1D183, // 1D183..1D184; COMMON 6502 0x1D185, // 1D185..1D18B; INHERITED 6503 0x1D18C, // 1D18C..1D1A9; COMMON 6504 0x1D1AA, // 1D1AA..1D1AD; INHERITED 6505 0x1D1AE, // 1D1AE..1D1E8; COMMON 6506 0x1D1E9, // 1D1E9..1D1FF; UNKNOWN 6507 0x1D200, // 1D200..1D245; GREEK 6508 0x1D246, // 1D246..1D2DF; UNKNOWN 6509 0x1D2E0, // 1D2E0..1D2F3; COMMON 6510 0x1D2F4, // 1D2F4..1D2FF; UNKNOWN 6511 0x1D300, // 1D300..1D356; COMMON 6512 0x1D357, // 1D357..1D35F; UNKNOWN 6513 0x1D360, // 1D360..1D378; COMMON 6514 0x1D379, // 1D379..1D3FF; UNKNOWN 6515 0x1D400, // 1D400..1D454; COMMON 6516 0x1D455, // 1D455 ; UNKNOWN 6517 0x1D456, // 1D456..1D49C; COMMON 6518 0x1D49D, // 1D49D ; UNKNOWN 6519 0x1D49E, // 1D49E..1D49F; COMMON 6520 0x1D4A0, // 1D4A0..1D4A1; UNKNOWN 6521 0x1D4A2, // 1D4A2 ; COMMON 6522 0x1D4A3, // 1D4A3..1D4A4; UNKNOWN 6523 0x1D4A5, // 1D4A5..1D4A6; COMMON 6524 0x1D4A7, // 1D4A7..1D4A8; UNKNOWN 6525 0x1D4A9, // 1D4A9..1D4AC; COMMON 6526 0x1D4AD, // 1D4AD ; UNKNOWN 6527 0x1D4AE, // 1D4AE..1D4B9; COMMON 6528 0x1D4BA, // 1D4BA ; UNKNOWN 6529 0x1D4BB, // 1D4BB ; COMMON 6530 0x1D4BC, // 1D4BC ; UNKNOWN 6531 0x1D4BD, // 1D4BD..1D4C3; COMMON 6532 0x1D4C4, // 1D4C4 ; UNKNOWN 6533 0x1D4C5, // 1D4C5..1D505; COMMON 6534 0x1D506, // 1D506 ; UNKNOWN 6535 0x1D507, // 1D507..1D50A; COMMON 6536 0x1D50B, // 1D50B..1D50C; UNKNOWN 6537 0x1D50D, // 1D50D..1D514; COMMON 6538 0x1D515, // 1D515 ; UNKNOWN 6539 0x1D516, // 1D516..1D51C; COMMON 6540 0x1D51D, // 1D51D ; UNKNOWN 6541 0x1D51E, // 1D51E..1D539; COMMON 6542 0x1D53A, // 1D53A ; UNKNOWN 6543 0x1D53B, // 1D53B..1D53E; COMMON 6544 0x1D53F, // 1D53F ; UNKNOWN 6545 0x1D540, // 1D540..1D544; COMMON 6546 0x1D545, // 1D545 ; UNKNOWN 6547 0x1D546, // 1D546 ; COMMON 6548 0x1D547, // 1D547..1D549; UNKNOWN 6549 0x1D54A, // 1D54A..1D550; COMMON 6550 0x1D551, // 1D551 ; UNKNOWN 6551 0x1D552, // 1D552..1D6A5; COMMON 6552 0x1D6A6, // 1D6A6..1D6A7; UNKNOWN 6553 0x1D6A8, // 1D6A8..1D7CB; COMMON 6554 
0x1D7CC, // 1D7CC..1D7CD; UNKNOWN 6555 0x1D7CE, // 1D7CE..1D7FF; COMMON 6556 0x1D800, // 1D800..1DA8B; SIGNWRITING 6557 0x1DA8C, // 1DA8C..1DA9A; UNKNOWN 6558 0x1DA9B, // 1DA9B..1DA9F; SIGNWRITING 6559 0x1DAA0, // 1DAA0 ; UNKNOWN 6560 0x1DAA1, // 1DAA1..1DAAF; SIGNWRITING 6561 0x1DAB0, // 1DAB0..1DFFF; UNKNOWN 6562 0x1E000, // 1E000..1E006; GLAGOLITIC 6563 0x1E007, // 1E007 ; UNKNOWN 6564 0x1E008, // 1E008..1E018; GLAGOLITIC 6565 0x1E019, // 1E019..1E01A; UNKNOWN 6566 0x1E01B, // 1E01B..1E021; GLAGOLITIC 6567 0x1E022, // 1E022 ; UNKNOWN 6568 0x1E023, // 1E023..1E024; GLAGOLITIC 6569 0x1E025, // 1E025 ; UNKNOWN 6570 0x1E026, // 1E026..1E02A; GLAGOLITIC 6571 0x1E02B, // 1E02B..1E0FF; UNKNOWN 6572 0x1E100, // 1E100..1E12C; NYIAKENG_PUACHUE_HMONG 6573 0x1E12D, // 1E12D..1E12F; UNKNOWN 6574 0x1E130, // 1E130..1E13D; NYIAKENG_PUACHUE_HMONG 6575 0x1E13E, // 1E13E..1E13F; UNKNOWN 6576 0x1E140, // 1E140..1E149; NYIAKENG_PUACHUE_HMONG 6577 0x1E14A, // 1E14A..1E14D; UNKNOWN 6578 0x1E14E, // 1E14E..1E14F; NYIAKENG_PUACHUE_HMONG 6579 0x1E150, // 1E150..1E2BF; UNKNOWN 6580 0x1E2C0, // 1E2C0..1E2F9; WANCHO 6581 0x1E2FA, // 1E2FA..1E2FE; UNKNOWN 6582 0x1E2FF, // 1E2FF ; WANCHO 6583 0x1E300, // 1E300..1E7FF; UNKNOWN 6584 0x1E800, // 1E800..1E8C4; MENDE_KIKAKUI 6585 0x1E8C5, // 1E8C5..1E8C6; UNKNOWN 6586 0x1E8C7, // 1E8C7..1E8D6; MENDE_KIKAKUI 6587 0x1E8D7, // 1E8D7..1E8FF; UNKNOWN 6588 0x1E900, // 1E900..1E94B; ADLAM 6589 0x1E94C, // 1E94C..1E94F; UNKNOWN 6590 0x1E950, // 1E950..1E959; ADLAM 6591 0x1E95A, // 1E95A..1E95D; UNKNOWN 6592 0x1E95E, // 1E95E..1E95F; ADLAM 6593 0x1E960, // 1E960..1EC70; UNKNOWN 6594 0x1EC71, // 1EC71..1ECB4; COMMON 6595 0x1ECB5, // 1ECB5..1ED00; UNKNOWN 6596 0x1ED01, // 1ED01..1ED3D; COMMON 6597 0x1ED3E, // 1ED3E..1EDFF; UNKNOWN 6598 0x1EE00, // 1EE00..1EE03; ARABIC 6599 0x1EE04, // 1EE04 ; UNKNOWN 6600 0x1EE05, // 1EE05..1EE1F; ARABIC 6601 0x1EE20, // 1EE20 ; UNKNOWN 6602 0x1EE21, // 1EE21..1EE22; ARABIC 6603 0x1EE23, // 1EE23 ; UNKNOWN 6604 0x1EE24, // 
1EE24 ; ARABIC 6605 0x1EE25, // 1EE25..1EE26; UNKNOWN 6606 0x1EE27, // 1EE27 ; ARABIC 6607 0x1EE28, // 1EE28 ; UNKNOWN 6608 0x1EE29, // 1EE29..1EE32; ARABIC 6609 0x1EE33, // 1EE33 ; UNKNOWN 6610 0x1EE34, // 1EE34..1EE37; ARABIC 6611 0x1EE38, // 1EE38 ; UNKNOWN 6612 0x1EE39, // 1EE39 ; ARABIC 6613 0x1EE3A, // 1EE3A ; UNKNOWN 6614 0x1EE3B, // 1EE3B ; ARABIC 6615 0x1EE3C, // 1EE3C..1EE41; UNKNOWN 6616 0x1EE42, // 1EE42 ; ARABIC 6617 0x1EE43, // 1EE43..1EE46; UNKNOWN 6618 0x1EE47, // 1EE47 ; ARABIC 6619 0x1EE48, // 1EE48 ; UNKNOWN 6620 0x1EE49, // 1EE49 ; ARABIC 6621 0x1EE4A, // 1EE4A ; UNKNOWN 6622 0x1EE4B, // 1EE4B ; ARABIC 6623 0x1EE4C, // 1EE4C ; UNKNOWN 6624 0x1EE4D, // 1EE4D..1EE4F; ARABIC 6625 0x1EE50, // 1EE50 ; UNKNOWN 6626 0x1EE51, // 1EE51..1EE52; ARABIC 6627 0x1EE53, // 1EE53 ; UNKNOWN 6628 0x1EE54, // 1EE54 ; ARABIC 6629 0x1EE55, // 1EE55..1EE56; UNKNOWN 6630 0x1EE57, // 1EE57 ; ARABIC 6631 0x1EE58, // 1EE58 ; UNKNOWN 6632 0x1EE59, // 1EE59 ; ARABIC 6633 0x1EE5A, // 1EE5A ; UNKNOWN 6634 0x1EE5B, // 1EE5B ; ARABIC 6635 0x1EE5C, // 1EE5C ; UNKNOWN 6636 0x1EE5D, // 1EE5D ; ARABIC 6637 0x1EE5E, // 1EE5E ; UNKNOWN 6638 0x1EE5F, // 1EE5F ; ARABIC 6639 0x1EE60, // 1EE60 ; UNKNOWN 6640 0x1EE61, // 1EE61..1EE62; ARABIC 6641 0x1EE63, // 1EE63 ; UNKNOWN 6642 0x1EE64, // 1EE64 ; ARABIC 6643 0x1EE65, // 1EE65..1EE66; UNKNOWN 6644 0x1EE67, // 1EE67..1EE6A; ARABIC 6645 0x1EE6B, // 1EE6B ; UNKNOWN 6646 0x1EE6C, // 1EE6C..1EE72; ARABIC 6647 0x1EE73, // 1EE73 ; UNKNOWN 6648 0x1EE74, // 1EE74..1EE77; ARABIC 6649 0x1EE78, // 1EE78 ; UNKNOWN 6650 0x1EE79, // 1EE79..1EE7C; ARABIC 6651 0x1EE7D, // 1EE7D ; UNKNOWN 6652 0x1EE7E, // 1EE7E ; ARABIC 6653 0x1EE7F, // 1EE7F ; UNKNOWN 6654 0x1EE80, // 1EE80..1EE89; ARABIC 6655 0x1EE8A, // 1EE8A ; UNKNOWN 6656 0x1EE8B, // 1EE8B..1EE9B; ARABIC 6657 0x1EE9C, // 1EE9C..1EEA0; UNKNOWN 6658 0x1EEA1, // 1EEA1..1EEA3; ARABIC 6659 0x1EEA4, // 1EEA4 ; UNKNOWN 6660 0x1EEA5, // 1EEA5..1EEA9; ARABIC 6661 0x1EEAA, // 1EEAA ; UNKNOWN 6662 0x1EEAB, // 
1EEAB..1EEBB; ARABIC 6663 0x1EEBC, // 1EEBC..1EEEF; UNKNOWN 6664 0x1EEF0, // 1EEF0..1EEF1; ARABIC 6665 0x1EEF2, // 1EEF2..1EFFF; UNKNOWN 6666 0x1F000, // 1F000..1F02B; COMMON 6667 0x1F02C, // 1F02C..1F02F; UNKNOWN 6668 0x1F030, // 1F030..1F093; COMMON 6669 0x1F094, // 1F094..1F09F; UNKNOWN 6670 0x1F0A0, // 1F0A0..1F0AE; COMMON 6671 0x1F0AF, // 1F0AF..1F0B0; UNKNOWN 6672 0x1F0B1, // 1F0B1..1F0BF; COMMON 6673 0x1F0C0, // 1F0C0 ; UNKNOWN 6674 0x1F0C1, // 1F0C1..1F0CF; COMMON 6675 0x1F0D0, // 1F0D0 ; UNKNOWN 6676 0x1F0D1, // 1F0D1..1F0F5; COMMON 6677 0x1F0F6, // 1F0F6..1F0FF; UNKNOWN 6678 0x1F100, // 1F100..1F1AD; COMMON 6679 0x1F1AE, // 1F1AE..1F1E5; UNKNOWN 6680 0x1F1E6, // 1F1E6..1F1FF; COMMON 6681 0x1F200, // 1F200 ; HIRAGANA 6682 0x1F201, // 1F201..1F202; COMMON 6683 0x1F203, // 1F203..1F20F; UNKNOWN 6684 0x1F210, // 1F210..1F23B; COMMON 6685 0x1F23C, // 1F23C..1F23F; UNKNOWN 6686 0x1F240, // 1F240..1F248; COMMON 6687 0x1F249, // 1F249..1F24F; UNKNOWN 6688 0x1F250, // 1F250..1F251; COMMON 6689 0x1F252, // 1F252..1F25F; UNKNOWN 6690 0x1F260, // 1F260..1F265; COMMON 6691 0x1F266, // 1F266..1F2FF; UNKNOWN 6692 0x1F300, // 1F300..1F6D7; COMMON 6693 0x1F6D8, // 1F6D8..1F6DF; UNKNOWN 6694 0x1F6E0, // 1F6E0..1F6EC; COMMON 6695 0x1F6ED, // 1F6ED..1F6EF; UNKNOWN 6696 0x1F6F0, // 1F6F0..1F6FC; COMMON 6697 0x1F6FD, // 1F6FD..1F6FF; UNKNOWN 6698 0x1F700, // 1F700..1F773; COMMON 6699 0x1F774, // 1F774..1F77F; UNKNOWN 6700 0x1F780, // 1F780..1F7D8; COMMON 6701 0x1F7D9, // 1F7D9..1F7DF; UNKNOWN 6702 0x1F7E0, // 1F7E0..1F7EB; COMMON 6703 0x1F7EC, // 1F7EC..1F7FF; UNKNOWN 6704 0x1F800, // 1F800..1F80B; COMMON 6705 0x1F80C, // 1F80C..1F80F; UNKNOWN 6706 0x1F810, // 1F810..1F847; COMMON 6707 0x1F848, // 1F848..1F84F; UNKNOWN 6708 0x1F850, // 1F850..1F859; COMMON 6709 0x1F85A, // 1F85A..1F85F; UNKNOWN 6710 0x1F860, // 1F860..1F887; COMMON 6711 0x1F888, // 1F888..1F88F; UNKNOWN 6712 0x1F890, // 1F890..1F8AD; COMMON 6713 0x1F8AE, // 1F8AE..1F8AF; UNKNOWN 6714 0x1F8B0, // 1F8B0..1F8B1; 
COMMON 6715 0x1F8B2, // 1F8B2..1F8FF; UNKNOWN 6716 0x1F900, // 1F900..1F978; COMMON 6717 0x1F979, // 1F979 ; UNKNOWN 6718 0x1F97A, // 1F97A..1F9CB; COMMON 6719 0x1F9CC, // 1F9CC ; UNKNOWN 6720 0x1F9CD, // 1F9CD..1FA53; COMMON 6721 0x1FA54, // 1FA54..1FA5F; UNKNOWN 6722 0x1FA60, // 1FA60..1FA6D; COMMON 6723 0x1FA6E, // 1FA6E..1FA6F; UNKNOWN 6724 0x1FA70, // 1FA70..1FA74; COMMON 6725 0x1FA75, // 1FA75..1FA77; UNKNOWN 6726 0x1FA78, // 1FA78..1FA7A; COMMON 6727 0x1FA7B, // 1FA7B..1FA7F; UNKNOWN 6728 0x1FA80, // 1FA80..1FA86; COMMON 6729 0x1FA87, // 1FA87..1FA8F; UNKNOWN 6730 0x1FA90, // 1FA90..1FAA8; COMMON 6731 0x1FAA9, // 1FAA9..1FAAF; UNKNOWN 6732 0x1FAB0, // 1FAB0..1FAB6; COMMON 6733 0x1FAB7, // 1FAB7..1FABF; UNKNOWN 6734 0x1FAC0, // 1FAC0..1FAC2; COMMON 6735 0x1FAC3, // 1FAC3..1FACF; UNKNOWN 6736 0x1FAD0, // 1FAD0..1FAD6; COMMON 6737 0x1FAD7, // 1FAD7..1FAFF; UNKNOWN 6738 0x1FB00, // 1FB00..1FB92; COMMON 6739 0x1FB93, // 1FB93 ; UNKNOWN 6740 0x1FB94, // 1FB94..1FBCA; COMMON 6741 0x1FBCB, // 1FBCB..1FBEF; UNKNOWN 6742 0x1FBF0, // 1FBF0..1FBF9; COMMON 6743 0x1FBFA, // 1FBFA..1FFFF; UNKNOWN 6744 0x20000, // 20000..2A6DD; HAN 6745 0x2A6DE, // 2A6DE..2A6FF; UNKNOWN 6746 0x2A700, // 2A700..2B734; HAN 6747 0x2B735, // 2B735..2B73F; UNKNOWN 6748 0x2B740, // 2B740..2B81D; HAN 6749 0x2B81E, // 2B81E..2B81F; UNKNOWN 6750 0x2B820, // 2B820..2CEA1; HAN 6751 0x2CEA2, // 2CEA2..2CEAF; UNKNOWN 6752 0x2CEB0, // 2CEB0..2EBE0; HAN 6753 0x2EBE1, // 2EBE1..2F7FF; UNKNOWN 6754 0x2F800, // 2F800..2FA1D; HAN 6755 0x2FA1E, // 2FA1E..2FFFF; UNKNOWN 6756 0x30000, // 30000..3134A; HAN 6757 0x3134B, // 3134B..E0000; UNKNOWN 6758 0xE0001, // E0001 ; COMMON 6759 0xE0002, // E0002..E001F; UNKNOWN 6760 0xE0020, // E0020..E007F; COMMON 6761 0xE0080, // E0080..E00FF; UNKNOWN 6762 0xE0100, // E0100..E01EF; INHERITED 6763 0xE01F0, // E01F0..10FFFF; UNKNOWN 6764 }; 6765 6766 private static final UnicodeScript[] scripts = { 6767 COMMON, // 0000..0040 6768 LATIN, // 0041..005A 6769 COMMON, // 005B..0060 
6770 LATIN, // 0061..007A 6771 COMMON, // 007B..00A9 6772 LATIN, // 00AA 6773 COMMON, // 00AB..00B9 6774 LATIN, // 00BA 6775 COMMON, // 00BB..00BF 6776 LATIN, // 00C0..00D6 6777 COMMON, // 00D7 6778 LATIN, // 00D8..00F6 6779 COMMON, // 00F7 6780 LATIN, // 00F8..02B8 6781 COMMON, // 02B9..02DF 6782 LATIN, // 02E0..02E4 6783 COMMON, // 02E5..02E9 6784 BOPOMOFO, // 02EA..02EB 6785 COMMON, // 02EC..02FF 6786 INHERITED, // 0300..036F 6787 GREEK, // 0370..0373 6788 COMMON, // 0374 6789 GREEK, // 0375..0377 6790 UNKNOWN, // 0378..0379 6791 GREEK, // 037A..037D 6792 COMMON, // 037E 6793 GREEK, // 037F 6794 UNKNOWN, // 0380..0383 6795 GREEK, // 0384 6796 COMMON, // 0385 6797 GREEK, // 0386 6798 COMMON, // 0387 6799 GREEK, // 0388..038A 6800 UNKNOWN, // 038B 6801 GREEK, // 038C 6802 UNKNOWN, // 038D 6803 GREEK, // 038E..03A1 6804 UNKNOWN, // 03A2 6805 GREEK, // 03A3..03E1 6806 COPTIC, // 03E2..03EF 6807 GREEK, // 03F0..03FF 6808 CYRILLIC, // 0400..0484 6809 INHERITED, // 0485..0486 6810 CYRILLIC, // 0487..052F 6811 UNKNOWN, // 0530 6812 ARMENIAN, // 0531..0556 6813 UNKNOWN, // 0557..0558 6814 ARMENIAN, // 0559..058A 6815 UNKNOWN, // 058B..058C 6816 ARMENIAN, // 058D..058F 6817 UNKNOWN, // 0590 6818 HEBREW, // 0591..05C7 6819 UNKNOWN, // 05C8..05CF 6820 HEBREW, // 05D0..05EA 6821 UNKNOWN, // 05EB..05EE 6822 HEBREW, // 05EF..05F4 6823 UNKNOWN, // 05F5..05FF 6824 ARABIC, // 0600..0604 6825 COMMON, // 0605 6826 ARABIC, // 0606..060B 6827 COMMON, // 060C 6828 ARABIC, // 060D..061A 6829 COMMON, // 061B 6830 ARABIC, // 061C 6831 UNKNOWN, // 061D 6832 ARABIC, // 061E 6833 COMMON, // 061F 6834 ARABIC, // 0620..063F 6835 COMMON, // 0640 6836 ARABIC, // 0641..064A 6837 INHERITED, // 064B..0655 6838 ARABIC, // 0656..066F 6839 INHERITED, // 0670 6840 ARABIC, // 0671..06DC 6841 COMMON, // 06DD 6842 ARABIC, // 06DE..06FF 6843 SYRIAC, // 0700..070D 6844 UNKNOWN, // 070E 6845 SYRIAC, // 070F..074A 6846 UNKNOWN, // 074B..074C 6847 SYRIAC, // 074D..074F 6848 ARABIC, // 0750..077F 6849 THAANA, 
// 0780..07B1 6850 UNKNOWN, // 07B2..07BF 6851 NKO, // 07C0..07FA 6852 UNKNOWN, // 07FB..07FC 6853 NKO, // 07FD..07FF 6854 SAMARITAN, // 0800..082D 6855 UNKNOWN, // 082E..082F 6856 SAMARITAN, // 0830..083E 6857 UNKNOWN, // 083F 6858 MANDAIC, // 0840..085B 6859 UNKNOWN, // 085C..085D 6860 MANDAIC, // 085E 6861 UNKNOWN, // 085F 6862 SYRIAC, // 0860..086A 6863 UNKNOWN, // 086B..089F 6864 ARABIC, // 08A0..08B4 6865 UNKNOWN, // 08B5 6866 ARABIC, // 08B6..08C7 6867 UNKNOWN, // 08C8..08D2 6868 ARABIC, // 08D3..08E1 6869 COMMON, // 08E2 6870 ARABIC, // 08E3..08FF 6871 DEVANAGARI, // 0900..0950 6872 INHERITED, // 0951..0954 6873 DEVANAGARI, // 0955..0963 6874 COMMON, // 0964..0965 6875 DEVANAGARI, // 0966..097F 6876 BENGALI, // 0980..0983 6877 UNKNOWN, // 0984 6878 BENGALI, // 0985..098C 6879 UNKNOWN, // 098D..098E 6880 BENGALI, // 098F..0990 6881 UNKNOWN, // 0991..0992 6882 BENGALI, // 0993..09A8 6883 UNKNOWN, // 09A9 6884 BENGALI, // 09AA..09B0 6885 UNKNOWN, // 09B1 6886 BENGALI, // 09B2 6887 UNKNOWN, // 09B3..09B5 6888 BENGALI, // 09B6..09B9 6889 UNKNOWN, // 09BA..09BB 6890 BENGALI, // 09BC..09C4 6891 UNKNOWN, // 09C5..09C6 6892 BENGALI, // 09C7..09C8 6893 UNKNOWN, // 09C9..09CA 6894 BENGALI, // 09CB..09CE 6895 UNKNOWN, // 09CF..09D6 6896 BENGALI, // 09D7 6897 UNKNOWN, // 09D8..09DB 6898 BENGALI, // 09DC..09DD 6899 UNKNOWN, // 09DE 6900 BENGALI, // 09DF..09E3 6901 UNKNOWN, // 09E4..09E5 6902 BENGALI, // 09E6..09FE 6903 UNKNOWN, // 09FF..0A00 6904 GURMUKHI, // 0A01..0A03 6905 UNKNOWN, // 0A04 6906 GURMUKHI, // 0A05..0A0A 6907 UNKNOWN, // 0A0B..0A0E 6908 GURMUKHI, // 0A0F..0A10 6909 UNKNOWN, // 0A11..0A12 6910 GURMUKHI, // 0A13..0A28 6911 UNKNOWN, // 0A29 6912 GURMUKHI, // 0A2A..0A30 6913 UNKNOWN, // 0A31 6914 GURMUKHI, // 0A32..0A33 6915 UNKNOWN, // 0A34 6916 GURMUKHI, // 0A35..0A36 6917 UNKNOWN, // 0A37 6918 GURMUKHI, // 0A38..0A39 6919 UNKNOWN, // 0A3A..0A3B 6920 GURMUKHI, // 0A3C 6921 UNKNOWN, // 0A3D 6922 GURMUKHI, // 0A3E..0A42 6923 UNKNOWN, // 0A43..0A46 6924 
GURMUKHI, // 0A47..0A48 6925 UNKNOWN, // 0A49..0A4A 6926 GURMUKHI, // 0A4B..0A4D 6927 UNKNOWN, // 0A4E..0A50 6928 GURMUKHI, // 0A51 6929 UNKNOWN, // 0A52..0A58 6930 GURMUKHI, // 0A59..0A5C 6931 UNKNOWN, // 0A5D 6932 GURMUKHI, // 0A5E 6933 UNKNOWN, // 0A5F..0A65 6934 GURMUKHI, // 0A66..0A76 6935 UNKNOWN, // 0A77..0A80 6936 GUJARATI, // 0A81..0A83 6937 UNKNOWN, // 0A84 6938 GUJARATI, // 0A85..0A8D 6939 UNKNOWN, // 0A8E 6940 GUJARATI, // 0A8F..0A91 6941 UNKNOWN, // 0A92 6942 GUJARATI, // 0A93..0AA8 6943 UNKNOWN, // 0AA9 6944 GUJARATI, // 0AAA..0AB0 6945 UNKNOWN, // 0AB1 6946 GUJARATI, // 0AB2..0AB3 6947 UNKNOWN, // 0AB4 6948 GUJARATI, // 0AB5..0AB9 6949 UNKNOWN, // 0ABA..0ABB 6950 GUJARATI, // 0ABC..0AC5 6951 UNKNOWN, // 0AC6 6952 GUJARATI, // 0AC7..0AC9 6953 UNKNOWN, // 0ACA 6954 GUJARATI, // 0ACB..0ACD 6955 UNKNOWN, // 0ACE..0ACF 6956 GUJARATI, // 0AD0 6957 UNKNOWN, // 0AD1..0ADF 6958 GUJARATI, // 0AE0..0AE3 6959 UNKNOWN, // 0AE4..0AE5 6960 GUJARATI, // 0AE6..0AF1 6961 UNKNOWN, // 0AF2..0AF8 6962 GUJARATI, // 0AF9..0AFF 6963 UNKNOWN, // 0B00 6964 ORIYA, // 0B01..0B03 6965 UNKNOWN, // 0B04 6966 ORIYA, // 0B05..0B0C 6967 UNKNOWN, // 0B0D..0B0E 6968 ORIYA, // 0B0F..0B10 6969 UNKNOWN, // 0B11..0B12 6970 ORIYA, // 0B13..0B28 6971 UNKNOWN, // 0B29 6972 ORIYA, // 0B2A..0B30 6973 UNKNOWN, // 0B31 6974 ORIYA, // 0B32..0B33 6975 UNKNOWN, // 0B34 6976 ORIYA, // 0B35..0B39 6977 UNKNOWN, // 0B3A..0B3B 6978 ORIYA, // 0B3C..0B44 6979 UNKNOWN, // 0B45..0B46 6980 ORIYA, // 0B47..0B48 6981 UNKNOWN, // 0B49..0B4A 6982 ORIYA, // 0B4B..0B4D 6983 UNKNOWN, // 0B4E..0B54 6984 ORIYA, // 0B55..0B57 6985 UNKNOWN, // 0B58..0B5B 6986 ORIYA, // 0B5C..0B5D 6987 UNKNOWN, // 0B5E 6988 ORIYA, // 0B5F..0B63 6989 UNKNOWN, // 0B64..0B65 6990 ORIYA, // 0B66..0B77 6991 UNKNOWN, // 0B78..0B81 6992 TAMIL, // 0B82..0B83 6993 UNKNOWN, // 0B84 6994 TAMIL, // 0B85..0B8A 6995 UNKNOWN, // 0B8B..0B8D 6996 TAMIL, // 0B8E..0B90 6997 UNKNOWN, // 0B91 6998 TAMIL, // 0B92..0B95 6999 UNKNOWN, // 0B96..0B98 7000 TAMIL, 
// 0B99..0B9A 7001 UNKNOWN, // 0B9B 7002 TAMIL, // 0B9C 7003 UNKNOWN, // 0B9D 7004 TAMIL, // 0B9E..0B9F 7005 UNKNOWN, // 0BA0..0BA2 7006 TAMIL, // 0BA3..0BA4 7007 UNKNOWN, // 0BA5..0BA7 7008 TAMIL, // 0BA8..0BAA 7009 UNKNOWN, // 0BAB..0BAD 7010 TAMIL, // 0BAE..0BB9 7011 UNKNOWN, // 0BBA..0BBD 7012 TAMIL, // 0BBE..0BC2 7013 UNKNOWN, // 0BC3..0BC5 7014 TAMIL, // 0BC6..0BC8 7015 UNKNOWN, // 0BC9 7016 TAMIL, // 0BCA..0BCD 7017 UNKNOWN, // 0BCE..0BCF 7018 TAMIL, // 0BD0 7019 UNKNOWN, // 0BD1..0BD6 7020 TAMIL, // 0BD7 7021 UNKNOWN, // 0BD8..0BE5 7022 TAMIL, // 0BE6..0BFA 7023 UNKNOWN, // 0BFB..0BFF 7024 TELUGU, // 0C00..0C0C 7025 UNKNOWN, // 0C0D 7026 TELUGU, // 0C0E..0C10 7027 UNKNOWN, // 0C11 7028 TELUGU, // 0C12..0C28 7029 UNKNOWN, // 0C29 7030 TELUGU, // 0C2A..0C39 7031 UNKNOWN, // 0C3A..0C3C 7032 TELUGU, // 0C3D..0C44 7033 UNKNOWN, // 0C45 7034 TELUGU, // 0C46..0C48 7035 UNKNOWN, // 0C49 7036 TELUGU, // 0C4A..0C4D 7037 UNKNOWN, // 0C4E..0C54 7038 TELUGU, // 0C55..0C56 7039 UNKNOWN, // 0C57 7040 TELUGU, // 0C58..0C5A 7041 UNKNOWN, // 0C5B..0C5F 7042 TELUGU, // 0C60..0C63 7043 UNKNOWN, // 0C64..0C65 7044 TELUGU, // 0C66..0C6F 7045 UNKNOWN, // 0C70..0C76 7046 TELUGU, // 0C77..0C7F 7047 KANNADA, // 0C80..0C8C 7048 UNKNOWN, // 0C8D 7049 KANNADA, // 0C8E..0C90 7050 UNKNOWN, // 0C91 7051 KANNADA, // 0C92..0CA8 7052 UNKNOWN, // 0CA9 7053 KANNADA, // 0CAA..0CB3 7054 UNKNOWN, // 0CB4 7055 KANNADA, // 0CB5..0CB9 7056 UNKNOWN, // 0CBA..0CBB 7057 KANNADA, // 0CBC..0CC4 7058 UNKNOWN, // 0CC5 7059 KANNADA, // 0CC6..0CC8 7060 UNKNOWN, // 0CC9 7061 KANNADA, // 0CCA..0CCD 7062 UNKNOWN, // 0CCE..0CD4 7063 KANNADA, // 0CD5..0CD6 7064 UNKNOWN, // 0CD7..0CDD 7065 KANNADA, // 0CDE 7066 UNKNOWN, // 0CDF 7067 KANNADA, // 0CE0..0CE3 7068 UNKNOWN, // 0CE4..0CE5 7069 KANNADA, // 0CE6..0CEF 7070 UNKNOWN, // 0CF0 7071 KANNADA, // 0CF1..0CF2 7072 UNKNOWN, // 0CF3..0CFF 7073 MALAYALAM, // 0D00..0D0C 7074 UNKNOWN, // 0D0D 7075 MALAYALAM, // 0D0E..0D10 7076 UNKNOWN, // 0D11 7077 MALAYALAM, // 
0D12..0D44 7078 UNKNOWN, // 0D45 7079 MALAYALAM, // 0D46..0D48 7080 UNKNOWN, // 0D49 7081 MALAYALAM, // 0D4A..0D4F 7082 UNKNOWN, // 0D50..0D53 7083 MALAYALAM, // 0D54..0D63 7084 UNKNOWN, // 0D64..0D65 7085 MALAYALAM, // 0D66..0D7F 7086 UNKNOWN, // 0D80 7087 SINHALA, // 0D81..0D83 7088 UNKNOWN, // 0D84 7089 SINHALA, // 0D85..0D96 7090 UNKNOWN, // 0D97..0D99 7091 SINHALA, // 0D9A..0DB1 7092 UNKNOWN, // 0DB2 7093 SINHALA, // 0DB3..0DBB 7094 UNKNOWN, // 0DBC 7095 SINHALA, // 0DBD 7096 UNKNOWN, // 0DBE..0DBF 7097 SINHALA, // 0DC0..0DC6 7098 UNKNOWN, // 0DC7..0DC9 7099 SINHALA, // 0DCA 7100 UNKNOWN, // 0DCB..0DCE 7101 SINHALA, // 0DCF..0DD4 7102 UNKNOWN, // 0DD5 7103 SINHALA, // 0DD6 7104 UNKNOWN, // 0DD7 7105 SINHALA, // 0DD8..0DDF 7106 UNKNOWN, // 0DE0..0DE5 7107 SINHALA, // 0DE6..0DEF 7108 UNKNOWN, // 0DF0..0DF1 7109 SINHALA, // 0DF2..0DF4 7110 UNKNOWN, // 0DF5..0E00 7111 THAI, // 0E01..0E3A 7112 UNKNOWN, // 0E3B..0E3E 7113 COMMON, // 0E3F 7114 THAI, // 0E40..0E5B 7115 UNKNOWN, // 0E5C..0E80 7116 LAO, // 0E81..0E82 7117 UNKNOWN, // 0E83 7118 LAO, // 0E84 7119 UNKNOWN, // 0E85 7120 LAO, // 0E86..0E8A 7121 UNKNOWN, // 0E8B 7122 LAO, // 0E8C..0EA3 7123 UNKNOWN, // 0EA4 7124 LAO, // 0EA5 7125 UNKNOWN, // 0EA6 7126 LAO, // 0EA7..0EBD 7127 UNKNOWN, // 0EBE..0EBF 7128 LAO, // 0EC0..0EC4 7129 UNKNOWN, // 0EC5 7130 LAO, // 0EC6 7131 UNKNOWN, // 0EC7 7132 LAO, // 0EC8..0ECD 7133 UNKNOWN, // 0ECE..0ECF 7134 LAO, // 0ED0..0ED9 7135 UNKNOWN, // 0EDA..0EDB 7136 LAO, // 0EDC..0EDF 7137 UNKNOWN, // 0EE0..0EFF 7138 TIBETAN, // 0F00..0F47 7139 UNKNOWN, // 0F48 7140 TIBETAN, // 0F49..0F6C 7141 UNKNOWN, // 0F6D..0F70 7142 TIBETAN, // 0F71..0F97 7143 UNKNOWN, // 0F98 7144 TIBETAN, // 0F99..0FBC 7145 UNKNOWN, // 0FBD 7146 TIBETAN, // 0FBE..0FCC 7147 UNKNOWN, // 0FCD 7148 TIBETAN, // 0FCE..0FD4 7149 COMMON, // 0FD5..0FD8 7150 TIBETAN, // 0FD9..0FDA 7151 UNKNOWN, // 0FDB..0FFF 7152 MYANMAR, // 1000..109F 7153 GEORGIAN, // 10A0..10C5 7154 UNKNOWN, // 10C6 7155 GEORGIAN, // 10C7 7156 UNKNOWN, 
// 10C8..10CC 7157 GEORGIAN, // 10CD 7158 UNKNOWN, // 10CE..10CF 7159 GEORGIAN, // 10D0..10FA 7160 COMMON, // 10FB 7161 GEORGIAN, // 10FC..10FF 7162 HANGUL, // 1100..11FF 7163 ETHIOPIC, // 1200..1248 7164 UNKNOWN, // 1249 7165 ETHIOPIC, // 124A..124D 7166 UNKNOWN, // 124E..124F 7167 ETHIOPIC, // 1250..1256 7168 UNKNOWN, // 1257 7169 ETHIOPIC, // 1258 7170 UNKNOWN, // 1259 7171 ETHIOPIC, // 125A..125D 7172 UNKNOWN, // 125E..125F 7173 ETHIOPIC, // 1260..1288 7174 UNKNOWN, // 1289 7175 ETHIOPIC, // 128A..128D 7176 UNKNOWN, // 128E..128F 7177 ETHIOPIC, // 1290..12B0 7178 UNKNOWN, // 12B1 7179 ETHIOPIC, // 12B2..12B5 7180 UNKNOWN, // 12B6..12B7 7181 ETHIOPIC, // 12B8..12BE 7182 UNKNOWN, // 12BF 7183 ETHIOPIC, // 12C0 7184 UNKNOWN, // 12C1 7185 ETHIOPIC, // 12C2..12C5 7186 UNKNOWN, // 12C6..12C7 7187 ETHIOPIC, // 12C8..12D6 7188 UNKNOWN, // 12D7 7189 ETHIOPIC, // 12D8..1310 7190 UNKNOWN, // 1311 7191 ETHIOPIC, // 1312..1315 7192 UNKNOWN, // 1316..1317 7193 ETHIOPIC, // 1318..135A 7194 UNKNOWN, // 135B..135C 7195 ETHIOPIC, // 135D..137C 7196 UNKNOWN, // 137D..137F 7197 ETHIOPIC, // 1380..1399 7198 UNKNOWN, // 139A..139F 7199 CHEROKEE, // 13A0..13F5 7200 UNKNOWN, // 13F6..13F7 7201 CHEROKEE, // 13F8..13FD 7202 UNKNOWN, // 13FE..13FF 7203 CANADIAN_ABORIGINAL, // 1400..167F 7204 OGHAM, // 1680..169C 7205 UNKNOWN, // 169D..169F 7206 RUNIC, // 16A0..16EA 7207 COMMON, // 16EB..16ED 7208 RUNIC, // 16EE..16F8 7209 UNKNOWN, // 16F9..16FF 7210 TAGALOG, // 1700..170C 7211 UNKNOWN, // 170D 7212 TAGALOG, // 170E..1714 7213 UNKNOWN, // 1715..171F 7214 HANUNOO, // 1720..1734 7215 COMMON, // 1735..1736 7216 UNKNOWN, // 1737..173F 7217 BUHID, // 1740..1753 7218 UNKNOWN, // 1754..175F 7219 TAGBANWA, // 1760..176C 7220 UNKNOWN, // 176D 7221 TAGBANWA, // 176E..1770 7222 UNKNOWN, // 1771 7223 TAGBANWA, // 1772..1773 7224 UNKNOWN, // 1774..177F 7225 KHMER, // 1780..17DD 7226 UNKNOWN, // 17DE..17DF 7227 KHMER, // 17E0..17E9 7228 UNKNOWN, // 17EA..17EF 7229 KHMER, // 17F0..17F9 7230 UNKNOWN, // 
17FA..17FF 7231 MONGOLIAN, // 1800..1801 7232 COMMON, // 1802..1803 7233 MONGOLIAN, // 1804 7234 COMMON, // 1805 7235 MONGOLIAN, // 1806..180E 7236 UNKNOWN, // 180F 7237 MONGOLIAN, // 1810..1819 7238 UNKNOWN, // 181A..181F 7239 MONGOLIAN, // 1820..1878 7240 UNKNOWN, // 1879..187F 7241 MONGOLIAN, // 1880..18AA 7242 UNKNOWN, // 18AB..18AF 7243 CANADIAN_ABORIGINAL, // 18B0..18F5 7244 UNKNOWN, // 18F6..18FF 7245 LIMBU, // 1900..191E 7246 UNKNOWN, // 191F 7247 LIMBU, // 1920..192B 7248 UNKNOWN, // 192C..192F 7249 LIMBU, // 1930..193B 7250 UNKNOWN, // 193C..193F 7251 LIMBU, // 1940 7252 UNKNOWN, // 1941..1943 7253 LIMBU, // 1944..194F 7254 TAI_LE, // 1950..196D 7255 UNKNOWN, // 196E..196F 7256 TAI_LE, // 1970..1974 7257 UNKNOWN, // 1975..197F 7258 NEW_TAI_LUE, // 1980..19AB 7259 UNKNOWN, // 19AC..19AF 7260 NEW_TAI_LUE, // 19B0..19C9 7261 UNKNOWN, // 19CA..19CF 7262 NEW_TAI_LUE, // 19D0..19DA 7263 UNKNOWN, // 19DB..19DD 7264 NEW_TAI_LUE, // 19DE..19DF 7265 KHMER, // 19E0..19FF 7266 BUGINESE, // 1A00..1A1B 7267 UNKNOWN, // 1A1C..1A1D 7268 BUGINESE, // 1A1E..1A1F 7269 TAI_THAM, // 1A20..1A5E 7270 UNKNOWN, // 1A5F 7271 TAI_THAM, // 1A60..1A7C 7272 UNKNOWN, // 1A7D..1A7E 7273 TAI_THAM, // 1A7F..1A89 7274 UNKNOWN, // 1A8A..1A8F 7275 TAI_THAM, // 1A90..1A99 7276 UNKNOWN, // 1A9A..1A9F 7277 TAI_THAM, // 1AA0..1AAD 7278 UNKNOWN, // 1AAE..1AAF 7279 INHERITED, // 1AB0..1AC0 7280 UNKNOWN, // 1AC1..1AFF 7281 BALINESE, // 1B00..1B4B 7282 UNKNOWN, // 1B4C..1B4F 7283 BALINESE, // 1B50..1B7C 7284 UNKNOWN, // 1B7D..1B7F 7285 SUNDANESE, // 1B80..1BBF 7286 BATAK, // 1BC0..1BF3 7287 UNKNOWN, // 1BF4..1BFB 7288 BATAK, // 1BFC..1BFF 7289 LEPCHA, // 1C00..1C37 7290 UNKNOWN, // 1C38..1C3A 7291 LEPCHA, // 1C3B..1C49 7292 UNKNOWN, // 1C4A..1C4C 7293 LEPCHA, // 1C4D..1C4F 7294 OL_CHIKI, // 1C50..1C7F 7295 CYRILLIC, // 1C80..1C88 7296 UNKNOWN, // 1C89..1C8F 7297 GEORGIAN, // 1C90..1CBA 7298 UNKNOWN, // 1CBB..1CBC 7299 GEORGIAN, // 1CBD..1CBF 7300 SUNDANESE, // 1CC0..1CC7 7301 UNKNOWN, // 1CC8..1CCF 
7302 INHERITED, // 1CD0..1CD2 7303 COMMON, // 1CD3 7304 INHERITED, // 1CD4..1CE0 7305 COMMON, // 1CE1 7306 INHERITED, // 1CE2..1CE8 7307 COMMON, // 1CE9..1CEC 7308 INHERITED, // 1CED 7309 COMMON, // 1CEE..1CF3 7310 INHERITED, // 1CF4 7311 COMMON, // 1CF5..1CF7 7312 INHERITED, // 1CF8..1CF9 7313 COMMON, // 1CFA 7314 UNKNOWN, // 1CFB..1CFF 7315 LATIN, // 1D00..1D25 7316 GREEK, // 1D26..1D2A 7317 CYRILLIC, // 1D2B 7318 LATIN, // 1D2C..1D5C 7319 GREEK, // 1D5D..1D61 7320 LATIN, // 1D62..1D65 7321 GREEK, // 1D66..1D6A 7322 LATIN, // 1D6B..1D77 7323 CYRILLIC, // 1D78 7324 LATIN, // 1D79..1DBE 7325 GREEK, // 1DBF 7326 INHERITED, // 1DC0..1DF9 7327 UNKNOWN, // 1DFA 7328 INHERITED, // 1DFB..1DFF 7329 LATIN, // 1E00..1EFF 7330 GREEK, // 1F00..1F15 7331 UNKNOWN, // 1F16..1F17 7332 GREEK, // 1F18..1F1D 7333 UNKNOWN, // 1F1E..1F1F 7334 GREEK, // 1F20..1F45 7335 UNKNOWN, // 1F46..1F47 7336 GREEK, // 1F48..1F4D 7337 UNKNOWN, // 1F4E..1F4F 7338 GREEK, // 1F50..1F57 7339 UNKNOWN, // 1F58 7340 GREEK, // 1F59 7341 UNKNOWN, // 1F5A 7342 GREEK, // 1F5B 7343 UNKNOWN, // 1F5C 7344 GREEK, // 1F5D 7345 UNKNOWN, // 1F5E 7346 GREEK, // 1F5F..1F7D 7347 UNKNOWN, // 1F7E..1F7F 7348 GREEK, // 1F80..1FB4 7349 UNKNOWN, // 1FB5 7350 GREEK, // 1FB6..1FC4 7351 UNKNOWN, // 1FC5 7352 GREEK, // 1FC6..1FD3 7353 UNKNOWN, // 1FD4..1FD5 7354 GREEK, // 1FD6..1FDB 7355 UNKNOWN, // 1FDC 7356 GREEK, // 1FDD..1FEF 7357 UNKNOWN, // 1FF0..1FF1 7358 GREEK, // 1FF2..1FF4 7359 UNKNOWN, // 1FF5 7360 GREEK, // 1FF6..1FFE 7361 UNKNOWN, // 1FFF 7362 COMMON, // 2000..200B 7363 INHERITED, // 200C..200D 7364 COMMON, // 200E..2064 7365 UNKNOWN, // 2065 7366 COMMON, // 2066..2070 7367 LATIN, // 2071 7368 UNKNOWN, // 2072..2073 7369 COMMON, // 2074..207E 7370 LATIN, // 207F 7371 COMMON, // 2080..208E 7372 UNKNOWN, // 208F 7373 LATIN, // 2090..209C 7374 UNKNOWN, // 209D..209F 7375 COMMON, // 20A0..20BF 7376 UNKNOWN, // 20C0..20CF 7377 INHERITED, // 20D0..20F0 7378 UNKNOWN, // 20F1..20FF 7379 COMMON, // 2100..2125 7380 GREEK, // 
2126 7381 COMMON, // 2127..2129 7382 LATIN, // 212A..212B 7383 COMMON, // 212C..2131 7384 LATIN, // 2132 7385 COMMON, // 2133..214D 7386 LATIN, // 214E 7387 COMMON, // 214F..215F 7388 LATIN, // 2160..2188 7389 COMMON, // 2189..218B 7390 UNKNOWN, // 218C..218F 7391 COMMON, // 2190..2426 7392 UNKNOWN, // 2427..243F 7393 COMMON, // 2440..244A 7394 UNKNOWN, // 244B..245F 7395 COMMON, // 2460..27FF 7396 BRAILLE, // 2800..28FF 7397 COMMON, // 2900..2B73 7398 UNKNOWN, // 2B74..2B75 7399 COMMON, // 2B76..2B95 7400 UNKNOWN, // 2B96 7401 COMMON, // 2B97..2BFF 7402 GLAGOLITIC, // 2C00..2C2E 7403 UNKNOWN, // 2C2F 7404 GLAGOLITIC, // 2C30..2C5E 7405 UNKNOWN, // 2C5F 7406 LATIN, // 2C60..2C7F 7407 COPTIC, // 2C80..2CF3 7408 UNKNOWN, // 2CF4..2CF8 7409 COPTIC, // 2CF9..2CFF 7410 GEORGIAN, // 2D00..2D25 7411 UNKNOWN, // 2D26 7412 GEORGIAN, // 2D27 7413 UNKNOWN, // 2D28..2D2C 7414 GEORGIAN, // 2D2D 7415 UNKNOWN, // 2D2E..2D2F 7416 TIFINAGH, // 2D30..2D67 7417 UNKNOWN, // 2D68..2D6E 7418 TIFINAGH, // 2D6F..2D70 7419 UNKNOWN, // 2D71..2D7E 7420 TIFINAGH, // 2D7F 7421 ETHIOPIC, // 2D80..2D96 7422 UNKNOWN, // 2D97..2D9F 7423 ETHIOPIC, // 2DA0..2DA6 7424 UNKNOWN, // 2DA7 7425 ETHIOPIC, // 2DA8..2DAE 7426 UNKNOWN, // 2DAF 7427 ETHIOPIC, // 2DB0..2DB6 7428 UNKNOWN, // 2DB7 7429 ETHIOPIC, // 2DB8..2DBE 7430 UNKNOWN, // 2DBF 7431 ETHIOPIC, // 2DC0..2DC6 7432 UNKNOWN, // 2DC7 7433 ETHIOPIC, // 2DC8..2DCE 7434 UNKNOWN, // 2DCF 7435 ETHIOPIC, // 2DD0..2DD6 7436 UNKNOWN, // 2DD7 7437 ETHIOPIC, // 2DD8..2DDE 7438 UNKNOWN, // 2DDF 7439 CYRILLIC, // 2DE0..2DFF 7440 COMMON, // 2E00..2E52 7441 UNKNOWN, // 2E53..2E7F 7442 HAN, // 2E80..2E99 7443 UNKNOWN, // 2E9A 7444 HAN, // 2E9B..2EF3 7445 UNKNOWN, // 2EF4..2EFF 7446 HAN, // 2F00..2FD5 7447 UNKNOWN, // 2FD6..2FEF 7448 COMMON, // 2FF0..2FFB 7449 UNKNOWN, // 2FFC..2FFF 7450 COMMON, // 3000..3004 7451 HAN, // 3005 7452 COMMON, // 3006 7453 HAN, // 3007 7454 COMMON, // 3008..3020 7455 HAN, // 3021..3029 7456 INHERITED, // 302A..302D 7457 HANGUL, // 
302E..302F 7458 COMMON, // 3030..3037 7459 HAN, // 3038..303B 7460 COMMON, // 303C..303F 7461 UNKNOWN, // 3040 7462 HIRAGANA, // 3041..3096 7463 UNKNOWN, // 3097..3098 7464 INHERITED, // 3099..309A 7465 COMMON, // 309B..309C 7466 HIRAGANA, // 309D..309F 7467 COMMON, // 30A0 7468 KATAKANA, // 30A1..30FA 7469 COMMON, // 30FB..30FC 7470 KATAKANA, // 30FD..30FF 7471 UNKNOWN, // 3100..3104 7472 BOPOMOFO, // 3105..312F 7473 UNKNOWN, // 3130 7474 HANGUL, // 3131..318E 7475 UNKNOWN, // 318F 7476 COMMON, // 3190..319F 7477 BOPOMOFO, // 31A0..31BF 7478 COMMON, // 31C0..31E3 7479 UNKNOWN, // 31E4..31EF 7480 KATAKANA, // 31F0..31FF 7481 HANGUL, // 3200..321E 7482 UNKNOWN, // 321F 7483 COMMON, // 3220..325F 7484 HANGUL, // 3260..327E 7485 COMMON, // 327F..32CF 7486 KATAKANA, // 32D0..32FE 7487 COMMON, // 32FF 7488 KATAKANA, // 3300..3357 7489 COMMON, // 3358..33FF 7490 HAN, // 3400..4DBF 7491 COMMON, // 4DC0..4DFF 7492 HAN, // 4E00..9FFC 7493 UNKNOWN, // 9FFD..9FFF 7494 YI, // A000..A48C 7495 UNKNOWN, // A48D..A48F 7496 YI, // A490..A4C6 7497 UNKNOWN, // A4C7..A4CF 7498 LISU, // A4D0..A4FF 7499 VAI, // A500..A62B 7500 UNKNOWN, // A62C..A63F 7501 CYRILLIC, // A640..A69F 7502 BAMUM, // A6A0..A6F7 7503 UNKNOWN, // A6F8..A6FF 7504 COMMON, // A700..A721 7505 LATIN, // A722..A787 7506 COMMON, // A788..A78A 7507 LATIN, // A78B..A7BF 7508 UNKNOWN, // A7C0..A7C1 7509 LATIN, // A7C2..A7CA 7510 UNKNOWN, // A7CB..A7F4 7511 LATIN, // A7F5..A7FF 7512 SYLOTI_NAGRI, // A800..A82C 7513 UNKNOWN, // A82D..A82F 7514 COMMON, // A830..A839 7515 UNKNOWN, // A83A..A83F 7516 PHAGS_PA, // A840..A877 7517 UNKNOWN, // A878..A87F 7518 SAURASHTRA, // A880..A8C5 7519 UNKNOWN, // A8C6..A8CD 7520 SAURASHTRA, // A8CE..A8D9 7521 UNKNOWN, // A8DA..A8DF 7522 DEVANAGARI, // A8E0..A8FF 7523 KAYAH_LI, // A900..A92D 7524 COMMON, // A92E 7525 KAYAH_LI, // A92F 7526 REJANG, // A930..A953 7527 UNKNOWN, // A954..A95E 7528 REJANG, // A95F 7529 HANGUL, // A960..A97C 7530 UNKNOWN, // A97D..A97F 7531 JAVANESE, // A980..A9CD 
7532 UNKNOWN, // A9CE 7533 COMMON, // A9CF 7534 JAVANESE, // A9D0..A9D9 7535 UNKNOWN, // A9DA..A9DD 7536 JAVANESE, // A9DE..A9DF 7537 MYANMAR, // A9E0..A9FE 7538 UNKNOWN, // A9FF 7539 CHAM, // AA00..AA36 7540 UNKNOWN, // AA37..AA3F 7541 CHAM, // AA40..AA4D 7542 UNKNOWN, // AA4E..AA4F 7543 CHAM, // AA50..AA59 7544 UNKNOWN, // AA5A..AA5B 7545 CHAM, // AA5C..AA5F 7546 MYANMAR, // AA60..AA7F 7547 TAI_VIET, // AA80..AAC2 7548 UNKNOWN, // AAC3..AADA 7549 TAI_VIET, // AADB..AADF 7550 MEETEI_MAYEK, // AAE0..AAF6 7551 UNKNOWN, // AAF7..AB00 7552 ETHIOPIC, // AB01..AB06 7553 UNKNOWN, // AB07..AB08 7554 ETHIOPIC, // AB09..AB0E 7555 UNKNOWN, // AB0F..AB10 7556 ETHIOPIC, // AB11..AB16 7557 UNKNOWN, // AB17..AB1F 7558 ETHIOPIC, // AB20..AB26 7559 UNKNOWN, // AB27 7560 ETHIOPIC, // AB28..AB2E 7561 UNKNOWN, // AB2F 7562 LATIN, // AB30..AB5A 7563 COMMON, // AB5B 7564 LATIN, // AB5C..AB64 7565 GREEK, // AB65 7566 LATIN, // AB66..AB69 7567 COMMON, // AB6A..AB6B 7568 UNKNOWN, // AB6C..AB6F 7569 CHEROKEE, // AB70..ABBF 7570 MEETEI_MAYEK, // ABC0..ABED 7571 UNKNOWN, // ABEE..ABEF 7572 MEETEI_MAYEK, // ABF0..ABF9 7573 UNKNOWN, // ABFA..ABFF 7574 HANGUL, // AC00..D7A3 7575 UNKNOWN, // D7A4..D7AF 7576 HANGUL, // D7B0..D7C6 7577 UNKNOWN, // D7C7..D7CA 7578 HANGUL, // D7CB..D7FB 7579 UNKNOWN, // D7FC..F8FF 7580 HAN, // F900..FA6D 7581 UNKNOWN, // FA6E..FA6F 7582 HAN, // FA70..FAD9 7583 UNKNOWN, // FADA..FAFF 7584 LATIN, // FB00..FB06 7585 UNKNOWN, // FB07..FB12 7586 ARMENIAN, // FB13..FB17 7587 UNKNOWN, // FB18..FB1C 7588 HEBREW, // FB1D..FB36 7589 UNKNOWN, // FB37 7590 HEBREW, // FB38..FB3C 7591 UNKNOWN, // FB3D 7592 HEBREW, // FB3E 7593 UNKNOWN, // FB3F 7594 HEBREW, // FB40..FB41 7595 UNKNOWN, // FB42 7596 HEBREW, // FB43..FB44 7597 UNKNOWN, // FB45 7598 HEBREW, // FB46..FB4F 7599 ARABIC, // FB50..FBC1 7600 UNKNOWN, // FBC2..FBD2 7601 ARABIC, // FBD3..FD3D 7602 COMMON, // FD3E..FD3F 7603 UNKNOWN, // FD40..FD4F 7604 ARABIC, // FD50..FD8F 7605 UNKNOWN, // FD90..FD91 7606 ARABIC, // 
FD92..FDC7 7607 UNKNOWN, // FDC8..FDEF 7608 ARABIC, // FDF0..FDFD 7609 UNKNOWN, // FDFE..FDFF 7610 INHERITED, // FE00..FE0F 7611 COMMON, // FE10..FE19 7612 UNKNOWN, // FE1A..FE1F 7613 INHERITED, // FE20..FE2D 7614 CYRILLIC, // FE2E..FE2F 7615 COMMON, // FE30..FE52 7616 UNKNOWN, // FE53 7617 COMMON, // FE54..FE66 7618 UNKNOWN, // FE67 7619 COMMON, // FE68..FE6B 7620 UNKNOWN, // FE6C..FE6F 7621 ARABIC, // FE70..FE74 7622 UNKNOWN, // FE75 7623 ARABIC, // FE76..FEFC 7624 UNKNOWN, // FEFD..FEFE 7625 COMMON, // FEFF 7626 UNKNOWN, // FF00 7627 COMMON, // FF01..FF20 7628 LATIN, // FF21..FF3A 7629 COMMON, // FF3B..FF40 7630 LATIN, // FF41..FF5A 7631 COMMON, // FF5B..FF65 7632 KATAKANA, // FF66..FF6F 7633 COMMON, // FF70 7634 KATAKANA, // FF71..FF9D 7635 COMMON, // FF9E..FF9F 7636 HANGUL, // FFA0..FFBE 7637 UNKNOWN, // FFBF..FFC1 7638 HANGUL, // FFC2..FFC7 7639 UNKNOWN, // FFC8..FFC9 7640 HANGUL, // FFCA..FFCF 7641 UNKNOWN, // FFD0..FFD1 7642 HANGUL, // FFD2..FFD7 7643 UNKNOWN, // FFD8..FFD9 7644 HANGUL, // FFDA..FFDC 7645 UNKNOWN, // FFDD..FFDF 7646 COMMON, // FFE0..FFE6 7647 UNKNOWN, // FFE7 7648 COMMON, // FFE8..FFEE 7649 UNKNOWN, // FFEF..FFF8 7650 COMMON, // FFF9..FFFD 7651 UNKNOWN, // FFFE..FFFF 7652 LINEAR_B, // 10000..1000B 7653 UNKNOWN, // 1000C 7654 LINEAR_B, // 1000D..10026 7655 UNKNOWN, // 10027 7656 LINEAR_B, // 10028..1003A 7657 UNKNOWN, // 1003B 7658 LINEAR_B, // 1003C..1003D 7659 UNKNOWN, // 1003E 7660 LINEAR_B, // 1003F..1004D 7661 UNKNOWN, // 1004E..1004F 7662 LINEAR_B, // 10050..1005D 7663 UNKNOWN, // 1005E..1007F 7664 LINEAR_B, // 10080..100FA 7665 UNKNOWN, // 100FB..100FF 7666 COMMON, // 10100..10102 7667 UNKNOWN, // 10103..10106 7668 COMMON, // 10107..10133 7669 UNKNOWN, // 10134..10136 7670 COMMON, // 10137..1013F 7671 GREEK, // 10140..1018E 7672 UNKNOWN, // 1018F 7673 COMMON, // 10190..1019C 7674 UNKNOWN, // 1019D..1019F 7675 GREEK, // 101A0 7676 UNKNOWN, // 101A1..101CF 7677 COMMON, // 101D0..101FC 7678 INHERITED, // 101FD 7679 UNKNOWN, // 
101FE..1027F 7680 LYCIAN, // 10280..1029C 7681 UNKNOWN, // 1029D..1029F 7682 CARIAN, // 102A0..102D0 7683 UNKNOWN, // 102D1..102DF 7684 INHERITED, // 102E0 7685 COMMON, // 102E1..102FB 7686 UNKNOWN, // 102FC..102FF 7687 OLD_ITALIC, // 10300..10323 7688 UNKNOWN, // 10324..1032C 7689 OLD_ITALIC, // 1032D..1032F 7690 GOTHIC, // 10330..1034A 7691 UNKNOWN, // 1034B..1034F 7692 OLD_PERMIC, // 10350..1037A 7693 UNKNOWN, // 1037B..1037F 7694 UGARITIC, // 10380..1039D 7695 UNKNOWN, // 1039E 7696 UGARITIC, // 1039F 7697 OLD_PERSIAN, // 103A0..103C3 7698 UNKNOWN, // 103C4..103C7 7699 OLD_PERSIAN, // 103C8..103D5 7700 UNKNOWN, // 103D6..103FF 7701 DESERET, // 10400..1044F 7702 SHAVIAN, // 10450..1047F 7703 OSMANYA, // 10480..1049D 7704 UNKNOWN, // 1049E..1049F 7705 OSMANYA, // 104A0..104A9 7706 UNKNOWN, // 104AA..104AF 7707 OSAGE, // 104B0..104D3 7708 UNKNOWN, // 104D4..104D7 7709 OSAGE, // 104D8..104FB 7710 UNKNOWN, // 104FC..104FF 7711 ELBASAN, // 10500..10527 7712 UNKNOWN, // 10528..1052F 7713 CAUCASIAN_ALBANIAN, // 10530..10563 7714 UNKNOWN, // 10564..1056E 7715 CAUCASIAN_ALBANIAN, // 1056F 7716 UNKNOWN, // 10570..105FF 7717 LINEAR_A, // 10600..10736 7718 UNKNOWN, // 10737..1073F 7719 LINEAR_A, // 10740..10755 7720 UNKNOWN, // 10756..1075F 7721 LINEAR_A, // 10760..10767 7722 UNKNOWN, // 10768..107FF 7723 CYPRIOT, // 10800..10805 7724 UNKNOWN, // 10806..10807 7725 CYPRIOT, // 10808 7726 UNKNOWN, // 10809 7727 CYPRIOT, // 1080A..10835 7728 UNKNOWN, // 10836 7729 CYPRIOT, // 10837..10838 7730 UNKNOWN, // 10839..1083B 7731 CYPRIOT, // 1083C 7732 UNKNOWN, // 1083D..1083E 7733 CYPRIOT, // 1083F 7734 IMPERIAL_ARAMAIC, // 10840..10855 7735 UNKNOWN, // 10856 7736 IMPERIAL_ARAMAIC, // 10857..1085F 7737 PALMYRENE, // 10860..1087F 7738 NABATAEAN, // 10880..1089E 7739 UNKNOWN, // 1089F..108A6 7740 NABATAEAN, // 108A7..108AF 7741 UNKNOWN, // 108B0..108DF 7742 HATRAN, // 108E0..108F2 7743 UNKNOWN, // 108F3 7744 HATRAN, // 108F4..108F5 7745 UNKNOWN, // 108F6..108FA 7746 HATRAN, // 
108FB..108FF 7747 PHOENICIAN, // 10900..1091B 7748 UNKNOWN, // 1091C..1091E 7749 PHOENICIAN, // 1091F 7750 LYDIAN, // 10920..10939 7751 UNKNOWN, // 1093A..1093E 7752 LYDIAN, // 1093F 7753 UNKNOWN, // 10940..1097F 7754 MEROITIC_HIEROGLYPHS, // 10980..1099F 7755 MEROITIC_CURSIVE, // 109A0..109B7 7756 UNKNOWN, // 109B8..109BB 7757 MEROITIC_CURSIVE, // 109BC..109CF 7758 UNKNOWN, // 109D0..109D1 7759 MEROITIC_CURSIVE, // 109D2..109FF 7760 KHAROSHTHI, // 10A00..10A03 7761 UNKNOWN, // 10A04 7762 KHAROSHTHI, // 10A05..10A06 7763 UNKNOWN, // 10A07..10A0B 7764 KHAROSHTHI, // 10A0C..10A13 7765 UNKNOWN, // 10A14 7766 KHAROSHTHI, // 10A15..10A17 7767 UNKNOWN, // 10A18 7768 KHAROSHTHI, // 10A19..10A35 7769 UNKNOWN, // 10A36..10A37 7770 KHAROSHTHI, // 10A38..10A3A 7771 UNKNOWN, // 10A3B..10A3E 7772 KHAROSHTHI, // 10A3F..10A48 7773 UNKNOWN, // 10A49..10A4F 7774 KHAROSHTHI, // 10A50..10A58 7775 UNKNOWN, // 10A59..10A5F 7776 OLD_SOUTH_ARABIAN, // 10A60..10A7F 7777 OLD_NORTH_ARABIAN, // 10A80..10A9F 7778 UNKNOWN, // 10AA0..10ABF 7779 MANICHAEAN, // 10AC0..10AE6 7780 UNKNOWN, // 10AE7..10AEA 7781 MANICHAEAN, // 10AEB..10AF6 7782 UNKNOWN, // 10AF7..10AFF 7783 AVESTAN, // 10B00..10B35 7784 UNKNOWN, // 10B36..10B38 7785 AVESTAN, // 10B39..10B3F 7786 INSCRIPTIONAL_PARTHIAN, // 10B40..10B55 7787 UNKNOWN, // 10B56..10B57 7788 INSCRIPTIONAL_PARTHIAN, // 10B58..10B5F 7789 INSCRIPTIONAL_PAHLAVI, // 10B60..10B72 7790 UNKNOWN, // 10B73..10B77 7791 INSCRIPTIONAL_PAHLAVI, // 10B78..10B7F 7792 PSALTER_PAHLAVI, // 10B80..10B91 7793 UNKNOWN, // 10B92..10B98 7794 PSALTER_PAHLAVI, // 10B99..10B9C 7795 UNKNOWN, // 10B9D..10BA8 7796 PSALTER_PAHLAVI, // 10BA9..10BAF 7797 UNKNOWN, // 10BB0..10BFF 7798 OLD_TURKIC, // 10C00..10C48 7799 UNKNOWN, // 10C49..10C7F 7800 OLD_HUNGARIAN, // 10C80..10CB2 7801 UNKNOWN, // 10CB3..10CBF 7802 OLD_HUNGARIAN, // 10CC0..10CF2 7803 UNKNOWN, // 10CF3..10CF9 7804 OLD_HUNGARIAN, // 10CFA..10CFF 7805 HANIFI_ROHINGYA, // 10D00..10D27 7806 UNKNOWN, // 10D28..10D2F 7807 
HANIFI_ROHINGYA, // 10D30..10D39 7808 UNKNOWN, // 10D3A..10E5F 7809 ARABIC, // 10E60..10E7E 7810 UNKNOWN, // 10E7F 7811 YEZIDI, // 10E80..10EA9 7812 UNKNOWN, // 10EAA 7813 YEZIDI, // 10EAB..10EAD 7814 UNKNOWN, // 10EAE..10EAF 7815 YEZIDI, // 10EB0..10EB1 7816 UNKNOWN, // 10EB2..10EFF 7817 OLD_SOGDIAN, // 10F00..10F27 7818 UNKNOWN, // 10F28..10F2F 7819 SOGDIAN, // 10F30..10F59 7820 UNKNOWN, // 10F5A..10FAF 7821 CHORASMIAN, // 10FB0..10FCB 7822 UNKNOWN, // 10FCC..10FDF 7823 ELYMAIC, // 10FE0..10FF6 7824 UNKNOWN, // 10FF7..10FFF 7825 BRAHMI, // 11000..1104D 7826 UNKNOWN, // 1104E..11051 7827 BRAHMI, // 11052..1106F 7828 UNKNOWN, // 11070..1107E 7829 BRAHMI, // 1107F 7830 KAITHI, // 11080..110C1 7831 UNKNOWN, // 110C2..110CC 7832 KAITHI, // 110CD 7833 UNKNOWN, // 110CE..110CF 7834 SORA_SOMPENG, // 110D0..110E8 7835 UNKNOWN, // 110E9..110EF 7836 SORA_SOMPENG, // 110F0..110F9 7837 UNKNOWN, // 110FA..110FF 7838 CHAKMA, // 11100..11134 7839 UNKNOWN, // 11135 7840 CHAKMA, // 11136..11147 7841 UNKNOWN, // 11148..1114F 7842 MAHAJANI, // 11150..11176 7843 UNKNOWN, // 11177..1117F 7844 SHARADA, // 11180..111DF 7845 UNKNOWN, // 111E0 7846 SINHALA, // 111E1..111F4 7847 UNKNOWN, // 111F5..111FF 7848 KHOJKI, // 11200..11211 7849 UNKNOWN, // 11212 7850 KHOJKI, // 11213..1123E 7851 UNKNOWN, // 1123F..1127F 7852 MULTANI, // 11280..11286 7853 UNKNOWN, // 11287 7854 MULTANI, // 11288 7855 UNKNOWN, // 11289 7856 MULTANI, // 1128A..1128D 7857 UNKNOWN, // 1128E 7858 MULTANI, // 1128F..1129D 7859 UNKNOWN, // 1129E 7860 MULTANI, // 1129F..112A9 7861 UNKNOWN, // 112AA..112AF 7862 KHUDAWADI, // 112B0..112EA 7863 UNKNOWN, // 112EB..112EF 7864 KHUDAWADI, // 112F0..112F9 7865 UNKNOWN, // 112FA..112FF 7866 GRANTHA, // 11300..11303 7867 UNKNOWN, // 11304 7868 GRANTHA, // 11305..1130C 7869 UNKNOWN, // 1130D..1130E 7870 GRANTHA, // 1130F..11310 7871 UNKNOWN, // 11311..11312 7872 GRANTHA, // 11313..11328 7873 UNKNOWN, // 11329 7874 GRANTHA, // 1132A..11330 7875 UNKNOWN, // 11331 7876 GRANTHA, // 
11332..11333 7877 UNKNOWN, // 11334 7878 GRANTHA, // 11335..11339 7879 UNKNOWN, // 1133A 7880 INHERITED, // 1133B 7881 GRANTHA, // 1133C..11344 7882 UNKNOWN, // 11345..11346 7883 GRANTHA, // 11347..11348 7884 UNKNOWN, // 11349..1134A 7885 GRANTHA, // 1134B..1134D 7886 UNKNOWN, // 1134E..1134F 7887 GRANTHA, // 11350 7888 UNKNOWN, // 11351..11356 7889 GRANTHA, // 11357 7890 UNKNOWN, // 11358..1135C 7891 GRANTHA, // 1135D..11363 7892 UNKNOWN, // 11364..11365 7893 GRANTHA, // 11366..1136C 7894 UNKNOWN, // 1136D..1136F 7895 GRANTHA, // 11370..11374 7896 UNKNOWN, // 11375..113FF 7897 NEWA, // 11400..1145B 7898 UNKNOWN, // 1145C 7899 NEWA, // 1145D..11461 7900 UNKNOWN, // 11462..1147F 7901 TIRHUTA, // 11480..114C7 7902 UNKNOWN, // 114C8..114CF 7903 TIRHUTA, // 114D0..114D9 7904 UNKNOWN, // 114DA..1157F 7905 SIDDHAM, // 11580..115B5 7906 UNKNOWN, // 115B6..115B7 7907 SIDDHAM, // 115B8..115DD 7908 UNKNOWN, // 115DE..115FF 7909 MODI, // 11600..11644 7910 UNKNOWN, // 11645..1164F 7911 MODI, // 11650..11659 7912 UNKNOWN, // 1165A..1165F 7913 MONGOLIAN, // 11660..1166C 7914 UNKNOWN, // 1166D..1167F 7915 TAKRI, // 11680..116B8 7916 UNKNOWN, // 116B9..116BF 7917 TAKRI, // 116C0..116C9 7918 UNKNOWN, // 116CA..116FF 7919 AHOM, // 11700..1171A 7920 UNKNOWN, // 1171B..1171C 7921 AHOM, // 1171D..1172B 7922 UNKNOWN, // 1172C..1172F 7923 AHOM, // 11730..1173F 7924 UNKNOWN, // 11740..117FF 7925 DOGRA, // 11800..1183B 7926 UNKNOWN, // 1183C..1189F 7927 WARANG_CITI, // 118A0..118F2 7928 UNKNOWN, // 118F3..118FE 7929 WARANG_CITI, // 118FF 7930 DIVES_AKURU, // 11900..11906 7931 UNKNOWN, // 11907..11908 7932 DIVES_AKURU, // 11909 7933 UNKNOWN, // 1190A..1190B 7934 DIVES_AKURU, // 1190C..11913 7935 UNKNOWN, // 11914 7936 DIVES_AKURU, // 11915..11916 7937 UNKNOWN, // 11917 7938 DIVES_AKURU, // 11918..11935 7939 UNKNOWN, // 11936 7940 DIVES_AKURU, // 11937..11938 7941 UNKNOWN, // 11939..1193A 7942 DIVES_AKURU, // 1193B..11946 7943 UNKNOWN, // 11947..1194F 7944 DIVES_AKURU, // 11950..11959 7945 
UNKNOWN, // 1195A..1199F 7946 NANDINAGARI, // 119A0..119A7 7947 UNKNOWN, // 119A8..119A9 7948 NANDINAGARI, // 119AA..119D7 7949 UNKNOWN, // 119D8..119D9 7950 NANDINAGARI, // 119DA..119E4 7951 UNKNOWN, // 119E5..119FF 7952 ZANABAZAR_SQUARE, // 11A00..11A47 7953 UNKNOWN, // 11A48..11A4F 7954 SOYOMBO, // 11A50..11AA2 7955 UNKNOWN, // 11AA3..11ABF 7956 PAU_CIN_HAU, // 11AC0..11AF8 7957 UNKNOWN, // 11AF9..11BFF 7958 BHAIKSUKI, // 11C00..11C08 7959 UNKNOWN, // 11C09 7960 BHAIKSUKI, // 11C0A..11C36 7961 UNKNOWN, // 11C37 7962 BHAIKSUKI, // 11C38..11C45 7963 UNKNOWN, // 11C46..11C4F 7964 BHAIKSUKI, // 11C50..11C6C 7965 UNKNOWN, // 11C6D..11C6F 7966 MARCHEN, // 11C70..11C8F 7967 UNKNOWN, // 11C90..11C91 7968 MARCHEN, // 11C92..11CA7 7969 UNKNOWN, // 11CA8 7970 MARCHEN, // 11CA9..11CB6 7971 UNKNOWN, // 11CB7..11CFF 7972 MASARAM_GONDI, // 11D00..11D06 7973 UNKNOWN, // 11D07 7974 MASARAM_GONDI, // 11D08..11D09 7975 UNKNOWN, // 11D0A 7976 MASARAM_GONDI, // 11D0B..11D36 7977 UNKNOWN, // 11D37..11D39 7978 MASARAM_GONDI, // 11D3A 7979 UNKNOWN, // 11D3B 7980 MASARAM_GONDI, // 11D3C..11D3D 7981 UNKNOWN, // 11D3E 7982 MASARAM_GONDI, // 11D3F..11D47 7983 UNKNOWN, // 11D48..11D4F 7984 MASARAM_GONDI, // 11D50..11D59 7985 UNKNOWN, // 11D5A..11D5F 7986 GUNJALA_GONDI, // 11D60..11D65 7987 UNKNOWN, // 11D66 7988 GUNJALA_GONDI, // 11D67..11D68 7989 UNKNOWN, // 11D69 7990 GUNJALA_GONDI, // 11D6A..11D8E 7991 UNKNOWN, // 11D8F 7992 GUNJALA_GONDI, // 11D90..11D91 7993 UNKNOWN, // 11D92 7994 GUNJALA_GONDI, // 11D93..11D98 7995 UNKNOWN, // 11D99..11D9F 7996 GUNJALA_GONDI, // 11DA0..11DA9 7997 UNKNOWN, // 11DAA..11EDF 7998 MAKASAR, // 11EE0..11EF8 7999 UNKNOWN, // 11EF9..11FAF 8000 LISU, // 11FB0 8001 UNKNOWN, // 11FB1..11FBF 8002 TAMIL, // 11FC0..11FF1 8003 UNKNOWN, // 11FF2..11FFE 8004 TAMIL, // 11FFF 8005 CUNEIFORM, // 12000..12399 8006 UNKNOWN, // 1239A..123FF 8007 CUNEIFORM, // 12400..1246E 8008 UNKNOWN, // 1246F 8009 CUNEIFORM, // 12470..12474 8010 UNKNOWN, // 12475..1247F 8011 CUNEIFORM, // 
12480..12543 8012 UNKNOWN, // 12544..12FFF 8013 EGYPTIAN_HIEROGLYPHS, // 13000..1342E 8014 UNKNOWN, // 1342F 8015 EGYPTIAN_HIEROGLYPHS, // 13430..13438 8016 UNKNOWN, // 13439..143FF 8017 ANATOLIAN_HIEROGLYPHS, // 14400..14646 8018 UNKNOWN, // 14647..167FF 8019 BAMUM, // 16800..16A38 8020 UNKNOWN, // 16A39..16A3F 8021 MRO, // 16A40..16A5E 8022 UNKNOWN, // 16A5F 8023 MRO, // 16A60..16A69 8024 UNKNOWN, // 16A6A..16A6D 8025 MRO, // 16A6E..16A6F 8026 UNKNOWN, // 16A70..16ACF 8027 BASSA_VAH, // 16AD0..16AED 8028 UNKNOWN, // 16AEE..16AEF 8029 BASSA_VAH, // 16AF0..16AF5 8030 UNKNOWN, // 16AF6..16AFF 8031 PAHAWH_HMONG, // 16B00..16B45 8032 UNKNOWN, // 16B46..16B4F 8033 PAHAWH_HMONG, // 16B50..16B59 8034 UNKNOWN, // 16B5A 8035 PAHAWH_HMONG, // 16B5B..16B61 8036 UNKNOWN, // 16B62 8037 PAHAWH_HMONG, // 16B63..16B77 8038 UNKNOWN, // 16B78..16B7C 8039 PAHAWH_HMONG, // 16B7D..16B8F 8040 UNKNOWN, // 16B90..16E3F 8041 MEDEFAIDRIN, // 16E40..16E9A 8042 UNKNOWN, // 16E9B..16EFF 8043 MIAO, // 16F00..16F4A 8044 UNKNOWN, // 16F4B..16F4E 8045 MIAO, // 16F4F..16F87 8046 UNKNOWN, // 16F88..16F8E 8047 MIAO, // 16F8F..16F9F 8048 UNKNOWN, // 16FA0..16FDF 8049 TANGUT, // 16FE0 8050 NUSHU, // 16FE1 8051 COMMON, // 16FE2..16FE3 8052 KHITAN_SMALL_SCRIPT, // 16FE4 8053 UNKNOWN, // 16FE5..16FEF 8054 HAN, // 16FF0..16FF1 8055 UNKNOWN, // 16FF2..16FFF 8056 TANGUT, // 17000..187F7 8057 UNKNOWN, // 187F8..187FF 8058 TANGUT, // 18800..18AFF 8059 KHITAN_SMALL_SCRIPT, // 18B00..18CD5 8060 UNKNOWN, // 18CD6..18CFF 8061 TANGUT, // 18D00..18D08 8062 UNKNOWN, // 18D09..1AFFF 8063 KATAKANA, // 1B000 8064 HIRAGANA, // 1B001..1B11E 8065 UNKNOWN, // 1B11F..1B14F 8066 HIRAGANA, // 1B150..1B152 8067 UNKNOWN, // 1B153..1B163 8068 KATAKANA, // 1B164..1B167 8069 UNKNOWN, // 1B168..1B16F 8070 NUSHU, // 1B170..1B2FB 8071 UNKNOWN, // 1B2FC..1BBFF 8072 DUPLOYAN, // 1BC00..1BC6A 8073 UNKNOWN, // 1BC6B..1BC6F 8074 DUPLOYAN, // 1BC70..1BC7C 8075 UNKNOWN, // 1BC7D..1BC7F 8076 DUPLOYAN, // 1BC80..1BC88 8077 UNKNOWN, // 
1BC89..1BC8F 8078 DUPLOYAN, // 1BC90..1BC99 8079 UNKNOWN, // 1BC9A..1BC9B 8080 DUPLOYAN, // 1BC9C..1BC9F 8081 COMMON, // 1BCA0..1BCA3 8082 UNKNOWN, // 1BCA4..1CFFF 8083 COMMON, // 1D000..1D0F5 8084 UNKNOWN, // 1D0F6..1D0FF 8085 COMMON, // 1D100..1D126 8086 UNKNOWN, // 1D127..1D128 8087 COMMON, // 1D129..1D166 8088 INHERITED, // 1D167..1D169 8089 COMMON, // 1D16A..1D17A 8090 INHERITED, // 1D17B..1D182 8091 COMMON, // 1D183..1D184 8092 INHERITED, // 1D185..1D18B 8093 COMMON, // 1D18C..1D1A9 8094 INHERITED, // 1D1AA..1D1AD 8095 COMMON, // 1D1AE..1D1E8 8096 UNKNOWN, // 1D1E9..1D1FF 8097 GREEK, // 1D200..1D245 8098 UNKNOWN, // 1D246..1D2DF 8099 COMMON, // 1D2E0..1D2F3 8100 UNKNOWN, // 1D2F4..1D2FF 8101 COMMON, // 1D300..1D356 8102 UNKNOWN, // 1D357..1D35F 8103 COMMON, // 1D360..1D378 8104 UNKNOWN, // 1D379..1D3FF 8105 COMMON, // 1D400..1D454 8106 UNKNOWN, // 1D455 8107 COMMON, // 1D456..1D49C 8108 UNKNOWN, // 1D49D 8109 COMMON, // 1D49E..1D49F 8110 UNKNOWN, // 1D4A0..1D4A1 8111 COMMON, // 1D4A2 8112 UNKNOWN, // 1D4A3..1D4A4 8113 COMMON, // 1D4A5..1D4A6 8114 UNKNOWN, // 1D4A7..1D4A8 8115 COMMON, // 1D4A9..1D4AC 8116 UNKNOWN, // 1D4AD 8117 COMMON, // 1D4AE..1D4B9 8118 UNKNOWN, // 1D4BA 8119 COMMON, // 1D4BB 8120 UNKNOWN, // 1D4BC 8121 COMMON, // 1D4BD..1D4C3 8122 UNKNOWN, // 1D4C4 8123 COMMON, // 1D4C5..1D505 8124 UNKNOWN, // 1D506 8125 COMMON, // 1D507..1D50A 8126 UNKNOWN, // 1D50B..1D50C 8127 COMMON, // 1D50D..1D514 8128 UNKNOWN, // 1D515 8129 COMMON, // 1D516..1D51C 8130 UNKNOWN, // 1D51D 8131 COMMON, // 1D51E..1D539 8132 UNKNOWN, // 1D53A 8133 COMMON, // 1D53B..1D53E 8134 UNKNOWN, // 1D53F 8135 COMMON, // 1D540..1D544 8136 UNKNOWN, // 1D545 8137 COMMON, // 1D546 8138 UNKNOWN, // 1D547..1D549 8139 COMMON, // 1D54A..1D550 8140 UNKNOWN, // 1D551 8141 COMMON, // 1D552..1D6A5 8142 UNKNOWN, // 1D6A6..1D6A7 8143 COMMON, // 1D6A8..1D7CB 8144 UNKNOWN, // 1D7CC..1D7CD 8145 COMMON, // 1D7CE..1D7FF 8146 SIGNWRITING, // 1D800..1DA8B 8147 UNKNOWN, // 1DA8C..1DA9A 8148 SIGNWRITING, 
// 1DA9B..1DA9F 8149 UNKNOWN, // 1DAA0 8150 SIGNWRITING, // 1DAA1..1DAAF 8151 UNKNOWN, // 1DAB0..1DFFF 8152 GLAGOLITIC, // 1E000..1E006 8153 UNKNOWN, // 1E007 8154 GLAGOLITIC, // 1E008..1E018 8155 UNKNOWN, // 1E019..1E01A 8156 GLAGOLITIC, // 1E01B..1E021 8157 UNKNOWN, // 1E022 8158 GLAGOLITIC, // 1E023..1E024 8159 UNKNOWN, // 1E025 8160 GLAGOLITIC, // 1E026..1E02A 8161 UNKNOWN, // 1E02B..1E0FF 8162 NYIAKENG_PUACHUE_HMONG, // 1E100..1E12C 8163 UNKNOWN, // 1E12D..1E12F 8164 NYIAKENG_PUACHUE_HMONG, // 1E130..1E13D 8165 UNKNOWN, // 1E13E..1E13F 8166 NYIAKENG_PUACHUE_HMONG, // 1E140..1E149 8167 UNKNOWN, // 1E14A..1E14D 8168 NYIAKENG_PUACHUE_HMONG, // 1E14E..1E14F 8169 UNKNOWN, // 1E150..1E2BF 8170 WANCHO, // 1E2C0..1E2F9 8171 UNKNOWN, // 1E2FA..1E2FE 8172 WANCHO, // 1E2FF 8173 UNKNOWN, // 1E300..1E7FF 8174 MENDE_KIKAKUI, // 1E800..1E8C4 8175 UNKNOWN, // 1E8C5..1E8C6 8176 MENDE_KIKAKUI, // 1E8C7..1E8D6 8177 UNKNOWN, // 1E8D7..1E8FF 8178 ADLAM, // 1E900..1E94B 8179 UNKNOWN, // 1E94C..1E94F 8180 ADLAM, // 1E950..1E959 8181 UNKNOWN, // 1E95A..1E95D 8182 ADLAM, // 1E95E..1E95F 8183 UNKNOWN, // 1E960..1EC70 8184 COMMON, // 1EC71..1ECB4 8185 UNKNOWN, // 1ECB5..1ED00 8186 COMMON, // 1ED01..1ED3D 8187 UNKNOWN, // 1ED3E..1EDFF 8188 ARABIC, // 1EE00..1EE03 8189 UNKNOWN, // 1EE04 8190 ARABIC, // 1EE05..1EE1F 8191 UNKNOWN, // 1EE20 8192 ARABIC, // 1EE21..1EE22 8193 UNKNOWN, // 1EE23 8194 ARABIC, // 1EE24 8195 UNKNOWN, // 1EE25..1EE26 8196 ARABIC, // 1EE27 8197 UNKNOWN, // 1EE28 8198 ARABIC, // 1EE29..1EE32 8199 UNKNOWN, // 1EE33 8200 ARABIC, // 1EE34..1EE37 8201 UNKNOWN, // 1EE38 8202 ARABIC, // 1EE39 8203 UNKNOWN, // 1EE3A 8204 ARABIC, // 1EE3B 8205 UNKNOWN, // 1EE3C..1EE41 8206 ARABIC, // 1EE42 8207 UNKNOWN, // 1EE43..1EE46 8208 ARABIC, // 1EE47 8209 UNKNOWN, // 1EE48 8210 ARABIC, // 1EE49 8211 UNKNOWN, // 1EE4A 8212 ARABIC, // 1EE4B 8213 UNKNOWN, // 1EE4C 8214 ARABIC, // 1EE4D..1EE4F 8215 UNKNOWN, // 1EE50 8216 ARABIC, // 1EE51..1EE52 8217 UNKNOWN, // 1EE53 8218 ARABIC, // 1EE54 
8219 UNKNOWN, // 1EE55..1EE56 8220 ARABIC, // 1EE57 8221 UNKNOWN, // 1EE58 8222 ARABIC, // 1EE59 8223 UNKNOWN, // 1EE5A 8224 ARABIC, // 1EE5B 8225 UNKNOWN, // 1EE5C 8226 ARABIC, // 1EE5D 8227 UNKNOWN, // 1EE5E 8228 ARABIC, // 1EE5F 8229 UNKNOWN, // 1EE60 8230 ARABIC, // 1EE61..1EE62 8231 UNKNOWN, // 1EE63 8232 ARABIC, // 1EE64 8233 UNKNOWN, // 1EE65..1EE66 8234 ARABIC, // 1EE67..1EE6A 8235 UNKNOWN, // 1EE6B 8236 ARABIC, // 1EE6C..1EE72 8237 UNKNOWN, // 1EE73 8238 ARABIC, // 1EE74..1EE77 8239 UNKNOWN, // 1EE78 8240 ARABIC, // 1EE79..1EE7C 8241 UNKNOWN, // 1EE7D 8242 ARABIC, // 1EE7E 8243 UNKNOWN, // 1EE7F 8244 ARABIC, // 1EE80..1EE89 8245 UNKNOWN, // 1EE8A 8246 ARABIC, // 1EE8B..1EE9B 8247 UNKNOWN, // 1EE9C..1EEA0 8248 ARABIC, // 1EEA1..1EEA3 8249 UNKNOWN, // 1EEA4 8250 ARABIC, // 1EEA5..1EEA9 8251 UNKNOWN, // 1EEAA 8252 ARABIC, // 1EEAB..1EEBB 8253 UNKNOWN, // 1EEBC..1EEEF 8254 ARABIC, // 1EEF0..1EEF1 8255 UNKNOWN, // 1EEF2..1EFFF 8256 COMMON, // 1F000..1F02B 8257 UNKNOWN, // 1F02C..1F02F 8258 COMMON, // 1F030..1F093 8259 UNKNOWN, // 1F094..1F09F 8260 COMMON, // 1F0A0..1F0AE 8261 UNKNOWN, // 1F0AF..1F0B0 8262 COMMON, // 1F0B1..1F0BF 8263 UNKNOWN, // 1F0C0 8264 COMMON, // 1F0C1..1F0CF 8265 UNKNOWN, // 1F0D0 8266 COMMON, // 1F0D1..1F0F5 8267 UNKNOWN, // 1F0F6..1F0FF 8268 COMMON, // 1F100..1F1AD 8269 UNKNOWN, // 1F1AE..1F1E5 8270 COMMON, // 1F1E6..1F1FF 8271 HIRAGANA, // 1F200 8272 COMMON, // 1F201..1F202 8273 UNKNOWN, // 1F203..1F20F 8274 COMMON, // 1F210..1F23B 8275 UNKNOWN, // 1F23C..1F23F 8276 COMMON, // 1F240..1F248 8277 UNKNOWN, // 1F249..1F24F 8278 COMMON, // 1F250..1F251 8279 UNKNOWN, // 1F252..1F25F 8280 COMMON, // 1F260..1F265 8281 UNKNOWN, // 1F266..1F2FF 8282 COMMON, // 1F300..1F6D7 8283 UNKNOWN, // 1F6D8..1F6DF 8284 COMMON, // 1F6E0..1F6EC 8285 UNKNOWN, // 1F6ED..1F6EF 8286 COMMON, // 1F6F0..1F6FC 8287 UNKNOWN, // 1F6FD..1F6FF 8288 COMMON, // 1F700..1F773 8289 UNKNOWN, // 1F774..1F77F 8290 COMMON, // 1F780..1F7D8 8291 UNKNOWN, // 1F7D9..1F7DF 8292 COMMON, 
// 1F7E0..1F7EB
            UNKNOWN, // 1F7EC..1F7FF
            COMMON, // 1F800..1F80B
            UNKNOWN, // 1F80C..1F80F
            COMMON, // 1F810..1F847
            UNKNOWN, // 1F848..1F84F
            COMMON, // 1F850..1F859
            UNKNOWN, // 1F85A..1F85F
            COMMON, // 1F860..1F887
            UNKNOWN, // 1F888..1F88F
            COMMON, // 1F890..1F8AD
            UNKNOWN, // 1F8AE..1F8AF
            COMMON, // 1F8B0..1F8B1
            UNKNOWN, // 1F8B2..1F8FF
            COMMON, // 1F900..1F978
            UNKNOWN, // 1F979
            COMMON, // 1F97A..1F9CB
            UNKNOWN, // 1F9CC
            COMMON, // 1F9CD..1FA53
            UNKNOWN, // 1FA54..1FA5F
            COMMON, // 1FA60..1FA6D
            UNKNOWN, // 1FA6E..1FA6F
            COMMON, // 1FA70..1FA74
            UNKNOWN, // 1FA75..1FA77
            COMMON, // 1FA78..1FA7A
            UNKNOWN, // 1FA7B..1FA7F
            COMMON, // 1FA80..1FA86
            UNKNOWN, // 1FA87..1FA8F
            COMMON, // 1FA90..1FAA8
            UNKNOWN, // 1FAA9..1FAAF
            COMMON, // 1FAB0..1FAB6
            UNKNOWN, // 1FAB7..1FABF
            COMMON, // 1FAC0..1FAC2
            UNKNOWN, // 1FAC3..1FACF
            COMMON, // 1FAD0..1FAD6
            UNKNOWN, // 1FAD7..1FAFF
            COMMON, // 1FB00..1FB92
            UNKNOWN, // 1FB93
            COMMON, // 1FB94..1FBCA
            UNKNOWN, // 1FBCB..1FBEF
            COMMON, // 1FBF0..1FBF9
            UNKNOWN, // 1FBFA..1FFFF
            HAN, // 20000..2A6DD
            UNKNOWN, // 2A6DE..2A6FF
            HAN, // 2A700..2B734
            UNKNOWN, // 2B735..2B73F
            HAN, // 2B740..2B81D
            UNKNOWN, // 2B81E..2B81F
            HAN, // 2B820..2CEA1
            UNKNOWN, // 2CEA2..2CEAF
            HAN, // 2CEB0..2EBE0
            UNKNOWN, // 2EBE1..2F7FF
            HAN, // 2F800..2FA1D
            UNKNOWN, // 2FA1E..2FFFF
            HAN, // 30000..3134A
            UNKNOWN, // 3134B..E0000
            COMMON, // E0001
            UNKNOWN, // E0002..E001F
            COMMON, // E0020..E007F
            UNKNOWN, // E0080..E00FF
            INHERITED, // E0100..E01EF
            UNKNOWN, // E01F0..10FFFF
        };

        // Script-name aliases (four-letter codes, stored in upper case) mapped
        // to their enum constants. forName() looks a name up here first and
        // only falls back to valueOf() when no alias matches.
        private static final HashMap<String, Character.UnicodeScript> aliases;
        static {
            // Initial capacity is 157 / 0.75 + 1, i.e. room for 157 entries at
            // HashMap's default load factor.
            aliases = new HashMap<>((int)(157 / 0.75f + 1.0f));
            aliases.put("ADLM", ADLAM);
            aliases.put("AGHB", CAUCASIAN_ALBANIAN);
            aliases.put("AHOM", AHOM);
            aliases.put("ARAB", ARABIC);
            aliases.put("ARMI", IMPERIAL_ARAMAIC);
            aliases.put("ARMN", ARMENIAN);
            aliases.put("AVST", AVESTAN);
            aliases.put("BALI", BALINESE);
            aliases.put("BAMU", BAMUM);
            aliases.put("BASS", BASSA_VAH);
            aliases.put("BATK", BATAK);
            aliases.put("BENG", BENGALI);
            aliases.put("BHKS", BHAIKSUKI);
            aliases.put("BOPO", BOPOMOFO);
            aliases.put("BRAH", BRAHMI);
            aliases.put("BRAI", BRAILLE);
            aliases.put("BUGI", BUGINESE);
            aliases.put("BUHD", BUHID);
            aliases.put("CAKM", CHAKMA);
            aliases.put("CANS", CANADIAN_ABORIGINAL);
            aliases.put("CARI", CARIAN);
            aliases.put("CHAM", CHAM);
            aliases.put("CHER", CHEROKEE);
            aliases.put("CHRS", CHORASMIAN);
            aliases.put("COPT", COPTIC);
            aliases.put("CPRT", CYPRIOT);
            aliases.put("CYRL", CYRILLIC);
            aliases.put("DEVA", DEVANAGARI);
            aliases.put("DIAK", DIVES_AKURU);
            aliases.put("DOGR", DOGRA);
            aliases.put("DSRT", DESERET);
            aliases.put("DUPL", DUPLOYAN);
            aliases.put("EGYP", EGYPTIAN_HIEROGLYPHS);
            aliases.put("ELBA", ELBASAN);
            aliases.put("ELYM", ELYMAIC);
            aliases.put("ETHI", ETHIOPIC);
            aliases.put("GEOR", GEORGIAN);
            aliases.put("GLAG", GLAGOLITIC);
            aliases.put("GONM", MASARAM_GONDI);
            aliases.put("GOTH", GOTHIC);
            aliases.put("GONG", GUNJALA_GONDI);
            aliases.put("GRAN", GRANTHA);
            aliases.put("GREK", GREEK);
            aliases.put("GUJR", GUJARATI);
            aliases.put("GURU", GURMUKHI);
            aliases.put("HANG", HANGUL);
            aliases.put("HANI", HAN);
            aliases.put("HANO", HANUNOO);
            aliases.put("HATR", HATRAN);
            aliases.put("HEBR", HEBREW);
            aliases.put("HIRA", HIRAGANA);
            aliases.put("HLUW", ANATOLIAN_HIEROGLYPHS);
            aliases.put("HMNG", PAHAWH_HMONG);
            aliases.put("HMNP", NYIAKENG_PUACHUE_HMONG);
            // it appears we don't have the KATAKANA_OR_HIRAGANA
            //aliases.put("HRKT", KATAKANA_OR_HIRAGANA);
            aliases.put("HUNG", OLD_HUNGARIAN);
            aliases.put("ITAL", OLD_ITALIC);
            aliases.put("JAVA", JAVANESE);
            aliases.put("KALI", KAYAH_LI);
            aliases.put("KANA", KATAKANA);
            aliases.put("KHAR", KHAROSHTHI);
            aliases.put("KHMR", KHMER);
            aliases.put("KHOJ", KHOJKI);
            aliases.put("KITS", KHITAN_SMALL_SCRIPT);
            aliases.put("KNDA", KANNADA);
            aliases.put("KTHI", KAITHI);
            aliases.put("LANA", TAI_THAM);
            aliases.put("LAOO", LAO);
            aliases.put("LATN", LATIN);
            aliases.put("LEPC", LEPCHA);
            aliases.put("LIMB", LIMBU);
            aliases.put("LINA", LINEAR_A);
            aliases.put("LINB", LINEAR_B);
            aliases.put("LISU", LISU);
            aliases.put("LYCI", LYCIAN);
            aliases.put("LYDI", LYDIAN);
            aliases.put("MAHJ", MAHAJANI);
            aliases.put("MAKA", MAKASAR);
            aliases.put("MARC", MARCHEN);
            aliases.put("MAND", MANDAIC);
            aliases.put("MANI", MANICHAEAN);
            aliases.put("MEDF", MEDEFAIDRIN);
            aliases.put("MEND", MENDE_KIKAKUI);
            aliases.put("MERC", MEROITIC_CURSIVE);
            aliases.put("MERO", MEROITIC_HIEROGLYPHS);
            aliases.put("MLYM", MALAYALAM);
            aliases.put("MODI", MODI);
            aliases.put("MONG", MONGOLIAN);
            aliases.put("MROO", MRO);
            aliases.put("MTEI", MEETEI_MAYEK);
            aliases.put("MULT", MULTANI);
            aliases.put("MYMR", MYANMAR);
            aliases.put("NAND", NANDINAGARI);
            aliases.put("NARB", OLD_NORTH_ARABIAN);
            aliases.put("NBAT", NABATAEAN);
            aliases.put("NEWA", NEWA);
            aliases.put("NKOO", NKO);
            aliases.put("NSHU", NUSHU);
            aliases.put("OGAM", OGHAM);
            aliases.put("OLCK", OL_CHIKI);
            aliases.put("ORKH", OLD_TURKIC);
            aliases.put("ORYA", ORIYA);
            aliases.put("OSGE", OSAGE);
            aliases.put("OSMA", OSMANYA);
            aliases.put("PALM", PALMYRENE);
            aliases.put("PAUC", PAU_CIN_HAU);
            aliases.put("PERM", OLD_PERMIC);
            aliases.put("PHAG", PHAGS_PA);
            aliases.put("PHLI", INSCRIPTIONAL_PAHLAVI);
            aliases.put("PHLP", PSALTER_PAHLAVI);
            aliases.put("PHNX", PHOENICIAN);
            aliases.put("PLRD", MIAO);
            aliases.put("PRTI", INSCRIPTIONAL_PARTHIAN);
            aliases.put("RJNG", REJANG);
            aliases.put("ROHG", HANIFI_ROHINGYA);
            aliases.put("RUNR", RUNIC);
            aliases.put("SAMR", SAMARITAN);
            aliases.put("SARB", OLD_SOUTH_ARABIAN);
            aliases.put("SAUR", SAURASHTRA);
            aliases.put("SGNW", SIGNWRITING);
            aliases.put("SHAW", SHAVIAN);
            aliases.put("SHRD", SHARADA);
            aliases.put("SIDD", SIDDHAM);
            aliases.put("SIND", KHUDAWADI);
            aliases.put("SINH", SINHALA);
            aliases.put("SOGD", SOGDIAN);
            aliases.put("SOGO", OLD_SOGDIAN);
            aliases.put("SORA", SORA_SOMPENG);
            aliases.put("SOYO", SOYOMBO);
            aliases.put("SUND", SUNDANESE);
            aliases.put("SYLO", SYLOTI_NAGRI);
            aliases.put("SYRC", SYRIAC);
            aliases.put("TAGB", TAGBANWA);
            aliases.put("TAKR", TAKRI);
            aliases.put("TALE", TAI_LE);
            aliases.put("TALU", NEW_TAI_LUE);
            aliases.put("TAML", TAMIL);
            aliases.put("TANG", TANGUT);
            aliases.put("TAVT", TAI_VIET);
            aliases.put("TELU", TELUGU);
            aliases.put("TFNG", TIFINAGH);
            aliases.put("TGLG", TAGALOG);
            aliases.put("THAA", THAANA);
            aliases.put("THAI", THAI);
            aliases.put("TIBT", TIBETAN);
            aliases.put("TIRH", TIRHUTA);
            aliases.put("UGAR", UGARITIC);
            aliases.put("VAII", VAI);
            aliases.put("WARA", WARANG_CITI);
            aliases.put("WCHO", WANCHO);
            aliases.put("XPEO", OLD_PERSIAN);
            aliases.put("XSUX", CUNEIFORM);
            aliases.put("YIII", YI);
            aliases.put("YEZI", YEZIDI);
            aliases.put("ZANB", ZANABAZAR_SQUARE);
            aliases.put("ZINH", INHERITED);
            aliases.put("ZYYY", COMMON);
            aliases.put("ZZZZ", UNKNOWN);
        }

        /**
         * Returns the enum constant representing the Unicode script of which
         * the given character (Unicode code point) is assigned to.
8523 * 8524 * @param codePoint the character (Unicode code point) in question. 8525 * @return The {@code UnicodeScript} constant representing the 8526 * Unicode script of which this character is assigned to. 8527 * 8528 * @throws IllegalArgumentException if the specified 8529 * {@code codePoint} is an invalid Unicode code point. 8530 * @see Character#isValidCodePoint(int) 8531 * 8532 */ of(int codePoint)8533 public static UnicodeScript of(int codePoint) { 8534 if (!isValidCodePoint(codePoint)) 8535 throw new IllegalArgumentException( 8536 String.format("Not a valid Unicode code point: 0x%X", codePoint)); 8537 int type = getType(codePoint); 8538 // leave SURROGATE and PRIVATE_USE for table lookup 8539 if (type == UNASSIGNED) 8540 return UNKNOWN; 8541 int index = Arrays.binarySearch(scriptStarts, codePoint); 8542 if (index < 0) 8543 index = -index - 2; 8544 return scripts[index]; 8545 } 8546 8547 /** 8548 * Returns the UnicodeScript constant with the given Unicode script 8549 * name or the script name alias. Script names and their aliases are 8550 * determined by The Unicode Standard. The files {@code Scripts<version>.txt} 8551 * and {@code PropertyValueAliases<version>.txt} define script names 8552 * and the script name aliases for a particular version of the 8553 * standard. The {@link Character} class specifies the version of 8554 * the standard that it supports. 8555 * <p> 8556 * Character case is ignored for all of the valid script names. 8557 * The en_US locale's case mapping rules are used to provide 8558 * case-insensitive string comparisons for script name validation. 8559 * 8560 * @param scriptName A {@code UnicodeScript} name. 
8561 * @return The {@code UnicodeScript} constant identified 8562 * by {@code scriptName} 8563 * @throws IllegalArgumentException if {@code scriptName} is an 8564 * invalid name 8565 * @throws NullPointerException if {@code scriptName} is null 8566 */ forName(String scriptName)8567 public static final UnicodeScript forName(String scriptName) { 8568 scriptName = scriptName.toUpperCase(Locale.ENGLISH); 8569 //.replace(' ', '_')); 8570 UnicodeScript sc = aliases.get(scriptName); 8571 if (sc != null) 8572 return sc; 8573 return valueOf(scriptName); 8574 } 8575 } 8576 8577 /** 8578 * The value of the {@code Character}. 8579 * 8580 * @serial 8581 */ 8582 private final char value; 8583 8584 /** use serialVersionUID from JDK 1.0.2 for interoperability */ 8585 @java.io.Serial 8586 private static final long serialVersionUID = 3786198910865385080L; 8587 8588 /** 8589 * Constructs a newly allocated {@code Character} object that 8590 * represents the specified {@code char} value. 8591 * 8592 * @param value the value to be represented by the 8593 * {@code Character} object. 8594 * 8595 * @deprecated 8596 * It is rarely appropriate to use this constructor. The static factory 8597 * {@link #valueOf(char)} is generally a better choice, as it is 8598 * likely to yield significantly better space and time performance. 8599 */ 8600 // Android-changed: not yet forRemoval on Android. 8601 @Deprecated(since="9"/*, forRemoval = true*/) Character(char value)8602 public Character(char value) { 8603 this.value = value; 8604 } 8605 8606 private static class CharacterCache { CharacterCache()8607 private CharacterCache(){} 8608 8609 static final Character[] cache; 8610 static Character[] archivedCache; 8611 8612 static { 8613 int size = 127 + 1; 8614 8615 // Load and use the archived cache if it exists 8616 // Android-removed: CDS is not used on Android. 
8617 // CDS.initializeFromArchive(CharacterCache.class); 8618 if (archivedCache == null || archivedCache.length != size) { 8619 Character[] c = new Character[size]; 8620 for (int i = 0; i < size; i++) { 8621 c[i] = new Character((char) i); 8622 } 8623 archivedCache = c; 8624 } 8625 cache = archivedCache; 8626 } 8627 } 8628 8629 /** 8630 * Returns a {@code Character} instance representing the specified 8631 * {@code char} value. 8632 * If a new {@code Character} instance is not required, this method 8633 * should generally be used in preference to the constructor 8634 * {@link #Character(char)}, as this method is likely to yield 8635 * significantly better space and time performance by caching 8636 * frequently requested values. 8637 * 8638 * This method will always cache values in the range {@code 8639 * '\u005Cu0000'} to {@code '\u005Cu007F'}, inclusive, and may 8640 * cache other values outside of this range. 8641 * 8642 * @param c a char value. 8643 * @return a {@code Character} instance representing {@code c}. 8644 * @since 1.5 8645 */ 8646 @IntrinsicCandidate valueOf(char c)8647 public static Character valueOf(char c) { 8648 if (c <= 127) { // must cache 8649 return CharacterCache.cache[(int)c]; 8650 } 8651 return new Character(c); 8652 } 8653 8654 /** 8655 * Returns the value of this {@code Character} object. 8656 * @return the primitive {@code char} value represented by 8657 * this object. 8658 */ 8659 @IntrinsicCandidate charValue()8660 public char charValue() { 8661 return value; 8662 } 8663 8664 /** 8665 * Returns a hash code for this {@code Character}; equal to the result 8666 * of invoking {@code charValue()}. 8667 * 8668 * @return a hash code value for this {@code Character} 8669 */ 8670 @Override hashCode()8671 public int hashCode() { 8672 return Character.hashCode(value); 8673 } 8674 8675 /** 8676 * Returns a hash code for a {@code char} value; compatible with 8677 * {@code Character.hashCode()}. 
8678 * 8679 * @since 1.8 8680 * 8681 * @param value The {@code char} for which to return a hash code. 8682 * @return a hash code value for a {@code char} value. 8683 */ hashCode(char value)8684 public static int hashCode(char value) { 8685 return (int)value; 8686 } 8687 8688 /** 8689 * Compares this object against the specified object. 8690 * The result is {@code true} if and only if the argument is not 8691 * {@code null} and is a {@code Character} object that 8692 * represents the same {@code char} value as this object. 8693 * 8694 * @param obj the object to compare with. 8695 * @return {@code true} if the objects are the same; 8696 * {@code false} otherwise. 8697 */ equals(Object obj)8698 public boolean equals(Object obj) { 8699 if (obj instanceof Character) { 8700 return value == ((Character)obj).charValue(); 8701 } 8702 return false; 8703 } 8704 8705 /** 8706 * Returns a {@code String} object representing this 8707 * {@code Character}'s value. The result is a string of 8708 * length 1 whose sole component is the primitive 8709 * {@code char} value represented by this 8710 * {@code Character} object. 8711 * 8712 * @return a string representation of this object. 8713 */ toString()8714 public String toString() { 8715 return String.valueOf(value); 8716 } 8717 8718 // Android-removed: reference to Character.toString(int) in javadoc. 8719 /** 8720 * Returns a {@code String} object representing the 8721 * specified {@code char}. The result is a string of length 8722 * 1 consisting solely of the specified {@code char}. 8723 * 8724 * @param c the {@code char} to be converted 8725 * @return the string representation of the specified {@code char} 8726 * @since 1.4 8727 */ toString(char c)8728 public static String toString(char c) { 8729 return String.valueOf(c); 8730 } 8731 8732 // BEGIN Android-removed: expose after String.valueOfCodePoint() is imported. 8733 /** 8734 * Returns a {@code String} object representing the 8735 * specified character (Unicode code point). 
The result is a string of 8736 * length 1 or 2, consisting solely of the specified {@code codePoint}. 8737 * 8738 * @param codePoint the {@code codePoint} to be converted 8739 * @return the string representation of the specified {@code codePoint} 8740 * @throws IllegalArgumentException if the specified 8741 * {@code codePoint} is not a {@linkplain #isValidCodePoint 8742 * valid Unicode code point}. 8743 * @since 11 8744 */ toString(int codePoint)8745 public static String toString(int codePoint) { 8746 return String.valueOfCodePoint(codePoint); 8747 } 8748 8749 /** 8750 * Determines whether the specified code point is a valid 8751 * <a href="http://www.unicode.org/glossary/#code_point"> 8752 * Unicode code point value</a>. 8753 * 8754 * @param codePoint the Unicode code point to be tested 8755 * @return {@code true} if the specified code point value is between 8756 * {@link #MIN_CODE_POINT} and 8757 * {@link #MAX_CODE_POINT} inclusive; 8758 * {@code false} otherwise. 8759 * @since 1.5 8760 */ isValidCodePoint(int codePoint)8761 public static boolean isValidCodePoint(int codePoint) { 8762 // Optimized form of: 8763 // codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT 8764 int plane = codePoint >>> 16; 8765 return plane < ((MAX_CODE_POINT + 1) >>> 16); 8766 } 8767 8768 /** 8769 * Determines whether the specified character (Unicode code point) 8770 * is in the <a href="#BMP">Basic Multilingual Plane (BMP)</a>. 8771 * Such code points can be represented using a single {@code char}. 8772 * 8773 * @param codePoint the character (Unicode code point) to be to 8774 * @return {@code true} if the specified code point is between 8775 * {@link #MIN_VALUE} and {@link #MAX_VALUE} inclusive; 8776 * {@code false} otherwise. 
8777 * @since 1.7 8778 */ isBmpCodePoint(int codePoint)8779 public static boolean isBmpCodePoint(int codePoint) { 8780 return codePoint >>> 16 == 0; 8781 // Optimized form of: 8782 // codePoint >= MIN_VALUE && codePoint <= MAX_VALUE 8783 // We consistently use logical shift (>>>) to facilitate 8784 // additional runtime optimizations. 8785 } 8786 8787 /** 8788 * Determines whether the specified character (Unicode code point) 8789 * is in the <a href="#supplementary">supplementary character</a> range. 8790 * 8791 * @param codePoint the character (Unicode code point) to be tested 8792 * @return {@code true} if the specified code point is between 8793 * {@link #MIN_SUPPLEMENTARY_CODE_POINT} and 8794 * {@link #MAX_CODE_POINT} inclusive; 8795 * {@code false} otherwise. 8796 * @since 1.5 8797 */ isSupplementaryCodePoint(int codePoint)8798 public static boolean isSupplementaryCodePoint(int codePoint) { 8799 return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT 8800 && codePoint < MAX_CODE_POINT + 1; 8801 } 8802 8803 /** 8804 * Determines if the given {@code char} value is a 8805 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit"> 8806 * Unicode high-surrogate code unit</a> 8807 * (also known as <i>leading-surrogate code unit</i>). 8808 * 8809 * <p>Such values do not represent characters by themselves, 8810 * but are used in the representation of 8811 * <a href="#supplementary">supplementary characters</a> 8812 * in the UTF-16 encoding. 8813 * 8814 * @param ch the {@code char} value to be tested. 8815 * @return {@code true} if the {@code char} value is between 8816 * {@link #MIN_HIGH_SURROGATE} and 8817 * {@link #MAX_HIGH_SURROGATE} inclusive; 8818 * {@code false} otherwise. 
8819 * @see Character#isLowSurrogate(char) 8820 * @see Character.UnicodeBlock#of(int) 8821 * @since 1.5 8822 */ isHighSurrogate(char ch)8823 public static boolean isHighSurrogate(char ch) { 8824 // Help VM constant-fold; MAX_HIGH_SURROGATE + 1 == MIN_LOW_SURROGATE 8825 return ch >= MIN_HIGH_SURROGATE && ch < (MAX_HIGH_SURROGATE + 1); 8826 } 8827 8828 /** 8829 * Determines if the given {@code char} value is a 8830 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit"> 8831 * Unicode low-surrogate code unit</a> 8832 * (also known as <i>trailing-surrogate code unit</i>). 8833 * 8834 * <p>Such values do not represent characters by themselves, 8835 * but are used in the representation of 8836 * <a href="#supplementary">supplementary characters</a> 8837 * in the UTF-16 encoding. 8838 * 8839 * @param ch the {@code char} value to be tested. 8840 * @return {@code true} if the {@code char} value is between 8841 * {@link #MIN_LOW_SURROGATE} and 8842 * {@link #MAX_LOW_SURROGATE} inclusive; 8843 * {@code false} otherwise. 8844 * @see Character#isHighSurrogate(char) 8845 * @since 1.5 8846 */ isLowSurrogate(char ch)8847 public static boolean isLowSurrogate(char ch) { 8848 return ch >= MIN_LOW_SURROGATE && ch < (MAX_LOW_SURROGATE + 1); 8849 } 8850 8851 /** 8852 * Determines if the given {@code char} value is a Unicode 8853 * <i>surrogate code unit</i>. 8854 * 8855 * <p>Such values do not represent characters by themselves, 8856 * but are used in the representation of 8857 * <a href="#supplementary">supplementary characters</a> 8858 * in the UTF-16 encoding. 8859 * 8860 * <p>A char value is a surrogate code unit if and only if it is either 8861 * a {@linkplain #isLowSurrogate(char) low-surrogate code unit} or 8862 * a {@linkplain #isHighSurrogate(char) high-surrogate code unit}. 8863 * 8864 * @param ch the {@code char} value to be tested. 
8865 * @return {@code true} if the {@code char} value is between 8866 * {@link #MIN_SURROGATE} and 8867 * {@link #MAX_SURROGATE} inclusive; 8868 * {@code false} otherwise. 8869 * @since 1.7 8870 */ isSurrogate(char ch)8871 public static boolean isSurrogate(char ch) { 8872 return ch >= MIN_SURROGATE && ch < (MAX_SURROGATE + 1); 8873 } 8874 8875 /** 8876 * Determines whether the specified pair of {@code char} 8877 * values is a valid 8878 * <a href="http://www.unicode.org/glossary/#surrogate_pair"> 8879 * Unicode surrogate pair</a>. 8880 * 8881 * <p>This method is equivalent to the expression: 8882 * <blockquote><pre>{@code 8883 * isHighSurrogate(high) && isLowSurrogate(low) 8884 * }</pre></blockquote> 8885 * 8886 * @param high the high-surrogate code value to be tested 8887 * @param low the low-surrogate code value to be tested 8888 * @return {@code true} if the specified high and 8889 * low-surrogate code values represent a valid surrogate pair; 8890 * {@code false} otherwise. 8891 * @since 1.5 8892 */ isSurrogatePair(char high, char low)8893 public static boolean isSurrogatePair(char high, char low) { 8894 return isHighSurrogate(high) && isLowSurrogate(low); 8895 } 8896 8897 /** 8898 * Determines the number of {@code char} values needed to 8899 * represent the specified character (Unicode code point). If the 8900 * specified character is equal to or greater than 0x10000, then 8901 * the method returns 2. Otherwise, the method returns 1. 8902 * 8903 * <p>This method doesn't validate the specified character to be a 8904 * valid Unicode code point. The caller must validate the 8905 * character value using {@link #isValidCodePoint(int) isValidCodePoint} 8906 * if necessary. 8907 * 8908 * @param codePoint the character (Unicode code point) to be tested. 8909 * @return 2 if the character is a valid supplementary character; 1 otherwise. 
8910 * @see Character#isSupplementaryCodePoint(int) 8911 * @since 1.5 8912 */ charCount(int codePoint)8913 public static int charCount(int codePoint) { 8914 return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT ? 2 : 1; 8915 } 8916 8917 /** 8918 * Converts the specified surrogate pair to its supplementary code 8919 * point value. This method does not validate the specified 8920 * surrogate pair. The caller must validate it using {@link 8921 * #isSurrogatePair(char, char) isSurrogatePair} if necessary. 8922 * 8923 * @param high the high-surrogate code unit 8924 * @param low the low-surrogate code unit 8925 * @return the supplementary code point composed from the 8926 * specified surrogate pair. 8927 * @since 1.5 8928 */ toCodePoint(char high, char low)8929 public static int toCodePoint(char high, char low) { 8930 // Optimized form of: 8931 // return ((high - MIN_HIGH_SURROGATE) << 10) 8932 // + (low - MIN_LOW_SURROGATE) 8933 // + MIN_SUPPLEMENTARY_CODE_POINT; 8934 return ((high << 10) + low) + (MIN_SUPPLEMENTARY_CODE_POINT 8935 - (MIN_HIGH_SURROGATE << 10) 8936 - MIN_LOW_SURROGATE); 8937 } 8938 8939 /** 8940 * Returns the code point at the given index of the 8941 * {@code CharSequence}. If the {@code char} value at 8942 * the given index in the {@code CharSequence} is in the 8943 * high-surrogate range, the following index is less than the 8944 * length of the {@code CharSequence}, and the 8945 * {@code char} value at the following index is in the 8946 * low-surrogate range, then the supplementary code point 8947 * corresponding to this surrogate pair is returned. Otherwise, 8948 * the {@code char} value at the given index is returned. 8949 * 8950 * @param seq a sequence of {@code char} values (Unicode code 8951 * units) 8952 * @param index the index to the {@code char} values (Unicode 8953 * code units) in {@code seq} to be converted 8954 * @return the Unicode code point at the given index 8955 * @throws NullPointerException if {@code seq} is null. 
8956 * @throws IndexOutOfBoundsException if the value 8957 * {@code index} is negative or not less than 8958 * {@link CharSequence#length() seq.length()}. 8959 * @since 1.5 8960 */ codePointAt(CharSequence seq, int index)8961 public static int codePointAt(CharSequence seq, int index) { 8962 char c1 = seq.charAt(index); 8963 if (isHighSurrogate(c1) && ++index < seq.length()) { 8964 char c2 = seq.charAt(index); 8965 if (isLowSurrogate(c2)) { 8966 return toCodePoint(c1, c2); 8967 } 8968 } 8969 return c1; 8970 } 8971 8972 /** 8973 * Returns the code point at the given index of the 8974 * {@code char} array. If the {@code char} value at 8975 * the given index in the {@code char} array is in the 8976 * high-surrogate range, the following index is less than the 8977 * length of the {@code char} array, and the 8978 * {@code char} value at the following index is in the 8979 * low-surrogate range, then the supplementary code point 8980 * corresponding to this surrogate pair is returned. Otherwise, 8981 * the {@code char} value at the given index is returned. 8982 * 8983 * @param a the {@code char} array 8984 * @param index the index to the {@code char} values (Unicode 8985 * code units) in the {@code char} array to be converted 8986 * @return the Unicode code point at the given index 8987 * @throws NullPointerException if {@code a} is null. 8988 * @throws IndexOutOfBoundsException if the value 8989 * {@code index} is negative or not less than 8990 * the length of the {@code char} array. 8991 * @since 1.5 8992 */ codePointAt(char[] a, int index)8993 public static int codePointAt(char[] a, int index) { 8994 return codePointAtImpl(a, index, a.length); 8995 } 8996 8997 /** 8998 * Returns the code point at the given index of the 8999 * {@code char} array, where only array elements with 9000 * {@code index} less than {@code limit} can be used. 
If 9001 * the {@code char} value at the given index in the 9002 * {@code char} array is in the high-surrogate range, the 9003 * following index is less than the {@code limit}, and the 9004 * {@code char} value at the following index is in the 9005 * low-surrogate range, then the supplementary code point 9006 * corresponding to this surrogate pair is returned. Otherwise, 9007 * the {@code char} value at the given index is returned. 9008 * 9009 * @param a the {@code char} array 9010 * @param index the index to the {@code char} values (Unicode 9011 * code units) in the {@code char} array to be converted 9012 * @param limit the index after the last array element that 9013 * can be used in the {@code char} array 9014 * @return the Unicode code point at the given index 9015 * @throws NullPointerException if {@code a} is null. 9016 * @throws IndexOutOfBoundsException if the {@code index} 9017 * argument is negative or not less than the {@code limit} 9018 * argument, or if the {@code limit} argument is negative or 9019 * greater than the length of the {@code char} array. 9020 * @since 1.5 9021 */ codePointAt(char[] a, int index, int limit)9022 public static int codePointAt(char[] a, int index, int limit) { 9023 if (index >= limit || limit < 0 || limit > a.length) { 9024 throw new IndexOutOfBoundsException(); 9025 } 9026 return codePointAtImpl(a, index, limit); 9027 } 9028 9029 // throws ArrayIndexOutOfBoundsException if index out of bounds codePointAtImpl(char[] a, int index, int limit)9030 static int codePointAtImpl(char[] a, int index, int limit) { 9031 char c1 = a[index]; 9032 if (isHighSurrogate(c1) && ++index < limit) { 9033 char c2 = a[index]; 9034 if (isLowSurrogate(c2)) { 9035 return toCodePoint(c1, c2); 9036 } 9037 } 9038 return c1; 9039 } 9040 9041 /** 9042 * Returns the code point preceding the given index of the 9043 * {@code CharSequence}. 
If the {@code char} value at 9044 * {@code (index - 1)} in the {@code CharSequence} is in 9045 * the low-surrogate range, {@code (index - 2)} is not 9046 * negative, and the {@code char} value at {@code (index - 2)} 9047 * in the {@code CharSequence} is in the 9048 * high-surrogate range, then the supplementary code point 9049 * corresponding to this surrogate pair is returned. Otherwise, 9050 * the {@code char} value at {@code (index - 1)} is 9051 * returned. 9052 * 9053 * @param seq the {@code CharSequence} instance 9054 * @param index the index following the code point that should be returned 9055 * @return the Unicode code point value before the given index. 9056 * @throws NullPointerException if {@code seq} is null. 9057 * @throws IndexOutOfBoundsException if the {@code index} 9058 * argument is less than 1 or greater than {@link 9059 * CharSequence#length() seq.length()}. 9060 * @since 1.5 9061 */ codePointBefore(CharSequence seq, int index)9062 public static int codePointBefore(CharSequence seq, int index) { 9063 char c2 = seq.charAt(--index); 9064 if (isLowSurrogate(c2) && index > 0) { 9065 char c1 = seq.charAt(--index); 9066 if (isHighSurrogate(c1)) { 9067 return toCodePoint(c1, c2); 9068 } 9069 } 9070 return c2; 9071 } 9072 9073 /** 9074 * Returns the code point preceding the given index of the 9075 * {@code char} array. If the {@code char} value at 9076 * {@code (index - 1)} in the {@code char} array is in 9077 * the low-surrogate range, {@code (index - 2)} is not 9078 * negative, and the {@code char} value at {@code (index - 2)} 9079 * in the {@code char} array is in the 9080 * high-surrogate range, then the supplementary code point 9081 * corresponding to this surrogate pair is returned. Otherwise, 9082 * the {@code char} value at {@code (index - 1)} is 9083 * returned. 
9084 * 9085 * @param a the {@code char} array 9086 * @param index the index following the code point that should be returned 9087 * @return the Unicode code point value before the given index. 9088 * @throws NullPointerException if {@code a} is null. 9089 * @throws IndexOutOfBoundsException if the {@code index} 9090 * argument is less than 1 or greater than the length of the 9091 * {@code char} array 9092 * @since 1.5 9093 */ codePointBefore(char[] a, int index)9094 public static int codePointBefore(char[] a, int index) { 9095 return codePointBeforeImpl(a, index, 0); 9096 } 9097 9098 /** 9099 * Returns the code point preceding the given index of the 9100 * {@code char} array, where only array elements with 9101 * {@code index} greater than or equal to {@code start} 9102 * can be used. If the {@code char} value at {@code (index - 1)} 9103 * in the {@code char} array is in the 9104 * low-surrogate range, {@code (index - 2)} is not less than 9105 * {@code start}, and the {@code char} value at 9106 * {@code (index - 2)} in the {@code char} array is in 9107 * the high-surrogate range, then the supplementary code point 9108 * corresponding to this surrogate pair is returned. Otherwise, 9109 * the {@code char} value at {@code (index - 1)} is 9110 * returned. 9111 * 9112 * @param a the {@code char} array 9113 * @param index the index following the code point that should be returned 9114 * @param start the index of the first array element in the 9115 * {@code char} array 9116 * @return the Unicode code point value before the given index. 9117 * @throws NullPointerException if {@code a} is null. 9118 * @throws IndexOutOfBoundsException if the {@code index} 9119 * argument is not greater than the {@code start} argument or 9120 * is greater than the length of the {@code char} array, or 9121 * if the {@code start} argument is negative or not less than 9122 * the length of the {@code char} array. 
9123 * @since 1.5 9124 */ codePointBefore(char[] a, int index, int start)9125 public static int codePointBefore(char[] a, int index, int start) { 9126 if (index <= start || start < 0 || start >= a.length) { 9127 throw new IndexOutOfBoundsException(); 9128 } 9129 return codePointBeforeImpl(a, index, start); 9130 } 9131 9132 // throws ArrayIndexOutOfBoundsException if index-1 out of bounds codePointBeforeImpl(char[] a, int index, int start)9133 static int codePointBeforeImpl(char[] a, int index, int start) { 9134 char c2 = a[--index]; 9135 if (isLowSurrogate(c2) && index > start) { 9136 char c1 = a[--index]; 9137 if (isHighSurrogate(c1)) { 9138 return toCodePoint(c1, c2); 9139 } 9140 } 9141 return c2; 9142 } 9143 9144 /** 9145 * Returns the leading surrogate (a 9146 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit"> 9147 * high surrogate code unit</a>) of the 9148 * <a href="http://www.unicode.org/glossary/#surrogate_pair"> 9149 * surrogate pair</a> 9150 * representing the specified supplementary character (Unicode 9151 * code point) in the UTF-16 encoding. If the specified character 9152 * is not a 9153 * <a href="Character.html#supplementary">supplementary character</a>, 9154 * an unspecified {@code char} is returned. 9155 * 9156 * <p>If 9157 * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)} 9158 * is {@code true}, then 9159 * {@link #isHighSurrogate isHighSurrogate}{@code (highSurrogate(x))} and 9160 * {@link #toCodePoint toCodePoint}{@code (highSurrogate(x), }{@link #lowSurrogate lowSurrogate}{@code (x)) == x} 9161 * are also always {@code true}. 
9162 * 9163 * @param codePoint a supplementary character (Unicode code point) 9164 * @return the leading surrogate code unit used to represent the 9165 * character in the UTF-16 encoding 9166 * @since 1.7 9167 */ highSurrogate(int codePoint)9168 public static char highSurrogate(int codePoint) { 9169 return (char) ((codePoint >>> 10) 9170 + (MIN_HIGH_SURROGATE - (MIN_SUPPLEMENTARY_CODE_POINT >>> 10))); 9171 } 9172 9173 /** 9174 * Returns the trailing surrogate (a 9175 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit"> 9176 * low surrogate code unit</a>) of the 9177 * <a href="http://www.unicode.org/glossary/#surrogate_pair"> 9178 * surrogate pair</a> 9179 * representing the specified supplementary character (Unicode 9180 * code point) in the UTF-16 encoding. If the specified character 9181 * is not a 9182 * <a href="Character.html#supplementary">supplementary character</a>, 9183 * an unspecified {@code char} is returned. 9184 * 9185 * <p>If 9186 * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)} 9187 * is {@code true}, then 9188 * {@link #isLowSurrogate isLowSurrogate}{@code (lowSurrogate(x))} and 9189 * {@link #toCodePoint toCodePoint}{@code (}{@link #highSurrogate highSurrogate}{@code (x), lowSurrogate(x)) == x} 9190 * are also always {@code true}. 9191 * 9192 * @param codePoint a supplementary character (Unicode code point) 9193 * @return the trailing surrogate code unit used to represent the 9194 * character in the UTF-16 encoding 9195 * @since 1.7 9196 */ lowSurrogate(int codePoint)9197 public static char lowSurrogate(int codePoint) { 9198 return (char) ((codePoint & 0x3ff) + MIN_LOW_SURROGATE); 9199 } 9200 9201 /** 9202 * Converts the specified character (Unicode code point) to its 9203 * UTF-16 representation. If the specified code point is a BMP 9204 * (Basic Multilingual Plane or Plane 0) value, the same value is 9205 * stored in {@code dst[dstIndex]}, and 1 is returned. 
If the 9206 * specified code point is a supplementary character, its 9207 * surrogate values are stored in {@code dst[dstIndex]} 9208 * (high-surrogate) and {@code dst[dstIndex+1]} 9209 * (low-surrogate), and 2 is returned. 9210 * 9211 * @param codePoint the character (Unicode code point) to be converted. 9212 * @param dst an array of {@code char} in which the 9213 * {@code codePoint}'s UTF-16 value is stored. 9214 * @param dstIndex the start index into the {@code dst} 9215 * array where the converted value is stored. 9216 * @return 1 if the code point is a BMP code point, 2 if the 9217 * code point is a supplementary code point. 9218 * @throws IllegalArgumentException if the specified 9219 * {@code codePoint} is not a valid Unicode code point. 9220 * @throws NullPointerException if the specified {@code dst} is null. 9221 * @throws IndexOutOfBoundsException if {@code dstIndex} 9222 * is negative or not less than {@code dst.length}, or if 9223 * {@code dst} at {@code dstIndex} doesn't have enough 9224 * array element(s) to store the resulting {@code char} 9225 * value(s). (If {@code dstIndex} is equal to 9226 * {@code dst.length-1} and the specified 9227 * {@code codePoint} is a supplementary character, the 9228 * high-surrogate value is not stored in 9229 * {@code dst[dstIndex]}.) 9230 * @since 1.5 9231 */ toChars(int codePoint, char[] dst, int dstIndex)9232 public static int toChars(int codePoint, char[] dst, int dstIndex) { 9233 if (isBmpCodePoint(codePoint)) { 9234 dst[dstIndex] = (char) codePoint; 9235 return 1; 9236 } else if (isValidCodePoint(codePoint)) { 9237 toSurrogates(codePoint, dst, dstIndex); 9238 return 2; 9239 } else { 9240 throw new IllegalArgumentException( 9241 String.format("Not a valid Unicode code point: 0x%X", codePoint)); 9242 } 9243 } 9244 9245 /** 9246 * Converts the specified character (Unicode code point) to its 9247 * UTF-16 representation stored in a {@code char} array. 
If 9248 * the specified code point is a BMP (Basic Multilingual Plane or 9249 * Plane 0) value, the resulting {@code char} array has 9250 * the same value as {@code codePoint}. If the specified code 9251 * point is a supplementary code point, the resulting 9252 * {@code char} array has the corresponding surrogate pair. 9253 * 9254 * @param codePoint a Unicode code point 9255 * @return a {@code char} array having 9256 * {@code codePoint}'s UTF-16 representation. 9257 * @throws IllegalArgumentException if the specified 9258 * {@code codePoint} is not a valid Unicode code point. 9259 * @since 1.5 9260 */ toChars(int codePoint)9261 public static char[] toChars(int codePoint) { 9262 if (isBmpCodePoint(codePoint)) { 9263 return new char[] { (char) codePoint }; 9264 } else if (isValidCodePoint(codePoint)) { 9265 char[] result = new char[2]; 9266 toSurrogates(codePoint, result, 0); 9267 return result; 9268 } else { 9269 throw new IllegalArgumentException( 9270 String.format("Not a valid Unicode code point: 0x%X", codePoint)); 9271 } 9272 } 9273 toSurrogates(int codePoint, char[] dst, int index)9274 static void toSurrogates(int codePoint, char[] dst, int index) { 9275 // We write elements "backwards" to guarantee all-or-nothing 9276 dst[index+1] = lowSurrogate(codePoint); 9277 dst[index] = highSurrogate(codePoint); 9278 } 9279 9280 /** 9281 * Returns the number of Unicode code points in the text range of 9282 * the specified char sequence. The text range begins at the 9283 * specified {@code beginIndex} and extends to the 9284 * {@code char} at index {@code endIndex - 1}. Thus the 9285 * length (in {@code char}s) of the text range is 9286 * {@code endIndex-beginIndex}. Unpaired surrogates within 9287 * the text range count as one code point each. 9288 * 9289 * @param seq the char sequence 9290 * @param beginIndex the index to the first {@code char} of 9291 * the text range. 9292 * @param endIndex the index after the last {@code char} of 9293 * the text range. 
9294 * @return the number of Unicode code points in the specified text 9295 * range 9296 * @throws NullPointerException if {@code seq} is null. 9297 * @throws IndexOutOfBoundsException if the 9298 * {@code beginIndex} is negative, or {@code endIndex} 9299 * is larger than the length of the given sequence, or 9300 * {@code beginIndex} is larger than {@code endIndex}. 9301 * @since 1.5 9302 */ codePointCount(CharSequence seq, int beginIndex, int endIndex)9303 public static int codePointCount(CharSequence seq, int beginIndex, int endIndex) { 9304 int length = seq.length(); 9305 if (beginIndex < 0 || endIndex > length || beginIndex > endIndex) { 9306 throw new IndexOutOfBoundsException(); 9307 } 9308 int n = endIndex - beginIndex; 9309 for (int i = beginIndex; i < endIndex; ) { 9310 if (isHighSurrogate(seq.charAt(i++)) && i < endIndex && 9311 isLowSurrogate(seq.charAt(i))) { 9312 n--; 9313 i++; 9314 } 9315 } 9316 return n; 9317 } 9318 9319 /** 9320 * Returns the number of Unicode code points in a subarray of the 9321 * {@code char} array argument. The {@code offset} 9322 * argument is the index of the first {@code char} of the 9323 * subarray and the {@code count} argument specifies the 9324 * length of the subarray in {@code char}s. Unpaired 9325 * surrogates within the subarray count as one code point each. 9326 * 9327 * @param a the {@code char} array 9328 * @param offset the index of the first {@code char} in the 9329 * given {@code char} array 9330 * @param count the length of the subarray in {@code char}s 9331 * @return the number of Unicode code points in the specified subarray 9332 * @throws NullPointerException if {@code a} is null. 9333 * @throws IndexOutOfBoundsException if {@code offset} or 9334 * {@code count} is negative, or if {@code offset + 9335 * count} is larger than the length of the given array. 
9336 * @since 1.5 9337 */ codePointCount(char[] a, int offset, int count)9338 public static int codePointCount(char[] a, int offset, int count) { 9339 if (count > a.length - offset || offset < 0 || count < 0) { 9340 throw new IndexOutOfBoundsException(); 9341 } 9342 return codePointCountImpl(a, offset, count); 9343 } 9344 codePointCountImpl(char[] a, int offset, int count)9345 static int codePointCountImpl(char[] a, int offset, int count) { 9346 int endIndex = offset + count; 9347 int n = count; 9348 for (int i = offset; i < endIndex; ) { 9349 if (isHighSurrogate(a[i++]) && i < endIndex && 9350 isLowSurrogate(a[i])) { 9351 n--; 9352 i++; 9353 } 9354 } 9355 return n; 9356 } 9357 9358 /** 9359 * Returns the index within the given char sequence that is offset 9360 * from the given {@code index} by {@code codePointOffset} 9361 * code points. Unpaired surrogates within the text range given by 9362 * {@code index} and {@code codePointOffset} count as 9363 * one code point each. 9364 * 9365 * @param seq the char sequence 9366 * @param index the index to be offset 9367 * @param codePointOffset the offset in code points 9368 * @return the index within the char sequence 9369 * @throws NullPointerException if {@code seq} is null. 9370 * @throws IndexOutOfBoundsException if {@code index} 9371 * is negative or larger then the length of the char sequence, 9372 * or if {@code codePointOffset} is positive and the 9373 * subsequence starting with {@code index} has fewer than 9374 * {@code codePointOffset} code points, or if 9375 * {@code codePointOffset} is negative and the subsequence 9376 * before {@code index} has fewer than the absolute value 9377 * of {@code codePointOffset} code points. 
9378 * @since 1.5 9379 */ offsetByCodePoints(CharSequence seq, int index, int codePointOffset)9380 public static int offsetByCodePoints(CharSequence seq, int index, 9381 int codePointOffset) { 9382 int length = seq.length(); 9383 if (index < 0 || index > length) { 9384 throw new IndexOutOfBoundsException(); 9385 } 9386 9387 int x = index; 9388 if (codePointOffset >= 0) { 9389 int i; 9390 for (i = 0; x < length && i < codePointOffset; i++) { 9391 if (isHighSurrogate(seq.charAt(x++)) && x < length && 9392 isLowSurrogate(seq.charAt(x))) { 9393 x++; 9394 } 9395 } 9396 if (i < codePointOffset) { 9397 throw new IndexOutOfBoundsException(); 9398 } 9399 } else { 9400 int i; 9401 for (i = codePointOffset; x > 0 && i < 0; i++) { 9402 if (isLowSurrogate(seq.charAt(--x)) && x > 0 && 9403 isHighSurrogate(seq.charAt(x-1))) { 9404 x--; 9405 } 9406 } 9407 if (i < 0) { 9408 throw new IndexOutOfBoundsException(); 9409 } 9410 } 9411 return x; 9412 } 9413 9414 /** 9415 * Returns the index within the given {@code char} subarray 9416 * that is offset from the given {@code index} by 9417 * {@code codePointOffset} code points. The 9418 * {@code start} and {@code count} arguments specify a 9419 * subarray of the {@code char} array. Unpaired surrogates 9420 * within the text range given by {@code index} and 9421 * {@code codePointOffset} count as one code point each. 9422 * 9423 * @param a the {@code char} array 9424 * @param start the index of the first {@code char} of the 9425 * subarray 9426 * @param count the length of the subarray in {@code char}s 9427 * @param index the index to be offset 9428 * @param codePointOffset the offset in code points 9429 * @return the index within the subarray 9430 * @throws NullPointerException if {@code a} is null. 
9431 * @throws IndexOutOfBoundsException 9432 * if {@code start} or {@code count} is negative, 9433 * or if {@code start + count} is larger than the length of 9434 * the given array, 9435 * or if {@code index} is less than {@code start} or 9436 * larger then {@code start + count}, 9437 * or if {@code codePointOffset} is positive and the text range 9438 * starting with {@code index} and ending with {@code start + count - 1} 9439 * has fewer than {@code codePointOffset} code 9440 * points, 9441 * or if {@code codePointOffset} is negative and the text range 9442 * starting with {@code start} and ending with {@code index - 1} 9443 * has fewer than the absolute value of 9444 * {@code codePointOffset} code points. 9445 * @since 1.5 9446 */ offsetByCodePoints(char[] a, int start, int count, int index, int codePointOffset)9447 public static int offsetByCodePoints(char[] a, int start, int count, 9448 int index, int codePointOffset) { 9449 if (count > a.length-start || start < 0 || count < 0 9450 || index < start || index > start+count) { 9451 throw new IndexOutOfBoundsException(); 9452 } 9453 return offsetByCodePointsImpl(a, start, count, index, codePointOffset); 9454 } 9455 offsetByCodePointsImpl(char[]a, int start, int count, int index, int codePointOffset)9456 static int offsetByCodePointsImpl(char[]a, int start, int count, 9457 int index, int codePointOffset) { 9458 int x = index; 9459 if (codePointOffset >= 0) { 9460 int limit = start + count; 9461 int i; 9462 for (i = 0; x < limit && i < codePointOffset; i++) { 9463 if (isHighSurrogate(a[x++]) && x < limit && 9464 isLowSurrogate(a[x])) { 9465 x++; 9466 } 9467 } 9468 if (i < codePointOffset) { 9469 throw new IndexOutOfBoundsException(); 9470 } 9471 } else { 9472 int i; 9473 for (i = codePointOffset; x > start && i < 0; i++) { 9474 if (isLowSurrogate(a[--x]) && x > start && 9475 isHighSurrogate(a[x-1])) { 9476 x--; 9477 } 9478 } 9479 if (i < 0) { 9480 throw new IndexOutOfBoundsException(); 9481 } 9482 } 9483 return x; 
9484 } 9485 9486 /** 9487 * Determines if the specified character is a lowercase character. 9488 * <p> 9489 * A character is lowercase if its general category type, provided 9490 * by {@code Character.getType(ch)}, is 9491 * {@code LOWERCASE_LETTER}, or it has contributory property 9492 * Other_Lowercase as defined by the Unicode Standard. 9493 * <p> 9494 * The following are examples of lowercase characters: 9495 * <blockquote><pre> 9496 * a b c d e f g h i j k l m n o p q r s t u v w x y z 9497 * '\u00DF' '\u00E0' '\u00E1' '\u00E2' '\u00E3' '\u00E4' '\u00E5' '\u00E6' 9498 * '\u00E7' '\u00E8' '\u00E9' '\u00EA' '\u00EB' '\u00EC' '\u00ED' '\u00EE' 9499 * '\u00EF' '\u00F0' '\u00F1' '\u00F2' '\u00F3' '\u00F4' '\u00F5' '\u00F6' 9500 * '\u00F8' '\u00F9' '\u00FA' '\u00FB' '\u00FC' '\u00FD' '\u00FE' '\u00FF' 9501 * </pre></blockquote> 9502 * <p> Many other Unicode characters are lowercase too. 9503 * 9504 * <p><b>Note:</b> This method cannot handle <a 9505 * href="#supplementary"> supplementary characters</a>. To support 9506 * all Unicode characters, including supplementary characters, use 9507 * the {@link #isLowerCase(int)} method. 9508 * 9509 * @param ch the character to be tested. 9510 * @return {@code true} if the character is lowercase; 9511 * {@code false} otherwise. 9512 * @see Character#isLowerCase(char) 9513 * @see Character#isTitleCase(char) 9514 * @see Character#toLowerCase(char) 9515 * @see Character#getType(char) 9516 */ isLowerCase(char ch)9517 public static boolean isLowerCase(char ch) { 9518 return isLowerCase((int)ch); 9519 } 9520 9521 /** 9522 * Determines if the specified character (Unicode code point) is a 9523 * lowercase character. 9524 * <p> 9525 * A character is lowercase if its general category type, provided 9526 * by {@link Character#getType getType(codePoint)}, is 9527 * {@code LOWERCASE_LETTER}, or it has contributory property 9528 * Other_Lowercase as defined by the Unicode Standard. 
9529 * <p> 9530 * The following are examples of lowercase characters: 9531 * <blockquote><pre> 9532 * a b c d e f g h i j k l m n o p q r s t u v w x y z 9533 * '\u00DF' '\u00E0' '\u00E1' '\u00E2' '\u00E3' '\u00E4' '\u00E5' '\u00E6' 9534 * '\u00E7' '\u00E8' '\u00E9' '\u00EA' '\u00EB' '\u00EC' '\u00ED' '\u00EE' 9535 * '\u00EF' '\u00F0' '\u00F1' '\u00F2' '\u00F3' '\u00F4' '\u00F5' '\u00F6' 9536 * '\u00F8' '\u00F9' '\u00FA' '\u00FB' '\u00FC' '\u00FD' '\u00FE' '\u00FF' 9537 * </pre></blockquote> 9538 * <p> Many other Unicode characters are lowercase too. 9539 * 9540 * @param codePoint the character (Unicode code point) to be tested. 9541 * @return {@code true} if the character is lowercase; 9542 * {@code false} otherwise. 9543 * @see Character#isLowerCase(int) 9544 * @see Character#isTitleCase(int) 9545 * @see Character#toLowerCase(int) 9546 * @see Character#getType(int) 9547 * @since 1.5 9548 */ 9549 // BEGIN Android-changed: Reimplement methods natively on top of ICU4C. 9550 /* 9551 public static boolean isLowerCase(int codePoint) { 9552 return CharacterData.of(codePoint).isLowerCase(codePoint); 9553 } 9554 */ isLowerCase(int codePoint)9555 public static boolean isLowerCase(int codePoint) { 9556 return isLowerCaseImpl(codePoint); 9557 } 9558 9559 @FastNative isLowerCaseImpl(int codePoint)9560 static native boolean isLowerCaseImpl(int codePoint); 9561 // END Android-changed: Reimplement methods natively on top of ICU4C. 9562 9563 /** 9564 * Determines if the specified character is an uppercase character. 9565 * <p> 9566 * A character is uppercase if its general category type, provided by 9567 * {@code Character.getType(ch)}, is {@code UPPERCASE_LETTER}. 9568 * or it has contributory property Other_Uppercase as defined by the Unicode Standard. 
9569 * <p> 9570 * The following are examples of uppercase characters: 9571 * <blockquote><pre> 9572 * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z 9573 * '\u00C0' '\u00C1' '\u00C2' '\u00C3' '\u00C4' '\u00C5' '\u00C6' '\u00C7' 9574 * '\u00C8' '\u00C9' '\u00CA' '\u00CB' '\u00CC' '\u00CD' '\u00CE' '\u00CF' 9575 * '\u00D0' '\u00D1' '\u00D2' '\u00D3' '\u00D4' '\u00D5' '\u00D6' '\u00D8' 9576 * '\u00D9' '\u00DA' '\u00DB' '\u00DC' '\u00DD' '\u00DE' 9577 * </pre></blockquote> 9578 * <p> Many other Unicode characters are uppercase too. 9579 * 9580 * <p><b>Note:</b> This method cannot handle <a 9581 * href="#supplementary"> supplementary characters</a>. To support 9582 * all Unicode characters, including supplementary characters, use 9583 * the {@link #isUpperCase(int)} method. 9584 * 9585 * @param ch the character to be tested. 9586 * @return {@code true} if the character is uppercase; 9587 * {@code false} otherwise. 9588 * @see Character#isLowerCase(char) 9589 * @see Character#isTitleCase(char) 9590 * @see Character#toUpperCase(char) 9591 * @see Character#getType(char) 9592 * @since 1.0 9593 */ isUpperCase(char ch)9594 public static boolean isUpperCase(char ch) { 9595 return isUpperCase((int)ch); 9596 } 9597 9598 /** 9599 * Determines if the specified character (Unicode code point) is an uppercase character. 9600 * <p> 9601 * A character is uppercase if its general category type, provided by 9602 * {@link Character#getType(int) getType(codePoint)}, is {@code UPPERCASE_LETTER}, 9603 * or it has contributory property Other_Uppercase as defined by the Unicode Standard. 
9604 * <p> 9605 * The following are examples of uppercase characters: 9606 * <blockquote><pre> 9607 * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z 9608 * '\u00C0' '\u00C1' '\u00C2' '\u00C3' '\u00C4' '\u00C5' '\u00C6' '\u00C7' 9609 * '\u00C8' '\u00C9' '\u00CA' '\u00CB' '\u00CC' '\u00CD' '\u00CE' '\u00CF' 9610 * '\u00D0' '\u00D1' '\u00D2' '\u00D3' '\u00D4' '\u00D5' '\u00D6' '\u00D8' 9611 * '\u00D9' '\u00DA' '\u00DB' '\u00DC' '\u00DD' '\u00DE' 9612 * </pre></blockquote> 9613 * <p> Many other Unicode characters are uppercase too. 9614 * 9615 * @param codePoint the character (Unicode code point) to be tested. 9616 * @return {@code true} if the character is uppercase; 9617 * {@code false} otherwise. 9618 * @see Character#isLowerCase(int) 9619 * @see Character#isTitleCase(int) 9620 * @see Character#toUpperCase(int) 9621 * @see Character#getType(int) 9622 * @since 1.5 9623 */ 9624 // BEGIN Android-changed: Reimplement methods natively on top of ICU4C. 9625 /* 9626 public static boolean isUpperCase(int codePoint) { 9627 return CharacterData.of(codePoint).isUpperCase(codePoint); 9628 } 9629 */ isUpperCase(int codePoint)9630 public static boolean isUpperCase(int codePoint) { 9631 return isUpperCaseImpl(codePoint); 9632 } 9633 9634 @FastNative isUpperCaseImpl(int codePoint)9635 static native boolean isUpperCaseImpl(int codePoint); 9636 // END Android-changed: Reimplement methods natively on top of ICU4C. 9637 9638 /** 9639 * Determines if the specified character is a titlecase character. 9640 * <p> 9641 * A character is a titlecase character if its general 9642 * category type, provided by {@code Character.getType(ch)}, 9643 * is {@code TITLECASE_LETTER}. 9644 * <p> 9645 * Some characters look like pairs of Latin letters. For example, there 9646 * is an uppercase letter that looks like "LJ" and has a corresponding 9647 * lowercase letter that looks like "lj". 
A third form, which looks like "Lj", 9648 * is the appropriate form to use when rendering a word in lowercase 9649 * with initial capitals, as for a book title. 9650 * <p> 9651 * These are some of the Unicode characters for which this method returns 9652 * {@code true}: 9653 * <ul> 9654 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON} 9655 * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J} 9656 * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J} 9657 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z} 9658 * </ul> 9659 * <p> Many other Unicode characters are titlecase too. 9660 * 9661 * <p><b>Note:</b> This method cannot handle <a 9662 * href="#supplementary"> supplementary characters</a>. To support 9663 * all Unicode characters, including supplementary characters, use 9664 * the {@link #isTitleCase(int)} method. 9665 * 9666 * @param ch the character to be tested. 9667 * @return {@code true} if the character is titlecase; 9668 * {@code false} otherwise. 9669 * @see Character#isLowerCase(char) 9670 * @see Character#isUpperCase(char) 9671 * @see Character#toTitleCase(char) 9672 * @see Character#getType(char) 9673 * @since 1.0.2 9674 */ isTitleCase(char ch)9675 public static boolean isTitleCase(char ch) { 9676 return isTitleCase((int)ch); 9677 } 9678 9679 /** 9680 * Determines if the specified character (Unicode code point) is a titlecase character. 9681 * <p> 9682 * A character is a titlecase character if its general 9683 * category type, provided by {@link Character#getType(int) getType(codePoint)}, 9684 * is {@code TITLECASE_LETTER}. 9685 * <p> 9686 * Some characters look like pairs of Latin letters. For example, there 9687 * is an uppercase letter that looks like "LJ" and has a corresponding 9688 * lowercase letter that looks like "lj". A third form, which looks like "Lj", 9689 * is the appropriate form to use when rendering a word in lowercase 9690 * with initial capitals, as for a book title. 
9691 * <p> 9692 * These are some of the Unicode characters for which this method returns 9693 * {@code true}: 9694 * <ul> 9695 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON} 9696 * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J} 9697 * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J} 9698 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z} 9699 * </ul> 9700 * <p> Many other Unicode characters are titlecase too. 9701 * 9702 * @param codePoint the character (Unicode code point) to be tested. 9703 * @return {@code true} if the character is titlecase; 9704 * {@code false} otherwise. 9705 * @see Character#isLowerCase(int) 9706 * @see Character#isUpperCase(int) 9707 * @see Character#toTitleCase(int) 9708 * @see Character#getType(int) 9709 * @since 1.5 9710 */ 9711 // BEGIN Android-changed: Reimplement methods natively on top of ICU4C. 9712 /* 9713 public static boolean isTitleCase(int codePoint) { 9714 return getType(codePoint) == Character.TITLECASE_LETTER; 9715 } 9716 */ isTitleCase(int codePoint)9717 public static boolean isTitleCase(int codePoint) { 9718 return isTitleCaseImpl(codePoint); 9719 } 9720 9721 @FastNative isTitleCaseImpl(int codePoint)9722 static native boolean isTitleCaseImpl(int codePoint); 9723 // END Android-changed: Reimplement methods natively on top of ICU4C. 9724 9725 /** 9726 * Determines if the specified character is a digit. 9727 * <p> 9728 * A character is a digit if its general category type, provided 9729 * by {@code Character.getType(ch)}, is 9730 * {@code DECIMAL_DIGIT_NUMBER}. 
9731 * <p> 9732 * Some Unicode character ranges that contain digits: 9733 * <ul> 9734 * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'}, 9735 * ISO-LATIN-1 digits ({@code '0'} through {@code '9'}) 9736 * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'}, 9737 * Arabic-Indic digits 9738 * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'}, 9739 * Extended Arabic-Indic digits 9740 * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'}, 9741 * Devanagari digits 9742 * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'}, 9743 * Fullwidth digits 9744 * </ul> 9745 * 9746 * Many other character ranges contain digits as well. 9747 * 9748 * <p><b>Note:</b> This method cannot handle <a 9749 * href="#supplementary"> supplementary characters</a>. To support 9750 * all Unicode characters, including supplementary characters, use 9751 * the {@link #isDigit(int)} method. 9752 * 9753 * @param ch the character to be tested. 9754 * @return {@code true} if the character is a digit; 9755 * {@code false} otherwise. 9756 * @see Character#digit(char, int) 9757 * @see Character#forDigit(int, int) 9758 * @see Character#getType(char) 9759 */ isDigit(char ch)9760 public static boolean isDigit(char ch) { 9761 return isDigit((int)ch); 9762 } 9763 9764 /** 9765 * Determines if the specified character (Unicode code point) is a digit. 9766 * <p> 9767 * A character is a digit if its general category type, provided 9768 * by {@link Character#getType(int) getType(codePoint)}, is 9769 * {@code DECIMAL_DIGIT_NUMBER}. 
9770 * <p> 9771 * Some Unicode character ranges that contain digits: 9772 * <ul> 9773 * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'}, 9774 * ISO-LATIN-1 digits ({@code '0'} through {@code '9'}) 9775 * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'}, 9776 * Arabic-Indic digits 9777 * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'}, 9778 * Extended Arabic-Indic digits 9779 * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'}, 9780 * Devanagari digits 9781 * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'}, 9782 * Fullwidth digits 9783 * </ul> 9784 * 9785 * Many other character ranges contain digits as well. 9786 * 9787 * @param codePoint the character (Unicode code point) to be tested. 9788 * @return {@code true} if the character is a digit; 9789 * {@code false} otherwise. 9790 * @see Character#forDigit(int, int) 9791 * @see Character#getType(int) 9792 * @since 1.5 9793 */ 9794 // BEGIN Android-changed: Reimplement methods natively on top of ICU4C. 9795 /* 9796 public static boolean isDigit(int codePoint) { 9797 return CharacterData.of(codePoint).isDigit(codePoint); 9798 } 9799 */ isDigit(int codePoint)9800 public static boolean isDigit(int codePoint) { 9801 return isDigitImpl(codePoint); 9802 } 9803 9804 @FastNative isDigitImpl(int codePoint)9805 static native boolean isDigitImpl(int codePoint); 9806 // END Android-changed: Reimplement methods natively on top of ICU4C. 9807 9808 /** 9809 * Determines if a character is defined in Unicode. 9810 * <p> 9811 * A character is defined if at least one of the following is true: 9812 * <ul> 9813 * <li>It has an entry in the UnicodeData file. 9814 * <li>It has a value in a range defined by the UnicodeData file. 9815 * </ul> 9816 * 9817 * <p><b>Note:</b> This method cannot handle <a 9818 * href="#supplementary"> supplementary characters</a>. To support 9819 * all Unicode characters, including supplementary characters, use 9820 * the {@link #isDefined(int)} method. 
9821 * 9822 * @param ch the character to be tested 9823 * @return {@code true} if the character has a defined meaning 9824 * in Unicode; {@code false} otherwise. 9825 * @see Character#isDigit(char) 9826 * @see Character#isLetter(char) 9827 * @see Character#isLetterOrDigit(char) 9828 * @see Character#isLowerCase(char) 9829 * @see Character#isTitleCase(char) 9830 * @see Character#isUpperCase(char) 9831 * @since 1.0.2 9832 */ isDefined(char ch)9833 public static boolean isDefined(char ch) { 9834 return isDefined((int)ch); 9835 } 9836 9837 /** 9838 * Determines if a character (Unicode code point) is defined in Unicode. 9839 * <p> 9840 * A character is defined if at least one of the following is true: 9841 * <ul> 9842 * <li>It has an entry in the UnicodeData file. 9843 * <li>It has a value in a range defined by the UnicodeData file. 9844 * </ul> 9845 * 9846 * @param codePoint the character (Unicode code point) to be tested. 9847 * @return {@code true} if the character has a defined meaning 9848 * in Unicode; {@code false} otherwise. 9849 * @see Character#isDigit(int) 9850 * @see Character#isLetter(int) 9851 * @see Character#isLetterOrDigit(int) 9852 * @see Character#isLowerCase(int) 9853 * @see Character#isTitleCase(int) 9854 * @see Character#isUpperCase(int) 9855 * @since 1.5 9856 */ 9857 // BEGIN Android-changed: Reimplement methods natively on top of ICU4C. 9858 /* 9859 public static boolean isDefined(int codePoint) { 9860 return getType(codePoint) != Character.UNASSIGNED; 9861 } 9862 */ isDefined(int codePoint)9863 public static boolean isDefined(int codePoint) { 9864 return isDefinedImpl(codePoint); 9865 } 9866 9867 @FastNative isDefinedImpl(int codePoint)9868 static native boolean isDefinedImpl(int codePoint); 9869 // END Android-changed: Reimplement methods natively on top of ICU4C. 9870 9871 /** 9872 * Determines if the specified character is a letter. 
9873 * <p> 9874 * A character is considered to be a letter if its general 9875 * category type, provided by {@code Character.getType(ch)}, 9876 * is any of the following: 9877 * <ul> 9878 * <li> {@code UPPERCASE_LETTER} 9879 * <li> {@code LOWERCASE_LETTER} 9880 * <li> {@code TITLECASE_LETTER} 9881 * <li> {@code MODIFIER_LETTER} 9882 * <li> {@code OTHER_LETTER} 9883 * </ul> 9884 * 9885 * Not all letters have case. Many characters are 9886 * letters but are neither uppercase nor lowercase nor titlecase. 9887 * 9888 * <p><b>Note:</b> This method cannot handle <a 9889 * href="#supplementary"> supplementary characters</a>. To support 9890 * all Unicode characters, including supplementary characters, use 9891 * the {@link #isLetter(int)} method. 9892 * 9893 * @param ch the character to be tested. 9894 * @return {@code true} if the character is a letter; 9895 * {@code false} otherwise. 9896 * @see Character#isDigit(char) 9897 * @see Character#isJavaIdentifierStart(char) 9898 * @see Character#isJavaLetter(char) 9899 * @see Character#isJavaLetterOrDigit(char) 9900 * @see Character#isLetterOrDigit(char) 9901 * @see Character#isLowerCase(char) 9902 * @see Character#isTitleCase(char) 9903 * @see Character#isUnicodeIdentifierStart(char) 9904 * @see Character#isUpperCase(char) 9905 */ isLetter(char ch)9906 public static boolean isLetter(char ch) { 9907 return isLetter((int)ch); 9908 } 9909 9910 /** 9911 * Determines if the specified character (Unicode code point) is a letter. 9912 * <p> 9913 * A character is considered to be a letter if its general 9914 * category type, provided by {@link Character#getType(int) getType(codePoint)}, 9915 * is any of the following: 9916 * <ul> 9917 * <li> {@code UPPERCASE_LETTER} 9918 * <li> {@code LOWERCASE_LETTER} 9919 * <li> {@code TITLECASE_LETTER} 9920 * <li> {@code MODIFIER_LETTER} 9921 * <li> {@code OTHER_LETTER} 9922 * </ul> 9923 * 9924 * Not all letters have case. 
Many characters are 9925 * letters but are neither uppercase nor lowercase nor titlecase. 9926 * 9927 * @param codePoint the character (Unicode code point) to be tested. 9928 * @return {@code true} if the character is a letter; 9929 * {@code false} otherwise. 9930 * @see Character#isDigit(int) 9931 * @see Character#isJavaIdentifierStart(int) 9932 * @see Character#isLetterOrDigit(int) 9933 * @see Character#isLowerCase(int) 9934 * @see Character#isTitleCase(int) 9935 * @see Character#isUnicodeIdentifierStart(int) 9936 * @see Character#isUpperCase(int) 9937 * @since 1.5 9938 */ 9939 // BEGIN Android-changed: Reimplement methods natively on top of ICU4C. 9940 /* 9941 public static boolean isLetter(int codePoint) { 9942 return ((((1 << Character.UPPERCASE_LETTER) | 9943 (1 << Character.LOWERCASE_LETTER) | 9944 (1 << Character.TITLECASE_LETTER) | 9945 (1 << Character.MODIFIER_LETTER) | 9946 (1 << Character.OTHER_LETTER)) >> getType(codePoint)) & 1) 9947 != 0; 9948 } 9949 */ isLetter(int codePoint)9950 public static boolean isLetter(int codePoint) { 9951 return isLetterImpl(codePoint); 9952 } 9953 9954 @FastNative isLetterImpl(int codePoint)9955 static native boolean isLetterImpl(int codePoint); 9956 // END Android-changed: Reimplement methods natively on top of ICU4C. 9957 9958 /** 9959 * Determines if the specified character is a letter or digit. 9960 * <p> 9961 * A character is considered to be a letter or digit if either 9962 * {@code Character.isLetter(char ch)} or 9963 * {@code Character.isDigit(char ch)} returns 9964 * {@code true} for the character. 9965 * 9966 * <p><b>Note:</b> This method cannot handle <a 9967 * href="#supplementary"> supplementary characters</a>. To support 9968 * all Unicode characters, including supplementary characters, use 9969 * the {@link #isLetterOrDigit(int)} method. 9970 * 9971 * @param ch the character to be tested. 9972 * @return {@code true} if the character is a letter or digit; 9973 * {@code false} otherwise. 
9974 * @see Character#isDigit(char) 9975 * @see Character#isJavaIdentifierPart(char) 9976 * @see Character#isJavaLetter(char) 9977 * @see Character#isJavaLetterOrDigit(char) 9978 * @see Character#isLetter(char) 9979 * @see Character#isUnicodeIdentifierPart(char) 9980 * @since 1.0.2 9981 */ isLetterOrDigit(char ch)9982 public static boolean isLetterOrDigit(char ch) { 9983 return isLetterOrDigit((int)ch); 9984 } 9985 9986 /** 9987 * Determines if the specified character (Unicode code point) is a letter or digit. 9988 * <p> 9989 * A character is considered to be a letter or digit if either 9990 * {@link #isLetter(int) isLetter(codePoint)} or 9991 * {@link #isDigit(int) isDigit(codePoint)} returns 9992 * {@code true} for the character. 9993 * 9994 * @param codePoint the character (Unicode code point) to be tested. 9995 * @return {@code true} if the character is a letter or digit; 9996 * {@code false} otherwise. 9997 * @see Character#isDigit(int) 9998 * @see Character#isJavaIdentifierPart(int) 9999 * @see Character#isLetter(int) 10000 * @see Character#isUnicodeIdentifierPart(int) 10001 * @since 1.5 10002 */ 10003 // BEGIN Android-changed: Reimplement methods natively on top of ICU4C. 10004 /* 10005 public static boolean isLetterOrDigit(int codePoint) { 10006 return ((((1 << Character.UPPERCASE_LETTER) | 10007 (1 << Character.LOWERCASE_LETTER) | 10008 (1 << Character.TITLECASE_LETTER) | 10009 (1 << Character.MODIFIER_LETTER) | 10010 (1 << Character.OTHER_LETTER) | 10011 (1 << Character.DECIMAL_DIGIT_NUMBER)) >> getType(codePoint)) & 1) 10012 != 0; 10013 } 10014 */ isLetterOrDigit(int codePoint)10015 public static boolean isLetterOrDigit(int codePoint) { 10016 return isLetterOrDigitImpl(codePoint); 10017 } 10018 10019 @FastNative isLetterOrDigitImpl(int codePoint)10020 static native boolean isLetterOrDigitImpl(int codePoint); 10021 // END Android-changed: Reimplement methods natively on top of ICU4C. 
10022 10023 /** 10024 * Determines if the specified character is permissible as the first 10025 * character in a Java identifier. 10026 * <p> 10027 * A character may start a Java identifier if and only if 10028 * one of the following conditions is true: 10029 * <ul> 10030 * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true} 10031 * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER} 10032 * <li> {@code ch} is a currency symbol (such as {@code '$'}) 10033 * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}). 10034 * </ul> 10035 * 10036 * @param ch the character to be tested. 10037 * @return {@code true} if the character may start a Java 10038 * identifier; {@code false} otherwise. 10039 * @see Character#isJavaLetterOrDigit(char) 10040 * @see Character#isJavaIdentifierStart(char) 10041 * @see Character#isJavaIdentifierPart(char) 10042 * @see Character#isLetter(char) 10043 * @see Character#isLetterOrDigit(char) 10044 * @see Character#isUnicodeIdentifierStart(char) 10045 * @since 1.0.2 10046 * @deprecated Replaced by isJavaIdentifierStart(char). 10047 */ 10048 @Deprecated(since="1.1") isJavaLetter(char ch)10049 public static boolean isJavaLetter(char ch) { 10050 return isJavaIdentifierStart(ch); 10051 } 10052 10053 /** 10054 * Determines if the specified character may be part of a Java 10055 * identifier as other than the first character. 10056 * <p> 10057 * A character may be part of a Java identifier if and only if one 10058 * of the following conditions is true: 10059 * <ul> 10060 * <li> it is a letter 10061 * <li> it is a currency symbol (such as {@code '$'}) 10062 * <li> it is a connecting punctuation character (such as {@code '_'}) 10063 * <li> it is a digit 10064 * <li> it is a numeric letter (such as a Roman numeral character) 10065 * <li> it is a combining mark 10066 * <li> it is a non-spacing mark 10067 * <li> {@code isIdentifierIgnorable} returns 10068 * {@code true} for the character. 
10069 * </ul> 10070 * 10071 * @param ch the character to be tested. 10072 * @return {@code true} if the character may be part of a 10073 * Java identifier; {@code false} otherwise. 10074 * @see Character#isJavaLetter(char) 10075 * @see Character#isJavaIdentifierStart(char) 10076 * @see Character#isJavaIdentifierPart(char) 10077 * @see Character#isLetter(char) 10078 * @see Character#isLetterOrDigit(char) 10079 * @see Character#isUnicodeIdentifierPart(char) 10080 * @see Character#isIdentifierIgnorable(char) 10081 * @since 1.0.2 10082 * @deprecated Replaced by isJavaIdentifierPart(char). 10083 */ 10084 @Deprecated(since="1.1") isJavaLetterOrDigit(char ch)10085 public static boolean isJavaLetterOrDigit(char ch) { 10086 return isJavaIdentifierPart(ch); 10087 } 10088 10089 /** 10090 * Determines if the specified character (Unicode code point) is alphabetic. 10091 * <p> 10092 * A character is considered to be alphabetic if its general category type, 10093 * provided by {@link Character#getType(int) getType(codePoint)}, is any of 10094 * the following: 10095 * <ul> 10096 * <li> {@code UPPERCASE_LETTER} 10097 * <li> {@code LOWERCASE_LETTER} 10098 * <li> {@code TITLECASE_LETTER} 10099 * <li> {@code MODIFIER_LETTER} 10100 * <li> {@code OTHER_LETTER} 10101 * <li> {@code LETTER_NUMBER} 10102 * </ul> 10103 * or it has contributory property Other_Alphabetic as defined by the 10104 * Unicode Standard. 10105 * 10106 * @param codePoint the character (Unicode code point) to be tested. 10107 * @return {@code true} if the character is a Unicode alphabet 10108 * character, {@code false} otherwise. 10109 * @since 1.7 10110 */ 10111 // BEGIN Android-changed: Reimplement methods natively on top of ICU4C. 
10112 /* 10113 public static boolean isAlphabetic(int codePoint) { 10114 return (((((1 << Character.UPPERCASE_LETTER) | 10115 (1 << Character.LOWERCASE_LETTER) | 10116 (1 << Character.TITLECASE_LETTER) | 10117 (1 << Character.MODIFIER_LETTER) | 10118 (1 << Character.OTHER_LETTER) | 10119 (1 << Character.LETTER_NUMBER)) >> getType(codePoint)) & 1) != 0) || 10120 CharacterData.of(codePoint).isOtherAlphabetic(codePoint); 10121 } 10122 */ isAlphabetic(int codePoint)10123 public static boolean isAlphabetic(int codePoint) { 10124 return isAlphabeticImpl(codePoint); 10125 } 10126 10127 @FastNative isAlphabeticImpl(int codePoint)10128 static native boolean isAlphabeticImpl(int codePoint); 10129 // END Android-changed: Reimplement methods natively on top of ICU4C. 10130 10131 /** 10132 * Determines if the specified character (Unicode code point) is a CJKV 10133 * (Chinese, Japanese, Korean and Vietnamese) ideograph, as defined by 10134 * the Unicode Standard. 10135 * 10136 * @param codePoint the character (Unicode code point) to be tested. 10137 * @return {@code true} if the character is a Unicode ideograph 10138 * character, {@code false} otherwise. 10139 * @since 1.7 10140 */ 10141 // BEGIN Android-changed: Reimplement methods natively on top of ICU4C. 10142 /* 10143 public static boolean isIdeographic(int codePoint) { 10144 return CharacterData.of(codePoint).isIdeographic(codePoint); 10145 } 10146 */ isIdeographic(int codePoint)10147 public static boolean isIdeographic(int codePoint) { 10148 return isIdeographicImpl(codePoint); 10149 } 10150 @FastNative isIdeographicImpl(int codePoint)10151 static native boolean isIdeographicImpl(int codePoint); 10152 // END Android-changed: Reimplement methods natively on top of ICU4C. 
10153 10154 // Android-changed: Removed @see tag (target does not exist on Android): 10155 // @see javax.lang.model.SourceVersion#isIdentifier(CharSequence) 10156 /** 10157 * Determines if the specified character is 10158 * permissible as the first character in a Java identifier. 10159 * <p> 10160 * A character may start a Java identifier if and only if 10161 * one of the following conditions is true: 10162 * <ul> 10163 * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true} 10164 * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER} 10165 * <li> {@code ch} is a currency symbol (such as {@code '$'}) 10166 * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}). 10167 * </ul> 10168 * 10169 * <p><b>Note:</b> This method cannot handle <a 10170 * href="#supplementary"> supplementary characters</a>. To support 10171 * all Unicode characters, including supplementary characters, use 10172 * the {@link #isJavaIdentifierStart(int)} method. 10173 * 10174 * @param ch the character to be tested. 10175 * @return {@code true} if the character may start a Java identifier; 10176 * {@code false} otherwise. 10177 * @see Character#isJavaIdentifierPart(char) 10178 * @see Character#isLetter(char) 10179 * @see Character#isUnicodeIdentifierStart(char) 10180 * @since 1.1 10181 */ isJavaIdentifierStart(char ch)10182 public static boolean isJavaIdentifierStart(char ch) { 10183 return isJavaIdentifierStart((int)ch); 10184 } 10185 10186 // Android-changed: Removed @see tag (target does not exist on Android): 10187 // @see javax.lang.model.SourceVersion#isIdentifier(CharSequence) 10188 /** 10189 * Determines if the character (Unicode code point) is 10190 * permissible as the first character in a Java identifier. 
10191 * <p> 10192 * A character may start a Java identifier if and only if 10193 * one of the following conditions is true: 10194 * <ul> 10195 * <li> {@link #isLetter(int) isLetter(codePoint)} 10196 * returns {@code true} 10197 * <li> {@link #getType(int) getType(codePoint)} 10198 * returns {@code LETTER_NUMBER} 10199 * <li> the referenced character is a currency symbol (such as {@code '$'}) 10200 * <li> the referenced character is a connecting punctuation character 10201 * (such as {@code '_'}). 10202 * </ul> 10203 * 10204 * @param codePoint the character (Unicode code point) to be tested. 10205 * @return {@code true} if the character may start a Java identifier; 10206 * {@code false} otherwise. 10207 * @see Character#isJavaIdentifierPart(int) 10208 * @see Character#isLetter(int) 10209 * @see Character#isUnicodeIdentifierStart(int) 10210 * @since 1.5 10211 */ 10212 // BEGIN Android-changed: Use ICU. 10213 /* 10214 public static boolean isJavaIdentifierStart(int codePoint) { 10215 return CharacterData.of(codePoint).isJavaIdentifierStart(codePoint); 10216 } 10217 */ isJavaIdentifierStart(int codePoint)10218 public static boolean isJavaIdentifierStart(int codePoint) { 10219 // Use precomputed bitmasks to optimize the ASCII range. 10220 if (codePoint < 64) { 10221 return (codePoint == '$'); // There's only one character in this range. 10222 } else if (codePoint < 128) { 10223 return (0x7fffffe87fffffeL & (1L << (codePoint - 64))) != 0; 10224 } 10225 return ((1 << getType(codePoint)) 10226 & ((1 << UPPERCASE_LETTER) 10227 | (1 << LOWERCASE_LETTER) 10228 | (1 << TITLECASE_LETTER) 10229 | (1 << MODIFIER_LETTER) 10230 | (1 << OTHER_LETTER) 10231 | (1 << CURRENCY_SYMBOL) 10232 | (1 << CONNECTOR_PUNCTUATION) 10233 | (1 << LETTER_NUMBER))) != 0; 10234 } 10235 // END Android-changed: Use ICU. 
10236 10237 // Android-changed: Removed @see tag (target does not exist on Android): 10238 // @see javax.lang.model.SourceVersion#isIdentifier(CharSequence) 10239 /** 10240 * Determines if the specified character may be part of a Java 10241 * identifier as other than the first character. 10242 * <p> 10243 * A character may be part of a Java identifier if any of the following 10244 * conditions are true: 10245 * <ul> 10246 * <li> it is a letter 10247 * <li> it is a currency symbol (such as {@code '$'}) 10248 * <li> it is a connecting punctuation character (such as {@code '_'}) 10249 * <li> it is a digit 10250 * <li> it is a numeric letter (such as a Roman numeral character) 10251 * <li> it is a combining mark 10252 * <li> it is a non-spacing mark 10253 * <li> {@code isIdentifierIgnorable} returns 10254 * {@code true} for the character 10255 * </ul> 10256 * 10257 * <p><b>Note:</b> This method cannot handle <a 10258 * href="#supplementary"> supplementary characters</a>. To support 10259 * all Unicode characters, including supplementary characters, use 10260 * the {@link #isJavaIdentifierPart(int)} method. 10261 * 10262 * @param ch the character to be tested. 10263 * @return {@code true} if the character may be part of a 10264 * Java identifier; {@code false} otherwise. 10265 * @see Character#isIdentifierIgnorable(char) 10266 * @see Character#isJavaIdentifierStart(char) 10267 * @see Character#isLetterOrDigit(char) 10268 * @see Character#isUnicodeIdentifierPart(char) 10269 * @since 1.1 10270 */ isJavaIdentifierPart(char ch)10271 public static boolean isJavaIdentifierPart(char ch) { 10272 return isJavaIdentifierPart((int)ch); 10273 } 10274 10275 // Android-changed: Removed @see tag (target does not exist on Android): 10276 // @see javax.lang.model.SourceVersion#isIdentifier(CharSequence) 10277 /** 10278 * Determines if the character (Unicode code point) may be part of a Java 10279 * identifier as other than the first character. 
10280 * <p> 10281 * A character may be part of a Java identifier if any of the following 10282 * conditions are true: 10283 * <ul> 10284 * <li> it is a letter 10285 * <li> it is a currency symbol (such as {@code '$'}) 10286 * <li> it is a connecting punctuation character (such as {@code '_'}) 10287 * <li> it is a digit 10288 * <li> it is a numeric letter (such as a Roman numeral character) 10289 * <li> it is a combining mark 10290 * <li> it is a non-spacing mark 10291 * <li> {@link #isIdentifierIgnorable(int) 10292 * isIdentifierIgnorable(codePoint)} returns {@code true} for 10293 * the code point 10294 * </ul> 10295 * 10296 * @param codePoint the character (Unicode code point) to be tested. 10297 * @return {@code true} if the character may be part of a 10298 * Java identifier; {@code false} otherwise. 10299 * @see Character#isIdentifierIgnorable(int) 10300 * @see Character#isJavaIdentifierStart(int) 10301 * @see Character#isLetterOrDigit(int) 10302 * @see Character#isUnicodeIdentifierPart(int) 10303 * @since 1.5 10304 */ 10305 // BEGIN Android-changed: Use ICU. 10306 /* 10307 public static boolean isJavaIdentifierPart(int codePoint) { 10308 return CharacterData.of(codePoint).isJavaIdentifierPart(codePoint); 10309 } 10310 */ isJavaIdentifierPart(int codePoint)10311 public static boolean isJavaIdentifierPart(int codePoint) { 10312 // Use precomputed bitmasks to optimize the ASCII range. 
10313 if (codePoint < 64) { 10314 return (0x3ff00100fffc1ffL & (1L << codePoint)) != 0; 10315 } else if (codePoint < 128) { 10316 return (0x87fffffe87fffffeL & (1L << (codePoint - 64))) != 0; 10317 } 10318 return ((1 << getType(codePoint)) 10319 & ((1 << UPPERCASE_LETTER) 10320 | (1 << LOWERCASE_LETTER) 10321 | (1 << TITLECASE_LETTER) 10322 | (1 << MODIFIER_LETTER) 10323 | (1 << OTHER_LETTER) 10324 | (1 << CURRENCY_SYMBOL) 10325 | (1 << CONNECTOR_PUNCTUATION) 10326 | (1 << DECIMAL_DIGIT_NUMBER) 10327 | (1 << LETTER_NUMBER) 10328 | (1 << FORMAT) 10329 | (1 << COMBINING_SPACING_MARK) 10330 | (1 << NON_SPACING_MARK))) != 0 10331 || (codePoint >= 0 && codePoint <= 8) || (codePoint >= 0xe && codePoint <= 0x1b) 10332 || (codePoint >= 0x7f && codePoint <= 0x9f); 10333 } 10334 // END Android-changed: Use ICU. 10335 10336 /** 10337 * Determines if the specified character is permissible as the 10338 * first character in a Unicode identifier. 10339 * <p> 10340 * A character may start a Unicode identifier if and only if 10341 * one of the following conditions is true: 10342 * <ul> 10343 * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true} 10344 * <li> {@link #getType(char) getType(ch)} returns 10345 * {@code LETTER_NUMBER}. 10346 * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Start"> 10347 * {@code Other_ID_Start}</a> character. 10348 * </ul> 10349 * <p> 10350 * This method conforms to <a href="https://unicode.org/reports/tr31/#R1"> 10351 * UAX31-R1: Default Identifiers</a> requirement of the Unicode Standard, 10352 * with the following profile of UAX31: 10353 * <pre> 10354 * Start := ID_Start + 'VERTICAL TILDE' (U+2E2F) 10355 * </pre> 10356 * {@code 'VERTICAL TILDE'} is added to {@code Start} for backward 10357 * compatibility. 10358 * 10359 * <p><b>Note:</b> This method cannot handle <a 10360 * href="#supplementary"> supplementary characters</a>. 
To support 10361 * all Unicode characters, including supplementary characters, use 10362 * the {@link #isUnicodeIdentifierStart(int)} method. 10363 * 10364 * @param ch the character to be tested. 10365 * @return {@code true} if the character may start a Unicode 10366 * identifier; {@code false} otherwise. 10367 * @see Character#isJavaIdentifierStart(char) 10368 * @see Character#isLetter(char) 10369 * @see Character#isUnicodeIdentifierPart(char) 10370 * @since 1.1 10371 */ isUnicodeIdentifierStart(char ch)10372 public static boolean isUnicodeIdentifierStart(char ch) { 10373 return isUnicodeIdentifierStart((int)ch); 10374 } 10375 10376 /** 10377 * Determines if the specified character (Unicode code point) is permissible as the 10378 * first character in a Unicode identifier. 10379 * <p> 10380 * A character may start a Unicode identifier if and only if 10381 * one of the following conditions is true: 10382 * <ul> 10383 * <li> {@link #isLetter(int) isLetter(codePoint)} 10384 * returns {@code true} 10385 * <li> {@link #getType(int) getType(codePoint)} 10386 * returns {@code LETTER_NUMBER}. 10387 * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Start"> 10388 * {@code Other_ID_Start}</a> character. 10389 * </ul> 10390 * <p> 10391 * This method conforms to <a href="https://unicode.org/reports/tr31/#R1"> 10392 * UAX31-R1: Default Identifiers</a> requirement of the Unicode Standard, 10393 * with the following profile of UAX31: 10394 * <pre> 10395 * Start := ID_Start + 'VERTICAL TILDE' (U+2E2F) 10396 * </pre> 10397 * {@code 'VERTICAL TILDE'} is added to {@code Start} for backward 10398 * compatibility. 10399 * 10400 * @param codePoint the character (Unicode code point) to be tested. 10401 * @return {@code true} if the character may start a Unicode 10402 * identifier; {@code false} otherwise. 
10403 * @see Character#isJavaIdentifierStart(int) 10404 * @see Character#isLetter(int) 10405 * @see Character#isUnicodeIdentifierPart(int) 10406 * @since 1.5 10407 */ 10408 // BEGIN Android-changed: Reimplement methods natively on top of ICU4C. 10409 /* 10410 public static boolean isUnicodeIdentifierStart(int codePoint) { 10411 return CharacterData.of(codePoint).isUnicodeIdentifierStart(codePoint); 10412 } 10413 */ isUnicodeIdentifierStart(int codePoint)10414 public static boolean isUnicodeIdentifierStart(int codePoint) { 10415 return isUnicodeIdentifierStartImpl(codePoint); 10416 } 10417 10418 @FastNative isUnicodeIdentifierStartImpl(int codePoint)10419 static native boolean isUnicodeIdentifierStartImpl(int codePoint); 10420 // END Android-changed: Reimplement methods natively on top of ICU4C. 10421 10422 /** 10423 * Determines if the specified character may be part of a Unicode 10424 * identifier as other than the first character. 10425 * <p> 10426 * A character may be part of a Unicode identifier if and only if 10427 * one of the following statements is true: 10428 * <ul> 10429 * <li> it is a letter 10430 * <li> it is a connecting punctuation character (such as {@code '_'}) 10431 * <li> it is a digit 10432 * <li> it is a numeric letter (such as a Roman numeral character) 10433 * <li> it is a combining mark 10434 * <li> it is a non-spacing mark 10435 * <li> {@code isIdentifierIgnorable} returns 10436 * {@code true} for this character. 10437 * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Start"> 10438 * {@code Other_ID_Start}</a> character. 10439 * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Continue"> 10440 * {@code Other_ID_Continue}</a> character. 
10441 * </ul> 10442 * <p> 10443 * This method conforms to <a href="https://unicode.org/reports/tr31/#R1"> 10444 * UAX31-R1: Default Identifiers</a> requirement of the Unicode Standard, 10445 * with the following profile of UAX31: 10446 * <pre> 10447 * Continue := Start + ID_Continue + ignorable 10448 * Medial := empty 10449 * ignorable := isIdentifierIgnorable(char) returns true for the character 10450 * </pre> 10451 * {@code ignorable} is added to {@code Continue} for backward 10452 * compatibility. 10453 * 10454 * <p><b>Note:</b> This method cannot handle <a 10455 * href="#supplementary"> supplementary characters</a>. To support 10456 * all Unicode characters, including supplementary characters, use 10457 * the {@link #isUnicodeIdentifierPart(int)} method. 10458 * 10459 * @param ch the character to be tested. 10460 * @return {@code true} if the character may be part of a 10461 * Unicode identifier; {@code false} otherwise. 10462 * @see Character#isIdentifierIgnorable(char) 10463 * @see Character#isJavaIdentifierPart(char) 10464 * @see Character#isLetterOrDigit(char) 10465 * @see Character#isUnicodeIdentifierStart(char) 10466 * @since 1.1 10467 */ isUnicodeIdentifierPart(char ch)10468 public static boolean isUnicodeIdentifierPart(char ch) { 10469 return isUnicodeIdentifierPart((int)ch); 10470 } 10471 10472 /** 10473 * Determines if the specified character (Unicode code point) may be part of a Unicode 10474 * identifier as other than the first character. 10475 * <p> 10476 * A character may be part of a Unicode identifier if and only if 10477 * one of the following statements is true: 10478 * <ul> 10479 * <li> it is a letter 10480 * <li> it is a connecting punctuation character (such as {@code '_'}) 10481 * <li> it is a digit 10482 * <li> it is a numeric letter (such as a Roman numeral character) 10483 * <li> it is a combining mark 10484 * <li> it is a non-spacing mark 10485 * <li> {@code isIdentifierIgnorable} returns 10486 * {@code true} for this character. 
10487 * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Start"> 10488 * {@code Other_ID_Start}</a> character. 10489 * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Continue"> 10490 * {@code Other_ID_Continue}</a> character. 10491 * </ul> 10492 * <p> 10493 * This method conforms to <a href="https://unicode.org/reports/tr31/#R1"> 10494 * UAX31-R1: Default Identifiers</a> requirement of the Unicode Standard, 10495 * with the following profile of UAX31: 10496 * <pre> 10497 * Continue := Start + ID_Continue + ignorable 10498 * Medial := empty 10499 * ignorable := isIdentifierIgnorable(int) returns true for the character 10500 * </pre> 10501 * {@code ignorable} is added to {@code Continue} for backward 10502 * compatibility. 10503 * 10504 * @param codePoint the character (Unicode code point) to be tested. 10505 * @return {@code true} if the character may be part of a 10506 * Unicode identifier; {@code false} otherwise. 10507 * @see Character#isIdentifierIgnorable(int) 10508 * @see Character#isJavaIdentifierPart(int) 10509 * @see Character#isLetterOrDigit(int) 10510 * @see Character#isUnicodeIdentifierStart(int) 10511 * @since 1.5 10512 */ 10513 // BEGIN Android-changed: Reimplement methods natively on top of ICU4C. 10514 /* 10515 public static boolean isUnicodeIdentifierPart(int codePoint) { 10516 return CharacterData.of(codePoint).isUnicodeIdentifierPart(codePoint); 10517 } 10518 */ isUnicodeIdentifierPart(int codePoint)10519 public static boolean isUnicodeIdentifierPart(int codePoint) { 10520 return isUnicodeIdentifierPartImpl(codePoint); 10521 } 10522 10523 @FastNative isUnicodeIdentifierPartImpl(int codePoint)10524 static native boolean isUnicodeIdentifierPartImpl(int codePoint); 10525 // END Android-changed: Reimplement methods natively on top of ICU4C. 10526 10527 /** 10528 * Determines if the specified character should be regarded as 10529 * an ignorable character in a Java identifier or a Unicode identifier. 
10530 * <p> 10531 * The following Unicode characters are ignorable in a Java identifier 10532 * or a Unicode identifier: 10533 * <ul> 10534 * <li>ISO control characters that are not whitespace 10535 * <ul> 10536 * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'} 10537 * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'} 10538 * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'} 10539 * </ul> 10540 * 10541 * <li>all characters that have the {@code FORMAT} general 10542 * category value 10543 * </ul> 10544 * 10545 * <p><b>Note:</b> This method cannot handle <a 10546 * href="#supplementary"> supplementary characters</a>. To support 10547 * all Unicode characters, including supplementary characters, use 10548 * the {@link #isIdentifierIgnorable(int)} method. 10549 * 10550 * @param ch the character to be tested. 10551 * @return {@code true} if the character is an ignorable control 10552 * character that may be part of a Java or Unicode identifier; 10553 * {@code false} otherwise. 10554 * @see Character#isJavaIdentifierPart(char) 10555 * @see Character#isUnicodeIdentifierPart(char) 10556 * @since 1.1 10557 */ isIdentifierIgnorable(char ch)10558 public static boolean isIdentifierIgnorable(char ch) { 10559 return isIdentifierIgnorable((int)ch); 10560 } 10561 10562 /** 10563 * Determines if the specified character (Unicode code point) should be regarded as 10564 * an ignorable character in a Java identifier or a Unicode identifier. 
10565 * <p> 10566 * The following Unicode characters are ignorable in a Java identifier 10567 * or a Unicode identifier: 10568 * <ul> 10569 * <li>ISO control characters that are not whitespace 10570 * <ul> 10571 * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'} 10572 * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'} 10573 * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'} 10574 * </ul> 10575 * 10576 * <li>all characters that have the {@code FORMAT} general 10577 * category value 10578 * </ul> 10579 * 10580 * @param codePoint the character (Unicode code point) to be tested. 10581 * @return {@code true} if the character is an ignorable control 10582 * character that may be part of a Java or Unicode identifier; 10583 * {@code false} otherwise. 10584 * @see Character#isJavaIdentifierPart(int) 10585 * @see Character#isUnicodeIdentifierPart(int) 10586 * @since 1.5 10587 */ 10588 // BEGIN Android-changed: Reimplement methods natively on top of ICU4C. 10589 /* 10590 public static boolean isIdentifierIgnorable(int codePoint) { 10591 return CharacterData.of(codePoint).isIdentifierIgnorable(codePoint); 10592 } 10593 */ isIdentifierIgnorable(int codePoint)10594 public static boolean isIdentifierIgnorable(int codePoint) { 10595 return isIdentifierIgnorableImpl(codePoint); 10596 } 10597 10598 @FastNative isIdentifierIgnorableImpl(int codePoint)10599 static native boolean isIdentifierIgnorableImpl(int codePoint); 10600 // END Android-changed: Reimplement methods natively on top of ICU4C. 10601 10602 /** 10603 * Converts the character argument to lowercase using case 10604 * mapping information from the UnicodeData file. 10605 * <p> 10606 * Note that 10607 * {@code Character.isLowerCase(Character.toLowerCase(ch))} 10608 * does not always return {@code true} for some ranges of 10609 * characters, particularly those that are symbols or ideographs. 10610 * 10611 * <p>In general, {@link String#toLowerCase()} should be used to map 10612 * characters to lowercase. 
{@code String} case mapping methods 10613 * have several benefits over {@code Character} case mapping methods. 10614 * {@code String} case mapping methods can perform locale-sensitive 10615 * mappings, context-sensitive mappings, and 1:M character mappings, whereas 10616 * the {@code Character} case mapping methods cannot. 10617 * 10618 * <p><b>Note:</b> This method cannot handle <a 10619 * href="#supplementary"> supplementary characters</a>. To support 10620 * all Unicode characters, including supplementary characters, use 10621 * the {@link #toLowerCase(int)} method. 10622 * 10623 * @param ch the character to be converted. 10624 * @return the lowercase equivalent of the character, if any; 10625 * otherwise, the character itself. 10626 * @see Character#isLowerCase(char) 10627 * @see String#toLowerCase() 10628 */ toLowerCase(char ch)10629 public static char toLowerCase(char ch) { 10630 return (char)toLowerCase((int)ch); 10631 } 10632 10633 /** 10634 * Converts the character (Unicode code point) argument to 10635 * lowercase using case mapping information from the UnicodeData 10636 * file. 10637 * 10638 * <p> Note that 10639 * {@code Character.isLowerCase(Character.toLowerCase(codePoint))} 10640 * does not always return {@code true} for some ranges of 10641 * characters, particularly those that are symbols or ideographs. 10642 * 10643 * <p>In general, {@link String#toLowerCase()} should be used to map 10644 * characters to lowercase. {@code String} case mapping methods 10645 * have several benefits over {@code Character} case mapping methods. 10646 * {@code String} case mapping methods can perform locale-sensitive 10647 * mappings, context-sensitive mappings, and 1:M character mappings, whereas 10648 * the {@code Character} case mapping methods cannot. 10649 * 10650 * @param codePoint the character (Unicode code point) to be converted. 10651 * @return the lowercase equivalent of the character (Unicode code 10652 * point), if any; otherwise, the character itself. 
10653 * @see Character#isLowerCase(int) 10654 * @see String#toLowerCase() 10655 * 10656 * @since 1.5 10657 */ 10658 // BEGIN Android-changed: Reimplement methods natively on top of ICU4C. 10659 /* 10660 public static int toLowerCase(int codePoint) { 10661 return CharacterData.of(codePoint).toLowerCase(codePoint); 10662 } 10663 */ toLowerCase(int codePoint)10664 public static int toLowerCase(int codePoint) { 10665 if (codePoint >= 'A' && codePoint <= 'Z') { 10666 return codePoint + ('a' - 'A'); 10667 } 10668 10669 // All ASCII codepoints except the ones above remain unchanged. 10670 if (codePoint < 0x80) { 10671 return codePoint; 10672 } 10673 10674 return toLowerCaseImpl(codePoint); 10675 } 10676 10677 @FastNative toLowerCaseImpl(int codePoint)10678 static native int toLowerCaseImpl(int codePoint); 10679 // END Android-changed: Reimplement methods natively on top of ICU4C. 10680 10681 /** 10682 * Converts the character argument to uppercase using case mapping 10683 * information from the UnicodeData file. 10684 * <p> 10685 * Note that 10686 * {@code Character.isUpperCase(Character.toUpperCase(ch))} 10687 * does not always return {@code true} for some ranges of 10688 * characters, particularly those that are symbols or ideographs. 10689 * 10690 * <p>In general, {@link String#toUpperCase()} should be used to map 10691 * characters to uppercase. {@code String} case mapping methods 10692 * have several benefits over {@code Character} case mapping methods. 10693 * {@code String} case mapping methods can perform locale-sensitive 10694 * mappings, context-sensitive mappings, and 1:M character mappings, whereas 10695 * the {@code Character} case mapping methods cannot. 10696 * 10697 * <p><b>Note:</b> This method cannot handle <a 10698 * href="#supplementary"> supplementary characters</a>. To support 10699 * all Unicode characters, including supplementary characters, use 10700 * the {@link #toUpperCase(int)} method. 10701 * 10702 * @param ch the character to be converted. 
10703 * @return the uppercase equivalent of the character, if any; 10704 * otherwise, the character itself. 10705 * @see Character#isUpperCase(char) 10706 * @see String#toUpperCase() 10707 */ toUpperCase(char ch)10708 public static char toUpperCase(char ch) { 10709 return (char)toUpperCase((int)ch); 10710 } 10711 10712 /** 10713 * Converts the character (Unicode code point) argument to 10714 * uppercase using case mapping information from the UnicodeData 10715 * file. 10716 * 10717 * <p>Note that 10718 * {@code Character.isUpperCase(Character.toUpperCase(codePoint))} 10719 * does not always return {@code true} for some ranges of 10720 * characters, particularly those that are symbols or ideographs. 10721 * 10722 * <p>In general, {@link String#toUpperCase()} should be used to map 10723 * characters to uppercase. {@code String} case mapping methods 10724 * have several benefits over {@code Character} case mapping methods. 10725 * {@code String} case mapping methods can perform locale-sensitive 10726 * mappings, context-sensitive mappings, and 1:M character mappings, whereas 10727 * the {@code Character} case mapping methods cannot. 10728 * 10729 * @param codePoint the character (Unicode code point) to be converted. 10730 * @return the uppercase equivalent of the character, if any; 10731 * otherwise, the character itself. 10732 * @see Character#isUpperCase(int) 10733 * @see String#toUpperCase() 10734 * 10735 * @since 1.5 10736 */ 10737 // BEGIN Android-changed: Reimplement methods natively on top of ICU4C. 10738 /* 10739 public static int toUpperCase(int codePoint) { 10740 return CharacterData.of(codePoint).toUpperCase(codePoint); 10741 } 10742 */ toUpperCase(int codePoint)10743 public static int toUpperCase(int codePoint) { 10744 if (codePoint >= 'a' && codePoint <= 'z') { 10745 return codePoint - ('a' - 'A'); 10746 } 10747 10748 // All ASCII codepoints except the ones above remain unchanged. 
10749 if (codePoint < 0x80) { 10750 return codePoint; 10751 } 10752 10753 return toUpperCaseImpl(codePoint); 10754 } 10755 10756 @FastNative toUpperCaseImpl(int codePoint)10757 static native int toUpperCaseImpl(int codePoint); 10758 // END Android-changed: Reimplement methods natively on top of ICU4C. 10759 10760 /** 10761 * Converts the character argument to titlecase using case mapping 10762 * information from the UnicodeData file. If a character has no 10763 * explicit titlecase mapping and is not itself a titlecase char 10764 * according to UnicodeData, then the uppercase mapping is 10765 * returned as an equivalent titlecase mapping. If the 10766 * {@code char} argument is already a titlecase 10767 * {@code char}, the same {@code char} value will be 10768 * returned. 10769 * <p> 10770 * Note that 10771 * {@code Character.isTitleCase(Character.toTitleCase(ch))} 10772 * does not always return {@code true} for some ranges of 10773 * characters. 10774 * 10775 * <p><b>Note:</b> This method cannot handle <a 10776 * href="#supplementary"> supplementary characters</a>. To support 10777 * all Unicode characters, including supplementary characters, use 10778 * the {@link #toTitleCase(int)} method. 10779 * 10780 * @param ch the character to be converted. 10781 * @return the titlecase equivalent of the character, if any; 10782 * otherwise, the character itself. 10783 * @see Character#isTitleCase(char) 10784 * @see Character#toLowerCase(char) 10785 * @see Character#toUpperCase(char) 10786 * @since 1.0.2 10787 */ toTitleCase(char ch)10788 public static char toTitleCase(char ch) { 10789 return (char)toTitleCase((int)ch); 10790 } 10791 10792 /** 10793 * Converts the character (Unicode code point) argument to titlecase using case mapping 10794 * information from the UnicodeData file. 
If a character has no 10795 * explicit titlecase mapping and is not itself a titlecase char 10796 * according to UnicodeData, then the uppercase mapping is 10797 * returned as an equivalent titlecase mapping. If the 10798 * character argument is already a titlecase 10799 * character, the same character value will be 10800 * returned. 10801 * 10802 * <p>Note that 10803 * {@code Character.isTitleCase(Character.toTitleCase(codePoint))} 10804 * does not always return {@code true} for some ranges of 10805 * characters. 10806 * 10807 * @param codePoint the character (Unicode code point) to be converted. 10808 * @return the titlecase equivalent of the character, if any; 10809 * otherwise, the character itself. 10810 * @see Character#isTitleCase(int) 10811 * @see Character#toLowerCase(int) 10812 * @see Character#toUpperCase(int) 10813 * @since 1.5 10814 */ 10815 // BEGIN Android-changed: Reimplement methods natively on top of ICU4C. 10816 /* 10817 public static int toTitleCase(int codePoint) { 10818 return CharacterData.of(codePoint).toTitleCase(codePoint); 10819 } 10820 */ toTitleCase(int codePoint)10821 public static int toTitleCase(int codePoint) { 10822 return toTitleCaseImpl(codePoint); 10823 } 10824 10825 @FastNative toTitleCaseImpl(int codePoint)10826 static native int toTitleCaseImpl(int codePoint); 10827 // END Android-changed: Reimplement methods natively on top of ICU4C. 10828 10829 /** 10830 * Returns the numeric value of the character {@code ch} in the 10831 * specified radix. 10832 * <p> 10833 * If the radix is not in the range {@code MIN_RADIX} ≤ 10834 * {@code radix} ≤ {@code MAX_RADIX} or if the 10835 * value of {@code ch} is not a valid digit in the specified 10836 * radix, {@code -1} is returned. 
* A character is a valid digit
     * if at least one of the following is true:
     * <ul>
     * <li>The method {@code isDigit} is {@code true} of the character
     * and the Unicode decimal digit value of the character (or its
     * single-character decomposition) is less than the specified radix.
     * In this case the decimal digit value is returned.
     * <li>The character is one of the uppercase Latin letters
     * {@code 'A'} through {@code 'Z'} and its code is less than
     * {@code radix + 'A' - 10}.
     * In this case, {@code ch - 'A' + 10}
     * is returned.
     * <li>The character is one of the lowercase Latin letters
     * {@code 'a'} through {@code 'z'} and its code is less than
     * {@code radix + 'a' - 10}.
     * In this case, {@code ch - 'a' + 10}
     * is returned.
     * <li>The character is one of the fullwidth uppercase Latin letters A
     * ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})
     * and its code is less than
     * {@code radix + '\u005CuFF21' - 10}.
     * In this case, {@code ch - '\u005CuFF21' + 10}
     * is returned.
     * <li>The character is one of the fullwidth lowercase Latin letters a
     * ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})
     * and its code is less than
     * {@code radix + '\u005CuFF41' - 10}.
     * In this case, {@code ch - '\u005CuFF41' + 10}
     * is returned.
     * </ul>
     *
     * <p><b>Note:</b> This method cannot handle <a
     * href="#supplementary"> supplementary characters</a>. To support
     * all Unicode characters, including supplementary characters, use
     * the {@link #digit(int, int)} method.
     *
     * @param ch the character to be converted.
     * @param radix the radix.
     * @return the numeric value represented by the character in the
     * specified radix.
10876 * @see Character#forDigit(int, int) 10877 * @see Character#isDigit(char) 10878 */ digit(char ch, int radix)10879 public static int digit(char ch, int radix) { 10880 return digit((int)ch, radix); 10881 } 10882 10883 /** 10884 * Returns the numeric value of the specified character (Unicode 10885 * code point) in the specified radix. 10886 * 10887 * <p>If the radix is not in the range {@code MIN_RADIX} ≤ 10888 * {@code radix} ≤ {@code MAX_RADIX} or if the 10889 * character is not a valid digit in the specified 10890 * radix, {@code -1} is returned. A character is a valid digit 10891 * if at least one of the following is true: 10892 * <ul> 10893 * <li>The method {@link #isDigit(int) isDigit(codePoint)} is {@code true} of the character 10894 * and the Unicode decimal digit value of the character (or its 10895 * single-character decomposition) is less than the specified radix. 10896 * In this case the decimal digit value is returned. 10897 * <li>The character is one of the uppercase Latin letters 10898 * {@code 'A'} through {@code 'Z'} and its code is less than 10899 * {@code radix + 'A' - 10}. 10900 * In this case, {@code codePoint - 'A' + 10} 10901 * is returned. 10902 * <li>The character is one of the lowercase Latin letters 10903 * {@code 'a'} through {@code 'z'} and its code is less than 10904 * {@code radix + 'a' - 10}. 10905 * In this case, {@code codePoint - 'a' + 10} 10906 * is returned. 10907 * <li>The character is one of the fullwidth uppercase Latin letters A 10908 * ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'}) 10909 * and its code is less than 10910 * {@code radix + '\u005CuFF21' - 10}. 10911 * In this case, 10912 * {@code codePoint - '\u005CuFF21' + 10} 10913 * is returned. 10914 * <li>The character is one of the fullwidth lowercase Latin letters a 10915 * ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'}) 10916 * and its code is less than 10917 * {@code radix + '\u005CuFF41'- 10}. 
10918 * In this case, 10919 * {@code codePoint - '\u005CuFF41' + 10} 10920 * is returned. 10921 * </ul> 10922 * 10923 * @param codePoint the character (Unicode code point) to be converted. 10924 * @param radix the radix. 10925 * @return the numeric value represented by the character in the 10926 * specified radix. 10927 * @see Character#forDigit(int, int) 10928 * @see Character#isDigit(int) 10929 * @since 1.5 10930 */ 10931 // BEGIN Android-changed: Reimplement methods natively on top of ICU4C. 10932 /* 10933 public static int digit(int codePoint, int radix) { 10934 return CharacterData.of(codePoint).digit(codePoint, radix); 10935 } 10936 */ digit(int codePoint, int radix)10937 public static int digit(int codePoint, int radix) { 10938 if (radix < MIN_RADIX || radix > MAX_RADIX) { 10939 return -1; 10940 } 10941 if (codePoint < 128) { 10942 // Optimized for ASCII 10943 int result = -1; 10944 if ('0' <= codePoint && codePoint <= '9') { 10945 result = codePoint - '0'; 10946 } else if ('a' <= codePoint && codePoint <= 'z') { 10947 result = 10 + (codePoint - 'a'); 10948 } else if ('A' <= codePoint && codePoint <= 'Z') { 10949 result = 10 + (codePoint - 'A'); 10950 } 10951 return result < radix ? result : -1; 10952 } 10953 return digitImpl(codePoint, radix); 10954 } 10955 10956 @FastNative digitImpl(int codePoint, int radix)10957 native static int digitImpl(int codePoint, int radix); 10958 // END Android-changed: Reimplement methods natively on top of ICU4C. 10959 10960 /** 10961 * Returns the {@code int} value that the specified Unicode 10962 * character represents. For example, the character 10963 * {@code '\u005Cu216C'} (the roman numeral fifty) will return 10964 * an int with a value of 50. 
10965 * <p> 10966 * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through 10967 * {@code '\u005Cu005A'}), lowercase 10968 * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and 10969 * full width variant ({@code '\u005CuFF21'} through 10970 * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through 10971 * {@code '\u005CuFF5A'}) forms have numeric values from 10 10972 * through 35. This is independent of the Unicode specification, 10973 * which does not assign numeric values to these {@code char} 10974 * values. 10975 * <p> 10976 * If the character does not have a numeric value, then -1 is returned. 10977 * If the character has a numeric value that cannot be represented as a 10978 * nonnegative integer (for example, a fractional value), then -2 10979 * is returned. 10980 * 10981 * <p><b>Note:</b> This method cannot handle <a 10982 * href="#supplementary"> supplementary characters</a>. To support 10983 * all Unicode characters, including supplementary characters, use 10984 * the {@link #getNumericValue(int)} method. 10985 * 10986 * @param ch the character to be converted. 10987 * @return the numeric value of the character, as a nonnegative {@code int} 10988 * value; -2 if the character has a numeric value but the value 10989 * can not be represented as a nonnegative {@code int} value; 10990 * -1 if the character has no numeric value. 10991 * @see Character#forDigit(int, int) 10992 * @see Character#isDigit(char) 10993 * @since 1.1 10994 */ getNumericValue(char ch)10995 public static int getNumericValue(char ch) { 10996 return getNumericValue((int)ch); 10997 } 10998 10999 /** 11000 * Returns the {@code int} value that the specified 11001 * character (Unicode code point) represents. For example, the character 11002 * {@code '\u005Cu216C'} (the Roman numeral fifty) will return 11003 * an {@code int} with a value of 50. 
11004 * <p> 11005 * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through 11006 * {@code '\u005Cu005A'}), lowercase 11007 * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and 11008 * full width variant ({@code '\u005CuFF21'} through 11009 * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through 11010 * {@code '\u005CuFF5A'}) forms have numeric values from 10 11011 * through 35. This is independent of the Unicode specification, 11012 * which does not assign numeric values to these {@code char} 11013 * values. 11014 * <p> 11015 * If the character does not have a numeric value, then -1 is returned. 11016 * If the character has a numeric value that cannot be represented as a 11017 * nonnegative integer (for example, a fractional value), then -2 11018 * is returned. 11019 * 11020 * @param codePoint the character (Unicode code point) to be converted. 11021 * @return the numeric value of the character, as a nonnegative {@code int} 11022 * value; -2 if the character has a numeric value but the value 11023 * can not be represented as a nonnegative {@code int} value; 11024 * -1 if the character has no numeric value. 11025 * @see Character#forDigit(int, int) 11026 * @see Character#isDigit(int) 11027 * @since 1.5 11028 */ 11029 // BEGIN Android-changed: Reimplement methods natively on top of ICU4C. 11030 /* 11031 public static int getNumericValue(int codePoint) { 11032 return CharacterData.of(codePoint).getNumericValue(codePoint); 11033 } 11034 */ getNumericValue(int codePoint)11035 public static int getNumericValue(int codePoint) { 11036 // This is both an optimization and papers over differences between Java and ICU. 
11037 if (codePoint < 128) { 11038 if (codePoint >= '0' && codePoint <= '9') { 11039 return codePoint - '0'; 11040 } 11041 if (codePoint >= 'a' && codePoint <= 'z') { 11042 return codePoint - ('a' - 10); 11043 } 11044 if (codePoint >= 'A' && codePoint <= 'Z') { 11045 return codePoint - ('A' - 10); 11046 } 11047 return -1; 11048 } 11049 // Full-width uppercase A-Z. 11050 if (codePoint >= 0xff21 && codePoint <= 0xff3a) { 11051 return codePoint - 0xff17; 11052 } 11053 // Full-width lowercase a-z. 11054 if (codePoint >= 0xff41 && codePoint <= 0xff5a) { 11055 return codePoint - 0xff37; 11056 } 11057 return getNumericValueImpl(codePoint); 11058 } 11059 11060 @FastNative getNumericValueImpl(int codePoint)11061 native static int getNumericValueImpl(int codePoint); 11062 // END Android-changed: Reimplement methods natively on top of ICU4C. 11063 11064 /** 11065 * Determines if the specified character is ISO-LATIN-1 white space. 11066 * This method returns {@code true} for the following five 11067 * characters only: 11068 * <table class="striped"> 11069 * <caption style="display:none">truechars</caption> 11070 * <thead> 11071 * <tr><th scope="col">Character 11072 * <th scope="col">Code 11073 * <th scope="col">Name 11074 * </thead> 11075 * <tbody> 11076 * <tr><th scope="row">{@code '\t'}</th> <td>{@code U+0009}</td> 11077 * <td>{@code HORIZONTAL TABULATION}</td></tr> 11078 * <tr><th scope="row">{@code '\n'}</th> <td>{@code U+000A}</td> 11079 * <td>{@code NEW LINE}</td></tr> 11080 * <tr><th scope="row">{@code '\f'}</th> <td>{@code U+000C}</td> 11081 * <td>{@code FORM FEED}</td></tr> 11082 * <tr><th scope="row">{@code '\r'}</th> <td>{@code U+000D}</td> 11083 * <td>{@code CARRIAGE RETURN}</td></tr> 11084 * <tr><th scope="row">{@code ' '}</th> <td>{@code U+0020}</td> 11085 * <td>{@code SPACE}</td></tr> 11086 * </tbody> 11087 * </table> 11088 * 11089 * @param ch the character to be tested. 
11090 * @return {@code true} if the character is ISO-LATIN-1 white 11091 * space; {@code false} otherwise. 11092 * @see Character#isSpaceChar(char) 11093 * @see Character#isWhitespace(char) 11094 * @deprecated Replaced by isWhitespace(char). 11095 */ 11096 @Deprecated(since="1.1") isSpace(char ch)11097 public static boolean isSpace(char ch) { 11098 return (ch <= 0x0020) && 11099 (((((1L << 0x0009) | 11100 (1L << 0x000A) | 11101 (1L << 0x000C) | 11102 (1L << 0x000D) | 11103 (1L << 0x0020)) >> ch) & 1L) != 0); 11104 } 11105 11106 11107 /** 11108 * Determines if the specified character is a Unicode space character. 11109 * A character is considered to be a space character if and only if 11110 * it is specified to be a space character by the Unicode Standard. This 11111 * method returns true if the character's general category type is any of 11112 * the following: 11113 * <ul> 11114 * <li> {@code SPACE_SEPARATOR} 11115 * <li> {@code LINE_SEPARATOR} 11116 * <li> {@code PARAGRAPH_SEPARATOR} 11117 * </ul> 11118 * 11119 * <p><b>Note:</b> This method cannot handle <a 11120 * href="#supplementary"> supplementary characters</a>. To support 11121 * all Unicode characters, including supplementary characters, use 11122 * the {@link #isSpaceChar(int)} method. 11123 * 11124 * @param ch the character to be tested. 11125 * @return {@code true} if the character is a space character; 11126 * {@code false} otherwise. 11127 * @see Character#isWhitespace(char) 11128 * @since 1.1 11129 */ isSpaceChar(char ch)11130 public static boolean isSpaceChar(char ch) { 11131 return isSpaceChar((int)ch); 11132 } 11133 11134 /** 11135 * Determines if the specified character (Unicode code point) is a 11136 * Unicode space character. A character is considered to be a 11137 * space character if and only if it is specified to be a space 11138 * character by the Unicode Standard. 
This method returns true if 11139 * the character's general category type is any of the following: 11140 * 11141 * <ul> 11142 * <li> {@link #SPACE_SEPARATOR} 11143 * <li> {@link #LINE_SEPARATOR} 11144 * <li> {@link #PARAGRAPH_SEPARATOR} 11145 * </ul> 11146 * 11147 * @param codePoint the character (Unicode code point) to be tested. 11148 * @return {@code true} if the character is a space character; 11149 * {@code false} otherwise. 11150 * @see Character#isWhitespace(int) 11151 * @since 1.5 11152 */ 11153 // BEGIN Android-changed: Reimplement methods natively on top of ICU4C. 11154 /* 11155 public static boolean isSpaceChar(int codePoint) { 11156 return ((((1 << Character.SPACE_SEPARATOR) | 11157 (1 << Character.LINE_SEPARATOR) | 11158 (1 << Character.PARAGRAPH_SEPARATOR)) >> getType(codePoint)) & 1) 11159 != 0; 11160 } 11161 */ isSpaceChar(int codePoint)11162 public static boolean isSpaceChar(int codePoint) { 11163 // We don't just call into icu4c because of the JNI overhead. Ideally we'd fix that. 11164 // SPACE or NO-BREAK SPACE? 11165 if (codePoint == 0x20 || codePoint == 0xa0) { 11166 return true; 11167 } 11168 if (codePoint < 0x1000) { 11169 return false; 11170 } 11171 // OGHAM SPACE MARK or MONGOLIAN VOWEL SEPARATOR? 11172 if (codePoint == 0x1680 || codePoint == 0x180e) { 11173 return true; 11174 } 11175 if (codePoint < 0x2000) { 11176 return false; 11177 } 11178 if (codePoint <= 0xffff) { 11179 // Other whitespace from General Punctuation... 11180 return codePoint <= 0x200a || codePoint == 0x2028 || codePoint == 0x2029 || codePoint == 0x202f || codePoint == 0x205f || 11181 codePoint == 0x3000; // ...or CJK Symbols and Punctuation? 11182 } 11183 // Let icu4c worry about non-BMP code points. 11184 return isSpaceCharImpl(codePoint); 11185 } 11186 11187 @FastNative isSpaceCharImpl(int codePoint)11188 static native boolean isSpaceCharImpl(int codePoint); 11189 // END Android-changed: Reimplement methods natively on top of ICU4C. 
11190 11191 /** 11192 * Determines if the specified character is white space according to Java. 11193 * A character is a Java whitespace character if and only if it satisfies 11194 * one of the following criteria: 11195 * <ul> 11196 * <li> It is a Unicode space character ({@code SPACE_SEPARATOR}, 11197 * {@code LINE_SEPARATOR}, or {@code PARAGRAPH_SEPARATOR}) 11198 * but is not also a non-breaking space ({@code '\u005Cu00A0'}, 11199 * {@code '\u005Cu2007'}, {@code '\u005Cu202F'}). 11200 * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION. 11201 * <li> It is {@code '\u005Cn'}, U+000A LINE FEED. 11202 * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION. 11203 * <li> It is {@code '\u005Cf'}, U+000C FORM FEED. 11204 * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN. 11205 * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR. 11206 * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR. 11207 * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR. 11208 * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR. 11209 * </ul> 11210 * 11211 * <p><b>Note:</b> This method cannot handle <a 11212 * href="#supplementary"> supplementary characters</a>. To support 11213 * all Unicode characters, including supplementary characters, use 11214 * the {@link #isWhitespace(int)} method. 11215 * 11216 * @param ch the character to be tested. 11217 * @return {@code true} if the character is a Java whitespace 11218 * character; {@code false} otherwise. 11219 * @see Character#isSpaceChar(char) 11220 * @since 1.1 11221 */ isWhitespace(char ch)11222 public static boolean isWhitespace(char ch) { 11223 return isWhitespace((int)ch); 11224 } 11225 11226 /** 11227 * Determines if the specified character (Unicode code point) is 11228 * white space according to Java. 
A character is a Java 11229 * whitespace character if and only if it satisfies one of the 11230 * following criteria: 11231 * <ul> 11232 * <li> It is a Unicode space character ({@link #SPACE_SEPARATOR}, 11233 * {@link #LINE_SEPARATOR}, or {@link #PARAGRAPH_SEPARATOR}) 11234 * but is not also a non-breaking space ({@code '\u005Cu00A0'}, 11235 * {@code '\u005Cu2007'}, {@code '\u005Cu202F'}). 11236 * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION. 11237 * <li> It is {@code '\u005Cn'}, U+000A LINE FEED. 11238 * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION. 11239 * <li> It is {@code '\u005Cf'}, U+000C FORM FEED. 11240 * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN. 11241 * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR. 11242 * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR. 11243 * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR. 11244 * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR. 11245 * </ul> 11246 * 11247 * @param codePoint the character (Unicode code point) to be tested. 11248 * @return {@code true} if the character is a Java whitespace 11249 * character; {@code false} otherwise. 11250 * @see Character#isSpaceChar(int) 11251 * @since 1.5 11252 */ 11253 // BEGIN Android-changed: Reimplement methods natively on top of ICU4C. 11254 /* 11255 public static boolean isWhitespace(int codePoint) { 11256 return CharacterData.of(codePoint).isWhitespace(codePoint); 11257 } 11258 */ isWhitespace(int codePoint)11259 public static boolean isWhitespace(int codePoint) { 11260 // We don't just call into icu4c because of the JNI overhead. Ideally we'd fix that. 11261 // Any ASCII whitespace character? 11262 if ((codePoint >= 0x1c && codePoint <= 0x20) || (codePoint >= 0x09 && codePoint <= 0x0d)) { 11263 return true; 11264 } 11265 if (codePoint < 0x1000) { 11266 return false; 11267 } 11268 // OGHAM SPACE MARK or MONGOLIAN VOWEL SEPARATOR? 
11269 if (codePoint == 0x1680 || codePoint == 0x180e) { 11270 return true; 11271 } 11272 if (codePoint < 0x2000) { 11273 return false; 11274 } 11275 // Exclude General Punctuation's non-breaking spaces (which includes FIGURE SPACE). 11276 if (codePoint == 0x2007 || codePoint == 0x202f) { 11277 return false; 11278 } 11279 if (codePoint <= 0xffff) { 11280 // Other whitespace from General Punctuation... 11281 return codePoint <= 0x200a || codePoint == 0x2028 || codePoint == 0x2029 || codePoint == 0x205f || 11282 codePoint == 0x3000; // ...or CJK Symbols and Punctuation? 11283 } 11284 // Let icu4c worry about non-BMP code points. 11285 return isWhitespaceImpl(codePoint); 11286 } 11287 11288 @FastNative isWhitespaceImpl(int codePoint)11289 native static boolean isWhitespaceImpl(int codePoint); 11290 // END Android-changed: Reimplement methods natively on top of ICU4C. 11291 11292 /** 11293 * Determines if the specified character is an ISO control 11294 * character. A character is considered to be an ISO control 11295 * character if its code is in the range {@code '\u005Cu0000'} 11296 * through {@code '\u005Cu001F'} or in the range 11297 * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}. 11298 * 11299 * <p><b>Note:</b> This method cannot handle <a 11300 * href="#supplementary"> supplementary characters</a>. To support 11301 * all Unicode characters, including supplementary characters, use 11302 * the {@link #isISOControl(int)} method. 11303 * 11304 * @param ch the character to be tested. 11305 * @return {@code true} if the character is an ISO control character; 11306 * {@code false} otherwise. 11307 * 11308 * @see Character#isSpaceChar(char) 11309 * @see Character#isWhitespace(char) 11310 * @since 1.1 11311 */ isISOControl(char ch)11312 public static boolean isISOControl(char ch) { 11313 return isISOControl((int)ch); 11314 } 11315 11316 /** 11317 * Determines if the referenced character (Unicode code point) is an ISO control 11318 * character. 
A character is considered to be an ISO control 11319 * character if its code is in the range {@code '\u005Cu0000'} 11320 * through {@code '\u005Cu001F'} or in the range 11321 * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}. 11322 * 11323 * @param codePoint the character (Unicode code point) to be tested. 11324 * @return {@code true} if the character is an ISO control character; 11325 * {@code false} otherwise. 11326 * @see Character#isSpaceChar(int) 11327 * @see Character#isWhitespace(int) 11328 * @since 1.5 11329 */ isISOControl(int codePoint)11330 public static boolean isISOControl(int codePoint) { 11331 // Optimized form of: 11332 // (codePoint >= 0x00 && codePoint <= 0x1F) || 11333 // (codePoint >= 0x7F && codePoint <= 0x9F); 11334 return codePoint <= 0x9F && 11335 (codePoint >= 0x7F || (codePoint >>> 5 == 0)); 11336 } 11337 11338 /** 11339 * Returns a value indicating a character's general category. 11340 * 11341 * <p><b>Note:</b> This method cannot handle <a 11342 * href="#supplementary"> supplementary characters</a>. To support 11343 * all Unicode characters, including supplementary characters, use 11344 * the {@link #getType(int)} method. 11345 * 11346 * @param ch the character to be tested. 11347 * @return a value of type {@code int} representing the 11348 * character's general category. 
11349 * @see Character#COMBINING_SPACING_MARK 11350 * @see Character#CONNECTOR_PUNCTUATION 11351 * @see Character#CONTROL 11352 * @see Character#CURRENCY_SYMBOL 11353 * @see Character#DASH_PUNCTUATION 11354 * @see Character#DECIMAL_DIGIT_NUMBER 11355 * @see Character#ENCLOSING_MARK 11356 * @see Character#END_PUNCTUATION 11357 * @see Character#FINAL_QUOTE_PUNCTUATION 11358 * @see Character#FORMAT 11359 * @see Character#INITIAL_QUOTE_PUNCTUATION 11360 * @see Character#LETTER_NUMBER 11361 * @see Character#LINE_SEPARATOR 11362 * @see Character#LOWERCASE_LETTER 11363 * @see Character#MATH_SYMBOL 11364 * @see Character#MODIFIER_LETTER 11365 * @see Character#MODIFIER_SYMBOL 11366 * @see Character#NON_SPACING_MARK 11367 * @see Character#OTHER_LETTER 11368 * @see Character#OTHER_NUMBER 11369 * @see Character#OTHER_PUNCTUATION 11370 * @see Character#OTHER_SYMBOL 11371 * @see Character#PARAGRAPH_SEPARATOR 11372 * @see Character#PRIVATE_USE 11373 * @see Character#SPACE_SEPARATOR 11374 * @see Character#START_PUNCTUATION 11375 * @see Character#SURROGATE 11376 * @see Character#TITLECASE_LETTER 11377 * @see Character#UNASSIGNED 11378 * @see Character#UPPERCASE_LETTER 11379 * @since 1.1 11380 */ getType(char ch)11381 public static int getType(char ch) { 11382 return getType((int)ch); 11383 } 11384 11385 /** 11386 * Returns a value indicating a character's general category. 11387 * 11388 * @param codePoint the character (Unicode code point) to be tested. 11389 * @return a value of type {@code int} representing the 11390 * character's general category. 
11391 * @see Character#COMBINING_SPACING_MARK COMBINING_SPACING_MARK 11392 * @see Character#CONNECTOR_PUNCTUATION CONNECTOR_PUNCTUATION 11393 * @see Character#CONTROL CONTROL 11394 * @see Character#CURRENCY_SYMBOL CURRENCY_SYMBOL 11395 * @see Character#DASH_PUNCTUATION DASH_PUNCTUATION 11396 * @see Character#DECIMAL_DIGIT_NUMBER DECIMAL_DIGIT_NUMBER 11397 * @see Character#ENCLOSING_MARK ENCLOSING_MARK 11398 * @see Character#END_PUNCTUATION END_PUNCTUATION 11399 * @see Character#FINAL_QUOTE_PUNCTUATION FINAL_QUOTE_PUNCTUATION 11400 * @see Character#FORMAT FORMAT 11401 * @see Character#INITIAL_QUOTE_PUNCTUATION INITIAL_QUOTE_PUNCTUATION 11402 * @see Character#LETTER_NUMBER LETTER_NUMBER 11403 * @see Character#LINE_SEPARATOR LINE_SEPARATOR 11404 * @see Character#LOWERCASE_LETTER LOWERCASE_LETTER 11405 * @see Character#MATH_SYMBOL MATH_SYMBOL 11406 * @see Character#MODIFIER_LETTER MODIFIER_LETTER 11407 * @see Character#MODIFIER_SYMBOL MODIFIER_SYMBOL 11408 * @see Character#NON_SPACING_MARK NON_SPACING_MARK 11409 * @see Character#OTHER_LETTER OTHER_LETTER 11410 * @see Character#OTHER_NUMBER OTHER_NUMBER 11411 * @see Character#OTHER_PUNCTUATION OTHER_PUNCTUATION 11412 * @see Character#OTHER_SYMBOL OTHER_SYMBOL 11413 * @see Character#PARAGRAPH_SEPARATOR PARAGRAPH_SEPARATOR 11414 * @see Character#PRIVATE_USE PRIVATE_USE 11415 * @see Character#SPACE_SEPARATOR SPACE_SEPARATOR 11416 * @see Character#START_PUNCTUATION START_PUNCTUATION 11417 * @see Character#SURROGATE SURROGATE 11418 * @see Character#TITLECASE_LETTER TITLECASE_LETTER 11419 * @see Character#UNASSIGNED UNASSIGNED 11420 * @see Character#UPPERCASE_LETTER UPPERCASE_LETTER 11421 * @since 1.5 11422 */ 11423 // BEGIN Android-changed: Reimplement methods natively on top of ICU4C. 
11424 /* 11425 public static int getType(int codePoint) { 11426 return CharacterData.of(codePoint).getType(codePoint); 11427 } 11428 */ getType(int codePoint)11429 public static int getType(int codePoint) { 11430 int type = getTypeImpl(codePoint); 11431 // The type values returned by ICU are not RI-compatible. The RI skips the value 17. 11432 if (type <= Character.FORMAT) { 11433 return type; 11434 } 11435 return (type + 1); 11436 } 11437 11438 @FastNative getTypeImpl(int codePoint)11439 static native int getTypeImpl(int codePoint); 11440 // END Android-changed: Reimplement methods natively on top of ICU4C. 11441 11442 /** 11443 * Determines the character representation for a specific digit in 11444 * the specified radix. If the value of {@code radix} is not a 11445 * valid radix, or the value of {@code digit} is not a valid 11446 * digit in the specified radix, the null character 11447 * ({@code '\u005Cu0000'}) is returned. 11448 * <p> 11449 * The {@code radix} argument is valid if it is greater than or 11450 * equal to {@code MIN_RADIX} and less than or equal to 11451 * {@code MAX_RADIX}. The {@code digit} argument is valid if 11452 * {@code 0 <= digit < radix}. 11453 * <p> 11454 * If the digit is less than 10, then 11455 * {@code '0' + digit} is returned. Otherwise, the value 11456 * {@code 'a' + digit - 10} is returned. 11457 * 11458 * @param digit the number to convert to a character. 11459 * @param radix the radix. 11460 * @return the {@code char} representation of the specified digit 11461 * in the specified radix. 
11462 * @see Character#MIN_RADIX 11463 * @see Character#MAX_RADIX 11464 * @see Character#digit(char, int) 11465 */ forDigit(int digit, int radix)11466 public static char forDigit(int digit, int radix) { 11467 if ((digit >= radix) || (digit < 0)) { 11468 return '\0'; 11469 } 11470 if ((radix < Character.MIN_RADIX) || (radix > Character.MAX_RADIX)) { 11471 return '\0'; 11472 } 11473 if (digit < 10) { 11474 return (char)('0' + digit); 11475 } 11476 return (char)('a' - 10 + digit); 11477 } 11478 11479 /** 11480 * Returns the Unicode directionality property for the given 11481 * character. Character directionality is used to calculate the 11482 * visual ordering of text. The directionality value of undefined 11483 * {@code char} values is {@code DIRECTIONALITY_UNDEFINED}. 11484 * 11485 * <p><b>Note:</b> This method cannot handle <a 11486 * href="#supplementary"> supplementary characters</a>. To support 11487 * all Unicode characters, including supplementary characters, use 11488 * the {@link #getDirectionality(int)} method. 11489 * 11490 * @param ch {@code char} for which the directionality property 11491 * is requested. 11492 * @return the directionality property of the {@code char} value. 
11493 * 11494 * @see Character#DIRECTIONALITY_UNDEFINED 11495 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT 11496 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT 11497 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC 11498 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER 11499 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR 11500 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR 11501 * @see Character#DIRECTIONALITY_ARABIC_NUMBER 11502 * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR 11503 * @see Character#DIRECTIONALITY_NONSPACING_MARK 11504 * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL 11505 * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR 11506 * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR 11507 * @see Character#DIRECTIONALITY_WHITESPACE 11508 * @see Character#DIRECTIONALITY_OTHER_NEUTRALS 11509 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING 11510 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE 11511 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING 11512 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE 11513 * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT 11514 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE 11515 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE 11516 * @see Character#DIRECTIONALITY_FIRST_STRONG_ISOLATE 11517 * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE 11518 * @since 1.4 11519 */ getDirectionality(char ch)11520 public static byte getDirectionality(char ch) { 11521 return getDirectionality((int)ch); 11522 } 11523 11524 /** 11525 * Returns the Unicode directionality property for the given 11526 * character (Unicode code point). Character directionality is 11527 * used to calculate the visual ordering of text. The 11528 * directionality value of undefined character is {@link 11529 * #DIRECTIONALITY_UNDEFINED}. 11530 * 11531 * @param codePoint the character (Unicode code point) for which 11532 * the directionality property is requested. 
11533 * @return the directionality property of the character. 11534 * 11535 * @see Character#DIRECTIONALITY_UNDEFINED DIRECTIONALITY_UNDEFINED 11536 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT DIRECTIONALITY_LEFT_TO_RIGHT 11537 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT DIRECTIONALITY_RIGHT_TO_LEFT 11538 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC 11539 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER DIRECTIONALITY_EUROPEAN_NUMBER 11540 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR 11541 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR 11542 * @see Character#DIRECTIONALITY_ARABIC_NUMBER DIRECTIONALITY_ARABIC_NUMBER 11543 * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR DIRECTIONALITY_COMMON_NUMBER_SEPARATOR 11544 * @see Character#DIRECTIONALITY_NONSPACING_MARK DIRECTIONALITY_NONSPACING_MARK 11545 * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL DIRECTIONALITY_BOUNDARY_NEUTRAL 11546 * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR DIRECTIONALITY_PARAGRAPH_SEPARATOR 11547 * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR DIRECTIONALITY_SEGMENT_SEPARATOR 11548 * @see Character#DIRECTIONALITY_WHITESPACE DIRECTIONALITY_WHITESPACE 11549 * @see Character#DIRECTIONALITY_OTHER_NEUTRALS DIRECTIONALITY_OTHER_NEUTRALS 11550 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING 11551 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE 11552 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING 11553 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE 11554 * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT DIRECTIONALITY_POP_DIRECTIONAL_FORMAT 11555 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE 11556 * @see 
Character#DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE 11557 * @see Character#DIRECTIONALITY_FIRST_STRONG_ISOLATE DIRECTIONALITY_FIRST_STRONG_ISOLATE 11558 * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE 11559 * @since 1.5 11560 */ 11561 // BEGIN Android-changed: Reimplement methods natively on top of ICU4C. 11562 /* 11563 public static byte getDirectionality(int codePoint) { 11564 return CharacterData.of(codePoint).getDirectionality(codePoint); 11565 } 11566 */ getDirectionality(int codePoint)11567 public static byte getDirectionality(int codePoint) { 11568 if (getType(codePoint) == Character.UNASSIGNED) { 11569 return Character.DIRECTIONALITY_UNDEFINED; 11570 } 11571 11572 byte directionality = getDirectionalityImpl(codePoint); 11573 if (directionality >= 0 && directionality < DIRECTIONALITY.length) { 11574 return DIRECTIONALITY[directionality]; 11575 } 11576 return Character.DIRECTIONALITY_UNDEFINED; 11577 } 11578 11579 @FastNative getDirectionalityImpl(int codePoint)11580 native static byte getDirectionalityImpl(int codePoint); 11581 // END Android-changed: Reimplement methods natively on top of ICU4C. 11582 11583 /** 11584 * Determines whether the character is mirrored according to the 11585 * Unicode specification. Mirrored characters should have their 11586 * glyphs horizontally mirrored when displayed in text that is 11587 * right-to-left. For example, {@code '\u005Cu0028'} LEFT 11588 * PARENTHESIS is semantically defined to be an <i>opening 11589 * parenthesis</i>. This will appear as a "(" in text that is 11590 * left-to-right but as a ")" in text that is right-to-left. 11591 * 11592 * <p><b>Note:</b> This method cannot handle <a 11593 * href="#supplementary"> supplementary characters</a>. To support 11594 * all Unicode characters, including supplementary characters, use 11595 * the {@link #isMirrored(int)} method. 
11596 * 11597 * @param ch {@code char} for which the mirrored property is requested 11598 * @return {@code true} if the char is mirrored, {@code false} 11599 * if the {@code char} is not mirrored or is not defined. 11600 * @since 1.4 11601 */ isMirrored(char ch)11602 public static boolean isMirrored(char ch) { 11603 return isMirrored((int)ch); 11604 } 11605 11606 /** 11607 * Determines whether the specified character (Unicode code point) 11608 * is mirrored according to the Unicode specification. Mirrored 11609 * characters should have their glyphs horizontally mirrored when 11610 * displayed in text that is right-to-left. For example, 11611 * {@code '\u005Cu0028'} LEFT PARENTHESIS is semantically 11612 * defined to be an <i>opening parenthesis</i>. This will appear 11613 * as a "(" in text that is left-to-right but as a ")" in text 11614 * that is right-to-left. 11615 * 11616 * @param codePoint the character (Unicode code point) to be tested. 11617 * @return {@code true} if the character is mirrored, {@code false} 11618 * if the character is not mirrored or is not defined. 11619 * @since 1.5 11620 */ 11621 // BEGIN Android-changed: Reimplement methods natively on top of ICU4C. 11622 /* 11623 public static boolean isMirrored(int codePoint) { 11624 return CharacterData.of(codePoint).isMirrored(codePoint); 11625 } 11626 */ isMirrored(int codePoint)11627 public static boolean isMirrored(int codePoint) { 11628 return isMirroredImpl(codePoint); 11629 } 11630 11631 @FastNative isMirroredImpl(int codePoint)11632 native static boolean isMirroredImpl(int codePoint); 11633 // END Android-changed: Reimplement methods natively on top of ICU4C. 11634 11635 /** 11636 * Compares two {@code Character} objects numerically. 11637 * 11638 * @param anotherCharacter the {@code Character} to be compared. 
11639 * @return the value {@code 0} if the argument {@code Character} 11640 * is equal to this {@code Character}; a value less than 11641 * {@code 0} if this {@code Character} is numerically less 11642 * than the {@code Character} argument; and a value greater than 11643 * {@code 0} if this {@code Character} is numerically greater 11644 * than the {@code Character} argument (unsigned comparison). 11645 * Note that this is strictly a numerical comparison; it is not 11646 * locale-dependent. 11647 * @since 1.2 11648 */ compareTo(Character anotherCharacter)11649 public int compareTo(Character anotherCharacter) { 11650 return compare(this.value, anotherCharacter.value); 11651 } 11652 11653 /** 11654 * Compares two {@code char} values numerically. 11655 * The value returned is identical to what would be returned by: 11656 * <pre> 11657 * Character.valueOf(x).compareTo(Character.valueOf(y)) 11658 * </pre> 11659 * 11660 * @param x the first {@code char} to compare 11661 * @param y the second {@code char} to compare 11662 * @return the value {@code 0} if {@code x == y}; 11663 * a value less than {@code 0} if {@code x < y}; and 11664 * a value greater than {@code 0} if {@code x > y} 11665 * @since 1.7 11666 */ compare(char x, char y)11667 public static int compare(char x, char y) { 11668 return x - y; 11669 } 11670 11671 // BEGIN Android-removed: Use ICU. 11672 /** 11673 * Converts the character (Unicode code point) argument to uppercase using 11674 * information from the UnicodeData file. 11675 * 11676 * @param codePoint the character (Unicode code point) to be converted. 11677 * @return either the uppercase equivalent of the character, if 11678 * any, or an error flag ({@code Character.ERROR}) 11679 * that indicates that a 1:M {@code char} mapping exists. 
11680 * @see Character#isLowerCase(char) 11681 * @see Character#isUpperCase(char) 11682 * @see Character#toLowerCase(char) 11683 * @see Character#toTitleCase(char) 11684 * @since 1.4 11685 * 11686 static int toUpperCaseEx(int codePoint) { 11687 assert isValidCodePoint(codePoint); 11688 return CharacterData.of(codePoint).toUpperCaseEx(codePoint); 11689 } 11690 11691 /** 11692 * Converts the character (Unicode code point) argument to uppercase using case 11693 * mapping information from the SpecialCasing file in the Unicode 11694 * specification. If a character has no explicit uppercase 11695 * mapping, then the {@code char} itself is returned in the 11696 * {@code char[]}. 11697 * 11698 * @param codePoint the character (Unicode code point) to be converted. 11699 * @return a {@code char[]} with the uppercased character. 11700 * @since 1.4 11701 * 11702 static char[] toUpperCaseCharArray(int codePoint) { 11703 // As of Unicode 6.0, 1:M uppercasings only happen in the BMP. 11704 assert isBmpCodePoint(codePoint); 11705 return CharacterData.of(codePoint).toUpperCaseCharArray(codePoint); 11706 } 11707 */ 11708 // END Android-removed: Use ICU. 11709 11710 /** 11711 * The number of bits used to represent a {@code char} value in unsigned 11712 * binary form, constant {@code 16}. 11713 * 11714 * @since 1.5 11715 */ 11716 public static final int SIZE = 16; 11717 11718 /** 11719 * The number of bytes used to represent a {@code char} value in unsigned 11720 * binary form. 11721 * 11722 * @since 1.8 11723 */ 11724 public static final int BYTES = SIZE / Byte.SIZE; 11725 11726 /** 11727 * Returns the value obtained by reversing the order of the bytes in the 11728 * specified {@code char} value. 11729 * 11730 * @param ch The {@code char} of which to reverse the byte order. 11731 * @return the value obtained by reversing (or, equivalently, swapping) 11732 * the bytes in the specified {@code char} value. 
11733 * @since 1.5 11734 */ 11735 @IntrinsicCandidate reverseBytes(char ch)11736 public static char reverseBytes(char ch) { 11737 return (char) (((ch & 0xFF00) >> 8) | (ch << 8)); 11738 } 11739 11740 /** 11741 * Returns the Unicode name of the specified character 11742 * {@code codePoint}, or null if the code point is 11743 * {@link #UNASSIGNED unassigned}. 11744 * <p> 11745 * Note: if the specified character is not assigned a name by 11746 * the <i>UnicodeData</i> file (part of the Unicode Character 11747 * Database maintained by the Unicode Consortium), the returned 11748 * name is the same as the result of expression: 11749 * 11750 * <blockquote>{@code 11751 * Character.UnicodeBlock.of(codePoint).toString().replace('_', ' ') 11752 * + " " 11753 * + Integer.toHexString(codePoint).toUpperCase(Locale.ROOT); 11754 * 11755 * }</blockquote> 11756 * 11757 * @param codePoint the character (Unicode code point) 11758 * 11759 * @return the Unicode name of the specified character, or null if 11760 * the code point is unassigned. 11761 * 11762 * @throws IllegalArgumentException if the specified 11763 * {@code codePoint} is not a valid Unicode 11764 * code point. 11765 * 11766 * @since 1.7 11767 */ getName(int codePoint)11768 public static String getName(int codePoint) { 11769 if (!isValidCodePoint(codePoint)) { 11770 throw new IllegalArgumentException( 11771 String.format("Not a valid Unicode code point: 0x%X", codePoint)); 11772 } 11773 // Android-changed: Use ICU. 
11774 // String name = CharacterName.get(codePoint); 11775 String name = getNameImpl(codePoint); 11776 if (name != null) 11777 return name; 11778 if (getType(codePoint) == UNASSIGNED) 11779 return null; 11780 UnicodeBlock block = UnicodeBlock.of(codePoint); 11781 if (block != null) 11782 return block.toString().replace('_', ' ') + " " 11783 + Integer.toHexString(codePoint).toUpperCase(Locale.ROOT); 11784 // should never come here 11785 return Integer.toHexString(codePoint).toUpperCase(Locale.ROOT); 11786 } 11787 11788 // BEGIN Android-removed: expose after CharacterName.getCodePoint() is imported. 11789 /** 11790 * Returns the code point value of the Unicode character specified by 11791 * the given Unicode character name. 11792 * <p> 11793 * Note: if a character is not assigned a name by the <i>UnicodeData</i> 11794 * file (part of the Unicode Character Database maintained by the Unicode 11795 * Consortium), its name is defined as the result of expression: 11796 * 11797 * <blockquote>{@code 11798 * Character.UnicodeBlock.of(codePoint).toString().replace('_', ' ') 11799 * + " " 11800 * + Integer.toHexString(codePoint).toUpperCase(Locale.ROOT); 11801 * 11802 * }</blockquote> 11803 * <p> 11804 * The {@code name} matching is case insensitive, with any leading and 11805 * trailing whitespace character removed. 11806 * 11807 * @param name the Unicode character name 11808 * 11809 * @return the code point value of the character specified by its name. 11810 * 11811 * @throws IllegalArgumentException if the specified {@code name} 11812 * is not a valid Unicode character name. 11813 * @throws NullPointerException if {@code name} is {@code null} 11814 * 11815 * @since 9 11816 */ codePointOf(String name)11817 public static int codePointOf(String name) { 11818 name = name.trim().toUpperCase(Locale.ROOT); 11819 // Android-changed: Use ICU4C. 
11820 // int cp = CharacterName.getInstance().getCodePoint(name); 11821 int cp = codePointOfImpl(name); 11822 if (cp != -1) 11823 return cp; 11824 try { 11825 int off = name.lastIndexOf(' '); 11826 if (off != -1) { 11827 cp = Integer.parseInt(name, off + 1, name.length(), 16); 11828 if (isValidCodePoint(cp) && name.equals(getName(cp))) 11829 return cp; 11830 } 11831 } catch (Exception x) {} 11832 throw new IllegalArgumentException("Unrecognized character name :" + name); 11833 } 11834 // END Android-removed: expose after CharacterName.getCodePoint() is imported. 11835 11836 // Android-added: Use ICU. 11837 // Implement getNameImpl() and codePointOfImpl() natively. getNameImpl(int codePoint)11838 private static native String getNameImpl(int codePoint); 11839 11840 @FastNative codePointOfImpl(String name)11841 private static native int codePointOfImpl(String name); 11842 } 11843