1 /* 2 * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package java.awt.font; 27 28 import java.io.IOException; 29 import java.io.ObjectOutputStream; 30 import java.util.Arrays; 31 import java.util.Comparator; 32 import java.util.EnumSet; 33 import java.util.Set; 34 35 /** 36 * The <code>NumericShaper</code> class is used to convert Latin-1 (European) 37 * digits to other Unicode decimal digits. Users of this class will 38 * primarily be people who wish to present data using 39 * national digit shapes, but find it more convenient to represent the 40 * data internally using Latin-1 (European) digits. This does not 41 * interpret the deprecated numeric shape selector character (U+206E). 
42 * <p> 43 * Instances of <code>NumericShaper</code> are typically applied 44 * as attributes to text with the 45 * {@link TextAttribute#NUMERIC_SHAPING NUMERIC_SHAPING} attribute 46 * of the <code>TextAttribute</code> class. 47 * For example, this code snippet causes a <code>TextLayout</code> to 48 * shape European digits to Arabic in an Arabic context:<br> 49 * <blockquote><pre> 50 * Map map = new HashMap(); 51 * map.put(TextAttribute.NUMERIC_SHAPING, 52 * NumericShaper.getContextualShaper(NumericShaper.ARABIC)); 53 * FontRenderContext frc = ...; 54 * TextLayout layout = new TextLayout(text, map, frc); 55 * layout.draw(g2d, x, y); 56 * </pre></blockquote> 57 * <br> 58 * It is also possible to perform numeric shaping explicitly using instances 59 * of <code>NumericShaper</code>, as this code snippet demonstrates:<br> 60 * <blockquote><pre> 61 * char[] text = ...; 62 * // shape all EUROPEAN digits (except zero) to ARABIC digits 63 * NumericShaper shaper = NumericShaper.getShaper(NumericShaper.ARABIC); 64 * shaper.shape(text, start, count); 65 * 66 * // shape European digits to ARABIC digits if preceding text is Arabic, or 67 * // shape European digits to TAMIL digits if preceding text is Tamil, or 68 * // leave European digits alone if there is no preceding text, or 69 * // preceding text is neither Arabic nor Tamil 70 * NumericShaper shaper = 71 * NumericShaper.getContextualShaper(NumericShaper.ARABIC | 72 * NumericShaper.TAMIL, 73 * NumericShaper.EUROPEAN); 74 * shaper.shape(text, start, count); 75 * </pre></blockquote> 76 * 77 * <p><b>Bit mask- and enum-based Unicode ranges</b></p> 78 * 79 * <p>This class supports two different programming interfaces to 80 * represent Unicode ranges for script-specific digits: bit 81 * mask-based ones, such as {@link #ARABIC NumericShaper.ARABIC}, and 82 * enum-based ones, such as {@link NumericShaper.Range#ARABIC}. 
 * Multiple ranges can be specified by ORing bit mask-based constants,
 * such as:
 * <blockquote><pre>
 * NumericShaper.ARABIC | NumericShaper.TAMIL
 * </pre></blockquote>
 * or creating a {@code Set} with the {@link NumericShaper.Range}
 * constants, such as:
 * <blockquote><pre>
 * EnumSet.of(NumericShaper.Range.ARABIC, NumericShaper.Range.TAMIL)
 * </pre></blockquote>
 * The enum-based ranges are a superset of the bit mask-based ones.
 *
 * <p>If the two interfaces are mixed (including serialization),
 * Unicode range values are mapped to their counterparts where such
 * mapping is possible, such as {@code NumericShaper.Range.ARABIC}
 * from/to {@code NumericShaper.ARABIC}.  If any unmappable range
 * values are specified, such as {@code NumericShaper.Range.BALINESE},
 * those ranges are ignored.
 *
 * <p><b>Decimal Digits Precedence</b></p>
 *
 * <p>A Unicode range may have more than one set of decimal digits. If
 * multiple decimal digits sets are specified for the same Unicode
 * range, one of the sets will take precedence as follows.
107 * 108 * <table border=1 cellspacing=3 cellpadding=0 summary="NumericShaper constants precedence."> 109 * <tr> 110 * <th class="TableHeadingColor">Unicode Range</th> 111 * <th class="TableHeadingColor"><code>NumericShaper</code> Constants</th> 112 * <th class="TableHeadingColor">Precedence</th> 113 * </tr> 114 * <tr> 115 * <td rowspan="2">Arabic</td> 116 * <td>{@link NumericShaper#ARABIC NumericShaper.ARABIC}<br> 117 * {@link NumericShaper#EASTERN_ARABIC NumericShaper.EASTERN_ARABIC}</td> 118 * <td>{@link NumericShaper#EASTERN_ARABIC NumericShaper.EASTERN_ARABIC}</td> 119 * </tr> 120 * <tr> 121 * <td>{@link NumericShaper.Range#ARABIC}<br> 122 * {@link NumericShaper.Range#EASTERN_ARABIC}</td> 123 * <td>{@link NumericShaper.Range#EASTERN_ARABIC}</td> 124 * </tr> 125 * <tr> 126 * <td>Tai Tham</td> 127 * <td>{@link NumericShaper.Range#TAI_THAM_HORA}<br> 128 * {@link NumericShaper.Range#TAI_THAM_THAM}</td> 129 * <td>{@link NumericShaper.Range#TAI_THAM_THAM}</td> 130 * </tr> 131 * </table> 132 * 133 * @since 1.4 134 */ 135 136 public final class NumericShaper implements java.io.Serializable { 137 /** 138 * A {@code NumericShaper.Range} represents a Unicode range of a 139 * script having its own decimal digits. For example, the {@link 140 * NumericShaper.Range#THAI} range has the Thai digits, THAI DIGIT 141 * ZERO (U+0E50) to THAI DIGIT NINE (U+0E59). 142 * 143 * <p>The <code>Range</code> enum replaces the traditional bit 144 * mask-based values (e.g., {@link NumericShaper#ARABIC}), and 145 * supports more Unicode ranges than the bit mask-based ones. 
For 146 * example, the following code using the bit mask: 147 * <blockquote><pre> 148 * NumericShaper.getContextualShaper(NumericShaper.ARABIC | 149 * NumericShaper.TAMIL, 150 * NumericShaper.EUROPEAN); 151 * </pre></blockquote> 152 * can be written using this enum as: 153 * <blockquote><pre> 154 * NumericShaper.getContextualShaper(EnumSet.of( 155 * NumericShaper.Range.ARABIC, 156 * NumericShaper.Range.TAMIL), 157 * NumericShaper.Range.EUROPEAN); 158 * </pre></blockquote> 159 * 160 * @since 1.7 161 */ 162 public static enum Range { 163 // The order of EUROPEAN to MOGOLIAN must be consistent 164 // with the bitmask-based constants. 165 /** 166 * The Latin (European) range with the Latin (ASCII) digits. 167 */ 168 EUROPEAN ('\u0030', '\u0000', '\u0300'), 169 /** 170 * The Arabic range with the Arabic-Indic digits. 171 */ 172 ARABIC ('\u0660', '\u0600', '\u0780'), 173 /** 174 * The Arabic range with the Eastern Arabic-Indic digits. 175 */ 176 EASTERN_ARABIC ('\u06f0', '\u0600', '\u0780'), 177 /** 178 * The Devanagari range with the Devanagari digits. 179 */ 180 DEVANAGARI ('\u0966', '\u0900', '\u0980'), 181 /** 182 * The Bengali range with the Bengali digits. 183 */ 184 BENGALI ('\u09e6', '\u0980', '\u0a00'), 185 /** 186 * The Gurmukhi range with the Gurmukhi digits. 187 */ 188 GURMUKHI ('\u0a66', '\u0a00', '\u0a80'), 189 /** 190 * The Gujarati range with the Gujarati digits. 191 */ 192 GUJARATI ('\u0ae6', '\u0b00', '\u0b80'), 193 /** 194 * The Oriya range with the Oriya digits. 195 */ 196 ORIYA ('\u0b66', '\u0b00', '\u0b80'), 197 /** 198 * The Tamil range with the Tamil digits. 199 */ 200 TAMIL ('\u0be6', '\u0b80', '\u0c00'), 201 /** 202 * The Telugu range with the Telugu digits. 203 */ 204 TELUGU ('\u0c66', '\u0c00', '\u0c80'), 205 /** 206 * The Kannada range with the Kannada digits. 207 */ 208 KANNADA ('\u0ce6', '\u0c80', '\u0d00'), 209 /** 210 * The Malayalam range with the Malayalam digits. 
211 */ 212 MALAYALAM ('\u0d66', '\u0d00', '\u0d80'), 213 /** 214 * The Thai range with the Thai digits. 215 */ 216 THAI ('\u0e50', '\u0e00', '\u0e80'), 217 /** 218 * The Lao range with the Lao digits. 219 */ 220 LAO ('\u0ed0', '\u0e80', '\u0f00'), 221 /** 222 * The Tibetan range with the Tibetan digits. 223 */ 224 TIBETAN ('\u0f20', '\u0f00', '\u1000'), 225 /** 226 * The Myanmar range with the Myanmar digits. 227 */ 228 MYANMAR ('\u1040', '\u1000', '\u1080'), 229 /** 230 * The Ethiopic range with the Ethiopic digits. Ethiopic 231 * does not have a decimal digit 0 so Latin (European) 0 is 232 * used. 233 */ 234 ETHIOPIC ('\u1369', '\u1200', '\u1380') { 235 @Override getNumericBase()236 char getNumericBase() { return 1; } 237 }, 238 /** 239 * The Khmer range with the Khmer digits. 240 */ 241 KHMER ('\u17e0', '\u1780', '\u1800'), 242 /** 243 * The Mongolian range with the Mongolian digits. 244 */ 245 MONGOLIAN ('\u1810', '\u1800', '\u1900'), 246 // The order of EUROPEAN to MOGOLIAN must be consistent 247 // with the bitmask-based constants. 248 249 /** 250 * The N'Ko range with the N'Ko digits. 251 */ 252 NKO ('\u07c0', '\u07c0', '\u0800'), 253 /** 254 * The Myanmar range with the Myanmar Shan digits. 255 */ 256 MYANMAR_SHAN ('\u1090', '\u1000', '\u10a0'), 257 /** 258 * The Limbu range with the Limbu digits. 259 */ 260 LIMBU ('\u1946', '\u1900', '\u1950'), 261 /** 262 * The New Tai Lue range with the New Tai Lue digits. 263 */ 264 NEW_TAI_LUE ('\u19d0', '\u1980', '\u19e0'), 265 /** 266 * The Balinese range with the Balinese digits. 267 */ 268 BALINESE ('\u1b50', '\u1b00', '\u1b80'), 269 /** 270 * The Sundanese range with the Sundanese digits. 271 */ 272 SUNDANESE ('\u1bb0', '\u1b80', '\u1bc0'), 273 /** 274 * The Lepcha range with the Lepcha digits. 275 */ 276 LEPCHA ('\u1c40', '\u1c00', '\u1c50'), 277 /** 278 * The Ol Chiki range with the Ol Chiki digits. 279 */ 280 OL_CHIKI ('\u1c50', '\u1c50', '\u1c80'), 281 /** 282 * The Vai range with the Vai digits. 
283 */ 284 VAI ('\ua620', '\ua500', '\ua640'), 285 /** 286 * The Saurashtra range with the Saurashtra digits. 287 */ 288 SAURASHTRA ('\ua8d0', '\ua880', '\ua8e0'), 289 /** 290 * The Kayah Li range with the Kayah Li digits. 291 */ 292 KAYAH_LI ('\ua900', '\ua900', '\ua930'), 293 /** 294 * The Cham range with the Cham digits. 295 */ 296 CHAM ('\uaa50', '\uaa00', '\uaa60'), 297 /** 298 * The Tai Tham Hora range with the Tai Tham Hora digits. 299 */ 300 TAI_THAM_HORA ('\u1a80', '\u1a20', '\u1ab0'), 301 /** 302 * The Tai Tham Tham range with the Tai Tham Tham digits. 303 */ 304 TAI_THAM_THAM ('\u1a90', '\u1a20', '\u1ab0'), 305 /** 306 * The Javanese range with the Javanese digits. 307 */ 308 JAVANESE ('\ua9d0', '\ua980', '\ua9e0'), 309 /** 310 * The Meetei Mayek range with the Meetei Mayek digits. 311 */ 312 MEETEI_MAYEK ('\uabf0', '\uabc0', '\uac00'); 313 toRangeIndex(Range script)314 private static int toRangeIndex(Range script) { 315 int index = script.ordinal(); 316 return index < NUM_KEYS ? index : -1; 317 } 318 indexToRange(int index)319 private static Range indexToRange(int index) { 320 return index < NUM_KEYS ? 
Range.values()[index] : null; 321 } 322 toRangeMask(Set<Range> ranges)323 private static int toRangeMask(Set<Range> ranges) { 324 int m = 0; 325 for (Range range : ranges) { 326 int index = range.ordinal(); 327 if (index < NUM_KEYS) { 328 m |= 1 << index; 329 } 330 } 331 return m; 332 } 333 maskToRangeSet(int mask)334 private static Set<Range> maskToRangeSet(int mask) { 335 Set<Range> set = EnumSet.noneOf(Range.class); 336 Range[] a = Range.values(); 337 for (int i = 0; i < NUM_KEYS; i++) { 338 if ((mask & (1 << i)) != 0) { 339 set.add(a[i]); 340 } 341 } 342 return set; 343 } 344 345 // base character of range digits 346 private final int base; 347 // Unicode range 348 private final int start, // inclusive 349 end; // exclusive 350 Range(int base, int start, int end)351 private Range(int base, int start, int end) { 352 this.base = base - ('0' + getNumericBase()); 353 this.start = start; 354 this.end = end; 355 } 356 getDigitBase()357 private int getDigitBase() { 358 return base; 359 } 360 getNumericBase()361 char getNumericBase() { 362 return 0; 363 } 364 inRange(int c)365 private boolean inRange(int c) { 366 return start <= c && c < end; 367 } 368 } 369 370 /** index of context for contextual shaping - values range from 0 to 18 */ 371 private int key; 372 373 /** flag indicating whether to shape contextually (high bit) and which 374 * digit ranges to shape (bits 0-18) 375 */ 376 private int mask; 377 378 /** 379 * The context {@code Range} for contextual shaping or the {@code 380 * Range} for non-contextual shaping. {@code null} for the bit 381 * mask-based API. 382 * 383 * @since 1.7 384 */ 385 private Range shapingRange; 386 387 /** 388 * {@code Set<Range>} indicating which Unicode ranges to 389 * shape. {@code null} for the bit mask-based API. 390 */ 391 private transient Set<Range> rangeSet; 392 393 /** 394 * rangeSet.toArray() value. Sorted by Range.base when the number 395 * of elements is greater then BSEARCH_THRESHOLD. 
*/
private transient Range[] rangeArray;

/**
 * If more than BSEARCH_THRESHOLD ranges are specified, binary search is used.
 */
private static final int BSEARCH_THRESHOLD = 3;

private static final long serialVersionUID = -8022764705923730308L;

/** Identifies the Latin-1 (European) and extended range, and
 *  Latin-1 (European) decimal base.
 */
public static final int EUROPEAN = 1<<0;

/** Identifies the ARABIC range and decimal base. */
public static final int ARABIC = 1<<1;

/** Identifies the ARABIC range and ARABIC_EXTENDED decimal base. */
public static final int EASTERN_ARABIC = 1<<2;

/** Identifies the DEVANAGARI range and decimal base. */
public static final int DEVANAGARI = 1<<3;

/** Identifies the BENGALI range and decimal base. */
public static final int BENGALI = 1<<4;

/** Identifies the GURMUKHI range and decimal base. */
public static final int GURMUKHI = 1<<5;

/** Identifies the GUJARATI range and decimal base. */
public static final int GUJARATI = 1<<6;

/** Identifies the ORIYA range and decimal base. */
public static final int ORIYA = 1<<7;

/** Identifies the TAMIL range and decimal base. */
// TAMIL DIGIT ZERO was added in Unicode 4.1
public static final int TAMIL = 1<<8;

/** Identifies the TELUGU range and decimal base. */
public static final int TELUGU = 1<<9;

/** Identifies the KANNADA range and decimal base. */
public static final int KANNADA = 1<<10;

/** Identifies the MALAYALAM range and decimal base. */
public static final int MALAYALAM = 1<<11;

/** Identifies the THAI range and decimal base. */
public static final int THAI = 1<<12;

/** Identifies the LAO range and decimal base. */
public static final int LAO = 1<<13;

/** Identifies the TIBETAN range and decimal base. */
public static final int TIBETAN = 1<<14;

/** Identifies the MYANMAR range and decimal base. */
public static final int MYANMAR = 1<<15;

/** Identifies the ETHIOPIC range and decimal base. */
public static final int ETHIOPIC = 1<<16;

/** Identifies the KHMER range and decimal base. */
public static final int KHMER = 1<<17;

/** Identifies the MONGOLIAN range and decimal base. */
public static final int MONGOLIAN = 1<<18;

/** Identifies all ranges, for full contextual shaping.
 *
 * <p>This constant specifies all of the bit mask-based
 * ranges. Use {@code EnumSet.allOf(NumericShaper.Range.class)} to
 * specify all of the enum-based ranges.
 */
public static final int ALL_RANGES = 0x0007ffff;

// Key (index) form of the bit mask constants above: each X_KEY is the bit
// position of the matching public constant X, i.e. X == 1 << X_KEY.
private static final int EUROPEAN_KEY = 0;
private static final int ARABIC_KEY = 1;
private static final int EASTERN_ARABIC_KEY = 2;
private static final int DEVANAGARI_KEY = 3;
private static final int BENGALI_KEY = 4;
private static final int GURMUKHI_KEY = 5;
private static final int GUJARATI_KEY = 6;
private static final int ORIYA_KEY = 7;
private static final int TAMIL_KEY = 8;
private static final int TELUGU_KEY = 9;
private static final int KANNADA_KEY = 10;
private static final int MALAYALAM_KEY = 11;
private static final int THAI_KEY = 12;
private static final int LAO_KEY = 13;
private static final int TIBETAN_KEY = 14;
private static final int MYANMAR_KEY = 15;
private static final int ETHIOPIC_KEY = 16;
private static final int KHMER_KEY = 17;
private static final int MONGOLIAN_KEY = 18;

// Number of bit mask-based ranges; also the number of leading Range
// constants that have a bit mask counterpart.
private static final int NUM_KEYS = MONGOLIAN_KEY + 1; // fixed

// High bit of the mask field: set for contextual shapers.
private static final int CONTEXTUAL_MASK = 1<<31;

// One entry per key, in key order: the distance from U+0030 to the digit
// zero of the range.  ETHIOPIC has no zero, so its entry is measured from
// U+0031 to its digit one.
private static final char[] bases = {
    '\u0030' - '\u0030', // EUROPEAN
    '\u0660' - '\u0030', // ARABIC-INDIC
    '\u06f0' - '\u0030', // EXTENDED ARABIC-INDIC (EASTERN_ARABIC)
    '\u0966' - '\u0030', // DEVANAGARI
    '\u09e6' - '\u0030', // BENGALI
    '\u0a66' - '\u0030', // GURMUKHI
    '\u0ae6' - '\u0030', // GUJARATI
    '\u0b66' - '\u0030', // ORIYA
    '\u0be6' - '\u0030', // TAMIL - zero was added in Unicode 4.1
    '\u0c66' - '\u0030', // TELUGU
    '\u0ce6' - '\u0030', // KANNADA
    '\u0d66' - '\u0030', // MALAYALAM
    '\u0e50' - '\u0030', // THAI
    '\u0ed0' - '\u0030', // LAO
    '\u0f20' - '\u0030', // TIBETAN
    '\u1040' - '\u0030', // MYANMAR
    '\u1369' - '\u0031', // ETHIOPIC - no zero
    '\u17e0' - '\u0030', // KHMER
    '\u1810' - '\u0030', // MONGOLIAN
};

// some ranges adjoin or overlap, rethink if we want to do a binary search on this

// Two entries per key, in key order: start (inclusive) and end (exclusive)
// of the context range, followed by a single trailing sentinel.
// getContextKey() walks this array and maps an even index i to key i / 2.
private static final char[] contexts = {
    '\u0000', '\u0300', // 'EUROPEAN' (really latin-1 and extended)
    '\u0600', '\u0780', // ARABIC
    '\u0600', '\u0780', // EASTERN_ARABIC -- note overlap with arabic
    '\u0900', '\u0980', // DEVANAGARI
    '\u0980', '\u0a00', // BENGALI
    '\u0a00', '\u0a80', // GURMUKHI
    '\u0a80', '\u0b00', // GUJARATI
    '\u0b00', '\u0b80', // ORIYA
    '\u0b80', '\u0c00', // TAMIL
    '\u0c00', '\u0c80', // TELUGU
    '\u0c80', '\u0d00', // KANNADA
    '\u0d00', '\u0d80', // MALAYALAM
    '\u0e00', '\u0e80', // THAI
    '\u0e80', '\u0f00', // LAO
    '\u0f00', '\u1000', // TIBETAN
    '\u1000', '\u1080', // MYANMAR
    '\u1200', '\u1380', // ETHIOPIC - note missing zero
    '\u1780', '\u1800', // KHMER
    '\u1800', '\u1900', // MONGOLIAN
    '\uffff',
};

// assume most characters are near each other so probing the cache is infrequent,
// and a linear probe is ok.
547 548 private static int ctCache = 0; 549 private static int ctCacheLimit = contexts.length - 2; 550 551 // warning, synchronize access to this as it modifies state getContextKey(char c)552 private static int getContextKey(char c) { 553 if (c < contexts[ctCache]) { 554 while (ctCache > 0 && c < contexts[ctCache]) --ctCache; 555 } else if (c >= contexts[ctCache + 1]) { 556 while (ctCache < ctCacheLimit && c >= contexts[ctCache + 1]) ++ctCache; 557 } 558 559 // if we're not in a known range, then return EUROPEAN as the range key 560 return (ctCache & 0x1) == 0 ? (ctCache / 2) : EUROPEAN_KEY; 561 } 562 563 // cache for the NumericShaper.Range version 564 private transient volatile Range currentRange = Range.EUROPEAN; 565 rangeForCodePoint(final int codepoint)566 private Range rangeForCodePoint(final int codepoint) { 567 if (currentRange.inRange(codepoint)) { 568 return currentRange; 569 } 570 571 final Range[] ranges = rangeArray; 572 if (ranges.length > BSEARCH_THRESHOLD) { 573 int lo = 0; 574 int hi = ranges.length - 1; 575 while (lo <= hi) { 576 int mid = (lo + hi) / 2; 577 Range range = ranges[mid]; 578 if (codepoint < range.start) { 579 hi = mid - 1; 580 } else if (codepoint >= range.end) { 581 lo = mid + 1; 582 } else { 583 currentRange = range; 584 return range; 585 } 586 } 587 } else { 588 for (int i = 0; i < ranges.length; i++) { 589 if (ranges[i].inRange(codepoint)) { 590 return ranges[i]; 591 } 592 } 593 } 594 return Range.EUROPEAN; 595 } 596 597 /* 598 * A range table of strong directional characters (types L, R, AL). 599 * Even (left) indexes are starts of ranges of non-strong-directional (or undefined) 600 * characters, odd (right) indexes are starts of ranges of strong directional 601 * characters. 
*/
private static int[] strongTable = {
    // One (even index, odd index) pair per line: the left value starts a
    // run of non-strong (or undefined) characters, the right value starts
    // the run of strong directional characters that follows it.  Values
    // are in ascending order and extend beyond U+FFFF.
    0x0000, 0x0041,
    0x005b, 0x0061,
    0x007b, 0x00aa,
    0x00ab, 0x00b5,
    0x00b6, 0x00ba,
    0x00bb, 0x00c0,
    0x00d7, 0x00d8,
    0x00f7, 0x00f8,
    0x02b9, 0x02bb,
    0x02c2, 0x02d0,
    0x02d2, 0x02e0,
    0x02e5, 0x02ee,
    0x02ef, 0x0370,
    0x0374, 0x0376,
    0x037e, 0x0386,
    0x0387, 0x0388,
    0x03f6, 0x03f7,
    0x0483, 0x048a,
    0x058a, 0x05be,
    0x05bf, 0x05c0,
    0x05c1, 0x05c3,
    0x05c4, 0x05c6,
    0x05c7, 0x05d0,
    0x0600, 0x0608,
    0x0609, 0x060b,
    0x060c, 0x060d,
    0x060e, 0x061b,
    0x064b, 0x066d,
    0x0670, 0x0671,
    0x06d6, 0x06e5,
    0x06e7, 0x06ee,
    0x06f0, 0x06fa,
    0x0711, 0x0712,
    0x0730, 0x074d,
    0x07a6, 0x07b1,
    0x07eb, 0x07f4,
    0x07f6, 0x07fa,
    0x0816, 0x081a,
    0x081b, 0x0824,
    0x0825, 0x0828,
    0x0829, 0x0830,
    0x0859, 0x085e,
    0x08e4, 0x0903,
    0x093a, 0x093b,
    0x093c, 0x093d,
    0x0941, 0x0949,
    0x094d, 0x094e,
    0x0951, 0x0958,
    0x0962, 0x0964,
    0x0981, 0x0982,
    0x09bc, 0x09bd,
    0x09c1, 0x09c7,
    0x09cd, 0x09ce,
    0x09e2, 0x09e6,
    0x09f2, 0x09f4,
    0x09fb, 0x0a03,
    0x0a3c, 0x0a3e,
    0x0a41, 0x0a59,
    0x0a70, 0x0a72,
    0x0a75, 0x0a83,
    0x0abc, 0x0abd,
    0x0ac1, 0x0ac9,
    0x0acd, 0x0ad0,
    0x0ae2, 0x0ae6,
    0x0af1, 0x0b02,
    0x0b3c, 0x0b3d,
    0x0b3f, 0x0b40,
    0x0b41, 0x0b47,
    0x0b4d, 0x0b57,
    0x0b62, 0x0b66,
    0x0b82, 0x0b83,
    0x0bc0, 0x0bc1,
    0x0bcd, 0x0bd0,
    0x0bf3, 0x0c01,
    0x0c3e, 0x0c41,
    0x0c46, 0x0c58,
    0x0c62, 0x0c66,
    0x0c78, 0x0c7f,
    0x0cbc, 0x0cbd,
    0x0ccc, 0x0cd5,
    0x0ce2, 0x0ce6,
    0x0d41, 0x0d46,
    0x0d4d, 0x0d4e,
    0x0d62, 0x0d66,
    0x0dca, 0x0dcf,
    0x0dd2, 0x0dd8,
    0x0e31, 0x0e32,
    0x0e34, 0x0e40,
    0x0e47, 0x0e4f,
    0x0eb1, 0x0eb2,
    0x0eb4, 0x0ebd,
    0x0ec8, 0x0ed0,
    0x0f18, 0x0f1a,
    0x0f35, 0x0f36,
    0x0f37, 0x0f38,
    0x0f39, 0x0f3e,
    0x0f71, 0x0f7f,
    0x0f80, 0x0f85,
    0x0f86, 0x0f88,
    0x0f8d, 0x0fbe,
    0x0fc6, 0x0fc7,
    0x102d, 0x1031,
    0x1032, 0x1038,
    0x1039, 0x103b,
    0x103d, 0x103f,
    0x1058, 0x105a,
    0x105e, 0x1061,
    0x1071, 0x1075,
    0x1082, 0x1083,
    0x1085, 0x1087,
    0x108d, 0x108e,
    0x109d, 0x109e,
    0x135d, 0x1360,
    0x1390, 0x13a0,
    0x1400, 0x1401,
    0x1680, 0x1681,
    0x169b, 0x16a0,
    0x1712, 0x1720,
    0x1732, 0x1735,
    0x1752, 0x1760,
    0x1772, 0x1780,
    0x17b4, 0x17b6,
    0x17b7, 0x17be,
    0x17c6, 0x17c7,
    0x17c9, 0x17d4,
    0x17db, 0x17dc,
    0x17dd, 0x17e0,
    0x17f0, 0x1810,
    0x18a9, 0x18aa,
    0x1920, 0x1923,
    0x1927, 0x1929,
    0x1932, 0x1933,
    0x1939, 0x1946,
    0x19de, 0x1a00,
    0x1a17, 0x1a19,
    0x1a56, 0x1a57,
    0x1a58, 0x1a61,
    0x1a62, 0x1a63,
    0x1a65, 0x1a6d,
    0x1a73, 0x1a80,
    0x1b00, 0x1b04,
    0x1b34, 0x1b35,
    0x1b36, 0x1b3b,
    0x1b3c, 0x1b3d,
    0x1b42, 0x1b43,
    0x1b6b, 0x1b74,
    0x1b80, 0x1b82,
    0x1ba2, 0x1ba6,
    0x1ba8, 0x1baa,
    0x1bab, 0x1bac,
    0x1be6, 0x1be7,
    0x1be8, 0x1bea,
    0x1bed, 0x1bee,
    0x1bef, 0x1bf2,
    0x1c2c, 0x1c34,
    0x1c36, 0x1c3b,
    0x1cd0, 0x1cd3,
    0x1cd4, 0x1ce1,
    0x1ce2, 0x1ce9,
    0x1ced, 0x1cee,
    0x1cf4, 0x1cf5,
    0x1dc0, 0x1e00,
    0x1fbd, 0x1fbe,
    0x1fbf, 0x1fc2,
    0x1fcd, 0x1fd0,
    0x1fdd, 0x1fe0,
    0x1fed, 0x1ff2,
    0x1ffd, 0x200e,
    0x2010, 0x2071,
    0x2074, 0x207f,
    0x2080, 0x2090,
    0x20a0, 0x2102,
    0x2103, 0x2107,
    0x2108, 0x210a,
    0x2114, 0x2115,
    0x2116, 0x2119,
    0x211e, 0x2124,
    0x2125, 0x2126,
    0x2127, 0x2128,
    0x2129, 0x212a,
    0x212e, 0x212f,
    0x213a, 0x213c,
    0x2140, 0x2145,
    0x214a, 0x214e,
    0x2150, 0x2160,
    0x2189, 0x2336,
    0x237b, 0x2395,
    0x2396, 0x249c,
    0x24ea, 0x26ac,
    0x26ad, 0x2800,
    0x2900, 0x2c00,
    0x2ce5, 0x2ceb,
    0x2cef, 0x2cf2,
    0x2cf9, 0x2d00,
    0x2d7f, 0x2d80,
    0x2de0, 0x3005,
    0x3008, 0x3021,
    0x302a, 0x3031,
    0x3036, 0x3038,
    0x303d, 0x3041,
    0x3099, 0x309d,
    0x30a0, 0x30a1,
    0x30fb, 0x30fc,
    0x31c0, 0x31f0,
    0x321d, 0x3220,
    0x3250, 0x3260,
    0x327c, 0x327f,
    0x32b1, 0x32c0,
    0x32cc, 0x32d0,
    0x3377, 0x337b,
    0x33de, 0x33e0,
    0x33ff, 0x3400,
    0x4dc0, 0x4e00,
    0xa490, 0xa4d0,
    0xa60d, 0xa610,
    0xa66f, 0xa680,
    0xa69f, 0xa6a0,
    0xa6f0, 0xa6f2,
    0xa700, 0xa722,
    0xa788, 0xa789,
    0xa802, 0xa803,
    0xa806, 0xa807,
    0xa80b, 0xa80c,
    0xa825, 0xa827,
    0xa828, 0xa830,
    0xa838, 0xa840,
    0xa874, 0xa880,
    0xa8c4, 0xa8ce,
    0xa8e0, 0xa8f2,
    0xa926, 0xa92e,
    0xa947, 0xa952,
    0xa980, 0xa983,
    0xa9b3, 0xa9b4,
    0xa9b6, 0xa9ba,
    0xa9bc, 0xa9bd,
    0xaa29, 0xaa2f,
    0xaa31, 0xaa33,
    0xaa35, 0xaa40,
    0xaa43, 0xaa44,
    0xaa4c, 0xaa4d,
    0xaab0, 0xaab1,
    0xaab2, 0xaab5,
    0xaab7, 0xaab9,
    0xaabe, 0xaac0,
    0xaac1, 0xaac2,
    0xaaec, 0xaaee,
    0xaaf6, 0xab01,
    0xabe5, 0xabe6,
    0xabe8, 0xabe9,
    0xabed, 0xabf0,
    0xfb1e, 0xfb1f,
    0xfb29, 0xfb2a,
    0xfd3e, 0xfd50,
    0xfdfd, 0xfe70,
    0xfeff, 0xff21,
    0xff3b, 0xff41,
    0xff5b, 0xff66,
    0xffe0, 0x10000,
    0x10101, 0x10102,
    0x10140, 0x101d0,
    0x101fd, 0x10280,
    0x1091f, 0x10920,
    0x10a01, 0x10a10,
    0x10a38, 0x10a40,
    0x10b39, 0x10b40,
    0x10e60, 0x11000,
    0x11001, 0x11002,
    0x11038, 0x11047,
    0x11052, 0x11066,
    0x11080, 0x11082,
    0x110b3, 0x110b7,
    0x110b9, 0x110bb,
    0x11100, 0x11103,
    0x11127, 0x1112c,
    0x1112d, 0x11136,
    0x11180, 0x11182,
    0x111b6, 0x111bf,
    0x116ab, 0x116ac,
    0x116ad, 0x116ae,
    0x116b0, 0x116b6,
    0x116b7, 0x116c0,
    0x16f8f, 0x16f93,
    0x1d167, 0x1d16a,
    0x1d173, 0x1d183,
    0x1d185, 0x1d18c,
    0x1d1aa, 0x1d1ae,
    0x1d200, 0x1d360,
    0x1d6db, 0x1d6dc,
    0x1d715, 0x1d716,
    0x1d74f, 0x1d750,
    0x1d789, 0x1d78a,
    0x1d7c3, 0x1d7c4,
    0x1d7ce, 0x1ee00,
    0x1eef0,
0x1f110, 898 0x1f16a, 0x1f170, 899 0x1f300, 0x1f48c, 900 0x1f48d, 0x1f524, 901 0x1f525, 0x20000, 902 0xe0001, 0xf0000, 903 0x10fffe, 0x10ffff // sentinel 904 }; 905 906 907 // use a binary search with a cache 908 909 private transient volatile int stCache = 0; 910 isStrongDirectional(char c)911 private boolean isStrongDirectional(char c) { 912 int cachedIndex = stCache; 913 if (c < strongTable[cachedIndex]) { 914 cachedIndex = search(c, strongTable, 0, cachedIndex); 915 } else if (c >= strongTable[cachedIndex + 1]) { 916 cachedIndex = search(c, strongTable, cachedIndex + 1, 917 strongTable.length - cachedIndex - 1); 918 } 919 boolean val = (cachedIndex & 0x1) == 1; 920 stCache = cachedIndex; 921 return val; 922 } 923 getKeyFromMask(int mask)924 private static int getKeyFromMask(int mask) { 925 int key = 0; 926 while (key < NUM_KEYS && ((mask & (1<<key)) == 0)) { 927 ++key; 928 } 929 if (key == NUM_KEYS || ((mask & ~(1<<key)) != 0)) { 930 throw new IllegalArgumentException("invalid shaper: " + Integer.toHexString(mask)); 931 } 932 return key; 933 } 934 935 /** 936 * Returns a shaper for the provided unicode range. All 937 * Latin-1 (EUROPEAN) digits are converted 938 * to the corresponding decimal unicode digits. 939 * @param singleRange the specified Unicode range 940 * @return a non-contextual numeric shaper 941 * @throws IllegalArgumentException if the range is not a single range 942 */ getShaper(int singleRange)943 public static NumericShaper getShaper(int singleRange) { 944 int key = getKeyFromMask(singleRange); 945 return new NumericShaper(key, singleRange); 946 } 947 948 /** 949 * Returns a shaper for the provided Unicode 950 * range. All Latin-1 (EUROPEAN) digits are converted to the 951 * corresponding decimal digits of the specified Unicode range. 952 * 953 * @param singleRange the Unicode range given by a {@link 954 * NumericShaper.Range} constant. 955 * @return a non-contextual {@code NumericShaper}. 
956 * @throws NullPointerException if {@code singleRange} is {@code null} 957 * @since 1.7 958 */ getShaper(Range singleRange)959 public static NumericShaper getShaper(Range singleRange) { 960 return new NumericShaper(singleRange, EnumSet.of(singleRange)); 961 } 962 963 /** 964 * Returns a contextual shaper for the provided unicode range(s). 965 * Latin-1 (EUROPEAN) digits are converted to the decimal digits 966 * corresponding to the range of the preceding text, if the 967 * range is one of the provided ranges. Multiple ranges are 968 * represented by or-ing the values together, such as, 969 * <code>NumericShaper.ARABIC | NumericShaper.THAI</code>. The 970 * shaper assumes EUROPEAN as the starting context, that is, if 971 * EUROPEAN digits are encountered before any strong directional 972 * text in the string, the context is presumed to be EUROPEAN, and 973 * so the digits will not shape. 974 * @param ranges the specified Unicode ranges 975 * @return a shaper for the specified ranges 976 */ getContextualShaper(int ranges)977 public static NumericShaper getContextualShaper(int ranges) { 978 ranges |= CONTEXTUAL_MASK; 979 return new NumericShaper(EUROPEAN_KEY, ranges); 980 } 981 982 /** 983 * Returns a contextual shaper for the provided Unicode 984 * range(s). The Latin-1 (EUROPEAN) digits are converted to the 985 * decimal digits corresponding to the range of the preceding 986 * text, if the range is one of the provided ranges. 987 * 988 * <p>The shaper assumes EUROPEAN as the starting context, that 989 * is, if EUROPEAN digits are encountered before any strong 990 * directional text in the string, the context is presumed to be 991 * EUROPEAN, and so the digits will not shape. 992 * 993 * @param ranges the specified Unicode ranges 994 * @return a contextual shaper for the specified ranges 995 * @throws NullPointerException if {@code ranges} is {@code null}. 
996 * @since 1.7 997 */ getContextualShaper(Set<Range> ranges)998 public static NumericShaper getContextualShaper(Set<Range> ranges) { 999 NumericShaper shaper = new NumericShaper(Range.EUROPEAN, ranges); 1000 shaper.mask = CONTEXTUAL_MASK; 1001 return shaper; 1002 } 1003 1004 /** 1005 * Returns a contextual shaper for the provided unicode range(s). 1006 * Latin-1 (EUROPEAN) digits will be converted to the decimal digits 1007 * corresponding to the range of the preceding text, if the 1008 * range is one of the provided ranges. Multiple ranges are 1009 * represented by or-ing the values together, for example, 1010 * <code>NumericShaper.ARABIC | NumericShaper.THAI</code>. The 1011 * shaper uses defaultContext as the starting context. 1012 * @param ranges the specified Unicode ranges 1013 * @param defaultContext the starting context, such as 1014 * <code>NumericShaper.EUROPEAN</code> 1015 * @return a shaper for the specified Unicode ranges. 1016 * @throws IllegalArgumentException if the specified 1017 * <code>defaultContext</code> is not a single valid range. 1018 */ getContextualShaper(int ranges, int defaultContext)1019 public static NumericShaper getContextualShaper(int ranges, int defaultContext) { 1020 int key = getKeyFromMask(defaultContext); 1021 ranges |= CONTEXTUAL_MASK; 1022 return new NumericShaper(key, ranges); 1023 } 1024 1025 /** 1026 * Returns a contextual shaper for the provided Unicode range(s). 1027 * The Latin-1 (EUROPEAN) digits will be converted to the decimal 1028 * digits corresponding to the range of the preceding text, if the 1029 * range is one of the provided ranges. The shaper uses {@code 1030 * defaultContext} as the starting context. 1031 * 1032 * @param ranges the specified Unicode ranges 1033 * @param defaultContext the starting context, such as 1034 * {@code NumericShaper.Range.EUROPEAN} 1035 * @return a contextual shaper for the specified Unicode ranges. 
1036 * @throws NullPointerException 1037 * if {@code ranges} or {@code defaultContext} is {@code null} 1038 * @since 1.7 1039 */ getContextualShaper(Set<Range> ranges, Range defaultContext)1040 public static NumericShaper getContextualShaper(Set<Range> ranges, 1041 Range defaultContext) { 1042 if (defaultContext == null) { 1043 throw new NullPointerException(); 1044 } 1045 NumericShaper shaper = new NumericShaper(defaultContext, ranges); 1046 shaper.mask = CONTEXTUAL_MASK; 1047 return shaper; 1048 } 1049 1050 /** 1051 * Private constructor. 1052 */ NumericShaper(int key, int mask)1053 private NumericShaper(int key, int mask) { 1054 this.key = key; 1055 this.mask = mask; 1056 } 1057 NumericShaper(Range defaultContext, Set<Range> ranges)1058 private NumericShaper(Range defaultContext, Set<Range> ranges) { 1059 shapingRange = defaultContext; 1060 rangeSet = EnumSet.copyOf(ranges); // throws NPE if ranges is null. 1061 1062 // Give precedance to EASTERN_ARABIC if both ARABIC and 1063 // EASTERN_ARABIC are specified. 1064 if (rangeSet.contains(Range.EASTERN_ARABIC) 1065 && rangeSet.contains(Range.ARABIC)) { 1066 rangeSet.remove(Range.ARABIC); 1067 } 1068 1069 // As well as the above case, give precedance to TAI_THAM_THAM if both 1070 // TAI_THAM_HORA and TAI_THAM_THAM are specified. 1071 if (rangeSet.contains(Range.TAI_THAM_THAM) 1072 && rangeSet.contains(Range.TAI_THAM_HORA)) { 1073 rangeSet.remove(Range.TAI_THAM_HORA); 1074 } 1075 1076 rangeArray = rangeSet.toArray(new Range[rangeSet.size()]); 1077 if (rangeArray.length > BSEARCH_THRESHOLD) { 1078 // sort rangeArray for binary search 1079 Arrays.sort(rangeArray, 1080 new Comparator<Range>() { 1081 public int compare(Range s1, Range s2) { 1082 return s1.base > s2.base ? 1 : s1.base == s2.base ? 0 : -1; 1083 } 1084 }); 1085 } 1086 } 1087 1088 /** 1089 * Converts the digits in the text that occur between start and 1090 * start + count. 
1091 * @param text an array of characters to convert 1092 * @param start the index into <code>text</code> to start 1093 * converting 1094 * @param count the number of characters in <code>text</code> 1095 * to convert 1096 * @throws IndexOutOfBoundsException if start or start + count is 1097 * out of bounds 1098 * @throws NullPointerException if text is null 1099 */ shape(char[] text, int start, int count)1100 public void shape(char[] text, int start, int count) { 1101 checkParams(text, start, count); 1102 if (isContextual()) { 1103 if (rangeSet == null) { 1104 shapeContextually(text, start, count, key); 1105 } else { 1106 shapeContextually(text, start, count, shapingRange); 1107 } 1108 } else { 1109 shapeNonContextually(text, start, count); 1110 } 1111 } 1112 1113 /** 1114 * Converts the digits in the text that occur between start and 1115 * start + count, using the provided context. 1116 * Context is ignored if the shaper is not a contextual shaper. 1117 * @param text an array of characters 1118 * @param start the index into <code>text</code> to start 1119 * converting 1120 * @param count the number of characters in <code>text</code> 1121 * to convert 1122 * @param context the context to which to convert the 1123 * characters, such as <code>NumericShaper.EUROPEAN</code> 1124 * @throws IndexOutOfBoundsException if start or start + count is 1125 * out of bounds 1126 * @throws NullPointerException if text is null 1127 * @throws IllegalArgumentException if this is a contextual shaper 1128 * and the specified <code>context</code> is not a single valid 1129 * range. 
1130 */ shape(char[] text, int start, int count, int context)1131 public void shape(char[] text, int start, int count, int context) { 1132 checkParams(text, start, count); 1133 if (isContextual()) { 1134 int ctxKey = getKeyFromMask(context); 1135 if (rangeSet == null) { 1136 shapeContextually(text, start, count, ctxKey); 1137 } else { 1138 shapeContextually(text, start, count, Range.values()[ctxKey]); 1139 } 1140 } else { 1141 shapeNonContextually(text, start, count); 1142 } 1143 } 1144 1145 /** 1146 * Converts the digits in the text that occur between {@code 1147 * start} and {@code start + count}, using the provided {@code 1148 * context}. {@code Context} is ignored if the shaper is not a 1149 * contextual shaper. 1150 * 1151 * @param text a {@code char} array 1152 * @param start the index into {@code text} to start converting 1153 * @param count the number of {@code char}s in {@code text} 1154 * to convert 1155 * @param context the context to which to convert the characters, 1156 * such as {@code NumericShaper.Range.EUROPEAN} 1157 * @throws IndexOutOfBoundsException 1158 * if {@code start} or {@code start + count} is out of bounds 1159 * @throws NullPointerException 1160 * if {@code text} or {@code context} is null 1161 * @since 1.7 1162 */ shape(char[] text, int start, int count, Range context)1163 public void shape(char[] text, int start, int count, Range context) { 1164 checkParams(text, start, count); 1165 if (context == null) { 1166 throw new NullPointerException("context is null"); 1167 } 1168 1169 if (isContextual()) { 1170 if (rangeSet != null) { 1171 shapeContextually(text, start, count, context); 1172 } else { 1173 int key = Range.toRangeIndex(context); 1174 if (key >= 0) { 1175 shapeContextually(text, start, count, key); 1176 } else { 1177 shapeContextually(text, start, count, shapingRange); 1178 } 1179 } 1180 } else { 1181 shapeNonContextually(text, start, count); 1182 } 1183 } 1184 checkParams(char[] text, int start, int count)1185 private void 
checkParams(char[] text, int start, int count) { 1186 if (text == null) { 1187 throw new NullPointerException("text is null"); 1188 } 1189 if ((start < 0) 1190 || (start > text.length) 1191 || ((start + count) < 0) 1192 || ((start + count) > text.length)) { 1193 throw new IndexOutOfBoundsException( 1194 "bad start or count for text of length " + text.length); 1195 } 1196 } 1197 1198 /** 1199 * Returns a <code>boolean</code> indicating whether or not 1200 * this shaper shapes contextually. 1201 * @return <code>true</code> if this shaper is contextual; 1202 * <code>false</code> otherwise. 1203 */ isContextual()1204 public boolean isContextual() { 1205 return (mask & CONTEXTUAL_MASK) != 0; 1206 } 1207 1208 /** 1209 * Returns an <code>int</code> that ORs together the values for 1210 * all the ranges that will be shaped. 1211 * <p> 1212 * For example, to check if a shaper shapes to Arabic, you would use the 1213 * following: 1214 * <blockquote> 1215 * {@code if ((shaper.getRanges() & shaper.ARABIC) != 0) { ... } 1216 * </blockquote> 1217 * 1218 * <p>Note that this method supports only the bit mask-based 1219 * ranges. Call {@link #getRangeSet()} for the enum-based ranges. 1220 * 1221 * @return the values for all the ranges to be shaped. 1222 */ getRanges()1223 public int getRanges() { 1224 return mask & ~CONTEXTUAL_MASK; 1225 } 1226 1227 /** 1228 * Returns a {@code Set} representing all the Unicode ranges in 1229 * this {@code NumericShaper} that will be shaped. 1230 * 1231 * @return all the Unicode ranges to be shaped. 1232 * @since 1.7 1233 */ getRangeSet()1234 public Set<Range> getRangeSet() { 1235 if (rangeSet != null) { 1236 return EnumSet.copyOf(rangeSet); 1237 } 1238 return Range.maskToRangeSet(mask); 1239 } 1240 1241 /** 1242 * Perform non-contextual shaping. 
1243 */ shapeNonContextually(char[] text, int start, int count)1244 private void shapeNonContextually(char[] text, int start, int count) { 1245 int base; 1246 char minDigit = '0'; 1247 if (shapingRange != null) { 1248 base = shapingRange.getDigitBase(); 1249 minDigit += shapingRange.getNumericBase(); 1250 } else { 1251 base = bases[key]; 1252 if (key == ETHIOPIC_KEY) { 1253 minDigit++; // Ethiopic doesn't use decimal zero 1254 } 1255 } 1256 for (int i = start, e = start + count; i < e; ++i) { 1257 char c = text[i]; 1258 if (c >= minDigit && c <= '\u0039') { 1259 text[i] = (char)(c + base); 1260 } 1261 } 1262 } 1263 1264 /** 1265 * Perform contextual shaping. 1266 * Synchronized to protect caches used in getContextKey. 1267 */ shapeContextually(char[] text, int start, int count, int ctxKey)1268 private synchronized void shapeContextually(char[] text, int start, int count, int ctxKey) { 1269 1270 // if we don't support this context, then don't shape 1271 if ((mask & (1<<ctxKey)) == 0) { 1272 ctxKey = EUROPEAN_KEY; 1273 } 1274 int lastkey = ctxKey; 1275 1276 int base = bases[ctxKey]; 1277 char minDigit = ctxKey == ETHIOPIC_KEY ? '1' : '0'; // Ethiopic doesn't use decimal zero 1278 1279 synchronized (NumericShaper.class) { 1280 for (int i = start, e = start + count; i < e; ++i) { 1281 char c = text[i]; 1282 if (c >= minDigit && c <= '\u0039') { 1283 text[i] = (char)(c + base); 1284 } 1285 1286 if (isStrongDirectional(c)) { 1287 int newkey = getContextKey(c); 1288 if (newkey != lastkey) { 1289 lastkey = newkey; 1290 1291 ctxKey = newkey; 1292 if (((mask & EASTERN_ARABIC) != 0) && 1293 (ctxKey == ARABIC_KEY || 1294 ctxKey == EASTERN_ARABIC_KEY)) { 1295 ctxKey = EASTERN_ARABIC_KEY; 1296 } else if (((mask & ARABIC) != 0) && 1297 (ctxKey == ARABIC_KEY || 1298 ctxKey == EASTERN_ARABIC_KEY)) { 1299 ctxKey = ARABIC_KEY; 1300 } else if ((mask & (1<<ctxKey)) == 0) { 1301 ctxKey = EUROPEAN_KEY; 1302 } 1303 1304 base = bases[ctxKey]; 1305 1306 minDigit = ctxKey == ETHIOPIC_KEY ? 
'1' : '0'; // Ethiopic doesn't use decimal zero 1307 } 1308 } 1309 } 1310 } 1311 } 1312 shapeContextually(char[] text, int start, int count, Range ctxKey)1313 private void shapeContextually(char[] text, int start, int count, Range ctxKey) { 1314 // if we don't support the specified context, then don't shape. 1315 if (ctxKey == null || !rangeSet.contains(ctxKey)) { 1316 ctxKey = Range.EUROPEAN; 1317 } 1318 1319 Range lastKey = ctxKey; 1320 int base = ctxKey.getDigitBase(); 1321 char minDigit = (char)('0' + ctxKey.getNumericBase()); 1322 final int end = start + count; 1323 for (int i = start; i < end; ++i) { 1324 char c = text[i]; 1325 if (c >= minDigit && c <= '9') { 1326 text[i] = (char)(c + base); 1327 continue; 1328 } 1329 if (isStrongDirectional(c)) { 1330 ctxKey = rangeForCodePoint(c); 1331 if (ctxKey != lastKey) { 1332 lastKey = ctxKey; 1333 base = ctxKey.getDigitBase(); 1334 minDigit = (char)('0' + ctxKey.getNumericBase()); 1335 } 1336 } 1337 } 1338 } 1339 1340 /** 1341 * Returns a hash code for this shaper. 1342 * @return this shaper's hash code. 1343 * @see java.lang.Object#hashCode 1344 */ hashCode()1345 public int hashCode() { 1346 int hash = mask; 1347 if (rangeSet != null) { 1348 // Use the CONTEXTUAL_MASK bit only for the enum-based 1349 // NumericShaper. A deserialized NumericShaper might have 1350 // bit masks. 1351 hash &= CONTEXTUAL_MASK; 1352 hash ^= rangeSet.hashCode(); 1353 } 1354 return hash; 1355 } 1356 1357 /** 1358 * Returns {@code true} if the specified object is an instance of 1359 * <code>NumericShaper</code> and shapes identically to this one, 1360 * regardless of the range representations, the bit mask or the 1361 * enum. For example, the following code produces {@code "true"}. 
1362 * <blockquote><pre> 1363 * NumericShaper ns1 = NumericShaper.getShaper(NumericShaper.ARABIC); 1364 * NumericShaper ns2 = NumericShaper.getShaper(NumericShaper.Range.ARABIC); 1365 * System.out.println(ns1.equals(ns2)); 1366 * </pre></blockquote> 1367 * 1368 * @param o the specified object to compare to this 1369 * <code>NumericShaper</code> 1370 * @return <code>true</code> if <code>o</code> is an instance 1371 * of <code>NumericShaper</code> and shapes in the same way; 1372 * <code>false</code> otherwise. 1373 * @see java.lang.Object#equals(java.lang.Object) 1374 */ equals(Object o)1375 public boolean equals(Object o) { 1376 if (o != null) { 1377 try { 1378 NumericShaper rhs = (NumericShaper)o; 1379 if (rangeSet != null) { 1380 if (rhs.rangeSet != null) { 1381 return isContextual() == rhs.isContextual() 1382 && rangeSet.equals(rhs.rangeSet) 1383 && shapingRange == rhs.shapingRange; 1384 } 1385 return isContextual() == rhs.isContextual() 1386 && rangeSet.equals(Range.maskToRangeSet(rhs.mask)) 1387 && shapingRange == Range.indexToRange(rhs.key); 1388 } else if (rhs.rangeSet != null) { 1389 Set<Range> rset = Range.maskToRangeSet(mask); 1390 Range srange = Range.indexToRange(key); 1391 return isContextual() == rhs.isContextual() 1392 && rset.equals(rhs.rangeSet) 1393 && srange == rhs.shapingRange; 1394 } 1395 return rhs.mask == mask && rhs.key == key; 1396 } 1397 catch (ClassCastException e) { 1398 } 1399 } 1400 return false; 1401 } 1402 1403 /** 1404 * Returns a <code>String</code> that describes this shaper. This method 1405 * is used for debugging purposes only. 1406 * @return a <code>String</code> describing this shaper. 1407 */ toString()1408 public String toString() { 1409 StringBuilder buf = new StringBuilder(super.toString()); 1410 1411 buf.append("[contextual:").append(isContextual()); 1412 1413 String[] keyNames = null; 1414 if (isContextual()) { 1415 buf.append(", context:"); 1416 buf.append(shapingRange == null ? 
Range.values()[key] : shapingRange); 1417 } 1418 1419 if (rangeSet == null) { 1420 buf.append(", range(s): "); 1421 boolean first = true; 1422 for (int i = 0; i < NUM_KEYS; ++i) { 1423 if ((mask & (1 << i)) != 0) { 1424 if (first) { 1425 first = false; 1426 } else { 1427 buf.append(", "); 1428 } 1429 buf.append(Range.values()[i]); 1430 } 1431 } 1432 } else { 1433 buf.append(", range set: ").append(rangeSet); 1434 } 1435 buf.append(']'); 1436 1437 return buf.toString(); 1438 } 1439 1440 /** 1441 * Returns the index of the high bit in value (assuming le, actually 1442 * power of 2 >= value). value must be positive. 1443 */ getHighBit(int value)1444 private static int getHighBit(int value) { 1445 if (value <= 0) { 1446 return -32; 1447 } 1448 1449 int bit = 0; 1450 1451 if (value >= 1 << 16) { 1452 value >>= 16; 1453 bit += 16; 1454 } 1455 1456 if (value >= 1 << 8) { 1457 value >>= 8; 1458 bit += 8; 1459 } 1460 1461 if (value >= 1 << 4) { 1462 value >>= 4; 1463 bit += 4; 1464 } 1465 1466 if (value >= 1 << 2) { 1467 value >>= 2; 1468 bit += 2; 1469 } 1470 1471 if (value >= 1 << 1) { 1472 bit += 1; 1473 } 1474 1475 return bit; 1476 } 1477 1478 /** 1479 * fast binary search over subrange of array. 1480 */ search(int value, int[] array, int start, int length)1481 private static int search(int value, int[] array, int start, int length) 1482 { 1483 int power = 1 << getHighBit(length); 1484 int extra = length - power; 1485 int probe = power; 1486 int index = start; 1487 1488 if (value >= array[index + extra]) { 1489 index += extra; 1490 } 1491 1492 while (probe > 1) { 1493 probe >>= 1; 1494 1495 if (value >= array[index + probe]) { 1496 index += probe; 1497 } 1498 } 1499 1500 return index; 1501 } 1502 1503 /** 1504 * Converts the {@code NumericShaper.Range} enum-based parameters, 1505 * if any, to the bit mask-based counterparts and writes this 1506 * object to the {@code stream}. 
Any enum constants that have no 1507 * bit mask-based counterparts are ignored in the conversion. 1508 * 1509 * @param stream the output stream to write to 1510 * @throws IOException if an I/O error occurs while writing to {@code stream} 1511 * @since 1.7 1512 */ writeObject(ObjectOutputStream stream)1513 private void writeObject(ObjectOutputStream stream) throws IOException { 1514 if (shapingRange != null) { 1515 int index = Range.toRangeIndex(shapingRange); 1516 if (index >= 0) { 1517 key = index; 1518 } 1519 } 1520 if (rangeSet != null) { 1521 mask |= Range.toRangeMask(rangeSet); 1522 } 1523 stream.defaultWriteObject(); 1524 } 1525 } 1526