1 /*
2  * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * This code is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 only, as
7  * published by the Free Software Foundation.  Oracle designates this
8  * particular file as subject to the "Classpath" exception as provided
9  * by Oracle in the LICENSE file that accompanied this code.
10  *
11  * This code is distributed in the hope that it will be useful, but WITHOUT
12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14  * version 2 for more details (a copy is included in the LICENSE file that
15  * accompanied this code).
16  *
17  * You should have received a copy of the GNU General Public License version
18  * 2 along with this work; if not, write to the Free Software Foundation,
19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20  *
21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22  * or visit www.oracle.com if you need additional information or have any
23  * questions.
24  */
25 
26 package java.awt.font;
27 
28 import java.io.IOException;
29 import java.io.ObjectOutputStream;
30 import java.util.Arrays;
31 import java.util.Comparator;
32 import java.util.EnumSet;
33 import java.util.Set;
34 
35 /**
36  * The <code>NumericShaper</code> class is used to convert Latin-1 (European)
37  * digits to other Unicode decimal digits.  Users of this class will
38  * primarily be people who wish to present data using
39  * national digit shapes, but find it more convenient to represent the
40  * data internally using Latin-1 (European) digits.  This does not
41  * interpret the deprecated numeric shape selector character (U+206E).
42  * <p>
43  * Instances of <code>NumericShaper</code> are typically applied
44  * as attributes to text with the
45  * {@link TextAttribute#NUMERIC_SHAPING NUMERIC_SHAPING} attribute
46  * of the <code>TextAttribute</code> class.
47  * For example, this code snippet causes a <code>TextLayout</code> to
48  * shape European digits to Arabic in an Arabic context:<br>
49  * <blockquote><pre>
50  * Map map = new HashMap();
51  * map.put(TextAttribute.NUMERIC_SHAPING,
52  *     NumericShaper.getContextualShaper(NumericShaper.ARABIC));
53  * FontRenderContext frc = ...;
54  * TextLayout layout = new TextLayout(text, map, frc);
55  * layout.draw(g2d, x, y);
56  * </pre></blockquote>
57  * <br>
58  * It is also possible to perform numeric shaping explicitly using instances
59  * of <code>NumericShaper</code>, as this code snippet demonstrates:<br>
60  * <blockquote><pre>
61  * char[] text = ...;
62  * // shape all EUROPEAN digits (except zero) to ARABIC digits
63  * NumericShaper shaper = NumericShaper.getShaper(NumericShaper.ARABIC);
64  * shaper.shape(text, start, count);
65  *
66  * // shape European digits to ARABIC digits if preceding text is Arabic, or
67  * // shape European digits to TAMIL digits if preceding text is Tamil, or
68  * // leave European digits alone if there is no preceding text, or
69  * // preceding text is neither Arabic nor Tamil
70  * NumericShaper shaper =
71  *     NumericShaper.getContextualShaper(NumericShaper.ARABIC |
72  *                                         NumericShaper.TAMIL,
73  *                                       NumericShaper.EUROPEAN);
74  * shaper.shape(text, start, count);
75  * </pre></blockquote>
76  *
77  * <p><b>Bit mask- and enum-based Unicode ranges</b></p>
78  *
79  * <p>This class supports two different programming interfaces to
80  * represent Unicode ranges for script-specific digits: bit
81  * mask-based ones, such as {@link #ARABIC NumericShaper.ARABIC}, and
82  * enum-based ones, such as {@link NumericShaper.Range#ARABIC}.
83  * Multiple ranges can be specified by ORing bit mask-based constants,
84  * such as:
85  * <blockquote><pre>
86  * NumericShaper.ARABIC | NumericShaper.TAMIL
87  * </pre></blockquote>
88  * or creating a {@code Set} with the {@link NumericShaper.Range}
89  * constants, such as:
90  * <blockquote><pre>
91  * EnumSet.of(NumericShaper.Range.ARABIC, NumericShaper.Range.TAMIL)
92  * </pre></blockquote>
93  * The enum-based ranges are a superset of the bit mask-based ones.
94  *
95  * <p>If the two interfaces are mixed (including serialization),
96  * Unicode range values are mapped to their counterparts where such
97  * mapping is possible, such as {@code NumericShaper.Range.ARABIC}
98  * from/to {@code NumericShaper.ARABIC}.  If any unmappable range
99  * values are specified, such as {@code NumericShaper.Range.BALINESE},
100  * those ranges are ignored.
101  *
102  * <p><b>Decimal Digits Precedence</b></p>
103  *
104  * <p>A Unicode range may have more than one set of decimal digits. If
105  * multiple decimal digits sets are specified for the same Unicode
106  * range, one of the sets will take precedence as follows.
107  *
108  * <table border=1 cellspacing=3 cellpadding=0 summary="NumericShaper constants precedence.">
109  *    <tr>
110  *       <th class="TableHeadingColor">Unicode Range</th>
111  *       <th class="TableHeadingColor"><code>NumericShaper</code> Constants</th>
112  *       <th class="TableHeadingColor">Precedence</th>
113  *    </tr>
114  *    <tr>
115  *       <td rowspan="2">Arabic</td>
116  *       <td>{@link NumericShaper#ARABIC NumericShaper.ARABIC}<br>
117  *           {@link NumericShaper#EASTERN_ARABIC NumericShaper.EASTERN_ARABIC}</td>
118  *       <td>{@link NumericShaper#EASTERN_ARABIC NumericShaper.EASTERN_ARABIC}</td>
119  *    </tr>
120  *    <tr>
121  *       <td>{@link NumericShaper.Range#ARABIC}<br>
122  *           {@link NumericShaper.Range#EASTERN_ARABIC}</td>
123  *       <td>{@link NumericShaper.Range#EASTERN_ARABIC}</td>
124  *    </tr>
125  *    <tr>
126  *       <td>Tai Tham</td>
127  *       <td>{@link NumericShaper.Range#TAI_THAM_HORA}<br>
128  *           {@link NumericShaper.Range#TAI_THAM_THAM}</td>
129  *       <td>{@link NumericShaper.Range#TAI_THAM_THAM}</td>
130  *    </tr>
131  * </table>
132  *
133  * @since 1.4
134  */
135 
136 public final class NumericShaper implements java.io.Serializable {
137     /**
138      * A {@code NumericShaper.Range} represents a Unicode range of a
139      * script having its own decimal digits. For example, the {@link
140      * NumericShaper.Range#THAI} range has the Thai digits, THAI DIGIT
141      * ZERO (U+0E50) to THAI DIGIT NINE (U+0E59).
142      *
143      * <p>The <code>Range</code> enum replaces the traditional bit
144      * mask-based values (e.g., {@link NumericShaper#ARABIC}), and
145      * supports more Unicode ranges than the bit mask-based ones. For
146      * example, the following code using the bit mask:
147      * <blockquote><pre>
148      * NumericShaper.getContextualShaper(NumericShaper.ARABIC |
149      *                                     NumericShaper.TAMIL,
150      *                                   NumericShaper.EUROPEAN);
151      * </pre></blockquote>
152      * can be written using this enum as:
153      * <blockquote><pre>
154      * NumericShaper.getContextualShaper(EnumSet.of(
155      *                                     NumericShaper.Range.ARABIC,
156      *                                     NumericShaper.Range.TAMIL),
157      *                                   NumericShaper.Range.EUROPEAN);
158      * </pre></blockquote>
159      *
160      * @since 1.7
161      */
162     public static enum Range {
163         // The order of EUROPEAN to MOGOLIAN must be consistent
164         // with the bitmask-based constants.
165         /**
166          * The Latin (European) range with the Latin (ASCII) digits.
167          */
168         EUROPEAN        ('\u0030', '\u0000', '\u0300'),
169         /**
170          * The Arabic range with the Arabic-Indic digits.
171          */
172         ARABIC          ('\u0660', '\u0600', '\u0780'),
173         /**
174          * The Arabic range with the Eastern Arabic-Indic digits.
175          */
176         EASTERN_ARABIC  ('\u06f0', '\u0600', '\u0780'),
177         /**
178          * The Devanagari range with the Devanagari digits.
179          */
180         DEVANAGARI      ('\u0966', '\u0900', '\u0980'),
181         /**
182          * The Bengali range with the Bengali digits.
183          */
184         BENGALI         ('\u09e6', '\u0980', '\u0a00'),
185         /**
186          * The Gurmukhi range with the Gurmukhi digits.
187          */
188         GURMUKHI        ('\u0a66', '\u0a00', '\u0a80'),
189         /**
190          * The Gujarati range with the Gujarati digits.
191          */
192         GUJARATI        ('\u0ae6', '\u0b00', '\u0b80'),
193         /**
194          * The Oriya range with the Oriya digits.
195          */
196         ORIYA           ('\u0b66', '\u0b00', '\u0b80'),
197         /**
198          * The Tamil range with the Tamil digits.
199          */
200         TAMIL           ('\u0be6', '\u0b80', '\u0c00'),
201         /**
202          * The Telugu range with the Telugu digits.
203          */
204         TELUGU          ('\u0c66', '\u0c00', '\u0c80'),
205         /**
206          * The Kannada range with the Kannada digits.
207          */
208         KANNADA         ('\u0ce6', '\u0c80', '\u0d00'),
209         /**
210          * The Malayalam range with the Malayalam digits.
211          */
212         MALAYALAM       ('\u0d66', '\u0d00', '\u0d80'),
213         /**
214          * The Thai range with the Thai digits.
215          */
216         THAI            ('\u0e50', '\u0e00', '\u0e80'),
217         /**
218          * The Lao range with the Lao digits.
219          */
220         LAO             ('\u0ed0', '\u0e80', '\u0f00'),
221         /**
222          * The Tibetan range with the Tibetan digits.
223          */
224         TIBETAN         ('\u0f20', '\u0f00', '\u1000'),
225         /**
226          * The Myanmar range with the Myanmar digits.
227          */
228         MYANMAR         ('\u1040', '\u1000', '\u1080'),
229         /**
230          * The Ethiopic range with the Ethiopic digits. Ethiopic
231          * does not have a decimal digit 0 so Latin (European) 0 is
232          * used.
233          */
234         ETHIOPIC        ('\u1369', '\u1200', '\u1380') {
235             @Override
getNumericBase()236             char getNumericBase() { return 1; }
237         },
238         /**
239          * The Khmer range with the Khmer digits.
240          */
241         KHMER           ('\u17e0', '\u1780', '\u1800'),
242         /**
243          * The Mongolian range with the Mongolian digits.
244          */
245         MONGOLIAN       ('\u1810', '\u1800', '\u1900'),
246         // The order of EUROPEAN to MOGOLIAN must be consistent
247         // with the bitmask-based constants.
248 
249         /**
250          * The N'Ko range with the N'Ko digits.
251          */
252         NKO             ('\u07c0', '\u07c0', '\u0800'),
253         /**
254          * The Myanmar range with the Myanmar Shan digits.
255          */
256         MYANMAR_SHAN    ('\u1090', '\u1000', '\u10a0'),
257         /**
258          * The Limbu range with the Limbu digits.
259          */
260         LIMBU           ('\u1946', '\u1900', '\u1950'),
261         /**
262          * The New Tai Lue range with the New Tai Lue digits.
263          */
264         NEW_TAI_LUE     ('\u19d0', '\u1980', '\u19e0'),
265         /**
266          * The Balinese range with the Balinese digits.
267          */
268         BALINESE        ('\u1b50', '\u1b00', '\u1b80'),
269         /**
270          * The Sundanese range with the Sundanese digits.
271          */
272         SUNDANESE       ('\u1bb0', '\u1b80', '\u1bc0'),
273         /**
274          * The Lepcha range with the Lepcha digits.
275          */
276         LEPCHA          ('\u1c40', '\u1c00', '\u1c50'),
277         /**
278          * The Ol Chiki range with the Ol Chiki digits.
279          */
280         OL_CHIKI        ('\u1c50', '\u1c50', '\u1c80'),
281         /**
282          * The Vai range with the Vai digits.
283          */
284         VAI             ('\ua620', '\ua500', '\ua640'),
285         /**
286          * The Saurashtra range with the Saurashtra digits.
287          */
288         SAURASHTRA      ('\ua8d0', '\ua880', '\ua8e0'),
289         /**
290          * The Kayah Li range with the Kayah Li digits.
291          */
292         KAYAH_LI        ('\ua900', '\ua900', '\ua930'),
293         /**
294          * The Cham range with the Cham digits.
295          */
296         CHAM            ('\uaa50', '\uaa00', '\uaa60'),
297         /**
298          * The Tai Tham Hora range with the Tai Tham Hora digits.
299          */
300         TAI_THAM_HORA   ('\u1a80', '\u1a20', '\u1ab0'),
301         /**
302          * The Tai Tham Tham range with the Tai Tham Tham digits.
303          */
304         TAI_THAM_THAM   ('\u1a90', '\u1a20', '\u1ab0'),
305         /**
306          * The Javanese range with the Javanese digits.
307          */
308         JAVANESE        ('\ua9d0', '\ua980', '\ua9e0'),
309         /**
310          * The Meetei Mayek range with the Meetei Mayek digits.
311          */
312         MEETEI_MAYEK    ('\uabf0', '\uabc0', '\uac00');
313 
toRangeIndex(Range script)314         private static int toRangeIndex(Range script) {
315             int index = script.ordinal();
316             return index < NUM_KEYS ? index : -1;
317         }
318 
indexToRange(int index)319         private static Range indexToRange(int index) {
320             return index < NUM_KEYS ? Range.values()[index] : null;
321         }
322 
toRangeMask(Set<Range> ranges)323         private static int toRangeMask(Set<Range> ranges) {
324             int m = 0;
325             for (Range range : ranges) {
326                 int index = range.ordinal();
327                 if (index < NUM_KEYS) {
328                     m |= 1 << index;
329                 }
330             }
331             return m;
332         }
333 
maskToRangeSet(int mask)334         private static Set<Range> maskToRangeSet(int mask) {
335             Set<Range> set = EnumSet.noneOf(Range.class);
336             Range[] a = Range.values();
337             for (int i = 0; i < NUM_KEYS; i++) {
338                 if ((mask & (1 << i)) != 0) {
339                     set.add(a[i]);
340                 }
341             }
342             return set;
343         }
344 
345         // base character of range digits
346         private final int base;
347         // Unicode range
348         private final int start, // inclusive
349                           end;   // exclusive
350 
Range(int base, int start, int end)351         private Range(int base, int start, int end) {
352             this.base = base - ('0' + getNumericBase());
353             this.start = start;
354             this.end = end;
355         }
356 
getDigitBase()357         private int getDigitBase() {
358             return base;
359         }
360 
getNumericBase()361         char getNumericBase() {
362             return 0;
363         }
364 
inRange(int c)365         private boolean inRange(int c) {
366             return start <= c && c < end;
367         }
368     }
369 
370     /** Index of the context for contextual shaping; values range from 0 to 18, the same span as the private *_KEY constants. */
371     private int key;
372 
373     /** Flag word: the high bit (CONTEXTUAL_MASK) indicates whether to shape
374      *  contextually, and bits 0-18 select which digit ranges to shape.
375      */
376     private int mask;
377 
378     /**
379      * The context {@code Range} for contextual shaping or the {@code
380      * Range} for non-contextual shaping. {@code null} for the bit
381      * mask-based API.
382      *
383      * @since 1.7
384      */
385     private Range shapingRange;
386 
387     /**
388      * {@code Set<Range>} indicating which Unicode ranges to
389      * shape. {@code null} for the bit mask-based API.
390      */
391     private transient Set<Range> rangeSet;
392 
393     /**
394      * rangeSet.toArray() value. Sorted by Range.base when the number
395      * of elements is greater than BSEARCH_THRESHOLD.
396      */
397     private transient Range[] rangeArray;
398 
399     /**
400      * If more than BSEARCH_THRESHOLD ranges are specified, rangeForCodePoint uses binary search; otherwise it scans rangeArray linearly.
401      */
402     private static final int BSEARCH_THRESHOLD = 3;
403 
404     private static final long serialVersionUID = -8022764705923730308L;
405 
406     /** Identifies the Latin-1 (European) and extended range, and
407      *  Latin-1 (European) decimal base.
408      */
409     public static final int EUROPEAN = 1<<0;
410 
411     /** Identifies the ARABIC range and decimal base. */
412     public static final int ARABIC = 1<<1;
413 
414     /** Identifies the ARABIC range and the Extended Arabic-Indic (EASTERN_ARABIC) decimal base. */
415     public static final int EASTERN_ARABIC = 1<<2;
416 
417     /** Identifies the DEVANAGARI range and decimal base. */
418     public static final int DEVANAGARI = 1<<3;
419 
420     /** Identifies the BENGALI range and decimal base. */
421     public static final int BENGALI = 1<<4;
422 
423     /** Identifies the GURMUKHI range and decimal base. */
424     public static final int GURMUKHI = 1<<5;
425 
426     /** Identifies the GUJARATI range and decimal base. */
427     public static final int GUJARATI = 1<<6;
428 
429     /** Identifies the ORIYA range and decimal base. */
430     public static final int ORIYA = 1<<7;
431 
432     /** Identifies the TAMIL range and decimal base. */
433     // TAMIL DIGIT ZERO was added in Unicode 4.1
434     public static final int TAMIL = 1<<8;
435 
436     /** Identifies the TELUGU range and decimal base. */
437     public static final int TELUGU = 1<<9;
438 
439     /** Identifies the KANNADA range and decimal base. */
440     public static final int KANNADA = 1<<10;
441 
442     /** Identifies the MALAYALAM range and decimal base. */
443     public static final int MALAYALAM = 1<<11;
444 
445     /** Identifies the THAI range and decimal base. */
446     public static final int THAI = 1<<12;
447 
448     /** Identifies the LAO range and decimal base. */
449     public static final int LAO = 1<<13;
450 
451     /** Identifies the TIBETAN range and decimal base. */
452     public static final int TIBETAN = 1<<14;
453 
454     /** Identifies the MYANMAR range and decimal base. */
455     public static final int MYANMAR = 1<<15;
456 
457     /** Identifies the ETHIOPIC range and decimal base. */
458     public static final int ETHIOPIC = 1<<16;
459 
460     /** Identifies the KHMER range and decimal base. */
461     public static final int KHMER = 1<<17;
462 
463     /** Identifies the MONGOLIAN range and decimal base. */
464     public static final int MONGOLIAN = 1<<18;
465 
466     /** Identifies all ranges, for full contextual shaping.
467      *
468      * <p>This constant specifies all of the bit mask-based
469      * ranges. Use {@code EnumSet.allOf(NumericShaper.Range.class)} to
470      * specify all of the enum-based ranges.
471      */
472     public static final int ALL_RANGES = 0x0007ffff; // bits 0-18 set, one per constant above
473 
474     private static final int EUROPEAN_KEY = 0; // each *_KEY equals the bit position of the same-named public mask constant (e.g. ARABIC == 1 << ARABIC_KEY)
475     private static final int ARABIC_KEY = 1;
476     private static final int EASTERN_ARABIC_KEY = 2;
477     private static final int DEVANAGARI_KEY = 3;
478     private static final int BENGALI_KEY = 4;
479     private static final int GURMUKHI_KEY = 5;
480     private static final int GUJARATI_KEY = 6;
481     private static final int ORIYA_KEY = 7;
482     private static final int TAMIL_KEY = 8;
483     private static final int TELUGU_KEY = 9;
484     private static final int KANNADA_KEY = 10;
485     private static final int MALAYALAM_KEY = 11;
486     private static final int THAI_KEY = 12;
487     private static final int LAO_KEY = 13;
488     private static final int TIBETAN_KEY = 14;
489     private static final int MYANMAR_KEY = 15;
490     private static final int ETHIOPIC_KEY = 16;
491     private static final int KHMER_KEY = 17;
492     private static final int MONGOLIAN_KEY = 18;
493 
494     private static final int NUM_KEYS = MONGOLIAN_KEY + 1; // fixed: the 19 bit mask-based ranges; Range ordinals at or above this have no bit mask counterpart
495 
496     private static final int CONTEXTUAL_MASK = 1<<31; // high bit of an int mask, clear of the range bits 0-18
497 
498     private static final char[] bases = { // per-range offset (range digit minus the ASCII digit it stands for), listed in *_KEY order
499         '\u0030' - '\u0030', // EUROPEAN
500         '\u0660' - '\u0030', // ARABIC-INDIC
501         '\u06f0' - '\u0030', // EXTENDED ARABIC-INDIC (EASTERN_ARABIC)
502         '\u0966' - '\u0030', // DEVANAGARI
503         '\u09e6' - '\u0030', // BENGALI
504         '\u0a66' - '\u0030', // GURMUKHI
505         '\u0ae6' - '\u0030', // GUJARATI
506         '\u0b66' - '\u0030', // ORIYA
507         '\u0be6' - '\u0030', // TAMIL - zero was added in Unicode 4.1
508         '\u0c66' - '\u0030', // TELUGU
509         '\u0ce6' - '\u0030', // KANNADA
510         '\u0d66' - '\u0030', // MALAYALAM
511         '\u0e50' - '\u0030', // THAI
512         '\u0ed0' - '\u0030', // LAO
513         '\u0f20' - '\u0030', // TIBETAN
514         '\u1040' - '\u0030', // MYANMAR
515         '\u1369' - '\u0031', // ETHIOPIC - no zero, so the offset is measured from ASCII digit one (U+0031)
516         '\u17e0' - '\u0030', // KHMER
517         '\u1810' - '\u0030', // MONGOLIAN
518     };
519 
520     // some ranges adjoin or overlap (ARABIC and EASTERN_ARABIC share one span), rethink if we want to do a binary search on this
521 
522     private static final char[] contexts = { // (start inclusive, end exclusive) pairs in *_KEY order; getContextKey maps an even index i to key i / 2
523         '\u0000', '\u0300', // 'EUROPEAN' (really latin-1 and extended)
524         '\u0600', '\u0780', // ARABIC
525         '\u0600', '\u0780', // EASTERN_ARABIC -- note overlap with arabic
526         '\u0900', '\u0980', // DEVANAGARI
527         '\u0980', '\u0a00', // BENGALI
528         '\u0a00', '\u0a80', // GURMUKHI
529         '\u0a80', '\u0b00', // GUJARATI
530         '\u0b00', '\u0b80', // ORIYA
531         '\u0b80', '\u0c00', // TAMIL
532         '\u0c00', '\u0c80', // TELUGU
533         '\u0c80', '\u0d00', // KANNADA
534         '\u0d00', '\u0d80', // MALAYALAM
535         '\u0e00', '\u0e80', // THAI
536         '\u0e80', '\u0f00', // LAO
537         '\u0f00', '\u1000', // TIBETAN
538         '\u1000', '\u1080', // MYANMAR
539         '\u1200', '\u1380', // ETHIOPIC - note missing zero
540         '\u1780', '\u1800', // KHMER
541         '\u1800', '\u1900', // MONGOLIAN
542         '\uffff', // sentinel: read by getContextKey as the upper bound of the gap after the last range
543     };
544 
545     // assume most characters are near each other so probing the cache is infrequent,
546     // and a linear probe is ok.
547 
548     private static int ctCache = 0;
549     private static int ctCacheLimit = contexts.length - 2;
550 
551     // warning, synchronize access to this as it modifies state
getContextKey(char c)552     private static int getContextKey(char c) {
553         if (c < contexts[ctCache]) {
554             while (ctCache > 0 && c < contexts[ctCache]) --ctCache;
555         } else if (c >= contexts[ctCache + 1]) {
556             while (ctCache < ctCacheLimit && c >= contexts[ctCache + 1]) ++ctCache;
557         }
558 
559         // if we're not in a known range, then return EUROPEAN as the range key
560         return (ctCache & 0x1) == 0 ? (ctCache / 2) : EUROPEAN_KEY;
561     }
562 
563     // cache for the NumericShaper.Range version
564     private transient volatile Range currentRange = Range.EUROPEAN;
565 
rangeForCodePoint(final int codepoint)566     private Range rangeForCodePoint(final int codepoint) {
567         if (currentRange.inRange(codepoint)) {
568             return currentRange;
569         }
570 
571         final Range[] ranges = rangeArray;
572         if (ranges.length > BSEARCH_THRESHOLD) {
573             int lo = 0;
574             int hi = ranges.length - 1;
575             while (lo <= hi) {
576                 int mid = (lo + hi) / 2;
577                 Range range = ranges[mid];
578                 if (codepoint < range.start) {
579                     hi = mid - 1;
580                 } else if (codepoint >= range.end) {
581                     lo = mid + 1;
582                 } else {
583                     currentRange = range;
584                     return range;
585                 }
586             }
587         } else {
588             for (int i = 0; i < ranges.length; i++) {
589                 if (ranges[i].inRange(codepoint)) {
590                     return ranges[i];
591                 }
592             }
593         }
594         return Range.EUROPEAN;
595     }
596 
597     /*
598      * A range table of strong directional characters (types L, R, AL).
599      * Entries are range starts in ascending order. Even (left) indexes are starts
600      * of ranges of non-strong-directional (or undefined) characters, odd (right)
601      * indexes are starts of ranges of strong directional characters.
602      */
603     private static int[] strongTable = { // searched by isStrongDirectional via search(); an odd resulting index means "strong"
604         0x0000, 0x0041,
605         0x005b, 0x0061,
606         0x007b, 0x00aa,
607         0x00ab, 0x00b5,
608         0x00b6, 0x00ba,
609         0x00bb, 0x00c0,
610         0x00d7, 0x00d8,
611         0x00f7, 0x00f8,
612         0x02b9, 0x02bb,
613         0x02c2, 0x02d0,
614         0x02d2, 0x02e0,
615         0x02e5, 0x02ee,
616         0x02ef, 0x0370,
617         0x0374, 0x0376,
618         0x037e, 0x0386,
619         0x0387, 0x0388,
620         0x03f6, 0x03f7,
621         0x0483, 0x048a,
622         0x058a, 0x05be,
623         0x05bf, 0x05c0,
624         0x05c1, 0x05c3,
625         0x05c4, 0x05c6,
626         0x05c7, 0x05d0,
627         0x0600, 0x0608,
628         0x0609, 0x060b,
629         0x060c, 0x060d,
630         0x060e, 0x061b,
631         0x064b, 0x066d,
632         0x0670, 0x0671,
633         0x06d6, 0x06e5,
634         0x06e7, 0x06ee,
635         0x06f0, 0x06fa,
636         0x0711, 0x0712,
637         0x0730, 0x074d,
638         0x07a6, 0x07b1,
639         0x07eb, 0x07f4,
640         0x07f6, 0x07fa,
641         0x0816, 0x081a,
642         0x081b, 0x0824,
643         0x0825, 0x0828,
644         0x0829, 0x0830,
645         0x0859, 0x085e,
646         0x08e4, 0x0903,
647         0x093a, 0x093b,
648         0x093c, 0x093d,
649         0x0941, 0x0949,
650         0x094d, 0x094e,
651         0x0951, 0x0958,
652         0x0962, 0x0964,
653         0x0981, 0x0982,
654         0x09bc, 0x09bd,
655         0x09c1, 0x09c7,
656         0x09cd, 0x09ce,
657         0x09e2, 0x09e6,
658         0x09f2, 0x09f4,
659         0x09fb, 0x0a03,
660         0x0a3c, 0x0a3e,
661         0x0a41, 0x0a59,
662         0x0a70, 0x0a72,
663         0x0a75, 0x0a83,
664         0x0abc, 0x0abd,
665         0x0ac1, 0x0ac9,
666         0x0acd, 0x0ad0,
667         0x0ae2, 0x0ae6,
668         0x0af1, 0x0b02,
669         0x0b3c, 0x0b3d,
670         0x0b3f, 0x0b40,
671         0x0b41, 0x0b47,
672         0x0b4d, 0x0b57,
673         0x0b62, 0x0b66,
674         0x0b82, 0x0b83,
675         0x0bc0, 0x0bc1,
676         0x0bcd, 0x0bd0,
677         0x0bf3, 0x0c01,
678         0x0c3e, 0x0c41,
679         0x0c46, 0x0c58,
680         0x0c62, 0x0c66,
681         0x0c78, 0x0c7f,
682         0x0cbc, 0x0cbd,
683         0x0ccc, 0x0cd5,
684         0x0ce2, 0x0ce6,
685         0x0d41, 0x0d46,
686         0x0d4d, 0x0d4e,
687         0x0d62, 0x0d66,
688         0x0dca, 0x0dcf,
689         0x0dd2, 0x0dd8,
690         0x0e31, 0x0e32,
691         0x0e34, 0x0e40,
692         0x0e47, 0x0e4f,
693         0x0eb1, 0x0eb2,
694         0x0eb4, 0x0ebd,
695         0x0ec8, 0x0ed0,
696         0x0f18, 0x0f1a,
697         0x0f35, 0x0f36,
698         0x0f37, 0x0f38,
699         0x0f39, 0x0f3e,
700         0x0f71, 0x0f7f,
701         0x0f80, 0x0f85,
702         0x0f86, 0x0f88,
703         0x0f8d, 0x0fbe,
704         0x0fc6, 0x0fc7,
705         0x102d, 0x1031,
706         0x1032, 0x1038,
707         0x1039, 0x103b,
708         0x103d, 0x103f,
709         0x1058, 0x105a,
710         0x105e, 0x1061,
711         0x1071, 0x1075,
712         0x1082, 0x1083,
713         0x1085, 0x1087,
714         0x108d, 0x108e,
715         0x109d, 0x109e,
716         0x135d, 0x1360,
717         0x1390, 0x13a0,
718         0x1400, 0x1401,
719         0x1680, 0x1681,
720         0x169b, 0x16a0,
721         0x1712, 0x1720,
722         0x1732, 0x1735,
723         0x1752, 0x1760,
724         0x1772, 0x1780,
725         0x17b4, 0x17b6,
726         0x17b7, 0x17be,
727         0x17c6, 0x17c7,
728         0x17c9, 0x17d4,
729         0x17db, 0x17dc,
730         0x17dd, 0x17e0,
731         0x17f0, 0x1810,
732         0x18a9, 0x18aa,
733         0x1920, 0x1923,
734         0x1927, 0x1929,
735         0x1932, 0x1933,
736         0x1939, 0x1946,
737         0x19de, 0x1a00,
738         0x1a17, 0x1a19,
739         0x1a56, 0x1a57,
740         0x1a58, 0x1a61,
741         0x1a62, 0x1a63,
742         0x1a65, 0x1a6d,
743         0x1a73, 0x1a80,
744         0x1b00, 0x1b04,
745         0x1b34, 0x1b35,
746         0x1b36, 0x1b3b,
747         0x1b3c, 0x1b3d,
748         0x1b42, 0x1b43,
749         0x1b6b, 0x1b74,
750         0x1b80, 0x1b82,
751         0x1ba2, 0x1ba6,
752         0x1ba8, 0x1baa,
753         0x1bab, 0x1bac,
754         0x1be6, 0x1be7,
755         0x1be8, 0x1bea,
756         0x1bed, 0x1bee,
757         0x1bef, 0x1bf2,
758         0x1c2c, 0x1c34,
759         0x1c36, 0x1c3b,
760         0x1cd0, 0x1cd3,
761         0x1cd4, 0x1ce1,
762         0x1ce2, 0x1ce9,
763         0x1ced, 0x1cee,
764         0x1cf4, 0x1cf5,
765         0x1dc0, 0x1e00,
766         0x1fbd, 0x1fbe,
767         0x1fbf, 0x1fc2,
768         0x1fcd, 0x1fd0,
769         0x1fdd, 0x1fe0,
770         0x1fed, 0x1ff2,
771         0x1ffd, 0x200e,
772         0x2010, 0x2071,
773         0x2074, 0x207f,
774         0x2080, 0x2090,
775         0x20a0, 0x2102,
776         0x2103, 0x2107,
777         0x2108, 0x210a,
778         0x2114, 0x2115,
779         0x2116, 0x2119,
780         0x211e, 0x2124,
781         0x2125, 0x2126,
782         0x2127, 0x2128,
783         0x2129, 0x212a,
784         0x212e, 0x212f,
785         0x213a, 0x213c,
786         0x2140, 0x2145,
787         0x214a, 0x214e,
788         0x2150, 0x2160,
789         0x2189, 0x2336,
790         0x237b, 0x2395,
791         0x2396, 0x249c,
792         0x24ea, 0x26ac,
793         0x26ad, 0x2800,
794         0x2900, 0x2c00,
795         0x2ce5, 0x2ceb,
796         0x2cef, 0x2cf2,
797         0x2cf9, 0x2d00,
798         0x2d7f, 0x2d80,
799         0x2de0, 0x3005,
800         0x3008, 0x3021,
801         0x302a, 0x3031,
802         0x3036, 0x3038,
803         0x303d, 0x3041,
804         0x3099, 0x309d,
805         0x30a0, 0x30a1,
806         0x30fb, 0x30fc,
807         0x31c0, 0x31f0,
808         0x321d, 0x3220,
809         0x3250, 0x3260,
810         0x327c, 0x327f,
811         0x32b1, 0x32c0,
812         0x32cc, 0x32d0,
813         0x3377, 0x337b,
814         0x33de, 0x33e0,
815         0x33ff, 0x3400,
816         0x4dc0, 0x4e00,
817         0xa490, 0xa4d0,
818         0xa60d, 0xa610,
819         0xa66f, 0xa680,
820         0xa69f, 0xa6a0,
821         0xa6f0, 0xa6f2,
822         0xa700, 0xa722,
823         0xa788, 0xa789,
824         0xa802, 0xa803,
825         0xa806, 0xa807,
826         0xa80b, 0xa80c,
827         0xa825, 0xa827,
828         0xa828, 0xa830,
829         0xa838, 0xa840,
830         0xa874, 0xa880,
831         0xa8c4, 0xa8ce,
832         0xa8e0, 0xa8f2,
833         0xa926, 0xa92e,
834         0xa947, 0xa952,
835         0xa980, 0xa983,
836         0xa9b3, 0xa9b4,
837         0xa9b6, 0xa9ba,
838         0xa9bc, 0xa9bd,
839         0xaa29, 0xaa2f,
840         0xaa31, 0xaa33,
841         0xaa35, 0xaa40,
842         0xaa43, 0xaa44,
843         0xaa4c, 0xaa4d,
844         0xaab0, 0xaab1,
845         0xaab2, 0xaab5,
846         0xaab7, 0xaab9,
847         0xaabe, 0xaac0,
848         0xaac1, 0xaac2,
849         0xaaec, 0xaaee,
850         0xaaf6, 0xab01,
851         0xabe5, 0xabe6,
852         0xabe8, 0xabe9,
853         0xabed, 0xabf0,
854         0xfb1e, 0xfb1f,
855         0xfb29, 0xfb2a,
856         0xfd3e, 0xfd50,
857         0xfdfd, 0xfe70,
858         0xfeff, 0xff21,
859         0xff3b, 0xff41,
860         0xff5b, 0xff66,
861         0xffe0, 0x10000,
862         0x10101, 0x10102,
863         0x10140, 0x101d0,
864         0x101fd, 0x10280,
865         0x1091f, 0x10920,
866         0x10a01, 0x10a10,
867         0x10a38, 0x10a40,
868         0x10b39, 0x10b40,
869         0x10e60, 0x11000,
870         0x11001, 0x11002,
871         0x11038, 0x11047,
872         0x11052, 0x11066,
873         0x11080, 0x11082,
874         0x110b3, 0x110b7,
875         0x110b9, 0x110bb,
876         0x11100, 0x11103,
877         0x11127, 0x1112c,
878         0x1112d, 0x11136,
879         0x11180, 0x11182,
880         0x111b6, 0x111bf,
881         0x116ab, 0x116ac,
882         0x116ad, 0x116ae,
883         0x116b0, 0x116b6,
884         0x116b7, 0x116c0,
885         0x16f8f, 0x16f93,
886         0x1d167, 0x1d16a,
887         0x1d173, 0x1d183,
888         0x1d185, 0x1d18c,
889         0x1d1aa, 0x1d1ae,
890         0x1d200, 0x1d360,
891         0x1d6db, 0x1d6dc,
892         0x1d715, 0x1d716,
893         0x1d74f, 0x1d750,
894         0x1d789, 0x1d78a,
895         0x1d7c3, 0x1d7c4,
896         0x1d7ce, 0x1ee00,
897         0x1eef0, 0x1f110,
898         0x1f16a, 0x1f170,
899         0x1f300, 0x1f48c,
900         0x1f48d, 0x1f524,
901         0x1f525, 0x20000,
902         0xe0001, 0xf0000,
903         0x10fffe, 0x10ffff // sentinel
904     };
905 
906 
907     // use a binary search with a cache
908 
909     private transient volatile int stCache = 0;
910 
isStrongDirectional(char c)911     private boolean isStrongDirectional(char c) {
912         int cachedIndex = stCache;
913         if (c < strongTable[cachedIndex]) {
914             cachedIndex = search(c, strongTable, 0, cachedIndex);
915         } else if (c >= strongTable[cachedIndex + 1]) {
916             cachedIndex = search(c, strongTable, cachedIndex + 1,
917                                  strongTable.length - cachedIndex - 1);
918         }
919         boolean val = (cachedIndex & 0x1) == 1;
920         stCache = cachedIndex;
921         return val;
922     }
923 
getKeyFromMask(int mask)924     private static int getKeyFromMask(int mask) {
925         int key = 0;
926         while (key < NUM_KEYS && ((mask & (1<<key)) == 0)) {
927             ++key;
928         }
929         if (key == NUM_KEYS || ((mask & ~(1<<key)) != 0)) {
930             throw new IllegalArgumentException("invalid shaper: " + Integer.toHexString(mask));
931         }
932         return key;
933     }
934 
935     /**
936      * Returns a shaper for the provided unicode range.  All
937      * Latin-1 (EUROPEAN) digits are converted
938      * to the corresponding decimal unicode digits.
939      * @param singleRange the specified Unicode range
940      * @return a non-contextual numeric shaper
941      * @throws IllegalArgumentException if the range is not a single range
942      */
getShaper(int singleRange)943     public static NumericShaper getShaper(int singleRange) {
944         int key = getKeyFromMask(singleRange);
945         return new NumericShaper(key, singleRange);
946     }
947 
948     /**
949      * Returns a shaper for the provided Unicode
950      * range. All Latin-1 (EUROPEAN) digits are converted to the
951      * corresponding decimal digits of the specified Unicode range.
952      *
953      * @param singleRange the Unicode range given by a {@link
954      *                    NumericShaper.Range} constant.
955      * @return a non-contextual {@code NumericShaper}.
956      * @throws NullPointerException if {@code singleRange} is {@code null}
957      * @since 1.7
958      */
getShaper(Range singleRange)959     public static NumericShaper getShaper(Range singleRange) {
960         return new NumericShaper(singleRange, EnumSet.of(singleRange));
961     }
962 
963     /**
964      * Returns a contextual shaper for the provided unicode range(s).
965      * Latin-1 (EUROPEAN) digits are converted to the decimal digits
966      * corresponding to the range of the preceding text, if the
967      * range is one of the provided ranges.  Multiple ranges are
968      * represented by or-ing the values together, such as,
969      * <code>NumericShaper.ARABIC | NumericShaper.THAI</code>.  The
970      * shaper assumes EUROPEAN as the starting context, that is, if
971      * EUROPEAN digits are encountered before any strong directional
972      * text in the string, the context is presumed to be EUROPEAN, and
973      * so the digits will not shape.
974      * @param ranges the specified Unicode ranges
975      * @return a shaper for the specified ranges
976      */
getContextualShaper(int ranges)977     public static NumericShaper getContextualShaper(int ranges) {
978         ranges |= CONTEXTUAL_MASK;
979         return new NumericShaper(EUROPEAN_KEY, ranges);
980     }
981 
982     /**
983      * Returns a contextual shaper for the provided Unicode
984      * range(s). The Latin-1 (EUROPEAN) digits are converted to the
985      * decimal digits corresponding to the range of the preceding
986      * text, if the range is one of the provided ranges.
987      *
988      * <p>The shaper assumes EUROPEAN as the starting context, that
989      * is, if EUROPEAN digits are encountered before any strong
990      * directional text in the string, the context is presumed to be
991      * EUROPEAN, and so the digits will not shape.
992      *
993      * @param ranges the specified Unicode ranges
994      * @return a contextual shaper for the specified ranges
995      * @throws NullPointerException if {@code ranges} is {@code null}.
996      * @since 1.7
997      */
getContextualShaper(Set<Range> ranges)998     public static NumericShaper getContextualShaper(Set<Range> ranges) {
999         NumericShaper shaper = new NumericShaper(Range.EUROPEAN, ranges);
1000         shaper.mask = CONTEXTUAL_MASK;
1001         return shaper;
1002     }
1003 
1004     /**
1005      * Returns a contextual shaper for the provided unicode range(s).
1006      * Latin-1 (EUROPEAN) digits will be converted to the decimal digits
1007      * corresponding to the range of the preceding text, if the
1008      * range is one of the provided ranges.  Multiple ranges are
1009      * represented by or-ing the values together, for example,
1010      * <code>NumericShaper.ARABIC | NumericShaper.THAI</code>.  The
1011      * shaper uses defaultContext as the starting context.
1012      * @param ranges the specified Unicode ranges
1013      * @param defaultContext the starting context, such as
1014      * <code>NumericShaper.EUROPEAN</code>
1015      * @return a shaper for the specified Unicode ranges.
1016      * @throws IllegalArgumentException if the specified
1017      * <code>defaultContext</code> is not a single valid range.
1018      */
getContextualShaper(int ranges, int defaultContext)1019     public static NumericShaper getContextualShaper(int ranges, int defaultContext) {
1020         int key = getKeyFromMask(defaultContext);
1021         ranges |= CONTEXTUAL_MASK;
1022         return new NumericShaper(key, ranges);
1023     }
1024 
1025     /**
1026      * Returns a contextual shaper for the provided Unicode range(s).
1027      * The Latin-1 (EUROPEAN) digits will be converted to the decimal
1028      * digits corresponding to the range of the preceding text, if the
1029      * range is one of the provided ranges. The shaper uses {@code
1030      * defaultContext} as the starting context.
1031      *
1032      * @param ranges the specified Unicode ranges
1033      * @param defaultContext the starting context, such as
1034      *                       {@code NumericShaper.Range.EUROPEAN}
1035      * @return a contextual shaper for the specified Unicode ranges.
1036      * @throws NullPointerException
1037      *         if {@code ranges} or {@code defaultContext} is {@code null}
1038      * @since 1.7
1039      */
getContextualShaper(Set<Range> ranges, Range defaultContext)1040     public static NumericShaper getContextualShaper(Set<Range> ranges,
1041                                                     Range defaultContext) {
1042         if (defaultContext == null) {
1043             throw new NullPointerException();
1044         }
1045         NumericShaper shaper = new NumericShaper(defaultContext, ranges);
1046         shaper.mask = CONTEXTUAL_MASK;
1047         return shaper;
1048     }
1049 
1050     /**
1051      * Private constructor.
1052      */
NumericShaper(int key, int mask)1053     private NumericShaper(int key, int mask) {
1054         this.key = key;
1055         this.mask = mask;
1056     }
1057 
NumericShaper(Range defaultContext, Set<Range> ranges)1058     private NumericShaper(Range defaultContext, Set<Range> ranges) {
1059         shapingRange = defaultContext;
1060         rangeSet = EnumSet.copyOf(ranges); // throws NPE if ranges is null.
1061 
1062         // Give precedance to EASTERN_ARABIC if both ARABIC and
1063         // EASTERN_ARABIC are specified.
1064         if (rangeSet.contains(Range.EASTERN_ARABIC)
1065             && rangeSet.contains(Range.ARABIC)) {
1066             rangeSet.remove(Range.ARABIC);
1067         }
1068 
1069         // As well as the above case, give precedance to TAI_THAM_THAM if both
1070         // TAI_THAM_HORA and TAI_THAM_THAM are specified.
1071         if (rangeSet.contains(Range.TAI_THAM_THAM)
1072             && rangeSet.contains(Range.TAI_THAM_HORA)) {
1073             rangeSet.remove(Range.TAI_THAM_HORA);
1074         }
1075 
1076         rangeArray = rangeSet.toArray(new Range[rangeSet.size()]);
1077         if (rangeArray.length > BSEARCH_THRESHOLD) {
1078             // sort rangeArray for binary search
1079             Arrays.sort(rangeArray,
1080                         new Comparator<Range>() {
1081                             public int compare(Range s1, Range s2) {
1082                                 return s1.base > s2.base ? 1 : s1.base == s2.base ? 0 : -1;
1083                             }
1084                         });
1085         }
1086     }
1087 
1088     /**
1089      * Converts the digits in the text that occur between start and
1090      * start + count.
1091      * @param text an array of characters to convert
1092      * @param start the index into <code>text</code> to start
1093      *        converting
1094      * @param count the number of characters in <code>text</code>
1095      *        to convert
1096      * @throws IndexOutOfBoundsException if start or start + count is
1097      *        out of bounds
1098      * @throws NullPointerException if text is null
1099      */
shape(char[] text, int start, int count)1100     public void shape(char[] text, int start, int count) {
1101         checkParams(text, start, count);
1102         if (isContextual()) {
1103             if (rangeSet == null) {
1104                 shapeContextually(text, start, count, key);
1105             } else {
1106                 shapeContextually(text, start, count, shapingRange);
1107             }
1108         } else {
1109             shapeNonContextually(text, start, count);
1110         }
1111     }
1112 
1113     /**
1114      * Converts the digits in the text that occur between start and
1115      * start + count, using the provided context.
1116      * Context is ignored if the shaper is not a contextual shaper.
1117      * @param text an array of characters
1118      * @param start the index into <code>text</code> to start
1119      *        converting
1120      * @param count the number of characters in <code>text</code>
1121      *        to convert
1122      * @param context the context to which to convert the
1123      *        characters, such as <code>NumericShaper.EUROPEAN</code>
1124      * @throws IndexOutOfBoundsException if start or start + count is
1125      *        out of bounds
1126      * @throws NullPointerException if text is null
1127      * @throws IllegalArgumentException if this is a contextual shaper
1128      * and the specified <code>context</code> is not a single valid
1129      * range.
1130      */
shape(char[] text, int start, int count, int context)1131     public void shape(char[] text, int start, int count, int context) {
1132         checkParams(text, start, count);
1133         if (isContextual()) {
1134             int ctxKey = getKeyFromMask(context);
1135             if (rangeSet == null) {
1136                 shapeContextually(text, start, count, ctxKey);
1137             } else {
1138                 shapeContextually(text, start, count, Range.values()[ctxKey]);
1139             }
1140         } else {
1141             shapeNonContextually(text, start, count);
1142         }
1143     }
1144 
1145     /**
1146      * Converts the digits in the text that occur between {@code
1147      * start} and {@code start + count}, using the provided {@code
1148      * context}. {@code Context} is ignored if the shaper is not a
1149      * contextual shaper.
1150      *
1151      * @param text  a {@code char} array
1152      * @param start the index into {@code text} to start converting
1153      * @param count the number of {@code char}s in {@code text}
1154      *              to convert
1155      * @param context the context to which to convert the characters,
1156      *                such as {@code NumericShaper.Range.EUROPEAN}
1157      * @throws IndexOutOfBoundsException
1158      *         if {@code start} or {@code start + count} is out of bounds
1159      * @throws NullPointerException
1160      *         if {@code text} or {@code context} is null
1161      * @since 1.7
1162      */
shape(char[] text, int start, int count, Range context)1163     public void shape(char[] text, int start, int count, Range context) {
1164         checkParams(text, start, count);
1165         if (context == null) {
1166             throw new NullPointerException("context is null");
1167         }
1168 
1169         if (isContextual()) {
1170             if (rangeSet != null) {
1171                 shapeContextually(text, start, count, context);
1172             } else {
1173                 int key = Range.toRangeIndex(context);
1174                 if (key >= 0) {
1175                     shapeContextually(text, start, count, key);
1176                 } else {
1177                     shapeContextually(text, start, count, shapingRange);
1178                 }
1179             }
1180         } else {
1181             shapeNonContextually(text, start, count);
1182         }
1183     }
1184 
checkParams(char[] text, int start, int count)1185     private void checkParams(char[] text, int start, int count) {
1186         if (text == null) {
1187             throw new NullPointerException("text is null");
1188         }
1189         if ((start < 0)
1190             || (start > text.length)
1191             || ((start + count) < 0)
1192             || ((start + count) > text.length)) {
1193             throw new IndexOutOfBoundsException(
1194                 "bad start or count for text of length " + text.length);
1195         }
1196     }
1197 
1198     /**
1199      * Returns a <code>boolean</code> indicating whether or not
1200      * this shaper shapes contextually.
1201      * @return <code>true</code> if this shaper is contextual;
1202      *         <code>false</code> otherwise.
1203      */
isContextual()1204     public boolean isContextual() {
1205         return (mask & CONTEXTUAL_MASK) != 0;
1206     }
1207 
1208     /**
1209      * Returns an <code>int</code> that ORs together the values for
1210      * all the ranges that will be shaped.
1211      * <p>
1212      * For example, to check if a shaper shapes to Arabic, you would use the
1213      * following:
1214      * <blockquote>
1215      *   {@code if ((shaper.getRanges() & shaper.ARABIC) != 0) &#123; ... }
1216      * </blockquote>
1217      *
1218      * <p>Note that this method supports only the bit mask-based
1219      * ranges. Call {@link #getRangeSet()} for the enum-based ranges.
1220      *
1221      * @return the values for all the ranges to be shaped.
1222      */
getRanges()1223     public int getRanges() {
1224         return mask & ~CONTEXTUAL_MASK;
1225     }
1226 
1227     /**
1228      * Returns a {@code Set} representing all the Unicode ranges in
1229      * this {@code NumericShaper} that will be shaped.
1230      *
1231      * @return all the Unicode ranges to be shaped.
1232      * @since 1.7
1233      */
getRangeSet()1234     public Set<Range> getRangeSet() {
1235         if (rangeSet != null) {
1236             return EnumSet.copyOf(rangeSet);
1237         }
1238         return Range.maskToRangeSet(mask);
1239     }
1240 
1241     /**
1242      * Perform non-contextual shaping.
1243      */
shapeNonContextually(char[] text, int start, int count)1244     private void shapeNonContextually(char[] text, int start, int count) {
1245         int base;
1246         char minDigit = '0';
1247         if (shapingRange != null) {
1248             base = shapingRange.getDigitBase();
1249             minDigit += shapingRange.getNumericBase();
1250         } else {
1251             base = bases[key];
1252             if (key == ETHIOPIC_KEY) {
1253                 minDigit++; // Ethiopic doesn't use decimal zero
1254             }
1255         }
1256         for (int i = start, e = start + count; i < e; ++i) {
1257             char c = text[i];
1258             if (c >= minDigit && c <= '\u0039') {
1259                 text[i] = (char)(c + base);
1260             }
1261         }
1262     }
1263 
1264     /**
1265      * Perform contextual shaping.
1266      * Synchronized to protect caches used in getContextKey.
1267      */
shapeContextually(char[] text, int start, int count, int ctxKey)1268     private synchronized void shapeContextually(char[] text, int start, int count, int ctxKey) {
1269 
1270         // if we don't support this context, then don't shape
1271         if ((mask & (1<<ctxKey)) == 0) {
1272             ctxKey = EUROPEAN_KEY;
1273         }
1274         int lastkey = ctxKey;
1275 
1276         int base = bases[ctxKey];
1277         char minDigit = ctxKey == ETHIOPIC_KEY ? '1' : '0'; // Ethiopic doesn't use decimal zero
1278 
1279         synchronized (NumericShaper.class) {
1280             for (int i = start, e = start + count; i < e; ++i) {
1281                 char c = text[i];
1282                 if (c >= minDigit && c <= '\u0039') {
1283                     text[i] = (char)(c + base);
1284                 }
1285 
1286                 if (isStrongDirectional(c)) {
1287                     int newkey = getContextKey(c);
1288                     if (newkey != lastkey) {
1289                         lastkey = newkey;
1290 
1291                         ctxKey = newkey;
1292                         if (((mask & EASTERN_ARABIC) != 0) &&
1293                              (ctxKey == ARABIC_KEY ||
1294                               ctxKey == EASTERN_ARABIC_KEY)) {
1295                             ctxKey = EASTERN_ARABIC_KEY;
1296                         } else if (((mask & ARABIC) != 0) &&
1297                              (ctxKey == ARABIC_KEY ||
1298                               ctxKey == EASTERN_ARABIC_KEY)) {
1299                             ctxKey = ARABIC_KEY;
1300                         } else if ((mask & (1<<ctxKey)) == 0) {
1301                             ctxKey = EUROPEAN_KEY;
1302                         }
1303 
1304                         base = bases[ctxKey];
1305 
1306                         minDigit = ctxKey == ETHIOPIC_KEY ? '1' : '0'; // Ethiopic doesn't use decimal zero
1307                     }
1308                 }
1309             }
1310         }
1311     }
1312 
shapeContextually(char[] text, int start, int count, Range ctxKey)1313     private void shapeContextually(char[] text, int start, int count, Range ctxKey) {
1314         // if we don't support the specified context, then don't shape.
1315         if (ctxKey == null || !rangeSet.contains(ctxKey)) {
1316             ctxKey = Range.EUROPEAN;
1317         }
1318 
1319         Range lastKey = ctxKey;
1320         int base = ctxKey.getDigitBase();
1321         char minDigit = (char)('0' + ctxKey.getNumericBase());
1322         final int end = start + count;
1323         for (int i = start; i < end; ++i) {
1324             char c = text[i];
1325             if (c >= minDigit && c <= '9') {
1326                 text[i] = (char)(c + base);
1327                 continue;
1328             }
1329             if (isStrongDirectional(c)) {
1330                 ctxKey = rangeForCodePoint(c);
1331                 if (ctxKey != lastKey) {
1332                     lastKey = ctxKey;
1333                     base = ctxKey.getDigitBase();
1334                     minDigit = (char)('0' + ctxKey.getNumericBase());
1335                 }
1336             }
1337         }
1338     }
1339 
1340     /**
1341      * Returns a hash code for this shaper.
1342      * @return this shaper's hash code.
1343      * @see java.lang.Object#hashCode
1344      */
hashCode()1345     public int hashCode() {
1346         int hash = mask;
1347         if (rangeSet != null) {
1348             // Use the CONTEXTUAL_MASK bit only for the enum-based
1349             // NumericShaper. A deserialized NumericShaper might have
1350             // bit masks.
1351             hash &= CONTEXTUAL_MASK;
1352             hash ^= rangeSet.hashCode();
1353         }
1354         return hash;
1355     }
1356 
1357     /**
1358      * Returns {@code true} if the specified object is an instance of
1359      * <code>NumericShaper</code> and shapes identically to this one,
1360      * regardless of the range representations, the bit mask or the
1361      * enum. For example, the following code produces {@code "true"}.
1362      * <blockquote><pre>
1363      * NumericShaper ns1 = NumericShaper.getShaper(NumericShaper.ARABIC);
1364      * NumericShaper ns2 = NumericShaper.getShaper(NumericShaper.Range.ARABIC);
1365      * System.out.println(ns1.equals(ns2));
1366      * </pre></blockquote>
1367      *
1368      * @param o the specified object to compare to this
1369      *          <code>NumericShaper</code>
1370      * @return <code>true</code> if <code>o</code> is an instance
1371      *         of <code>NumericShaper</code> and shapes in the same way;
1372      *         <code>false</code> otherwise.
1373      * @see java.lang.Object#equals(java.lang.Object)
1374      */
equals(Object o)1375     public boolean equals(Object o) {
1376         if (o != null) {
1377             try {
1378                 NumericShaper rhs = (NumericShaper)o;
1379                 if (rangeSet != null) {
1380                     if (rhs.rangeSet != null) {
1381                         return isContextual() == rhs.isContextual()
1382                             && rangeSet.equals(rhs.rangeSet)
1383                             && shapingRange == rhs.shapingRange;
1384                     }
1385                     return isContextual() == rhs.isContextual()
1386                         && rangeSet.equals(Range.maskToRangeSet(rhs.mask))
1387                         && shapingRange == Range.indexToRange(rhs.key);
1388                 } else if (rhs.rangeSet != null) {
1389                     Set<Range> rset = Range.maskToRangeSet(mask);
1390                     Range srange = Range.indexToRange(key);
1391                     return isContextual() == rhs.isContextual()
1392                         && rset.equals(rhs.rangeSet)
1393                         && srange == rhs.shapingRange;
1394                 }
1395                 return rhs.mask == mask && rhs.key == key;
1396             }
1397             catch (ClassCastException e) {
1398             }
1399         }
1400         return false;
1401     }
1402 
1403     /**
1404      * Returns a <code>String</code> that describes this shaper. This method
1405      * is used for debugging purposes only.
1406      * @return a <code>String</code> describing this shaper.
1407      */
toString()1408     public String toString() {
1409         StringBuilder buf = new StringBuilder(super.toString());
1410 
1411         buf.append("[contextual:").append(isContextual());
1412 
1413         String[] keyNames = null;
1414         if (isContextual()) {
1415             buf.append(", context:");
1416             buf.append(shapingRange == null ? Range.values()[key] : shapingRange);
1417         }
1418 
1419         if (rangeSet == null) {
1420             buf.append(", range(s): ");
1421             boolean first = true;
1422             for (int i = 0; i < NUM_KEYS; ++i) {
1423                 if ((mask & (1 << i)) != 0) {
1424                     if (first) {
1425                         first = false;
1426                     } else {
1427                         buf.append(", ");
1428                     }
1429                     buf.append(Range.values()[i]);
1430                 }
1431             }
1432         } else {
1433             buf.append(", range set: ").append(rangeSet);
1434         }
1435         buf.append(']');
1436 
1437         return buf.toString();
1438     }
1439 
1440     /**
1441      * Returns the index of the high bit in value (assuming le, actually
1442      * power of 2 >= value). value must be positive.
1443      */
getHighBit(int value)1444     private static int getHighBit(int value) {
1445         if (value <= 0) {
1446             return -32;
1447         }
1448 
1449         int bit = 0;
1450 
1451         if (value >= 1 << 16) {
1452             value >>= 16;
1453             bit += 16;
1454         }
1455 
1456         if (value >= 1 << 8) {
1457             value >>= 8;
1458             bit += 8;
1459         }
1460 
1461         if (value >= 1 << 4) {
1462             value >>= 4;
1463             bit += 4;
1464         }
1465 
1466         if (value >= 1 << 2) {
1467             value >>= 2;
1468             bit += 2;
1469         }
1470 
1471         if (value >= 1 << 1) {
1472             bit += 1;
1473         }
1474 
1475         return bit;
1476     }
1477 
1478     /**
1479      * fast binary search over subrange of array.
1480      */
search(int value, int[] array, int start, int length)1481     private static int search(int value, int[] array, int start, int length)
1482     {
1483         int power = 1 << getHighBit(length);
1484         int extra = length - power;
1485         int probe = power;
1486         int index = start;
1487 
1488         if (value >= array[index + extra]) {
1489             index += extra;
1490         }
1491 
1492         while (probe > 1) {
1493             probe >>= 1;
1494 
1495             if (value >= array[index + probe]) {
1496                 index += probe;
1497             }
1498         }
1499 
1500         return index;
1501     }
1502 
1503     /**
1504      * Converts the {@code NumericShaper.Range} enum-based parameters,
1505      * if any, to the bit mask-based counterparts and writes this
1506      * object to the {@code stream}. Any enum constants that have no
1507      * bit mask-based counterparts are ignored in the conversion.
1508      *
1509      * @param stream the output stream to write to
1510      * @throws IOException if an I/O error occurs while writing to {@code stream}
1511      * @since 1.7
1512      */
writeObject(ObjectOutputStream stream)1513     private void writeObject(ObjectOutputStream stream) throws IOException {
1514         if (shapingRange != null) {
1515             int index = Range.toRangeIndex(shapingRange);
1516             if (index >= 0) {
1517                 key = index;
1518             }
1519         }
1520         if (rangeSet != null) {
1521             mask |= Range.toRangeMask(rangeSet);
1522         }
1523         stream.defaultWriteObject();
1524     }
1525 }
1526