1 /* GENERATED SOURCE. DO NOT MODIFY. */
2 // © 2016 and later: Unicode, Inc. and others.
3 // License & terms of use: http://www.unicode.org/copyright.html#License
4 /*
5 *******************************************************************************
6 *   Copyright (C) 2001-2012, International Business Machines
7 *   Corporation and others.  All Rights Reserved.
8 *******************************************************************************
9 */
10 
11 package android.icu.text;
12 
13 import android.icu.impl.UBiDiProps;
14 import android.icu.lang.UCharacterDirection;
15 
16 /**
17  * Shape Arabic text on a character basis.
18  *
19  * <p>ArabicShaping performs basic operations for "shaping" Arabic text. It is most
20  * useful for use with legacy data formats and legacy display technology
21  * (simple terminals). All operations are performed on Unicode characters.</p>
22  *
23  * <p>Text-based shaping means that some character code points in the text are
24  * replaced by others depending on the context. It transforms one kind of text
25  * into another. In comparison, modern displays for Arabic text select
26  * appropriate, context-dependent font glyphs for each text element, which means
27  * that they transform text into a glyph vector.</p>
28  *
29  * <p>Text transformations are necessary when modern display technology is not
30  * available or when text needs to be transformed to or from legacy formats that
31  * use "shaped" characters. Since the Arabic script is cursive, connecting
32  * adjacent letters to each other, computers select images for each letter based
33  * on the surrounding letters. This usually results in four images per Arabic
34  * letter: initial, middle, final, and isolated forms. In Unicode, on the other
35  * hand, letters are normally stored abstract, and a display system is expected
36  * to select the necessary glyphs. (This makes searching and other text
37  * processing easier because the same letter has only one code.) It is possible
38  * to mimic this with text transformations because there are characters in
39  * Unicode that are rendered as letters with a specific shape
40  * (or cursive connectivity). They were included for interoperability with
41  * legacy systems and codepages, and for unsophisticated display systems.</p>
42  *
43  * <p>A second kind of text transformations is supported for Arabic digits:
44  * For compatibility with legacy codepages that only include European digits,
45  * it is possible to replace one set of digits by another, changing the
46  * character code points. These operations can be performed for either
47  * Arabic-Indic Digits (U+0660...U+0669) or Eastern (Extended) Arabic-Indic
48  * digits (U+06f0...U+06f9).</p>
49  *
50  * <p>Some replacements may result in more or fewer characters (code points).
51  * By default, this means that the destination buffer may receive text with a
52  * length different from the source length. Some legacy systems rely on the
53  * length of the text to be constant. They expect extra spaces to be added
54  * or consumed either next to the affected character or at the end of the
55  * text.</p>
56  * @hide Only a subset of ICU is exposed in Android
57  */
58 public final class ArabicShaping {
59     private final int options;
60     private boolean isLogical; // convenience
61     private boolean spacesRelativeToTextBeginEnd;
62     private char tailChar;
63 
64     /**
65      * Convert a range of text in the source array, putting the result
66      * into a range of text in the destination array, and return the number
67      * of characters written.
68      *
69      * @param source An array containing the input text
70      * @param sourceStart The start of the range of text to convert
71      * @param sourceLength The length of the range of text to convert
72      * @param dest The destination array that will receive the result.
73      *   It may be <code>NULL</code> only if  <code>destSize</code> is 0.
74      * @param destStart The start of the range of the destination buffer to use.
75      * @param destSize The size (capacity) of the destination buffer.
76      *   If <code>destSize</code> is 0, then no output is produced,
77      *   but the necessary buffer size is returned ("preflighting").  This
78      *   does not validate the text against the options, for example,
79      *   if letters are being unshaped, and spaces are being consumed
80      *   following lamalef, this will not detect a lamalef without a
81      *   corresponding space.  An error will be thrown when the actual
82      *   conversion is attempted.
83      * @return The number of chars written to the destination buffer.
84      *   If an error occurs, then no output was written, or it may be
85      *   incomplete.
86      * @throws ArabicShapingException if the text cannot be converted according to the options.
87      */
shape(char[] source, int sourceStart, int sourceLength, char[] dest, int destStart, int destSize)88     public int shape(char[] source, int sourceStart, int sourceLength,
89                      char[] dest, int destStart, int destSize) throws ArabicShapingException {
90         if (source == null) {
91             throw new IllegalArgumentException("source can not be null");
92         }
93         if (sourceStart < 0 || sourceLength < 0 || sourceStart + sourceLength > source.length) {
94             throw new IllegalArgumentException("bad source start (" + sourceStart +
95                                                ") or length (" + sourceLength +
96                                                ") for buffer of length " + source.length);
97         }
98         if (dest == null && destSize != 0) {
99             throw new IllegalArgumentException("null dest requires destSize == 0");
100         }
101         if ((destSize != 0) &&
102             (destStart < 0 || destSize < 0 || destStart + destSize > dest.length)) {
103             throw new IllegalArgumentException("bad dest start (" + destStart +
104                                                ") or size (" + destSize +
105                                                ") for buffer of length " + dest.length);
106         }
107         /* Validate input options */
108         if ( ((options&TASHKEEL_MASK) != 0) &&
109              !(((options & TASHKEEL_MASK)==TASHKEEL_BEGIN)  ||
110                ((options & TASHKEEL_MASK)==TASHKEEL_END)    ||
111                ((options & TASHKEEL_MASK)==TASHKEEL_RESIZE) ||
112                ((options & TASHKEEL_MASK)==TASHKEEL_REPLACE_BY_TATWEEL))) {
113             throw new IllegalArgumentException("Wrong Tashkeel argument");
114         }
115 
116        ///CLOVER:OFF
117        //According to Steven Loomis, the code is unreachable when you OR all the constants within the if statements
118        if(((options&LAMALEF_MASK) != 0) &&
119               !(((options & LAMALEF_MASK)==LAMALEF_BEGIN)  ||
120                 ((options & LAMALEF_MASK)==LAMALEF_END)    ||
121                 ((options & LAMALEF_MASK)==LAMALEF_RESIZE) ||
122                 ((options & LAMALEF_MASK)==LAMALEF_AUTO)   ||
123                 ((options & LAMALEF_MASK)==LAMALEF_NEAR))) {
124            throw new IllegalArgumentException("Wrong Lam Alef argument");
125        }
126        ///CLOVER:ON
127 
128        /* Validate Tashkeel (Tashkeel replacement options should be enabled in shaping mode only)*/
129        if(((options&TASHKEEL_MASK) != 0) && (options&LETTERS_MASK) == LETTERS_UNSHAPE) {
130             throw new IllegalArgumentException("Tashkeel replacement should not be enabled in deshaping mode ");
131        }
132        return internalShape(source, sourceStart, sourceLength, dest, destStart, destSize);
133     }
134 
135     /**
136      * Convert a range of text in place.  This may only be used if the Length option
137      * does not grow or shrink the text.
138      *
139      * @param source An array containing the input text
140      * @param start The start of the range of text to convert
141      * @param length The length of the range of text to convert
142      * @throws ArabicShapingException if the text cannot be converted according to the options.
143      */
shape(char[] source, int start, int length)144     public void shape(char[] source, int start, int length) throws ArabicShapingException {
145         if ((options & LAMALEF_MASK) == LAMALEF_RESIZE) {
146             throw new ArabicShapingException("Cannot shape in place with length option resize.");
147         }
148         shape(source, start, length, source, start, length);
149     }
150 
151     /**
152      * Convert a string, returning the new string.
153      *
154      * @param text the string to convert
155      * @return the converted string
156      * @throws ArabicShapingException if the string cannot be converted according to the options.
157      */
158     @dalvik.annotation.compat.UnsupportedAppUsage
shape(String text)159     public String shape(String text) throws ArabicShapingException {
160         char[] src = text.toCharArray();
161         char[] dest = src;
162         if (((options & LAMALEF_MASK) == LAMALEF_RESIZE) &&
163             ((options & LETTERS_MASK) == LETTERS_UNSHAPE)) {
164 
165             dest = new char[src.length * 2]; // max
166         }
167         int len = shape(src, 0, src.length, dest, 0, dest.length);
168 
169         return new String(dest, 0, len);
170     }
171 
172     /**
173      * Construct ArabicShaping using the options flags.
174      * The flags are as follows:<br>
175      * 'LENGTH' flags control whether the text can change size, and if not,
176      * how to maintain the size of the text when LamAlef ligatures are
177      * formed or broken.<br>
178      * 'TEXT_DIRECTION' flags control whether the text is read and written
179      * in visual order or in logical order.<br>
180      * 'LETTERS_SHAPE' flags control whether conversion is to or from
181      * presentation forms.<br>
182      * 'DIGITS' flags control whether digits are shaped, and whether from
183      * European to Arabic-Indic or vice-versa.<br>
184      * 'DIGIT_TYPE' flags control whether standard or extended Arabic-Indic
185      * digits are used when performing digit conversion.
186      */
187     @dalvik.annotation.compat.UnsupportedAppUsage
ArabicShaping(int options)188     public ArabicShaping(int options) {
189         this.options = options;
190         if ((options & DIGITS_MASK) > 0x80) {
191             throw new IllegalArgumentException("bad DIGITS options");
192         }
193 
194         isLogical = ( (options & TEXT_DIRECTION_MASK) == TEXT_DIRECTION_LOGICAL );
195         /* Validate options */
196         spacesRelativeToTextBeginEnd = ( (options & SPACES_RELATIVE_TO_TEXT_MASK) == SPACES_RELATIVE_TO_TEXT_BEGIN_END );
197         if ( (options&SHAPE_TAIL_TYPE_MASK) == SHAPE_TAIL_NEW_UNICODE){
198             tailChar = NEW_TAIL_CHAR;
199         } else {
200             tailChar = OLD_TAIL_CHAR;
201         }
202     }
203 
204     /* Seen Tail options */
205     /**
206      * Memory option: the result must have the same length as the source.
207      * Shaping mode: The SEEN family character will expand into two characters using space near
208      *               the SEEN family character(i.e. the space after the character).
209      *               if there are no spaces found, ArabicShapingException will be thrown
210      *
211      * De-shaping mode: Any Seen character followed by Tail character will be
212      *                  replaced by one cell Seen and a space will replace the Tail.
213      * Affects: Seen options
214      */
215     public static final int SEEN_TWOCELL_NEAR = 0x200000;
216 
217     /** Bit mask for Seen memory options.
218      */
219     public static final int SEEN_MASK = 0x700000;
220 
221     /* YehHamza options */
222     /**
223      * Memory option: the result must have the same length as the source.
224      * Shaping mode: The YEHHAMZA character will expand into two characters using space near it
225      *              (i.e. the space after the character)
226      *               if there are no spaces found, ArabicShapingException will be thrown
227      *
228      * De-shaping mode: Any Yeh (final or isolated) character followed by Hamza character will be
229      *                  replaced by one cell YehHamza and space will replace the Hamza.
230      * Affects: YehHamza options
231      */
232     public static final int YEHHAMZA_TWOCELL_NEAR  = 0x1000000;
233 
234 
235     /** Bit mask for YehHamza memory options.
236      */
237     public static final int YEHHAMZA_MASK = 0x3800000;
238 
239     /* New Tashkeel options */
240     /**
241      * Memory option: the result must have the same length as the source.
242      * Shaping mode: Tashkeel characters will be replaced by spaces.
243      *               Spaces will be placed at beginning of the buffer
244      *
245      * De-shaping mode: N/A
246      * Affects: Tashkeel options
247      */
248     public static final int TASHKEEL_BEGIN = 0x40000;
249 
250     /**
251      * Memory option: the result must have the same length as the source.
252      * Shaping mode: Tashkeel characters will be replaced by spaces.
253      *               Spaces will be placed at end of the buffer
254      *
255      * De-shaping mode: N/A
256      * Affects: Tashkeel options
257      */
258     public static final int TASHKEEL_END = 0x60000;
259 
260     /**
261      * Memory option: allow the result to have a different length than the source.
262      * Shaping mode: Tashkeel characters will be removed, buffer length will shrink.
263      * De-shaping mode: N/A
264      *
265      * Affects: Tashkeel options
266      */
267     public static final int TASHKEEL_RESIZE = 0x80000;
268 
269     /**
270      * Memory option: the result must have the same length as the source.
271      * Shaping mode: Tashkeel characters will be replaced by Tatweel if it is connected to adjacent
272      *               characters (i.e. shaped on Tatweel) or replaced by space if it is not connected.
273      *
274      * De-shaping mode: N/A
275      * Affects: YehHamza options
276      */
277     public static final int TASHKEEL_REPLACE_BY_TATWEEL = 0xC0000;
278 
279     /** Bit mask for Tashkeel replacement with Space or Tatweel memory options.
280      */
281     public static final int TASHKEEL_MASK  = 0xE0000;
282 
283     /* Space location Control options */
284     /**
285      * This option effects the meaning of BEGIN and END options. if this option is not used the default
286      * for BEGIN and END will be as following:
287      * The Default (for both Visual LTR, Visual RTL and Logical Text)
288      *           1. BEGIN always refers to the start address of physical memory.
289      *           2. END always refers to the end address of physical memory.
290      *
291      * If this option is used it will swap the meaning of BEGIN and END only for Visual LTR text.
292      *
293      * The affect on BEGIN and END Memory Options will be as following:
294      *    A. BEGIN For Visual LTR text: This will be the beginning (right side) of the visual text
295      *       (corresponding to the physical memory address end, same as END in default behavior)
296      *    B. BEGIN For Logical text: Same as BEGIN in default behavior.
297      *    C. END For Visual LTR text: This will be the end (left side) of the visual text. (corresponding to
298      *      the physical memory address beginning, same as BEGIN in default behavior)
299      *    D. END For Logical text: Same as END in default behavior.
300      * Affects: All LamAlef BEGIN, END and AUTO options.
301      */
302     public static final int SPACES_RELATIVE_TO_TEXT_BEGIN_END = 0x4000000;
303 
304     /** Bit mask for swapping BEGIN and END for Visual LTR text
305      */
306     public static final int SPACES_RELATIVE_TO_TEXT_MASK = 0x4000000;
307 
308     /**
309      * If this option is used, shaping will use the new Unicode code point for TAIL (i.e. 0xFE73).
310      * If this option is not specified (Default), old unofficial Unicode TAIL code point is used (i.e. 0x200B)
311      * De-shaping will not use this option as it will always search for both the new Unicode code point for the
312      * TAIL (i.e. 0xFE73) or the old unofficial Unicode TAIL code point (i.e. 0x200B) and de-shape the
313      * Seen-Family letter accordingly.
314      *
315      * Shaping Mode: Only shaping.
316      * De-shaping Mode: N/A.
317      * Affects: All Seen options
318      */
319     public static final int SHAPE_TAIL_NEW_UNICODE = 0x8000000;
320 
321     /** Bit mask for new Unicode Tail option
322      */
323     public static final int SHAPE_TAIL_TYPE_MASK = 0x8000000;
324 
325     /**
326      * Memory option: allow the result to have a different length than the source.
327      */
328     public static final int LENGTH_GROW_SHRINK = 0;
329 
330     /**
331      * Memory option: allow the result to have a different length than the source.
332      * Affects: LamAlef options
333      * This option is an alias to LENGTH_GROW_SHRINK
334      */
335     public static final int LAMALEF_RESIZE   = 0;
336 
337     /**
338      * Memory option: the result must have the same length as the source.
339      * If more room is necessary, then try to consume spaces next to modified characters.
340      */
341     public static final int LENGTH_FIXED_SPACES_NEAR = 1;
342 
343     /**
344      * Memory option: the result must have the same length as the source.
345      * If more room is necessary, then try to consume spaces next to modified characters.
346      * Affects: LamAlef options
347      * This option is an alias to LENGTH_FIXED_SPACES_NEAR
348      */
349     public static final int LAMALEF_NEAR = 1 ;
350 
351     /**
352      * Memory option: the result must have the same length as the source.
353      * If more room is necessary, then try to consume spaces at the end of the text.
354      */
355     public static final int LENGTH_FIXED_SPACES_AT_END = 2;
356 
357 
358     /**
359      * Memory option: the result must have the same length as the source.
360      * If more room is necessary, then try to consume spaces at the end of the text.
361      * Affects: LamAlef options
362      * This option is an alias to LENGTH_FIXED_SPACES_AT_END
363      */
364     public static final int LAMALEF_END = 2;
365 
366     /**
367      * Memory option: the result must have the same length as the source.
368      * If more room is necessary, then try to consume spaces at the beginning of the text.
369      */
370     public static final int LENGTH_FIXED_SPACES_AT_BEGINNING = 3;
371 
372     /**
373      * Memory option: the result must have the same length as the source.
374      * If more room is necessary, then try to consume spaces at the beginning of the text.
375      * Affects: LamAlef options
376      * This option is an alias to LENGTH_FIXED_SPACES_AT_BEGINNING
377      */
378     public static final int LAMALEF_BEGIN = 3;
379 
380     /**
381      * Memory option: the result must have the same length as the source.
382      * Shaping Mode: For each LAMALEF character found, expand LAMALEF using space at end.
383      *               If there is no space at end, use spaces at beginning of the buffer. If there
384      *               is no space at beginning of the buffer, use spaces at the near (i.e. the space
385      *               after the LAMALEF character).
386      *
387      * Deshaping Mode: Perform the same function as the flag equals LAMALEF_END.
388      * Affects: LamAlef options
389      */
390     public static final int LAMALEF_AUTO  = 0x10000;
391 
392     /**
393      * Bit mask for memory options.
394      */
395     public static final int LENGTH_MASK = 0x10003;
396 
397     /** Bit mask for LamAlef memory options.
398      */
399 
400     public static final int LAMALEF_MASK  = 0x10003;
401 
402     /**
403      * Direction indicator: the source is in logical (keyboard) order.
404      */
405     public static final int TEXT_DIRECTION_LOGICAL = 0;
406 
407     /**
408      * Direction indicator:the source is in visual RTL order,
409      * the rightmost displayed character stored first.
410      * This option is an alias to U_SHAPE_TEXT_DIRECTION_LOGICAL
411      */
412     public static final int TEXT_DIRECTION_VISUAL_RTL = 0;
413 
414     /**
415      * Direction indicator: the source is in visual (display) order, that is,
416      * the leftmost displayed character is stored first.
417      */
418     public static final int TEXT_DIRECTION_VISUAL_LTR = 4;
419 
420     /**
421      * Bit mask for direction indicators.
422      */
423     public static final int TEXT_DIRECTION_MASK = 4;
424 
425 
426     /**
427      * Letter shaping option: do not perform letter shaping.
428      */
429     public static final int LETTERS_NOOP = 0;
430 
431     /**
432      * Letter shaping option: replace normative letter characters in the U+0600 (Arabic) block,
433      * by shaped ones in the U+FE70 (Presentation Forms B) block. Performs Lam-Alef ligature
434      * substitution.
435      */
436     public static final int LETTERS_SHAPE = 8;
437 
438     /**
439      * Letter shaping option: replace shaped letter characters in the U+FE70 (Presentation Forms B) block
440      * by normative ones in the U+0600 (Arabic) block.  Converts Lam-Alef ligatures to pairs of Lam and
441      * Alef characters, consuming spaces if required.
442      */
443     public static final int LETTERS_UNSHAPE = 0x10;
444 
445     /**
446      * Letter shaping option: replace normative letter characters in the U+0600 (Arabic) block,
447      * except for the TASHKEEL characters at U+064B...U+0652, by shaped ones in the U+Fe70
448      * (Presentation Forms B) block.  The TASHKEEL characters will always be converted to
449      * the isolated forms rather than to their correct shape.
450      */
451     public static final int LETTERS_SHAPE_TASHKEEL_ISOLATED = 0x18;
452 
453     /**
454      * Bit mask for letter shaping options.
455      */
456     public static final int LETTERS_MASK = 0x18;
457 
458 
459     /**
460      * Digit shaping option: do not perform digit shaping.
461      */
462     public static final int DIGITS_NOOP = 0;
463 
464     /**
465      * Digit shaping option: Replace European digits (U+0030...U+0039) by Arabic-Indic digits.
466      */
467     public static final int DIGITS_EN2AN = 0x20;
468 
469     /**
470      * Digit shaping option: Replace Arabic-Indic digits by European digits (U+0030...U+0039).
471      */
472     public static final int DIGITS_AN2EN = 0x40;
473 
474     /**
475      * Digit shaping option:
476      * Replace European digits (U+0030...U+0039) by Arabic-Indic digits
477      * if the most recent strongly directional character
478      * is an Arabic letter (its Bidi direction value is RIGHT_TO_LEFT_ARABIC).
479      * The initial state at the start of the text is assumed to be not an Arabic,
480      * letter, so European digits at the start of the text will not change.
481      * Compare to DIGITS_ALEN2AN_INIT_AL.
482      */
483     public static final int DIGITS_EN2AN_INIT_LR = 0x60;
484 
485     /**
486      * Digit shaping option:
487      * Replace European digits (U+0030...U+0039) by Arabic-Indic digits
488      * if the most recent strongly directional character
489      * is an Arabic letter (its Bidi direction value is RIGHT_TO_LEFT_ARABIC).
490      * The initial state at the start of the text is assumed to be an Arabic,
491      * letter, so European digits at the start of the text will change.
492      * Compare to DIGITS_ALEN2AN_INT_LR.
493      */
494     public static final int DIGITS_EN2AN_INIT_AL = 0x80;
495 
496     /** Not a valid option value. */
497     //private static final int DIGITS_RESERVED = 0xa0;
498 
499     /**
500      * Bit mask for digit shaping options.
501      */
502     public static final int DIGITS_MASK = 0xe0;
503 
504     /**
505      * Digit type option: Use Arabic-Indic digits (U+0660...U+0669).
506      */
507     public static final int DIGIT_TYPE_AN = 0;
508 
509     /**
510      * Digit type option: Use Eastern (Extended) Arabic-Indic digits (U+06f0...U+06f9).
511      */
512     public static final int DIGIT_TYPE_AN_EXTENDED = 0x100;
513 
514     /**
515      * Bit mask for digit type options.
516      */
517     public static final int DIGIT_TYPE_MASK = 0x0100; // 0x3f00?
518 
519     /**
520      * some constants
521      */
522     private static final char HAMZAFE_CHAR       = '\ufe80';
523     private static final char HAMZA06_CHAR       = '\u0621';
524     private static final char YEH_HAMZA_CHAR     = '\u0626';
525     private static final char YEH_HAMZAFE_CHAR   = '\uFE89';
526     private static final char LAMALEF_SPACE_SUB  = '\uffff';
527     private static final char TASHKEEL_SPACE_SUB = '\ufffe';
528     private static final char LAM_CHAR      = '\u0644';
529     private static final char SPACE_CHAR    = '\u0020';
530     private static final char SHADDA_CHAR   = '\uFE7C';
531     private static final char SHADDA06_CHAR = '\u0651';
532     private static final char TATWEEL_CHAR  = '\u0640';
533     private static final char SHADDA_TATWEEL_CHAR = '\uFE7D';
534     private static final char NEW_TAIL_CHAR = '\uFE73';
535     private static final char OLD_TAIL_CHAR = '\u200B';
536     private static final int SHAPE_MODE      = 0;
537     private static final int DESHAPE_MODE    = 1;
538 
539     /**
540      */
541     @Override
equals(Object rhs)542     public boolean equals(Object rhs) {
543         return rhs != null &&
544             rhs.getClass() == ArabicShaping.class &&
545             options == ((ArabicShaping)rhs).options;
546     }
547 
548     /**
549      */
550      ///CLOVER:OFF
551     @Override
hashCode()552     public int hashCode() {
553         return options;
554     }
555 
556     /**
557      */
558     @Override
toString()559     public String toString() {
560         StringBuilder buf = new StringBuilder(super.toString());
561         buf.append('[');
562 
563         switch (options & LAMALEF_MASK) {
564         case LAMALEF_RESIZE: buf.append("LamAlef resize"); break;
565         case LAMALEF_NEAR: buf.append("LamAlef spaces at near"); break;
566         case LAMALEF_BEGIN: buf.append("LamAlef spaces at begin"); break;
567         case LAMALEF_END: buf.append("LamAlef spaces at end"); break;
568         case LAMALEF_AUTO: buf.append("lamAlef auto"); break;
569         }
570         switch (options & TEXT_DIRECTION_MASK) {
571         case TEXT_DIRECTION_LOGICAL: buf.append(", logical"); break;
572         case TEXT_DIRECTION_VISUAL_LTR: buf.append(", visual"); break;
573         }
574         switch (options & LETTERS_MASK) {
575         case LETTERS_NOOP: buf.append(", no letter shaping"); break;
576         case LETTERS_SHAPE: buf.append(", shape letters"); break;
577         case LETTERS_SHAPE_TASHKEEL_ISOLATED: buf.append(", shape letters tashkeel isolated"); break;
578         case LETTERS_UNSHAPE: buf.append(", unshape letters"); break;
579         }
580         switch (options & SEEN_MASK) {
581         case SEEN_TWOCELL_NEAR: buf.append(", Seen at near"); break;
582         }
583         switch (options & YEHHAMZA_MASK) {
584         case YEHHAMZA_TWOCELL_NEAR: buf.append(", Yeh Hamza at near"); break;
585         }
586         switch (options & TASHKEEL_MASK) {
587         case TASHKEEL_BEGIN: buf.append(", Tashkeel at begin"); break;
588         case TASHKEEL_END: buf.append(", Tashkeel at end"); break;
589         case TASHKEEL_REPLACE_BY_TATWEEL: buf.append(", Tashkeel replace with tatweel"); break;
590         case TASHKEEL_RESIZE: buf.append(", Tashkeel resize"); break;
591         }
592 
593         switch (options & DIGITS_MASK) {
594         case DIGITS_NOOP: buf.append(", no digit shaping"); break;
595         case DIGITS_EN2AN: buf.append(", shape digits to AN"); break;
596         case DIGITS_AN2EN: buf.append(", shape digits to EN"); break;
597         case DIGITS_EN2AN_INIT_LR: buf.append(", shape digits to AN contextually: default EN"); break;
598         case DIGITS_EN2AN_INIT_AL: buf.append(", shape digits to AN contextually: default AL"); break;
599         }
600         switch (options & DIGIT_TYPE_MASK) {
601         case DIGIT_TYPE_AN: buf.append(", standard Arabic-Indic digits"); break;
602         case DIGIT_TYPE_AN_EXTENDED: buf.append(", extended Arabic-Indic digits"); break;
603         }
604         buf.append("]");
605 
606         return buf.toString();
607     }
608     ///CLOVER:ON
609 
610     //
611     // ported api
612     //
613 
614     private static final int IRRELEVANT = 4;
615     private static final int LAMTYPE = 16;
616     private static final int ALEFTYPE = 32;
617 
618     private static final int LINKR = 1;
619     private static final int LINKL = 2;
620     private static final int LINK_MASK = 3;
621 
622     private static final int irrelevantPos[] = {
623         0x0, 0x2, 0x4, 0x6, 0x8, 0xA, 0xC, 0xE
624     };
625 
626 /*
627     private static final char convertLamAlef[] =  {
628         '\u0622', // FEF5
629         '\u0622', // FEF6
630         '\u0623', // FEF7
631         '\u0623', // FEF8
632         '\u0625', // FEF9
633         '\u0625', // FEFA
634         '\u0627', // FEFB
635         '\u0627'  // FEFC
636     };
637 */
638 
639     private static final int tailFamilyIsolatedFinal[] = {
640         /* FEB1 */ 1,
641         /* FEB2 */ 1,
642         /* FEB3 */ 0,
643         /* FEB4 */ 0,
644         /* FEB5 */ 1,
645         /* FEB6 */ 1,
646         /* FEB7 */ 0,
647         /* FEB8 */ 0,
648         /* FEB9 */ 1,
649         /* FEBA */ 1,
650         /* FEBB */ 0,
651         /* FEBC */ 0,
652         /* FEBD */ 1,
653         /* FEBE */ 1
654     };
655 
656     private static final int tashkeelMedial[] = {
657         /* FE70 */ 0,
658         /* FE71 */ 1,
659         /* FE72 */ 0,
660         /* FE73 */ 0,
661         /* FE74 */ 0,
662         /* FE75 */ 0,
663         /* FE76 */ 0,
664         /* FE77 */ 1,
665         /* FE78 */ 0,
666         /* FE79 */ 1,
667         /* FE7A */ 0,
668         /* FE7B */ 1,
669         /* FE7C */ 0,
670         /* FE7D */ 1,
671         /* FE7E */ 0,
672         /* FE7F */ 1
673     };
674 
675     private static final char yehHamzaToYeh[] =
676     {
677     /* isolated*/ 0xFEEF,
678     /* final   */ 0xFEF0
679     };
680 
681     private static final char convertNormalizedLamAlef[] = {
682         '\u0622', // 065C
683         '\u0623', // 065D
684         '\u0625', // 065E
685         '\u0627', // 065F
686     };
687 
    // Link attributes for U+0622..U+06D3, one entry per code point.  getLink
    // returns araLink[ch - 0x0622] for characters in that range.
    // Each entry is a sum of small flag values (1, 2, 4, 16, 32) and, for some
    // letters, a 256 * k component.  The comment after each entry (or row of
    // entries) gives the code point(s) it applies to.
    // NOTE(review): the flag meanings are defined by the shaping code outside
    // this section -- presumably 1 and 2 are the two joining sides, 4 marks
    // characters that are skipped when linking, 16 marks Lam, 32 marks Alef and
    // the 256 * k part is an offset into the presentation forms; confirm
    // against the shaping routine before relying on this.
    private static final int[] araLink = {
        1           + 32 + 256 * 0x11,  /*0x0622*/
        1           + 32 + 256 * 0x13,  /*0x0623*/
        1                + 256 * 0x15,  /*0x0624*/
        1           + 32 + 256 * 0x17,  /*0x0625*/
        1 + 2            + 256 * 0x19,  /*0x0626*/
        1           + 32 + 256 * 0x1D,  /*0x0627*/
        1 + 2            + 256 * 0x1F,  /*0x0628*/
        1                + 256 * 0x23,  /*0x0629*/
        1 + 2            + 256 * 0x25,  /*0x062A*/
        1 + 2            + 256 * 0x29,  /*0x062B*/
        1 + 2            + 256 * 0x2D,  /*0x062C*/
        1 + 2            + 256 * 0x31,  /*0x062D*/
        1 + 2            + 256 * 0x35,  /*0x062E*/
        1                + 256 * 0x39,  /*0x062F*/
        1                + 256 * 0x3B,  /*0x0630*/
        1                + 256 * 0x3D,  /*0x0631*/
        1                + 256 * 0x3F,  /*0x0632*/
        1 + 2            + 256 * 0x41,  /*0x0633*/
        1 + 2            + 256 * 0x45,  /*0x0634*/
        1 + 2            + 256 * 0x49,  /*0x0635*/
        1 + 2            + 256 * 0x4D,  /*0x0636*/
        1 + 2            + 256 * 0x51,  /*0x0637*/
        1 + 2            + 256 * 0x55,  /*0x0638*/
        1 + 2            + 256 * 0x59,  /*0x0639*/
        1 + 2            + 256 * 0x5D,  /*0x063A*/
        0, 0, 0, 0, 0,                  /*0x063B-0x063F*/
        1 + 2,                          /*0x0640*/
        1 + 2            + 256 * 0x61,  /*0x0641*/
        1 + 2            + 256 * 0x65,  /*0x0642*/
        1 + 2            + 256 * 0x69,  /*0x0643*/
        1 + 2       + 16 + 256 * 0x6D,  /*0x0644*/
        1 + 2            + 256 * 0x71,  /*0x0645*/
        1 + 2            + 256 * 0x75,  /*0x0646*/
        1 + 2            + 256 * 0x79,  /*0x0647*/
        1                + 256 * 0x7D,  /*0x0648*/
        1                + 256 * 0x7F,  /*0x0649*/
        1 + 2            + 256 * 0x81,  /*0x064A*/
        4, 4, 4, 4,                     /*0x064B-0x064E*/
        4, 4, 4, 4,                     /*0x064F-0x0652*/
        4, 4, 4, 0, 0,                  /*0x0653-0x0657*/
        0, 0, 0, 0,                     /*0x0658-0x065B*/
        /* 0x065C-0x065F are the normalized LamAlef placeholders produced by changeLamAlef */
        1                + 256 * 0x85,  /*0x065C*/
        1                + 256 * 0x87,  /*0x065D*/
        1                + 256 * 0x89,  /*0x065E*/
        1                + 256 * 0x8B,  /*0x065F*/
        0, 0, 0, 0, 0,                  /*0x0660-0x0664*/
        0, 0, 0, 0, 0,                  /*0x0665-0x0669*/
        0, 0, 0, 0, 0, 0,               /*0x066A-0x066F*/
        4,                              /*0x0670*/
        0,                              /*0x0671*/
        1           + 32,               /*0x0672*/
        1           + 32,               /*0x0673*/
        0,                              /*0x0674*/
        1           + 32,               /*0x0675*/
        1, 1,                           /*0x0676-0x0677*/
        1+2, 1+2, 1+2, 1+2, 1+2, 1+2,   /*0x0678-0x067D*/
        1+2, 1+2, 1+2, 1+2, 1+2, 1+2,   /*0x067E-0x0683*/
        1+2, 1+2, 1+2, 1+2,             /*0x0684-0x0687*/
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,   /*0x0688-0x0691*/
        1, 1, 1, 1, 1, 1, 1, 1,         /*0x0692-0x0699*/
        1+2, 1+2, 1+2, 1+2, 1+2, 1+2,   /*0x069A-0x069F*/
        1+2, 1+2, 1+2, 1+2,             /*0x06A0-0x06A3*/
        1+2, 1+2, 1+2, 1+2, 1+2, 1+2,   /*0x06A4-0x06A9*/
        1+2, 1+2, 1+2, 1+2,             /*0x06AA-0x06AD*/
        1+2, 1+2, 1+2, 1+2, 1+2, 1+2,   /*0x06AE-0x06B3*/
        1+2, 1+2, 1+2, 1+2,             /*0x06B4-0x06B7*/
        1+2, 1+2, 1+2, 1+2, 1+2, 1+2,   /*0x06B8-0x06BD*/
        1+2, 1+2,                       /*0x06BE-0x06BF*/
        1,                              /*0x06C0*/
        1+2,                            /*0x06C1*/
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,   /*0x06C2-0x06CB*/
        1+2,                            /*0x06CC*/
        1,                              /*0x06CD*/
        1+2, 1+2, 1+2, 1+2,             /*0x06CE-0x06D1*/
        1, 1                            /*0x06D2-0x06D3*/
    };
765 
    // Link attributes for the presentation forms U+FE70..U+FEFC, one entry per
    // code point (141 entries).  getLink returns presLink[ch - 0xFE70] for
    // characters in that range.
    // The range comment on each row states the code points that the row's
    // entries occupy, derived from their position in the array.
    // NOTE(review): flag values follow the same scheme as araLink (1, 2, 16,
    // 32); their exact meaning is defined by the shaping code outside this
    // section -- confirm there.
    private static final int[] presLink = {
        1 + 2,                        /*0xFE70*/
        1 + 2,                        /*0xFE71*/
        1 + 2, 0, 1+ 2, 0, 1+ 2,      /*0xFE72-0xFE76*/
        1 + 2,                        /*0xFE77*/
        1+ 2, 1 + 2, 1+2, 1 + 2,      /*0xFE78-0xFE7B*/
        1+ 2, 1 + 2, 1+2, 1 + 2,      /*0xFE7C-0xFE7F*/
        0, 0 + 32, 1 + 32, 0 + 32,    /*0xFE80-0xFE83*/
        1 + 32, 0, 1,  0 + 32,        /*0xFE84-0xFE87*/
        1 + 32, 0, 2,  1 + 2,         /*0xFE88-0xFE8B*/
        1, 0 + 32, 1 + 32, 0,         /*0xFE8C-0xFE8F*/
        2, 1 + 2, 1, 0,               /*0xFE90-0xFE93*/
        1, 0, 2, 1 + 2,               /*0xFE94-0xFE97*/
        1, 0, 2, 1 + 2,               /*0xFE98-0xFE9B*/
        1, 0, 2, 1 + 2,               /*0xFE9C-0xFE9F*/
        1, 0, 2, 1 + 2,               /*0xFEA0-0xFEA3*/
        1, 0, 2, 1 + 2,               /*0xFEA4-0xFEA7*/
        1, 0, 1, 0,                   /*0xFEA8-0xFEAB*/
        1, 0, 1, 0,                   /*0xFEAC-0xFEAF*/
        1, 0, 2, 1+2,                 /*0xFEB0-0xFEB3*/
        1, 0, 2, 1+2,                 /*0xFEB4-0xFEB7*/
        1, 0, 2, 1+2,                 /*0xFEB8-0xFEBB*/
        1, 0, 2, 1+2,                 /*0xFEBC-0xFEBF*/
        1, 0, 2, 1+2,                 /*0xFEC0-0xFEC3*/
        1, 0, 2, 1+2,                 /*0xFEC4-0xFEC7*/
        1, 0, 2, 1+2,                 /*0xFEC8-0xFECB*/
        1, 0, 2, 1+2,                 /*0xFECC-0xFECF*/
        1, 0, 2, 1+2,                 /*0xFED0-0xFED3*/
        1, 0, 2, 1+2,                 /*0xFED4-0xFED7*/
        1, 0, 2, 1+2,                 /*0xFED8-0xFEDB*/
        1, 0 + 16, 2 + 16, 1 + 2 +16, /*0xFEDC-0xFEDF*/
        1 + 16, 0, 2, 1+2,            /*0xFEE0-0xFEE3*/
        1, 0, 2, 1+2,                 /*0xFEE4-0xFEE7*/
        1, 0, 2, 1+2,                 /*0xFEE8-0xFEEB*/
        1, 0, 1, 0,                   /*0xFEEC-0xFEEF*/
        1, 0, 2, 1+2,                 /*0xFEF0-0xFEF3*/
        1, 0, 1, 0,                   /*0xFEF4-0xFEF7*/
        1, 0, 1, 0,                   /*0xFEF8-0xFEFB*/
        1                             /*0xFEFC*/
    };
806 
    // Maps each presentation form U+FE70..U+FEFC to a code point in the 06xx
    // range.  Rows are labelled with the upper three hex digits of the source
    // code point and the header line gives the lowest digit, so the entry for
    // U+FExy sits in row "FEx", column "y" (141 entries in total).
    // The 0x65C..0x65F values in the last row are the normalized LamAlef
    // placeholders (see changeLamAlef / convertNormalizedLamAlef).
    // NOTE(review): presumably indexed with (ch - 0xFE70) by the unshaping
    // code; that lookup is outside this section -- confirm.
    // NOTE(review): unlike the neighbouring tables this field is not declared
    // final; check whether that is intentional.
    private static int[] convertFEto06 = {
        /***********0******1******2******3******4******5******6******7******8******9******A******B******C******D******E******F***/
        /*FE7*/   0x64B, 0x64B, 0x64C, 0x64C, 0x64D, 0x64D, 0x64E, 0x64E, 0x64F, 0x64F, 0x650, 0x650, 0x651, 0x651, 0x652, 0x652,
        /*FE8*/   0x621, 0x622, 0x622, 0x623, 0x623, 0x624, 0x624, 0x625, 0x625, 0x626, 0x626, 0x626, 0x626, 0x627, 0x627, 0x628,
        /*FE9*/   0x628, 0x628, 0x628, 0x629, 0x629, 0x62A, 0x62A, 0x62A, 0x62A, 0x62B, 0x62B, 0x62B, 0x62B, 0x62C, 0x62C, 0x62C,
        /*FEA*/   0x62C, 0x62D, 0x62D, 0x62D, 0x62D, 0x62E, 0x62E, 0x62E, 0x62E, 0x62F, 0x62F, 0x630, 0x630, 0x631, 0x631, 0x632,
        /*FEB*/   0x632, 0x633, 0x633, 0x633, 0x633, 0x634, 0x634, 0x634, 0x634, 0x635, 0x635, 0x635, 0x635, 0x636, 0x636, 0x636,
        /*FEC*/   0x636, 0x637, 0x637, 0x637, 0x637, 0x638, 0x638, 0x638, 0x638, 0x639, 0x639, 0x639, 0x639, 0x63A, 0x63A, 0x63A,
        /*FED*/   0x63A, 0x641, 0x641, 0x641, 0x641, 0x642, 0x642, 0x642, 0x642, 0x643, 0x643, 0x643, 0x643, 0x644, 0x644, 0x644,
        /*FEE*/   0x644, 0x645, 0x645, 0x645, 0x645, 0x646, 0x646, 0x646, 0x646, 0x647, 0x647, 0x647, 0x647, 0x648, 0x648, 0x649,
        /*FEF*/   0x649, 0x64A, 0x64A, 0x64A, 0x64A, 0x65C, 0x65C, 0x65D, 0x65D, 0x65E, 0x65E, 0x65F, 0x65F
    };
819 
    // 4 x 4 x 4 lookup table whose entries are all in the range 0..3.
    // NOTE(review): the code that reads this table is outside this section.
    // Presumably the three indices are the low two link bits of the
    // neighbouring and current characters and the value selects the
    // isolated / final / initial / medial form offset -- confirm against the
    // shaping routine before editing any entry.
    private static final int shapeTable[][][] = {
        { {0,0,0,0}, {0,0,0,0}, {0,1,0,3}, {0,1,0,1} },
        { {0,0,2,2}, {0,0,1,2}, {0,1,1,2}, {0,1,1,3} },
        { {0,0,0,0}, {0,0,0,0}, {0,1,0,3}, {0,1,0,3} },
        { {0,0,1,2}, {0,0,1,2}, {0,1,1,2}, {0,1,1,3} }
    };
826 
827     /*
828      * This function shapes European digits to Arabic-Indic digits
829      * in-place, writing over the input characters.  Data is in visual
830      * order.
831      */
shapeToArabicDigitsWithContext(char[] dest, int start, int length, char digitBase, boolean lastStrongWasAL)832     private void shapeToArabicDigitsWithContext(char[] dest,
833                                                 int start,
834                                                 int length,
835                                                 char digitBase,
836                                                 boolean lastStrongWasAL) {
837         UBiDiProps bdp=UBiDiProps.INSTANCE;
838         digitBase -= '0'; // move common adjustment out of loop
839 
840         for(int i = start + length; --i >= start;) {
841             char ch = dest[i];
842             switch (bdp.getClass(ch)) {
843             case UCharacterDirection.LEFT_TO_RIGHT:
844             case UCharacterDirection.RIGHT_TO_LEFT:
845                 lastStrongWasAL = false;
846                 break;
847             case UCharacterDirection.RIGHT_TO_LEFT_ARABIC:
848                 lastStrongWasAL = true;
849                 break;
850             case UCharacterDirection.EUROPEAN_NUMBER:
851                 if (lastStrongWasAL && ch <= '\u0039') {
852                     dest[i] = (char)(ch + digitBase);
853                 }
854                 break;
855             default:
856                 break;
857             }
858         }
859     }
860 
861     /*
862      * Name    : invertBuffer
863      * Function: This function inverts the buffer, it's used
864      *           in case the user specifies the buffer to be
865      *           TEXT_DIRECTION_LOGICAL
866      */
invertBuffer(char[] buffer, int start, int length)867     private static void invertBuffer(char[] buffer,
868                                      int start,
869                                      int length) {
870 
871         for(int i = start, j = start + length - 1; i < j; i++, --j) {
872             char temp = buffer[i];
873             buffer[i] = buffer[j];
874             buffer[j] = temp;
875         }
876     }
877 
878     /*
879      * Name    : changeLamAlef
880      * Function: Converts the Alef characters into an equivalent
881      *           LamAlef location in the 0x06xx Range, this is an
882      *           intermediate stage in the operation of the program
883      *           later it'll be converted into the 0xFExx LamAlefs
884      *           in the shaping function.
885      */
changeLamAlef(char ch)886     private static char changeLamAlef(char ch) {
887         switch(ch) {
888         case '\u0622': return '\u065C';
889         case '\u0623': return '\u065D';
890         case '\u0625': return '\u065E';
891         case '\u0627': return '\u065F';
892         default:  return '\u0000'; // not a lamalef
893         }
894     }
895 
896     /*
897      * Name    : specialChar
898      * Function: Special Arabic characters need special handling in the shapeUnicode
899      *           function, this function returns 1 or 2 for these special characters
900      */
specialChar(char ch)901     private static int specialChar(char ch) {
902         if ((ch > '\u0621' && ch < '\u0626') ||
903             (ch == '\u0627') ||
904             (ch > '\u062E' && ch < '\u0633') ||
905             (ch > '\u0647' && ch < '\u064A') ||
906             (ch == '\u0629')) {
907             return 1;
908         } else if (ch >= '\u064B' && ch<= '\u0652') {
909             return 2;
910         } else if (ch >= 0x0653 && ch <= 0x0655 ||
911                    ch == 0x0670 ||
912                    ch >= 0xFE70 && ch <= 0xFE7F) {
913             return 3;
914         } else {
915             return 0;
916         }
917     }
918 
919     /*
920      * Name    : getLink
921      * Function: Resolves the link between the characters as
922      *           Arabic characters have four forms :
923      *           Isolated, Initial, Middle and Final Form
924      */
getLink(char ch)925     private static int getLink(char ch) {
926         if (ch >= '\u0622' && ch <= '\u06D3') {
927             return araLink[ch - '\u0622'];
928         } else if (ch == '\u200D') {
929             return 3;
930         } else if (ch >= '\u206D' && ch <= '\u206F') {
931             return 4;
932         } else if (ch >= '\uFE70' && ch <= '\uFEFC') {
933             return presLink[ch - '\uFE70'];
934         } else {
935             return 0;
936         }
937     }
938 
939     /*
940      * Name    : countSpaces
941      * Function: Counts the number of spaces
942      *           at each end of the logical buffer
943      */
countSpacesLeft(char[] dest, int start, int count)944     private static int countSpacesLeft(char[] dest,
945                                        int start,
946                                        int count) {
947         for (int i = start, e = start + count; i < e; ++i) {
948             if (dest[i] != SPACE_CHAR) {
949                 return i - start;
950             }
951         }
952         return count;
953     }
954 
countSpacesRight(char[] dest, int start, int count)955     private static int countSpacesRight(char[] dest,
956                                         int start,
957                                         int count) {
958 
959         for (int i = start + count; --i >= start;) {
960             if (dest[i] != SPACE_CHAR) {
961                 return start + count - 1 - i;
962             }
963         }
964         return count;
965     }
966 
967     /*
968      * Name    : isTashkeelChar
969      * Function: Returns true for Tashkeel characters else return false
970      */
isTashkeelChar(char ch)971     private static boolean isTashkeelChar(char ch) {
972         return ( ch >='\u064B' && ch <= '\u0652' );
973     }
974 
975     /*
976      *Name     : isSeenTailFamilyChar
977      *Function : returns 1 if the character is a seen family isolated character
978      *           in the FE range otherwise returns 0
979      */
980 
981     @dalvik.annotation.compat.UnsupportedAppUsage
isSeenTailFamilyChar(char ch)982     private static int isSeenTailFamilyChar(char ch) {
983         if (ch >= 0xfeb1 && ch < 0xfebf){
984              return tailFamilyIsolatedFinal [ch - 0xFEB1];
985         } else {
986              return 0;
987         }
988     }
989 
990      /* Name     : isSeenFamilyChar
991       * Function : returns 1 if the character is a seen family character in the Unicode
992       *            06 range otherwise returns 0
993      */
994 
isSeenFamilyChar(char ch)995     private static int isSeenFamilyChar(char  ch){
996         if (ch >= 0x633 && ch <= 0x636){
997             return 1;
998         }else {
999             return 0;
1000         }
1001     }
1002 
1003     /*
1004      *Name     : isTailChar
1005      *Function : returns true if the character matches one of the tail characters
1006      *           (0xfe73 or 0x200b) otherwise returns false
1007      */
1008 
1009     @dalvik.annotation.compat.UnsupportedAppUsage
isTailChar(char ch)1010     private static boolean isTailChar(char ch) {
1011         if(ch == OLD_TAIL_CHAR || ch == NEW_TAIL_CHAR){
1012                 return true;
1013         }else{
1014                 return false;
1015         }
1016     }
1017 
1018     /*
1019      *Name     : isAlefMaksouraChar
1020      *Function : returns true if the character is a Alef Maksoura Final or isolated
1021      *           otherwise returns false
1022      */
1023     @dalvik.annotation.compat.UnsupportedAppUsage
isAlefMaksouraChar(char ch)1024     private static boolean isAlefMaksouraChar(char ch) {
1025         return ( (ch == 0xFEEF) || ( ch == 0xFEF0) || (ch == 0x0649));
1026     }
1027 
1028     /*
1029      * Name     : isYehHamzaChar
1030      * Function : returns true if the character is a yehHamza isolated or yehhamza
1031      *            final is found otherwise returns false
1032      */
1033     @dalvik.annotation.compat.UnsupportedAppUsage
isYehHamzaChar(char ch)1034     private static boolean isYehHamzaChar(char ch) {
1035         if((ch==0xFE89)||(ch==0xFE8A)){
1036             return true;
1037         }else{
1038             return false;
1039         }
1040     }
1041 
1042     /*
1043      *Name     : isTashkeelCharFE
1044      *Function : Returns true for Tashkeel characters in FE range else return false
1045      */
1046 
isTashkeelCharFE(char ch)1047     private static boolean isTashkeelCharFE(char ch) {
1048         return ( ch!=0xFE75 &&(ch>=0xFE70 && ch<= 0xFE7F) );
1049     }
1050 
1051     /*
1052      * Name: isTashkeelOnTatweelChar
1053      * Function: Checks if the Tashkeel Character is on Tatweel or not,if the
1054      *           Tashkeel on tatweel (FE range), it returns 1 else if the
1055      *           Tashkeel with shadda on tatweel (FC range)return 2 otherwise
1056      *           returns 0
1057      */
isTashkeelOnTatweelChar(char ch)1058     private static int isTashkeelOnTatweelChar(char ch){
1059         if (ch >= 0xfe70 && ch <= 0xfe7f && ch != NEW_TAIL_CHAR && ch != 0xFE75 && ch != SHADDA_TATWEEL_CHAR)
1060         {
1061             return tashkeelMedial [ch - 0xFE70];
1062         } else if( (ch >= 0xfcf2 && ch <= 0xfcf4) || (ch == SHADDA_TATWEEL_CHAR)) {
1063             return 2;
1064         } else {
1065             return 0;
1066         }
1067     }
1068 
1069     /*
1070      * Name: isIsolatedTashkeelChar
1071      * Function: Checks if the Tashkeel Character is in the isolated form
1072      *           (i.e. Unicode FE range) returns 1 else if the Tashkeel
1073      *           with shadda is in the isolated form (i.e. Unicode FC range)
1074      *           returns 1 otherwise returns 0
1075      */
isIsolatedTashkeelChar(char ch)1076     private static int isIsolatedTashkeelChar(char ch){
1077         if (ch >= 0xfe70 && ch <= 0xfe7f && ch != NEW_TAIL_CHAR && ch != 0xFE75){
1078             return (1 - tashkeelMedial [ch - 0xFE70]);
1079         } else if(ch >= 0xfc5e && ch <= 0xfc63){
1080             return 1;
1081         } else{
1082             return 0;
1083         }
1084     }
1085 
1086     /*
1087      * Name    : isAlefChar
1088      * Function: Returns 1 for Alef characters else return 0
1089      */
isAlefChar(char ch)1090     private static boolean isAlefChar(char ch) {
1091         return ch == '\u0622' || ch == '\u0623' || ch == '\u0625' || ch == '\u0627';
1092     }
1093 
1094     /*
1095      * Name    : isLamAlefChar
1096      * Function: Returns true for LamAlef characters else return false
1097      */
isLamAlefChar(char ch)1098     private static boolean isLamAlefChar(char ch) {
1099         return ch >= '\uFEF5' && ch <= '\uFEFC';
1100     }
1101 
isNormalizedLamAlefChar(char ch)1102     private static boolean isNormalizedLamAlefChar(char ch) {
1103         return ch >= '\u065C' && ch <= '\u065F';
1104     }
1105 
1106     /*
1107      * Name    : calculateSize
1108      * Function: This function calculates the destSize to be used in preflighting
1109      *           when the destSize is equal to 0
1110      */
calculateSize(char[] source, int sourceStart, int sourceLength)1111     private int calculateSize(char[] source,
1112                               int sourceStart,
1113                               int sourceLength) {
1114 
1115         int destSize = sourceLength;
1116 
1117         switch (options & LETTERS_MASK) {
1118         case LETTERS_SHAPE:
1119         case LETTERS_SHAPE_TASHKEEL_ISOLATED:
1120             if (isLogical) {
1121                 for (int i = sourceStart, e = sourceStart + sourceLength - 1; i < e; ++i) {
1122                     if ((source[i] == LAM_CHAR && isAlefChar(source[i+1])) || isTashkeelCharFE(source[i])){
1123                         --destSize;
1124                     }
1125                 }
1126             } else { // visual
1127                 for(int i = sourceStart + 1, e = sourceStart + sourceLength; i < e; ++i) {
1128                     if ((source[i] == LAM_CHAR && isAlefChar(source[i-1])) || isTashkeelCharFE(source[i])) {
1129                         --destSize;
1130                     }
1131                 }
1132             }
1133             break;
1134 
1135         case LETTERS_UNSHAPE:
1136             for(int i = sourceStart, e = sourceStart + sourceLength; i < e; ++i) {
1137                 if (isLamAlefChar(source[i])) {
1138                     destSize++;
1139                 }
1140             }
1141             break;
1142 
1143         default:
1144             break;
1145         }
1146 
1147         return destSize;
1148     }
1149 
1150 
1151     /*
1152      * Name    : countSpaceSub
1153      * Function: Counts number of times the subChar appears in the array
1154      */
countSpaceSub(char [] dest,int length, char subChar)1155     private static int countSpaceSub(char [] dest,int length, char subChar){
1156         int i = 0;
1157         int count = 0;
1158         while (i < length) {
1159           if (dest[i] == subChar) {
1160               count++;
1161               }
1162           i++;
1163         }
1164         return count;
1165     }
1166 
1167     /*
1168      * Name    : shiftArray
1169      * Function: Shifts characters to replace space sub characters
1170      */
shiftArray(char [] dest,int start, int e, char subChar)1171     private static void shiftArray(char [] dest,int start, int e, char subChar){
1172         int w = e;
1173         int r = e;
1174         while (--r >= start) {
1175           char ch = dest[r];
1176           if (ch != subChar) {
1177             --w;
1178             if (w != r) {
1179               dest[w] = ch;
1180             }
1181           }
1182         }
1183    }
1184 
1185     /*
1186      * Name    : flipArray
1187      * Function: inverts array, so that start becomes end and vice versa
1188      */
flipArray(char [] dest, int start, int e, int w)1189       private static int flipArray(char [] dest, int start, int e, int w){
1190         int r;
1191         if (w > start) {
1192         // shift, assume small buffer size so don't use arraycopy
1193           r = w;
1194           w = start;
1195           while (r < e) {
1196             dest[w++] = dest[r++];
1197            }
1198          } else {
1199              w = e;
1200          }
1201         return w;
1202       }
1203 
1204     /*
1205      * Name     : handleTashkeelWithTatweel
1206      * Function : Replaces Tashkeel as following:
1207      *            Case 1 :if the Tashkeel on tatweel, replace it with Tatweel.
1208      *            Case 2 :if the Tashkeel aggregated with Shadda on Tatweel, replace
1209      *                   it with Shadda on Tatweel.
1210      *            Case 3: if the Tashkeel is isolated replace it with Space.
1211      *
1212      */
handleTashkeelWithTatweel(char[] dest, int sourceLength)1213     private static int handleTashkeelWithTatweel(char[] dest, int sourceLength) {
1214                      int i;
1215                      for(i = 0; i < sourceLength; i++){
1216                          if((isTashkeelOnTatweelChar(dest[i]) == 1)){
1217                              dest[i] = TATWEEL_CHAR;
1218                         }else if((isTashkeelOnTatweelChar(dest[i]) == 2)){
1219                              dest[i] = SHADDA_TATWEEL_CHAR;
1220                         }else if((isIsolatedTashkeelChar(dest[i])==1) && dest[i] != SHADDA_CHAR){
1221                              dest[i] = SPACE_CHAR;
1222                         }
1223                      }
1224                      return sourceLength;
1225     }
1226 
    /*
     *Name     : handleGeneratedSpaces
     *Function : The shapeUnicode function converts Lam + Alef into LamAlef + space,
     *           and Tashkeel to space.
     *           handleGeneratedSpaces function puts these generated spaces
     *           according to the options the user specifies. LamAlef and Tashkeel
     *           spaces can be replaced at begin, at end, at near or decrease the
     *           buffer size.
     *
     *           There is also Auto option for LamAlef and tashkeel, which will put
     *           the spaces at end of the buffer (or end of text if the user used
     *           the option SPACES_RELATIVE_TO_TEXT_BEGIN_END).
     *
     *           If the text type was visual_LTR and the option
     *           SPACES_RELATIVE_TO_TEXT_BEGIN_END was selected the END
     *           option will place the space at the beginning of the buffer and
     *           BEGIN will place the space at the end of the buffer.
     *
     *           The generated spaces are marked in dest with the placeholder
     *           characters LAMALEF_SPACE_SUB and TASHKEEL_SPACE_SUB.
     *
     *           dest   - buffer processed in place
     *           start  - index of the first character to process
     *           length - number of characters to process
     *           Returns the resulting length: the incoming length, except
     *           after a RESIZE option, where it is recomputed from the
     *           compacted text.
     *
     *           NOTE(review): the code swaps BEGIN and END when the text is
     *           not logical and spacesRelativeToTextBeginEnd is false, which
     *           reads as the opposite of the paragraph above -- confirm how
     *           the field is derived from the option.
     */
  private int handleGeneratedSpaces(char[] dest,
            int start,
            int length) {

      int lenOptionsLamAlef = options & LAMALEF_MASK;
      int lenOptionsTashkeel = options & TASHKEEL_MASK;
      // The two flags below are set immediately before each group of tests
      // and every test repeats the option comparison, so they never change
      // the outcome.
      boolean lamAlefOn = false;
      boolean tashkeelOn = false;

      // Non-logical text that is not relative to text begin/end: BEGIN and
      // END trade places for both the LamAlef and the Tashkeel option.
      // ('&' on booleans evaluates both operands; the result equals '&&'.)
      if (!isLogical & !spacesRelativeToTextBeginEnd) {
          switch (lenOptionsLamAlef) {
          case LAMALEF_BEGIN: lenOptionsLamAlef = LAMALEF_END; break;
          case LAMALEF_END: lenOptionsLamAlef = LAMALEF_BEGIN; break;
          default: break;
         }
          switch (lenOptionsTashkeel){
          case TASHKEEL_BEGIN: lenOptionsTashkeel = TASHKEEL_END; break;
          case TASHKEEL_END: lenOptionsTashkeel = TASHKEEL_BEGIN; break;
          default: break;
          }
        }


      if (lenOptionsLamAlef == LAMALEF_NEAR) {
          // NEAR: each LamAlef placeholder becomes a space where it stands.
          // The Tashkeel option is not examined on this path.
          for (int i = start, e = i + length; i < e; ++i) {
              if (dest[i] == LAMALEF_SPACE_SUB) {
                  dest[i] = SPACE_CHAR;
              }
          }

      } else {

          final int e = start + length;
          // Placeholder counts.  countSpaceSub scans dest[0 .. length-1],
          // not dest[start .. e-1].
          // NOTE(review): the counts are used below as indices compared with
          // start and e, which lines up only when start is 0 -- confirm
          // callers always pass 0.
          int wL = countSpaceSub(dest, length, LAMALEF_SPACE_SUB);
          int wT = countSpaceSub(dest, length, TASHKEEL_SPACE_SUB);

          if (lenOptionsLamAlef == LAMALEF_END){
            lamAlefOn = true;
          }
          if (lenOptionsTashkeel == TASHKEEL_END){
            tashkeelOn = true;
          }


          // END: pack the text against index e, then write spaces into
          // dest[start .. w-1].
          if (lamAlefOn && (lenOptionsLamAlef == LAMALEF_END)) {
            shiftArray(dest, start, e, LAMALEF_SPACE_SUB);
            while (wL > start) {
                dest[--wL] = SPACE_CHAR;
            }
          }

          if (tashkeelOn && (lenOptionsTashkeel == TASHKEEL_END)){
            shiftArray(dest, start, e, TASHKEEL_SPACE_SUB);
            while (wT > start) {
                 dest[--wT] = SPACE_CHAR;
            }
          }

          lamAlefOn = false;
          tashkeelOn = false;

          if (lenOptionsLamAlef == LAMALEF_RESIZE){
            lamAlefOn = true;
          }
          if (lenOptionsTashkeel == TASHKEEL_RESIZE){
            tashkeelOn = true;
          }

          // RESIZE: pack the text against index e, move it back down to
          // start with flipArray, and shrink length to the end of the moved
          // text.
          if (lamAlefOn && (lenOptionsLamAlef == LAMALEF_RESIZE)){
              shiftArray(dest, start, e, LAMALEF_SPACE_SUB);
              wL = flipArray(dest,start,e, wL);
              length = wL - start;
          }
          if (tashkeelOn && (lenOptionsTashkeel == TASHKEEL_RESIZE)) {
              shiftArray(dest, start, e, TASHKEEL_SPACE_SUB);
              wT = flipArray(dest,start,e, wT);
              length = wT - start;
          }

          lamAlefOn = false;
          tashkeelOn = false;

          if ((lenOptionsLamAlef == LAMALEF_BEGIN) ||
              (lenOptionsLamAlef == LAMALEF_AUTO)){
                lamAlefOn = true;
          }
          if (lenOptionsTashkeel == TASHKEEL_BEGIN){
                tashkeelOn = true;
          }

          // BEGIN (and AUTO for LamAlef): pack and move the text down to
          // start as for RESIZE, then write spaces from the end of the moved
          // text up to e-1; length is left unchanged.
          if (lamAlefOn && ((lenOptionsLamAlef == LAMALEF_BEGIN)||
                            (lenOptionsLamAlef == LAMALEF_AUTO))) { // spaces at beginning
              shiftArray(dest, start, e, LAMALEF_SPACE_SUB);
               wL = flipArray(dest,start,e, wL);
                  while (wL < e) {
                      dest[wL++] = SPACE_CHAR;
                  }
              }
              if(tashkeelOn && (lenOptionsTashkeel == TASHKEEL_BEGIN)){
               shiftArray(dest, start, e, TASHKEEL_SPACE_SUB);
               wT = flipArray(dest,start,e, wT);
                  while (wT < e) {
                      dest[wT++] = SPACE_CHAR;
                  }
              }
           }

      return length;
  }
1354 
1355 
1356   /*
1357    *Name     :expandCompositCharAtBegin
1358    *Function :Expands the LamAlef character to Lam and Alef consuming the required
1359    *         space from beginning of the buffer. If the text type was visual_LTR
1360    *         and the option SPACES_RELATIVE_TO_TEXT_BEGIN_END was selected
1361    *         the spaces will be located at end of buffer.
1362    *         If there are no spaces to expand the LamAlef, an exception is thrown.
1363 */
expandCompositCharAtBegin(char[] dest,int start, int length, int lacount)1364  private boolean expandCompositCharAtBegin(char[] dest,int start, int length,
1365                             int lacount) {
1366      boolean spaceNotFound = false;
1367 
1368      if (lacount > countSpacesRight(dest, start, length)) {
1369          spaceNotFound = true;
1370          return spaceNotFound;
1371      }
1372      for (int r = start + length - lacount, w = start + length; --r >= start;) {
1373          char ch = dest[r];
1374          if (isNormalizedLamAlefChar(ch)) {
1375              dest[--w] = LAM_CHAR;
1376              dest[--w] = convertNormalizedLamAlef[ch - '\u065C'];
1377          } else {
1378              dest[--w] = ch;
1379          }
1380      }
1381      return spaceNotFound;
1382 
1383   }
1384 
1385   /*
1386    *Name     : expandCompositCharAtEnd
1387    *Function : Expands the LamAlef character to Lam and Alef consuming the
1388    *           required space from end of the buffer. If the text type was
1389    *           Visual LTR and the option SPACES_RELATIVE_TO_TEXT_BEGIN_END
1390    *           was used, the spaces will be consumed from begin of buffer. If
1391    *           there are no spaces to expand the LamAlef, an exception is thrown.
1392    */
1393 
expandCompositCharAtEnd(char[] dest,int start, int length, int lacount)1394   private boolean  expandCompositCharAtEnd(char[] dest,int start, int length,
1395                           int lacount){
1396       boolean spaceNotFound = false;
1397 
1398       if (lacount > countSpacesLeft(dest, start, length)) {
1399           spaceNotFound = true;
1400           return spaceNotFound;
1401       }
1402       for (int r = start + lacount, w = start, e = start + length; r < e; ++r) {
1403           char ch = dest[r];
1404           if (isNormalizedLamAlefChar(ch)) {
1405               dest[w++] = convertNormalizedLamAlef[ch - '\u065C'];
1406               dest[w++] = LAM_CHAR;
1407           } else {
1408               dest[w++] = ch;
1409           }
1410       }
1411       return spaceNotFound;
1412   }
1413 
1414   /*
1415    *Name     : expandCompositCharAtNear
1416    *Function : Expands the LamAlef character into Lam + Alef, YehHamza character
1417    *           into Yeh + Hamza, SeenFamily character into SeenFamily character
1418    *           + Tail, while consuming the space next to the character.
1419    */
1420 
expandCompositCharAtNear(char[] dest,int start, int length, int yehHamzaOption, int seenTailOption, int lamAlefOption)1421   private boolean expandCompositCharAtNear(char[] dest,int start, int length,
1422                                        int yehHamzaOption, int seenTailOption, int lamAlefOption){
1423 
1424       boolean spaceNotFound = false;
1425 
1426 
1427 
1428       if (isNormalizedLamAlefChar(dest[start])) {
1429           spaceNotFound = true;
1430           return spaceNotFound;
1431       }
1432       for (int i = start + length; --i >=start;) {
1433           char ch = dest[i];
1434           if (lamAlefOption == 1 && isNormalizedLamAlefChar(ch)) {
1435               if (i>start &&dest[i-1] == SPACE_CHAR) {
1436                   dest[i] = LAM_CHAR;
1437                   dest[--i] = convertNormalizedLamAlef[ch - '\u065C'];
1438               } else {
1439                   spaceNotFound = true;
1440                   return spaceNotFound;
1441               }
1442           }else if(seenTailOption == 1 && isSeenTailFamilyChar(ch) == 1){
1443               if(i>start &&dest[i-1] == SPACE_CHAR){
1444                   dest[i-1] = tailChar;
1445               } else{
1446                   spaceNotFound = true;
1447                   return spaceNotFound;
1448               }
1449           }else if(yehHamzaOption == 1 && isYehHamzaChar(ch)){
1450 
1451                if(i>start &&dest[i-1] == SPACE_CHAR){
1452                   dest[i] = yehHamzaToYeh[ch - YEH_HAMZAFE_CHAR];
1453                   dest[i-1] = HAMZAFE_CHAR;
1454               }else{
1455                   spaceNotFound = true;
1456                   return spaceNotFound;
1457                 }
1458 
1459 
1460           }
1461       }
1462       return false;
1463 
1464   }
1465 
1466     /*
1467      * Name    : expandCompositChar
1468      * Function: LamAlef needs special handling as the LamAlef is
1469      *           one character while expanding it will give two
1470      *           characters Lam + Alef, so we need to expand the LamAlef
1471      *           in near or far spaces according to the options the user
1472      *           specifies or increase the buffer size.
1473      *           Dest has enough room for the expansion if we are growing.
1474      *           lamalef are normalized to the 'special characters'
1475      */
expandCompositChar(char[] dest, int start, int length, int lacount, int shapingMode)1476     private int expandCompositChar(char[] dest,
1477                               int start,
1478                               int length,
1479                               int lacount,
1480                               int shapingMode) throws ArabicShapingException {
1481 
1482         int lenOptionsLamAlef = options & LAMALEF_MASK;
1483         int lenOptionsSeen = options & SEEN_MASK;
1484         int lenOptionsYehHamza = options & YEHHAMZA_MASK;
1485         boolean spaceNotFound = false;
1486 
1487         if (!isLogical && !spacesRelativeToTextBeginEnd) {
1488             switch (lenOptionsLamAlef) {
1489             case LAMALEF_BEGIN: lenOptionsLamAlef = LAMALEF_END; break;
1490             case LAMALEF_END: lenOptionsLamAlef = LAMALEF_BEGIN; break;
1491             default: break;
1492             }
1493         }
1494 
1495         if(shapingMode == 1){
1496             if(lenOptionsLamAlef == LAMALEF_AUTO){
1497                 if(isLogical){
1498                     spaceNotFound = expandCompositCharAtEnd(dest, start, length, lacount);
1499                     if(spaceNotFound){
1500                         spaceNotFound = expandCompositCharAtBegin(dest, start, length, lacount);
1501                     }
1502                     if(spaceNotFound){
1503                         spaceNotFound = expandCompositCharAtNear(dest, start, length,0,0,1);
1504                     }
1505                     if(spaceNotFound){
1506                         throw new ArabicShapingException("No spacefor lamalef");
1507                     }
1508                 }else{
1509                     spaceNotFound = expandCompositCharAtBegin(dest, start, length, lacount);
1510                     if(spaceNotFound){
1511                         spaceNotFound = expandCompositCharAtEnd(dest, start, length, lacount);
1512                     }
1513                     if(spaceNotFound){
1514                         spaceNotFound = expandCompositCharAtNear(dest, start, length,0,0,1);
1515                     }
1516                     if(spaceNotFound){
1517                         throw new ArabicShapingException("No spacefor lamalef");
1518                     }
1519                 }
1520             }else if(lenOptionsLamAlef == LAMALEF_END){
1521                 spaceNotFound = expandCompositCharAtEnd(dest, start, length, lacount);
1522                 if(spaceNotFound){
1523                     throw new ArabicShapingException("No spacefor lamalef");
1524                 }
1525             }else if(lenOptionsLamAlef == LAMALEF_BEGIN){
1526                 spaceNotFound = expandCompositCharAtBegin(dest, start, length, lacount);
1527                 if(spaceNotFound){
1528                     throw new ArabicShapingException("No spacefor lamalef");
1529                 }
1530             }else if(lenOptionsLamAlef == LAMALEF_NEAR){
1531                 spaceNotFound = expandCompositCharAtNear(dest, start, length,0,0,1);
1532                 if(spaceNotFound){
1533                     throw new ArabicShapingException("No spacefor lamalef");
1534             }
1535             }else if(lenOptionsLamAlef == LAMALEF_RESIZE){
1536                 for (int r = start + length, w = r + lacount; --r >= start;) {
1537                     char ch = dest[r];
1538                     if (isNormalizedLamAlefChar(ch)) {
1539                         dest[--w] = '\u0644';
1540                         dest[--w] = convertNormalizedLamAlef[ch - '\u065C'];
1541                     } else {
1542                         dest[--w] = ch;
1543                     }
1544                 }
1545                 length += lacount;
1546             }
1547             }else{
1548                 if(lenOptionsSeen == SEEN_TWOCELL_NEAR){
1549                 spaceNotFound = expandCompositCharAtNear(dest, start, length,0,1,0);
1550                 if(spaceNotFound){
1551                     throw new ArabicShapingException("No space for Seen tail expansion");
1552                 }
1553             }
1554             if(lenOptionsYehHamza == YEHHAMZA_TWOCELL_NEAR){
1555                 spaceNotFound = expandCompositCharAtNear(dest, start, length,1,0,0);
1556                 if(spaceNotFound){
1557                     throw new ArabicShapingException("No space for YehHamza expansion");
1558                 }
1559             }
1560             }
1561         return length;
1562     }
1563 
1564 
1565     /* Convert the input buffer from FExx Range into 06xx Range
1566      * to put all characters into the 06xx range
1567      * even the lamalef is converted to the special region in
1568      * the 06xx range.  Return the number of lamalef chars found.
1569      */
normalize(char[] dest, int start, int length)1570     private int normalize(char[] dest, int start, int length) {
1571         int lacount = 0;
1572         for (int i = start, e = i + length; i < e; ++i) {
1573             char ch = dest[i];
1574             if (ch >= '\uFE70' && ch <= '\uFEFC') {
1575                 if (isLamAlefChar(ch)) {
1576                     ++lacount;
1577                 }
1578                 dest[i] = (char)convertFEto06[ch - '\uFE70'];
1579             }
1580         }
1581         return lacount;
1582     }
1583 
1584     /*
1585      * Name    : deshapeNormalize
1586      * Function: Convert the input buffer from FExx Range into 06xx Range
1587      *           even the lamalef is converted to the special region in the 06xx range.
1588      *           According to the options the user enters, all seen family characters
1589      *           followed by a tail character are merged to seen tail family character and
1590      *           any yeh followed by a hamza character are merged to yehhamza character.
1591      *           Method returns the number of lamalef chars found.
1592      */
deshapeNormalize(char[] dest, int start, int length)1593     private int deshapeNormalize(char[] dest, int start, int length) {
1594         int lacount = 0;
1595         int yehHamzaComposeEnabled = 0;
1596         int seenComposeEnabled = 0;
1597 
1598         yehHamzaComposeEnabled = ((options&YEHHAMZA_MASK) == YEHHAMZA_TWOCELL_NEAR) ? 1 : 0;
1599         seenComposeEnabled = ((options&SEEN_MASK) == SEEN_TWOCELL_NEAR)? 1 : 0;
1600 
1601         for (int i = start, e = i + length; i < e; ++i) {
1602             char ch = dest[i];
1603 
1604         if( (yehHamzaComposeEnabled == 1) && ((ch == HAMZA06_CHAR) || (ch == HAMZAFE_CHAR))
1605                && (i < (length - 1)) && isAlefMaksouraChar(dest[i+1] )) {
1606                 dest[i] = SPACE_CHAR;
1607                 dest[i+1] = YEH_HAMZA_CHAR;
1608        } else if ( (seenComposeEnabled == 1) && (isTailChar(ch)) && (i< (length - 1))
1609                        && (isSeenTailFamilyChar(dest[i+1])==1) ) {
1610                dest[i] = SPACE_CHAR;
1611        }
1612        else if (ch >= '\uFE70' && ch <= '\uFEFC') {
1613                 if (isLamAlefChar(ch)) {
1614                     ++lacount;
1615                 }
1616                 dest[i] = (char)convertFEto06[ch - '\uFE70'];
1617             }
1618         }
1619         return lacount;
1620     }
1621 
    /*
     * Name    : shapeUnicode
     * Function: Converts an Arabic Unicode buffer in 06xx Range into a shaped
     *           arabic Unicode buffer in FExx Range
     *
     *           Steps, as implemented below:
     *             1. normalize() brings the text into the 06xx range and
     *                counts the LamAlef characters.
     *             2. The buffer is walked from index start + length - 1 down
     *                to index 0. For each character that has shapes (or is
     *                tashkeel) the links of its neighbours select the shape
     *                through shapeTable; Lam + Alef pairs are merged into one
     *                character and the freed cell is marked with U+FFFF.
     *             3. handleGeneratedSpaces() is called if a LamAlef was merged
     *                or tashkeel was replaced; expandCompositChar() is called
     *                if a Seen family or YehHamza character was flagged.
     *
     * dest         : buffer, modified in place
     * start/length : range to shape
     * destSize     : the incoming value is never read; the variable is reused
     *                for the result length
     * tashkeelFlag : 0: shape normally, 1: shape isolated, 2: replace tashkeel
     *                other than Shadda by TASHKEEL_SPACE_SUB
     * Returns      : the length of the shaped text
     * Throws       : ArabicShapingException, propagated from expandCompositChar
     *
     * NOTE(review): the main loop and the lookahead stop at index 0 / -1 and
     * the Seen/YehHamza test checks i == 0, i.e. they ignore start. The
     * callers visible in this part of the file always pass start == 0 and a
     * non-zero length - confirm before calling with anything else.
     */
    private int shapeUnicode(char[] dest,
                             int start,
                             int length,
                             int destSize,
                             int tashkeelFlag)throws ArabicShapingException {

        int lamalef_count = normalize(dest, start, length);

        // resolve the link between the characters.
        // Arabic characters have four forms: Isolated, Initial, Medial and Final.
        // Tashkeel characters have two, isolated or medial, and sometimes only isolated.
        // tashkeelFlag == 0: shape normally, 1: shape isolated, 2: don't shape

        boolean lamalef_found = false, seenfam_found = false;
        boolean yehhamza_found = false, tashkeel_found = false;
        // i         : index of the character being shaped (moves downwards)
        // currLink  : link value of dest[i]
        // nextLink  : link value of the next relevant character at a lower index
        // lastLink  : link value of the most recent relevant character (higher index)
        // prevLink  : the lastLink before that
        // lastPos   : index that lastLink belongs to
        // nx        : index that nextLink belongs to; negative while unknown
        // nw        : scan cursor used to find nx
        int i = start + length - 1;
        int currLink = getLink(dest[i]);
        int nextLink = 0;
        int prevLink = 0;
        int lastLink = 0;
        //int prevPos = i;
        int lastPos = i;
        int nx = -2;
        int nw = 0;

        while (i >= 0) {
            // If high byte of currLink != 0 then there might be more than one shape
            if ((currLink & '\uFF00') != 0 || isTashkeelChar(dest[i])) {
                // Look ahead (towards index 0) for the first character whose
                // link does not carry the IRRELEVANT bit.
                nw = i - 1;
                nx = -2;
                while (nx < 0) { // we need to know about next char
                    if (nw == -1) {
                        // Ran off the front of the buffer: no next character.
                        nextLink = 0;
                        nx = Integer.MAX_VALUE;
                    } else {
                        nextLink = getLink(dest[nw]);
                        if ((nextLink & IRRELEVANT) == 0) {
                            nx = nw;
                        } else {
                            --nw;
                        }
                    }
                }

                // Current character is an Alef type and the last relevant one
                // was a Lam type: try to merge them into a single LamAlef.
                if (((currLink & ALEFTYPE) > 0) && ((lastLink & LAMTYPE) > 0)) {
                    lamalef_found = true;
                    char wLamalef = changeLamAlef(dest[i]); // get from 0x065C-0x065f
                    if (wLamalef != '\u0000') {
                        // replace alef by marker, it will be removed later
                        dest[i] = '\uffff';
                        // The merged character takes the Lam's cell, and shaping
                        // continues from that cell.
                        dest[lastPos] = wLamalef;
                        i = lastPos;
                    }

                    // The Lam has been consumed, so the character before it
                    // becomes the "last" neighbour again.
                    lastLink = prevLink;
                    currLink = getLink(wLamalef); // requires '\u0000', unfortunately
                }
                // Flag Seen family / YehHamza characters that either have a
                // space in the cell below them or sit at index 0; the flags
                // trigger expandCompositChar() after the loop.
                if ((i > 0) && (dest[i-1] == SPACE_CHAR))
                {
                    if ( isSeenFamilyChar(dest[i]) == 1){
                        seenfam_found = true;
                    } else if (dest[i] == YEH_HAMZA_CHAR) {
                        yehhamza_found = true;
                    }
                }
                else if(i==0){
                    if ( isSeenFamilyChar(dest[i]) == 1){
                        seenfam_found = true;
                    } else if (dest[i] == YEH_HAMZA_CHAR) {
                        yehhamza_found = true;
                    }
                }


                // get the proper shape according to link ability of neighbors
                // and of character; depends on the order of the shapes
                // (isolated, initial, middle, final) in the compatibility area

                int flag = specialChar(dest[i]);

                int shape = shapeTable[nextLink & LINK_MASK]
                    [lastLink & LINK_MASK]
                    [currLink & LINK_MASK];

                if (flag == 1) {
                    // Keep only the lowest bit, so the shape is 0 or 1.
                    shape &= 0x1;
                } else if (flag == 2) {
                    // Flag 2 is handled as tashkeel below. With tashkeelFlag 0
                    // it gets shape 1 only when both neighbours link to it, it
                    // is neither U+064C nor U+064D, and it does not sit between
                    // a Lam and an Alef.
                    if (tashkeelFlag == 0 &&
                        ((lastLink & LINKL) != 0) &&
                        ((nextLink & LINKR) != 0) &&
                        dest[i] != '\u064C' &&
                        dest[i] != '\u064D' &&
                        !((nextLink & ALEFTYPE) == ALEFTYPE &&
                          (lastLink & LAMTYPE) == LAMTYPE)) {

                        shape = 1;

                    } else if(tashkeelFlag == 2 && dest[i] == SHADDA06_CHAR){
                        shape = 1;

                    } else {
                        shape = 0;
                    }
                }
                if (flag == 2) {
                    if (tashkeelFlag == 2 && dest[i] != SHADDA06_CHAR) {
                        // Everything except Shadda is replaced by the
                        // substitute character and reported via tashkeel_found.
                        dest[i] = TASHKEEL_SPACE_SUB;
                        tashkeel_found = true;
                    }
                    else{
                        // Offset from U+FE70 comes from irrelevantPos, indexed
                        // relative to U+064B, plus the chosen shape.
                        dest[i] = (char)('\uFE70' + irrelevantPos[dest[i] - '\u064B'] + shape);
                    }
                    // else leave tashkeel alone
                } else {
                    // The high byte of the link is the offset from U+FE70 of
                    // the character's first shape; the shape index is added.
                    dest[i] = (char)('\uFE70' + (currLink >> 8) + shape);
                }
            }

            // move one notch forward
            if ((currLink & IRRELEVANT) == 0) {
                prevLink = lastLink;
                lastLink = currLink;
                //prevPos = lastPos;
                lastPos = i;
            }

            --i;
            if (i == nx) {
                // Reuse the link already fetched by the lookahead.
                currLink = nextLink;
                nx = -2;
            } else if (i != -1) {
                currLink = getLink(dest[i]);
            }
        }

        // If we found a lam/alef pair in the buffer
        // call handleGeneratedSpaces to remove the spaces that were added

        // From here on destSize holds the current text length.
        destSize = length;
        if (lamalef_found || tashkeel_found) {
            destSize = handleGeneratedSpaces(dest, start, length);
        }
        if (seenfam_found || yehhamza_found){
            destSize = expandCompositChar(dest, start, destSize, lamalef_count, SHAPE_MODE);
        }
        return destSize;
    }
1774 
1775     /*
1776      * Name    : deShapeUnicode
1777      * Function: Converts an Arabic Unicode buffer in FExx Range into unshaped
1778      *           arabic Unicode buffer in 06xx Range
1779      */
deShapeUnicode(char[] dest, int start, int length, int destSize)1780     private int deShapeUnicode(char[] dest,
1781                                int start,
1782                                int length,
1783                                int destSize) throws ArabicShapingException {
1784 
1785         int lamalef_count = deshapeNormalize(dest, start, length);
1786 
1787         // If there was a lamalef in the buffer call expandLamAlef
1788         if (lamalef_count != 0) {
1789             // need to adjust dest to fit expanded buffer... !!!
1790             destSize = expandCompositChar(dest, start, length, lamalef_count,DESHAPE_MODE);
1791         } else {
1792             destSize = length;
1793         }
1794 
1795         return destSize;
1796     }
1797 
internalShape(char[] source, int sourceStart, int sourceLength, char[] dest, int destStart, int destSize)1798     private int internalShape(char[] source,
1799                               int sourceStart,
1800                               int sourceLength,
1801                               char[] dest,
1802                               int destStart,
1803                               int destSize) throws ArabicShapingException {
1804 
1805         if (sourceLength == 0) {
1806             return 0;
1807         }
1808 
1809         if (destSize == 0) {
1810             if (((options & LETTERS_MASK) != LETTERS_NOOP) &&
1811                 ((options & LAMALEF_MASK) == LAMALEF_RESIZE)) {
1812 
1813                 return calculateSize(source, sourceStart, sourceLength);
1814             } else {
1815                 return sourceLength; // by definition
1816             }
1817         }
1818 
1819         // always use temp buffer
1820         char[] temp = new char[sourceLength * 2]; // all lamalefs requiring expansion
1821         System.arraycopy(source, sourceStart, temp, 0, sourceLength);
1822 
1823         if (isLogical) {
1824             invertBuffer(temp, 0, sourceLength);
1825         }
1826 
1827         int outputSize = sourceLength;
1828 
1829         switch (options & LETTERS_MASK) {
1830         case LETTERS_SHAPE_TASHKEEL_ISOLATED:
1831             outputSize = shapeUnicode(temp, 0, sourceLength, destSize, 1);
1832             break;
1833 
1834         case LETTERS_SHAPE:
1835             if( ((options&TASHKEEL_MASK) != 0) &&
1836                 ((options&TASHKEEL_MASK) !=TASHKEEL_REPLACE_BY_TATWEEL)) {
1837                    /* Call the shaping function with tashkeel flag == 2 for removal of tashkeel */
1838                 outputSize = shapeUnicode(temp, 0, sourceLength, destSize, 2);
1839                 }else {
1840                    //default Call the shaping function with tashkeel flag == 1 */
1841                     outputSize = shapeUnicode(temp, 0, sourceLength, destSize, 0);
1842 
1843                    /*After shaping text check if user wants to remove tashkeel and replace it with tatweel*/
1844                    if( (options&TASHKEEL_MASK) == TASHKEEL_REPLACE_BY_TATWEEL){
1845                        outputSize = handleTashkeelWithTatweel(temp,sourceLength);
1846                    }
1847                }
1848             break;
1849 
1850         case LETTERS_UNSHAPE:
1851             outputSize = deShapeUnicode(temp, 0, sourceLength, destSize);
1852             break;
1853 
1854         default:
1855             break;
1856         }
1857 
1858         if (outputSize > destSize) {
1859             throw new ArabicShapingException("not enough room for result data");
1860         }
1861 
1862         if ((options & DIGITS_MASK) != DIGITS_NOOP) {
1863             char digitBase = '\u0030'; // European digits
1864             switch (options & DIGIT_TYPE_MASK) {
1865             case DIGIT_TYPE_AN:
1866                 digitBase = '\u0660';  // Arabic-Indic digits
1867                 break;
1868 
1869             case DIGIT_TYPE_AN_EXTENDED:
1870                 digitBase = '\u06f0';  // Eastern Arabic-Indic digits (Persian and Urdu)
1871                 break;
1872 
1873             default:
1874                 break;
1875             }
1876 
1877             switch (options & DIGITS_MASK) {
1878             case DIGITS_EN2AN:
1879                 {
1880                     int digitDelta = digitBase - '\u0030';
1881                     for (int i = 0; i < outputSize; ++i) {
1882                         char ch = temp[i];
1883                         if (ch <= '\u0039' && ch >= '\u0030') {
1884                             temp[i] += digitDelta;
1885                         }
1886                     }
1887                 }
1888                 break;
1889 
1890             case DIGITS_AN2EN:
1891                 {
1892                     char digitTop = (char)(digitBase + 9);
1893                     int digitDelta = '\u0030' - digitBase;
1894                     for (int i = 0; i < outputSize; ++i) {
1895                         char ch = temp[i];
1896                         if (ch <= digitTop && ch >= digitBase) {
1897                             temp[i] += digitDelta;
1898                         }
1899                     }
1900                 }
1901                 break;
1902 
1903             case DIGITS_EN2AN_INIT_LR:
1904                 shapeToArabicDigitsWithContext(temp, 0, outputSize, digitBase, false);
1905                 break;
1906 
1907             case DIGITS_EN2AN_INIT_AL:
1908                 shapeToArabicDigitsWithContext(temp, 0, outputSize, digitBase, true);
1909                 break;
1910 
1911             default:
1912                 break;
1913             }
1914         }
1915 
1916         if (isLogical) {
1917             invertBuffer(temp, 0, outputSize);
1918         }
1919 
1920         System.arraycopy(temp, 0, dest, destStart, outputSize);
1921 
1922         return outputSize;
1923     }
1924 }
1925