1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html#License
3 /*
4 *******************************************************************************
5 *   Copyright (C) 2001-2010, International Business Machines
6 *   Corporation and others.  All Rights Reserved.
7 *******************************************************************************
8 */
9 /* Written by Simon Montagu, Matitiahu Allouche
10  * (ported from C code written by Markus W. Scherer)
11  */
12 
13 package com.ibm.icu.text;
14 
15 import com.ibm.icu.lang.UCharacter;
16 
17 final class BidiWriter {
18 
19     /** Bidi control code points */
20     static final char LRM_CHAR = 0x200e;
21     static final char RLM_CHAR = 0x200f;
22     static final int MASK_R_AL = (1 << UCharacter.RIGHT_TO_LEFT |
23                                   1 << UCharacter.RIGHT_TO_LEFT_ARABIC);
24 
IsCombining(int type)25     private static boolean IsCombining(int type)
26     {
27         return ((1<<type &
28                 (1<<UCharacter.NON_SPACING_MARK |
29                  1<<UCharacter.COMBINING_SPACING_MARK |
30                  1<<UCharacter.ENCLOSING_MARK)) != 0);
31     }
32 
33     /*
34      * When we have OUTPUT_REVERSE set on writeReordered(), then we
35      * semantically write RTL runs in reverse and later reverse them again.
36      * Instead, we actually write them in forward order to begin with.
37      * However, if the RTL run was to be mirrored, we need to mirror here now
38      * since the implicit second reversal must not do it.
39      * It looks strange to do mirroring in LTR output, but it is only because
40      * we are writing RTL output in reverse.
41      */
doWriteForward(String src, int options)42     private static String doWriteForward(String src, int options) {
43         /* optimize for several combinations of options */
44         switch(options&(Bidi.REMOVE_BIDI_CONTROLS|Bidi.DO_MIRRORING)) {
45         case 0: {
46             /* simply return the LTR run */
47             return src;
48         }
49         case Bidi.DO_MIRRORING: {
50             StringBuffer dest = new StringBuffer(src.length());
51 
52             /* do mirroring */
53             int i=0;
54             int c;
55 
56             do {
57                 c = UTF16.charAt(src, i);
58                 i += UTF16.getCharCount(c);
59                 UTF16.append(dest, UCharacter.getMirror(c));
60             } while(i < src.length());
61             return dest.toString();
62         }
63         case Bidi.REMOVE_BIDI_CONTROLS: {
64             StringBuilder dest = new StringBuilder(src.length());
65 
66             /* copy the LTR run and remove any Bidi control characters */
67             int i = 0;
68             char c;
69             do {
70                 c = src.charAt(i++);
71                 if(!Bidi.IsBidiControlChar(c)) {
72                     dest.append(c);
73                 }
74             } while(i < src.length());
75             return dest.toString();
76         }
77         default: {
78             StringBuffer dest = new StringBuffer(src.length());
79 
80             /* remove Bidi control characters and do mirroring */
81             int i = 0;
82             int c;
83             do {
84                 c = UTF16.charAt(src, i);
85                 i += UTF16.getCharCount(c);
86                 if(!Bidi.IsBidiControlChar(c)) {
87                     UTF16.append(dest, UCharacter.getMirror(c));
88                 }
89             } while(i < src.length());
90             return dest.toString();
91         }
92         } /* end of switch */
93     }
94 
doWriteForward(char[] text, int start, int limit, int options)95     private static String doWriteForward(char[] text, int start, int limit,
96                                          int options)
97     {
98         return doWriteForward(new String(text, start, limit - start), options);
99     }
100 
writeReverse(String src, int options)101     static String writeReverse(String src, int options) {
102         /*
103          * RTL run -
104          *
105          * RTL runs need to be copied to the destination in reverse order
106          * of code points, not code units, to keep Unicode characters intact.
107          *
108          * The general strategy for this is to read the source text
109          * in backward order, collect all code units for a code point
110          * (and optionally following combining characters, see below),
111          * and copy all these code units in ascending order
112          * to the destination for this run.
113          *
114          * Several options request whether combining characters
115          * should be kept after their base characters,
116          * whether Bidi control characters should be removed, and
117          * whether characters should be replaced by their mirror-image
118          * equivalent Unicode characters.
119          */
120         StringBuffer dest = new StringBuffer(src.length());
121 
122         /* optimize for several combinations of options */
123         switch (options &
124                 (Bidi.REMOVE_BIDI_CONTROLS |
125                  Bidi.DO_MIRRORING |
126                  Bidi.KEEP_BASE_COMBINING)) {
127 
128         case 0:
129             /*
130              * With none of the "complicated" options set, the destination
131              * run will have the same length as the source run,
132              * and there is no mirroring and no keeping combining characters
133              * with their base characters.
134              *
135              * XXX: or dest = UTF16.reverse(new StringBuffer(src));
136              */
137 
138             int srcLength = src.length();
139 
140             /* preserve character integrity */
141             do {
142                 /* i is always after the last code unit known to need to be kept
143                  *  in this segment */
144                 int i = srcLength;
145 
146                 /* collect code units for one base character */
147                 srcLength -= UTF16.getCharCount(UTF16.charAt(src,
148                                                              srcLength - 1));
149 
150                 /* copy this base character */
151                 dest.append(src.substring(srcLength, i));
152             } while(srcLength > 0);
153             break;
154 
155         case Bidi.KEEP_BASE_COMBINING:
156             /*
157              * Here, too, the destination
158              * run will have the same length as the source run,
159              * and there is no mirroring.
160              * We do need to keep combining characters with their base
161              * characters.
162              */
163             srcLength = src.length();
164 
165             /* preserve character integrity */
166             do {
167                 /* i is always after the last code unit known to need to be kept
168                  *  in this segment */
169                 int c;
170                 int i = srcLength;
171 
172                 /* collect code units and modifier letters for one base
173                  * character */
174                 do {
175                     c = UTF16.charAt(src, srcLength - 1);
176                     srcLength -= UTF16.getCharCount(c);
177                 } while(srcLength > 0 && IsCombining(UCharacter.getType(c)));
178 
179                 /* copy this "user character" */
180                 dest.append(src.substring(srcLength, i));
181             } while(srcLength > 0);
182             break;
183 
184         default:
185             /*
186              * With several "complicated" options set, this is the most
187              * general and the slowest copying of an RTL run.
188              * We will do mirroring, remove Bidi controls, and
189              * keep combining characters with their base characters
190              * as requested.
191              */
192             srcLength = src.length();
193 
194             /* preserve character integrity */
195             do {
196                 /* i is always after the last code unit known to need to be kept
197                  *  in this segment */
198                 int i = srcLength;
199 
200                 /* collect code units for one base character */
201                 int c = UTF16.charAt(src, srcLength - 1);
202                 srcLength -= UTF16.getCharCount(c);
203                 if ((options & Bidi.KEEP_BASE_COMBINING) != 0) {
204                     /* collect modifier letters for this base character */
205                     while(srcLength > 0 && IsCombining(UCharacter.getType(c))) {
206                         c = UTF16.charAt(src, srcLength - 1);
207                         srcLength -= UTF16.getCharCount(c);
208                     }
209                 }
210 
211                 if ((options & Bidi.REMOVE_BIDI_CONTROLS) != 0 &&
212                     Bidi.IsBidiControlChar(c)) {
213                     /* do not copy this Bidi control character */
214                     continue;
215                 }
216 
217                 /* copy this "user character" */
218                 int j = srcLength;
219                 if((options & Bidi.DO_MIRRORING) != 0) {
220                     /* mirror only the base character */
221                     c = UCharacter.getMirror(c);
222                     UTF16.append(dest, c);
223                     j += UTF16.getCharCount(c);
224                 }
225                 dest.append(src.substring(j, i));
226             } while(srcLength > 0);
227             break;
228         } /* end of switch */
229 
230         return dest.toString();
231     }
232 
doWriteReverse(char[] text, int start, int limit, int options)233     static String doWriteReverse(char[] text, int start, int limit, int options)
234     {
235         return writeReverse(new String(text, start, limit - start), options);
236     }
237 
writeReordered(Bidi bidi, int options)238     static String writeReordered(Bidi bidi, int options)
239     {
240         int run, runCount;
241         StringBuilder dest;
242         char[] text = bidi.text;
243         runCount = bidi.countRuns();
244 
245         /*
246          * Option "insert marks" implies Bidi.INSERT_LRM_FOR_NUMERIC if the
247          * reordering mode (checked below) is appropriate.
248          */
249         if ((bidi.reorderingOptions & Bidi.OPTION_INSERT_MARKS) != 0) {
250             options |= Bidi.INSERT_LRM_FOR_NUMERIC;
251             options &= ~Bidi.REMOVE_BIDI_CONTROLS;
252         }
253         /*
254          * Option "remove controls" implies Bidi.REMOVE_BIDI_CONTROLS
255          * and cancels Bidi.INSERT_LRM_FOR_NUMERIC.
256          */
257         if ((bidi.reorderingOptions & Bidi.OPTION_REMOVE_CONTROLS) != 0) {
258             options |= Bidi.REMOVE_BIDI_CONTROLS;
259             options &= ~Bidi.INSERT_LRM_FOR_NUMERIC;
260         }
261         /*
262          * If we do not perform the "inverse Bidi" algorithm, then we
263          * don't need to insert any LRMs, and don't need to test for it.
264          */
265         if ((bidi.reorderingMode != Bidi.REORDER_INVERSE_NUMBERS_AS_L) &&
266             (bidi.reorderingMode != Bidi.REORDER_INVERSE_LIKE_DIRECT)  &&
267             (bidi.reorderingMode != Bidi.REORDER_INVERSE_FOR_NUMBERS_SPECIAL) &&
268             (bidi.reorderingMode != Bidi.REORDER_RUNS_ONLY)) {
269             options &= ~Bidi.INSERT_LRM_FOR_NUMERIC;
270         }
271         dest = new StringBuilder((options & Bidi.INSERT_LRM_FOR_NUMERIC) != 0 ?
272                                  bidi.length * 2 : bidi.length);
273         /*
274          * Iterate through all visual runs and copy the run text segments to
275          * the destination, according to the options.
276          *
277          * The tests for where to insert LRMs ignore the fact that there may be
278          * BN codes or non-BMP code points at the beginning and end of a run;
279          * they may insert LRMs unnecessarily but the tests are faster this way
280          * (this would have to be improved for UTF-8).
281          */
282         if ((options & Bidi.OUTPUT_REVERSE) == 0) {
283             /* forward output */
284             if ((options & Bidi.INSERT_LRM_FOR_NUMERIC) == 0) {
285                 /* do not insert Bidi controls */
286                 for (run = 0; run < runCount; ++run) {
287                     BidiRun bidiRun = bidi.getVisualRun(run);
288                     if (bidiRun.isEvenRun()) {
289                         dest.append(doWriteForward(text, bidiRun.start,
290                                                    bidiRun.limit,
291                                                    options & ~Bidi.DO_MIRRORING));
292                      } else {
293                         dest.append(doWriteReverse(text, bidiRun.start,
294                                                    bidiRun.limit, options));
295                      }
296                 }
297             } else {
298                 /* insert Bidi controls for "inverse Bidi" */
299                 byte[] dirProps = bidi.dirProps;
300                 char uc;
301                 int markFlag;
302 
303                 for (run = 0; run < runCount; ++run) {
304                     BidiRun bidiRun = bidi.getVisualRun(run);
305                     markFlag=0;
306                     /* check if something relevant in insertPoints */
307                     markFlag = bidi.runs[run].insertRemove;
308                     if (markFlag < 0) { /* bidi controls count */
309                         markFlag = 0;
310                     }
311                     if (bidiRun.isEvenRun()) {
312                         if (bidi.isInverse() &&
313                                 dirProps[bidiRun.start] != Bidi.L) {
314                             markFlag |= Bidi.LRM_BEFORE;
315                         }
316                         if ((markFlag & Bidi.LRM_BEFORE) != 0) {
317                             uc = LRM_CHAR;
318                         } else if ((markFlag & Bidi.RLM_BEFORE) != 0) {
319                             uc = RLM_CHAR;
320                         } else {
321                             uc = 0;
322                         }
323                         if (uc != 0) {
324                             dest.append(uc);
325                         }
326                         dest.append(doWriteForward(text,
327                                                    bidiRun.start, bidiRun.limit,
328                                                    options & ~Bidi.DO_MIRRORING));
329 
330                         if (bidi.isInverse() &&
331                              dirProps[bidiRun.limit - 1] != Bidi.L) {
332                             markFlag |= Bidi.LRM_AFTER;
333                         }
334                         if ((markFlag & Bidi.LRM_AFTER) != 0) {
335                             uc = LRM_CHAR;
336                         } else if ((markFlag & Bidi.RLM_AFTER) != 0) {
337                             uc = RLM_CHAR;
338                         } else {
339                             uc = 0;
340                         }
341                         if (uc != 0) {
342                             dest.append(uc);
343                         }
344                     } else { /* RTL run */
345                         if (bidi.isInverse() &&
346                             !bidi.testDirPropFlagAt(MASK_R_AL,
347                                                     bidiRun.limit - 1)) {
348                             markFlag |= Bidi.RLM_BEFORE;
349                         }
350                         if ((markFlag & Bidi.LRM_BEFORE) != 0) {
351                             uc = LRM_CHAR;
352                         } else if ((markFlag & Bidi.RLM_BEFORE) != 0) {
353                             uc = RLM_CHAR;
354                         } else {
355                             uc = 0;
356                         }
357                         if (uc != 0) {
358                             dest.append(uc);
359                         }
360                         dest.append(doWriteReverse(text, bidiRun.start,
361                                                    bidiRun.limit, options));
362 
363                         if(bidi.isInverse() &&
364                                 (MASK_R_AL & Bidi.DirPropFlag(dirProps[bidiRun.start])) == 0) {
365                             markFlag |= Bidi.RLM_AFTER;
366                         }
367                         if ((markFlag & Bidi.LRM_AFTER) != 0) {
368                             uc = LRM_CHAR;
369                         } else if ((markFlag & Bidi.RLM_AFTER) != 0) {
370                             uc = RLM_CHAR;
371                         } else {
372                             uc = 0;
373                         }
374                         if (uc != 0) {
375                             dest.append(uc);
376                         }
377                     }
378                 }
379             }
380         } else {
381             /* reverse output */
382             if((options & Bidi.INSERT_LRM_FOR_NUMERIC) == 0) {
383                 /* do not insert Bidi controls */
384                 for(run = runCount; --run >= 0; ) {
385                     BidiRun bidiRun = bidi.getVisualRun(run);
386                     if (bidiRun.isEvenRun()) {
387                         dest.append(doWriteReverse(text,
388                                                    bidiRun.start, bidiRun.limit,
389                                                    options & ~Bidi.DO_MIRRORING));
390                     } else {
391                         dest.append(doWriteForward(text, bidiRun.start,
392                                                    bidiRun.limit, options));
393                     }
394                 }
395             } else {
396                 /* insert Bidi controls for "inverse Bidi" */
397 
398                 byte[] dirProps = bidi.dirProps;
399 
400                 for (run = runCount; --run >= 0; ) {
401                     /* reverse output */
402                     BidiRun bidiRun = bidi.getVisualRun(run);
403                     if (bidiRun.isEvenRun()) {
404                         if (dirProps[bidiRun.limit - 1] != Bidi.L) {
405                             dest.append(LRM_CHAR);
406                         }
407 
408                         dest.append(doWriteReverse(text, bidiRun.start,
409                                 bidiRun.limit, options & ~Bidi.DO_MIRRORING));
410 
411                         if (dirProps[bidiRun.start] != Bidi.L) {
412                             dest.append(LRM_CHAR);
413                         }
414                     } else {
415                         if ((MASK_R_AL & Bidi.DirPropFlag(dirProps[bidiRun.start])) == 0) {
416                             dest.append(RLM_CHAR);
417                         }
418 
419                         dest.append(doWriteForward(text, bidiRun.start,
420                                                    bidiRun.limit, options));
421 
422                         if ((MASK_R_AL & Bidi.DirPropFlag(dirProps[bidiRun.limit - 1])) == 0) {
423                             dest.append(RLM_CHAR);
424                         }
425                     }
426                 }
427             }
428         }
429 
430         return dest.toString();
431     }
432 }
433