1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html#License 3 /* 4 ******************************************************************************* 5 * Copyright (C) 2001-2010, International Business Machines 6 * Corporation and others. All Rights Reserved. 7 ******************************************************************************* 8 */ 9 /* Written by Simon Montagu, Matitiahu Allouche 10 * (ported from C code written by Markus W. Scherer) 11 */ 12 13 package com.ibm.icu.text; 14 15 import com.ibm.icu.lang.UCharacter; 16 17 final class BidiWriter { 18 19 /** Bidi control code points */ 20 static final char LRM_CHAR = 0x200e; 21 static final char RLM_CHAR = 0x200f; 22 static final int MASK_R_AL = (1 << UCharacter.RIGHT_TO_LEFT | 23 1 << UCharacter.RIGHT_TO_LEFT_ARABIC); 24 IsCombining(int type)25 private static boolean IsCombining(int type) 26 { 27 return ((1<<type & 28 (1<<UCharacter.NON_SPACING_MARK | 29 1<<UCharacter.COMBINING_SPACING_MARK | 30 1<<UCharacter.ENCLOSING_MARK)) != 0); 31 } 32 33 /* 34 * When we have OUTPUT_REVERSE set on writeReordered(), then we 35 * semantically write RTL runs in reverse and later reverse them again. 36 * Instead, we actually write them in forward order to begin with. 37 * However, if the RTL run was to be mirrored, we need to mirror here now 38 * since the implicit second reversal must not do it. 39 * It looks strange to do mirroring in LTR output, but it is only because 40 * we are writing RTL output in reverse. 41 */ doWriteForward(String src, int options)42 private static String doWriteForward(String src, int options) { 43 /* optimize for several combinations of options */ 44 switch(options&(Bidi.REMOVE_BIDI_CONTROLS|Bidi.DO_MIRRORING)) { 45 case 0: { 46 /* simply return the LTR run */ 47 return src; 48 } 49 case Bidi.DO_MIRRORING: { 50 StringBuffer dest = new StringBuffer(src.length()); 51 52 /* do mirroring */ 53 int i=0; 54 int c; 55 56 do { 57 c = UTF16.charAt(src, i); 58 i += UTF16.getCharCount(c); 59 UTF16.append(dest, UCharacter.getMirror(c)); 60 } while(i < src.length()); 61 return dest.toString(); 62 } 63 case Bidi.REMOVE_BIDI_CONTROLS: { 64 StringBuilder dest = new StringBuilder(src.length()); 65 66 /* copy the LTR run and remove any Bidi control characters */ 67 int i = 0; 68 char c; 69 do { 70 c = src.charAt(i++); 71 if(!Bidi.IsBidiControlChar(c)) { 72 dest.append(c); 73 } 74 } while(i < src.length()); 75 return dest.toString(); 76 } 77 default: { 78 StringBuffer dest = new StringBuffer(src.length()); 79 80 /* remove Bidi control characters and do mirroring */ 81 int i = 0; 82 int c; 83 do { 84 c = UTF16.charAt(src, i); 85 i += UTF16.getCharCount(c); 86 if(!Bidi.IsBidiControlChar(c)) { 87 UTF16.append(dest, UCharacter.getMirror(c)); 88 } 89 } while(i < src.length()); 90 return dest.toString(); 91 } 92 } /* end of switch */ 93 } 94 doWriteForward(char[] text, int start, int limit, int options)95 private static String doWriteForward(char[] text, int start, int limit, 96 int options) 97 { 98 return doWriteForward(new String(text, start, limit - start), options); 99 } 100 writeReverse(String src, int options)101 static String writeReverse(String src, int options) { 102 /* 103 * RTL run - 104 * 105 * RTL runs need to be copied to the destination in reverse order 106 * of code points, not code units, to keep Unicode characters intact. 107 * 108 * The general strategy for this is to read the source text 109 * in backward order, collect all code units for a code point 110 * (and optionally following combining characters, see below), 111 * and copy all these code units in ascending order 112 * to the destination for this run. 113 * 114 * Several options request whether combining characters 115 * should be kept after their base characters, 116 * whether Bidi control characters should be removed, and 117 * whether characters should be replaced by their mirror-image 118 * equivalent Unicode characters. 119 */ 120 StringBuffer dest = new StringBuffer(src.length()); 121 122 /* optimize for several combinations of options */ 123 switch (options & 124 (Bidi.REMOVE_BIDI_CONTROLS | 125 Bidi.DO_MIRRORING | 126 Bidi.KEEP_BASE_COMBINING)) { 127 128 case 0: 129 /* 130 * With none of the "complicated" options set, the destination 131 * run will have the same length as the source run, 132 * and there is no mirroring and no keeping combining characters 133 * with their base characters. 134 * 135 * XXX: or dest = UTF16.reverse(new StringBuffer(src)); 136 */ 137 138 int srcLength = src.length(); 139 140 /* preserve character integrity */ 141 do { 142 /* i is always after the last code unit known to need to be kept 143 * in this segment */ 144 int i = srcLength; 145 146 /* collect code units for one base character */ 147 srcLength -= UTF16.getCharCount(UTF16.charAt(src, 148 srcLength - 1)); 149 150 /* copy this base character */ 151 dest.append(src.substring(srcLength, i)); 152 } while(srcLength > 0); 153 break; 154 155 case Bidi.KEEP_BASE_COMBINING: 156 /* 157 * Here, too, the destination 158 * run will have the same length as the source run, 159 * and there is no mirroring. 160 * We do need to keep combining characters with their base 161 * characters. 162 */ 163 srcLength = src.length(); 164 165 /* preserve character integrity */ 166 do { 167 /* i is always after the last code unit known to need to be kept 168 * in this segment */ 169 int c; 170 int i = srcLength; 171 172 /* collect code units and modifier letters for one base 173 * character */ 174 do { 175 c = UTF16.charAt(src, srcLength - 1); 176 srcLength -= UTF16.getCharCount(c); 177 } while(srcLength > 0 && IsCombining(UCharacter.getType(c))); 178 179 /* copy this "user character" */ 180 dest.append(src.substring(srcLength, i)); 181 } while(srcLength > 0); 182 break; 183 184 default: 185 /* 186 * With several "complicated" options set, this is the most 187 * general and the slowest copying of an RTL run. 188 * We will do mirroring, remove Bidi controls, and 189 * keep combining characters with their base characters 190 * as requested. 191 */ 192 srcLength = src.length(); 193 194 /* preserve character integrity */ 195 do { 196 /* i is always after the last code unit known to need to be kept 197 * in this segment */ 198 int i = srcLength; 199 200 /* collect code units for one base character */ 201 int c = UTF16.charAt(src, srcLength - 1); 202 srcLength -= UTF16.getCharCount(c); 203 if ((options & Bidi.KEEP_BASE_COMBINING) != 0) { 204 /* collect modifier letters for this base character */ 205 while(srcLength > 0 && IsCombining(UCharacter.getType(c))) { 206 c = UTF16.charAt(src, srcLength - 1); 207 srcLength -= UTF16.getCharCount(c); 208 } 209 } 210 211 if ((options & Bidi.REMOVE_BIDI_CONTROLS) != 0 && 212 Bidi.IsBidiControlChar(c)) { 213 /* do not copy this Bidi control character */ 214 continue; 215 } 216 217 /* copy this "user character" */ 218 int j = srcLength; 219 if((options & Bidi.DO_MIRRORING) != 0) { 220 /* mirror only the base character */ 221 c = UCharacter.getMirror(c); 222 UTF16.append(dest, c); 223 j += UTF16.getCharCount(c); 224 } 225 dest.append(src.substring(j, i)); 226 } while(srcLength > 0); 227 break; 228 } /* end of switch */ 229 230 return dest.toString(); 231 } 232 doWriteReverse(char[] text, int start, int limit, int options)233 static String doWriteReverse(char[] text, int start, int limit, int options) 234 { 235 return writeReverse(new String(text, start, limit - start), options); 236 } 237 writeReordered(Bidi bidi, int options)238 static String writeReordered(Bidi bidi, int options) 239 { 240 int run, runCount; 241 StringBuilder dest; 242 char[] text = bidi.text; 243 runCount = bidi.countRuns(); 244 245 /* 246 * Option "insert marks" implies Bidi.INSERT_LRM_FOR_NUMERIC if the 247 * reordering mode (checked below) is appropriate. 248 */ 249 if ((bidi.reorderingOptions & Bidi.OPTION_INSERT_MARKS) != 0) { 250 options |= Bidi.INSERT_LRM_FOR_NUMERIC; 251 options &= ~Bidi.REMOVE_BIDI_CONTROLS; 252 } 253 /* 254 * Option "remove controls" implies Bidi.REMOVE_BIDI_CONTROLS 255 * and cancels Bidi.INSERT_LRM_FOR_NUMERIC. 256 */ 257 if ((bidi.reorderingOptions & Bidi.OPTION_REMOVE_CONTROLS) != 0) { 258 options |= Bidi.REMOVE_BIDI_CONTROLS; 259 options &= ~Bidi.INSERT_LRM_FOR_NUMERIC; 260 } 261 /* 262 * If we do not perform the "inverse Bidi" algorithm, then we 263 * don't need to insert any LRMs, and don't need to test for it. 264 */ 265 if ((bidi.reorderingMode != Bidi.REORDER_INVERSE_NUMBERS_AS_L) && 266 (bidi.reorderingMode != Bidi.REORDER_INVERSE_LIKE_DIRECT) && 267 (bidi.reorderingMode != Bidi.REORDER_INVERSE_FOR_NUMBERS_SPECIAL) && 268 (bidi.reorderingMode != Bidi.REORDER_RUNS_ONLY)) { 269 options &= ~Bidi.INSERT_LRM_FOR_NUMERIC; 270 } 271 dest = new StringBuilder((options & Bidi.INSERT_LRM_FOR_NUMERIC) != 0 ? 272 bidi.length * 2 : bidi.length); 273 /* 274 * Iterate through all visual runs and copy the run text segments to 275 * the destination, according to the options. 276 * 277 * The tests for where to insert LRMs ignore the fact that there may be 278 * BN codes or non-BMP code points at the beginning and end of a run; 279 * they may insert LRMs unnecessarily but the tests are faster this way 280 * (this would have to be improved for UTF-8). 281 */ 282 if ((options & Bidi.OUTPUT_REVERSE) == 0) { 283 /* forward output */ 284 if ((options & Bidi.INSERT_LRM_FOR_NUMERIC) == 0) { 285 /* do not insert Bidi controls */ 286 for (run = 0; run < runCount; ++run) { 287 BidiRun bidiRun = bidi.getVisualRun(run); 288 if (bidiRun.isEvenRun()) { 289 dest.append(doWriteForward(text, bidiRun.start, 290 bidiRun.limit, 291 options & ~Bidi.DO_MIRRORING)); 292 } else { 293 dest.append(doWriteReverse(text, bidiRun.start, 294 bidiRun.limit, options)); 295 } 296 } 297 } else { 298 /* insert Bidi controls for "inverse Bidi" */ 299 byte[] dirProps = bidi.dirProps; 300 char uc; 301 int markFlag; 302 303 for (run = 0; run < runCount; ++run) { 304 BidiRun bidiRun = bidi.getVisualRun(run); 305 markFlag=0; 306 /* check if something relevant in insertPoints */ 307 markFlag = bidi.runs[run].insertRemove; 308 if (markFlag < 0) { /* bidi controls count */ 309 markFlag = 0; 310 } 311 if (bidiRun.isEvenRun()) { 312 if (bidi.isInverse() && 313 dirProps[bidiRun.start] != Bidi.L) { 314 markFlag |= Bidi.LRM_BEFORE; 315 } 316 if ((markFlag & Bidi.LRM_BEFORE) != 0) { 317 uc = LRM_CHAR; 318 } else if ((markFlag & Bidi.RLM_BEFORE) != 0) { 319 uc = RLM_CHAR; 320 } else { 321 uc = 0; 322 } 323 if (uc != 0) { 324 dest.append(uc); 325 } 326 dest.append(doWriteForward(text, 327 bidiRun.start, bidiRun.limit, 328 options & ~Bidi.DO_MIRRORING)); 329 330 if (bidi.isInverse() && 331 dirProps[bidiRun.limit - 1] != Bidi.L) { 332 markFlag |= Bidi.LRM_AFTER; 333 } 334 if ((markFlag & Bidi.LRM_AFTER) != 0) { 335 uc = LRM_CHAR; 336 } else if ((markFlag & Bidi.RLM_AFTER) != 0) { 337 uc = RLM_CHAR; 338 } else { 339 uc = 0; 340 } 341 if (uc != 0) { 342 dest.append(uc); 343 } 344 } else { /* RTL run */ 345 if (bidi.isInverse() && 346 !bidi.testDirPropFlagAt(MASK_R_AL, 347 bidiRun.limit - 1)) { 348 markFlag |= Bidi.RLM_BEFORE; 349 } 350 if ((markFlag & Bidi.LRM_BEFORE) != 0) { 351 uc = LRM_CHAR; 352 } else if ((markFlag & Bidi.RLM_BEFORE) != 0) { 353 uc = RLM_CHAR; 354 } else { 355 uc = 0; 356 } 357 if (uc != 0) { 358 dest.append(uc); 359 } 360 dest.append(doWriteReverse(text, bidiRun.start, 361 bidiRun.limit, options)); 362 363 if(bidi.isInverse() && 364 (MASK_R_AL & Bidi.DirPropFlag(dirProps[bidiRun.start])) == 0) { 365 markFlag |= Bidi.RLM_AFTER; 366 } 367 if ((markFlag & Bidi.LRM_AFTER) != 0) { 368 uc = LRM_CHAR; 369 } else if ((markFlag & Bidi.RLM_AFTER) != 0) { 370 uc = RLM_CHAR; 371 } else { 372 uc = 0; 373 } 374 if (uc != 0) { 375 dest.append(uc); 376 } 377 } 378 } 379 } 380 } else { 381 /* reverse output */ 382 if((options & Bidi.INSERT_LRM_FOR_NUMERIC) == 0) { 383 /* do not insert Bidi controls */ 384 for(run = runCount; --run >= 0; ) { 385 BidiRun bidiRun = bidi.getVisualRun(run); 386 if (bidiRun.isEvenRun()) { 387 dest.append(doWriteReverse(text, 388 bidiRun.start, bidiRun.limit, 389 options & ~Bidi.DO_MIRRORING)); 390 } else { 391 dest.append(doWriteForward(text, bidiRun.start, 392 bidiRun.limit, options)); 393 } 394 } 395 } else { 396 /* insert Bidi controls for "inverse Bidi" */ 397 398 byte[] dirProps = bidi.dirProps; 399 400 for (run = runCount; --run >= 0; ) { 401 /* reverse output */ 402 BidiRun bidiRun = bidi.getVisualRun(run); 403 if (bidiRun.isEvenRun()) { 404 if (dirProps[bidiRun.limit - 1] != Bidi.L) { 405 dest.append(LRM_CHAR); 406 } 407 408 dest.append(doWriteReverse(text, bidiRun.start, 409 bidiRun.limit, options & ~Bidi.DO_MIRRORING)); 410 411 if (dirProps[bidiRun.start] != Bidi.L) { 412 dest.append(LRM_CHAR); 413 } 414 } else { 415 if ((MASK_R_AL & Bidi.DirPropFlag(dirProps[bidiRun.start])) == 0) { 416 dest.append(RLM_CHAR); 417 } 418 419 dest.append(doWriteForward(text, bidiRun.start, 420 bidiRun.limit, options)); 421 422 if ((MASK_R_AL & Bidi.DirPropFlag(dirProps[bidiRun.limit - 1])) == 0) { 423 dest.append(RLM_CHAR); 424 } 425 } 426 } 427 } 428 } 429 430 return dest.toString(); 431 } 432 } 433