1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 ******************************************************************************
5 *
6 * Copyright (C) 1999-2015, International Business Machines
7 * Corporation and others. All Rights Reserved.
8 *
9 ******************************************************************************
10 * file name: ubidi.c
11 * encoding: US-ASCII
12 * tab size: 8 (not used)
13 * indentation:4
14 *
15 * created on: 1999jul27
16 * created by: Markus W. Scherer, updated by Matitiahu Allouche
17 *
18 */
19
20 #include "cmemory.h"
21 #include "unicode/utypes.h"
22 #include "unicode/ustring.h"
23 #include "unicode/uchar.h"
24 #include "unicode/ubidi.h"
25 #include "unicode/utf16.h"
26 #include "ubidi_props.h"
27 #include "ubidiimp.h"
28 #include "uassert.h"
29
30 /*
31 * General implementation notes:
32 *
33 * Throughout the implementation, there are comments like (W2) that refer to
34 * rules of the BiDi algorithm, in this example to the second rule of the
35 * resolution of weak types.
36 *
37 * For handling surrogate pairs, where two UChar's form one "abstract" (or UTF-32)
38 * character according to UTF-16, the second UChar gets the directional property of
39 * the entire character assigned, while the first one gets a BN, a boundary
40 * neutral, type, which is ignored by most of the algorithm according to
41 * rule (X9) and the implementation suggestions of the BiDi algorithm.
42 *
43 * Later, adjustWSLevels() will set the level for each BN to that of the
44 * following character (UChar), which results in surrogate pairs getting the
45 * same level on each of their surrogates.
46 *
47 * In a UTF-8 implementation, the same thing could be done: the last byte of
48 * a multi-byte sequence would get the "real" property, while all previous
49 * bytes of that sequence would get BN.
50 *
51 * It is not possible to assign all those parts of a character the same real
52 * property because this would fail in the resolution of weak types with rules
53 * that look at immediately surrounding types.
54 *
55 * As a related topic, this implementation does not remove Boundary Neutral
56 * types from the input, but ignores them wherever this is relevant.
57 * For example, the loop for the resolution of the weak types reads
58 * types until it finds a non-BN.
59 * Also, explicit embedding codes are neither changed into BN nor removed.
60 * They are only treated the same way real BNs are.
61 * As stated before, adjustWSLevels() takes care of them at the end.
62 * For the purpose of conformance, the levels of all these codes
63 * do not matter.
64 *
65 * Note that this implementation modifies the dirProps
66 * after the initial setup, when applying X5c (replace FSI by LRI or RLI),
67 * X6, N0 (replace paired brackets by L or R).
68 *
69 * In this implementation, the resolution of weak types (W1 to W6),
70 * neutrals (N1 and N2), and the assignment of the resolved level (In)
71 * are all done in one single loop, in resolveImplicitLevels().
72 * Changes of dirProp values are done on the fly, without writing
73 * them back to the dirProps array.
74 *
75 *
76 * This implementation contains code that allows to bypass steps of the
77 * algorithm that are not needed on the specific paragraph
78 * in order to speed up the most common cases considerably,
79 * like text that is entirely LTR, or RTL text without numbers.
80 *
81 * Most of this is done by setting a bit for each directional property
82 * in a flags variable and later checking for whether there are
83 * any LTR characters or any RTL characters, or both, whether
84 * there are any explicit embedding codes, etc.
85 *
86 * If the (Xn) steps are performed, then the flags are re-evaluated,
87 * because they will then not contain the embedding codes any more
88 * and will be adjusted for override codes, so that subsequently
89 * more bypassing may be possible than what the initial flags suggested.
90 *
91 * If the text is not mixed-directional, then the
92 * algorithm steps for the weak type resolution are not performed,
93 * and all levels are set to the paragraph level.
94 *
95 * If there are no explicit embedding codes, then the (Xn) steps
96 * are not performed.
97 *
98 * If embedding levels are supplied as a parameter, then all
99 * explicit embedding codes are ignored, and the (Xn) steps
100 * are not performed.
101 *
102 * White Space types could get the level of the run they belong to,
103 * and are checked with a test of (flags&MASK_EMBEDDING) to
104 * consider if the paragraph direction should be considered in
105 * the flags variable.
106 *
107 * If there are no White Space types in the paragraph, then
108 * (L1) is not necessary in adjustWSLevels().
109 */
110
111 /* to avoid some conditional statements, use tiny constant arrays */
112 static const Flags flagLR[2]={ DIRPROP_FLAG(L), DIRPROP_FLAG(R) };
113 static const Flags flagE[2]={ DIRPROP_FLAG(LRE), DIRPROP_FLAG(RLE) };
114 static const Flags flagO[2]={ DIRPROP_FLAG(LRO), DIRPROP_FLAG(RLO) };
115
116 #define DIRPROP_FLAG_LR(level) flagLR[(level)&1]
117 #define DIRPROP_FLAG_E(level) flagE[(level)&1]
118 #define DIRPROP_FLAG_O(level) flagO[(level)&1]
119
120 #define DIR_FROM_STRONG(strong) ((strong)==L ? L : R)
121
122 #define NO_OVERRIDE(level) ((level)&~UBIDI_LEVEL_OVERRIDE)
123
124 /* UBiDi object management -------------------------------------------------- */
125
126 U_CAPI UBiDi * U_EXPORT2
ubidi_open(void)127 ubidi_open(void)
128 {
129 UErrorCode errorCode=U_ZERO_ERROR;
130 return ubidi_openSized(0, 0, &errorCode);
131 }
132
133 U_CAPI UBiDi * U_EXPORT2
ubidi_openSized(int32_t maxLength,int32_t maxRunCount,UErrorCode * pErrorCode)134 ubidi_openSized(int32_t maxLength, int32_t maxRunCount, UErrorCode *pErrorCode) {
135 UBiDi *pBiDi;
136
137 /* check the argument values */
138 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
139 return NULL;
140 } else if(maxLength<0 || maxRunCount<0) {
141 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
142 return NULL; /* invalid arguments */
143 }
144
145 /* allocate memory for the object */
146 pBiDi=(UBiDi *)uprv_malloc(sizeof(UBiDi));
147 if(pBiDi==NULL) {
148 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
149 return NULL;
150 }
151
152 /* reset the object, all pointers NULL, all flags FALSE, all sizes 0 */
153 uprv_memset(pBiDi, 0, sizeof(UBiDi));
154
155 /* get BiDi properties */
156 pBiDi->bdp=ubidi_getSingleton();
157
158 /* allocate memory for arrays as requested */
159 if(maxLength>0) {
160 if( !getInitialDirPropsMemory(pBiDi, maxLength) ||
161 !getInitialLevelsMemory(pBiDi, maxLength)
162 ) {
163 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
164 }
165 } else {
166 pBiDi->mayAllocateText=TRUE;
167 }
168
169 if(maxRunCount>0) {
170 if(maxRunCount==1) {
171 /* use simpleRuns[] */
172 pBiDi->runsSize=sizeof(Run);
173 } else if(!getInitialRunsMemory(pBiDi, maxRunCount)) {
174 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
175 }
176 } else {
177 pBiDi->mayAllocateRuns=TRUE;
178 }
179
180 if(U_SUCCESS(*pErrorCode)) {
181 return pBiDi;
182 } else {
183 ubidi_close(pBiDi);
184 return NULL;
185 }
186 }
187
188 /*
189 * We are allowed to allocate memory if memory==NULL or
190 * mayAllocate==TRUE for each array that we need.
191 * We also try to grow memory as needed if we
192 * allocate it.
193 *
194 * Assume sizeNeeded>0.
195 * If *pMemory!=NULL, then assume *pSize>0.
196 *
197 * ### this realloc() may unnecessarily copy the old data,
198 * which we know we don't need any more;
199 * is this the best way to do this??
200 */
201 U_CFUNC UBool
ubidi_getMemory(BidiMemoryForAllocation * bidiMem,int32_t * pSize,UBool mayAllocate,int32_t sizeNeeded)202 ubidi_getMemory(BidiMemoryForAllocation *bidiMem, int32_t *pSize, UBool mayAllocate, int32_t sizeNeeded) {
203 void **pMemory = (void **)bidiMem;
204 /* check for existing memory */
205 if(*pMemory==NULL) {
206 /* we need to allocate memory */
207 if(mayAllocate && (*pMemory=uprv_malloc(sizeNeeded))!=NULL) {
208 *pSize=sizeNeeded;
209 return TRUE;
210 } else {
211 return FALSE;
212 }
213 } else {
214 if(sizeNeeded<=*pSize) {
215 /* there is already enough memory */
216 return TRUE;
217 }
218 else if(!mayAllocate) {
219 /* not enough memory, and we must not allocate */
220 return FALSE;
221 } else {
222 /* we try to grow */
223 void *memory;
224 /* in most cases, we do not need the copy-old-data part of
225 * realloc, but it is needed when adding runs using getRunsMemory()
226 * in setParaRunsOnly()
227 */
228 if((memory=uprv_realloc(*pMemory, sizeNeeded))!=NULL) {
229 *pMemory=memory;
230 *pSize=sizeNeeded;
231 return TRUE;
232 } else {
233 /* we failed to grow */
234 return FALSE;
235 }
236 }
237 }
238 }
239
240 U_CAPI void U_EXPORT2
ubidi_close(UBiDi * pBiDi)241 ubidi_close(UBiDi *pBiDi) {
242 if(pBiDi!=NULL) {
243 pBiDi->pParaBiDi=NULL; /* in case one tries to reuse this block */
244 if(pBiDi->dirPropsMemory!=NULL) {
245 uprv_free(pBiDi->dirPropsMemory);
246 }
247 if(pBiDi->levelsMemory!=NULL) {
248 uprv_free(pBiDi->levelsMemory);
249 }
250 if(pBiDi->openingsMemory!=NULL) {
251 uprv_free(pBiDi->openingsMemory);
252 }
253 if(pBiDi->parasMemory!=NULL) {
254 uprv_free(pBiDi->parasMemory);
255 }
256 if(pBiDi->runsMemory!=NULL) {
257 uprv_free(pBiDi->runsMemory);
258 }
259 if(pBiDi->isolatesMemory!=NULL) {
260 uprv_free(pBiDi->isolatesMemory);
261 }
262 if(pBiDi->insertPoints.points!=NULL) {
263 uprv_free(pBiDi->insertPoints.points);
264 }
265
266 uprv_free(pBiDi);
267 }
268 }
269
270 /* set to approximate "inverse BiDi" ---------------------------------------- */
271
272 U_CAPI void U_EXPORT2
ubidi_setInverse(UBiDi * pBiDi,UBool isInverse)273 ubidi_setInverse(UBiDi *pBiDi, UBool isInverse) {
274 if(pBiDi!=NULL) {
275 pBiDi->isInverse=isInverse;
276 pBiDi->reorderingMode = isInverse ? UBIDI_REORDER_INVERSE_NUMBERS_AS_L
277 : UBIDI_REORDER_DEFAULT;
278 }
279 }
280
281 U_CAPI UBool U_EXPORT2
ubidi_isInverse(UBiDi * pBiDi)282 ubidi_isInverse(UBiDi *pBiDi) {
283 if(pBiDi!=NULL) {
284 return pBiDi->isInverse;
285 } else {
286 return FALSE;
287 }
288 }
289
290 /* FOOD FOR THOUGHT: currently the reordering modes are a mixture of
291 * algorithm for direct BiDi, algorithm for inverse BiDi and the bizarre
292 * concept of RUNS_ONLY which is a double operation.
293 * It could be advantageous to divide this into 3 concepts:
294 * a) Operation: direct / inverse / RUNS_ONLY
295 * b) Direct algorithm: default / NUMBERS_SPECIAL / GROUP_NUMBERS_WITH_R
296 * c) Inverse algorithm: default / INVERSE_LIKE_DIRECT / NUMBERS_SPECIAL
297 * This would allow combinations not possible today like RUNS_ONLY with
298 * NUMBERS_SPECIAL.
299 * Also allow to set INSERT_MARKS for the direct step of RUNS_ONLY and
300 * REMOVE_CONTROLS for the inverse step.
301 * Not all combinations would be supported, and probably not all do make sense.
302 * This would need to document which ones are supported and what are the
303 * fallbacks for unsupported combinations.
304 */
305 U_CAPI void U_EXPORT2
ubidi_setReorderingMode(UBiDi * pBiDi,UBiDiReorderingMode reorderingMode)306 ubidi_setReorderingMode(UBiDi *pBiDi, UBiDiReorderingMode reorderingMode) {
307 if ((pBiDi!=NULL) && (reorderingMode >= UBIDI_REORDER_DEFAULT)
308 && (reorderingMode < UBIDI_REORDER_COUNT)) {
309 pBiDi->reorderingMode = reorderingMode;
310 pBiDi->isInverse = (UBool)(reorderingMode == UBIDI_REORDER_INVERSE_NUMBERS_AS_L);
311 }
312 }
313
314 U_CAPI UBiDiReorderingMode U_EXPORT2
ubidi_getReorderingMode(UBiDi * pBiDi)315 ubidi_getReorderingMode(UBiDi *pBiDi) {
316 if (pBiDi!=NULL) {
317 return pBiDi->reorderingMode;
318 } else {
319 return UBIDI_REORDER_DEFAULT;
320 }
321 }
322
323 U_CAPI void U_EXPORT2
ubidi_setReorderingOptions(UBiDi * pBiDi,uint32_t reorderingOptions)324 ubidi_setReorderingOptions(UBiDi *pBiDi, uint32_t reorderingOptions) {
325 if (reorderingOptions & UBIDI_OPTION_REMOVE_CONTROLS) {
326 reorderingOptions&=~UBIDI_OPTION_INSERT_MARKS;
327 }
328 if (pBiDi!=NULL) {
329 pBiDi->reorderingOptions=reorderingOptions;
330 }
331 }
332
333 U_CAPI uint32_t U_EXPORT2
ubidi_getReorderingOptions(UBiDi * pBiDi)334 ubidi_getReorderingOptions(UBiDi *pBiDi) {
335 if (pBiDi!=NULL) {
336 return pBiDi->reorderingOptions;
337 } else {
338 return 0;
339 }
340 }
341
342 U_CAPI UBiDiDirection U_EXPORT2
ubidi_getBaseDirection(const UChar * text,int32_t length)343 ubidi_getBaseDirection(const UChar *text,
344 int32_t length){
345
346 int32_t i;
347 UChar32 uchar;
348 UCharDirection dir;
349
350 if( text==NULL || length<-1 ){
351 return UBIDI_NEUTRAL;
352 }
353
354 if(length==-1) {
355 length=u_strlen(text);
356 }
357
358 for( i = 0 ; i < length; ) {
359 /* i is incremented by U16_NEXT */
360 U16_NEXT(text, i, length, uchar);
361 dir = u_charDirection(uchar);
362 if( dir == U_LEFT_TO_RIGHT )
363 return UBIDI_LTR;
364 if( dir == U_RIGHT_TO_LEFT || dir ==U_RIGHT_TO_LEFT_ARABIC )
365 return UBIDI_RTL;
366 }
367 return UBIDI_NEUTRAL;
368 }
369
370 /* perform (P2)..(P3) ------------------------------------------------------- */
371
372 /**
373 * Returns the directionality of the first strong character
374 * after the last B in prologue, if any.
375 * Requires prologue!=null.
376 */
377 static DirProp
firstL_R_AL(UBiDi * pBiDi)378 firstL_R_AL(UBiDi *pBiDi) {
379 const UChar *text=pBiDi->prologue;
380 int32_t length=pBiDi->proLength;
381 int32_t i;
382 UChar32 uchar;
383 DirProp dirProp, result=ON;
384 for(i=0; i<length; ) {
385 /* i is incremented by U16_NEXT */
386 U16_NEXT(text, i, length, uchar);
387 dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar);
388 if(result==ON) {
389 if(dirProp==L || dirProp==R || dirProp==AL) {
390 result=dirProp;
391 }
392 } else {
393 if(dirProp==B) {
394 result=ON;
395 }
396 }
397 }
398 return result;
399 }
400
401 /*
402 * Check that there are enough entries in the array pointed to by pBiDi->paras
403 */
404 static UBool
checkParaCount(UBiDi * pBiDi)405 checkParaCount(UBiDi *pBiDi) {
406 int32_t count=pBiDi->paraCount;
407 if(pBiDi->paras==pBiDi->simpleParas) {
408 if(count<=SIMPLE_PARAS_COUNT)
409 return TRUE;
410 if(!getInitialParasMemory(pBiDi, SIMPLE_PARAS_COUNT * 2))
411 return FALSE;
412 pBiDi->paras=pBiDi->parasMemory;
413 uprv_memcpy(pBiDi->parasMemory, pBiDi->simpleParas, SIMPLE_PARAS_COUNT * sizeof(Para));
414 return TRUE;
415 }
416 if(!getInitialParasMemory(pBiDi, count * 2))
417 return FALSE;
418 pBiDi->paras=pBiDi->parasMemory;
419 return TRUE;
420 }
421
422 /*
423 * Get the directional properties for the text, calculate the flags bit-set, and
424 * determine the paragraph level if necessary (in pBiDi->paras[i].level).
425 * FSI initiators are also resolved and their dirProp replaced with LRI or RLI.
426 * When encountering an FSI, it is initially replaced with an LRI, which is the
427 * default. Only if a strong R or AL is found within its scope will the LRI be
428 * replaced by an RLI.
429 */
430 static UBool
getDirProps(UBiDi * pBiDi)431 getDirProps(UBiDi *pBiDi) {
432 const UChar *text=pBiDi->text;
433 DirProp *dirProps=pBiDi->dirPropsMemory; /* pBiDi->dirProps is const */
434
435 int32_t i=0, originalLength=pBiDi->originalLength;
436 Flags flags=0; /* collect all directionalities in the text */
437 UChar32 uchar;
438 DirProp dirProp=0, defaultParaLevel=0; /* initialize to avoid compiler warnings */
439 UBool isDefaultLevel=IS_DEFAULT_LEVEL(pBiDi->paraLevel);
440 /* for inverse BiDi, the default para level is set to RTL if there is a
441 strong R or AL character at either end of the text */
442 UBool isDefaultLevelInverse=isDefaultLevel && (UBool)
443 (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT ||
444 pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL);
445 int32_t lastArabicPos=-1;
446 int32_t controlCount=0;
447 UBool removeBiDiControls = (UBool)(pBiDi->reorderingOptions &
448 UBIDI_OPTION_REMOVE_CONTROLS);
449
450 typedef enum {
451 NOT_SEEKING_STRONG, /* 0: not contextual paraLevel, not after FSI */
452 SEEKING_STRONG_FOR_PARA, /* 1: looking for first strong char in para */
453 SEEKING_STRONG_FOR_FSI, /* 2: looking for first strong after FSI */
454 LOOKING_FOR_PDI /* 3: found strong after FSI, looking for PDI */
455 } State;
456 State state;
457 DirProp lastStrong=ON; /* for default level & inverse BiDi */
458 /* The following stacks are used to manage isolate sequences. Those
459 sequences may be nested, but obviously never more deeply than the
460 maximum explicit embedding level.
461 lastStack is the index of the last used entry in the stack. A value of -1
462 means that there is no open isolate sequence.
463 lastStack is reset to -1 on paragraph boundaries. */
464 /* The following stack contains the position of the initiator of
465 each open isolate sequence */
466 int32_t isolateStartStack[UBIDI_MAX_EXPLICIT_LEVEL+1];
467 /* The following stack contains the last known state before
468 encountering the initiator of an isolate sequence */
469 int8_t previousStateStack[UBIDI_MAX_EXPLICIT_LEVEL+1];
470 int32_t stackLast=-1;
471
472 if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING)
473 pBiDi->length=0;
474 defaultParaLevel=pBiDi->paraLevel&1;
475 if(isDefaultLevel) {
476 pBiDi->paras[0].level=defaultParaLevel;
477 lastStrong=defaultParaLevel;
478 if(pBiDi->proLength>0 && /* there is a prologue */
479 (dirProp=firstL_R_AL(pBiDi))!=ON) { /* with a strong character */
480 if(dirProp==L)
481 pBiDi->paras[0].level=0; /* set the default para level */
482 else
483 pBiDi->paras[0].level=1; /* set the default para level */
484 state=NOT_SEEKING_STRONG;
485 } else {
486 state=SEEKING_STRONG_FOR_PARA;
487 }
488 } else {
489 pBiDi->paras[0].level=pBiDi->paraLevel;
490 state=NOT_SEEKING_STRONG;
491 }
492 /* count paragraphs and determine the paragraph level (P2..P3) */
493 /*
494 * see comment in ubidi.h:
495 * the UBIDI_DEFAULT_XXX values are designed so that
496 * their bit 0 alone yields the intended default
497 */
498 for( /* i=0 above */ ; i<originalLength; ) {
499 /* i is incremented by U16_NEXT */
500 U16_NEXT(text, i, originalLength, uchar);
501 flags|=DIRPROP_FLAG(dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar));
502 dirProps[i-1]=dirProp;
503 if(uchar>0xffff) { /* set the lead surrogate's property to BN */
504 flags|=DIRPROP_FLAG(BN);
505 dirProps[i-2]=BN;
506 }
507 if(removeBiDiControls && IS_BIDI_CONTROL_CHAR(uchar))
508 controlCount++;
509 if(dirProp==L) {
510 if(state==SEEKING_STRONG_FOR_PARA) {
511 pBiDi->paras[pBiDi->paraCount-1].level=0;
512 state=NOT_SEEKING_STRONG;
513 }
514 else if(state==SEEKING_STRONG_FOR_FSI) {
515 if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) {
516 /* no need for next statement, already set by default */
517 /* dirProps[isolateStartStack[stackLast]]=LRI; */
518 flags|=DIRPROP_FLAG(LRI);
519 }
520 state=LOOKING_FOR_PDI;
521 }
522 lastStrong=L;
523 continue;
524 }
525 if(dirProp==R || dirProp==AL) {
526 if(state==SEEKING_STRONG_FOR_PARA) {
527 pBiDi->paras[pBiDi->paraCount-1].level=1;
528 state=NOT_SEEKING_STRONG;
529 }
530 else if(state==SEEKING_STRONG_FOR_FSI) {
531 if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) {
532 dirProps[isolateStartStack[stackLast]]=RLI;
533 flags|=DIRPROP_FLAG(RLI);
534 }
535 state=LOOKING_FOR_PDI;
536 }
537 lastStrong=R;
538 if(dirProp==AL)
539 lastArabicPos=i-1;
540 continue;
541 }
542 if(dirProp>=FSI && dirProp<=RLI) { /* FSI, LRI or RLI */
543 stackLast++;
544 if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) {
545 isolateStartStack[stackLast]=i-1;
546 previousStateStack[stackLast]=state;
547 }
548 if(dirProp==FSI) {
549 dirProps[i-1]=LRI; /* default if no strong char */
550 state=SEEKING_STRONG_FOR_FSI;
551 }
552 else
553 state=LOOKING_FOR_PDI;
554 continue;
555 }
556 if(dirProp==PDI) {
557 if(state==SEEKING_STRONG_FOR_FSI) {
558 if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) {
559 /* no need for next statement, already set by default */
560 /* dirProps[isolateStartStack[stackLast]]=LRI; */
561 flags|=DIRPROP_FLAG(LRI);
562 }
563 }
564 if(stackLast>=0) {
565 if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL)
566 state=previousStateStack[stackLast];
567 stackLast--;
568 }
569 continue;
570 }
571 if(dirProp==B) {
572 if(i<originalLength && uchar==CR && text[i]==LF) /* do nothing on the CR */
573 continue;
574 pBiDi->paras[pBiDi->paraCount-1].limit=i;
575 if(isDefaultLevelInverse && lastStrong==R)
576 pBiDi->paras[pBiDi->paraCount-1].level=1;
577 if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) {
578 /* When streaming, we only process whole paragraphs
579 thus some updates are only done on paragraph boundaries */
580 pBiDi->length=i; /* i is index to next character */
581 pBiDi->controlCount=controlCount;
582 }
583 if(i<originalLength) { /* B not last char in text */
584 pBiDi->paraCount++;
585 if(checkParaCount(pBiDi)==FALSE) /* not enough memory for a new para entry */
586 return FALSE;
587 if(isDefaultLevel) {
588 pBiDi->paras[pBiDi->paraCount-1].level=defaultParaLevel;
589 state=SEEKING_STRONG_FOR_PARA;
590 lastStrong=defaultParaLevel;
591 } else {
592 pBiDi->paras[pBiDi->paraCount-1].level=pBiDi->paraLevel;
593 state=NOT_SEEKING_STRONG;
594 }
595 stackLast=-1;
596 }
597 continue;
598 }
599 }
600 /* Ignore still open isolate sequences with overflow */
601 if(stackLast>UBIDI_MAX_EXPLICIT_LEVEL) {
602 stackLast=UBIDI_MAX_EXPLICIT_LEVEL;
603 state=SEEKING_STRONG_FOR_FSI; /* to be on the safe side */
604 }
605 /* Resolve direction of still unresolved open FSI sequences */
606 while(stackLast>=0) {
607 if(state==SEEKING_STRONG_FOR_FSI) {
608 /* no need for next statement, already set by default */
609 /* dirProps[isolateStartStack[stackLast]]=LRI; */
610 flags|=DIRPROP_FLAG(LRI);
611 break;
612 }
613 state=previousStateStack[stackLast];
614 stackLast--;
615 }
616 /* When streaming, ignore text after the last paragraph separator */
617 if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) {
618 if(pBiDi->length<originalLength)
619 pBiDi->paraCount--;
620 } else {
621 pBiDi->paras[pBiDi->paraCount-1].limit=originalLength;
622 pBiDi->controlCount=controlCount;
623 }
624 /* For inverse bidi, default para direction is RTL if there is
625 a strong R or AL at either end of the paragraph */
626 if(isDefaultLevelInverse && lastStrong==R) {
627 pBiDi->paras[pBiDi->paraCount-1].level=1;
628 }
629 if(isDefaultLevel) {
630 pBiDi->paraLevel=pBiDi->paras[0].level;
631 }
632 /* The following is needed to resolve the text direction for default level
633 paragraphs containing no strong character */
634 for(i=0; i<pBiDi->paraCount; i++)
635 flags|=DIRPROP_FLAG_LR(pBiDi->paras[i].level);
636
637 if(pBiDi->orderParagraphsLTR && (flags&DIRPROP_FLAG(B))) {
638 flags|=DIRPROP_FLAG(L);
639 }
640 pBiDi->flags=flags;
641 pBiDi->lastArabicPos=lastArabicPos;
642 return TRUE;
643 }
644
645 /* determine the paragraph level at position index */
646 U_CFUNC UBiDiLevel
ubidi_getParaLevelAtIndex(const UBiDi * pBiDi,int32_t pindex)647 ubidi_getParaLevelAtIndex(const UBiDi *pBiDi, int32_t pindex) {
648 int32_t i;
649 for(i=0; i<pBiDi->paraCount; i++)
650 if(pindex<pBiDi->paras[i].limit)
651 break;
652 if(i>=pBiDi->paraCount)
653 i=pBiDi->paraCount-1;
654 return (UBiDiLevel)(pBiDi->paras[i].level);
655 }
656
657 /* Functions for handling paired brackets ----------------------------------- */
658
659 /* In the isoRuns array, the first entry is used for text outside of any
660 isolate sequence. Higher entries are used for each more deeply nested
661 isolate sequence. isoRunLast is the index of the last used entry. The
662 openings array is used to note the data of opening brackets not yet
663 matched by a closing bracket, or matched but still susceptible to change
664 level.
665 Each isoRun entry contains the index of the first and
666 one-after-last openings entries for pending opening brackets it
667 contains. The next openings entry to use is the one-after-last of the
668 most deeply nested isoRun entry.
669 isoRun entries also contain their current embedding level and the last
670 encountered strong character, since these will be needed to resolve
671 the level of paired brackets. */
672
673 static void
bracketInit(UBiDi * pBiDi,BracketData * bd)674 bracketInit(UBiDi *pBiDi, BracketData *bd) {
675 bd->pBiDi=pBiDi;
676 bd->isoRunLast=0;
677 bd->isoRuns[0].start=0;
678 bd->isoRuns[0].limit=0;
679 bd->isoRuns[0].level=GET_PARALEVEL(pBiDi, 0);
680 bd->isoRuns[0].lastStrong=bd->isoRuns[0].lastBase=bd->isoRuns[0].contextDir=GET_PARALEVEL(pBiDi, 0)&1;
681 bd->isoRuns[0].contextPos=0;
682 if(pBiDi->openingsMemory) {
683 bd->openings=pBiDi->openingsMemory;
684 bd->openingsCount=pBiDi->openingsSize / sizeof(Opening);
685 } else {
686 bd->openings=bd->simpleOpenings;
687 bd->openingsCount=SIMPLE_OPENINGS_COUNT;
688 }
689 bd->isNumbersSpecial=bd->pBiDi->reorderingMode==UBIDI_REORDER_NUMBERS_SPECIAL ||
690 bd->pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL;
691 }
692
693 /* paragraph boundary */
694 static void
bracketProcessB(BracketData * bd,UBiDiLevel level)695 bracketProcessB(BracketData *bd, UBiDiLevel level) {
696 bd->isoRunLast=0;
697 bd->isoRuns[0].limit=0;
698 bd->isoRuns[0].level=level;
699 bd->isoRuns[0].lastStrong=bd->isoRuns[0].lastBase=bd->isoRuns[0].contextDir=level&1;
700 bd->isoRuns[0].contextPos=0;
701 }
702
703 /* LRE, LRO, RLE, RLO, PDF */
704 static void
bracketProcessBoundary(BracketData * bd,int32_t lastCcPos,UBiDiLevel contextLevel,UBiDiLevel embeddingLevel)705 bracketProcessBoundary(BracketData *bd, int32_t lastCcPos,
706 UBiDiLevel contextLevel, UBiDiLevel embeddingLevel) {
707 IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
708 DirProp *dirProps=bd->pBiDi->dirProps;
709 if(DIRPROP_FLAG(dirProps[lastCcPos])&MASK_ISO) /* after an isolate */
710 return;
711 if(NO_OVERRIDE(embeddingLevel)>NO_OVERRIDE(contextLevel)) /* not a PDF */
712 contextLevel=embeddingLevel;
713 pLastIsoRun->limit=pLastIsoRun->start;
714 pLastIsoRun->level=embeddingLevel;
715 pLastIsoRun->lastStrong=pLastIsoRun->lastBase=pLastIsoRun->contextDir=contextLevel&1;
716 pLastIsoRun->contextPos=lastCcPos;
717 }
718
719 /* LRI or RLI */
720 static void
bracketProcessLRI_RLI(BracketData * bd,UBiDiLevel level)721 bracketProcessLRI_RLI(BracketData *bd, UBiDiLevel level) {
722 IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
723 int16_t lastLimit;
724 pLastIsoRun->lastBase=ON;
725 lastLimit=pLastIsoRun->limit;
726 bd->isoRunLast++;
727 pLastIsoRun++;
728 pLastIsoRun->start=pLastIsoRun->limit=lastLimit;
729 pLastIsoRun->level=level;
730 pLastIsoRun->lastStrong=pLastIsoRun->lastBase=pLastIsoRun->contextDir=level&1;
731 pLastIsoRun->contextPos=0;
732 }
733
734 /* PDI */
735 static void
bracketProcessPDI(BracketData * bd)736 bracketProcessPDI(BracketData *bd) {
737 IsoRun *pLastIsoRun;
738 bd->isoRunLast--;
739 pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
740 pLastIsoRun->lastBase=ON;
741 }
742
743 /* newly found opening bracket: create an openings entry */
744 static UBool /* return TRUE if success */
bracketAddOpening(BracketData * bd,UChar match,int32_t position)745 bracketAddOpening(BracketData *bd, UChar match, int32_t position) {
746 IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
747 Opening *pOpening;
748 if(pLastIsoRun->limit>=bd->openingsCount) { /* no available new entry */
749 UBiDi *pBiDi=bd->pBiDi;
750 if(!getInitialOpeningsMemory(pBiDi, pLastIsoRun->limit * 2))
751 return FALSE;
752 if(bd->openings==bd->simpleOpenings)
753 uprv_memcpy(pBiDi->openingsMemory, bd->simpleOpenings,
754 SIMPLE_OPENINGS_COUNT * sizeof(Opening));
755 bd->openings=pBiDi->openingsMemory; /* may have changed */
756 bd->openingsCount=pBiDi->openingsSize / sizeof(Opening);
757 }
758 pOpening=&bd->openings[pLastIsoRun->limit];
759 pOpening->position=position;
760 pOpening->match=match;
761 pOpening->contextDir=pLastIsoRun->contextDir;
762 pOpening->contextPos=pLastIsoRun->contextPos;
763 pOpening->flags=0;
764 pLastIsoRun->limit++;
765 return TRUE;
766 }
767
768 /* change N0c1 to N0c2 when a preceding bracket is assigned the embedding level */
769 static void
fixN0c(BracketData * bd,int32_t openingIndex,int32_t newPropPosition,DirProp newProp)770 fixN0c(BracketData *bd, int32_t openingIndex, int32_t newPropPosition, DirProp newProp) {
771 /* This function calls itself recursively */
772 IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
773 Opening *qOpening;
774 DirProp *dirProps=bd->pBiDi->dirProps;
775 int32_t k, openingPosition, closingPosition;
776 for(k=openingIndex+1, qOpening=&bd->openings[k]; k<pLastIsoRun->limit; k++, qOpening++) {
777 if(qOpening->match>=0) /* not an N0c match */
778 continue;
779 if(newPropPosition<qOpening->contextPos)
780 break;
781 if(newPropPosition>=qOpening->position)
782 continue;
783 if(newProp==qOpening->contextDir)
784 break;
785 openingPosition=qOpening->position;
786 dirProps[openingPosition]=newProp;
787 closingPosition=-(qOpening->match);
788 dirProps[closingPosition]=newProp;
789 qOpening->match=0; /* prevent further changes */
790 fixN0c(bd, k, openingPosition, newProp);
791 fixN0c(bd, k, closingPosition, newProp);
792 }
793 }
794
795 /* process closing bracket */
796 static DirProp /* return L or R if N0b or N0c, ON if N0d */
bracketProcessClosing(BracketData * bd,int32_t openIdx,int32_t position)797 bracketProcessClosing(BracketData *bd, int32_t openIdx, int32_t position) {
798 IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
799 Opening *pOpening, *qOpening;
800 UBiDiDirection direction;
801 UBool stable;
802 DirProp newProp;
803 pOpening=&bd->openings[openIdx];
804 direction=pLastIsoRun->level&1;
805 stable=TRUE; /* assume stable until proved otherwise */
806
807 /* The stable flag is set when brackets are paired and their
808 level is resolved and cannot be changed by what will be
809 found later in the source string.
810 An unstable match can occur only when applying N0c, where
811 the resolved level depends on the preceding context, and
812 this context may be affected by text occurring later.
813 Example: RTL paragraph containing: abc[(latin) HEBREW]
814 When the closing parenthesis is encountered, it appears
815 that N0c1 must be applied since 'abc' sets an opposite
816 direction context and both parentheses receive level 2.
817 However, when the closing square bracket is processed,
818 N0b applies because of 'HEBREW' being included within the
819 brackets, thus the square brackets are treated like R and
820 receive level 1. However, this changes the preceding
821 context of the opening parenthesis, and it now appears
822 that N0c2 must be applied to the parentheses rather than
823 N0c1. */
824
825 if((direction==0 && pOpening->flags&FOUND_L) ||
826 (direction==1 && pOpening->flags&FOUND_R)) { /* N0b */
827 newProp=direction;
828 }
829 else if(pOpening->flags&(FOUND_L|FOUND_R)) { /* N0c */
830 /* it is stable if there is no containing pair or in
831 conditions too complicated and not worth checking */
832 stable=(openIdx==pLastIsoRun->start);
833 if(direction!=pOpening->contextDir)
834 newProp=pOpening->contextDir; /* N0c1 */
835 else
836 newProp=direction; /* N0c2 */
837 } else {
838 /* forget this and any brackets nested within this pair */
839 pLastIsoRun->limit=openIdx;
840 return ON; /* N0d */
841 }
842 bd->pBiDi->dirProps[pOpening->position]=newProp;
843 bd->pBiDi->dirProps[position]=newProp;
844 /* Update nested N0c pairs that may be affected */
845 fixN0c(bd, openIdx, pOpening->position, newProp);
846 if(stable) {
847 pLastIsoRun->limit=openIdx; /* forget any brackets nested within this pair */
848 /* remove lower located synonyms if any */
849 while(pLastIsoRun->limit>pLastIsoRun->start &&
850 bd->openings[pLastIsoRun->limit-1].position==pOpening->position)
851 pLastIsoRun->limit--;
852 } else {
853 int32_t k;
854 pOpening->match=-position;
855 /* neutralize lower located synonyms if any */
856 k=openIdx-1;
857 while(k>=pLastIsoRun->start &&
858 bd->openings[k].position==pOpening->position)
859 bd->openings[k--].match=0;
860 /* neutralize any unmatched opening between the current pair;
861 this will also neutralize higher located synonyms if any */
862 for(k=openIdx+1; k<pLastIsoRun->limit; k++) {
863 qOpening=&bd->openings[k];
864 if(qOpening->position>=position)
865 break;
866 if(qOpening->match>0)
867 qOpening->match=0;
868 }
869 }
870 return newProp;
871 }
872
873 /* handle strong characters, digits and candidates for closing brackets */
874 static UBool /* return TRUE if success */
bracketProcessChar(BracketData * bd,int32_t position)875 bracketProcessChar(BracketData *bd, int32_t position) {
876 IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
877 DirProp *dirProps, dirProp, newProp;
878 UBiDiLevel level;
879 dirProps=bd->pBiDi->dirProps;
880 dirProp=dirProps[position];
881 if(dirProp==ON) {
882 UChar c, match;
883 int32_t idx;
884 /* First see if it is a matching closing bracket. Hopefully, this is
885 more efficient than checking if it is a closing bracket at all */
886 c=bd->pBiDi->text[position];
887 for(idx=pLastIsoRun->limit-1; idx>=pLastIsoRun->start; idx--) {
888 if(bd->openings[idx].match!=c)
889 continue;
890 /* We have a match */
891 newProp=bracketProcessClosing(bd, idx, position);
892 if(newProp==ON) { /* N0d */
893 c=0; /* prevent handling as an opening */
894 break;
895 }
896 pLastIsoRun->lastBase=ON;
897 pLastIsoRun->contextDir=newProp;
898 pLastIsoRun->contextPos=position;
899 level=bd->pBiDi->levels[position];
900 if(level&UBIDI_LEVEL_OVERRIDE) { /* X4, X5 */
901 uint16_t flag;
902 int32_t i;
903 newProp=level&1;
904 pLastIsoRun->lastStrong=newProp;
905 flag=DIRPROP_FLAG(newProp);
906 for(i=pLastIsoRun->start; i<idx; i++)
907 bd->openings[i].flags|=flag;
908 /* matching brackets are not overridden by LRO/RLO */
909 bd->pBiDi->levels[position]&=~UBIDI_LEVEL_OVERRIDE;
910 }
911 /* matching brackets are not overridden by LRO/RLO */
912 bd->pBiDi->levels[bd->openings[idx].position]&=~UBIDI_LEVEL_OVERRIDE;
913 return TRUE;
914 }
915 /* We get here only if the ON character is not a matching closing
916 bracket or it is a case of N0d */
917 /* Now see if it is an opening bracket */
918 if(c)
919 match=u_getBidiPairedBracket(c); /* get the matching char */
920 else
921 match=0;
922 if(match!=c && /* has a matching char */
923 ubidi_getPairedBracketType(bd->pBiDi->bdp, c)==U_BPT_OPEN) { /* opening bracket */
924 /* special case: process synonyms
925 create an opening entry for each synonym */
926 if(match==0x232A) { /* RIGHT-POINTING ANGLE BRACKET */
927 if(!bracketAddOpening(bd, 0x3009, position))
928 return FALSE;
929 }
930 else if(match==0x3009) { /* RIGHT ANGLE BRACKET */
931 if(!bracketAddOpening(bd, 0x232A, position))
932 return FALSE;
933 }
934 if(!bracketAddOpening(bd, match, position))
935 return FALSE;
936 }
937 }
938 level=bd->pBiDi->levels[position];
939 if(level&UBIDI_LEVEL_OVERRIDE) { /* X4, X5 */
940 newProp=level&1;
941 if(dirProp!=S && dirProp!=WS && dirProp!=ON)
942 dirProps[position]=newProp;
943 pLastIsoRun->lastBase=newProp;
944 pLastIsoRun->lastStrong=newProp;
945 pLastIsoRun->contextDir=newProp;
946 pLastIsoRun->contextPos=position;
947 }
948 else if(dirProp<=R || dirProp==AL) {
949 newProp=DIR_FROM_STRONG(dirProp);
950 pLastIsoRun->lastBase=dirProp;
951 pLastIsoRun->lastStrong=dirProp;
952 pLastIsoRun->contextDir=newProp;
953 pLastIsoRun->contextPos=position;
954 }
955 else if(dirProp==EN) {
956 pLastIsoRun->lastBase=EN;
957 if(pLastIsoRun->lastStrong==L) {
958 newProp=L; /* W7 */
959 if(!bd->isNumbersSpecial)
960 dirProps[position]=ENL;
961 pLastIsoRun->contextDir=L;
962 pLastIsoRun->contextPos=position;
963 }
964 else {
965 newProp=R; /* N0 */
966 if(pLastIsoRun->lastStrong==AL)
967 dirProps[position]=AN; /* W2 */
968 else
969 dirProps[position]=ENR;
970 pLastIsoRun->contextDir=R;
971 pLastIsoRun->contextPos=position;
972 }
973 }
974 else if(dirProp==AN) {
975 newProp=R; /* N0 */
976 pLastIsoRun->lastBase=AN;
977 pLastIsoRun->contextDir=R;
978 pLastIsoRun->contextPos=position;
979 }
980 else if(dirProp==NSM) {
981 /* if the last real char was ON, change NSM to ON so that it
982 will stay ON even if the last real char is a bracket which
983 may be changed to L or R */
984 newProp=pLastIsoRun->lastBase;
985 if(newProp==ON)
986 dirProps[position]=newProp;
987 }
988 else {
989 newProp=dirProp;
990 pLastIsoRun->lastBase=dirProp;
991 }
992 if(newProp<=R || newProp==AL) {
993 int32_t i;
994 uint16_t flag=DIRPROP_FLAG(DIR_FROM_STRONG(newProp));
995 for(i=pLastIsoRun->start; i<pLastIsoRun->limit; i++)
996 if(position>bd->openings[i].position)
997 bd->openings[i].flags|=flag;
998 }
999 return TRUE;
1000 }
1001
1002 /* perform (X1)..(X9) ------------------------------------------------------- */
1003
1004 /* determine if the text is mixed-directional or single-directional */
1005 static UBiDiDirection
directionFromFlags(UBiDi * pBiDi)1006 directionFromFlags(UBiDi *pBiDi) {
1007 Flags flags=pBiDi->flags;
1008 /* if the text contains AN and neutrals, then some neutrals may become RTL */
1009 if(!(flags&MASK_RTL || ((flags&DIRPROP_FLAG(AN)) && (flags&MASK_POSSIBLE_N)))) {
1010 return UBIDI_LTR;
1011 } else if(!(flags&MASK_LTR)) {
1012 return UBIDI_RTL;
1013 } else {
1014 return UBIDI_MIXED;
1015 }
1016 }
1017
1018 /*
1019 * Resolve the explicit levels as specified by explicit embedding codes.
1020 * Recalculate the flags to have them reflect the real properties
1021 * after taking the explicit embeddings into account.
1022 *
1023 * The BiDi algorithm is designed to result in the same behavior whether embedding
1024 * levels are externally specified (from "styled text", supposedly the preferred
1025 * method) or set by explicit embedding codes (LRx, RLx, PDF, FSI, PDI) in the plain text.
1026 * That is why (X9) instructs to remove all not-isolate explicit codes (and BN).
1027 * However, in a real implementation, the removal of these codes and their index
1028 * positions in the plain text is undesirable since it would result in
1029 * reallocated, reindexed text.
1030 * Instead, this implementation leaves the codes in there and just ignores them
1031 * in the subsequent processing.
1032 * In order to get the same reordering behavior, positions with a BN or a not-isolate
1033 * explicit embedding code just get the same level assigned as the last "real"
1034 * character.
1035 *
1036 * Some implementations, not this one, then overwrite some of these
1037 * directionality properties at "real" same-level-run boundaries by
1038 * L or R codes so that the resolution of weak types can be performed on the
1039 * entire paragraph at once instead of having to parse it once more and
1040 * perform that resolution on same-level-runs.
1041 * This limits the scope of the implicit rules in effectively
1042 * the same way as the run limits.
1043 *
1044 * Instead, this implementation does not modify these codes, except for
1045 * paired brackets whose properties (ON) may be replaced by L or R.
1046 * On one hand, the paragraph has to be scanned for same-level-runs, but
1047 * on the other hand, this saves another loop to reset these codes,
1048 * or saves making and modifying a copy of dirProps[].
1049 *
1050 *
1051 * Note that (Pn) and (Xn) changed significantly from version 4 of the BiDi algorithm.
1052 *
1053 *
1054 * Handling the stack of explicit levels (Xn):
1055 *
1056 * With the BiDi stack of explicit levels, as pushed with each
1057 * LRE, RLE, LRO, RLO, LRI, RLI and FSI and popped with each PDF and PDI,
1058 * the explicit level must never exceed UBIDI_MAX_EXPLICIT_LEVEL.
1059 *
1060 * In order to have a correct push-pop semantics even in the case of overflows,
1061 * overflow counters and a valid isolate counter are used as described in UAX#9
1062 * section 3.3.2 "Explicit Levels and Directions".
1063 *
1064 * This implementation assumes that UBIDI_MAX_EXPLICIT_LEVEL is odd.
1065 *
1066 * Returns normally the direction; -1 if there was a memory shortage
1067 *
1068 */
1069 static UBiDiDirection
resolveExplicitLevels(UBiDi * pBiDi,UErrorCode * pErrorCode)1070 resolveExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) {
1071 DirProp *dirProps=pBiDi->dirProps;
1072 UBiDiLevel *levels=pBiDi->levels;
1073 const UChar *text=pBiDi->text;
1074
1075 int32_t i=0, length=pBiDi->length;
1076 Flags flags=pBiDi->flags; /* collect all directionalities in the text */
1077 DirProp dirProp;
1078 UBiDiLevel level=GET_PARALEVEL(pBiDi, 0);
1079 UBiDiDirection direction;
1080 pBiDi->isolateCount=0;
1081
1082 if(U_FAILURE(*pErrorCode)) { return UBIDI_LTR; }
1083
1084 /* determine if the text is mixed-directional or single-directional */
1085 direction=directionFromFlags(pBiDi);
1086
1087 /* we may not need to resolve any explicit levels */
1088 if((direction!=UBIDI_MIXED)) {
1089 /* not mixed directionality: levels don't matter - trailingWSStart will be 0 */
1090 return direction;
1091 }
1092 if(pBiDi->reorderingMode > UBIDI_REORDER_LAST_LOGICAL_TO_VISUAL) {
1093 /* inverse BiDi: mixed, but all characters are at the same embedding level */
1094 /* set all levels to the paragraph level */
1095 int32_t paraIndex, start, limit;
1096 for(paraIndex=0; paraIndex<pBiDi->paraCount; paraIndex++) {
1097 if(paraIndex==0)
1098 start=0;
1099 else
1100 start=pBiDi->paras[paraIndex-1].limit;
1101 limit=pBiDi->paras[paraIndex].limit;
1102 level=pBiDi->paras[paraIndex].level;
1103 for(i=start; i<limit; i++)
1104 levels[i]=level;
1105 }
1106 return direction; /* no bracket matching for inverse BiDi */
1107 }
1108 if(!(flags&(MASK_EXPLICIT|MASK_ISO))) {
1109 /* no embeddings, set all levels to the paragraph level */
1110 /* we still have to perform bracket matching */
1111 int32_t paraIndex, start, limit;
1112 BracketData bracketData;
1113 bracketInit(pBiDi, &bracketData);
1114 for(paraIndex=0; paraIndex<pBiDi->paraCount; paraIndex++) {
1115 if(paraIndex==0)
1116 start=0;
1117 else
1118 start=pBiDi->paras[paraIndex-1].limit;
1119 limit=pBiDi->paras[paraIndex].limit;
1120 level=pBiDi->paras[paraIndex].level;
1121 for(i=start; i<limit; i++) {
1122 levels[i]=level;
1123 dirProp=dirProps[i];
1124 if(dirProp==BN)
1125 continue;
1126 if(dirProp==B) {
1127 if((i+1)<length) {
1128 if(text[i]==CR && text[i+1]==LF)
1129 continue; /* skip CR when followed by LF */
1130 bracketProcessB(&bracketData, level);
1131 }
1132 continue;
1133 }
1134 if(!bracketProcessChar(&bracketData, i)) {
1135 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
1136 return UBIDI_LTR;
1137 }
1138 }
1139 }
1140 return direction;
1141 }
1142 {
1143 /* continue to perform (Xn) */
1144
1145 /* (X1) level is set for all codes, embeddingLevel keeps track of the push/pop operations */
1146 /* both variables may carry the UBIDI_LEVEL_OVERRIDE flag to indicate the override status */
1147 UBiDiLevel embeddingLevel=level, newLevel;
1148 UBiDiLevel previousLevel=level; /* previous level for regular (not CC) characters */
1149 int32_t lastCcPos=0; /* index of last effective LRx,RLx, PDx */
1150
1151 /* The following stack remembers the embedding level and the ISOLATE flag of level runs.
1152 stackLast points to its current entry. */
1153 uint16_t stack[UBIDI_MAX_EXPLICIT_LEVEL+2]; /* we never push anything >=UBIDI_MAX_EXPLICIT_LEVEL
1154 but we need one more entry as base */
1155 uint32_t stackLast=0;
1156 int32_t overflowIsolateCount=0;
1157 int32_t overflowEmbeddingCount=0;
1158 int32_t validIsolateCount=0;
1159 BracketData bracketData;
1160 bracketInit(pBiDi, &bracketData);
1161 stack[0]=level; /* initialize base entry to para level, no override, no isolate */
1162
1163 /* recalculate the flags */
1164 flags=0;
1165
1166 for(i=0; i<length; ++i) {
1167 dirProp=dirProps[i];
1168 switch(dirProp) {
1169 case LRE:
1170 case RLE:
1171 case LRO:
1172 case RLO:
1173 /* (X2, X3, X4, X5) */
1174 flags|=DIRPROP_FLAG(BN);
1175 levels[i]=previousLevel;
1176 if (dirProp==LRE || dirProp==LRO)
1177 /* least greater even level */
1178 newLevel=(UBiDiLevel)((embeddingLevel+2)&~(UBIDI_LEVEL_OVERRIDE|1));
1179 else
1180 /* least greater odd level */
1181 newLevel=(UBiDiLevel)((NO_OVERRIDE(embeddingLevel)+1)|1);
1182 if(newLevel<=UBIDI_MAX_EXPLICIT_LEVEL && overflowIsolateCount==0 &&
1183 overflowEmbeddingCount==0) {
1184 lastCcPos=i;
1185 embeddingLevel=newLevel;
1186 if(dirProp==LRO || dirProp==RLO)
1187 embeddingLevel|=UBIDI_LEVEL_OVERRIDE;
1188 stackLast++;
1189 stack[stackLast]=embeddingLevel;
1190 /* we don't need to set UBIDI_LEVEL_OVERRIDE off for LRE and RLE
1191 since this has already been done for newLevel which is
1192 the source for embeddingLevel.
1193 */
1194 } else {
1195 if(overflowIsolateCount==0)
1196 overflowEmbeddingCount++;
1197 }
1198 break;
1199 case PDF:
1200 /* (X7) */
1201 flags|=DIRPROP_FLAG(BN);
1202 levels[i]=previousLevel;
1203 /* handle all the overflow cases first */
1204 if(overflowIsolateCount) {
1205 break;
1206 }
1207 if(overflowEmbeddingCount) {
1208 overflowEmbeddingCount--;
1209 break;
1210 }
1211 if(stackLast>0 && stack[stackLast]<ISOLATE) { /* not an isolate entry */
1212 lastCcPos=i;
1213 stackLast--;
1214 embeddingLevel=(UBiDiLevel)stack[stackLast];
1215 }
1216 break;
1217 case LRI:
1218 case RLI:
1219 flags|=(DIRPROP_FLAG(ON)|DIRPROP_FLAG_LR(embeddingLevel));
1220 levels[i]=NO_OVERRIDE(embeddingLevel);
1221 if(NO_OVERRIDE(embeddingLevel)!=NO_OVERRIDE(previousLevel)) {
1222 bracketProcessBoundary(&bracketData, lastCcPos,
1223 previousLevel, embeddingLevel);
1224 flags|=DIRPROP_FLAG_MULTI_RUNS;
1225 }
1226 previousLevel=embeddingLevel;
1227 /* (X5a, X5b) */
1228 if(dirProp==LRI)
1229 /* least greater even level */
1230 newLevel=(UBiDiLevel)((embeddingLevel+2)&~(UBIDI_LEVEL_OVERRIDE|1));
1231 else
1232 /* least greater odd level */
1233 newLevel=(UBiDiLevel)((NO_OVERRIDE(embeddingLevel)+1)|1);
1234 if(newLevel<=UBIDI_MAX_EXPLICIT_LEVEL && overflowIsolateCount==0 &&
1235 overflowEmbeddingCount==0) {
1236 flags|=DIRPROP_FLAG(dirProp);
1237 lastCcPos=i;
1238 validIsolateCount++;
1239 if(validIsolateCount>pBiDi->isolateCount)
1240 pBiDi->isolateCount=validIsolateCount;
1241 embeddingLevel=newLevel;
1242 /* we can increment stackLast without checking because newLevel
1243 will exceed UBIDI_MAX_EXPLICIT_LEVEL before stackLast overflows */
1244 stackLast++;
1245 stack[stackLast]=embeddingLevel+ISOLATE;
1246 bracketProcessLRI_RLI(&bracketData, embeddingLevel);
1247 } else {
1248 /* make it WS so that it is handled by adjustWSLevels() */
1249 dirProps[i]=WS;
1250 overflowIsolateCount++;
1251 }
1252 break;
1253 case PDI:
1254 if(NO_OVERRIDE(embeddingLevel)!=NO_OVERRIDE(previousLevel)) {
1255 bracketProcessBoundary(&bracketData, lastCcPos,
1256 previousLevel, embeddingLevel);
1257 flags|=DIRPROP_FLAG_MULTI_RUNS;
1258 }
1259 /* (X6a) */
1260 if(overflowIsolateCount) {
1261 overflowIsolateCount--;
1262 /* make it WS so that it is handled by adjustWSLevels() */
1263 dirProps[i]=WS;
1264 }
1265 else if(validIsolateCount) {
1266 flags|=DIRPROP_FLAG(PDI);
1267 lastCcPos=i;
1268 overflowEmbeddingCount=0;
1269 while(stack[stackLast]<ISOLATE) /* pop embedding entries */
1270 stackLast--; /* until the last isolate entry */
1271 stackLast--; /* pop also the last isolate entry */
1272 validIsolateCount--;
1273 bracketProcessPDI(&bracketData);
1274 } else
1275 /* make it WS so that it is handled by adjustWSLevels() */
1276 dirProps[i]=WS;
1277 embeddingLevel=(UBiDiLevel)stack[stackLast]&~ISOLATE;
1278 flags|=(DIRPROP_FLAG(ON)|DIRPROP_FLAG_LR(embeddingLevel));
1279 previousLevel=embeddingLevel;
1280 levels[i]=NO_OVERRIDE(embeddingLevel);
1281 break;
1282 case B:
1283 flags|=DIRPROP_FLAG(B);
1284 levels[i]=GET_PARALEVEL(pBiDi, i);
1285 if((i+1)<length) {
1286 if(text[i]==CR && text[i+1]==LF)
1287 break; /* skip CR when followed by LF */
1288 overflowEmbeddingCount=overflowIsolateCount=0;
1289 validIsolateCount=0;
1290 stackLast=0;
1291 previousLevel=embeddingLevel=GET_PARALEVEL(pBiDi, i+1);
1292 stack[0]=embeddingLevel; /* initialize base entry to para level, no override, no isolate */
1293 bracketProcessB(&bracketData, embeddingLevel);
1294 }
1295 break;
1296 case BN:
1297 /* BN, LRE, RLE, and PDF are supposed to be removed (X9) */
1298 /* they will get their levels set correctly in adjustWSLevels() */
1299 levels[i]=previousLevel;
1300 flags|=DIRPROP_FLAG(BN);
1301 break;
1302 default:
1303 /* all other types are normal characters and get the "real" level */
1304 if(NO_OVERRIDE(embeddingLevel)!=NO_OVERRIDE(previousLevel)) {
1305 bracketProcessBoundary(&bracketData, lastCcPos,
1306 previousLevel, embeddingLevel);
1307 flags|=DIRPROP_FLAG_MULTI_RUNS;
1308 if(embeddingLevel&UBIDI_LEVEL_OVERRIDE)
1309 flags|=DIRPROP_FLAG_O(embeddingLevel);
1310 else
1311 flags|=DIRPROP_FLAG_E(embeddingLevel);
1312 }
1313 previousLevel=embeddingLevel;
1314 levels[i]=embeddingLevel;
1315 if(!bracketProcessChar(&bracketData, i))
1316 return -1;
1317 /* the dirProp may have been changed in bracketProcessChar() */
1318 flags|=DIRPROP_FLAG(dirProps[i]);
1319 break;
1320 }
1321 }
1322 if(flags&MASK_EMBEDDING)
1323 flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel);
1324 if(pBiDi->orderParagraphsLTR && (flags&DIRPROP_FLAG(B)))
1325 flags|=DIRPROP_FLAG(L);
1326 /* again, determine if the text is mixed-directional or single-directional */
1327 pBiDi->flags=flags;
1328 direction=directionFromFlags(pBiDi);
1329 }
1330 return direction;
1331 }
1332
1333 /*
1334 * Use a pre-specified embedding levels array:
1335 *
1336 * Adjust the directional properties for overrides (->LEVEL_OVERRIDE),
1337 * ignore all explicit codes (X9),
1338 * and check all the preset levels.
1339 *
1340 * Recalculate the flags to have them reflect the real properties
1341 * after taking the explicit embeddings into account.
1342 */
1343 static UBiDiDirection
checkExplicitLevels(UBiDi * pBiDi,UErrorCode * pErrorCode)1344 checkExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) {
1345 DirProp *dirProps=pBiDi->dirProps;
1346 DirProp dirProp;
1347 UBiDiLevel *levels=pBiDi->levels;
1348 int32_t isolateCount=0;
1349
1350 int32_t i, length=pBiDi->length;
1351 Flags flags=0; /* collect all directionalities in the text */
1352 UBiDiLevel level;
1353 pBiDi->isolateCount=0;
1354
1355 for(i=0; i<length; ++i) {
1356 level=levels[i];
1357 dirProp=dirProps[i];
1358 if(dirProp==LRI || dirProp==RLI) {
1359 isolateCount++;
1360 if(isolateCount>pBiDi->isolateCount)
1361 pBiDi->isolateCount=isolateCount;
1362 }
1363 else if(dirProp==PDI)
1364 isolateCount--;
1365 else if(dirProp==B)
1366 isolateCount=0;
1367 if(level&UBIDI_LEVEL_OVERRIDE) {
1368 /* keep the override flag in levels[i] but adjust the flags */
1369 level&=~UBIDI_LEVEL_OVERRIDE; /* make the range check below simpler */
1370 flags|=DIRPROP_FLAG_O(level);
1371 } else {
1372 /* set the flags */
1373 flags|=DIRPROP_FLAG_E(level)|DIRPROP_FLAG(dirProp);
1374 }
1375 if((level<GET_PARALEVEL(pBiDi, i) &&
1376 !((0==level)&&(dirProp==B))) ||
1377 (UBIDI_MAX_EXPLICIT_LEVEL<level)) {
1378 /* level out of bounds */
1379 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1380 return UBIDI_LTR;
1381 }
1382 }
1383 if(flags&MASK_EMBEDDING)
1384 flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel);
1385 /* determine if the text is mixed-directional or single-directional */
1386 pBiDi->flags=flags;
1387 return directionFromFlags(pBiDi);
1388 }
1389
1390 /******************************************************************
1391 The Properties state machine table
1392 *******************************************************************
1393
1394 All table cells are 8 bits:
1395 bits 0..4: next state
1396 bits 5..7: action to perform (if > 0)
1397
1398 Cells may be of format "n" where n represents the next state
1399 (except for the rightmost column).
1400 Cells may also be of format "s(x,y)" where x represents an action
1401 to perform and y represents the next state.
1402
1403 *******************************************************************
1404 Definitions and type for properties state table
1405 *******************************************************************
1406 */
1407 #define IMPTABPROPS_COLUMNS 16
1408 #define IMPTABPROPS_RES (IMPTABPROPS_COLUMNS - 1)
1409 #define GET_STATEPROPS(cell) ((cell)&0x1f)
1410 #define GET_ACTIONPROPS(cell) ((cell)>>5)
1411 #define s(action, newState) ((uint8_t)(newState+(action<<5)))
1412
1413 static const uint8_t groupProp[] = /* dirProp regrouped */
1414 {
1415 /* L R EN ES ET AN CS B S WS ON LRE LRO AL RLE RLO PDF NSM BN FSI LRI RLI PDI ENL ENR */
1416 0, 1, 2, 7, 8, 3, 9, 6, 5, 4, 4, 10, 10, 12, 10, 10, 10, 11, 10, 4, 4, 4, 4, 13, 14
1417 };
1418 enum { DirProp_L=0, DirProp_R=1, DirProp_EN=2, DirProp_AN=3, DirProp_ON=4, DirProp_S=5, DirProp_B=6 }; /* reduced dirProp */
1419
1420 /******************************************************************
1421
1422 PROPERTIES STATE TABLE
1423
1424 In table impTabProps,
1425 - the ON column regroups ON and WS, FSI, RLI, LRI and PDI
1426 - the BN column regroups BN, LRE, RLE, LRO, RLO, PDF
1427 - the Res column is the reduced property assigned to a run
1428
1429 Action 1: process current run1, init new run1
1430 2: init new run2
1431 3: process run1, process run2, init new run1
1432 4: process run1, set run1=run2, init new run2
1433
1434 Notes:
1435 1) This table is used in resolveImplicitLevels().
1436 2) This table triggers actions when there is a change in the Bidi
1437 property of incoming characters (action 1).
1438 3) Most such property sequences are processed immediately (in
1439 fact, passed to processPropertySeq().
1440 4) However, numbers are assembled as one sequence. This means
1441 that undefined situations (like CS following digits, until
1442 it is known if the next char will be a digit) are held until
1443 following chars define them.
1444 Example: digits followed by CS, then comes another CS or ON;
1445 the digits will be processed, then the CS assigned
1446 as the start of an ON sequence (action 3).
1447 5) There are cases where more than one sequence must be
1448 processed, for instance digits followed by CS followed by L:
1449 the digits must be processed as one sequence, and the CS
1450 must be processed as an ON sequence, all this before starting
1451 assembling chars for the opening L sequence.
1452
1453
1454 */
1455 static const uint8_t impTabProps[][IMPTABPROPS_COLUMNS] =
1456 {
1457 /* L , R , EN , AN , ON , S , B , ES , ET , CS , BN , NSM , AL , ENL , ENR , Res */
1458 /* 0 Init */ { 1 , 2 , 4 , 5 , 7 , 15 , 17 , 7 , 9 , 7 , 0 , 7 , 3 , 18 , 21 , DirProp_ON },
1459 /* 1 L */ { 1 , s(1,2), s(1,4), s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), s(1,9), s(1,7), 1 , 1 , s(1,3),s(1,18),s(1,21), DirProp_L },
1460 /* 2 R */ { s(1,1), 2 , s(1,4), s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), s(1,9), s(1,7), 2 , 2 , s(1,3),s(1,18),s(1,21), DirProp_R },
1461 /* 3 AL */ { s(1,1), s(1,2), s(1,6), s(1,6), s(1,8),s(1,16),s(1,17), s(1,8), s(1,8), s(1,8), 3 , 3 , 3 ,s(1,18),s(1,21), DirProp_R },
1462 /* 4 EN */ { s(1,1), s(1,2), 4 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,10), 11 ,s(2,10), 4 , 4 , s(1,3), 18 , 21 , DirProp_EN },
1463 /* 5 AN */ { s(1,1), s(1,2), s(1,4), 5 , s(1,7),s(1,15),s(1,17), s(1,7), s(1,9),s(2,12), 5 , 5 , s(1,3),s(1,18),s(1,21), DirProp_AN },
1464 /* 6 AL:EN/AN */ { s(1,1), s(1,2), 6 , 6 , s(1,8),s(1,16),s(1,17), s(1,8), s(1,8),s(2,13), 6 , 6 , s(1,3), 18 , 21 , DirProp_AN },
1465 /* 7 ON */ { s(1,1), s(1,2), s(1,4), s(1,5), 7 ,s(1,15),s(1,17), 7 ,s(2,14), 7 , 7 , 7 , s(1,3),s(1,18),s(1,21), DirProp_ON },
1466 /* 8 AL:ON */ { s(1,1), s(1,2), s(1,6), s(1,6), 8 ,s(1,16),s(1,17), 8 , 8 , 8 , 8 , 8 , s(1,3),s(1,18),s(1,21), DirProp_ON },
1467 /* 9 ET */ { s(1,1), s(1,2), 4 , s(1,5), 7 ,s(1,15),s(1,17), 7 , 9 , 7 , 9 , 9 , s(1,3), 18 , 21 , DirProp_ON },
1468 /*10 EN+ES/CS */ { s(3,1), s(3,2), 4 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 10 , s(4,7), s(3,3), 18 , 21 , DirProp_EN },
1469 /*11 EN+ET */ { s(1,1), s(1,2), 4 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), 11 , s(1,7), 11 , 11 , s(1,3), 18 , 21 , DirProp_EN },
1470 /*12 AN+CS */ { s(3,1), s(3,2), s(3,4), 5 , s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 12 , s(4,7), s(3,3),s(3,18),s(3,21), DirProp_AN },
1471 /*13 AL:EN/AN+CS */ { s(3,1), s(3,2), 6 , 6 , s(4,8),s(3,16),s(3,17), s(4,8), s(4,8), s(4,8), 13 , s(4,8), s(3,3), 18 , 21 , DirProp_AN },
1472 /*14 ON+ET */ { s(1,1), s(1,2), s(4,4), s(1,5), 7 ,s(1,15),s(1,17), 7 , 14 , 7 , 14 , 14 , s(1,3),s(4,18),s(4,21), DirProp_ON },
1473 /*15 S */ { s(1,1), s(1,2), s(1,4), s(1,5), s(1,7), 15 ,s(1,17), s(1,7), s(1,9), s(1,7), 15 , s(1,7), s(1,3),s(1,18),s(1,21), DirProp_S },
1474 /*16 AL:S */ { s(1,1), s(1,2), s(1,6), s(1,6), s(1,8), 16 ,s(1,17), s(1,8), s(1,8), s(1,8), 16 , s(1,8), s(1,3),s(1,18),s(1,21), DirProp_S },
1475 /*17 B */ { s(1,1), s(1,2), s(1,4), s(1,5), s(1,7),s(1,15), 17 , s(1,7), s(1,9), s(1,7), 17 , s(1,7), s(1,3),s(1,18),s(1,21), DirProp_B },
1476 /*18 ENL */ { s(1,1), s(1,2), 18 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,19), 20 ,s(2,19), 18 , 18 , s(1,3), 18 , 21 , DirProp_L },
1477 /*19 ENL+ES/CS */ { s(3,1), s(3,2), 18 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 19 , s(4,7), s(3,3), 18 , 21 , DirProp_L },
1478 /*20 ENL+ET */ { s(1,1), s(1,2), 18 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), 20 , s(1,7), 20 , 20 , s(1,3), 18 , 21 , DirProp_L },
1479 /*21 ENR */ { s(1,1), s(1,2), 21 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,22), 23 ,s(2,22), 21 , 21 , s(1,3), 18 , 21 , DirProp_AN },
1480 /*22 ENR+ES/CS */ { s(3,1), s(3,2), 21 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 22 , s(4,7), s(3,3), 18 , 21 , DirProp_AN },
1481 /*23 ENR+ET */ { s(1,1), s(1,2), 21 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), 23 , s(1,7), 23 , 23 , s(1,3), 18 , 21 , DirProp_AN }
1482 };
1483
1484 /* we must undef macro s because the levels tables have a different
1485 * structure (4 bits for action and 4 bits for next state.
1486 */
1487 #undef s
1488
1489 /******************************************************************
1490 The levels state machine tables
1491 *******************************************************************
1492
1493 All table cells are 8 bits:
1494 bits 0..3: next state
1495 bits 4..7: action to perform (if > 0)
1496
1497 Cells may be of format "n" where n represents the next state
1498 (except for the rightmost column).
1499 Cells may also be of format "s(x,y)" where x represents an action
1500 to perform and y represents the next state.
1501
1502 This format limits each table to 16 states each and to 15 actions.
1503
1504 *******************************************************************
1505 Definitions and type for levels state tables
1506 *******************************************************************
1507 */
1508 #define IMPTABLEVELS_COLUMNS (DirProp_B + 2)
1509 #define IMPTABLEVELS_RES (IMPTABLEVELS_COLUMNS - 1)
1510 #define GET_STATE(cell) ((cell)&0x0f)
1511 #define GET_ACTION(cell) ((cell)>>4)
1512 #define s(action, newState) ((uint8_t)(newState+(action<<4)))
1513
1514 typedef uint8_t ImpTab[][IMPTABLEVELS_COLUMNS];
1515 typedef uint8_t ImpAct[];
1516
1517 /* FOOD FOR THOUGHT: each ImpTab should have its associated ImpAct,
1518 * instead of having a pair of ImpTab and a pair of ImpAct.
1519 */
1520 typedef struct ImpTabPair {
1521 const void * pImpTab[2];
1522 const void * pImpAct[2];
1523 } ImpTabPair;
1524
1525 /******************************************************************
1526
1527 LEVELS STATE TABLES
1528
1529 In all levels state tables,
1530 - state 0 is the initial state
1531 - the Res column is the increment to add to the text level
1532 for this property sequence.
1533
1534 The impAct arrays for each table of a pair map the local action
1535 numbers of the table to the total list of actions. For instance,
1536 action 2 in a given table corresponds to the action number which
1537 appears in entry [2] of the impAct array for that table.
1538 The first entry of all impAct arrays must be 0.
1539
1540 Action 1: init conditional sequence
1541 2: prepend conditional sequence to current sequence
1542 3: set ON sequence to new level - 1
1543 4: init EN/AN/ON sequence
1544 5: fix EN/AN/ON sequence followed by R
1545 6: set previous level sequence to level 2
1546
1547 Notes:
1548 1) These tables are used in processPropertySeq(). The input
1549 is property sequences as determined by resolveImplicitLevels.
1550 2) Most such property sequences are processed immediately
1551 (levels are assigned).
1552 3) However, some sequences cannot be assigned a final level till
1553 one or more following sequences are received. For instance,
1554 ON following an R sequence within an even-level paragraph.
1555 If the following sequence is R, the ON sequence will be
1556 assigned basic run level+1, and so will the R sequence.
1557 4) S is generally handled like ON, since its level will be fixed
1558 to paragraph level in adjustWSLevels().
1559
1560 */
1561
1562 static const ImpTab impTabL_DEFAULT = /* Even paragraph level */
1563 /* In this table, conditional sequences receive the lower possible level
1564 until proven otherwise.
1565 */
1566 {
1567 /* L , R , EN , AN , ON , S , B , Res */
1568 /* 0 : init */ { 0 , 1 , 0 , 2 , 0 , 0 , 0 , 0 },
1569 /* 1 : R */ { 0 , 1 , 3 , 3 , s(1,4), s(1,4), 0 , 1 },
1570 /* 2 : AN */ { 0 , 1 , 0 , 2 , s(1,5), s(1,5), 0 , 2 },
1571 /* 3 : R+EN/AN */ { 0 , 1 , 3 , 3 , s(1,4), s(1,4), 0 , 2 },
1572 /* 4 : R+ON */ { 0 , s(2,1), s(3,3), s(3,3), 4 , 4 , 0 , 0 },
1573 /* 5 : AN+ON */ { 0 , s(2,1), 0 , s(3,2), 5 , 5 , 0 , 0 }
1574 };
1575 static const ImpTab impTabR_DEFAULT = /* Odd paragraph level */
1576 /* In this table, conditional sequences receive the lower possible level
1577 until proven otherwise.
1578 */
1579 {
1580 /* L , R , EN , AN , ON , S , B , Res */
1581 /* 0 : init */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 0 },
1582 /* 1 : L */ { 1 , 0 , 1 , 3 , s(1,4), s(1,4), 0 , 1 },
1583 /* 2 : EN/AN */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 1 },
1584 /* 3 : L+AN */ { 1 , 0 , 1 , 3 , 5 , 5 , 0 , 1 },
1585 /* 4 : L+ON */ { s(2,1), 0 , s(2,1), 3 , 4 , 4 , 0 , 0 },
1586 /* 5 : L+AN+ON */ { 1 , 0 , 1 , 3 , 5 , 5 , 0 , 0 }
1587 };
1588 static const ImpAct impAct0 = {0,1,2,3,4};
1589 static const ImpTabPair impTab_DEFAULT = {{&impTabL_DEFAULT,
1590 &impTabR_DEFAULT},
1591 {&impAct0, &impAct0}};
1592
1593 static const ImpTab impTabL_NUMBERS_SPECIAL = /* Even paragraph level */
1594 /* In this table, conditional sequences receive the lower possible level
1595 until proven otherwise.
1596 */
1597 {
1598 /* L , R , EN , AN , ON , S , B , Res */
1599 /* 0 : init */ { 0 , 2 , s(1,1), s(1,1), 0 , 0 , 0 , 0 },
1600 /* 1 : L+EN/AN */ { 0 , s(4,2), 1 , 1 , 0 , 0 , 0 , 0 },
1601 /* 2 : R */ { 0 , 2 , 4 , 4 , s(1,3), s(1,3), 0 , 1 },
1602 /* 3 : R+ON */ { 0 , s(2,2), s(3,4), s(3,4), 3 , 3 , 0 , 0 },
1603 /* 4 : R+EN/AN */ { 0 , 2 , 4 , 4 , s(1,3), s(1,3), 0 , 2 }
1604 };
1605 static const ImpTabPair impTab_NUMBERS_SPECIAL = {{&impTabL_NUMBERS_SPECIAL,
1606 &impTabR_DEFAULT},
1607 {&impAct0, &impAct0}};
1608
1609 static const ImpTab impTabL_GROUP_NUMBERS_WITH_R =
1610 /* In this table, EN/AN+ON sequences receive levels as if associated with R
1611 until proven that there is L or sor/eor on both sides. AN is handled like EN.
1612 */
1613 {
1614 /* L , R , EN , AN , ON , S , B , Res */
1615 /* 0 init */ { 0 , 3 , s(1,1), s(1,1), 0 , 0 , 0 , 0 },
1616 /* 1 EN/AN */ { s(2,0), 3 , 1 , 1 , 2 , s(2,0), s(2,0), 2 },
1617 /* 2 EN/AN+ON */ { s(2,0), 3 , 1 , 1 , 2 , s(2,0), s(2,0), 1 },
1618 /* 3 R */ { 0 , 3 , 5 , 5 , s(1,4), 0 , 0 , 1 },
1619 /* 4 R+ON */ { s(2,0), 3 , 5 , 5 , 4 , s(2,0), s(2,0), 1 },
1620 /* 5 R+EN/AN */ { 0 , 3 , 5 , 5 , s(1,4), 0 , 0 , 2 }
1621 };
1622 static const ImpTab impTabR_GROUP_NUMBERS_WITH_R =
1623 /* In this table, EN/AN+ON sequences receive levels as if associated with R
1624 until proven that there is L on both sides. AN is handled like EN.
1625 */
1626 {
1627 /* L , R , EN , AN , ON , S , B , Res */
1628 /* 0 init */ { 2 , 0 , 1 , 1 , 0 , 0 , 0 , 0 },
1629 /* 1 EN/AN */ { 2 , 0 , 1 , 1 , 0 , 0 , 0 , 1 },
1630 /* 2 L */ { 2 , 0 , s(1,4), s(1,4), s(1,3), 0 , 0 , 1 },
1631 /* 3 L+ON */ { s(2,2), 0 , 4 , 4 , 3 , 0 , 0 , 0 },
1632 /* 4 L+EN/AN */ { s(2,2), 0 , 4 , 4 , 3 , 0 , 0 , 1 }
1633 };
1634 static const ImpTabPair impTab_GROUP_NUMBERS_WITH_R = {
1635 {&impTabL_GROUP_NUMBERS_WITH_R,
1636 &impTabR_GROUP_NUMBERS_WITH_R},
1637 {&impAct0, &impAct0}};
1638
1639
1640 static const ImpTab impTabL_INVERSE_NUMBERS_AS_L =
1641 /* This table is identical to the Default LTR table except that EN and AN are
1642 handled like L.
1643 */
1644 {
1645 /* L , R , EN , AN , ON , S , B , Res */
1646 /* 0 : init */ { 0 , 1 , 0 , 0 , 0 , 0 , 0 , 0 },
1647 /* 1 : R */ { 0 , 1 , 0 , 0 , s(1,4), s(1,4), 0 , 1 },
1648 /* 2 : AN */ { 0 , 1 , 0 , 0 , s(1,5), s(1,5), 0 , 2 },
1649 /* 3 : R+EN/AN */ { 0 , 1 , 0 , 0 , s(1,4), s(1,4), 0 , 2 },
1650 /* 4 : R+ON */ { s(2,0), 1 , s(2,0), s(2,0), 4 , 4 , s(2,0), 1 },
1651 /* 5 : AN+ON */ { s(2,0), 1 , s(2,0), s(2,0), 5 , 5 , s(2,0), 1 }
1652 };
1653 static const ImpTab impTabR_INVERSE_NUMBERS_AS_L =
1654 /* This table is identical to the Default RTL table except that EN and AN are
1655 handled like L.
1656 */
1657 {
1658 /* L , R , EN , AN , ON , S , B , Res */
1659 /* 0 : init */ { 1 , 0 , 1 , 1 , 0 , 0 , 0 , 0 },
1660 /* 1 : L */ { 1 , 0 , 1 , 1 , s(1,4), s(1,4), 0 , 1 },
1661 /* 2 : EN/AN */ { 1 , 0 , 1 , 1 , 0 , 0 , 0 , 1 },
1662 /* 3 : L+AN */ { 1 , 0 , 1 , 1 , 5 , 5 , 0 , 1 },
1663 /* 4 : L+ON */ { s(2,1), 0 , s(2,1), s(2,1), 4 , 4 , 0 , 0 },
1664 /* 5 : L+AN+ON */ { 1 , 0 , 1 , 1 , 5 , 5 , 0 , 0 }
1665 };
1666 static const ImpTabPair impTab_INVERSE_NUMBERS_AS_L = {
1667 {&impTabL_INVERSE_NUMBERS_AS_L,
1668 &impTabR_INVERSE_NUMBERS_AS_L},
1669 {&impAct0, &impAct0}};
1670
1671 static const ImpTab impTabR_INVERSE_LIKE_DIRECT = /* Odd paragraph level */
1672 /* In this table, conditional sequences receive the lower possible level
1673 until proven otherwise.
1674 */
1675 {
1676 /* L , R , EN , AN , ON , S , B , Res */
1677 /* 0 : init */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 0 },
1678 /* 1 : L */ { 1 , 0 , 1 , 2 , s(1,3), s(1,3), 0 , 1 },
1679 /* 2 : EN/AN */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 1 },
1680 /* 3 : L+ON */ { s(2,1), s(3,0), 6 , 4 , 3 , 3 , s(3,0), 0 },
1681 /* 4 : L+ON+AN */ { s(2,1), s(3,0), 6 , 4 , 5 , 5 , s(3,0), 3 },
1682 /* 5 : L+AN+ON */ { s(2,1), s(3,0), 6 , 4 , 5 , 5 , s(3,0), 2 },
1683 /* 6 : L+ON+EN */ { s(2,1), s(3,0), 6 , 4 , 3 , 3 , s(3,0), 1 }
1684 };
1685 static const ImpAct impAct1 = {0,1,13,14};
1686 /* FOOD FOR THOUGHT: in LTR table below, check case "JKL 123abc"
1687 */
1688 static const ImpTabPair impTab_INVERSE_LIKE_DIRECT = {
1689 {&impTabL_DEFAULT,
1690 &impTabR_INVERSE_LIKE_DIRECT},
1691 {&impAct0, &impAct1}};
1692
1693 static const ImpTab impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS =
1694 /* The case handled in this table is (visually): R EN L
1695 */
1696 {
1697 /* L , R , EN , AN , ON , S , B , Res */
1698 /* 0 : init */ { 0 , s(6,3), 0 , 1 , 0 , 0 , 0 , 0 },
1699 /* 1 : L+AN */ { 0 , s(6,3), 0 , 1 , s(1,2), s(3,0), 0 , 4 },
1700 /* 2 : L+AN+ON */ { s(2,0), s(6,3), s(2,0), 1 , 2 , s(3,0), s(2,0), 3 },
1701 /* 3 : R */ { 0 , s(6,3), s(5,5), s(5,6), s(1,4), s(3,0), 0 , 3 },
1702 /* 4 : R+ON */ { s(3,0), s(4,3), s(5,5), s(5,6), 4 , s(3,0), s(3,0), 3 },
1703 /* 5 : R+EN */ { s(3,0), s(4,3), 5 , s(5,6), s(1,4), s(3,0), s(3,0), 4 },
1704 /* 6 : R+AN */ { s(3,0), s(4,3), s(5,5), 6 , s(1,4), s(3,0), s(3,0), 4 }
1705 };
1706 static const ImpTab impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS =
1707 /* The cases handled in this table are (visually): R EN L
1708 R L AN L
1709 */
1710 {
1711 /* L , R , EN , AN , ON , S , B , Res */
1712 /* 0 : init */ { s(1,3), 0 , 1 , 1 , 0 , 0 , 0 , 0 },
1713 /* 1 : R+EN/AN */ { s(2,3), 0 , 1 , 1 , 2 , s(4,0), 0 , 1 },
1714 /* 2 : R+EN/AN+ON */ { s(2,3), 0 , 1 , 1 , 2 , s(4,0), 0 , 0 },
1715 /* 3 : L */ { 3 , 0 , 3 , s(3,6), s(1,4), s(4,0), 0 , 1 },
1716 /* 4 : L+ON */ { s(5,3), s(4,0), 5 , s(3,6), 4 , s(4,0), s(4,0), 0 },
1717 /* 5 : L+ON+EN */ { s(5,3), s(4,0), 5 , s(3,6), 4 , s(4,0), s(4,0), 1 },
1718 /* 6 : L+AN */ { s(5,3), s(4,0), 6 , 6 , 4 , s(4,0), s(4,0), 3 }
1719 };
1720 static const ImpAct impAct2 = {0,1,2,5,6,7,8};
1721 static const ImpAct impAct3 = {0,1,9,10,11,12};
1722 static const ImpTabPair impTab_INVERSE_LIKE_DIRECT_WITH_MARKS = {
1723 {&impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS,
1724 &impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS},
1725 {&impAct2, &impAct3}};
1726
1727 static const ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL = {
1728 {&impTabL_NUMBERS_SPECIAL,
1729 &impTabR_INVERSE_LIKE_DIRECT},
1730 {&impAct0, &impAct1}};
1731
1732 static const ImpTab impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS =
1733 /* The case handled in this table is (visually): R EN L
1734 */
1735 {
1736 /* L , R , EN , AN , ON , S , B , Res */
1737 /* 0 : init */ { 0 , s(6,2), 1 , 1 , 0 , 0 , 0 , 0 },
1738 /* 1 : L+EN/AN */ { 0 , s(6,2), 1 , 1 , 0 , s(3,0), 0 , 4 },
1739 /* 2 : R */ { 0 , s(6,2), s(5,4), s(5,4), s(1,3), s(3,0), 0 , 3 },
1740 /* 3 : R+ON */ { s(3,0), s(4,2), s(5,4), s(5,4), 3 , s(3,0), s(3,0), 3 },
1741 /* 4 : R+EN/AN */ { s(3,0), s(4,2), 4 , 4 , s(1,3), s(3,0), s(3,0), 4 }
1742 };
1743 static const ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS = {
1744 {&impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS,
1745 &impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS},
1746 {&impAct2, &impAct3}};
1747
1748 #undef s
1749
1750 typedef struct {
1751 const ImpTab * pImpTab; /* level table pointer */
1752 const ImpAct * pImpAct; /* action map array */
1753 int32_t startON; /* start of ON sequence */
1754 int32_t startL2EN; /* start of level 2 sequence */
1755 int32_t lastStrongRTL; /* index of last found R or AL */
1756 int32_t state; /* current state */
1757 int32_t runStart; /* start position of the run */
1758 UBiDiLevel runLevel; /* run level before implicit solving */
1759 } LevState;
1760
1761 /*------------------------------------------------------------------------*/
1762
1763 static void
addPoint(UBiDi * pBiDi,int32_t pos,int32_t flag)1764 addPoint(UBiDi *pBiDi, int32_t pos, int32_t flag)
1765 /* param pos: position where to insert
1766 param flag: one of LRM_BEFORE, LRM_AFTER, RLM_BEFORE, RLM_AFTER
1767 */
1768 {
1769 #define FIRSTALLOC 10
1770 Point point;
1771 InsertPoints * pInsertPoints=&(pBiDi->insertPoints);
1772
1773 if (pInsertPoints->capacity == 0)
1774 {
1775 pInsertPoints->points=uprv_malloc(sizeof(Point)*FIRSTALLOC);
1776 if (pInsertPoints->points == NULL)
1777 {
1778 pInsertPoints->errorCode=U_MEMORY_ALLOCATION_ERROR;
1779 return;
1780 }
1781 pInsertPoints->capacity=FIRSTALLOC;
1782 }
1783 if (pInsertPoints->size >= pInsertPoints->capacity) /* no room for new point */
1784 {
1785 void * savePoints=pInsertPoints->points;
1786 pInsertPoints->points=uprv_realloc(pInsertPoints->points,
1787 pInsertPoints->capacity*2*sizeof(Point));
1788 if (pInsertPoints->points == NULL)
1789 {
1790 pInsertPoints->points=savePoints;
1791 pInsertPoints->errorCode=U_MEMORY_ALLOCATION_ERROR;
1792 return;
1793 }
1794 else pInsertPoints->capacity*=2;
1795 }
1796 point.pos=pos;
1797 point.flag=flag;
1798 pInsertPoints->points[pInsertPoints->size]=point;
1799 pInsertPoints->size++;
1800 #undef FIRSTALLOC
1801 }
1802
1803 static void
setLevelsOutsideIsolates(UBiDi * pBiDi,int32_t start,int32_t limit,UBiDiLevel level)1804 setLevelsOutsideIsolates(UBiDi *pBiDi, int32_t start, int32_t limit, UBiDiLevel level)
1805 {
1806 DirProp *dirProps=pBiDi->dirProps, dirProp;
1807 UBiDiLevel *levels=pBiDi->levels;
1808 int32_t isolateCount=0, k;
1809 for(k=start; k<limit; k++) {
1810 dirProp=dirProps[k];
1811 if(dirProp==PDI)
1812 isolateCount--;
1813 if(isolateCount==0)
1814 levels[k]=level;
1815 if(dirProp==LRI || dirProp==RLI)
1816 isolateCount++;
1817 }
1818 }
1819
1820 /* perform rules (Wn), (Nn), and (In) on a run of the text ------------------ */
1821
1822 /*
1823 * This implementation of the (Wn) rules applies all rules in one pass.
1824 * In order to do so, it needs a look-ahead of typically 1 character
1825 * (except for W5: sequences of ET) and keeps track of changes
1826 * in a rule Wp that affect a later Wq (p<q).
1827 *
1828 * The (Nn) and (In) rules are also performed in that same single loop,
1829 * but effectively one iteration behind for white space.
1830 *
1831 * Since all implicit rules are performed in one step, it is not necessary
1832 * to actually store the intermediate directional properties in dirProps[].
1833 */
1834
1835 static void
processPropertySeq(UBiDi * pBiDi,LevState * pLevState,uint8_t _prop,int32_t start,int32_t limit)1836 processPropertySeq(UBiDi *pBiDi, LevState *pLevState, uint8_t _prop,
1837 int32_t start, int32_t limit) {
1838 uint8_t cell, oldStateSeq, actionSeq;
1839 const ImpTab * pImpTab=pLevState->pImpTab;
1840 const ImpAct * pImpAct=pLevState->pImpAct;
1841 UBiDiLevel * levels=pBiDi->levels;
1842 UBiDiLevel level, addLevel;
1843 InsertPoints * pInsertPoints;
1844 int32_t start0, k;
1845
1846 start0=start; /* save original start position */
1847 oldStateSeq=(uint8_t)pLevState->state;
1848 cell=(*pImpTab)[oldStateSeq][_prop];
1849 pLevState->state=GET_STATE(cell); /* isolate the new state */
1850 actionSeq=(*pImpAct)[GET_ACTION(cell)]; /* isolate the action */
1851 addLevel=(*pImpTab)[pLevState->state][IMPTABLEVELS_RES];
1852
1853 if(actionSeq) {
1854 switch(actionSeq) {
1855 case 1: /* init ON seq */
1856 pLevState->startON=start0;
1857 break;
1858
1859 case 2: /* prepend ON seq to current seq */
1860 start=pLevState->startON;
1861 break;
1862
1863 case 3: /* EN/AN after R+ON */
1864 level=pLevState->runLevel+1;
1865 setLevelsOutsideIsolates(pBiDi, pLevState->startON, start0, level);
1866 break;
1867
1868 case 4: /* EN/AN before R for NUMBERS_SPECIAL */
1869 level=pLevState->runLevel+2;
1870 setLevelsOutsideIsolates(pBiDi, pLevState->startON, start0, level);
1871 break;
1872
1873 case 5: /* L or S after possible relevant EN/AN */
1874 /* check if we had EN after R/AL */
1875 if (pLevState->startL2EN >= 0) {
1876 addPoint(pBiDi, pLevState->startL2EN, LRM_BEFORE);
1877 }
1878 pLevState->startL2EN=-1; /* not within previous if since could also be -2 */
1879 /* check if we had any relevant EN/AN after R/AL */
1880 pInsertPoints=&(pBiDi->insertPoints);
1881 if ((pInsertPoints->capacity == 0) ||
1882 (pInsertPoints->size <= pInsertPoints->confirmed))
1883 {
1884 /* nothing, just clean up */
1885 pLevState->lastStrongRTL=-1;
1886 /* check if we have a pending conditional segment */
1887 level=(*pImpTab)[oldStateSeq][IMPTABLEVELS_RES];
1888 if ((level & 1) && (pLevState->startON > 0)) { /* after ON */
1889 start=pLevState->startON; /* reset to basic run level */
1890 }
1891 if (_prop == DirProp_S) /* add LRM before S */
1892 {
1893 addPoint(pBiDi, start0, LRM_BEFORE);
1894 pInsertPoints->confirmed=pInsertPoints->size;
1895 }
1896 break;
1897 }
1898 /* reset previous RTL cont to level for LTR text */
1899 for (k=pLevState->lastStrongRTL+1; k<start0; k++)
1900 {
1901 /* reset odd level, leave runLevel+2 as is */
1902 levels[k]=(levels[k] - 2) & ~1;
1903 }
1904 /* mark insert points as confirmed */
1905 pInsertPoints->confirmed=pInsertPoints->size;
1906 pLevState->lastStrongRTL=-1;
1907 if (_prop == DirProp_S) /* add LRM before S */
1908 {
1909 addPoint(pBiDi, start0, LRM_BEFORE);
1910 pInsertPoints->confirmed=pInsertPoints->size;
1911 }
1912 break;
1913
1914 case 6: /* R/AL after possible relevant EN/AN */
1915 /* just clean up */
1916 pInsertPoints=&(pBiDi->insertPoints);
1917 if (pInsertPoints->capacity > 0)
1918 /* remove all non confirmed insert points */
1919 pInsertPoints->size=pInsertPoints->confirmed;
1920 pLevState->startON=-1;
1921 pLevState->startL2EN=-1;
1922 pLevState->lastStrongRTL=limit - 1;
1923 break;
1924
1925 case 7: /* EN/AN after R/AL + possible cont */
1926 /* check for real AN */
1927 if ((_prop == DirProp_AN) && (pBiDi->dirProps[start0] == AN) &&
1928 (pBiDi->reorderingMode!=UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL))
1929 {
1930 /* real AN */
1931 if (pLevState->startL2EN == -1) /* if no relevant EN already found */
1932 {
1933 /* just note the righmost digit as a strong RTL */
1934 pLevState->lastStrongRTL=limit - 1;
1935 break;
1936 }
1937 if (pLevState->startL2EN >= 0) /* after EN, no AN */
1938 {
1939 addPoint(pBiDi, pLevState->startL2EN, LRM_BEFORE);
1940 pLevState->startL2EN=-2;
1941 }
1942 /* note AN */
1943 addPoint(pBiDi, start0, LRM_BEFORE);
1944 break;
1945 }
1946 /* if first EN/AN after R/AL */
1947 if (pLevState->startL2EN == -1) {
1948 pLevState->startL2EN=start0;
1949 }
1950 break;
1951
1952 case 8: /* note location of latest R/AL */
1953 pLevState->lastStrongRTL=limit - 1;
1954 pLevState->startON=-1;
1955 break;
1956
1957 case 9: /* L after R+ON/EN/AN */
1958 /* include possible adjacent number on the left */
1959 for (k=start0-1; k>=0 && !(levels[k]&1); k--);
1960 if(k>=0) {
1961 addPoint(pBiDi, k, RLM_BEFORE); /* add RLM before */
1962 pInsertPoints=&(pBiDi->insertPoints);
1963 pInsertPoints->confirmed=pInsertPoints->size; /* confirm it */
1964 }
1965 pLevState->startON=start0;
1966 break;
1967
1968 case 10: /* AN after L */
1969 /* AN numbers between L text on both sides may be trouble. */
1970 /* tentatively bracket with LRMs; will be confirmed if followed by L */
1971 addPoint(pBiDi, start0, LRM_BEFORE); /* add LRM before */
1972 addPoint(pBiDi, start0, LRM_AFTER); /* add LRM after */
1973 break;
1974
1975 case 11: /* R after L+ON/EN/AN */
1976 /* false alert, infirm LRMs around previous AN */
1977 pInsertPoints=&(pBiDi->insertPoints);
1978 pInsertPoints->size=pInsertPoints->confirmed;
1979 if (_prop == DirProp_S) /* add RLM before S */
1980 {
1981 addPoint(pBiDi, start0, RLM_BEFORE);
1982 pInsertPoints->confirmed=pInsertPoints->size;
1983 }
1984 break;
1985
1986 case 12: /* L after L+ON/AN */
1987 level=pLevState->runLevel + addLevel;
1988 for(k=pLevState->startON; k<start0; k++) {
1989 if (levels[k]<level)
1990 levels[k]=level;
1991 }
1992 pInsertPoints=&(pBiDi->insertPoints);
1993 pInsertPoints->confirmed=pInsertPoints->size; /* confirm inserts */
1994 pLevState->startON=start0;
1995 break;
1996
1997 case 13: /* L after L+ON+EN/AN/ON */
1998 level=pLevState->runLevel;
1999 for(k=start0-1; k>=pLevState->startON; k--) {
2000 if(levels[k]==level+3) {
2001 while(levels[k]==level+3) {
2002 levels[k--]-=2;
2003 }
2004 while(levels[k]==level) {
2005 k--;
2006 }
2007 }
2008 if(levels[k]==level+2) {
2009 levels[k]=level;
2010 continue;
2011 }
2012 levels[k]=level+1;
2013 }
2014 break;
2015
2016 case 14: /* R after L+ON+EN/AN/ON */
2017 level=pLevState->runLevel+1;
2018 for(k=start0-1; k>=pLevState->startON; k--) {
2019 if(levels[k]>level) {
2020 levels[k]-=2;
2021 }
2022 }
2023 break;
2024
2025 default: /* we should never get here */
2026 U_ASSERT(FALSE);
2027 break;
2028 }
2029 }
2030 if((addLevel) || (start < start0)) {
2031 level=pLevState->runLevel + addLevel;
2032 if(start>=pLevState->runStart) {
2033 for(k=start; k<limit; k++) {
2034 levels[k]=level;
2035 }
2036 } else {
2037 setLevelsOutsideIsolates(pBiDi, start, limit, level);
2038 }
2039 }
2040 }
2041
2042 /**
2043 * Returns the directionality of the last strong character at the end of the prologue, if any.
2044 * Requires prologue!=null.
2045 */
2046 static DirProp
lastL_R_AL(UBiDi * pBiDi)2047 lastL_R_AL(UBiDi *pBiDi) {
2048 const UChar *text=pBiDi->prologue;
2049 int32_t length=pBiDi->proLength;
2050 int32_t i;
2051 UChar32 uchar;
2052 DirProp dirProp;
2053 for(i=length; i>0; ) {
2054 /* i is decremented by U16_PREV */
2055 U16_PREV(text, 0, i, uchar);
2056 dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar);
2057 if(dirProp==L) {
2058 return DirProp_L;
2059 }
2060 if(dirProp==R || dirProp==AL) {
2061 return DirProp_R;
2062 }
2063 if(dirProp==B) {
2064 return DirProp_ON;
2065 }
2066 }
2067 return DirProp_ON;
2068 }
2069
2070 /**
2071 * Returns the directionality of the first strong character, or digit, in the epilogue, if any.
2072 * Requires epilogue!=null.
2073 */
2074 static DirProp
firstL_R_AL_EN_AN(UBiDi * pBiDi)2075 firstL_R_AL_EN_AN(UBiDi *pBiDi) {
2076 const UChar *text=pBiDi->epilogue;
2077 int32_t length=pBiDi->epiLength;
2078 int32_t i;
2079 UChar32 uchar;
2080 DirProp dirProp;
2081 for(i=0; i<length; ) {
2082 /* i is incremented by U16_NEXT */
2083 U16_NEXT(text, i, length, uchar);
2084 dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar);
2085 if(dirProp==L) {
2086 return DirProp_L;
2087 }
2088 if(dirProp==R || dirProp==AL) {
2089 return DirProp_R;
2090 }
2091 if(dirProp==EN) {
2092 return DirProp_EN;
2093 }
2094 if(dirProp==AN) {
2095 return DirProp_AN;
2096 }
2097 }
2098 return DirProp_ON;
2099 }
2100
2101 static void
resolveImplicitLevels(UBiDi * pBiDi,int32_t start,int32_t limit,DirProp sor,DirProp eor)2102 resolveImplicitLevels(UBiDi *pBiDi,
2103 int32_t start, int32_t limit,
2104 DirProp sor, DirProp eor) {
2105 const DirProp *dirProps=pBiDi->dirProps;
2106 DirProp dirProp;
2107 LevState levState;
2108 int32_t i, start1, start2;
2109 uint16_t oldStateImp, stateImp, actionImp;
2110 uint8_t gprop, resProp, cell;
2111 UBool inverseRTL;
2112 DirProp nextStrongProp=R;
2113 int32_t nextStrongPos=-1;
2114
2115 /* check for RTL inverse BiDi mode */
2116 /* FOOD FOR THOUGHT: in case of RTL inverse BiDi, it would make sense to
2117 * loop on the text characters from end to start.
2118 * This would need a different properties state table (at least different
2119 * actions) and different levels state tables (maybe very similar to the
2120 * LTR corresponding ones.
2121 */
2122 inverseRTL=(UBool)
2123 ((start<pBiDi->lastArabicPos) && (GET_PARALEVEL(pBiDi, start) & 1) &&
2124 (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT ||
2125 pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL));
2126
2127 /* initialize for property and levels state tables */
2128 levState.startL2EN=-1; /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */
2129 levState.lastStrongRTL=-1; /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */
2130 levState.runStart=start;
2131 levState.runLevel=pBiDi->levels[start];
2132 levState.pImpTab=(const ImpTab*)((pBiDi->pImpTabPair)->pImpTab)[levState.runLevel&1];
2133 levState.pImpAct=(const ImpAct*)((pBiDi->pImpTabPair)->pImpAct)[levState.runLevel&1];
2134 if(start==0 && pBiDi->proLength>0) {
2135 DirProp lastStrong=lastL_R_AL(pBiDi);
2136 if(lastStrong!=DirProp_ON) {
2137 sor=lastStrong;
2138 }
2139 }
2140 /* The isolates[] entries contain enough information to
2141 resume the bidi algorithm in the same state as it was
2142 when it was interrupted by an isolate sequence. */
2143 if(dirProps[start]==PDI && pBiDi->isolateCount >= 0) {
2144 levState.startON=pBiDi->isolates[pBiDi->isolateCount].startON;
2145 start1=pBiDi->isolates[pBiDi->isolateCount].start1;
2146 stateImp=pBiDi->isolates[pBiDi->isolateCount].stateImp;
2147 levState.state=pBiDi->isolates[pBiDi->isolateCount].state;
2148 pBiDi->isolateCount--;
2149 } else {
2150 levState.startON=-1;
2151 start1=start;
2152 if(dirProps[start]==NSM)
2153 stateImp = 1 + sor;
2154 else
2155 stateImp=0;
2156 levState.state=0;
2157 processPropertySeq(pBiDi, &levState, sor, start, start);
2158 }
2159 start2=start; /* to make Java compiler happy */
2160
2161 for(i=start; i<=limit; i++) {
2162 if(i>=limit) {
2163 int32_t k;
2164 for(k=limit-1; k>start&&(DIRPROP_FLAG(dirProps[k])&MASK_BN_EXPLICIT); k--);
2165 dirProp=dirProps[k];
2166 if(dirProp==LRI || dirProp==RLI)
2167 break; /* no forced closing for sequence ending with LRI/RLI */
2168 gprop=eor;
2169 } else {
2170 DirProp prop, prop1;
2171 prop=dirProps[i];
2172 if(prop==B) {
2173 pBiDi->isolateCount=-1; /* current isolates stack entry == none */
2174 }
2175 if(inverseRTL) {
2176 if(prop==AL) {
2177 /* AL before EN does not make it AN */
2178 prop=R;
2179 } else if(prop==EN) {
2180 if(nextStrongPos<=i) {
2181 /* look for next strong char (L/R/AL) */
2182 int32_t j;
2183 nextStrongProp=R; /* set default */
2184 nextStrongPos=limit;
2185 for(j=i+1; j<limit; j++) {
2186 prop1=dirProps[j];
2187 if(prop1==L || prop1==R || prop1==AL) {
2188 nextStrongProp=prop1;
2189 nextStrongPos=j;
2190 break;
2191 }
2192 }
2193 }
2194 if(nextStrongProp==AL) {
2195 prop=AN;
2196 }
2197 }
2198 }
2199 gprop=groupProp[prop];
2200 }
2201 oldStateImp=stateImp;
2202 cell=impTabProps[oldStateImp][gprop];
2203 stateImp=GET_STATEPROPS(cell); /* isolate the new state */
2204 actionImp=GET_ACTIONPROPS(cell); /* isolate the action */
2205 if((i==limit) && (actionImp==0)) {
2206 /* there is an unprocessed sequence if its property == eor */
2207 actionImp=1; /* process the last sequence */
2208 }
2209 if(actionImp) {
2210 resProp=impTabProps[oldStateImp][IMPTABPROPS_RES];
2211 switch(actionImp) {
2212 case 1: /* process current seq1, init new seq1 */
2213 processPropertySeq(pBiDi, &levState, resProp, start1, i);
2214 start1=i;
2215 break;
2216 case 2: /* init new seq2 */
2217 start2=i;
2218 break;
2219 case 3: /* process seq1, process seq2, init new seq1 */
2220 processPropertySeq(pBiDi, &levState, resProp, start1, start2);
2221 processPropertySeq(pBiDi, &levState, DirProp_ON, start2, i);
2222 start1=i;
2223 break;
2224 case 4: /* process seq1, set seq1=seq2, init new seq2 */
2225 processPropertySeq(pBiDi, &levState, resProp, start1, start2);
2226 start1=start2;
2227 start2=i;
2228 break;
2229 default: /* we should never get here */
2230 U_ASSERT(FALSE);
2231 break;
2232 }
2233 }
2234 }
2235
2236 /* flush possible pending sequence, e.g. ON */
2237 if(limit==pBiDi->length && pBiDi->epiLength>0) {
2238 DirProp firstStrong=firstL_R_AL_EN_AN(pBiDi);
2239 if(firstStrong!=DirProp_ON) {
2240 eor=firstStrong;
2241 }
2242 }
2243
2244 /* look for the last char not a BN or LRE/RLE/LRO/RLO/PDF */
2245 for(i=limit-1; i>start&&(DIRPROP_FLAG(dirProps[i])&MASK_BN_EXPLICIT); i--);
2246 dirProp=dirProps[i];
2247 if((dirProp==LRI || dirProp==RLI) && limit<pBiDi->length) {
2248 pBiDi->isolateCount++;
2249 pBiDi->isolates[pBiDi->isolateCount].stateImp=stateImp;
2250 pBiDi->isolates[pBiDi->isolateCount].state=levState.state;
2251 pBiDi->isolates[pBiDi->isolateCount].start1=start1;
2252 pBiDi->isolates[pBiDi->isolateCount].startON=levState.startON;
2253 }
2254 else
2255 processPropertySeq(pBiDi, &levState, eor, limit, limit);
2256 }
2257
2258 /* perform (L1) and (X9) ---------------------------------------------------- */
2259
2260 /*
2261 * Reset the embedding levels for some non-graphic characters (L1).
2262 * This function also sets appropriate levels for BN, and
2263 * explicit embedding types that are supposed to have been removed
2264 * from the paragraph in (X9).
2265 */
2266 static void
adjustWSLevels(UBiDi * pBiDi)2267 adjustWSLevels(UBiDi *pBiDi) {
2268 const DirProp *dirProps=pBiDi->dirProps;
2269 UBiDiLevel *levels=pBiDi->levels;
2270 int32_t i;
2271
2272 if(pBiDi->flags&MASK_WS) {
2273 UBool orderParagraphsLTR=pBiDi->orderParagraphsLTR;
2274 Flags flag;
2275
2276 i=pBiDi->trailingWSStart;
2277 while(i>0) {
2278 /* reset a sequence of WS/BN before eop and B/S to the paragraph paraLevel */
2279 while(i>0 && (flag=DIRPROP_FLAG(dirProps[--i]))&MASK_WS) {
2280 if(orderParagraphsLTR&&(flag&DIRPROP_FLAG(B))) {
2281 levels[i]=0;
2282 } else {
2283 levels[i]=GET_PARALEVEL(pBiDi, i);
2284 }
2285 }
2286
2287 /* reset BN to the next character's paraLevel until B/S, which restarts above loop */
2288 /* here, i+1 is guaranteed to be <length */
2289 while(i>0) {
2290 flag=DIRPROP_FLAG(dirProps[--i]);
2291 if(flag&MASK_BN_EXPLICIT) {
2292 levels[i]=levels[i+1];
2293 } else if(orderParagraphsLTR&&(flag&DIRPROP_FLAG(B))) {
2294 levels[i]=0;
2295 break;
2296 } else if(flag&MASK_B_S) {
2297 levels[i]=GET_PARALEVEL(pBiDi, i);
2298 break;
2299 }
2300 }
2301 }
2302 }
2303 }
2304
2305 U_CAPI void U_EXPORT2
ubidi_setContext(UBiDi * pBiDi,const UChar * prologue,int32_t proLength,const UChar * epilogue,int32_t epiLength,UErrorCode * pErrorCode)2306 ubidi_setContext(UBiDi *pBiDi,
2307 const UChar *prologue, int32_t proLength,
2308 const UChar *epilogue, int32_t epiLength,
2309 UErrorCode *pErrorCode) {
2310 /* check the argument values */
2311 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
2312 if(pBiDi==NULL || proLength<-1 || epiLength<-1 ||
2313 (prologue==NULL && proLength!=0) || (epilogue==NULL && epiLength!=0)) {
2314 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2315 return;
2316 }
2317
2318 if(proLength==-1) {
2319 pBiDi->proLength=u_strlen(prologue);
2320 } else {
2321 pBiDi->proLength=proLength;
2322 }
2323 if(epiLength==-1) {
2324 pBiDi->epiLength=u_strlen(epilogue);
2325 } else {
2326 pBiDi->epiLength=epiLength;
2327 }
2328 pBiDi->prologue=prologue;
2329 pBiDi->epilogue=epilogue;
2330 }
2331
2332 static void
setParaSuccess(UBiDi * pBiDi)2333 setParaSuccess(UBiDi *pBiDi) {
2334 pBiDi->proLength=0; /* forget the last context */
2335 pBiDi->epiLength=0;
2336 pBiDi->pParaBiDi=pBiDi; /* mark successful setPara */
2337 }
2338
2339 #define BIDI_MIN(x, y) ((x)<(y) ? (x) : (y))
2340 #define BIDI_ABS(x) ((x)>=0 ? (x) : (-(x)))
2341
2342 static void
setParaRunsOnly(UBiDi * pBiDi,const UChar * text,int32_t length,UBiDiLevel paraLevel,UErrorCode * pErrorCode)2343 setParaRunsOnly(UBiDi *pBiDi, const UChar *text, int32_t length,
2344 UBiDiLevel paraLevel, UErrorCode *pErrorCode) {
2345 void *runsOnlyMemory = NULL;
2346 int32_t *visualMap;
2347 UChar *visualText;
2348 int32_t saveLength, saveTrailingWSStart;
2349 const UBiDiLevel *levels;
2350 UBiDiLevel *saveLevels;
2351 UBiDiDirection saveDirection;
2352 UBool saveMayAllocateText;
2353 Run *runs;
2354 int32_t visualLength, i, j, visualStart, logicalStart,
2355 runCount, runLength, addedRuns, insertRemove,
2356 start, limit, step, indexOddBit, logicalPos,
2357 index0, index1;
2358 uint32_t saveOptions;
2359
2360 pBiDi->reorderingMode=UBIDI_REORDER_DEFAULT;
2361 if(length==0) {
2362 ubidi_setPara(pBiDi, text, length, paraLevel, NULL, pErrorCode);
2363 goto cleanup3;
2364 }
2365 /* obtain memory for mapping table and visual text */
2366 runsOnlyMemory=uprv_malloc(length*(sizeof(int32_t)+sizeof(UChar)+sizeof(UBiDiLevel)));
2367 if(runsOnlyMemory==NULL) {
2368 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
2369 goto cleanup3;
2370 }
2371 visualMap=runsOnlyMemory;
2372 visualText=(UChar *)&visualMap[length];
2373 saveLevels=(UBiDiLevel *)&visualText[length];
2374 saveOptions=pBiDi->reorderingOptions;
2375 if(saveOptions & UBIDI_OPTION_INSERT_MARKS) {
2376 pBiDi->reorderingOptions&=~UBIDI_OPTION_INSERT_MARKS;
2377 pBiDi->reorderingOptions|=UBIDI_OPTION_REMOVE_CONTROLS;
2378 }
2379 paraLevel&=1; /* accept only 0 or 1 */
2380 ubidi_setPara(pBiDi, text, length, paraLevel, NULL, pErrorCode);
2381 if(U_FAILURE(*pErrorCode)) {
2382 goto cleanup3;
2383 }
2384 /* we cannot access directly pBiDi->levels since it is not yet set if
2385 * direction is not MIXED
2386 */
2387 levels=ubidi_getLevels(pBiDi, pErrorCode);
2388 uprv_memcpy(saveLevels, levels, (size_t)pBiDi->length*sizeof(UBiDiLevel));
2389 saveTrailingWSStart=pBiDi->trailingWSStart;
2390 saveLength=pBiDi->length;
2391 saveDirection=pBiDi->direction;
2392
2393 /* FOOD FOR THOUGHT: instead of writing the visual text, we could use
2394 * the visual map and the dirProps array to drive the second call
2395 * to ubidi_setPara (but must make provision for possible removal of
2396 * BiDi controls. Alternatively, only use the dirProps array via
2397 * customized classifier callback.
2398 */
2399 visualLength=ubidi_writeReordered(pBiDi, visualText, length,
2400 UBIDI_DO_MIRRORING, pErrorCode);
2401 ubidi_getVisualMap(pBiDi, visualMap, pErrorCode);
2402 if(U_FAILURE(*pErrorCode)) {
2403 goto cleanup2;
2404 }
2405 pBiDi->reorderingOptions=saveOptions;
2406
2407 pBiDi->reorderingMode=UBIDI_REORDER_INVERSE_LIKE_DIRECT;
2408 paraLevel^=1;
2409 /* Because what we did with reorderingOptions, visualText may be shorter
2410 * than the original text. But we don't want the levels memory to be
2411 * reallocated shorter than the original length, since we need to restore
2412 * the levels as after the first call to ubidi_setpara() before returning.
2413 * We will force mayAllocateText to FALSE before the second call to
2414 * ubidi_setpara(), and will restore it afterwards.
2415 */
2416 saveMayAllocateText=pBiDi->mayAllocateText;
2417 pBiDi->mayAllocateText=FALSE;
2418 ubidi_setPara(pBiDi, visualText, visualLength, paraLevel, NULL, pErrorCode);
2419 pBiDi->mayAllocateText=saveMayAllocateText;
2420 ubidi_getRuns(pBiDi, pErrorCode);
2421 if(U_FAILURE(*pErrorCode)) {
2422 goto cleanup1;
2423 }
2424 /* check if some runs must be split, count how many splits */
2425 addedRuns=0;
2426 runCount=pBiDi->runCount;
2427 runs=pBiDi->runs;
2428 visualStart=0;
2429 for(i=0; i<runCount; i++, visualStart+=runLength) {
2430 runLength=runs[i].visualLimit-visualStart;
2431 if(runLength<2) {
2432 continue;
2433 }
2434 logicalStart=GET_INDEX(runs[i].logicalStart);
2435 for(j=logicalStart+1; j<logicalStart+runLength; j++) {
2436 index0=visualMap[j];
2437 index1=visualMap[j-1];
2438 if((BIDI_ABS(index0-index1)!=1) || (saveLevels[index0]!=saveLevels[index1])) {
2439 addedRuns++;
2440 }
2441 }
2442 }
2443 if(addedRuns) {
2444 if(getRunsMemory(pBiDi, runCount+addedRuns)) {
2445 if(runCount==1) {
2446 /* because we switch from UBiDi.simpleRuns to UBiDi.runs */
2447 pBiDi->runsMemory[0]=runs[0];
2448 }
2449 runs=pBiDi->runs=pBiDi->runsMemory;
2450 pBiDi->runCount+=addedRuns;
2451 } else {
2452 goto cleanup1;
2453 }
2454 }
2455 /* split runs which are not consecutive in source text */
2456 for(i=runCount-1; i>=0; i--) {
2457 runLength= i==0 ? runs[0].visualLimit :
2458 runs[i].visualLimit-runs[i-1].visualLimit;
2459 logicalStart=runs[i].logicalStart;
2460 indexOddBit=GET_ODD_BIT(logicalStart);
2461 logicalStart=GET_INDEX(logicalStart);
2462 if(runLength<2) {
2463 if(addedRuns) {
2464 runs[i+addedRuns]=runs[i];
2465 }
2466 logicalPos=visualMap[logicalStart];
2467 runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos,
2468 saveLevels[logicalPos]^indexOddBit);
2469 continue;
2470 }
2471 if(indexOddBit) {
2472 start=logicalStart;
2473 limit=logicalStart+runLength-1;
2474 step=1;
2475 } else {
2476 start=logicalStart+runLength-1;
2477 limit=logicalStart;
2478 step=-1;
2479 }
2480 for(j=start; j!=limit; j+=step) {
2481 index0=visualMap[j];
2482 index1=visualMap[j+step];
2483 if((BIDI_ABS(index0-index1)!=1) || (saveLevels[index0]!=saveLevels[index1])) {
2484 logicalPos=BIDI_MIN(visualMap[start], index0);
2485 runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos,
2486 saveLevels[logicalPos]^indexOddBit);
2487 runs[i+addedRuns].visualLimit=runs[i].visualLimit;
2488 runs[i].visualLimit-=BIDI_ABS(j-start)+1;
2489 insertRemove=runs[i].insertRemove&(LRM_AFTER|RLM_AFTER);
2490 runs[i+addedRuns].insertRemove=insertRemove;
2491 runs[i].insertRemove&=~insertRemove;
2492 start=j+step;
2493 addedRuns--;
2494 }
2495 }
2496 if(addedRuns) {
2497 runs[i+addedRuns]=runs[i];
2498 }
2499 logicalPos=BIDI_MIN(visualMap[start], visualMap[limit]);
2500 runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos,
2501 saveLevels[logicalPos]^indexOddBit);
2502 }
2503
2504 cleanup1:
2505 /* restore initial paraLevel */
2506 pBiDi->paraLevel^=1;
2507 cleanup2:
2508 /* restore real text */
2509 pBiDi->text=text;
2510 pBiDi->length=saveLength;
2511 pBiDi->originalLength=length;
2512 pBiDi->direction=saveDirection;
2513 /* the saved levels should never excess levelsSize, but we check anyway */
2514 if(saveLength>pBiDi->levelsSize) {
2515 saveLength=pBiDi->levelsSize;
2516 }
2517 uprv_memcpy(pBiDi->levels, saveLevels, (size_t)saveLength*sizeof(UBiDiLevel));
2518 pBiDi->trailingWSStart=saveTrailingWSStart;
2519 if(pBiDi->runCount>1) {
2520 pBiDi->direction=UBIDI_MIXED;
2521 }
2522 cleanup3:
2523 /* free memory for mapping table and visual text */
2524 uprv_free(runsOnlyMemory);
2525
2526 pBiDi->reorderingMode=UBIDI_REORDER_RUNS_ONLY;
2527 }
2528
2529 /* ubidi_setPara ------------------------------------------------------------ */
2530
2531 U_CAPI void U_EXPORT2
ubidi_setPara(UBiDi * pBiDi,const UChar * text,int32_t length,UBiDiLevel paraLevel,UBiDiLevel * embeddingLevels,UErrorCode * pErrorCode)2532 ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length,
2533 UBiDiLevel paraLevel, UBiDiLevel *embeddingLevels,
2534 UErrorCode *pErrorCode) {
2535 UBiDiDirection direction;
2536 DirProp *dirProps;
2537
2538 /* check the argument values */
2539 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
2540 if(pBiDi==NULL || text==NULL || length<-1 ||
2541 (paraLevel>UBIDI_MAX_EXPLICIT_LEVEL && paraLevel<UBIDI_DEFAULT_LTR)) {
2542 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2543 return;
2544 }
2545
2546 if(length==-1) {
2547 length=u_strlen(text);
2548 }
2549
2550 /* special treatment for RUNS_ONLY mode */
2551 if(pBiDi->reorderingMode==UBIDI_REORDER_RUNS_ONLY) {
2552 setParaRunsOnly(pBiDi, text, length, paraLevel, pErrorCode);
2553 return;
2554 }
2555
2556 /* initialize the UBiDi structure */
2557 pBiDi->pParaBiDi=NULL; /* mark unfinished setPara */
2558 pBiDi->text=text;
2559 pBiDi->length=pBiDi->originalLength=pBiDi->resultLength=length;
2560 pBiDi->paraLevel=paraLevel;
2561 pBiDi->direction=paraLevel&1;
2562 pBiDi->paraCount=1;
2563
2564 pBiDi->dirProps=NULL;
2565 pBiDi->levels=NULL;
2566 pBiDi->runs=NULL;
2567 pBiDi->insertPoints.size=0; /* clean up from last call */
2568 pBiDi->insertPoints.confirmed=0; /* clean up from last call */
2569
2570 /*
2571 * Save the original paraLevel if contextual; otherwise, set to 0.
2572 */
2573 pBiDi->defaultParaLevel=IS_DEFAULT_LEVEL(paraLevel);
2574
2575 if(length==0) {
2576 /*
2577 * For an empty paragraph, create a UBiDi object with the paraLevel and
2578 * the flags and the direction set but without allocating zero-length arrays.
2579 * There is nothing more to do.
2580 */
2581 if(IS_DEFAULT_LEVEL(paraLevel)) {
2582 pBiDi->paraLevel&=1;
2583 pBiDi->defaultParaLevel=0;
2584 }
2585 pBiDi->flags=DIRPROP_FLAG_LR(paraLevel);
2586 pBiDi->runCount=0;
2587 pBiDi->paraCount=0;
2588 setParaSuccess(pBiDi); /* mark successful setPara */
2589 return;
2590 }
2591
2592 pBiDi->runCount=-1;
2593
2594 /* allocate paras memory */
2595 if(pBiDi->parasMemory)
2596 pBiDi->paras=pBiDi->parasMemory;
2597 else
2598 pBiDi->paras=pBiDi->simpleParas;
2599
2600 /*
2601 * Get the directional properties,
2602 * the flags bit-set, and
2603 * determine the paragraph level if necessary.
2604 */
2605 if(getDirPropsMemory(pBiDi, length)) {
2606 pBiDi->dirProps=pBiDi->dirPropsMemory;
2607 if(!getDirProps(pBiDi)) {
2608 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
2609 return;
2610 }
2611 } else {
2612 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
2613 return;
2614 }
2615 dirProps=pBiDi->dirProps;
2616 /* the processed length may have changed if UBIDI_OPTION_STREAMING */
2617 length= pBiDi->length;
2618 pBiDi->trailingWSStart=length; /* the levels[] will reflect the WS run */
2619
2620 /* are explicit levels specified? */
2621 if(embeddingLevels==NULL) {
2622 /* no: determine explicit levels according to the (Xn) rules */\
2623 if(getLevelsMemory(pBiDi, length)) {
2624 pBiDi->levels=pBiDi->levelsMemory;
2625 direction=resolveExplicitLevels(pBiDi, pErrorCode);
2626 if(U_FAILURE(*pErrorCode)) {
2627 return;
2628 }
2629 } else {
2630 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
2631 return;
2632 }
2633 } else {
2634 /* set BN for all explicit codes, check that all levels are 0 or paraLevel..UBIDI_MAX_EXPLICIT_LEVEL */
2635 pBiDi->levels=embeddingLevels;
2636 direction=checkExplicitLevels(pBiDi, pErrorCode);
2637 if(U_FAILURE(*pErrorCode)) {
2638 return;
2639 }
2640 }
2641
2642 /* allocate isolate memory */
2643 if(pBiDi->isolateCount<=SIMPLE_ISOLATES_COUNT)
2644 pBiDi->isolates=pBiDi->simpleIsolates;
2645 else
2646 if((int32_t)(pBiDi->isolateCount*sizeof(Isolate))<=pBiDi->isolatesSize)
2647 pBiDi->isolates=pBiDi->isolatesMemory;
2648 else {
2649 if(getInitialIsolatesMemory(pBiDi, pBiDi->isolateCount)) {
2650 pBiDi->isolates=pBiDi->isolatesMemory;
2651 } else {
2652 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
2653 return;
2654 }
2655 }
2656 pBiDi->isolateCount=-1; /* current isolates stack entry == none */
2657
2658 /*
2659 * The steps after (X9) in the UBiDi algorithm are performed only if
2660 * the paragraph text has mixed directionality!
2661 */
2662 pBiDi->direction=direction;
2663 switch(direction) {
2664 case UBIDI_LTR:
2665 /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */
2666 pBiDi->trailingWSStart=0;
2667 break;
2668 case UBIDI_RTL:
2669 /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */
2670 pBiDi->trailingWSStart=0;
2671 break;
2672 default:
2673 /*
2674 * Choose the right implicit state table
2675 */
2676 switch(pBiDi->reorderingMode) {
2677 case UBIDI_REORDER_DEFAULT:
2678 pBiDi->pImpTabPair=&impTab_DEFAULT;
2679 break;
2680 case UBIDI_REORDER_NUMBERS_SPECIAL:
2681 pBiDi->pImpTabPair=&impTab_NUMBERS_SPECIAL;
2682 break;
2683 case UBIDI_REORDER_GROUP_NUMBERS_WITH_R:
2684 pBiDi->pImpTabPair=&impTab_GROUP_NUMBERS_WITH_R;
2685 break;
2686 case UBIDI_REORDER_INVERSE_NUMBERS_AS_L:
2687 pBiDi->pImpTabPair=&impTab_INVERSE_NUMBERS_AS_L;
2688 break;
2689 case UBIDI_REORDER_INVERSE_LIKE_DIRECT:
2690 if (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) {
2691 pBiDi->pImpTabPair=&impTab_INVERSE_LIKE_DIRECT_WITH_MARKS;
2692 } else {
2693 pBiDi->pImpTabPair=&impTab_INVERSE_LIKE_DIRECT;
2694 }
2695 break;
2696 case UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL:
2697 if (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) {
2698 pBiDi->pImpTabPair=&impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS;
2699 } else {
2700 pBiDi->pImpTabPair=&impTab_INVERSE_FOR_NUMBERS_SPECIAL;
2701 }
2702 break;
2703 default:
2704 /* we should never get here */
2705 U_ASSERT(FALSE);
2706 break;
2707 }
2708 /*
2709 * If there are no external levels specified and there
2710 * are no significant explicit level codes in the text,
2711 * then we can treat the entire paragraph as one run.
2712 * Otherwise, we need to perform the following rules on runs of
2713 * the text with the same embedding levels. (X10)
2714 * "Significant" explicit level codes are ones that actually
2715 * affect non-BN characters.
2716 * Examples for "insignificant" ones are empty embeddings
2717 * LRE-PDF, LRE-RLE-PDF-PDF, etc.
2718 */
2719 if(embeddingLevels==NULL && pBiDi->paraCount<=1 &&
2720 !(pBiDi->flags&DIRPROP_FLAG_MULTI_RUNS)) {
2721 resolveImplicitLevels(pBiDi, 0, length,
2722 GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, 0)),
2723 GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, length-1)));
2724 } else {
2725 /* sor, eor: start and end types of same-level-run */
2726 UBiDiLevel *levels=pBiDi->levels;
2727 int32_t start, limit=0;
2728 UBiDiLevel level, nextLevel;
2729 DirProp sor, eor;
2730
2731 /* determine the first sor and set eor to it because of the loop body (sor=eor there) */
2732 level=GET_PARALEVEL(pBiDi, 0);
2733 nextLevel=levels[0];
2734 if(level<nextLevel) {
2735 eor=GET_LR_FROM_LEVEL(nextLevel);
2736 } else {
2737 eor=GET_LR_FROM_LEVEL(level);
2738 }
2739
2740 do {
2741 /* determine start and limit of the run (end points just behind the run) */
2742
2743 /* the values for this run's start are the same as for the previous run's end */
2744 start=limit;
2745 level=nextLevel;
2746 if((start>0) && (dirProps[start-1]==B)) {
2747 /* except if this is a new paragraph, then set sor = para level */
2748 sor=GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, start));
2749 } else {
2750 sor=eor;
2751 }
2752
2753 /* search for the limit of this run */
2754 while((++limit<length) &&
2755 ((levels[limit]==level) ||
2756 (DIRPROP_FLAG(dirProps[limit])&MASK_BN_EXPLICIT))) {}
2757
2758 /* get the correct level of the next run */
2759 if(limit<length) {
2760 nextLevel=levels[limit];
2761 } else {
2762 nextLevel=GET_PARALEVEL(pBiDi, length-1);
2763 }
2764
2765 /* determine eor from max(level, nextLevel); sor is last run's eor */
2766 if(NO_OVERRIDE(level)<NO_OVERRIDE(nextLevel)) {
2767 eor=GET_LR_FROM_LEVEL(nextLevel);
2768 } else {
2769 eor=GET_LR_FROM_LEVEL(level);
2770 }
2771
2772 /* if the run consists of overridden directional types, then there
2773 are no implicit types to be resolved */
2774 if(!(level&UBIDI_LEVEL_OVERRIDE)) {
2775 resolveImplicitLevels(pBiDi, start, limit, sor, eor);
2776 } else {
2777 /* remove the UBIDI_LEVEL_OVERRIDE flags */
2778 do {
2779 levels[start++]&=~UBIDI_LEVEL_OVERRIDE;
2780 } while(start<limit);
2781 }
2782 } while(limit<length);
2783 }
2784 /* check if we got any memory shortage while adding insert points */
2785 if (U_FAILURE(pBiDi->insertPoints.errorCode))
2786 {
2787 *pErrorCode=pBiDi->insertPoints.errorCode;
2788 return;
2789 }
2790 /* reset the embedding levels for some non-graphic characters (L1), (X9) */
2791 adjustWSLevels(pBiDi);
2792 break;
2793 }
2794 /* add RLM for inverse Bidi with contextual orientation resolving
2795 * to RTL which would not round-trip otherwise
2796 */
2797 if((pBiDi->defaultParaLevel>0) &&
2798 (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) &&
2799 ((pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT) ||
2800 (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL))) {
2801 int32_t i, j, start, last;
2802 UBiDiLevel level;
2803 DirProp dirProp;
2804 for(i=0; i<pBiDi->paraCount; i++) {
2805 last=(pBiDi->paras[i].limit)-1;
2806 level=pBiDi->paras[i].level;
2807 if(level==0)
2808 continue; /* LTR paragraph */
2809 start= i==0 ? 0 : pBiDi->paras[i-1].limit;
2810 for(j=last; j>=start; j--) {
2811 dirProp=dirProps[j];
2812 if(dirProp==L) {
2813 if(j<last) {
2814 while(dirProps[last]==B) {
2815 last--;
2816 }
2817 }
2818 addPoint(pBiDi, last, RLM_BEFORE);
2819 break;
2820 }
2821 if(DIRPROP_FLAG(dirProp) & MASK_R_AL) {
2822 break;
2823 }
2824 }
2825 }
2826 }
2827
2828 if(pBiDi->reorderingOptions & UBIDI_OPTION_REMOVE_CONTROLS) {
2829 pBiDi->resultLength -= pBiDi->controlCount;
2830 } else {
2831 pBiDi->resultLength += pBiDi->insertPoints.size;
2832 }
2833 setParaSuccess(pBiDi); /* mark successful setPara */
2834 }
2835
2836 U_CAPI void U_EXPORT2
ubidi_orderParagraphsLTR(UBiDi * pBiDi,UBool orderParagraphsLTR)2837 ubidi_orderParagraphsLTR(UBiDi *pBiDi, UBool orderParagraphsLTR) {
2838 if(pBiDi!=NULL) {
2839 pBiDi->orderParagraphsLTR=orderParagraphsLTR;
2840 }
2841 }
2842
2843 U_CAPI UBool U_EXPORT2
ubidi_isOrderParagraphsLTR(UBiDi * pBiDi)2844 ubidi_isOrderParagraphsLTR(UBiDi *pBiDi) {
2845 if(pBiDi!=NULL) {
2846 return pBiDi->orderParagraphsLTR;
2847 } else {
2848 return FALSE;
2849 }
2850 }
2851
2852 U_CAPI UBiDiDirection U_EXPORT2
ubidi_getDirection(const UBiDi * pBiDi)2853 ubidi_getDirection(const UBiDi *pBiDi) {
2854 if(IS_VALID_PARA_OR_LINE(pBiDi)) {
2855 return pBiDi->direction;
2856 } else {
2857 return UBIDI_LTR;
2858 }
2859 }
2860
2861 U_CAPI const UChar * U_EXPORT2
ubidi_getText(const UBiDi * pBiDi)2862 ubidi_getText(const UBiDi *pBiDi) {
2863 if(IS_VALID_PARA_OR_LINE(pBiDi)) {
2864 return pBiDi->text;
2865 } else {
2866 return NULL;
2867 }
2868 }
2869
2870 U_CAPI int32_t U_EXPORT2
ubidi_getLength(const UBiDi * pBiDi)2871 ubidi_getLength(const UBiDi *pBiDi) {
2872 if(IS_VALID_PARA_OR_LINE(pBiDi)) {
2873 return pBiDi->originalLength;
2874 } else {
2875 return 0;
2876 }
2877 }
2878
2879 U_CAPI int32_t U_EXPORT2
ubidi_getProcessedLength(const UBiDi * pBiDi)2880 ubidi_getProcessedLength(const UBiDi *pBiDi) {
2881 if(IS_VALID_PARA_OR_LINE(pBiDi)) {
2882 return pBiDi->length;
2883 } else {
2884 return 0;
2885 }
2886 }
2887
2888 U_CAPI int32_t U_EXPORT2
ubidi_getResultLength(const UBiDi * pBiDi)2889 ubidi_getResultLength(const UBiDi *pBiDi) {
2890 if(IS_VALID_PARA_OR_LINE(pBiDi)) {
2891 return pBiDi->resultLength;
2892 } else {
2893 return 0;
2894 }
2895 }
2896
2897 /* paragraphs API functions ------------------------------------------------- */
2898
2899 U_CAPI UBiDiLevel U_EXPORT2
ubidi_getParaLevel(const UBiDi * pBiDi)2900 ubidi_getParaLevel(const UBiDi *pBiDi) {
2901 if(IS_VALID_PARA_OR_LINE(pBiDi)) {
2902 return pBiDi->paraLevel;
2903 } else {
2904 return 0;
2905 }
2906 }
2907
2908 U_CAPI int32_t U_EXPORT2
ubidi_countParagraphs(UBiDi * pBiDi)2909 ubidi_countParagraphs(UBiDi *pBiDi) {
2910 if(!IS_VALID_PARA_OR_LINE(pBiDi)) {
2911 return 0;
2912 } else {
2913 return pBiDi->paraCount;
2914 }
2915 }
2916
2917 U_CAPI void U_EXPORT2
ubidi_getParagraphByIndex(const UBiDi * pBiDi,int32_t paraIndex,int32_t * pParaStart,int32_t * pParaLimit,UBiDiLevel * pParaLevel,UErrorCode * pErrorCode)2918 ubidi_getParagraphByIndex(const UBiDi *pBiDi, int32_t paraIndex,
2919 int32_t *pParaStart, int32_t *pParaLimit,
2920 UBiDiLevel *pParaLevel, UErrorCode *pErrorCode) {
2921 int32_t paraStart;
2922
2923 /* check the argument values */
2924 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
2925 RETURN_VOID_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode);
2926 RETURN_VOID_IF_BAD_RANGE(paraIndex, 0, pBiDi->paraCount, *pErrorCode);
2927
2928 pBiDi=pBiDi->pParaBiDi; /* get Para object if Line object */
2929 if(paraIndex) {
2930 paraStart=pBiDi->paras[paraIndex-1].limit;
2931 } else {
2932 paraStart=0;
2933 }
2934 if(pParaStart!=NULL) {
2935 *pParaStart=paraStart;
2936 }
2937 if(pParaLimit!=NULL) {
2938 *pParaLimit=pBiDi->paras[paraIndex].limit;
2939 }
2940 if(pParaLevel!=NULL) {
2941 *pParaLevel=GET_PARALEVEL(pBiDi, paraStart);
2942 }
2943 }
2944
2945 U_CAPI int32_t U_EXPORT2
ubidi_getParagraph(const UBiDi * pBiDi,int32_t charIndex,int32_t * pParaStart,int32_t * pParaLimit,UBiDiLevel * pParaLevel,UErrorCode * pErrorCode)2946 ubidi_getParagraph(const UBiDi *pBiDi, int32_t charIndex,
2947 int32_t *pParaStart, int32_t *pParaLimit,
2948 UBiDiLevel *pParaLevel, UErrorCode *pErrorCode) {
2949 int32_t paraIndex;
2950
2951 /* check the argument values */
2952 /* pErrorCode will be checked by the call to ubidi_getParagraphByIndex */
2953 RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, -1);
2954 RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode, -1);
2955 pBiDi=pBiDi->pParaBiDi; /* get Para object if Line object */
2956 RETURN_IF_BAD_RANGE(charIndex, 0, pBiDi->length, *pErrorCode, -1);
2957
2958 for(paraIndex=0; charIndex>=pBiDi->paras[paraIndex].limit; paraIndex++);
2959 ubidi_getParagraphByIndex(pBiDi, paraIndex, pParaStart, pParaLimit, pParaLevel, pErrorCode);
2960 return paraIndex;
2961 }
2962
2963 U_CAPI void U_EXPORT2
ubidi_setClassCallback(UBiDi * pBiDi,UBiDiClassCallback * newFn,const void * newContext,UBiDiClassCallback ** oldFn,const void ** oldContext,UErrorCode * pErrorCode)2964 ubidi_setClassCallback(UBiDi *pBiDi, UBiDiClassCallback *newFn,
2965 const void *newContext, UBiDiClassCallback **oldFn,
2966 const void **oldContext, UErrorCode *pErrorCode)
2967 {
2968 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
2969 if(pBiDi==NULL) {
2970 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2971 return;
2972 }
2973 if( oldFn )
2974 {
2975 *oldFn = pBiDi->fnClassCallback;
2976 }
2977 if( oldContext )
2978 {
2979 *oldContext = pBiDi->coClassCallback;
2980 }
2981 pBiDi->fnClassCallback = newFn;
2982 pBiDi->coClassCallback = newContext;
2983 }
2984
2985 U_CAPI void U_EXPORT2
ubidi_getClassCallback(UBiDi * pBiDi,UBiDiClassCallback ** fn,const void ** context)2986 ubidi_getClassCallback(UBiDi *pBiDi, UBiDiClassCallback **fn, const void **context)
2987 {
2988 if(pBiDi==NULL) {
2989 return;
2990 }
2991 if( fn )
2992 {
2993 *fn = pBiDi->fnClassCallback;
2994 }
2995 if( context )
2996 {
2997 *context = pBiDi->coClassCallback;
2998 }
2999 }
3000
3001 U_CAPI UCharDirection U_EXPORT2
ubidi_getCustomizedClass(UBiDi * pBiDi,UChar32 c)3002 ubidi_getCustomizedClass(UBiDi *pBiDi, UChar32 c)
3003 {
3004 UCharDirection dir;
3005
3006 if( pBiDi->fnClassCallback == NULL ||
3007 (dir = (*pBiDi->fnClassCallback)(pBiDi->coClassCallback, c)) == U_BIDI_CLASS_DEFAULT )
3008 {
3009 dir = ubidi_getClass(pBiDi->bdp, c);
3010 }
3011 if(dir >= U_CHAR_DIRECTION_COUNT) {
3012 dir = ON;
3013 }
3014 return dir;
3015 }
3016