• Home
  • History
  • Annotate
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1  /*
2  ******************************************************************************
3  *
4  *   Copyright (C) 1999-2015, International Business Machines
5  *   Corporation and others.  All Rights Reserved.
6  *
7  ******************************************************************************
8  *   file name:  ubidiimp.h
9  *   encoding:   US-ASCII
10  *   tab size:   8 (not used)
11  *   indentation:4
12  *
13  *   created on: 1999aug06
14  *   created by: Markus W. Scherer, updated by Matitiahu Allouche
15  */
16  
17  #ifndef UBIDIIMP_H
18  #define UBIDIIMP_H
19  
20  /*  set import/export definitions */
21  #ifdef U_COMMON_IMPLEMENTATION
22  
23  #include "unicode/utypes.h"
24  #include "unicode/uchar.h"
25  #include "ubidi_props.h"
26  
27  /* miscellaneous definitions ---------------------------------------------- */
28  
29  typedef uint8_t DirProp;
30  typedef uint32_t Flags;
31  
32  /*  Comparing the description of the BiDi algorithm with this implementation
33      is easier with the same names for the BiDi types in the code as there.
34      See UCharDirection in uchar.h .
35  */
36  enum {
37      L=  U_LEFT_TO_RIGHT,                /*  0 */
38      R=  U_RIGHT_TO_LEFT,                /*  1 */
39      EN= U_EUROPEAN_NUMBER,              /*  2 */
40      ES= U_EUROPEAN_NUMBER_SEPARATOR,    /*  3 */
41      ET= U_EUROPEAN_NUMBER_TERMINATOR,   /*  4 */
42      AN= U_ARABIC_NUMBER,                /*  5 */
43      CS= U_COMMON_NUMBER_SEPARATOR,      /*  6 */
44      B=  U_BLOCK_SEPARATOR,              /*  7 */
45      S=  U_SEGMENT_SEPARATOR,            /*  8 */
46      WS= U_WHITE_SPACE_NEUTRAL,          /*  9 */
47      ON= U_OTHER_NEUTRAL,                /* 10 */
48      LRE=U_LEFT_TO_RIGHT_EMBEDDING,      /* 11 */
49      LRO=U_LEFT_TO_RIGHT_OVERRIDE,       /* 12 */
50      AL= U_RIGHT_TO_LEFT_ARABIC,         /* 13 */
51      RLE=U_RIGHT_TO_LEFT_EMBEDDING,      /* 14 */
52      RLO=U_RIGHT_TO_LEFT_OVERRIDE,       /* 15 */
53      PDF=U_POP_DIRECTIONAL_FORMAT,       /* 16 */
54      NSM=U_DIR_NON_SPACING_MARK,         /* 17 */
55      BN= U_BOUNDARY_NEUTRAL,             /* 18 */
56      FSI=U_FIRST_STRONG_ISOLATE,         /* 19 */
57      LRI=U_LEFT_TO_RIGHT_ISOLATE,        /* 20 */
58      RLI=U_RIGHT_TO_LEFT_ISOLATE,        /* 21 */
59      PDI=U_POP_DIRECTIONAL_ISOLATE,      /* 22 */
60      ENL,    /* EN after W7 */           /* 23 */
61      ENR,    /* EN not subject to W7 */  /* 24 */
62      dirPropCount
63  };
64  
65  /*  Sometimes, bit values are more appropriate
66      to deal with directionality properties.
67      Abbreviations in these macro names refer to names
68      used in the BiDi algorithm.
69  */
70  #define DIRPROP_FLAG(dir) (1UL<<(dir))
71  #define PURE_DIRPROP(prop)  ((prop)&~0xE0)    ?????????????????????????
72  
73  /* special flag for multiple runs from explicit embedding codes */
74  #define DIRPROP_FLAG_MULTI_RUNS (1UL<<31)
75  
76  /* are there any characters that are LTR or RTL? */
77  #define MASK_LTR (DIRPROP_FLAG(L)|DIRPROP_FLAG(EN)|DIRPROP_FLAG(ENL)|DIRPROP_FLAG(ENR)|DIRPROP_FLAG(AN)|DIRPROP_FLAG(LRE)|DIRPROP_FLAG(LRO)|DIRPROP_FLAG(LRI))
78  #define MASK_RTL (DIRPROP_FLAG(R)|DIRPROP_FLAG(AL)|DIRPROP_FLAG(RLE)|DIRPROP_FLAG(RLO)|DIRPROP_FLAG(RLI))
79  #define MASK_R_AL (DIRPROP_FLAG(R)|DIRPROP_FLAG(AL))
80  #define MASK_STRONG_EN_AN (DIRPROP_FLAG(L)|DIRPROP_FLAG(R)|DIRPROP_FLAG(AL)|DIRPROP_FLAG(EN)|DIRPROP_FLAG(AN))
81  
82  /* explicit embedding codes */
83  #define MASK_EXPLICIT (DIRPROP_FLAG(LRE)|DIRPROP_FLAG(LRO)|DIRPROP_FLAG(RLE)|DIRPROP_FLAG(RLO)|DIRPROP_FLAG(PDF))
84  
85  /* explicit isolate codes */
86  #define MASK_ISO (DIRPROP_FLAG(LRI)|DIRPROP_FLAG(RLI)|DIRPROP_FLAG(FSI)|DIRPROP_FLAG(PDI))
87  
88  #define MASK_BN_EXPLICIT (DIRPROP_FLAG(BN)|MASK_EXPLICIT)
89  
90  /* paragraph and segment separators */
91  #define MASK_B_S (DIRPROP_FLAG(B)|DIRPROP_FLAG(S))
92  
93  /* all types that are counted as White Space or Neutral in some steps */
94  #define MASK_WS (MASK_B_S|DIRPROP_FLAG(WS)|MASK_BN_EXPLICIT|MASK_ISO)
95  
96  /* types that are neutrals or could becomes neutrals in (Wn) */
97  #define MASK_POSSIBLE_N (DIRPROP_FLAG(ON)|DIRPROP_FLAG(CS)|DIRPROP_FLAG(ES)|DIRPROP_FLAG(ET)|MASK_WS)
98  
99  /*
100   *  These types may be changed to "e",
101   *  the embedding type (L or R) of the run,
102   *  in the BiDi algorithm (N2)
103   */
104  #define MASK_EMBEDDING (DIRPROP_FLAG(NSM)|MASK_POSSIBLE_N)
105  
106  /* the dirProp's L and R are defined to 0 and 1 values in UCharDirection */
107  #define GET_LR_FROM_LEVEL(level) ((DirProp)((level)&1))
108  
109  #define IS_DEFAULT_LEVEL(level) ((level)>=0xfe)
110  
111  /*
112   *  The following bit is used for the directional isolate status.
113   *  Stack entries corresponding to isolate sequences are greater than ISOLATE.
114   */
115  #define ISOLATE  0x0100
116  
117  U_CFUNC UBiDiLevel
118  ubidi_getParaLevelAtIndex(const UBiDi *pBiDi, int32_t index);
119  
120  #define GET_PARALEVEL(ubidi, index) \
121              ((UBiDiLevel)(!(ubidi)->defaultParaLevel || (index)<(ubidi)->paras[0].limit ? \
122                           (ubidi)->paraLevel : ubidi_getParaLevelAtIndex((ubidi), (index))))
123  
124  /* number of paras entries allocated initially without malloc */
125  #define SIMPLE_PARAS_COUNT      10
126  /* number of isolate entries allocated initially without malloc */
127  #define SIMPLE_ISOLATES_COUNT   5
128  /* number of isolate run entries for paired brackets allocated initially without malloc */
129  #define SIMPLE_OPENINGS_COUNT   20
130  
131  #define CR  0x000D
132  #define LF  0x000A
133  
134  /* Run structure for reordering --------------------------------------------- */
135  enum {
136      LRM_BEFORE=1,
137      LRM_AFTER=2,
138      RLM_BEFORE=4,
139      RLM_AFTER=8
140  };
141  
142  typedef struct Para {
143      int32_t limit;
144      int32_t level;
145  } Para;
146  
147  enum {                                  /* flags for Opening.flags */
148      FOUND_L=DIRPROP_FLAG(L),
149      FOUND_R=DIRPROP_FLAG(R)
150  };
151  
152  typedef struct Opening {
153      int32_t position;                   /* position of opening bracket */
154      int32_t match;                      /* matching char or -position of closing bracket */
155      int32_t contextPos;                 /* position of last strong char found before opening */
156      uint16_t flags;                     /* bits for L or R/AL found within the pair */
157      UBiDiDirection contextDir;          /* L or R according to last strong char before opening */
158      uint8_t filler;                     /* to complete a nice multiple of 4 chars */
159  } Opening;
160  
161  typedef struct IsoRun {
162      int32_t  contextPos;                /* position of char determining context */
163      uint16_t start;                     /* index of first opening entry for this run */
164      uint16_t limit;                     /* index after last opening entry for this run */
165      UBiDiLevel level;                   /* level of this run */
166      DirProp lastStrong;                 /* bidi class of last strong char found in this run */
167      DirProp lastBase;                   /* bidi class of last base char found in this run */
168      UBiDiDirection contextDir;          /* L or R to use as context for following openings */
169  } IsoRun;
170  
171  typedef struct BracketData {
172      UBiDi   *pBiDi;
173      /* array of opening entries which should be enough in most cases; no malloc() */
174      Opening simpleOpenings[SIMPLE_OPENINGS_COUNT];
175      Opening *openings;                  /* pointer to current array of entries */
176      int32_t openingsCount;              /* number of allocated entries */
177      int32_t isoRunLast;                 /* index of last used entry */
178      /* array of nested isolated sequence entries; can never excess UBIDI_MAX_EXPLICIT_LEVEL
179         + 1 for index 0, + 1 for before the first isolated sequence */
180      IsoRun  isoRuns[UBIDI_MAX_EXPLICIT_LEVEL+2];
181      UBool isNumbersSpecial;             /* reordering mode for NUMBERS_SPECIAL */
182  } BracketData;
183  
184  typedef struct Isolate {
185      int32_t startON;
186      int32_t start1;
187      int32_t state;
188      int16_t stateImp;
189  } Isolate;
190  
191  typedef struct Run {
192      int32_t logicalStart,   /* first character of the run; b31 indicates even/odd level */
193              visualLimit,    /* last visual position of the run +1 */
194              insertRemove;   /* if >0, flags for inserting LRM/RLM before/after run,
195                                 if <0, count of bidi controls within run            */
196  } Run;
197  
198  /* in a Run, logicalStart will get this bit set if the run level is odd */
199  #define INDEX_ODD_BIT (1UL<<31)
200  
201  #define MAKE_INDEX_ODD_PAIR(index, level) ((index)|((int32_t)(level)<<31))
202  #define ADD_ODD_BIT_FROM_LEVEL(x, level)  ((x)|=((int32_t)(level)<<31))
203  #define REMOVE_ODD_BIT(x)                 ((x)&=~INDEX_ODD_BIT)
204  
205  #define GET_INDEX(x)   ((x)&~INDEX_ODD_BIT)
206  #define GET_ODD_BIT(x) ((uint32_t)(x)>>31)
207  #define IS_ODD_RUN(x)  ((UBool)(((x)&INDEX_ODD_BIT)!=0))
208  #define IS_EVEN_RUN(x) ((UBool)(((x)&INDEX_ODD_BIT)==0))
209  
210  U_CFUNC UBool
211  ubidi_getRuns(UBiDi *pBiDi, UErrorCode *pErrorCode);
212  
213  /** BiDi control code points */
214  enum {
215      ZWNJ_CHAR=0x200c,
216      ZWJ_CHAR,
217      LRM_CHAR,
218      RLM_CHAR,
219      LRE_CHAR=0x202a,
220      RLE_CHAR,
221      PDF_CHAR,
222      LRO_CHAR,
223      RLO_CHAR,
224      LRI_CHAR=0x2066,
225      RLI_CHAR,
226      FSI_CHAR,
227      PDI_CHAR
228  };
229  
230  #define IS_BIDI_CONTROL_CHAR(c) (((uint32_t)(c)&0xfffffffc)==ZWNJ_CHAR || (uint32_t)((c)-LRE_CHAR)<5 || (uint32_t)((c)-LRI_CHAR)<4)
231  
232  /* InsertPoints structure for noting where to put BiDi marks ---------------- */
233  
234  typedef struct Point {
235      int32_t pos;            /* position in text */
236      int32_t flag;           /* flag for LRM/RLM, before/after */
237  } Point;
238  
239  typedef struct InsertPoints {
240      int32_t capacity;       /* number of points allocated */
241      int32_t size;           /* number of points used */
242      int32_t confirmed;      /* number of points confirmed */
243      UErrorCode errorCode;   /* for eventual memory shortage */
244      Point *points;          /* pointer to array of points */
245  } InsertPoints;
246  
247  
248  /* UBiDi structure ----------------------------------------------------------- */
249  
250  struct UBiDi {
251      /* pointer to parent paragraph object (pointer to self if this object is
252       * a paragraph object); set to NULL in a newly opened object; set to a
253       * real value after a successful execution of ubidi_setPara or ubidi_setLine
254       */
255      const UBiDi * pParaBiDi;
256  
257      const UBiDiProps *bdp;
258  
259      /* alias pointer to the current text */
260      const UChar *text;
261  
262      /* length of the current text */
263      int32_t originalLength;
264  
265      /* if the UBIDI_OPTION_STREAMING option is set, this is the length
266       * of text actually processed by ubidi_setPara, which may be shorter than
267       * the original length.
268       * Otherwise, it is identical to the original length.
269       */
270      int32_t length;
271  
272      /* if the UBIDI_OPTION_REMOVE_CONTROLS option is set, and/or
273       * marks are allowed to be inserted in one of the reordering mode, the
274       * length of the result string may be different from the processed length.
275       */
276      int32_t resultLength;
277  
278      /* memory sizes in bytes */
279      int32_t dirPropsSize, levelsSize, openingsSize, parasSize, runsSize, isolatesSize;
280  
281      /* allocated memory */
282      DirProp *dirPropsMemory;
283      UBiDiLevel *levelsMemory;
284      Opening *openingsMemory;
285      Para *parasMemory;
286      Run *runsMemory;
287      Isolate *isolatesMemory;
288  
289      /* indicators for whether memory may be allocated after ubidi_open() */
290      UBool mayAllocateText, mayAllocateRuns;
291  
292      /* arrays with one value per text-character */
293      DirProp *dirProps;
294      UBiDiLevel *levels;
295  
296      /* are we performing an approximation of the "inverse BiDi" algorithm? */
297      UBool isInverse;
298  
299      /* are we using the basic algorithm or its variation? */
300      UBiDiReorderingMode reorderingMode;
301  
302      /* UBIDI_REORDER_xxx values must be ordered so that all the regular
303       * logical to visual modes come first, and all inverse BiDi modes
304       * come last.
305       */
306      #define UBIDI_REORDER_LAST_LOGICAL_TO_VISUAL    UBIDI_REORDER_NUMBERS_SPECIAL
307  
308      /* bitmask for reordering options */
309      uint32_t reorderingOptions;
310  
311      /* must block separators receive level 0? */
312      UBool orderParagraphsLTR;
313  
314      /* the paragraph level */
315      UBiDiLevel paraLevel;
316      /* original paraLevel when contextual */
317      /* must be one of UBIDI_DEFAULT_xxx or 0 if not contextual */
318      UBiDiLevel defaultParaLevel;
319  
320      /* context data */
321      const UChar *prologue;
322      int32_t proLength;
323      const UChar *epilogue;
324      int32_t epiLength;
325  
326      /* the following is set in ubidi_setPara, used in processPropertySeq */
327      const struct ImpTabPair * pImpTabPair;  /* pointer to levels state table pair */
328  
329      /* the overall paragraph or line directionality - see UBiDiDirection */
330      UBiDiDirection direction;
331  
332      /* flags is a bit set for which directional properties are in the text */
333      Flags flags;
334  
335      /* lastArabicPos is index to the last AL in the text, -1 if none */
336      int32_t lastArabicPos;
337  
338      /* characters after trailingWSStart are WS and are */
339      /* implicitly at the paraLevel (rule (L1)) - levels may not reflect that */
340      int32_t trailingWSStart;
341  
342      /* fields for paragraph handling */
343      int32_t paraCount;                  /* set in getDirProps() */
344      /* filled in getDirProps() */
345      Para *paras;
346  
347      /* for relatively short text, we only need a tiny array of paras (no malloc()) */
348      Para simpleParas[SIMPLE_PARAS_COUNT];
349  
350      /* fields for line reordering */
351      int32_t runCount;     /* ==-1: runs not set up yet */
352      Run *runs;
353  
354      /* for non-mixed text, we only need a tiny array of runs (no malloc()) */
355      Run simpleRuns[1];
356  
357      /* maximum or current nesting depth of isolate sequences */
358      /* Within resolveExplicitLevels() and checkExplicitLevels(), this is the maximal
359         nesting encountered.
360         Within resolveImplicitLevels(), this is the index of the current isolates
361         stack entry. */
362      int32_t isolateCount;
363      Isolate *isolates;
364  
365      /* for simple text, have a small stack (no malloc()) */
366      Isolate simpleIsolates[SIMPLE_ISOLATES_COUNT];
367  
368      /* for inverse Bidi with insertion of directional marks */
369      InsertPoints insertPoints;
370  
371      /* for option UBIDI_OPTION_REMOVE_CONTROLS */
372      int32_t controlCount;
373  
374      /* for Bidi class callback */
375      UBiDiClassCallback *fnClassCallback;    /* action pointer */
376      const void *coClassCallback;            /* context pointer */
377  };
378  
379  #define IS_VALID_PARA(x) ((x) && ((x)->pParaBiDi==(x)))
380  #define IS_VALID_PARA_OR_LINE(x) ((x) && ((x)->pParaBiDi==(x) || (((x)->pParaBiDi) && (x)->pParaBiDi->pParaBiDi==(x)->pParaBiDi)))
381  
382  typedef union {
383      DirProp *dirPropsMemory;
384      UBiDiLevel *levelsMemory;
385      Opening *openingsMemory;
386      Para *parasMemory;
387      Run *runsMemory;
388      Isolate *isolatesMemory;
389  } BidiMemoryForAllocation;
390  
391  /* Macros for initial checks at function entry */
392  #define RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrcode, retvalue)   \
393          if((pErrcode)==NULL || U_FAILURE(*pErrcode)) return retvalue
394  #define RETURN_IF_NOT_VALID_PARA(bidi, errcode, retvalue)   \
395          if(!IS_VALID_PARA(bidi)) {  \
396              errcode=U_INVALID_STATE_ERROR;  \
397              return retvalue;                \
398          }
399  #define RETURN_IF_NOT_VALID_PARA_OR_LINE(bidi, errcode, retvalue)   \
400          if(!IS_VALID_PARA_OR_LINE(bidi)) {  \
401              errcode=U_INVALID_STATE_ERROR;  \
402              return retvalue;                \
403          }
404  #define RETURN_IF_BAD_RANGE(arg, start, limit, errcode, retvalue)   \
405          if((arg)<(start) || (arg)>=(limit)) {       \
406              (errcode)=U_ILLEGAL_ARGUMENT_ERROR;     \
407              return retvalue;                        \
408          }
409  
410  #define RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrcode)   \
411          if((pErrcode)==NULL || U_FAILURE(*pErrcode)) return
412  #define RETURN_VOID_IF_NOT_VALID_PARA(bidi, errcode)   \
413          if(!IS_VALID_PARA(bidi)) {  \
414              errcode=U_INVALID_STATE_ERROR;  \
415              return;                \
416          }
417  #define RETURN_VOID_IF_NOT_VALID_PARA_OR_LINE(bidi, errcode)   \
418          if(!IS_VALID_PARA_OR_LINE(bidi)) {  \
419              errcode=U_INVALID_STATE_ERROR;  \
420              return;                \
421          }
422  #define RETURN_VOID_IF_BAD_RANGE(arg, start, limit, errcode)   \
423          if((arg)<(start) || (arg)>=(limit)) {       \
424              (errcode)=U_ILLEGAL_ARGUMENT_ERROR;     \
425              return;                        \
426          }
427  
428  /* helper function to (re)allocate memory if allowed */
429  U_CFUNC UBool
430  ubidi_getMemory(BidiMemoryForAllocation *pMemory, int32_t *pSize, UBool mayAllocate, int32_t sizeNeeded);
431  
432  /* helper macros for each allocated array in UBiDi */
433  #define getDirPropsMemory(pBiDi, length) \
434          ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->dirPropsMemory, &(pBiDi)->dirPropsSize, \
435                          (pBiDi)->mayAllocateText, (length))
436  
437  #define getLevelsMemory(pBiDi, length) \
438          ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->levelsMemory, &(pBiDi)->levelsSize, \
439                          (pBiDi)->mayAllocateText, (length))
440  
441  #define getRunsMemory(pBiDi, length) \
442          ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->runsMemory, &(pBiDi)->runsSize, \
443                          (pBiDi)->mayAllocateRuns, (length)*sizeof(Run))
444  
445  /* additional macros used by ubidi_open() - always allow allocation */
446  #define getInitialDirPropsMemory(pBiDi, length) \
447          ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->dirPropsMemory, &(pBiDi)->dirPropsSize, \
448                          TRUE, (length))
449  
450  #define getInitialLevelsMemory(pBiDi, length) \
451          ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->levelsMemory, &(pBiDi)->levelsSize, \
452                          TRUE, (length))
453  
454  #define getInitialOpeningsMemory(pBiDi, length) \
455          ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->openingsMemory, &(pBiDi)->openingsSize, \
456                          TRUE, (length)*sizeof(Opening))
457  
458  #define getInitialParasMemory(pBiDi, length) \
459          ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->parasMemory, &(pBiDi)->parasSize, \
460                          TRUE, (length)*sizeof(Para))
461  
462  #define getInitialRunsMemory(pBiDi, length) \
463          ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->runsMemory, &(pBiDi)->runsSize, \
464                          TRUE, (length)*sizeof(Run))
465  
466  #define getInitialIsolatesMemory(pBiDi, length) \
467          ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->isolatesMemory, &(pBiDi)->isolatesSize, \
468                          TRUE, (length)*sizeof(Isolate))
469  
470  #endif
471  
472  #endif
473