// Copyright 2014 PDFium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #include "core/fxcrt/fx_bidi.h"
8 
9 #include <algorithm>
10 
11 #include "core/fxcrt/fx_unicode.h"
12 #include "third_party/base/ptr_util.h"
13 
14 #ifdef PDF_ENABLE_XFA
15 #include "core/fxcrt/fx_extension.h"
16 #endif  // PDF_ENABLE_XFA
17 
18 namespace {
19 
// Bidirectional character classes. The numeric values are significant: they
// are compared against the class field extracted from a character's property
// word and are used as column indices into the lookup tables in this file.
enum FX_BIDICLASS {
  FX_BIDICLASS_ON = 0,    // Other Neutral
  FX_BIDICLASS_L = 1,     // Left Letter
  FX_BIDICLASS_R = 2,     // Right Letter
  FX_BIDICLASS_AN = 3,    // Arabic Number
  FX_BIDICLASS_EN = 4,    // European Number
  FX_BIDICLASS_AL = 5,    // Arabic Letter
  FX_BIDICLASS_NSM = 6,   // Non-spacing Mark
  FX_BIDICLASS_CS = 7,    // Common Number Separator
  FX_BIDICLASS_ES = 8,    // European Separator
  FX_BIDICLASS_ET = 9,    // European Number Terminator
  FX_BIDICLASS_BN = 10,   // Boundary Neutral
  FX_BIDICLASS_S = 11,    // Segment Separator
  FX_BIDICLASS_WS = 12,   // Whitespace
  FX_BIDICLASS_B = 13,    // Paragraph Separator
  FX_BIDICLASS_RLO = 14,  // Right-to-Left Override
  FX_BIDICLASS_RLE = 15,  // Right-to-Left Embedding
  FX_BIDICLASS_LRO = 16,  // Left-to-Right Override
  FX_BIDICLASS_LRE = 17,  // Left-to-Right Embedding
  FX_BIDICLASS_PDF = 18,  // Pop Directional Format
  FX_BIDICLASS_N = FX_BIDICLASS_ON,  // Alias: generic neutral.
};

// The bidi class is stored in a 5-bit field of the character property word,
// starting at bit FX_BIDICLASSBITS. Extract it with
// (props & FX_BIDICLASSBITSMASK) >> FX_BIDICLASSBITS.
constexpr uint32_t FX_BIDICLASSBITS = 6;
// Built from an unsigned operand so the shift is done in uint32_t.
constexpr uint32_t FX_BIDICLASSBITSMASK = uint32_t{0x1F} << FX_BIDICLASSBITS;
44 
45 #ifdef PDF_ENABLE_XFA
46 
#ifndef NDEBUG
// Upper bound on an embedding level. Only referenced from ASSERT(), so it is
// compiled out together with the assertions when NDEBUG is defined.
constexpr int32_t kBidiMaxLevel = 61;
#endif  // NDEBUG
50 
// States of the weak-type resolution state machine. Each value is a row index
// into gc_FX_BidiWeakStates and gc_FX_BidiWeakActions, so the order of the
// enumerators must match the order of the rows in those tables.
enum FX_BIDIWEAKSTATE {
  FX_BWSxa = 0,
  FX_BWSxr,
  FX_BWSxl,
  FX_BWSao,
  FX_BWSro,
  FX_BWSlo,
  FX_BWSrt,
  FX_BWSlt,
  FX_BWScn,
  FX_BWSra,
  FX_BWSre,
  FX_BWSla,
  FX_BWSle,
  FX_BWSac,
  FX_BWSrc,
  FX_BWSrs,
  FX_BWSlc,
  FX_BWSls,
  FX_BWSret,
  FX_BWSlet
};
73 
74 enum FX_BIDIWEAKACTION {
75   FX_BWAIX = 0x100,
76   FX_BWAXX = 0x0F,
77   FX_BWAxxx = (0x0F << 4) + 0x0F,
78   FX_BWAxIx = 0x100 + FX_BWAxxx,
79   FX_BWAxxN = (0x0F << 4) + FX_BIDICLASS_ON,
80   FX_BWAxxE = (0x0F << 4) + FX_BIDICLASS_EN,
81   FX_BWAxxA = (0x0F << 4) + FX_BIDICLASS_AN,
82   FX_BWAxxR = (0x0F << 4) + FX_BIDICLASS_R,
83   FX_BWAxxL = (0x0F << 4) + FX_BIDICLASS_L,
84   FX_BWANxx = (FX_BIDICLASS_ON << 4) + 0x0F,
85   FX_BWAAxx = (FX_BIDICLASS_AN << 4) + 0x0F,
86   FX_BWAExE = (FX_BIDICLASS_EN << 4) + FX_BIDICLASS_EN,
87   FX_BWANIx = (FX_BIDICLASS_ON << 4) + 0x0F + 0x100,
88   FX_BWANxN = (FX_BIDICLASS_ON << 4) + FX_BIDICLASS_ON,
89   FX_BWANxR = (FX_BIDICLASS_ON << 4) + FX_BIDICLASS_R,
90   FX_BWANxE = (FX_BIDICLASS_ON << 4) + FX_BIDICLASS_EN,
91   FX_BWAAxA = (FX_BIDICLASS_AN << 4) + FX_BIDICLASS_AN,
92   FX_BWANxL = (FX_BIDICLASS_ON << 4) + FX_BIDICLASS_L,
93   FX_BWALxL = (FX_BIDICLASS_L << 4) + FX_BIDICLASS_L,
94   FX_BWAxIL = (0x0F << 4) + FX_BIDICLASS_L + 0x100,
95   FX_BWAAxR = (FX_BIDICLASS_AN << 4) + FX_BIDICLASS_R,
96   FX_BWALxx = (FX_BIDICLASS_L << 4) + 0x0F,
97 };
98 
// States of the neutral-type resolution state machine. Each value is a row
// index into gc_FX_BidiNeutralStates and gc_FX_BidiNeutralActions, so the
// order of the enumerators must match the order of the rows in those tables.
enum FX_BIDINEUTRALSTATE {
  FX_BNSr = 0,
  FX_BNSl,
  FX_BNSrn,
  FX_BNSln,
  FX_BNSa,
  FX_BNSna
};
107 
108 enum FX_BIDINEUTRALACTION {
109   FX_BNAnL = FX_BIDICLASS_L,
110   FX_BNAEn = (FX_BIDICLASS_AN << 4),
111   FX_BNARn = (FX_BIDICLASS_R << 4),
112   FX_BNALn = (FX_BIDICLASS_L << 4),
113   FX_BNAIn = FX_BWAIX,
114   FX_BNALnL = (FX_BIDICLASS_L << 4) + FX_BIDICLASS_L,
115 };
116 
117 const int32_t gc_FX_BidiNTypes[] = {
118     FX_BIDICLASS_N,   FX_BIDICLASS_L,   FX_BIDICLASS_R,   FX_BIDICLASS_AN,
119     FX_BIDICLASS_EN,  FX_BIDICLASS_AL,  FX_BIDICLASS_NSM, FX_BIDICLASS_CS,
120     FX_BIDICLASS_ES,  FX_BIDICLASS_ET,  FX_BIDICLASS_BN,  FX_BIDICLASS_BN,
121     FX_BIDICLASS_N,   FX_BIDICLASS_B,   FX_BIDICLASS_RLO, FX_BIDICLASS_RLE,
122     FX_BIDICLASS_LRO, FX_BIDICLASS_LRE, FX_BIDICLASS_PDF, FX_BIDICLASS_ON,
123 };
124 
125 const int32_t gc_FX_BidiWeakStates[][10] = {
126     {FX_BWSao, FX_BWSxl, FX_BWSxr, FX_BWScn, FX_BWScn, FX_BWSxa, FX_BWSxa,
127      FX_BWSao, FX_BWSao, FX_BWSao},
128     {FX_BWSro, FX_BWSxl, FX_BWSxr, FX_BWSra, FX_BWSre, FX_BWSxa, FX_BWSxr,
129      FX_BWSro, FX_BWSro, FX_BWSrt},
130     {FX_BWSlo, FX_BWSxl, FX_BWSxr, FX_BWSla, FX_BWSle, FX_BWSxa, FX_BWSxl,
131      FX_BWSlo, FX_BWSlo, FX_BWSlt},
132     {FX_BWSao, FX_BWSxl, FX_BWSxr, FX_BWScn, FX_BWScn, FX_BWSxa, FX_BWSao,
133      FX_BWSao, FX_BWSao, FX_BWSao},
134     {FX_BWSro, FX_BWSxl, FX_BWSxr, FX_BWSra, FX_BWSre, FX_BWSxa, FX_BWSro,
135      FX_BWSro, FX_BWSro, FX_BWSrt},
136     {FX_BWSlo, FX_BWSxl, FX_BWSxr, FX_BWSla, FX_BWSle, FX_BWSxa, FX_BWSlo,
137      FX_BWSlo, FX_BWSlo, FX_BWSlt},
138     {FX_BWSro, FX_BWSxl, FX_BWSxr, FX_BWSra, FX_BWSre, FX_BWSxa, FX_BWSrt,
139      FX_BWSro, FX_BWSro, FX_BWSrt},
140     {FX_BWSlo, FX_BWSxl, FX_BWSxr, FX_BWSla, FX_BWSle, FX_BWSxa, FX_BWSlt,
141      FX_BWSlo, FX_BWSlo, FX_BWSlt},
142     {FX_BWSao, FX_BWSxl, FX_BWSxr, FX_BWScn, FX_BWScn, FX_BWSxa, FX_BWScn,
143      FX_BWSac, FX_BWSao, FX_BWSao},
144     {FX_BWSro, FX_BWSxl, FX_BWSxr, FX_BWSra, FX_BWSre, FX_BWSxa, FX_BWSra,
145      FX_BWSrc, FX_BWSro, FX_BWSrt},
146     {FX_BWSro, FX_BWSxl, FX_BWSxr, FX_BWSra, FX_BWSre, FX_BWSxa, FX_BWSre,
147      FX_BWSrs, FX_BWSrs, FX_BWSret},
148     {FX_BWSlo, FX_BWSxl, FX_BWSxr, FX_BWSla, FX_BWSle, FX_BWSxa, FX_BWSla,
149      FX_BWSlc, FX_BWSlo, FX_BWSlt},
150     {FX_BWSlo, FX_BWSxl, FX_BWSxr, FX_BWSla, FX_BWSle, FX_BWSxa, FX_BWSle,
151      FX_BWSls, FX_BWSls, FX_BWSlet},
152     {FX_BWSao, FX_BWSxl, FX_BWSxr, FX_BWScn, FX_BWScn, FX_BWSxa, FX_BWSao,
153      FX_BWSao, FX_BWSao, FX_BWSao},
154     {FX_BWSro, FX_BWSxl, FX_BWSxr, FX_BWSra, FX_BWSre, FX_BWSxa, FX_BWSro,
155      FX_BWSro, FX_BWSro, FX_BWSrt},
156     {FX_BWSro, FX_BWSxl, FX_BWSxr, FX_BWSra, FX_BWSre, FX_BWSxa, FX_BWSro,
157      FX_BWSro, FX_BWSro, FX_BWSrt},
158     {FX_BWSlo, FX_BWSxl, FX_BWSxr, FX_BWSla, FX_BWSle, FX_BWSxa, FX_BWSlo,
159      FX_BWSlo, FX_BWSlo, FX_BWSlt},
160     {FX_BWSlo, FX_BWSxl, FX_BWSxr, FX_BWSla, FX_BWSle, FX_BWSxa, FX_BWSlo,
161      FX_BWSlo, FX_BWSlo, FX_BWSlt},
162     {FX_BWSro, FX_BWSxl, FX_BWSxr, FX_BWSra, FX_BWSre, FX_BWSxa, FX_BWSret,
163      FX_BWSro, FX_BWSro, FX_BWSret},
164     {FX_BWSlo, FX_BWSxl, FX_BWSxr, FX_BWSla, FX_BWSle, FX_BWSxa, FX_BWSlet,
165      FX_BWSlo, FX_BWSlo, FX_BWSlet},
166 };
167 
168 const int32_t gc_FX_BidiWeakActions[][10] = {
169     {FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxA, FX_BWAxxR,
170      FX_BWAxxR, FX_BWAxxN, FX_BWAxxN, FX_BWAxxN},
171     {FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxE, FX_BWAxxR,
172      FX_BWAxxR, FX_BWAxxN, FX_BWAxxN, FX_BWAxIx},
173     {FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxL, FX_BWAxxR,
174      FX_BWAxxL, FX_BWAxxN, FX_BWAxxN, FX_BWAxIx},
175     {FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxA, FX_BWAxxR,
176      FX_BWAxxN, FX_BWAxxN, FX_BWAxxN, FX_BWAxxN},
177     {FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxE, FX_BWAxxR,
178      FX_BWAxxN, FX_BWAxxN, FX_BWAxxN, FX_BWAxIx},
179     {FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxL, FX_BWAxxR,
180      FX_BWAxxN, FX_BWAxxN, FX_BWAxxN, FX_BWAxIx},
181     {FX_BWANxx, FX_BWANxx, FX_BWANxx, FX_BWANxx, FX_BWAExE, FX_BWANxR,
182      FX_BWAxIx, FX_BWANxN, FX_BWANxN, FX_BWAxIx},
183     {FX_BWANxx, FX_BWANxx, FX_BWANxx, FX_BWANxx, FX_BWALxL, FX_BWANxR,
184      FX_BWAxIx, FX_BWANxN, FX_BWANxN, FX_BWAxIx},
185     {FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxA, FX_BWAxxR,
186      FX_BWAxxA, FX_BWAxIx, FX_BWAxxN, FX_BWAxxN},
187     {FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxE, FX_BWAxxR,
188      FX_BWAxxA, FX_BWAxIx, FX_BWAxxN, FX_BWAxIx},
189     {FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxE, FX_BWAxxR,
190      FX_BWAxxE, FX_BWAxIx, FX_BWAxIx, FX_BWAxxE},
191     {FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxL, FX_BWAxxR,
192      FX_BWAxxA, FX_BWAxIx, FX_BWAxxN, FX_BWAxIx},
193     {FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxL, FX_BWAxxR,
194      FX_BWAxxL, FX_BWAxIx, FX_BWAxIx, FX_BWAxxL},
195     {FX_BWANxx, FX_BWANxx, FX_BWANxx, FX_BWAAxx, FX_BWAAxA, FX_BWANxR,
196      FX_BWANxN, FX_BWANxN, FX_BWANxN, FX_BWANxN},
197     {FX_BWANxx, FX_BWANxx, FX_BWANxx, FX_BWAAxx, FX_BWANxE, FX_BWANxR,
198      FX_BWANxN, FX_BWANxN, FX_BWANxN, FX_BWANIx},
199     {FX_BWANxx, FX_BWANxx, FX_BWANxx, FX_BWANxx, FX_BWAExE, FX_BWANxR,
200      FX_BWANxN, FX_BWANxN, FX_BWANxN, FX_BWANIx},
201     {FX_BWANxx, FX_BWANxx, FX_BWANxx, FX_BWAAxx, FX_BWANxL, FX_BWANxR,
202      FX_BWANxN, FX_BWANxN, FX_BWANxN, FX_BWANIx},
203     {FX_BWANxx, FX_BWANxx, FX_BWANxx, FX_BWANxx, FX_BWALxL, FX_BWANxR,
204      FX_BWANxN, FX_BWANxN, FX_BWANxN, FX_BWANIx},
205     {FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxE, FX_BWAxxR,
206      FX_BWAxxE, FX_BWAxxN, FX_BWAxxN, FX_BWAxxE},
207     {FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxx, FX_BWAxxL, FX_BWAxxR,
208      FX_BWAxxL, FX_BWAxxN, FX_BWAxxN, FX_BWAxxL},
209 };
210 
211 const int32_t gc_FX_BidiNeutralStates[][5] = {
212     {FX_BNSrn, FX_BNSl, FX_BNSr, FX_BNSr, FX_BNSr},
213     {FX_BNSln, FX_BNSl, FX_BNSr, FX_BNSa, FX_BNSl},
214     {FX_BNSrn, FX_BNSl, FX_BNSr, FX_BNSr, FX_BNSr},
215     {FX_BNSln, FX_BNSl, FX_BNSr, FX_BNSa, FX_BNSl},
216     {FX_BNSna, FX_BNSl, FX_BNSr, FX_BNSa, FX_BNSl},
217     {FX_BNSna, FX_BNSl, FX_BNSr, FX_BNSa, FX_BNSl},
218 };
219 const int32_t gc_FX_BidiNeutralActions[][5] = {
220     {FX_BNAIn, 0, 0, 0, 0},
221     {FX_BNAIn, 0, 0, 0, FX_BIDICLASS_L},
222     {FX_BNAIn, FX_BNAEn, FX_BNARn, FX_BNARn, FX_BNARn},
223     {FX_BNAIn, FX_BNALn, FX_BNAEn, FX_BNAEn, FX_BNALnL},
224     {FX_BNAIn, 0, 0, 0, FX_BIDICLASS_L},
225     {FX_BNAIn, FX_BNAEn, FX_BNARn, FX_BNARn, FX_BNAEn},
226 };
227 
// Amount added to a character's embedding level by ResolveImplicit().
// Row: 0 for an even current level, 1 for an odd one.
// Column: resolved class minus one, i.e. L, R, AN, EN in that order.
constexpr int32_t gc_FX_BidiAddLevel[][4] = {
    {0, 1, 2, 2},
    {1, 0, 1, 1},
};
232 
233 class CFX_BidiLine {
234  public:
BidiLine(std::vector<CFX_Char> * chars,size_t iCount)235   void BidiLine(std::vector<CFX_Char>* chars, size_t iCount) {
236     ASSERT(iCount <= chars->size());
237     if (iCount < 2)
238       return;
239 
240     Classify(chars, iCount, false);
241     ResolveExplicit(chars, iCount);
242     ResolveWeak(chars, iCount);
243     ResolveNeutrals(chars, iCount);
244     ResolveImplicit(chars, iCount);
245     Classify(chars, iCount, true);
246     ResolveWhitespace(chars, iCount);
247     Reorder(chars, iCount);
248     Position(chars, iCount);
249   }
250 
251  private:
Direction(int32_t val)252   int32_t Direction(int32_t val) {
253     return FX_IsOdd(val) ? FX_BIDICLASS_R : FX_BIDICLASS_L;
254   }
255 
GetDeferredType(int32_t val)256   int32_t GetDeferredType(int32_t val) { return (val >> 4) & 0x0F; }
257 
GetResolvedType(int32_t val)258   int32_t GetResolvedType(int32_t val) { return val & 0x0F; }
259 
GetDeferredNeutrals(int32_t iAction,int32_t iLevel)260   int32_t GetDeferredNeutrals(int32_t iAction, int32_t iLevel) {
261     iAction = (iAction >> 4) & 0xF;
262     if (iAction == (FX_BNAEn >> 4))
263       return Direction(iLevel);
264     return iAction;
265   }
266 
GetResolvedNeutrals(int32_t iAction)267   int32_t GetResolvedNeutrals(int32_t iAction) {
268     iAction &= 0xF;
269     return iAction == FX_BNAIn ? 0 : iAction;
270   }
271 
ReverseString(std::vector<CFX_Char> * chars,size_t iStart,size_t iCount)272   void ReverseString(std::vector<CFX_Char>* chars,
273                      size_t iStart,
274                      size_t iCount) {
275     ASSERT(pdfium::IndexInBounds(*chars, iStart));
276     ASSERT(iStart + iCount <= chars->size());
277 
278     std::reverse(chars->begin() + iStart, chars->begin() + iStart + iCount);
279   }
280 
SetDeferredRun(std::vector<CFX_Char> * chars,bool bClass,size_t iStart,size_t iCount,int32_t iValue)281   void SetDeferredRun(std::vector<CFX_Char>* chars,
282                       bool bClass,
283                       size_t iStart,
284                       size_t iCount,
285                       int32_t iValue) {
286     ASSERT(iStart <= chars->size());
287     ASSERT(iStart >= iCount);
288 
289     size_t iLast = iStart - iCount;
290     for (size_t i = iStart - 1; i >= iLast; --i) {
291       if (bClass)
292         (*chars)[i].m_iBidiClass = static_cast<int16_t>(iValue);
293       else
294         (*chars)[i].m_iBidiLevel = static_cast<int16_t>(iValue);
295 
296       if (i == 0)
297         break;
298     }
299   }
300 
Classify(std::vector<CFX_Char> * chars,size_t iCount,bool bWS)301   void Classify(std::vector<CFX_Char>* chars, size_t iCount, bool bWS) {
302     if (bWS) {
303       for (size_t i = 0; i < iCount; ++i) {
304         CFX_Char& cur = (*chars)[i];
305         cur.m_iBidiClass =
306             static_cast<int16_t>(cur.char_props() & FX_BIDICLASSBITSMASK) >>
307             FX_BIDICLASSBITS;
308       }
309       return;
310     }
311 
312     for (size_t i = 0; i < iCount; ++i) {
313       CFX_Char& cur = (*chars)[i];
314       cur.m_iBidiClass = static_cast<int16_t>(
315           gc_FX_BidiNTypes[(cur.char_props() & FX_BIDICLASSBITSMASK) >>
316                            FX_BIDICLASSBITS]);
317     }
318   }
319 
ResolveExplicit(std::vector<CFX_Char> * chars,size_t iCount)320   void ResolveExplicit(std::vector<CFX_Char>* chars, size_t iCount) {
321     for (size_t i = 0; i < iCount; ++i)
322       (*chars)[i].m_iBidiLevel = 0;
323   }
324 
ResolveWeak(std::vector<CFX_Char> * chars,size_t iCount)325   void ResolveWeak(std::vector<CFX_Char>* chars, size_t iCount) {
326     if (iCount <= 1)
327       return;
328     --iCount;
329 
330     int32_t iLevelCur = 0;
331     int32_t iState = FX_BWSxl;
332     size_t i = 0;
333     size_t iNum = 0;
334     int32_t iClsCur;
335     int32_t iClsRun;
336     int32_t iClsNew;
337     int32_t iAction;
338     for (; i <= iCount; ++i) {
339       CFX_Char* pTC = &(*chars)[i];
340       iClsCur = pTC->m_iBidiClass;
341       if (iClsCur == FX_BIDICLASS_BN) {
342         pTC->m_iBidiLevel = (int16_t)iLevelCur;
343         if (i == iCount && iLevelCur != 0) {
344           iClsCur = Direction(iLevelCur);
345           pTC->m_iBidiClass = (int16_t)iClsCur;
346         } else if (i < iCount) {
347           CFX_Char* pTCNext = &(*chars)[i + 1];
348           int32_t iLevelNext, iLevelNew;
349           iClsNew = pTCNext->m_iBidiClass;
350           iLevelNext = pTCNext->m_iBidiLevel;
351           if (iClsNew != FX_BIDICLASS_BN && iLevelCur != iLevelNext) {
352             iLevelNew = std::max(iLevelNext, iLevelCur);
353             pTC->m_iBidiLevel = static_cast<int16_t>(iLevelNew);
354             iClsCur = Direction(iLevelNew);
355             pTC->m_iBidiClass = static_cast<int16_t>(iClsCur);
356             iLevelCur = iLevelNext;
357           } else {
358             if (iNum > 0)
359               ++iNum;
360             continue;
361           }
362         } else {
363           if (iNum > 0)
364             ++iNum;
365           continue;
366         }
367       }
368 
369       ASSERT(iClsCur <= FX_BIDICLASS_BN);
370       iAction = gc_FX_BidiWeakActions[iState][iClsCur];
371       iClsRun = GetDeferredType(iAction);
372       if (iClsRun != FX_BWAXX && iNum > 0) {
373         SetDeferredRun(chars, true, i, iNum, iClsRun);
374         iNum = 0;
375       }
376       iClsNew = GetResolvedType(iAction);
377       if (iClsNew != FX_BWAXX)
378         pTC->m_iBidiClass = static_cast<int16_t>(iClsNew);
379       if (FX_BWAIX & iAction)
380         ++iNum;
381 
382       iState = gc_FX_BidiWeakStates[iState][iClsCur];
383     }
384     if (iNum == 0)
385       return;
386 
387     iClsCur = Direction(0);
388     iClsRun = GetDeferredType(gc_FX_BidiWeakActions[iState][iClsCur]);
389     if (iClsRun != FX_BWAXX)
390       SetDeferredRun(chars, true, i, iNum, iClsRun);
391   }
392 
ResolveNeutrals(std::vector<CFX_Char> * chars,size_t iCount)393   void ResolveNeutrals(std::vector<CFX_Char>* chars, size_t iCount) {
394     if (iCount <= 1)
395       return;
396     --iCount;
397 
398     CFX_Char* pTC;
399     int32_t iLevel = 0;
400     int32_t iState = FX_BNSl;
401     size_t i = 0;
402     size_t iNum = 0;
403     int32_t iClsCur;
404     int32_t iClsRun;
405     int32_t iClsNew;
406     int32_t iAction;
407     for (; i <= iCount; ++i) {
408       pTC = &(*chars)[i];
409       iClsCur = pTC->m_iBidiClass;
410       if (iClsCur == FX_BIDICLASS_BN) {
411         if (iNum)
412           ++iNum;
413         continue;
414       }
415 
416       ASSERT(iClsCur < FX_BIDICLASS_AL);
417       iAction = gc_FX_BidiNeutralActions[iState][iClsCur];
418       iClsRun = GetDeferredNeutrals(iAction, iLevel);
419       if (iClsRun != FX_BIDICLASS_N && iNum > 0) {
420         SetDeferredRun(chars, true, i, iNum, iClsRun);
421         iNum = 0;
422       }
423 
424       iClsNew = GetResolvedNeutrals(iAction);
425       if (iClsNew != FX_BIDICLASS_N)
426         pTC->m_iBidiClass = (int16_t)iClsNew;
427       if (FX_BNAIn & iAction)
428         ++iNum;
429 
430       iState = gc_FX_BidiNeutralStates[iState][iClsCur];
431       iLevel = pTC->m_iBidiLevel;
432     }
433     if (iNum == 0)
434       return;
435 
436     iClsCur = Direction(iLevel);
437     iClsRun =
438         GetDeferredNeutrals(gc_FX_BidiNeutralActions[iState][iClsCur], iLevel);
439     if (iClsRun != FX_BIDICLASS_N)
440       SetDeferredRun(chars, true, i, iNum, iClsRun);
441   }
442 
ResolveImplicit(std::vector<CFX_Char> * chars,size_t iCount)443   void ResolveImplicit(std::vector<CFX_Char>* chars, size_t iCount) {
444     for (size_t i = 0; i < iCount; ++i) {
445       int32_t iCls = (*chars)[i].m_iBidiClass;
446       if (iCls == FX_BIDICLASS_BN)
447         continue;
448 
449       ASSERT(iCls > FX_BIDICLASS_ON && iCls < FX_BIDICLASS_AL);
450       int32_t iLevel = (*chars)[i].m_iBidiLevel;
451       iLevel += gc_FX_BidiAddLevel[FX_IsOdd(iLevel)][iCls - 1];
452       (*chars)[i].m_iBidiLevel = (int16_t)iLevel;
453     }
454   }
455 
ResolveWhitespace(std::vector<CFX_Char> * chars,size_t iCount)456   void ResolveWhitespace(std::vector<CFX_Char>* chars, size_t iCount) {
457     if (iCount <= 1)
458       return;
459     iCount--;
460 
461     int32_t iLevel = 0;
462     size_t i = 0;
463     size_t iNum = 0;
464     for (; i <= iCount; ++i) {
465       switch ((*chars)[i].m_iBidiClass) {
466         case FX_BIDICLASS_WS:
467           ++iNum;
468           break;
469         case FX_BIDICLASS_RLE:
470         case FX_BIDICLASS_LRE:
471         case FX_BIDICLASS_LRO:
472         case FX_BIDICLASS_RLO:
473         case FX_BIDICLASS_PDF:
474         case FX_BIDICLASS_BN:
475           (*chars)[i].m_iBidiLevel = static_cast<int16_t>(iLevel);
476           ++iNum;
477           break;
478         case FX_BIDICLASS_S:
479         case FX_BIDICLASS_B:
480           if (iNum > 0)
481             SetDeferredRun(chars, false, i, iNum, 0);
482 
483           (*chars)[i].m_iBidiLevel = 0;
484           iNum = 0;
485           break;
486         default:
487           iNum = 0;
488           break;
489       }
490       iLevel = (*chars)[i].m_iBidiLevel;
491     }
492     if (iNum > 0)
493       SetDeferredRun(chars, false, i, iNum, 0);
494   }
495 
ReorderLevel(std::vector<CFX_Char> * chars,size_t iCount,int32_t iBaseLevel,size_t iStart,bool bReverse)496   size_t ReorderLevel(std::vector<CFX_Char>* chars,
497                       size_t iCount,
498                       int32_t iBaseLevel,
499                       size_t iStart,
500                       bool bReverse) {
501     ASSERT(iBaseLevel >= 0 && iBaseLevel <= kBidiMaxLevel);
502     ASSERT(iStart < iCount);
503 
504     if (iCount < 1)
505       return 0;
506 
507     bReverse = bReverse || FX_IsOdd(iBaseLevel);
508     size_t i = iStart;
509     for (; i < iCount; ++i) {
510       int32_t iLevel = (*chars)[i].m_iBidiLevel;
511       if (iLevel == iBaseLevel)
512         continue;
513       if (iLevel < iBaseLevel)
514         break;
515 
516       i += ReorderLevel(chars, iCount, iBaseLevel + 1, i, bReverse) - 1;
517     }
518 
519     size_t iNum = i - iStart;
520     if (bReverse && iNum > 1)
521       ReverseString(chars, iStart, iNum);
522 
523     return iNum;
524   }
525 
Reorder(std::vector<CFX_Char> * chars,size_t iCount)526   void Reorder(std::vector<CFX_Char>* chars, size_t iCount) {
527     for (size_t i = 0; i < iCount;)
528       i += ReorderLevel(chars, iCount, 0, i, false);
529   }
530 
Position(std::vector<CFX_Char> * chars,size_t iCount)531   void Position(std::vector<CFX_Char>* chars, size_t iCount) {
532     for (size_t i = 0; i < iCount; ++i)
533       (*chars)[(*chars)[i].m_iBidiPos].m_iBidiOrder = i;
534   }
535 };
536 
537 #endif  // PDF_ENABLE_XFA
538 
539 }  // namespace
540 
CFX_BidiChar()541 CFX_BidiChar::CFX_BidiChar()
542     : m_CurrentSegment({0, 0, NEUTRAL}), m_LastSegment({0, 0, NEUTRAL}) {}
543 
AppendChar(wchar_t wch)544 bool CFX_BidiChar::AppendChar(wchar_t wch) {
545   uint32_t dwProps = FX_GetUnicodeProperties(wch);
546   int32_t iBidiCls = (dwProps & FX_BIDICLASSBITSMASK) >> FX_BIDICLASSBITS;
547   Direction direction = NEUTRAL;
548   switch (iBidiCls) {
549     case FX_BIDICLASS_L:
550     case FX_BIDICLASS_AN:
551     case FX_BIDICLASS_EN:
552       direction = LEFT;
553       break;
554     case FX_BIDICLASS_R:
555     case FX_BIDICLASS_AL:
556       direction = RIGHT;
557       break;
558   }
559 
560   bool bChangeDirection = (direction != m_CurrentSegment.direction);
561   if (bChangeDirection)
562     StartNewSegment(direction);
563 
564   m_CurrentSegment.count++;
565   return bChangeDirection;
566 }
567 
EndChar()568 bool CFX_BidiChar::EndChar() {
569   StartNewSegment(NEUTRAL);
570   return m_LastSegment.count > 0;
571 }
572 
StartNewSegment(CFX_BidiChar::Direction direction)573 void CFX_BidiChar::StartNewSegment(CFX_BidiChar::Direction direction) {
574   m_LastSegment = m_CurrentSegment;
575   m_CurrentSegment.start += m_CurrentSegment.count;
576   m_CurrentSegment.count = 0;
577   m_CurrentSegment.direction = direction;
578 }
579 
CFX_BidiString(const WideString & str)580 CFX_BidiString::CFX_BidiString(const WideString& str)
581     : m_Str(str),
582       m_pBidiChar(pdfium::MakeUnique<CFX_BidiChar>()),
583       m_eOverallDirection(CFX_BidiChar::LEFT) {
584   for (const auto& c : m_Str) {
585     if (m_pBidiChar->AppendChar(c))
586       m_Order.push_back(m_pBidiChar->GetSegmentInfo());
587   }
588   if (m_pBidiChar->EndChar())
589     m_Order.push_back(m_pBidiChar->GetSegmentInfo());
590 
591   size_t nR2L = std::count_if(m_Order.begin(), m_Order.end(),
592                               [](const CFX_BidiChar::Segment& seg) {
593                                 return seg.direction == CFX_BidiChar::RIGHT;
594                               });
595 
596   size_t nL2R = std::count_if(m_Order.begin(), m_Order.end(),
597                               [](const CFX_BidiChar::Segment& seg) {
598                                 return seg.direction == CFX_BidiChar::LEFT;
599                               });
600 
601   if (nR2L > 0 && nR2L >= nL2R)
602     SetOverallDirectionRight();
603 }
604 
~CFX_BidiString()605 CFX_BidiString::~CFX_BidiString() {}
606 
SetOverallDirectionRight()607 void CFX_BidiString::SetOverallDirectionRight() {
608   if (m_eOverallDirection != CFX_BidiChar::RIGHT) {
609     std::reverse(m_Order.begin(), m_Order.end());
610     m_eOverallDirection = CFX_BidiChar::RIGHT;
611   }
612 }
613 
614 #ifdef PDF_ENABLE_XFA
FX_BidiLine(std::vector<CFX_Char> * chars,size_t iCount)615 void FX_BidiLine(std::vector<CFX_Char>* chars, size_t iCount) {
616   CFX_BidiLine blt;
617   blt.BidiLine(chars, iCount);
618 }
619 #endif  // PDF_ENABLE_XFA
620