1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #include "xfa/fgas/layout/cfx_txtbreak.h"
8 
9 #include <algorithm>
10 
11 #include "build/build_config.h"
12 #include "core/fxcrt/fx_safe_types.h"
13 #include "core/fxge/text_char_pos.h"
14 #include "third_party/base/stl_util.h"
15 #include "xfa/fgas/font/cfgas_gefont.h"
16 #include "xfa/fgas/layout/cfx_char.h"
17 #include "xfa/fgas/layout/fx_arabic.h"
18 #include "xfa/fgas/layout/fx_linebreak.h"
19 
20 namespace {
21 
IsCtrlCode(wchar_t wch)22 bool IsCtrlCode(wchar_t wch) {
23   FX_CHARTYPE dwRet = FX_GetCharType(wch);
24   return dwRet == FX_CHARTYPE::kTab || dwRet == FX_CHARTYPE::kControl;
25 }
26 
27 }  // namespace
28 
CFX_TxtBreak()29 CFX_TxtBreak::CFX_TxtBreak()
30     : CFX_Break(FX_LAYOUTSTYLE_None),
31       m_iAlignment(CFX_TxtLineAlignment_Left),
32       m_iCombWidth(360000) {}
33 
~CFX_TxtBreak()34 CFX_TxtBreak::~CFX_TxtBreak() {}
35 
SetLineWidth(float fLineWidth)36 void CFX_TxtBreak::SetLineWidth(float fLineWidth) {
37   m_iLineWidth = FXSYS_roundf(fLineWidth * kConversionFactor);
38   ASSERT(m_iLineWidth >= 20000);
39 }
40 
SetAlignment(int32_t iAlignment)41 void CFX_TxtBreak::SetAlignment(int32_t iAlignment) {
42   ASSERT(iAlignment >= CFX_TxtLineAlignment_Left);
43   ASSERT(iAlignment <= CFX_TxtLineAlignment_Justified);
44   m_iAlignment = iAlignment;
45 }
46 
SetCombWidth(float fCombWidth)47 void CFX_TxtBreak::SetCombWidth(float fCombWidth) {
48   m_iCombWidth = FXSYS_roundf(fCombWidth * kConversionFactor);
49 }
50 
AppendChar_Combination(CFX_Char * pCurChar)51 void CFX_TxtBreak::AppendChar_Combination(CFX_Char* pCurChar) {
52   wchar_t wch = pCurChar->char_code();
53   wchar_t wForm;
54   FX_SAFE_INT32 iCharWidth = 0;
55   pCurChar->m_iCharWidth = -1;
56   if (m_bCombText) {
57     iCharWidth = m_iCombWidth;
58   } else {
59     wForm = wch;
60     CFX_Char* pLastChar = GetLastChar(0, false, false);
61     if (pLastChar &&
62         (pLastChar->m_dwCharStyles & FX_TXTCHARSTYLE_ArabicShadda) == 0) {
63       bool bShadda = false;
64       if (wch == 0x0651) {
65         wchar_t wLast = pLastChar->char_code();
66         if (wLast >= 0x064C && wLast <= 0x0650) {
67           wForm = FX_GetArabicFromShaddaTable(wLast);
68           bShadda = true;
69         }
70       } else if (wch >= 0x064C && wch <= 0x0650) {
71         if (pLastChar->char_code() == 0x0651) {
72           wForm = FX_GetArabicFromShaddaTable(wch);
73           bShadda = true;
74         }
75       }
76       if (bShadda) {
77         pLastChar->m_dwCharStyles |= FX_TXTCHARSTYLE_ArabicShadda;
78         pLastChar->m_iCharWidth = 0;
79         pCurChar->m_dwCharStyles |= FX_TXTCHARSTYLE_ArabicShadda;
80       }
81     }
82     int32_t iCharWidthOut;
83     if (m_pFont && m_pFont->GetCharWidth(wForm, &iCharWidthOut))
84       iCharWidth = iCharWidthOut;
85     else
86       iCharWidth = 0;
87 
88     iCharWidth *= m_iFontSize;
89     iCharWidth *= m_iHorizontalScale;
90     iCharWidth /= 100;
91   }
92 
93   iCharWidth *= -1;
94   pCurChar->m_iCharWidth = iCharWidth.ValueOrDefault(0);
95 }
96 
AppendChar_Tab(CFX_Char * pCurChar)97 void CFX_TxtBreak::AppendChar_Tab(CFX_Char* pCurChar) {
98   m_eCharType = FX_CHARTYPE::kTab;
99 }
100 
AppendChar_Control(CFX_Char * pCurChar)101 CFX_BreakType CFX_TxtBreak::AppendChar_Control(CFX_Char* pCurChar) {
102   m_eCharType = FX_CHARTYPE::kControl;
103   CFX_BreakType dwRet = CFX_BreakType::None;
104   if (!m_bSingleLine) {
105     wchar_t wch = pCurChar->char_code();
106     switch (wch) {
107       case L'\v':
108       case 0x2028:
109         dwRet = CFX_BreakType::Line;
110         break;
111       case L'\f':
112         dwRet = CFX_BreakType::Page;
113         break;
114       case 0x2029:
115         dwRet = CFX_BreakType::Paragraph;
116         break;
117       default:
118         if (wch == m_wParagraphBreakChar)
119           dwRet = CFX_BreakType::Paragraph;
120         break;
121     }
122     if (dwRet != CFX_BreakType::None)
123       dwRet = EndBreak(dwRet);
124   }
125   return dwRet;
126 }
127 
AppendChar_Arabic(CFX_Char * pCurChar)128 CFX_BreakType CFX_TxtBreak::AppendChar_Arabic(CFX_Char* pCurChar) {
129   FX_CHARTYPE chartype = pCurChar->GetCharType();
130   int32_t& iLineWidth = m_pCurLine->m_iWidth;
131   wchar_t wForm;
132   CFX_Char* pLastChar = nullptr;
133   bool bAlef = false;
134   if (!m_bCombText && m_eCharType >= FX_CHARTYPE::kArabicAlef &&
135       m_eCharType <= FX_CHARTYPE::kArabicDistortion) {
136     FX_SAFE_INT32 iCharWidth = 0;
137     pLastChar = GetLastChar(1, true, false);
138     if (pLastChar) {
139       if (pLastChar->m_iCharWidth > 0)
140         iLineWidth -= pLastChar->m_iCharWidth;
141       iCharWidth = pLastChar->m_iCharWidth;
142 
143       CFX_Char* pPrevChar = GetLastChar(2, true, false);
144       wForm = pdfium::arabic::GetFormChar(pLastChar, pPrevChar, pCurChar);
145       bAlef = (wForm == 0xFEFF &&
146                pLastChar->GetCharType() == FX_CHARTYPE::kArabicAlef);
147       if (m_pFont) {
148         int32_t iCharWidthOut = 0;
149         m_pFont->GetCharWidth(wForm, &iCharWidthOut);
150         iCharWidth = iCharWidthOut;
151       }
152       if (wForm == 0xFEFF)
153         iCharWidth = 0;
154 
155       iCharWidth *= m_iFontSize;
156       iCharWidth *= m_iHorizontalScale;
157       iCharWidth /= 100;
158 
159       int32_t iCharWidthValid = iCharWidth.ValueOrDefault(0);
160       pLastChar->m_iCharWidth = iCharWidthValid;
161       iLineWidth += iCharWidthValid;
162     }
163   }
164 
165   m_eCharType = chartype;
166   wForm = pdfium::arabic::GetFormChar(pCurChar, bAlef ? nullptr : pLastChar,
167                                       nullptr);
168   FX_SAFE_INT32 iCharWidth = 0;
169   if (m_bCombText) {
170     iCharWidth = m_iCombWidth;
171   } else {
172     if (m_pFont) {
173       int32_t iCharWidthOut = 0;
174       m_pFont->GetCharWidth(wForm, &iCharWidthOut);
175       iCharWidth = iCharWidthOut;
176     }
177     if (wForm == 0xFEFF)
178       iCharWidth = 0;
179 
180     iCharWidth *= m_iFontSize;
181     iCharWidth *= m_iHorizontalScale;
182     iCharWidth /= 100;
183   }
184 
185   int32_t iCharWidthValid = iCharWidth.ValueOrDefault(0);
186   pCurChar->m_iCharWidth = iCharWidthValid;
187   iLineWidth += iCharWidthValid;
188 
189   m_pCurLine->IncrementArabicCharCount();
190   if (!m_bSingleLine && IsGreaterThanLineWidth(iLineWidth))
191     return EndBreak(CFX_BreakType::Line);
192   return CFX_BreakType::None;
193 }
194 
AppendChar_Others(CFX_Char * pCurChar)195 CFX_BreakType CFX_TxtBreak::AppendChar_Others(CFX_Char* pCurChar) {
196   FX_CHARTYPE chartype = pCurChar->GetCharType();
197   int32_t& iLineWidth = m_pCurLine->m_iWidth;
198   FX_SAFE_INT32 iCharWidth = 0;
199   m_eCharType = chartype;
200   wchar_t wch = pCurChar->char_code();
201   wchar_t wForm = wch;
202 
203   if (m_bCombText) {
204     iCharWidth = m_iCombWidth;
205   } else {
206     int32_t iCharWidthOut;
207     if (m_pFont && m_pFont->GetCharWidth(wForm, &iCharWidthOut))
208       iCharWidth = iCharWidthOut;
209     else
210       iCharWidth = 0;
211 
212     iCharWidth *= m_iFontSize;
213     iCharWidth *= m_iHorizontalScale;
214     iCharWidth /= 100;
215   }
216 
217   iCharWidth += m_iCharSpace;
218 
219   int32_t iCharWidthValid = iCharWidth.ValueOrDefault(0);
220   pCurChar->m_iCharWidth = iCharWidthValid;
221   iLineWidth += iCharWidthValid;
222   if (!m_bSingleLine && chartype != FX_CHARTYPE::kSpace &&
223       IsGreaterThanLineWidth(iLineWidth)) {
224     return EndBreak(CFX_BreakType::Line);
225   }
226 
227   return CFX_BreakType::None;
228 }
229 
AppendChar(wchar_t wch)230 CFX_BreakType CFX_TxtBreak::AppendChar(wchar_t wch) {
231   FX_CHARTYPE chartype = FX_GetCharType(wch);
232   m_pCurLine->m_LineChars.emplace_back(wch, m_iHorizontalScale,
233                                        m_iVerticalScale);
234   CFX_Char* pCurChar = &m_pCurLine->m_LineChars.back();
235   pCurChar->m_dwCharStyles = m_iAlignment | (1 << 8);
236 
237   CFX_BreakType dwRet1 = CFX_BreakType::None;
238   if (chartype != FX_CHARTYPE::kCombination &&
239       GetUnifiedCharType(m_eCharType) != GetUnifiedCharType(chartype) &&
240       m_eCharType != FX_CHARTYPE::kUnknown && !m_bSingleLine &&
241       IsGreaterThanLineWidth(m_pCurLine->m_iWidth) &&
242       (m_eCharType != FX_CHARTYPE::kSpace ||
243        chartype != FX_CHARTYPE::kControl)) {
244     dwRet1 = EndBreak(CFX_BreakType::Line);
245     if (!m_pCurLine->m_LineChars.empty())
246       pCurChar = &m_pCurLine->m_LineChars.back();
247   }
248 
249   CFX_BreakType dwRet2 = CFX_BreakType::None;
250   if (wch == m_wParagraphBreakChar) {
251     // This is handled in AppendChar_Control, but it seems like \n and \r
252     // don't get matched as control characters so we go into AppendChar_other
253     // and never detect the new paragraph ...
254     dwRet2 = CFX_BreakType::Paragraph;
255     EndBreak(dwRet2);
256   } else {
257     switch (chartype) {
258       case FX_CHARTYPE::kTab:
259         AppendChar_Tab(pCurChar);
260         break;
261       case FX_CHARTYPE::kControl:
262         dwRet2 = AppendChar_Control(pCurChar);
263         break;
264       case FX_CHARTYPE::kCombination:
265         AppendChar_Combination(pCurChar);
266         break;
267       case FX_CHARTYPE::kArabicAlef:
268       case FX_CHARTYPE::kArabicSpecial:
269       case FX_CHARTYPE::kArabicDistortion:
270       case FX_CHARTYPE::kArabicNormal:
271       case FX_CHARTYPE::kArabicForm:
272       case FX_CHARTYPE::kArabic:
273         dwRet2 = AppendChar_Arabic(pCurChar);
274         break;
275       case FX_CHARTYPE::kUnknown:
276       case FX_CHARTYPE::kSpace:
277       case FX_CHARTYPE::kNumeric:
278       case FX_CHARTYPE::kNormal:
279       default:
280         dwRet2 = AppendChar_Others(pCurChar);
281         break;
282     }
283   }
284   return std::max(dwRet1, dwRet2);
285 }
286 
EndBreak_SplitLine(CFX_BreakLine * pNextLine,bool bAllChars)287 bool CFX_TxtBreak::EndBreak_SplitLine(CFX_BreakLine* pNextLine,
288                                       bool bAllChars) {
289   bool bDone = false;
290   CFX_Char* pTC;
291   if (!m_bSingleLine && IsGreaterThanLineWidth(m_pCurLine->m_iWidth)) {
292     pTC = m_pCurLine->GetChar(m_pCurLine->m_LineChars.size() - 1);
293     switch (pTC->GetCharType()) {
294       case FX_CHARTYPE::kTab:
295       case FX_CHARTYPE::kControl:
296       case FX_CHARTYPE::kSpace:
297         break;
298       default:
299         SplitTextLine(m_pCurLine.Get(), pNextLine, bAllChars);
300         bDone = true;
301         break;
302     }
303   }
304 
305   CFX_BreakPiece tp;
306   if (bAllChars && !bDone) {
307     int32_t iEndPos = m_pCurLine->m_iWidth;
308     GetBreakPos(&m_pCurLine->m_LineChars, bAllChars, true, &iEndPos);
309   }
310   return false;
311 }
312 
EndBreak_BidiLine(std::deque<FX_TPO> * tpos,CFX_BreakType dwStatus)313 void CFX_TxtBreak::EndBreak_BidiLine(std::deque<FX_TPO>* tpos,
314                                      CFX_BreakType dwStatus) {
315   CFX_BreakPiece tp;
316   FX_TPO tpo;
317   CFX_Char* pTC;
318   std::vector<CFX_Char>& chars = m_pCurLine->m_LineChars;
319   if (!m_pCurLine->HasArabicChar()) {
320     tp.m_dwStatus = dwStatus;
321     tp.m_iStartPos = m_pCurLine->m_iStart;
322     tp.m_iWidth = m_pCurLine->m_iWidth;
323     tp.m_iStartChar = 0;
324     tp.m_iChars = m_pCurLine->m_LineChars.size();
325     tp.m_pChars = &m_pCurLine->m_LineChars;
326     pTC = &chars[0];
327     tp.m_dwCharStyles = pTC->m_dwCharStyles;
328     tp.m_iHorizontalScale = pTC->horizonal_scale();
329     tp.m_iVerticalScale = pTC->vertical_scale();
330     m_pCurLine->m_LinePieces.push_back(tp);
331     tpos->push_back({0, 0});
332     return;
333   }
334 
335   size_t iBidiNum = 0;
336   for (size_t i = 0; i < m_pCurLine->m_LineChars.size(); ++i) {
337     pTC = &chars[i];
338     pTC->m_iBidiPos = static_cast<int32_t>(i);
339     if (pTC->GetCharType() != FX_CHARTYPE::kControl)
340       iBidiNum = i;
341     if (i == 0)
342       pTC->m_iBidiLevel = 1;
343   }
344   CFX_Char::BidiLine(&chars, iBidiNum + 1);
345 
346   tp.m_dwStatus = CFX_BreakType::Piece;
347   tp.m_iStartPos = m_pCurLine->m_iStart;
348   tp.m_pChars = &m_pCurLine->m_LineChars;
349   int32_t iBidiLevel = -1;
350   int32_t iCharWidth;
351   int32_t i = 0;
352   int32_t j = -1;
353   int32_t iCount = pdfium::CollectionSize<int32_t>(m_pCurLine->m_LineChars);
354   while (i < iCount) {
355     pTC = &chars[i];
356     if (iBidiLevel < 0) {
357       iBidiLevel = pTC->m_iBidiLevel;
358       tp.m_iWidth = 0;
359       tp.m_iBidiLevel = iBidiLevel;
360       tp.m_iBidiPos = pTC->m_iBidiOrder;
361       tp.m_dwCharStyles = pTC->m_dwCharStyles;
362       tp.m_iHorizontalScale = pTC->horizonal_scale();
363       tp.m_iVerticalScale = pTC->vertical_scale();
364       tp.m_dwStatus = CFX_BreakType::Piece;
365     }
366     if (iBidiLevel != pTC->m_iBidiLevel ||
367         pTC->m_dwStatus != CFX_BreakType::None) {
368       if (iBidiLevel == pTC->m_iBidiLevel) {
369         tp.m_dwStatus = pTC->m_dwStatus;
370         iCharWidth = pTC->m_iCharWidth;
371         if (iCharWidth > 0)
372           tp.m_iWidth += iCharWidth;
373 
374         i++;
375       }
376       tp.m_iChars = i - tp.m_iStartChar;
377       m_pCurLine->m_LinePieces.push_back(tp);
378       tp.m_iStartPos += tp.m_iWidth;
379       tp.m_iStartChar = i;
380       tpo.index = ++j;
381       tpo.pos = tp.m_iBidiPos;
382       tpos->push_back(tpo);
383       iBidiLevel = -1;
384     } else {
385       iCharWidth = pTC->m_iCharWidth;
386       if (iCharWidth > 0)
387         tp.m_iWidth += iCharWidth;
388 
389       i++;
390     }
391   }
392   if (i > tp.m_iStartChar) {
393     tp.m_dwStatus = dwStatus;
394     tp.m_iChars = i - tp.m_iStartChar;
395     m_pCurLine->m_LinePieces.push_back(tp);
396     tpo.index = ++j;
397     tpo.pos = tp.m_iBidiPos;
398     tpos->push_back(tpo);
399   }
400   if (j > -1) {
401     if (j > 0) {
402       std::sort(tpos->begin(), tpos->end());
403       int32_t iStartPos = 0;
404       for (i = 0; i <= j; i++) {
405         tpo = (*tpos)[i];
406         CFX_BreakPiece& ttp = m_pCurLine->m_LinePieces[tpo.index];
407         ttp.m_iStartPos = iStartPos;
408         iStartPos += ttp.m_iWidth;
409       }
410     }
411     m_pCurLine->m_LinePieces[j].m_dwStatus = dwStatus;
412   }
413 }
414 
EndBreak_Alignment(const std::deque<FX_TPO> & tpos,bool bAllChars,CFX_BreakType dwStatus)415 void CFX_TxtBreak::EndBreak_Alignment(const std::deque<FX_TPO>& tpos,
416                                       bool bAllChars,
417                                       CFX_BreakType dwStatus) {
418   int32_t iNetWidth = m_pCurLine->m_iWidth;
419   int32_t iGapChars = 0;
420   bool bFind = false;
421   for (auto it = tpos.rbegin(); it != tpos.rend(); ++it) {
422     CFX_BreakPiece& ttp = m_pCurLine->m_LinePieces[it->index];
423     if (!bFind)
424       iNetWidth = ttp.GetEndPos();
425 
426     bool bArabic = FX_IsOdd(ttp.m_iBidiLevel);
427     int32_t j = bArabic ? 0 : ttp.m_iChars - 1;
428     while (j > -1 && j < ttp.m_iChars) {
429       const CFX_Char* pTC = ttp.GetChar(j);
430       if (pTC->m_eLineBreakType == FX_LINEBREAKTYPE::kDIRECT_BRK)
431         iGapChars++;
432       if (!bFind || !bAllChars) {
433         FX_CHARTYPE chartype = pTC->GetCharType();
434         if (chartype == FX_CHARTYPE::kSpace ||
435             chartype == FX_CHARTYPE::kControl) {
436           if (!bFind && bAllChars && pTC->m_iCharWidth > 0)
437             iNetWidth -= pTC->m_iCharWidth;
438         } else {
439           bFind = true;
440           if (!bAllChars)
441             break;
442         }
443       }
444       j += bArabic ? 1 : -1;
445     }
446     if (!bAllChars && bFind)
447       break;
448   }
449 
450   int32_t iOffset = m_iLineWidth - iNetWidth;
451   if (iGapChars > 0 && m_iAlignment & CFX_TxtLineAlignment_Justified &&
452       dwStatus != CFX_BreakType::Paragraph) {
453     int32_t iStart = -1;
454     for (auto& tpo : tpos) {
455       CFX_BreakPiece& ttp = m_pCurLine->m_LinePieces[tpo.index];
456       if (iStart < -1)
457         iStart = ttp.m_iStartPos;
458       else
459         ttp.m_iStartPos = iStart;
460 
461       for (int32_t j = 0; j < ttp.m_iChars && iGapChars > 0; j++, iGapChars--) {
462         CFX_Char* pTC = ttp.GetChar(j);
463         if (pTC->m_eLineBreakType != FX_LINEBREAKTYPE::kDIRECT_BRK ||
464             pTC->m_iCharWidth < 0) {
465           continue;
466         }
467         int32_t k = iOffset / iGapChars;
468         pTC->m_iCharWidth += k;
469         ttp.m_iWidth += k;
470         iOffset -= k;
471       }
472       iStart += ttp.m_iWidth;
473     }
474   } else if (m_iAlignment & CFX_TxtLineAlignment_Center ||
475              m_iAlignment & CFX_TxtLineAlignment_Right) {
476     if (m_iAlignment & CFX_TxtLineAlignment_Center &&
477         !(m_iAlignment & CFX_TxtLineAlignment_Right)) {
478       iOffset /= 2;
479     }
480     if (iOffset > 0) {
481       for (auto& ttp : m_pCurLine->m_LinePieces)
482         ttp.m_iStartPos += iOffset;
483     }
484   }
485 }
486 
EndBreak(CFX_BreakType dwStatus)487 CFX_BreakType CFX_TxtBreak::EndBreak(CFX_BreakType dwStatus) {
488   ASSERT(dwStatus != CFX_BreakType::None);
489 
490   if (!m_pCurLine->m_LinePieces.empty()) {
491     if (dwStatus != CFX_BreakType::Piece)
492       m_pCurLine->m_LinePieces.back().m_dwStatus = dwStatus;
493     return m_pCurLine->m_LinePieces.back().m_dwStatus;
494   }
495 
496   if (HasLine()) {
497     if (m_Lines[m_iReadyLineIndex].m_LinePieces.empty())
498       return CFX_BreakType::None;
499 
500     if (dwStatus != CFX_BreakType::Piece)
501       m_Lines[m_iReadyLineIndex].m_LinePieces.back().m_dwStatus = dwStatus;
502     return m_Lines[m_iReadyLineIndex].m_LinePieces.back().m_dwStatus;
503   }
504 
505   if (m_pCurLine->m_LineChars.empty())
506     return CFX_BreakType::None;
507 
508   m_pCurLine->m_LineChars.back().m_dwStatus = dwStatus;
509   if (dwStatus == CFX_BreakType::Piece)
510     return dwStatus;
511 
512   m_iReadyLineIndex = m_pCurLine == &m_Lines[0] ? 0 : 1;
513   CFX_BreakLine* pNextLine = &m_Lines[1 - m_iReadyLineIndex];
514   bool bAllChars = m_iAlignment > CFX_TxtLineAlignment_Right;
515   if (!EndBreak_SplitLine(pNextLine, bAllChars)) {
516     std::deque<FX_TPO> tpos;
517     EndBreak_BidiLine(&tpos, dwStatus);
518     if (m_iAlignment > CFX_TxtLineAlignment_Left)
519       EndBreak_Alignment(tpos, bAllChars, dwStatus);
520   }
521 
522   m_pCurLine = pNextLine;
523   CFX_Char* pTC = GetLastChar(0, false, false);
524   m_eCharType = pTC ? pTC->GetCharType() : FX_CHARTYPE::kUnknown;
525 
526   return dwStatus;
527 }
528 
GetBreakPos(std::vector<CFX_Char> * pChars,bool bAllChars,bool bOnlyBrk,int32_t * pEndPos)529 int32_t CFX_TxtBreak::GetBreakPos(std::vector<CFX_Char>* pChars,
530                                   bool bAllChars,
531                                   bool bOnlyBrk,
532                                   int32_t* pEndPos) {
533   std::vector<CFX_Char>& chars = *pChars;
534   int32_t iLength = pdfium::CollectionSize<int32_t>(chars) - 1;
535   if (iLength < 1)
536     return iLength;
537 
538   int32_t iBreak = -1;
539   int32_t iBreakPos = -1;
540   int32_t iIndirect = -1;
541   int32_t iIndirectPos = -1;
542   int32_t iLast = -1;
543   int32_t iLastPos = -1;
544   if (m_bSingleLine || *pEndPos <= m_iLineWidth) {
545     if (!bAllChars)
546       return iLength;
547 
548     iBreak = iLength;
549     iBreakPos = *pEndPos;
550   }
551 
552   FX_LINEBREAKTYPE eType;
553   FX_BREAKPROPERTY nCur;
554   FX_BREAKPROPERTY nNext;
555   CFX_Char* pCur = &chars[iLength--];
556   if (bAllChars)
557     pCur->m_eLineBreakType = FX_LINEBREAKTYPE::kUNKNOWN;
558 
559   nNext = FX_GetBreakProperty(pCur->char_code());
560   int32_t iCharWidth = pCur->m_iCharWidth;
561   if (iCharWidth > 0)
562     *pEndPos -= iCharWidth;
563 
564   while (iLength >= 0) {
565     pCur = &chars[iLength];
566     nCur = FX_GetBreakProperty(pCur->char_code());
567     if (nNext == FX_BREAKPROPERTY::kSP)
568       eType = FX_LINEBREAKTYPE::kPROHIBITED_BRK;
569     else
570       eType = GetLineBreakTypeFromPair(nCur, nNext);
571     if (bAllChars)
572       pCur->m_eLineBreakType = eType;
573     if (!bOnlyBrk) {
574       if (m_bSingleLine || *pEndPos <= m_iLineWidth ||
575           nCur == FX_BREAKPROPERTY::kSP) {
576         if (eType == FX_LINEBREAKTYPE::kDIRECT_BRK && iBreak < 0) {
577           iBreak = iLength;
578           iBreakPos = *pEndPos;
579           if (!bAllChars)
580             return iLength;
581         } else if (eType == FX_LINEBREAKTYPE::kINDIRECT_BRK && iIndirect < 0) {
582           iIndirect = iLength;
583           iIndirectPos = *pEndPos;
584         }
585         if (iLast < 0) {
586           iLast = iLength;
587           iLastPos = *pEndPos;
588         }
589       }
590       iCharWidth = pCur->m_iCharWidth;
591       if (iCharWidth > 0)
592         *pEndPos -= iCharWidth;
593     }
594     nNext = nCur;
595     iLength--;
596   }
597   if (bOnlyBrk)
598     return 0;
599   if (iBreak > -1) {
600     *pEndPos = iBreakPos;
601     return iBreak;
602   }
603   if (iIndirect > -1) {
604     *pEndPos = iIndirectPos;
605     return iIndirect;
606   }
607   if (iLast > -1) {
608     *pEndPos = iLastPos;
609     return iLast;
610   }
611   return 0;
612 }
613 
SplitTextLine(CFX_BreakLine * pCurLine,CFX_BreakLine * pNextLine,bool bAllChars)614 void CFX_TxtBreak::SplitTextLine(CFX_BreakLine* pCurLine,
615                                  CFX_BreakLine* pNextLine,
616                                  bool bAllChars) {
617   ASSERT(pCurLine);
618   ASSERT(pNextLine);
619 
620   if (pCurLine->m_LineChars.size() < 2)
621     return;
622 
623   int32_t iEndPos = pCurLine->m_iWidth;
624   std::vector<CFX_Char>& curChars = pCurLine->m_LineChars;
625   int32_t iCharPos = GetBreakPos(&curChars, bAllChars, false, &iEndPos);
626   if (iCharPos < 0)
627     iCharPos = 0;
628 
629   iCharPos++;
630   if (iCharPos >= pdfium::CollectionSize<int32_t>(pCurLine->m_LineChars)) {
631     pNextLine->Clear();
632     CFX_Char* pTC = &curChars[iCharPos - 1];
633     pTC->m_eLineBreakType = FX_LINEBREAKTYPE::kUNKNOWN;
634     return;
635   }
636 
637   pNextLine->m_LineChars =
638       std::vector<CFX_Char>(curChars.begin() + iCharPos, curChars.end());
639   curChars.erase(curChars.begin() + iCharPos, curChars.end());
640   pCurLine->m_iWidth = iEndPos;
641   CFX_Char* pTC = &curChars[iCharPos - 1];
642   pTC->m_eLineBreakType = FX_LINEBREAKTYPE::kUNKNOWN;
643   int32_t iWidth = 0;
644   for (size_t i = 0; i < pNextLine->m_LineChars.size(); ++i) {
645     if (pNextLine->m_LineChars[i].GetCharType() >= FX_CHARTYPE::kArabicAlef) {
646       pCurLine->DecrementArabicCharCount();
647       pNextLine->IncrementArabicCharCount();
648     }
649     iWidth += std::max(0, pNextLine->m_LineChars[i].m_iCharWidth);
650     pNextLine->m_LineChars[i].m_dwStatus = CFX_BreakType::None;
651   }
652   pNextLine->m_iWidth = iWidth;
653 }
654 
// One glyph to emit for a source character in GetDisplayPos().
// All members default to zero so that partially filled entries never expose
// indeterminate values.
struct FX_FORMCHAR {
  uint16_t wch = 0;    // Source character code.
  uint16_t wForm = 0;  // Character code of the form actually drawn.
  int32_t iWidth = 0;  // Width associated with |wForm|.
};
660 
GetDisplayPos(const Run * pTxtRun,TextCharPos * pCharPos) const661 size_t CFX_TxtBreak::GetDisplayPos(const Run* pTxtRun,
662                                    TextCharPos* pCharPos) const {
663   if (!pTxtRun || pTxtRun->iLength < 1)
664     return 0;
665 
666   Engine* pEngine = pTxtRun->pEdtEngine;
667   const wchar_t* pStr = pTxtRun->wsStr.c_str();
668   int32_t* pWidths = pTxtRun->pWidths;
669   int32_t iLength = pTxtRun->iLength - 1;
670   RetainPtr<CFGAS_GEFont> pFont = pTxtRun->pFont;
671   uint32_t dwStyles = pTxtRun->dwStyles;
672   CFX_RectF rtText(*pTxtRun->pRect);
673   bool bRTLPiece = (pTxtRun->dwCharStyles & FX_TXTCHARSTYLE_OddBidiLevel) != 0;
674   float fFontSize = pTxtRun->fFontSize;
675   int32_t iFontSize = FXSYS_roundf(fFontSize * 20.0f);
676   int32_t iAscent = pFont->GetAscent();
677   int32_t iDescent = pFont->GetDescent();
678   int32_t iMaxHeight = iAscent - iDescent;
679   float fFontHeight = fFontSize;
680   float fAscent = fFontHeight * iAscent / iMaxHeight;
681   float fX = rtText.left;
682   float fY;
683   float fCharWidth;
684   int32_t iHorScale = pTxtRun->iHorizontalScale;
685   int32_t iVerScale = pTxtRun->iVerticalScale;
686   bool bSkipSpace = pTxtRun->bSkipSpace;
687   FX_FORMCHAR formChars[3];
688   float fYBase;
689 
690   if (bRTLPiece)
691     fX = rtText.right();
692 
693   fYBase = rtText.top + (rtText.height - fFontSize) / 2.0f;
694   fY = fYBase + fAscent;
695 
696   size_t szCount = 0;
697   int32_t iNext = 0;
698   wchar_t wPrev = 0xFEFF;
699   wchar_t wNext = 0xFEFF;
700   wchar_t wForm = 0xFEFF;
701   wchar_t wLast = 0xFEFF;
702   bool bShadda = false;
703   bool bLam = false;
704   for (int32_t i = 0; i <= iLength; i++) {
705     int32_t iAbsolute = i + pTxtRun->iStart;
706     int32_t iWidth;
707     wchar_t wch;
708     if (pEngine) {
709       wch = pEngine->GetChar(iAbsolute);
710       iWidth = pEngine->GetWidthOfChar(iAbsolute);
711     } else {
712       wch = *pStr++;
713       iWidth = *pWidths++;
714     }
715 
716     FX_CHARTYPE chartype = FX_GetCharType(wch);
717     if (chartype == FX_CHARTYPE::kArabicAlef && iWidth == 0) {
718       wPrev = 0xFEFF;
719       wLast = wch;
720       continue;
721     }
722 
723     if (chartype >= FX_CHARTYPE::kArabicAlef) {
724       if (i < iLength) {
725         if (pEngine) {
726           iNext = i + 1;
727           while (iNext <= iLength) {
728             int32_t iNextAbsolute = iNext + pTxtRun->iStart;
729             wNext = pEngine->GetChar(iNextAbsolute);
730             if (FX_GetCharType(wNext) != FX_CHARTYPE::kCombination)
731               break;
732 
733             iNext++;
734           }
735           if (iNext > iLength)
736             wNext = 0xFEFF;
737         } else {
738           int32_t j = -1;
739           do {
740             j++;
741             if (i + j >= iLength)
742               break;
743 
744             wNext = pStr[j];
745           } while (FX_GetCharType(wNext) == FX_CHARTYPE::kCombination);
746           if (i + j >= iLength)
747             wNext = 0xFEFF;
748         }
749       } else {
750         wNext = 0xFEFF;
751       }
752 
753       wForm = pdfium::arabic::GetFormChar(wch, wPrev, wNext);
754       bLam = (wPrev == 0x0644 && wch == 0x0644 && wNext == 0x0647);
755     } else if (chartype == FX_CHARTYPE::kCombination) {
756       wForm = wch;
757       if (wch >= 0x064C && wch <= 0x0651) {
758         if (bShadda) {
759           wForm = 0xFEFF;
760           bShadda = false;
761         } else {
762           wNext = 0xFEFF;
763           if (pEngine) {
764             iNext = i + 1;
765             if (iNext <= iLength) {
766               int32_t iNextAbsolute = iNext + pTxtRun->iStart;
767               wNext = pEngine->GetChar(iNextAbsolute);
768             }
769           } else {
770             if (i < iLength)
771               wNext = *pStr;
772           }
773           if (wch == 0x0651) {
774             if (wNext >= 0x064C && wNext <= 0x0650) {
775               wForm = FX_GetArabicFromShaddaTable(wNext);
776               bShadda = true;
777             }
778           } else {
779             if (wNext == 0x0651) {
780               wForm = FX_GetArabicFromShaddaTable(wch);
781               bShadda = true;
782             }
783           }
784         }
785       } else {
786         bShadda = false;
787       }
788     } else if (chartype == FX_CHARTYPE::kNumeric) {
789       wForm = wch;
790     } else if (wch == L'.') {
791       wForm = wch;
792     } else if (wch == L',') {
793       wForm = wch;
794     } else if (bRTLPiece) {
795       wForm = FX_GetMirrorChar(wch);
796     } else {
797       wForm = wch;
798     }
799     if (chartype != FX_CHARTYPE::kCombination)
800       bShadda = false;
801     if (chartype < FX_CHARTYPE::kArabicAlef)
802       bLam = false;
803 
804     bool bEmptyChar =
805         (chartype >= FX_CHARTYPE::kTab && chartype <= FX_CHARTYPE::kControl);
806     if (wForm == 0xFEFF)
807       bEmptyChar = true;
808 
809     int32_t iForms = bLam ? 3 : 1;
810     szCount += (bEmptyChar && bSkipSpace) ? 0 : iForms;
811     if (!pCharPos) {
812       if (iWidth > 0)
813         wPrev = wch;
814       wLast = wch;
815       continue;
816     }
817 
818     int32_t iCharWidth = iWidth;
819     if (iCharWidth < 0)
820       iCharWidth = -iCharWidth;
821 
822     iCharWidth /= iFontSize;
823     formChars[0].wch = wch;
824     formChars[0].wForm = wForm;
825     formChars[0].iWidth = iCharWidth;
826     if (bLam) {
827       formChars[1].wForm = 0x0651;
828       iCharWidth = 0;
829       pFont->GetCharWidth(0x0651, &iCharWidth);
830       formChars[1].iWidth = iCharWidth;
831       formChars[2].wForm = 0x0670;
832       iCharWidth = 0;
833       pFont->GetCharWidth(0x0670, &iCharWidth);
834       formChars[2].iWidth = iCharWidth;
835     }
836 
837     for (int32_t j = 0; j < iForms; j++) {
838       wForm = (wchar_t)formChars[j].wForm;
839       iCharWidth = formChars[j].iWidth;
840       if (j > 0) {
841         chartype = FX_CHARTYPE::kCombination;
842         wch = wForm;
843         wLast = (wchar_t)formChars[j - 1].wForm;
844       }
845       if (!bEmptyChar || (bEmptyChar && !bSkipSpace)) {
846         pCharPos->m_GlyphIndex = pFont->GetGlyphIndex(wForm);
847 #if defined(OS_MACOSX)
848         pCharPos->m_ExtGID = pCharPos->m_GlyphIndex;
849 #endif
850         // TODO(npm): change widths in this method to unsigned to avoid implicit
851         // cast in the following line.
852         pCharPos->m_FontCharWidth = iCharWidth;
853       }
854 
855       fCharWidth = fFontSize * iCharWidth / 1000.0f;
856       if (bRTLPiece && chartype != FX_CHARTYPE::kCombination)
857         fX -= fCharWidth;
858 
859       if (!bEmptyChar || (bEmptyChar && !bSkipSpace)) {
860         pCharPos->m_Origin = CFX_PointF(fX, fY);
861 
862         if ((dwStyles & FX_LAYOUTSTYLE_CombText) != 0) {
863           int32_t iFormWidth = iCharWidth;
864           pFont->GetCharWidth(wForm, &iFormWidth);
865           float fOffset = fFontSize * (iCharWidth - iFormWidth) / 2000.0f;
866           pCharPos->m_Origin.x += fOffset;
867         }
868 
869         if (chartype == FX_CHARTYPE::kCombination) {
870           FX_RECT rtBBox;
871           if (pFont->GetCharBBox(wForm, &rtBBox)) {
872             pCharPos->m_Origin.y =
873                 fYBase + fFontSize - fFontSize * rtBBox.Height() / iMaxHeight;
874           }
875           if (wForm == wch && wLast != 0xFEFF) {
876             if (FX_GetCharType(wLast) == FX_CHARTYPE::kCombination) {
877               FX_RECT rtBox;
878               if (pFont->GetCharBBox(wLast, &rtBox))
879                 pCharPos->m_Origin.y -= fFontSize * rtBox.Height() / iMaxHeight;
880             }
881           }
882         }
883       }
884       if (!bRTLPiece && chartype != FX_CHARTYPE::kCombination)
885         fX += fCharWidth;
886 
887       if (!bEmptyChar || (bEmptyChar && !bSkipSpace)) {
888         pCharPos->m_bGlyphAdjust = true;
889         pCharPos->m_AdjustMatrix[0] = -1;
890         pCharPos->m_AdjustMatrix[1] = 0;
891         pCharPos->m_AdjustMatrix[2] = 0;
892         pCharPos->m_AdjustMatrix[3] = 1;
893 
894         if (iHorScale != 100 || iVerScale != 100) {
895           pCharPos->m_AdjustMatrix[0] =
896               pCharPos->m_AdjustMatrix[0] * iHorScale / 100.0f;
897           pCharPos->m_AdjustMatrix[1] =
898               pCharPos->m_AdjustMatrix[1] * iHorScale / 100.0f;
899           pCharPos->m_AdjustMatrix[2] =
900               pCharPos->m_AdjustMatrix[2] * iVerScale / 100.0f;
901           pCharPos->m_AdjustMatrix[3] =
902               pCharPos->m_AdjustMatrix[3] * iVerScale / 100.0f;
903         }
904         pCharPos++;
905       }
906     }
907     if (iWidth > 0)
908       wPrev = static_cast<wchar_t>(formChars[0].wch);
909     wLast = wch;
910   }
911   return szCount;
912 }
913 
GetCharRects(const Run * pTxtRun,bool bCharBBox) const914 std::vector<CFX_RectF> CFX_TxtBreak::GetCharRects(const Run* pTxtRun,
915                                                   bool bCharBBox) const {
916   if (!pTxtRun || pTxtRun->iLength < 1)
917     return std::vector<CFX_RectF>();
918 
919   Engine* pEngine = pTxtRun->pEdtEngine;
920   const wchar_t* pStr = pTxtRun->wsStr.c_str();
921   int32_t* pWidths = pTxtRun->pWidths;
922   int32_t iLength = pTxtRun->iLength;
923   CFX_RectF rect(*pTxtRun->pRect);
924   float fFontSize = pTxtRun->fFontSize;
925   float fScale = fFontSize / 1000.0f;
926   RetainPtr<CFGAS_GEFont> pFont = pTxtRun->pFont;
927   if (!pFont)
928     bCharBBox = false;
929 
930   FX_RECT bbox;
931   if (bCharBBox)
932     bCharBBox = pFont->GetBBox(&bbox);
933 
934   float fLeft = std::max(0.0f, bbox.left * fScale);
935   float fHeight = fabs(bbox.Height() * fScale);
936   bool bRTLPiece = !!(pTxtRun->dwCharStyles & FX_TXTCHARSTYLE_OddBidiLevel);
937   bool bSingleLine = !!(pTxtRun->dwStyles & FX_LAYOUTSTYLE_SingleLine);
938   bool bCombText = !!(pTxtRun->dwStyles & FX_LAYOUTSTYLE_CombText);
939   wchar_t wch;
940   int32_t iCharSize;
941   float fCharSize;
942   float fStart = bRTLPiece ? rect.right() : rect.left;
943 
944   std::vector<CFX_RectF> rtArray(iLength);
945   for (int32_t i = 0; i < iLength; i++) {
946     int32_t iAbsolute = i + pTxtRun->iStart;
947     if (pEngine) {
948       wch = pEngine->GetChar(iAbsolute);
949       iCharSize = pEngine->GetWidthOfChar(iAbsolute);
950     } else {
951       wch = *pStr++;
952       iCharSize = *pWidths++;
953     }
954     fCharSize = static_cast<float>(iCharSize) / kConversionFactor;
955     bool bRet = (!bSingleLine && IsCtrlCode(wch));
956     if (!(wch == L'\v' || wch == L'\f' || wch == 0x2028 || wch == 0x2029 ||
957           wch == L'\n')) {
958       bRet = false;
959     }
960     if (bRet)
961       fCharSize = fFontSize / 2.0f;
962     rect.left = fStart;
963     if (bRTLPiece) {
964       rect.left -= fCharSize;
965       fStart -= fCharSize;
966     } else {
967       fStart += fCharSize;
968     }
969     rect.width = fCharSize;
970 
971     if (bCharBBox && !bRet) {
972       int32_t iCharWidth = 1000;
973       pFont->GetCharWidth(wch, &iCharWidth);
974       float fRTLeft = 0, fCharWidth = 0;
975       if (iCharWidth > 0) {
976         fCharWidth = iCharWidth * fScale;
977         fRTLeft = fLeft;
978         if (bCombText)
979           fRTLeft = (rect.width - fCharWidth) / 2.0f;
980       }
981       CFX_RectF rtBBoxF;
982       rtBBoxF.left = rect.left + fRTLeft;
983       rtBBoxF.top = rect.top + (rect.height - fHeight) / 2.0f;
984       rtBBoxF.width = fCharWidth;
985       rtBBoxF.height = fHeight;
986       rtBBoxF.top = std::max(rtBBoxF.top, 0.0f);
987       rtArray[i] = rtBBoxF;
988       continue;
989     }
990     rtArray[i] = rect;
991   }
992   return rtArray;
993 }
994 
995 CFX_TxtBreak::Engine::~Engine() = default;
996 
997 CFX_TxtBreak::Run::Run() = default;
998 
999 CFX_TxtBreak::Run::~Run() = default;
1000 
1001 CFX_TxtBreak::Run::Run(const CFX_TxtBreak::Run& other) = default;
1002