1 // Copyright 2017 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #include "xfa/fxfa/cxfa_textparser.h"
8 
9 #include <algorithm>
10 #include <utility>
11 #include <vector>
12 
13 #include "core/fxcrt/css/cfx_css.h"
14 #include "core/fxcrt/css/cfx_csscomputedstyle.h"
15 #include "core/fxcrt/css/cfx_cssstyleselector.h"
16 #include "core/fxcrt/css/cfx_cssstylesheet.h"
17 #include "core/fxcrt/fx_codepage.h"
18 #include "core/fxcrt/xml/cfx_xmlelement.h"
19 #include "core/fxcrt/xml/cfx_xmlnode.h"
20 #include "core/fxge/fx_font.h"
21 #include "third_party/base/ptr_util.h"
22 #include "xfa/fgas/font/cfgas_fontmgr.h"
23 #include "xfa/fgas/font/cfgas_gefont.h"
24 #include "xfa/fxfa/cxfa_ffapp.h"
25 #include "xfa/fxfa/cxfa_ffdoc.h"
26 #include "xfa/fxfa/cxfa_fontmgr.h"
27 #include "xfa/fxfa/cxfa_textparsecontext.h"
28 #include "xfa/fxfa/cxfa_textprovider.h"
29 #include "xfa/fxfa/cxfa_texttabstopscontext.h"
30 #include "xfa/fxfa/parser/cxfa_font.h"
31 #include "xfa/fxfa/parser/cxfa_measurement.h"
32 #include "xfa/fxfa/parser/cxfa_para.h"
33 
34 namespace {
35 
36 enum class TabStopStatus {
37   Error,
38   EOS,
39   None,
40   Alignment,
41   StartLeader,
42   Leader,
43   Location,
44 };
45 
GetLowerCaseElementAttributeOrDefault(const CFX_XMLElement * pElement,const WideString & wsName,const WideString & wsDefaultValue)46 WideString GetLowerCaseElementAttributeOrDefault(
47     const CFX_XMLElement* pElement,
48     const WideString& wsName,
49     const WideString& wsDefaultValue) {
50   WideString ws = pElement->GetAttribute(wsName);
51   if (ws.IsEmpty())
52     ws = wsDefaultValue;
53   else
54     ws.MakeLower();
55   return ws;
56 }
57 
58 }  // namespace
59 
CXFA_TextParser()60 CXFA_TextParser::CXFA_TextParser()
61     : m_bParsed(false), m_cssInitialized(false) {}
62 
~CXFA_TextParser()63 CXFA_TextParser::~CXFA_TextParser() {}
64 
Reset()65 void CXFA_TextParser::Reset() {
66   m_mapXMLNodeToParseContext.clear();
67   m_bParsed = false;
68 }
69 
InitCSSData(CXFA_TextProvider * pTextProvider)70 void CXFA_TextParser::InitCSSData(CXFA_TextProvider* pTextProvider) {
71   if (!pTextProvider)
72     return;
73 
74   if (!m_pSelector) {
75     m_pSelector = pdfium::MakeUnique<CFX_CSSStyleSelector>();
76 
77     CXFA_Font* font = pTextProvider->GetFontIfExists();
78     m_pSelector->SetDefFontSize(font ? font->GetFontSize() : 10.0f);
79   }
80 
81   if (m_cssInitialized)
82     return;
83 
84   m_cssInitialized = true;
85   auto uaSheet = LoadDefaultSheetStyle();
86   m_pSelector->SetUAStyleSheet(std::move(uaSheet));
87   m_pSelector->UpdateStyleIndex();
88 }
89 
LoadDefaultSheetStyle()90 std::unique_ptr<CFX_CSSStyleSheet> CXFA_TextParser::LoadDefaultSheetStyle() {
91   static const char kStyle[] =
92       "html,body,ol,p,ul{display:block}"
93       "li{display:list-item}"
94       "ol,ul{padding-left:33px;margin:1.12em 0}"
95       "ol{list-style-type:decimal}"
96       "a{color:#0000ff;text-decoration:underline}"
97       "b{font-weight:bolder}"
98       "i{font-style:italic}"
99       "sup{vertical-align:+15em;font-size:.66em}"
100       "sub{vertical-align:-15em;font-size:.66em}";
101   WideString ws = WideString::FromASCII(kStyle);
102   auto sheet = pdfium::MakeUnique<CFX_CSSStyleSheet>();
103   if (!sheet->LoadBuffer(ws.c_str(), ws.GetLength()))
104     return nullptr;
105 
106   return sheet;
107 }
108 
CreateRootStyle(CXFA_TextProvider * pTextProvider)109 RetainPtr<CFX_CSSComputedStyle> CXFA_TextParser::CreateRootStyle(
110     CXFA_TextProvider* pTextProvider) {
111   CXFA_Para* para = pTextProvider->GetParaIfExists();
112   auto pStyle = m_pSelector->CreateComputedStyle(nullptr);
113   float fLineHeight = 0;
114   float fFontSize = 10;
115 
116   if (para) {
117     fLineHeight = para->GetLineHeight();
118     CFX_CSSLength indent;
119     indent.Set(CFX_CSSLengthUnit::Point, para->GetTextIndent());
120     pStyle->SetTextIndent(indent);
121     CFX_CSSTextAlign hAlign = CFX_CSSTextAlign::Left;
122     switch (para->GetHorizontalAlign()) {
123       case XFA_AttributeValue::Center:
124         hAlign = CFX_CSSTextAlign::Center;
125         break;
126       case XFA_AttributeValue::Right:
127         hAlign = CFX_CSSTextAlign::Right;
128         break;
129       case XFA_AttributeValue::Justify:
130         hAlign = CFX_CSSTextAlign::Justify;
131         break;
132       case XFA_AttributeValue::JustifyAll:
133         hAlign = CFX_CSSTextAlign::JustifyAll;
134         break;
135       case XFA_AttributeValue::Left:
136       case XFA_AttributeValue::Radix:
137         break;
138       default:
139         NOTREACHED();
140         break;
141     }
142     pStyle->SetTextAlign(hAlign);
143     CFX_CSSRect rtMarginWidth;
144     rtMarginWidth.left.Set(CFX_CSSLengthUnit::Point, para->GetMarginLeft());
145     rtMarginWidth.top.Set(CFX_CSSLengthUnit::Point, para->GetSpaceAbove());
146     rtMarginWidth.right.Set(CFX_CSSLengthUnit::Point, para->GetMarginRight());
147     rtMarginWidth.bottom.Set(CFX_CSSLengthUnit::Point, para->GetSpaceBelow());
148     pStyle->SetMarginWidth(rtMarginWidth);
149   }
150 
151   CXFA_Font* font = pTextProvider->GetFontIfExists();
152   if (font) {
153     pStyle->SetColor(font->GetColor());
154     pStyle->SetFontStyle(font->IsItalic() ? CFX_CSSFontStyle::Italic
155                                           : CFX_CSSFontStyle::Normal);
156     pStyle->SetFontWeight(font->IsBold() ? FXFONT_FW_BOLD : FXFONT_FW_NORMAL);
157     pStyle->SetNumberVerticalAlign(-font->GetBaselineShift());
158     fFontSize = font->GetFontSize();
159     CFX_CSSLength letterSpacing;
160     letterSpacing.Set(CFX_CSSLengthUnit::Point, font->GetLetterSpacing());
161     pStyle->SetLetterSpacing(letterSpacing);
162     uint32_t dwDecoration = 0;
163     if (font->GetLineThrough() > 0)
164       dwDecoration |= CFX_CSSTEXTDECORATION_LineThrough;
165     if (font->GetUnderline() > 1)
166       dwDecoration |= CFX_CSSTEXTDECORATION_Double;
167     else if (font->GetUnderline() > 0)
168       dwDecoration |= CFX_CSSTEXTDECORATION_Underline;
169 
170     pStyle->SetTextDecoration(dwDecoration);
171   }
172   pStyle->SetLineHeight(fLineHeight);
173   pStyle->SetFontSize(fFontSize);
174   return pStyle;
175 }
176 
CreateStyle(CFX_CSSComputedStyle * pParentStyle)177 RetainPtr<CFX_CSSComputedStyle> CXFA_TextParser::CreateStyle(
178     CFX_CSSComputedStyle* pParentStyle) {
179   auto pNewStyle = m_pSelector->CreateComputedStyle(pParentStyle);
180   ASSERT(pNewStyle);
181   if (!pParentStyle)
182     return pNewStyle;
183 
184   uint32_t dwDecoration = pParentStyle->GetTextDecoration();
185   float fBaseLine = 0;
186   if (pParentStyle->GetVerticalAlign() == CFX_CSSVerticalAlign::Number)
187     fBaseLine = pParentStyle->GetNumberVerticalAlign();
188 
189   pNewStyle->SetTextDecoration(dwDecoration);
190   pNewStyle->SetNumberVerticalAlign(fBaseLine);
191 
192   const CFX_CSSRect* pRect = pParentStyle->GetMarginWidth();
193   if (pRect)
194     pNewStyle->SetMarginWidth(*pRect);
195   return pNewStyle;
196 }
197 
ComputeStyle(const CFX_XMLNode * pXMLNode,CFX_CSSComputedStyle * pParentStyle)198 RetainPtr<CFX_CSSComputedStyle> CXFA_TextParser::ComputeStyle(
199     const CFX_XMLNode* pXMLNode,
200     CFX_CSSComputedStyle* pParentStyle) {
201   auto it = m_mapXMLNodeToParseContext.find(pXMLNode);
202   if (it == m_mapXMLNodeToParseContext.end())
203     return nullptr;
204 
205   CXFA_TextParseContext* pContext = it->second.get();
206   if (!pContext)
207     return nullptr;
208 
209   pContext->m_pParentStyle.Reset(pParentStyle);
210 
211   auto tagProvider = ParseTagInfo(pXMLNode);
212   if (tagProvider->m_bContent)
213     return nullptr;
214 
215   auto pStyle = CreateStyle(pParentStyle);
216   m_pSelector->ComputeStyle(pContext->GetDecls(),
217                             tagProvider->GetAttribute(L"style"),
218                             tagProvider->GetAttribute(L"align"), pStyle.Get());
219   return pStyle;
220 }
221 
DoParse(const CFX_XMLNode * pXMLContainer,CXFA_TextProvider * pTextProvider)222 void CXFA_TextParser::DoParse(const CFX_XMLNode* pXMLContainer,
223                               CXFA_TextProvider* pTextProvider) {
224   if (!pXMLContainer || !pTextProvider || m_bParsed)
225     return;
226 
227   m_bParsed = true;
228   InitCSSData(pTextProvider);
229   auto pRootStyle = CreateRootStyle(pTextProvider);
230   ParseRichText(pXMLContainer, pRootStyle.Get());
231 }
232 
ParseRichText(const CFX_XMLNode * pXMLNode,CFX_CSSComputedStyle * pParentStyle)233 void CXFA_TextParser::ParseRichText(const CFX_XMLNode* pXMLNode,
234                                     CFX_CSSComputedStyle* pParentStyle) {
235   if (!pXMLNode)
236     return;
237 
238   auto tagProvider = ParseTagInfo(pXMLNode);
239   if (!tagProvider->m_bTagAvailable)
240     return;
241 
242   RetainPtr<CFX_CSSComputedStyle> pNewStyle;
243   if (!(tagProvider->GetTagName().EqualsASCII("body") &&
244         tagProvider->GetTagName().EqualsASCII("html"))) {
245     auto pTextContext = pdfium::MakeUnique<CXFA_TextParseContext>();
246     CFX_CSSDisplay eDisplay = CFX_CSSDisplay::Inline;
247     if (!tagProvider->m_bContent) {
248       auto declArray =
249           m_pSelector->MatchDeclarations(tagProvider->GetTagName());
250       pNewStyle = CreateStyle(pParentStyle);
251       m_pSelector->ComputeStyle(declArray, tagProvider->GetAttribute(L"style"),
252                                 tagProvider->GetAttribute(L"align"),
253                                 pNewStyle.Get());
254 
255       if (!declArray.empty())
256         pTextContext->SetDecls(std::move(declArray));
257 
258       eDisplay = pNewStyle->GetDisplay();
259     }
260     pTextContext->SetDisplay(eDisplay);
261     m_mapXMLNodeToParseContext[pXMLNode] = std::move(pTextContext);
262   }
263 
264   for (CFX_XMLNode* pXMLChild = pXMLNode->GetFirstChild(); pXMLChild;
265        pXMLChild = pXMLChild->GetNextSibling()) {
266     ParseRichText(pXMLChild, pNewStyle.Get());
267   }
268 }
269 
TagValidate(const WideString & wsName) const270 bool CXFA_TextParser::TagValidate(const WideString& wsName) const {
271   static const uint32_t s_XFATagName[] = {
272       0x61,        // a
273       0x62,        // b
274       0x69,        // i
275       0x70,        // p
276       0x0001f714,  // br
277       0x00022a55,  // li
278       0x000239bb,  // ol
279       0x00025881,  // ul
280       0x0bd37faa,  // sub
281       0x0bd37fb8,  // sup
282       0xa73e3af2,  // span
283       0xb182eaae,  // body
284       0xdb8ac455,  // html
285   };
286   return std::binary_search(std::begin(s_XFATagName), std::end(s_XFATagName),
287                             FX_HashCode_GetW(wsName.AsStringView(), true));
288 }
289 
290 // static
ParseTagInfo(const CFX_XMLNode * pXMLNode)291 std::unique_ptr<CXFA_TextParser::TagProvider> CXFA_TextParser::ParseTagInfo(
292     const CFX_XMLNode* pXMLNode) {
293   auto tagProvider = pdfium::MakeUnique<TagProvider>();
294   const CFX_XMLElement* pXMLElement = ToXMLElement(pXMLNode);
295   if (pXMLElement) {
296     WideString wsName = pXMLElement->GetLocalTagName();
297     tagProvider->SetTagName(wsName);
298     tagProvider->m_bTagAvailable = TagValidate(wsName);
299     WideString wsValue = pXMLElement->GetAttribute(L"style");
300     if (!wsValue.IsEmpty())
301       tagProvider->SetAttribute(L"style", wsValue);
302 
303     return tagProvider;
304   }
305   if (pXMLNode->GetType() == CFX_XMLNode::Type::kText) {
306     tagProvider->m_bTagAvailable = true;
307     tagProvider->m_bContent = true;
308   }
309   return tagProvider;
310 }
311 
GetVAlign(CXFA_TextProvider * pTextProvider) const312 XFA_AttributeValue CXFA_TextParser::GetVAlign(
313     CXFA_TextProvider* pTextProvider) const {
314   CXFA_Para* para = pTextProvider->GetParaIfExists();
315   return para ? para->GetVerticalAlign() : XFA_AttributeValue::Top;
316 }
317 
GetTabInterval(CFX_CSSComputedStyle * pStyle) const318 float CXFA_TextParser::GetTabInterval(CFX_CSSComputedStyle* pStyle) const {
319   WideString wsValue;
320   if (pStyle && pStyle->GetCustomStyle(L"tab-interval", &wsValue))
321     return CXFA_Measurement(wsValue.AsStringView()).ToUnit(XFA_Unit::Pt);
322   return 36;
323 }
324 
CountTabs(CFX_CSSComputedStyle * pStyle) const325 int32_t CXFA_TextParser::CountTabs(CFX_CSSComputedStyle* pStyle) const {
326   WideString wsValue;
327   if (pStyle && pStyle->GetCustomStyle(L"xfa-tab-count", &wsValue))
328     return wsValue.GetInteger();
329   return 0;
330 }
331 
IsSpaceRun(CFX_CSSComputedStyle * pStyle) const332 bool CXFA_TextParser::IsSpaceRun(CFX_CSSComputedStyle* pStyle) const {
333   WideString wsValue;
334   return pStyle && pStyle->GetCustomStyle(L"xfa-spacerun", &wsValue) &&
335          wsValue.EqualsASCIINoCase("yes");
336 }
337 
GetFont(CXFA_FFDoc * doc,CXFA_TextProvider * pTextProvider,CFX_CSSComputedStyle * pStyle) const338 RetainPtr<CFGAS_GEFont> CXFA_TextParser::GetFont(
339     CXFA_FFDoc* doc,
340     CXFA_TextProvider* pTextProvider,
341     CFX_CSSComputedStyle* pStyle) const {
342   WideString wsFamily = L"Courier";
343   uint32_t dwStyle = 0;
344   CXFA_Font* font = pTextProvider->GetFontIfExists();
345   if (font) {
346     wsFamily = font->GetTypeface();
347     if (font->IsBold())
348       dwStyle |= FXFONT_FORCE_BOLD;
349     if (font->IsItalic())
350       dwStyle |= FXFONT_FORCE_BOLD;
351   }
352 
353   if (pStyle) {
354     int32_t iCount = pStyle->CountFontFamilies();
355     if (iCount > 0)
356       wsFamily = pStyle->GetFontFamily(iCount - 1).AsStringView();
357 
358     dwStyle = 0;
359     if (pStyle->GetFontWeight() > FXFONT_FW_NORMAL)
360       dwStyle |= FXFONT_FORCE_BOLD;
361     if (pStyle->GetFontStyle() == CFX_CSSFontStyle::Italic)
362       dwStyle |= FXFONT_ITALIC;
363   }
364 
365   CXFA_FontMgr* pFontMgr = doc->GetApp()->GetXFAFontMgr();
366   return pFontMgr->GetFont(doc, wsFamily.AsStringView(), dwStyle);
367 }
368 
GetFontSize(CXFA_TextProvider * pTextProvider,CFX_CSSComputedStyle * pStyle) const369 float CXFA_TextParser::GetFontSize(CXFA_TextProvider* pTextProvider,
370                                    CFX_CSSComputedStyle* pStyle) const {
371   if (pStyle)
372     return pStyle->GetFontSize();
373 
374   CXFA_Font* font = pTextProvider->GetFontIfExists();
375   return font ? font->GetFontSize() : 10;
376 }
377 
GetHorScale(CXFA_TextProvider * pTextProvider,CFX_CSSComputedStyle * pStyle,const CFX_XMLNode * pXMLNode) const378 int32_t CXFA_TextParser::GetHorScale(CXFA_TextProvider* pTextProvider,
379                                      CFX_CSSComputedStyle* pStyle,
380                                      const CFX_XMLNode* pXMLNode) const {
381   if (pStyle) {
382     WideString wsValue;
383     if (pStyle->GetCustomStyle(L"xfa-font-horizontal-scale", &wsValue))
384       return wsValue.GetInteger();
385 
386     while (pXMLNode) {
387       auto it = m_mapXMLNodeToParseContext.find(pXMLNode);
388       if (it != m_mapXMLNodeToParseContext.end()) {
389         CXFA_TextParseContext* pContext = it->second.get();
390         if (pContext && pContext->m_pParentStyle &&
391             pContext->m_pParentStyle->GetCustomStyle(
392                 L"xfa-font-horizontal-scale", &wsValue)) {
393           return wsValue.GetInteger();
394         }
395       }
396       pXMLNode = pXMLNode->GetParent();
397     }
398   }
399 
400   CXFA_Font* font = pTextProvider->GetFontIfExists();
401   return font ? static_cast<int32_t>(font->GetHorizontalScale()) : 100;
402 }
403 
GetVerScale(CXFA_TextProvider * pTextProvider,CFX_CSSComputedStyle * pStyle) const404 int32_t CXFA_TextParser::GetVerScale(CXFA_TextProvider* pTextProvider,
405                                      CFX_CSSComputedStyle* pStyle) const {
406   if (pStyle) {
407     WideString wsValue;
408     if (pStyle->GetCustomStyle(L"xfa-font-vertical-scale", &wsValue))
409       return wsValue.GetInteger();
410   }
411 
412   CXFA_Font* font = pTextProvider->GetFontIfExists();
413   return font ? static_cast<int32_t>(font->GetVerticalScale()) : 100;
414 }
415 
GetUnderline(CXFA_TextProvider * pTextProvider,CFX_CSSComputedStyle * pStyle,int32_t & iUnderline,XFA_AttributeValue & iPeriod) const416 void CXFA_TextParser::GetUnderline(CXFA_TextProvider* pTextProvider,
417                                    CFX_CSSComputedStyle* pStyle,
418                                    int32_t& iUnderline,
419                                    XFA_AttributeValue& iPeriod) const {
420   iUnderline = 0;
421   iPeriod = XFA_AttributeValue::All;
422   CXFA_Font* font = pTextProvider->GetFontIfExists();
423   if (!pStyle) {
424     if (font) {
425       iUnderline = font->GetUnderline();
426       iPeriod = font->GetUnderlinePeriod();
427     }
428     return;
429   }
430 
431   uint32_t dwDecoration = pStyle->GetTextDecoration();
432   if (dwDecoration & CFX_CSSTEXTDECORATION_Double)
433     iUnderline = 2;
434   else if (dwDecoration & CFX_CSSTEXTDECORATION_Underline)
435     iUnderline = 1;
436 
437   WideString wsValue;
438   if (pStyle->GetCustomStyle(L"underlinePeriod", &wsValue)) {
439     if (wsValue.EqualsASCII("word"))
440       iPeriod = XFA_AttributeValue::Word;
441   } else if (font) {
442     iPeriod = font->GetUnderlinePeriod();
443   }
444 }
445 
GetLinethrough(CXFA_TextProvider * pTextProvider,CFX_CSSComputedStyle * pStyle,int32_t & iLinethrough) const446 void CXFA_TextParser::GetLinethrough(CXFA_TextProvider* pTextProvider,
447                                      CFX_CSSComputedStyle* pStyle,
448                                      int32_t& iLinethrough) const {
449   iLinethrough = 0;
450   if (pStyle) {
451     uint32_t dwDecoration = pStyle->GetTextDecoration();
452     if (dwDecoration & CFX_CSSTEXTDECORATION_LineThrough)
453       iLinethrough = 1;
454     return;
455   }
456 
457   CXFA_Font* font = pTextProvider->GetFontIfExists();
458   if (font)
459     iLinethrough = font->GetLineThrough();
460 }
461 
GetColor(CXFA_TextProvider * pTextProvider,CFX_CSSComputedStyle * pStyle) const462 FX_ARGB CXFA_TextParser::GetColor(CXFA_TextProvider* pTextProvider,
463                                   CFX_CSSComputedStyle* pStyle) const {
464   if (pStyle)
465     return pStyle->GetColor();
466 
467   CXFA_Font* font = pTextProvider->GetFontIfExists();
468   return font ? font->GetColor() : 0xFF000000;
469 }
470 
GetBaseline(CXFA_TextProvider * pTextProvider,CFX_CSSComputedStyle * pStyle) const471 float CXFA_TextParser::GetBaseline(CXFA_TextProvider* pTextProvider,
472                                    CFX_CSSComputedStyle* pStyle) const {
473   if (pStyle) {
474     if (pStyle->GetVerticalAlign() == CFX_CSSVerticalAlign::Number)
475       return pStyle->GetNumberVerticalAlign();
476   } else {
477     CXFA_Font* font = pTextProvider->GetFontIfExists();
478     if (font)
479       return font->GetBaselineShift();
480   }
481   return 0;
482 }
483 
GetLineHeight(CXFA_TextProvider * pTextProvider,CFX_CSSComputedStyle * pStyle,bool bFirst,float fVerScale) const484 float CXFA_TextParser::GetLineHeight(CXFA_TextProvider* pTextProvider,
485                                      CFX_CSSComputedStyle* pStyle,
486                                      bool bFirst,
487                                      float fVerScale) const {
488   float fLineHeight = 0;
489   if (pStyle) {
490     fLineHeight = pStyle->GetLineHeight();
491   } else {
492     CXFA_Para* para = pTextProvider->GetParaIfExists();
493     if (para)
494       fLineHeight = para->GetLineHeight();
495   }
496 
497   if (bFirst) {
498     float fFontSize = GetFontSize(pTextProvider, pStyle);
499     if (fLineHeight < 0.1f)
500       fLineHeight = fFontSize;
501     else
502       fLineHeight = std::min(fLineHeight, fFontSize);
503   } else if (fLineHeight < 0.1f) {
504     fLineHeight = GetFontSize(pTextProvider, pStyle) * 1.2f;
505   }
506   fLineHeight *= fVerScale;
507   return fLineHeight;
508 }
509 
GetEmbeddedObj(const CXFA_TextProvider * pTextProvider,const CFX_XMLNode * pXMLNode)510 Optional<WideString> CXFA_TextParser::GetEmbeddedObj(
511     const CXFA_TextProvider* pTextProvider,
512     const CFX_XMLNode* pXMLNode) {
513   if (!pXMLNode)
514     return {};
515 
516   const CFX_XMLElement* pElement = ToXMLElement(pXMLNode);
517   if (!pElement)
518     return {};
519 
520   WideString wsAttr = pElement->GetAttribute(L"xfa:embed");
521   if (wsAttr.IsEmpty())
522     return {};
523 
524   if (wsAttr[0] == L'#')
525     wsAttr.Delete(0);
526 
527   WideString ws =
528       GetLowerCaseElementAttributeOrDefault(pElement, L"xfa:embedType", L"som");
529   if (!ws.EqualsASCII("uri"))
530     return {};
531 
532   ws = GetLowerCaseElementAttributeOrDefault(pElement, L"xfa:embedMode",
533                                              L"formatted");
534   if (!(ws.EqualsASCII("raw") || ws.EqualsASCII("formatted")))
535     return {};
536 
537   return pTextProvider->GetEmbeddedObj(wsAttr);
538 }
539 
GetParseContextFromMap(const CFX_XMLNode * pXMLNode)540 CXFA_TextParseContext* CXFA_TextParser::GetParseContextFromMap(
541     const CFX_XMLNode* pXMLNode) {
542   auto it = m_mapXMLNodeToParseContext.find(pXMLNode);
543   return it != m_mapXMLNodeToParseContext.end() ? it->second.get() : nullptr;
544 }
545 
GetTabstops(CFX_CSSComputedStyle * pStyle,CXFA_TextTabstopsContext * pTabstopContext)546 bool CXFA_TextParser::GetTabstops(CFX_CSSComputedStyle* pStyle,
547                                   CXFA_TextTabstopsContext* pTabstopContext) {
548   if (!pStyle || !pTabstopContext)
549     return false;
550 
551   WideString wsValue;
552   if (!pStyle->GetCustomStyle(L"xfa-tab-stops", &wsValue) &&
553       !pStyle->GetCustomStyle(L"tab-stops", &wsValue)) {
554     return false;
555   }
556 
557   pdfium::span<const wchar_t> spTabStops = wsValue.span();
558   size_t iCur = 0;
559   size_t iLast = 0;
560   WideString wsAlign;
561   TabStopStatus eStatus = TabStopStatus::None;
562   while (iCur < spTabStops.size()) {
563     wchar_t ch = spTabStops[iCur];
564     switch (eStatus) {
565       case TabStopStatus::None:
566         if (ch <= ' ') {
567           iCur++;
568         } else {
569           eStatus = TabStopStatus::Alignment;
570           iLast = iCur;
571         }
572         break;
573       case TabStopStatus::Alignment:
574         if (ch == ' ') {
575           wsAlign = WideStringView(spTabStops.subspan(iLast, iCur - iLast));
576           eStatus = TabStopStatus::StartLeader;
577           iCur++;
578           while (iCur < spTabStops.size() && spTabStops[iCur] <= ' ')
579             iCur++;
580           iLast = iCur;
581         } else {
582           iCur++;
583         }
584         break;
585       case TabStopStatus::StartLeader:
586         if (ch != 'l') {
587           eStatus = TabStopStatus::Location;
588         } else {
589           int32_t iCount = 0;
590           while (iCur < spTabStops.size()) {
591             ch = spTabStops[iCur];
592             iCur++;
593             if (ch == '(') {
594               iCount++;
595             } else if (ch == ')') {
596               iCount--;
597               if (iCount == 0)
598                 break;
599             }
600           }
601           while (iCur < spTabStops.size() && spTabStops[iCur] <= ' ')
602             iCur++;
603 
604           iLast = iCur;
605           eStatus = TabStopStatus::Location;
606         }
607         break;
608       case TabStopStatus::Location:
609         if (ch == ' ') {
610           uint32_t dwHashCode = FX_HashCode_GetW(wsAlign.AsStringView(), true);
611           CXFA_Measurement ms(
612               WideStringView(spTabStops.subspan(iLast, iCur - iLast)));
613           float fPos = ms.ToUnit(XFA_Unit::Pt);
614           pTabstopContext->Append(dwHashCode, fPos);
615           wsAlign.clear();
616           eStatus = TabStopStatus::None;
617         }
618         iCur++;
619         break;
620       default:
621         break;
622     }
623   }
624 
625   if (!wsAlign.IsEmpty()) {
626     uint32_t dwHashCode = FX_HashCode_GetW(wsAlign.AsStringView(), true);
627     CXFA_Measurement ms(
628         WideStringView(spTabStops.subspan(iLast, iCur - iLast)));
629     float fPos = ms.ToUnit(XFA_Unit::Pt);
630     pTabstopContext->Append(dwHashCode, fPos);
631   }
632   return true;
633 }
634 
TagProvider()635 CXFA_TextParser::TagProvider::TagProvider()
636     : m_bTagAvailable(false), m_bContent(false) {}
637 
~TagProvider()638 CXFA_TextParser::TagProvider::~TagProvider() {}
639