1 // Copyright 2017 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #include "xfa/fxfa/cxfa_textparser.h"
8 
9 #include <algorithm>
10 #include <utility>
11 #include <vector>
12 
13 #include "core/fxcrt/css/cfx_css.h"
14 #include "core/fxcrt/css/cfx_csscomputedstyle.h"
15 #include "core/fxcrt/css/cfx_cssstyleselector.h"
16 #include "core/fxcrt/css/cfx_cssstylesheet.h"
17 #include "core/fxcrt/fx_codepage.h"
18 #include "core/fxcrt/xml/cfx_xmlelement.h"
19 #include "core/fxcrt/xml/cfx_xmlnode.h"
20 #include "third_party/base/ptr_util.h"
21 #include "xfa/fgas/font/cfgas_fontmgr.h"
22 #include "xfa/fxfa/cxfa_ffapp.h"
23 #include "xfa/fxfa/cxfa_ffdoc.h"
24 #include "xfa/fxfa/cxfa_fontmgr.h"
25 #include "xfa/fxfa/cxfa_textparsecontext.h"
26 #include "xfa/fxfa/cxfa_textprovider.h"
27 #include "xfa/fxfa/cxfa_texttabstopscontext.h"
28 #include "xfa/fxfa/parser/cxfa_font.h"
29 #include "xfa/fxfa/parser/cxfa_measurement.h"
30 #include "xfa/fxfa/parser/cxfa_para.h"
31 
namespace {

// Parser states used by CXFA_TextParser::GetTabstops() while scanning a
// tab-stops custom style value.
enum class TabStopStatus {
  Error,        // Not referenced by the code visible in this file.
  EOS,          // Not referenced by the code visible in this file.
  None,         // Between entries: skipping characters <= ' '.
  Alignment,    // Reading the alignment keyword of an entry.
  StartLeader,  // Alignment read; deciding whether a leader spec follows.
  Leader,       // Not referenced by the code visible in this file.
  Location,     // Reading the position (measurement) of an entry.
};

}  // namespace
45 
CXFA_TextParser()46 CXFA_TextParser::CXFA_TextParser()
47     : m_bParsed(false), m_cssInitialized(false) {}
48 
~CXFA_TextParser()49 CXFA_TextParser::~CXFA_TextParser() {}
50 
Reset()51 void CXFA_TextParser::Reset() {
52   m_mapXMLNodeToParseContext.clear();
53   m_bParsed = false;
54 }
55 
InitCSSData(CXFA_TextProvider * pTextProvider)56 void CXFA_TextParser::InitCSSData(CXFA_TextProvider* pTextProvider) {
57   if (!pTextProvider)
58     return;
59 
60   if (!m_pSelector) {
61     m_pSelector = pdfium::MakeUnique<CFX_CSSStyleSelector>();
62 
63     CXFA_Font* font = pTextProvider->GetFontIfExists();
64     m_pSelector->SetDefFontSize(font ? font->GetFontSize() : 10.0f);
65   }
66 
67   if (m_cssInitialized)
68     return;
69 
70   m_cssInitialized = true;
71   auto uaSheet = LoadDefaultSheetStyle();
72   m_pSelector->SetUAStyleSheet(std::move(uaSheet));
73   m_pSelector->UpdateStyleIndex();
74 }
75 
LoadDefaultSheetStyle()76 std::unique_ptr<CFX_CSSStyleSheet> CXFA_TextParser::LoadDefaultSheetStyle() {
77   static const wchar_t s_pStyle[] =
78       L"html,body,ol,p,ul{display:block}"
79       L"li{display:list-item}"
80       L"ol,ul{padding-left:33px;margin:1.12em 0}"
81       L"ol{list-style-type:decimal}"
82       L"a{color:#0000ff;text-decoration:underline}"
83       L"b{font-weight:bolder}"
84       L"i{font-style:italic}"
85       L"sup{vertical-align:+15em;font-size:.66em}"
86       L"sub{vertical-align:-15em;font-size:.66em}";
87 
88   auto sheet = pdfium::MakeUnique<CFX_CSSStyleSheet>();
89   return sheet->LoadBuffer(s_pStyle, wcslen(s_pStyle)) ? std::move(sheet)
90                                                        : nullptr;
91 }
92 
CreateRootStyle(CXFA_TextProvider * pTextProvider)93 RetainPtr<CFX_CSSComputedStyle> CXFA_TextParser::CreateRootStyle(
94     CXFA_TextProvider* pTextProvider) {
95   CXFA_Para* para = pTextProvider->GetParaIfExists();
96   auto pStyle = m_pSelector->CreateComputedStyle(nullptr);
97   float fLineHeight = 0;
98   float fFontSize = 10;
99 
100   if (para) {
101     fLineHeight = para->GetLineHeight();
102     CFX_CSSLength indent;
103     indent.Set(CFX_CSSLengthUnit::Point, para->GetTextIndent());
104     pStyle->SetTextIndent(indent);
105     CFX_CSSTextAlign hAlign = CFX_CSSTextAlign::Left;
106     switch (para->GetHorizontalAlign()) {
107       case XFA_AttributeEnum::Center:
108         hAlign = CFX_CSSTextAlign::Center;
109         break;
110       case XFA_AttributeEnum::Right:
111         hAlign = CFX_CSSTextAlign::Right;
112         break;
113       case XFA_AttributeEnum::Justify:
114         hAlign = CFX_CSSTextAlign::Justify;
115         break;
116       case XFA_AttributeEnum::JustifyAll:
117         hAlign = CFX_CSSTextAlign::JustifyAll;
118         break;
119       case XFA_AttributeEnum::Left:
120       case XFA_AttributeEnum::Radix:
121         break;
122       default:
123         NOTREACHED();
124         break;
125     }
126     pStyle->SetTextAlign(hAlign);
127     CFX_CSSRect rtMarginWidth;
128     rtMarginWidth.left.Set(CFX_CSSLengthUnit::Point, para->GetMarginLeft());
129     rtMarginWidth.top.Set(CFX_CSSLengthUnit::Point, para->GetSpaceAbove());
130     rtMarginWidth.right.Set(CFX_CSSLengthUnit::Point, para->GetMarginRight());
131     rtMarginWidth.bottom.Set(CFX_CSSLengthUnit::Point, para->GetSpaceBelow());
132     pStyle->SetMarginWidth(rtMarginWidth);
133   }
134 
135   CXFA_Font* font = pTextProvider->GetFontIfExists();
136   if (font) {
137     pStyle->SetColor(font->GetColor());
138     pStyle->SetFontStyle(font->IsItalic() ? CFX_CSSFontStyle::Italic
139                                           : CFX_CSSFontStyle::Normal);
140     pStyle->SetFontWeight(font->IsBold() ? FXFONT_FW_BOLD : FXFONT_FW_NORMAL);
141     pStyle->SetNumberVerticalAlign(-font->GetBaselineShift());
142     fFontSize = font->GetFontSize();
143     CFX_CSSLength letterSpacing;
144     letterSpacing.Set(CFX_CSSLengthUnit::Point, font->GetLetterSpacing());
145     pStyle->SetLetterSpacing(letterSpacing);
146     uint32_t dwDecoration = 0;
147     if (font->GetLineThrough() > 0)
148       dwDecoration |= CFX_CSSTEXTDECORATION_LineThrough;
149     if (font->GetUnderline() > 1)
150       dwDecoration |= CFX_CSSTEXTDECORATION_Double;
151     else if (font->GetUnderline() > 0)
152       dwDecoration |= CFX_CSSTEXTDECORATION_Underline;
153 
154     pStyle->SetTextDecoration(dwDecoration);
155   }
156   pStyle->SetLineHeight(fLineHeight);
157   pStyle->SetFontSize(fFontSize);
158   return pStyle;
159 }
160 
CreateStyle(CFX_CSSComputedStyle * pParentStyle)161 RetainPtr<CFX_CSSComputedStyle> CXFA_TextParser::CreateStyle(
162     CFX_CSSComputedStyle* pParentStyle) {
163   auto pNewStyle = m_pSelector->CreateComputedStyle(pParentStyle);
164   ASSERT(pNewStyle);
165   if (!pParentStyle)
166     return pNewStyle;
167 
168   uint32_t dwDecoration = pParentStyle->GetTextDecoration();
169   float fBaseLine = 0;
170   if (pParentStyle->GetVerticalAlign() == CFX_CSSVerticalAlign::Number)
171     fBaseLine = pParentStyle->GetNumberVerticalAlign();
172 
173   pNewStyle->SetTextDecoration(dwDecoration);
174   pNewStyle->SetNumberVerticalAlign(fBaseLine);
175 
176   const CFX_CSSRect* pRect = pParentStyle->GetMarginWidth();
177   if (pRect)
178     pNewStyle->SetMarginWidth(*pRect);
179   return pNewStyle;
180 }
181 
ComputeStyle(CFX_XMLNode * pXMLNode,CFX_CSSComputedStyle * pParentStyle)182 RetainPtr<CFX_CSSComputedStyle> CXFA_TextParser::ComputeStyle(
183     CFX_XMLNode* pXMLNode,
184     CFX_CSSComputedStyle* pParentStyle) {
185   auto it = m_mapXMLNodeToParseContext.find(pXMLNode);
186   if (it == m_mapXMLNodeToParseContext.end())
187     return nullptr;
188 
189   CXFA_TextParseContext* pContext = it->second.get();
190   if (!pContext)
191     return nullptr;
192 
193   pContext->m_pParentStyle.Reset(pParentStyle);
194 
195   auto tagProvider = ParseTagInfo(pXMLNode);
196   if (tagProvider->m_bContent)
197     return nullptr;
198 
199   auto pStyle = CreateStyle(pParentStyle);
200   m_pSelector->ComputeStyle(pContext->GetDecls(),
201                             tagProvider->GetAttribute(L"style"),
202                             tagProvider->GetAttribute(L"align"), pStyle.Get());
203   return pStyle;
204 }
205 
DoParse(CFX_XMLNode * pXMLContainer,CXFA_TextProvider * pTextProvider)206 void CXFA_TextParser::DoParse(CFX_XMLNode* pXMLContainer,
207                               CXFA_TextProvider* pTextProvider) {
208   if (!pXMLContainer || !pTextProvider || m_bParsed)
209     return;
210 
211   m_bParsed = true;
212   InitCSSData(pTextProvider);
213   auto pRootStyle = CreateRootStyle(pTextProvider);
214   ParseRichText(pXMLContainer, pRootStyle.Get());
215 }
216 
ParseRichText(CFX_XMLNode * pXMLNode,CFX_CSSComputedStyle * pParentStyle)217 void CXFA_TextParser::ParseRichText(CFX_XMLNode* pXMLNode,
218                                     CFX_CSSComputedStyle* pParentStyle) {
219   if (!pXMLNode)
220     return;
221 
222   auto tagProvider = ParseTagInfo(pXMLNode);
223   if (!tagProvider->m_bTagAvailable)
224     return;
225 
226   RetainPtr<CFX_CSSComputedStyle> pNewStyle;
227   if ((tagProvider->GetTagName() != L"body") ||
228       (tagProvider->GetTagName() != L"html")) {
229     auto pTextContext = pdfium::MakeUnique<CXFA_TextParseContext>();
230     CFX_CSSDisplay eDisplay = CFX_CSSDisplay::Inline;
231     if (!tagProvider->m_bContent) {
232       auto declArray =
233           m_pSelector->MatchDeclarations(tagProvider->GetTagName());
234       pNewStyle = CreateStyle(pParentStyle);
235       m_pSelector->ComputeStyle(declArray, tagProvider->GetAttribute(L"style"),
236                                 tagProvider->GetAttribute(L"align"),
237                                 pNewStyle.Get());
238 
239       if (!declArray.empty())
240         pTextContext->SetDecls(std::move(declArray));
241 
242       eDisplay = pNewStyle->GetDisplay();
243     }
244     pTextContext->SetDisplay(eDisplay);
245     m_mapXMLNodeToParseContext[pXMLNode] = std::move(pTextContext);
246   }
247 
248   for (CFX_XMLNode* pXMLChild = pXMLNode->GetNodeItem(CFX_XMLNode::FirstChild);
249        pXMLChild;
250        pXMLChild = pXMLChild->GetNodeItem(CFX_XMLNode::NextSibling)) {
251     ParseRichText(pXMLChild, pNewStyle.Get());
252   }
253 }
254 
TagValidate(const WideString & wsName) const255 bool CXFA_TextParser::TagValidate(const WideString& wsName) const {
256   static const uint32_t s_XFATagName[] = {
257       0x61,        // a
258       0x62,        // b
259       0x69,        // i
260       0x70,        // p
261       0x0001f714,  // br
262       0x00022a55,  // li
263       0x000239bb,  // ol
264       0x00025881,  // ul
265       0x0bd37faa,  // sub
266       0x0bd37fb8,  // sup
267       0xa73e3af2,  // span
268       0xb182eaae,  // body
269       0xdb8ac455,  // html
270   };
271   static const int32_t s_iCount = FX_ArraySize(s_XFATagName);
272 
273   return std::binary_search(s_XFATagName, s_XFATagName + s_iCount,
274                             FX_HashCode_GetW(wsName.AsStringView(), true));
275 }
276 
ParseTagInfo(CFX_XMLNode * pXMLNode)277 std::unique_ptr<CXFA_TextParser::TagProvider> CXFA_TextParser::ParseTagInfo(
278     CFX_XMLNode* pXMLNode) {
279   auto tagProvider = pdfium::MakeUnique<TagProvider>();
280 
281   WideString wsName;
282   if (pXMLNode->GetType() == FX_XMLNODE_Element) {
283     CFX_XMLElement* pXMLElement = static_cast<CFX_XMLElement*>(pXMLNode);
284     wsName = pXMLElement->GetLocalTagName();
285     tagProvider->SetTagName(wsName);
286     tagProvider->m_bTagAvailable = TagValidate(wsName);
287 
288     WideString wsValue = pXMLElement->GetString(L"style");
289     if (!wsValue.IsEmpty())
290       tagProvider->SetAttribute(L"style", wsValue);
291   } else if (pXMLNode->GetType() == FX_XMLNODE_Text) {
292     tagProvider->m_bTagAvailable = true;
293     tagProvider->m_bContent = true;
294   }
295   return tagProvider;
296 }
297 
GetVAlign(CXFA_TextProvider * pTextProvider) const298 XFA_AttributeEnum CXFA_TextParser::GetVAlign(
299     CXFA_TextProvider* pTextProvider) const {
300   CXFA_Para* para = pTextProvider->GetParaIfExists();
301   return para ? para->GetVerticalAlign() : XFA_AttributeEnum::Top;
302 }
303 
GetTabInterval(CFX_CSSComputedStyle * pStyle) const304 float CXFA_TextParser::GetTabInterval(CFX_CSSComputedStyle* pStyle) const {
305   WideString wsValue;
306   if (pStyle && pStyle->GetCustomStyle(L"tab-interval", wsValue))
307     return CXFA_Measurement(wsValue.AsStringView()).ToUnit(XFA_Unit::Pt);
308   return 36;
309 }
310 
CountTabs(CFX_CSSComputedStyle * pStyle) const311 int32_t CXFA_TextParser::CountTabs(CFX_CSSComputedStyle* pStyle) const {
312   WideString wsValue;
313   if (pStyle && pStyle->GetCustomStyle(L"xfa-tab-count", wsValue))
314     return wsValue.GetInteger();
315   return 0;
316 }
317 
IsSpaceRun(CFX_CSSComputedStyle * pStyle) const318 bool CXFA_TextParser::IsSpaceRun(CFX_CSSComputedStyle* pStyle) const {
319   WideString wsValue;
320   if (pStyle && pStyle->GetCustomStyle(L"xfa-spacerun", wsValue)) {
321     wsValue.MakeLower();
322     return wsValue == L"yes";
323   }
324   return false;
325 }
326 
GetFont(CXFA_FFDoc * doc,CXFA_TextProvider * pTextProvider,CFX_CSSComputedStyle * pStyle) const327 RetainPtr<CFGAS_GEFont> CXFA_TextParser::GetFont(
328     CXFA_FFDoc* doc,
329     CXFA_TextProvider* pTextProvider,
330     CFX_CSSComputedStyle* pStyle) const {
331   WideString wsFamily = L"Courier";
332   uint32_t dwStyle = 0;
333   CXFA_Font* font = pTextProvider->GetFontIfExists();
334   if (font) {
335     wsFamily = font->GetTypeface();
336     if (font->IsBold())
337       dwStyle |= FXFONT_BOLD;
338     if (font->IsItalic())
339       dwStyle |= FXFONT_BOLD;
340   }
341 
342   if (pStyle) {
343     int32_t iCount = pStyle->CountFontFamilies();
344     if (iCount > 0)
345       wsFamily = pStyle->GetFontFamily(iCount - 1).AsStringView();
346 
347     dwStyle = 0;
348     if (pStyle->GetFontWeight() > FXFONT_FW_NORMAL)
349       dwStyle |= FXFONT_BOLD;
350     if (pStyle->GetFontStyle() == CFX_CSSFontStyle::Italic)
351       dwStyle |= FXFONT_ITALIC;
352   }
353 
354   CXFA_FontMgr* pFontMgr = doc->GetApp()->GetXFAFontMgr();
355   return pFontMgr->GetFont(doc, wsFamily.AsStringView(), dwStyle);
356 }
357 
GetFontSize(CXFA_TextProvider * pTextProvider,CFX_CSSComputedStyle * pStyle) const358 float CXFA_TextParser::GetFontSize(CXFA_TextProvider* pTextProvider,
359                                    CFX_CSSComputedStyle* pStyle) const {
360   if (pStyle)
361     return pStyle->GetFontSize();
362 
363   CXFA_Font* font = pTextProvider->GetFontIfExists();
364   return font ? font->GetFontSize() : 10;
365 }
366 
GetHorScale(CXFA_TextProvider * pTextProvider,CFX_CSSComputedStyle * pStyle,CFX_XMLNode * pXMLNode) const367 int32_t CXFA_TextParser::GetHorScale(CXFA_TextProvider* pTextProvider,
368                                      CFX_CSSComputedStyle* pStyle,
369                                      CFX_XMLNode* pXMLNode) const {
370   if (pStyle) {
371     WideString wsValue;
372     if (pStyle->GetCustomStyle(L"xfa-font-horizontal-scale", wsValue))
373       return wsValue.GetInteger();
374 
375     while (pXMLNode) {
376       auto it = m_mapXMLNodeToParseContext.find(pXMLNode);
377       if (it != m_mapXMLNodeToParseContext.end()) {
378         CXFA_TextParseContext* pContext = it->second.get();
379         if (pContext && pContext->m_pParentStyle &&
380             pContext->m_pParentStyle->GetCustomStyle(
381                 L"xfa-font-horizontal-scale", wsValue)) {
382           return wsValue.GetInteger();
383         }
384       }
385       pXMLNode = pXMLNode->GetNodeItem(CFX_XMLNode::Parent);
386     }
387   }
388 
389   CXFA_Font* font = pTextProvider->GetFontIfExists();
390   return font ? static_cast<int32_t>(font->GetHorizontalScale()) : 100;
391 }
392 
GetVerScale(CXFA_TextProvider * pTextProvider,CFX_CSSComputedStyle * pStyle) const393 int32_t CXFA_TextParser::GetVerScale(CXFA_TextProvider* pTextProvider,
394                                      CFX_CSSComputedStyle* pStyle) const {
395   if (pStyle) {
396     WideString wsValue;
397     if (pStyle->GetCustomStyle(L"xfa-font-vertical-scale", wsValue))
398       return wsValue.GetInteger();
399   }
400 
401   CXFA_Font* font = pTextProvider->GetFontIfExists();
402   return font ? static_cast<int32_t>(font->GetVerticalScale()) : 100;
403 }
404 
GetUnderline(CXFA_TextProvider * pTextProvider,CFX_CSSComputedStyle * pStyle,int32_t & iUnderline,XFA_AttributeEnum & iPeriod) const405 void CXFA_TextParser::GetUnderline(CXFA_TextProvider* pTextProvider,
406                                    CFX_CSSComputedStyle* pStyle,
407                                    int32_t& iUnderline,
408                                    XFA_AttributeEnum& iPeriod) const {
409   iUnderline = 0;
410   iPeriod = XFA_AttributeEnum::All;
411   CXFA_Font* font = pTextProvider->GetFontIfExists();
412   if (!pStyle) {
413     if (font) {
414       iUnderline = font->GetUnderline();
415       iPeriod = font->GetUnderlinePeriod();
416     }
417     return;
418   }
419 
420   uint32_t dwDecoration = pStyle->GetTextDecoration();
421   if (dwDecoration & CFX_CSSTEXTDECORATION_Double)
422     iUnderline = 2;
423   else if (dwDecoration & CFX_CSSTEXTDECORATION_Underline)
424     iUnderline = 1;
425 
426   WideString wsValue;
427   if (pStyle->GetCustomStyle(L"underlinePeriod", wsValue)) {
428     if (wsValue == L"word")
429       iPeriod = XFA_AttributeEnum::Word;
430   } else if (font) {
431     iPeriod = font->GetUnderlinePeriod();
432   }
433 }
434 
GetLinethrough(CXFA_TextProvider * pTextProvider,CFX_CSSComputedStyle * pStyle,int32_t & iLinethrough) const435 void CXFA_TextParser::GetLinethrough(CXFA_TextProvider* pTextProvider,
436                                      CFX_CSSComputedStyle* pStyle,
437                                      int32_t& iLinethrough) const {
438   if (pStyle) {
439     uint32_t dwDecoration = pStyle->GetTextDecoration();
440     iLinethrough = (dwDecoration & CFX_CSSTEXTDECORATION_LineThrough) ? 1 : 0;
441     return;
442   }
443 
444   CXFA_Font* font = pTextProvider->GetFontIfExists();
445   if (font)
446     iLinethrough = font->GetLineThrough();
447 }
448 
GetColor(CXFA_TextProvider * pTextProvider,CFX_CSSComputedStyle * pStyle) const449 FX_ARGB CXFA_TextParser::GetColor(CXFA_TextProvider* pTextProvider,
450                                   CFX_CSSComputedStyle* pStyle) const {
451   if (pStyle)
452     return pStyle->GetColor();
453 
454   CXFA_Font* font = pTextProvider->GetFontIfExists();
455   return font ? font->GetColor() : 0xFF000000;
456 }
457 
GetBaseline(CXFA_TextProvider * pTextProvider,CFX_CSSComputedStyle * pStyle) const458 float CXFA_TextParser::GetBaseline(CXFA_TextProvider* pTextProvider,
459                                    CFX_CSSComputedStyle* pStyle) const {
460   if (pStyle) {
461     if (pStyle->GetVerticalAlign() == CFX_CSSVerticalAlign::Number)
462       return pStyle->GetNumberVerticalAlign();
463   } else {
464     CXFA_Font* font = pTextProvider->GetFontIfExists();
465     if (font)
466       return font->GetBaselineShift();
467   }
468   return 0;
469 }
470 
GetLineHeight(CXFA_TextProvider * pTextProvider,CFX_CSSComputedStyle * pStyle,bool bFirst,float fVerScale) const471 float CXFA_TextParser::GetLineHeight(CXFA_TextProvider* pTextProvider,
472                                      CFX_CSSComputedStyle* pStyle,
473                                      bool bFirst,
474                                      float fVerScale) const {
475   float fLineHeight = 0;
476   if (pStyle) {
477     fLineHeight = pStyle->GetLineHeight();
478   } else {
479     CXFA_Para* para = pTextProvider->GetParaIfExists();
480     if (para)
481       fLineHeight = para->GetLineHeight();
482   }
483 
484   if (bFirst) {
485     float fFontSize = GetFontSize(pTextProvider, pStyle);
486     if (fLineHeight < 0.1f)
487       fLineHeight = fFontSize;
488     else
489       fLineHeight = std::min(fLineHeight, fFontSize);
490   } else if (fLineHeight < 0.1f) {
491     fLineHeight = GetFontSize(pTextProvider, pStyle) * 1.2f;
492   }
493   fLineHeight *= fVerScale;
494   return fLineHeight;
495 }
496 
GetEmbbedObj(CXFA_TextProvider * pTextProvider,CFX_XMLNode * pXMLNode,WideString & wsValue)497 bool CXFA_TextParser::GetEmbbedObj(CXFA_TextProvider* pTextProvider,
498                                    CFX_XMLNode* pXMLNode,
499                                    WideString& wsValue) {
500   wsValue.clear();
501   if (!pXMLNode)
502     return false;
503 
504   bool bRet = false;
505   if (pXMLNode->GetType() == FX_XMLNODE_Element) {
506     CFX_XMLElement* pElement = static_cast<CFX_XMLElement*>(pXMLNode);
507     WideString wsAttr = pElement->GetString(L"xfa:embed");
508     if (wsAttr.IsEmpty())
509       return false;
510     if (wsAttr[0] == L'#')
511       wsAttr.Delete(0);
512 
513     WideString ws = pElement->GetString(L"xfa:embedType");
514     if (ws.IsEmpty())
515       ws = L"som";
516     else
517       ws.MakeLower();
518 
519     bool bURI = (ws == L"uri");
520     if (!bURI && ws != L"som")
521       return false;
522 
523     ws = pElement->GetString(L"xfa:embedMode");
524     if (ws.IsEmpty())
525       ws = L"formatted";
526     else
527       ws.MakeLower();
528 
529     bool bRaw = (ws == L"raw");
530     if (!bRaw && ws != L"formatted")
531       return false;
532 
533     bRet = pTextProvider->GetEmbbedObj(bURI, bRaw, wsAttr, wsValue);
534   }
535   return bRet;
536 }
537 
GetParseContextFromMap(CFX_XMLNode * pXMLNode)538 CXFA_TextParseContext* CXFA_TextParser::GetParseContextFromMap(
539     CFX_XMLNode* pXMLNode) {
540   auto it = m_mapXMLNodeToParseContext.find(pXMLNode);
541   return it != m_mapXMLNodeToParseContext.end() ? it->second.get() : nullptr;
542 }
543 
// Parses the tab-stops custom style of |pStyle| and appends every tab stop
// found to |pTabstopContext|.
//
// The value is read from "xfa-tab-stops", falling back to "tab-stops".
// Each entry is scanned as: an alignment keyword, an optional leader spec
// starting with 'l' and running to its balanced closing ')', and a position
// that is converted to points. (Presumably entries look like
// "left 1in center leader(...) 2in" — confirm against the XFA spec.)
// Each stop is appended as (hash of the alignment keyword, position in pt).
//
// Returns false when either argument is null or neither custom style exists;
// returns true otherwise, even if no tab stop ended up being appended.
bool CXFA_TextParser::GetTabstops(CFX_CSSComputedStyle* pStyle,
                                  CXFA_TextTabstopsContext* pTabstopContext) {
  if (!pStyle || !pTabstopContext)
    return false;

  WideString wsValue;
  if (!pStyle->GetCustomStyle(L"xfa-tab-stops", wsValue) &&
      !pStyle->GetCustomStyle(L"tab-stops", wsValue)) {
    return false;
  }

  int32_t iLength = wsValue.GetLength();
  const wchar_t* pTabStops = wsValue.c_str();
  // |iCur| is the scan cursor; |iLast| marks where the token currently being
  // read (alignment keyword or position) started.
  int32_t iCur = 0;
  int32_t iLast = 0;
  WideString wsAlign;
  TabStopStatus eStatus = TabStopStatus::None;
  wchar_t ch;
  while (iCur < iLength) {
    ch = pTabStops[iCur];
    switch (eStatus) {
      case TabStopStatus::None:
        // Skip spaces/control characters; the first other character starts an
        // alignment keyword and is re-examined in the Alignment state.
        if (ch <= ' ') {
          iCur++;
        } else {
          eStatus = TabStopStatus::Alignment;
          iLast = iCur;
        }
        break;
      case TabStopStatus::Alignment:
        // Only a literal space ends the keyword. Afterwards skip any run of
        // characters <= ' ' and anchor |iLast| at the next token.
        if (ch == ' ') {
          wsAlign = WideStringView(pTabStops + iLast, iCur - iLast);
          eStatus = TabStopStatus::StartLeader;
          iCur++;
          while (iCur < iLength && pTabStops[iCur] <= ' ')
            iCur++;
          iLast = iCur;
        } else {
          iCur++;
        }
        break;
      case TabStopStatus::StartLeader:
        if (ch != 'l') {
          // No leader spec: the current character already belongs to the
          // position, so switch state without consuming it.
          eStatus = TabStopStatus::Location;
        } else {
          // Skip the leader spec: consume up to and including the ')' that
          // brings the parenthesis depth back to zero (or to end of input).
          int32_t iCount = 0;
          while (iCur < iLength) {
            ch = pTabStops[iCur];
            iCur++;
            if (ch == '(') {
              iCount++;
            } else if (ch == ')') {
              iCount--;
              if (iCount == 0)
                break;
            }
          }
          while (iCur < iLength && pTabStops[iCur] <= ' ')
            iCur++;

          iLast = iCur;
          eStatus = TabStopStatus::Location;
        }
        break;
      case TabStopStatus::Location:
        // A space ends the position token: emit the tab stop and start over.
        if (ch == ' ') {
          uint32_t dwHashCode = FX_HashCode_GetW(wsAlign.AsStringView(), true);
          CXFA_Measurement ms(WideStringView(pTabStops + iLast, iCur - iLast));
          float fPos = ms.ToUnit(XFA_Unit::Pt);
          pTabstopContext->Append(dwHashCode, fPos);
          wsAlign.clear();
          eStatus = TabStopStatus::None;
        }
        iCur++;
        break;
      default:
        // Error/EOS/Leader are never assigned to |eStatus| in this function.
        break;
    }
  }

  // Input ended while an entry was still pending (no trailing space after the
  // position): emit it using whatever lies between |iLast| and the end.
  if (!wsAlign.IsEmpty()) {
    uint32_t dwHashCode = FX_HashCode_GetW(wsAlign.AsStringView(), true);
    CXFA_Measurement ms(WideStringView(pTabStops + iLast, iCur - iLast));
    float fPos = ms.ToUnit(XFA_Unit::Pt);
    pTabstopContext->Append(dwHashCode, fPos);
  }
  return true;
}
632 
TagProvider()633 CXFA_TextParser::TagProvider::TagProvider()
634     : m_bTagAvailable(false), m_bContent(false) {}
635 
~TagProvider()636 CXFA_TextParser::TagProvider::~TagProvider() {}
637