1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #include "xfa/fxfa/xfa_ffdoc.h"
8 
9 #include <algorithm>
10 #include <memory>
11 #include <vector>
12 
13 #include "core/fpdfapi/parser/cpdf_array.h"
14 #include "core/fpdfapi/parser/cpdf_document.h"
15 #include "core/fpdfapi/parser/fpdf_parser_decode.h"
16 #include "core/fpdfdoc/cpdf_nametree.h"
17 #include "core/fxcrt/fx_ext.h"
18 #include "core/fxcrt/fx_memory.h"
19 #include "third_party/base/ptr_util.h"
20 #include "xfa/fde/xml/fde_xml_imp.h"
21 #include "xfa/fwl/cfwl_notedriver.h"
22 #include "xfa/fxfa/app/xfa_ffnotify.h"
23 #include "xfa/fxfa/parser/cxfa_dataexporter.h"
24 #include "xfa/fxfa/parser/cxfa_dataimporter.h"
25 #include "xfa/fxfa/parser/cxfa_document.h"
26 #include "xfa/fxfa/xfa_checksum.h"
27 #include "xfa/fxfa/xfa_ffapp.h"
28 #include "xfa/fxfa/xfa_ffdocview.h"
29 #include "xfa/fxfa/xfa_ffwidget.h"
30 #include "xfa/fxfa/xfa_fontmgr.h"
31 
32 namespace {
33 
// Bit-field view of one decoded base64 group. Base64DecodePiece() stores the
// 6-bit values of up to four input characters into these fields, and
// Base64DecodeW() then copies the leading bytes of the object to the output.
// NOTE(review): the resulting byte values depend on how the compiler packs
// bit-fields into the underlying uint32_t - confirm for any new target.
struct FX_BASE64DATA {
  uint32_t data1 : 2;  // Upper two bits of the second character's value.
  uint32_t data2 : 6;  // All six bits of the first character's value.
  uint32_t data3 : 4;  // Upper four bits of the third character's value.
  uint32_t data4 : 4;  // Lower four bits of the second character's value.
  uint32_t data5 : 6;  // All six bits of the fourth character's value.
  uint32_t data6 : 2;  // Lower two bits of the third character's value.
  uint32_t data7 : 8;  // Never written by Base64DecodePiece().
};
43 
// Character code of '+', the first entry of the decoder table. Lower codes
// have no table entry.
const uint8_t kStartValuesRemoved = 43;

// Number of consecutive character codes the table covers: '+' (43) through
// 'z' (122).
const uint8_t kDecoderMapSize = 80;

// Maps (character code - kStartValuesRemoved) to the character's 6-bit base64
// value. 0xFF marks codes that are not part of the base64 alphabet.
const uint8_t g_FXBase64DecoderMap[kDecoderMapSize] = {
    // '+' through '/'
    0x3E, 0xFF, 0xFF, 0xFF, 0x3F,
    // '0' through '9'
    0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D,
    // ':' through '@'
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    // 'A' through 'Z'
    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B,
    0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
    0x18, 0x19,
    // '[' through '`'
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    // 'a' through 'z'
    0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25,
    0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, 0x30, 0x31,
    0x32, 0x33,
};
55 
base64DecoderValue(uint8_t val)56 uint8_t base64DecoderValue(uint8_t val) {
57   if (val < kStartValuesRemoved || val >= kStartValuesRemoved + kDecoderMapSize)
58     return 0xFF;
59   return g_FXBase64DecoderMap[val - kStartValuesRemoved];
60 }
61 
Base64DecodePiece(const FX_CHAR src[4],int32_t iChars,FX_BASE64DATA & dst,int32_t & iBytes)62 void Base64DecodePiece(const FX_CHAR src[4],
63                        int32_t iChars,
64                        FX_BASE64DATA& dst,
65                        int32_t& iBytes) {
66   ASSERT(iChars > 0 && iChars < 5);
67   iBytes = 1;
68   dst.data2 = base64DecoderValue(static_cast<uint8_t>(src[0]));
69   if (iChars > 1) {
70     uint8_t b = base64DecoderValue(static_cast<uint8_t>(src[1]));
71     dst.data1 = b >> 4;
72     dst.data4 = b;
73     if (iChars > 2) {
74       iBytes = 2;
75       b = base64DecoderValue(static_cast<uint8_t>(src[2]));
76       dst.data3 = b >> 2;
77       dst.data6 = b;
78       if (iChars > 3) {
79         iBytes = 3;
80         dst.data5 = base64DecoderValue(static_cast<uint8_t>(src[3]));
81       } else {
82         dst.data5 = 0;
83       }
84     } else {
85       dst.data3 = 0;
86     }
87   } else {
88     dst.data1 = 0;
89   }
90 }
91 
Base64DecodeW(const FX_WCHAR * pSrc,int32_t iSrcLen,uint8_t * pDst)92 int32_t Base64DecodeW(const FX_WCHAR* pSrc, int32_t iSrcLen, uint8_t* pDst) {
93   ASSERT(pSrc);
94   if (iSrcLen < 1) {
95     return 0;
96   }
97   while (iSrcLen > 0 && pSrc[iSrcLen - 1] == '=') {
98     iSrcLen--;
99   }
100   if (iSrcLen < 1) {
101     return 0;
102   }
103   if (!pDst) {
104     int32_t iDstLen = iSrcLen / 4 * 3;
105     iSrcLen %= 4;
106     if (iSrcLen == 1) {
107       iDstLen += 1;
108     } else if (iSrcLen == 2) {
109       iDstLen += 1;
110     } else if (iSrcLen == 3) {
111       iDstLen += 2;
112     }
113     return iDstLen;
114   }
115   FX_CHAR srcData[4];
116   FX_BASE64DATA dstData;
117   int32_t iChars = 4, iBytes;
118   uint8_t* pDstEnd = pDst;
119   while (iSrcLen > 0) {
120     if (iSrcLen > 3) {
121       srcData[0] = (FX_CHAR)*pSrc++;
122       srcData[1] = (FX_CHAR)*pSrc++;
123       srcData[2] = (FX_CHAR)*pSrc++;
124       srcData[3] = (FX_CHAR)*pSrc++;
125       iSrcLen -= 4;
126     } else {
127       *((uint32_t*)&dstData) = 0;
128       *((uint32_t*)srcData) = 0;
129       srcData[0] = (FX_CHAR)*pSrc++;
130       if (iSrcLen > 1) {
131         srcData[1] = (FX_CHAR)*pSrc++;
132       }
133       if (iSrcLen > 2) {
134         srcData[2] = (FX_CHAR)*pSrc++;
135       }
136       iChars = iSrcLen;
137       iSrcLen = 0;
138     }
139     Base64DecodePiece(srcData, iChars, dstData, iBytes);
140     *pDstEnd++ = ((uint8_t*)&dstData)[0];
141     if (iBytes > 1) {
142       *pDstEnd++ = ((uint8_t*)&dstData)[1];
143     }
144     if (iBytes > 2) {
145       *pDstEnd++ = ((uint8_t*)&dstData)[2];
146     }
147   }
148   return pDstEnd - pDst;
149 }
150 
151 }  // namespace
152 
CXFA_FFDoc(CXFA_FFApp * pApp,IXFA_DocEnvironment * pDocEnvironment)153 CXFA_FFDoc::CXFA_FFDoc(CXFA_FFApp* pApp, IXFA_DocEnvironment* pDocEnvironment)
154     : m_pDocEnvironment(pDocEnvironment),
155       m_pDocumentParser(nullptr),
156       m_pApp(pApp),
157       m_pNotify(nullptr),
158       m_pPDFDoc(nullptr),
159       m_dwDocType(XFA_DOCTYPE_Static) {}
160 
~CXFA_FFDoc()161 CXFA_FFDoc::~CXFA_FFDoc() {
162   CloseDoc();
163 }
164 
GetDocType()165 uint32_t CXFA_FFDoc::GetDocType() {
166   return m_dwDocType;
167 }
168 
StartLoad()169 int32_t CXFA_FFDoc::StartLoad() {
170   m_pNotify = pdfium::MakeUnique<CXFA_FFNotify>(this);
171   m_pDocumentParser = pdfium::MakeUnique<CXFA_DocumentParser>(m_pNotify.get());
172   return m_pDocumentParser->StartParse(m_pStream, XFA_XDPPACKET_XDP);
173 }
174 
XFA_GetPDFContentsFromPDFXML(CFDE_XMLNode * pPDFElement,uint8_t * & pByteBuffer,int32_t & iBufferSize)175 bool XFA_GetPDFContentsFromPDFXML(CFDE_XMLNode* pPDFElement,
176                                   uint8_t*& pByteBuffer,
177                                   int32_t& iBufferSize) {
178   CFDE_XMLElement* pDocumentElement = nullptr;
179   for (CFDE_XMLNode* pXMLNode =
180            pPDFElement->GetNodeItem(CFDE_XMLNode::FirstChild);
181        pXMLNode; pXMLNode = pXMLNode->GetNodeItem(CFDE_XMLNode::NextSibling)) {
182     if (pXMLNode->GetType() == FDE_XMLNODE_Element) {
183       CFX_WideString wsTagName;
184       CFDE_XMLElement* pXMLElement = static_cast<CFDE_XMLElement*>(pXMLNode);
185       pXMLElement->GetTagName(wsTagName);
186       if (wsTagName == L"document") {
187         pDocumentElement = pXMLElement;
188         break;
189       }
190     }
191   }
192   if (!pDocumentElement) {
193     return false;
194   }
195   CFDE_XMLElement* pChunkElement = nullptr;
196   for (CFDE_XMLNode* pXMLNode =
197            pDocumentElement->GetNodeItem(CFDE_XMLNode::FirstChild);
198        pXMLNode; pXMLNode = pXMLNode->GetNodeItem(CFDE_XMLNode::NextSibling)) {
199     if (pXMLNode->GetType() == FDE_XMLNODE_Element) {
200       CFX_WideString wsTagName;
201       CFDE_XMLElement* pXMLElement = static_cast<CFDE_XMLElement*>(pXMLNode);
202       pXMLElement->GetTagName(wsTagName);
203       if (wsTagName == L"chunk") {
204         pChunkElement = pXMLElement;
205         break;
206       }
207     }
208   }
209   if (!pChunkElement) {
210     return false;
211   }
212   CFX_WideString wsPDFContent;
213   pChunkElement->GetTextData(wsPDFContent);
214   iBufferSize =
215       Base64DecodeW(wsPDFContent.c_str(), wsPDFContent.GetLength(), nullptr);
216   pByteBuffer = FX_Alloc(uint8_t, iBufferSize + 1);
217   pByteBuffer[iBufferSize] = '0';  // FIXME: I bet this is wrong.
218   Base64DecodeW(wsPDFContent.c_str(), wsPDFContent.GetLength(), pByteBuffer);
219   return true;
220 }
XFA_XPDPacket_MergeRootNode(CXFA_Node * pOriginRoot,CXFA_Node * pNewRoot)221 void XFA_XPDPacket_MergeRootNode(CXFA_Node* pOriginRoot, CXFA_Node* pNewRoot) {
222   CXFA_Node* pChildNode = pNewRoot->GetNodeItem(XFA_NODEITEM_FirstChild);
223   while (pChildNode) {
224     CXFA_Node* pOriginChild =
225         pOriginRoot->GetFirstChildByName(pChildNode->GetNameHash());
226     if (pOriginChild) {
227       pChildNode = pChildNode->GetNodeItem(XFA_NODEITEM_NextSibling);
228     } else {
229       CXFA_Node* pNextSibling =
230           pChildNode->GetNodeItem(XFA_NODEITEM_NextSibling);
231       pNewRoot->RemoveChild(pChildNode);
232       pOriginRoot->InsertChild(pChildNode);
233       pChildNode = pNextSibling;
234       pNextSibling = nullptr;
235     }
236   }
237 }
238 
DoLoad(IFX_Pause * pPause)239 int32_t CXFA_FFDoc::DoLoad(IFX_Pause* pPause) {
240   int32_t iStatus = m_pDocumentParser->DoParse(pPause);
241   if (iStatus == XFA_PARSESTATUS_Done && !m_pPDFDoc)
242     return XFA_PARSESTATUS_SyntaxErr;
243   return iStatus;
244 }
245 
StopLoad()246 void CXFA_FFDoc::StopLoad() {
247   m_pApp->GetXFAFontMgr()->LoadDocFonts(this);
248   m_dwDocType = XFA_DOCTYPE_Static;
249   CXFA_Node* pConfig = ToNode(
250       m_pDocumentParser->GetDocument()->GetXFAObject(XFA_HASHCODE_Config));
251   if (!pConfig) {
252     return;
253   }
254   CXFA_Node* pAcrobat = pConfig->GetFirstChildByClass(XFA_Element::Acrobat);
255   if (!pAcrobat) {
256     return;
257   }
258   CXFA_Node* pAcrobat7 = pAcrobat->GetFirstChildByClass(XFA_Element::Acrobat7);
259   if (!pAcrobat7) {
260     return;
261   }
262   CXFA_Node* pDynamicRender =
263       pAcrobat7->GetFirstChildByClass(XFA_Element::DynamicRender);
264   if (!pDynamicRender) {
265     return;
266   }
267   CFX_WideString wsType;
268   if (pDynamicRender->TryContent(wsType) && wsType == L"required") {
269     m_dwDocType = XFA_DOCTYPE_Dynamic;
270   }
271 }
272 
CreateDocView(uint32_t dwView)273 CXFA_FFDocView* CXFA_FFDoc::CreateDocView(uint32_t dwView) {
274   if (!m_TypeToDocViewMap[dwView])
275     m_TypeToDocViewMap[dwView] = pdfium::MakeUnique<CXFA_FFDocView>(this);
276 
277   return m_TypeToDocViewMap[dwView].get();
278 }
279 
GetDocView(CXFA_LayoutProcessor * pLayout)280 CXFA_FFDocView* CXFA_FFDoc::GetDocView(CXFA_LayoutProcessor* pLayout) {
281   for (const auto& pair : m_TypeToDocViewMap) {
282     if (pair.second->GetXFALayout() == pLayout)
283       return pair.second.get();
284   }
285   return nullptr;
286 }
287 
GetDocView()288 CXFA_FFDocView* CXFA_FFDoc::GetDocView() {
289   auto it = m_TypeToDocViewMap.begin();
290   return it != m_TypeToDocViewMap.end() ? it->second.get() : nullptr;
291 }
292 
OpenDoc(const CFX_RetainPtr<IFX_SeekableReadStream> & pStream)293 bool CXFA_FFDoc::OpenDoc(const CFX_RetainPtr<IFX_SeekableReadStream>& pStream) {
294   m_pStream = pStream;
295   return true;
296 }
OpenDoc(CPDF_Document * pPDFDoc)297 bool CXFA_FFDoc::OpenDoc(CPDF_Document* pPDFDoc) {
298   if (!pPDFDoc)
299     return false;
300 
301   CPDF_Dictionary* pRoot = pPDFDoc->GetRoot();
302   if (!pRoot)
303     return false;
304 
305   CPDF_Dictionary* pAcroForm = pRoot->GetDictFor("AcroForm");
306   if (!pAcroForm)
307     return false;
308 
309   CPDF_Object* pElementXFA = pAcroForm->GetDirectObjectFor("XFA");
310   if (!pElementXFA)
311     return false;
312 
313   std::vector<CPDF_Stream*> xfaStreams;
314   if (pElementXFA->IsArray()) {
315     CPDF_Array* pXFAArray = (CPDF_Array*)pElementXFA;
316     for (size_t i = 0; i < pXFAArray->GetCount() / 2; i++) {
317       if (CPDF_Stream* pStream = pXFAArray->GetStreamAt(i * 2 + 1))
318         xfaStreams.push_back(pStream);
319     }
320   } else if (pElementXFA->IsStream()) {
321     xfaStreams.push_back((CPDF_Stream*)pElementXFA);
322   }
323   if (xfaStreams.empty())
324     return false;
325 
326   m_pPDFDoc = pPDFDoc;
327   m_pStream = MakeSeekableReadStream(xfaStreams);
328   return true;
329 }
330 
CloseDoc()331 bool CXFA_FFDoc::CloseDoc() {
332   for (const auto& pair : m_TypeToDocViewMap)
333     pair.second->RunDocClose();
334 
335   CXFA_Document* doc =
336       m_pDocumentParser ? m_pDocumentParser->GetDocument() : nullptr;
337   if (doc)
338     doc->ClearLayoutData();
339 
340   m_TypeToDocViewMap.clear();
341 
342   m_pNotify.reset(nullptr);
343   m_pApp->GetXFAFontMgr()->ReleaseDocFonts(this);
344 
345   for (const auto& pair : m_HashToDibDpiMap)
346     delete pair.second.pDibSource;
347 
348   m_HashToDibDpiMap.clear();
349   m_pApp->ClearEventTargets();
350   return true;
351 }
SetDocType(uint32_t dwType)352 void CXFA_FFDoc::SetDocType(uint32_t dwType) {
353   m_dwDocType = dwType;
354 }
GetPDFDoc()355 CPDF_Document* CXFA_FFDoc::GetPDFDoc() {
356   return m_pPDFDoc;
357 }
358 
GetPDFNamedImage(const CFX_WideStringC & wsName,int32_t & iImageXDpi,int32_t & iImageYDpi)359 CFX_DIBitmap* CXFA_FFDoc::GetPDFNamedImage(const CFX_WideStringC& wsName,
360                                            int32_t& iImageXDpi,
361                                            int32_t& iImageYDpi) {
362   if (!m_pPDFDoc)
363     return nullptr;
364 
365   uint32_t dwHash = FX_HashCode_GetW(wsName, false);
366   auto it = m_HashToDibDpiMap.find(dwHash);
367   if (it != m_HashToDibDpiMap.end()) {
368     iImageXDpi = it->second.iImageXDpi;
369     iImageYDpi = it->second.iImageYDpi;
370     return static_cast<CFX_DIBitmap*>(it->second.pDibSource);
371   }
372 
373   CPDF_Dictionary* pRoot = m_pPDFDoc->GetRoot();
374   if (!pRoot)
375     return nullptr;
376 
377   CPDF_Dictionary* pNames = pRoot->GetDictFor("Names");
378   if (!pNames)
379     return nullptr;
380 
381   CPDF_Dictionary* pXFAImages = pNames->GetDictFor("XFAImages");
382   if (!pXFAImages)
383     return nullptr;
384 
385   CPDF_NameTree nametree(pXFAImages);
386   CFX_ByteString bsName = PDF_EncodeText(wsName.c_str(), wsName.GetLength());
387   CPDF_Object* pObject = nametree.LookupValue(bsName);
388   if (!pObject) {
389     for (size_t i = 0; i < nametree.GetCount(); i++) {
390       CFX_ByteString bsTemp;
391       CPDF_Object* pTempObject = nametree.LookupValue(i, bsTemp);
392       if (bsTemp == bsName) {
393         pObject = pTempObject;
394         break;
395       }
396     }
397   }
398 
399   CPDF_Stream* pStream = ToStream(pObject);
400   if (!pStream)
401     return nullptr;
402 
403   CPDF_StreamAcc streamAcc;
404   streamAcc.LoadAllData(pStream);
405 
406   CFX_RetainPtr<IFX_SeekableReadStream> pImageFileRead =
407       IFX_MemoryStream::Create((uint8_t*)streamAcc.GetData(),
408                                streamAcc.GetSize());
409 
410   CFX_DIBitmap* pDibSource = XFA_LoadImageFromBuffer(
411       pImageFileRead, FXCODEC_IMAGE_UNKNOWN, iImageXDpi, iImageYDpi);
412   m_HashToDibDpiMap[dwHash] = {pDibSource, iImageXDpi, iImageYDpi};
413   return pDibSource;
414 }
415 
SavePackage(XFA_HashCode code,const CFX_RetainPtr<IFX_SeekableWriteStream> & pFile,CXFA_ChecksumContext * pCSContext)416 bool CXFA_FFDoc::SavePackage(
417     XFA_HashCode code,
418     const CFX_RetainPtr<IFX_SeekableWriteStream>& pFile,
419     CXFA_ChecksumContext* pCSContext) {
420   CXFA_Document* doc = m_pDocumentParser->GetDocument();
421   std::unique_ptr<CXFA_DataExporter> pExport(new CXFA_DataExporter(doc));
422   CXFA_Node* pNode = code == XFA_HASHCODE_Xfa ? doc->GetRoot()
423                                               : ToNode(doc->GetXFAObject(code));
424   if (!pNode)
425     return !!pExport->Export(pFile);
426 
427   CFX_ByteString bsChecksum;
428   if (pCSContext)
429     bsChecksum = pCSContext->GetChecksum();
430 
431   return !!pExport->Export(
432       pFile, pNode, 0, bsChecksum.GetLength() ? bsChecksum.c_str() : nullptr);
433 }
434 
ImportData(const CFX_RetainPtr<IFX_SeekableReadStream> & pStream,bool bXDP)435 bool CXFA_FFDoc::ImportData(
436     const CFX_RetainPtr<IFX_SeekableReadStream>& pStream,
437     bool bXDP) {
438   auto importer =
439       pdfium::MakeUnique<CXFA_DataImporter>(m_pDocumentParser->GetDocument());
440   return importer->ImportData(pStream);
441 }
442