1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #include "public/fpdf_ppo.h"
8 
9 #include <map>
10 #include <memory>
11 #include <utility>
12 #include <vector>
13 
14 #include "core/fpdfapi/parser/cpdf_array.h"
15 #include "core/fpdfapi/parser/cpdf_document.h"
16 #include "core/fpdfapi/parser/cpdf_name.h"
17 #include "core/fpdfapi/parser/cpdf_number.h"
18 #include "core/fpdfapi/parser/cpdf_reference.h"
19 #include "core/fpdfapi/parser/cpdf_stream.h"
20 #include "core/fpdfapi/parser/cpdf_string.h"
21 #include "fpdfsdk/fsdk_define.h"
22 #include "third_party/base/ptr_util.h"
23 #include "third_party/base/stl_util.h"
24 
25 namespace {
26 
PageDictGetInheritableTag(CPDF_Dictionary * pDict,const CFX_ByteString & bsSrcTag)27 CPDF_Object* PageDictGetInheritableTag(CPDF_Dictionary* pDict,
28                                        const CFX_ByteString& bsSrcTag) {
29   if (!pDict || bsSrcTag.IsEmpty())
30     return nullptr;
31   if (!pDict->KeyExist("Parent") || !pDict->KeyExist("Type"))
32     return nullptr;
33 
34   CPDF_Object* pType = pDict->GetObjectFor("Type")->GetDirect();
35   if (!ToName(pType))
36     return nullptr;
37   if (pType->GetString().Compare("Page"))
38     return nullptr;
39 
40   CPDF_Dictionary* pp =
41       ToDictionary(pDict->GetObjectFor("Parent")->GetDirect());
42   if (!pp)
43     return nullptr;
44 
45   if (pDict->KeyExist(bsSrcTag))
46     return pDict->GetObjectFor(bsSrcTag);
47 
48   while (pp) {
49     if (pp->KeyExist(bsSrcTag))
50       return pp->GetObjectFor(bsSrcTag);
51     if (!pp->KeyExist("Parent"))
52       break;
53     pp = ToDictionary(pp->GetObjectFor("Parent")->GetDirect());
54   }
55   return nullptr;
56 }
57 
CopyInheritable(CPDF_Dictionary * pCurPageDict,CPDF_Dictionary * pSrcPageDict,const CFX_ByteString & key)58 bool CopyInheritable(CPDF_Dictionary* pCurPageDict,
59                      CPDF_Dictionary* pSrcPageDict,
60                      const CFX_ByteString& key) {
61   if (pCurPageDict->KeyExist(key))
62     return true;
63 
64   CPDF_Object* pInheritable = PageDictGetInheritableTag(pSrcPageDict, key);
65   if (!pInheritable)
66     return false;
67 
68   pCurPageDict->SetFor(key, pInheritable->Clone());
69   return true;
70 }
71 
ParserPageRangeString(CFX_ByteString rangstring,std::vector<uint16_t> * pageArray,int nCount)72 bool ParserPageRangeString(CFX_ByteString rangstring,
73                            std::vector<uint16_t>* pageArray,
74                            int nCount) {
75   if (rangstring.IsEmpty())
76     return true;
77 
78   rangstring.Remove(' ');
79   int nLength = rangstring.GetLength();
80   CFX_ByteString cbCompareString("0123456789-,");
81   for (int i = 0; i < nLength; ++i) {
82     if (cbCompareString.Find(rangstring[i]) == -1)
83       return false;
84   }
85 
86   CFX_ByteString cbMidRange;
87   int nStringFrom = 0;
88   int nStringTo = 0;
89   while (nStringTo < nLength) {
90     nStringTo = rangstring.Find(',', nStringFrom);
91     if (nStringTo == -1)
92       nStringTo = nLength;
93     cbMidRange = rangstring.Mid(nStringFrom, nStringTo - nStringFrom);
94     int nMid = cbMidRange.Find('-');
95     if (nMid == -1) {
96       long lPageNum = atol(cbMidRange.c_str());
97       if (lPageNum <= 0 || lPageNum > nCount)
98         return false;
99       pageArray->push_back((uint16_t)lPageNum);
100     } else {
101       int nStartPageNum = atol(cbMidRange.Mid(0, nMid).c_str());
102       if (nStartPageNum == 0)
103         return false;
104 
105       ++nMid;
106       int nEnd = cbMidRange.GetLength() - nMid;
107       if (nEnd == 0)
108         return false;
109 
110       int nEndPageNum = atol(cbMidRange.Mid(nMid, nEnd).c_str());
111       if (nStartPageNum < 0 || nStartPageNum > nEndPageNum ||
112           nEndPageNum > nCount) {
113         return false;
114       }
115       for (int i = nStartPageNum; i <= nEndPageNum; ++i) {
116         pageArray->push_back(i);
117       }
118     }
119     nStringFrom = nStringTo + 1;
120   }
121   return true;
122 }
123 
124 }  // namespace
125 
126 class CPDF_PageOrganizer {
127  public:
128   CPDF_PageOrganizer(CPDF_Document* pDestPDFDoc, CPDF_Document* pSrcPDFDoc);
129   ~CPDF_PageOrganizer();
130 
131   bool PDFDocInit();
132   bool ExportPage(const std::vector<uint16_t>& pageNums, int nIndex);
133 
134  private:
135   using ObjectNumberMap = std::map<uint32_t, uint32_t>;
136 
137   bool UpdateReference(CPDF_Object* pObj, ObjectNumberMap* pObjNumberMap);
138   uint32_t GetNewObjId(ObjectNumberMap* pObjNumberMap, CPDF_Reference* pRef);
139 
140   CPDF_Document* m_pDestPDFDoc;
141   CPDF_Document* m_pSrcPDFDoc;
142 };
143 
CPDF_PageOrganizer(CPDF_Document * pDestPDFDoc,CPDF_Document * pSrcPDFDoc)144 CPDF_PageOrganizer::CPDF_PageOrganizer(CPDF_Document* pDestPDFDoc,
145                                        CPDF_Document* pSrcPDFDoc)
146     : m_pDestPDFDoc(pDestPDFDoc), m_pSrcPDFDoc(pSrcPDFDoc) {}
147 
~CPDF_PageOrganizer()148 CPDF_PageOrganizer::~CPDF_PageOrganizer() {}
149 
PDFDocInit()150 bool CPDF_PageOrganizer::PDFDocInit() {
151   ASSERT(m_pDestPDFDoc);
152   ASSERT(m_pSrcPDFDoc);
153 
154   CPDF_Dictionary* pNewRoot = m_pDestPDFDoc->GetRoot();
155   if (!pNewRoot)
156     return false;
157 
158   CPDF_Dictionary* pDocInfoDict = m_pDestPDFDoc->GetInfo();
159   if (!pDocInfoDict)
160     return false;
161 
162   pDocInfoDict->SetNewFor<CPDF_String>("Producer", "PDFium", false);
163 
164   CFX_ByteString cbRootType = pNewRoot->GetStringFor("Type", "");
165   if (cbRootType.IsEmpty())
166     pNewRoot->SetNewFor<CPDF_Name>("Type", "Catalog");
167 
168   CPDF_Object* pElement = pNewRoot->GetObjectFor("Pages");
169   CPDF_Dictionary* pNewPages =
170       pElement ? ToDictionary(pElement->GetDirect()) : nullptr;
171   if (!pNewPages) {
172     pNewPages = m_pDestPDFDoc->NewIndirect<CPDF_Dictionary>();
173     pNewRoot->SetNewFor<CPDF_Reference>("Pages", m_pDestPDFDoc,
174                                         pNewPages->GetObjNum());
175   }
176 
177   CFX_ByteString cbPageType = pNewPages->GetStringFor("Type", "");
178   if (cbPageType.IsEmpty())
179     pNewPages->SetNewFor<CPDF_Name>("Type", "Pages");
180 
181   if (!pNewPages->GetArrayFor("Kids")) {
182     pNewPages->SetNewFor<CPDF_Number>("Count", 0);
183     pNewPages->SetNewFor<CPDF_Reference>(
184         "Kids", m_pDestPDFDoc,
185         m_pDestPDFDoc->NewIndirect<CPDF_Array>()->GetObjNum());
186   }
187 
188   return true;
189 }
190 
ExportPage(const std::vector<uint16_t> & pageNums,int nIndex)191 bool CPDF_PageOrganizer::ExportPage(const std::vector<uint16_t>& pageNums,
192                                     int nIndex) {
193   int curpage = nIndex;
194   auto pObjNumberMap = pdfium::MakeUnique<ObjectNumberMap>();
195   int nSize = pdfium::CollectionSize<int>(pageNums);
196   for (int i = 0; i < nSize; ++i) {
197     CPDF_Dictionary* pCurPageDict = m_pDestPDFDoc->CreateNewPage(curpage);
198     CPDF_Dictionary* pSrcPageDict = m_pSrcPDFDoc->GetPage(pageNums[i] - 1);
199     if (!pSrcPageDict || !pCurPageDict)
200       return false;
201 
202     // Clone the page dictionary
203     for (const auto& it : *pSrcPageDict) {
204       const CFX_ByteString& cbSrcKeyStr = it.first;
205       if (cbSrcKeyStr == "Type" || cbSrcKeyStr == "Parent")
206         continue;
207 
208       CPDF_Object* pObj = it.second.get();
209       pCurPageDict->SetFor(cbSrcKeyStr, pObj->Clone());
210     }
211 
212     // inheritable item
213     // 1 MediaBox - required
214     if (!CopyInheritable(pCurPageDict, pSrcPageDict, "MediaBox")) {
215       // Search for "CropBox" in the source page dictionary,
216       // if it does not exists, use the default letter size.
217       CPDF_Object* pInheritable =
218           PageDictGetInheritableTag(pSrcPageDict, "CropBox");
219       if (pInheritable) {
220         pCurPageDict->SetFor("MediaBox", pInheritable->Clone());
221       } else {
222         // Make the default size to be letter size (8.5'x11')
223         CPDF_Array* pArray = pCurPageDict->SetNewFor<CPDF_Array>("MediaBox");
224         pArray->AddNew<CPDF_Number>(0);
225         pArray->AddNew<CPDF_Number>(0);
226         pArray->AddNew<CPDF_Number>(612);
227         pArray->AddNew<CPDF_Number>(792);
228       }
229     }
230 
231     // 2 Resources - required
232     if (!CopyInheritable(pCurPageDict, pSrcPageDict, "Resources"))
233       return false;
234 
235     // 3 CropBox - optional
236     CopyInheritable(pCurPageDict, pSrcPageDict, "CropBox");
237     // 4 Rotate - optional
238     CopyInheritable(pCurPageDict, pSrcPageDict, "Rotate");
239 
240     // Update the reference
241     uint32_t dwOldPageObj = pSrcPageDict->GetObjNum();
242     uint32_t dwNewPageObj = pCurPageDict->GetObjNum();
243     (*pObjNumberMap)[dwOldPageObj] = dwNewPageObj;
244     UpdateReference(pCurPageDict, pObjNumberMap.get());
245     ++curpage;
246   }
247 
248   return true;
249 }
250 
UpdateReference(CPDF_Object * pObj,ObjectNumberMap * pObjNumberMap)251 bool CPDF_PageOrganizer::UpdateReference(CPDF_Object* pObj,
252                                          ObjectNumberMap* pObjNumberMap) {
253   switch (pObj->GetType()) {
254     case CPDF_Object::REFERENCE: {
255       CPDF_Reference* pReference = pObj->AsReference();
256       uint32_t newobjnum = GetNewObjId(pObjNumberMap, pReference);
257       if (newobjnum == 0)
258         return false;
259       pReference->SetRef(m_pDestPDFDoc, newobjnum);
260       break;
261     }
262     case CPDF_Object::DICTIONARY: {
263       CPDF_Dictionary* pDict = pObj->AsDictionary();
264       auto it = pDict->begin();
265       while (it != pDict->end()) {
266         const CFX_ByteString& key = it->first;
267         CPDF_Object* pNextObj = it->second.get();
268         ++it;
269         if (key == "Parent" || key == "Prev" || key == "First")
270           continue;
271         if (!pNextObj)
272           return false;
273         if (!UpdateReference(pNextObj, pObjNumberMap))
274           pDict->RemoveFor(key);
275       }
276       break;
277     }
278     case CPDF_Object::ARRAY: {
279       CPDF_Array* pArray = pObj->AsArray();
280       for (size_t i = 0; i < pArray->GetCount(); ++i) {
281         CPDF_Object* pNextObj = pArray->GetObjectAt(i);
282         if (!pNextObj)
283           return false;
284         if (!UpdateReference(pNextObj, pObjNumberMap))
285           return false;
286       }
287       break;
288     }
289     case CPDF_Object::STREAM: {
290       CPDF_Stream* pStream = pObj->AsStream();
291       CPDF_Dictionary* pDict = pStream->GetDict();
292       if (!pDict)
293         return false;
294       if (!UpdateReference(pDict, pObjNumberMap))
295         return false;
296       break;
297     }
298     default:
299       break;
300   }
301 
302   return true;
303 }
304 
GetNewObjId(ObjectNumberMap * pObjNumberMap,CPDF_Reference * pRef)305 uint32_t CPDF_PageOrganizer::GetNewObjId(ObjectNumberMap* pObjNumberMap,
306                                          CPDF_Reference* pRef) {
307   if (!pRef)
308     return 0;
309 
310   uint32_t dwObjnum = pRef->GetRefObjNum();
311   uint32_t dwNewObjNum = 0;
312   const auto it = pObjNumberMap->find(dwObjnum);
313   if (it != pObjNumberMap->end())
314     dwNewObjNum = it->second;
315   if (dwNewObjNum)
316     return dwNewObjNum;
317 
318   CPDF_Object* pDirect = pRef->GetDirect();
319   if (!pDirect)
320     return 0;
321 
322   std::unique_ptr<CPDF_Object> pClone = pDirect->Clone();
323   if (CPDF_Dictionary* pDictClone = pClone->AsDictionary()) {
324     if (pDictClone->KeyExist("Type")) {
325       CFX_ByteString strType = pDictClone->GetStringFor("Type");
326       if (!FXSYS_stricmp(strType.c_str(), "Pages"))
327         return 4;
328       if (!FXSYS_stricmp(strType.c_str(), "Page"))
329         return 0;
330     }
331   }
332   CPDF_Object* pUnownedClone =
333       m_pDestPDFDoc->AddIndirectObject(std::move(pClone));
334   dwNewObjNum = pUnownedClone->GetObjNum();
335   (*pObjNumberMap)[dwObjnum] = dwNewObjNum;
336   if (!UpdateReference(pUnownedClone, pObjNumberMap))
337     return 0;
338 
339   return dwNewObjNum;
340 }
341 
FPDF_ImportPages(FPDF_DOCUMENT dest_doc,FPDF_DOCUMENT src_doc,FPDF_BYTESTRING pagerange,int index)342 DLLEXPORT FPDF_BOOL STDCALL FPDF_ImportPages(FPDF_DOCUMENT dest_doc,
343                                              FPDF_DOCUMENT src_doc,
344                                              FPDF_BYTESTRING pagerange,
345                                              int index) {
346   CPDF_Document* pDestDoc = CPDFDocumentFromFPDFDocument(dest_doc);
347   if (!dest_doc)
348     return false;
349 
350   CPDF_Document* pSrcDoc = CPDFDocumentFromFPDFDocument(src_doc);
351   if (!pSrcDoc)
352     return false;
353 
354   std::vector<uint16_t> pageArray;
355   int nCount = pSrcDoc->GetPageCount();
356   if (pagerange) {
357     if (!ParserPageRangeString(pagerange, &pageArray, nCount))
358       return false;
359   } else {
360     for (int i = 1; i <= nCount; ++i) {
361       pageArray.push_back(i);
362     }
363   }
364 
365   CPDF_PageOrganizer pageOrg(pDestDoc, pSrcDoc);
366   return pageOrg.PDFDocInit() && pageOrg.ExportPage(pageArray, index);
367 }
368 
FPDF_CopyViewerPreferences(FPDF_DOCUMENT dest_doc,FPDF_DOCUMENT src_doc)369 DLLEXPORT FPDF_BOOL STDCALL FPDF_CopyViewerPreferences(FPDF_DOCUMENT dest_doc,
370                                                        FPDF_DOCUMENT src_doc) {
371   CPDF_Document* pDstDoc = CPDFDocumentFromFPDFDocument(dest_doc);
372   if (!pDstDoc)
373     return false;
374 
375   CPDF_Document* pSrcDoc = CPDFDocumentFromFPDFDocument(src_doc);
376   if (!pSrcDoc)
377     return false;
378 
379   CPDF_Dictionary* pSrcDict = pSrcDoc->GetRoot();
380   pSrcDict = pSrcDict->GetDictFor("ViewerPreferences");
381   if (!pSrcDict)
382     return false;
383 
384   CPDF_Dictionary* pDstDict = pDstDoc->GetRoot();
385   if (!pDstDict)
386     return false;
387 
388   pDstDict->SetFor("ViewerPreferences", pSrcDict->CloneDirectObject());
389   return true;
390 }
391