1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #include "public/fpdf_ppo.h"
8 
9 #include <map>
10 #include <memory>
11 #include <utility>
12 #include <vector>
13 
14 #include "core/fpdfapi/parser/cpdf_array.h"
15 #include "core/fpdfapi/parser/cpdf_document.h"
16 #include "core/fpdfapi/parser/cpdf_name.h"
17 #include "core/fpdfapi/parser/cpdf_number.h"
18 #include "core/fpdfapi/parser/cpdf_reference.h"
19 #include "core/fpdfapi/parser/cpdf_stream.h"
20 #include "core/fpdfapi/parser/cpdf_string.h"
21 #include "core/fxcrt/unowned_ptr.h"
22 #include "fpdfsdk/fsdk_define.h"
23 #include "third_party/base/ptr_util.h"
24 
25 namespace {
26 
PageDictGetInheritableTag(CPDF_Dictionary * pDict,const ByteString & bsSrcTag)27 CPDF_Object* PageDictGetInheritableTag(CPDF_Dictionary* pDict,
28                                        const ByteString& bsSrcTag) {
29   if (!pDict || bsSrcTag.IsEmpty())
30     return nullptr;
31   if (!pDict->KeyExist("Parent") || !pDict->KeyExist("Type"))
32     return nullptr;
33 
34   CPDF_Object* pType = pDict->GetObjectFor("Type")->GetDirect();
35   if (!ToName(pType))
36     return nullptr;
37   if (pType->GetString().Compare("Page"))
38     return nullptr;
39 
40   CPDF_Dictionary* pp =
41       ToDictionary(pDict->GetObjectFor("Parent")->GetDirect());
42   if (!pp)
43     return nullptr;
44 
45   if (pDict->KeyExist(bsSrcTag))
46     return pDict->GetObjectFor(bsSrcTag);
47 
48   while (pp) {
49     if (pp->KeyExist(bsSrcTag))
50       return pp->GetObjectFor(bsSrcTag);
51     if (!pp->KeyExist("Parent"))
52       break;
53     pp = ToDictionary(pp->GetObjectFor("Parent")->GetDirect());
54   }
55   return nullptr;
56 }
57 
CopyInheritable(CPDF_Dictionary * pCurPageDict,CPDF_Dictionary * pSrcPageDict,const ByteString & key)58 bool CopyInheritable(CPDF_Dictionary* pCurPageDict,
59                      CPDF_Dictionary* pSrcPageDict,
60                      const ByteString& key) {
61   if (pCurPageDict->KeyExist(key))
62     return true;
63 
64   CPDF_Object* pInheritable = PageDictGetInheritableTag(pSrcPageDict, key);
65   if (!pInheritable)
66     return false;
67 
68   pCurPageDict->SetFor(key, pInheritable->Clone());
69   return true;
70 }
71 
ParserPageRangeString(ByteString rangstring,std::vector<uint16_t> * pageArray,int nCount)72 bool ParserPageRangeString(ByteString rangstring,
73                            std::vector<uint16_t>* pageArray,
74                            int nCount) {
75   if (rangstring.IsEmpty())
76     return true;
77 
78   rangstring.Remove(' ');
79   size_t nLength = rangstring.GetLength();
80   ByteString cbCompareString("0123456789-,");
81   for (size_t i = 0; i < nLength; ++i) {
82     if (!cbCompareString.Contains(rangstring[i]))
83       return false;
84   }
85 
86   ByteString cbMidRange;
87   size_t nStringFrom = 0;
88   Optional<size_t> nStringTo = 0;
89   while (nStringTo < nLength) {
90     nStringTo = rangstring.Find(',', nStringFrom);
91     if (!nStringTo.has_value())
92       nStringTo = nLength;
93     cbMidRange = rangstring.Mid(nStringFrom, nStringTo.value() - nStringFrom);
94     auto nMid = cbMidRange.Find('-');
95     if (!nMid.has_value()) {
96       uint16_t pageNum =
97           pdfium::base::checked_cast<uint16_t>(atoi(cbMidRange.c_str()));
98       if (pageNum <= 0 || pageNum > nCount)
99         return false;
100       pageArray->push_back(pageNum);
101     } else {
102       uint16_t nStartPageNum = pdfium::base::checked_cast<uint16_t>(
103           atoi(cbMidRange.Left(nMid.value()).c_str()));
104       if (nStartPageNum == 0)
105         return false;
106 
107       nMid = nMid.value() + 1;
108       size_t nEnd = cbMidRange.GetLength() - nMid.value();
109       if (nEnd == 0)
110         return false;
111 
112       uint16_t nEndPageNum = pdfium::base::checked_cast<uint16_t>(
113           atoi(cbMidRange.Mid(nMid.value(), nEnd).c_str()));
114       if (nStartPageNum < 0 || nStartPageNum > nEndPageNum ||
115           nEndPageNum > nCount) {
116         return false;
117       }
118       for (uint16_t i = nStartPageNum; i <= nEndPageNum; ++i) {
119         pageArray->push_back(i);
120       }
121     }
122     nStringFrom = nStringTo.value() + 1;
123   }
124   return true;
125 }
126 
127 }  // namespace
128 
129 class CPDF_PageOrganizer {
130  public:
131   CPDF_PageOrganizer(CPDF_Document* pDestPDFDoc, CPDF_Document* pSrcPDFDoc);
132   ~CPDF_PageOrganizer();
133 
134   bool PDFDocInit();
135   bool ExportPage(const std::vector<uint16_t>& pageNums, int nIndex);
136 
137  private:
138   using ObjectNumberMap = std::map<uint32_t, uint32_t>;
139 
140   bool UpdateReference(CPDF_Object* pObj, ObjectNumberMap* pObjNumberMap);
141   uint32_t GetNewObjId(ObjectNumberMap* pObjNumberMap, CPDF_Reference* pRef);
142 
143   UnownedPtr<CPDF_Document> m_pDestPDFDoc;
144   UnownedPtr<CPDF_Document> m_pSrcPDFDoc;
145 };
146 
CPDF_PageOrganizer(CPDF_Document * pDestPDFDoc,CPDF_Document * pSrcPDFDoc)147 CPDF_PageOrganizer::CPDF_PageOrganizer(CPDF_Document* pDestPDFDoc,
148                                        CPDF_Document* pSrcPDFDoc)
149     : m_pDestPDFDoc(pDestPDFDoc), m_pSrcPDFDoc(pSrcPDFDoc) {}
150 
~CPDF_PageOrganizer()151 CPDF_PageOrganizer::~CPDF_PageOrganizer() {}
152 
PDFDocInit()153 bool CPDF_PageOrganizer::PDFDocInit() {
154   ASSERT(m_pDestPDFDoc);
155   ASSERT(m_pSrcPDFDoc);
156 
157   CPDF_Dictionary* pNewRoot = m_pDestPDFDoc->GetRoot();
158   if (!pNewRoot)
159     return false;
160 
161   CPDF_Dictionary* pDocInfoDict = m_pDestPDFDoc->GetInfo();
162   if (!pDocInfoDict)
163     return false;
164 
165   pDocInfoDict->SetNewFor<CPDF_String>("Producer", "PDFium", false);
166 
167   ByteString cbRootType = pNewRoot->GetStringFor("Type", "");
168   if (cbRootType.IsEmpty())
169     pNewRoot->SetNewFor<CPDF_Name>("Type", "Catalog");
170 
171   CPDF_Object* pElement = pNewRoot->GetObjectFor("Pages");
172   CPDF_Dictionary* pNewPages =
173       pElement ? ToDictionary(pElement->GetDirect()) : nullptr;
174   if (!pNewPages) {
175     pNewPages = m_pDestPDFDoc->NewIndirect<CPDF_Dictionary>();
176     pNewRoot->SetNewFor<CPDF_Reference>("Pages", m_pDestPDFDoc.Get(),
177                                         pNewPages->GetObjNum());
178   }
179 
180   ByteString cbPageType = pNewPages->GetStringFor("Type", "");
181   if (cbPageType.IsEmpty())
182     pNewPages->SetNewFor<CPDF_Name>("Type", "Pages");
183 
184   if (!pNewPages->GetArrayFor("Kids")) {
185     pNewPages->SetNewFor<CPDF_Number>("Count", 0);
186     pNewPages->SetNewFor<CPDF_Reference>(
187         "Kids", m_pDestPDFDoc.Get(),
188         m_pDestPDFDoc->NewIndirect<CPDF_Array>()->GetObjNum());
189   }
190 
191   return true;
192 }
193 
ExportPage(const std::vector<uint16_t> & pageNums,int nIndex)194 bool CPDF_PageOrganizer::ExportPage(const std::vector<uint16_t>& pageNums,
195                                     int nIndex) {
196   int curpage = nIndex;
197   auto pObjNumberMap = pdfium::MakeUnique<ObjectNumberMap>();
198   for (size_t i = 0; i < pageNums.size(); ++i) {
199     CPDF_Dictionary* pCurPageDict = m_pDestPDFDoc->CreateNewPage(curpage);
200     CPDF_Dictionary* pSrcPageDict = m_pSrcPDFDoc->GetPage(pageNums[i] - 1);
201     if (!pSrcPageDict || !pCurPageDict)
202       return false;
203 
204     // Clone the page dictionary
205     for (const auto& it : *pSrcPageDict) {
206       const ByteString& cbSrcKeyStr = it.first;
207       if (cbSrcKeyStr == "Type" || cbSrcKeyStr == "Parent")
208         continue;
209 
210       CPDF_Object* pObj = it.second.get();
211       pCurPageDict->SetFor(cbSrcKeyStr, pObj->Clone());
212     }
213 
214     // inheritable item
215     // Even though some entries are required by the PDF spec, there exist
216     // PDFs that omit them. Set some defaults in this case.
217     // 1 MediaBox - required
218     if (!CopyInheritable(pCurPageDict, pSrcPageDict, "MediaBox")) {
219       // Search for "CropBox" in the source page dictionary.
220       // If it does not exist, use the default letter size.
221       CPDF_Object* pInheritable =
222           PageDictGetInheritableTag(pSrcPageDict, "CropBox");
223       if (pInheritable) {
224         pCurPageDict->SetFor("MediaBox", pInheritable->Clone());
225       } else {
226         // Make the default size letter size (8.5"x11")
227         CPDF_Array* pArray = pCurPageDict->SetNewFor<CPDF_Array>("MediaBox");
228         pArray->AddNew<CPDF_Number>(0);
229         pArray->AddNew<CPDF_Number>(0);
230         pArray->AddNew<CPDF_Number>(612);
231         pArray->AddNew<CPDF_Number>(792);
232       }
233     }
234 
235     // 2 Resources - required
236     if (!CopyInheritable(pCurPageDict, pSrcPageDict, "Resources")) {
237       // Use a default empty resources if it does not exist.
238       pCurPageDict->SetNewFor<CPDF_Dictionary>("Resources");
239     }
240 
241     // 3 CropBox - optional
242     CopyInheritable(pCurPageDict, pSrcPageDict, "CropBox");
243     // 4 Rotate - optional
244     CopyInheritable(pCurPageDict, pSrcPageDict, "Rotate");
245 
246     // Update the reference
247     uint32_t dwOldPageObj = pSrcPageDict->GetObjNum();
248     uint32_t dwNewPageObj = pCurPageDict->GetObjNum();
249     (*pObjNumberMap)[dwOldPageObj] = dwNewPageObj;
250     UpdateReference(pCurPageDict, pObjNumberMap.get());
251     ++curpage;
252   }
253 
254   return true;
255 }
256 
UpdateReference(CPDF_Object * pObj,ObjectNumberMap * pObjNumberMap)257 bool CPDF_PageOrganizer::UpdateReference(CPDF_Object* pObj,
258                                          ObjectNumberMap* pObjNumberMap) {
259   switch (pObj->GetType()) {
260     case CPDF_Object::REFERENCE: {
261       CPDF_Reference* pReference = pObj->AsReference();
262       uint32_t newobjnum = GetNewObjId(pObjNumberMap, pReference);
263       if (newobjnum == 0)
264         return false;
265       pReference->SetRef(m_pDestPDFDoc.Get(), newobjnum);
266       break;
267     }
268     case CPDF_Object::DICTIONARY: {
269       CPDF_Dictionary* pDict = pObj->AsDictionary();
270       auto it = pDict->begin();
271       while (it != pDict->end()) {
272         const ByteString& key = it->first;
273         CPDF_Object* pNextObj = it->second.get();
274         ++it;
275         if (key == "Parent" || key == "Prev" || key == "First")
276           continue;
277         if (!pNextObj)
278           return false;
279         if (!UpdateReference(pNextObj, pObjNumberMap))
280           pDict->RemoveFor(key);
281       }
282       break;
283     }
284     case CPDF_Object::ARRAY: {
285       CPDF_Array* pArray = pObj->AsArray();
286       for (size_t i = 0; i < pArray->GetCount(); ++i) {
287         CPDF_Object* pNextObj = pArray->GetObjectAt(i);
288         if (!pNextObj)
289           return false;
290         if (!UpdateReference(pNextObj, pObjNumberMap))
291           return false;
292       }
293       break;
294     }
295     case CPDF_Object::STREAM: {
296       CPDF_Stream* pStream = pObj->AsStream();
297       CPDF_Dictionary* pDict = pStream->GetDict();
298       if (!pDict)
299         return false;
300       if (!UpdateReference(pDict, pObjNumberMap))
301         return false;
302       break;
303     }
304     default:
305       break;
306   }
307 
308   return true;
309 }
310 
GetNewObjId(ObjectNumberMap * pObjNumberMap,CPDF_Reference * pRef)311 uint32_t CPDF_PageOrganizer::GetNewObjId(ObjectNumberMap* pObjNumberMap,
312                                          CPDF_Reference* pRef) {
313   if (!pRef)
314     return 0;
315 
316   uint32_t dwObjnum = pRef->GetRefObjNum();
317   uint32_t dwNewObjNum = 0;
318   const auto it = pObjNumberMap->find(dwObjnum);
319   if (it != pObjNumberMap->end())
320     dwNewObjNum = it->second;
321   if (dwNewObjNum)
322     return dwNewObjNum;
323 
324   CPDF_Object* pDirect = pRef->GetDirect();
325   if (!pDirect)
326     return 0;
327 
328   std::unique_ptr<CPDF_Object> pClone = pDirect->Clone();
329   if (CPDF_Dictionary* pDictClone = pClone->AsDictionary()) {
330     if (pDictClone->KeyExist("Type")) {
331       ByteString strType = pDictClone->GetStringFor("Type");
332       if (!FXSYS_stricmp(strType.c_str(), "Pages"))
333         return 4;
334       if (!FXSYS_stricmp(strType.c_str(), "Page"))
335         return 0;
336     }
337   }
338   CPDF_Object* pUnownedClone =
339       m_pDestPDFDoc->AddIndirectObject(std::move(pClone));
340   dwNewObjNum = pUnownedClone->GetObjNum();
341   (*pObjNumberMap)[dwObjnum] = dwNewObjNum;
342   if (!UpdateReference(pUnownedClone, pObjNumberMap))
343     return 0;
344 
345   return dwNewObjNum;
346 }
347 
FPDF_ImportPages(FPDF_DOCUMENT dest_doc,FPDF_DOCUMENT src_doc,FPDF_BYTESTRING pagerange,int index)348 FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV FPDF_ImportPages(FPDF_DOCUMENT dest_doc,
349                                                      FPDF_DOCUMENT src_doc,
350                                                      FPDF_BYTESTRING pagerange,
351                                                      int index) {
352   CPDF_Document* pDestDoc = CPDFDocumentFromFPDFDocument(dest_doc);
353   if (!dest_doc)
354     return false;
355 
356   CPDF_Document* pSrcDoc = CPDFDocumentFromFPDFDocument(src_doc);
357   if (!pSrcDoc)
358     return false;
359 
360   std::vector<uint16_t> pageArray;
361   int nCount = pSrcDoc->GetPageCount();
362   if (pagerange) {
363     if (!ParserPageRangeString(pagerange, &pageArray, nCount))
364       return false;
365   } else {
366     for (int i = 1; i <= nCount; ++i) {
367       pageArray.push_back(i);
368     }
369   }
370 
371   CPDF_PageOrganizer pageOrg(pDestDoc, pSrcDoc);
372   return pageOrg.PDFDocInit() && pageOrg.ExportPage(pageArray, index);
373 }
374 
375 FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV
FPDF_CopyViewerPreferences(FPDF_DOCUMENT dest_doc,FPDF_DOCUMENT src_doc)376 FPDF_CopyViewerPreferences(FPDF_DOCUMENT dest_doc, FPDF_DOCUMENT src_doc) {
377   CPDF_Document* pDstDoc = CPDFDocumentFromFPDFDocument(dest_doc);
378   if (!pDstDoc)
379     return false;
380 
381   CPDF_Document* pSrcDoc = CPDFDocumentFromFPDFDocument(src_doc);
382   if (!pSrcDoc)
383     return false;
384 
385   CPDF_Dictionary* pSrcDict = pSrcDoc->GetRoot();
386   pSrcDict = pSrcDict->GetDictFor("ViewerPreferences");
387   if (!pSrcDict)
388     return false;
389 
390   CPDF_Dictionary* pDstDict = pDstDoc->GetRoot();
391   if (!pDstDict)
392     return false;
393 
394   pDstDict->SetFor("ViewerPreferences", pSrcDict->CloneDirectObject());
395   return true;
396 }
397