1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #include "public/fpdf_flatten.h"
8 
9 #include <algorithm>
10 #include <memory>
11 #include <utility>
12 #include <vector>
13 
14 #include "core/fpdfapi/page/cpdf_page.h"
15 #include "core/fpdfapi/page/cpdf_pageobject.h"
16 #include "core/fpdfapi/parser/cpdf_array.h"
17 #include "core/fpdfapi/parser/cpdf_document.h"
18 #include "core/fpdfapi/parser/cpdf_name.h"
19 #include "core/fpdfapi/parser/cpdf_number.h"
20 #include "core/fpdfapi/parser/cpdf_reference.h"
21 #include "core/fpdfapi/parser/cpdf_stream.h"
22 #include "core/fpdfapi/parser/cpdf_stream_acc.h"
23 #include "core/fpdfdoc/cpdf_annot.h"
24 #include "fpdfsdk/fsdk_define.h"
25 #include "third_party/base/stl_util.h"
26 
// Selects which extreme GetMinMaxValue() computes.
enum FPDF_TYPE {
  MAX = 0,
  MIN = 1,
};

// Selects which rectangle edge GetMinMaxValue() inspects.
enum FPDF_VALUE {
  TOP = 0,
  LEFT = 1,
  RIGHT = 2,
  BOTTOM = 3,
};
29 
30 namespace {
31 
IsValidRect(const CFX_FloatRect & rect,const CFX_FloatRect & rcPage)32 bool IsValidRect(const CFX_FloatRect& rect, const CFX_FloatRect& rcPage) {
33   constexpr float kMinSize = 0.000001f;
34   if (rect.IsEmpty() || rect.Width() < kMinSize || rect.Height() < kMinSize)
35     return false;
36 
37   if (rcPage.IsEmpty())
38     return true;
39 
40   constexpr float kMinBorderSize = 10.000001f;
41   return rect.left - rcPage.left >= -kMinBorderSize &&
42          rect.right - rcPage.right <= kMinBorderSize &&
43          rect.top - rcPage.top <= kMinBorderSize &&
44          rect.bottom - rcPage.bottom >= -kMinBorderSize;
45 }
46 
GetContentsRect(CPDF_Document * pDoc,CPDF_Dictionary * pDict,std::vector<CFX_FloatRect> * pRectArray)47 void GetContentsRect(CPDF_Document* pDoc,
48                      CPDF_Dictionary* pDict,
49                      std::vector<CFX_FloatRect>* pRectArray) {
50   auto pPDFPage = pdfium::MakeUnique<CPDF_Page>(pDoc, pDict, false);
51   pPDFPage->ParseContent();
52 
53   for (const auto& pPageObject : *pPDFPage->GetPageObjectList()) {
54     CFX_FloatRect rc;
55     rc.left = pPageObject->m_Left;
56     rc.right = pPageObject->m_Right;
57     rc.bottom = pPageObject->m_Bottom;
58     rc.top = pPageObject->m_Top;
59     if (IsValidRect(rc, pDict->GetRectFor("MediaBox")))
60       pRectArray->push_back(rc);
61   }
62 }
63 
ParserStream(CPDF_Dictionary * pPageDic,CPDF_Dictionary * pStream,std::vector<CFX_FloatRect> * pRectArray,std::vector<CPDF_Dictionary * > * pObjectArray)64 void ParserStream(CPDF_Dictionary* pPageDic,
65                   CPDF_Dictionary* pStream,
66                   std::vector<CFX_FloatRect>* pRectArray,
67                   std::vector<CPDF_Dictionary*>* pObjectArray) {
68   if (!pStream)
69     return;
70   CFX_FloatRect rect;
71   if (pStream->KeyExist("Rect"))
72     rect = pStream->GetRectFor("Rect");
73   else if (pStream->KeyExist("BBox"))
74     rect = pStream->GetRectFor("BBox");
75 
76   if (IsValidRect(rect, pPageDic->GetRectFor("MediaBox")))
77     pRectArray->push_back(rect);
78 
79   pObjectArray->push_back(pStream);
80 }
81 
ParserAnnots(CPDF_Document * pSourceDoc,CPDF_Dictionary * pPageDic,std::vector<CFX_FloatRect> * pRectArray,std::vector<CPDF_Dictionary * > * pObjectArray,int nUsage)82 int ParserAnnots(CPDF_Document* pSourceDoc,
83                  CPDF_Dictionary* pPageDic,
84                  std::vector<CFX_FloatRect>* pRectArray,
85                  std::vector<CPDF_Dictionary*>* pObjectArray,
86                  int nUsage) {
87   if (!pSourceDoc || !pPageDic)
88     return FLATTEN_FAIL;
89 
90   GetContentsRect(pSourceDoc, pPageDic, pRectArray);
91   CPDF_Array* pAnnots = pPageDic->GetArrayFor("Annots");
92   if (!pAnnots)
93     return FLATTEN_NOTHINGTODO;
94 
95   for (const auto& pAnnot : *pAnnots) {
96     CPDF_Dictionary* pAnnotDic = ToDictionary(pAnnot->GetDirect());
97     if (!pAnnotDic)
98       continue;
99 
100     ByteString sSubtype = pAnnotDic->GetStringFor("Subtype");
101     if (sSubtype == "Popup")
102       continue;
103 
104     int nAnnotFlag = pAnnotDic->GetIntegerFor("F");
105     if (nAnnotFlag & ANNOTFLAG_HIDDEN)
106       continue;
107 
108     bool bParseStream;
109     if (nUsage == FLAT_NORMALDISPLAY)
110       bParseStream = !(nAnnotFlag & ANNOTFLAG_INVISIBLE);
111     else
112       bParseStream = !!(nAnnotFlag & ANNOTFLAG_PRINT);
113     if (bParseStream)
114       ParserStream(pPageDic, pAnnotDic, pRectArray, pObjectArray);
115   }
116   return FLATTEN_SUCCESS;
117 }
118 
GetMinMaxValue(const std::vector<CFX_FloatRect> & array,FPDF_TYPE type,FPDF_VALUE value)119 float GetMinMaxValue(const std::vector<CFX_FloatRect>& array,
120                      FPDF_TYPE type,
121                      FPDF_VALUE value) {
122   if (array.empty())
123     return 0.0f;
124 
125   size_t nRects = array.size();
126   std::vector<float> pArray(nRects);
127   switch (value) {
128     case LEFT:
129       for (size_t i = 0; i < nRects; i++)
130         pArray[i] = array[i].left;
131       break;
132     case TOP:
133       for (size_t i = 0; i < nRects; i++)
134         pArray[i] = array[i].top;
135       break;
136     case RIGHT:
137       for (size_t i = 0; i < nRects; i++)
138         pArray[i] = array[i].right;
139       break;
140     case BOTTOM:
141       for (size_t i = 0; i < nRects; i++)
142         pArray[i] = array[i].bottom;
143       break;
144     default:
145       NOTREACHED();
146       return 0.0f;
147   }
148 
149   float fRet = pArray[0];
150   if (type == MAX) {
151     for (size_t i = 1; i < nRects; i++)
152       fRet = std::max(fRet, pArray[i]);
153   } else {
154     for (size_t i = 1; i < nRects; i++)
155       fRet = std::min(fRet, pArray[i]);
156   }
157   return fRet;
158 }
159 
CalculateRect(std::vector<CFX_FloatRect> * pRectArray)160 CFX_FloatRect CalculateRect(std::vector<CFX_FloatRect>* pRectArray) {
161   CFX_FloatRect rcRet;
162 
163   rcRet.left = GetMinMaxValue(*pRectArray, MIN, LEFT);
164   rcRet.top = GetMinMaxValue(*pRectArray, MAX, TOP);
165   rcRet.right = GetMinMaxValue(*pRectArray, MAX, RIGHT);
166   rcRet.bottom = GetMinMaxValue(*pRectArray, MIN, BOTTOM);
167 
168   return rcRet;
169 }
170 
NewIndirectContentsStream(const ByteString & key,CPDF_Document * pDocument)171 uint32_t NewIndirectContentsStream(const ByteString& key,
172                                    CPDF_Document* pDocument) {
173   CPDF_Stream* pNewContents = pDocument->NewIndirect<CPDF_Stream>(
174       nullptr, 0,
175       pdfium::MakeUnique<CPDF_Dictionary>(pDocument->GetByteStringPool()));
176   ByteString sStream =
177       ByteString::Format("q 1 0 0 1 0 0 cm /%s Do Q", key.c_str());
178   pNewContents->SetData(sStream.raw_str(), sStream.GetLength());
179   return pNewContents->GetObjNum();
180 }
181 
SetPageContents(const ByteString & key,CPDF_Dictionary * pPage,CPDF_Document * pDocument)182 void SetPageContents(const ByteString& key,
183                      CPDF_Dictionary* pPage,
184                      CPDF_Document* pDocument) {
185   CPDF_Array* pContentsArray = nullptr;
186   CPDF_Stream* pContentsStream = pPage->GetStreamFor("Contents");
187   if (!pContentsStream) {
188     pContentsArray = pPage->GetArrayFor("Contents");
189     if (!pContentsArray) {
190       if (!key.IsEmpty()) {
191         pPage->SetNewFor<CPDF_Reference>(
192             "Contents", pDocument, NewIndirectContentsStream(key, pDocument));
193       }
194       return;
195     }
196   }
197   pPage->ConvertToIndirectObjectFor("Contents", pDocument);
198   if (!pContentsArray) {
199     pContentsArray = pDocument->NewIndirect<CPDF_Array>();
200     auto pAcc = pdfium::MakeRetain<CPDF_StreamAcc>(pContentsStream);
201     pAcc->LoadAllDataFiltered();
202     ByteString sStream = "q\n";
203     ByteString sBody = ByteString(pAcc->GetData(), pAcc->GetSize());
204     sStream = sStream + sBody + "\nQ";
205     pContentsStream->SetDataAndRemoveFilter(sStream.raw_str(),
206                                             sStream.GetLength());
207     pContentsArray->AddNew<CPDF_Reference>(pDocument,
208                                            pContentsStream->GetObjNum());
209     pPage->SetNewFor<CPDF_Reference>("Contents", pDocument,
210                                      pContentsArray->GetObjNum());
211   }
212   if (!key.IsEmpty()) {
213     pContentsArray->AddNew<CPDF_Reference>(
214         pDocument, NewIndirectContentsStream(key, pDocument));
215   }
216 }
217 
GetMatrix(CFX_FloatRect rcAnnot,CFX_FloatRect rcStream,const CFX_Matrix & matrix)218 CFX_Matrix GetMatrix(CFX_FloatRect rcAnnot,
219                      CFX_FloatRect rcStream,
220                      const CFX_Matrix& matrix) {
221   if (rcStream.IsEmpty())
222     return CFX_Matrix();
223 
224   rcStream = matrix.TransformRect(rcStream);
225   rcStream.Normalize();
226 
227   float a = rcAnnot.Width() / rcStream.Width();
228   float d = rcAnnot.Height() / rcStream.Height();
229 
230   float e = rcAnnot.left - rcStream.left * a;
231   float f = rcAnnot.bottom - rcStream.bottom * d;
232   return CFX_Matrix(a, 0, 0, d, e, f);
233 }
234 
235 }  // namespace
236 
FPDFPage_Flatten(FPDF_PAGE page,int nFlag)237 FPDF_EXPORT int FPDF_CALLCONV FPDFPage_Flatten(FPDF_PAGE page, int nFlag) {
238   CPDF_Page* pPage = CPDFPageFromFPDFPage(page);
239   if (!page)
240     return FLATTEN_FAIL;
241 
242   CPDF_Document* pDocument = pPage->m_pDocument.Get();
243   CPDF_Dictionary* pPageDict = pPage->m_pFormDict.Get();
244   if (!pDocument || !pPageDict)
245     return FLATTEN_FAIL;
246 
247   std::vector<CPDF_Dictionary*> ObjectArray;
248   std::vector<CFX_FloatRect> RectArray;
249   int iRet =
250       ParserAnnots(pDocument, pPageDict, &RectArray, &ObjectArray, nFlag);
251   if (iRet == FLATTEN_NOTHINGTODO || iRet == FLATTEN_FAIL)
252     return iRet;
253 
254   CFX_FloatRect rcOriginalCB;
255   CFX_FloatRect rcMerger = CalculateRect(&RectArray);
256   CFX_FloatRect rcOriginalMB = pPageDict->GetRectFor("MediaBox");
257   if (pPageDict->KeyExist("CropBox"))
258     rcOriginalMB = pPageDict->GetRectFor("CropBox");
259 
260   if (rcOriginalMB.IsEmpty())
261     rcOriginalMB = CFX_FloatRect(0.0f, 0.0f, 612.0f, 792.0f);
262 
263   rcMerger.left = std::max(rcMerger.left, rcOriginalMB.left);
264   rcMerger.right = std::min(rcMerger.right, rcOriginalMB.right);
265   rcMerger.bottom = std::max(rcMerger.bottom, rcOriginalMB.bottom);
266   rcMerger.top = std::min(rcMerger.top, rcOriginalMB.top);
267   if (pPageDict->KeyExist("ArtBox"))
268     rcOriginalCB = pPageDict->GetRectFor("ArtBox");
269   else
270     rcOriginalCB = rcOriginalMB;
271 
272   if (!rcOriginalMB.IsEmpty()) {
273     CPDF_Array* pMediaBox = pPageDict->SetNewFor<CPDF_Array>("MediaBox");
274     pMediaBox->AddNew<CPDF_Number>(rcOriginalMB.left);
275     pMediaBox->AddNew<CPDF_Number>(rcOriginalMB.bottom);
276     pMediaBox->AddNew<CPDF_Number>(rcOriginalMB.right);
277     pMediaBox->AddNew<CPDF_Number>(rcOriginalMB.top);
278   }
279 
280   if (!rcOriginalCB.IsEmpty()) {
281     CPDF_Array* pCropBox = pPageDict->SetNewFor<CPDF_Array>("ArtBox");
282     pCropBox->AddNew<CPDF_Number>(rcOriginalCB.left);
283     pCropBox->AddNew<CPDF_Number>(rcOriginalCB.bottom);
284     pCropBox->AddNew<CPDF_Number>(rcOriginalCB.right);
285     pCropBox->AddNew<CPDF_Number>(rcOriginalCB.top);
286   }
287 
288   CPDF_Dictionary* pRes = pPageDict->GetDictFor("Resources");
289   if (!pRes)
290     pRes = pPageDict->SetNewFor<CPDF_Dictionary>("Resources");
291 
292   CPDF_Stream* pNewXObject = pDocument->NewIndirect<CPDF_Stream>(
293       nullptr, 0,
294       pdfium::MakeUnique<CPDF_Dictionary>(pDocument->GetByteStringPool()));
295 
296   uint32_t dwObjNum = pNewXObject->GetObjNum();
297   CPDF_Dictionary* pPageXObject = pRes->GetDictFor("XObject");
298   if (!pPageXObject)
299     pPageXObject = pRes->SetNewFor<CPDF_Dictionary>("XObject");
300 
301   ByteString key;
302   if (!ObjectArray.empty()) {
303     int i = 0;
304     while (i < INT_MAX) {
305       ByteString sKey = ByteString::Format("FFT%d", i);
306       if (!pPageXObject->KeyExist(sKey)) {
307         key = sKey;
308         break;
309       }
310       ++i;
311     }
312   }
313 
314   SetPageContents(key, pPageDict, pDocument);
315 
316   CPDF_Dictionary* pNewXORes = nullptr;
317   if (!key.IsEmpty()) {
318     pPageXObject->SetNewFor<CPDF_Reference>(key, pDocument, dwObjNum);
319     CPDF_Dictionary* pNewOXbjectDic = pNewXObject->GetDict();
320     pNewXORes = pNewOXbjectDic->SetNewFor<CPDF_Dictionary>("Resources");
321     pNewOXbjectDic->SetNewFor<CPDF_Name>("Type", "XObject");
322     pNewOXbjectDic->SetNewFor<CPDF_Name>("Subtype", "Form");
323     pNewOXbjectDic->SetNewFor<CPDF_Number>("FormType", 1);
324     CFX_FloatRect rcBBox = pPageDict->GetRectFor("ArtBox");
325     pNewOXbjectDic->SetRectFor("BBox", rcBBox);
326   }
327 
328   for (size_t i = 0; i < ObjectArray.size(); ++i) {
329     CPDF_Dictionary* pAnnotDic = ObjectArray[i];
330     if (!pAnnotDic)
331       continue;
332 
333     CFX_FloatRect rcAnnot = pAnnotDic->GetRectFor("Rect");
334     rcAnnot.Normalize();
335 
336     ByteString sAnnotState = pAnnotDic->GetStringFor("AS");
337     CPDF_Dictionary* pAnnotAP = pAnnotDic->GetDictFor("AP");
338     if (!pAnnotAP)
339       continue;
340 
341     CPDF_Stream* pAPStream = pAnnotAP->GetStreamFor("N");
342     if (!pAPStream) {
343       CPDF_Dictionary* pAPDic = pAnnotAP->GetDictFor("N");
344       if (!pAPDic)
345         continue;
346 
347       if (!sAnnotState.IsEmpty()) {
348         pAPStream = pAPDic->GetStreamFor(sAnnotState);
349       } else {
350         if (pAPDic->GetCount() > 0) {
351           CPDF_Object* pFirstObj = pAPDic->begin()->second.get();
352           if (pFirstObj) {
353             if (pFirstObj->IsReference())
354               pFirstObj = pFirstObj->GetDirect();
355             if (!pFirstObj->IsStream())
356               continue;
357             pAPStream = pFirstObj->AsStream();
358           }
359         }
360       }
361     }
362     if (!pAPStream)
363       continue;
364 
365     CPDF_Dictionary* pAPDic = pAPStream->GetDict();
366     CFX_FloatRect rcStream;
367     if (pAPDic->KeyExist("Rect"))
368       rcStream = pAPDic->GetRectFor("Rect");
369     else if (pAPDic->KeyExist("BBox"))
370       rcStream = pAPDic->GetRectFor("BBox");
371 
372     if (rcStream.IsEmpty())
373       continue;
374 
375     CPDF_Object* pObj = pAPStream;
376     if (pObj->IsInline()) {
377       std::unique_ptr<CPDF_Object> pNew = pObj->Clone();
378       pObj = pNew.get();
379       pDocument->AddIndirectObject(std::move(pNew));
380     }
381 
382     CPDF_Dictionary* pObjDic = pObj->GetDict();
383     if (pObjDic) {
384       pObjDic->SetNewFor<CPDF_Name>("Type", "XObject");
385       pObjDic->SetNewFor<CPDF_Name>("Subtype", "Form");
386     }
387 
388     CPDF_Dictionary* pXObject = pNewXORes->GetDictFor("XObject");
389     if (!pXObject)
390       pXObject = pNewXORes->SetNewFor<CPDF_Dictionary>("XObject");
391 
392     ByteString sFormName = ByteString::Format("F%d", i);
393     pXObject->SetNewFor<CPDF_Reference>(sFormName, pDocument,
394                                         pObj->GetObjNum());
395 
396     auto pAcc = pdfium::MakeRetain<CPDF_StreamAcc>(pNewXObject);
397     pAcc->LoadAllDataFiltered();
398     ByteString sStream(pAcc->GetData(), pAcc->GetSize());
399     CFX_Matrix matrix = pAPDic->GetMatrixFor("Matrix");
400     CFX_Matrix m = GetMatrix(rcAnnot, rcStream, matrix);
401     sStream += ByteString::Format("q %f 0 0 %f %f %f cm /%s Do Q\n", m.a, m.d,
402                                   m.e, m.f, sFormName.c_str());
403     pNewXObject->SetDataAndRemoveFilter(sStream.raw_str(), sStream.GetLength());
404   }
405   pPageDict->RemoveFor("Annots");
406   return FLATTEN_SUCCESS;
407 }
408