1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #include "public/fpdf_flatten.h"
8 
9 #include <algorithm>
10 #include <memory>
11 #include <utility>
12 #include <vector>
13 
14 #include "constants/annotation_common.h"
15 #include "constants/annotation_flags.h"
16 #include "constants/page_object.h"
17 #include "core/fpdfapi/edit/cpdf_contentstream_write_utils.h"
18 #include "core/fpdfapi/page/cpdf_page.h"
19 #include "core/fpdfapi/page/cpdf_pageobject.h"
20 #include "core/fpdfapi/parser/cpdf_array.h"
21 #include "core/fpdfapi/parser/cpdf_dictionary.h"
22 #include "core/fpdfapi/parser/cpdf_document.h"
23 #include "core/fpdfapi/parser/cpdf_name.h"
24 #include "core/fpdfapi/parser/cpdf_number.h"
25 #include "core/fpdfapi/parser/cpdf_reference.h"
26 #include "core/fpdfapi/parser/cpdf_stream.h"
27 #include "core/fpdfapi/parser/cpdf_stream_acc.h"
28 #include "core/fpdfdoc/cpdf_annot.h"
29 #include "fpdfsdk/cpdfsdk_helpers.h"
30 
// Selects which extreme GetMinMaxValue() computes over a list of rects.
enum FPDF_TYPE {
  MAX = 0,
  MIN = 1,
};

// Selects which rectangle edge GetMinMaxValue() reads from each rect.
enum FPDF_VALUE {
  TOP = 0,
  LEFT = 1,
  RIGHT = 2,
  BOTTOM = 3,
};
33 
34 namespace {
35 
IsValidRect(const CFX_FloatRect & rect,const CFX_FloatRect & rcPage)36 bool IsValidRect(const CFX_FloatRect& rect, const CFX_FloatRect& rcPage) {
37   constexpr float kMinSize = 0.000001f;
38   if (rect.IsEmpty() || rect.Width() < kMinSize || rect.Height() < kMinSize)
39     return false;
40 
41   if (rcPage.IsEmpty())
42     return true;
43 
44   constexpr float kMinBorderSize = 10.000001f;
45   return rect.left - rcPage.left >= -kMinBorderSize &&
46          rect.right - rcPage.right <= kMinBorderSize &&
47          rect.top - rcPage.top <= kMinBorderSize &&
48          rect.bottom - rcPage.bottom >= -kMinBorderSize;
49 }
50 
GetContentsRect(CPDF_Document * pDoc,CPDF_Dictionary * pDict,std::vector<CFX_FloatRect> * pRectArray)51 void GetContentsRect(CPDF_Document* pDoc,
52                      CPDF_Dictionary* pDict,
53                      std::vector<CFX_FloatRect>* pRectArray) {
54   auto pPDFPage = pdfium::MakeRetain<CPDF_Page>(pDoc, pDict);
55   pPDFPage->ParseContent();
56 
57   for (const auto& pPageObject : *pPDFPage) {
58     const CFX_FloatRect& rc = pPageObject->GetRect();
59     if (IsValidRect(rc, pDict->GetRectFor(pdfium::page_object::kMediaBox)))
60       pRectArray->push_back(rc);
61   }
62 }
63 
ParserStream(CPDF_Dictionary * pPageDic,CPDF_Dictionary * pStream,std::vector<CFX_FloatRect> * pRectArray,std::vector<CPDF_Dictionary * > * pObjectArray)64 void ParserStream(CPDF_Dictionary* pPageDic,
65                   CPDF_Dictionary* pStream,
66                   std::vector<CFX_FloatRect>* pRectArray,
67                   std::vector<CPDF_Dictionary*>* pObjectArray) {
68   if (!pStream)
69     return;
70   CFX_FloatRect rect;
71   if (pStream->KeyExist("Rect"))
72     rect = pStream->GetRectFor("Rect");
73   else if (pStream->KeyExist("BBox"))
74     rect = pStream->GetRectFor("BBox");
75 
76   if (IsValidRect(rect, pPageDic->GetRectFor(pdfium::page_object::kMediaBox)))
77     pRectArray->push_back(rect);
78 
79   pObjectArray->push_back(pStream);
80 }
81 
ParserAnnots(CPDF_Document * pSourceDoc,CPDF_Dictionary * pPageDic,std::vector<CFX_FloatRect> * pRectArray,std::vector<CPDF_Dictionary * > * pObjectArray,int nUsage)82 int ParserAnnots(CPDF_Document* pSourceDoc,
83                  CPDF_Dictionary* pPageDic,
84                  std::vector<CFX_FloatRect>* pRectArray,
85                  std::vector<CPDF_Dictionary*>* pObjectArray,
86                  int nUsage) {
87   if (!pSourceDoc)
88     return FLATTEN_FAIL;
89 
90   GetContentsRect(pSourceDoc, pPageDic, pRectArray);
91   CPDF_Array* pAnnots = pPageDic->GetArrayFor("Annots");
92   if (!pAnnots)
93     return FLATTEN_NOTHINGTODO;
94 
95   CPDF_ArrayLocker locker(pAnnots);
96   for (const auto& pAnnot : locker) {
97     CPDF_Dictionary* pAnnotDict = ToDictionary(pAnnot->GetDirect());
98     if (!pAnnotDict)
99       continue;
100 
101     ByteString sSubtype =
102         pAnnotDict->GetStringFor(pdfium::annotation::kSubtype);
103     if (sSubtype == "Popup")
104       continue;
105 
106     int nAnnotFlag = pAnnotDict->GetIntegerFor("F");
107     if (nAnnotFlag & pdfium::annotation_flags::kHidden)
108       continue;
109 
110     bool bParseStream;
111     if (nUsage == FLAT_NORMALDISPLAY)
112       bParseStream = !(nAnnotFlag & pdfium::annotation_flags::kInvisible);
113     else
114       bParseStream = !!(nAnnotFlag & pdfium::annotation_flags::kPrint);
115     if (bParseStream)
116       ParserStream(pPageDic, pAnnotDict, pRectArray, pObjectArray);
117   }
118   return FLATTEN_SUCCESS;
119 }
120 
GetMinMaxValue(const std::vector<CFX_FloatRect> & array,FPDF_TYPE type,FPDF_VALUE value)121 float GetMinMaxValue(const std::vector<CFX_FloatRect>& array,
122                      FPDF_TYPE type,
123                      FPDF_VALUE value) {
124   if (array.empty())
125     return 0.0f;
126 
127   size_t nRects = array.size();
128   std::vector<float> pArray(nRects);
129   switch (value) {
130     case LEFT:
131       for (size_t i = 0; i < nRects; i++)
132         pArray[i] = array[i].left;
133       break;
134     case TOP:
135       for (size_t i = 0; i < nRects; i++)
136         pArray[i] = array[i].top;
137       break;
138     case RIGHT:
139       for (size_t i = 0; i < nRects; i++)
140         pArray[i] = array[i].right;
141       break;
142     case BOTTOM:
143       for (size_t i = 0; i < nRects; i++)
144         pArray[i] = array[i].bottom;
145       break;
146     default:
147       NOTREACHED();
148       return 0.0f;
149   }
150 
151   float fRet = pArray[0];
152   if (type == MAX) {
153     for (size_t i = 1; i < nRects; i++)
154       fRet = std::max(fRet, pArray[i]);
155   } else {
156     for (size_t i = 1; i < nRects; i++)
157       fRet = std::min(fRet, pArray[i]);
158   }
159   return fRet;
160 }
161 
CalculateRect(std::vector<CFX_FloatRect> * pRectArray)162 CFX_FloatRect CalculateRect(std::vector<CFX_FloatRect>* pRectArray) {
163   CFX_FloatRect rcRet;
164 
165   rcRet.left = GetMinMaxValue(*pRectArray, MIN, LEFT);
166   rcRet.top = GetMinMaxValue(*pRectArray, MAX, TOP);
167   rcRet.right = GetMinMaxValue(*pRectArray, MAX, RIGHT);
168   rcRet.bottom = GetMinMaxValue(*pRectArray, MIN, BOTTOM);
169 
170   return rcRet;
171 }
172 
// Builds the content-stream snippet that draws the XObject named |key| with an
// identity matrix, wrapped in a q/Q pair.
ByteString GenerateFlattenedContent(const ByteString& key) {
  ByteString sContent("q 1 0 0 1 0 0 cm /");
  sContent += key;
  sContent += " Do Q";
  return sContent;
}
176 
NewIndirectContentsStream(CPDF_Document * pDocument,const ByteString & contents)177 CPDF_Object* NewIndirectContentsStream(CPDF_Document* pDocument,
178                                        const ByteString& contents) {
179   CPDF_Stream* pNewContents = pDocument->NewIndirect<CPDF_Stream>(
180       nullptr, 0, pDocument->New<CPDF_Dictionary>());
181   pNewContents->SetData(contents.raw_span());
182   return pNewContents;
183 }
184 
SetPageContents(const ByteString & key,CPDF_Dictionary * pPage,CPDF_Document * pDocument)185 void SetPageContents(const ByteString& key,
186                      CPDF_Dictionary* pPage,
187                      CPDF_Document* pDocument) {
188   CPDF_Array* pContentsArray =
189       pPage->GetArrayFor(pdfium::page_object::kContents);
190   CPDF_Stream* pContentsStream =
191       pPage->GetStreamFor(pdfium::page_object::kContents);
192   if (!pContentsStream && !pContentsArray) {
193     if (!key.IsEmpty()) {
194       pPage->SetFor(
195           pdfium::page_object::kContents,
196           NewIndirectContentsStream(pDocument, GenerateFlattenedContent(key))
197               ->MakeReference(pDocument));
198     }
199     return;
200   }
201 
202   pPage->ConvertToIndirectObjectFor(pdfium::page_object::kContents, pDocument);
203   if (pContentsArray) {
204     pContentsArray->InsertAt(
205         0, NewIndirectContentsStream(pDocument, "q")->MakeReference(pDocument));
206     pContentsArray->Add(
207         NewIndirectContentsStream(pDocument, "Q")->MakeReference(pDocument));
208   } else {
209     ByteString sStream = "q\n";
210     {
211       auto pAcc = pdfium::MakeRetain<CPDF_StreamAcc>(pContentsStream);
212       pAcc->LoadAllDataFiltered();
213       sStream += ByteString(pAcc->GetSpan());
214       sStream += "\nQ";
215     }
216     pContentsStream->SetDataAndRemoveFilter(sStream.raw_span());
217     pContentsArray = pDocument->NewIndirect<CPDF_Array>();
218     pContentsArray->AddNew<CPDF_Reference>(pDocument,
219                                            pContentsStream->GetObjNum());
220     pPage->SetNewFor<CPDF_Reference>(pdfium::page_object::kContents, pDocument,
221                                      pContentsArray->GetObjNum());
222   }
223   if (!key.IsEmpty()) {
224     pContentsArray->Add(
225         NewIndirectContentsStream(pDocument, GenerateFlattenedContent(key))
226             ->MakeReference(pDocument));
227   }
228 }
229 
// Computes the scale-and-translate matrix that maps an appearance stream's
// bounding box, after applying the stream's own |matrix|, onto |rcAnnot|.
//
// |rcAnnot|  - annotation rect in page space.
// |rcStream| - the appearance stream's bounding box.
// |matrix|   - the appearance stream's own matrix.
//
// Returns a default-constructed CFX_Matrix when |rcStream| is empty, or when
// |matrix| collapses |rcStream| to zero width or height. The latter would
// otherwise divide by zero and produce non-finite scale factors.
CFX_Matrix GetMatrix(const CFX_FloatRect& rcAnnot,
                     const CFX_FloatRect& rcStream,
                     const CFX_Matrix& matrix) {
  if (rcStream.IsEmpty())
    return CFX_Matrix();

  CFX_FloatRect rcTransformed = matrix.TransformRect(rcStream);
  rcTransformed.Normalize();

  // A degenerate |matrix| can flatten the box; there is no meaningful scale in
  // that case, so fall back exactly as for an empty |rcStream|.
  if (rcTransformed.IsEmpty())
    return CFX_Matrix();

  float a = rcAnnot.Width() / rcTransformed.Width();
  float d = rcAnnot.Height() / rcTransformed.Height();

  // Translate so the scaled box's lower-left corner lands on |rcAnnot|'s.
  float e = rcAnnot.left - rcTransformed.left * a;
  float f = rcAnnot.bottom - rcTransformed.bottom * d;
  return CFX_Matrix(a, 0.0f, 0.0f, d, e, f);
}
246 
247 }  // namespace
248 
FPDFPage_Flatten(FPDF_PAGE page,int nFlag)249 FPDF_EXPORT int FPDF_CALLCONV FPDFPage_Flatten(FPDF_PAGE page, int nFlag) {
250   CPDF_Page* pPage = CPDFPageFromFPDFPage(page);
251   if (!page)
252     return FLATTEN_FAIL;
253 
254   CPDF_Document* pDocument = pPage->GetDocument();
255   CPDF_Dictionary* pPageDict = pPage->GetDict();
256   if (!pDocument)
257     return FLATTEN_FAIL;
258 
259   std::vector<CPDF_Dictionary*> ObjectArray;
260   std::vector<CFX_FloatRect> RectArray;
261   int iRet =
262       ParserAnnots(pDocument, pPageDict, &RectArray, &ObjectArray, nFlag);
263   if (iRet == FLATTEN_NOTHINGTODO || iRet == FLATTEN_FAIL)
264     return iRet;
265 
266   CFX_FloatRect rcMerger = CalculateRect(&RectArray);
267   CFX_FloatRect rcOriginalMB =
268       pPageDict->GetRectFor(pdfium::page_object::kMediaBox);
269   if (pPageDict->KeyExist(pdfium::page_object::kCropBox))
270     rcOriginalMB = pPageDict->GetRectFor(pdfium::page_object::kCropBox);
271 
272   if (rcOriginalMB.IsEmpty())
273     rcOriginalMB = CFX_FloatRect(0.0f, 0.0f, 612.0f, 792.0f);
274 
275   CFX_FloatRect rcOriginalCB;
276   if (pPageDict->KeyExist(pdfium::page_object::kCropBox))
277     rcOriginalCB = pPageDict->GetRectFor(pdfium::page_object::kCropBox);
278   if (rcOriginalCB.IsEmpty())
279     rcOriginalCB = rcOriginalMB;
280 
281   rcMerger.left = std::max(rcMerger.left, rcOriginalMB.left);
282   rcMerger.right = std::min(rcMerger.right, rcOriginalMB.right);
283   rcMerger.bottom = std::max(rcMerger.bottom, rcOriginalMB.bottom);
284   rcMerger.top = std::min(rcMerger.top, rcOriginalMB.top);
285 
286   pPageDict->SetRectFor(pdfium::page_object::kMediaBox, rcOriginalMB);
287   pPageDict->SetRectFor(pdfium::page_object::kCropBox, rcOriginalCB);
288 
289   CPDF_Dictionary* pRes =
290       pPageDict->GetDictFor(pdfium::page_object::kResources);
291   if (!pRes) {
292     pRes =
293         pPageDict->SetNewFor<CPDF_Dictionary>(pdfium::page_object::kResources);
294   }
295 
296   CPDF_Stream* pNewXObject = pDocument->NewIndirect<CPDF_Stream>(
297       nullptr, 0, pDocument->New<CPDF_Dictionary>());
298 
299   CPDF_Dictionary* pPageXObject = pRes->GetDictFor("XObject");
300   if (!pPageXObject)
301     pPageXObject = pRes->SetNewFor<CPDF_Dictionary>("XObject");
302 
303   ByteString key;
304   if (!ObjectArray.empty()) {
305     int i = 0;
306     while (i < INT_MAX) {
307       ByteString sKey = ByteString::Format("FFT%d", i);
308       if (!pPageXObject->KeyExist(sKey)) {
309         key = std::move(sKey);
310         break;
311       }
312       ++i;
313     }
314   }
315 
316   SetPageContents(key, pPageDict, pDocument);
317 
318   CPDF_Dictionary* pNewXORes = nullptr;
319   if (!key.IsEmpty()) {
320     pPageXObject->SetNewFor<CPDF_Reference>(key, pDocument,
321                                             pNewXObject->GetObjNum());
322 
323     CPDF_Dictionary* pNewOXbjectDic = pNewXObject->GetDict();
324     pNewXORes = pNewOXbjectDic->SetNewFor<CPDF_Dictionary>("Resources");
325     pNewOXbjectDic->SetNewFor<CPDF_Name>("Type", "XObject");
326     pNewOXbjectDic->SetNewFor<CPDF_Name>("Subtype", "Form");
327     pNewOXbjectDic->SetNewFor<CPDF_Number>("FormType", 1);
328     pNewOXbjectDic->SetRectFor("BBox", rcOriginalCB);
329   }
330 
331   for (size_t i = 0; i < ObjectArray.size(); ++i) {
332     CPDF_Dictionary* pAnnotDict = ObjectArray[i];
333     if (!pAnnotDict)
334       continue;
335 
336     CFX_FloatRect rcAnnot = pAnnotDict->GetRectFor(pdfium::annotation::kRect);
337     rcAnnot.Normalize();
338 
339     ByteString sAnnotState = pAnnotDict->GetStringFor("AS");
340     CPDF_Dictionary* pAnnotAP = pAnnotDict->GetDictFor(pdfium::annotation::kAP);
341     if (!pAnnotAP)
342       continue;
343 
344     CPDF_Stream* pAPStream = pAnnotAP->GetStreamFor("N");
345     if (!pAPStream) {
346       CPDF_Dictionary* pAPDict = pAnnotAP->GetDictFor("N");
347       if (!pAPDict)
348         continue;
349 
350       if (!sAnnotState.IsEmpty()) {
351         pAPStream = pAPDict->GetStreamFor(sAnnotState);
352       } else {
353         if (pAPDict->size() > 0) {
354           CPDF_DictionaryLocker locker(pAPDict);
355           CPDF_Object* pFirstObj = locker.begin()->second.Get();
356           if (pFirstObj) {
357             if (pFirstObj->IsReference())
358               pFirstObj = pFirstObj->GetDirect();
359             if (!pFirstObj->IsStream())
360               continue;
361             pAPStream = pFirstObj->AsStream();
362           }
363         }
364       }
365     }
366     if (!pAPStream)
367       continue;
368 
369     CPDF_Dictionary* pAPDict = pAPStream->GetDict();
370     CFX_FloatRect rcStream;
371     if (pAPDict->KeyExist("Rect"))
372       rcStream = pAPDict->GetRectFor("Rect");
373     else if (pAPDict->KeyExist("BBox"))
374       rcStream = pAPDict->GetRectFor("BBox");
375     rcStream.Normalize();
376 
377     if (rcStream.IsEmpty())
378       continue;
379 
380     CPDF_Object* pObj = pAPStream;
381     if (pObj->IsInline()) {
382       RetainPtr<CPDF_Object> pNew = pObj->Clone();
383       pObj = pNew.Get();
384       pDocument->AddIndirectObject(std::move(pNew));
385     }
386 
387     CPDF_Dictionary* pObjDict = pObj->GetDict();
388     if (pObjDict) {
389       pObjDict->SetNewFor<CPDF_Name>("Type", "XObject");
390       pObjDict->SetNewFor<CPDF_Name>("Subtype", "Form");
391     }
392 
393     CPDF_Dictionary* pXObject = pNewXORes->GetDictFor("XObject");
394     if (!pXObject)
395       pXObject = pNewXORes->SetNewFor<CPDF_Dictionary>("XObject");
396 
397     ByteString sFormName = ByteString::Format("F%d", i);
398     pXObject->SetNewFor<CPDF_Reference>(sFormName, pDocument,
399                                         pObj->GetObjNum());
400 
401     ByteString sStream;
402     {
403       auto pAcc = pdfium::MakeRetain<CPDF_StreamAcc>(pNewXObject);
404       pAcc->LoadAllDataFiltered();
405       sStream = ByteString(pAcc->GetSpan());
406     }
407     CFX_Matrix matrix = pAPDict->GetMatrixFor("Matrix");
408     CFX_Matrix m = GetMatrix(rcAnnot, rcStream, matrix);
409     m.b = 0;
410     m.c = 0;
411     std::ostringstream buf;
412     buf << m;
413     ByteString str(buf);
414     sStream += ByteString::Format("q %s cm /%s Do Q\n", str.c_str(),
415                                   sFormName.c_str());
416     pNewXObject->SetDataAndRemoveFilter(sStream.raw_span());
417   }
418   pPageDict->RemoveFor("Annots");
419   return FLATTEN_SUCCESS;
420 }
421