1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7 #include "public/fpdf_flatten.h"
8
9 #include <algorithm>
10 #include <memory>
11 #include <utility>
12 #include <vector>
13
14 #include "constants/annotation_common.h"
15 #include "constants/annotation_flags.h"
16 #include "constants/page_object.h"
17 #include "core/fpdfapi/edit/cpdf_contentstream_write_utils.h"
18 #include "core/fpdfapi/page/cpdf_page.h"
19 #include "core/fpdfapi/page/cpdf_pageobject.h"
20 #include "core/fpdfapi/parser/cpdf_array.h"
21 #include "core/fpdfapi/parser/cpdf_dictionary.h"
22 #include "core/fpdfapi/parser/cpdf_document.h"
23 #include "core/fpdfapi/parser/cpdf_name.h"
24 #include "core/fpdfapi/parser/cpdf_number.h"
25 #include "core/fpdfapi/parser/cpdf_reference.h"
26 #include "core/fpdfapi/parser/cpdf_stream.h"
27 #include "core/fpdfapi/parser/cpdf_stream_acc.h"
28 #include "core/fpdfdoc/cpdf_annot.h"
29 #include "fpdfsdk/cpdfsdk_helpers.h"
30
// Selects which extreme of a set of rectangle edges is wanted.
enum FPDF_TYPE {
  MAX,
  MIN,
};

// Selects which edge of a rectangle is inspected.
enum FPDF_VALUE {
  TOP,
  LEFT,
  RIGHT,
  BOTTOM,
};
33
34 namespace {
35
IsValidRect(const CFX_FloatRect & rect,const CFX_FloatRect & rcPage)36 bool IsValidRect(const CFX_FloatRect& rect, const CFX_FloatRect& rcPage) {
37 constexpr float kMinSize = 0.000001f;
38 if (rect.IsEmpty() || rect.Width() < kMinSize || rect.Height() < kMinSize)
39 return false;
40
41 if (rcPage.IsEmpty())
42 return true;
43
44 constexpr float kMinBorderSize = 10.000001f;
45 return rect.left - rcPage.left >= -kMinBorderSize &&
46 rect.right - rcPage.right <= kMinBorderSize &&
47 rect.top - rcPage.top <= kMinBorderSize &&
48 rect.bottom - rcPage.bottom >= -kMinBorderSize;
49 }
50
GetContentsRect(CPDF_Document * pDoc,CPDF_Dictionary * pDict,std::vector<CFX_FloatRect> * pRectArray)51 void GetContentsRect(CPDF_Document* pDoc,
52 CPDF_Dictionary* pDict,
53 std::vector<CFX_FloatRect>* pRectArray) {
54 auto pPDFPage = pdfium::MakeRetain<CPDF_Page>(pDoc, pDict);
55 pPDFPage->ParseContent();
56
57 for (const auto& pPageObject : *pPDFPage) {
58 const CFX_FloatRect& rc = pPageObject->GetRect();
59 if (IsValidRect(rc, pDict->GetRectFor(pdfium::page_object::kMediaBox)))
60 pRectArray->push_back(rc);
61 }
62 }
63
ParserStream(CPDF_Dictionary * pPageDic,CPDF_Dictionary * pStream,std::vector<CFX_FloatRect> * pRectArray,std::vector<CPDF_Dictionary * > * pObjectArray)64 void ParserStream(CPDF_Dictionary* pPageDic,
65 CPDF_Dictionary* pStream,
66 std::vector<CFX_FloatRect>* pRectArray,
67 std::vector<CPDF_Dictionary*>* pObjectArray) {
68 if (!pStream)
69 return;
70 CFX_FloatRect rect;
71 if (pStream->KeyExist("Rect"))
72 rect = pStream->GetRectFor("Rect");
73 else if (pStream->KeyExist("BBox"))
74 rect = pStream->GetRectFor("BBox");
75
76 if (IsValidRect(rect, pPageDic->GetRectFor(pdfium::page_object::kMediaBox)))
77 pRectArray->push_back(rect);
78
79 pObjectArray->push_back(pStream);
80 }
81
ParserAnnots(CPDF_Document * pSourceDoc,CPDF_Dictionary * pPageDic,std::vector<CFX_FloatRect> * pRectArray,std::vector<CPDF_Dictionary * > * pObjectArray,int nUsage)82 int ParserAnnots(CPDF_Document* pSourceDoc,
83 CPDF_Dictionary* pPageDic,
84 std::vector<CFX_FloatRect>* pRectArray,
85 std::vector<CPDF_Dictionary*>* pObjectArray,
86 int nUsage) {
87 if (!pSourceDoc)
88 return FLATTEN_FAIL;
89
90 GetContentsRect(pSourceDoc, pPageDic, pRectArray);
91 CPDF_Array* pAnnots = pPageDic->GetArrayFor("Annots");
92 if (!pAnnots)
93 return FLATTEN_NOTHINGTODO;
94
95 CPDF_ArrayLocker locker(pAnnots);
96 for (const auto& pAnnot : locker) {
97 CPDF_Dictionary* pAnnotDict = ToDictionary(pAnnot->GetDirect());
98 if (!pAnnotDict)
99 continue;
100
101 ByteString sSubtype =
102 pAnnotDict->GetStringFor(pdfium::annotation::kSubtype);
103 if (sSubtype == "Popup")
104 continue;
105
106 int nAnnotFlag = pAnnotDict->GetIntegerFor("F");
107 if (nAnnotFlag & pdfium::annotation_flags::kHidden)
108 continue;
109
110 bool bParseStream;
111 if (nUsage == FLAT_NORMALDISPLAY)
112 bParseStream = !(nAnnotFlag & pdfium::annotation_flags::kInvisible);
113 else
114 bParseStream = !!(nAnnotFlag & pdfium::annotation_flags::kPrint);
115 if (bParseStream)
116 ParserStream(pPageDic, pAnnotDict, pRectArray, pObjectArray);
117 }
118 return FLATTEN_SUCCESS;
119 }
120
GetMinMaxValue(const std::vector<CFX_FloatRect> & array,FPDF_TYPE type,FPDF_VALUE value)121 float GetMinMaxValue(const std::vector<CFX_FloatRect>& array,
122 FPDF_TYPE type,
123 FPDF_VALUE value) {
124 if (array.empty())
125 return 0.0f;
126
127 size_t nRects = array.size();
128 std::vector<float> pArray(nRects);
129 switch (value) {
130 case LEFT:
131 for (size_t i = 0; i < nRects; i++)
132 pArray[i] = array[i].left;
133 break;
134 case TOP:
135 for (size_t i = 0; i < nRects; i++)
136 pArray[i] = array[i].top;
137 break;
138 case RIGHT:
139 for (size_t i = 0; i < nRects; i++)
140 pArray[i] = array[i].right;
141 break;
142 case BOTTOM:
143 for (size_t i = 0; i < nRects; i++)
144 pArray[i] = array[i].bottom;
145 break;
146 default:
147 NOTREACHED();
148 return 0.0f;
149 }
150
151 float fRet = pArray[0];
152 if (type == MAX) {
153 for (size_t i = 1; i < nRects; i++)
154 fRet = std::max(fRet, pArray[i]);
155 } else {
156 for (size_t i = 1; i < nRects; i++)
157 fRet = std::min(fRet, pArray[i]);
158 }
159 return fRet;
160 }
161
CalculateRect(std::vector<CFX_FloatRect> * pRectArray)162 CFX_FloatRect CalculateRect(std::vector<CFX_FloatRect>* pRectArray) {
163 CFX_FloatRect rcRet;
164
165 rcRet.left = GetMinMaxValue(*pRectArray, MIN, LEFT);
166 rcRet.top = GetMinMaxValue(*pRectArray, MAX, TOP);
167 rcRet.right = GetMinMaxValue(*pRectArray, MAX, RIGHT);
168 rcRet.bottom = GetMinMaxValue(*pRectArray, MIN, BOTTOM);
169
170 return rcRet;
171 }
172
// Builds the content-stream snippet that paints the XObject named |key|
// between a save/restore pair.
ByteString GenerateFlattenedContent(const ByteString& key) {
  ByteString sContent = "q 1 0 0 1 0 0 cm /";
  sContent += key;
  sContent += " Do Q";
  return sContent;
}
176
NewIndirectContentsStream(CPDF_Document * pDocument,const ByteString & contents)177 CPDF_Object* NewIndirectContentsStream(CPDF_Document* pDocument,
178 const ByteString& contents) {
179 CPDF_Stream* pNewContents = pDocument->NewIndirect<CPDF_Stream>(
180 nullptr, 0, pDocument->New<CPDF_Dictionary>());
181 pNewContents->SetData(contents.raw_span());
182 return pNewContents;
183 }
184
SetPageContents(const ByteString & key,CPDF_Dictionary * pPage,CPDF_Document * pDocument)185 void SetPageContents(const ByteString& key,
186 CPDF_Dictionary* pPage,
187 CPDF_Document* pDocument) {
188 CPDF_Array* pContentsArray =
189 pPage->GetArrayFor(pdfium::page_object::kContents);
190 CPDF_Stream* pContentsStream =
191 pPage->GetStreamFor(pdfium::page_object::kContents);
192 if (!pContentsStream && !pContentsArray) {
193 if (!key.IsEmpty()) {
194 pPage->SetFor(
195 pdfium::page_object::kContents,
196 NewIndirectContentsStream(pDocument, GenerateFlattenedContent(key))
197 ->MakeReference(pDocument));
198 }
199 return;
200 }
201
202 pPage->ConvertToIndirectObjectFor(pdfium::page_object::kContents, pDocument);
203 if (pContentsArray) {
204 pContentsArray->InsertAt(
205 0, NewIndirectContentsStream(pDocument, "q")->MakeReference(pDocument));
206 pContentsArray->Add(
207 NewIndirectContentsStream(pDocument, "Q")->MakeReference(pDocument));
208 } else {
209 ByteString sStream = "q\n";
210 {
211 auto pAcc = pdfium::MakeRetain<CPDF_StreamAcc>(pContentsStream);
212 pAcc->LoadAllDataFiltered();
213 sStream += ByteString(pAcc->GetSpan());
214 sStream += "\nQ";
215 }
216 pContentsStream->SetDataAndRemoveFilter(sStream.raw_span());
217 pContentsArray = pDocument->NewIndirect<CPDF_Array>();
218 pContentsArray->AddNew<CPDF_Reference>(pDocument,
219 pContentsStream->GetObjNum());
220 pPage->SetNewFor<CPDF_Reference>(pdfium::page_object::kContents, pDocument,
221 pContentsArray->GetObjNum());
222 }
223 if (!key.IsEmpty()) {
224 pContentsArray->Add(
225 NewIndirectContentsStream(pDocument, GenerateFlattenedContent(key))
226 ->MakeReference(pDocument));
227 }
228 }
229
// Computes the scale-and-translate matrix that maps |rcStream|, after it has
// been transformed by |matrix| and normalized, onto |rcAnnot|.
//
// Returns a default-constructed CFX_Matrix when |rcStream| is empty or when
// the transformed rectangle has zero width or height. The latter can happen
// with a singular |matrix| and would otherwise divide by zero, yielding
// inf/NaN coefficients.
CFX_Matrix GetMatrix(const CFX_FloatRect& rcAnnot,
                     const CFX_FloatRect& rcStream,
                     const CFX_Matrix& matrix) {
  if (rcStream.IsEmpty())
    return CFX_Matrix();

  CFX_FloatRect rcTransformed = matrix.TransformRect(rcStream);
  rcTransformed.Normalize();

  const float fWidth = rcTransformed.Width();
  const float fHeight = rcTransformed.Height();
  // Exact comparison is intended: only a true zero divisor is rejected.
  if (fWidth == 0.0f || fHeight == 0.0f)
    return CFX_Matrix();

  // Scale factors that stretch the transformed box to the annotation size.
  const float a = rcAnnot.Width() / fWidth;
  const float d = rcAnnot.Height() / fHeight;

  // Translation that aligns the scaled box's lower-left corner with the
  // annotation's lower-left corner.
  const float e = rcAnnot.left - rcTransformed.left * a;
  const float f = rcAnnot.bottom - rcTransformed.bottom * d;
  return CFX_Matrix(a, 0.0f, 0.0f, d, e, f);
}
246
247 } // namespace
248
FPDFPage_Flatten(FPDF_PAGE page,int nFlag)249 FPDF_EXPORT int FPDF_CALLCONV FPDFPage_Flatten(FPDF_PAGE page, int nFlag) {
250 CPDF_Page* pPage = CPDFPageFromFPDFPage(page);
251 if (!page)
252 return FLATTEN_FAIL;
253
254 CPDF_Document* pDocument = pPage->GetDocument();
255 CPDF_Dictionary* pPageDict = pPage->GetDict();
256 if (!pDocument)
257 return FLATTEN_FAIL;
258
259 std::vector<CPDF_Dictionary*> ObjectArray;
260 std::vector<CFX_FloatRect> RectArray;
261 int iRet =
262 ParserAnnots(pDocument, pPageDict, &RectArray, &ObjectArray, nFlag);
263 if (iRet == FLATTEN_NOTHINGTODO || iRet == FLATTEN_FAIL)
264 return iRet;
265
266 CFX_FloatRect rcMerger = CalculateRect(&RectArray);
267 CFX_FloatRect rcOriginalMB =
268 pPageDict->GetRectFor(pdfium::page_object::kMediaBox);
269 if (pPageDict->KeyExist(pdfium::page_object::kCropBox))
270 rcOriginalMB = pPageDict->GetRectFor(pdfium::page_object::kCropBox);
271
272 if (rcOriginalMB.IsEmpty())
273 rcOriginalMB = CFX_FloatRect(0.0f, 0.0f, 612.0f, 792.0f);
274
275 CFX_FloatRect rcOriginalCB;
276 if (pPageDict->KeyExist(pdfium::page_object::kCropBox))
277 rcOriginalCB = pPageDict->GetRectFor(pdfium::page_object::kCropBox);
278 if (rcOriginalCB.IsEmpty())
279 rcOriginalCB = rcOriginalMB;
280
281 rcMerger.left = std::max(rcMerger.left, rcOriginalMB.left);
282 rcMerger.right = std::min(rcMerger.right, rcOriginalMB.right);
283 rcMerger.bottom = std::max(rcMerger.bottom, rcOriginalMB.bottom);
284 rcMerger.top = std::min(rcMerger.top, rcOriginalMB.top);
285
286 pPageDict->SetRectFor(pdfium::page_object::kMediaBox, rcOriginalMB);
287 pPageDict->SetRectFor(pdfium::page_object::kCropBox, rcOriginalCB);
288
289 CPDF_Dictionary* pRes =
290 pPageDict->GetDictFor(pdfium::page_object::kResources);
291 if (!pRes) {
292 pRes =
293 pPageDict->SetNewFor<CPDF_Dictionary>(pdfium::page_object::kResources);
294 }
295
296 CPDF_Stream* pNewXObject = pDocument->NewIndirect<CPDF_Stream>(
297 nullptr, 0, pDocument->New<CPDF_Dictionary>());
298
299 CPDF_Dictionary* pPageXObject = pRes->GetDictFor("XObject");
300 if (!pPageXObject)
301 pPageXObject = pRes->SetNewFor<CPDF_Dictionary>("XObject");
302
303 ByteString key;
304 if (!ObjectArray.empty()) {
305 int i = 0;
306 while (i < INT_MAX) {
307 ByteString sKey = ByteString::Format("FFT%d", i);
308 if (!pPageXObject->KeyExist(sKey)) {
309 key = std::move(sKey);
310 break;
311 }
312 ++i;
313 }
314 }
315
316 SetPageContents(key, pPageDict, pDocument);
317
318 CPDF_Dictionary* pNewXORes = nullptr;
319 if (!key.IsEmpty()) {
320 pPageXObject->SetNewFor<CPDF_Reference>(key, pDocument,
321 pNewXObject->GetObjNum());
322
323 CPDF_Dictionary* pNewOXbjectDic = pNewXObject->GetDict();
324 pNewXORes = pNewOXbjectDic->SetNewFor<CPDF_Dictionary>("Resources");
325 pNewOXbjectDic->SetNewFor<CPDF_Name>("Type", "XObject");
326 pNewOXbjectDic->SetNewFor<CPDF_Name>("Subtype", "Form");
327 pNewOXbjectDic->SetNewFor<CPDF_Number>("FormType", 1);
328 pNewOXbjectDic->SetRectFor("BBox", rcOriginalCB);
329 }
330
331 for (size_t i = 0; i < ObjectArray.size(); ++i) {
332 CPDF_Dictionary* pAnnotDict = ObjectArray[i];
333 if (!pAnnotDict)
334 continue;
335
336 CFX_FloatRect rcAnnot = pAnnotDict->GetRectFor(pdfium::annotation::kRect);
337 rcAnnot.Normalize();
338
339 ByteString sAnnotState = pAnnotDict->GetStringFor("AS");
340 CPDF_Dictionary* pAnnotAP = pAnnotDict->GetDictFor(pdfium::annotation::kAP);
341 if (!pAnnotAP)
342 continue;
343
344 CPDF_Stream* pAPStream = pAnnotAP->GetStreamFor("N");
345 if (!pAPStream) {
346 CPDF_Dictionary* pAPDict = pAnnotAP->GetDictFor("N");
347 if (!pAPDict)
348 continue;
349
350 if (!sAnnotState.IsEmpty()) {
351 pAPStream = pAPDict->GetStreamFor(sAnnotState);
352 } else {
353 if (pAPDict->size() > 0) {
354 CPDF_DictionaryLocker locker(pAPDict);
355 CPDF_Object* pFirstObj = locker.begin()->second.Get();
356 if (pFirstObj) {
357 if (pFirstObj->IsReference())
358 pFirstObj = pFirstObj->GetDirect();
359 if (!pFirstObj->IsStream())
360 continue;
361 pAPStream = pFirstObj->AsStream();
362 }
363 }
364 }
365 }
366 if (!pAPStream)
367 continue;
368
369 CPDF_Dictionary* pAPDict = pAPStream->GetDict();
370 CFX_FloatRect rcStream;
371 if (pAPDict->KeyExist("Rect"))
372 rcStream = pAPDict->GetRectFor("Rect");
373 else if (pAPDict->KeyExist("BBox"))
374 rcStream = pAPDict->GetRectFor("BBox");
375 rcStream.Normalize();
376
377 if (rcStream.IsEmpty())
378 continue;
379
380 CPDF_Object* pObj = pAPStream;
381 if (pObj->IsInline()) {
382 RetainPtr<CPDF_Object> pNew = pObj->Clone();
383 pObj = pNew.Get();
384 pDocument->AddIndirectObject(std::move(pNew));
385 }
386
387 CPDF_Dictionary* pObjDict = pObj->GetDict();
388 if (pObjDict) {
389 pObjDict->SetNewFor<CPDF_Name>("Type", "XObject");
390 pObjDict->SetNewFor<CPDF_Name>("Subtype", "Form");
391 }
392
393 CPDF_Dictionary* pXObject = pNewXORes->GetDictFor("XObject");
394 if (!pXObject)
395 pXObject = pNewXORes->SetNewFor<CPDF_Dictionary>("XObject");
396
397 ByteString sFormName = ByteString::Format("F%d", i);
398 pXObject->SetNewFor<CPDF_Reference>(sFormName, pDocument,
399 pObj->GetObjNum());
400
401 ByteString sStream;
402 {
403 auto pAcc = pdfium::MakeRetain<CPDF_StreamAcc>(pNewXObject);
404 pAcc->LoadAllDataFiltered();
405 sStream = ByteString(pAcc->GetSpan());
406 }
407 CFX_Matrix matrix = pAPDict->GetMatrixFor("Matrix");
408 CFX_Matrix m = GetMatrix(rcAnnot, rcStream, matrix);
409 m.b = 0;
410 m.c = 0;
411 std::ostringstream buf;
412 buf << m;
413 ByteString str(buf);
414 sStream += ByteString::Format("q %s cm /%s Do Q\n", str.c_str(),
415 sFormName.c_str());
416 pNewXObject->SetDataAndRemoveFilter(sStream.raw_span());
417 }
418 pPageDict->RemoveFor("Annots");
419 return FLATTEN_SUCCESS;
420 }
421