1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7 #include "../../include/fpdfapi/fpdf_parser.h"
8 #include "../../include/fpdfapi/fpdf_page.h"
9 #include "../../include/fpdfdoc/fpdf_tagged.h"
10 #include "tagged_int.h"
// Deepest recursion level accepted by the recursive helpers in this file
// (AddPageNode, FindAttrDict, GetAttr); beyond it they give up and return NULL.
static const int nMaxRecursion = 32;
IsTagged(const CPDF_Document * pDoc)12 static FX_BOOL IsTagged(const CPDF_Document* pDoc)
13 {
14 CPDF_Dictionary* pCatalog = pDoc->GetRoot();
15 CPDF_Dictionary* pMarkInfo = pCatalog->GetDict(FX_BSTRC("MarkInfo"));
16 return pMarkInfo != NULL && pMarkInfo->GetInteger(FX_BSTRC("Marked"));
17 }
LoadPage(const CPDF_Document * pDoc,const CPDF_Dictionary * pPageDict)18 CPDF_StructTree* CPDF_StructTree::LoadPage(const CPDF_Document* pDoc, const CPDF_Dictionary* pPageDict)
19 {
20 if (!IsTagged(pDoc)) {
21 return NULL;
22 }
23 CPDF_StructTreeImpl* pTree = new CPDF_StructTreeImpl(pDoc);
24 pTree->LoadPageTree(pPageDict);
25 return pTree;
26 }
LoadDoc(const CPDF_Document * pDoc)27 CPDF_StructTree* CPDF_StructTree::LoadDoc(const CPDF_Document* pDoc)
28 {
29 if (!IsTagged(pDoc)) {
30 return NULL;
31 }
32 CPDF_StructTreeImpl* pTree = new CPDF_StructTreeImpl(pDoc);
33 pTree->LoadDocTree();
34 return pTree;
35 }
CPDF_StructTreeImpl(const CPDF_Document * pDoc)36 CPDF_StructTreeImpl::CPDF_StructTreeImpl(const CPDF_Document* pDoc)
37 {
38 CPDF_Dictionary* pCatalog = pDoc->GetRoot();
39 m_pTreeRoot = pCatalog->GetDict(FX_BSTRC("StructTreeRoot"));
40 if (m_pTreeRoot == NULL) {
41 return;
42 }
43 m_pRoleMap = m_pTreeRoot->GetDict(FX_BSTRC("RoleMap"));
44 }
~CPDF_StructTreeImpl()45 CPDF_StructTreeImpl::~CPDF_StructTreeImpl()
46 {
47 for (int i = 0; i < m_Kids.GetSize(); i ++)
48 if (m_Kids[i]) {
49 m_Kids[i]->Release();
50 }
51 }
LoadDocTree()52 void CPDF_StructTreeImpl::LoadDocTree()
53 {
54 m_pPage = NULL;
55 if (m_pTreeRoot == NULL) {
56 return;
57 }
58 CPDF_Object* pKids = m_pTreeRoot->GetElementValue(FX_BSTRC("K"));
59 if (pKids == NULL) {
60 return;
61 }
62 if (pKids->GetType() == PDFOBJ_DICTIONARY) {
63 CPDF_StructElementImpl* pStructElementImpl = new CPDF_StructElementImpl(this, NULL, (CPDF_Dictionary*)pKids);
64 m_Kids.Add(pStructElementImpl);
65 return;
66 }
67 if (pKids->GetType() != PDFOBJ_ARRAY) {
68 return;
69 }
70 CPDF_Array* pArray = (CPDF_Array*)pKids;
71 for (FX_DWORD i = 0; i < pArray->GetCount(); i ++) {
72 CPDF_Dictionary* pKid = pArray->GetDict(i);
73 CPDF_StructElementImpl* pStructElementImpl = new CPDF_StructElementImpl(this, NULL, pKid);
74 m_Kids.Add(pStructElementImpl);
75 }
76 }
LoadPageTree(const CPDF_Dictionary * pPageDict)77 void CPDF_StructTreeImpl::LoadPageTree(const CPDF_Dictionary* pPageDict)
78 {
79 m_pPage = pPageDict;
80 if (m_pTreeRoot == NULL) {
81 return;
82 }
83 CPDF_Object* pKids = m_pTreeRoot->GetElementValue(FX_BSTRC("K"));
84 if (pKids == NULL) {
85 return;
86 }
87 FX_DWORD dwKids = 0;
88 if (pKids->GetType() == PDFOBJ_DICTIONARY) {
89 dwKids = 1;
90 } else if (pKids->GetType() == PDFOBJ_ARRAY) {
91 dwKids = ((CPDF_Array*)pKids)->GetCount();
92 } else {
93 return;
94 }
95 FX_DWORD i;
96 m_Kids.SetSize(dwKids);
97 for (i = 0; i < dwKids; i ++) {
98 m_Kids[i] = NULL;
99 }
100 CFX_MapPtrToPtr element_map;
101 CPDF_Dictionary* pParentTree = m_pTreeRoot->GetDict(FX_BSTRC("ParentTree"));
102 if (pParentTree == NULL) {
103 return;
104 }
105 CPDF_NumberTree parent_tree(pParentTree);
106 int parents_id = pPageDict->GetInteger(FX_BSTRC("StructParents"), -1);
107 if (parents_id >= 0) {
108 CPDF_Object* pParents = parent_tree.LookupValue(parents_id);
109 if (pParents == NULL || pParents->GetType() != PDFOBJ_ARRAY) {
110 return;
111 }
112 CPDF_Array* pParentArray = (CPDF_Array*)pParents;
113 for (i = 0; i < pParentArray->GetCount(); i ++) {
114 CPDF_Dictionary* pParent = pParentArray->GetDict(i);
115 if (pParent == NULL) {
116 continue;
117 }
118 AddPageNode(pParent, element_map);
119 }
120 }
121 }
AddPageNode(CPDF_Dictionary * pDict,CFX_MapPtrToPtr & map,int nLevel)122 CPDF_StructElementImpl* CPDF_StructTreeImpl::AddPageNode(CPDF_Dictionary* pDict, CFX_MapPtrToPtr& map, int nLevel)
123 {
124 if (nLevel > nMaxRecursion) {
125 return NULL;
126 }
127 CPDF_StructElementImpl* pElement = NULL;
128 if (map.Lookup(pDict, (FX_LPVOID&)pElement)) {
129 return pElement;
130 }
131 pElement = new CPDF_StructElementImpl(this, NULL, pDict);
132 map.SetAt(pDict, pElement);
133 CPDF_Dictionary* pParent = pDict->GetDict(FX_BSTRC("P"));
134 if (pParent == NULL || pParent->GetString(FX_BSTRC("Type")) == FX_BSTRC("StructTreeRoot")) {
135 if (!AddTopLevelNode(pDict, pElement)) {
136 pElement->Release();
137 map.RemoveKey(pDict);
138 }
139 } else {
140 CPDF_StructElementImpl* pParentElement = AddPageNode(pParent, map, nLevel + 1);
141 FX_BOOL bSave = FALSE;
142 for (int i = 0; i < pParentElement->m_Kids.GetSize(); i ++) {
143 if (pParentElement->m_Kids[i].m_Type != CPDF_StructKid::Element) {
144 continue;
145 }
146 if (pParentElement->m_Kids[i].m_Element.m_pDict != pDict) {
147 continue;
148 }
149 pParentElement->m_Kids[i].m_Element.m_pElement = pElement->Retain();
150 bSave = TRUE;
151 }
152 if (!bSave) {
153 pElement->Release();
154 map.RemoveKey(pDict);
155 }
156 }
157 return pElement;
158 }
AddTopLevelNode(CPDF_Dictionary * pDict,CPDF_StructElementImpl * pElement)159 FX_BOOL CPDF_StructTreeImpl::AddTopLevelNode(CPDF_Dictionary* pDict, CPDF_StructElementImpl* pElement)
160 {
161 CPDF_Object *pObj = m_pTreeRoot->GetElementValue(FX_BSTRC("K"));
162 if (!pObj) {
163 return FALSE;
164 }
165 if (pObj->GetType() == PDFOBJ_DICTIONARY) {
166 if (pObj->GetObjNum() == pDict->GetObjNum()) {
167 if (m_Kids[0]) {
168 m_Kids[0]->Release();
169 }
170 m_Kids[0] = pElement->Retain();
171 } else {
172 return FALSE;
173 }
174 }
175 if (pObj->GetType() == PDFOBJ_ARRAY) {
176 CPDF_Array* pTopKids = (CPDF_Array*)pObj;
177 FX_DWORD i;
178 FX_BOOL bSave = FALSE;
179 for (i = 0; i < pTopKids->GetCount(); i ++) {
180 CPDF_Object* pKidRef = pTopKids->GetElement(i);
181 if (pKidRef == NULL || pKidRef->GetType() != PDFOBJ_REFERENCE) {
182 continue;
183 }
184 if (((CPDF_Reference*) pKidRef)->GetRefObjNum() != pDict->GetObjNum()) {
185 continue;
186 }
187 if (m_Kids[i]) {
188 m_Kids[i]->Release();
189 }
190 m_Kids[i] = pElement->Retain();
191 bSave = TRUE;
192 }
193 if (!bSave) {
194 return FALSE;
195 }
196 }
197 return TRUE;
198 }
CPDF_StructElementImpl(CPDF_StructTreeImpl * pTree,CPDF_StructElementImpl * pParent,CPDF_Dictionary * pDict)199 CPDF_StructElementImpl::CPDF_StructElementImpl(CPDF_StructTreeImpl* pTree, CPDF_StructElementImpl* pParent, CPDF_Dictionary* pDict)
200 : m_RefCount(0)
201 {
202 m_pTree = pTree;
203 m_pDict = pDict;
204 m_Type = pDict->GetString(FX_BSTRC("S"));
205 if (pTree->m_pRoleMap) {
206 CFX_ByteString mapped = pTree->m_pRoleMap->GetString(m_Type);
207 if (!mapped.IsEmpty()) {
208 m_Type = mapped;
209 }
210 }
211 m_pParent = pParent;
212 LoadKids(pDict);
213 }
~CPDF_StructElementImpl()214 CPDF_StructElementImpl::~CPDF_StructElementImpl()
215 {
216 for (int i = 0; i < m_Kids.GetSize(); i ++) {
217 if (m_Kids[i].m_Type == CPDF_StructKid::Element && m_Kids[i].m_Element.m_pElement) {
218 ((CPDF_StructElementImpl*)m_Kids[i].m_Element.m_pElement)->Release();
219 }
220 }
221 }
Retain()222 CPDF_StructElementImpl* CPDF_StructElementImpl::Retain()
223 {
224 m_RefCount++;
225 return this;
226 }
Release()227 void CPDF_StructElementImpl::Release()
228 {
229 if(--m_RefCount < 1) {
230 delete this;
231 }
232 }
LoadKids(CPDF_Dictionary * pDict)233 void CPDF_StructElementImpl::LoadKids(CPDF_Dictionary* pDict)
234 {
235 CPDF_Object* pObj = pDict->GetElement(FX_BSTRC("Pg"));
236 FX_DWORD PageObjNum = 0;
237 if (pObj && pObj->GetType() == PDFOBJ_REFERENCE) {
238 PageObjNum = ((CPDF_Reference*)pObj)->GetRefObjNum();
239 }
240 CPDF_Object* pKids = pDict->GetElementValue(FX_BSTRC("K"));
241 if (pKids == NULL) {
242 return;
243 }
244 if (pKids->GetType() == PDFOBJ_ARRAY) {
245 CPDF_Array* pArray = (CPDF_Array*)pKids;
246 m_Kids.SetSize(pArray->GetCount());
247 for (FX_DWORD i = 0; i < pArray->GetCount(); i ++) {
248 CPDF_Object* pKid = pArray->GetElementValue(i);
249 LoadKid(PageObjNum, pKid, &m_Kids[i]);
250 }
251 } else {
252 m_Kids.SetSize(1);
253 LoadKid(PageObjNum, pKids, &m_Kids[0]);
254 }
255 }
LoadKid(FX_DWORD PageObjNum,CPDF_Object * pKidObj,CPDF_StructKid * pKid)256 void CPDF_StructElementImpl::LoadKid(FX_DWORD PageObjNum, CPDF_Object* pKidObj, CPDF_StructKid* pKid)
257 {
258 pKid->m_Type = CPDF_StructKid::Invalid;
259 if (pKidObj == NULL) {
260 return;
261 }
262 if (pKidObj->GetType() == PDFOBJ_NUMBER) {
263 if (m_pTree->m_pPage && m_pTree->m_pPage->GetObjNum() != PageObjNum) {
264 return;
265 }
266 pKid->m_Type = CPDF_StructKid::PageContent;
267 pKid->m_PageContent.m_ContentId = pKidObj->GetInteger();
268 pKid->m_PageContent.m_PageObjNum = PageObjNum;
269 return;
270 }
271 if (pKidObj->GetType() != PDFOBJ_DICTIONARY) {
272 return;
273 }
274 CPDF_Dictionary* pKidDict = (CPDF_Dictionary*)pKidObj;
275 CPDF_Object* pPageObj = pKidDict->GetElement(FX_BSTRC("Pg"));
276 if (pPageObj && pPageObj->GetType() == PDFOBJ_REFERENCE) {
277 PageObjNum = ((CPDF_Reference*)pPageObj)->GetRefObjNum();
278 }
279 CFX_ByteString type = pKidDict->GetString(FX_BSTRC("Type"));
280 if (type == FX_BSTRC("MCR")) {
281 if (m_pTree->m_pPage && m_pTree->m_pPage->GetObjNum() != PageObjNum) {
282 return;
283 }
284 pKid->m_Type = CPDF_StructKid::StreamContent;
285 CPDF_Object* pStreamObj = pKidDict->GetElement(FX_BSTRC("Stm"));
286 if (pStreamObj && pStreamObj->GetType() == PDFOBJ_REFERENCE) {
287 pKid->m_StreamContent.m_RefObjNum = ((CPDF_Reference*)pStreamObj)->GetRefObjNum();
288 } else {
289 pKid->m_StreamContent.m_RefObjNum = 0;
290 }
291 pKid->m_StreamContent.m_PageObjNum = PageObjNum;
292 pKid->m_StreamContent.m_ContentId = pKidDict->GetInteger(FX_BSTRC("MCID"));
293 } else if (type == FX_BSTRC("OBJR")) {
294 if (m_pTree->m_pPage && m_pTree->m_pPage->GetObjNum() != PageObjNum) {
295 return;
296 }
297 pKid->m_Type = CPDF_StructKid::Object;
298 CPDF_Object* pObj = pKidDict->GetElement(FX_BSTRC("Obj"));
299 if (pObj && pObj->GetType() == PDFOBJ_REFERENCE) {
300 pKid->m_Object.m_RefObjNum = ((CPDF_Reference*)pObj)->GetRefObjNum();
301 } else {
302 pKid->m_Object.m_RefObjNum = 0;
303 }
304 pKid->m_Object.m_PageObjNum = PageObjNum;
305 } else {
306 pKid->m_Type = CPDF_StructKid::Element;
307 pKid->m_Element.m_pDict = pKidDict;
308 if (m_pTree->m_pPage == NULL) {
309 pKid->m_Element.m_pElement = new CPDF_StructElementImpl(m_pTree, this, pKidDict);
310 } else {
311 pKid->m_Element.m_pElement = NULL;
312 }
313 }
314 }
FindAttrDict(CPDF_Object * pAttrs,FX_BSTR owner,FX_FLOAT nLevel=0.0F)315 static CPDF_Dictionary* FindAttrDict(CPDF_Object* pAttrs, FX_BSTR owner, FX_FLOAT nLevel = 0.0F)
316 {
317 if (nLevel > nMaxRecursion) {
318 return NULL;
319 }
320 if (pAttrs == NULL) {
321 return NULL;
322 }
323 CPDF_Dictionary* pDict = NULL;
324 if (pAttrs->GetType() == PDFOBJ_DICTIONARY) {
325 pDict = (CPDF_Dictionary*)pAttrs;
326 } else if (pAttrs->GetType() == PDFOBJ_STREAM) {
327 pDict = ((CPDF_Stream*)pAttrs)->GetDict();
328 } else if (pAttrs->GetType() == PDFOBJ_ARRAY) {
329 CPDF_Array* pArray = (CPDF_Array*)pAttrs;
330 for (FX_DWORD i = 0; i < pArray->GetCount(); i ++) {
331 CPDF_Object* pElement = pArray->GetElementValue(i);
332 pDict = FindAttrDict(pElement, owner, nLevel + 1);
333 if (pDict) {
334 return pDict;
335 }
336 }
337 }
338 if (pDict && pDict->GetString(FX_BSTRC("O")) == owner) {
339 return pDict;
340 }
341 return NULL;
342 }
GetAttr(FX_BSTR owner,FX_BSTR name,FX_BOOL bInheritable,FX_FLOAT fLevel)343 CPDF_Object* CPDF_StructElementImpl::GetAttr(FX_BSTR owner, FX_BSTR name, FX_BOOL bInheritable, FX_FLOAT fLevel)
344 {
345 if (fLevel > nMaxRecursion) {
346 return NULL;
347 }
348 if (bInheritable) {
349 CPDF_Object* pAttr = GetAttr(owner, name, FALSE);
350 if (pAttr) {
351 return pAttr;
352 }
353 if (m_pParent == NULL) {
354 return NULL;
355 }
356 return m_pParent->GetAttr(owner, name, TRUE, fLevel + 1);
357 }
358 CPDF_Object* pA = m_pDict->GetElementValue(FX_BSTRC("A"));
359 if (pA) {
360 CPDF_Dictionary* pAttrDict = FindAttrDict(pA, owner);
361 if (pAttrDict) {
362 CPDF_Object* pAttr = pAttrDict->GetElementValue(name);
363 if (pAttr) {
364 return pAttr;
365 }
366 }
367 }
368 CPDF_Object* pC = m_pDict->GetElementValue(FX_BSTRC("C"));
369 if (pC == NULL) {
370 return NULL;
371 }
372 CPDF_Dictionary* pClassMap = m_pTree->m_pTreeRoot->GetDict(FX_BSTRC("ClassMap"));
373 if (pClassMap == NULL) {
374 return NULL;
375 }
376 if (pC->GetType() == PDFOBJ_ARRAY) {
377 CPDF_Array* pArray = (CPDF_Array*)pC;
378 for (FX_DWORD i = 0; i < pArray->GetCount(); i ++) {
379 CFX_ByteString class_name = pArray->GetString(i);
380 CPDF_Dictionary* pClassDict = pClassMap->GetDict(class_name);
381 if (pClassDict && pClassDict->GetString(FX_BSTRC("O")) == owner) {
382 return pClassDict->GetElementValue(name);
383 }
384 }
385 return NULL;
386 }
387 CFX_ByteString class_name = pC->GetString();
388 CPDF_Dictionary* pClassDict = pClassMap->GetDict(class_name);
389 if (pClassDict && pClassDict->GetString(FX_BSTRC("O")) == owner) {
390 return pClassDict->GetElementValue(name);
391 }
392 return NULL;
393 }
GetAttr(FX_BSTR owner,FX_BSTR name,FX_BOOL bInheritable,int subindex)394 CPDF_Object* CPDF_StructElementImpl::GetAttr(FX_BSTR owner, FX_BSTR name, FX_BOOL bInheritable, int subindex)
395 {
396 CPDF_Object* pAttr = GetAttr(owner, name, bInheritable);
397 if (pAttr == NULL || subindex == -1 || pAttr->GetType() != PDFOBJ_ARRAY) {
398 return pAttr;
399 }
400 CPDF_Array* pArray = (CPDF_Array*)pAttr;
401 if (subindex >= (int)pArray->GetCount()) {
402 return pAttr;
403 }
404 return pArray->GetElementValue(subindex);
405 }
GetName(FX_BSTR owner,FX_BSTR name,FX_BSTR default_value,FX_BOOL bInheritable,int subindex)406 CFX_ByteString CPDF_StructElementImpl::GetName(FX_BSTR owner, FX_BSTR name, FX_BSTR default_value, FX_BOOL bInheritable, int subindex)
407 {
408 CPDF_Object* pAttr = GetAttr(owner, name, bInheritable, subindex);
409 if (pAttr == NULL || pAttr->GetType() != PDFOBJ_NAME) {
410 return default_value;
411 }
412 return pAttr->GetString();
413 }
GetColor(FX_BSTR owner,FX_BSTR name,FX_ARGB default_value,FX_BOOL bInheritable,int subindex)414 FX_ARGB CPDF_StructElementImpl::GetColor(FX_BSTR owner, FX_BSTR name, FX_ARGB default_value, FX_BOOL bInheritable, int subindex)
415 {
416 CPDF_Object* pAttr = GetAttr(owner, name, bInheritable, subindex);
417 if (pAttr == NULL || pAttr->GetType() != PDFOBJ_ARRAY) {
418 return default_value;
419 }
420 CPDF_Array* pArray = (CPDF_Array*)pAttr;
421 return 0xff000000 | ((int)(pArray->GetNumber(0) * 255) << 16) | ((int)(pArray->GetNumber(1) * 255) << 8) | (int)(pArray->GetNumber(2) * 255);
422 }
GetNumber(FX_BSTR owner,FX_BSTR name,FX_FLOAT default_value,FX_BOOL bInheritable,int subindex)423 FX_FLOAT CPDF_StructElementImpl::GetNumber(FX_BSTR owner, FX_BSTR name, FX_FLOAT default_value, FX_BOOL bInheritable, int subindex)
424 {
425 CPDF_Object* pAttr = GetAttr(owner, name, bInheritable, subindex);
426 if (pAttr == NULL || pAttr->GetType() != PDFOBJ_NUMBER) {
427 return default_value;
428 }
429 return pAttr->GetNumber();
430 }
GetInteger(FX_BSTR owner,FX_BSTR name,int default_value,FX_BOOL bInheritable,int subindex)431 int CPDF_StructElementImpl::GetInteger(FX_BSTR owner, FX_BSTR name, int default_value, FX_BOOL bInheritable, int subindex)
432 {
433 CPDF_Object* pAttr = GetAttr(owner, name, bInheritable, subindex);
434 if (pAttr == NULL || pAttr->GetType() != PDFOBJ_NUMBER) {
435 return default_value;
436 }
437 return pAttr->GetInteger();
438 }
439