1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #include "../../include/fpdfapi/fpdf_parser.h"
8 #include "../../include/fpdfapi/fpdf_page.h"
9 #include "../../include/fpdfdoc/fpdf_tagged.h"
10 #include "tagged_int.h"
// Upper bound on nesting depth: AddPageNode(), FindAttrDict() and GetAttr()
// in this file give up and return NULL once their level argument exceeds it.
const int nMaxRecursion = 32;
IsTagged(const CPDF_Document * pDoc)12 static FX_BOOL IsTagged(const CPDF_Document* pDoc)
13 {
14     CPDF_Dictionary* pCatalog = pDoc->GetRoot();
15     CPDF_Dictionary* pMarkInfo = pCatalog->GetDict(FX_BSTRC("MarkInfo"));
16     return pMarkInfo != NULL && pMarkInfo->GetInteger(FX_BSTRC("Marked"));
17 }
LoadPage(const CPDF_Document * pDoc,const CPDF_Dictionary * pPageDict)18 CPDF_StructTree* CPDF_StructTree::LoadPage(const CPDF_Document* pDoc, const CPDF_Dictionary* pPageDict)
19 {
20     if (!IsTagged(pDoc)) {
21         return NULL;
22     }
23     CPDF_StructTreeImpl* pTree = new CPDF_StructTreeImpl(pDoc);
24     pTree->LoadPageTree(pPageDict);
25     return pTree;
26 }
LoadDoc(const CPDF_Document * pDoc)27 CPDF_StructTree* CPDF_StructTree::LoadDoc(const CPDF_Document* pDoc)
28 {
29     if (!IsTagged(pDoc)) {
30         return NULL;
31     }
32     CPDF_StructTreeImpl* pTree = new CPDF_StructTreeImpl(pDoc);
33     pTree->LoadDocTree();
34     return pTree;
35 }
CPDF_StructTreeImpl(const CPDF_Document * pDoc)36 CPDF_StructTreeImpl::CPDF_StructTreeImpl(const CPDF_Document* pDoc)
37 {
38     CPDF_Dictionary* pCatalog = pDoc->GetRoot();
39     m_pTreeRoot = pCatalog->GetDict(FX_BSTRC("StructTreeRoot"));
40     if (m_pTreeRoot == NULL) {
41         return;
42     }
43     m_pRoleMap = m_pTreeRoot->GetDict(FX_BSTRC("RoleMap"));
44 }
~CPDF_StructTreeImpl()45 CPDF_StructTreeImpl::~CPDF_StructTreeImpl()
46 {
47     for (int i = 0; i < m_Kids.GetSize(); i ++)
48         if (m_Kids[i]) {
49             m_Kids[i]->Release();
50         }
51 }
LoadDocTree()52 void CPDF_StructTreeImpl::LoadDocTree()
53 {
54     m_pPage = NULL;
55     if (m_pTreeRoot == NULL) {
56         return;
57     }
58     CPDF_Object* pKids = m_pTreeRoot->GetElementValue(FX_BSTRC("K"));
59     if (pKids == NULL) {
60         return;
61     }
62     if (pKids->GetType() == PDFOBJ_DICTIONARY) {
63         CPDF_StructElementImpl* pStructElementImpl = new CPDF_StructElementImpl(this, NULL, (CPDF_Dictionary*)pKids);
64         m_Kids.Add(pStructElementImpl);
65         return;
66     }
67     if (pKids->GetType() != PDFOBJ_ARRAY) {
68         return;
69     }
70     CPDF_Array* pArray = (CPDF_Array*)pKids;
71     for (FX_DWORD i = 0; i < pArray->GetCount(); i ++) {
72         CPDF_Dictionary* pKid = pArray->GetDict(i);
73         CPDF_StructElementImpl* pStructElementImpl = new CPDF_StructElementImpl(this, NULL, pKid);
74         m_Kids.Add(pStructElementImpl);
75     }
76 }
LoadPageTree(const CPDF_Dictionary * pPageDict)77 void CPDF_StructTreeImpl::LoadPageTree(const CPDF_Dictionary* pPageDict)
78 {
79     m_pPage = pPageDict;
80     if (m_pTreeRoot == NULL) {
81         return;
82     }
83     CPDF_Object* pKids = m_pTreeRoot->GetElementValue(FX_BSTRC("K"));
84     if (pKids == NULL) {
85         return;
86     }
87     FX_DWORD dwKids = 0;
88     if (pKids->GetType() == PDFOBJ_DICTIONARY) {
89         dwKids = 1;
90     } else if (pKids->GetType() == PDFOBJ_ARRAY) {
91         dwKids = ((CPDF_Array*)pKids)->GetCount();
92     } else {
93         return;
94     }
95     FX_DWORD i;
96     m_Kids.SetSize(dwKids);
97     for (i = 0; i < dwKids; i ++) {
98         m_Kids[i] = NULL;
99     }
100     CFX_MapPtrToPtr element_map;
101     CPDF_Dictionary* pParentTree = m_pTreeRoot->GetDict(FX_BSTRC("ParentTree"));
102     if (pParentTree == NULL) {
103         return;
104     }
105     CPDF_NumberTree parent_tree(pParentTree);
106     int parents_id = pPageDict->GetInteger(FX_BSTRC("StructParents"), -1);
107     if (parents_id >= 0) {
108         CPDF_Object* pParents = parent_tree.LookupValue(parents_id);
109         if (pParents == NULL || pParents->GetType() != PDFOBJ_ARRAY) {
110             return;
111         }
112         CPDF_Array* pParentArray = (CPDF_Array*)pParents;
113         for (i = 0; i < pParentArray->GetCount(); i ++) {
114             CPDF_Dictionary* pParent = pParentArray->GetDict(i);
115             if (pParent == NULL) {
116                 continue;
117             }
118             AddPageNode(pParent, element_map);
119         }
120     }
121 }
AddPageNode(CPDF_Dictionary * pDict,CFX_MapPtrToPtr & map,int nLevel)122 CPDF_StructElementImpl* CPDF_StructTreeImpl::AddPageNode(CPDF_Dictionary* pDict, CFX_MapPtrToPtr& map, int nLevel)
123 {
124     if (nLevel > nMaxRecursion) {
125         return NULL;
126     }
127     CPDF_StructElementImpl* pElement = NULL;
128     if (map.Lookup(pDict, (FX_LPVOID&)pElement)) {
129         return pElement;
130     }
131     pElement = new CPDF_StructElementImpl(this, NULL, pDict);
132     map.SetAt(pDict, pElement);
133     CPDF_Dictionary* pParent = pDict->GetDict(FX_BSTRC("P"));
134     if (pParent == NULL || pParent->GetString(FX_BSTRC("Type")) == FX_BSTRC("StructTreeRoot")) {
135         if (!AddTopLevelNode(pDict, pElement)) {
136             pElement->Release();
137             map.RemoveKey(pDict);
138         }
139     } else {
140         CPDF_StructElementImpl* pParentElement = AddPageNode(pParent, map, nLevel + 1);
141         FX_BOOL bSave = FALSE;
142         for (int i = 0; i < pParentElement->m_Kids.GetSize(); i ++) {
143             if (pParentElement->m_Kids[i].m_Type != CPDF_StructKid::Element) {
144                 continue;
145             }
146             if (pParentElement->m_Kids[i].m_Element.m_pDict != pDict) {
147                 continue;
148             }
149             pParentElement->m_Kids[i].m_Element.m_pElement = pElement->Retain();
150             bSave = TRUE;
151         }
152         if (!bSave) {
153             pElement->Release();
154             map.RemoveKey(pDict);
155         }
156     }
157     return pElement;
158 }
AddTopLevelNode(CPDF_Dictionary * pDict,CPDF_StructElementImpl * pElement)159 FX_BOOL CPDF_StructTreeImpl::AddTopLevelNode(CPDF_Dictionary* pDict, CPDF_StructElementImpl* pElement)
160 {
161     CPDF_Object *pObj = m_pTreeRoot->GetElementValue(FX_BSTRC("K"));
162     if (!pObj) {
163         return FALSE;
164     }
165     if (pObj->GetType() == PDFOBJ_DICTIONARY) {
166         if (pObj->GetObjNum() == pDict->GetObjNum()) {
167             if (m_Kids[0]) {
168                 m_Kids[0]->Release();
169             }
170             m_Kids[0] = pElement->Retain();
171         } else {
172             return FALSE;
173         }
174     }
175     if (pObj->GetType() == PDFOBJ_ARRAY) {
176         CPDF_Array* pTopKids = (CPDF_Array*)pObj;
177         FX_DWORD i;
178         FX_BOOL bSave = FALSE;
179         for (i = 0; i < pTopKids->GetCount(); i ++) {
180             CPDF_Object* pKidRef = pTopKids->GetElement(i);
181             if (pKidRef == NULL || pKidRef->GetType() != PDFOBJ_REFERENCE) {
182                 continue;
183             }
184             if (((CPDF_Reference*) pKidRef)->GetRefObjNum() != pDict->GetObjNum()) {
185                 continue;
186             }
187             if (m_Kids[i]) {
188                 m_Kids[i]->Release();
189             }
190             m_Kids[i] = pElement->Retain();
191             bSave = TRUE;
192         }
193         if (!bSave) {
194             return FALSE;
195         }
196     }
197     return TRUE;
198 }
CPDF_StructElementImpl(CPDF_StructTreeImpl * pTree,CPDF_StructElementImpl * pParent,CPDF_Dictionary * pDict)199 CPDF_StructElementImpl::CPDF_StructElementImpl(CPDF_StructTreeImpl* pTree, CPDF_StructElementImpl* pParent, CPDF_Dictionary* pDict)
200     : m_RefCount(0)
201 {
202     m_pTree = pTree;
203     m_pDict = pDict;
204     m_Type = pDict->GetString(FX_BSTRC("S"));
205     if (pTree->m_pRoleMap) {
206         CFX_ByteString mapped = pTree->m_pRoleMap->GetString(m_Type);
207         if (!mapped.IsEmpty()) {
208             m_Type = mapped;
209         }
210     }
211     m_pParent = pParent;
212     LoadKids(pDict);
213 }
~CPDF_StructElementImpl()214 CPDF_StructElementImpl::~CPDF_StructElementImpl()
215 {
216     for (int i = 0; i < m_Kids.GetSize(); i ++) {
217         if (m_Kids[i].m_Type == CPDF_StructKid::Element && m_Kids[i].m_Element.m_pElement) {
218             ((CPDF_StructElementImpl*)m_Kids[i].m_Element.m_pElement)->Release();
219         }
220     }
221 }
Retain()222 CPDF_StructElementImpl* CPDF_StructElementImpl::Retain()
223 {
224     m_RefCount++;
225     return this;
226 }
Release()227 void CPDF_StructElementImpl::Release()
228 {
229     if(--m_RefCount < 1) {
230         delete this;
231     }
232 }
LoadKids(CPDF_Dictionary * pDict)233 void CPDF_StructElementImpl::LoadKids(CPDF_Dictionary* pDict)
234 {
235     CPDF_Object* pObj = pDict->GetElement(FX_BSTRC("Pg"));
236     FX_DWORD PageObjNum = 0;
237     if (pObj && pObj->GetType() == PDFOBJ_REFERENCE) {
238         PageObjNum = ((CPDF_Reference*)pObj)->GetRefObjNum();
239     }
240     CPDF_Object* pKids = pDict->GetElementValue(FX_BSTRC("K"));
241     if (pKids == NULL) {
242         return;
243     }
244     if (pKids->GetType() == PDFOBJ_ARRAY) {
245         CPDF_Array* pArray = (CPDF_Array*)pKids;
246         m_Kids.SetSize(pArray->GetCount());
247         for (FX_DWORD i = 0; i < pArray->GetCount(); i ++) {
248             CPDF_Object* pKid = pArray->GetElementValue(i);
249             LoadKid(PageObjNum, pKid, &m_Kids[i]);
250         }
251     } else {
252         m_Kids.SetSize(1);
253         LoadKid(PageObjNum, pKids, &m_Kids[0]);
254     }
255 }
LoadKid(FX_DWORD PageObjNum,CPDF_Object * pKidObj,CPDF_StructKid * pKid)256 void CPDF_StructElementImpl::LoadKid(FX_DWORD PageObjNum, CPDF_Object* pKidObj, CPDF_StructKid* pKid)
257 {
258     pKid->m_Type = CPDF_StructKid::Invalid;
259     if (pKidObj == NULL) {
260         return;
261     }
262     if (pKidObj->GetType() == PDFOBJ_NUMBER) {
263         if (m_pTree->m_pPage && m_pTree->m_pPage->GetObjNum() != PageObjNum) {
264             return;
265         }
266         pKid->m_Type = CPDF_StructKid::PageContent;
267         pKid->m_PageContent.m_ContentId = pKidObj->GetInteger();
268         pKid->m_PageContent.m_PageObjNum = PageObjNum;
269         return;
270     }
271     if (pKidObj->GetType() != PDFOBJ_DICTIONARY) {
272         return;
273     }
274     CPDF_Dictionary* pKidDict = (CPDF_Dictionary*)pKidObj;
275     CPDF_Object* pPageObj = pKidDict->GetElement(FX_BSTRC("Pg"));
276     if (pPageObj && pPageObj->GetType() == PDFOBJ_REFERENCE) {
277         PageObjNum = ((CPDF_Reference*)pPageObj)->GetRefObjNum();
278     }
279     CFX_ByteString type = pKidDict->GetString(FX_BSTRC("Type"));
280     if (type == FX_BSTRC("MCR")) {
281         if (m_pTree->m_pPage && m_pTree->m_pPage->GetObjNum() != PageObjNum) {
282             return;
283         }
284         pKid->m_Type = CPDF_StructKid::StreamContent;
285         CPDF_Object* pStreamObj = pKidDict->GetElement(FX_BSTRC("Stm"));
286         if (pStreamObj && pStreamObj->GetType() == PDFOBJ_REFERENCE) {
287             pKid->m_StreamContent.m_RefObjNum = ((CPDF_Reference*)pStreamObj)->GetRefObjNum();
288         } else {
289             pKid->m_StreamContent.m_RefObjNum = 0;
290         }
291         pKid->m_StreamContent.m_PageObjNum = PageObjNum;
292         pKid->m_StreamContent.m_ContentId = pKidDict->GetInteger(FX_BSTRC("MCID"));
293     } else if (type == FX_BSTRC("OBJR")) {
294         if (m_pTree->m_pPage && m_pTree->m_pPage->GetObjNum() != PageObjNum) {
295             return;
296         }
297         pKid->m_Type = CPDF_StructKid::Object;
298         CPDF_Object* pObj = pKidDict->GetElement(FX_BSTRC("Obj"));
299         if (pObj && pObj->GetType() == PDFOBJ_REFERENCE) {
300             pKid->m_Object.m_RefObjNum = ((CPDF_Reference*)pObj)->GetRefObjNum();
301         } else {
302             pKid->m_Object.m_RefObjNum = 0;
303         }
304         pKid->m_Object.m_PageObjNum = PageObjNum;
305     } else {
306         pKid->m_Type = CPDF_StructKid::Element;
307         pKid->m_Element.m_pDict = pKidDict;
308         if (m_pTree->m_pPage == NULL) {
309             pKid->m_Element.m_pElement = new CPDF_StructElementImpl(m_pTree, this, pKidDict);
310         } else {
311             pKid->m_Element.m_pElement = NULL;
312         }
313     }
314 }
FindAttrDict(CPDF_Object * pAttrs,FX_BSTR owner,FX_FLOAT nLevel=0.0F)315 static CPDF_Dictionary* FindAttrDict(CPDF_Object* pAttrs, FX_BSTR owner, FX_FLOAT nLevel = 0.0F)
316 {
317     if (nLevel > nMaxRecursion) {
318         return NULL;
319     }
320     if (pAttrs == NULL) {
321         return NULL;
322     }
323     CPDF_Dictionary* pDict = NULL;
324     if (pAttrs->GetType() == PDFOBJ_DICTIONARY) {
325         pDict = (CPDF_Dictionary*)pAttrs;
326     } else if (pAttrs->GetType() == PDFOBJ_STREAM) {
327         pDict = ((CPDF_Stream*)pAttrs)->GetDict();
328     } else if (pAttrs->GetType() == PDFOBJ_ARRAY) {
329         CPDF_Array* pArray = (CPDF_Array*)pAttrs;
330         for (FX_DWORD i = 0; i < pArray->GetCount(); i ++) {
331             CPDF_Object* pElement = pArray->GetElementValue(i);
332             pDict = FindAttrDict(pElement, owner, nLevel + 1);
333             if (pDict) {
334                 return pDict;
335             }
336         }
337     }
338     if (pDict && pDict->GetString(FX_BSTRC("O")) == owner) {
339         return pDict;
340     }
341     return NULL;
342 }
GetAttr(FX_BSTR owner,FX_BSTR name,FX_BOOL bInheritable,FX_FLOAT fLevel)343 CPDF_Object* CPDF_StructElementImpl::GetAttr(FX_BSTR owner, FX_BSTR name, FX_BOOL bInheritable, FX_FLOAT fLevel)
344 {
345     if (fLevel > nMaxRecursion) {
346         return NULL;
347     }
348     if (bInheritable) {
349         CPDF_Object* pAttr = GetAttr(owner, name, FALSE);
350         if (pAttr) {
351             return pAttr;
352         }
353         if (m_pParent == NULL) {
354             return NULL;
355         }
356         return m_pParent->GetAttr(owner, name, TRUE, fLevel + 1);
357     }
358     CPDF_Object* pA = m_pDict->GetElementValue(FX_BSTRC("A"));
359     if (pA) {
360         CPDF_Dictionary* pAttrDict = FindAttrDict(pA, owner);
361         if (pAttrDict) {
362             CPDF_Object* pAttr = pAttrDict->GetElementValue(name);
363             if (pAttr) {
364                 return pAttr;
365             }
366         }
367     }
368     CPDF_Object* pC = m_pDict->GetElementValue(FX_BSTRC("C"));
369     if (pC == NULL) {
370         return NULL;
371     }
372     CPDF_Dictionary* pClassMap = m_pTree->m_pTreeRoot->GetDict(FX_BSTRC("ClassMap"));
373     if (pClassMap == NULL) {
374         return NULL;
375     }
376     if (pC->GetType() == PDFOBJ_ARRAY) {
377         CPDF_Array* pArray = (CPDF_Array*)pC;
378         for (FX_DWORD i = 0; i < pArray->GetCount(); i ++) {
379             CFX_ByteString class_name = pArray->GetString(i);
380             CPDF_Dictionary* pClassDict = pClassMap->GetDict(class_name);
381             if (pClassDict && pClassDict->GetString(FX_BSTRC("O")) == owner) {
382                 return pClassDict->GetElementValue(name);
383             }
384         }
385         return NULL;
386     }
387     CFX_ByteString class_name = pC->GetString();
388     CPDF_Dictionary* pClassDict = pClassMap->GetDict(class_name);
389     if (pClassDict && pClassDict->GetString(FX_BSTRC("O")) == owner) {
390         return pClassDict->GetElementValue(name);
391     }
392     return NULL;
393 }
GetAttr(FX_BSTR owner,FX_BSTR name,FX_BOOL bInheritable,int subindex)394 CPDF_Object* CPDF_StructElementImpl::GetAttr(FX_BSTR owner, FX_BSTR name, FX_BOOL bInheritable, int subindex)
395 {
396     CPDF_Object* pAttr = GetAttr(owner, name, bInheritable);
397     if (pAttr == NULL || subindex == -1 || pAttr->GetType() != PDFOBJ_ARRAY) {
398         return pAttr;
399     }
400     CPDF_Array* pArray = (CPDF_Array*)pAttr;
401     if (subindex >= (int)pArray->GetCount()) {
402         return pAttr;
403     }
404     return pArray->GetElementValue(subindex);
405 }
GetName(FX_BSTR owner,FX_BSTR name,FX_BSTR default_value,FX_BOOL bInheritable,int subindex)406 CFX_ByteString CPDF_StructElementImpl::GetName(FX_BSTR owner, FX_BSTR name, FX_BSTR default_value, FX_BOOL bInheritable, int subindex)
407 {
408     CPDF_Object* pAttr = GetAttr(owner, name, bInheritable, subindex);
409     if (pAttr == NULL || pAttr->GetType() != PDFOBJ_NAME) {
410         return default_value;
411     }
412     return pAttr->GetString();
413 }
GetColor(FX_BSTR owner,FX_BSTR name,FX_ARGB default_value,FX_BOOL bInheritable,int subindex)414 FX_ARGB	CPDF_StructElementImpl::GetColor(FX_BSTR owner, FX_BSTR name, FX_ARGB default_value, FX_BOOL bInheritable, int subindex)
415 {
416     CPDF_Object* pAttr = GetAttr(owner, name, bInheritable, subindex);
417     if (pAttr == NULL || pAttr->GetType() != PDFOBJ_ARRAY) {
418         return default_value;
419     }
420     CPDF_Array* pArray = (CPDF_Array*)pAttr;
421     return 0xff000000 | ((int)(pArray->GetNumber(0) * 255) << 16) | ((int)(pArray->GetNumber(1) * 255) << 8) | (int)(pArray->GetNumber(2) * 255);
422 }
GetNumber(FX_BSTR owner,FX_BSTR name,FX_FLOAT default_value,FX_BOOL bInheritable,int subindex)423 FX_FLOAT CPDF_StructElementImpl::GetNumber(FX_BSTR owner, FX_BSTR name, FX_FLOAT default_value, FX_BOOL bInheritable, int subindex)
424 {
425     CPDF_Object* pAttr = GetAttr(owner, name, bInheritable, subindex);
426     if (pAttr == NULL || pAttr->GetType() != PDFOBJ_NUMBER) {
427         return default_value;
428     }
429     return pAttr->GetNumber();
430 }
GetInteger(FX_BSTR owner,FX_BSTR name,int default_value,FX_BOOL bInheritable,int subindex)431 int	CPDF_StructElementImpl::GetInteger(FX_BSTR owner, FX_BSTR name, int default_value, FX_BOOL bInheritable, int subindex)
432 {
433     CPDF_Object* pAttr = GetAttr(owner, name, bInheritable, subindex);
434     if (pAttr == NULL || pAttr->GetType() != PDFOBJ_NUMBER) {
435         return default_value;
436     }
437     return pAttr->GetInteger();
438 }
439