1 // Copyright 2017 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #include "core/fpdfdoc/cpdf_structtree.h"
8 
9 #include "core/fpdfapi/parser/cpdf_array.h"
10 #include "core/fpdfapi/parser/cpdf_document.h"
11 #include "core/fpdfapi/parser/cpdf_number.h"
12 #include "core/fpdfapi/parser/cpdf_reference.h"
13 #include "core/fpdfdoc/cpdf_numbertree.h"
14 #include "core/fpdfdoc/cpdf_structelement.h"
15 
16 namespace {
17 
IsTagged(const CPDF_Document * pDoc)18 bool IsTagged(const CPDF_Document* pDoc) {
19   const CPDF_Dictionary* pCatalog = pDoc->GetRoot();
20   const CPDF_Dictionary* pMarkInfo = pCatalog->GetDictFor("MarkInfo");
21   return pMarkInfo && pMarkInfo->GetIntegerFor("Marked");
22 }
23 
24 }  // namespace
25 
26 // static
LoadPage(const CPDF_Document * pDoc,const CPDF_Dictionary * pPageDict)27 std::unique_ptr<CPDF_StructTree> CPDF_StructTree::LoadPage(
28     const CPDF_Document* pDoc,
29     const CPDF_Dictionary* pPageDict) {
30   if (!IsTagged(pDoc))
31     return nullptr;
32 
33   auto pTree = pdfium::MakeUnique<CPDF_StructTree>(pDoc);
34   pTree->LoadPageTree(pPageDict);
35   return pTree;
36 }
37 
CPDF_StructTree(const CPDF_Document * pDoc)38 CPDF_StructTree::CPDF_StructTree(const CPDF_Document* pDoc)
39     : m_pTreeRoot(pDoc->GetRoot()->GetDictFor("StructTreeRoot")),
40       m_pRoleMap(m_pTreeRoot ? m_pTreeRoot->GetDictFor("RoleMap") : nullptr),
41       m_pPage(nullptr) {}
42 
~CPDF_StructTree()43 CPDF_StructTree::~CPDF_StructTree() {}
44 
LoadPageTree(const CPDF_Dictionary * pPageDict)45 void CPDF_StructTree::LoadPageTree(const CPDF_Dictionary* pPageDict) {
46   m_pPage = pPageDict;
47   if (!m_pTreeRoot)
48     return;
49 
50   CPDF_Object* pKids = m_pTreeRoot->GetDirectObjectFor("K");
51   if (!pKids)
52     return;
53 
54   uint32_t dwKids = 0;
55   if (pKids->IsDictionary())
56     dwKids = 1;
57   else if (CPDF_Array* pArray = pKids->AsArray())
58     dwKids = pArray->GetCount();
59   else
60     return;
61 
62   m_Kids.clear();
63   m_Kids.resize(dwKids);
64   CPDF_Dictionary* pParentTree = m_pTreeRoot->GetDictFor("ParentTree");
65   if (!pParentTree)
66     return;
67 
68   CPDF_NumberTree parent_tree(pParentTree);
69   int parents_id = pPageDict->GetIntegerFor("StructParents", -1);
70   if (parents_id < 0)
71     return;
72 
73   CPDF_Array* pParentArray = ToArray(parent_tree.LookupValue(parents_id));
74   if (!pParentArray)
75     return;
76 
77   std::map<CPDF_Dictionary*, RetainPtr<CPDF_StructElement>> element_map;
78   for (size_t i = 0; i < pParentArray->GetCount(); i++) {
79     if (CPDF_Dictionary* pParent = pParentArray->GetDictAt(i))
80       AddPageNode(pParent, &element_map, 0);
81   }
82 }
83 
AddPageNode(CPDF_Dictionary * pDict,std::map<CPDF_Dictionary *,RetainPtr<CPDF_StructElement>> * map,int nLevel)84 RetainPtr<CPDF_StructElement> CPDF_StructTree::AddPageNode(
85     CPDF_Dictionary* pDict,
86     std::map<CPDF_Dictionary*, RetainPtr<CPDF_StructElement>>* map,
87     int nLevel) {
88   static constexpr int kStructTreeMaxRecursion = 32;
89   if (nLevel > kStructTreeMaxRecursion)
90     return nullptr;
91 
92   auto it = map->find(pDict);
93   if (it != map->end())
94     return it->second;
95 
96   auto pElement = pdfium::MakeRetain<CPDF_StructElement>(this, nullptr, pDict);
97   (*map)[pDict] = pElement;
98   CPDF_Dictionary* pParent = pDict->GetDictFor("P");
99   if (!pParent || pParent->GetStringFor("Type") == "StructTreeRoot") {
100     if (!AddTopLevelNode(pDict, pElement))
101       map->erase(pDict);
102     return pElement;
103   }
104 
105   RetainPtr<CPDF_StructElement> pParentElement =
106       AddPageNode(pParent, map, nLevel + 1);
107   bool bSave = false;
108   for (CPDF_StructKid& kid : *pParentElement->GetKids()) {
109     if (kid.m_Type == CPDF_StructKid::Element && kid.m_pDict == pDict) {
110       kid.m_pElement = pElement;
111       bSave = true;
112     }
113   }
114   if (!bSave)
115     map->erase(pDict);
116   return pElement;
117 }
118 
AddTopLevelNode(CPDF_Dictionary * pDict,const RetainPtr<CPDF_StructElement> & pElement)119 bool CPDF_StructTree::AddTopLevelNode(
120     CPDF_Dictionary* pDict,
121     const RetainPtr<CPDF_StructElement>& pElement) {
122   CPDF_Object* pObj = m_pTreeRoot->GetDirectObjectFor("K");
123   if (!pObj)
124     return false;
125 
126   if (pObj->IsDictionary()) {
127     if (pObj->GetObjNum() != pDict->GetObjNum())
128       return false;
129     m_Kids[0] = pElement;
130   }
131 
132   CPDF_Array* pTopKids = pObj->AsArray();
133   if (!pTopKids)
134     return true;
135 
136   bool bSave = false;
137   for (size_t i = 0; i < pTopKids->GetCount(); i++) {
138     CPDF_Reference* pKidRef = ToReference(pTopKids->GetObjectAt(i));
139     if (pKidRef && pKidRef->GetRefObjNum() == pDict->GetObjNum()) {
140       m_Kids[i] = pElement;
141       bSave = true;
142     }
143   }
144   return bSave;
145 }
146