1 // Copyright 2017 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #include "core/fpdfdoc/cpdf_structtree.h"
8 
9 #include "core/fpdfapi/parser/cpdf_array.h"
10 #include "core/fpdfapi/parser/cpdf_dictionary.h"
11 #include "core/fpdfapi/parser/cpdf_document.h"
12 #include "core/fpdfapi/parser/cpdf_number.h"
13 #include "core/fpdfapi/parser/cpdf_reference.h"
14 #include "core/fpdfdoc/cpdf_numbertree.h"
15 #include "core/fpdfdoc/cpdf_structelement.h"
16 #include "third_party/base/ptr_util.h"
17 
18 namespace {
19 
IsTagged(const CPDF_Document * pDoc)20 bool IsTagged(const CPDF_Document* pDoc) {
21   const CPDF_Dictionary* pCatalog = pDoc->GetRoot();
22   const CPDF_Dictionary* pMarkInfo = pCatalog->GetDictFor("MarkInfo");
23   return pMarkInfo && pMarkInfo->GetIntegerFor("Marked");
24 }
25 
26 }  // namespace
27 
28 // static
LoadPage(const CPDF_Document * pDoc,const CPDF_Dictionary * pPageDict)29 std::unique_ptr<CPDF_StructTree> CPDF_StructTree::LoadPage(
30     const CPDF_Document* pDoc,
31     const CPDF_Dictionary* pPageDict) {
32   if (!IsTagged(pDoc))
33     return nullptr;
34 
35   auto pTree = pdfium::MakeUnique<CPDF_StructTree>(pDoc);
36   pTree->LoadPageTree(pPageDict);
37   return pTree;
38 }
39 
CPDF_StructTree(const CPDF_Document * pDoc)40 CPDF_StructTree::CPDF_StructTree(const CPDF_Document* pDoc)
41     : m_pTreeRoot(pDoc->GetRoot()->GetDictFor("StructTreeRoot")),
42       m_pRoleMap(m_pTreeRoot ? m_pTreeRoot->GetDictFor("RoleMap") : nullptr) {}
43 
44 CPDF_StructTree::~CPDF_StructTree() = default;
45 
LoadPageTree(const CPDF_Dictionary * pPageDict)46 void CPDF_StructTree::LoadPageTree(const CPDF_Dictionary* pPageDict) {
47   m_pPage.Reset(pPageDict);
48   if (!m_pTreeRoot)
49     return;
50 
51   const CPDF_Object* pKids = m_pTreeRoot->GetDirectObjectFor("K");
52   if (!pKids)
53     return;
54 
55   uint32_t dwKids = 0;
56   if (pKids->IsDictionary())
57     dwKids = 1;
58   else if (const CPDF_Array* pArray = pKids->AsArray())
59     dwKids = pArray->size();
60   else
61     return;
62 
63   m_Kids.clear();
64   m_Kids.resize(dwKids);
65   const CPDF_Dictionary* pParentTree = m_pTreeRoot->GetDictFor("ParentTree");
66   if (!pParentTree)
67     return;
68 
69   CPDF_NumberTree parent_tree(pParentTree);
70   int parents_id = pPageDict->GetIntegerFor("StructParents", -1);
71   if (parents_id < 0)
72     return;
73 
74   const CPDF_Array* pParentArray = ToArray(parent_tree.LookupValue(parents_id));
75   if (!pParentArray)
76     return;
77 
78   StructElementMap element_map;
79   for (size_t i = 0; i < pParentArray->size(); i++) {
80     if (const CPDF_Dictionary* pParent = pParentArray->GetDictAt(i))
81       AddPageNode(pParent, &element_map, 0);
82   }
83 }
84 
AddPageNode(const CPDF_Dictionary * pDict,StructElementMap * map,int nLevel)85 RetainPtr<CPDF_StructElement> CPDF_StructTree::AddPageNode(
86     const CPDF_Dictionary* pDict,
87     StructElementMap* map,
88     int nLevel) {
89   static constexpr int kStructTreeMaxRecursion = 32;
90   if (nLevel > kStructTreeMaxRecursion)
91     return nullptr;
92 
93   auto it = map->find(pDict);
94   if (it != map->end())
95     return it->second;
96 
97   auto pElement = pdfium::MakeRetain<CPDF_StructElement>(this, nullptr, pDict);
98   (*map)[pDict] = pElement;
99   const CPDF_Dictionary* pParent = pDict->GetDictFor("P");
100   if (!pParent || pParent->GetStringFor("Type") == "StructTreeRoot") {
101     if (!AddTopLevelNode(pDict, pElement))
102       map->erase(pDict);
103     return pElement;
104   }
105 
106   RetainPtr<CPDF_StructElement> pParentElement =
107       AddPageNode(pParent, map, nLevel + 1);
108   bool bSave = false;
109   for (CPDF_StructKid& kid : *pParentElement->GetKids()) {
110     if (kid.m_Type == CPDF_StructKid::kElement && kid.m_pDict == pDict) {
111       kid.m_pElement = pElement;
112       bSave = true;
113     }
114   }
115   if (!bSave)
116     map->erase(pDict);
117   return pElement;
118 }
119 
AddTopLevelNode(const CPDF_Dictionary * pDict,const RetainPtr<CPDF_StructElement> & pElement)120 bool CPDF_StructTree::AddTopLevelNode(
121     const CPDF_Dictionary* pDict,
122     const RetainPtr<CPDF_StructElement>& pElement) {
123   const CPDF_Object* pObj = m_pTreeRoot->GetDirectObjectFor("K");
124   if (!pObj)
125     return false;
126 
127   if (pObj->IsDictionary()) {
128     if (pObj->GetObjNum() != pDict->GetObjNum())
129       return false;
130     m_Kids[0] = pElement;
131   }
132 
133   const CPDF_Array* pTopKids = pObj->AsArray();
134   if (!pTopKids)
135     return true;
136 
137   bool bSave = false;
138   for (size_t i = 0; i < pTopKids->size(); i++) {
139     const CPDF_Reference* pKidRef = ToReference(pTopKids->GetObjectAt(i));
140     if (pKidRef && pKidRef->GetRefObjNum() == pDict->GetObjNum()) {
141       m_Kids[i] = pElement;
142       bSave = true;
143     }
144   }
145   return bSave;
146 }
147