1 // Copyright 2016 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #include "core/fpdfdoc/cpdf_nametree.h"
8 
9 #include <utility>
10 #include <vector>
11 
12 #include "core/fpdfapi/parser/cpdf_array.h"
13 #include "core/fpdfapi/parser/cpdf_dictionary.h"
14 #include "core/fpdfapi/parser/cpdf_document.h"
15 #include "core/fpdfapi/parser/cpdf_string.h"
16 #include "core/fpdfapi/parser/fpdf_parser_decode.h"
17 
18 namespace {
19 
20 constexpr int kNameTreeMaxRecursion = 32;
21 
GetNodeLimitsMaybeSwap(CPDF_Array * pLimits)22 std::pair<WideString, WideString> GetNodeLimitsMaybeSwap(CPDF_Array* pLimits) {
23   ASSERT(pLimits);
24   WideString csLeft = pLimits->GetUnicodeTextAt(0);
25   WideString csRight = pLimits->GetUnicodeTextAt(1);
26   // If the lower limit is greater than the upper limit, swap them.
27   if (csLeft.Compare(csRight) > 0) {
28     pLimits->SetNewAt<CPDF_String>(0, csRight);
29     pLimits->SetNewAt<CPDF_String>(1, csLeft);
30     csLeft = pLimits->GetUnicodeTextAt(0);
31     csRight = pLimits->GetUnicodeTextAt(1);
32   }
33   return {csLeft, csRight};
34 }
35 
36 // Get the limit arrays that leaf array |pFind| is under in the tree with root
37 // |pNode|. |pLimits| will hold all the limit arrays from the leaf up to before
38 // the root. Return true if successful.
GetNodeAncestorsLimits(const CPDF_Dictionary * pNode,const CPDF_Array * pFind,int nLevel,std::vector<CPDF_Array * > * pLimits)39 bool GetNodeAncestorsLimits(const CPDF_Dictionary* pNode,
40                             const CPDF_Array* pFind,
41                             int nLevel,
42                             std::vector<CPDF_Array*>* pLimits) {
43   if (nLevel > kNameTreeMaxRecursion)
44     return false;
45 
46   if (pNode->GetArrayFor("Names") == pFind) {
47     pLimits->push_back(pNode->GetArrayFor("Limits"));
48     return true;
49   }
50 
51   CPDF_Array* pKids = pNode->GetArrayFor("Kids");
52   if (!pKids)
53     return false;
54 
55   for (size_t i = 0; i < pKids->GetCount(); ++i) {
56     CPDF_Dictionary* pKid = pKids->GetDictAt(i);
57     if (!pKid)
58       continue;
59 
60     if (GetNodeAncestorsLimits(pKid, pFind, nLevel + 1, pLimits)) {
61       pLimits->push_back(pNode->GetArrayFor("Limits"));
62       return true;
63     }
64   }
65   return false;
66 }
67 
68 // Upon the deletion of |csName| from leaf array |pFind|, update the ancestors
69 // of |pFind|. Specifically, the limits of |pFind|'s ancestors will be updated
70 // if needed, and any ancestors that are now empty will be removed.
UpdateNodesAndLimitsUponDeletion(CPDF_Dictionary * pNode,const CPDF_Array * pFind,const WideString & csName,int nLevel)71 bool UpdateNodesAndLimitsUponDeletion(CPDF_Dictionary* pNode,
72                                       const CPDF_Array* pFind,
73                                       const WideString& csName,
74                                       int nLevel) {
75   if (nLevel > kNameTreeMaxRecursion)
76     return false;
77 
78   CPDF_Array* pLimits = pNode->GetArrayFor("Limits");
79   WideString csLeft;
80   WideString csRight;
81   if (pLimits)
82     std::tie(csLeft, csRight) = GetNodeLimitsMaybeSwap(pLimits);
83 
84   CPDF_Array* pNames = pNode->GetArrayFor("Names");
85   if (pNames) {
86     if (pNames != pFind)
87       return false;
88     if (pNames->IsEmpty() || !pLimits)
89       return true;
90     if (csLeft != csName && csRight != csName)
91       return true;
92 
93     // Since |csName| defines |pNode|'s limits, we need to loop through the
94     // names to find the new lower and upper limits.
95     WideString csNewLeft = csRight;
96     WideString csNewRight = csLeft;
97     for (size_t i = 0; i < pNames->GetCount() / 2; ++i) {
98       WideString wsName = pNames->GetUnicodeTextAt(i * 2);
99       if (wsName.Compare(csNewLeft) < 0)
100         csNewLeft = wsName;
101       if (wsName.Compare(csNewRight) > 0)
102         csNewRight = wsName;
103     }
104     pLimits->SetNewAt<CPDF_String>(0, csNewLeft);
105     pLimits->SetNewAt<CPDF_String>(1, csNewRight);
106     return true;
107   }
108 
109   CPDF_Array* pKids = pNode->GetArrayFor("Kids");
110   if (!pKids)
111     return false;
112 
113   // Loop through the kids to find the leaf array |pFind|.
114   for (size_t i = 0; i < pKids->GetCount(); ++i) {
115     CPDF_Dictionary* pKid = pKids->GetDictAt(i);
116     if (!pKid)
117       continue;
118     if (!UpdateNodesAndLimitsUponDeletion(pKid, pFind, csName, nLevel + 1))
119       continue;
120 
121     // Remove this child node if it's empty.
122     if ((pKid->KeyExist("Names") && pKid->GetArrayFor("Names")->IsEmpty()) ||
123         (pKid->KeyExist("Kids") && pKid->GetArrayFor("Kids")->IsEmpty())) {
124       pKids->RemoveAt(i);
125     }
126     if (pKids->IsEmpty() || !pLimits)
127       return true;
128     if (csLeft != csName && csRight != csName)
129       return true;
130 
131     // Since |csName| defines |pNode|'s limits, we need to loop through the
132     // kids to find the new lower and upper limits.
133     WideString csNewLeft = csRight;
134     WideString csNewRight = csLeft;
135     for (size_t j = 0; j < pKids->GetCount(); ++j) {
136       CPDF_Array* pKidLimits = pKids->GetDictAt(j)->GetArrayFor("Limits");
137       ASSERT(pKidLimits);
138       if (pKidLimits->GetUnicodeTextAt(0).Compare(csNewLeft) < 0)
139         csNewLeft = pKidLimits->GetUnicodeTextAt(0);
140       if (pKidLimits->GetUnicodeTextAt(1).Compare(csNewRight) > 0)
141         csNewRight = pKidLimits->GetUnicodeTextAt(1);
142     }
143     pLimits->SetNewAt<CPDF_String>(0, csNewLeft);
144     pLimits->SetNewAt<CPDF_String>(1, csNewRight);
145     return true;
146   }
147   return false;
148 }
149 
150 // Search for |csName| in the tree with root |pNode|. If successful, return the
151 // value that |csName| points to; |nIndex| will be the index of |csName|,
152 // |ppFind| will be the leaf array that |csName| is found in, and |pFindIndex|
153 // will be the index of |csName| in |ppFind|. If |csName| is not found, |ppFind|
154 // will be the leaf array that |csName| should be added to, and |pFindIndex|
155 // will be the index that it should be added at.
SearchNameNodeByName(const CPDF_Dictionary * pNode,const WideString & csName,int nLevel,size_t * nIndex,CPDF_Array ** ppFind,int * pFindIndex)156 CPDF_Object* SearchNameNodeByName(const CPDF_Dictionary* pNode,
157                                   const WideString& csName,
158                                   int nLevel,
159                                   size_t* nIndex,
160                                   CPDF_Array** ppFind,
161                                   int* pFindIndex) {
162   if (nLevel > kNameTreeMaxRecursion)
163     return nullptr;
164 
165   CPDF_Array* pLimits = pNode->GetArrayFor("Limits");
166   CPDF_Array* pNames = pNode->GetArrayFor("Names");
167   if (pLimits) {
168     WideString csLeft;
169     WideString csRight;
170     std::tie(csLeft, csRight) = GetNodeLimitsMaybeSwap(pLimits);
171     // Skip this node if the name to look for is smaller than its lower limit.
172     if (csName.Compare(csLeft) < 0)
173       return nullptr;
174 
175     // Skip this node if the name to look for is greater than its higher limit,
176     // and the node itself is a leaf node.
177     if (csName.Compare(csRight) > 0 && pNames) {
178       if (ppFind)
179         *ppFind = pNames;
180       if (pFindIndex)
181         *pFindIndex = pNames->GetCount() / 2 - 1;
182 
183       return nullptr;
184     }
185   }
186 
187   // If the node is a leaf node, look for the name in its names array.
188   if (pNames) {
189     size_t dwCount = pNames->GetCount() / 2;
190     for (size_t i = 0; i < dwCount; i++) {
191       WideString csValue = pNames->GetUnicodeTextAt(i * 2);
192       int32_t iCompare = csValue.Compare(csName);
193       if (iCompare > 0)
194         break;
195       if (ppFind)
196         *ppFind = pNames;
197       if (pFindIndex)
198         *pFindIndex = i;
199       if (iCompare < 0)
200         continue;
201 
202       *nIndex += i;
203       return pNames->GetDirectObjectAt(i * 2 + 1);
204     }
205     *nIndex += dwCount;
206     return nullptr;
207   }
208 
209   // Search through the node's children.
210   CPDF_Array* pKids = pNode->GetArrayFor("Kids");
211   if (!pKids)
212     return nullptr;
213 
214   for (size_t i = 0; i < pKids->GetCount(); i++) {
215     CPDF_Dictionary* pKid = pKids->GetDictAt(i);
216     if (!pKid)
217       continue;
218 
219     CPDF_Object* pFound = SearchNameNodeByName(pKid, csName, nLevel + 1, nIndex,
220                                                ppFind, pFindIndex);
221     if (pFound)
222       return pFound;
223   }
224   return nullptr;
225 }
226 
227 // Get the key-value pair at |nIndex| in the tree with root |pNode|. If
228 // successful, return the value object; |csName| will be the key, |ppFind|
229 // will be the leaf array that this pair is in, and |pFindIndex| will be the
230 // index of the pair in |pFind|.
SearchNameNodeByIndex(const CPDF_Dictionary * pNode,size_t nIndex,int nLevel,size_t * nCurIndex,WideString * csName,CPDF_Array ** ppFind,int * pFindIndex)231 CPDF_Object* SearchNameNodeByIndex(const CPDF_Dictionary* pNode,
232                                    size_t nIndex,
233                                    int nLevel,
234                                    size_t* nCurIndex,
235                                    WideString* csName,
236                                    CPDF_Array** ppFind,
237                                    int* pFindIndex) {
238   if (nLevel > kNameTreeMaxRecursion)
239     return nullptr;
240 
241   CPDF_Array* pNames = pNode->GetArrayFor("Names");
242   if (pNames) {
243     size_t nCount = pNames->GetCount() / 2;
244     if (nIndex >= *nCurIndex + nCount) {
245       *nCurIndex += nCount;
246       return nullptr;
247     }
248     if (ppFind)
249       *ppFind = pNames;
250     if (pFindIndex)
251       *pFindIndex = nIndex - *nCurIndex;
252 
253     *csName = pNames->GetUnicodeTextAt((nIndex - *nCurIndex) * 2);
254     return pNames->GetDirectObjectAt((nIndex - *nCurIndex) * 2 + 1);
255   }
256 
257   CPDF_Array* pKids = pNode->GetArrayFor("Kids");
258   if (!pKids)
259     return nullptr;
260 
261   for (size_t i = 0; i < pKids->GetCount(); i++) {
262     CPDF_Dictionary* pKid = pKids->GetDictAt(i);
263     if (!pKid)
264       continue;
265     CPDF_Object* pFound = SearchNameNodeByIndex(
266         pKid, nIndex, nLevel + 1, nCurIndex, csName, ppFind, pFindIndex);
267     if (pFound)
268       return pFound;
269   }
270   return nullptr;
271 }
272 
273 // Get the total number of key-value pairs in the tree with root |pNode|.
CountNamesInternal(CPDF_Dictionary * pNode,int nLevel)274 size_t CountNamesInternal(CPDF_Dictionary* pNode, int nLevel) {
275   if (nLevel > kNameTreeMaxRecursion)
276     return 0;
277 
278   CPDF_Array* pNames = pNode->GetArrayFor("Names");
279   if (pNames)
280     return pNames->GetCount() / 2;
281 
282   CPDF_Array* pKids = pNode->GetArrayFor("Kids");
283   if (!pKids)
284     return 0;
285 
286   size_t nCount = 0;
287   for (size_t i = 0; i < pKids->GetCount(); i++) {
288     CPDF_Dictionary* pKid = pKids->GetDictAt(i);
289     if (!pKid)
290       continue;
291 
292     nCount += CountNamesInternal(pKid, nLevel + 1);
293   }
294   return nCount;
295 }
296 
297 }  // namespace
298 
CPDF_NameTree(CPDF_Dictionary * pRoot)299 CPDF_NameTree::CPDF_NameTree(CPDF_Dictionary* pRoot) : m_pRoot(pRoot) {}
300 
CPDF_NameTree(const CPDF_Document * pDoc,const ByteString & category)301 CPDF_NameTree::CPDF_NameTree(const CPDF_Document* pDoc,
302                              const ByteString& category)
303     : m_pRoot(nullptr) {
304   const CPDF_Dictionary* pRoot = pDoc->GetRoot();
305   if (!pRoot)
306     return;
307 
308   CPDF_Dictionary* pNames = pRoot->GetDictFor("Names");
309   if (!pNames)
310     return;
311 
312   m_pRoot = pNames->GetDictFor(category);
313 }
314 
~CPDF_NameTree()315 CPDF_NameTree::~CPDF_NameTree() {}
316 
GetCount() const317 size_t CPDF_NameTree::GetCount() const {
318   return m_pRoot ? CountNamesInternal(m_pRoot.Get(), 0) : 0;
319 }
320 
GetIndex(const WideString & csName) const321 int CPDF_NameTree::GetIndex(const WideString& csName) const {
322   if (!m_pRoot)
323     return -1;
324 
325   size_t nIndex = 0;
326   if (!SearchNameNodeByName(m_pRoot.Get(), csName, 0, &nIndex, nullptr,
327                             nullptr)) {
328     return -1;
329   }
330   return nIndex;
331 }
332 
AddValueAndName(std::unique_ptr<CPDF_Object> pObj,const WideString & name)333 bool CPDF_NameTree::AddValueAndName(std::unique_ptr<CPDF_Object> pObj,
334                                     const WideString& name) {
335   if (!m_pRoot)
336     return false;
337 
338   size_t nIndex = 0;
339   CPDF_Array* pFind = nullptr;
340   int nFindIndex = -1;
341   // Fail if the tree already contains this name or if the tree is too deep.
342   if (SearchNameNodeByName(m_pRoot.Get(), name, 0, &nIndex, &pFind,
343                            &nFindIndex)) {
344     return false;
345   }
346 
347   // If the returned |pFind| is a nullptr, then |name| is smaller than all
348   // existing entries in the tree, and we did not find a leaf array to place
349   // |name| into. We instead will find the leftmost leaf array in which to place
350   // |name| and |pObj|.
351   if (!pFind) {
352     size_t nCurIndex = 0;
353     WideString csName;
354     SearchNameNodeByIndex(m_pRoot.Get(), 0, 0, &nCurIndex, &csName, &pFind,
355                           nullptr);
356   }
357   ASSERT(pFind);
358 
359   // Insert the name and the object into the leaf array found. Note that the
360   // insertion position is right after the key-value pair returned by |index|.
361   size_t nNameIndex = (nFindIndex + 1) * 2;
362   size_t nValueIndex = nNameIndex + 1;
363   pFind->InsertNewAt<CPDF_String>(nNameIndex, name);
364   pFind->InsertAt(nValueIndex, std::move(pObj));
365 
366   // Expand the limits that the newly added name is under, if the name falls
367   // outside of the limits of its leaf array or any arrays above it.
368   std::vector<CPDF_Array*> pLimits;
369   GetNodeAncestorsLimits(m_pRoot.Get(), pFind, 0, &pLimits);
370   for (auto* pLimit : pLimits) {
371     if (!pLimit)
372       continue;
373 
374     if (name.Compare(pLimit->GetUnicodeTextAt(0)) < 0)
375       pLimit->SetNewAt<CPDF_String>(0, name);
376 
377     if (name.Compare(pLimit->GetUnicodeTextAt(1)) > 0)
378       pLimit->SetNewAt<CPDF_String>(1, name);
379   }
380   return true;
381 }
382 
DeleteValueAndName(int nIndex)383 bool CPDF_NameTree::DeleteValueAndName(int nIndex) {
384   if (!m_pRoot)
385     return false;
386 
387   size_t nCurIndex = 0;
388   WideString csName;
389   CPDF_Array* pFind = nullptr;
390   int nFindIndex = -1;
391   // Fail if the tree does not contain |nIndex|.
392   if (!SearchNameNodeByIndex(m_pRoot.Get(), nIndex, 0, &nCurIndex, &csName,
393                              &pFind, &nFindIndex)) {
394     return false;
395   }
396 
397   // Remove the name and the object from the leaf array |pFind|.
398   pFind->RemoveAt(nFindIndex * 2);
399   pFind->RemoveAt(nFindIndex * 2);
400 
401   // Delete empty nodes and update the limits of |pFind|'s ancestors as needed.
402   UpdateNodesAndLimitsUponDeletion(m_pRoot.Get(), pFind, csName, 0);
403   return true;
404 }
405 
LookupValueAndName(int nIndex,WideString * csName) const406 CPDF_Object* CPDF_NameTree::LookupValueAndName(int nIndex,
407                                                WideString* csName) const {
408   csName->clear();
409   if (!m_pRoot)
410     return nullptr;
411 
412   size_t nCurIndex = 0;
413   return SearchNameNodeByIndex(m_pRoot.Get(), nIndex, 0, &nCurIndex, csName,
414                                nullptr, nullptr);
415 }
416 
LookupValue(const WideString & csName) const417 CPDF_Object* CPDF_NameTree::LookupValue(const WideString& csName) const {
418   if (!m_pRoot)
419     return nullptr;
420 
421   size_t nIndex = 0;
422   return SearchNameNodeByName(m_pRoot.Get(), csName, 0, &nIndex, nullptr,
423                               nullptr);
424 }
425 
LookupNamedDest(CPDF_Document * pDoc,const WideString & sName)426 CPDF_Array* CPDF_NameTree::LookupNamedDest(CPDF_Document* pDoc,
427                                            const WideString& sName) {
428   CPDF_Object* pValue = LookupValue(sName);
429   if (!pValue) {
430     CPDF_Dictionary* pDests = pDoc->GetRoot()->GetDictFor("Dests");
431     if (!pDests)
432       return nullptr;
433     pValue = pDests->GetDirectObjectFor(PDF_EncodeText(sName));
434   }
435   if (!pValue)
436     return nullptr;
437   if (CPDF_Array* pArray = pValue->AsArray())
438     return pArray;
439   if (CPDF_Dictionary* pDict = pValue->AsDictionary())
440     return pDict->GetArrayFor("D");
441   return nullptr;
442 }
443