1 // Copyright 2016 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #include "core/fpdfdoc/cpdf_nametree.h"
8 
9 #include <utility>
10 #include <vector>
11 
12 #include "core/fpdfapi/parser/cpdf_array.h"
13 #include "core/fpdfapi/parser/cpdf_dictionary.h"
14 #include "core/fpdfapi/parser/cpdf_document.h"
15 #include "core/fpdfapi/parser/cpdf_string.h"
16 #include "core/fpdfapi/parser/fpdf_parser_decode.h"
17 
18 namespace {
19 
20 constexpr int kNameTreeMaxRecursion = 32;
21 
GetNodeLimitsMaybeSwap(CPDF_Array * pLimits)22 std::pair<WideString, WideString> GetNodeLimitsMaybeSwap(CPDF_Array* pLimits) {
23   ASSERT(pLimits);
24   WideString csLeft = pLimits->GetUnicodeTextAt(0);
25   WideString csRight = pLimits->GetUnicodeTextAt(1);
26   // If the lower limit is greater than the upper limit, swap them.
27   if (csLeft.Compare(csRight) > 0) {
28     pLimits->SetNewAt<CPDF_String>(0, csRight);
29     pLimits->SetNewAt<CPDF_String>(1, csLeft);
30     csLeft = pLimits->GetUnicodeTextAt(0);
31     csRight = pLimits->GetUnicodeTextAt(1);
32   }
33   return {csLeft, csRight};
34 }
35 
36 // Get the limit arrays that leaf array |pFind| is under in the tree with root
37 // |pNode|. |pLimits| will hold all the limit arrays from the leaf up to before
38 // the root. Return true if successful.
GetNodeAncestorsLimits(CPDF_Dictionary * pNode,const CPDF_Array * pFind,int nLevel,std::vector<CPDF_Array * > * pLimits)39 bool GetNodeAncestorsLimits(CPDF_Dictionary* pNode,
40                             const CPDF_Array* pFind,
41                             int nLevel,
42                             std::vector<CPDF_Array*>* pLimits) {
43   if (nLevel > kNameTreeMaxRecursion)
44     return false;
45 
46   if (pNode->GetArrayFor("Names") == pFind) {
47     pLimits->push_back(pNode->GetArrayFor("Limits"));
48     return true;
49   }
50 
51   CPDF_Array* pKids = pNode->GetArrayFor("Kids");
52   if (!pKids)
53     return false;
54 
55   for (size_t i = 0; i < pKids->size(); ++i) {
56     CPDF_Dictionary* pKid = pKids->GetDictAt(i);
57     if (!pKid)
58       continue;
59 
60     if (GetNodeAncestorsLimits(pKid, pFind, nLevel + 1, pLimits)) {
61       pLimits->push_back(pNode->GetArrayFor("Limits"));
62       return true;
63     }
64   }
65   return false;
66 }
67 
68 // Upon the deletion of |csName| from leaf array |pFind|, update the ancestors
69 // of |pFind|. Specifically, the limits of |pFind|'s ancestors will be updated
70 // if needed, and any ancestors that are now empty will be removed.
UpdateNodesAndLimitsUponDeletion(CPDF_Dictionary * pNode,const CPDF_Array * pFind,const WideString & csName,int nLevel)71 bool UpdateNodesAndLimitsUponDeletion(CPDF_Dictionary* pNode,
72                                       const CPDF_Array* pFind,
73                                       const WideString& csName,
74                                       int nLevel) {
75   if (nLevel > kNameTreeMaxRecursion)
76     return false;
77 
78   CPDF_Array* pLimits = pNode->GetArrayFor("Limits");
79   WideString csLeft;
80   WideString csRight;
81   if (pLimits)
82     std::tie(csLeft, csRight) = GetNodeLimitsMaybeSwap(pLimits);
83 
84   CPDF_Array* pNames = pNode->GetArrayFor("Names");
85   if (pNames) {
86     if (pNames != pFind)
87       return false;
88     if (pNames->IsEmpty() || !pLimits)
89       return true;
90     if (csLeft != csName && csRight != csName)
91       return true;
92 
93     // Since |csName| defines |pNode|'s limits, we need to loop through the
94     // names to find the new lower and upper limits.
95     WideString csNewLeft = csRight;
96     WideString csNewRight = csLeft;
97     for (size_t i = 0; i < pNames->size() / 2; ++i) {
98       WideString wsName = pNames->GetUnicodeTextAt(i * 2);
99       if (wsName.Compare(csNewLeft) < 0)
100         csNewLeft = wsName;
101       if (wsName.Compare(csNewRight) > 0)
102         csNewRight = wsName;
103     }
104     pLimits->SetNewAt<CPDF_String>(0, csNewLeft);
105     pLimits->SetNewAt<CPDF_String>(1, csNewRight);
106     return true;
107   }
108 
109   CPDF_Array* pKids = pNode->GetArrayFor("Kids");
110   if (!pKids)
111     return false;
112 
113   // Loop through the kids to find the leaf array |pFind|.
114   for (size_t i = 0; i < pKids->size(); ++i) {
115     CPDF_Dictionary* pKid = pKids->GetDictAt(i);
116     if (!pKid)
117       continue;
118     if (!UpdateNodesAndLimitsUponDeletion(pKid, pFind, csName, nLevel + 1))
119       continue;
120 
121     // Remove this child node if it's empty.
122     if ((pKid->KeyExist("Names") && pKid->GetArrayFor("Names")->IsEmpty()) ||
123         (pKid->KeyExist("Kids") && pKid->GetArrayFor("Kids")->IsEmpty())) {
124       pKids->RemoveAt(i);
125     }
126     if (pKids->IsEmpty() || !pLimits)
127       return true;
128     if (csLeft != csName && csRight != csName)
129       return true;
130 
131     // Since |csName| defines |pNode|'s limits, we need to loop through the
132     // kids to find the new lower and upper limits.
133     WideString csNewLeft = csRight;
134     WideString csNewRight = csLeft;
135     for (size_t j = 0; j < pKids->size(); ++j) {
136       CPDF_Array* pKidLimits = pKids->GetDictAt(j)->GetArrayFor("Limits");
137       ASSERT(pKidLimits);
138       if (pKidLimits->GetUnicodeTextAt(0).Compare(csNewLeft) < 0)
139         csNewLeft = pKidLimits->GetUnicodeTextAt(0);
140       if (pKidLimits->GetUnicodeTextAt(1).Compare(csNewRight) > 0)
141         csNewRight = pKidLimits->GetUnicodeTextAt(1);
142     }
143     pLimits->SetNewAt<CPDF_String>(0, csNewLeft);
144     pLimits->SetNewAt<CPDF_String>(1, csNewRight);
145     return true;
146   }
147   return false;
148 }
149 
150 // Search for |csName| in the tree with root |pNode|. If successful, return the
151 // value that |csName| points to; |nIndex| will be the index of |csName|,
152 // |ppFind| will be the leaf array that |csName| is found in, and |pFindIndex|
153 // will be the index of |csName| in |ppFind|. If |csName| is not found, |ppFind|
154 // will be the leaf array that |csName| should be added to, and |pFindIndex|
155 // will be the index that it should be added at.
SearchNameNodeByName(CPDF_Dictionary * pNode,const WideString & csName,int nLevel,size_t * nIndex,CPDF_Array ** ppFind,int * pFindIndex)156 CPDF_Object* SearchNameNodeByName(CPDF_Dictionary* pNode,
157                                   const WideString& csName,
158                                   int nLevel,
159                                   size_t* nIndex,
160                                   CPDF_Array** ppFind,
161                                   int* pFindIndex) {
162   if (nLevel > kNameTreeMaxRecursion)
163     return nullptr;
164 
165   CPDF_Array* pLimits = pNode->GetArrayFor("Limits");
166   CPDF_Array* pNames = pNode->GetArrayFor("Names");
167   if (pLimits) {
168     WideString csLeft;
169     WideString csRight;
170     std::tie(csLeft, csRight) = GetNodeLimitsMaybeSwap(pLimits);
171     // Skip this node if the name to look for is smaller than its lower limit.
172     if (csName.Compare(csLeft) < 0)
173       return nullptr;
174 
175     // Skip this node if the name to look for is greater than its higher limit,
176     // and the node itself is a leaf node.
177     if (csName.Compare(csRight) > 0 && pNames) {
178       if (ppFind)
179         *ppFind = pNames;
180       if (pFindIndex)
181         *pFindIndex = pNames->size() / 2 - 1;
182 
183       return nullptr;
184     }
185   }
186 
187   // If the node is a leaf node, look for the name in its names array.
188   if (pNames) {
189     size_t dwCount = pNames->size() / 2;
190     for (size_t i = 0; i < dwCount; i++) {
191       WideString csValue = pNames->GetUnicodeTextAt(i * 2);
192       int32_t iCompare = csValue.Compare(csName);
193       if (iCompare > 0)
194         break;
195       if (ppFind)
196         *ppFind = pNames;
197       if (pFindIndex)
198         *pFindIndex = i;
199       if (iCompare < 0)
200         continue;
201 
202       *nIndex += i;
203       return pNames->GetDirectObjectAt(i * 2 + 1);
204     }
205     *nIndex += dwCount;
206     return nullptr;
207   }
208 
209   // Search through the node's children.
210   CPDF_Array* pKids = pNode->GetArrayFor("Kids");
211   if (!pKids)
212     return nullptr;
213 
214   for (size_t i = 0; i < pKids->size(); i++) {
215     CPDF_Dictionary* pKid = pKids->GetDictAt(i);
216     if (!pKid)
217       continue;
218 
219     CPDF_Object* pFound = SearchNameNodeByName(pKid, csName, nLevel + 1, nIndex,
220                                                ppFind, pFindIndex);
221     if (pFound)
222       return pFound;
223   }
224   return nullptr;
225 }
226 
227 // Get the key-value pair at |nIndex| in the tree with root |pNode|. If
228 // successful, return the value object; |csName| will be the key, |ppFind|
229 // will be the leaf array that this pair is in, and |pFindIndex| will be the
230 // index of the pair in |pFind|.
SearchNameNodeByIndex(CPDF_Dictionary * pNode,size_t nIndex,int nLevel,size_t * nCurIndex,WideString * csName,CPDF_Array ** ppFind,int * pFindIndex)231 CPDF_Object* SearchNameNodeByIndex(CPDF_Dictionary* pNode,
232                                    size_t nIndex,
233                                    int nLevel,
234                                    size_t* nCurIndex,
235                                    WideString* csName,
236                                    CPDF_Array** ppFind,
237                                    int* pFindIndex) {
238   if (nLevel > kNameTreeMaxRecursion)
239     return nullptr;
240 
241   CPDF_Array* pNames = pNode->GetArrayFor("Names");
242   if (pNames) {
243     size_t nCount = pNames->size() / 2;
244     if (nIndex >= *nCurIndex + nCount) {
245       *nCurIndex += nCount;
246       return nullptr;
247     }
248     if (ppFind)
249       *ppFind = pNames;
250     if (pFindIndex)
251       *pFindIndex = nIndex - *nCurIndex;
252 
253     *csName = pNames->GetUnicodeTextAt((nIndex - *nCurIndex) * 2);
254     return pNames->GetDirectObjectAt((nIndex - *nCurIndex) * 2 + 1);
255   }
256 
257   CPDF_Array* pKids = pNode->GetArrayFor("Kids");
258   if (!pKids)
259     return nullptr;
260 
261   for (size_t i = 0; i < pKids->size(); i++) {
262     CPDF_Dictionary* pKid = pKids->GetDictAt(i);
263     if (!pKid)
264       continue;
265     CPDF_Object* pFound = SearchNameNodeByIndex(
266         pKid, nIndex, nLevel + 1, nCurIndex, csName, ppFind, pFindIndex);
267     if (pFound)
268       return pFound;
269   }
270   return nullptr;
271 }
272 
273 // Get the total number of key-value pairs in the tree with root |pNode|.
CountNamesInternal(CPDF_Dictionary * pNode,int nLevel)274 size_t CountNamesInternal(CPDF_Dictionary* pNode, int nLevel) {
275   if (nLevel > kNameTreeMaxRecursion)
276     return 0;
277 
278   CPDF_Array* pNames = pNode->GetArrayFor("Names");
279   if (pNames)
280     return pNames->size() / 2;
281 
282   CPDF_Array* pKids = pNode->GetArrayFor("Kids");
283   if (!pKids)
284     return 0;
285 
286   size_t nCount = 0;
287   for (size_t i = 0; i < pKids->size(); i++) {
288     CPDF_Dictionary* pKid = pKids->GetDictAt(i);
289     if (!pKid)
290       continue;
291 
292     nCount += CountNamesInternal(pKid, nLevel + 1);
293   }
294   return nCount;
295 }
296 
297 }  // namespace
298 
CPDF_NameTree(CPDF_Dictionary * pRoot)299 CPDF_NameTree::CPDF_NameTree(CPDF_Dictionary* pRoot) : m_pRoot(pRoot) {}
300 
CPDF_NameTree(CPDF_Document * pDoc,const ByteString & category)301 CPDF_NameTree::CPDF_NameTree(CPDF_Document* pDoc, const ByteString& category) {
302   CPDF_Dictionary* pRoot = pDoc->GetRoot();
303   if (!pRoot)
304     return;
305 
306   CPDF_Dictionary* pNames = pRoot->GetDictFor("Names");
307   if (!pNames)
308     return;
309 
310   m_pRoot.Reset(pNames->GetDictFor(category));
311 }
312 
~CPDF_NameTree()313 CPDF_NameTree::~CPDF_NameTree() {}
314 
GetCount() const315 size_t CPDF_NameTree::GetCount() const {
316   return m_pRoot ? CountNamesInternal(m_pRoot.Get(), 0) : 0;
317 }
318 
AddValueAndName(RetainPtr<CPDF_Object> pObj,const WideString & name)319 bool CPDF_NameTree::AddValueAndName(RetainPtr<CPDF_Object> pObj,
320                                     const WideString& name) {
321   if (!m_pRoot)
322     return false;
323 
324   size_t nIndex = 0;
325   CPDF_Array* pFind = nullptr;
326   int nFindIndex = -1;
327   // Fail if the tree already contains this name or if the tree is too deep.
328   if (SearchNameNodeByName(m_pRoot.Get(), name, 0, &nIndex, &pFind,
329                            &nFindIndex)) {
330     return false;
331   }
332 
333   // If the returned |pFind| is a nullptr, then |name| is smaller than all
334   // existing entries in the tree, and we did not find a leaf array to place
335   // |name| into. We instead will find the leftmost leaf array in which to place
336   // |name| and |pObj|.
337   if (!pFind) {
338     size_t nCurIndex = 0;
339     WideString csName;
340     SearchNameNodeByIndex(m_pRoot.Get(), 0, 0, &nCurIndex, &csName, &pFind,
341                           nullptr);
342   }
343   // Give up if that fails too.
344   if (!pFind)
345     return false;
346 
347   // Insert the name and the object into the leaf array found. Note that the
348   // insertion position is right after the key-value pair returned by |index|.
349   size_t nNameIndex = (nFindIndex + 1) * 2;
350   size_t nValueIndex = nNameIndex + 1;
351   pFind->InsertNewAt<CPDF_String>(nNameIndex, name);
352   pFind->InsertAt(nValueIndex, std::move(pObj));
353 
354   // Expand the limits that the newly added name is under, if the name falls
355   // outside of the limits of its leaf array or any arrays above it.
356   std::vector<CPDF_Array*> pLimits;
357   GetNodeAncestorsLimits(m_pRoot.Get(), pFind, 0, &pLimits);
358   for (auto* pLimit : pLimits) {
359     if (!pLimit)
360       continue;
361 
362     if (name.Compare(pLimit->GetUnicodeTextAt(0)) < 0)
363       pLimit->SetNewAt<CPDF_String>(0, name);
364 
365     if (name.Compare(pLimit->GetUnicodeTextAt(1)) > 0)
366       pLimit->SetNewAt<CPDF_String>(1, name);
367   }
368   return true;
369 }
370 
DeleteValueAndName(int nIndex)371 bool CPDF_NameTree::DeleteValueAndName(int nIndex) {
372   if (!m_pRoot)
373     return false;
374 
375   size_t nCurIndex = 0;
376   WideString csName;
377   CPDF_Array* pFind = nullptr;
378   int nFindIndex = -1;
379   // Fail if the tree does not contain |nIndex|.
380   if (!SearchNameNodeByIndex(m_pRoot.Get(), nIndex, 0, &nCurIndex, &csName,
381                              &pFind, &nFindIndex)) {
382     return false;
383   }
384 
385   // Remove the name and the object from the leaf array |pFind|.
386   pFind->RemoveAt(nFindIndex * 2);
387   pFind->RemoveAt(nFindIndex * 2);
388 
389   // Delete empty nodes and update the limits of |pFind|'s ancestors as needed.
390   UpdateNodesAndLimitsUponDeletion(m_pRoot.Get(), pFind, csName, 0);
391   return true;
392 }
393 
LookupValueAndName(int nIndex,WideString * csName) const394 CPDF_Object* CPDF_NameTree::LookupValueAndName(int nIndex,
395                                                WideString* csName) const {
396   csName->clear();
397   if (!m_pRoot)
398     return nullptr;
399 
400   size_t nCurIndex = 0;
401   return SearchNameNodeByIndex(m_pRoot.Get(), nIndex, 0, &nCurIndex, csName,
402                                nullptr, nullptr);
403 }
404 
LookupValue(const WideString & csName) const405 CPDF_Object* CPDF_NameTree::LookupValue(const WideString& csName) const {
406   if (!m_pRoot)
407     return nullptr;
408 
409   size_t nIndex = 0;
410   return SearchNameNodeByName(m_pRoot.Get(), csName, 0, &nIndex, nullptr,
411                               nullptr);
412 }
413 
LookupNamedDest(CPDF_Document * pDoc,const WideString & sName)414 CPDF_Array* CPDF_NameTree::LookupNamedDest(CPDF_Document* pDoc,
415                                            const WideString& sName) {
416   CPDF_Object* pValue = LookupValue(sName);
417   if (!pValue) {
418     CPDF_Dictionary* pDests = pDoc->GetRoot()->GetDictFor("Dests");
419     if (!pDests)
420       return nullptr;
421     pValue = pDests->GetDirectObjectFor(PDF_EncodeText(sName));
422   }
423   if (!pValue)
424     return nullptr;
425   if (CPDF_Array* pArray = pValue->AsArray())
426     return pArray;
427   if (CPDF_Dictionary* pDict = pValue->AsDictionary())
428     return pDict->GetArrayFor("D");
429   return nullptr;
430 }
431