1 /*
2 * Copyright (C) 2024 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "text.h"
18
19 #include <string>
20 #include <vector>
21
22 #include "cpp/fpdf_scopers.h"
23 #include "fpdf_annot.h"
24 #include "fpdf_edit.h"
25 #include "fpdf_structtree.h"
26 #include "fpdfview.h"
27 #include "utf.h"
28 // #include "util/gtl/map_util.h" // @Todo(b/312339259) - find a way to uncomment it
29
30 namespace pdfClient_utils {
31
namespace {
// Deepest struct tree level whose children are still visited; anything below
// that is not descended into.
constexpr int kRecursionLimit{100};
}  // namespace
36
FPDF_StructElement_GetAltText(FPDF_STRUCTELEMENT elem)37 std::string FPDF_StructElement_GetAltText(FPDF_STRUCTELEMENT elem) {
38 return GetUtf8Result<void>(std::bind(::FPDF_StructElement_GetAltText, elem,
39 std::placeholders::_1, std::placeholders::_2));
40 }
41
FPDFAnnot_GetStringValue(FPDF_ANNOTATION annot,FPDF_BYTESTRING key)42 std::string FPDFAnnot_GetStringValue(FPDF_ANNOTATION annot, FPDF_BYTESTRING key) {
43 return GetUtf8Result<FPDF_WCHAR>(std::bind(::FPDFAnnot_GetStringValue, annot, key,
44 std::placeholders::_1, std::placeholders::_2));
45 }
46
FPDFAnnot_GetOptionLabel(FPDF_FORMHANDLE hHandle,FPDF_ANNOTATION annot,int index)47 std::string FPDFAnnot_GetOptionLabel(FPDF_FORMHANDLE hHandle, FPDF_ANNOTATION annot, int index) {
48 return GetUtf8Result<FPDF_WCHAR>(std::bind(::FPDFAnnot_GetOptionLabel, hHandle, annot, index,
49 std::placeholders::_1, std::placeholders::_2));
50 }
51
FORM_GetFocusedText(FPDF_FORMHANDLE hHandle,FPDF_PAGE page)52 std::string FORM_GetFocusedText(FPDF_FORMHANDLE hHandle, FPDF_PAGE page) {
53 return GetUtf8Result<void>(std::bind(::FORM_GetFocusedText, hHandle, page,
54 std::placeholders::_1, std::placeholders::_2));
55 }
56
57 // Extracts alt text from |elem| and puts it in the |result| vector if
58 // non-empty.
GetAltTextFromElement(const FPDF_STRUCTELEMENT elem,std::vector<std::string> * result)59 void GetAltTextFromElement(const FPDF_STRUCTELEMENT elem, std::vector<std::string>* result) {
60 std::string alt = FPDF_StructElement_GetAltText(elem);
61 if (!alt.empty()) {
62 result->push_back(alt);
63 }
64 }
65
66 // Extracts alt text from |elem| and puts it in the |result| map keyed by marked
67 // content ID if non-empty. Skips duplicate IDs.
GetAltTextFromElement(const FPDF_STRUCTELEMENT elem,std::unordered_map<int,std::string> * result)68 void GetAltTextFromElement(const FPDF_STRUCTELEMENT elem,
69 std::unordered_map<int, std::string>* result) {
70 std::string alt = FPDF_StructElement_GetAltText(elem);
71 if (!alt.empty()) {
72 int id = FPDF_StructElement_GetMarkedContentID(elem);
73 // if (!gtl::InsertIfNotPresent(result, id, alt)) {
74 // VLOG(2) << "Duplicate alt text marked content ID found! Ignoring.";
75 // } // @Todo(b/312339259)
76 }
77 }
78
79 // Recursively traverses the element tree under |elem| and inserts alt text into
80 // |result|.
81 template <typename ResultType>
GetAltTextFromElementTree(const FPDF_STRUCTELEMENT elem,int recursion_level,ResultType * result)82 void GetAltTextFromElementTree(const FPDF_STRUCTELEMENT elem, int recursion_level,
83 ResultType* result) {
84 GetAltTextFromElement(elem, result);
85
86 if (recursion_level > kRecursionLimit) return;
87
88 int num_children = FPDF_StructElement_CountChildren(elem);
89 for (int i = 0; i < num_children; i++) {
90 GetAltTextFromElementTree(FPDF_StructElement_GetChildAtIndex(elem, i), recursion_level + 1,
91 result);
92 }
93 }
94
95 // Extracts alt text from all child element trees in |page| and inserts into
96 // |result|.
97 template <typename ResultType>
GetAltTextFromPage(const FPDF_PAGE page,ResultType * result)98 void GetAltTextFromPage(const FPDF_PAGE page, ResultType* result) {
99 ScopedFPDFStructTree tree(FPDF_StructTree_GetForPage(page));
100 int num_children = FPDF_StructTree_CountChildren(tree.get());
101 for (int i = 0; i < num_children; ++i) {
102 GetAltTextFromElementTree(FPDF_StructTree_GetChildAtIndex(tree.get(), i), 0, result);
103 }
104 }
105
GetAltText(const FPDF_PAGE page,std::vector<std::string> * result)106 void GetAltText(const FPDF_PAGE page, std::vector<std::string>* result) {
107 GetAltTextFromPage(page, result);
108 }
109
GetAltText(const FPDF_PAGE page,std::unordered_map<int,std::string> * result)110 void GetAltText(const FPDF_PAGE page, std::unordered_map<int, std::string>* result) {
111 GetAltTextFromPage(page, result);
112 }
113
114 } // namespace pdfClient_utils