1 /*
2  * Copyright (C) 2024 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "text.h"
18 
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
21 
22 #include "cpp/fpdf_scopers.h"
23 #include "fpdf_annot.h"
24 #include "fpdf_edit.h"
25 #include "fpdf_structtree.h"
26 #include "fpdfview.h"
27 #include "utf.h"
28 // #include "util/gtl/map_util.h" // @Todo(b/312339259) - find a way to uncomment it
29 
30 namespace pdfClient_utils {
31 
namespace {
// Depth cap for struct tree traversal: once the recursion level exceeds this
// value, the children of an element are no longer visited.
constexpr int kRecursionLimit{100};
}  // namespace
36 
FPDF_StructElement_GetAltText(FPDF_STRUCTELEMENT elem)37 std::string FPDF_StructElement_GetAltText(FPDF_STRUCTELEMENT elem) {
38     return GetUtf8Result<void>(std::bind(::FPDF_StructElement_GetAltText, elem,
39                                          std::placeholders::_1, std::placeholders::_2));
40 }
41 
FPDFAnnot_GetStringValue(FPDF_ANNOTATION annot,FPDF_BYTESTRING key)42 std::string FPDFAnnot_GetStringValue(FPDF_ANNOTATION annot, FPDF_BYTESTRING key) {
43     return GetUtf8Result<FPDF_WCHAR>(std::bind(::FPDFAnnot_GetStringValue, annot, key,
44                                                std::placeholders::_1, std::placeholders::_2));
45 }
46 
FPDFAnnot_GetOptionLabel(FPDF_FORMHANDLE hHandle,FPDF_ANNOTATION annot,int index)47 std::string FPDFAnnot_GetOptionLabel(FPDF_FORMHANDLE hHandle, FPDF_ANNOTATION annot, int index) {
48     return GetUtf8Result<FPDF_WCHAR>(std::bind(::FPDFAnnot_GetOptionLabel, hHandle, annot, index,
49                                                std::placeholders::_1, std::placeholders::_2));
50 }
51 
FORM_GetFocusedText(FPDF_FORMHANDLE hHandle,FPDF_PAGE page)52 std::string FORM_GetFocusedText(FPDF_FORMHANDLE hHandle, FPDF_PAGE page) {
53     return GetUtf8Result<void>(std::bind(::FORM_GetFocusedText, hHandle, page,
54                                          std::placeholders::_1, std::placeholders::_2));
55 }
56 
57 // Extracts alt text from |elem| and puts it in the |result| vector if
58 // non-empty.
GetAltTextFromElement(const FPDF_STRUCTELEMENT elem,std::vector<std::string> * result)59 void GetAltTextFromElement(const FPDF_STRUCTELEMENT elem, std::vector<std::string>* result) {
60     std::string alt = FPDF_StructElement_GetAltText(elem);
61     if (!alt.empty()) {
62         result->push_back(alt);
63     }
64 }
65 
66 // Extracts alt text from |elem| and puts it in the |result| map keyed by marked
67 // content ID if non-empty. Skips duplicate IDs.
GetAltTextFromElement(const FPDF_STRUCTELEMENT elem,std::unordered_map<int,std::string> * result)68 void GetAltTextFromElement(const FPDF_STRUCTELEMENT elem,
69                            std::unordered_map<int, std::string>* result) {
70     std::string alt = FPDF_StructElement_GetAltText(elem);
71     if (!alt.empty()) {
72         int id = FPDF_StructElement_GetMarkedContentID(elem);
73         //    if (!gtl::InsertIfNotPresent(result, id, alt)) {
74         //      VLOG(2) << "Duplicate alt text marked content ID found! Ignoring.";
75         //    } // @Todo(b/312339259)
76     }
77 }
78 
79 // Recursively traverses the element tree under |elem| and inserts alt text into
80 // |result|.
81 template <typename ResultType>
GetAltTextFromElementTree(const FPDF_STRUCTELEMENT elem,int recursion_level,ResultType * result)82 void GetAltTextFromElementTree(const FPDF_STRUCTELEMENT elem, int recursion_level,
83                                ResultType* result) {
84     GetAltTextFromElement(elem, result);
85 
86     if (recursion_level > kRecursionLimit) return;
87 
88     int num_children = FPDF_StructElement_CountChildren(elem);
89     for (int i = 0; i < num_children; i++) {
90         GetAltTextFromElementTree(FPDF_StructElement_GetChildAtIndex(elem, i), recursion_level + 1,
91                                   result);
92     }
93 }
94 
95 // Extracts alt text from all child element trees in |page| and inserts into
96 // |result|.
97 template <typename ResultType>
GetAltTextFromPage(const FPDF_PAGE page,ResultType * result)98 void GetAltTextFromPage(const FPDF_PAGE page, ResultType* result) {
99     ScopedFPDFStructTree tree(FPDF_StructTree_GetForPage(page));
100     int num_children = FPDF_StructTree_CountChildren(tree.get());
101     for (int i = 0; i < num_children; ++i) {
102         GetAltTextFromElementTree(FPDF_StructTree_GetChildAtIndex(tree.get(), i), 0, result);
103     }
104 }
105 
GetAltText(const FPDF_PAGE page,std::vector<std::string> * result)106 void GetAltText(const FPDF_PAGE page, std::vector<std::string>* result) {
107     GetAltTextFromPage(page, result);
108 }
109 
GetAltText(const FPDF_PAGE page,std::unordered_map<int,std::string> * result)110 void GetAltText(const FPDF_PAGE page, std::unordered_map<int, std::string>* result) {
111     GetAltTextFromPage(page, result);
112 }
113 
114 }  // namespace pdfClient_utils