1 /*
2 * Copyright 2018 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8 #include "SkPDFDocumentPriv.h"
9 #include "SkPDFTag.h"
10
11 // Table 333 in PDF 32000-1:2008
tag_name_from_type(SkPDF::DocumentStructureType type)12 static const char* tag_name_from_type(SkPDF::DocumentStructureType type) {
13 switch (type) {
14 #define M(X) case SkPDF::DocumentStructureType::k ## X: return #X
15 M(Document);
16 M(Part);
17 M(Art);
18 M(Sect);
19 M(Div);
20 M(BlockQuote);
21 M(Caption);
22 M(TOC);
23 M(TOCI);
24 M(Index);
25 M(NonStruct);
26 M(Private);
27 M(H);
28 M(H1);
29 M(H2);
30 M(H3);
31 M(H4);
32 M(H5);
33 M(H6);
34 M(P);
35 M(L);
36 M(LI);
37 M(Lbl);
38 M(LBody);
39 M(Table);
40 M(TR);
41 M(TH);
42 M(TD);
43 M(THead);
44 M(TBody);
45 M(TFoot);
46 M(Span);
47 M(Quote);
48 M(Note);
49 M(Reference);
50 M(BibEntry);
51 M(Code);
52 M(Link);
53 M(Annot);
54 M(Ruby);
55 M(RB);
56 M(RT);
57 M(RP);
58 M(Warichu);
59 M(WT);
60 M(WP);
61 M(Figure);
62 M(Formula);
63 M(Form);
64 #undef M
65 }
66 SK_ABORT("bad tag");
67 return "";
68 }
69
70 struct SkPDFTagNode {
71 SkPDFTagNode* fChildren = nullptr;
72 size_t fChildCount = 0;
73 struct MarkedContentInfo {
74 unsigned fPageIndex;
75 int fMarkId;
76 };
77 SkTArray<MarkedContentInfo> fMarkedContent;
78 int fNodeId;
79 SkPDF::DocumentStructureType fType;
80 SkPDFIndirectReference fRef;
81 enum State {
82 kUnknown,
83 kYes,
84 kNo,
85 } fCanDiscard = kUnknown;
86 };
87
SkPDFTagTree()88 SkPDFTagTree::SkPDFTagTree() : fArena(4 * sizeof(SkPDFTagNode)) {}
89
90 SkPDFTagTree::~SkPDFTagTree() = default;
91
copy(const SkPDF::StructureElementNode & node,SkPDFTagNode * dst,SkArenaAlloc * arena,SkTHashMap<int,SkPDFTagNode * > * nodeMap)92 static void copy(const SkPDF::StructureElementNode& node,
93 SkPDFTagNode* dst,
94 SkArenaAlloc* arena,
95 SkTHashMap<int, SkPDFTagNode*>* nodeMap) {
96 nodeMap->set(node.fNodeId, dst);
97 size_t childCount = node.fChildCount;
98 SkPDFTagNode* children = arena->makeArray<SkPDFTagNode>(childCount);
99 dst->fChildCount = childCount;
100 dst->fNodeId = node.fNodeId;
101 dst->fType = node.fType;
102 dst->fChildren = children;
103 for (size_t i = 0; i < childCount; ++i) {
104 copy(node.fChildren[i], &children[i], arena, nodeMap);
105 }
106 }
107
init(const SkPDF::StructureElementNode * node)108 void SkPDFTagTree::init(const SkPDF::StructureElementNode* node) {
109 if (node) {
110 fRoot = fArena.make<SkPDFTagNode>();
111 copy(*node, fRoot, &fArena, &fNodeMap);
112 }
113 }
114
reset()115 void SkPDFTagTree::reset() {
116 fArena.reset();
117 fNodeMap.reset();
118 fMarksPerPage.reset();
119 fRoot = nullptr;
120 }
121
getMarkIdForNodeId(int nodeId,unsigned pageIndex)122 int SkPDFTagTree::getMarkIdForNodeId(int nodeId, unsigned pageIndex) {
123 if (!fRoot) {
124 return -1;
125 }
126 SkPDFTagNode** tagPtr = fNodeMap.find(nodeId);
127 if (!tagPtr) {
128 return -1;
129 }
130 SkPDFTagNode* tag = *tagPtr;
131 SkASSERT(tag);
132 while (fMarksPerPage.size() < pageIndex + 1) {
133 fMarksPerPage.push_back();
134 }
135 SkTArray<SkPDFTagNode*>& pageMarks = fMarksPerPage[pageIndex];
136 int markId = pageMarks.count();
137 tag->fMarkedContent.push_back({pageIndex, markId});
138 pageMarks.push_back(tag);
139 return markId;
140 }
141
can_discard(SkPDFTagNode * node)142 static bool can_discard(SkPDFTagNode* node) {
143 if (node->fCanDiscard == SkPDFTagNode::kYes) {
144 return true;
145 }
146 if (node->fCanDiscard == SkPDFTagNode::kNo) {
147 return false;
148 }
149 if (!node->fMarkedContent.empty()) {
150 node->fCanDiscard = SkPDFTagNode::kNo;
151 return false;
152 }
153 for (size_t i = 0; i < node->fChildCount; ++i) {
154 if (!can_discard(&node->fChildren[i])) {
155 node->fCanDiscard = SkPDFTagNode::kNo;
156 return false;
157 }
158 }
159 node->fCanDiscard = SkPDFTagNode::kYes;
160 return true;
161 }
162
163
prepare_tag_tree_to_emit(SkPDFIndirectReference parent,SkPDFTagNode * node,SkPDFDocument * doc)164 SkPDFIndirectReference prepare_tag_tree_to_emit(SkPDFIndirectReference parent,
165 SkPDFTagNode* node,
166 SkPDFDocument* doc) {
167 SkPDFIndirectReference ref = doc->reserveRef();
168 std::unique_ptr<SkPDFArray> kids = SkPDFMakeArray();
169 SkPDFTagNode* children = node->fChildren;
170 size_t childCount = node->fChildCount;
171 for (size_t i = 0; i < childCount; ++i) {
172 SkPDFTagNode* child = &children[i];
173 if (!(can_discard(child))) {
174 kids->appendRef(prepare_tag_tree_to_emit(ref, child, doc));
175 }
176 }
177 for (const SkPDFTagNode::MarkedContentInfo& info : node->fMarkedContent) {
178 std::unique_ptr<SkPDFDict> mcr = SkPDFMakeDict("MCR");
179 mcr->insertRef("Pg", doc->getPage(info.fPageIndex));
180 mcr->insertInt("MCID", info.fMarkId);
181 kids->appendObject(std::move(mcr));
182 }
183 node->fRef = ref;
184 SkPDFDict dict("StructElem");
185 dict.insertName("S", tag_name_from_type(node->fType));
186 dict.insertRef("P", parent);
187 dict.insertObject("K", std::move(kids));
188 return doc->emit(dict, ref);
189 }
190
makeStructTreeRoot(SkPDFDocument * doc)191 SkPDFIndirectReference SkPDFTagTree::makeStructTreeRoot(SkPDFDocument* doc) {
192 if (!fRoot) {
193 return SkPDFIndirectReference();
194 }
195 if (can_discard(fRoot)) {
196 SkDEBUGFAIL("PDF has tag tree but no marked content.");
197 }
198 SkPDFIndirectReference ref = doc->reserveRef();
199
200 unsigned pageCount = SkToUInt(doc->pageCount());
201
202 // Build the StructTreeRoot.
203 SkPDFDict structTreeRoot("StructTreeRoot");
204 structTreeRoot.insertRef("K", prepare_tag_tree_to_emit(ref, fRoot, doc));
205 structTreeRoot.insertInt("ParentTreeNextKey", SkToInt(pageCount));
206
207 // Build the parent tree, which is a mapping from the marked
208 // content IDs on each page to their corressponding tags.
209 SkPDFDict parentTree("ParentTree");
210 auto parentTreeNums = SkPDFMakeArray();
211
212 SkASSERT(fMarksPerPage.size() <= pageCount);
213 for (size_t j = 0; j < fMarksPerPage.size(); ++j) {
214 const SkTArray<SkPDFTagNode*>& pageMarks = fMarksPerPage[j];
215 SkPDFArray markToTagArray;
216 for (SkPDFTagNode* mark : pageMarks) {
217 SkASSERT(mark->fRef);
218 markToTagArray.appendRef(mark->fRef);
219 }
220 parentTreeNums->appendInt(j);
221 parentTreeNums->appendRef(doc->emit(markToTagArray));
222 }
223 parentTree.insertObject("Nums", std::move(parentTreeNums));
224 structTreeRoot.insertRef("ParentTree", doc->emit(parentTree));
225 return doc->emit(structTreeRoot, ref);
226 }
227
228