1 /*
2  * Copyright (C) 2015 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
#include "Logger.h"
#include "Util.h"
#include "XmlDom.h"
#include "XmlPullParser.h"

#include <algorithm>
#include <cassert>
#include <cerrno>
#include <cstring>
#include <istream>
#include <memory>
#include <stack>
#include <string>
#include <tuple>
#include <vector>
27 
28 namespace aapt {
29 namespace xml {
30 
// Character handed to XML_ParserCreateNS to separate a namespace URI from the
// local name in expanded names; splitName() looks for it when splitting them.
constexpr char kXmlNamespaceSep = '\x01';
32 
33 struct Stack {
34     std::unique_ptr<xml::Node> root;
35     std::stack<xml::Node*> nodeStack;
36     std::u16string pendingComment;
37 };
38 
39 /**
40  * Extracts the namespace and name of an expanded element or attribute name.
41  */
splitName(const char * name,std::u16string * outNs,std::u16string * outName)42 static void splitName(const char* name, std::u16string* outNs, std::u16string* outName) {
43     const char* p = name;
44     while (*p != 0 && *p != kXmlNamespaceSep) {
45         p++;
46     }
47 
48     if (*p == 0) {
49         outNs->clear();
50         *outName = util::utf8ToUtf16(name);
51     } else {
52         *outNs = util::utf8ToUtf16(StringPiece(name, (p - name)));
53         *outName = util::utf8ToUtf16(p + 1);
54     }
55 }
56 
addToStack(Stack * stack,XML_Parser parser,std::unique_ptr<Node> node)57 static void addToStack(Stack* stack, XML_Parser parser, std::unique_ptr<Node> node) {
58     node->lineNumber = XML_GetCurrentLineNumber(parser);
59     node->columnNumber = XML_GetCurrentColumnNumber(parser);
60 
61     Node* thisNode = node.get();
62     if (!stack->nodeStack.empty()) {
63         stack->nodeStack.top()->addChild(std::move(node));
64     } else {
65         stack->root = std::move(node);
66     }
67 
68     if (thisNode->type != NodeType::kText) {
69         stack->nodeStack.push(thisNode);
70     }
71 }
72 
startNamespaceHandler(void * userData,const char * prefix,const char * uri)73 static void XMLCALL startNamespaceHandler(void* userData, const char* prefix, const char* uri) {
74     XML_Parser parser = reinterpret_cast<XML_Parser>(userData);
75     Stack* stack = reinterpret_cast<Stack*>(XML_GetUserData(parser));
76 
77     std::unique_ptr<Namespace> ns = util::make_unique<Namespace>();
78     if (prefix) {
79         ns->namespacePrefix = util::utf8ToUtf16(prefix);
80     }
81 
82     if (uri) {
83         ns->namespaceUri = util::utf8ToUtf16(uri);
84     }
85 
86     addToStack(stack, parser, std::move(ns));
87 }
88 
endNamespaceHandler(void * userData,const char * prefix)89 static void XMLCALL endNamespaceHandler(void* userData, const char* prefix) {
90     XML_Parser parser = reinterpret_cast<XML_Parser>(userData);
91     Stack* stack = reinterpret_cast<Stack*>(XML_GetUserData(parser));
92 
93     assert(!stack->nodeStack.empty());
94     stack->nodeStack.pop();
95 }
96 
lessAttribute(const Attribute & lhs,const Attribute & rhs)97 static bool lessAttribute(const Attribute& lhs, const Attribute& rhs) {
98     return std::tie(lhs.namespaceUri, lhs.name, lhs.value) <
99             std::tie(rhs.namespaceUri, rhs.name, rhs.value);
100 }
101 
startElementHandler(void * userData,const char * name,const char ** attrs)102 static void XMLCALL startElementHandler(void* userData, const char* name, const char** attrs) {
103     XML_Parser parser = reinterpret_cast<XML_Parser>(userData);
104     Stack* stack = reinterpret_cast<Stack*>(XML_GetUserData(parser));
105 
106     std::unique_ptr<Element> el = util::make_unique<Element>();
107     splitName(name, &el->namespaceUri, &el->name);
108 
109     while (*attrs) {
110         Attribute attribute;
111         splitName(*attrs++, &attribute.namespaceUri, &attribute.name);
112         attribute.value = util::utf8ToUtf16(*attrs++);
113 
114         // Insert in sorted order.
115         auto iter = std::lower_bound(el->attributes.begin(), el->attributes.end(), attribute,
116                                      lessAttribute);
117         el->attributes.insert(iter, std::move(attribute));
118     }
119 
120     el->comment = std::move(stack->pendingComment);
121     addToStack(stack, parser, std::move(el));
122 }
123 
endElementHandler(void * userData,const char * name)124 static void XMLCALL endElementHandler(void* userData, const char* name) {
125     XML_Parser parser = reinterpret_cast<XML_Parser>(userData);
126     Stack* stack = reinterpret_cast<Stack*>(XML_GetUserData(parser));
127 
128     assert(!stack->nodeStack.empty());
129     stack->nodeStack.top()->comment = std::move(stack->pendingComment);
130     stack->nodeStack.pop();
131 }
132 
characterDataHandler(void * userData,const char * s,int len)133 static void XMLCALL characterDataHandler(void* userData, const char* s, int len) {
134     XML_Parser parser = reinterpret_cast<XML_Parser>(userData);
135     Stack* stack = reinterpret_cast<Stack*>(XML_GetUserData(parser));
136 
137     if (!s || len <= 0) {
138         return;
139     }
140 
141     // See if we can just append the text to a previous text node.
142     if (!stack->nodeStack.empty()) {
143         Node* currentParent = stack->nodeStack.top();
144         if (!currentParent->children.empty()) {
145             Node* lastChild = currentParent->children.back().get();
146             if (lastChild->type == NodeType::kText) {
147                 Text* text = static_cast<Text*>(lastChild);
148                 text->text += util::utf8ToUtf16(StringPiece(s, len));
149                 return;
150             }
151         }
152     }
153 
154     std::unique_ptr<Text> text = util::make_unique<Text>();
155     text->text = util::utf8ToUtf16(StringPiece(s, len));
156     addToStack(stack, parser, std::move(text));
157 }
158 
commentDataHandler(void * userData,const char * comment)159 static void XMLCALL commentDataHandler(void* userData, const char* comment) {
160     XML_Parser parser = reinterpret_cast<XML_Parser>(userData);
161     Stack* stack = reinterpret_cast<Stack*>(XML_GetUserData(parser));
162 
163     if (!stack->pendingComment.empty()) {
164         stack->pendingComment += '\n';
165     }
166     stack->pendingComment += util::utf8ToUtf16(comment);
167 }
168 
inflate(std::istream * in,SourceLogger * logger)169 std::unique_ptr<Node> inflate(std::istream* in, SourceLogger* logger) {
170     Stack stack;
171 
172     XML_Parser parser = XML_ParserCreateNS(nullptr, kXmlNamespaceSep);
173     XML_SetUserData(parser, &stack);
174     XML_UseParserAsHandlerArg(parser);
175     XML_SetElementHandler(parser, startElementHandler, endElementHandler);
176     XML_SetNamespaceDeclHandler(parser, startNamespaceHandler, endNamespaceHandler);
177     XML_SetCharacterDataHandler(parser, characterDataHandler);
178     XML_SetCommentHandler(parser, commentDataHandler);
179 
180     char buffer[1024];
181     while (!in->eof()) {
182         in->read(buffer, sizeof(buffer) / sizeof(buffer[0]));
183         if (in->bad() && !in->eof()) {
184             stack.root = {};
185             logger->error() << strerror(errno) << std::endl;
186             break;
187         }
188 
189         if (XML_Parse(parser, buffer, in->gcount(), in->eof()) == XML_STATUS_ERROR) {
190             stack.root = {};
191             logger->error(XML_GetCurrentLineNumber(parser))
192                     << XML_ErrorString(XML_GetErrorCode(parser)) << std::endl;
193             break;
194         }
195     }
196 
197     XML_ParserFree(parser);
198     return std::move(stack.root);
199 }
200 
copyAttributes(Element * el,android::ResXMLParser * parser)201 static void copyAttributes(Element* el, android::ResXMLParser* parser) {
202     const size_t attrCount = parser->getAttributeCount();
203     if (attrCount > 0) {
204         el->attributes.reserve(attrCount);
205         for (size_t i = 0; i < attrCount; i++) {
206             Attribute attr;
207             size_t len;
208             const char16_t* str16 = parser->getAttributeNamespace(i, &len);
209             if (str16) {
210                 attr.namespaceUri.assign(str16, len);
211             }
212 
213             str16 = parser->getAttributeName(i, &len);
214             if (str16) {
215                 attr.name.assign(str16, len);
216             }
217 
218             str16 = parser->getAttributeStringValue(i, &len);
219             if (str16) {
220                 attr.value.assign(str16, len);
221             }
222             el->attributes.push_back(std::move(attr));
223         }
224     }
225 }
226 
inflate(const void * data,size_t dataLen,SourceLogger * logger)227 std::unique_ptr<Node> inflate(const void* data, size_t dataLen, SourceLogger* logger) {
228     std::unique_ptr<Node> root;
229     std::stack<Node*> nodeStack;
230 
231     android::ResXMLTree tree;
232     if (tree.setTo(data, dataLen) != android::NO_ERROR) {
233         return {};
234     }
235 
236     android::ResXMLParser::event_code_t code;
237     while ((code = tree.next()) != android::ResXMLParser::BAD_DOCUMENT &&
238             code != android::ResXMLParser::END_DOCUMENT) {
239         std::unique_ptr<Node> newNode;
240         switch (code) {
241             case android::ResXMLParser::START_NAMESPACE: {
242                 std::unique_ptr<Namespace> node = util::make_unique<Namespace>();
243                 size_t len;
244                 const char16_t* str16 = tree.getNamespacePrefix(&len);
245                 if (str16) {
246                     node->namespacePrefix.assign(str16, len);
247                 }
248 
249                 str16 = tree.getNamespaceUri(&len);
250                 if (str16) {
251                     node->namespaceUri.assign(str16, len);
252                 }
253                 newNode = std::move(node);
254                 break;
255             }
256 
257             case android::ResXMLParser::START_TAG: {
258                 std::unique_ptr<Element> node = util::make_unique<Element>();
259                 size_t len;
260                 const char16_t* str16 = tree.getElementNamespace(&len);
261                 if (str16) {
262                     node->namespaceUri.assign(str16, len);
263                 }
264 
265                 str16 = tree.getElementName(&len);
266                 if (str16) {
267                     node->name.assign(str16, len);
268                 }
269 
270                 copyAttributes(node.get(), &tree);
271 
272                 newNode = std::move(node);
273                 break;
274             }
275 
276             case android::ResXMLParser::TEXT: {
277                 std::unique_ptr<Text> node = util::make_unique<Text>();
278                 size_t len;
279                 const char16_t* str16 = tree.getText(&len);
280                 if (str16) {
281                     node->text.assign(str16, len);
282                 }
283                 newNode = std::move(node);
284                 break;
285             }
286 
287             case android::ResXMLParser::END_NAMESPACE:
288             case android::ResXMLParser::END_TAG:
289                 assert(!nodeStack.empty());
290                 nodeStack.pop();
291                 break;
292 
293             default:
294                 assert(false);
295                 break;
296         }
297 
298         if (newNode) {
299             newNode->lineNumber = tree.getLineNumber();
300 
301             Node* thisNode = newNode.get();
302             if (!root) {
303                 assert(nodeStack.empty());
304                 root = std::move(newNode);
305             } else {
306                 assert(!nodeStack.empty());
307                 nodeStack.top()->addChild(std::move(newNode));
308             }
309 
310             if (thisNode->type != NodeType::kText) {
311                 nodeStack.push(thisNode);
312             }
313         }
314     }
315     return std::move(root);
316 }
317 
Node(NodeType type)318 Node::Node(NodeType type) : type(type), parent(nullptr), lineNumber(0), columnNumber(0) {
319 }
320 
addChild(std::unique_ptr<Node> child)321 void Node::addChild(std::unique_ptr<Node> child) {
322     child->parent = this;
323     children.push_back(std::move(child));
324 }
325 
Namespace()326 Namespace::Namespace() : BaseNode(NodeType::kNamespace) {
327 }
328 
clone() const329 std::unique_ptr<Node> Namespace::clone() const {
330     Namespace* ns = new Namespace();
331     ns->lineNumber = lineNumber;
332     ns->columnNumber = columnNumber;
333     ns->comment = comment;
334     ns->namespacePrefix = namespacePrefix;
335     ns->namespaceUri = namespaceUri;
336     for (auto& child : children) {
337         ns->addChild(child->clone());
338     }
339     return std::unique_ptr<Node>(ns);
340 }
341 
Element()342 Element::Element() : BaseNode(NodeType::kElement) {
343 }
344 
clone() const345 std::unique_ptr<Node> Element::clone() const {
346     Element* el = new Element();
347     el->lineNumber = lineNumber;
348     el->columnNumber = columnNumber;
349     el->comment = comment;
350     el->namespaceUri = namespaceUri;
351     el->name = name;
352     el->attributes = attributes;
353     for (auto& child : children) {
354         el->addChild(child->clone());
355     }
356     return std::unique_ptr<Node>(el);
357 }
358 
findAttribute(const StringPiece16 & ns,const StringPiece16 & name)359 Attribute* Element::findAttribute(const StringPiece16& ns, const StringPiece16& name) {
360     for (auto& attr : attributes) {
361         if (ns == attr.namespaceUri && name == attr.name) {
362             return &attr;
363         }
364     }
365     return nullptr;
366 }
367 
findChild(const StringPiece16 & ns,const StringPiece16 & name)368 Element* Element::findChild(const StringPiece16& ns, const StringPiece16& name) {
369     return findChildWithAttribute(ns, name, nullptr);
370 }
371 
findChildWithAttribute(const StringPiece16 & ns,const StringPiece16 & name,const Attribute * reqAttr)372 Element* Element::findChildWithAttribute(const StringPiece16& ns, const StringPiece16& name,
373                                          const Attribute* reqAttr) {
374     for (auto& childNode : children) {
375         Node* child = childNode.get();
376         while (child->type == NodeType::kNamespace) {
377             if (child->children.empty()) {
378                 break;
379             }
380             child = child->children[0].get();
381         }
382 
383         if (child->type == NodeType::kElement) {
384             Element* el = static_cast<Element*>(child);
385             if (ns == el->namespaceUri && name == el->name) {
386                 if (!reqAttr) {
387                     return el;
388                 }
389 
390                 Attribute* attrName = el->findAttribute(reqAttr->namespaceUri, reqAttr->name);
391                 if (attrName && attrName->value == reqAttr->value) {
392                     return el;
393                 }
394             }
395         }
396     }
397     return nullptr;
398 }
399 
getChildElements()400 std::vector<Element*> Element::getChildElements() {
401     std::vector<Element*> elements;
402     for (auto& childNode : children) {
403         Node* child = childNode.get();
404         while (child->type == NodeType::kNamespace) {
405             if (child->children.empty()) {
406                 break;
407             }
408             child = child->children[0].get();
409         }
410 
411         if (child->type == NodeType::kElement) {
412             elements.push_back(static_cast<Element*>(child));
413         }
414     }
415     return elements;
416 }
417 
Text()418 Text::Text() : BaseNode(NodeType::kText) {
419 }
420 
clone() const421 std::unique_ptr<Node> Text::clone() const {
422     Text* el = new Text();
423     el->lineNumber = lineNumber;
424     el->columnNumber = columnNumber;
425     el->comment = comment;
426     el->text = text;
427     return std::unique_ptr<Node>(el);
428 }
429 
430 } // namespace xml
431 } // namespace aapt
432