1 /*
2  * Copyright (C) 2015 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "XmlDom.h"
18 
19 #include <expat.h>
20 
21 #include <memory>
22 #include <stack>
23 #include <string>
24 #include <tuple>
25 
26 #include "android-base/logging.h"
27 
28 #include "ResourceUtils.h"
29 #include "XmlPullParser.h"
30 #include "util/Util.h"
31 
32 using ::aapt::io::InputStream;
33 using ::android::StringPiece;
34 using ::android::StringPiece16;
35 
36 namespace aapt {
37 namespace xml {
38 
// Separator character handed to XML_ParserCreateNS; SplitName() splits expanded names on it.
constexpr char kXmlNamespaceSep = '\x01';
40 
41 struct Stack {
42   std::unique_ptr<xml::Element> root;
43   std::stack<xml::Element*> node_stack;
44   std::unique_ptr<xml::Element> pending_element;
45   std::string pending_comment;
46   std::unique_ptr<xml::Text> last_text_node;
47 };
48 
49 // Extracts the namespace and name of an expanded element or attribute name.
SplitName(const char * name,std::string * out_ns,std::string * out_name)50 static void SplitName(const char* name, std::string* out_ns, std::string* out_name) {
51   const char* p = name;
52   while (*p != 0 && *p != kXmlNamespaceSep) {
53     p++;
54   }
55 
56   if (*p == 0) {
57     out_ns->clear();
58     out_name->assign(name);
59   } else {
60     out_ns->assign(name, (p - name));
61     out_name->assign(p + 1);
62   }
63 }
64 
FinishPendingText(Stack * stack)65 static void FinishPendingText(Stack* stack) {
66   if (stack->last_text_node != nullptr) {
67     if (!stack->last_text_node->text.empty()) {
68       CHECK(!stack->node_stack.empty());
69       stack->node_stack.top()->AppendChild(std::move(stack->last_text_node));
70     } else {
71       // Drop an empty text node.
72     }
73     stack->last_text_node = nullptr;
74   }
75 }
76 
StartNamespaceHandler(void * user_data,const char * prefix,const char * uri)77 static void XMLCALL StartNamespaceHandler(void* user_data, const char* prefix, const char* uri) {
78   XML_Parser parser = reinterpret_cast<XML_Parser>(user_data);
79   Stack* stack = reinterpret_cast<Stack*>(XML_GetUserData(parser));
80   FinishPendingText(stack);
81 
82   NamespaceDecl decl;
83   decl.line_number = XML_GetCurrentLineNumber(parser);
84   decl.column_number = XML_GetCurrentColumnNumber(parser);
85   decl.prefix = prefix ? prefix : "";
86   decl.uri = uri ? uri : "";
87 
88   if (stack->pending_element == nullptr) {
89     stack->pending_element = util::make_unique<Element>();
90   }
91   stack->pending_element->namespace_decls.push_back(std::move(decl));
92 }
93 
EndNamespaceHandler(void * user_data,const char *)94 static void XMLCALL EndNamespaceHandler(void* user_data, const char* /*prefix*/) {
95   XML_Parser parser = reinterpret_cast<XML_Parser>(user_data);
96   Stack* stack = reinterpret_cast<Stack*>(XML_GetUserData(parser));
97   FinishPendingText(stack);
98 }
99 
less_attribute(const Attribute & lhs,const Attribute & rhs)100 static bool less_attribute(const Attribute& lhs, const Attribute& rhs) {
101   return std::tie(lhs.namespace_uri, lhs.name, lhs.value) <
102          std::tie(rhs.namespace_uri, rhs.name, rhs.value);
103 }
104 
StartElementHandler(void * user_data,const char * name,const char ** attrs)105 static void XMLCALL StartElementHandler(void* user_data, const char* name, const char** attrs) {
106   XML_Parser parser = reinterpret_cast<XML_Parser>(user_data);
107   Stack* stack = reinterpret_cast<Stack*>(XML_GetUserData(parser));
108   FinishPendingText(stack);
109 
110   std::unique_ptr<Element> el;
111   if (stack->pending_element != nullptr) {
112     el = std::move(stack->pending_element);
113   } else {
114     el = util::make_unique<Element>();
115   }
116 
117   el->line_number = XML_GetCurrentLineNumber(parser);
118   el->column_number = XML_GetCurrentColumnNumber(parser);
119   el->comment = std::move(stack->pending_comment);
120 
121   SplitName(name, &el->namespace_uri, &el->name);
122 
123   while (*attrs) {
124     Attribute attribute;
125     SplitName(*attrs++, &attribute.namespace_uri, &attribute.name);
126     attribute.value = *attrs++;
127     el->attributes.push_back(std::move(attribute));
128   }
129 
130   // Sort the attributes.
131   std::sort(el->attributes.begin(), el->attributes.end(), less_attribute);
132 
133   // Add to the stack.
134   Element* this_el = el.get();
135   if (!stack->node_stack.empty()) {
136     stack->node_stack.top()->AppendChild(std::move(el));
137   } else {
138     stack->root = std::move(el);
139   }
140   stack->node_stack.push(this_el);
141 }
142 
EndElementHandler(void * user_data,const char * name)143 static void XMLCALL EndElementHandler(void* user_data, const char* name) {
144   XML_Parser parser = reinterpret_cast<XML_Parser>(user_data);
145   Stack* stack = reinterpret_cast<Stack*>(XML_GetUserData(parser));
146   FinishPendingText(stack);
147 
148   CHECK(!stack->node_stack.empty());
149   // stack->nodeStack.top()->comment = std::move(stack->pendingComment);
150   stack->node_stack.pop();
151 }
152 
CharacterDataHandler(void * user_data,const char * s,int len)153 static void XMLCALL CharacterDataHandler(void* user_data, const char* s, int len) {
154   XML_Parser parser = reinterpret_cast<XML_Parser>(user_data);
155   Stack* stack = reinterpret_cast<Stack*>(XML_GetUserData(parser));
156 
157   const StringPiece str(s, len);
158   if (str.empty()) {
159     return;
160   }
161 
162   // See if we can just append the text to a previous text node.
163   if (stack->last_text_node != nullptr) {
164     stack->last_text_node->text.append(str.data(), str.size());
165     return;
166   }
167 
168   stack->last_text_node = util::make_unique<Text>();
169   stack->last_text_node->line_number = XML_GetCurrentLineNumber(parser);
170   stack->last_text_node->column_number = XML_GetCurrentColumnNumber(parser);
171   stack->last_text_node->text = str.to_string();
172 }
173 
CommentDataHandler(void * user_data,const char * comment)174 static void XMLCALL CommentDataHandler(void* user_data, const char* comment) {
175   XML_Parser parser = reinterpret_cast<XML_Parser>(user_data);
176   Stack* stack = reinterpret_cast<Stack*>(XML_GetUserData(parser));
177   FinishPendingText(stack);
178 
179   if (!stack->pending_comment.empty()) {
180     stack->pending_comment += '\n';
181   }
182   stack->pending_comment += comment;
183 }
184 
Inflate(InputStream * in,IDiagnostics * diag,const Source & source)185 std::unique_ptr<XmlResource> Inflate(InputStream* in, IDiagnostics* diag, const Source& source) {
186   Stack stack;
187 
188   std::unique_ptr<std::remove_pointer<XML_Parser>::type, decltype(XML_ParserFree)*> parser = {
189       XML_ParserCreateNS(nullptr, kXmlNamespaceSep), XML_ParserFree};
190   XML_SetUserData(parser.get(), &stack);
191   XML_UseParserAsHandlerArg(parser.get());
192   XML_SetElementHandler(parser.get(), StartElementHandler, EndElementHandler);
193   XML_SetNamespaceDeclHandler(parser.get(), StartNamespaceHandler, EndNamespaceHandler);
194   XML_SetCharacterDataHandler(parser.get(), CharacterDataHandler);
195   XML_SetCommentHandler(parser.get(), CommentDataHandler);
196 
197   const char* buffer = nullptr;
198   size_t buffer_size = 0;
199   while (in->Next(reinterpret_cast<const void**>(&buffer), &buffer_size)) {
200     if (XML_Parse(parser.get(), buffer, buffer_size, false) == XML_STATUS_ERROR) {
201       diag->Error(DiagMessage(source.WithLine(XML_GetCurrentLineNumber(parser.get())))
202                   << XML_ErrorString(XML_GetErrorCode(parser.get())));
203       return {};
204     }
205   }
206 
207   if (in->HadError()) {
208     diag->Error(DiagMessage(source) << in->GetError());
209     return {};
210   } else {
211     // Finish off the parsing.
212     if (XML_Parse(parser.get(), nullptr, 0u, true) == XML_STATUS_ERROR) {
213       diag->Error(DiagMessage(source.WithLine(XML_GetCurrentLineNumber(parser.get())))
214                   << XML_ErrorString(XML_GetErrorCode(parser.get())));
215       return {};
216     }
217   }
218   return util::make_unique<XmlResource>(ResourceFile{{}, {}, ResourceFile::Type::kUnknown, source},
219                                         StringPool{}, std::move(stack.root));
220 }
221 
CopyAttributes(Element * el,android::ResXMLParser * parser,StringPool * out_pool)222 static void CopyAttributes(Element* el, android::ResXMLParser* parser, StringPool* out_pool) {
223   const size_t attr_count = parser->getAttributeCount();
224   if (attr_count > 0) {
225     el->attributes.reserve(attr_count);
226     for (size_t i = 0; i < attr_count; i++) {
227       Attribute attr;
228       size_t len;
229       const char16_t* str16 = parser->getAttributeNamespace(i, &len);
230       if (str16) {
231         attr.namespace_uri = util::Utf16ToUtf8(StringPiece16(str16, len));
232       }
233 
234       str16 = parser->getAttributeName(i, &len);
235       if (str16) {
236         attr.name = util::Utf16ToUtf8(StringPiece16(str16, len));
237       }
238 
239       uint32_t res_id = parser->getAttributeNameResID(i);
240       if (res_id > 0) {
241         attr.compiled_attribute = AaptAttribute(::aapt::Attribute(), {res_id});
242       }
243 
244       str16 = parser->getAttributeStringValue(i, &len);
245       if (str16) {
246         attr.value = util::Utf16ToUtf8(StringPiece16(str16, len));
247       }
248 
249       android::Res_value res_value;
250       if (parser->getAttributeValue(i, &res_value) > 0) {
251         // Only compile the value if it is not a string, or it is a string that differs from
252         // the raw attribute value.
253         int32_t raw_value_idx = parser->getAttributeValueStringID(i);
254         if (res_value.dataType != android::Res_value::TYPE_STRING || raw_value_idx < 0 ||
255             static_cast<uint32_t>(raw_value_idx) != res_value.data) {
256           attr.compiled_value = ResourceUtils::ParseBinaryResValue(
257               ResourceType::kAnim, {}, parser->getStrings(), res_value, out_pool);
258         }
259       }
260 
261       el->attributes.push_back(std::move(attr));
262     }
263   }
264 }
265 
Inflate(const void * data,size_t len,std::string * out_error)266 std::unique_ptr<XmlResource> Inflate(const void* data, size_t len, std::string* out_error) {
267   // We import the android namespace because on Windows NO_ERROR is a macro, not
268   // an enum, which causes errors when qualifying it with android::
269   using namespace android;
270 
271   std::unique_ptr<XmlResource> xml_resource = util::make_unique<XmlResource>();
272 
273   std::stack<Element*> node_stack;
274   std::unique_ptr<Element> pending_element;
275 
276   ResXMLTree tree;
277   if (tree.setTo(data, len) != NO_ERROR) {
278     if (out_error != nullptr) {
279       *out_error = "failed to initialize ResXMLTree";
280     }
281     return {};
282   }
283 
284   ResXMLParser::event_code_t code;
285   while ((code = tree.next()) != ResXMLParser::BAD_DOCUMENT && code != ResXMLParser::END_DOCUMENT) {
286     std::unique_ptr<Node> new_node;
287     switch (code) {
288       case ResXMLParser::START_NAMESPACE: {
289         NamespaceDecl decl;
290         decl.line_number = tree.getLineNumber();
291 
292         size_t len;
293         const char16_t* str16 = tree.getNamespacePrefix(&len);
294         if (str16) {
295           decl.prefix = util::Utf16ToUtf8(StringPiece16(str16, len));
296         }
297 
298         str16 = tree.getNamespaceUri(&len);
299         if (str16) {
300           decl.uri = util::Utf16ToUtf8(StringPiece16(str16, len));
301         }
302 
303         if (pending_element == nullptr) {
304           pending_element = util::make_unique<Element>();
305         }
306         pending_element->namespace_decls.push_back(std::move(decl));
307         break;
308       }
309 
310       case ResXMLParser::START_TAG: {
311         std::unique_ptr<Element> el;
312         if (pending_element != nullptr) {
313           el = std::move(pending_element);
314         } else {
315           el = util::make_unique<Element>();
316         }
317         el->line_number = tree.getLineNumber();
318 
319         size_t len;
320         const char16_t* str16 = tree.getElementNamespace(&len);
321         if (str16) {
322           el->namespace_uri = util::Utf16ToUtf8(StringPiece16(str16, len));
323         }
324 
325         str16 = tree.getElementName(&len);
326         if (str16) {
327           el->name = util::Utf16ToUtf8(StringPiece16(str16, len));
328         }
329 
330         Element* this_el = el.get();
331         CopyAttributes(el.get(), &tree, &xml_resource->string_pool);
332 
333         if (!node_stack.empty()) {
334           node_stack.top()->AppendChild(std::move(el));
335         } else {
336           xml_resource->root = std::move(el);
337         }
338         node_stack.push(this_el);
339         break;
340       }
341 
342       case ResXMLParser::TEXT: {
343         std::unique_ptr<Text> text = util::make_unique<Text>();
344         text->line_number = tree.getLineNumber();
345         size_t len;
346         const char16_t* str16 = tree.getText(&len);
347         if (str16) {
348           text->text = util::Utf16ToUtf8(StringPiece16(str16, len));
349         }
350         CHECK(!node_stack.empty());
351         node_stack.top()->AppendChild(std::move(text));
352         break;
353       }
354 
355       case ResXMLParser::END_NAMESPACE:
356         break;
357 
358       case ResXMLParser::END_TAG:
359         CHECK(!node_stack.empty());
360         node_stack.pop();
361         break;
362 
363       default:
364         LOG(FATAL) << "unhandled XML chunk type";
365         break;
366     }
367   }
368   return xml_resource;
369 }
370 
Clone() const371 std::unique_ptr<XmlResource> XmlResource::Clone() const {
372   std::unique_ptr<XmlResource> cloned = util::make_unique<XmlResource>(file);
373   if (root != nullptr) {
374     cloned->root = root->CloneElement([&](const xml::Element& src, xml::Element* dst) {
375       dst->attributes.reserve(src.attributes.size());
376       for (const xml::Attribute& attr : src.attributes) {
377         xml::Attribute cloned_attr;
378         cloned_attr.name = attr.name;
379         cloned_attr.namespace_uri = attr.namespace_uri;
380         cloned_attr.value = attr.value;
381         cloned_attr.compiled_attribute = attr.compiled_attribute;
382         if (attr.compiled_value != nullptr) {
383           cloned_attr.compiled_value.reset(attr.compiled_value->Clone(&cloned->string_pool));
384         }
385         dst->attributes.push_back(std::move(cloned_attr));
386       }
387     });
388   }
389   return cloned;
390 }
391 
FindRootElement(Node * node)392 Element* FindRootElement(Node* node) {
393   if (node == nullptr) {
394     return nullptr;
395   }
396 
397   while (node->parent != nullptr) {
398     node = node->parent;
399   }
400   return NodeCast<Element>(node);
401 }
402 
AppendChild(std::unique_ptr<Node> child)403 void Element::AppendChild(std::unique_ptr<Node> child) {
404   child->parent = this;
405   children.push_back(std::move(child));
406 }
407 
InsertChild(size_t index,std::unique_ptr<Node> child)408 void Element::InsertChild(size_t index, std::unique_ptr<Node> child) {
409   child->parent = this;
410   children.insert(children.begin() + index, std::move(child));
411 }
412 
FindAttribute(const StringPiece & ns,const StringPiece & name)413 Attribute* Element::FindAttribute(const StringPiece& ns, const StringPiece& name) {
414   return const_cast<Attribute*>(static_cast<const Element*>(this)->FindAttribute(ns, name));
415 }
416 
FindAttribute(const StringPiece & ns,const StringPiece & name) const417 const Attribute* Element::FindAttribute(const StringPiece& ns, const StringPiece& name) const {
418   for (const auto& attr : attributes) {
419     if (ns == attr.namespace_uri && name == attr.name) {
420       return &attr;
421     }
422   }
423   return nullptr;
424 }
425 
FindOrCreateAttribute(const StringPiece & ns,const StringPiece & name)426 Attribute* Element::FindOrCreateAttribute(const StringPiece& ns, const StringPiece& name) {
427   Attribute* attr = FindAttribute(ns, name);
428   if (attr == nullptr) {
429     attributes.push_back(Attribute{ns.to_string(), name.to_string()});
430     attr = &attributes.back();
431   }
432   return attr;
433 }
434 
FindChild(const StringPiece & ns,const StringPiece & name)435 Element* Element::FindChild(const StringPiece& ns, const StringPiece& name) {
436   return FindChildWithAttribute(ns, name, {}, {}, {});
437 }
438 
FindChild(const StringPiece & ns,const StringPiece & name) const439 const Element* Element::FindChild(const StringPiece& ns, const StringPiece& name) const {
440   return FindChildWithAttribute(ns, name, {}, {}, {});
441 }
442 
FindChildWithAttribute(const StringPiece & ns,const StringPiece & name,const StringPiece & attr_ns,const StringPiece & attr_name,const StringPiece & attr_value)443 Element* Element::FindChildWithAttribute(const StringPiece& ns, const StringPiece& name,
444                                          const StringPiece& attr_ns, const StringPiece& attr_name,
445                                          const StringPiece& attr_value) {
446   return const_cast<Element*>(static_cast<const Element*>(this)->FindChildWithAttribute(
447       ns, name, attr_ns, attr_name, attr_value));
448 }
449 
FindChildWithAttribute(const StringPiece & ns,const StringPiece & name,const StringPiece & attr_ns,const StringPiece & attr_name,const StringPiece & attr_value) const450 const Element* Element::FindChildWithAttribute(const StringPiece& ns, const StringPiece& name,
451                                                const StringPiece& attr_ns,
452                                                const StringPiece& attr_name,
453                                                const StringPiece& attr_value) const {
454   for (const auto& child : children) {
455     if (const Element* el = NodeCast<Element>(child.get())) {
456       if (ns == el->namespace_uri && name == el->name) {
457         if (attr_ns.empty() && attr_name.empty()) {
458           return el;
459         }
460 
461         const Attribute* attr = el->FindAttribute(attr_ns, attr_name);
462         if (attr && attr_value == attr->value) {
463           return el;
464         }
465       }
466     }
467   }
468   return nullptr;
469 }
470 
GetChildElements()471 std::vector<Element*> Element::GetChildElements() {
472   std::vector<Element*> elements;
473   for (auto& child_node : children) {
474     if (Element* child = NodeCast<Element>(child_node.get())) {
475       elements.push_back(child);
476     }
477   }
478   return elements;
479 }
480 
Clone(const ElementCloneFunc & el_cloner) const481 std::unique_ptr<Node> Element::Clone(const ElementCloneFunc& el_cloner) const {
482   auto el = util::make_unique<Element>();
483   el->namespace_decls = namespace_decls;
484   el->comment = comment;
485   el->line_number = line_number;
486   el->column_number = column_number;
487   el->name = name;
488   el->namespace_uri = namespace_uri;
489   el->attributes.reserve(attributes.size());
490   el_cloner(*this, el.get());
491   el->children.reserve(children.size());
492   for (const std::unique_ptr<xml::Node>& child : children) {
493     el->AppendChild(child->Clone(el_cloner));
494   }
495   return std::move(el);
496 }
497 
CloneElement(const ElementCloneFunc & el_cloner) const498 std::unique_ptr<Element> Element::CloneElement(const ElementCloneFunc& el_cloner) const {
499   return std::unique_ptr<Element>(static_cast<Element*>(Clone(el_cloner).release()));
500 }
501 
Accept(Visitor * visitor)502 void Element::Accept(Visitor* visitor) {
503   visitor->BeforeVisitElement(this);
504   visitor->Visit(this);
505   visitor->AfterVisitElement(this);
506 }
507 
Accept(ConstVisitor * visitor) const508 void Element::Accept(ConstVisitor* visitor) const {
509   visitor->BeforeVisitElement(this);
510   visitor->Visit(this);
511   visitor->AfterVisitElement(this);
512 }
513 
Clone(const ElementCloneFunc &) const514 std::unique_ptr<Node> Text::Clone(const ElementCloneFunc&) const {
515   auto t = util::make_unique<Text>();
516   t->comment = comment;
517   t->line_number = line_number;
518   t->column_number = column_number;
519   t->text = text;
520   return std::move(t);
521 }
522 
Accept(Visitor * visitor)523 void Text::Accept(Visitor* visitor) {
524   visitor->Visit(this);
525 }
526 
Accept(ConstVisitor * visitor) const527 void Text::Accept(ConstVisitor* visitor) const {
528   visitor->Visit(this);
529 }
530 
BeforeVisitElement(Element * el)531 void PackageAwareVisitor::BeforeVisitElement(Element* el) {
532   std::vector<PackageDecl> decls;
533   for (const NamespaceDecl& decl : el->namespace_decls) {
534     if (Maybe<ExtractedPackage> maybe_package = ExtractPackageFromNamespace(decl.uri)) {
535       decls.push_back(PackageDecl{decl.prefix, std::move(maybe_package.value())});
536     }
537   }
538   package_decls_.push_back(std::move(decls));
539 }
540 
AfterVisitElement(Element * el)541 void PackageAwareVisitor::AfterVisitElement(Element* el) {
542   package_decls_.pop_back();
543 }
544 
TransformPackageAlias(const StringPiece & alias) const545 Maybe<ExtractedPackage> PackageAwareVisitor::TransformPackageAlias(const StringPiece& alias) const {
546   if (alias.empty()) {
547     return ExtractedPackage{{}, false /*private*/};
548   }
549 
550   const auto rend = package_decls_.rend();
551   for (auto iter = package_decls_.rbegin(); iter != rend; ++iter) {
552     const std::vector<PackageDecl>& decls = *iter;
553     const auto rend2 = decls.rend();
554     for (auto iter2 = decls.rbegin(); iter2 != rend2; ++iter2) {
555       const PackageDecl& decl = *iter2;
556       if (alias == decl.prefix) {
557         if (decl.package.package.empty()) {
558           return ExtractedPackage{{}, decl.package.private_namespace};
559         }
560         return decl.package;
561       }
562     }
563   }
564   return {};
565 }
566 
567 }  // namespace xml
568 }  // namespace aapt
569