1 /*
2  * Copyright (C) 2015 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "XmlDom.h"
18 
19 #include <expat.h>
20 
21 #include <memory>
22 #include <stack>
23 #include <string>
24 #include <tuple>
25 
26 #include "android-base/logging.h"
27 
28 #include "ResourceUtils.h"
29 #include "XmlPullParser.h"
30 #include "util/Util.h"
31 
32 using android::StringPiece;
33 using android::StringPiece16;
34 
35 namespace aapt {
36 namespace xml {
37 
// Separator character placed between the namespace URI and the local name in
// the expanded names produced by the parser created with XML_ParserCreateNS.
constexpr char kXmlNamespaceSep = '\x01';
39 
40 struct Stack {
41   std::unique_ptr<xml::Node> root;
42   std::stack<xml::Node*> node_stack;
43   std::string pending_comment;
44   std::unique_ptr<xml::Text> last_text_node;
45 };
46 
47 /**
48  * Extracts the namespace and name of an expanded element or attribute name.
49  */
SplitName(const char * name,std::string * out_ns,std::string * out_name)50 static void SplitName(const char* name, std::string* out_ns,
51                       std::string* out_name) {
52   const char* p = name;
53   while (*p != 0 && *p != kXmlNamespaceSep) {
54     p++;
55   }
56 
57   if (*p == 0) {
58     out_ns->clear();
59     out_name->assign(name);
60   } else {
61     out_ns->assign(name, (p - name));
62     out_name->assign(p + 1);
63   }
64 }
65 
FinishPendingText(Stack * stack)66 static void FinishPendingText(Stack* stack) {
67   if (stack->last_text_node != nullptr) {
68     if (!stack->last_text_node->text.empty()) {
69       stack->node_stack.top()->AppendChild(std::move(stack->last_text_node));
70     } else {
71       // Drop an empty text node.
72     }
73     stack->last_text_node = nullptr;
74   }
75 }
76 
AddToStack(Stack * stack,XML_Parser parser,std::unique_ptr<Node> node)77 static void AddToStack(Stack* stack, XML_Parser parser,
78                        std::unique_ptr<Node> node) {
79   node->line_number = XML_GetCurrentLineNumber(parser);
80   node->column_number = XML_GetCurrentColumnNumber(parser);
81 
82   Node* this_node = node.get();
83   if (!stack->node_stack.empty()) {
84     stack->node_stack.top()->AppendChild(std::move(node));
85   } else {
86     stack->root = std::move(node);
87   }
88 
89   if (!NodeCast<Text>(this_node)) {
90     stack->node_stack.push(this_node);
91   }
92 }
93 
StartNamespaceHandler(void * user_data,const char * prefix,const char * uri)94 static void XMLCALL StartNamespaceHandler(void* user_data, const char* prefix,
95                                           const char* uri) {
96   XML_Parser parser = reinterpret_cast<XML_Parser>(user_data);
97   Stack* stack = reinterpret_cast<Stack*>(XML_GetUserData(parser));
98   FinishPendingText(stack);
99 
100   std::unique_ptr<Namespace> ns = util::make_unique<Namespace>();
101   if (prefix) {
102     ns->namespace_prefix = prefix;
103   }
104 
105   if (uri) {
106     ns->namespace_uri = uri;
107   }
108 
109   AddToStack(stack, parser, std::move(ns));
110 }
111 
EndNamespaceHandler(void * user_data,const char * prefix)112 static void XMLCALL EndNamespaceHandler(void* user_data, const char* prefix) {
113   XML_Parser parser = reinterpret_cast<XML_Parser>(user_data);
114   Stack* stack = reinterpret_cast<Stack*>(XML_GetUserData(parser));
115   FinishPendingText(stack);
116 
117   CHECK(!stack->node_stack.empty());
118   stack->node_stack.pop();
119 }
120 
less_attribute(const Attribute & lhs,const Attribute & rhs)121 static bool less_attribute(const Attribute& lhs, const Attribute& rhs) {
122   return std::tie(lhs.namespace_uri, lhs.name, lhs.value) <
123          std::tie(rhs.namespace_uri, rhs.name, rhs.value);
124 }
125 
StartElementHandler(void * user_data,const char * name,const char ** attrs)126 static void XMLCALL StartElementHandler(void* user_data, const char* name,
127                                         const char** attrs) {
128   XML_Parser parser = reinterpret_cast<XML_Parser>(user_data);
129   Stack* stack = reinterpret_cast<Stack*>(XML_GetUserData(parser));
130   FinishPendingText(stack);
131 
132   std::unique_ptr<Element> el = util::make_unique<Element>();
133   SplitName(name, &el->namespace_uri, &el->name);
134 
135   while (*attrs) {
136     Attribute attribute;
137     SplitName(*attrs++, &attribute.namespace_uri, &attribute.name);
138     attribute.value = *attrs++;
139 
140     // Insert in sorted order.
141     auto iter = std::lower_bound(el->attributes.begin(), el->attributes.end(), attribute,
142                                  less_attribute);
143     el->attributes.insert(iter, std::move(attribute));
144   }
145 
146   el->comment = std::move(stack->pending_comment);
147   AddToStack(stack, parser, std::move(el));
148 }
149 
EndElementHandler(void * user_data,const char * name)150 static void XMLCALL EndElementHandler(void* user_data, const char* name) {
151   XML_Parser parser = reinterpret_cast<XML_Parser>(user_data);
152   Stack* stack = reinterpret_cast<Stack*>(XML_GetUserData(parser));
153   FinishPendingText(stack);
154 
155   CHECK(!stack->node_stack.empty());
156   // stack->nodeStack.top()->comment = std::move(stack->pendingComment);
157   stack->node_stack.pop();
158 }
159 
CharacterDataHandler(void * user_data,const char * s,int len)160 static void XMLCALL CharacterDataHandler(void* user_data, const char* s, int len) {
161   XML_Parser parser = reinterpret_cast<XML_Parser>(user_data);
162   Stack* stack = reinterpret_cast<Stack*>(XML_GetUserData(parser));
163 
164   const StringPiece str(s, len);
165   if (str.empty()) {
166     return;
167   }
168 
169   // See if we can just append the text to a previous text node.
170   if (stack->last_text_node != nullptr) {
171     stack->last_text_node->text.append(str.data(), str.size());
172     return;
173   }
174 
175   stack->last_text_node = util::make_unique<Text>();
176   stack->last_text_node->line_number = XML_GetCurrentLineNumber(parser);
177   stack->last_text_node->column_number = XML_GetCurrentColumnNumber(parser);
178   stack->last_text_node->text = str.to_string();
179 }
180 
CommentDataHandler(void * user_data,const char * comment)181 static void XMLCALL CommentDataHandler(void* user_data, const char* comment) {
182   XML_Parser parser = reinterpret_cast<XML_Parser>(user_data);
183   Stack* stack = reinterpret_cast<Stack*>(XML_GetUserData(parser));
184   FinishPendingText(stack);
185 
186   if (!stack->pending_comment.empty()) {
187     stack->pending_comment += '\n';
188   }
189   stack->pending_comment += comment;
190 }
191 
Inflate(std::istream * in,IDiagnostics * diag,const Source & source)192 std::unique_ptr<XmlResource> Inflate(std::istream* in, IDiagnostics* diag, const Source& source) {
193   Stack stack;
194 
195   XML_Parser parser = XML_ParserCreateNS(nullptr, kXmlNamespaceSep);
196   XML_SetUserData(parser, &stack);
197   XML_UseParserAsHandlerArg(parser);
198   XML_SetElementHandler(parser, StartElementHandler, EndElementHandler);
199   XML_SetNamespaceDeclHandler(parser, StartNamespaceHandler, EndNamespaceHandler);
200   XML_SetCharacterDataHandler(parser, CharacterDataHandler);
201   XML_SetCommentHandler(parser, CommentDataHandler);
202 
203   char buffer[1024];
204   while (!in->eof()) {
205     in->read(buffer, sizeof(buffer) / sizeof(buffer[0]));
206     if (in->bad() && !in->eof()) {
207       stack.root = {};
208       diag->Error(DiagMessage(source) << strerror(errno));
209       break;
210     }
211 
212     if (XML_Parse(parser, buffer, in->gcount(), in->eof()) == XML_STATUS_ERROR) {
213       stack.root = {};
214       diag->Error(DiagMessage(source.WithLine(XML_GetCurrentLineNumber(parser)))
215                   << XML_ErrorString(XML_GetErrorCode(parser)));
216       break;
217     }
218   }
219 
220   XML_ParserFree(parser);
221   if (stack.root) {
222     return util::make_unique<XmlResource>(ResourceFile{{}, {}, source}, StringPool{},
223                                           std::move(stack.root));
224   }
225   return {};
226 }
227 
CopyAttributes(Element * el,android::ResXMLParser * parser,StringPool * out_pool)228 static void CopyAttributes(Element* el, android::ResXMLParser* parser, StringPool* out_pool) {
229   const size_t attr_count = parser->getAttributeCount();
230   if (attr_count > 0) {
231     el->attributes.reserve(attr_count);
232     for (size_t i = 0; i < attr_count; i++) {
233       Attribute attr;
234       size_t len;
235       const char16_t* str16 = parser->getAttributeNamespace(i, &len);
236       if (str16) {
237         attr.namespace_uri = util::Utf16ToUtf8(StringPiece16(str16, len));
238       }
239 
240       str16 = parser->getAttributeName(i, &len);
241       if (str16) {
242         attr.name = util::Utf16ToUtf8(StringPiece16(str16, len));
243       }
244 
245       str16 = parser->getAttributeStringValue(i, &len);
246       if (str16) {
247         attr.value = util::Utf16ToUtf8(StringPiece16(str16, len));
248       }
249 
250       android::Res_value res_value;
251       if (parser->getAttributeValue(i, &res_value) > 0) {
252         attr.compiled_value = ResourceUtils::ParseBinaryResValue(
253             ResourceType::kAnim, {}, parser->getStrings(), res_value, out_pool);
254       }
255 
256       el->attributes.push_back(std::move(attr));
257     }
258   }
259 }
260 
Inflate(const void * data,size_t data_len,IDiagnostics * diag,const Source & source)261 std::unique_ptr<XmlResource> Inflate(const void* data, size_t data_len, IDiagnostics* diag,
262                                      const Source& source) {
263   // We import the android namespace because on Windows NO_ERROR is a macro, not
264   // an enum, which
265   // causes errors when qualifying it with android::
266   using namespace android;
267 
268   StringPool string_pool;
269   std::unique_ptr<Node> root;
270   std::stack<Node*> node_stack;
271 
272   ResXMLTree tree;
273   if (tree.setTo(data, data_len) != NO_ERROR) {
274     return {};
275   }
276 
277   ResXMLParser::event_code_t code;
278   while ((code = tree.next()) != ResXMLParser::BAD_DOCUMENT &&
279          code != ResXMLParser::END_DOCUMENT) {
280     std::unique_ptr<Node> new_node;
281     switch (code) {
282       case ResXMLParser::START_NAMESPACE: {
283         std::unique_ptr<Namespace> node = util::make_unique<Namespace>();
284         size_t len;
285         const char16_t* str16 = tree.getNamespacePrefix(&len);
286         if (str16) {
287           node->namespace_prefix = util::Utf16ToUtf8(StringPiece16(str16, len));
288         }
289 
290         str16 = tree.getNamespaceUri(&len);
291         if (str16) {
292           node->namespace_uri = util::Utf16ToUtf8(StringPiece16(str16, len));
293         }
294         new_node = std::move(node);
295         break;
296       }
297 
298       case ResXMLParser::START_TAG: {
299         std::unique_ptr<Element> node = util::make_unique<Element>();
300         size_t len;
301         const char16_t* str16 = tree.getElementNamespace(&len);
302         if (str16) {
303           node->namespace_uri = util::Utf16ToUtf8(StringPiece16(str16, len));
304         }
305 
306         str16 = tree.getElementName(&len);
307         if (str16) {
308           node->name = util::Utf16ToUtf8(StringPiece16(str16, len));
309         }
310 
311         CopyAttributes(node.get(), &tree, &string_pool);
312 
313         new_node = std::move(node);
314         break;
315       }
316 
317       case ResXMLParser::TEXT: {
318         std::unique_ptr<Text> node = util::make_unique<Text>();
319         size_t len;
320         const char16_t* str16 = tree.getText(&len);
321         if (str16) {
322           node->text = util::Utf16ToUtf8(StringPiece16(str16, len));
323         }
324         new_node = std::move(node);
325         break;
326       }
327 
328       case ResXMLParser::END_NAMESPACE:
329       case ResXMLParser::END_TAG:
330         CHECK(!node_stack.empty());
331         node_stack.pop();
332         break;
333 
334       default:
335         LOG(FATAL) << "unhandled XML chunk type";
336         break;
337     }
338 
339     if (new_node) {
340       new_node->line_number = tree.getLineNumber();
341 
342       Node* this_node = new_node.get();
343       if (!root) {
344         CHECK(node_stack.empty()) << "node stack should be empty";
345         root = std::move(new_node);
346       } else {
347         CHECK(!node_stack.empty()) << "node stack should not be empty";
348         node_stack.top()->AppendChild(std::move(new_node));
349       }
350 
351       if (!NodeCast<Text>(this_node)) {
352         node_stack.push(this_node);
353       }
354     }
355   }
356   return util::make_unique<XmlResource>(ResourceFile{}, std::move(string_pool), std::move(root));
357 }
358 
Clone(const ElementCloneFunc & el_cloner)359 std::unique_ptr<Node> Namespace::Clone(const ElementCloneFunc& el_cloner) {
360   auto ns = util::make_unique<Namespace>();
361   ns->comment = comment;
362   ns->line_number = line_number;
363   ns->column_number = column_number;
364   ns->namespace_prefix = namespace_prefix;
365   ns->namespace_uri = namespace_uri;
366   ns->children.reserve(children.size());
367   for (const std::unique_ptr<xml::Node>& child : children) {
368     ns->AppendChild(child->Clone(el_cloner));
369   }
370   return std::move(ns);
371 }
372 
FindRootElement(XmlResource * doc)373 Element* FindRootElement(XmlResource* doc) {
374   return FindRootElement(doc->root.get());
375 }
376 
FindRootElement(Node * node)377 Element* FindRootElement(Node* node) {
378   if (!node) {
379     return nullptr;
380   }
381 
382   Element* el = nullptr;
383   while ((el = NodeCast<Element>(node)) == nullptr) {
384     if (node->children.empty()) {
385       return nullptr;
386     }
387     // We are looking for the first element, and namespaces can only have one
388     // child.
389     node = node->children.front().get();
390   }
391   return el;
392 }
393 
AppendChild(std::unique_ptr<Node> child)394 void Node::AppendChild(std::unique_ptr<Node> child) {
395   child->parent = this;
396   children.push_back(std::move(child));
397 }
398 
InsertChild(size_t index,std::unique_ptr<Node> child)399 void Node::InsertChild(size_t index, std::unique_ptr<Node> child) {
400   child->parent = this;
401   children.insert(children.begin() + index, std::move(child));
402 }
403 
FindAttribute(const StringPiece & ns,const StringPiece & name)404 Attribute* Element::FindAttribute(const StringPiece& ns,
405                                   const StringPiece& name) {
406   for (auto& attr : attributes) {
407     if (ns == attr.namespace_uri && name == attr.name) {
408       return &attr;
409     }
410   }
411   return nullptr;
412 }
413 
FindAttribute(const StringPiece & ns,const StringPiece & name) const414 const Attribute* Element::FindAttribute(const StringPiece& ns, const StringPiece& name) const {
415   for (const auto& attr : attributes) {
416     if (ns == attr.namespace_uri && name == attr.name) {
417       return &attr;
418     }
419   }
420   return nullptr;
421 }
422 
FindChild(const StringPiece & ns,const StringPiece & name)423 Element* Element::FindChild(const StringPiece& ns, const StringPiece& name) {
424   return FindChildWithAttribute(ns, name, {}, {}, {});
425 }
426 
FindChildWithAttribute(const StringPiece & ns,const StringPiece & name,const StringPiece & attr_ns,const StringPiece & attr_name,const StringPiece & attr_value)427 Element* Element::FindChildWithAttribute(const StringPiece& ns,
428                                          const StringPiece& name,
429                                          const StringPiece& attr_ns,
430                                          const StringPiece& attr_name,
431                                          const StringPiece& attr_value) {
432   for (auto& child_node : children) {
433     Node* child = child_node.get();
434     while (NodeCast<Namespace>(child)) {
435       if (child->children.empty()) {
436         break;
437       }
438       child = child->children[0].get();
439     }
440 
441     if (Element* el = NodeCast<Element>(child)) {
442       if (ns == el->namespace_uri && name == el->name) {
443         if (attr_ns.empty() && attr_name.empty()) {
444           return el;
445         }
446 
447         Attribute* attr = el->FindAttribute(attr_ns, attr_name);
448         if (attr && attr_value == attr->value) {
449           return el;
450         }
451       }
452     }
453   }
454   return nullptr;
455 }
456 
GetChildElements()457 std::vector<Element*> Element::GetChildElements() {
458   std::vector<Element*> elements;
459   for (auto& child_node : children) {
460     Node* child = child_node.get();
461     while (NodeCast<Namespace>(child)) {
462       if (child->children.empty()) {
463         break;
464       }
465       child = child->children[0].get();
466     }
467 
468     if (Element* el = NodeCast<Element>(child)) {
469       elements.push_back(el);
470     }
471   }
472   return elements;
473 }
474 
Clone(const ElementCloneFunc & el_cloner)475 std::unique_ptr<Node> Element::Clone(const ElementCloneFunc& el_cloner) {
476   auto el = util::make_unique<Element>();
477   el->comment = comment;
478   el->line_number = line_number;
479   el->column_number = column_number;
480   el->name = name;
481   el->namespace_uri = namespace_uri;
482   el->attributes.reserve(attributes.size());
483   el_cloner(*this, el.get());
484   el->children.reserve(children.size());
485   for (const std::unique_ptr<xml::Node>& child : children) {
486     el->AppendChild(child->Clone(el_cloner));
487   }
488   return std::move(el);
489 }
490 
Clone(const ElementCloneFunc &)491 std::unique_ptr<Node> Text::Clone(const ElementCloneFunc&) {
492   auto t = util::make_unique<Text>();
493   t->comment = comment;
494   t->line_number = line_number;
495   t->column_number = column_number;
496   t->text = text;
497   return std::move(t);
498 }
499 
Visit(Namespace * ns)500 void PackageAwareVisitor::Visit(Namespace* ns) {
501   bool added = false;
502   if (Maybe<ExtractedPackage> maybe_package =
503           ExtractPackageFromNamespace(ns->namespace_uri)) {
504     ExtractedPackage& package = maybe_package.value();
505     package_decls_.push_back(
506         PackageDecl{ns->namespace_prefix, std::move(package)});
507     added = true;
508   }
509 
510   Visitor::Visit(ns);
511 
512   if (added) {
513     package_decls_.pop_back();
514   }
515 }
516 
TransformPackageAlias(const StringPiece & alias,const StringPiece & local_package) const517 Maybe<ExtractedPackage> PackageAwareVisitor::TransformPackageAlias(
518     const StringPiece& alias, const StringPiece& local_package) const {
519   if (alias.empty()) {
520     return ExtractedPackage{local_package.to_string(), false /* private */};
521   }
522 
523   const auto rend = package_decls_.rend();
524   for (auto iter = package_decls_.rbegin(); iter != rend; ++iter) {
525     if (alias == iter->prefix) {
526       if (iter->package.package.empty()) {
527         return ExtractedPackage{local_package.to_string(), iter->package.private_namespace};
528       }
529       return iter->package;
530     }
531   }
532   return {};
533 }
534 
535 }  // namespace xml
536 }  // namespace aapt
537