1 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
2 // -*- mode: C++ -*-
3 //
4 // Copyright (C) 2013-2020 Red Hat, Inc.
5 
6 /// @file
7 
8 #include <string>
9 #include <iostream>
10 
11 #include "abg-internal.h"
12 // <headers defining libabigail's API go under here>
13 ABG_BEGIN_EXPORT_DECLARATIONS
14 
15 #include "abg-libxml-utils.h"
16 
17 ABG_END_EXPORT_DECLARATIONS
18 // </headers defining libabigail's API>
19 
20 namespace abigail
21 {
22 
23 namespace sptr_utils
24 {
25 /// Build and return a shared_ptr for a pointer to xmlTextReader
26 template<>
27 shared_ptr<xmlTextReader>
build_sptr(::xmlTextReader * p)28 build_sptr<xmlTextReader>(::xmlTextReader *p)
29 {
30   return shared_ptr<xmlTextReader>(p, abigail::xml::textReaderDeleter());
31 }
32 
33 /// Build and return a shared_ptr for a pointer to xmlChar
34 template<>
35 shared_ptr<xmlChar>
build_sptr(xmlChar * p)36 build_sptr<xmlChar>(xmlChar *p)
37 {
38   return shared_ptr<xmlChar>(p, abigail::xml::charDeleter());
39 }
40 
41 }//end namespace sptr_utils
42 
43 namespace xml
44 {
45 using std::istream;
46 
47 /// Instantiate an xmlTextReader that parses the content of an on-disk
48 /// file, wrap it into a smart pointer and return it.
49 ///
50 /// @param path the path to the file to be parsed by the returned
51 /// instance of xmlTextReader.
52 reader_sptr
new_reader_from_file(const std::string & path)53 new_reader_from_file(const std::string& path)
54 {
55   reader_sptr p =
56     build_sptr(xmlNewTextReaderFilename (path.c_str()));
57 
58   return p;
59 }
60 
61 /// Instanciate an xmlTextReader that parses the content of an
62 /// in-memory buffer, wrap it into a smart pointer and return it.
63 ///
64 /// @param buffer the in-memory buffer to be parsed by the returned
65 /// instance of xmlTextReader.
66 reader_sptr
new_reader_from_buffer(const std::string & buffer)67 new_reader_from_buffer(const std::string& buffer)
68 {
69   reader_sptr p =
70     build_sptr(xmlReaderForMemory(buffer.c_str(),
71 				  buffer.length(),
72 				  "", 0, 0));
73   return p;
74 }
75 
/// This is an xmlInputReadCallback, meant to be passed to
/// xmlReaderForIO.  It reads a number of bytes from an istream.
///
/// @param context an std::istream* cast into a void*.  This is the
/// istream that the xmlTextReader is to read data from.
///
/// @param buffer the buffer where to copy the data read from the
/// input stream.
///
/// @param len the maximum number of bytes to read from the input
/// stream and to copy into @p buffer.
///
/// @return the number of bytes actually read (0 when nothing more
/// could be extracted), or -1 in case of error.  An error is a null
/// @p context or @p buffer, a negative @p len, or a stream that is in
/// the "bad" state after the read.
static int
xml_istream_input_read(void*	context,
		       char*	buffer,
		       int	len)
{
  // The context was handed over as a void*, so static_cast is the
  // right tool to get the istream pointer back.
  std::istream* in = static_cast<std::istream*>(context);
  if (in == 0 || buffer == 0 || len < 0)
    return -1;

  in->read(buffer, len);

  // A short read at end of input only sets eofbit/failbit; that is
  // not an error.  badbit, on the other hand, means that the
  // underlying stream is broken: report it rather than letting it
  // pass for an end of input.
  if (in->bad())
    return -1;

  return static_cast<int>(in->gcount());
}
98 
/// This is an xmlInputCloseCallback, meant to be passed to
/// xmlReaderForIO.  Such a callback is supposed to close the input
/// stream the xmlTextReader reads from; this implementation
/// deliberately does nothing and leaves the stream untouched.
///
/// @return 0, always.
static int
xml_istream_input_close(void*)
{
  return 0;
}
108 
109 /// Instanciate an xmlTextReader that parses a content coming from an
110 /// input stream.
111 ///
112 /// @param in the input stream to consider.
113 ///
114 /// @return reader_sptr a pointer to the newly instantiated xml
115 /// reader.
116 reader_sptr
new_reader_from_istream(std::istream * in)117 new_reader_from_istream(std::istream* in)
118 {
119   reader_sptr p =
120     build_sptr(xmlReaderForIO(&xml_istream_input_read,
121 			      &xml_istream_input_close,
122 			      in, "", 0, 0));
123   return p;
124 }
125 
126 /// Convert a shared pointer to xmlChar into an std::string.
127 ///
128 /// If the xmlChar is NULL, set "" to the string.
129 ///
130 /// @param ssptr the shared point to xmlChar to convert.
131 ///
132 /// @param s the output string.
133 ///
134 /// @return true if the shared pointer to xmlChar contained a non NULL
135 /// string, false otherwise.
136 bool
xml_char_sptr_to_string(xml_char_sptr ssptr,std::string & s)137 xml_char_sptr_to_string(xml_char_sptr ssptr, std::string& s)
138 {
139   bool non_nil = false;
140   if (CHAR_STR(ssptr))
141     {
142       s = CHAR_STR(ssptr);
143       non_nil = true;
144     }
145   else
146     {
147       s = "";
148       non_nil = false;
149     }
150 
151   return non_nil;
152 }
153 
154 /// Return the depth of an xml element node.
155 ///
156 /// Note that the node must be attached to an XML document.
157 ///
158 /// @param n the xml to consider.
159 ///
160 /// @return a positive or zero number for an XML node properly
161 /// attached to an xml document, -1 otherwise.  Note that the function
162 /// returns -1 if passed an xml document as well.
163 int
get_xml_node_depth(xmlNodePtr n)164 get_xml_node_depth(xmlNodePtr n)
165 {
166   if (n->type == XML_DOCUMENT_NODE || n->parent == NULL)
167     return -1;
168 
169   if (n->parent->type == XML_DOCUMENT_NODE)
170     return 0;
171 
172   return 1 + get_xml_node_depth(n->parent);
173 }
174 
/// Escape the 5 characters that have a predefined XML entity.
///
/// The characters and the entities that replace them are:
///
///   '<' becomes &lt;, '>' becomes &gt;, ''' becomes &apos;, '"'
///   becomes &quot; and '&' becomes &amp;.
///
/// Every other character is copied unchanged.
///
/// @param str the input string to scan for characters to escape.
///
/// @param escaped the output string.  The escaped text is appended
/// to whatever it already contains.  It must not be the same object
/// as @p str, because @p str is read while @p escaped is modified.
void
escape_xml_string(const std::string& str,
		  std::string& escaped)
{
  for (std::string::size_type pos = 0; pos < str.size(); ++pos)
    {
      const char c = str[pos];
      if (c == '<')
	escaped += "&lt;";
      else if (c == '>')
	escaped += "&gt;";
      else if (c == '&')
	escaped += "&amp;";
      else if (c == '\'')
	escaped += "&apos;";
      else if (c == '"')
	escaped += "&quot;";
      else
	escaped += c;
    }
}

/// Escape the 5 characters that have a predefined XML entity.
///
/// The characters and the entities that replace them are:
///
///   '<' becomes &lt;, '>' becomes &gt;, ''' becomes &apos;, '"'
///   becomes &quot; and '&' becomes &amp;.
///
/// @param str the input string to scan for characters to escape.
///
/// @return a new string holding the content of @p str with the
/// characters above replaced by their predefined entities.
std::string
escape_xml_string(const std::string& str)
{
  std::string escaped;
  escape_xml_string(str, escaped);
  return escaped;
}
236 
/// Escape the '-' character, to avoid having a '--' in a comment.
///
/// Every '-' is replaced by the character reference '&#45;'; all
/// other characters are copied unchanged.
///
/// @param str the input string to scan for '-' characters.
///
/// @param escaped the output string.  The escaped text is appended
/// to whatever it already contains.
void
escape_xml_comment(const std::string& str,
		   std::string& escaped)
{
  std::string::size_type start = 0;
  for (std::string::size_type dash = str.find('-', start);
       dash != std::string::npos;
       dash = str.find('-', start))
    {
      // Copy the run of characters before the dash, then the escaped
      // dash itself.
      escaped.append(str, start, dash - start);
      escaped += "&#45;";
      start = dash + 1;
    }

  // Copy what remains after the last dash.
  escaped.append(str, start, std::string::npos);
}

/// Escape the '-' character, to avoid having a '--' in a comment.
///
/// Every '-' is replaced by the character reference '&#45;'.
///
/// @param str the input string to scan for '-' characters.
///
/// @return a new string holding the content of @p str in which every
/// '-' has been replaced by '&#45;'.
std::string
escape_xml_comment(const std::string& str)
{
  std::string escaped;
  escape_xml_comment(str, escaped);
  return escaped;
}
278 
/// Read a string, detect the 5 predefined XML entities it may
/// contain and un-escape them, by writing their corresponding
/// characters back in.  The predefined entities are:
///
///   &lt; for the character '<', &gt; for the character '>', &apos; for
///   the character ''', &quot; for the character '"', and &amp; for the
///   character '&'.
///
/// Anything else, including an '&' that does not start one of these
/// entities, is copied unchanged.
///
/// @param str the input XML string to consider.
///
/// @param escaped where to write the resulting un-escaped string.
/// The result is appended to whatever the string already contains.
void
unescape_xml_string(const std::string& str,
		  std::string& escaped)
{
  struct entity
  {
    const char*			text;
    std::string::size_type	length;
    char			character;
  };

  static const entity entities[] =
    {
      {"&lt;",   4, '<'},
      {"&gt;",   4, '>'},
      {"&amp;",  5, '&'},
      {"&apos;", 6, '\''},
      {"&quot;", 6, '"'}
    };
  static const unsigned nb_entities = sizeof(entities) / sizeof(entities[0]);

  std::string::size_type pos = 0;
  while (pos < str.size())
    {
      const entity* match = 0;
      if (str[pos] == '&')
	for (unsigned k = 0; k < nb_entities; ++k)
	  if (str.compare(pos, entities[k].length, entities[k].text) == 0)
	    {
	      match = &entities[k];
	      break;
	    }

      if (match)
	{
	  escaped += match->character;
	  pos += match->length;
	}
      else
	{
	  escaped += str[pos];
	  ++pos;
	}
    }
}

/// Read a string, detect the 5 predefined XML entities it may
/// contain and un-escape them, by writing their corresponding
/// characters back in.  The predefined entities are:
///
///   &lt; for the character '<', &gt; for the character '>', &apos; for
///   the character ''', &quot; for the character '"', and &amp; for the
///   character '&'.
///
/// @param str the input XML string to consider.
///
/// @return the resulting un-escaped string.
std::string
unescape_xml_string(const std::string& str)
{
  std::string unescaped;
  unescape_xml_string(str, unescaped);
  return unescaped;
}
371 
/// Read a string, detect the '&#45;' character reference and
/// un-escape it into the '-' character.
///
/// Everything else is copied unchanged.
///
/// @param str the input XML string to consider.
///
/// @param escaped where to write the resulting un-escaped string.
/// The result is appended to whatever the string already contains.
void
unescape_xml_comment(const std::string& str,
		     std::string& escaped)
{
  static const char reference[] = "&#45;";
  static const std::string::size_type reference_length = 5;

  std::string::size_type start = 0;
  for (std::string::size_type hit = str.find(reference, start);
       hit != std::string::npos;
       hit = str.find(reference, start))
    {
      // Copy the text that precedes the reference, then the dash it
      // stands for.
      escaped.append(str, start, hit - start);
      escaped += '-';
      start = hit + reference_length;
    }

  // Copy what remains after the last reference.
  escaped.append(str, start, std::string::npos);
}

/// Read a string, detect the '&#45;' character reference and
/// un-escape it into the '-' character.
///
/// @param str the input XML string to consider.
///
/// @return the resulting un-escaped string.
std::string
unescape_xml_comment(const std::string& str)
{
  std::string unescaped;
  unescape_xml_comment(str, unescaped);
  return unescaped;
}
415 
416 /// Maybe get the next sibling element node of an XML node, or stay to the sam
417 ///
418 /// If there is no next sibling xml element node, the function returns
419 /// the initial node.
420 ///
421 /// @param node the initial node to consider.
422 ///
423 /// @return the next sibling node or the initial node @p node.
424 static xmlNodePtr
go_to_next_sibling_element_or_stay(xmlNodePtr node)425 go_to_next_sibling_element_or_stay(xmlNodePtr node)
426 {
427   xmlNodePtr n;
428   for (n = node; n; n = n->next)
429     {
430       if (n->type == XML_ELEMENT_NODE)
431 	break;
432     }
433   return n ? n : node;
434 }
435 
436 /// Get the next sibling element node of an XML node.
437 ///
438 /// If there is no next sibling xml element node, the function returns nil.
439 ///
440 /// @param node the XML node to consider.
441 ///
442 /// @return the next sibling element node or nil.
443 xmlNodePtr
advance_to_next_sibling_element(xmlNodePtr node)444 advance_to_next_sibling_element(xmlNodePtr node)
445 {
446   xmlNodePtr n = go_to_next_sibling_element_or_stay(node->next);
447   if (n == 0 || n->type != XML_ELEMENT_NODE)
448     return 0;
449   return n;
450 }
451 
452 }//end namespace xml
453 }//end namespace abigail
454