1 /**
2  * uri.c: set of generic URI related routines
3  *
4  * Reference: RFCs 3986, 2732 and 2373
5  *
6  * See Copyright for the status of this software.
7  *
8  * daniel@veillard.com
9  */
10 
#define IN_LIBXML
#include "libxml.h"

#include <limits.h>
#include <string.h>

#include <libxml/xmlmemory.h>
#include <libxml/uri.h>
#include <libxml/globals.h>
#include <libxml/xmlerror.h>
20 
/**
 * MAX_URI_LENGTH:
 *
 * The URI grammar in the RFCs referenced above puts no limit on the
 * length of a URI. In practice URIs are usually relatively short, except
 * for the data URI scheme defined in RFC 2397. Even for data URIs the
 * usual maximum size before hitting random practical limits is around
 * 64 KB, and 4 KB is usually the maximum admitted for proper operation.
 * The value below is more a security limit than anything else and
 * really should never be hit by 'normal' operations.
 * Set to 1 MByte in 2012; this is only enforced on output.
 *
 * The expansion is parenthesized so that the macro behaves as a single
 * value inside any larger expression (e.g. x / MAX_URI_LENGTH).
 */
#define MAX_URI_LENGTH (1024 * 1024)
34 
35 static void
xmlURIErrMemory(const char * extra)36 xmlURIErrMemory(const char *extra)
37 {
38     if (extra)
39         __xmlRaiseError(NULL, NULL, NULL,
40                         NULL, NULL, XML_FROM_URI,
41                         XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0,
42                         extra, NULL, NULL, 0, 0,
43                         "Memory allocation failed : %s\n", extra);
44     else
45         __xmlRaiseError(NULL, NULL, NULL,
46                         NULL, NULL, XML_FROM_URI,
47                         XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0,
48                         NULL, NULL, NULL, 0, 0,
49                         "Memory allocation failed\n");
50 }
51 
/* Forward declaration so the parsing routines below can call xmlCleanURI(). */
static void xmlCleanURI(xmlURIPtr uri);
53 
/*
 * Character classes from the older RFC 2396 grammar, used by the legacy
 * handling code. Each macro takes a character value and may evaluate
 * its argument more than once.
 */

/*
 * lowalpha = "a" | "b" | ... | "z"
 */
#define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z'))

/*
 * upalpha = "A" | "B" | ... | "Z"
 */
#define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z'))

/*
 * alpha = lowalpha | upalpha
 */
#define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))

/*
 * digit = "0" | "1" | ... | "9"
 *
 * Any IS_DIGIT macro defined earlier is replaced by this one.
 */
#ifdef IS_DIGIT
#undef IS_DIGIT
#endif
#define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9'))

/*
 * alphanum = alpha | digit
 */
#define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))
89 
/*
 * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
 */
#define IS_MARK(x)                                                      \
    (((x) == '-') || ((x) == '_') || ((x) == '.') || ((x) == '!') ||    \
     ((x) == '~') || ((x) == '*') || ((x) == '\'') || ((x) == '(') ||   \
     ((x) == ')'))

/*
 * unwise = "{" | "}" | "|" | "\" | "^" | "[" | "]" | "`"
 *
 * Unlike the other IS_* macros this one takes a pointer to the
 * character, not the character itself.
 */
#define IS_UNWISE(p)                                                    \
    ((*(p) == '{') || (*(p) == '}') || (*(p) == '|') ||                 \
     (*(p) == '\\') || (*(p) == '^') || (*(p) == '[') ||                \
     (*(p) == ']') || (*(p) == '`'))

/*
 * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," |
 *            "[" | "]"
 */
#define IS_RESERVED(x)                                                  \
    (((x) == ';') || ((x) == '/') || ((x) == '?') || ((x) == ':') ||    \
     ((x) == '@') || ((x) == '&') || ((x) == '=') || ((x) == '+') ||    \
     ((x) == '$') || ((x) == ',') || ((x) == '[') || ((x) == ']'))

/*
 * unreserved = alphanum | mark
 */
#define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))
121 
/*
 * Advance the pointer p to the next character: by three bytes when it
 * currently points at '%' (the start of an escape sequence), by one byte
 * otherwise. The macro itself does not check that two more bytes follow
 * the '%'. The argument is fully parenthesized, so any lvalue expression
 * (for instance *pp) can be passed; it is evaluated more than once and
 * must therefore be free of side effects.
 */
#define NEXT(p) ((*(p) == '%') ? ((p) += 3) : (p)++)

/*
 * Productions from the spec.
 *
 *    authority     = server | reg_name
 *    reg_name      = 1*( unreserved | escaped | "$" | "," |
 *                        ";" | ":" | "@" | "&" | "=" | "+" )
 *
 *    path          = [ abs_path | opaque_part ]
 */

/*
 * Duplicate the first n bytes of s with xmlStrndup() and hand the result
 * back as a char *. The whole expansion is parenthesized so it can be
 * used safely inside larger expressions.
 */
#define STRNDUP(s, n) ((char *) xmlStrndup((const xmlChar *)(s), (n)))
139 
/************************************************************************
 *									*
 *                         RFC 3986 parser				*
 *									*
 ************************************************************************/

/*
 * All ISA_* macros take a pointer to the character to classify and may
 * evaluate that pointer more than once. The argument is parenthesized
 * wherever it is used so that any pointer expression can be passed.
 */

/*
 *    DIGIT / ALPHA / HEXDIG
 */
#define ISA_DIGIT(p) ((*(p) >= '0') && (*(p) <= '9'))
#define ISA_ALPHA(p)                                                    \
    (((*(p) >= 'a') && (*(p) <= 'z')) ||                                \
     ((*(p) >= 'A') && (*(p) <= 'Z')))
#define ISA_HEXDIG(p)                                                   \
    (ISA_DIGIT(p) || ((*(p) >= 'a') && (*(p) <= 'f')) ||                \
     ((*(p) >= 'A') && (*(p) <= 'F')))

/*
 *    sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
 *                     / "*" / "+" / "," / ";" / "="
 */
#define ISA_SUB_DELIM(p)                                                \
    ((*(p) == '!') || (*(p) == '$') || (*(p) == '&') ||                 \
     (*(p) == '(') || (*(p) == ')') || (*(p) == '*') ||                 \
     (*(p) == '+') || (*(p) == ',') || (*(p) == ';') ||                 \
     (*(p) == '=') || (*(p) == '\''))

/*
 *    gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
 */
#define ISA_GEN_DELIM(p)                                                \
    ((*(p) == ':') || (*(p) == '/') || (*(p) == '?') ||                 \
     (*(p) == '#') || (*(p) == '[') || (*(p) == ']') ||                 \
     (*(p) == '@'))

/*
 *    reserved      = gen-delims / sub-delims
 */
#define ISA_RESERVED(p) (ISA_GEN_DELIM(p) || ISA_SUB_DELIM(p))

/*
 *    unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
 */
#define ISA_UNRESERVED(p)                                               \
    (ISA_ALPHA(p) || ISA_DIGIT(p) || (*(p) == '-') ||                   \
     (*(p) == '.') || (*(p) == '_') || (*(p) == '~'))

/*
 *    pct-encoded   = "%" HEXDIG HEXDIG
 *
 * The second and third bytes are only read once the preceding byte has
 * matched, so the test never reads past a terminating 0.
 */
#define ISA_PCT_ENCODED(p)                                              \
    ((*(p) == '%') && ISA_HEXDIG((p) + 1) && ISA_HEXDIG((p) + 2))

/*
 *    pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
 */
#define ISA_PCHAR(p)                                                    \
    (ISA_UNRESERVED(p) || ISA_PCT_ENCODED(p) || ISA_SUB_DELIM(p) ||     \
     (*(p) == ':') || (*(p) == '@'))
195 
196 /**
197  * xmlParse3986Scheme:
198  * @uri:  pointer to an URI structure
199  * @str:  pointer to the string to analyze
200  *
201  * Parse an URI scheme
202  *
203  * ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
204  *
205  * Returns 0 or the error code
206  */
207 static int
xmlParse3986Scheme(xmlURIPtr uri,const char ** str)208 xmlParse3986Scheme(xmlURIPtr uri, const char **str) {
209     const char *cur;
210 
211     if (str == NULL)
212 	return(-1);
213 
214     cur = *str;
215     if (!ISA_ALPHA(cur))
216 	return(2);
217     cur++;
218     while (ISA_ALPHA(cur) || ISA_DIGIT(cur) ||
219            (*cur == '+') || (*cur == '-') || (*cur == '.')) cur++;
220     if (uri != NULL) {
221 	if (uri->scheme != NULL) xmlFree(uri->scheme);
222 	uri->scheme = STRNDUP(*str, cur - *str);
223     }
224     *str = cur;
225     return(0);
226 }
227 
228 /**
229  * xmlParse3986Fragment:
230  * @uri:  pointer to an URI structure
231  * @str:  pointer to the string to analyze
232  *
233  * Parse the query part of an URI
234  *
235  * fragment      = *( pchar / "/" / "?" )
236  * NOTE: the strict syntax as defined by 3986 does not allow '[' and ']'
237  *       in the fragment identifier but this is used very broadly for
238  *       xpointer scheme selection, so we are allowing it here to not break
239  *       for example all the DocBook processing chains.
240  *
241  * Returns 0 or the error code
242  */
243 static int
xmlParse3986Fragment(xmlURIPtr uri,const char ** str)244 xmlParse3986Fragment(xmlURIPtr uri, const char **str)
245 {
246     const char *cur;
247 
248     if (str == NULL)
249         return (-1);
250 
251     cur = *str;
252 
253     while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
254            (*cur == '[') || (*cur == ']') ||
255            ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
256         NEXT(cur);
257     if (uri != NULL) {
258         if (uri->fragment != NULL)
259             xmlFree(uri->fragment);
260 	if (uri->cleanup & 2)
261 	    uri->fragment = STRNDUP(*str, cur - *str);
262 	else
263 	    uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL);
264     }
265     *str = cur;
266     return (0);
267 }
268 
269 /**
270  * xmlParse3986Query:
271  * @uri:  pointer to an URI structure
272  * @str:  pointer to the string to analyze
273  *
274  * Parse the query part of an URI
275  *
276  * query = *uric
277  *
278  * Returns 0 or the error code
279  */
280 static int
xmlParse3986Query(xmlURIPtr uri,const char ** str)281 xmlParse3986Query(xmlURIPtr uri, const char **str)
282 {
283     const char *cur;
284 
285     if (str == NULL)
286         return (-1);
287 
288     cur = *str;
289 
290     while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
291            ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
292         NEXT(cur);
293     if (uri != NULL) {
294         if (uri->query != NULL)
295             xmlFree(uri->query);
296 	if (uri->cleanup & 2)
297 	    uri->query = STRNDUP(*str, cur - *str);
298 	else
299 	    uri->query = xmlURIUnescapeString(*str, cur - *str, NULL);
300 
301 	/* Save the raw bytes of the query as well.
302 	 * See: http://mail.gnome.org/archives/xml/2007-April/thread.html#00114
303 	 */
304 	if (uri->query_raw != NULL)
305 	    xmlFree (uri->query_raw);
306 	uri->query_raw = STRNDUP (*str, cur - *str);
307     }
308     *str = cur;
309     return (0);
310 }
311 
312 /**
313  * xmlParse3986Port:
314  * @uri:  pointer to an URI structure
315  * @str:  the string to analyze
316  *
317  * Parse a port  part and fills in the appropriate fields
318  * of the @uri structure
319  *
320  * port          = *DIGIT
321  *
322  * Returns 0 or the error code
323  */
324 static int
xmlParse3986Port(xmlURIPtr uri,const char ** str)325 xmlParse3986Port(xmlURIPtr uri, const char **str)
326 {
327     const char *cur = *str;
328 
329     if (ISA_DIGIT(cur)) {
330 	if (uri != NULL)
331 	    uri->port = 0;
332 	while (ISA_DIGIT(cur)) {
333 	    if (uri != NULL)
334 		uri->port = uri->port * 10 + (*cur - '0');
335 	    cur++;
336 	}
337 	*str = cur;
338 	return(0);
339     }
340     return(1);
341 }
342 
343 /**
344  * xmlParse3986Userinfo:
345  * @uri:  pointer to an URI structure
346  * @str:  the string to analyze
347  *
348  * Parse an user informations part and fills in the appropriate fields
349  * of the @uri structure
350  *
351  * userinfo      = *( unreserved / pct-encoded / sub-delims / ":" )
352  *
353  * Returns 0 or the error code
354  */
355 static int
xmlParse3986Userinfo(xmlURIPtr uri,const char ** str)356 xmlParse3986Userinfo(xmlURIPtr uri, const char **str)
357 {
358     const char *cur;
359 
360     cur = *str;
361     while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) ||
362            ISA_SUB_DELIM(cur) || (*cur == ':'))
363 	NEXT(cur);
364     if (*cur == '@') {
365 	if (uri != NULL) {
366 	    if (uri->user != NULL) xmlFree(uri->user);
367 	    if (uri->cleanup & 2)
368 		uri->user = STRNDUP(*str, cur - *str);
369 	    else
370 		uri->user = xmlURIUnescapeString(*str, cur - *str, NULL);
371 	}
372 	*str = cur;
373 	return(0);
374     }
375     return(1);
376 }
377 
/**
 * xmlParse3986DecOctet:
 * @str:  the string to analyze
 *
 *    dec-octet     = DIGIT                 ; 0-9
 *                  / %x31-39 DIGIT         ; 10-99
 *                  / "1" 2DIGIT            ; 100-199
 *                  / "2" %x30-34 DIGIT     ; 200-249
 *                  / "25" %x30-35          ; 250-255
 *
 * Skip a dec-octet. At most three digits are consumed; whatever follows
 * them is left for the caller to check. The last alternative tests the
 * third digit against "5", so that 256-259 are rejected.
 *
 * Returns 0 if found and skipped (with @str advanced), 1 otherwise
 * (with @str untouched)
 */
static int
xmlParse3986DecOctet(const char **str)
{
    const char *cur = *str;
    int ndigits;

    if ((cur[0] < '0') || (cur[0] > '9'))
        return(1);

    /* cur[1] and cur[2] are only read once the byte before is a digit */
    if ((cur[1] < '0') || (cur[1] > '9')) {
        ndigits = 1;                            /* 0-9 */
    } else if ((cur[2] < '0') || (cur[2] > '9')) {
        if (cur[0] == '0')
            return(1);                          /* no leading zero */
        ndigits = 2;                            /* 10-99 */
    } else if (cur[0] == '1') {
        ndigits = 3;                            /* 100-199 */
    } else if ((cur[0] == '2') && (cur[1] <= '4')) {
        ndigits = 3;                            /* 200-249 */
    } else if ((cur[0] == '2') && (cur[1] == '5') && (cur[2] <= '5')) {
        ndigits = 3;                            /* 250-255 */
    } else {
        return(1);
    }

    *str = cur + ndigits;
    return(0);
}
415 /**
416  * xmlParse3986Host:
417  * @uri:  pointer to an URI structure
418  * @str:  the string to analyze
419  *
420  * Parse an host part and fills in the appropriate fields
421  * of the @uri structure
422  *
423  * host          = IP-literal / IPv4address / reg-name
424  * IP-literal    = "[" ( IPv6address / IPvFuture  ) "]"
425  * IPv4address   = dec-octet "." dec-octet "." dec-octet "." dec-octet
426  * reg-name      = *( unreserved / pct-encoded / sub-delims )
427  *
428  * Returns 0 or the error code
429  */
430 static int
xmlParse3986Host(xmlURIPtr uri,const char ** str)431 xmlParse3986Host(xmlURIPtr uri, const char **str)
432 {
433     const char *cur = *str;
434     const char *host;
435 
436     host = cur;
437     /*
438      * IPv6 and future adressing scheme are enclosed between brackets
439      */
440     if (*cur == '[') {
441         cur++;
442 	while ((*cur != ']') && (*cur != 0))
443 	    cur++;
444 	if (*cur != ']')
445 	    return(1);
446 	cur++;
447 	goto found;
448     }
449     /*
450      * try to parse an IPv4
451      */
452     if (ISA_DIGIT(cur)) {
453         if (xmlParse3986DecOctet(&cur) != 0)
454 	    goto not_ipv4;
455 	if (*cur != '.')
456 	    goto not_ipv4;
457 	cur++;
458         if (xmlParse3986DecOctet(&cur) != 0)
459 	    goto not_ipv4;
460 	if (*cur != '.')
461 	    goto not_ipv4;
462         if (xmlParse3986DecOctet(&cur) != 0)
463 	    goto not_ipv4;
464 	if (*cur != '.')
465 	    goto not_ipv4;
466         if (xmlParse3986DecOctet(&cur) != 0)
467 	    goto not_ipv4;
468 	goto found;
469 not_ipv4:
470         cur = *str;
471     }
472     /*
473      * then this should be a hostname which can be empty
474      */
475     while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) || ISA_SUB_DELIM(cur))
476         NEXT(cur);
477 found:
478     if (uri != NULL) {
479 	if (uri->authority != NULL) xmlFree(uri->authority);
480 	uri->authority = NULL;
481 	if (uri->server != NULL) xmlFree(uri->server);
482 	if (cur != host) {
483 	    if (uri->cleanup & 2)
484 		uri->server = STRNDUP(host, cur - host);
485 	    else
486 		uri->server = xmlURIUnescapeString(host, cur - host, NULL);
487 	} else
488 	    uri->server = NULL;
489     }
490     *str = cur;
491     return(0);
492 }
493 
494 /**
495  * xmlParse3986Authority:
496  * @uri:  pointer to an URI structure
497  * @str:  the string to analyze
498  *
499  * Parse an authority part and fills in the appropriate fields
500  * of the @uri structure
501  *
502  * authority     = [ userinfo "@" ] host [ ":" port ]
503  *
504  * Returns 0 or the error code
505  */
506 static int
xmlParse3986Authority(xmlURIPtr uri,const char ** str)507 xmlParse3986Authority(xmlURIPtr uri, const char **str)
508 {
509     const char *cur;
510     int ret;
511 
512     cur = *str;
513     /*
514      * try to parse an userinfo and check for the trailing @
515      */
516     ret = xmlParse3986Userinfo(uri, &cur);
517     if ((ret != 0) || (*cur != '@'))
518         cur = *str;
519     else
520         cur++;
521     ret = xmlParse3986Host(uri, &cur);
522     if (ret != 0) return(ret);
523     if (*cur == ':') {
524         cur++;
525         ret = xmlParse3986Port(uri, &cur);
526 	if (ret != 0) return(ret);
527     }
528     *str = cur;
529     return(0);
530 }
531 
/**
 * xmlParse3986Segment:
 * @str:  the string to analyze
 * @forbid: an optional forbidden character, 0 for none
 * @empty: allow an empty segment
 *
 * Skip a path segment and move @str past it. Scanning stops at the
 * first character which is not a pchar or which equals @forbid.
 *
 * segment       = *pchar
 * segment-nz    = 1*pchar
 * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
 *               ; non-zero-length segment without any colon ":"
 *
 * Returns 0 on success, 1 if the first character is not a pchar and
 * @empty is 0
 */
static int
xmlParse3986Segment(const char **str, char forbid, int empty)
{
    const char *p = *str;

    if (!ISA_PCHAR(p))
        return(empty ? 0 : 1);

    for (; ISA_PCHAR(p) && (*p != forbid); NEXT(p))
        ;

    *str = p;
    return(0);
}
564 
565 /**
566  * xmlParse3986PathAbEmpty:
567  * @uri:  pointer to an URI structure
568  * @str:  the string to analyze
569  *
570  * Parse an path absolute or empty and fills in the appropriate fields
571  * of the @uri structure
572  *
573  * path-abempty  = *( "/" segment )
574  *
575  * Returns 0 or the error code
576  */
577 static int
xmlParse3986PathAbEmpty(xmlURIPtr uri,const char ** str)578 xmlParse3986PathAbEmpty(xmlURIPtr uri, const char **str)
579 {
580     const char *cur;
581     int ret;
582 
583     cur = *str;
584 
585     while (*cur == '/') {
586         cur++;
587 	ret = xmlParse3986Segment(&cur, 0, 1);
588 	if (ret != 0) return(ret);
589     }
590     if (uri != NULL) {
591 	if (uri->path != NULL) xmlFree(uri->path);
592         if (*str != cur) {
593             if (uri->cleanup & 2)
594                 uri->path = STRNDUP(*str, cur - *str);
595             else
596                 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
597         } else {
598             uri->path = NULL;
599         }
600     }
601     *str = cur;
602     return (0);
603 }
604 
605 /**
606  * xmlParse3986PathAbsolute:
607  * @uri:  pointer to an URI structure
608  * @str:  the string to analyze
609  *
610  * Parse an path absolute and fills in the appropriate fields
611  * of the @uri structure
612  *
613  * path-absolute = "/" [ segment-nz *( "/" segment ) ]
614  *
615  * Returns 0 or the error code
616  */
617 static int
xmlParse3986PathAbsolute(xmlURIPtr uri,const char ** str)618 xmlParse3986PathAbsolute(xmlURIPtr uri, const char **str)
619 {
620     const char *cur;
621     int ret;
622 
623     cur = *str;
624 
625     if (*cur != '/')
626         return(1);
627     cur++;
628     ret = xmlParse3986Segment(&cur, 0, 0);
629     if (ret == 0) {
630 	while (*cur == '/') {
631 	    cur++;
632 	    ret = xmlParse3986Segment(&cur, 0, 1);
633 	    if (ret != 0) return(ret);
634 	}
635     }
636     if (uri != NULL) {
637 	if (uri->path != NULL) xmlFree(uri->path);
638         if (cur != *str) {
639             if (uri->cleanup & 2)
640                 uri->path = STRNDUP(*str, cur - *str);
641             else
642                 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
643         } else {
644             uri->path = NULL;
645         }
646     }
647     *str = cur;
648     return (0);
649 }
650 
651 /**
652  * xmlParse3986PathRootless:
653  * @uri:  pointer to an URI structure
654  * @str:  the string to analyze
655  *
656  * Parse an path without root and fills in the appropriate fields
657  * of the @uri structure
658  *
659  * path-rootless = segment-nz *( "/" segment )
660  *
661  * Returns 0 or the error code
662  */
663 static int
xmlParse3986PathRootless(xmlURIPtr uri,const char ** str)664 xmlParse3986PathRootless(xmlURIPtr uri, const char **str)
665 {
666     const char *cur;
667     int ret;
668 
669     cur = *str;
670 
671     ret = xmlParse3986Segment(&cur, 0, 0);
672     if (ret != 0) return(ret);
673     while (*cur == '/') {
674         cur++;
675 	ret = xmlParse3986Segment(&cur, 0, 1);
676 	if (ret != 0) return(ret);
677     }
678     if (uri != NULL) {
679 	if (uri->path != NULL) xmlFree(uri->path);
680         if (cur != *str) {
681             if (uri->cleanup & 2)
682                 uri->path = STRNDUP(*str, cur - *str);
683             else
684                 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
685         } else {
686             uri->path = NULL;
687         }
688     }
689     *str = cur;
690     return (0);
691 }
692 
693 /**
694  * xmlParse3986PathNoScheme:
695  * @uri:  pointer to an URI structure
696  * @str:  the string to analyze
697  *
698  * Parse an path which is not a scheme and fills in the appropriate fields
699  * of the @uri structure
700  *
701  * path-noscheme = segment-nz-nc *( "/" segment )
702  *
703  * Returns 0 or the error code
704  */
705 static int
xmlParse3986PathNoScheme(xmlURIPtr uri,const char ** str)706 xmlParse3986PathNoScheme(xmlURIPtr uri, const char **str)
707 {
708     const char *cur;
709     int ret;
710 
711     cur = *str;
712 
713     ret = xmlParse3986Segment(&cur, ':', 0);
714     if (ret != 0) return(ret);
715     while (*cur == '/') {
716         cur++;
717 	ret = xmlParse3986Segment(&cur, 0, 1);
718 	if (ret != 0) return(ret);
719     }
720     if (uri != NULL) {
721 	if (uri->path != NULL) xmlFree(uri->path);
722         if (cur != *str) {
723             if (uri->cleanup & 2)
724                 uri->path = STRNDUP(*str, cur - *str);
725             else
726                 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
727         } else {
728             uri->path = NULL;
729         }
730     }
731     *str = cur;
732     return (0);
733 }
734 
735 /**
736  * xmlParse3986HierPart:
737  * @uri:  pointer to an URI structure
738  * @str:  the string to analyze
739  *
740  * Parse an hierarchical part and fills in the appropriate fields
741  * of the @uri structure
742  *
743  * hier-part     = "//" authority path-abempty
744  *                / path-absolute
745  *                / path-rootless
746  *                / path-empty
747  *
748  * Returns 0 or the error code
749  */
750 static int
xmlParse3986HierPart(xmlURIPtr uri,const char ** str)751 xmlParse3986HierPart(xmlURIPtr uri, const char **str)
752 {
753     const char *cur;
754     int ret;
755 
756     cur = *str;
757 
758     if ((*cur == '/') && (*(cur + 1) == '/')) {
759         cur += 2;
760 	ret = xmlParse3986Authority(uri, &cur);
761 	if (ret != 0) return(ret);
762 	if (uri->server == NULL)
763 	    uri->port = -1;
764 	ret = xmlParse3986PathAbEmpty(uri, &cur);
765 	if (ret != 0) return(ret);
766 	*str = cur;
767 	return(0);
768     } else if (*cur == '/') {
769         ret = xmlParse3986PathAbsolute(uri, &cur);
770 	if (ret != 0) return(ret);
771     } else if (ISA_PCHAR(cur)) {
772         ret = xmlParse3986PathRootless(uri, &cur);
773 	if (ret != 0) return(ret);
774     } else {
775 	/* path-empty is effectively empty */
776 	if (uri != NULL) {
777 	    if (uri->path != NULL) xmlFree(uri->path);
778 	    uri->path = NULL;
779 	}
780     }
781     *str = cur;
782     return (0);
783 }
784 
785 /**
786  * xmlParse3986RelativeRef:
787  * @uri:  pointer to an URI structure
788  * @str:  the string to analyze
789  *
790  * Parse an URI string and fills in the appropriate fields
791  * of the @uri structure
792  *
793  * relative-ref  = relative-part [ "?" query ] [ "#" fragment ]
794  * relative-part = "//" authority path-abempty
795  *               / path-absolute
796  *               / path-noscheme
797  *               / path-empty
798  *
799  * Returns 0 or the error code
800  */
801 static int
xmlParse3986RelativeRef(xmlURIPtr uri,const char * str)802 xmlParse3986RelativeRef(xmlURIPtr uri, const char *str) {
803     int ret;
804 
805     if ((*str == '/') && (*(str + 1) == '/')) {
806         str += 2;
807 	ret = xmlParse3986Authority(uri, &str);
808 	if (ret != 0) return(ret);
809 	ret = xmlParse3986PathAbEmpty(uri, &str);
810 	if (ret != 0) return(ret);
811     } else if (*str == '/') {
812 	ret = xmlParse3986PathAbsolute(uri, &str);
813 	if (ret != 0) return(ret);
814     } else if (ISA_PCHAR(str)) {
815         ret = xmlParse3986PathNoScheme(uri, &str);
816 	if (ret != 0) return(ret);
817     } else {
818 	/* path-empty is effectively empty */
819 	if (uri != NULL) {
820 	    if (uri->path != NULL) xmlFree(uri->path);
821 	    uri->path = NULL;
822 	}
823     }
824 
825     if (*str == '?') {
826 	str++;
827 	ret = xmlParse3986Query(uri, &str);
828 	if (ret != 0) return(ret);
829     }
830     if (*str == '#') {
831 	str++;
832 	ret = xmlParse3986Fragment(uri, &str);
833 	if (ret != 0) return(ret);
834     }
835     if (*str != 0) {
836 	xmlCleanURI(uri);
837 	return(1);
838     }
839     return(0);
840 }
841 
842 
843 /**
844  * xmlParse3986URI:
845  * @uri:  pointer to an URI structure
846  * @str:  the string to analyze
847  *
848  * Parse an URI string and fills in the appropriate fields
849  * of the @uri structure
850  *
851  * scheme ":" hier-part [ "?" query ] [ "#" fragment ]
852  *
853  * Returns 0 or the error code
854  */
855 static int
xmlParse3986URI(xmlURIPtr uri,const char * str)856 xmlParse3986URI(xmlURIPtr uri, const char *str) {
857     int ret;
858 
859     ret = xmlParse3986Scheme(uri, &str);
860     if (ret != 0) return(ret);
861     if (*str != ':') {
862 	return(1);
863     }
864     str++;
865     ret = xmlParse3986HierPart(uri, &str);
866     if (ret != 0) return(ret);
867     if (*str == '?') {
868 	str++;
869 	ret = xmlParse3986Query(uri, &str);
870 	if (ret != 0) return(ret);
871     }
872     if (*str == '#') {
873 	str++;
874 	ret = xmlParse3986Fragment(uri, &str);
875 	if (ret != 0) return(ret);
876     }
877     if (*str != 0) {
878 	xmlCleanURI(uri);
879 	return(1);
880     }
881     return(0);
882 }
883 
884 /**
885  * xmlParse3986URIReference:
886  * @uri:  pointer to an URI structure
887  * @str:  the string to analyze
888  *
889  * Parse an URI reference string and fills in the appropriate fields
890  * of the @uri structure
891  *
892  * URI-reference = URI / relative-ref
893  *
894  * Returns 0 or the error code
895  */
896 static int
xmlParse3986URIReference(xmlURIPtr uri,const char * str)897 xmlParse3986URIReference(xmlURIPtr uri, const char *str) {
898     int ret;
899 
900     if (str == NULL)
901 	return(-1);
902     xmlCleanURI(uri);
903 
904     /*
905      * Try first to parse absolute refs, then fallback to relative if
906      * it fails.
907      */
908     ret = xmlParse3986URI(uri, str);
909     if (ret != 0) {
910 	xmlCleanURI(uri);
911         ret = xmlParse3986RelativeRef(uri, str);
912 	if (ret != 0) {
913 	    xmlCleanURI(uri);
914 	    return(ret);
915 	}
916     }
917     return(0);
918 }
919 
920 /**
921  * xmlParseURI:
922  * @str:  the URI string to analyze
923  *
924  * Parse an URI based on RFC 3986
925  *
926  * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
927  *
928  * Returns a newly built xmlURIPtr or NULL in case of error
929  */
930 xmlURIPtr
xmlParseURI(const char * str)931 xmlParseURI(const char *str) {
932     xmlURIPtr uri;
933     int ret;
934 
935     if (str == NULL)
936 	return(NULL);
937     uri = xmlCreateURI();
938     if (uri != NULL) {
939 	ret = xmlParse3986URIReference(uri, str);
940         if (ret) {
941 	    xmlFreeURI(uri);
942 	    return(NULL);
943 	}
944     }
945     return(uri);
946 }
947 
948 /**
949  * xmlParseURIReference:
950  * @uri:  pointer to an URI structure
951  * @str:  the string to analyze
952  *
953  * Parse an URI reference string based on RFC 3986 and fills in the
954  * appropriate fields of the @uri structure
955  *
956  * URI-reference = URI / relative-ref
957  *
958  * Returns 0 or the error code
959  */
960 int
xmlParseURIReference(xmlURIPtr uri,const char * str)961 xmlParseURIReference(xmlURIPtr uri, const char *str) {
962     return(xmlParse3986URIReference(uri, str));
963 }
964 
965 /**
966  * xmlParseURIRaw:
967  * @str:  the URI string to analyze
968  * @raw:  if 1 unescaping of URI pieces are disabled
969  *
970  * Parse an URI but allows to keep intact the original fragments.
971  *
972  * URI-reference = URI / relative-ref
973  *
974  * Returns a newly built xmlURIPtr or NULL in case of error
975  */
976 xmlURIPtr
xmlParseURIRaw(const char * str,int raw)977 xmlParseURIRaw(const char *str, int raw) {
978     xmlURIPtr uri;
979     int ret;
980 
981     if (str == NULL)
982 	return(NULL);
983     uri = xmlCreateURI();
984     if (uri != NULL) {
985         if (raw) {
986 	    uri->cleanup |= 2;
987 	}
988 	ret = xmlParseURIReference(uri, str);
989         if (ret) {
990 	    xmlFreeURI(uri);
991 	    return(NULL);
992 	}
993     }
994     return(uri);
995 }
996 
997 /************************************************************************
998  *									*
999  *			Generic URI structure functions			*
1000  *									*
1001  ************************************************************************/
1002 
1003 /**
1004  * xmlCreateURI:
1005  *
1006  * Simply creates an empty xmlURI
1007  *
1008  * Returns the new structure or NULL in case of error
1009  */
1010 xmlURIPtr
xmlCreateURI(void)1011 xmlCreateURI(void) {
1012     xmlURIPtr ret;
1013 
1014     ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI));
1015     if (ret == NULL) {
1016         xmlURIErrMemory("creating URI structure\n");
1017 	return(NULL);
1018     }
1019     memset(ret, 0, sizeof(xmlURI));
1020     return(ret);
1021 }
1022 
1023 /**
1024  * xmlSaveUriRealloc:
1025  *
1026  * Function to handle properly a reallocation when saving an URI
1027  * Also imposes some limit on the length of an URI string output
1028  */
1029 static xmlChar *
xmlSaveUriRealloc(xmlChar * ret,int * max)1030 xmlSaveUriRealloc(xmlChar *ret, int *max) {
1031     xmlChar *temp;
1032     int tmp;
1033 
1034     if (*max > MAX_URI_LENGTH) {
1035         xmlURIErrMemory("reaching arbitrary MAX_URI_LENGTH limit\n");
1036         return(NULL);
1037     }
1038     tmp = *max * 2;
1039     temp = (xmlChar *) xmlRealloc(ret, (tmp + 1));
1040     if (temp == NULL) {
1041         xmlURIErrMemory("saving URI\n");
1042         return(NULL);
1043     }
1044     *max = tmp;
1045     return(temp);
1046 }
1047 
1048 /**
1049  * xmlSaveUri:
1050  * @uri:  pointer to an xmlURI
1051  *
1052  * Save the URI as an escaped string
1053  *
1054  * Returns a new string (to be deallocated by caller)
1055  */
1056 xmlChar *
xmlSaveUri(xmlURIPtr uri)1057 xmlSaveUri(xmlURIPtr uri) {
1058     xmlChar *ret = NULL;
1059     xmlChar *temp;
1060     const char *p;
1061     int len;
1062     int max;
1063 
1064     if (uri == NULL) return(NULL);
1065 
1066 
1067     max = 80;
1068     ret = (xmlChar *) xmlMallocAtomic((max + 1) * sizeof(xmlChar));
1069     if (ret == NULL) {
1070         xmlURIErrMemory("saving URI\n");
1071 	return(NULL);
1072     }
1073     len = 0;
1074 
1075     if (uri->scheme != NULL) {
1076 	p = uri->scheme;
1077 	while (*p != 0) {
1078 	    if (len >= max) {
1079                 temp = xmlSaveUriRealloc(ret, &max);
1080                 if (temp == NULL) goto mem_error;
1081 		ret = temp;
1082 	    }
1083 	    ret[len++] = *p++;
1084 	}
1085 	if (len >= max) {
1086             temp = xmlSaveUriRealloc(ret, &max);
1087             if (temp == NULL) goto mem_error;
1088             ret = temp;
1089 	}
1090 	ret[len++] = ':';
1091     }
1092     if (uri->opaque != NULL) {
1093 	p = uri->opaque;
1094 	while (*p != 0) {
1095 	    if (len + 3 >= max) {
1096                 temp = xmlSaveUriRealloc(ret, &max);
1097                 if (temp == NULL) goto mem_error;
1098                 ret = temp;
1099 	    }
1100 	    if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p)))
1101 		ret[len++] = *p++;
1102 	    else {
1103 		int val = *(unsigned char *)p++;
1104 		int hi = val / 0x10, lo = val % 0x10;
1105 		ret[len++] = '%';
1106 		ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1107 		ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1108 	    }
1109 	}
1110     } else {
1111 	if ((uri->server != NULL) || (uri->port == -1)) {
1112 	    if (len + 3 >= max) {
1113                 temp = xmlSaveUriRealloc(ret, &max);
1114                 if (temp == NULL) goto mem_error;
1115                 ret = temp;
1116 	    }
1117 	    ret[len++] = '/';
1118 	    ret[len++] = '/';
1119 	    if (uri->user != NULL) {
1120 		p = uri->user;
1121 		while (*p != 0) {
1122 		    if (len + 3 >= max) {
1123                         temp = xmlSaveUriRealloc(ret, &max);
1124                         if (temp == NULL) goto mem_error;
1125                         ret = temp;
1126 		    }
1127 		    if ((IS_UNRESERVED(*(p))) ||
1128 			((*(p) == ';')) || ((*(p) == ':')) ||
1129 			((*(p) == '&')) || ((*(p) == '=')) ||
1130 			((*(p) == '+')) || ((*(p) == '$')) ||
1131 			((*(p) == ',')))
1132 			ret[len++] = *p++;
1133 		    else {
1134 			int val = *(unsigned char *)p++;
1135 			int hi = val / 0x10, lo = val % 0x10;
1136 			ret[len++] = '%';
1137 			ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1138 			ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1139 		    }
1140 		}
1141 		if (len + 3 >= max) {
1142                     temp = xmlSaveUriRealloc(ret, &max);
1143                     if (temp == NULL) goto mem_error;
1144                     ret = temp;
1145 		}
1146 		ret[len++] = '@';
1147 	    }
1148 	    if (uri->server != NULL) {
1149 		p = uri->server;
1150 		while (*p != 0) {
1151 		    if (len >= max) {
1152 			temp = xmlSaveUriRealloc(ret, &max);
1153 			if (temp == NULL) goto mem_error;
1154 			ret = temp;
1155 		    }
1156 		    ret[len++] = *p++;
1157 		}
1158 		if (uri->port > 0) {
1159 		    if (len + 10 >= max) {
1160 			temp = xmlSaveUriRealloc(ret, &max);
1161 			if (temp == NULL) goto mem_error;
1162 			ret = temp;
1163 		    }
1164 		    len += snprintf((char *) &ret[len], max - len, ":%d", uri->port);
1165 		}
1166 	    }
1167 	} else if (uri->authority != NULL) {
1168 	    if (len + 3 >= max) {
1169                 temp = xmlSaveUriRealloc(ret, &max);
1170                 if (temp == NULL) goto mem_error;
1171                 ret = temp;
1172 	    }
1173 	    ret[len++] = '/';
1174 	    ret[len++] = '/';
1175 	    p = uri->authority;
1176 	    while (*p != 0) {
1177 		if (len + 3 >= max) {
1178                     temp = xmlSaveUriRealloc(ret, &max);
1179                     if (temp == NULL) goto mem_error;
1180                     ret = temp;
1181 		}
1182 		if ((IS_UNRESERVED(*(p))) ||
1183                     ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||
1184                     ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1185                     ((*(p) == '=')) || ((*(p) == '+')))
1186 		    ret[len++] = *p++;
1187 		else {
1188 		    int val = *(unsigned char *)p++;
1189 		    int hi = val / 0x10, lo = val % 0x10;
1190 		    ret[len++] = '%';
1191 		    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1192 		    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1193 		}
1194 	    }
1195 	} else if (uri->scheme != NULL) {
1196 	    if (len + 3 >= max) {
1197                 temp = xmlSaveUriRealloc(ret, &max);
1198                 if (temp == NULL) goto mem_error;
1199                 ret = temp;
1200 	    }
1201 	}
1202 	if (uri->path != NULL) {
1203 	    p = uri->path;
1204 	    /*
1205 	     * the colon in file:///d: should not be escaped or
1206 	     * Windows accesses fail later.
1207 	     */
1208 	    if ((uri->scheme != NULL) &&
1209 		(p[0] == '/') &&
1210 		(((p[1] >= 'a') && (p[1] <= 'z')) ||
1211 		 ((p[1] >= 'A') && (p[1] <= 'Z'))) &&
1212 		(p[2] == ':') &&
1213 	        (xmlStrEqual(BAD_CAST uri->scheme, BAD_CAST "file"))) {
1214 		if (len + 3 >= max) {
1215                     temp = xmlSaveUriRealloc(ret, &max);
1216                     if (temp == NULL) goto mem_error;
1217                     ret = temp;
1218 		}
1219 		ret[len++] = *p++;
1220 		ret[len++] = *p++;
1221 		ret[len++] = *p++;
1222 	    }
1223 	    while (*p != 0) {
1224 		if (len + 3 >= max) {
1225                     temp = xmlSaveUriRealloc(ret, &max);
1226                     if (temp == NULL) goto mem_error;
1227                     ret = temp;
1228 		}
1229 		if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
1230                     ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1231 	            ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
1232 	            ((*(p) == ',')))
1233 		    ret[len++] = *p++;
1234 		else {
1235 		    int val = *(unsigned char *)p++;
1236 		    int hi = val / 0x10, lo = val % 0x10;
1237 		    ret[len++] = '%';
1238 		    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1239 		    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1240 		}
1241 	    }
1242 	}
1243 	if (uri->query_raw != NULL) {
1244 	    if (len + 1 >= max) {
1245                 temp = xmlSaveUriRealloc(ret, &max);
1246                 if (temp == NULL) goto mem_error;
1247                 ret = temp;
1248 	    }
1249 	    ret[len++] = '?';
1250 	    p = uri->query_raw;
1251 	    while (*p != 0) {
1252 		if (len + 1 >= max) {
1253                     temp = xmlSaveUriRealloc(ret, &max);
1254                     if (temp == NULL) goto mem_error;
1255                     ret = temp;
1256 		}
1257 		ret[len++] = *p++;
1258 	    }
1259 	} else if (uri->query != NULL) {
1260 	    if (len + 3 >= max) {
1261                 temp = xmlSaveUriRealloc(ret, &max);
1262                 if (temp == NULL) goto mem_error;
1263                 ret = temp;
1264 	    }
1265 	    ret[len++] = '?';
1266 	    p = uri->query;
1267 	    while (*p != 0) {
1268 		if (len + 3 >= max) {
1269                     temp = xmlSaveUriRealloc(ret, &max);
1270                     if (temp == NULL) goto mem_error;
1271                     ret = temp;
1272 		}
1273 		if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
1274 		    ret[len++] = *p++;
1275 		else {
1276 		    int val = *(unsigned char *)p++;
1277 		    int hi = val / 0x10, lo = val % 0x10;
1278 		    ret[len++] = '%';
1279 		    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1280 		    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1281 		}
1282 	    }
1283 	}
1284     }
1285     if (uri->fragment != NULL) {
1286 	if (len + 3 >= max) {
1287             temp = xmlSaveUriRealloc(ret, &max);
1288             if (temp == NULL) goto mem_error;
1289             ret = temp;
1290 	}
1291 	ret[len++] = '#';
1292 	p = uri->fragment;
1293 	while (*p != 0) {
1294 	    if (len + 3 >= max) {
1295                 temp = xmlSaveUriRealloc(ret, &max);
1296                 if (temp == NULL) goto mem_error;
1297                 ret = temp;
1298 	    }
1299 	    if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
1300 		ret[len++] = *p++;
1301 	    else {
1302 		int val = *(unsigned char *)p++;
1303 		int hi = val / 0x10, lo = val % 0x10;
1304 		ret[len++] = '%';
1305 		ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1306 		ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1307 	    }
1308 	}
1309     }
1310     if (len >= max) {
1311         temp = xmlSaveUriRealloc(ret, &max);
1312         if (temp == NULL) goto mem_error;
1313         ret = temp;
1314     }
1315     ret[len] = 0;
1316     return(ret);
1317 
1318 mem_error:
1319     xmlFree(ret);
1320     return(NULL);
1321 }
1322 
1323 /**
1324  * xmlPrintURI:
1325  * @stream:  a FILE* for the output
1326  * @uri:  pointer to an xmlURI
1327  *
1328  * Prints the URI in the stream @stream.
1329  */
1330 void
xmlPrintURI(FILE * stream,xmlURIPtr uri)1331 xmlPrintURI(FILE *stream, xmlURIPtr uri) {
1332     xmlChar *out;
1333 
1334     out = xmlSaveUri(uri);
1335     if (out != NULL) {
1336 	fprintf(stream, "%s", (char *) out);
1337 	xmlFree(out);
1338     }
1339 }
1340 
1341 /**
1342  * xmlCleanURI:
1343  * @uri:  pointer to an xmlURI
1344  *
1345  * Make sure the xmlURI struct is free of content
1346  */
1347 static void
xmlCleanURI(xmlURIPtr uri)1348 xmlCleanURI(xmlURIPtr uri) {
1349     if (uri == NULL) return;
1350 
1351     if (uri->scheme != NULL) xmlFree(uri->scheme);
1352     uri->scheme = NULL;
1353     if (uri->server != NULL) xmlFree(uri->server);
1354     uri->server = NULL;
1355     if (uri->user != NULL) xmlFree(uri->user);
1356     uri->user = NULL;
1357     if (uri->path != NULL) xmlFree(uri->path);
1358     uri->path = NULL;
1359     if (uri->fragment != NULL) xmlFree(uri->fragment);
1360     uri->fragment = NULL;
1361     if (uri->opaque != NULL) xmlFree(uri->opaque);
1362     uri->opaque = NULL;
1363     if (uri->authority != NULL) xmlFree(uri->authority);
1364     uri->authority = NULL;
1365     if (uri->query != NULL) xmlFree(uri->query);
1366     uri->query = NULL;
1367     if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1368     uri->query_raw = NULL;
1369 }
1370 
1371 /**
1372  * xmlFreeURI:
1373  * @uri:  pointer to an xmlURI
1374  *
1375  * Free up the xmlURI struct
1376  */
1377 void
xmlFreeURI(xmlURIPtr uri)1378 xmlFreeURI(xmlURIPtr uri) {
1379     if (uri == NULL) return;
1380 
1381     if (uri->scheme != NULL) xmlFree(uri->scheme);
1382     if (uri->server != NULL) xmlFree(uri->server);
1383     if (uri->user != NULL) xmlFree(uri->user);
1384     if (uri->path != NULL) xmlFree(uri->path);
1385     if (uri->fragment != NULL) xmlFree(uri->fragment);
1386     if (uri->opaque != NULL) xmlFree(uri->opaque);
1387     if (uri->authority != NULL) xmlFree(uri->authority);
1388     if (uri->query != NULL) xmlFree(uri->query);
1389     if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1390     xmlFree(uri);
1391 }
1392 
1393 /************************************************************************
1394  *									*
1395  *			Helper functions				*
1396  *									*
1397  ************************************************************************/
1398 
/**
 * xmlNormalizeURIPath:
 * @path:  pointer to the path string
 *
 * Applies the 5 normalization steps to a path string--that is, RFC 2396
 * Section 5.2, steps 6.c through 6.g. Runs of '/' following a segment are
 * collapsed to a single '/' as well; leading '/' characters are kept.
 *
 * Normalization occurs directly on the string, no new allocation is done;
 * the result is never longer than the input.
 *
 * Returns 0 on success or -1 if @path is NULL
 */
int
xmlNormalizeURIPath(char *path) {
    char *cur, *out;

    if (path == NULL)
        return(-1);

    /* Skip all initial "/" chars.  We want to get to the beginning of the
     * first non-empty segment.
     */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /* Keep everything we've seen so far.  */
    out = cur;

    /*
     * Analyze each segment in sequence for cases (c) and (d).
     */
    while (cur[0] != '\0') {
        /*
         * c) All occurrences of "./", where "." is a complete path segment,
         *    are removed from the buffer string.
         */
        if ((cur[0] == '.') && (cur[1] == '/')) {
            cur += 2;
            /* '//' normalization should be done at this point too */
            while (cur[0] == '/')
                cur++;
            continue;
        }

        /*
         * d) If the buffer string ends with "." as a complete path segment,
         *    that "." is removed.
         */
        if ((cur[0] == '.') && (cur[1] == '\0'))
            break;

        /* Otherwise keep the segment.  */
        while (cur[0] != '/') {
            if (cur[0] == '\0')
                goto done_cd;
            (out++)[0] = (cur++)[0];
        }
        /* normalize // */
        while ((cur[0] == '/') && (cur[1] == '/'))
            cur++;

        (out++)[0] = (cur++)[0];
    }
 done_cd:
    out[0] = '\0';

    /* Reset to the beginning of the first segment for the next sequence.  */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /*
     * Analyze each segment in sequence for cases (e) and (f).
     *
     * e) All occurrences of "<segment>/../", where <segment> is a
     *    complete path segment not equal to "..", are removed from the
     *    buffer string.  Removal of these path segments is performed
     *    iteratively, removing the leftmost matching pattern on each
     *    iteration, until no matching pattern remains.
     *
     * f) If the buffer string ends with "<segment>/..", where <segment>
     *    is a complete path segment not equal to "..", that
     *    "<segment>/.." is removed.
     *
     * To satisfy the "iterative" clause in (e), we need to collapse the
     * string every time we find something that needs to be removed.  Thus,
     * we don't need to keep two pointers into the string: we only need a
     * "current position" pointer.
     */
    while (1) {
        char *segp, *tmp;

        /* At the beginning of each iteration of this loop, "cur" points to
         * the first character of the segment we want to examine.
         */

        /* Find the end of the current segment.  */
        segp = cur;
        while ((segp[0] != '/') && (segp[0] != '\0'))
            ++segp;

        /* If this is the last segment, we're done (we need at least two
         * segments to meet the criteria for the (e) and (f) cases).
         */
        if (segp[0] == '\0')
            break;

        /* If the first segment is "..", or if the next segment _isn't_ "..",
         * keep this segment and try the next one.
         */
        ++segp;
        if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur+3))
            || ((segp[0] != '.') || (segp[1] != '.')
                || ((segp[2] != '/') && (segp[2] != '\0')))) {
            cur = segp;
            continue;
        }

        /* If we get here, remove this segment and the next one and back up
         * to the previous segment (if there is one), to implement the
         * "iteratively" clause.  It's pretty much impossible to back up
         * while maintaining two pointers into the buffer, so just compact
         * the whole buffer now.
         */

        /* If this is the end of the buffer, we're done.  */
        if (segp[2] == '\0') {
            cur[0] = '\0';
            break;
        }
        /* The source and destination overlap, so copy by hand instead of
         * using strcpy(cur, segp + 3).
         */
        tmp = cur;
        segp += 3;
        while ((*tmp++ = *segp++) != 0)
            ;

        /* Step back over the '/' characters in front of "cur".  If that
         * reaches the start of the buffer there is no previous segment
         * and we keep going from here.  Testing the character before
         * "segp" (rather than comparing the last character looked at
         * with "path") keeps a one-character first segment such as the
         * "x" of "x/a/../.." from being mistaken for "no previous
         * segment".
         */
        segp = cur;
        while ((segp > path) && (segp[-1] == '/'))
            --segp;
        if (segp == path)
            continue;

        /* "segp" is pointing just past the end of a previous segment; find
         * its start.  We need to back up to the previous segment and start
         * over with that to handle things like "foo/bar/../..".  If we
         * don't do this, then on the first pass we'll remove the "bar/..",
         * but be pointing at the second ".." so we won't realize we can also
         * remove the "foo/..".
         */
        cur = segp - 1;
        while ((cur > path) && (cur[-1] != '/'))
            --cur;
    }

    /*
     * g) If the resulting buffer string still begins with one or more
     *    complete path segments of "..", then the reference is
     *    considered to be in error. Implementations may handle this
     *    error by retaining these components in the resolved path (i.e.,
     *    treating them as part of the final URI), by removing them from
     *    the resolved path (i.e., discarding relative levels above the
     *    root), or by avoiding traversal of the reference.
     *
     * We discard them from the final path (only when the path is
     * absolute, i.e. starts with '/').
     */
    if (path[0] == '/') {
        cur = path;
        while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.')
               && ((cur[3] == '/') || (cur[3] == '\0')))
            cur += 3;

        if (cur != path) {
            out = path;
            while (cur[0] != '\0')
                (out++)[0] = (cur++)[0];
            out[0] = 0;
        }
    }

    return(0);
}
1587 
/*
 * is_hex:
 * @c:  the character to test
 *
 * Returns 1 if @c is one of 0-9, a-f or A-F, 0 otherwise
 */
static int is_hex(char c) {
    if ((c >= '0') && (c <= '9'))
        return(1);
    if ((c >= 'a') && (c <= 'f'))
        return(1);
    return(((c >= 'A') && (c <= 'F')) ? 1 : 0);
}
1595 
/**
 * xmlURIUnescapeString:
 * @str:  the string to unescape
 * @len:   the length in bytes to unescape (or <= 0 to indicate full string)
 * @target:  optional destination buffer
 *
 * Unescaping routine, but does not check that the string is an URI. Each
 * "%XX" sequence made of two hex digits is replaced by the single byte it
 * encodes (no character encoding is applied); everything else, including
 * a '%' not followed by two hex digits, is copied unchanged.
 * The result is never longer than the input, so when @target is given it
 * has to provide room for @len bytes plus the terminating zero.
 *
 * Returns @target if it was provided, otherwise a newly allocated string;
 * NULL only in case of error
 */
char *
xmlURIUnescapeString(const char *str, int len, char *target) {
    char *ret, *out;
    const char *in;

    if (str == NULL)
        return(NULL);
    if (len <= 0)
        len = strlen(str);
    /* the size_t to int conversion above may have produced a negative */
    if (len < 0)
        return(NULL);

    ret = target;
    if (ret == NULL) {
        ret = (char *) xmlMallocAtomic(len + 1);
        if (ret == NULL) {
            xmlURIErrMemory("unescaping URI value\n");
            return(NULL);
        }
    }

    in = str;
    out = ret;
    while (len > 0) {
        if ((len > 2) && (in[0] == '%') &&
            (is_hex(in[1])) && (is_hex(in[2]))) {
            int value = 0;
            int i;

            /* both characters were validated by is_hex() above */
            for (i = 1; i <= 2; i++) {
                char digit = in[i];

                value *= 16;
                if ((digit >= '0') && (digit <= '9'))
                    value += digit - '0';
                else if ((digit >= 'a') && (digit <= 'f'))
                    value += (digit - 'a') + 10;
                else
                    value += (digit - 'A') + 10;
            }
            *out++ = (char) value;
            in += 3;
            len -= 3;
        } else {
            *out++ = *in++;
            len--;
        }
    }
    *out = 0;
    return(ret);
}
1657 
1658 /**
1659  * xmlURIEscapeStr:
1660  * @str:  string to escape
1661  * @list: exception list string of chars not to escape
1662  *
1663  * This routine escapes a string to hex, ignoring reserved characters (a-z)
1664  * and the characters in the exception list.
1665  *
1666  * Returns a new escaped string or NULL in case of error.
1667  */
1668 xmlChar *
xmlURIEscapeStr(const xmlChar * str,const xmlChar * list)1669 xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) {
1670     xmlChar *ret, ch;
1671     xmlChar *temp;
1672     const xmlChar *in;
1673     int len, out;
1674 
1675     if (str == NULL)
1676 	return(NULL);
1677     if (str[0] == 0)
1678 	return(xmlStrdup(str));
1679     len = xmlStrlen(str);
1680     if (!(len > 0)) return(NULL);
1681 
1682     len += 20;
1683     ret = (xmlChar *) xmlMallocAtomic(len);
1684     if (ret == NULL) {
1685         xmlURIErrMemory("escaping URI value\n");
1686 	return(NULL);
1687     }
1688     in = (const xmlChar *) str;
1689     out = 0;
1690     while(*in != 0) {
1691 	if (len - out <= 3) {
1692             temp = xmlSaveUriRealloc(ret, &len);
1693 	    if (temp == NULL) {
1694                 xmlURIErrMemory("escaping URI value\n");
1695 		xmlFree(ret);
1696 		return(NULL);
1697 	    }
1698 	    ret = temp;
1699 	}
1700 
1701 	ch = *in;
1702 
1703 	if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch))) {
1704 	    unsigned char val;
1705 	    ret[out++] = '%';
1706 	    val = ch >> 4;
1707 	    if (val <= 9)
1708 		ret[out++] = '0' + val;
1709 	    else
1710 		ret[out++] = 'A' + val - 0xA;
1711 	    val = ch & 0xF;
1712 	    if (val <= 9)
1713 		ret[out++] = '0' + val;
1714 	    else
1715 		ret[out++] = 'A' + val - 0xA;
1716 	    in++;
1717 	} else {
1718 	    ret[out++] = *in++;
1719 	}
1720 
1721     }
1722     ret[out] = 0;
1723     return(ret);
1724 }
1725 
1726 /**
1727  * xmlURIEscape:
1728  * @str:  the string of the URI to escape
1729  *
1730  * Escaping routine, does not do validity checks !
1731  * It will try to escape the chars needing this, but this is heuristic
1732  * based it's impossible to be sure.
1733  *
1734  * Returns an copy of the string, but escaped
1735  *
1736  * 25 May 2001
1737  * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly
1738  * according to RFC2396.
1739  *   - Carl Douglas
1740  */
1741 xmlChar *
xmlURIEscape(const xmlChar * str)1742 xmlURIEscape(const xmlChar * str)
1743 {
1744     xmlChar *ret, *segment = NULL;
1745     xmlURIPtr uri;
1746     int ret2;
1747 
1748 #define NULLCHK(p) if(!p) { \
1749          xmlURIErrMemory("escaping URI value\n"); \
1750          xmlFreeURI(uri); \
1751          return NULL; } \
1752 
1753     if (str == NULL)
1754         return (NULL);
1755 
1756     uri = xmlCreateURI();
1757     if (uri != NULL) {
1758 	/*
1759 	 * Allow escaping errors in the unescaped form
1760 	 */
1761         uri->cleanup = 1;
1762         ret2 = xmlParseURIReference(uri, (const char *)str);
1763         if (ret2) {
1764             xmlFreeURI(uri);
1765             return (NULL);
1766         }
1767     }
1768 
1769     if (!uri)
1770         return NULL;
1771 
1772     ret = NULL;
1773 
1774     if (uri->scheme) {
1775         segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-.");
1776         NULLCHK(segment)
1777         ret = xmlStrcat(ret, segment);
1778         ret = xmlStrcat(ret, BAD_CAST ":");
1779         xmlFree(segment);
1780     }
1781 
1782     if (uri->authority) {
1783         segment =
1784             xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@");
1785         NULLCHK(segment)
1786         ret = xmlStrcat(ret, BAD_CAST "//");
1787         ret = xmlStrcat(ret, segment);
1788         xmlFree(segment);
1789     }
1790 
1791     if (uri->user) {
1792         segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,");
1793         NULLCHK(segment)
1794 		ret = xmlStrcat(ret,BAD_CAST "//");
1795         ret = xmlStrcat(ret, segment);
1796         ret = xmlStrcat(ret, BAD_CAST "@");
1797         xmlFree(segment);
1798     }
1799 
1800     if (uri->server) {
1801         segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@");
1802         NULLCHK(segment)
1803 		if (uri->user == NULL)
1804 		ret = xmlStrcat(ret, BAD_CAST "//");
1805         ret = xmlStrcat(ret, segment);
1806         xmlFree(segment);
1807     }
1808 
1809     if (uri->port) {
1810         xmlChar port[10];
1811 
1812         snprintf((char *) port, 10, "%d", uri->port);
1813         ret = xmlStrcat(ret, BAD_CAST ":");
1814         ret = xmlStrcat(ret, port);
1815     }
1816 
1817     if (uri->path) {
1818         segment =
1819             xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;");
1820         NULLCHK(segment)
1821         ret = xmlStrcat(ret, segment);
1822         xmlFree(segment);
1823     }
1824 
1825     if (uri->query_raw) {
1826         ret = xmlStrcat(ret, BAD_CAST "?");
1827         ret = xmlStrcat(ret, BAD_CAST uri->query_raw);
1828     }
1829     else if (uri->query) {
1830         segment =
1831             xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$");
1832         NULLCHK(segment)
1833         ret = xmlStrcat(ret, BAD_CAST "?");
1834         ret = xmlStrcat(ret, segment);
1835         xmlFree(segment);
1836     }
1837 
1838     if (uri->opaque) {
1839         segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST "");
1840         NULLCHK(segment)
1841         ret = xmlStrcat(ret, segment);
1842         xmlFree(segment);
1843     }
1844 
1845     if (uri->fragment) {
1846         segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#");
1847         NULLCHK(segment)
1848         ret = xmlStrcat(ret, BAD_CAST "#");
1849         ret = xmlStrcat(ret, segment);
1850         xmlFree(segment);
1851     }
1852 
1853     xmlFreeURI(uri);
1854 #undef NULLCHK
1855 
1856     return (ret);
1857 }
1858 
1859 /************************************************************************
1860  *									*
1861  *			Public functions				*
1862  *									*
1863  ************************************************************************/
1864 
1865 /**
1866  * xmlBuildURI:
1867  * @URI:  the URI instance found in the document
1868  * @base:  the base value
1869  *
1870  * Computes he final URI of the reference done by checking that
1871  * the given URI is valid, and building the final URI using the
1872  * base URI. This is processed according to section 5.2 of the
1873  * RFC 2396
1874  *
1875  * 5.2. Resolving Relative References to Absolute Form
1876  *
1877  * Returns a new URI string (to be freed by the caller) or NULL in case
1878  *         of error.
1879  */
1880 xmlChar *
xmlBuildURI(const xmlChar * URI,const xmlChar * base)1881 xmlBuildURI(const xmlChar *URI, const xmlChar *base) {
1882     xmlChar *val = NULL;
1883     int ret, len, indx, cur, out;
1884     xmlURIPtr ref = NULL;
1885     xmlURIPtr bas = NULL;
1886     xmlURIPtr res = NULL;
1887 
1888     /*
1889      * 1) The URI reference is parsed into the potential four components and
1890      *    fragment identifier, as described in Section 4.3.
1891      *
1892      *    NOTE that a completely empty URI is treated by modern browsers
1893      *    as a reference to "." rather than as a synonym for the current
1894      *    URI.  Should we do that here?
1895      */
1896     if (URI == NULL)
1897 	ret = -1;
1898     else {
1899 	if (*URI) {
1900 	    ref = xmlCreateURI();
1901 	    if (ref == NULL)
1902 		goto done;
1903 	    ret = xmlParseURIReference(ref, (const char *) URI);
1904 	}
1905 	else
1906 	    ret = 0;
1907     }
1908     if (ret != 0)
1909 	goto done;
1910     if ((ref != NULL) && (ref->scheme != NULL)) {
1911 	/*
1912 	 * The URI is absolute don't modify.
1913 	 */
1914 	val = xmlStrdup(URI);
1915 	goto done;
1916     }
1917     if (base == NULL)
1918 	ret = -1;
1919     else {
1920 	bas = xmlCreateURI();
1921 	if (bas == NULL)
1922 	    goto done;
1923 	ret = xmlParseURIReference(bas, (const char *) base);
1924     }
1925     if (ret != 0) {
1926 	if (ref)
1927 	    val = xmlSaveUri(ref);
1928 	goto done;
1929     }
1930     if (ref == NULL) {
1931 	/*
1932 	 * the base fragment must be ignored
1933 	 */
1934 	if (bas->fragment != NULL) {
1935 	    xmlFree(bas->fragment);
1936 	    bas->fragment = NULL;
1937 	}
1938 	val = xmlSaveUri(bas);
1939 	goto done;
1940     }
1941 
1942     /*
1943      * 2) If the path component is empty and the scheme, authority, and
1944      *    query components are undefined, then it is a reference to the
1945      *    current document and we are done.  Otherwise, the reference URI's
1946      *    query and fragment components are defined as found (or not found)
1947      *    within the URI reference and not inherited from the base URI.
1948      *
1949      *    NOTE that in modern browsers, the parsing differs from the above
1950      *    in the following aspect:  the query component is allowed to be
1951      *    defined while still treating this as a reference to the current
1952      *    document.
1953      */
1954     res = xmlCreateURI();
1955     if (res == NULL)
1956 	goto done;
1957     if ((ref->scheme == NULL) && (ref->path == NULL) &&
1958 	((ref->authority == NULL) && (ref->server == NULL))) {
1959 	if (bas->scheme != NULL)
1960 	    res->scheme = xmlMemStrdup(bas->scheme);
1961 	if (bas->authority != NULL)
1962 	    res->authority = xmlMemStrdup(bas->authority);
1963 	else if (bas->server != NULL) {
1964 	    res->server = xmlMemStrdup(bas->server);
1965 	    if (bas->user != NULL)
1966 		res->user = xmlMemStrdup(bas->user);
1967 	    res->port = bas->port;
1968 	}
1969 	if (bas->path != NULL)
1970 	    res->path = xmlMemStrdup(bas->path);
1971 	if (ref->query_raw != NULL)
1972 	    res->query_raw = xmlMemStrdup (ref->query_raw);
1973 	else if (ref->query != NULL)
1974 	    res->query = xmlMemStrdup(ref->query);
1975 	else if (bas->query_raw != NULL)
1976 	    res->query_raw = xmlMemStrdup(bas->query_raw);
1977 	else if (bas->query != NULL)
1978 	    res->query = xmlMemStrdup(bas->query);
1979 	if (ref->fragment != NULL)
1980 	    res->fragment = xmlMemStrdup(ref->fragment);
1981 	goto step_7;
1982     }
1983 
1984     /*
1985      * 3) If the scheme component is defined, indicating that the reference
1986      *    starts with a scheme name, then the reference is interpreted as an
1987      *    absolute URI and we are done.  Otherwise, the reference URI's
1988      *    scheme is inherited from the base URI's scheme component.
1989      */
1990     if (ref->scheme != NULL) {
1991 	val = xmlSaveUri(ref);
1992 	goto done;
1993     }
1994     if (bas->scheme != NULL)
1995 	res->scheme = xmlMemStrdup(bas->scheme);
1996 
1997     if (ref->query_raw != NULL)
1998 	res->query_raw = xmlMemStrdup(ref->query_raw);
1999     else if (ref->query != NULL)
2000 	res->query = xmlMemStrdup(ref->query);
2001     if (ref->fragment != NULL)
2002 	res->fragment = xmlMemStrdup(ref->fragment);
2003 
2004     /*
2005      * 4) If the authority component is defined, then the reference is a
2006      *    network-path and we skip to step 7.  Otherwise, the reference
2007      *    URI's authority is inherited from the base URI's authority
2008      *    component, which will also be undefined if the URI scheme does not
2009      *    use an authority component.
2010      */
2011     if ((ref->authority != NULL) || (ref->server != NULL)) {
2012 	if (ref->authority != NULL)
2013 	    res->authority = xmlMemStrdup(ref->authority);
2014 	else {
2015 	    res->server = xmlMemStrdup(ref->server);
2016 	    if (ref->user != NULL)
2017 		res->user = xmlMemStrdup(ref->user);
2018             res->port = ref->port;
2019 	}
2020 	if (ref->path != NULL)
2021 	    res->path = xmlMemStrdup(ref->path);
2022 	goto step_7;
2023     }
2024     if (bas->authority != NULL)
2025 	res->authority = xmlMemStrdup(bas->authority);
2026     else if (bas->server != NULL) {
2027 	res->server = xmlMemStrdup(bas->server);
2028 	if (bas->user != NULL)
2029 	    res->user = xmlMemStrdup(bas->user);
2030 	res->port = bas->port;
2031     }
2032 
2033     /*
2034      * 5) If the path component begins with a slash character ("/"), then
2035      *    the reference is an absolute-path and we skip to step 7.
2036      */
2037     if ((ref->path != NULL) && (ref->path[0] == '/')) {
2038 	res->path = xmlMemStrdup(ref->path);
2039 	goto step_7;
2040     }
2041 
2042 
2043     /*
2044      * 6) If this step is reached, then we are resolving a relative-path
2045      *    reference.  The relative path needs to be merged with the base
2046      *    URI's path.  Although there are many ways to do this, we will
2047      *    describe a simple method using a separate string buffer.
2048      *
2049      * Allocate a buffer large enough for the result string.
2050      */
2051     len = 2; /* extra / and 0 */
2052     if (ref->path != NULL)
2053 	len += strlen(ref->path);
2054     if (bas->path != NULL)
2055 	len += strlen(bas->path);
2056     res->path = (char *) xmlMallocAtomic(len);
2057     if (res->path == NULL) {
2058         xmlURIErrMemory("resolving URI against base\n");
2059 	goto done;
2060     }
2061     res->path[0] = 0;
2062 
2063     /*
2064      * a) All but the last segment of the base URI's path component is
2065      *    copied to the buffer.  In other words, any characters after the
2066      *    last (right-most) slash character, if any, are excluded.
2067      */
2068     cur = 0;
2069     out = 0;
2070     if (bas->path != NULL) {
2071 	while (bas->path[cur] != 0) {
2072 	    while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
2073 		cur++;
2074 	    if (bas->path[cur] == 0)
2075 		break;
2076 
2077 	    cur++;
2078 	    while (out < cur) {
2079 		res->path[out] = bas->path[out];
2080 		out++;
2081 	    }
2082 	}
2083     }
2084     res->path[out] = 0;
2085 
2086     /*
2087      * b) The reference's path component is appended to the buffer
2088      *    string.
2089      */
2090     if (ref->path != NULL && ref->path[0] != 0) {
2091 	indx = 0;
2092 	/*
2093 	 * Ensure the path includes a '/'
2094 	 */
2095 	if ((out == 0) && (bas->server != NULL))
2096 	    res->path[out++] = '/';
2097 	while (ref->path[indx] != 0) {
2098 	    res->path[out++] = ref->path[indx++];
2099 	}
2100     }
2101     res->path[out] = 0;
2102 
2103     /*
2104      * Steps c) to h) are really path normalization steps
2105      */
2106     xmlNormalizeURIPath(res->path);
2107 
2108 step_7:
2109 
2110     /*
2111      * 7) The resulting URI components, including any inherited from the
2112      *    base URI, are recombined to give the absolute form of the URI
2113      *    reference.
2114      */
2115     val = xmlSaveUri(res);
2116 
2117 done:
2118     if (ref != NULL)
2119 	xmlFreeURI(ref);
2120     if (bas != NULL)
2121 	xmlFreeURI(bas);
2122     if (res != NULL)
2123 	xmlFreeURI(res);
2124     return(val);
2125 }
2126 
2127 /**
2128  * xmlBuildRelativeURI:
2129  * @URI:  the URI reference under consideration
2130  * @base:  the base value
2131  *
2132  * Expresses the URI of the reference in terms relative to the
2133  * base.  Some examples of this operation include:
2134  *     base = "http://site1.com/docs/book1.html"
2135  *        URI input                        URI returned
2136  *     docs/pic1.gif                    pic1.gif
2137  *     docs/img/pic1.gif                img/pic1.gif
2138  *     img/pic1.gif                     ../img/pic1.gif
2139  *     http://site1.com/docs/pic1.gif   pic1.gif
2140  *     http://site2.com/docs/pic1.gif   http://site2.com/docs/pic1.gif
2141  *
2142  *     base = "docs/book1.html"
2143  *        URI input                        URI returned
2144  *     docs/pic1.gif                    pic1.gif
2145  *     docs/img/pic1.gif                img/pic1.gif
2146  *     img/pic1.gif                     ../img/pic1.gif
2147  *     http://site1.com/docs/pic1.gif   http://site1.com/docs/pic1.gif
2148  *
2149  *
2150  * Note: if the URI reference is really wierd or complicated, it may be
2151  *       worthwhile to first convert it into a "nice" one by calling
2152  *       xmlBuildURI (using 'base') before calling this routine,
2153  *       since this routine (for reasonable efficiency) assumes URI has
2154  *       already been through some validation.
2155  *
2156  * Returns a new URI string (to be freed by the caller) or NULL in case
2157  * error.
2158  */
2159 xmlChar *
xmlBuildRelativeURI(const xmlChar * URI,const xmlChar * base)2160 xmlBuildRelativeURI (const xmlChar * URI, const xmlChar * base)
2161 {
2162     xmlChar *val = NULL;
2163     int ret;
2164     int ix;
2165     int pos = 0;
2166     int nbslash = 0;
2167     int len;
2168     xmlURIPtr ref = NULL;
2169     xmlURIPtr bas = NULL;
2170     xmlChar *bptr, *uptr, *vptr;
2171     int remove_path = 0;
2172 
2173     if ((URI == NULL) || (*URI == 0))
2174 	return NULL;
2175 
2176     /*
2177      * First parse URI into a standard form
2178      */
2179     ref = xmlCreateURI ();
2180     if (ref == NULL)
2181 	return NULL;
2182     /* If URI not already in "relative" form */
2183     if (URI[0] != '.') {
2184 	ret = xmlParseURIReference (ref, (const char *) URI);
2185 	if (ret != 0)
2186 	    goto done;		/* Error in URI, return NULL */
2187     } else
2188 	ref->path = (char *)xmlStrdup(URI);
2189 
2190     /*
2191      * Next parse base into the same standard form
2192      */
2193     if ((base == NULL) || (*base == 0)) {
2194 	val = xmlStrdup (URI);
2195 	goto done;
2196     }
2197     bas = xmlCreateURI ();
2198     if (bas == NULL)
2199 	goto done;
2200     if (base[0] != '.') {
2201 	ret = xmlParseURIReference (bas, (const char *) base);
2202 	if (ret != 0)
2203 	    goto done;		/* Error in base, return NULL */
2204     } else
2205 	bas->path = (char *)xmlStrdup(base);
2206 
2207     /*
2208      * If the scheme / server on the URI differs from the base,
2209      * just return the URI
2210      */
2211     if ((ref->scheme != NULL) &&
2212 	((bas->scheme == NULL) ||
2213 	 (xmlStrcmp ((xmlChar *)bas->scheme, (xmlChar *)ref->scheme)) ||
2214 	 (xmlStrcmp ((xmlChar *)bas->server, (xmlChar *)ref->server)))) {
2215 	val = xmlStrdup (URI);
2216 	goto done;
2217     }
2218     if (xmlStrEqual((xmlChar *)bas->path, (xmlChar *)ref->path)) {
2219 	val = xmlStrdup(BAD_CAST "");
2220 	goto done;
2221     }
2222     if (bas->path == NULL) {
2223 	val = xmlStrdup((xmlChar *)ref->path);
2224 	goto done;
2225     }
2226     if (ref->path == NULL) {
2227         ref->path = (char *) "/";
2228 	remove_path = 1;
2229     }
2230 
2231     /*
2232      * At this point (at last!) we can compare the two paths
2233      *
2234      * First we take care of the special case where either of the
2235      * two path components may be missing (bug 316224)
2236      */
2237     if (bas->path == NULL) {
2238 	if (ref->path != NULL) {
2239 	    uptr = (xmlChar *) ref->path;
2240 	    if (*uptr == '/')
2241 		uptr++;
2242 	    /* exception characters from xmlSaveUri */
2243 	    val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
2244 	}
2245 	goto done;
2246     }
2247     bptr = (xmlChar *)bas->path;
2248     if (ref->path == NULL) {
2249 	for (ix = 0; bptr[ix] != 0; ix++) {
2250 	    if (bptr[ix] == '/')
2251 		nbslash++;
2252 	}
2253 	uptr = NULL;
2254 	len = 1;	/* this is for a string terminator only */
2255     } else {
2256     /*
2257      * Next we compare the two strings and find where they first differ
2258      */
2259 	if ((ref->path[pos] == '.') && (ref->path[pos+1] == '/'))
2260             pos += 2;
2261 	if ((*bptr == '.') && (bptr[1] == '/'))
2262             bptr += 2;
2263 	else if ((*bptr == '/') && (ref->path[pos] != '/'))
2264 	    bptr++;
2265 	while ((bptr[pos] == ref->path[pos]) && (bptr[pos] != 0))
2266 	    pos++;
2267 
2268 	if (bptr[pos] == ref->path[pos]) {
2269 	    val = xmlStrdup(BAD_CAST "");
2270 	    goto done;		/* (I can't imagine why anyone would do this) */
2271 	}
2272 
2273 	/*
2274 	 * In URI, "back up" to the last '/' encountered.  This will be the
2275 	 * beginning of the "unique" suffix of URI
2276 	 */
2277 	ix = pos;
2278 	if ((ref->path[ix] == '/') && (ix > 0))
2279 	    ix--;
2280 	else if ((ref->path[ix] == 0) && (ix > 1) && (ref->path[ix - 1] == '/'))
2281 	    ix -= 2;
2282 	for (; ix > 0; ix--) {
2283 	    if (ref->path[ix] == '/')
2284 		break;
2285 	}
2286 	if (ix == 0) {
2287 	    uptr = (xmlChar *)ref->path;
2288 	} else {
2289 	    ix++;
2290 	    uptr = (xmlChar *)&ref->path[ix];
2291 	}
2292 
2293 	/*
2294 	 * In base, count the number of '/' from the differing point
2295 	 */
2296 	if (bptr[pos] != ref->path[pos]) {/* check for trivial URI == base */
2297 	    for (; bptr[ix] != 0; ix++) {
2298 		if (bptr[ix] == '/')
2299 		    nbslash++;
2300 	    }
2301 	}
2302 	len = xmlStrlen (uptr) + 1;
2303     }
2304 
2305     if (nbslash == 0) {
2306 	if (uptr != NULL)
2307 	    /* exception characters from xmlSaveUri */
2308 	    val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
2309 	goto done;
2310     }
2311 
2312     /*
2313      * Allocate just enough space for the returned string -
2314      * length of the remainder of the URI, plus enough space
2315      * for the "../" groups, plus one for the terminator
2316      */
2317     val = (xmlChar *) xmlMalloc (len + 3 * nbslash);
2318     if (val == NULL) {
2319         xmlURIErrMemory("building relative URI\n");
2320 	goto done;
2321     }
2322     vptr = val;
2323     /*
2324      * Put in as many "../" as needed
2325      */
2326     for (; nbslash>0; nbslash--) {
2327 	*vptr++ = '.';
2328 	*vptr++ = '.';
2329 	*vptr++ = '/';
2330     }
2331     /*
2332      * Finish up with the end of the URI
2333      */
2334     if (uptr != NULL) {
2335         if ((vptr > val) && (len > 0) &&
2336 	    (uptr[0] == '/') && (vptr[-1] == '/')) {
2337 	    memcpy (vptr, uptr + 1, len - 1);
2338 	    vptr[len - 2] = 0;
2339 	} else {
2340 	    memcpy (vptr, uptr, len);
2341 	    vptr[len - 1] = 0;
2342 	}
2343     } else {
2344 	vptr[len - 1] = 0;
2345     }
2346 
2347     /* escape the freshly-built path */
2348     vptr = val;
2349 	/* exception characters from xmlSaveUri */
2350     val = xmlURIEscapeStr(vptr, BAD_CAST "/;&=+$,");
2351     xmlFree(vptr);
2352 
2353 done:
2354     /*
2355      * Free the working variables
2356      */
2357     if (remove_path != 0)
2358         ref->path = NULL;
2359     if (ref != NULL)
2360 	xmlFreeURI (ref);
2361     if (bas != NULL)
2362 	xmlFreeURI (bas);
2363 
2364     return val;
2365 }
2366 
2367 /**
2368  * xmlCanonicPath:
2369  * @path:  the resource locator in a filesystem notation
2370  *
2371  * Constructs a canonic path from the specified path.
2372  *
2373  * Returns a new canonic path, or a duplicate of the path parameter if the
2374  * construction fails. The caller is responsible for freeing the memory occupied
2375  * by the returned string. If there is insufficient memory available, or the
2376  * argument is NULL, the function returns NULL.
2377  */
2378 #define IS_WINDOWS_PATH(p)					\
2379 	((p != NULL) &&						\
2380 	 (((p[0] >= 'a') && (p[0] <= 'z')) ||			\
2381 	  ((p[0] >= 'A') && (p[0] <= 'Z'))) &&			\
2382 	 (p[1] == ':') && ((p[2] == '/') || (p[2] == '\\')))
2383 xmlChar *
xmlCanonicPath(const xmlChar * path)2384 xmlCanonicPath(const xmlChar *path)
2385 {
2386 /*
2387  * For Windows implementations, additional work needs to be done to
2388  * replace backslashes in pathnames with "forward slashes"
2389  */
2390 #if defined(_WIN32) && !defined(__CYGWIN__)
2391     int len = 0;
2392     int i = 0;
2393     xmlChar *p = NULL;
2394 #endif
2395     xmlURIPtr uri;
2396     xmlChar *ret;
2397     const xmlChar *absuri;
2398 
2399     if (path == NULL)
2400 	return(NULL);
2401 
2402 #if defined(_WIN32)
2403     /*
2404      * We must not change the backslashes to slashes if the the path
2405      * starts with \\?\
2406      * Those paths can be up to 32k characters long.
2407      * Was added specifically for OpenOffice, those paths can't be converted
2408      * to URIs anyway.
2409      */
2410     if ((path[0] == '\\') && (path[1] == '\\') && (path[2] == '?') &&
2411         (path[3] == '\\') )
2412 	return xmlStrdup((const xmlChar *) path);
2413 #endif
2414 
2415 	/* sanitize filename starting with // so it can be used as URI */
2416     if ((path[0] == '/') && (path[1] == '/') && (path[2] != '/'))
2417         path++;
2418 
2419     if ((uri = xmlParseURI((const char *) path)) != NULL) {
2420 	xmlFreeURI(uri);
2421 	return xmlStrdup(path);
2422     }
2423 
2424     /* Check if this is an "absolute uri" */
2425     absuri = xmlStrstr(path, BAD_CAST "://");
2426     if (absuri != NULL) {
2427         int l, j;
2428 	unsigned char c;
2429 	xmlChar *escURI;
2430 
2431         /*
2432 	 * this looks like an URI where some parts have not been
2433 	 * escaped leading to a parsing problem.  Check that the first
2434 	 * part matches a protocol.
2435 	 */
2436 	l = absuri - path;
2437 	/* Bypass if first part (part before the '://') is > 20 chars */
2438 	if ((l <= 0) || (l > 20))
2439 	    goto path_processing;
2440 	/* Bypass if any non-alpha characters are present in first part */
2441 	for (j = 0;j < l;j++) {
2442 	    c = path[j];
2443 	    if (!(((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z'))))
2444 	        goto path_processing;
2445 	}
2446 
2447 	/* Escape all except the characters specified in the supplied path */
2448         escURI = xmlURIEscapeStr(path, BAD_CAST ":/?_.#&;=");
2449 	if (escURI != NULL) {
2450 	    /* Try parsing the escaped path */
2451 	    uri = xmlParseURI((const char *) escURI);
2452 	    /* If successful, return the escaped string */
2453 	    if (uri != NULL) {
2454 	        xmlFreeURI(uri);
2455 		return escURI;
2456 	    }
2457 	}
2458     }
2459 
2460 path_processing:
2461 /* For Windows implementations, replace backslashes with 'forward slashes' */
2462 #if defined(_WIN32) && !defined(__CYGWIN__)
2463     /*
2464      * Create a URI structure
2465      */
2466     uri = xmlCreateURI();
2467     if (uri == NULL) {		/* Guard against 'out of memory' */
2468         return(NULL);
2469     }
2470 
2471     len = xmlStrlen(path);
2472     if ((len > 2) && IS_WINDOWS_PATH(path)) {
2473         /* make the scheme 'file' */
2474 	uri->scheme = xmlStrdup(BAD_CAST "file");
2475 	/* allocate space for leading '/' + path + string terminator */
2476 	uri->path = xmlMallocAtomic(len + 2);
2477 	if (uri->path == NULL) {
2478 	    xmlFreeURI(uri);	/* Guard agains 'out of memory' */
2479 	    return(NULL);
2480 	}
2481 	/* Put in leading '/' plus path */
2482 	uri->path[0] = '/';
2483 	p = uri->path + 1;
2484 	strncpy(p, path, len + 1);
2485     } else {
2486 	uri->path = xmlStrdup(path);
2487 	if (uri->path == NULL) {
2488 	    xmlFreeURI(uri);
2489 	    return(NULL);
2490 	}
2491 	p = uri->path;
2492     }
2493     /* Now change all occurences of '\' to '/' */
2494     while (*p != '\0') {
2495 	if (*p == '\\')
2496 	    *p = '/';
2497 	p++;
2498     }
2499 
2500     if (uri->scheme == NULL) {
2501 	ret = xmlStrdup((const xmlChar *) uri->path);
2502     } else {
2503 	ret = xmlSaveUri(uri);
2504     }
2505 
2506     xmlFreeURI(uri);
2507 #else
2508     ret = xmlStrdup((const xmlChar *) path);
2509 #endif
2510     return(ret);
2511 }
2512 
2513 /**
2514  * xmlPathToURI:
2515  * @path:  the resource locator in a filesystem notation
2516  *
2517  * Constructs an URI expressing the existing path
2518  *
2519  * Returns a new URI, or a duplicate of the path parameter if the
2520  * construction fails. The caller is responsible for freeing the memory
2521  * occupied by the returned string. If there is insufficient memory available,
2522  * or the argument is NULL, the function returns NULL.
2523  */
2524 xmlChar *
xmlPathToURI(const xmlChar * path)2525 xmlPathToURI(const xmlChar *path)
2526 {
2527     xmlURIPtr uri;
2528     xmlURI temp;
2529     xmlChar *ret, *cal;
2530 
2531     if (path == NULL)
2532         return(NULL);
2533 
2534     if ((uri = xmlParseURI((const char *) path)) != NULL) {
2535 	xmlFreeURI(uri);
2536 	return xmlStrdup(path);
2537     }
2538     cal = xmlCanonicPath(path);
2539     if (cal == NULL)
2540         return(NULL);
2541 #if defined(_WIN32) && !defined(__CYGWIN__)
2542     /* xmlCanonicPath can return an URI on Windows (is that the intended behaviour?)
2543        If 'cal' is a valid URI allready then we are done here, as continuing would make
2544        it invalid. */
2545     if ((uri = xmlParseURI((const char *) cal)) != NULL) {
2546 	xmlFreeURI(uri);
2547 	return cal;
2548     }
2549     /* 'cal' can contain a relative path with backslashes. If that is processed
2550        by xmlSaveURI, they will be escaped and the external entity loader machinery
2551        will fail. So convert them to slashes. Misuse 'ret' for walking. */
2552     ret = cal;
2553     while (*ret != '\0') {
2554 	if (*ret == '\\')
2555 	    *ret = '/';
2556 	ret++;
2557     }
2558 #endif
2559     memset(&temp, 0, sizeof(temp));
2560     temp.path = (char *) cal;
2561     ret = xmlSaveUri(&temp);
2562     xmlFree(cal);
2563     return(ret);
2564 }
2565 #define bottom_uri
2566 #include "elfgcchack.h"
2567