1 /**
2  * uri.c: set of generic URI related routines
3  *
4  * Reference: RFCs 3986, 2732 and 2373
5  *
6  * See Copyright for the status of this software.
7  *
8  * daniel@veillard.com
9  */
10 
11 #define IN_LIBXML
12 #include "libxml.h"
13 
14 #include <string.h>
15 
16 #include <libxml/xmlmemory.h>
17 #include <libxml/uri.h>
18 #include <libxml/globals.h>
19 #include <libxml/xmlerror.h>
20 
/**
 * MAX_URI_LENGTH:
 *
 * The definition of the URI regexp in the above RFC has no size limit.
 * In practice URIs are usually relatively short, except for the
 * data URI scheme as defined in RFC 2397. Even for data URIs the usual
 * maximum size before hitting random practical limits is around 64 KB,
 * and 4 KB is usually the maximum admitted limit for proper operations.
 * The value below is more a security limit than anything else and
 * really should never be hit by 'normal' operations.
 * Set to 1 MByte in 2012; this is only enforced on output.
 *
 * The expansion is parenthesized so that the macro can be used safely
 * inside larger expressions (division, modulo, ...).
 */
#define MAX_URI_LENGTH (1024 * 1024)
34 
35 static void
xmlURIErrMemory(const char * extra)36 xmlURIErrMemory(const char *extra)
37 {
38     if (extra)
39         __xmlRaiseError(NULL, NULL, NULL,
40                         NULL, NULL, XML_FROM_URI,
41                         XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0,
42                         extra, NULL, NULL, 0, 0,
43                         "Memory allocation failed : %s\n", extra);
44     else
45         __xmlRaiseError(NULL, NULL, NULL,
46                         NULL, NULL, XML_FROM_URI,
47                         XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0,
48                         NULL, NULL, NULL, 0, 0,
49                         "Memory allocation failed\n");
50 }
51 
52 static void xmlCleanURI(xmlURIPtr uri);
53 
/*
 * Character classes from RFC 2396, used by the legacy handling code.
 * Macros taking "x" receive a character value; IS_UNWISE and NEXT
 * receive a pointer into the string being scanned.
 */

/*
 * lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" |
 *            "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" |
 *            "u" | "v" | "w" | "x" | "y" | "z"
 */
#define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z'))

/*
 * upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" |
 *           "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" |
 *           "U" | "V" | "W" | "X" | "Y" | "Z"
 */
#define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z'))

/*
 * alpha    = lowalpha | upalpha
 */
#define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))

#ifdef IS_DIGIT
#undef IS_DIGIT
#endif
/*
 * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
 */
#define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9'))

/*
 * alphanum = alpha | digit
 */
#define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))

/*
 * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
 */
#define IS_MARK(x) (((x) == '-') || ((x) == '_') || ((x) == '.') ||     \
    ((x) == '!') || ((x) == '~') || ((x) == '*') || ((x) == '\'') ||    \
    ((x) == '(') || ((x) == ')'))

/*
 * unwise = "{" | "}" | "|" | "\" | "^" | "[" | "]" | "`"
 *
 * Note: unlike the other legacy macros this one takes a pointer.
 */
#define IS_UNWISE(p)                                                    \
      (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) ||         \
       ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) ||        \
       ((*(p) == ']')) || ((*(p) == '`')))

/*
 * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," |
 *            "[" | "]"
 */
#define IS_RESERVED(x) (((x) == ';') || ((x) == '/') || ((x) == '?') || \
        ((x) == ':') || ((x) == '@') || ((x) == '&') || ((x) == '=') || \
        ((x) == '+') || ((x) == '$') || ((x) == ',') || ((x) == '[') || \
        ((x) == ']'))

/*
 * unreserved = alphanum | mark
 */
#define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))

/*
 * Skip to the next character, stepping over a whole "%XX" escape at once.
 * The macro only looks at the current character: it does not verify
 * that two more characters follow a '%', so callers must have validated
 * the escape beforehand. The argument is an lvalue pointer and is
 * evaluated more than once; it is parenthesized so that expressions
 * such as NEXT(*pp) advance the intended pointer.
 */
#define NEXT(p) ((*(p) == '%') ? ((p) += 3) : (p)++)
127 
/*
 * Productions from the spec.
 *
 *    authority     = server | reg_name
 *    reg_name      = 1*( unreserved | escaped | "$" | "," |
 *                        ";" | ":" | "@" | "&" | "=" | "+" )
 *
 * path          = [ abs_path | opaque_part ]
 */

/*
 * Duplicate the first @n bytes of @s through xmlStrndup() and view the
 * result as a plain char string. The whole expansion is parenthesized
 * so that the cast cannot bind to only part of a surrounding expression.
 */
#define STRNDUP(s, n) ((char *) xmlStrndup((const xmlChar *)(s), (n)))
139 
140 /************************************************************************
141  *									*
142  *                         RFC 3986 parser				*
143  *									*
144  ************************************************************************/
145 
/*
 * Character class tests for the RFC 3986 grammar. Every macro takes a
 * pointer into the string being parsed and inspects the character(s)
 * found there; none of them advances the pointer. The argument may be
 * evaluated several times, so it must be free of side effects.
 */
#define ISA_DIGIT(p) ((*(p) >= '0') && (*(p) <= '9'))
#define ISA_ALPHA(p) (((*(p) >= 'a') && (*(p) <= 'z')) ||		\
                      ((*(p) >= 'A') && (*(p) <= 'Z')))
#define ISA_HEXDIG(p)							\
       (ISA_DIGIT(p) || ((*(p) >= 'a') && (*(p) <= 'f')) ||		\
        ((*(p) >= 'A') && (*(p) <= 'F')))

/*
 *    sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
 *                     / "*" / "+" / "," / ";" / "="
 */
#define ISA_SUB_DELIM(p)						\
      (((*(p) == '!')) || ((*(p) == '$')) || ((*(p) == '&')) ||		\
       ((*(p) == '(')) || ((*(p) == ')')) || ((*(p) == '*')) ||		\
       ((*(p) == '+')) || ((*(p) == ',')) || ((*(p) == ';')) ||		\
       ((*(p) == '=')) || ((*(p) == '\'')))

/*
 *    gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
 */
#define ISA_GEN_DELIM(p)						\
      (((*(p) == ':')) || ((*(p) == '/')) || ((*(p) == '?')) ||         \
       ((*(p) == '#')) || ((*(p) == '[')) || ((*(p) == ']')) ||         \
       ((*(p) == '@')))

/*
 *    reserved      = gen-delims / sub-delims
 */
#define ISA_RESERVED(p) (ISA_GEN_DELIM(p) || (ISA_SUB_DELIM(p)))

/*
 *    unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
 */
#define ISA_UNRESERVED(p)						\
      ((ISA_ALPHA(p)) || (ISA_DIGIT(p)) || ((*(p) == '-')) ||		\
       ((*(p) == '.')) || ((*(p) == '_')) || ((*(p) == '~')))

/*
 *    pct-encoded   = "%" HEXDIG HEXDIG
 *
 * The two following characters are only read once the '%' test has
 * succeeded, thanks to the short-circuit evaluation of "&&". The
 * argument is parenthesized before the offsets are added so that a
 * compound expression passed as @p is offset as a whole.
 */
#define ISA_PCT_ENCODED(p)						\
     ((*(p) == '%') && (ISA_HEXDIG((p) + 1)) && (ISA_HEXDIG((p) + 2)))

/*
 *    pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
 */
#define ISA_PCHAR(p)							\
     (ISA_UNRESERVED(p) || ISA_PCT_ENCODED(p) || ISA_SUB_DELIM(p) ||	\
      ((*(p) == ':')) || ((*(p) == '@')))
195 
196 /**
197  * xmlParse3986Scheme:
198  * @uri:  pointer to an URI structure
199  * @str:  pointer to the string to analyze
200  *
201  * Parse an URI scheme
202  *
203  * ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
204  *
205  * Returns 0 or the error code
206  */
207 static int
xmlParse3986Scheme(xmlURIPtr uri,const char ** str)208 xmlParse3986Scheme(xmlURIPtr uri, const char **str) {
209     const char *cur;
210 
211     if (str == NULL)
212 	return(-1);
213 
214     cur = *str;
215     if (!ISA_ALPHA(cur))
216 	return(2);
217     cur++;
218     while (ISA_ALPHA(cur) || ISA_DIGIT(cur) ||
219            (*cur == '+') || (*cur == '-') || (*cur == '.')) cur++;
220     if (uri != NULL) {
221 	if (uri->scheme != NULL) xmlFree(uri->scheme);
222 	uri->scheme = STRNDUP(*str, cur - *str);
223     }
224     *str = cur;
225     return(0);
226 }
227 
228 /**
229  * xmlParse3986Fragment:
230  * @uri:  pointer to an URI structure
231  * @str:  pointer to the string to analyze
232  *
233  * Parse the query part of an URI
234  *
235  * fragment      = *( pchar / "/" / "?" )
236  * NOTE: the strict syntax as defined by 3986 does not allow '[' and ']'
237  *       in the fragment identifier but this is used very broadly for
238  *       xpointer scheme selection, so we are allowing it here to not break
239  *       for example all the DocBook processing chains.
240  *
241  * Returns 0 or the error code
242  */
243 static int
xmlParse3986Fragment(xmlURIPtr uri,const char ** str)244 xmlParse3986Fragment(xmlURIPtr uri, const char **str)
245 {
246     const char *cur;
247 
248     if (str == NULL)
249         return (-1);
250 
251     cur = *str;
252 
253     while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
254            (*cur == '[') || (*cur == ']') ||
255            ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
256         NEXT(cur);
257     if (uri != NULL) {
258         if (uri->fragment != NULL)
259             xmlFree(uri->fragment);
260 	if (uri->cleanup & 2)
261 	    uri->fragment = STRNDUP(*str, cur - *str);
262 	else
263 	    uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL);
264     }
265     *str = cur;
266     return (0);
267 }
268 
269 /**
270  * xmlParse3986Query:
271  * @uri:  pointer to an URI structure
272  * @str:  pointer to the string to analyze
273  *
274  * Parse the query part of an URI
275  *
276  * query = *uric
277  *
278  * Returns 0 or the error code
279  */
280 static int
xmlParse3986Query(xmlURIPtr uri,const char ** str)281 xmlParse3986Query(xmlURIPtr uri, const char **str)
282 {
283     const char *cur;
284 
285     if (str == NULL)
286         return (-1);
287 
288     cur = *str;
289 
290     while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
291            ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
292         NEXT(cur);
293     if (uri != NULL) {
294         if (uri->query != NULL)
295             xmlFree(uri->query);
296 	if (uri->cleanup & 2)
297 	    uri->query = STRNDUP(*str, cur - *str);
298 	else
299 	    uri->query = xmlURIUnescapeString(*str, cur - *str, NULL);
300 
301 	/* Save the raw bytes of the query as well.
302 	 * See: http://mail.gnome.org/archives/xml/2007-April/thread.html#00114
303 	 */
304 	if (uri->query_raw != NULL)
305 	    xmlFree (uri->query_raw);
306 	uri->query_raw = STRNDUP (*str, cur - *str);
307     }
308     *str = cur;
309     return (0);
310 }
311 
312 /**
313  * xmlParse3986Port:
314  * @uri:  pointer to an URI structure
315  * @str:  the string to analyze
316  *
317  * Parse a port part and fills in the appropriate fields
318  * of the @uri structure
319  *
320  * port          = *DIGIT
321  *
322  * Returns 0 or the error code
323  */
324 static int
xmlParse3986Port(xmlURIPtr uri,const char ** str)325 xmlParse3986Port(xmlURIPtr uri, const char **str)
326 {
327     const char *cur = *str;
328     unsigned port = 0; /* unsigned for defined overflow behavior */
329 
330     if (ISA_DIGIT(cur)) {
331 	while (ISA_DIGIT(cur)) {
332 	    port = port * 10 + (*cur - '0');
333 
334 	    cur++;
335 	}
336 	if (uri != NULL)
337 	    uri->port = port & INT_MAX; /* port value modulo INT_MAX+1 */
338 	*str = cur;
339 	return(0);
340     }
341     return(1);
342 }
343 
344 /**
345  * xmlParse3986Userinfo:
346  * @uri:  pointer to an URI structure
347  * @str:  the string to analyze
348  *
349  * Parse an user informations part and fills in the appropriate fields
350  * of the @uri structure
351  *
352  * userinfo      = *( unreserved / pct-encoded / sub-delims / ":" )
353  *
354  * Returns 0 or the error code
355  */
356 static int
xmlParse3986Userinfo(xmlURIPtr uri,const char ** str)357 xmlParse3986Userinfo(xmlURIPtr uri, const char **str)
358 {
359     const char *cur;
360 
361     cur = *str;
362     while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) ||
363            ISA_SUB_DELIM(cur) || (*cur == ':'))
364 	NEXT(cur);
365     if (*cur == '@') {
366 	if (uri != NULL) {
367 	    if (uri->user != NULL) xmlFree(uri->user);
368 	    if (uri->cleanup & 2)
369 		uri->user = STRNDUP(*str, cur - *str);
370 	    else
371 		uri->user = xmlURIUnescapeString(*str, cur - *str, NULL);
372 	}
373 	*str = cur;
374 	return(0);
375     }
376     return(1);
377 }
378 
/**
 * xmlParse3986DecOctet:
 * @str:  the string to analyze
 *
 *    dec-octet     = DIGIT                 ; 0-9
 *                  / %x31-39 DIGIT         ; 10-99
 *                  / "1" 2DIGIT            ; 100-199
 *                  / "2" %x30-34 DIGIT     ; 200-249
 *                  / "25" %x30-35          ; 250-255
 *
 * Skip a dec-octet. At most three digits are consumed; *@str is only
 * updated on success. Values with a leading zero ("01") and values
 * above 255 are rejected.
 *
 * Returns 0 if found and skipped, 1 otherwise
 */
static int
xmlParse3986DecOctet(const char **str) {
    const char *cur = *str;
    char first = cur[0];

    if ((first < '0') || (first > '9'))
        return(1);

    /*
     * cur[2] is only read once cur[1] is known to be a digit, so the
     * scan never goes past the terminating 0 of the string.
     */
    if ((cur[1] < '0') || (cur[1] > '9')) {
        /* DIGIT: 0-9 */
        cur += 1;
    } else if ((cur[2] < '0') || (cur[2] > '9')) {
        /* %x31-39 DIGIT: 10-99, a leading zero is not allowed */
        if (first == '0')
            return(1);
        cur += 2;
    } else if (first == '1') {
        /* "1" 2DIGIT: 100-199 */
        cur += 3;
    } else if ((first == '2') && (cur[1] <= '4')) {
        /* "2" %x30-34 DIGIT: 200-249 */
        cur += 3;
    } else if ((first == '2') && (cur[1] == '5') && (cur[2] <= '5')) {
        /* "25" %x30-35: 250-255, the bound applies to the LAST digit */
        cur += 3;
    } else {
        return(1);
    }

    *str = cur;
    return(0);
}
416 /**
417  * xmlParse3986Host:
418  * @uri:  pointer to an URI structure
419  * @str:  the string to analyze
420  *
421  * Parse an host part and fills in the appropriate fields
422  * of the @uri structure
423  *
424  * host          = IP-literal / IPv4address / reg-name
425  * IP-literal    = "[" ( IPv6address / IPvFuture  ) "]"
426  * IPv4address   = dec-octet "." dec-octet "." dec-octet "." dec-octet
427  * reg-name      = *( unreserved / pct-encoded / sub-delims )
428  *
429  * Returns 0 or the error code
430  */
431 static int
xmlParse3986Host(xmlURIPtr uri,const char ** str)432 xmlParse3986Host(xmlURIPtr uri, const char **str)
433 {
434     const char *cur = *str;
435     const char *host;
436 
437     host = cur;
438     /*
439      * IPv6 and future adressing scheme are enclosed between brackets
440      */
441     if (*cur == '[') {
442         cur++;
443 	while ((*cur != ']') && (*cur != 0))
444 	    cur++;
445 	if (*cur != ']')
446 	    return(1);
447 	cur++;
448 	goto found;
449     }
450     /*
451      * try to parse an IPv4
452      */
453     if (ISA_DIGIT(cur)) {
454         if (xmlParse3986DecOctet(&cur) != 0)
455 	    goto not_ipv4;
456 	if (*cur != '.')
457 	    goto not_ipv4;
458 	cur++;
459         if (xmlParse3986DecOctet(&cur) != 0)
460 	    goto not_ipv4;
461 	if (*cur != '.')
462 	    goto not_ipv4;
463         if (xmlParse3986DecOctet(&cur) != 0)
464 	    goto not_ipv4;
465 	if (*cur != '.')
466 	    goto not_ipv4;
467         if (xmlParse3986DecOctet(&cur) != 0)
468 	    goto not_ipv4;
469 	goto found;
470 not_ipv4:
471         cur = *str;
472     }
473     /*
474      * then this should be a hostname which can be empty
475      */
476     while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) || ISA_SUB_DELIM(cur))
477         NEXT(cur);
478 found:
479     if (uri != NULL) {
480 	if (uri->authority != NULL) xmlFree(uri->authority);
481 	uri->authority = NULL;
482 	if (uri->server != NULL) xmlFree(uri->server);
483 	if (cur != host) {
484 	    if (uri->cleanup & 2)
485 		uri->server = STRNDUP(host, cur - host);
486 	    else
487 		uri->server = xmlURIUnescapeString(host, cur - host, NULL);
488 	} else
489 	    uri->server = NULL;
490     }
491     *str = cur;
492     return(0);
493 }
494 
495 /**
496  * xmlParse3986Authority:
497  * @uri:  pointer to an URI structure
498  * @str:  the string to analyze
499  *
500  * Parse an authority part and fills in the appropriate fields
501  * of the @uri structure
502  *
503  * authority     = [ userinfo "@" ] host [ ":" port ]
504  *
505  * Returns 0 or the error code
506  */
507 static int
xmlParse3986Authority(xmlURIPtr uri,const char ** str)508 xmlParse3986Authority(xmlURIPtr uri, const char **str)
509 {
510     const char *cur;
511     int ret;
512 
513     cur = *str;
514     /*
515      * try to parse an userinfo and check for the trailing @
516      */
517     ret = xmlParse3986Userinfo(uri, &cur);
518     if ((ret != 0) || (*cur != '@'))
519         cur = *str;
520     else
521         cur++;
522     ret = xmlParse3986Host(uri, &cur);
523     if (ret != 0) return(ret);
524     if (*cur == ':') {
525         cur++;
526         ret = xmlParse3986Port(uri, &cur);
527 	if (ret != 0) return(ret);
528     }
529     *str = cur;
530     return(0);
531 }
532 
/**
 * xmlParse3986Segment:
 * @str:  the string to analyze
 * @forbid: an optional forbidden character, 0 for none
 * @empty: allow an empty segment
 *
 * Skip over a path segment. Nothing is stored: on success *@str is
 * simply moved to the first character which is not part of the segment.
 *
 * segment       = *pchar
 * segment-nz    = 1*pchar
 * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
 *               ; non-zero-length segment without any colon ":"
 *
 * Returns 0 on success, 1 if the segment is empty while @empty is 0
 */
static int
xmlParse3986Segment(const char **str, char forbid, int empty)
{
    const char *cur = *str;

    while (ISA_PCHAR(cur) && (*cur != forbid))
        NEXT(cur);

    /*
     * Nothing consumed: either the first character is not a pchar or it
     * is the forbidden one. Both cases make the segment empty, which is
     * only acceptable when the caller allows it.
     */
    if ((cur == *str) && (!empty))
        return(1);

    *str = cur;
    return (0);
}
565 
566 /**
567  * xmlParse3986PathAbEmpty:
568  * @uri:  pointer to an URI structure
569  * @str:  the string to analyze
570  *
571  * Parse an path absolute or empty and fills in the appropriate fields
572  * of the @uri structure
573  *
574  * path-abempty  = *( "/" segment )
575  *
576  * Returns 0 or the error code
577  */
578 static int
xmlParse3986PathAbEmpty(xmlURIPtr uri,const char ** str)579 xmlParse3986PathAbEmpty(xmlURIPtr uri, const char **str)
580 {
581     const char *cur;
582     int ret;
583 
584     cur = *str;
585 
586     while (*cur == '/') {
587         cur++;
588 	ret = xmlParse3986Segment(&cur, 0, 1);
589 	if (ret != 0) return(ret);
590     }
591     if (uri != NULL) {
592 	if (uri->path != NULL) xmlFree(uri->path);
593         if (*str != cur) {
594             if (uri->cleanup & 2)
595                 uri->path = STRNDUP(*str, cur - *str);
596             else
597                 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
598         } else {
599             uri->path = NULL;
600         }
601     }
602     *str = cur;
603     return (0);
604 }
605 
606 /**
607  * xmlParse3986PathAbsolute:
608  * @uri:  pointer to an URI structure
609  * @str:  the string to analyze
610  *
611  * Parse an path absolute and fills in the appropriate fields
612  * of the @uri structure
613  *
614  * path-absolute = "/" [ segment-nz *( "/" segment ) ]
615  *
616  * Returns 0 or the error code
617  */
618 static int
xmlParse3986PathAbsolute(xmlURIPtr uri,const char ** str)619 xmlParse3986PathAbsolute(xmlURIPtr uri, const char **str)
620 {
621     const char *cur;
622     int ret;
623 
624     cur = *str;
625 
626     if (*cur != '/')
627         return(1);
628     cur++;
629     ret = xmlParse3986Segment(&cur, 0, 0);
630     if (ret == 0) {
631 	while (*cur == '/') {
632 	    cur++;
633 	    ret = xmlParse3986Segment(&cur, 0, 1);
634 	    if (ret != 0) return(ret);
635 	}
636     }
637     if (uri != NULL) {
638 	if (uri->path != NULL) xmlFree(uri->path);
639         if (cur != *str) {
640             if (uri->cleanup & 2)
641                 uri->path = STRNDUP(*str, cur - *str);
642             else
643                 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
644         } else {
645             uri->path = NULL;
646         }
647     }
648     *str = cur;
649     return (0);
650 }
651 
652 /**
653  * xmlParse3986PathRootless:
654  * @uri:  pointer to an URI structure
655  * @str:  the string to analyze
656  *
657  * Parse an path without root and fills in the appropriate fields
658  * of the @uri structure
659  *
660  * path-rootless = segment-nz *( "/" segment )
661  *
662  * Returns 0 or the error code
663  */
664 static int
xmlParse3986PathRootless(xmlURIPtr uri,const char ** str)665 xmlParse3986PathRootless(xmlURIPtr uri, const char **str)
666 {
667     const char *cur;
668     int ret;
669 
670     cur = *str;
671 
672     ret = xmlParse3986Segment(&cur, 0, 0);
673     if (ret != 0) return(ret);
674     while (*cur == '/') {
675         cur++;
676 	ret = xmlParse3986Segment(&cur, 0, 1);
677 	if (ret != 0) return(ret);
678     }
679     if (uri != NULL) {
680 	if (uri->path != NULL) xmlFree(uri->path);
681         if (cur != *str) {
682             if (uri->cleanup & 2)
683                 uri->path = STRNDUP(*str, cur - *str);
684             else
685                 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
686         } else {
687             uri->path = NULL;
688         }
689     }
690     *str = cur;
691     return (0);
692 }
693 
694 /**
695  * xmlParse3986PathNoScheme:
696  * @uri:  pointer to an URI structure
697  * @str:  the string to analyze
698  *
699  * Parse an path which is not a scheme and fills in the appropriate fields
700  * of the @uri structure
701  *
702  * path-noscheme = segment-nz-nc *( "/" segment )
703  *
704  * Returns 0 or the error code
705  */
706 static int
xmlParse3986PathNoScheme(xmlURIPtr uri,const char ** str)707 xmlParse3986PathNoScheme(xmlURIPtr uri, const char **str)
708 {
709     const char *cur;
710     int ret;
711 
712     cur = *str;
713 
714     ret = xmlParse3986Segment(&cur, ':', 0);
715     if (ret != 0) return(ret);
716     while (*cur == '/') {
717         cur++;
718 	ret = xmlParse3986Segment(&cur, 0, 1);
719 	if (ret != 0) return(ret);
720     }
721     if (uri != NULL) {
722 	if (uri->path != NULL) xmlFree(uri->path);
723         if (cur != *str) {
724             if (uri->cleanup & 2)
725                 uri->path = STRNDUP(*str, cur - *str);
726             else
727                 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
728         } else {
729             uri->path = NULL;
730         }
731     }
732     *str = cur;
733     return (0);
734 }
735 
736 /**
737  * xmlParse3986HierPart:
738  * @uri:  pointer to an URI structure
739  * @str:  the string to analyze
740  *
741  * Parse an hierarchical part and fills in the appropriate fields
742  * of the @uri structure
743  *
744  * hier-part     = "//" authority path-abempty
745  *                / path-absolute
746  *                / path-rootless
747  *                / path-empty
748  *
749  * Returns 0 or the error code
750  */
751 static int
xmlParse3986HierPart(xmlURIPtr uri,const char ** str)752 xmlParse3986HierPart(xmlURIPtr uri, const char **str)
753 {
754     const char *cur;
755     int ret;
756 
757     cur = *str;
758 
759     if ((*cur == '/') && (*(cur + 1) == '/')) {
760         cur += 2;
761 	ret = xmlParse3986Authority(uri, &cur);
762 	if (ret != 0) return(ret);
763 	if (uri->server == NULL)
764 	    uri->port = -1;
765 	ret = xmlParse3986PathAbEmpty(uri, &cur);
766 	if (ret != 0) return(ret);
767 	*str = cur;
768 	return(0);
769     } else if (*cur == '/') {
770         ret = xmlParse3986PathAbsolute(uri, &cur);
771 	if (ret != 0) return(ret);
772     } else if (ISA_PCHAR(cur)) {
773         ret = xmlParse3986PathRootless(uri, &cur);
774 	if (ret != 0) return(ret);
775     } else {
776 	/* path-empty is effectively empty */
777 	if (uri != NULL) {
778 	    if (uri->path != NULL) xmlFree(uri->path);
779 	    uri->path = NULL;
780 	}
781     }
782     *str = cur;
783     return (0);
784 }
785 
786 /**
787  * xmlParse3986RelativeRef:
788  * @uri:  pointer to an URI structure
789  * @str:  the string to analyze
790  *
791  * Parse an URI string and fills in the appropriate fields
792  * of the @uri structure
793  *
794  * relative-ref  = relative-part [ "?" query ] [ "#" fragment ]
795  * relative-part = "//" authority path-abempty
796  *               / path-absolute
797  *               / path-noscheme
798  *               / path-empty
799  *
800  * Returns 0 or the error code
801  */
802 static int
xmlParse3986RelativeRef(xmlURIPtr uri,const char * str)803 xmlParse3986RelativeRef(xmlURIPtr uri, const char *str) {
804     int ret;
805 
806     if ((*str == '/') && (*(str + 1) == '/')) {
807         str += 2;
808 	ret = xmlParse3986Authority(uri, &str);
809 	if (ret != 0) return(ret);
810 	ret = xmlParse3986PathAbEmpty(uri, &str);
811 	if (ret != 0) return(ret);
812     } else if (*str == '/') {
813 	ret = xmlParse3986PathAbsolute(uri, &str);
814 	if (ret != 0) return(ret);
815     } else if (ISA_PCHAR(str)) {
816         ret = xmlParse3986PathNoScheme(uri, &str);
817 	if (ret != 0) return(ret);
818     } else {
819 	/* path-empty is effectively empty */
820 	if (uri != NULL) {
821 	    if (uri->path != NULL) xmlFree(uri->path);
822 	    uri->path = NULL;
823 	}
824     }
825 
826     if (*str == '?') {
827 	str++;
828 	ret = xmlParse3986Query(uri, &str);
829 	if (ret != 0) return(ret);
830     }
831     if (*str == '#') {
832 	str++;
833 	ret = xmlParse3986Fragment(uri, &str);
834 	if (ret != 0) return(ret);
835     }
836     if (*str != 0) {
837 	xmlCleanURI(uri);
838 	return(1);
839     }
840     return(0);
841 }
842 
843 
844 /**
845  * xmlParse3986URI:
846  * @uri:  pointer to an URI structure
847  * @str:  the string to analyze
848  *
849  * Parse an URI string and fills in the appropriate fields
850  * of the @uri structure
851  *
852  * scheme ":" hier-part [ "?" query ] [ "#" fragment ]
853  *
854  * Returns 0 or the error code
855  */
856 static int
xmlParse3986URI(xmlURIPtr uri,const char * str)857 xmlParse3986URI(xmlURIPtr uri, const char *str) {
858     int ret;
859 
860     ret = xmlParse3986Scheme(uri, &str);
861     if (ret != 0) return(ret);
862     if (*str != ':') {
863 	return(1);
864     }
865     str++;
866     ret = xmlParse3986HierPart(uri, &str);
867     if (ret != 0) return(ret);
868     if (*str == '?') {
869 	str++;
870 	ret = xmlParse3986Query(uri, &str);
871 	if (ret != 0) return(ret);
872     }
873     if (*str == '#') {
874 	str++;
875 	ret = xmlParse3986Fragment(uri, &str);
876 	if (ret != 0) return(ret);
877     }
878     if (*str != 0) {
879 	xmlCleanURI(uri);
880 	return(1);
881     }
882     return(0);
883 }
884 
885 /**
886  * xmlParse3986URIReference:
887  * @uri:  pointer to an URI structure
888  * @str:  the string to analyze
889  *
890  * Parse an URI reference string and fills in the appropriate fields
891  * of the @uri structure
892  *
893  * URI-reference = URI / relative-ref
894  *
895  * Returns 0 or the error code
896  */
897 static int
xmlParse3986URIReference(xmlURIPtr uri,const char * str)898 xmlParse3986URIReference(xmlURIPtr uri, const char *str) {
899     int ret;
900 
901     if (str == NULL)
902 	return(-1);
903     xmlCleanURI(uri);
904 
905     /*
906      * Try first to parse absolute refs, then fallback to relative if
907      * it fails.
908      */
909     ret = xmlParse3986URI(uri, str);
910     if (ret != 0) {
911 	xmlCleanURI(uri);
912         ret = xmlParse3986RelativeRef(uri, str);
913 	if (ret != 0) {
914 	    xmlCleanURI(uri);
915 	    return(ret);
916 	}
917     }
918     return(0);
919 }
920 
921 /**
922  * xmlParseURI:
923  * @str:  the URI string to analyze
924  *
925  * Parse an URI based on RFC 3986
926  *
927  * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
928  *
929  * Returns a newly built xmlURIPtr or NULL in case of error
930  */
931 xmlURIPtr
xmlParseURI(const char * str)932 xmlParseURI(const char *str) {
933     xmlURIPtr uri;
934     int ret;
935 
936     if (str == NULL)
937 	return(NULL);
938     uri = xmlCreateURI();
939     if (uri != NULL) {
940 	ret = xmlParse3986URIReference(uri, str);
941         if (ret) {
942 	    xmlFreeURI(uri);
943 	    return(NULL);
944 	}
945     }
946     return(uri);
947 }
948 
949 /**
950  * xmlParseURIReference:
951  * @uri:  pointer to an URI structure
952  * @str:  the string to analyze
953  *
954  * Parse an URI reference string based on RFC 3986 and fills in the
955  * appropriate fields of the @uri structure
956  *
957  * URI-reference = URI / relative-ref
958  *
959  * Returns 0 or the error code
960  */
961 int
xmlParseURIReference(xmlURIPtr uri,const char * str)962 xmlParseURIReference(xmlURIPtr uri, const char *str) {
963     return(xmlParse3986URIReference(uri, str));
964 }
965 
966 /**
967  * xmlParseURIRaw:
968  * @str:  the URI string to analyze
969  * @raw:  if 1 unescaping of URI pieces are disabled
970  *
971  * Parse an URI but allows to keep intact the original fragments.
972  *
973  * URI-reference = URI / relative-ref
974  *
975  * Returns a newly built xmlURIPtr or NULL in case of error
976  */
977 xmlURIPtr
xmlParseURIRaw(const char * str,int raw)978 xmlParseURIRaw(const char *str, int raw) {
979     xmlURIPtr uri;
980     int ret;
981 
982     if (str == NULL)
983 	return(NULL);
984     uri = xmlCreateURI();
985     if (uri != NULL) {
986         if (raw) {
987 	    uri->cleanup |= 2;
988 	}
989 	ret = xmlParseURIReference(uri, str);
990         if (ret) {
991 	    xmlFreeURI(uri);
992 	    return(NULL);
993 	}
994     }
995     return(uri);
996 }
997 
998 /************************************************************************
999  *									*
1000  *			Generic URI structure functions			*
1001  *									*
1002  ************************************************************************/
1003 
1004 /**
1005  * xmlCreateURI:
1006  *
1007  * Simply creates an empty xmlURI
1008  *
1009  * Returns the new structure or NULL in case of error
1010  */
1011 xmlURIPtr
xmlCreateURI(void)1012 xmlCreateURI(void) {
1013     xmlURIPtr ret;
1014 
1015     ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI));
1016     if (ret == NULL) {
1017         xmlURIErrMemory("creating URI structure\n");
1018 	return(NULL);
1019     }
1020     memset(ret, 0, sizeof(xmlURI));
1021     return(ret);
1022 }
1023 
1024 /**
1025  * xmlSaveUriRealloc:
1026  *
1027  * Function to handle properly a reallocation when saving an URI
1028  * Also imposes some limit on the length of an URI string output
1029  */
1030 static xmlChar *
xmlSaveUriRealloc(xmlChar * ret,int * max)1031 xmlSaveUriRealloc(xmlChar *ret, int *max) {
1032     xmlChar *temp;
1033     int tmp;
1034 
1035     if (*max > MAX_URI_LENGTH) {
1036         xmlURIErrMemory("reaching arbitrary MAX_URI_LENGTH limit\n");
1037         return(NULL);
1038     }
1039     tmp = *max * 2;
1040     temp = (xmlChar *) xmlRealloc(ret, (tmp + 1));
1041     if (temp == NULL) {
1042         xmlURIErrMemory("saving URI\n");
1043         return(NULL);
1044     }
1045     *max = tmp;
1046     return(temp);
1047 }
1048 
1049 /**
1050  * xmlSaveUri:
1051  * @uri:  pointer to an xmlURI
1052  *
1053  * Save the URI as an escaped string
1054  *
1055  * Returns a new string (to be deallocated by caller)
1056  */
1057 xmlChar *
xmlSaveUri(xmlURIPtr uri)1058 xmlSaveUri(xmlURIPtr uri) {
1059     xmlChar *ret = NULL;
1060     xmlChar *temp;
1061     const char *p;
1062     int len;
1063     int max;
1064 
1065     if (uri == NULL) return(NULL);
1066 
1067 
1068     max = 80;
1069     ret = (xmlChar *) xmlMallocAtomic((max + 1) * sizeof(xmlChar));
1070     if (ret == NULL) {
1071         xmlURIErrMemory("saving URI\n");
1072 	return(NULL);
1073     }
1074     len = 0;
1075 
1076     if (uri->scheme != NULL) {
1077 	p = uri->scheme;
1078 	while (*p != 0) {
1079 	    if (len >= max) {
1080                 temp = xmlSaveUriRealloc(ret, &max);
1081                 if (temp == NULL) goto mem_error;
1082 		ret = temp;
1083 	    }
1084 	    ret[len++] = *p++;
1085 	}
1086 	if (len >= max) {
1087             temp = xmlSaveUriRealloc(ret, &max);
1088             if (temp == NULL) goto mem_error;
1089             ret = temp;
1090 	}
1091 	ret[len++] = ':';
1092     }
1093     if (uri->opaque != NULL) {
1094 	p = uri->opaque;
1095 	while (*p != 0) {
1096 	    if (len + 3 >= max) {
1097                 temp = xmlSaveUriRealloc(ret, &max);
1098                 if (temp == NULL) goto mem_error;
1099                 ret = temp;
1100 	    }
1101 	    if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p)))
1102 		ret[len++] = *p++;
1103 	    else {
1104 		int val = *(unsigned char *)p++;
1105 		int hi = val / 0x10, lo = val % 0x10;
1106 		ret[len++] = '%';
1107 		ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1108 		ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1109 	    }
1110 	}
1111     } else {
1112 	if ((uri->server != NULL) || (uri->port == -1)) {
1113 	    if (len + 3 >= max) {
1114                 temp = xmlSaveUriRealloc(ret, &max);
1115                 if (temp == NULL) goto mem_error;
1116                 ret = temp;
1117 	    }
1118 	    ret[len++] = '/';
1119 	    ret[len++] = '/';
1120 	    if (uri->user != NULL) {
1121 		p = uri->user;
1122 		while (*p != 0) {
1123 		    if (len + 3 >= max) {
1124                         temp = xmlSaveUriRealloc(ret, &max);
1125                         if (temp == NULL) goto mem_error;
1126                         ret = temp;
1127 		    }
1128 		    if ((IS_UNRESERVED(*(p))) ||
1129 			((*(p) == ';')) || ((*(p) == ':')) ||
1130 			((*(p) == '&')) || ((*(p) == '=')) ||
1131 			((*(p) == '+')) || ((*(p) == '$')) ||
1132 			((*(p) == ',')))
1133 			ret[len++] = *p++;
1134 		    else {
1135 			int val = *(unsigned char *)p++;
1136 			int hi = val / 0x10, lo = val % 0x10;
1137 			ret[len++] = '%';
1138 			ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1139 			ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1140 		    }
1141 		}
1142 		if (len + 3 >= max) {
1143                     temp = xmlSaveUriRealloc(ret, &max);
1144                     if (temp == NULL) goto mem_error;
1145                     ret = temp;
1146 		}
1147 		ret[len++] = '@';
1148 	    }
1149 	    if (uri->server != NULL) {
1150 		p = uri->server;
1151 		while (*p != 0) {
1152 		    if (len >= max) {
1153 			temp = xmlSaveUriRealloc(ret, &max);
1154 			if (temp == NULL) goto mem_error;
1155 			ret = temp;
1156 		    }
1157 		    ret[len++] = *p++;
1158 		}
1159 		if (uri->port > 0) {
1160 		    if (len + 10 >= max) {
1161 			temp = xmlSaveUriRealloc(ret, &max);
1162 			if (temp == NULL) goto mem_error;
1163 			ret = temp;
1164 		    }
1165 		    len += snprintf((char *) &ret[len], max - len, ":%d", uri->port);
1166 		}
1167 	    }
1168 	} else if (uri->authority != NULL) {
1169 	    if (len + 3 >= max) {
1170                 temp = xmlSaveUriRealloc(ret, &max);
1171                 if (temp == NULL) goto mem_error;
1172                 ret = temp;
1173 	    }
1174 	    ret[len++] = '/';
1175 	    ret[len++] = '/';
1176 	    p = uri->authority;
1177 	    while (*p != 0) {
1178 		if (len + 3 >= max) {
1179                     temp = xmlSaveUriRealloc(ret, &max);
1180                     if (temp == NULL) goto mem_error;
1181                     ret = temp;
1182 		}
1183 		if ((IS_UNRESERVED(*(p))) ||
1184                     ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||
1185                     ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1186                     ((*(p) == '=')) || ((*(p) == '+')))
1187 		    ret[len++] = *p++;
1188 		else {
1189 		    int val = *(unsigned char *)p++;
1190 		    int hi = val / 0x10, lo = val % 0x10;
1191 		    ret[len++] = '%';
1192 		    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1193 		    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1194 		}
1195 	    }
1196 	} else if (uri->scheme != NULL) {
1197 	    if (len + 3 >= max) {
1198                 temp = xmlSaveUriRealloc(ret, &max);
1199                 if (temp == NULL) goto mem_error;
1200                 ret = temp;
1201 	    }
1202 	}
1203 	if (uri->path != NULL) {
1204 	    p = uri->path;
1205 	    /*
1206 	     * the colon in file:///d: should not be escaped or
1207 	     * Windows accesses fail later.
1208 	     */
1209 	    if ((uri->scheme != NULL) &&
1210 		(p[0] == '/') &&
1211 		(((p[1] >= 'a') && (p[1] <= 'z')) ||
1212 		 ((p[1] >= 'A') && (p[1] <= 'Z'))) &&
1213 		(p[2] == ':') &&
1214 	        (xmlStrEqual(BAD_CAST uri->scheme, BAD_CAST "file"))) {
1215 		if (len + 3 >= max) {
1216                     temp = xmlSaveUriRealloc(ret, &max);
1217                     if (temp == NULL) goto mem_error;
1218                     ret = temp;
1219 		}
1220 		ret[len++] = *p++;
1221 		ret[len++] = *p++;
1222 		ret[len++] = *p++;
1223 	    }
1224 	    while (*p != 0) {
1225 		if (len + 3 >= max) {
1226                     temp = xmlSaveUriRealloc(ret, &max);
1227                     if (temp == NULL) goto mem_error;
1228                     ret = temp;
1229 		}
1230 		if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
1231                     ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1232 	            ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
1233 	            ((*(p) == ',')))
1234 		    ret[len++] = *p++;
1235 		else {
1236 		    int val = *(unsigned char *)p++;
1237 		    int hi = val / 0x10, lo = val % 0x10;
1238 		    ret[len++] = '%';
1239 		    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1240 		    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1241 		}
1242 	    }
1243 	}
1244 	if (uri->query_raw != NULL) {
1245 	    if (len + 1 >= max) {
1246                 temp = xmlSaveUriRealloc(ret, &max);
1247                 if (temp == NULL) goto mem_error;
1248                 ret = temp;
1249 	    }
1250 	    ret[len++] = '?';
1251 	    p = uri->query_raw;
1252 	    while (*p != 0) {
1253 		if (len + 1 >= max) {
1254                     temp = xmlSaveUriRealloc(ret, &max);
1255                     if (temp == NULL) goto mem_error;
1256                     ret = temp;
1257 		}
1258 		ret[len++] = *p++;
1259 	    }
1260 	} else if (uri->query != NULL) {
1261 	    if (len + 3 >= max) {
1262                 temp = xmlSaveUriRealloc(ret, &max);
1263                 if (temp == NULL) goto mem_error;
1264                 ret = temp;
1265 	    }
1266 	    ret[len++] = '?';
1267 	    p = uri->query;
1268 	    while (*p != 0) {
1269 		if (len + 3 >= max) {
1270                     temp = xmlSaveUriRealloc(ret, &max);
1271                     if (temp == NULL) goto mem_error;
1272                     ret = temp;
1273 		}
1274 		if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
1275 		    ret[len++] = *p++;
1276 		else {
1277 		    int val = *(unsigned char *)p++;
1278 		    int hi = val / 0x10, lo = val % 0x10;
1279 		    ret[len++] = '%';
1280 		    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1281 		    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1282 		}
1283 	    }
1284 	}
1285     }
1286     if (uri->fragment != NULL) {
1287 	if (len + 3 >= max) {
1288             temp = xmlSaveUriRealloc(ret, &max);
1289             if (temp == NULL) goto mem_error;
1290             ret = temp;
1291 	}
1292 	ret[len++] = '#';
1293 	p = uri->fragment;
1294 	while (*p != 0) {
1295 	    if (len + 3 >= max) {
1296                 temp = xmlSaveUriRealloc(ret, &max);
1297                 if (temp == NULL) goto mem_error;
1298                 ret = temp;
1299 	    }
1300 	    if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
1301 		ret[len++] = *p++;
1302 	    else {
1303 		int val = *(unsigned char *)p++;
1304 		int hi = val / 0x10, lo = val % 0x10;
1305 		ret[len++] = '%';
1306 		ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1307 		ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1308 	    }
1309 	}
1310     }
1311     if (len >= max) {
1312         temp = xmlSaveUriRealloc(ret, &max);
1313         if (temp == NULL) goto mem_error;
1314         ret = temp;
1315     }
1316     ret[len] = 0;
1317     return(ret);
1318 
1319 mem_error:
1320     xmlFree(ret);
1321     return(NULL);
1322 }
1323 
1324 /**
1325  * xmlPrintURI:
1326  * @stream:  a FILE* for the output
1327  * @uri:  pointer to an xmlURI
1328  *
1329  * Prints the URI in the stream @stream.
1330  */
1331 void
xmlPrintURI(FILE * stream,xmlURIPtr uri)1332 xmlPrintURI(FILE *stream, xmlURIPtr uri) {
1333     xmlChar *out;
1334 
1335     out = xmlSaveUri(uri);
1336     if (out != NULL) {
1337 	fprintf(stream, "%s", (char *) out);
1338 	xmlFree(out);
1339     }
1340 }
1341 
1342 /**
1343  * xmlCleanURI:
1344  * @uri:  pointer to an xmlURI
1345  *
1346  * Make sure the xmlURI struct is free of content
1347  */
1348 static void
xmlCleanURI(xmlURIPtr uri)1349 xmlCleanURI(xmlURIPtr uri) {
1350     if (uri == NULL) return;
1351 
1352     if (uri->scheme != NULL) xmlFree(uri->scheme);
1353     uri->scheme = NULL;
1354     if (uri->server != NULL) xmlFree(uri->server);
1355     uri->server = NULL;
1356     if (uri->user != NULL) xmlFree(uri->user);
1357     uri->user = NULL;
1358     if (uri->path != NULL) xmlFree(uri->path);
1359     uri->path = NULL;
1360     if (uri->fragment != NULL) xmlFree(uri->fragment);
1361     uri->fragment = NULL;
1362     if (uri->opaque != NULL) xmlFree(uri->opaque);
1363     uri->opaque = NULL;
1364     if (uri->authority != NULL) xmlFree(uri->authority);
1365     uri->authority = NULL;
1366     if (uri->query != NULL) xmlFree(uri->query);
1367     uri->query = NULL;
1368     if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1369     uri->query_raw = NULL;
1370 }
1371 
1372 /**
1373  * xmlFreeURI:
1374  * @uri:  pointer to an xmlURI
1375  *
1376  * Free up the xmlURI struct
1377  */
1378 void
xmlFreeURI(xmlURIPtr uri)1379 xmlFreeURI(xmlURIPtr uri) {
1380     if (uri == NULL) return;
1381 
1382     if (uri->scheme != NULL) xmlFree(uri->scheme);
1383     if (uri->server != NULL) xmlFree(uri->server);
1384     if (uri->user != NULL) xmlFree(uri->user);
1385     if (uri->path != NULL) xmlFree(uri->path);
1386     if (uri->fragment != NULL) xmlFree(uri->fragment);
1387     if (uri->opaque != NULL) xmlFree(uri->opaque);
1388     if (uri->authority != NULL) xmlFree(uri->authority);
1389     if (uri->query != NULL) xmlFree(uri->query);
1390     if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1391     xmlFree(uri);
1392 }
1393 
1394 /************************************************************************
1395  *									*
1396  *			Helper functions				*
1397  *									*
1398  ************************************************************************/
1399 
/**
 * xmlNormalizeURIPath:
 * @path:  pointer to the path string
 *
 * Applies the 5 normalization steps to a path string--that is, RFC 2396
 * Section 5.2, steps 6.c through 6.g. Repeated '/' between segments are
 * collapsed to a single '/' along the way; leading '/' are kept as is.
 *
 * Normalization occurs directly on the string, no new allocation is done
 *
 * Returns 0, or -1 if @path is NULL
 */
int
xmlNormalizeURIPath(char *path) {
    char *cur, *out;

    if (path == NULL)
        return(-1);

    /* Skip all initial "/" chars.  We want to get to the beginning of the
     * first non-empty segment.
     */
    cur = path;
    while (cur[0] == '/')
      ++cur;
    if (cur[0] == '\0')
      return(0);

    /* Keep everything we've seen so far.  */
    out = cur;

    /*
     * Analyze each segment in sequence for cases (c) and (d).
     * "cur" reads the input, "out" writes the compacted result in place.
     */
    while (cur[0] != '\0') {
        /*
         * c) All occurrences of "./", where "." is a complete path segment,
         *    are removed from the buffer string.
         */
        if ((cur[0] == '.') && (cur[1] == '/')) {
            cur += 2;
            /* '//' normalization should be done at this point too */
            while (cur[0] == '/')
                cur++;
            continue;
        }

        /*
         * d) If the buffer string ends with "." as a complete path segment,
         *    that "." is removed.
         */
        if ((cur[0] == '.') && (cur[1] == '\0'))
            break;

        /* Otherwise keep the segment.  */
        while (cur[0] != '/') {
            if (cur[0] == '\0')
              goto done_cd;
            (out++)[0] = (cur++)[0];
        }
        /* normalize // : skip all but the last slash of a run */
        while ((cur[0] == '/') && (cur[1] == '/'))
            cur++;

        /* copy the single remaining '/' */
        (out++)[0] = (cur++)[0];
    }
 done_cd:
    out[0] = '\0';

    /* Reset to the beginning of the first segment for the next sequence.  */
    cur = path;
    while (cur[0] == '/')
      ++cur;
    if (cur[0] == '\0')
        return(0);

    /*
     * Analyze each segment in sequence for cases (e) and (f).
     *
     * e) All occurrences of "<segment>/../", where <segment> is a
     *    complete path segment not equal to "..", are removed from the
     *    buffer string.  Removal of these path segments is performed
     *    iteratively, removing the leftmost matching pattern on each
     *    iteration, until no matching pattern remains.
     *
     * f) If the buffer string ends with "<segment>/..", where <segment>
     *    is a complete path segment not equal to "..", that
     *    "<segment>/.." is removed.
     *
     * To satisfy the "iterative" clause in (e), we need to collapse the
     * string every time we find something that needs to be removed.  Thus,
     * we don't need to keep two pointers into the string: we only need a
     * "current position" pointer.
     */
    while (1) {
        char *segp, *tmp;

        /* At the beginning of each iteration of this loop, "cur" points to
         * the first character of the segment we want to examine.
         */

        /* Find the end of the current segment.  */
        segp = cur;
        while ((segp[0] != '/') && (segp[0] != '\0'))
          ++segp;

        /* If this is the last segment, we're done (we need at least two
         * segments to meet the criteria for the (e) and (f) cases).
         */
        if (segp[0] == '\0')
          break;

        /* If the first segment is "..", or if the next segment _isn't_ "..",
         * keep this segment and try the next one.
         */
        ++segp;
        if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur+3))
            || ((segp[0] != '.') || (segp[1] != '.')
                || ((segp[2] != '/') && (segp[2] != '\0')))) {
          cur = segp;
          continue;
        }

        /* If we get here, remove this segment and the next one and back up
         * to the previous segment (if there is one), to implement the
         * "iteratively" clause.  It's pretty much impossible to back up
         * while maintaining two pointers into the buffer, so just compact
         * the whole buffer now.
         */

        /* If this is the end of the buffer, we're done.  */
        if (segp[2] == '\0') {
          cur[0] = '\0';
          break;
        }
        /* Valgrind complained, strcpy(cur, segp + 3); */
        /* string will overlap, do not use strcpy */
        tmp = cur;
        segp += 3;
        while ((*tmp++ = *segp++) != 0)
          ;

        /*
         * If there are no previous segments, then keep going from here.
         * That is the case when "cur" is the very start of the buffer, or
         * when only slashes precede it.  A previous segment that is a
         * single character stored at path[0] (e.g. the "x" of "x/..")
         * must NOT be mistaken for that case, so test what "segp" points
         * to rather than just comparing it with "path".
         */
        segp = cur;
        while ((segp > path) && ((--segp)[0] == '/'))
          ;
        if ((segp == cur) || (segp[0] == '/'))
          continue;

        /* "segp" is pointing to the end of a previous segment; find its
         * start.  We need to back up to the previous segment and start
         * over with that to handle things like "foo/bar/../..".  If we
         * don't do this, then on the first pass we'll remove the "bar/..",
         * but be pointing at the second ".." so we won't realize we can also
         * remove the "foo/..".
         */
        cur = segp;
        while ((cur > path) && (cur[-1] != '/'))
          --cur;
    }
    /*
     * "out" still marks the end of the string as it was after steps (c)
     * and (d); the string can only have become shorter since then, so
     * this write lands at or after the current terminator.
     */
    out[0] = '\0';

    /*
     * g) If the resulting buffer string still begins with one or more
     *    complete path segments of "..", then the reference is
     *    considered to be in error. Implementations may handle this
     *    error by retaining these components in the resolved path (i.e.,
     *    treating them as part of the final URI), by removing them from
     *    the resolved path (i.e., discarding relative levels above the
     *    root), or by avoiding traversal of the reference.
     *
     * We discard them from the final path.
     */
    if (path[0] == '/') {
      cur = path;
      while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.')
             && ((cur[3] == '/') || (cur[3] == '\0')))
        cur += 3;

      if (cur != path) {
        /* shift the remainder down over the discarded "/.." prefixes */
        out = path;
        while (cur[0] != '\0')
          (out++)[0] = (cur++)[0];
        out[0] = 0;
      }
    }

    return(0);
}
1588 
/* Returns 1 when @c is one of 0-9, a-f or A-F, and 0 otherwise. */
static int is_hex(char c) {
    int digit = (c >= '0') && (c <= '9');
    int lower = (c >= 'a') && (c <= 'f');
    int upper = (c >= 'A') && (c <= 'F');

    return((digit || lower || upper) ? 1 : 0);
}
1596 
/**
 * xmlURIUnescapeString:
 * @str:  the string to unescape
 * @len:   the length in bytes to unescape (or <= 0 to indicate full string)
 * @target:  optional destination buffer
 *
 * Copies @str while replacing every "%XX" sequence (two hexadecimal
 * digits) by the single byte it encodes; no check is made that the input
 * is an URI and no character encoding is applied. A '%' that is not
 * followed by two hexadecimal digits inside the requested length is
 * copied unchanged. The result is never longer than the input.
 *
 * When @target is NULL a buffer of @len + 1 bytes is allocated for the
 * result, otherwise the result is written to @target.
 *
 * Returns the unescaped, 0 terminated string (@target if it was given),
 * or NULL in case of error
 */
char *
xmlURIUnescapeString(const char *str, int len, char *target) {
    const char *src;
    char *buffer, *dst;
    int left;

    if (str == NULL)
        return(NULL);

    left = len;
    if (left <= 0)
        left = strlen(str);
    if (left < 0)
        return(NULL);

    buffer = target;
    if (buffer == NULL) {
        buffer = (char *) xmlMallocAtomic(left + 1);
        if (buffer == NULL) {
            xmlURIErrMemory("unescaping URI value\n");
            return(NULL);
        }
    }

    src = str;
    dst = buffer;
    while (left > 0) {
        int i, value;

        /* Anything that is not a complete %XX escape is copied as is. */
        if (!((left > 2) && (src[0] == '%') &&
              (is_hex(src[1])) && (is_hex(src[2])))) {
            *dst++ = *src++;
            left--;
            continue;
        }

        /* Fold the two hexadecimal digits into one byte value. */
        value = 0;
        for (i = 1; i <= 2; i++) {
            char digit = src[i];
            int nibble;

            if ((digit >= '0') && (digit <= '9'))
                nibble = digit - '0';
            else if ((digit >= 'a') && (digit <= 'f'))
                nibble = (digit - 'a') + 10;
            else
                nibble = (digit - 'A') + 10;
            value = value * 16 + nibble;
        }
        *dst++ = value;
        src += 3;
        left -= 3;
    }
    *dst = 0;
    return(buffer);
}
1658 
1659 /**
1660  * xmlURIEscapeStr:
1661  * @str:  string to escape
1662  * @list: exception list string of chars not to escape
1663  *
1664  * This routine escapes a string to hex, ignoring reserved characters (a-z)
1665  * and the characters in the exception list.
1666  *
1667  * Returns a new escaped string or NULL in case of error.
1668  */
1669 xmlChar *
xmlURIEscapeStr(const xmlChar * str,const xmlChar * list)1670 xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) {
1671     xmlChar *ret, ch;
1672     xmlChar *temp;
1673     const xmlChar *in;
1674     int len, out;
1675 
1676     if (str == NULL)
1677 	return(NULL);
1678     if (str[0] == 0)
1679 	return(xmlStrdup(str));
1680     len = xmlStrlen(str);
1681     if (!(len > 0)) return(NULL);
1682 
1683     len += 20;
1684     ret = (xmlChar *) xmlMallocAtomic(len);
1685     if (ret == NULL) {
1686         xmlURIErrMemory("escaping URI value\n");
1687 	return(NULL);
1688     }
1689     in = (const xmlChar *) str;
1690     out = 0;
1691     while(*in != 0) {
1692 	if (len - out <= 3) {
1693             temp = xmlSaveUriRealloc(ret, &len);
1694 	    if (temp == NULL) {
1695                 xmlURIErrMemory("escaping URI value\n");
1696 		xmlFree(ret);
1697 		return(NULL);
1698 	    }
1699 	    ret = temp;
1700 	}
1701 
1702 	ch = *in;
1703 
1704 	if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch))) {
1705 	    unsigned char val;
1706 	    ret[out++] = '%';
1707 	    val = ch >> 4;
1708 	    if (val <= 9)
1709 		ret[out++] = '0' + val;
1710 	    else
1711 		ret[out++] = 'A' + val - 0xA;
1712 	    val = ch & 0xF;
1713 	    if (val <= 9)
1714 		ret[out++] = '0' + val;
1715 	    else
1716 		ret[out++] = 'A' + val - 0xA;
1717 	    in++;
1718 	} else {
1719 	    ret[out++] = *in++;
1720 	}
1721 
1722     }
1723     ret[out] = 0;
1724     return(ret);
1725 }
1726 
1727 /**
1728  * xmlURIEscape:
1729  * @str:  the string of the URI to escape
1730  *
1731  * Escaping routine, does not do validity checks !
1732  * It will try to escape the chars needing this, but this is heuristic
1733  * based it's impossible to be sure.
1734  *
1735  * Returns an copy of the string, but escaped
1736  *
1737  * 25 May 2001
1738  * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly
1739  * according to RFC2396.
1740  *   - Carl Douglas
1741  */
1742 xmlChar *
xmlURIEscape(const xmlChar * str)1743 xmlURIEscape(const xmlChar * str)
1744 {
1745     xmlChar *ret, *segment = NULL;
1746     xmlURIPtr uri;
1747     int ret2;
1748 
1749 #define NULLCHK(p) if(!p) { \
1750          xmlURIErrMemory("escaping URI value\n"); \
1751          xmlFreeURI(uri); \
1752          return NULL; } \
1753 
1754     if (str == NULL)
1755         return (NULL);
1756 
1757     uri = xmlCreateURI();
1758     if (uri != NULL) {
1759 	/*
1760 	 * Allow escaping errors in the unescaped form
1761 	 */
1762         uri->cleanup = 1;
1763         ret2 = xmlParseURIReference(uri, (const char *)str);
1764         if (ret2) {
1765             xmlFreeURI(uri);
1766             return (NULL);
1767         }
1768     }
1769 
1770     if (!uri)
1771         return NULL;
1772 
1773     ret = NULL;
1774 
1775     if (uri->scheme) {
1776         segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-.");
1777         NULLCHK(segment)
1778         ret = xmlStrcat(ret, segment);
1779         ret = xmlStrcat(ret, BAD_CAST ":");
1780         xmlFree(segment);
1781     }
1782 
1783     if (uri->authority) {
1784         segment =
1785             xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@");
1786         NULLCHK(segment)
1787         ret = xmlStrcat(ret, BAD_CAST "//");
1788         ret = xmlStrcat(ret, segment);
1789         xmlFree(segment);
1790     }
1791 
1792     if (uri->user) {
1793         segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,");
1794         NULLCHK(segment)
1795 		ret = xmlStrcat(ret,BAD_CAST "//");
1796         ret = xmlStrcat(ret, segment);
1797         ret = xmlStrcat(ret, BAD_CAST "@");
1798         xmlFree(segment);
1799     }
1800 
1801     if (uri->server) {
1802         segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@");
1803         NULLCHK(segment)
1804 		if (uri->user == NULL)
1805 		ret = xmlStrcat(ret, BAD_CAST "//");
1806         ret = xmlStrcat(ret, segment);
1807         xmlFree(segment);
1808     }
1809 
1810     if (uri->port) {
1811         xmlChar port[10];
1812 
1813         snprintf((char *) port, 10, "%d", uri->port);
1814         ret = xmlStrcat(ret, BAD_CAST ":");
1815         ret = xmlStrcat(ret, port);
1816     }
1817 
1818     if (uri->path) {
1819         segment =
1820             xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;");
1821         NULLCHK(segment)
1822         ret = xmlStrcat(ret, segment);
1823         xmlFree(segment);
1824     }
1825 
1826     if (uri->query_raw) {
1827         ret = xmlStrcat(ret, BAD_CAST "?");
1828         ret = xmlStrcat(ret, BAD_CAST uri->query_raw);
1829     }
1830     else if (uri->query) {
1831         segment =
1832             xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$");
1833         NULLCHK(segment)
1834         ret = xmlStrcat(ret, BAD_CAST "?");
1835         ret = xmlStrcat(ret, segment);
1836         xmlFree(segment);
1837     }
1838 
1839     if (uri->opaque) {
1840         segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST "");
1841         NULLCHK(segment)
1842         ret = xmlStrcat(ret, segment);
1843         xmlFree(segment);
1844     }
1845 
1846     if (uri->fragment) {
1847         segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#");
1848         NULLCHK(segment)
1849         ret = xmlStrcat(ret, BAD_CAST "#");
1850         ret = xmlStrcat(ret, segment);
1851         xmlFree(segment);
1852     }
1853 
1854     xmlFreeURI(uri);
1855 #undef NULLCHK
1856 
1857     return (ret);
1858 }
1859 
1860 /************************************************************************
1861  *									*
1862  *			Public functions				*
1863  *									*
1864  ************************************************************************/
1865 
1866 /**
1867  * xmlBuildURI:
1868  * @URI:  the URI instance found in the document
1869  * @base:  the base value
1870  *
 * Computes the final URI of the reference done by checking that
1872  * the given URI is valid, and building the final URI using the
1873  * base URI. This is processed according to section 5.2 of the
1874  * RFC 2396
1875  *
1876  * 5.2. Resolving Relative References to Absolute Form
1877  *
1878  * Returns a new URI string (to be freed by the caller) or NULL in case
1879  *         of error.
1880  */
1881 xmlChar *
xmlBuildURI(const xmlChar * URI,const xmlChar * base)1882 xmlBuildURI(const xmlChar *URI, const xmlChar *base) {
1883     xmlChar *val = NULL;
1884     int ret, len, indx, cur, out;
1885     xmlURIPtr ref = NULL;
1886     xmlURIPtr bas = NULL;
1887     xmlURIPtr res = NULL;
1888 
1889     /*
1890      * 1) The URI reference is parsed into the potential four components and
1891      *    fragment identifier, as described in Section 4.3.
1892      *
1893      *    NOTE that a completely empty URI is treated by modern browsers
1894      *    as a reference to "." rather than as a synonym for the current
1895      *    URI.  Should we do that here?
1896      */
1897     if (URI == NULL)
1898 	ret = -1;
1899     else {
1900 	if (*URI) {
1901 	    ref = xmlCreateURI();
1902 	    if (ref == NULL)
1903 		goto done;
1904 	    ret = xmlParseURIReference(ref, (const char *) URI);
1905 	}
1906 	else
1907 	    ret = 0;
1908     }
1909     if (ret != 0)
1910 	goto done;
1911     if ((ref != NULL) && (ref->scheme != NULL)) {
1912 	/*
1913 	 * The URI is absolute don't modify.
1914 	 */
1915 	val = xmlStrdup(URI);
1916 	goto done;
1917     }
1918     if (base == NULL)
1919 	ret = -1;
1920     else {
1921 	bas = xmlCreateURI();
1922 	if (bas == NULL)
1923 	    goto done;
1924 	ret = xmlParseURIReference(bas, (const char *) base);
1925     }
1926     if (ret != 0) {
1927 	if (ref)
1928 	    val = xmlSaveUri(ref);
1929 	goto done;
1930     }
1931     if (ref == NULL) {
1932 	/*
1933 	 * the base fragment must be ignored
1934 	 */
1935 	if (bas->fragment != NULL) {
1936 	    xmlFree(bas->fragment);
1937 	    bas->fragment = NULL;
1938 	}
1939 	val = xmlSaveUri(bas);
1940 	goto done;
1941     }
1942 
1943     /*
1944      * 2) If the path component is empty and the scheme, authority, and
1945      *    query components are undefined, then it is a reference to the
1946      *    current document and we are done.  Otherwise, the reference URI's
1947      *    query and fragment components are defined as found (or not found)
1948      *    within the URI reference and not inherited from the base URI.
1949      *
1950      *    NOTE that in modern browsers, the parsing differs from the above
1951      *    in the following aspect:  the query component is allowed to be
1952      *    defined while still treating this as a reference to the current
1953      *    document.
1954      */
1955     res = xmlCreateURI();
1956     if (res == NULL)
1957 	goto done;
1958     if ((ref->scheme == NULL) && (ref->path == NULL) &&
1959 	((ref->authority == NULL) && (ref->server == NULL))) {
1960 	if (bas->scheme != NULL)
1961 	    res->scheme = xmlMemStrdup(bas->scheme);
1962 	if (bas->authority != NULL)
1963 	    res->authority = xmlMemStrdup(bas->authority);
1964 	else if ((bas->server != NULL) || (bas->port == -1)) {
1965 	    if (bas->server != NULL)
1966 		res->server = xmlMemStrdup(bas->server);
1967 	    if (bas->user != NULL)
1968 		res->user = xmlMemStrdup(bas->user);
1969 	    res->port = bas->port;
1970 	}
1971 	if (bas->path != NULL)
1972 	    res->path = xmlMemStrdup(bas->path);
1973 	if (ref->query_raw != NULL)
1974 	    res->query_raw = xmlMemStrdup (ref->query_raw);
1975 	else if (ref->query != NULL)
1976 	    res->query = xmlMemStrdup(ref->query);
1977 	else if (bas->query_raw != NULL)
1978 	    res->query_raw = xmlMemStrdup(bas->query_raw);
1979 	else if (bas->query != NULL)
1980 	    res->query = xmlMemStrdup(bas->query);
1981 	if (ref->fragment != NULL)
1982 	    res->fragment = xmlMemStrdup(ref->fragment);
1983 	goto step_7;
1984     }
1985 
1986     /*
1987      * 3) If the scheme component is defined, indicating that the reference
1988      *    starts with a scheme name, then the reference is interpreted as an
1989      *    absolute URI and we are done.  Otherwise, the reference URI's
1990      *    scheme is inherited from the base URI's scheme component.
1991      */
1992     if (ref->scheme != NULL) {
1993 	val = xmlSaveUri(ref);
1994 	goto done;
1995     }
1996     if (bas->scheme != NULL)
1997 	res->scheme = xmlMemStrdup(bas->scheme);
1998 
1999     if (ref->query_raw != NULL)
2000 	res->query_raw = xmlMemStrdup(ref->query_raw);
2001     else if (ref->query != NULL)
2002 	res->query = xmlMemStrdup(ref->query);
2003     if (ref->fragment != NULL)
2004 	res->fragment = xmlMemStrdup(ref->fragment);
2005 
2006     /*
2007      * 4) If the authority component is defined, then the reference is a
2008      *    network-path and we skip to step 7.  Otherwise, the reference
2009      *    URI's authority is inherited from the base URI's authority
2010      *    component, which will also be undefined if the URI scheme does not
2011      *    use an authority component.
2012      */
2013     if ((ref->authority != NULL) || (ref->server != NULL)) {
2014 	if (ref->authority != NULL)
2015 	    res->authority = xmlMemStrdup(ref->authority);
2016 	else {
2017 	    res->server = xmlMemStrdup(ref->server);
2018 	    if (ref->user != NULL)
2019 		res->user = xmlMemStrdup(ref->user);
2020             res->port = ref->port;
2021 	}
2022 	if (ref->path != NULL)
2023 	    res->path = xmlMemStrdup(ref->path);
2024 	goto step_7;
2025     }
2026     if (bas->authority != NULL)
2027 	res->authority = xmlMemStrdup(bas->authority);
2028     else if ((bas->server != NULL) || (bas->port == -1)) {
2029 	if (bas->server != NULL)
2030 	    res->server = xmlMemStrdup(bas->server);
2031 	if (bas->user != NULL)
2032 	    res->user = xmlMemStrdup(bas->user);
2033 	res->port = bas->port;
2034     }
2035 
2036     /*
2037      * 5) If the path component begins with a slash character ("/"), then
2038      *    the reference is an absolute-path and we skip to step 7.
2039      */
2040     if ((ref->path != NULL) && (ref->path[0] == '/')) {
2041 	res->path = xmlMemStrdup(ref->path);
2042 	goto step_7;
2043     }
2044 
2045 
2046     /*
2047      * 6) If this step is reached, then we are resolving a relative-path
2048      *    reference.  The relative path needs to be merged with the base
2049      *    URI's path.  Although there are many ways to do this, we will
2050      *    describe a simple method using a separate string buffer.
2051      *
2052      * Allocate a buffer large enough for the result string.
2053      */
2054     len = 2; /* extra / and 0 */
2055     if (ref->path != NULL)
2056 	len += strlen(ref->path);
2057     if (bas->path != NULL)
2058 	len += strlen(bas->path);
2059     res->path = (char *) xmlMallocAtomic(len);
2060     if (res->path == NULL) {
2061         xmlURIErrMemory("resolving URI against base\n");
2062 	goto done;
2063     }
2064     res->path[0] = 0;
2065 
2066     /*
2067      * a) All but the last segment of the base URI's path component is
2068      *    copied to the buffer.  In other words, any characters after the
2069      *    last (right-most) slash character, if any, are excluded.
2070      */
2071     cur = 0;
2072     out = 0;
2073     if (bas->path != NULL) {
2074 	while (bas->path[cur] != 0) {
2075 	    while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
2076 		cur++;
2077 	    if (bas->path[cur] == 0)
2078 		break;
2079 
2080 	    cur++;
2081 	    while (out < cur) {
2082 		res->path[out] = bas->path[out];
2083 		out++;
2084 	    }
2085 	}
2086     }
2087     res->path[out] = 0;
2088 
2089     /*
2090      * b) The reference's path component is appended to the buffer
2091      *    string.
2092      */
2093     if (ref->path != NULL && ref->path[0] != 0) {
2094 	indx = 0;
2095 	/*
2096 	 * Ensure the path includes a '/'
2097 	 */
2098 	if ((out == 0) && (bas->server != NULL))
2099 	    res->path[out++] = '/';
2100 	while (ref->path[indx] != 0) {
2101 	    res->path[out++] = ref->path[indx++];
2102 	}
2103     }
2104     res->path[out] = 0;
2105 
2106     /*
2107      * Steps c) to h) are really path normalization steps
2108      */
2109     xmlNormalizeURIPath(res->path);
2110 
2111 step_7:
2112 
2113     /*
2114      * 7) The resulting URI components, including any inherited from the
2115      *    base URI, are recombined to give the absolute form of the URI
2116      *    reference.
2117      */
2118     val = xmlSaveUri(res);
2119 
2120 done:
2121     if (ref != NULL)
2122 	xmlFreeURI(ref);
2123     if (bas != NULL)
2124 	xmlFreeURI(bas);
2125     if (res != NULL)
2126 	xmlFreeURI(res);
2127     return(val);
2128 }
2129 
2130 /**
2131  * xmlBuildRelativeURI:
2132  * @URI:  the URI reference under consideration
2133  * @base:  the base value
2134  *
2135  * Expresses the URI of the reference in terms relative to the
2136  * base.  Some examples of this operation include:
2137  *     base = "http://site1.com/docs/book1.html"
2138  *        URI input                        URI returned
2139  *     docs/pic1.gif                    pic1.gif
2140  *     docs/img/pic1.gif                img/pic1.gif
2141  *     img/pic1.gif                     ../img/pic1.gif
2142  *     http://site1.com/docs/pic1.gif   pic1.gif
2143  *     http://site2.com/docs/pic1.gif   http://site2.com/docs/pic1.gif
2144  *
2145  *     base = "docs/book1.html"
2146  *        URI input                        URI returned
2147  *     docs/pic1.gif                    pic1.gif
2148  *     docs/img/pic1.gif                img/pic1.gif
2149  *     img/pic1.gif                     ../img/pic1.gif
2150  *     http://site1.com/docs/pic1.gif   http://site1.com/docs/pic1.gif
2151  *
2152  *
2153  * Note: if the URI reference is really wierd or complicated, it may be
2154  *       worthwhile to first convert it into a "nice" one by calling
2155  *       xmlBuildURI (using 'base') before calling this routine,
2156  *       since this routine (for reasonable efficiency) assumes URI has
2157  *       already been through some validation.
2158  *
2159  * Returns a new URI string (to be freed by the caller) or NULL in case
2160  * error.
2161  */
2162 xmlChar *
xmlBuildRelativeURI(const xmlChar * URI,const xmlChar * base)2163 xmlBuildRelativeURI (const xmlChar * URI, const xmlChar * base)
2164 {
2165     xmlChar *val = NULL;
2166     int ret;
2167     int ix;
2168     int nbslash = 0;
2169     int len;
2170     xmlURIPtr ref = NULL;
2171     xmlURIPtr bas = NULL;
2172     xmlChar *bptr, *uptr, *vptr;
2173     int remove_path = 0;
2174 
2175     if ((URI == NULL) || (*URI == 0))
2176 	return NULL;
2177 
2178     /*
2179      * First parse URI into a standard form
2180      */
2181     ref = xmlCreateURI ();
2182     if (ref == NULL)
2183 	return NULL;
2184     /* If URI not already in "relative" form */
2185     if (URI[0] != '.') {
2186 	ret = xmlParseURIReference (ref, (const char *) URI);
2187 	if (ret != 0)
2188 	    goto done;		/* Error in URI, return NULL */
2189     } else
2190 	ref->path = (char *)xmlStrdup(URI);
2191 
2192     /*
2193      * Next parse base into the same standard form
2194      */
2195     if ((base == NULL) || (*base == 0)) {
2196 	val = xmlStrdup (URI);
2197 	goto done;
2198     }
2199     bas = xmlCreateURI ();
2200     if (bas == NULL)
2201 	goto done;
2202     if (base[0] != '.') {
2203 	ret = xmlParseURIReference (bas, (const char *) base);
2204 	if (ret != 0)
2205 	    goto done;		/* Error in base, return NULL */
2206     } else
2207 	bas->path = (char *)xmlStrdup(base);
2208 
2209     /*
2210      * If the scheme / server on the URI differs from the base,
2211      * just return the URI
2212      */
2213     if ((ref->scheme != NULL) &&
2214 	((bas->scheme == NULL) ||
2215 	 (xmlStrcmp ((xmlChar *)bas->scheme, (xmlChar *)ref->scheme)) ||
2216 	 (xmlStrcmp ((xmlChar *)bas->server, (xmlChar *)ref->server)))) {
2217 	val = xmlStrdup (URI);
2218 	goto done;
2219     }
2220     if (xmlStrEqual((xmlChar *)bas->path, (xmlChar *)ref->path)) {
2221 	val = xmlStrdup(BAD_CAST "");
2222 	goto done;
2223     }
2224     if (bas->path == NULL) {
2225 	val = xmlStrdup((xmlChar *)ref->path);
2226 	goto done;
2227     }
2228     if (ref->path == NULL) {
2229         ref->path = (char *) "/";
2230 	remove_path = 1;
2231     }
2232 
2233     /*
2234      * At this point (at last!) we can compare the two paths
2235      *
2236      * First we take care of the special case where either of the
2237      * two path components may be missing (bug 316224)
2238      */
2239     bptr = (xmlChar *)bas->path;
2240     {
2241         xmlChar *rptr = (xmlChar *) ref->path;
2242         int pos = 0;
2243 
2244         /*
2245          * Next we compare the two strings and find where they first differ
2246          */
2247 	if ((*rptr == '.') && (rptr[1] == '/'))
2248             rptr += 2;
2249 	if ((*bptr == '.') && (bptr[1] == '/'))
2250             bptr += 2;
2251 	else if ((*bptr == '/') && (*rptr != '/'))
2252 	    bptr++;
2253 	while ((bptr[pos] == rptr[pos]) && (bptr[pos] != 0))
2254 	    pos++;
2255 
2256 	if (bptr[pos] == rptr[pos]) {
2257 	    val = xmlStrdup(BAD_CAST "");
2258 	    goto done;		/* (I can't imagine why anyone would do this) */
2259 	}
2260 
2261 	/*
2262 	 * In URI, "back up" to the last '/' encountered.  This will be the
2263 	 * beginning of the "unique" suffix of URI
2264 	 */
2265 	ix = pos;
2266 	for (; ix > 0; ix--) {
2267 	    if (rptr[ix - 1] == '/')
2268 		break;
2269 	}
2270 	uptr = (xmlChar *)&rptr[ix];
2271 
2272 	/*
2273 	 * In base, count the number of '/' from the differing point
2274 	 */
2275 	for (; bptr[ix] != 0; ix++) {
2276 	    if (bptr[ix] == '/')
2277 		nbslash++;
2278 	}
2279 
2280 	/*
2281 	 * e.g: URI="foo/" base="foo/bar" -> "./"
2282 	 */
2283 	if (nbslash == 0 && !uptr[0]) {
2284 	    val = xmlStrdup(BAD_CAST "./");
2285 	    goto done;
2286 	}
2287 
2288 	len = xmlStrlen (uptr) + 1;
2289     }
2290 
2291     if (nbslash == 0) {
2292 	if (uptr != NULL)
2293 	    /* exception characters from xmlSaveUri */
2294 	    val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
2295 	goto done;
2296     }
2297 
2298     /*
2299      * Allocate just enough space for the returned string -
2300      * length of the remainder of the URI, plus enough space
2301      * for the "../" groups, plus one for the terminator
2302      */
2303     val = (xmlChar *) xmlMalloc (len + 3 * nbslash);
2304     if (val == NULL) {
2305         xmlURIErrMemory("building relative URI\n");
2306 	goto done;
2307     }
2308     vptr = val;
2309     /*
2310      * Put in as many "../" as needed
2311      */
2312     for (; nbslash>0; nbslash--) {
2313 	*vptr++ = '.';
2314 	*vptr++ = '.';
2315 	*vptr++ = '/';
2316     }
2317     /*
2318      * Finish up with the end of the URI
2319      */
2320     if (uptr != NULL) {
2321         if ((vptr > val) && (len > 0) &&
2322 	    (uptr[0] == '/') && (vptr[-1] == '/')) {
2323 	    memcpy (vptr, uptr + 1, len - 1);
2324 	    vptr[len - 2] = 0;
2325 	} else {
2326 	    memcpy (vptr, uptr, len);
2327 	    vptr[len - 1] = 0;
2328 	}
2329     } else {
2330 	vptr[len - 1] = 0;
2331     }
2332 
2333     /* escape the freshly-built path */
2334     vptr = val;
2335 	/* exception characters from xmlSaveUri */
2336     val = xmlURIEscapeStr(vptr, BAD_CAST "/;&=+$,");
2337     xmlFree(vptr);
2338 
2339 done:
2340     /*
2341      * Free the working variables
2342      */
2343     if (remove_path != 0)
2344         ref->path = NULL;
2345     if (ref != NULL)
2346 	xmlFreeURI (ref);
2347     if (bas != NULL)
2348 	xmlFreeURI (bas);
2349 
2350     return val;
2351 }
2352 
2353 /**
2354  * xmlCanonicPath:
2355  * @path:  the resource locator in a filesystem notation
2356  *
2357  * Constructs a canonic path from the specified path.
2358  *
2359  * Returns a new canonic path, or a duplicate of the path parameter if the
2360  * construction fails. The caller is responsible for freeing the memory occupied
2361  * by the returned string. If there is insufficient memory available, or the
2362  * argument is NULL, the function returns NULL.
2363  */
/*
 * IS_WINDOWS_PATH:
 *
 * Evaluates to non-zero when p is a non-NULL string that begins with an
 * ASCII drive letter, a colon and a slash or backslash ("C:\" or "c:/").
 * The argument is fully parenthesized so any pointer expression can be
 * passed; it is evaluated several times and must have no side effects.
 */
#define IS_WINDOWS_PATH(p)					\
	(((p) != NULL) &&					\
	 ((((p)[0] >= 'a') && ((p)[0] <= 'z')) ||		\
	  (((p)[0] >= 'A') && ((p)[0] <= 'Z'))) &&		\
	 ((p)[1] == ':') && (((p)[2] == '/') || ((p)[2] == '\\')))
2369 xmlChar *
xmlCanonicPath(const xmlChar * path)2370 xmlCanonicPath(const xmlChar *path)
2371 {
2372 /*
2373  * For Windows implementations, additional work needs to be done to
2374  * replace backslashes in pathnames with "forward slashes"
2375  */
2376 #if defined(_WIN32) && !defined(__CYGWIN__)
2377     int len = 0;
2378     char *p = NULL;
2379 #endif
2380     xmlURIPtr uri;
2381     xmlChar *ret;
2382     const xmlChar *absuri;
2383 
2384     if (path == NULL)
2385 	return(NULL);
2386 
2387 #if defined(_WIN32)
2388     /*
2389      * We must not change the backslashes to slashes if the the path
2390      * starts with \\?\
2391      * Those paths can be up to 32k characters long.
2392      * Was added specifically for OpenOffice, those paths can't be converted
2393      * to URIs anyway.
2394      */
2395     if ((path[0] == '\\') && (path[1] == '\\') && (path[2] == '?') &&
2396         (path[3] == '\\') )
2397 	return xmlStrdup((const xmlChar *) path);
2398 #endif
2399 
2400 	/* sanitize filename starting with // so it can be used as URI */
2401     if ((path[0] == '/') && (path[1] == '/') && (path[2] != '/'))
2402         path++;
2403 
2404     if ((uri = xmlParseURI((const char *) path)) != NULL) {
2405 	xmlFreeURI(uri);
2406 	return xmlStrdup(path);
2407     }
2408 
2409     /* Check if this is an "absolute uri" */
2410     absuri = xmlStrstr(path, BAD_CAST "://");
2411     if (absuri != NULL) {
2412         int l, j;
2413 	unsigned char c;
2414 	xmlChar *escURI;
2415 
2416         /*
2417 	 * this looks like an URI where some parts have not been
2418 	 * escaped leading to a parsing problem.  Check that the first
2419 	 * part matches a protocol.
2420 	 */
2421 	l = absuri - path;
2422 	/* Bypass if first part (part before the '://') is > 20 chars */
2423 	if ((l <= 0) || (l > 20))
2424 	    goto path_processing;
2425 	/* Bypass if any non-alpha characters are present in first part */
2426 	for (j = 0;j < l;j++) {
2427 	    c = path[j];
2428 	    if (!(((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z'))))
2429 	        goto path_processing;
2430 	}
2431 
2432 	/* Escape all except the characters specified in the supplied path */
2433         escURI = xmlURIEscapeStr(path, BAD_CAST ":/?_.#&;=");
2434 	if (escURI != NULL) {
2435 	    /* Try parsing the escaped path */
2436 	    uri = xmlParseURI((const char *) escURI);
2437 	    /* If successful, return the escaped string */
2438 	    if (uri != NULL) {
2439 	        xmlFreeURI(uri);
2440 		return escURI;
2441 	    }
2442             xmlFree(escURI);
2443 	}
2444     }
2445 
2446 path_processing:
2447 /* For Windows implementations, replace backslashes with 'forward slashes' */
2448 #if defined(_WIN32) && !defined(__CYGWIN__)
2449     /*
2450      * Create a URI structure
2451      */
2452     uri = xmlCreateURI();
2453     if (uri == NULL) {		/* Guard against 'out of memory' */
2454         return(NULL);
2455     }
2456 
2457     len = xmlStrlen(path);
2458     if ((len > 2) && IS_WINDOWS_PATH(path)) {
2459         /* make the scheme 'file' */
2460 	uri->scheme = (char *) xmlStrdup(BAD_CAST "file");
2461 	/* allocate space for leading '/' + path + string terminator */
2462 	uri->path = xmlMallocAtomic(len + 2);
2463 	if (uri->path == NULL) {
2464 	    xmlFreeURI(uri);	/* Guard agains 'out of memory' */
2465 	    return(NULL);
2466 	}
2467 	/* Put in leading '/' plus path */
2468 	uri->path[0] = '/';
2469 	p = uri->path + 1;
2470 	strncpy(p, (char *) path, len + 1);
2471     } else {
2472 	uri->path = (char *) xmlStrdup(path);
2473 	if (uri->path == NULL) {
2474 	    xmlFreeURI(uri);
2475 	    return(NULL);
2476 	}
2477 	p = uri->path;
2478     }
2479     /* Now change all occurences of '\' to '/' */
2480     while (*p != '\0') {
2481 	if (*p == '\\')
2482 	    *p = '/';
2483 	p++;
2484     }
2485 
2486     if (uri->scheme == NULL) {
2487 	ret = xmlStrdup((const xmlChar *) uri->path);
2488     } else {
2489 	ret = xmlSaveUri(uri);
2490     }
2491 
2492     xmlFreeURI(uri);
2493 #else
2494     ret = xmlStrdup((const xmlChar *) path);
2495 #endif
2496     return(ret);
2497 }
2498 
2499 /**
2500  * xmlPathToURI:
2501  * @path:  the resource locator in a filesystem notation
2502  *
2503  * Constructs an URI expressing the existing path
2504  *
2505  * Returns a new URI, or a duplicate of the path parameter if the
2506  * construction fails. The caller is responsible for freeing the memory
2507  * occupied by the returned string. If there is insufficient memory available,
2508  * or the argument is NULL, the function returns NULL.
2509  */
2510 xmlChar *
xmlPathToURI(const xmlChar * path)2511 xmlPathToURI(const xmlChar *path)
2512 {
2513     xmlURIPtr uri;
2514     xmlURI temp;
2515     xmlChar *ret, *cal;
2516 
2517     if (path == NULL)
2518         return(NULL);
2519 
2520     if ((uri = xmlParseURI((const char *) path)) != NULL) {
2521 	xmlFreeURI(uri);
2522 	return xmlStrdup(path);
2523     }
2524     cal = xmlCanonicPath(path);
2525     if (cal == NULL)
2526         return(NULL);
2527 #if defined(_WIN32) && !defined(__CYGWIN__)
2528     /* xmlCanonicPath can return an URI on Windows (is that the intended behaviour?)
2529        If 'cal' is a valid URI allready then we are done here, as continuing would make
2530        it invalid. */
2531     if ((uri = xmlParseURI((const char *) cal)) != NULL) {
2532 	xmlFreeURI(uri);
2533 	return cal;
2534     }
2535     /* 'cal' can contain a relative path with backslashes. If that is processed
2536        by xmlSaveURI, they will be escaped and the external entity loader machinery
2537        will fail. So convert them to slashes. Misuse 'ret' for walking. */
2538     ret = cal;
2539     while (*ret != '\0') {
2540 	if (*ret == '\\')
2541 	    *ret = '/';
2542 	ret++;
2543     }
2544 #endif
2545     memset(&temp, 0, sizeof(temp));
2546     temp.path = (char *) cal;
2547     ret = xmlSaveUri(&temp);
2548     xmlFree(cal);
2549     return(ret);
2550 }
2551 #define bottom_uri
2552 #include "elfgcchack.h"
2553