1 /**
2  * uri.c: set of generic URI related routines
3  *
4  * Reference: RFCs 3986, 2732 and 2373
5  *
6  * See Copyright for the status of this software.
7  *
8  * daniel@veillard.com
9  */
10 
11 #define IN_LIBXML
12 #include "libxml.h"
13 
14 #include <string.h>
15 #include <limits.h>
16 
17 #include <libxml/xmlmemory.h>
18 #include <libxml/uri.h>
19 #include <libxml/globals.h>
20 #include <libxml/xmlerror.h>
21 
/**
 * MAX_URI_LENGTH:
 *
 * The definition of the URI regexp in the above RFCs has no size limit.
 * In practice URIs are usually relatively short, except for the
 * data URI scheme as defined in RFC 2397. Even for data URIs the usual
 * maximum size before hitting random practical limits is around 64 KB,
 * and 4 KB is usually the maximum admitted for proper operations.
 * The value below is more a security limit than anything else and
 * really should never be hit by 'normal' operations.
 * Set to 1 MByte in 2012, this is only enforced on output.
 *
 * The expansion is parenthesized so that the macro behaves as a single
 * value inside any surrounding expression (e.g. division or modulo).
 */
#define MAX_URI_LENGTH (1024 * 1024)
35 
36 static void
xmlURIErrMemory(const char * extra)37 xmlURIErrMemory(const char *extra)
38 {
39     if (extra)
40         __xmlRaiseError(NULL, NULL, NULL,
41                         NULL, NULL, XML_FROM_URI,
42                         XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0,
43                         extra, NULL, NULL, 0, 0,
44                         "Memory allocation failed : %s\n", extra);
45     else
46         __xmlRaiseError(NULL, NULL, NULL,
47                         NULL, NULL, XML_FROM_URI,
48                         XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0,
49                         NULL, NULL, NULL, 0, 0,
50                         "Memory allocation failed\n");
51 }
52 
53 static void xmlCleanURI(xmlURIPtr uri);
54 
/*
 * Old rules from RFC 2396 used in the legacy handling code.
 *
 * Unless stated otherwise the class macros below take the character
 * itself. All of them may evaluate their argument more than once, so
 * the argument must be free of side effects.
 */

/*
 * alpha    = lowalpha | upalpha
 */
#define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))

/*
 * lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" |
 *            "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" |
 *            "u" | "v" | "w" | "x" | "y" | "z"
 */
#define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z'))

/*
 * upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" |
 *           "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" |
 *           "U" | "V" | "W" | "X" | "Y" | "Z"
 */
#define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z'))

#ifdef IS_DIGIT
#undef IS_DIGIT
#endif
/*
 * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
 */
#define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9'))

/*
 * alphanum = alpha | digit
 */
#define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))

/*
 * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
 */
#define IS_MARK(x) (((x) == '-') || ((x) == '_') || ((x) == '.') ||     \
    ((x) == '!') || ((x) == '~') || ((x) == '*') || ((x) == '\'') ||    \
    ((x) == '(') || ((x) == ')'))

/*
 * unwise = "{" | "}" | "|" | "\" | "^" | "[" | "]" | "`"
 *
 * Unlike the other legacy classes this one takes a POINTER to the
 * character to test.
 */
#define IS_UNWISE(p)                                                    \
      (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) ||         \
       ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) ||        \
       ((*(p) == ']')) || ((*(p) == '`')))

/*
 * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," |
 *            "[" | "]"
 */
#define IS_RESERVED(x) (((x) == ';') || ((x) == '/') || ((x) == '?') || \
        ((x) == ':') || ((x) == '@') || ((x) == '&') || ((x) == '=') || \
        ((x) == '+') || ((x) == '$') || ((x) == ',') || ((x) == '[') || \
        ((x) == ']'))

/*
 * unreserved = alphanum | mark
 */
#define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))

/*
 * Skip to next pointer char, handle escaped sequences: a '%' advances
 * the pointer by 3 (the '%' and its two following characters), anything
 * else advances it by 1. The macro does not check that two characters
 * actually follow the '%'; callers have to establish that first.
 *
 * The argument is a modifiable pointer lvalue. It is fully parenthesized
 * so that expressions such as NEXT(*pp) advance *pp and not pp.
 */
#define NEXT(p) (((*(p)) == '%') ? ((p) += 3) : ((p)++))

/*
 * Productions from the spec.
 *
 *    authority     = server | reg_name
 *    reg_name      = 1*( unreserved | escaped | "$" | "," |
 *                        ";" | ":" | "@" | "&" | "=" | "+" )
 *
 * path          = [ abs_path | opaque_part ]
 */

/*
 * Duplicate the first n bytes of s through xmlStrndup(), with the casts
 * needed to go from plain char strings to xmlChar and back.
 */
#define STRNDUP(s, n) ((char *) xmlStrndup((const xmlChar *)(s), (n)))
140 
141 /************************************************************************
142  *									*
143  *                         RFC 3986 parser				*
144  *									*
145  ************************************************************************/
146 
/*
 * Character classes from RFC 3986. Every ISA_xxx() macro takes a POINTER
 * to the character to test and may evaluate it several times, so the
 * argument must be free of side effects.
 */
#define ISA_DIGIT(p) ((*(p) >= '0') && (*(p) <= '9'))
#define ISA_ALPHA(p) (((*(p) >= 'a') && (*(p) <= 'z')) ||		\
                      ((*(p) >= 'A') && (*(p) <= 'Z')))
#define ISA_HEXDIG(p)							\
       (ISA_DIGIT(p) || ((*(p) >= 'a') && (*(p) <= 'f')) ||		\
        ((*(p) >= 'A') && (*(p) <= 'F')))

/*
 *    sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
 *                     / "*" / "+" / "," / ";" / "="
 */
#define ISA_SUB_DELIM(p)						\
      (((*(p) == '!')) || ((*(p) == '$')) || ((*(p) == '&')) ||		\
       ((*(p) == '(')) || ((*(p) == ')')) || ((*(p) == '*')) ||		\
       ((*(p) == '+')) || ((*(p) == ',')) || ((*(p) == ';')) ||		\
       ((*(p) == '=')) || ((*(p) == '\'')))

/*
 *    gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
 */
#define ISA_GEN_DELIM(p)						\
      (((*(p) == ':')) || ((*(p) == '/')) || ((*(p) == '?')) ||         \
       ((*(p) == '#')) || ((*(p) == '[')) || ((*(p) == ']')) ||         \
       ((*(p) == '@')))

/*
 *    reserved      = gen-delims / sub-delims
 */
#define ISA_RESERVED(p) (ISA_GEN_DELIM(p) || (ISA_SUB_DELIM(p)))

/*
 *    unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
 */
#define ISA_UNRESERVED(p)						\
      ((ISA_ALPHA(p)) || (ISA_DIGIT(p)) || ((*(p) == '-')) ||		\
       ((*(p) == '.')) || ((*(p) == '_')) || ((*(p) == '~')))

/*
 *    pct-encoded   = "%" HEXDIG HEXDIG
 *
 * The checks short-circuit, so the second and third characters are only
 * read when the previous one matched (and hence was not the terminating
 * 0). The argument is parenthesized before the offset is added so that
 * any pointer expression can be passed.
 */
#define ISA_PCT_ENCODED(p)						\
     ((*(p) == '%') && (ISA_HEXDIG((p) + 1)) && (ISA_HEXDIG((p) + 2)))

/*
 *    pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
 */
#define ISA_PCHAR(p)							\
     (ISA_UNRESERVED(p) || ISA_PCT_ENCODED(p) || ISA_SUB_DELIM(p) ||	\
      ((*(p) == ':')) || ((*(p) == '@')))
196 
197 /**
198  * xmlParse3986Scheme:
199  * @uri:  pointer to an URI structure
200  * @str:  pointer to the string to analyze
201  *
202  * Parse an URI scheme
203  *
204  * ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
205  *
206  * Returns 0 or the error code
207  */
208 static int
xmlParse3986Scheme(xmlURIPtr uri,const char ** str)209 xmlParse3986Scheme(xmlURIPtr uri, const char **str) {
210     const char *cur;
211 
212     if (str == NULL)
213 	return(-1);
214 
215     cur = *str;
216     if (!ISA_ALPHA(cur))
217 	return(2);
218     cur++;
219     while (ISA_ALPHA(cur) || ISA_DIGIT(cur) ||
220            (*cur == '+') || (*cur == '-') || (*cur == '.')) cur++;
221     if (uri != NULL) {
222 	if (uri->scheme != NULL) xmlFree(uri->scheme);
223 	uri->scheme = STRNDUP(*str, cur - *str);
224     }
225     *str = cur;
226     return(0);
227 }
228 
229 /**
230  * xmlParse3986Fragment:
231  * @uri:  pointer to an URI structure
232  * @str:  pointer to the string to analyze
233  *
234  * Parse the query part of an URI
235  *
236  * fragment      = *( pchar / "/" / "?" )
237  * NOTE: the strict syntax as defined by 3986 does not allow '[' and ']'
238  *       in the fragment identifier but this is used very broadly for
239  *       xpointer scheme selection, so we are allowing it here to not break
240  *       for example all the DocBook processing chains.
241  *
242  * Returns 0 or the error code
243  */
244 static int
xmlParse3986Fragment(xmlURIPtr uri,const char ** str)245 xmlParse3986Fragment(xmlURIPtr uri, const char **str)
246 {
247     const char *cur;
248 
249     if (str == NULL)
250         return (-1);
251 
252     cur = *str;
253 
254     while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
255            (*cur == '[') || (*cur == ']') ||
256            ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
257         NEXT(cur);
258     if (uri != NULL) {
259         if (uri->fragment != NULL)
260             xmlFree(uri->fragment);
261 	if (uri->cleanup & 2)
262 	    uri->fragment = STRNDUP(*str, cur - *str);
263 	else
264 	    uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL);
265     }
266     *str = cur;
267     return (0);
268 }
269 
270 /**
271  * xmlParse3986Query:
272  * @uri:  pointer to an URI structure
273  * @str:  pointer to the string to analyze
274  *
275  * Parse the query part of an URI
276  *
277  * query = *uric
278  *
279  * Returns 0 or the error code
280  */
281 static int
xmlParse3986Query(xmlURIPtr uri,const char ** str)282 xmlParse3986Query(xmlURIPtr uri, const char **str)
283 {
284     const char *cur;
285 
286     if (str == NULL)
287         return (-1);
288 
289     cur = *str;
290 
291     while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
292            ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
293         NEXT(cur);
294     if (uri != NULL) {
295         if (uri->query != NULL)
296             xmlFree(uri->query);
297 	if (uri->cleanup & 2)
298 	    uri->query = STRNDUP(*str, cur - *str);
299 	else
300 	    uri->query = xmlURIUnescapeString(*str, cur - *str, NULL);
301 
302 	/* Save the raw bytes of the query as well.
303 	 * See: http://mail.gnome.org/archives/xml/2007-April/thread.html#00114
304 	 */
305 	if (uri->query_raw != NULL)
306 	    xmlFree (uri->query_raw);
307 	uri->query_raw = STRNDUP (*str, cur - *str);
308     }
309     *str = cur;
310     return (0);
311 }
312 
313 /**
314  * xmlParse3986Port:
315  * @uri:  pointer to an URI structure
316  * @str:  the string to analyze
317  *
318  * Parse a port part and fills in the appropriate fields
319  * of the @uri structure
320  *
321  * port          = *DIGIT
322  *
323  * Returns 0 or the error code
324  */
325 static int
xmlParse3986Port(xmlURIPtr uri,const char ** str)326 xmlParse3986Port(xmlURIPtr uri, const char **str)
327 {
328     const char *cur = *str;
329     unsigned port = 0; /* unsigned for defined overflow behavior */
330 
331     if (ISA_DIGIT(cur)) {
332 	while (ISA_DIGIT(cur)) {
333 	    port = port * 10 + (*cur - '0');
334 
335 	    cur++;
336 	}
337 	if (uri != NULL)
338 	    uri->port = port & USHRT_MAX; /* port value modulo INT_MAX+1 */
339 	*str = cur;
340 	return(0);
341     }
342     return(1);
343 }
344 
345 /**
346  * xmlParse3986Userinfo:
347  * @uri:  pointer to an URI structure
348  * @str:  the string to analyze
349  *
350  * Parse an user informations part and fills in the appropriate fields
351  * of the @uri structure
352  *
353  * userinfo      = *( unreserved / pct-encoded / sub-delims / ":" )
354  *
355  * Returns 0 or the error code
356  */
357 static int
xmlParse3986Userinfo(xmlURIPtr uri,const char ** str)358 xmlParse3986Userinfo(xmlURIPtr uri, const char **str)
359 {
360     const char *cur;
361 
362     cur = *str;
363     while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) ||
364            ISA_SUB_DELIM(cur) || (*cur == ':'))
365 	NEXT(cur);
366     if (*cur == '@') {
367 	if (uri != NULL) {
368 	    if (uri->user != NULL) xmlFree(uri->user);
369 	    if (uri->cleanup & 2)
370 		uri->user = STRNDUP(*str, cur - *str);
371 	    else
372 		uri->user = xmlURIUnescapeString(*str, cur - *str, NULL);
373 	}
374 	*str = cur;
375 	return(0);
376     }
377     return(1);
378 }
379 
/**
 * xmlParse3986DecOctet:
 * @str:  the string to analyze
 *
 *    dec-octet     = DIGIT                 ; 0-9
 *                  / %x31-39 DIGIT         ; 10-99
 *                  / "1" 2DIGIT            ; 100-199
 *                  / "2" %x30-34 DIGIT     ; 200-249
 *                  / "25" %x30-35          ; 250-255
 *
 * Skip a dec-octet. At most three digits are consumed; whatever follows
 * them is left for the caller to check. *@str is only updated when a
 * dec-octet was found.
 *
 * Returns 0 if found and skipped, 1 otherwise
 */
static int
xmlParse3986DecOctet(const char **str) {
    const char *cur = *str;
    int len;

    if ((cur[0] < '0') || (cur[0] > '9'))
        return(1);

    if ((cur[1] < '0') || (cur[1] > '9')) {
        /* single digit: 0-9 */
        len = 1;
    } else if ((cur[2] < '0') || (cur[2] > '9')) {
        /* two digits: 10-99, a leading zero is not allowed */
        if (cur[0] == '0')
            return(1);
        len = 2;
    } else if (cur[0] == '1') {
        /* 100-199 */
        len = 3;
    } else if ((cur[0] == '2') && (cur[1] <= '4')) {
        /* 200-249 */
        len = 3;
    } else if ((cur[0] == '2') && (cur[1] == '5') && (cur[2] <= '5')) {
        /* 250-255: the bound applies to the LAST digit */
        len = 3;
    } else {
        return(1);
    }

    *str = cur + len;
    return(0);
}
417 /**
418  * xmlParse3986Host:
419  * @uri:  pointer to an URI structure
420  * @str:  the string to analyze
421  *
422  * Parse an host part and fills in the appropriate fields
423  * of the @uri structure
424  *
425  * host          = IP-literal / IPv4address / reg-name
426  * IP-literal    = "[" ( IPv6address / IPvFuture  ) "]"
427  * IPv4address   = dec-octet "." dec-octet "." dec-octet "." dec-octet
428  * reg-name      = *( unreserved / pct-encoded / sub-delims )
429  *
430  * Returns 0 or the error code
431  */
432 static int
xmlParse3986Host(xmlURIPtr uri,const char ** str)433 xmlParse3986Host(xmlURIPtr uri, const char **str)
434 {
435     const char *cur = *str;
436     const char *host;
437 
438     host = cur;
439     /*
440      * IPv6 and future adressing scheme are enclosed between brackets
441      */
442     if (*cur == '[') {
443         cur++;
444 	while ((*cur != ']') && (*cur != 0))
445 	    cur++;
446 	if (*cur != ']')
447 	    return(1);
448 	cur++;
449 	goto found;
450     }
451     /*
452      * try to parse an IPv4
453      */
454     if (ISA_DIGIT(cur)) {
455         if (xmlParse3986DecOctet(&cur) != 0)
456 	    goto not_ipv4;
457 	if (*cur != '.')
458 	    goto not_ipv4;
459 	cur++;
460         if (xmlParse3986DecOctet(&cur) != 0)
461 	    goto not_ipv4;
462 	if (*cur != '.')
463 	    goto not_ipv4;
464         if (xmlParse3986DecOctet(&cur) != 0)
465 	    goto not_ipv4;
466 	if (*cur != '.')
467 	    goto not_ipv4;
468         if (xmlParse3986DecOctet(&cur) != 0)
469 	    goto not_ipv4;
470 	goto found;
471 not_ipv4:
472         cur = *str;
473     }
474     /*
475      * then this should be a hostname which can be empty
476      */
477     while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) || ISA_SUB_DELIM(cur))
478         NEXT(cur);
479 found:
480     if (uri != NULL) {
481 	if (uri->authority != NULL) xmlFree(uri->authority);
482 	uri->authority = NULL;
483 	if (uri->server != NULL) xmlFree(uri->server);
484 	if (cur != host) {
485 	    if (uri->cleanup & 2)
486 		uri->server = STRNDUP(host, cur - host);
487 	    else
488 		uri->server = xmlURIUnescapeString(host, cur - host, NULL);
489 	} else
490 	    uri->server = NULL;
491     }
492     *str = cur;
493     return(0);
494 }
495 
496 /**
497  * xmlParse3986Authority:
498  * @uri:  pointer to an URI structure
499  * @str:  the string to analyze
500  *
501  * Parse an authority part and fills in the appropriate fields
502  * of the @uri structure
503  *
504  * authority     = [ userinfo "@" ] host [ ":" port ]
505  *
506  * Returns 0 or the error code
507  */
508 static int
xmlParse3986Authority(xmlURIPtr uri,const char ** str)509 xmlParse3986Authority(xmlURIPtr uri, const char **str)
510 {
511     const char *cur;
512     int ret;
513 
514     cur = *str;
515     /*
516      * try to parse an userinfo and check for the trailing @
517      */
518     ret = xmlParse3986Userinfo(uri, &cur);
519     if ((ret != 0) || (*cur != '@'))
520         cur = *str;
521     else
522         cur++;
523     ret = xmlParse3986Host(uri, &cur);
524     if (ret != 0) return(ret);
525     if (*cur == ':') {
526         cur++;
527         ret = xmlParse3986Port(uri, &cur);
528 	if (ret != 0) return(ret);
529     }
530     *str = cur;
531     return(0);
532 }
533 
/**
 * xmlParse3986Segment:
 * @str:  the string to analyze
 * @forbid: an optional forbidden character, 0 for none
 * @empty: allow an empty segment
 *
 * Skip a path segment, stopping before the first character which is
 * not a pchar or which is equal to @forbid
 *
 * segment       = *pchar
 * segment-nz    = 1*pchar
 * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
 *               ; non-zero-length segment without any colon ":"
 *
 * Returns 0 on success, 1 if *@str does not start with a pchar and
 *         @empty is 0
 */
static int
xmlParse3986Segment(const char **str, char forbid, int empty)
{
    const char *end = *str;

    if (!ISA_PCHAR(end))
        return(empty ? 0 : 1);

    /* here the current character is known to be a pchar */
    do {
        if (*end == forbid)
            break;
        NEXT(end);
    } while (ISA_PCHAR(end));

    *str = end;
    return (0);
}
566 
567 /**
568  * xmlParse3986PathAbEmpty:
569  * @uri:  pointer to an URI structure
570  * @str:  the string to analyze
571  *
572  * Parse an path absolute or empty and fills in the appropriate fields
573  * of the @uri structure
574  *
575  * path-abempty  = *( "/" segment )
576  *
577  * Returns 0 or the error code
578  */
579 static int
xmlParse3986PathAbEmpty(xmlURIPtr uri,const char ** str)580 xmlParse3986PathAbEmpty(xmlURIPtr uri, const char **str)
581 {
582     const char *cur;
583     int ret;
584 
585     cur = *str;
586 
587     while (*cur == '/') {
588         cur++;
589 	ret = xmlParse3986Segment(&cur, 0, 1);
590 	if (ret != 0) return(ret);
591     }
592     if (uri != NULL) {
593 	if (uri->path != NULL) xmlFree(uri->path);
594         if (*str != cur) {
595             if (uri->cleanup & 2)
596                 uri->path = STRNDUP(*str, cur - *str);
597             else
598                 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
599         } else {
600             uri->path = NULL;
601         }
602     }
603     *str = cur;
604     return (0);
605 }
606 
607 /**
608  * xmlParse3986PathAbsolute:
609  * @uri:  pointer to an URI structure
610  * @str:  the string to analyze
611  *
612  * Parse an path absolute and fills in the appropriate fields
613  * of the @uri structure
614  *
615  * path-absolute = "/" [ segment-nz *( "/" segment ) ]
616  *
617  * Returns 0 or the error code
618  */
619 static int
xmlParse3986PathAbsolute(xmlURIPtr uri,const char ** str)620 xmlParse3986PathAbsolute(xmlURIPtr uri, const char **str)
621 {
622     const char *cur;
623     int ret;
624 
625     cur = *str;
626 
627     if (*cur != '/')
628         return(1);
629     cur++;
630     ret = xmlParse3986Segment(&cur, 0, 0);
631     if (ret == 0) {
632 	while (*cur == '/') {
633 	    cur++;
634 	    ret = xmlParse3986Segment(&cur, 0, 1);
635 	    if (ret != 0) return(ret);
636 	}
637     }
638     if (uri != NULL) {
639 	if (uri->path != NULL) xmlFree(uri->path);
640         if (cur != *str) {
641             if (uri->cleanup & 2)
642                 uri->path = STRNDUP(*str, cur - *str);
643             else
644                 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
645         } else {
646             uri->path = NULL;
647         }
648     }
649     *str = cur;
650     return (0);
651 }
652 
653 /**
654  * xmlParse3986PathRootless:
655  * @uri:  pointer to an URI structure
656  * @str:  the string to analyze
657  *
658  * Parse an path without root and fills in the appropriate fields
659  * of the @uri structure
660  *
661  * path-rootless = segment-nz *( "/" segment )
662  *
663  * Returns 0 or the error code
664  */
665 static int
xmlParse3986PathRootless(xmlURIPtr uri,const char ** str)666 xmlParse3986PathRootless(xmlURIPtr uri, const char **str)
667 {
668     const char *cur;
669     int ret;
670 
671     cur = *str;
672 
673     ret = xmlParse3986Segment(&cur, 0, 0);
674     if (ret != 0) return(ret);
675     while (*cur == '/') {
676         cur++;
677 	ret = xmlParse3986Segment(&cur, 0, 1);
678 	if (ret != 0) return(ret);
679     }
680     if (uri != NULL) {
681 	if (uri->path != NULL) xmlFree(uri->path);
682         if (cur != *str) {
683             if (uri->cleanup & 2)
684                 uri->path = STRNDUP(*str, cur - *str);
685             else
686                 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
687         } else {
688             uri->path = NULL;
689         }
690     }
691     *str = cur;
692     return (0);
693 }
694 
695 /**
696  * xmlParse3986PathNoScheme:
697  * @uri:  pointer to an URI structure
698  * @str:  the string to analyze
699  *
700  * Parse an path which is not a scheme and fills in the appropriate fields
701  * of the @uri structure
702  *
703  * path-noscheme = segment-nz-nc *( "/" segment )
704  *
705  * Returns 0 or the error code
706  */
707 static int
xmlParse3986PathNoScheme(xmlURIPtr uri,const char ** str)708 xmlParse3986PathNoScheme(xmlURIPtr uri, const char **str)
709 {
710     const char *cur;
711     int ret;
712 
713     cur = *str;
714 
715     ret = xmlParse3986Segment(&cur, ':', 0);
716     if (ret != 0) return(ret);
717     while (*cur == '/') {
718         cur++;
719 	ret = xmlParse3986Segment(&cur, 0, 1);
720 	if (ret != 0) return(ret);
721     }
722     if (uri != NULL) {
723 	if (uri->path != NULL) xmlFree(uri->path);
724         if (cur != *str) {
725             if (uri->cleanup & 2)
726                 uri->path = STRNDUP(*str, cur - *str);
727             else
728                 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
729         } else {
730             uri->path = NULL;
731         }
732     }
733     *str = cur;
734     return (0);
735 }
736 
737 /**
738  * xmlParse3986HierPart:
739  * @uri:  pointer to an URI structure
740  * @str:  the string to analyze
741  *
742  * Parse an hierarchical part and fills in the appropriate fields
743  * of the @uri structure
744  *
745  * hier-part     = "//" authority path-abempty
746  *                / path-absolute
747  *                / path-rootless
748  *                / path-empty
749  *
750  * Returns 0 or the error code
751  */
752 static int
xmlParse3986HierPart(xmlURIPtr uri,const char ** str)753 xmlParse3986HierPart(xmlURIPtr uri, const char **str)
754 {
755     const char *cur;
756     int ret;
757 
758     cur = *str;
759 
760     if ((*cur == '/') && (*(cur + 1) == '/')) {
761         cur += 2;
762 	ret = xmlParse3986Authority(uri, &cur);
763 	if (ret != 0) return(ret);
764 	if (uri->server == NULL)
765 	    uri->port = -1;
766 	ret = xmlParse3986PathAbEmpty(uri, &cur);
767 	if (ret != 0) return(ret);
768 	*str = cur;
769 	return(0);
770     } else if (*cur == '/') {
771         ret = xmlParse3986PathAbsolute(uri, &cur);
772 	if (ret != 0) return(ret);
773     } else if (ISA_PCHAR(cur)) {
774         ret = xmlParse3986PathRootless(uri, &cur);
775 	if (ret != 0) return(ret);
776     } else {
777 	/* path-empty is effectively empty */
778 	if (uri != NULL) {
779 	    if (uri->path != NULL) xmlFree(uri->path);
780 	    uri->path = NULL;
781 	}
782     }
783     *str = cur;
784     return (0);
785 }
786 
787 /**
788  * xmlParse3986RelativeRef:
789  * @uri:  pointer to an URI structure
790  * @str:  the string to analyze
791  *
792  * Parse an URI string and fills in the appropriate fields
793  * of the @uri structure
794  *
795  * relative-ref  = relative-part [ "?" query ] [ "#" fragment ]
796  * relative-part = "//" authority path-abempty
797  *               / path-absolute
798  *               / path-noscheme
799  *               / path-empty
800  *
801  * Returns 0 or the error code
802  */
803 static int
xmlParse3986RelativeRef(xmlURIPtr uri,const char * str)804 xmlParse3986RelativeRef(xmlURIPtr uri, const char *str) {
805     int ret;
806 
807     if ((*str == '/') && (*(str + 1) == '/')) {
808         str += 2;
809 	ret = xmlParse3986Authority(uri, &str);
810 	if (ret != 0) return(ret);
811 	ret = xmlParse3986PathAbEmpty(uri, &str);
812 	if (ret != 0) return(ret);
813     } else if (*str == '/') {
814 	ret = xmlParse3986PathAbsolute(uri, &str);
815 	if (ret != 0) return(ret);
816     } else if (ISA_PCHAR(str)) {
817         ret = xmlParse3986PathNoScheme(uri, &str);
818 	if (ret != 0) return(ret);
819     } else {
820 	/* path-empty is effectively empty */
821 	if (uri != NULL) {
822 	    if (uri->path != NULL) xmlFree(uri->path);
823 	    uri->path = NULL;
824 	}
825     }
826 
827     if (*str == '?') {
828 	str++;
829 	ret = xmlParse3986Query(uri, &str);
830 	if (ret != 0) return(ret);
831     }
832     if (*str == '#') {
833 	str++;
834 	ret = xmlParse3986Fragment(uri, &str);
835 	if (ret != 0) return(ret);
836     }
837     if (*str != 0) {
838 	xmlCleanURI(uri);
839 	return(1);
840     }
841     return(0);
842 }
843 
844 
845 /**
846  * xmlParse3986URI:
847  * @uri:  pointer to an URI structure
848  * @str:  the string to analyze
849  *
850  * Parse an URI string and fills in the appropriate fields
851  * of the @uri structure
852  *
853  * scheme ":" hier-part [ "?" query ] [ "#" fragment ]
854  *
855  * Returns 0 or the error code
856  */
857 static int
xmlParse3986URI(xmlURIPtr uri,const char * str)858 xmlParse3986URI(xmlURIPtr uri, const char *str) {
859     int ret;
860 
861     ret = xmlParse3986Scheme(uri, &str);
862     if (ret != 0) return(ret);
863     if (*str != ':') {
864 	return(1);
865     }
866     str++;
867     ret = xmlParse3986HierPart(uri, &str);
868     if (ret != 0) return(ret);
869     if (*str == '?') {
870 	str++;
871 	ret = xmlParse3986Query(uri, &str);
872 	if (ret != 0) return(ret);
873     }
874     if (*str == '#') {
875 	str++;
876 	ret = xmlParse3986Fragment(uri, &str);
877 	if (ret != 0) return(ret);
878     }
879     if (*str != 0) {
880 	xmlCleanURI(uri);
881 	return(1);
882     }
883     return(0);
884 }
885 
886 /**
887  * xmlParse3986URIReference:
888  * @uri:  pointer to an URI structure
889  * @str:  the string to analyze
890  *
891  * Parse an URI reference string and fills in the appropriate fields
892  * of the @uri structure
893  *
894  * URI-reference = URI / relative-ref
895  *
896  * Returns 0 or the error code
897  */
898 static int
xmlParse3986URIReference(xmlURIPtr uri,const char * str)899 xmlParse3986URIReference(xmlURIPtr uri, const char *str) {
900     int ret;
901 
902     if (str == NULL)
903 	return(-1);
904     xmlCleanURI(uri);
905 
906     /*
907      * Try first to parse absolute refs, then fallback to relative if
908      * it fails.
909      */
910     ret = xmlParse3986URI(uri, str);
911     if (ret != 0) {
912 	xmlCleanURI(uri);
913         ret = xmlParse3986RelativeRef(uri, str);
914 	if (ret != 0) {
915 	    xmlCleanURI(uri);
916 	    return(ret);
917 	}
918     }
919     return(0);
920 }
921 
922 /**
923  * xmlParseURI:
924  * @str:  the URI string to analyze
925  *
926  * Parse an URI based on RFC 3986
927  *
928  * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
929  *
930  * Returns a newly built xmlURIPtr or NULL in case of error
931  */
932 xmlURIPtr
xmlParseURI(const char * str)933 xmlParseURI(const char *str) {
934     xmlURIPtr uri;
935     int ret;
936 
937     if (str == NULL)
938 	return(NULL);
939     uri = xmlCreateURI();
940     if (uri != NULL) {
941 	ret = xmlParse3986URIReference(uri, str);
942         if (ret) {
943 	    xmlFreeURI(uri);
944 	    return(NULL);
945 	}
946     }
947     return(uri);
948 }
949 
950 /**
951  * xmlParseURIReference:
952  * @uri:  pointer to an URI structure
953  * @str:  the string to analyze
954  *
955  * Parse an URI reference string based on RFC 3986 and fills in the
956  * appropriate fields of the @uri structure
957  *
958  * URI-reference = URI / relative-ref
959  *
960  * Returns 0 or the error code
961  */
962 int
xmlParseURIReference(xmlURIPtr uri,const char * str)963 xmlParseURIReference(xmlURIPtr uri, const char *str) {
964     return(xmlParse3986URIReference(uri, str));
965 }
966 
967 /**
968  * xmlParseURIRaw:
969  * @str:  the URI string to analyze
970  * @raw:  if 1 unescaping of URI pieces are disabled
971  *
972  * Parse an URI but allows to keep intact the original fragments.
973  *
974  * URI-reference = URI / relative-ref
975  *
976  * Returns a newly built xmlURIPtr or NULL in case of error
977  */
978 xmlURIPtr
xmlParseURIRaw(const char * str,int raw)979 xmlParseURIRaw(const char *str, int raw) {
980     xmlURIPtr uri;
981     int ret;
982 
983     if (str == NULL)
984 	return(NULL);
985     uri = xmlCreateURI();
986     if (uri != NULL) {
987         if (raw) {
988 	    uri->cleanup |= 2;
989 	}
990 	ret = xmlParseURIReference(uri, str);
991         if (ret) {
992 	    xmlFreeURI(uri);
993 	    return(NULL);
994 	}
995     }
996     return(uri);
997 }
998 
999 /************************************************************************
1000  *									*
1001  *			Generic URI structure functions			*
1002  *									*
1003  ************************************************************************/
1004 
1005 /**
1006  * xmlCreateURI:
1007  *
1008  * Simply creates an empty xmlURI
1009  *
1010  * Returns the new structure or NULL in case of error
1011  */
1012 xmlURIPtr
xmlCreateURI(void)1013 xmlCreateURI(void) {
1014     xmlURIPtr ret;
1015 
1016     ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI));
1017     if (ret == NULL) {
1018         xmlURIErrMemory("creating URI structure\n");
1019 	return(NULL);
1020     }
1021     memset(ret, 0, sizeof(xmlURI));
1022     return(ret);
1023 }
1024 
1025 /**
1026  * xmlSaveUriRealloc:
1027  *
1028  * Function to handle properly a reallocation when saving an URI
1029  * Also imposes some limit on the length of an URI string output
1030  */
1031 static xmlChar *
xmlSaveUriRealloc(xmlChar * ret,int * max)1032 xmlSaveUriRealloc(xmlChar *ret, int *max) {
1033     xmlChar *temp;
1034     int tmp;
1035 
1036     if (*max > MAX_URI_LENGTH) {
1037         xmlURIErrMemory("reaching arbitrary MAX_URI_LENGTH limit\n");
1038         return(NULL);
1039     }
1040     tmp = *max * 2;
1041     temp = (xmlChar *) xmlRealloc(ret, (tmp + 1));
1042     if (temp == NULL) {
1043         xmlURIErrMemory("saving URI\n");
1044         return(NULL);
1045     }
1046     *max = tmp;
1047     return(temp);
1048 }
1049 
1050 /**
1051  * xmlSaveUri:
1052  * @uri:  pointer to an xmlURI
1053  *
1054  * Save the URI as an escaped string
1055  *
1056  * Returns a new string (to be deallocated by caller)
1057  */
1058 xmlChar *
xmlSaveUri(xmlURIPtr uri)1059 xmlSaveUri(xmlURIPtr uri) {
1060     xmlChar *ret = NULL;
1061     xmlChar *temp;
1062     const char *p;
1063     int len;
1064     int max;
1065 
1066     if (uri == NULL) return(NULL);
1067 
1068 
1069     max = 80;
1070     ret = (xmlChar *) xmlMallocAtomic((max + 1) * sizeof(xmlChar));
1071     if (ret == NULL) {
1072         xmlURIErrMemory("saving URI\n");
1073 	return(NULL);
1074     }
1075     len = 0;
1076 
1077     if (uri->scheme != NULL) {
1078 	p = uri->scheme;
1079 	while (*p != 0) {
1080 	    if (len >= max) {
1081                 temp = xmlSaveUriRealloc(ret, &max);
1082                 if (temp == NULL) goto mem_error;
1083 		ret = temp;
1084 	    }
1085 	    ret[len++] = *p++;
1086 	}
1087 	if (len >= max) {
1088             temp = xmlSaveUriRealloc(ret, &max);
1089             if (temp == NULL) goto mem_error;
1090             ret = temp;
1091 	}
1092 	ret[len++] = ':';
1093     }
1094     if (uri->opaque != NULL) {
1095 	p = uri->opaque;
1096 	while (*p != 0) {
1097 	    if (len + 3 >= max) {
1098                 temp = xmlSaveUriRealloc(ret, &max);
1099                 if (temp == NULL) goto mem_error;
1100                 ret = temp;
1101 	    }
1102 	    if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p)))
1103 		ret[len++] = *p++;
1104 	    else {
1105 		int val = *(unsigned char *)p++;
1106 		int hi = val / 0x10, lo = val % 0x10;
1107 		ret[len++] = '%';
1108 		ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1109 		ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1110 	    }
1111 	}
1112     } else {
1113 	if ((uri->server != NULL) || (uri->port == -1)) {
1114 	    if (len + 3 >= max) {
1115                 temp = xmlSaveUriRealloc(ret, &max);
1116                 if (temp == NULL) goto mem_error;
1117                 ret = temp;
1118 	    }
1119 	    ret[len++] = '/';
1120 	    ret[len++] = '/';
1121 	    if (uri->user != NULL) {
1122 		p = uri->user;
1123 		while (*p != 0) {
1124 		    if (len + 3 >= max) {
1125                         temp = xmlSaveUriRealloc(ret, &max);
1126                         if (temp == NULL) goto mem_error;
1127                         ret = temp;
1128 		    }
1129 		    if ((IS_UNRESERVED(*(p))) ||
1130 			((*(p) == ';')) || ((*(p) == ':')) ||
1131 			((*(p) == '&')) || ((*(p) == '=')) ||
1132 			((*(p) == '+')) || ((*(p) == '$')) ||
1133 			((*(p) == ',')))
1134 			ret[len++] = *p++;
1135 		    else {
1136 			int val = *(unsigned char *)p++;
1137 			int hi = val / 0x10, lo = val % 0x10;
1138 			ret[len++] = '%';
1139 			ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1140 			ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1141 		    }
1142 		}
1143 		if (len + 3 >= max) {
1144                     temp = xmlSaveUriRealloc(ret, &max);
1145                     if (temp == NULL) goto mem_error;
1146                     ret = temp;
1147 		}
1148 		ret[len++] = '@';
1149 	    }
1150 	    if (uri->server != NULL) {
1151 		p = uri->server;
1152 		while (*p != 0) {
1153 		    if (len >= max) {
1154 			temp = xmlSaveUriRealloc(ret, &max);
1155 			if (temp == NULL) goto mem_error;
1156 			ret = temp;
1157 		    }
1158 		    ret[len++] = *p++;
1159 		}
1160 		if (uri->port > 0) {
1161 		    if (len + 10 >= max) {
1162 			temp = xmlSaveUriRealloc(ret, &max);
1163 			if (temp == NULL) goto mem_error;
1164 			ret = temp;
1165 		    }
1166 		    len += snprintf((char *) &ret[len], max - len, ":%d", uri->port);
1167 		}
1168 	    }
1169 	} else if (uri->authority != NULL) {
1170 	    if (len + 3 >= max) {
1171                 temp = xmlSaveUriRealloc(ret, &max);
1172                 if (temp == NULL) goto mem_error;
1173                 ret = temp;
1174 	    }
1175 	    ret[len++] = '/';
1176 	    ret[len++] = '/';
1177 	    p = uri->authority;
1178 	    while (*p != 0) {
1179 		if (len + 3 >= max) {
1180                     temp = xmlSaveUriRealloc(ret, &max);
1181                     if (temp == NULL) goto mem_error;
1182                     ret = temp;
1183 		}
1184 		if ((IS_UNRESERVED(*(p))) ||
1185                     ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||
1186                     ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1187                     ((*(p) == '=')) || ((*(p) == '+')))
1188 		    ret[len++] = *p++;
1189 		else {
1190 		    int val = *(unsigned char *)p++;
1191 		    int hi = val / 0x10, lo = val % 0x10;
1192 		    ret[len++] = '%';
1193 		    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1194 		    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1195 		}
1196 	    }
1197 	} else if (uri->scheme != NULL) {
1198 	    if (len + 3 >= max) {
1199                 temp = xmlSaveUriRealloc(ret, &max);
1200                 if (temp == NULL) goto mem_error;
1201                 ret = temp;
1202 	    }
1203 	}
1204 	if (uri->path != NULL) {
1205 	    p = uri->path;
1206 	    /*
1207 	     * the colon in file:///d: should not be escaped or
1208 	     * Windows accesses fail later.
1209 	     */
1210 	    if ((uri->scheme != NULL) &&
1211 		(p[0] == '/') &&
1212 		(((p[1] >= 'a') && (p[1] <= 'z')) ||
1213 		 ((p[1] >= 'A') && (p[1] <= 'Z'))) &&
1214 		(p[2] == ':') &&
1215 	        (xmlStrEqual(BAD_CAST uri->scheme, BAD_CAST "file"))) {
1216 		if (len + 3 >= max) {
1217                     temp = xmlSaveUriRealloc(ret, &max);
1218                     if (temp == NULL) goto mem_error;
1219                     ret = temp;
1220 		}
1221 		ret[len++] = *p++;
1222 		ret[len++] = *p++;
1223 		ret[len++] = *p++;
1224 	    }
1225 	    while (*p != 0) {
1226 		if (len + 3 >= max) {
1227                     temp = xmlSaveUriRealloc(ret, &max);
1228                     if (temp == NULL) goto mem_error;
1229                     ret = temp;
1230 		}
1231 		if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
1232                     ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1233 	            ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
1234 	            ((*(p) == ',')))
1235 		    ret[len++] = *p++;
1236 		else {
1237 		    int val = *(unsigned char *)p++;
1238 		    int hi = val / 0x10, lo = val % 0x10;
1239 		    ret[len++] = '%';
1240 		    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1241 		    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1242 		}
1243 	    }
1244 	}
1245 	if (uri->query_raw != NULL) {
1246 	    if (len + 1 >= max) {
1247                 temp = xmlSaveUriRealloc(ret, &max);
1248                 if (temp == NULL) goto mem_error;
1249                 ret = temp;
1250 	    }
1251 	    ret[len++] = '?';
1252 	    p = uri->query_raw;
1253 	    while (*p != 0) {
1254 		if (len + 1 >= max) {
1255                     temp = xmlSaveUriRealloc(ret, &max);
1256                     if (temp == NULL) goto mem_error;
1257                     ret = temp;
1258 		}
1259 		ret[len++] = *p++;
1260 	    }
1261 	} else if (uri->query != NULL) {
1262 	    if (len + 3 >= max) {
1263                 temp = xmlSaveUriRealloc(ret, &max);
1264                 if (temp == NULL) goto mem_error;
1265                 ret = temp;
1266 	    }
1267 	    ret[len++] = '?';
1268 	    p = uri->query;
1269 	    while (*p != 0) {
1270 		if (len + 3 >= max) {
1271                     temp = xmlSaveUriRealloc(ret, &max);
1272                     if (temp == NULL) goto mem_error;
1273                     ret = temp;
1274 		}
1275 		if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
1276 		    ret[len++] = *p++;
1277 		else {
1278 		    int val = *(unsigned char *)p++;
1279 		    int hi = val / 0x10, lo = val % 0x10;
1280 		    ret[len++] = '%';
1281 		    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1282 		    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1283 		}
1284 	    }
1285 	}
1286     }
1287     if (uri->fragment != NULL) {
1288 	if (len + 3 >= max) {
1289             temp = xmlSaveUriRealloc(ret, &max);
1290             if (temp == NULL) goto mem_error;
1291             ret = temp;
1292 	}
1293 	ret[len++] = '#';
1294 	p = uri->fragment;
1295 	while (*p != 0) {
1296 	    if (len + 3 >= max) {
1297                 temp = xmlSaveUriRealloc(ret, &max);
1298                 if (temp == NULL) goto mem_error;
1299                 ret = temp;
1300 	    }
1301 	    if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
1302 		ret[len++] = *p++;
1303 	    else {
1304 		int val = *(unsigned char *)p++;
1305 		int hi = val / 0x10, lo = val % 0x10;
1306 		ret[len++] = '%';
1307 		ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1308 		ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1309 	    }
1310 	}
1311     }
1312     if (len >= max) {
1313         temp = xmlSaveUriRealloc(ret, &max);
1314         if (temp == NULL) goto mem_error;
1315         ret = temp;
1316     }
1317     ret[len] = 0;
1318     return(ret);
1319 
1320 mem_error:
1321     xmlFree(ret);
1322     return(NULL);
1323 }
1324 
1325 /**
1326  * xmlPrintURI:
1327  * @stream:  a FILE* for the output
1328  * @uri:  pointer to an xmlURI
1329  *
1330  * Prints the URI in the stream @stream.
1331  */
1332 void
xmlPrintURI(FILE * stream,xmlURIPtr uri)1333 xmlPrintURI(FILE *stream, xmlURIPtr uri) {
1334     xmlChar *out;
1335 
1336     out = xmlSaveUri(uri);
1337     if (out != NULL) {
1338 	fprintf(stream, "%s", (char *) out);
1339 	xmlFree(out);
1340     }
1341 }
1342 
1343 /**
1344  * xmlCleanURI:
1345  * @uri:  pointer to an xmlURI
1346  *
1347  * Make sure the xmlURI struct is free of content
1348  */
1349 static void
xmlCleanURI(xmlURIPtr uri)1350 xmlCleanURI(xmlURIPtr uri) {
1351     if (uri == NULL) return;
1352 
1353     if (uri->scheme != NULL) xmlFree(uri->scheme);
1354     uri->scheme = NULL;
1355     if (uri->server != NULL) xmlFree(uri->server);
1356     uri->server = NULL;
1357     if (uri->user != NULL) xmlFree(uri->user);
1358     uri->user = NULL;
1359     if (uri->path != NULL) xmlFree(uri->path);
1360     uri->path = NULL;
1361     if (uri->fragment != NULL) xmlFree(uri->fragment);
1362     uri->fragment = NULL;
1363     if (uri->opaque != NULL) xmlFree(uri->opaque);
1364     uri->opaque = NULL;
1365     if (uri->authority != NULL) xmlFree(uri->authority);
1366     uri->authority = NULL;
1367     if (uri->query != NULL) xmlFree(uri->query);
1368     uri->query = NULL;
1369     if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1370     uri->query_raw = NULL;
1371 }
1372 
1373 /**
1374  * xmlFreeURI:
1375  * @uri:  pointer to an xmlURI
1376  *
1377  * Free up the xmlURI struct
1378  */
1379 void
xmlFreeURI(xmlURIPtr uri)1380 xmlFreeURI(xmlURIPtr uri) {
1381     if (uri == NULL) return;
1382 
1383     if (uri->scheme != NULL) xmlFree(uri->scheme);
1384     if (uri->server != NULL) xmlFree(uri->server);
1385     if (uri->user != NULL) xmlFree(uri->user);
1386     if (uri->path != NULL) xmlFree(uri->path);
1387     if (uri->fragment != NULL) xmlFree(uri->fragment);
1388     if (uri->opaque != NULL) xmlFree(uri->opaque);
1389     if (uri->authority != NULL) xmlFree(uri->authority);
1390     if (uri->query != NULL) xmlFree(uri->query);
1391     if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1392     xmlFree(uri);
1393 }
1394 
1395 /************************************************************************
1396  *									*
1397  *			Helper functions				*
1398  *									*
1399  ************************************************************************/
1400 
/**
 * xmlNormalizeURIPath:
 * @path:  pointer to the path string
 *
 * Applies the 5 normalization steps to a path string--that is, RFC 2396
 * Section 5.2, steps 6.c through 6.g.
 *
 * Normalization occurs directly on the string, no new allocation is done.
 * The result is never longer than the input.
 *
 * Returns 0 on success, -1 if @path is NULL
 */
int
xmlNormalizeURIPath(char *path) {
    char *cur, *out;

    if (path == NULL)
        return(-1);

    /* Skip all initial "/" chars.  We want to get to the beginning of the
     * first non-empty segment.
     */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /* Keep everything we've seen so far.  */
    out = cur;

    /*
     * Analyze each segment in sequence for cases (c) and (d).
     */
    while (cur[0] != '\0') {
        /*
         * c) All occurrences of "./", where "." is a complete path segment,
         *    are removed from the buffer string.
         */
        if ((cur[0] == '.') && (cur[1] == '/')) {
            cur += 2;
            /* '//' normalization should be done at this point too */
            while (cur[0] == '/')
                cur++;
            continue;
        }

        /*
         * d) If the buffer string ends with "." as a complete path segment,
         *    that "." is removed.
         */
        if ((cur[0] == '.') && (cur[1] == '\0'))
            break;

        /* Otherwise keep the segment.  */
        while (cur[0] != '/') {
            if (cur[0] == '\0')
                goto done_cd;
            (out++)[0] = (cur++)[0];
        }
        /* normalize // : collapse a run of slashes into a single one */
        while ((cur[0] == '/') && (cur[1] == '/'))
            cur++;

        (out++)[0] = (cur++)[0];
    }
 done_cd:
    out[0] = '\0';

    /* Reset to the beginning of the first segment for the next sequence.  */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /*
     * Analyze each segment in sequence for cases (e) and (f).
     *
     * e) All occurrences of "<segment>/../", where <segment> is a
     *    complete path segment not equal to "..", are removed from the
     *    buffer string.  Removal of these path segments is performed
     *    iteratively, removing the leftmost matching pattern on each
     *    iteration, until no matching pattern remains.
     *
     * f) If the buffer string ends with "<segment>/..", where <segment>
     *    is a complete path segment not equal to "..", that
     *    "<segment>/.." is removed.
     *
     * To satisfy the "iterative" clause in (e), we need to collapse the
     * string every time we find something that needs to be removed.  Thus,
     * we don't need to keep two pointers into the string: we only need a
     * "current position" pointer.
     */
    while (1) {
        char *segp, *tmp;

        /* At the beginning of each iteration of this loop, "cur" points to
         * the first character of the segment we want to examine.
         */

        /* Find the end of the current segment.  */
        segp = cur;
        while ((segp[0] != '/') && (segp[0] != '\0'))
            ++segp;

        /* If this is the last segment, we're done (we need at least two
         * segments to meet the criteria for the (e) and (f) cases).
         */
        if (segp[0] == '\0')
            break;

        /* If the first segment is "..", or if the next segment _isn't_ "..",
         * keep this segment and try the next one.
         */
        ++segp;
        if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur+3))
            || ((segp[0] != '.') || (segp[1] != '.')
                || ((segp[2] != '/') && (segp[2] != '\0')))) {
            cur = segp;
            continue;
        }

        /* If we get here, remove this segment and the next one and back up
         * to the previous segment (if there is one), to implement the
         * "iteratively" clause.  It's pretty much impossible to back up
         * while maintaining two pointers into the buffer, so just compact
         * the whole buffer now.
         */

        /* If this is the end of the buffer, we're done.  */
        if (segp[2] == '\0') {
            cur[0] = '\0';
            break;
        }
        /* The source and destination overlap, so copy by hand instead of
         * using strcpy(cur, segp + 3).
         */
        tmp = cur;
        segp += 3;
        while ((*tmp++ = *segp++) != 0)
            ;

        /* Look for the last character of a previous segment.  */
        segp = cur;
        while ((segp > path) && ((--segp)[0] == '/'))
            ;

        /* There is no previous segment when "cur" is the very start of
         * the buffer, or when only slashes precede it.  Reaching "path"
         * is not enough to tell: a previous segment made of the single
         * character path[0] also ends the scan there, and it must be
         * re-examined or "a/b/../.." would be left as "a/..".
         */
        if ((segp == path) && ((cur == path) || (path[0] == '/')))
            continue;

        /* "segp" is pointing to the end of a previous segment; find its
         * start.  We need to back up to the previous segment and start
         * over with that to handle things like "foo/bar/../..".  If we
         * don't do this, then on the first pass we'll remove the "bar/..",
         * but be pointing at the second ".." so we won't realize we can also
         * remove the "foo/..".
         */
        cur = segp;
        while ((cur > path) && (cur[-1] != '/'))
            --cur;
    }

    /*
     * g) If the resulting buffer string still begins with one or more
     *    complete path segments of "..", then the reference is
     *    considered to be in error. Implementations may handle this
     *    error by retaining these components in the resolved path (i.e.,
     *    treating them as part of the final URI), by removing them from
     *    the resolved path (i.e., discarding relative levels above the
     *    root), or by avoiding traversal of the reference.
     *
     * We discard them from the final path, but only for a path starting
     * with a slash.
     */
    if (path[0] == '/') {
        cur = path;
        while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.')
               && ((cur[3] == '/') || (cur[3] == '\0')))
            cur += 3;

        if (cur != path) {
            out = path;
            while (cur[0] != '\0')
                (out++)[0] = (cur++)[0];
            out[0] = 0;
        }
    }

    return(0);
}
1589 
/* Returns 1 if @c is an ASCII hexadecimal digit (0-9, a-f, A-F), else 0 */
static int is_hex(char c) {
    if ((c >= '0') && (c <= '9'))
        return(1);
    if ((c >= 'a') && (c <= 'f'))
        return(1);
    return(((c >= 'A') && (c <= 'F')) ? 1 : 0);
}
1597 
/**
 * xmlURIUnescapeString:
 * @str:  the string to unescape
 * @len:   the length in bytes to unescape (or <= 0 to indicate full string)
 * @target:  optional destination buffer
 *
 * Unescaping routine, it does not check that the string is an URI. Each
 * "%XX" sequence made of two hexadecimal digits is replaced by the byte
 * of that value, anything else is copied as is. The result is never
 * longer than the input and is always NUL terminated; it is stored in
 * @target if provided, in a newly allocated buffer otherwise.
 *
 * Returns the unescaped string (@target or the new buffer), NULL in case
 * of error
 */
char *
xmlURIUnescapeString(const char *str, int len, char *target) {
    const char *src;
    char *result, *dst;

    if (str == NULL)
        return(NULL);
    if (len <= 0) len = strlen(str);
    if (len < 0) return(NULL);

    result = target;
    if (result == NULL) {
        result = (char *) xmlMallocAtomic(len + 1);
        if (result == NULL) {
            xmlURIErrMemory("unescaping URI value\n");
            return(NULL);
        }
    }

    src = str;
    dst = result;
    while (len > 0) {
        /* an escape needs the '%' plus two hex digits within the length */
        if ((len > 2) && (src[0] == '%') &&
            (is_hex(src[1])) && (is_hex(src[2]))) {
            int value = 0;
            int i;

            for (i = 1; i <= 2; i++) {
                char digit = src[i];

                value *= 16;
                if ((digit >= '0') && (digit <= '9'))
                    value += digit - '0';
                else if ((digit >= 'a') && (digit <= 'f'))
                    value += (digit - 'a') + 10;
                else
                    value += (digit - 'A') + 10;
            }
            *dst++ = value;
            src += 3;
            len -= 3;
        } else {
            *dst++ = *src++;
            len--;
        }
    }
    *dst = 0;
    return(result);
}
1659 
1660 /**
1661  * xmlURIEscapeStr:
1662  * @str:  string to escape
1663  * @list: exception list string of chars not to escape
1664  *
1665  * This routine escapes a string to hex, ignoring reserved characters (a-z)
1666  * and the characters in the exception list.
1667  *
1668  * Returns a new escaped string or NULL in case of error.
1669  */
1670 xmlChar *
xmlURIEscapeStr(const xmlChar * str,const xmlChar * list)1671 xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) {
1672     xmlChar *ret, ch;
1673     xmlChar *temp;
1674     const xmlChar *in;
1675     int len, out;
1676 
1677     if (str == NULL)
1678 	return(NULL);
1679     if (str[0] == 0)
1680 	return(xmlStrdup(str));
1681     len = xmlStrlen(str);
1682     if (!(len > 0)) return(NULL);
1683 
1684     len += 20;
1685     ret = (xmlChar *) xmlMallocAtomic(len);
1686     if (ret == NULL) {
1687         xmlURIErrMemory("escaping URI value\n");
1688 	return(NULL);
1689     }
1690     in = (const xmlChar *) str;
1691     out = 0;
1692     while(*in != 0) {
1693 	if (len - out <= 3) {
1694             temp = xmlSaveUriRealloc(ret, &len);
1695 	    if (temp == NULL) {
1696                 xmlURIErrMemory("escaping URI value\n");
1697 		xmlFree(ret);
1698 		return(NULL);
1699 	    }
1700 	    ret = temp;
1701 	}
1702 
1703 	ch = *in;
1704 
1705 	if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch))) {
1706 	    unsigned char val;
1707 	    ret[out++] = '%';
1708 	    val = ch >> 4;
1709 	    if (val <= 9)
1710 		ret[out++] = '0' + val;
1711 	    else
1712 		ret[out++] = 'A' + val - 0xA;
1713 	    val = ch & 0xF;
1714 	    if (val <= 9)
1715 		ret[out++] = '0' + val;
1716 	    else
1717 		ret[out++] = 'A' + val - 0xA;
1718 	    in++;
1719 	} else {
1720 	    ret[out++] = *in++;
1721 	}
1722 
1723     }
1724     ret[out] = 0;
1725     return(ret);
1726 }
1727 
1728 /**
1729  * xmlURIEscape:
1730  * @str:  the string of the URI to escape
1731  *
1732  * Escaping routine, does not do validity checks !
1733  * It will try to escape the chars needing this, but this is heuristic
1734  * based it's impossible to be sure.
1735  *
1736  * Returns an copy of the string, but escaped
1737  *
1738  * 25 May 2001
1739  * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly
1740  * according to RFC2396.
1741  *   - Carl Douglas
1742  */
1743 xmlChar *
xmlURIEscape(const xmlChar * str)1744 xmlURIEscape(const xmlChar * str)
1745 {
1746     xmlChar *ret, *segment = NULL;
1747     xmlURIPtr uri;
1748     int ret2;
1749 
1750 #define NULLCHK(p) if(!p) { \
1751          xmlURIErrMemory("escaping URI value\n"); \
1752          xmlFreeURI(uri); \
1753          return NULL; } \
1754 
1755     if (str == NULL)
1756         return (NULL);
1757 
1758     uri = xmlCreateURI();
1759     if (uri != NULL) {
1760 	/*
1761 	 * Allow escaping errors in the unescaped form
1762 	 */
1763         uri->cleanup = 1;
1764         ret2 = xmlParseURIReference(uri, (const char *)str);
1765         if (ret2) {
1766             xmlFreeURI(uri);
1767             return (NULL);
1768         }
1769     }
1770 
1771     if (!uri)
1772         return NULL;
1773 
1774     ret = NULL;
1775 
1776     if (uri->scheme) {
1777         segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-.");
1778         NULLCHK(segment)
1779         ret = xmlStrcat(ret, segment);
1780         ret = xmlStrcat(ret, BAD_CAST ":");
1781         xmlFree(segment);
1782     }
1783 
1784     if (uri->authority) {
1785         segment =
1786             xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@");
1787         NULLCHK(segment)
1788         ret = xmlStrcat(ret, BAD_CAST "//");
1789         ret = xmlStrcat(ret, segment);
1790         xmlFree(segment);
1791     }
1792 
1793     if (uri->user) {
1794         segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,");
1795         NULLCHK(segment)
1796 		ret = xmlStrcat(ret,BAD_CAST "//");
1797         ret = xmlStrcat(ret, segment);
1798         ret = xmlStrcat(ret, BAD_CAST "@");
1799         xmlFree(segment);
1800     }
1801 
1802     if (uri->server) {
1803         segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@");
1804         NULLCHK(segment)
1805 		if (uri->user == NULL)
1806 		ret = xmlStrcat(ret, BAD_CAST "//");
1807         ret = xmlStrcat(ret, segment);
1808         xmlFree(segment);
1809     }
1810 
1811     if (uri->port) {
1812         xmlChar port[10];
1813 
1814         snprintf((char *) port, 10, "%d", uri->port);
1815         ret = xmlStrcat(ret, BAD_CAST ":");
1816         ret = xmlStrcat(ret, port);
1817     }
1818 
1819     if (uri->path) {
1820         segment =
1821             xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;");
1822         NULLCHK(segment)
1823         ret = xmlStrcat(ret, segment);
1824         xmlFree(segment);
1825     }
1826 
1827     if (uri->query_raw) {
1828         ret = xmlStrcat(ret, BAD_CAST "?");
1829         ret = xmlStrcat(ret, BAD_CAST uri->query_raw);
1830     }
1831     else if (uri->query) {
1832         segment =
1833             xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$");
1834         NULLCHK(segment)
1835         ret = xmlStrcat(ret, BAD_CAST "?");
1836         ret = xmlStrcat(ret, segment);
1837         xmlFree(segment);
1838     }
1839 
1840     if (uri->opaque) {
1841         segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST "");
1842         NULLCHK(segment)
1843         ret = xmlStrcat(ret, segment);
1844         xmlFree(segment);
1845     }
1846 
1847     if (uri->fragment) {
1848         segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#");
1849         NULLCHK(segment)
1850         ret = xmlStrcat(ret, BAD_CAST "#");
1851         ret = xmlStrcat(ret, segment);
1852         xmlFree(segment);
1853     }
1854 
1855     xmlFreeURI(uri);
1856 #undef NULLCHK
1857 
1858     return (ret);
1859 }
1860 
1861 /************************************************************************
1862  *									*
1863  *			Public functions				*
1864  *									*
1865  ************************************************************************/
1866 
1867 /**
1868  * xmlBuildURI:
1869  * @URI:  the URI instance found in the document
1870  * @base:  the base value
1871  *
 * Computes the final URI of the reference done by checking that
1873  * the given URI is valid, and building the final URI using the
1874  * base URI. This is processed according to section 5.2 of the
1875  * RFC 2396
1876  *
1877  * 5.2. Resolving Relative References to Absolute Form
1878  *
1879  * Returns a new URI string (to be freed by the caller) or NULL in case
1880  *         of error.
1881  */
1882 xmlChar *
xmlBuildURI(const xmlChar * URI,const xmlChar * base)1883 xmlBuildURI(const xmlChar *URI, const xmlChar *base) {
1884     xmlChar *val = NULL;
1885     int ret, len, indx, cur, out;
1886     xmlURIPtr ref = NULL;
1887     xmlURIPtr bas = NULL;
1888     xmlURIPtr res = NULL;
1889 
1890     /*
1891      * 1) The URI reference is parsed into the potential four components and
1892      *    fragment identifier, as described in Section 4.3.
1893      *
1894      *    NOTE that a completely empty URI is treated by modern browsers
1895      *    as a reference to "." rather than as a synonym for the current
1896      *    URI.  Should we do that here?
1897      */
1898     if (URI == NULL)
1899 	ret = -1;
1900     else {
1901 	if (*URI) {
1902 	    ref = xmlCreateURI();
1903 	    if (ref == NULL)
1904 		goto done;
1905 	    ret = xmlParseURIReference(ref, (const char *) URI);
1906 	}
1907 	else
1908 	    ret = 0;
1909     }
1910     if (ret != 0)
1911 	goto done;
1912     if ((ref != NULL) && (ref->scheme != NULL)) {
1913 	/*
1914 	 * The URI is absolute don't modify.
1915 	 */
1916 	val = xmlStrdup(URI);
1917 	goto done;
1918     }
1919     if (base == NULL)
1920 	ret = -1;
1921     else {
1922 	bas = xmlCreateURI();
1923 	if (bas == NULL)
1924 	    goto done;
1925 	ret = xmlParseURIReference(bas, (const char *) base);
1926     }
1927     if (ret != 0) {
1928 	if (ref)
1929 	    val = xmlSaveUri(ref);
1930 	goto done;
1931     }
1932     if (ref == NULL) {
1933 	/*
1934 	 * the base fragment must be ignored
1935 	 */
1936 	if (bas->fragment != NULL) {
1937 	    xmlFree(bas->fragment);
1938 	    bas->fragment = NULL;
1939 	}
1940 	val = xmlSaveUri(bas);
1941 	goto done;
1942     }
1943 
1944     /*
1945      * 2) If the path component is empty and the scheme, authority, and
1946      *    query components are undefined, then it is a reference to the
1947      *    current document and we are done.  Otherwise, the reference URI's
1948      *    query and fragment components are defined as found (or not found)
1949      *    within the URI reference and not inherited from the base URI.
1950      *
1951      *    NOTE that in modern browsers, the parsing differs from the above
1952      *    in the following aspect:  the query component is allowed to be
1953      *    defined while still treating this as a reference to the current
1954      *    document.
1955      */
1956     res = xmlCreateURI();
1957     if (res == NULL)
1958 	goto done;
1959     if ((ref->scheme == NULL) && (ref->path == NULL) &&
1960 	((ref->authority == NULL) && (ref->server == NULL))) {
1961 	if (bas->scheme != NULL)
1962 	    res->scheme = xmlMemStrdup(bas->scheme);
1963 	if (bas->authority != NULL)
1964 	    res->authority = xmlMemStrdup(bas->authority);
1965 	else if (bas->server != NULL) {
1966 	    res->server = xmlMemStrdup(bas->server);
1967 	    if (bas->user != NULL)
1968 		res->user = xmlMemStrdup(bas->user);
1969 	    res->port = bas->port;
1970 	}
1971 	if (bas->path != NULL)
1972 	    res->path = xmlMemStrdup(bas->path);
1973 	if (ref->query_raw != NULL)
1974 	    res->query_raw = xmlMemStrdup (ref->query_raw);
1975 	else if (ref->query != NULL)
1976 	    res->query = xmlMemStrdup(ref->query);
1977 	else if (bas->query_raw != NULL)
1978 	    res->query_raw = xmlMemStrdup(bas->query_raw);
1979 	else if (bas->query != NULL)
1980 	    res->query = xmlMemStrdup(bas->query);
1981 	if (ref->fragment != NULL)
1982 	    res->fragment = xmlMemStrdup(ref->fragment);
1983 	goto step_7;
1984     }
1985 
1986     /*
1987      * 3) If the scheme component is defined, indicating that the reference
1988      *    starts with a scheme name, then the reference is interpreted as an
1989      *    absolute URI and we are done.  Otherwise, the reference URI's
1990      *    scheme is inherited from the base URI's scheme component.
1991      */
1992     if (ref->scheme != NULL) {
1993 	val = xmlSaveUri(ref);
1994 	goto done;
1995     }
1996     if (bas->scheme != NULL)
1997 	res->scheme = xmlMemStrdup(bas->scheme);
1998 
1999     if (ref->query_raw != NULL)
2000 	res->query_raw = xmlMemStrdup(ref->query_raw);
2001     else if (ref->query != NULL)
2002 	res->query = xmlMemStrdup(ref->query);
2003     if (ref->fragment != NULL)
2004 	res->fragment = xmlMemStrdup(ref->fragment);
2005 
2006     /*
2007      * 4) If the authority component is defined, then the reference is a
2008      *    network-path and we skip to step 7.  Otherwise, the reference
2009      *    URI's authority is inherited from the base URI's authority
2010      *    component, which will also be undefined if the URI scheme does not
2011      *    use an authority component.
2012      */
2013     if ((ref->authority != NULL) || (ref->server != NULL)) {
2014 	if (ref->authority != NULL)
2015 	    res->authority = xmlMemStrdup(ref->authority);
2016 	else {
2017 	    res->server = xmlMemStrdup(ref->server);
2018 	    if (ref->user != NULL)
2019 		res->user = xmlMemStrdup(ref->user);
2020             res->port = ref->port;
2021 	}
2022 	if (ref->path != NULL)
2023 	    res->path = xmlMemStrdup(ref->path);
2024 	goto step_7;
2025     }
2026     if (bas->authority != NULL)
2027 	res->authority = xmlMemStrdup(bas->authority);
2028     else if (bas->server != NULL) {
2029 	res->server = xmlMemStrdup(bas->server);
2030 	if (bas->user != NULL)
2031 	    res->user = xmlMemStrdup(bas->user);
2032 	res->port = bas->port;
2033     }
2034 
2035     /*
2036      * 5) If the path component begins with a slash character ("/"), then
2037      *    the reference is an absolute-path and we skip to step 7.
2038      */
2039     if ((ref->path != NULL) && (ref->path[0] == '/')) {
2040 	res->path = xmlMemStrdup(ref->path);
2041 	goto step_7;
2042     }
2043 
2044 
2045     /*
2046      * 6) If this step is reached, then we are resolving a relative-path
2047      *    reference.  The relative path needs to be merged with the base
2048      *    URI's path.  Although there are many ways to do this, we will
2049      *    describe a simple method using a separate string buffer.
2050      *
2051      * Allocate a buffer large enough for the result string.
2052      */
2053     len = 2; /* extra / and 0 */
2054     if (ref->path != NULL)
2055 	len += strlen(ref->path);
2056     if (bas->path != NULL)
2057 	len += strlen(bas->path);
2058     res->path = (char *) xmlMallocAtomic(len);
2059     if (res->path == NULL) {
2060         xmlURIErrMemory("resolving URI against base\n");
2061 	goto done;
2062     }
2063     res->path[0] = 0;
2064 
2065     /*
2066      * a) All but the last segment of the base URI's path component is
2067      *    copied to the buffer.  In other words, any characters after the
2068      *    last (right-most) slash character, if any, are excluded.
2069      */
2070     cur = 0;
2071     out = 0;
2072     if (bas->path != NULL) {
2073 	while (bas->path[cur] != 0) {
2074 	    while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
2075 		cur++;
2076 	    if (bas->path[cur] == 0)
2077 		break;
2078 
2079 	    cur++;
2080 	    while (out < cur) {
2081 		res->path[out] = bas->path[out];
2082 		out++;
2083 	    }
2084 	}
2085     }
2086     res->path[out] = 0;
2087 
2088     /*
2089      * b) The reference's path component is appended to the buffer
2090      *    string.
2091      */
2092     if (ref->path != NULL && ref->path[0] != 0) {
2093 	indx = 0;
2094 	/*
2095 	 * Ensure the path includes a '/'
2096 	 */
2097 	if ((out == 0) && (bas->server != NULL))
2098 	    res->path[out++] = '/';
2099 	while (ref->path[indx] != 0) {
2100 	    res->path[out++] = ref->path[indx++];
2101 	}
2102     }
2103     res->path[out] = 0;
2104 
2105     /*
2106      * Steps c) to h) are really path normalization steps
2107      */
2108     xmlNormalizeURIPath(res->path);
2109 
2110 step_7:
2111 
2112     /*
2113      * 7) The resulting URI components, including any inherited from the
2114      *    base URI, are recombined to give the absolute form of the URI
2115      *    reference.
2116      */
2117     val = xmlSaveUri(res);
2118 
2119 done:
2120     if (ref != NULL)
2121 	xmlFreeURI(ref);
2122     if (bas != NULL)
2123 	xmlFreeURI(bas);
2124     if (res != NULL)
2125 	xmlFreeURI(res);
2126     return(val);
2127 }
2128 
2129 /**
2130  * xmlBuildRelativeURI:
2131  * @URI:  the URI reference under consideration
2132  * @base:  the base value
2133  *
2134  * Expresses the URI of the reference in terms relative to the
2135  * base.  Some examples of this operation include:
2136  *     base = "http://site1.com/docs/book1.html"
2137  *        URI input                        URI returned
2138  *     docs/pic1.gif                    pic1.gif
2139  *     docs/img/pic1.gif                img/pic1.gif
2140  *     img/pic1.gif                     ../img/pic1.gif
2141  *     http://site1.com/docs/pic1.gif   pic1.gif
2142  *     http://site2.com/docs/pic1.gif   http://site2.com/docs/pic1.gif
2143  *
2144  *     base = "docs/book1.html"
2145  *        URI input                        URI returned
2146  *     docs/pic1.gif                    pic1.gif
2147  *     docs/img/pic1.gif                img/pic1.gif
2148  *     img/pic1.gif                     ../img/pic1.gif
2149  *     http://site1.com/docs/pic1.gif   http://site1.com/docs/pic1.gif
2150  *
2151  *
2152  * Note: if the URI reference is really wierd or complicated, it may be
2153  *       worthwhile to first convert it into a "nice" one by calling
2154  *       xmlBuildURI (using 'base') before calling this routine,
2155  *       since this routine (for reasonable efficiency) assumes URI has
2156  *       already been through some validation.
2157  *
2158  * Returns a new URI string (to be freed by the caller) or NULL in case
2159  * error.
2160  */
2161 xmlChar *
xmlBuildRelativeURI(const xmlChar * URI,const xmlChar * base)2162 xmlBuildRelativeURI (const xmlChar * URI, const xmlChar * base)
2163 {
2164     xmlChar *val = NULL;
2165     int ret;
2166     int ix;
2167     int pos = 0;
2168     int nbslash = 0;
2169     int len;
2170     xmlURIPtr ref = NULL;
2171     xmlURIPtr bas = NULL;
2172     xmlChar *bptr, *uptr, *vptr;
2173     int remove_path = 0;
2174 
2175     if ((URI == NULL) || (*URI == 0))
2176 	return NULL;
2177 
2178     /*
2179      * First parse URI into a standard form
2180      */
2181     ref = xmlCreateURI ();
2182     if (ref == NULL)
2183 	return NULL;
2184     /* If URI not already in "relative" form */
2185     if (URI[0] != '.') {
2186 	ret = xmlParseURIReference (ref, (const char *) URI);
2187 	if (ret != 0)
2188 	    goto done;		/* Error in URI, return NULL */
2189     } else
2190 	ref->path = (char *)xmlStrdup(URI);
2191 
2192     /*
2193      * Next parse base into the same standard form
2194      */
2195     if ((base == NULL) || (*base == 0)) {
2196 	val = xmlStrdup (URI);
2197 	goto done;
2198     }
2199     bas = xmlCreateURI ();
2200     if (bas == NULL)
2201 	goto done;
2202     if (base[0] != '.') {
2203 	ret = xmlParseURIReference (bas, (const char *) base);
2204 	if (ret != 0)
2205 	    goto done;		/* Error in base, return NULL */
2206     } else
2207 	bas->path = (char *)xmlStrdup(base);
2208 
2209     /*
2210      * If the scheme / server on the URI differs from the base,
2211      * just return the URI
2212      */
2213     if ((ref->scheme != NULL) &&
2214 	((bas->scheme == NULL) ||
2215 	 (xmlStrcmp ((xmlChar *)bas->scheme, (xmlChar *)ref->scheme)) ||
2216 	 (xmlStrcmp ((xmlChar *)bas->server, (xmlChar *)ref->server)))) {
2217 	val = xmlStrdup (URI);
2218 	goto done;
2219     }
2220     if (xmlStrEqual((xmlChar *)bas->path, (xmlChar *)ref->path)) {
2221 	val = xmlStrdup(BAD_CAST "");
2222 	goto done;
2223     }
2224     if (bas->path == NULL) {
2225 	val = xmlStrdup((xmlChar *)ref->path);
2226 	goto done;
2227     }
2228     if (ref->path == NULL) {
2229         ref->path = (char *) "/";
2230 	remove_path = 1;
2231     }
2232 
2233     /*
2234      * At this point (at last!) we can compare the two paths
2235      *
2236      * First we take care of the special case where either of the
2237      * two path components may be missing (bug 316224)
2238      */
2239     if (bas->path == NULL) {
2240 	if (ref->path != NULL) {
2241 	    uptr = (xmlChar *) ref->path;
2242 	    if (*uptr == '/')
2243 		uptr++;
2244 	    /* exception characters from xmlSaveUri */
2245 	    val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
2246 	}
2247 	goto done;
2248     }
2249     bptr = (xmlChar *)bas->path;
2250     if (ref->path == NULL) {
2251 	for (ix = 0; bptr[ix] != 0; ix++) {
2252 	    if (bptr[ix] == '/')
2253 		nbslash++;
2254 	}
2255 	uptr = NULL;
2256 	len = 1;	/* this is for a string terminator only */
2257     } else {
2258     /*
2259      * Next we compare the two strings and find where they first differ
2260      */
2261 	if ((ref->path[pos] == '.') && (ref->path[pos+1] == '/'))
2262             pos += 2;
2263 	if ((*bptr == '.') && (bptr[1] == '/'))
2264             bptr += 2;
2265 	else if ((*bptr == '/') && (ref->path[pos] != '/'))
2266 	    bptr++;
2267 	while ((bptr[pos] == ref->path[pos]) && (bptr[pos] != 0))
2268 	    pos++;
2269 
2270 	if (bptr[pos] == ref->path[pos]) {
2271 	    val = xmlStrdup(BAD_CAST "");
2272 	    goto done;		/* (I can't imagine why anyone would do this) */
2273 	}
2274 
2275 	/*
2276 	 * In URI, "back up" to the last '/' encountered.  This will be the
2277 	 * beginning of the "unique" suffix of URI
2278 	 */
2279 	ix = pos;
2280 	if ((ref->path[ix] == '/') && (ix > 0))
2281 	    ix--;
2282 	else if ((ref->path[ix] == 0) && (ix > 1) && (ref->path[ix - 1] == '/'))
2283 	    ix -= 2;
2284 	for (; ix > 0; ix--) {
2285 	    if (ref->path[ix] == '/')
2286 		break;
2287 	}
2288 	if (ix == 0) {
2289 	    uptr = (xmlChar *)ref->path;
2290 	} else {
2291 	    ix++;
2292 	    uptr = (xmlChar *)&ref->path[ix];
2293 	}
2294 
2295 	/*
2296 	 * In base, count the number of '/' from the differing point
2297 	 */
2298 	if (bptr[pos] != ref->path[pos]) {/* check for trivial URI == base */
2299 	    for (; bptr[ix] != 0; ix++) {
2300 		if (bptr[ix] == '/')
2301 		    nbslash++;
2302 	    }
2303 	}
2304 	len = xmlStrlen (uptr) + 1;
2305     }
2306 
2307     if (nbslash == 0) {
2308 	if (uptr != NULL)
2309 	    /* exception characters from xmlSaveUri */
2310 	    val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
2311 	goto done;
2312     }
2313 
2314     /*
2315      * Allocate just enough space for the returned string -
2316      * length of the remainder of the URI, plus enough space
2317      * for the "../" groups, plus one for the terminator
2318      */
2319     val = (xmlChar *) xmlMalloc (len + 3 * nbslash);
2320     if (val == NULL) {
2321         xmlURIErrMemory("building relative URI\n");
2322 	goto done;
2323     }
2324     vptr = val;
2325     /*
2326      * Put in as many "../" as needed
2327      */
2328     for (; nbslash>0; nbslash--) {
2329 	*vptr++ = '.';
2330 	*vptr++ = '.';
2331 	*vptr++ = '/';
2332     }
2333     /*
2334      * Finish up with the end of the URI
2335      */
2336     if (uptr != NULL) {
2337         if ((vptr > val) && (len > 0) &&
2338 	    (uptr[0] == '/') && (vptr[-1] == '/')) {
2339 	    memcpy (vptr, uptr + 1, len - 1);
2340 	    vptr[len - 2] = 0;
2341 	} else {
2342 	    memcpy (vptr, uptr, len);
2343 	    vptr[len - 1] = 0;
2344 	}
2345     } else {
2346 	vptr[len - 1] = 0;
2347     }
2348 
2349     /* escape the freshly-built path */
2350     vptr = val;
2351 	/* exception characters from xmlSaveUri */
2352     val = xmlURIEscapeStr(vptr, BAD_CAST "/;&=+$,");
2353     xmlFree(vptr);
2354 
2355 done:
2356     /*
2357      * Free the working variables
2358      */
2359     if (remove_path != 0)
2360         ref->path = NULL;
2361     if (ref != NULL)
2362 	xmlFreeURI (ref);
2363     if (bas != NULL)
2364 	xmlFreeURI (bas);
2365 
2366     return val;
2367 }
2368 
2369 /**
2370  * xmlCanonicPath:
2371  * @path:  the resource locator in a filesystem notation
2372  *
2373  * Constructs a canonic path from the specified path.
2374  *
2375  * Returns a new canonic path, or a duplicate of the path parameter if the
2376  * construction fails. The caller is responsible for freeing the memory occupied
2377  * by the returned string. If there is insufficient memory available, or the
2378  * argument is NULL, the function returns NULL.
2379  */
2380 #define IS_WINDOWS_PATH(p)					\
2381 	((p != NULL) &&						\
2382 	 (((p[0] >= 'a') && (p[0] <= 'z')) ||			\
2383 	  ((p[0] >= 'A') && (p[0] <= 'Z'))) &&			\
2384 	 (p[1] == ':') && ((p[2] == '/') || (p[2] == '\\')))
2385 xmlChar *
xmlCanonicPath(const xmlChar * path)2386 xmlCanonicPath(const xmlChar *path)
2387 {
2388 /*
2389  * For Windows implementations, additional work needs to be done to
2390  * replace backslashes in pathnames with "forward slashes"
2391  */
2392 #if defined(_WIN32) && !defined(__CYGWIN__)
2393     int len = 0;
2394     int i = 0;
2395     xmlChar *p = NULL;
2396 #endif
2397     xmlURIPtr uri;
2398     xmlChar *ret;
2399     const xmlChar *absuri;
2400 
2401     if (path == NULL)
2402 	return(NULL);
2403 
2404 #if defined(_WIN32)
2405     /*
2406      * We must not change the backslashes to slashes if the the path
2407      * starts with \\?\
2408      * Those paths can be up to 32k characters long.
2409      * Was added specifically for OpenOffice, those paths can't be converted
2410      * to URIs anyway.
2411      */
2412     if ((path[0] == '\\') && (path[1] == '\\') && (path[2] == '?') &&
2413         (path[3] == '\\') )
2414 	return xmlStrdup((const xmlChar *) path);
2415 #endif
2416 
2417 	/* sanitize filename starting with // so it can be used as URI */
2418     if ((path[0] == '/') && (path[1] == '/') && (path[2] != '/'))
2419         path++;
2420 
2421     if ((uri = xmlParseURI((const char *) path)) != NULL) {
2422 	xmlFreeURI(uri);
2423 	return xmlStrdup(path);
2424     }
2425 
2426     /* Check if this is an "absolute uri" */
2427     absuri = xmlStrstr(path, BAD_CAST "://");
2428     if (absuri != NULL) {
2429         int l, j;
2430 	unsigned char c;
2431 	xmlChar *escURI;
2432 
2433         /*
2434 	 * this looks like an URI where some parts have not been
2435 	 * escaped leading to a parsing problem.  Check that the first
2436 	 * part matches a protocol.
2437 	 */
2438 	l = absuri - path;
2439 	/* Bypass if first part (part before the '://') is > 20 chars */
2440 	if ((l <= 0) || (l > 20))
2441 	    goto path_processing;
2442 	/* Bypass if any non-alpha characters are present in first part */
2443 	for (j = 0;j < l;j++) {
2444 	    c = path[j];
2445 	    if (!(((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z'))))
2446 	        goto path_processing;
2447 	}
2448 
2449 	/* Escape all except the characters specified in the supplied path */
2450         escURI = xmlURIEscapeStr(path, BAD_CAST ":/?_.#&;=");
2451 	if (escURI != NULL) {
2452 	    /* Try parsing the escaped path */
2453 	    uri = xmlParseURI((const char *) escURI);
2454 	    /* If successful, return the escaped string */
2455 	    if (uri != NULL) {
2456 	        xmlFreeURI(uri);
2457 		return escURI;
2458 	    }
2459 	}
2460     }
2461 
2462 path_processing:
2463 /* For Windows implementations, replace backslashes with 'forward slashes' */
2464 #if defined(_WIN32) && !defined(__CYGWIN__)
2465     /*
2466      * Create a URI structure
2467      */
2468     uri = xmlCreateURI();
2469     if (uri == NULL) {		/* Guard against 'out of memory' */
2470         return(NULL);
2471     }
2472 
2473     len = xmlStrlen(path);
2474     if ((len > 2) && IS_WINDOWS_PATH(path)) {
2475         /* make the scheme 'file' */
2476 	uri->scheme = xmlStrdup(BAD_CAST "file");
2477 	/* allocate space for leading '/' + path + string terminator */
2478 	uri->path = xmlMallocAtomic(len + 2);
2479 	if (uri->path == NULL) {
2480 	    xmlFreeURI(uri);	/* Guard agains 'out of memory' */
2481 	    return(NULL);
2482 	}
2483 	/* Put in leading '/' plus path */
2484 	uri->path[0] = '/';
2485 	p = uri->path + 1;
2486 	strncpy(p, path, len + 1);
2487     } else {
2488 	uri->path = xmlStrdup(path);
2489 	if (uri->path == NULL) {
2490 	    xmlFreeURI(uri);
2491 	    return(NULL);
2492 	}
2493 	p = uri->path;
2494     }
2495     /* Now change all occurences of '\' to '/' */
2496     while (*p != '\0') {
2497 	if (*p == '\\')
2498 	    *p = '/';
2499 	p++;
2500     }
2501 
2502     if (uri->scheme == NULL) {
2503 	ret = xmlStrdup((const xmlChar *) uri->path);
2504     } else {
2505 	ret = xmlSaveUri(uri);
2506     }
2507 
2508     xmlFreeURI(uri);
2509 #else
2510     ret = xmlStrdup((const xmlChar *) path);
2511 #endif
2512     return(ret);
2513 }
2514 
2515 /**
2516  * xmlPathToURI:
2517  * @path:  the resource locator in a filesystem notation
2518  *
2519  * Constructs an URI expressing the existing path
2520  *
2521  * Returns a new URI, or a duplicate of the path parameter if the
2522  * construction fails. The caller is responsible for freeing the memory
2523  * occupied by the returned string. If there is insufficient memory available,
2524  * or the argument is NULL, the function returns NULL.
2525  */
2526 xmlChar *
xmlPathToURI(const xmlChar * path)2527 xmlPathToURI(const xmlChar *path)
2528 {
2529     xmlURIPtr uri;
2530     xmlURI temp;
2531     xmlChar *ret, *cal;
2532 
2533     if (path == NULL)
2534         return(NULL);
2535 
2536     if ((uri = xmlParseURI((const char *) path)) != NULL) {
2537 	xmlFreeURI(uri);
2538 	return xmlStrdup(path);
2539     }
2540     cal = xmlCanonicPath(path);
2541     if (cal == NULL)
2542         return(NULL);
2543 #if defined(_WIN32) && !defined(__CYGWIN__)
2544     /* xmlCanonicPath can return an URI on Windows (is that the intended behaviour?)
2545        If 'cal' is a valid URI allready then we are done here, as continuing would make
2546        it invalid. */
2547     if ((uri = xmlParseURI((const char *) cal)) != NULL) {
2548 	xmlFreeURI(uri);
2549 	return cal;
2550     }
2551     /* 'cal' can contain a relative path with backslashes. If that is processed
2552        by xmlSaveURI, they will be escaped and the external entity loader machinery
2553        will fail. So convert them to slashes. Misuse 'ret' for walking. */
2554     ret = cal;
2555     while (*ret != '\0') {
2556 	if (*ret == '\\')
2557 	    *ret = '/';
2558 	ret++;
2559     }
2560 #endif
2561     memset(&temp, 0, sizeof(temp));
2562     temp.path = (char *) cal;
2563     ret = xmlSaveUri(&temp);
2564     xmlFree(cal);
2565     return(ret);
2566 }
2567 #define bottom_uri
2568 #include "elfgcchack.h"
2569