1 /**
2  * uri.c: set of generic URI related routines
3  *
4  * Reference: RFCs 3986, 2732 and 2373
5  *
6  * See Copyright for the status of this software.
7  *
8  * daniel@veillard.com
9  */
10 
11 #define IN_LIBXML
12 #include "libxml.h"
13 
14 #include <string.h>
15 #include <limits.h>
16 
17 #include <libxml/xmlmemory.h>
18 #include <libxml/uri.h>
19 #include <libxml/globals.h>
20 #include <libxml/xmlerror.h>
21 
/**
 * MAX_URI_LENGTH:
 *
 * The definition of the URI regexp in the above RFC has no size limit.
 * In practice URIs are usually relatively short, except for the
 * data URI scheme as defined in RFC 2397. Even for data URIs the usual
 * maximum size before hitting random practical limits is around 64 KB,
 * and 4KB is usually a maximum admitted limit for proper operations.
 * The value below is more a security limit than anything else and
 * really should never be hit by 'normal' operations.
 * Set to 1 MByte in 2012, this is only enforced on output.
 *
 * The expansion is parenthesized so that the macro behaves as a single
 * value inside any surrounding expression (e.g. "x / MAX_URI_LENGTH").
 */
#define MAX_URI_LENGTH (1024 * 1024)
35 
36 static void
xmlURIErrMemory(const char * extra)37 xmlURIErrMemory(const char *extra)
38 {
39     if (extra)
40         __xmlRaiseError(NULL, NULL, NULL,
41                         NULL, NULL, XML_FROM_URI,
42                         XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0,
43                         extra, NULL, NULL, 0, 0,
44                         "Memory allocation failed : %s\n", extra);
45     else
46         __xmlRaiseError(NULL, NULL, NULL,
47                         NULL, NULL, XML_FROM_URI,
48                         XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0,
49                         NULL, NULL, NULL, 0, 0,
50                         "Memory allocation failed\n");
51 }
52 
53 static void xmlCleanURI(xmlURIPtr uri);
54 
/*
 * Character classes of the old RFC 2396 grammar, used by the legacy
 * handling code.
 *
 * Every macro except IS_UNWISE takes a character value; IS_UNWISE takes
 * a pointer to the character. All of them may evaluate their argument
 * several times, so the argument must not have side effects.
 */

/*
 * lowalpha = "a" .. "z"
 */
#define IS_LOWALPHA(x) \
    (((x) >= 'a') && ((x) <= 'z'))

/*
 * upalpha = "A" .. "Z"
 */
#define IS_UPALPHA(x) \
    (((x) >= 'A') && ((x) <= 'Z'))

/*
 * alpha = lowalpha | upalpha
 */
#define IS_ALPHA(x) \
    (IS_LOWALPHA(x) || IS_UPALPHA(x))

/*
 * digit = "0" .. "9"
 * Any IS_DIGIT definition already in effect is replaced.
 */
#ifdef IS_DIGIT
#undef IS_DIGIT
#endif
#define IS_DIGIT(x) \
    (((x) >= '0') && ((x) <= '9'))

/*
 * alphanum = alpha | digit
 */
#define IS_ALPHANUM(x) \
    (IS_ALPHA(x) || IS_DIGIT(x))

/*
 * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
 */
#define IS_MARK(x)                                                      \
    (((x) == '-') || ((x) == '_') || ((x) == '.') || ((x) == '!') ||    \
     ((x) == '~') || ((x) == '*') || ((x) == '\'') || ((x) == '(') ||   \
     ((x) == ')'))

/*
 * unwise = "{" | "}" | "|" | "\" | "^" | "`"
 * This implementation additionally accepts "[" and "]".
 * Unlike its siblings the macro dereferences a pointer argument.
 */
#define IS_UNWISE(p)                                                    \
    (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) ||           \
     ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) ||          \
     ((*(p) == ']')) || ((*(p) == '`')))

/*
 * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," |
 *            "[" | "]"
 */
#define IS_RESERVED(x)                                                  \
    (((x) == ';') || ((x) == '/') || ((x) == '?') || ((x) == ':') ||    \
     ((x) == '@') || ((x) == '&') || ((x) == '=') || ((x) == '+') ||    \
     ((x) == '$') || ((x) == ',') || ((x) == '[') || ((x) == ']'))

/*
 * unreserved = alphanum | mark
 */
#define IS_UNRESERVED(x) \
    (IS_ALPHANUM(x) || IS_MARK(x))
122 
/*
 * Skip to the next character: step over a whole three-character "%XX"
 * escape when positioned on '%', otherwise over a single character.
 *
 * The macro itself does not check that two characters follow the '%';
 * callers are expected to have validated the escape before advancing.
 * The argument must be a modifiable pointer lvalue; it is evaluated
 * more than once. It is fully parenthesized so that expressions such
 * as NEXT(*pp) advance the intended pointer.
 */

#define NEXT(p) (((*(p)) == '%') ? ((p) += 3) : ((p)++))
128 
129 /*
130  * Productions from the spec.
131  *
132  *    authority     = server | reg_name
133  *    reg_name      = 1*( unreserved | escaped | "$" | "," |
134  *                        ";" | ":" | "@" | "&" | "=" | "+" )
135  *
136  * path          = [ abs_path | opaque_part ]
137  */
138 
/*
 * Duplicate the first @n bytes of @s through xmlStrndup() and hand the
 * result back as a plain char pointer. The expansion is parenthesized
 * so that the cast cannot bind to neighbouring operators.
 */
#define STRNDUP(s, n) ((char *) xmlStrndup((const xmlChar *)(s), (n)))
140 
141 /************************************************************************
142  *									*
143  *                         RFC 3986 parser				*
144  *									*
145  ************************************************************************/
146 
/*
 * Character classes of the RFC 3986 grammar.
 *
 * Every macro takes a pointer to the character to test and may evaluate
 * that pointer expression several times; it must have no side effects.
 */
#define ISA_DIGIT(p) ((*(p) >= '0') && (*(p) <= '9'))
#define ISA_ALPHA(p) (((*(p) >= 'a') && (*(p) <= 'z')) ||		\
                      ((*(p) >= 'A') && (*(p) <= 'Z')))
#define ISA_HEXDIG(p)							\
       (ISA_DIGIT(p) || ((*(p) >= 'a') && (*(p) <= 'f')) ||		\
        ((*(p) >= 'A') && (*(p) <= 'F')))

/*
 *    sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
 *                     / "*" / "+" / "," / ";" / "="
 */
#define ISA_SUB_DELIM(p)						\
      (((*(p) == '!')) || ((*(p) == '$')) || ((*(p) == '&')) ||		\
       ((*(p) == '(')) || ((*(p) == ')')) || ((*(p) == '*')) ||		\
       ((*(p) == '+')) || ((*(p) == ',')) || ((*(p) == ';')) ||		\
       ((*(p) == '=')) || ((*(p) == '\'')))

/*
 *    gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
 */
#define ISA_GEN_DELIM(p)						\
      (((*(p) == ':')) || ((*(p) == '/')) || ((*(p) == '?')) ||         \
       ((*(p) == '#')) || ((*(p) == '[')) || ((*(p) == ']')) ||         \
       ((*(p) == '@')))

/*
 *    reserved      = gen-delims / sub-delims
 */
#define ISA_RESERVED(p) (ISA_GEN_DELIM(p) || (ISA_SUB_DELIM(p)))

/*
 *    unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
 */
#define ISA_UNRESERVED(p)						\
      ((ISA_ALPHA(p)) || (ISA_DIGIT(p)) || ((*(p) == '-')) ||		\
       ((*(p) == '.')) || ((*(p) == '_')) || ((*(p) == '~')))

/*
 *    pct-encoded   = "%" HEXDIG HEXDIG
 *
 * The two following characters are only read after the preceding test
 * succeeded, so the macro never reads past a terminating NUL.
 * The argument is parenthesized before the offset is added so that any
 * pointer expression (including a conditional one) is handled correctly.
 */
#define ISA_PCT_ENCODED(p)						\
     ((*(p) == '%') && (ISA_HEXDIG((p) + 1)) && (ISA_HEXDIG((p) + 2)))

/*
 *    pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
 */
#define ISA_PCHAR(p)							\
     (ISA_UNRESERVED(p) || ISA_PCT_ENCODED(p) || ISA_SUB_DELIM(p) ||	\
      ((*(p) == ':')) || ((*(p) == '@')))
196 
197 /**
198  * xmlParse3986Scheme:
199  * @uri:  pointer to an URI structure
200  * @str:  pointer to the string to analyze
201  *
202  * Parse an URI scheme
203  *
204  * ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
205  *
206  * Returns 0 or the error code
207  */
208 static int
xmlParse3986Scheme(xmlURIPtr uri,const char ** str)209 xmlParse3986Scheme(xmlURIPtr uri, const char **str) {
210     const char *cur;
211 
212     if (str == NULL)
213 	return(-1);
214 
215     cur = *str;
216     if (!ISA_ALPHA(cur))
217 	return(2);
218     cur++;
219     while (ISA_ALPHA(cur) || ISA_DIGIT(cur) ||
220            (*cur == '+') || (*cur == '-') || (*cur == '.')) cur++;
221     if (uri != NULL) {
222 	if (uri->scheme != NULL) xmlFree(uri->scheme);
223 	uri->scheme = STRNDUP(*str, cur - *str);
224     }
225     *str = cur;
226     return(0);
227 }
228 
229 /**
230  * xmlParse3986Fragment:
231  * @uri:  pointer to an URI structure
232  * @str:  pointer to the string to analyze
233  *
234  * Parse the query part of an URI
235  *
236  * fragment      = *( pchar / "/" / "?" )
237  * NOTE: the strict syntax as defined by 3986 does not allow '[' and ']'
238  *       in the fragment identifier but this is used very broadly for
239  *       xpointer scheme selection, so we are allowing it here to not break
240  *       for example all the DocBook processing chains.
241  *
242  * Returns 0 or the error code
243  */
244 static int
xmlParse3986Fragment(xmlURIPtr uri,const char ** str)245 xmlParse3986Fragment(xmlURIPtr uri, const char **str)
246 {
247     const char *cur;
248 
249     if (str == NULL)
250         return (-1);
251 
252     cur = *str;
253 
254     while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
255            (*cur == '[') || (*cur == ']') ||
256            ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
257         NEXT(cur);
258     if (uri != NULL) {
259         if (uri->fragment != NULL)
260             xmlFree(uri->fragment);
261 	if (uri->cleanup & 2)
262 	    uri->fragment = STRNDUP(*str, cur - *str);
263 	else
264 	    uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL);
265     }
266     *str = cur;
267     return (0);
268 }
269 
270 /**
271  * xmlParse3986Query:
272  * @uri:  pointer to an URI structure
273  * @str:  pointer to the string to analyze
274  *
275  * Parse the query part of an URI
276  *
277  * query = *uric
278  *
279  * Returns 0 or the error code
280  */
281 static int
xmlParse3986Query(xmlURIPtr uri,const char ** str)282 xmlParse3986Query(xmlURIPtr uri, const char **str)
283 {
284     const char *cur;
285 
286     if (str == NULL)
287         return (-1);
288 
289     cur = *str;
290 
291     while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
292            ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
293         NEXT(cur);
294     if (uri != NULL) {
295         if (uri->query != NULL)
296             xmlFree(uri->query);
297 	if (uri->cleanup & 2)
298 	    uri->query = STRNDUP(*str, cur - *str);
299 	else
300 	    uri->query = xmlURIUnescapeString(*str, cur - *str, NULL);
301 
302 	/* Save the raw bytes of the query as well.
303 	 * See: http://mail.gnome.org/archives/xml/2007-April/thread.html#00114
304 	 */
305 	if (uri->query_raw != NULL)
306 	    xmlFree (uri->query_raw);
307 	uri->query_raw = STRNDUP (*str, cur - *str);
308     }
309     *str = cur;
310     return (0);
311 }
312 
313 /**
314  * xmlParse3986Port:
315  * @uri:  pointer to an URI structure
316  * @str:  the string to analyze
317  *
318  * Parse a port part and fills in the appropriate fields
319  * of the @uri structure
320  *
321  * port          = *DIGIT
322  *
323  * Returns 0 or the error code
324  */
325 static int
xmlParse3986Port(xmlURIPtr uri,const char ** str)326 xmlParse3986Port(xmlURIPtr uri, const char **str)
327 {
328     const char *cur = *str;
329     unsigned port = 0; /* unsigned for defined overflow behavior */
330 
331     if (ISA_DIGIT(cur)) {
332 	while (ISA_DIGIT(cur)) {
333 	    port = port * 10 + (*cur - '0');
334 
335 	    cur++;
336 	}
337 	if (uri != NULL)
338 	    uri->port = port & USHRT_MAX; /* port value modulo INT_MAX+1 */
339 	*str = cur;
340 	return(0);
341     }
342     return(1);
343 }
344 
345 /**
346  * xmlParse3986Userinfo:
347  * @uri:  pointer to an URI structure
348  * @str:  the string to analyze
349  *
350  * Parse an user informations part and fills in the appropriate fields
351  * of the @uri structure
352  *
353  * userinfo      = *( unreserved / pct-encoded / sub-delims / ":" )
354  *
355  * Returns 0 or the error code
356  */
357 static int
xmlParse3986Userinfo(xmlURIPtr uri,const char ** str)358 xmlParse3986Userinfo(xmlURIPtr uri, const char **str)
359 {
360     const char *cur;
361 
362     cur = *str;
363     while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) ||
364            ISA_SUB_DELIM(cur) || (*cur == ':'))
365 	NEXT(cur);
366     if (*cur == '@') {
367 	if (uri != NULL) {
368 	    if (uri->user != NULL) xmlFree(uri->user);
369 	    if (uri->cleanup & 2)
370 		uri->user = STRNDUP(*str, cur - *str);
371 	    else
372 		uri->user = xmlURIUnescapeString(*str, cur - *str, NULL);
373 	}
374 	*str = cur;
375 	return(0);
376     }
377     return(1);
378 }
379 
/**
 * xmlParse3986DecOctet:
 * @str:  the string to analyze
 *
 *    dec-octet     = DIGIT                 ; 0-9
 *                  / %x31-39 DIGIT         ; 10-99
 *                  / "1" 2DIGIT            ; 100-199
 *                  / "2" %x30-34 DIGIT     ; 200-249
 *                  / "25" %x30-35          ; 250-255
 *
 * Skip a dec-octet. A multi-digit number with a leading zero and any
 * three-digit number above 255 are rejected. The character following a
 * three-digit octet is not examined here; that is left to the caller.
 *
 * A character is only read after the previous one was found to be a
 * digit, so the scan never goes past the terminating NUL.
 *
 * Returns 0 if found and skipped (*@str is advanced), 1 otherwise
 *         (*@str is left untouched)
 */
static int
xmlParse3986DecOctet(const char **str) {
    const char *cur = *str;
    int len;

    if ((cur[0] < '0') || (cur[0] > '9'))
        return(1);

    if ((cur[1] < '0') || (cur[1] > '9')) {
        len = 1;                        /* 0-9 */
    } else if (cur[0] == '0') {
        return(1);                      /* no leading zero allowed */
    } else if ((cur[2] < '0') || (cur[2] > '9')) {
        len = 2;                        /* 10-99 */
    } else if (cur[0] == '1') {
        len = 3;                        /* 100-199 */
    } else if ((cur[0] == '2') && (cur[1] <= '4')) {
        len = 3;                        /* 200-249 */
    } else if ((cur[0] == '2') && (cur[1] == '5') && (cur[2] <= '5')) {
        len = 3;                        /* 250-255, third digit limited */
    } else {
        return(1);                      /* 256 and above */
    }

    *str = cur + len;
    return(0);
}
417 /**
418  * xmlParse3986Host:
419  * @uri:  pointer to an URI structure
420  * @str:  the string to analyze
421  *
422  * Parse an host part and fills in the appropriate fields
423  * of the @uri structure
424  *
425  * host          = IP-literal / IPv4address / reg-name
426  * IP-literal    = "[" ( IPv6address / IPvFuture  ) "]"
427  * IPv4address   = dec-octet "." dec-octet "." dec-octet "." dec-octet
428  * reg-name      = *( unreserved / pct-encoded / sub-delims )
429  *
430  * Returns 0 or the error code
431  */
432 static int
xmlParse3986Host(xmlURIPtr uri,const char ** str)433 xmlParse3986Host(xmlURIPtr uri, const char **str)
434 {
435     const char *cur = *str;
436     const char *host;
437 
438     host = cur;
439     /*
440      * IPv6 and future adressing scheme are enclosed between brackets
441      */
442     if (*cur == '[') {
443         cur++;
444 	while ((*cur != ']') && (*cur != 0))
445 	    cur++;
446 	if (*cur != ']')
447 	    return(1);
448 	cur++;
449 	goto found;
450     }
451     /*
452      * try to parse an IPv4
453      */
454     if (ISA_DIGIT(cur)) {
455         if (xmlParse3986DecOctet(&cur) != 0)
456 	    goto not_ipv4;
457 	if (*cur != '.')
458 	    goto not_ipv4;
459 	cur++;
460         if (xmlParse3986DecOctet(&cur) != 0)
461 	    goto not_ipv4;
462 	if (*cur != '.')
463 	    goto not_ipv4;
464         if (xmlParse3986DecOctet(&cur) != 0)
465 	    goto not_ipv4;
466 	if (*cur != '.')
467 	    goto not_ipv4;
468         if (xmlParse3986DecOctet(&cur) != 0)
469 	    goto not_ipv4;
470 	goto found;
471 not_ipv4:
472         cur = *str;
473     }
474     /*
475      * then this should be a hostname which can be empty
476      */
477     while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) || ISA_SUB_DELIM(cur))
478         NEXT(cur);
479 found:
480     if (uri != NULL) {
481 	if (uri->authority != NULL) xmlFree(uri->authority);
482 	uri->authority = NULL;
483 	if (uri->server != NULL) xmlFree(uri->server);
484 	if (cur != host) {
485 	    if (uri->cleanup & 2)
486 		uri->server = STRNDUP(host, cur - host);
487 	    else
488 		uri->server = xmlURIUnescapeString(host, cur - host, NULL);
489 	} else
490 	    uri->server = NULL;
491     }
492     *str = cur;
493     return(0);
494 }
495 
496 /**
497  * xmlParse3986Authority:
498  * @uri:  pointer to an URI structure
499  * @str:  the string to analyze
500  *
501  * Parse an authority part and fills in the appropriate fields
502  * of the @uri structure
503  *
504  * authority     = [ userinfo "@" ] host [ ":" port ]
505  *
506  * Returns 0 or the error code
507  */
508 static int
xmlParse3986Authority(xmlURIPtr uri,const char ** str)509 xmlParse3986Authority(xmlURIPtr uri, const char **str)
510 {
511     const char *cur;
512     int ret;
513 
514     cur = *str;
515     /*
516      * try to parse an userinfo and check for the trailing @
517      */
518     ret = xmlParse3986Userinfo(uri, &cur);
519     if ((ret != 0) || (*cur != '@'))
520         cur = *str;
521     else
522         cur++;
523     ret = xmlParse3986Host(uri, &cur);
524     if (ret != 0) return(ret);
525     if (*cur == ':') {
526         cur++;
527         ret = xmlParse3986Port(uri, &cur);
528 	if (ret != 0) return(ret);
529     }
530     *str = cur;
531     return(0);
532 }
533 
/**
 * xmlParse3986Segment:
 * @str:  the string to analyze
 * @forbid: an optional forbidden character, 0 for none
 * @empty: allow an empty segment
 *
 * Skip over one path segment
 *
 * segment       = *pchar
 * segment-nz    = 1*pchar
 * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
 *               ; non-zero-length segment without any colon ":"
 *
 * Scanning stops at the first character which is not a pchar or which
 * equals @forbid. *@str is only updated when the input starts with a
 * pchar.
 *
 * Returns 0 on success, 1 if the input does not start with a pchar and
 *         @empty is zero
 */
static int
xmlParse3986Segment(const char **str, char forbid, int empty)
{
    const char *end = *str;

    if (ISA_PCHAR(end)) {
        while (ISA_PCHAR(end) && (*end != forbid))
            NEXT(end);
        *str = end;
        return (0);
    }

    /* nothing to consume: fine only if an empty segment is allowed */
    return(empty ? 0 : 1);
}
566 
567 /**
568  * xmlParse3986PathAbEmpty:
569  * @uri:  pointer to an URI structure
570  * @str:  the string to analyze
571  *
572  * Parse an path absolute or empty and fills in the appropriate fields
573  * of the @uri structure
574  *
575  * path-abempty  = *( "/" segment )
576  *
577  * Returns 0 or the error code
578  */
579 static int
xmlParse3986PathAbEmpty(xmlURIPtr uri,const char ** str)580 xmlParse3986PathAbEmpty(xmlURIPtr uri, const char **str)
581 {
582     const char *cur;
583     int ret;
584 
585     cur = *str;
586 
587     while (*cur == '/') {
588         cur++;
589 	ret = xmlParse3986Segment(&cur, 0, 1);
590 	if (ret != 0) return(ret);
591     }
592     if (uri != NULL) {
593 	if (uri->path != NULL) xmlFree(uri->path);
594         if (*str != cur) {
595             if (uri->cleanup & 2)
596                 uri->path = STRNDUP(*str, cur - *str);
597             else
598                 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
599         } else {
600             uri->path = NULL;
601         }
602     }
603     *str = cur;
604     return (0);
605 }
606 
607 /**
608  * xmlParse3986PathAbsolute:
609  * @uri:  pointer to an URI structure
610  * @str:  the string to analyze
611  *
612  * Parse an path absolute and fills in the appropriate fields
613  * of the @uri structure
614  *
615  * path-absolute = "/" [ segment-nz *( "/" segment ) ]
616  *
617  * Returns 0 or the error code
618  */
619 static int
xmlParse3986PathAbsolute(xmlURIPtr uri,const char ** str)620 xmlParse3986PathAbsolute(xmlURIPtr uri, const char **str)
621 {
622     const char *cur;
623     int ret;
624 
625     cur = *str;
626 
627     if (*cur != '/')
628         return(1);
629     cur++;
630     ret = xmlParse3986Segment(&cur, 0, 0);
631     if (ret == 0) {
632 	while (*cur == '/') {
633 	    cur++;
634 	    ret = xmlParse3986Segment(&cur, 0, 1);
635 	    if (ret != 0) return(ret);
636 	}
637     }
638     if (uri != NULL) {
639 	if (uri->path != NULL) xmlFree(uri->path);
640         if (cur != *str) {
641             if (uri->cleanup & 2)
642                 uri->path = STRNDUP(*str, cur - *str);
643             else
644                 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
645         } else {
646             uri->path = NULL;
647         }
648     }
649     *str = cur;
650     return (0);
651 }
652 
653 /**
654  * xmlParse3986PathRootless:
655  * @uri:  pointer to an URI structure
656  * @str:  the string to analyze
657  *
658  * Parse an path without root and fills in the appropriate fields
659  * of the @uri structure
660  *
661  * path-rootless = segment-nz *( "/" segment )
662  *
663  * Returns 0 or the error code
664  */
665 static int
xmlParse3986PathRootless(xmlURIPtr uri,const char ** str)666 xmlParse3986PathRootless(xmlURIPtr uri, const char **str)
667 {
668     const char *cur;
669     int ret;
670 
671     cur = *str;
672 
673     ret = xmlParse3986Segment(&cur, 0, 0);
674     if (ret != 0) return(ret);
675     while (*cur == '/') {
676         cur++;
677 	ret = xmlParse3986Segment(&cur, 0, 1);
678 	if (ret != 0) return(ret);
679     }
680     if (uri != NULL) {
681 	if (uri->path != NULL) xmlFree(uri->path);
682         if (cur != *str) {
683             if (uri->cleanup & 2)
684                 uri->path = STRNDUP(*str, cur - *str);
685             else
686                 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
687         } else {
688             uri->path = NULL;
689         }
690     }
691     *str = cur;
692     return (0);
693 }
694 
695 /**
696  * xmlParse3986PathNoScheme:
697  * @uri:  pointer to an URI structure
698  * @str:  the string to analyze
699  *
700  * Parse an path which is not a scheme and fills in the appropriate fields
701  * of the @uri structure
702  *
703  * path-noscheme = segment-nz-nc *( "/" segment )
704  *
705  * Returns 0 or the error code
706  */
707 static int
xmlParse3986PathNoScheme(xmlURIPtr uri,const char ** str)708 xmlParse3986PathNoScheme(xmlURIPtr uri, const char **str)
709 {
710     const char *cur;
711     int ret;
712 
713     cur = *str;
714 
715     ret = xmlParse3986Segment(&cur, ':', 0);
716     if (ret != 0) return(ret);
717     while (*cur == '/') {
718         cur++;
719 	ret = xmlParse3986Segment(&cur, 0, 1);
720 	if (ret != 0) return(ret);
721     }
722     if (uri != NULL) {
723 	if (uri->path != NULL) xmlFree(uri->path);
724         if (cur != *str) {
725             if (uri->cleanup & 2)
726                 uri->path = STRNDUP(*str, cur - *str);
727             else
728                 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
729         } else {
730             uri->path = NULL;
731         }
732     }
733     *str = cur;
734     return (0);
735 }
736 
737 /**
738  * xmlParse3986HierPart:
739  * @uri:  pointer to an URI structure
740  * @str:  the string to analyze
741  *
742  * Parse an hierarchical part and fills in the appropriate fields
743  * of the @uri structure
744  *
745  * hier-part     = "//" authority path-abempty
746  *                / path-absolute
747  *                / path-rootless
748  *                / path-empty
749  *
750  * Returns 0 or the error code
751  */
752 static int
xmlParse3986HierPart(xmlURIPtr uri,const char ** str)753 xmlParse3986HierPart(xmlURIPtr uri, const char **str)
754 {
755     const char *cur;
756     int ret;
757 
758     cur = *str;
759 
760     if ((*cur == '/') && (*(cur + 1) == '/')) {
761         cur += 2;
762 	ret = xmlParse3986Authority(uri, &cur);
763 	if (ret != 0) return(ret);
764 	if (uri->server == NULL)
765 	    uri->port = -1;
766 	ret = xmlParse3986PathAbEmpty(uri, &cur);
767 	if (ret != 0) return(ret);
768 	*str = cur;
769 	return(0);
770     } else if (*cur == '/') {
771         ret = xmlParse3986PathAbsolute(uri, &cur);
772 	if (ret != 0) return(ret);
773     } else if (ISA_PCHAR(cur)) {
774         ret = xmlParse3986PathRootless(uri, &cur);
775 	if (ret != 0) return(ret);
776     } else {
777 	/* path-empty is effectively empty */
778 	if (uri != NULL) {
779 	    if (uri->path != NULL) xmlFree(uri->path);
780 	    uri->path = NULL;
781 	}
782     }
783     *str = cur;
784     return (0);
785 }
786 
787 /**
788  * xmlParse3986RelativeRef:
789  * @uri:  pointer to an URI structure
790  * @str:  the string to analyze
791  *
792  * Parse an URI string and fills in the appropriate fields
793  * of the @uri structure
794  *
795  * relative-ref  = relative-part [ "?" query ] [ "#" fragment ]
796  * relative-part = "//" authority path-abempty
797  *               / path-absolute
798  *               / path-noscheme
799  *               / path-empty
800  *
801  * Returns 0 or the error code
802  */
803 static int
xmlParse3986RelativeRef(xmlURIPtr uri,const char * str)804 xmlParse3986RelativeRef(xmlURIPtr uri, const char *str) {
805     int ret;
806 
807     if ((*str == '/') && (*(str + 1) == '/')) {
808         str += 2;
809 	ret = xmlParse3986Authority(uri, &str);
810 	if (ret != 0) return(ret);
811 	ret = xmlParse3986PathAbEmpty(uri, &str);
812 	if (ret != 0) return(ret);
813     } else if (*str == '/') {
814 	ret = xmlParse3986PathAbsolute(uri, &str);
815 	if (ret != 0) return(ret);
816     } else if (ISA_PCHAR(str)) {
817         ret = xmlParse3986PathNoScheme(uri, &str);
818 	if (ret != 0) return(ret);
819     } else {
820 	/* path-empty is effectively empty */
821 	if (uri != NULL) {
822 	    if (uri->path != NULL) xmlFree(uri->path);
823 	    uri->path = NULL;
824 	}
825     }
826 
827     if (*str == '?') {
828 	str++;
829 	ret = xmlParse3986Query(uri, &str);
830 	if (ret != 0) return(ret);
831     }
832     if (*str == '#') {
833 	str++;
834 	ret = xmlParse3986Fragment(uri, &str);
835 	if (ret != 0) return(ret);
836     }
837     if (*str != 0) {
838 	xmlCleanURI(uri);
839 	return(1);
840     }
841     return(0);
842 }
843 
844 
845 /**
846  * xmlParse3986URI:
847  * @uri:  pointer to an URI structure
848  * @str:  the string to analyze
849  *
850  * Parse an URI string and fills in the appropriate fields
851  * of the @uri structure
852  *
853  * scheme ":" hier-part [ "?" query ] [ "#" fragment ]
854  *
855  * Returns 0 or the error code
856  */
857 static int
xmlParse3986URI(xmlURIPtr uri,const char * str)858 xmlParse3986URI(xmlURIPtr uri, const char *str) {
859     int ret;
860 
861     ret = xmlParse3986Scheme(uri, &str);
862     if (ret != 0) return(ret);
863     if (*str != ':') {
864 	return(1);
865     }
866     str++;
867     ret = xmlParse3986HierPart(uri, &str);
868     if (ret != 0) return(ret);
869     if (*str == '?') {
870 	str++;
871 	ret = xmlParse3986Query(uri, &str);
872 	if (ret != 0) return(ret);
873     }
874     if (*str == '#') {
875 	str++;
876 	ret = xmlParse3986Fragment(uri, &str);
877 	if (ret != 0) return(ret);
878     }
879     if (*str != 0) {
880 	xmlCleanURI(uri);
881 	return(1);
882     }
883     return(0);
884 }
885 
886 /**
887  * xmlParse3986URIReference:
888  * @uri:  pointer to an URI structure
889  * @str:  the string to analyze
890  *
891  * Parse an URI reference string and fills in the appropriate fields
892  * of the @uri structure
893  *
894  * URI-reference = URI / relative-ref
895  *
896  * Returns 0 or the error code
897  */
898 static int
xmlParse3986URIReference(xmlURIPtr uri,const char * str)899 xmlParse3986URIReference(xmlURIPtr uri, const char *str) {
900     int ret;
901 
902     if (str == NULL)
903 	return(-1);
904     xmlCleanURI(uri);
905 
906     /*
907      * Try first to parse absolute refs, then fallback to relative if
908      * it fails.
909      */
910     ret = xmlParse3986URI(uri, str);
911     if (ret != 0) {
912 	xmlCleanURI(uri);
913         ret = xmlParse3986RelativeRef(uri, str);
914 	if (ret != 0) {
915 	    xmlCleanURI(uri);
916 	    return(ret);
917 	}
918     }
919     return(0);
920 }
921 
922 /**
923  * xmlParseURI:
924  * @str:  the URI string to analyze
925  *
926  * Parse an URI based on RFC 3986
927  *
928  * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
929  *
930  * Returns a newly built xmlURIPtr or NULL in case of error
931  */
932 xmlURIPtr
xmlParseURI(const char * str)933 xmlParseURI(const char *str) {
934     xmlURIPtr uri;
935     int ret;
936 
937     if (str == NULL)
938 	return(NULL);
939     uri = xmlCreateURI();
940     if (uri != NULL) {
941 	ret = xmlParse3986URIReference(uri, str);
942         if (ret) {
943 	    xmlFreeURI(uri);
944 	    return(NULL);
945 	}
946     }
947     return(uri);
948 }
949 
950 /**
951  * xmlParseURIReference:
952  * @uri:  pointer to an URI structure
953  * @str:  the string to analyze
954  *
955  * Parse an URI reference string based on RFC 3986 and fills in the
956  * appropriate fields of the @uri structure
957  *
958  * URI-reference = URI / relative-ref
959  *
960  * Returns 0 or the error code
961  */
962 int
xmlParseURIReference(xmlURIPtr uri,const char * str)963 xmlParseURIReference(xmlURIPtr uri, const char *str) {
964     return(xmlParse3986URIReference(uri, str));
965 }
966 
967 /**
968  * xmlParseURIRaw:
969  * @str:  the URI string to analyze
970  * @raw:  if 1 unescaping of URI pieces are disabled
971  *
972  * Parse an URI but allows to keep intact the original fragments.
973  *
974  * URI-reference = URI / relative-ref
975  *
976  * Returns a newly built xmlURIPtr or NULL in case of error
977  */
978 xmlURIPtr
xmlParseURIRaw(const char * str,int raw)979 xmlParseURIRaw(const char *str, int raw) {
980     xmlURIPtr uri;
981     int ret;
982 
983     if (str == NULL)
984 	return(NULL);
985     uri = xmlCreateURI();
986     if (uri != NULL) {
987         if (raw) {
988 	    uri->cleanup |= 2;
989 	}
990 	ret = xmlParseURIReference(uri, str);
991         if (ret) {
992 	    xmlFreeURI(uri);
993 	    return(NULL);
994 	}
995     }
996     return(uri);
997 }
998 
999 /************************************************************************
1000  *									*
1001  *			Generic URI structure functions			*
1002  *									*
1003  ************************************************************************/
1004 
1005 /**
1006  * xmlCreateURI:
1007  *
1008  * Simply creates an empty xmlURI
1009  *
1010  * Returns the new structure or NULL in case of error
1011  */
1012 xmlURIPtr
xmlCreateURI(void)1013 xmlCreateURI(void) {
1014     xmlURIPtr ret;
1015 
1016     ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI));
1017     if (ret == NULL) {
1018         xmlURIErrMemory("creating URI structure\n");
1019 	return(NULL);
1020     }
1021     memset(ret, 0, sizeof(xmlURI));
1022     return(ret);
1023 }
1024 
1025 /**
1026  * xmlSaveUriRealloc:
1027  *
1028  * Function to handle properly a reallocation when saving an URI
1029  * Also imposes some limit on the length of an URI string output
1030  */
1031 static xmlChar *
xmlSaveUriRealloc(xmlChar * ret,int * max)1032 xmlSaveUriRealloc(xmlChar *ret, int *max) {
1033     xmlChar *temp;
1034     int tmp;
1035 
1036     if (*max > MAX_URI_LENGTH) {
1037         xmlURIErrMemory("reaching arbitrary MAX_URI_LENGTH limit\n");
1038         return(NULL);
1039     }
1040     tmp = *max * 2;
1041     temp = (xmlChar *) xmlRealloc(ret, (tmp + 1));
1042     if (temp == NULL) {
1043         xmlURIErrMemory("saving URI\n");
1044         return(NULL);
1045     }
1046     *max = tmp;
1047     return(temp);
1048 }
1049 
1050 /**
1051  * xmlSaveUri:
1052  * @uri:  pointer to an xmlURI
1053  *
1054  * Save the URI as an escaped string
1055  *
1056  * Returns a new string (to be deallocated by caller)
1057  */
1058 xmlChar *
xmlSaveUri(xmlURIPtr uri)1059 xmlSaveUri(xmlURIPtr uri) {
1060     xmlChar *ret = NULL;
1061     xmlChar *temp;
1062     const char *p;
1063     int len;
1064     int max;
1065 
1066     if (uri == NULL) return(NULL);
1067 
1068 
1069     max = 80;
1070     ret = (xmlChar *) xmlMallocAtomic((max + 1) * sizeof(xmlChar));
1071     if (ret == NULL) {
1072         xmlURIErrMemory("saving URI\n");
1073 	return(NULL);
1074     }
1075     len = 0;
1076 
1077     if (uri->scheme != NULL) {
1078 	p = uri->scheme;
1079 	while (*p != 0) {
1080 	    if (len >= max) {
1081                 temp = xmlSaveUriRealloc(ret, &max);
1082                 if (temp == NULL) goto mem_error;
1083 		ret = temp;
1084 	    }
1085 	    ret[len++] = *p++;
1086 	}
1087 	if (len >= max) {
1088             temp = xmlSaveUriRealloc(ret, &max);
1089             if (temp == NULL) goto mem_error;
1090             ret = temp;
1091 	}
1092 	ret[len++] = ':';
1093     }
1094     if (uri->opaque != NULL) {
1095 	p = uri->opaque;
1096 	while (*p != 0) {
1097 	    if (len + 3 >= max) {
1098                 temp = xmlSaveUriRealloc(ret, &max);
1099                 if (temp == NULL) goto mem_error;
1100                 ret = temp;
1101 	    }
1102 	    if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p)))
1103 		ret[len++] = *p++;
1104 	    else {
1105 		int val = *(unsigned char *)p++;
1106 		int hi = val / 0x10, lo = val % 0x10;
1107 		ret[len++] = '%';
1108 		ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1109 		ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1110 	    }
1111 	}
1112     } else {
1113 	if ((uri->server != NULL) || (uri->port == -1)) {
1114 	    if (len + 3 >= max) {
1115                 temp = xmlSaveUriRealloc(ret, &max);
1116                 if (temp == NULL) goto mem_error;
1117                 ret = temp;
1118 	    }
1119 	    ret[len++] = '/';
1120 	    ret[len++] = '/';
1121 	    if (uri->user != NULL) {
1122 		p = uri->user;
1123 		while (*p != 0) {
1124 		    if (len + 3 >= max) {
1125                         temp = xmlSaveUriRealloc(ret, &max);
1126                         if (temp == NULL) goto mem_error;
1127                         ret = temp;
1128 		    }
1129 		    if ((IS_UNRESERVED(*(p))) ||
1130 			((*(p) == ';')) || ((*(p) == ':')) ||
1131 			((*(p) == '&')) || ((*(p) == '=')) ||
1132 			((*(p) == '+')) || ((*(p) == '$')) ||
1133 			((*(p) == ',')))
1134 			ret[len++] = *p++;
1135 		    else {
1136 			int val = *(unsigned char *)p++;
1137 			int hi = val / 0x10, lo = val % 0x10;
1138 			ret[len++] = '%';
1139 			ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1140 			ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1141 		    }
1142 		}
1143 		if (len + 3 >= max) {
1144                     temp = xmlSaveUriRealloc(ret, &max);
1145                     if (temp == NULL) goto mem_error;
1146                     ret = temp;
1147 		}
1148 		ret[len++] = '@';
1149 	    }
1150 	    if (uri->server != NULL) {
1151 		p = uri->server;
1152 		while (*p != 0) {
1153 		    if (len >= max) {
1154 			temp = xmlSaveUriRealloc(ret, &max);
1155 			if (temp == NULL) goto mem_error;
1156 			ret = temp;
1157 		    }
1158 		    ret[len++] = *p++;
1159 		}
1160 		if (uri->port > 0) {
1161 		    if (len + 10 >= max) {
1162 			temp = xmlSaveUriRealloc(ret, &max);
1163 			if (temp == NULL) goto mem_error;
1164 			ret = temp;
1165 		    }
1166 		    len += snprintf((char *) &ret[len], max - len, ":%d", uri->port);
1167 		}
1168 	    }
1169 	} else if (uri->authority != NULL) {
1170 	    if (len + 3 >= max) {
1171                 temp = xmlSaveUriRealloc(ret, &max);
1172                 if (temp == NULL) goto mem_error;
1173                 ret = temp;
1174 	    }
1175 	    ret[len++] = '/';
1176 	    ret[len++] = '/';
1177 	    p = uri->authority;
1178 	    while (*p != 0) {
1179 		if (len + 3 >= max) {
1180                     temp = xmlSaveUriRealloc(ret, &max);
1181                     if (temp == NULL) goto mem_error;
1182                     ret = temp;
1183 		}
1184 		if ((IS_UNRESERVED(*(p))) ||
1185                     ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||
1186                     ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1187                     ((*(p) == '=')) || ((*(p) == '+')))
1188 		    ret[len++] = *p++;
1189 		else {
1190 		    int val = *(unsigned char *)p++;
1191 		    int hi = val / 0x10, lo = val % 0x10;
1192 		    ret[len++] = '%';
1193 		    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1194 		    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1195 		}
1196 	    }
1197 	} else if (uri->scheme != NULL) {
1198 	    if (len + 3 >= max) {
1199                 temp = xmlSaveUriRealloc(ret, &max);
1200                 if (temp == NULL) goto mem_error;
1201                 ret = temp;
1202 	    }
1203 	}
1204 	if (uri->path != NULL) {
1205 	    p = uri->path;
1206 	    /*
1207 	     * the colon in file:///d: should not be escaped or
1208 	     * Windows accesses fail later.
1209 	     */
1210 	    if ((uri->scheme != NULL) &&
1211 		(p[0] == '/') &&
1212 		(((p[1] >= 'a') && (p[1] <= 'z')) ||
1213 		 ((p[1] >= 'A') && (p[1] <= 'Z'))) &&
1214 		(p[2] == ':') &&
1215 	        (xmlStrEqual(BAD_CAST uri->scheme, BAD_CAST "file"))) {
1216 		if (len + 3 >= max) {
1217                     temp = xmlSaveUriRealloc(ret, &max);
1218                     if (temp == NULL) goto mem_error;
1219                     ret = temp;
1220 		}
1221 		ret[len++] = *p++;
1222 		ret[len++] = *p++;
1223 		ret[len++] = *p++;
1224 	    }
1225 	    while (*p != 0) {
1226 		if (len + 3 >= max) {
1227                     temp = xmlSaveUriRealloc(ret, &max);
1228                     if (temp == NULL) goto mem_error;
1229                     ret = temp;
1230 		}
1231 		if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
1232                     ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1233 	            ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
1234 	            ((*(p) == ',')))
1235 		    ret[len++] = *p++;
1236 		else {
1237 		    int val = *(unsigned char *)p++;
1238 		    int hi = val / 0x10, lo = val % 0x10;
1239 		    ret[len++] = '%';
1240 		    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1241 		    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1242 		}
1243 	    }
1244 	}
1245 	if (uri->query_raw != NULL) {
1246 	    if (len + 1 >= max) {
1247                 temp = xmlSaveUriRealloc(ret, &max);
1248                 if (temp == NULL) goto mem_error;
1249                 ret = temp;
1250 	    }
1251 	    ret[len++] = '?';
1252 	    p = uri->query_raw;
1253 	    while (*p != 0) {
1254 		if (len + 1 >= max) {
1255                     temp = xmlSaveUriRealloc(ret, &max);
1256                     if (temp == NULL) goto mem_error;
1257                     ret = temp;
1258 		}
1259 		ret[len++] = *p++;
1260 	    }
1261 	} else if (uri->query != NULL) {
1262 	    if (len + 3 >= max) {
1263                 temp = xmlSaveUriRealloc(ret, &max);
1264                 if (temp == NULL) goto mem_error;
1265                 ret = temp;
1266 	    }
1267 	    ret[len++] = '?';
1268 	    p = uri->query;
1269 	    while (*p != 0) {
1270 		if (len + 3 >= max) {
1271                     temp = xmlSaveUriRealloc(ret, &max);
1272                     if (temp == NULL) goto mem_error;
1273                     ret = temp;
1274 		}
1275 		if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
1276 		    ret[len++] = *p++;
1277 		else {
1278 		    int val = *(unsigned char *)p++;
1279 		    int hi = val / 0x10, lo = val % 0x10;
1280 		    ret[len++] = '%';
1281 		    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1282 		    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1283 		}
1284 	    }
1285 	}
1286     }
1287     if (uri->fragment != NULL) {
1288 	if (len + 3 >= max) {
1289             temp = xmlSaveUriRealloc(ret, &max);
1290             if (temp == NULL) goto mem_error;
1291             ret = temp;
1292 	}
1293 	ret[len++] = '#';
1294 	p = uri->fragment;
1295 	while (*p != 0) {
1296 	    if (len + 3 >= max) {
1297                 temp = xmlSaveUriRealloc(ret, &max);
1298                 if (temp == NULL) goto mem_error;
1299                 ret = temp;
1300 	    }
1301 	    if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
1302 		ret[len++] = *p++;
1303 	    else {
1304 		int val = *(unsigned char *)p++;
1305 		int hi = val / 0x10, lo = val % 0x10;
1306 		ret[len++] = '%';
1307 		ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1308 		ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1309 	    }
1310 	}
1311     }
1312     if (len >= max) {
1313         temp = xmlSaveUriRealloc(ret, &max);
1314         if (temp == NULL) goto mem_error;
1315         ret = temp;
1316     }
1317     ret[len] = 0;
1318     return(ret);
1319 
1320 mem_error:
1321     xmlFree(ret);
1322     return(NULL);
1323 }
1324 
1325 /**
1326  * xmlPrintURI:
1327  * @stream:  a FILE* for the output
1328  * @uri:  pointer to an xmlURI
1329  *
1330  * Prints the URI in the stream @stream.
1331  */
1332 void
xmlPrintURI(FILE * stream,xmlURIPtr uri)1333 xmlPrintURI(FILE *stream, xmlURIPtr uri) {
1334     xmlChar *out;
1335 
1336     out = xmlSaveUri(uri);
1337     if (out != NULL) {
1338 	fprintf(stream, "%s", (char *) out);
1339 	xmlFree(out);
1340     }
1341 }
1342 
1343 /**
1344  * xmlCleanURI:
1345  * @uri:  pointer to an xmlURI
1346  *
1347  * Make sure the xmlURI struct is free of content
1348  */
1349 static void
xmlCleanURI(xmlURIPtr uri)1350 xmlCleanURI(xmlURIPtr uri) {
1351     if (uri == NULL) return;
1352 
1353     if (uri->scheme != NULL) xmlFree(uri->scheme);
1354     uri->scheme = NULL;
1355     if (uri->server != NULL) xmlFree(uri->server);
1356     uri->server = NULL;
1357     if (uri->user != NULL) xmlFree(uri->user);
1358     uri->user = NULL;
1359     if (uri->path != NULL) xmlFree(uri->path);
1360     uri->path = NULL;
1361     if (uri->fragment != NULL) xmlFree(uri->fragment);
1362     uri->fragment = NULL;
1363     if (uri->opaque != NULL) xmlFree(uri->opaque);
1364     uri->opaque = NULL;
1365     if (uri->authority != NULL) xmlFree(uri->authority);
1366     uri->authority = NULL;
1367     if (uri->query != NULL) xmlFree(uri->query);
1368     uri->query = NULL;
1369     if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1370     uri->query_raw = NULL;
1371 }
1372 
1373 /**
1374  * xmlFreeURI:
1375  * @uri:  pointer to an xmlURI
1376  *
1377  * Free up the xmlURI struct
1378  */
1379 void
xmlFreeURI(xmlURIPtr uri)1380 xmlFreeURI(xmlURIPtr uri) {
1381     if (uri == NULL) return;
1382 
1383     if (uri->scheme != NULL) xmlFree(uri->scheme);
1384     if (uri->server != NULL) xmlFree(uri->server);
1385     if (uri->user != NULL) xmlFree(uri->user);
1386     if (uri->path != NULL) xmlFree(uri->path);
1387     if (uri->fragment != NULL) xmlFree(uri->fragment);
1388     if (uri->opaque != NULL) xmlFree(uri->opaque);
1389     if (uri->authority != NULL) xmlFree(uri->authority);
1390     if (uri->query != NULL) xmlFree(uri->query);
1391     if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1392     xmlFree(uri);
1393 }
1394 
1395 /************************************************************************
1396  *									*
1397  *			Helper functions				*
1398  *									*
1399  ************************************************************************/
1400 
/**
 * xmlNormalizeURIPath:
 * @path:  pointer to the path string
 *
 * Applies the 5 normalization steps to a path string--that is, RFC 2396
 * Section 5.2, steps 6.c through 6.g. Runs of "/" following a kept
 * segment are collapsed to a single "/" as well; leading "/" characters
 * are left untouched.
 *
 * Normalization occurs directly on the string, no new allocation is done
 * and the result is never longer than the input.
 *
 * Returns 0 on success, -1 if @path is NULL
 */
int
xmlNormalizeURIPath(char *path) {
    char *cur, *out;

    if (path == NULL)
	return(-1);

    /* Skip all initial "/" chars.  We want to get to the beginning of the
     * first non-empty segment.
     */
    cur = path;
    while (cur[0] == '/')
      ++cur;
    if (cur[0] == '\0')
      return(0);

    /* Keep everything we've seen so far.  */
    out = cur;

    /*
     * Analyze each segment in sequence for cases (c) and (d).
     */
    while (cur[0] != '\0') {
	/*
	 * c) All occurrences of "./", where "." is a complete path segment,
	 *    are removed from the buffer string.
	 */
	if ((cur[0] == '.') && (cur[1] == '/')) {
	    cur += 2;
	    /* '//' normalization should be done at this point too */
	    while (cur[0] == '/')
		cur++;
	    continue;
	}

	/*
	 * d) If the buffer string ends with "." as a complete path segment,
	 *    that "." is removed.
	 */
	if ((cur[0] == '.') && (cur[1] == '\0'))
	    break;

	/* Otherwise keep the segment.  */
	while (cur[0] != '/') {
            if (cur[0] == '\0')
              goto done_cd;
	    (out++)[0] = (cur++)[0];
	}
	/* normalize // : skip all but the last "/" of a run */
	while ((cur[0] == '/') && (cur[1] == '/'))
	    cur++;

        (out++)[0] = (cur++)[0];
    }
 done_cd:
    out[0] = '\0';

    /* Reset to the beginning of the first segment for the next sequence.  */
    cur = path;
    while (cur[0] == '/')
      ++cur;
    if (cur[0] == '\0')
	return(0);

    /*
     * Analyze each segment in sequence for cases (e) and (f).
     *
     * e) All occurrences of "<segment>/../", where <segment> is a
     *    complete path segment not equal to "..", are removed from the
     *    buffer string.  Removal of these path segments is performed
     *    iteratively, removing the leftmost matching pattern on each
     *    iteration, until no matching pattern remains.
     *
     * f) If the buffer string ends with "<segment>/..", where <segment>
     *    is a complete path segment not equal to "..", that
     *    "<segment>/.." is removed.
     *
     * To satisfy the "iterative" clause in (e), we need to collapse the
     * string every time we find something that needs to be removed.  Thus,
     * we don't need to keep two pointers into the string: we only need a
     * "current position" pointer.
     */
    while (1) {
        char *segp, *tmp;

        /* At the beginning of each iteration of this loop, "cur" points to
         * the first character of the segment we want to examine.
         */

        /* Find the end of the current segment.  */
        segp = cur;
        while ((segp[0] != '/') && (segp[0] != '\0'))
          ++segp;

        /* If this is the last segment, we're done (we need at least two
         * segments to meet the criteria for the (e) and (f) cases).
         */
        if (segp[0] == '\0')
          break;

        /* If the first segment is "..", or if the next segment _isn't_ "..",
         * keep this segment and try the next one.
         */
        ++segp;
        if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur+3))
            || ((segp[0] != '.') || (segp[1] != '.')
                || ((segp[2] != '/') && (segp[2] != '\0')))) {
          cur = segp;
          continue;
        }

        /* If we get here, remove this segment and the next one and back up
         * to the previous segment (if there is one), to implement the
         * "iteratively" clause.  It's pretty much impossible to back up
         * while maintaining two pointers into the buffer, so just compact
         * the whole buffer now.
         */

        /* If this is the end of the buffer, we're done.  */
        if (segp[2] == '\0') {
          cur[0] = '\0';
          break;
        }
        /* The source and destination overlap, so copy byte by byte
         * instead of calling strcpy(cur, segp + 3).
         */
        tmp = cur;
        segp += 3;
        while ((*tmp++ = *segp++) != 0)
          ;

        /* If there are no previous segments, then keep going from here.
         * That is the case when "cur" is the start of the buffer, or when
         * only "/" characters precede it.  Merely reaching "path" while
         * scanning backwards is not enough to conclude that: a relative
         * path may begin with a one character segment (as in
         * "a/b/../../c"), which must be re-examined too.
         */
        if (cur == path)
          continue;
        segp = cur;
        while ((segp > path) && ((--segp)[0] == '/'))
          ;
        if (segp[0] == '/')
          continue;

        /* "segp" is pointing to the end of a previous segment; find its
         * start.  We need to back up to the previous segment and start
         * over with that to handle things like "foo/bar/../..".  If we
         * don't do this, then on the first pass we'll remove the "bar/..",
         * but be pointing at the second ".." so we won't realize we can also
         * remove the "foo/..".
         */
        cur = segp;
        while ((cur > path) && (cur[-1] != '/'))
          --cur;
    }

    /*
     * g) If the resulting buffer string still begins with one or more
     *    complete path segments of "..", then the reference is
     *    considered to be in error. Implementations may handle this
     *    error by retaining these components in the resolved path (i.e.,
     *    treating them as part of the final URI), by removing them from
     *    the resolved path (i.e., discarding relative levels above the
     *    root), or by avoiding traversal of the reference.
     *
     * We discard them from the final path.
     */
    if (path[0] == '/') {
      cur = path;
      while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.')
             && ((cur[3] == '/') || (cur[3] == '\0')))
	cur += 3;

      if (cur != path) {
	out = path;
	while (cur[0] != '\0')
          (out++)[0] = (cur++)[0];
	out[0] = 0;
      }
    }

    return(0);
}
1589 
/*
 * is_hex:
 * Returns 1 if @c is an ASCII hexadecimal digit (0-9, a-f, A-F),
 * 0 otherwise.
 */
static int is_hex(char c) {
    if ((c >= '0') && (c <= '9'))
        return(1);
    if ((c >= 'a') && (c <= 'f'))
        return(1);
    return((c >= 'A') && (c <= 'F'));
}
1597 
/**
 * xmlURIUnescapeString:
 * @str:  the string to unescape
 * @len:   the length in bytes to unescape (or <= 0 to indicate full string)
 * @target:  optional destination buffer
 *
 * Replaces every %XX sequence made of two hexadecimal digits by the byte
 * it encodes and copies everything else unchanged; no check is made
 * that the string is a valid URI and no encoding conversion is applied.
 * The result is zero terminated and is never longer than the input, so
 * a @target buffer needs room for the input length plus one byte.
 *
 * Returns @target when it is provided, otherwise a newly allocated copy
 * of the string, unescaped. Returns NULL only in case of error.
 */
char *
xmlURIUnescapeString(const char *str, int len, char *target) {
    const char *src;
    char *result, *dst;

    if (str == NULL)
	return(NULL);
    if (len <= 0)
	len = strlen(str);
    if (len < 0)
	return(NULL);

    result = target;
    if (result == NULL) {
	result = (char *) xmlMallocAtomic(len + 1);
	if (result == NULL) {
            xmlURIErrMemory("unescaping URI value\n");
	    return(NULL);
	}
    }

    src = str;
    dst = result;
    while (len > 0) {
	int digit, value;

	/* Anything that is not a complete %XX escape is copied as is. */
	if ((len <= 2) || (src[0] != '%') ||
	    (!is_hex(src[1])) || (!is_hex(src[2]))) {
	    *dst++ = *src++;
	    len--;
	    continue;
	}

	/* Combine the two hexadecimal digits following the '%'. */
	value = 0;
	for (digit = 1; digit <= 2; digit++) {
	    char c = src[digit];

	    value *= 16;
	    if ((c >= '0') && (c <= '9'))
		value += (c - '0');
	    else if ((c >= 'a') && (c <= 'f'))
		value += (c - 'a') + 10;
	    else if ((c >= 'A') && (c <= 'F'))
		value += (c - 'A') + 10;
	}
	*dst++ = (char) value;
	src += 3;
	len -= 3;
    }
    *dst = 0;
    return(result);
}
1659 
1660 /**
1661  * xmlURIEscapeStr:
1662  * @str:  string to escape
1663  * @list: exception list string of chars not to escape
1664  *
1665  * This routine escapes a string to hex, ignoring reserved characters (a-z)
1666  * and the characters in the exception list.
1667  *
1668  * Returns a new escaped string or NULL in case of error.
1669  */
1670 xmlChar *
xmlURIEscapeStr(const xmlChar * str,const xmlChar * list)1671 xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) {
1672     xmlChar *ret, ch;
1673     xmlChar *temp;
1674     const xmlChar *in;
1675     int len, out;
1676 
1677     if (str == NULL)
1678 	return(NULL);
1679     if (str[0] == 0)
1680 	return(xmlStrdup(str));
1681     len = xmlStrlen(str);
1682     if (!(len > 0)) return(NULL);
1683 
1684     len += 20;
1685     ret = (xmlChar *) xmlMallocAtomic(len);
1686     if (ret == NULL) {
1687         xmlURIErrMemory("escaping URI value\n");
1688 	return(NULL);
1689     }
1690     in = (const xmlChar *) str;
1691     out = 0;
1692     while(*in != 0) {
1693 	if (len - out <= 3) {
1694             temp = xmlSaveUriRealloc(ret, &len);
1695 	    if (temp == NULL) {
1696                 xmlURIErrMemory("escaping URI value\n");
1697 		xmlFree(ret);
1698 		return(NULL);
1699 	    }
1700 	    ret = temp;
1701 	}
1702 
1703 	ch = *in;
1704 
1705 	if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch))) {
1706 	    unsigned char val;
1707 	    ret[out++] = '%';
1708 	    val = ch >> 4;
1709 	    if (val <= 9)
1710 		ret[out++] = '0' + val;
1711 	    else
1712 		ret[out++] = 'A' + val - 0xA;
1713 	    val = ch & 0xF;
1714 	    if (val <= 9)
1715 		ret[out++] = '0' + val;
1716 	    else
1717 		ret[out++] = 'A' + val - 0xA;
1718 	    in++;
1719 	} else {
1720 	    ret[out++] = *in++;
1721 	}
1722 
1723     }
1724     ret[out] = 0;
1725     return(ret);
1726 }
1727 
1728 /**
1729  * xmlURIEscape:
1730  * @str:  the string of the URI to escape
1731  *
1732  * Escaping routine, does not do validity checks !
1733  * It will try to escape the chars needing this, but this is heuristic
1734  * based it's impossible to be sure.
1735  *
1736  * Returns an copy of the string, but escaped
1737  *
1738  * 25 May 2001
1739  * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly
1740  * according to RFC2396.
1741  *   - Carl Douglas
1742  */
1743 xmlChar *
xmlURIEscape(const xmlChar * str)1744 xmlURIEscape(const xmlChar * str)
1745 {
1746     xmlChar *ret, *segment = NULL;
1747     xmlURIPtr uri;
1748     int ret2;
1749 
1750 #define NULLCHK(p) if(!p) { \
1751          xmlURIErrMemory("escaping URI value\n"); \
1752          xmlFreeURI(uri); \
1753          return NULL; } \
1754 
1755     if (str == NULL)
1756         return (NULL);
1757 
1758     uri = xmlCreateURI();
1759     if (uri != NULL) {
1760 	/*
1761 	 * Allow escaping errors in the unescaped form
1762 	 */
1763         uri->cleanup = 1;
1764         ret2 = xmlParseURIReference(uri, (const char *)str);
1765         if (ret2) {
1766             xmlFreeURI(uri);
1767             return (NULL);
1768         }
1769     }
1770 
1771     if (!uri)
1772         return NULL;
1773 
1774     ret = NULL;
1775 
1776     if (uri->scheme) {
1777         segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-.");
1778         NULLCHK(segment)
1779         ret = xmlStrcat(ret, segment);
1780         ret = xmlStrcat(ret, BAD_CAST ":");
1781         xmlFree(segment);
1782     }
1783 
1784     if (uri->authority) {
1785         segment =
1786             xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@");
1787         NULLCHK(segment)
1788         ret = xmlStrcat(ret, BAD_CAST "//");
1789         ret = xmlStrcat(ret, segment);
1790         xmlFree(segment);
1791     }
1792 
1793     if (uri->user) {
1794         segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,");
1795         NULLCHK(segment)
1796 		ret = xmlStrcat(ret,BAD_CAST "//");
1797         ret = xmlStrcat(ret, segment);
1798         ret = xmlStrcat(ret, BAD_CAST "@");
1799         xmlFree(segment);
1800     }
1801 
1802     if (uri->server) {
1803         segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@");
1804         NULLCHK(segment)
1805 		if (uri->user == NULL)
1806 		ret = xmlStrcat(ret, BAD_CAST "//");
1807         ret = xmlStrcat(ret, segment);
1808         xmlFree(segment);
1809     }
1810 
1811     if (uri->port) {
1812         xmlChar port[10];
1813 
1814         snprintf((char *) port, 10, "%d", uri->port);
1815         ret = xmlStrcat(ret, BAD_CAST ":");
1816         ret = xmlStrcat(ret, port);
1817     }
1818 
1819     if (uri->path) {
1820         segment =
1821             xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;");
1822         NULLCHK(segment)
1823         ret = xmlStrcat(ret, segment);
1824         xmlFree(segment);
1825     }
1826 
1827     if (uri->query_raw) {
1828         ret = xmlStrcat(ret, BAD_CAST "?");
1829         ret = xmlStrcat(ret, BAD_CAST uri->query_raw);
1830     }
1831     else if (uri->query) {
1832         segment =
1833             xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$");
1834         NULLCHK(segment)
1835         ret = xmlStrcat(ret, BAD_CAST "?");
1836         ret = xmlStrcat(ret, segment);
1837         xmlFree(segment);
1838     }
1839 
1840     if (uri->opaque) {
1841         segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST "");
1842         NULLCHK(segment)
1843         ret = xmlStrcat(ret, segment);
1844         xmlFree(segment);
1845     }
1846 
1847     if (uri->fragment) {
1848         segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#");
1849         NULLCHK(segment)
1850         ret = xmlStrcat(ret, BAD_CAST "#");
1851         ret = xmlStrcat(ret, segment);
1852         xmlFree(segment);
1853     }
1854 
1855     xmlFreeURI(uri);
1856 #undef NULLCHK
1857 
1858     return (ret);
1859 }
1860 
1861 /************************************************************************
1862  *									*
1863  *			Public functions				*
1864  *									*
1865  ************************************************************************/
1866 
1867 /**
1868  * xmlBuildURI:
1869  * @URI:  the URI instance found in the document
1870  * @base:  the base value
1871  *
 * Computes the final URI of the reference done by checking that
1873  * the given URI is valid, and building the final URI using the
1874  * base URI. This is processed according to section 5.2 of the
1875  * RFC 2396
1876  *
1877  * 5.2. Resolving Relative References to Absolute Form
1878  *
1879  * Returns a new URI string (to be freed by the caller) or NULL in case
1880  *         of error.
1881  */
1882 xmlChar *
xmlBuildURI(const xmlChar * URI,const xmlChar * base)1883 xmlBuildURI(const xmlChar *URI, const xmlChar *base) {
1884     xmlChar *val = NULL;
1885     int ret, len, indx, cur, out;
1886     xmlURIPtr ref = NULL;
1887     xmlURIPtr bas = NULL;
1888     xmlURIPtr res = NULL;
1889 
1890     /*
1891      * 1) The URI reference is parsed into the potential four components and
1892      *    fragment identifier, as described in Section 4.3.
1893      *
1894      *    NOTE that a completely empty URI is treated by modern browsers
1895      *    as a reference to "." rather than as a synonym for the current
1896      *    URI.  Should we do that here?
1897      */
1898     if (URI == NULL)
1899 	ret = -1;
1900     else {
1901 	if (*URI) {
1902 	    ref = xmlCreateURI();
1903 	    if (ref == NULL)
1904 		goto done;
1905 	    ret = xmlParseURIReference(ref, (const char *) URI);
1906 	}
1907 	else
1908 	    ret = 0;
1909     }
1910     if (ret != 0)
1911 	goto done;
1912     if ((ref != NULL) && (ref->scheme != NULL)) {
1913 	/*
1914 	 * The URI is absolute don't modify.
1915 	 */
1916 	val = xmlStrdup(URI);
1917 	goto done;
1918     }
1919     if (base == NULL)
1920 	ret = -1;
1921     else {
1922 	bas = xmlCreateURI();
1923 	if (bas == NULL)
1924 	    goto done;
1925 	ret = xmlParseURIReference(bas, (const char *) base);
1926     }
1927     if (ret != 0) {
1928 	if (ref)
1929 	    val = xmlSaveUri(ref);
1930 	goto done;
1931     }
1932     if (ref == NULL) {
1933 	/*
1934 	 * the base fragment must be ignored
1935 	 */
1936 	if (bas->fragment != NULL) {
1937 	    xmlFree(bas->fragment);
1938 	    bas->fragment = NULL;
1939 	}
1940 	val = xmlSaveUri(bas);
1941 	goto done;
1942     }
1943 
1944     /*
1945      * 2) If the path component is empty and the scheme, authority, and
1946      *    query components are undefined, then it is a reference to the
1947      *    current document and we are done.  Otherwise, the reference URI's
1948      *    query and fragment components are defined as found (or not found)
1949      *    within the URI reference and not inherited from the base URI.
1950      *
1951      *    NOTE that in modern browsers, the parsing differs from the above
1952      *    in the following aspect:  the query component is allowed to be
1953      *    defined while still treating this as a reference to the current
1954      *    document.
1955      */
1956     res = xmlCreateURI();
1957     if (res == NULL)
1958 	goto done;
1959     if ((ref->scheme == NULL) && (ref->path == NULL) &&
1960 	((ref->authority == NULL) && (ref->server == NULL))) {
1961 	if (bas->scheme != NULL)
1962 	    res->scheme = xmlMemStrdup(bas->scheme);
1963 	if (bas->authority != NULL)
1964 	    res->authority = xmlMemStrdup(bas->authority);
1965 	else if ((bas->server != NULL) || (bas->port == -1)) {
1966 	    if (bas->server != NULL)
1967 		res->server = xmlMemStrdup(bas->server);
1968 	    if (bas->user != NULL)
1969 		res->user = xmlMemStrdup(bas->user);
1970 	    res->port = bas->port;
1971 	}
1972 	if (bas->path != NULL)
1973 	    res->path = xmlMemStrdup(bas->path);
1974 	if (ref->query_raw != NULL)
1975 	    res->query_raw = xmlMemStrdup (ref->query_raw);
1976 	else if (ref->query != NULL)
1977 	    res->query = xmlMemStrdup(ref->query);
1978 	else if (bas->query_raw != NULL)
1979 	    res->query_raw = xmlMemStrdup(bas->query_raw);
1980 	else if (bas->query != NULL)
1981 	    res->query = xmlMemStrdup(bas->query);
1982 	if (ref->fragment != NULL)
1983 	    res->fragment = xmlMemStrdup(ref->fragment);
1984 	goto step_7;
1985     }
1986 
1987     /*
1988      * 3) If the scheme component is defined, indicating that the reference
1989      *    starts with a scheme name, then the reference is interpreted as an
1990      *    absolute URI and we are done.  Otherwise, the reference URI's
1991      *    scheme is inherited from the base URI's scheme component.
1992      */
1993     if (ref->scheme != NULL) {
1994 	val = xmlSaveUri(ref);
1995 	goto done;
1996     }
1997     if (bas->scheme != NULL)
1998 	res->scheme = xmlMemStrdup(bas->scheme);
1999 
2000     if (ref->query_raw != NULL)
2001 	res->query_raw = xmlMemStrdup(ref->query_raw);
2002     else if (ref->query != NULL)
2003 	res->query = xmlMemStrdup(ref->query);
2004     if (ref->fragment != NULL)
2005 	res->fragment = xmlMemStrdup(ref->fragment);
2006 
2007     /*
2008      * 4) If the authority component is defined, then the reference is a
2009      *    network-path and we skip to step 7.  Otherwise, the reference
2010      *    URI's authority is inherited from the base URI's authority
2011      *    component, which will also be undefined if the URI scheme does not
2012      *    use an authority component.
2013      */
2014     if ((ref->authority != NULL) || (ref->server != NULL)) {
2015 	if (ref->authority != NULL)
2016 	    res->authority = xmlMemStrdup(ref->authority);
2017 	else {
2018 	    res->server = xmlMemStrdup(ref->server);
2019 	    if (ref->user != NULL)
2020 		res->user = xmlMemStrdup(ref->user);
2021             res->port = ref->port;
2022 	}
2023 	if (ref->path != NULL)
2024 	    res->path = xmlMemStrdup(ref->path);
2025 	goto step_7;
2026     }
2027     if (bas->authority != NULL)
2028 	res->authority = xmlMemStrdup(bas->authority);
2029     else if ((bas->server != NULL) || (bas->port == -1)) {
2030 	if (bas->server != NULL)
2031 	    res->server = xmlMemStrdup(bas->server);
2032 	if (bas->user != NULL)
2033 	    res->user = xmlMemStrdup(bas->user);
2034 	res->port = bas->port;
2035     }
2036 
2037     /*
2038      * 5) If the path component begins with a slash character ("/"), then
2039      *    the reference is an absolute-path and we skip to step 7.
2040      */
2041     if ((ref->path != NULL) && (ref->path[0] == '/')) {
2042 	res->path = xmlMemStrdup(ref->path);
2043 	goto step_7;
2044     }
2045 
2046 
2047     /*
2048      * 6) If this step is reached, then we are resolving a relative-path
2049      *    reference.  The relative path needs to be merged with the base
2050      *    URI's path.  Although there are many ways to do this, we will
2051      *    describe a simple method using a separate string buffer.
2052      *
2053      * Allocate a buffer large enough for the result string.
2054      */
2055     len = 2; /* extra / and 0 */
2056     if (ref->path != NULL)
2057 	len += strlen(ref->path);
2058     if (bas->path != NULL)
2059 	len += strlen(bas->path);
2060     res->path = (char *) xmlMallocAtomic(len);
2061     if (res->path == NULL) {
2062         xmlURIErrMemory("resolving URI against base\n");
2063 	goto done;
2064     }
2065     res->path[0] = 0;
2066 
2067     /*
2068      * a) All but the last segment of the base URI's path component is
2069      *    copied to the buffer.  In other words, any characters after the
2070      *    last (right-most) slash character, if any, are excluded.
2071      */
2072     cur = 0;
2073     out = 0;
2074     if (bas->path != NULL) {
2075 	while (bas->path[cur] != 0) {
2076 	    while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
2077 		cur++;
2078 	    if (bas->path[cur] == 0)
2079 		break;
2080 
2081 	    cur++;
2082 	    while (out < cur) {
2083 		res->path[out] = bas->path[out];
2084 		out++;
2085 	    }
2086 	}
2087     }
2088     res->path[out] = 0;
2089 
2090     /*
2091      * b) The reference's path component is appended to the buffer
2092      *    string.
2093      */
2094     if (ref->path != NULL && ref->path[0] != 0) {
2095 	indx = 0;
2096 	/*
2097 	 * Ensure the path includes a '/'
2098 	 */
2099 	if ((out == 0) && (bas->server != NULL))
2100 	    res->path[out++] = '/';
2101 	while (ref->path[indx] != 0) {
2102 	    res->path[out++] = ref->path[indx++];
2103 	}
2104     }
2105     res->path[out] = 0;
2106 
2107     /*
2108      * Steps c) to h) are really path normalization steps
2109      */
2110     xmlNormalizeURIPath(res->path);
2111 
2112 step_7:
2113 
2114     /*
2115      * 7) The resulting URI components, including any inherited from the
2116      *    base URI, are recombined to give the absolute form of the URI
2117      *    reference.
2118      */
2119     val = xmlSaveUri(res);
2120 
2121 done:
2122     if (ref != NULL)
2123 	xmlFreeURI(ref);
2124     if (bas != NULL)
2125 	xmlFreeURI(bas);
2126     if (res != NULL)
2127 	xmlFreeURI(res);
2128     return(val);
2129 }
2130 
2131 /**
2132  * xmlBuildRelativeURI:
2133  * @URI:  the URI reference under consideration
2134  * @base:  the base value
2135  *
2136  * Expresses the URI of the reference in terms relative to the
2137  * base.  Some examples of this operation include:
2138  *     base = "http://site1.com/docs/book1.html"
2139  *        URI input                        URI returned
2140  *     docs/pic1.gif                    pic1.gif
2141  *     docs/img/pic1.gif                img/pic1.gif
2142  *     img/pic1.gif                     ../img/pic1.gif
2143  *     http://site1.com/docs/pic1.gif   pic1.gif
2144  *     http://site2.com/docs/pic1.gif   http://site2.com/docs/pic1.gif
2145  *
2146  *     base = "docs/book1.html"
2147  *        URI input                        URI returned
2148  *     docs/pic1.gif                    pic1.gif
2149  *     docs/img/pic1.gif                img/pic1.gif
2150  *     img/pic1.gif                     ../img/pic1.gif
2151  *     http://site1.com/docs/pic1.gif   http://site1.com/docs/pic1.gif
2152  *
2153  *
2154  * Note: if the URI reference is really wierd or complicated, it may be
2155  *       worthwhile to first convert it into a "nice" one by calling
2156  *       xmlBuildURI (using 'base') before calling this routine,
2157  *       since this routine (for reasonable efficiency) assumes URI has
2158  *       already been through some validation.
2159  *
2160  * Returns a new URI string (to be freed by the caller) or NULL in case
2161  * error.
2162  */
2163 xmlChar *
xmlBuildRelativeURI(const xmlChar * URI,const xmlChar * base)2164 xmlBuildRelativeURI (const xmlChar * URI, const xmlChar * base)
2165 {
2166     xmlChar *val = NULL;
2167     int ret;
2168     int ix;
2169     int nbslash = 0;
2170     int len;
2171     xmlURIPtr ref = NULL;
2172     xmlURIPtr bas = NULL;
2173     xmlChar *bptr, *uptr, *vptr;
2174     int remove_path = 0;
2175 
2176     if ((URI == NULL) || (*URI == 0))
2177 	return NULL;
2178 
2179     /*
2180      * First parse URI into a standard form
2181      */
2182     ref = xmlCreateURI ();
2183     if (ref == NULL)
2184 	return NULL;
2185     /* If URI not already in "relative" form */
2186     if (URI[0] != '.') {
2187 	ret = xmlParseURIReference (ref, (const char *) URI);
2188 	if (ret != 0)
2189 	    goto done;		/* Error in URI, return NULL */
2190     } else
2191 	ref->path = (char *)xmlStrdup(URI);
2192 
2193     /*
2194      * Next parse base into the same standard form
2195      */
2196     if ((base == NULL) || (*base == 0)) {
2197 	val = xmlStrdup (URI);
2198 	goto done;
2199     }
2200     bas = xmlCreateURI ();
2201     if (bas == NULL)
2202 	goto done;
2203     if (base[0] != '.') {
2204 	ret = xmlParseURIReference (bas, (const char *) base);
2205 	if (ret != 0)
2206 	    goto done;		/* Error in base, return NULL */
2207     } else
2208 	bas->path = (char *)xmlStrdup(base);
2209 
2210     /*
2211      * If the scheme / server on the URI differs from the base,
2212      * just return the URI
2213      */
2214     if ((ref->scheme != NULL) &&
2215 	((bas->scheme == NULL) ||
2216 	 (xmlStrcmp ((xmlChar *)bas->scheme, (xmlChar *)ref->scheme)) ||
2217 	 (xmlStrcmp ((xmlChar *)bas->server, (xmlChar *)ref->server)))) {
2218 	val = xmlStrdup (URI);
2219 	goto done;
2220     }
2221     if (xmlStrEqual((xmlChar *)bas->path, (xmlChar *)ref->path)) {
2222 	val = xmlStrdup(BAD_CAST "");
2223 	goto done;
2224     }
2225     if (bas->path == NULL) {
2226 	val = xmlStrdup((xmlChar *)ref->path);
2227 	goto done;
2228     }
2229     if (ref->path == NULL) {
2230         ref->path = (char *) "/";
2231 	remove_path = 1;
2232     }
2233 
2234     /*
2235      * At this point (at last!) we can compare the two paths
2236      *
2237      * First we take care of the special case where either of the
2238      * two path components may be missing (bug 316224)
2239      */
2240     if (bas->path == NULL) {
2241 	if (ref->path != NULL) {
2242 	    uptr = (xmlChar *) ref->path;
2243 	    if (*uptr == '/')
2244 		uptr++;
2245 	    /* exception characters from xmlSaveUri */
2246 	    val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
2247 	}
2248 	goto done;
2249     }
2250     bptr = (xmlChar *)bas->path;
2251     if (ref->path == NULL) {
2252 	for (ix = 0; bptr[ix] != 0; ix++) {
2253 	    if (bptr[ix] == '/')
2254 		nbslash++;
2255 	}
2256 	uptr = NULL;
2257 	len = 1;	/* this is for a string terminator only */
2258     } else {
2259         xmlChar *rptr = (xmlChar *) ref->path;
2260         int pos = 0;
2261 
2262         /*
2263          * Next we compare the two strings and find where they first differ
2264          */
2265 	if ((*rptr == '.') && (rptr[1] == '/'))
2266             rptr += 2;
2267 	if ((*bptr == '.') && (bptr[1] == '/'))
2268             bptr += 2;
2269 	else if ((*bptr == '/') && (*rptr != '/'))
2270 	    bptr++;
2271 	while ((bptr[pos] == rptr[pos]) && (bptr[pos] != 0))
2272 	    pos++;
2273 
2274 	if (bptr[pos] == rptr[pos]) {
2275 	    val = xmlStrdup(BAD_CAST "");
2276 	    goto done;		/* (I can't imagine why anyone would do this) */
2277 	}
2278 
2279 	/*
2280 	 * In URI, "back up" to the last '/' encountered.  This will be the
2281 	 * beginning of the "unique" suffix of URI
2282 	 */
2283 	ix = pos;
2284 	if ((rptr[ix] == '/') && (ix > 0))
2285 	    ix--;
2286 	else if ((rptr[ix] == 0) && (ix > 1) && (rptr[ix - 1] == '/'))
2287 	    ix -= 2;
2288 	for (; ix > 0; ix--) {
2289 	    if (rptr[ix] == '/')
2290 		break;
2291 	}
2292 	if (ix == 0) {
2293 	    uptr = (xmlChar *)rptr;
2294 	} else {
2295 	    ix++;
2296 	    uptr = (xmlChar *)&rptr[ix];
2297 	}
2298 
2299 	/*
2300 	 * In base, count the number of '/' from the differing point
2301 	 */
2302 	if (bptr[pos] != rptr[pos]) {/* check for trivial URI == base */
2303 	    for (; bptr[ix] != 0; ix++) {
2304 		if (bptr[ix] == '/')
2305 		    nbslash++;
2306 	    }
2307 	}
2308 	len = xmlStrlen (uptr) + 1;
2309     }
2310 
2311     if (nbslash == 0) {
2312 	if (uptr != NULL)
2313 	    /* exception characters from xmlSaveUri */
2314 	    val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
2315 	goto done;
2316     }
2317 
2318     /*
2319      * Allocate just enough space for the returned string -
2320      * length of the remainder of the URI, plus enough space
2321      * for the "../" groups, plus one for the terminator
2322      */
2323     val = (xmlChar *) xmlMalloc (len + 3 * nbslash);
2324     if (val == NULL) {
2325         xmlURIErrMemory("building relative URI\n");
2326 	goto done;
2327     }
2328     vptr = val;
2329     /*
2330      * Put in as many "../" as needed
2331      */
2332     for (; nbslash>0; nbslash--) {
2333 	*vptr++ = '.';
2334 	*vptr++ = '.';
2335 	*vptr++ = '/';
2336     }
2337     /*
2338      * Finish up with the end of the URI
2339      */
2340     if (uptr != NULL) {
2341         if ((vptr > val) && (len > 0) &&
2342 	    (uptr[0] == '/') && (vptr[-1] == '/')) {
2343 	    memcpy (vptr, uptr + 1, len - 1);
2344 	    vptr[len - 2] = 0;
2345 	} else {
2346 	    memcpy (vptr, uptr, len);
2347 	    vptr[len - 1] = 0;
2348 	}
2349     } else {
2350 	vptr[len - 1] = 0;
2351     }
2352 
2353     /* escape the freshly-built path */
2354     vptr = val;
2355 	/* exception characters from xmlSaveUri */
2356     val = xmlURIEscapeStr(vptr, BAD_CAST "/;&=+$,");
2357     xmlFree(vptr);
2358 
2359 done:
2360     /*
2361      * Free the working variables
2362      */
2363     if (remove_path != 0)
2364         ref->path = NULL;
2365     if (ref != NULL)
2366 	xmlFreeURI (ref);
2367     if (bas != NULL)
2368 	xmlFreeURI (bas);
2369 
2370     return val;
2371 }
2372 
2373 /**
2374  * xmlCanonicPath:
2375  * @path:  the resource locator in a filesystem notation
2376  *
2377  * Constructs a canonic path from the specified path.
2378  *
2379  * Returns a new canonic path, or a duplicate of the path parameter if the
2380  * construction fails. The caller is responsible for freeing the memory occupied
2381  * by the returned string. If there is insufficient memory available, or the
2382  * argument is NULL, the function returns NULL.
2383  */
2384 #define IS_WINDOWS_PATH(p)					\
2385 	((p != NULL) &&						\
2386 	 (((p[0] >= 'a') && (p[0] <= 'z')) ||			\
2387 	  ((p[0] >= 'A') && (p[0] <= 'Z'))) &&			\
2388 	 (p[1] == ':') && ((p[2] == '/') || (p[2] == '\\')))
2389 xmlChar *
xmlCanonicPath(const xmlChar * path)2390 xmlCanonicPath(const xmlChar *path)
2391 {
2392 /*
2393  * For Windows implementations, additional work needs to be done to
2394  * replace backslashes in pathnames with "forward slashes"
2395  */
2396 #if defined(_WIN32) && !defined(__CYGWIN__)
2397     int len = 0;
2398     char *p = NULL;
2399 #endif
2400     xmlURIPtr uri;
2401     xmlChar *ret;
2402     const xmlChar *absuri;
2403 
2404     if (path == NULL)
2405 	return(NULL);
2406 
2407 #if defined(_WIN32)
2408     /*
2409      * We must not change the backslashes to slashes if the the path
2410      * starts with \\?\
2411      * Those paths can be up to 32k characters long.
2412      * Was added specifically for OpenOffice, those paths can't be converted
2413      * to URIs anyway.
2414      */
2415     if ((path[0] == '\\') && (path[1] == '\\') && (path[2] == '?') &&
2416         (path[3] == '\\') )
2417 	return xmlStrdup((const xmlChar *) path);
2418 #endif
2419 
2420 	/* sanitize filename starting with // so it can be used as URI */
2421     if ((path[0] == '/') && (path[1] == '/') && (path[2] != '/'))
2422         path++;
2423 
2424     if ((uri = xmlParseURI((const char *) path)) != NULL) {
2425 	xmlFreeURI(uri);
2426 	return xmlStrdup(path);
2427     }
2428 
2429     /* Check if this is an "absolute uri" */
2430     absuri = xmlStrstr(path, BAD_CAST "://");
2431     if (absuri != NULL) {
2432         int l, j;
2433 	unsigned char c;
2434 	xmlChar *escURI;
2435 
2436         /*
2437 	 * this looks like an URI where some parts have not been
2438 	 * escaped leading to a parsing problem.  Check that the first
2439 	 * part matches a protocol.
2440 	 */
2441 	l = absuri - path;
2442 	/* Bypass if first part (part before the '://') is > 20 chars */
2443 	if ((l <= 0) || (l > 20))
2444 	    goto path_processing;
2445 	/* Bypass if any non-alpha characters are present in first part */
2446 	for (j = 0;j < l;j++) {
2447 	    c = path[j];
2448 	    if (!(((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z'))))
2449 	        goto path_processing;
2450 	}
2451 
2452 	/* Escape all except the characters specified in the supplied path */
2453         escURI = xmlURIEscapeStr(path, BAD_CAST ":/?_.#&;=");
2454 	if (escURI != NULL) {
2455 	    /* Try parsing the escaped path */
2456 	    uri = xmlParseURI((const char *) escURI);
2457 	    /* If successful, return the escaped string */
2458 	    if (uri != NULL) {
2459 	        xmlFreeURI(uri);
2460 		return escURI;
2461 	    }
2462             xmlFree(escURI);
2463 	}
2464     }
2465 
2466 path_processing:
2467 /* For Windows implementations, replace backslashes with 'forward slashes' */
2468 #if defined(_WIN32) && !defined(__CYGWIN__)
2469     /*
2470      * Create a URI structure
2471      */
2472     uri = xmlCreateURI();
2473     if (uri == NULL) {		/* Guard against 'out of memory' */
2474         return(NULL);
2475     }
2476 
2477     len = xmlStrlen(path);
2478     if ((len > 2) && IS_WINDOWS_PATH(path)) {
2479         /* make the scheme 'file' */
2480 	uri->scheme = (char *) xmlStrdup(BAD_CAST "file");
2481 	/* allocate space for leading '/' + path + string terminator */
2482 	uri->path = xmlMallocAtomic(len + 2);
2483 	if (uri->path == NULL) {
2484 	    xmlFreeURI(uri);	/* Guard agains 'out of memory' */
2485 	    return(NULL);
2486 	}
2487 	/* Put in leading '/' plus path */
2488 	uri->path[0] = '/';
2489 	p = uri->path + 1;
2490 	strncpy(p, (char *) path, len + 1);
2491     } else {
2492 	uri->path = (char *) xmlStrdup(path);
2493 	if (uri->path == NULL) {
2494 	    xmlFreeURI(uri);
2495 	    return(NULL);
2496 	}
2497 	p = uri->path;
2498     }
2499     /* Now change all occurences of '\' to '/' */
2500     while (*p != '\0') {
2501 	if (*p == '\\')
2502 	    *p = '/';
2503 	p++;
2504     }
2505 
2506     if (uri->scheme == NULL) {
2507 	ret = xmlStrdup((const xmlChar *) uri->path);
2508     } else {
2509 	ret = xmlSaveUri(uri);
2510     }
2511 
2512     xmlFreeURI(uri);
2513 #else
2514     ret = xmlStrdup((const xmlChar *) path);
2515 #endif
2516     return(ret);
2517 }
2518 
2519 /**
2520  * xmlPathToURI:
2521  * @path:  the resource locator in a filesystem notation
2522  *
2523  * Constructs an URI expressing the existing path
2524  *
2525  * Returns a new URI, or a duplicate of the path parameter if the
2526  * construction fails. The caller is responsible for freeing the memory
2527  * occupied by the returned string. If there is insufficient memory available,
2528  * or the argument is NULL, the function returns NULL.
2529  */
2530 xmlChar *
xmlPathToURI(const xmlChar * path)2531 xmlPathToURI(const xmlChar *path)
2532 {
2533     xmlURIPtr uri;
2534     xmlURI temp;
2535     xmlChar *ret, *cal;
2536 
2537     if (path == NULL)
2538         return(NULL);
2539 
2540     if ((uri = xmlParseURI((const char *) path)) != NULL) {
2541 	xmlFreeURI(uri);
2542 	return xmlStrdup(path);
2543     }
2544     cal = xmlCanonicPath(path);
2545     if (cal == NULL)
2546         return(NULL);
2547 #if defined(_WIN32) && !defined(__CYGWIN__)
2548     /* xmlCanonicPath can return an URI on Windows (is that the intended behaviour?)
2549        If 'cal' is a valid URI allready then we are done here, as continuing would make
2550        it invalid. */
2551     if ((uri = xmlParseURI((const char *) cal)) != NULL) {
2552 	xmlFreeURI(uri);
2553 	return cal;
2554     }
2555     /* 'cal' can contain a relative path with backslashes. If that is processed
2556        by xmlSaveURI, they will be escaped and the external entity loader machinery
2557        will fail. So convert them to slashes. Misuse 'ret' for walking. */
2558     ret = cal;
2559     while (*ret != '\0') {
2560 	if (*ret == '\\')
2561 	    *ret = '/';
2562 	ret++;
2563     }
2564 #endif
2565     memset(&temp, 0, sizeof(temp));
2566     temp.path = (char *) cal;
2567     ret = xmlSaveUri(&temp);
2568     xmlFree(cal);
2569     return(ret);
2570 }
2571 #define bottom_uri
2572 #include "elfgcchack.h"
2573