1 /**
2 * uri.c: set of generic URI related routines
3 *
4 * Reference: RFCs 3986, 2732 and 2373
5 *
6 * See Copyright for the status of this software.
7 *
8 * daniel@veillard.com
9 */
10
#define IN_LIBXML
#include "libxml.h"

#include <limits.h>
#include <string.h>

#include <libxml/xmlmemory.h>
#include <libxml/uri.h>
#include <libxml/globals.h>
#include <libxml/xmlerror.h>
20
/**
 * MAX_URI_LENGTH:
 *
 * The definition of the URI regexp in the above RFC has no size limit.
 * In practice URIs are usually relatively short, except for the
 * data URI scheme as defined in RFC 2397. Even for data URIs the usual
 * maximum size before hitting random practical limits is around 64 KB,
 * and 4 KB is usually the maximum admitted limit for proper operations.
 * The value below is more a security limit than anything else and
 * really should never be hit by 'normal' operations.
 * Set to 1 MByte in 2012, this is only enforced on output.
 *
 * The expansion is parenthesized so that the macro behaves as a single
 * value whatever operators surround it at the point of use.
 */
#define MAX_URI_LENGTH (1024 * 1024)
34
35 static void
xmlURIErrMemory(const char * extra)36 xmlURIErrMemory(const char *extra)
37 {
38 if (extra)
39 __xmlRaiseError(NULL, NULL, NULL,
40 NULL, NULL, XML_FROM_URI,
41 XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0,
42 extra, NULL, NULL, 0, 0,
43 "Memory allocation failed : %s\n", extra);
44 else
45 __xmlRaiseError(NULL, NULL, NULL,
46 NULL, NULL, XML_FROM_URI,
47 XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0,
48 NULL, NULL, NULL, 0, 0,
49 "Memory allocation failed\n");
50 }
51
52 static void xmlCleanURI(xmlURIPtr uri);
53
/*
 * Character classes from RFC 2396, still used by the legacy handling
 * code. Every macro below except IS_UNWISE takes a character value;
 * IS_UNWISE takes a pointer to the character to test.
 */

/*
 * lowalpha = "a" .. "z"
 */
#define IS_LOWALPHA(x) (('a' <= (x)) && ((x) <= 'z'))

/*
 * upalpha = "A" .. "Z"
 */
#define IS_UPALPHA(x) (('A' <= (x)) && ((x) <= 'Z'))

/*
 * alpha = lowalpha | upalpha
 */
#define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))

/*
 * digit = "0" .. "9"
 *
 * Any IS_DIGIT definition inherited from an earlier header is dropped
 * first so that the URI specific one is used in this file.
 */
#ifdef IS_DIGIT
#undef IS_DIGIT
#endif
#define IS_DIGIT(x) (('0' <= (x)) && ((x) <= '9'))

/*
 * alphanum = alpha | digit
 */
#define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))

/*
 * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
 */
#define IS_MARK(x)                                                      \
    (((x) == '-') || ((x) == '_') || ((x) == '.') || ((x) == '!') ||    \
     ((x) == '~') || ((x) == '*') || ((x) == '\'') || ((x) == '(') ||   \
     ((x) == ')'))

/*
 * unwise = "{" | "}" | "|" | "\" | "^" | "[" | "]" | "`"
 *
 * Unlike its siblings this macro dereferences its argument.
 */
#define IS_UNWISE(p)                                                    \
    ((*(p) == '{') || (*(p) == '}') || (*(p) == '|') ||                 \
     (*(p) == '\\') || (*(p) == '^') || (*(p) == '[') ||                \
     (*(p) == ']') || (*(p) == '`'))

/*
 * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," |
 *            "[" | "]"
 */
#define IS_RESERVED(x)                                                  \
    (((x) == ';') || ((x) == '/') || ((x) == '?') || ((x) == ':') ||    \
     ((x) == '@') || ((x) == '&') || ((x) == '=') || ((x) == '+') ||    \
     ((x) == '$') || ((x) == ',') || ((x) == '[') || ((x) == ']'))

/*
 * unreserved = alphanum | mark
 */
#define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))
121
/*
 * Skip to the next character: a '%' is assumed to start a three
 * character escape sequence and is skipped as a whole, anything else
 * advances by one. The callers only reach a '%' after having checked
 * that two hexadecimal digits follow it.
 *
 * The argument is parenthesized so that expressions such as *pp are
 * advanced correctly (the pointed-to pointer, not pp itself).
 */

#define NEXT(p) ((*(p) == '%') ? ((p) += 3) : (p)++)

/*
 * Productions from the spec.
 *
 *    authority     = server | reg_name
 *    reg_name      = 1*( unreserved | escaped | "$" | "," |
 *                        ";" | ":" | "@" | "&" | "=" | "+" )
 *
 * path          = [ abs_path | opaque_part ]
 */

/*
 * Copy the first @n bytes of @s with xmlStrndup() and hand the result
 * back as a plain char pointer. The whole expansion is parenthesized so
 * the cast cannot bind to neighbouring operators.
 */
#define STRNDUP(s, n) ((char *) xmlStrndup((const xmlChar *)(s), (n)))
139
/************************************************************************
 *                                                                      *
 *                         RFC 3986 parser                              *
 *                                                                      *
 ************************************************************************/

/*
 * All ISA_xxx() macros take a pointer and classify the character it
 * points to; none of them moves the pointer.
 */

/* DIGIT = "0" .. "9" */
#define ISA_DIGIT(p) (('0' <= *(p)) && (*(p) <= '9'))

/* ALPHA = "a" .. "z" / "A" .. "Z" */
#define ISA_ALPHA(p)                                                    \
    ((('a' <= *(p)) && (*(p) <= 'z')) ||                                \
     (('A' <= *(p)) && (*(p) <= 'Z')))

/* HEXDIG = DIGIT / "a" .. "f" / "A" .. "F" */
#define ISA_HEXDIG(p)                                                   \
    (ISA_DIGIT(p) ||                                                    \
     (('a' <= *(p)) && (*(p) <= 'f')) ||                                \
     (('A' <= *(p)) && (*(p) <= 'F')))

/*
 *    sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
 *                  / "*" / "+" / "," / ";" / "="
 */
#define ISA_SUB_DELIM(p)                                                \
    ((*(p) == '!') || (*(p) == '$') || (*(p) == '&') ||                 \
     (*(p) == '(') || (*(p) == ')') || (*(p) == '*') ||                 \
     (*(p) == '+') || (*(p) == ',') || (*(p) == ';') ||                 \
     (*(p) == '=') || (*(p) == '\''))

/*
 *    gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
 */
#define ISA_GEN_DELIM(p)                                                \
    ((*(p) == ':') || (*(p) == '/') || (*(p) == '?') ||                 \
     (*(p) == '#') || (*(p) == '[') || (*(p) == ']') ||                 \
     (*(p) == '@'))

/*
 *    reserved      = gen-delims / sub-delims
 */
#define ISA_RESERVED(p) (ISA_GEN_DELIM(p) || ISA_SUB_DELIM(p))

/*
 *    unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
 */
#define ISA_UNRESERVED(p)                                               \
    (ISA_ALPHA(p) || ISA_DIGIT(p) || (*(p) == '-') ||                   \
     (*(p) == '.') || (*(p) == '_') || (*(p) == '~'))

/*
 *    pct-encoded   = "%" HEXDIG HEXDIG
 *
 * The second and third characters are only read once the previous one
 * matched, so the test never reads past a terminating 0.
 */
#define ISA_PCT_ENCODED(p)                                              \
    ((*(p) == '%') && ISA_HEXDIG((p) + 1) && ISA_HEXDIG((p) + 2))

/*
 *    pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
 */
#define ISA_PCHAR(p)                                                    \
    (ISA_UNRESERVED(p) || ISA_PCT_ENCODED(p) || ISA_SUB_DELIM(p) ||     \
     (*(p) == ':') || (*(p) == '@'))
195
196 /**
197 * xmlParse3986Scheme:
198 * @uri: pointer to an URI structure
199 * @str: pointer to the string to analyze
200 *
201 * Parse an URI scheme
202 *
203 * ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
204 *
205 * Returns 0 or the error code
206 */
207 static int
xmlParse3986Scheme(xmlURIPtr uri,const char ** str)208 xmlParse3986Scheme(xmlURIPtr uri, const char **str) {
209 const char *cur;
210
211 if (str == NULL)
212 return(-1);
213
214 cur = *str;
215 if (!ISA_ALPHA(cur))
216 return(2);
217 cur++;
218 while (ISA_ALPHA(cur) || ISA_DIGIT(cur) ||
219 (*cur == '+') || (*cur == '-') || (*cur == '.')) cur++;
220 if (uri != NULL) {
221 if (uri->scheme != NULL) xmlFree(uri->scheme);
222 uri->scheme = STRNDUP(*str, cur - *str);
223 }
224 *str = cur;
225 return(0);
226 }
227
228 /**
229 * xmlParse3986Fragment:
230 * @uri: pointer to an URI structure
231 * @str: pointer to the string to analyze
232 *
233 * Parse the query part of an URI
234 *
235 * fragment = *( pchar / "/" / "?" )
236 * NOTE: the strict syntax as defined by 3986 does not allow '[' and ']'
237 * in the fragment identifier but this is used very broadly for
238 * xpointer scheme selection, so we are allowing it here to not break
239 * for example all the DocBook processing chains.
240 *
241 * Returns 0 or the error code
242 */
243 static int
xmlParse3986Fragment(xmlURIPtr uri,const char ** str)244 xmlParse3986Fragment(xmlURIPtr uri, const char **str)
245 {
246 const char *cur;
247
248 if (str == NULL)
249 return (-1);
250
251 cur = *str;
252
253 while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
254 (*cur == '[') || (*cur == ']') ||
255 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
256 NEXT(cur);
257 if (uri != NULL) {
258 if (uri->fragment != NULL)
259 xmlFree(uri->fragment);
260 if (uri->cleanup & 2)
261 uri->fragment = STRNDUP(*str, cur - *str);
262 else
263 uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL);
264 }
265 *str = cur;
266 return (0);
267 }
268
269 /**
270 * xmlParse3986Query:
271 * @uri: pointer to an URI structure
272 * @str: pointer to the string to analyze
273 *
274 * Parse the query part of an URI
275 *
276 * query = *uric
277 *
278 * Returns 0 or the error code
279 */
280 static int
xmlParse3986Query(xmlURIPtr uri,const char ** str)281 xmlParse3986Query(xmlURIPtr uri, const char **str)
282 {
283 const char *cur;
284
285 if (str == NULL)
286 return (-1);
287
288 cur = *str;
289
290 while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
291 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
292 NEXT(cur);
293 if (uri != NULL) {
294 if (uri->query != NULL)
295 xmlFree(uri->query);
296 if (uri->cleanup & 2)
297 uri->query = STRNDUP(*str, cur - *str);
298 else
299 uri->query = xmlURIUnescapeString(*str, cur - *str, NULL);
300
301 /* Save the raw bytes of the query as well.
302 * See: http://mail.gnome.org/archives/xml/2007-April/thread.html#00114
303 */
304 if (uri->query_raw != NULL)
305 xmlFree (uri->query_raw);
306 uri->query_raw = STRNDUP (*str, cur - *str);
307 }
308 *str = cur;
309 return (0);
310 }
311
312 /**
313 * xmlParse3986Port:
314 * @uri: pointer to an URI structure
315 * @str: the string to analyze
316 *
317 * Parse a port part and fills in the appropriate fields
318 * of the @uri structure
319 *
320 * port = *DIGIT
321 *
322 * Returns 0 or the error code
323 */
324 static int
xmlParse3986Port(xmlURIPtr uri,const char ** str)325 xmlParse3986Port(xmlURIPtr uri, const char **str)
326 {
327 const char *cur = *str;
328
329 if (ISA_DIGIT(cur)) {
330 if (uri != NULL)
331 uri->port = 0;
332 while (ISA_DIGIT(cur)) {
333 if (uri != NULL)
334 uri->port = uri->port * 10 + (*cur - '0');
335 cur++;
336 }
337 *str = cur;
338 return(0);
339 }
340 return(1);
341 }
342
343 /**
344 * xmlParse3986Userinfo:
345 * @uri: pointer to an URI structure
346 * @str: the string to analyze
347 *
348 * Parse an user informations part and fills in the appropriate fields
349 * of the @uri structure
350 *
351 * userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
352 *
353 * Returns 0 or the error code
354 */
355 static int
xmlParse3986Userinfo(xmlURIPtr uri,const char ** str)356 xmlParse3986Userinfo(xmlURIPtr uri, const char **str)
357 {
358 const char *cur;
359
360 cur = *str;
361 while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) ||
362 ISA_SUB_DELIM(cur) || (*cur == ':'))
363 NEXT(cur);
364 if (*cur == '@') {
365 if (uri != NULL) {
366 if (uri->user != NULL) xmlFree(uri->user);
367 if (uri->cleanup & 2)
368 uri->user = STRNDUP(*str, cur - *str);
369 else
370 uri->user = xmlURIUnescapeString(*str, cur - *str, NULL);
371 }
372 *str = cur;
373 return(0);
374 }
375 return(1);
376 }
377
/**
 * xmlParse3986DecOctet:
 * @str:  the string to analyze, moved past the octet on success
 *
 *    dec-octet     = DIGIT                 ; 0-9
 *                  / %x31-39 DIGIT         ; 10-99
 *                  / "1" 2DIGIT            ; 100-199
 *                  / "2" %x30-34 DIGIT     ; 200-249
 *                  / "25" %x30-35          ; 250-255
 *
 * Skip a dec-octet.
 *
 * At most the first three digits are examined: a single digit is always
 * accepted, two digits are accepted unless the first one is '0', and
 * three digits are accepted when they spell a value between 100 and 255.
 * Comparing the numeric value replaces the per-character tests, whose
 * last case checked the wrong character and let 256 to 259 through.
 *
 * Returns 0 if found and skipped, 1 otherwise
 */
static int
xmlParse3986DecOctet(const char **str) {
    const char *cur = *str;
    int ndigits = 0;
    int value = 0;

    /* Stops on the first non digit, so the terminating 0 is never passed. */
    while ((ndigits < 3) && (cur[ndigits] >= '0') && (cur[ndigits] <= '9')) {
        value = value * 10 + (cur[ndigits] - '0');
        ndigits++;
    }

    switch (ndigits) {
        case 1:
            break;
        case 2:
            /* 10-99: no leading zero allowed */
            if (cur[0] == '0')
                return(1);
            break;
        case 3:
            /* 100-255: also rules out leading zeroes */
            if ((value < 100) || (value > 255))
                return(1);
            break;
        default:
            return(1);
    }

    *str = cur + ndigits;
    return(0);
}
415 /**
416 * xmlParse3986Host:
417 * @uri: pointer to an URI structure
418 * @str: the string to analyze
419 *
420 * Parse an host part and fills in the appropriate fields
421 * of the @uri structure
422 *
423 * host = IP-literal / IPv4address / reg-name
424 * IP-literal = "[" ( IPv6address / IPvFuture ) "]"
425 * IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
426 * reg-name = *( unreserved / pct-encoded / sub-delims )
427 *
428 * Returns 0 or the error code
429 */
430 static int
xmlParse3986Host(xmlURIPtr uri,const char ** str)431 xmlParse3986Host(xmlURIPtr uri, const char **str)
432 {
433 const char *cur = *str;
434 const char *host;
435
436 host = cur;
437 /*
438 * IPv6 and future adressing scheme are enclosed between brackets
439 */
440 if (*cur == '[') {
441 cur++;
442 while ((*cur != ']') && (*cur != 0))
443 cur++;
444 if (*cur != ']')
445 return(1);
446 cur++;
447 goto found;
448 }
449 /*
450 * try to parse an IPv4
451 */
452 if (ISA_DIGIT(cur)) {
453 if (xmlParse3986DecOctet(&cur) != 0)
454 goto not_ipv4;
455 if (*cur != '.')
456 goto not_ipv4;
457 cur++;
458 if (xmlParse3986DecOctet(&cur) != 0)
459 goto not_ipv4;
460 if (*cur != '.')
461 goto not_ipv4;
462 if (xmlParse3986DecOctet(&cur) != 0)
463 goto not_ipv4;
464 if (*cur != '.')
465 goto not_ipv4;
466 if (xmlParse3986DecOctet(&cur) != 0)
467 goto not_ipv4;
468 goto found;
469 not_ipv4:
470 cur = *str;
471 }
472 /*
473 * then this should be a hostname which can be empty
474 */
475 while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) || ISA_SUB_DELIM(cur))
476 NEXT(cur);
477 found:
478 if (uri != NULL) {
479 if (uri->authority != NULL) xmlFree(uri->authority);
480 uri->authority = NULL;
481 if (uri->server != NULL) xmlFree(uri->server);
482 if (cur != host) {
483 if (uri->cleanup & 2)
484 uri->server = STRNDUP(host, cur - host);
485 else
486 uri->server = xmlURIUnescapeString(host, cur - host, NULL);
487 } else
488 uri->server = NULL;
489 }
490 *str = cur;
491 return(0);
492 }
493
494 /**
495 * xmlParse3986Authority:
496 * @uri: pointer to an URI structure
497 * @str: the string to analyze
498 *
499 * Parse an authority part and fills in the appropriate fields
500 * of the @uri structure
501 *
502 * authority = [ userinfo "@" ] host [ ":" port ]
503 *
504 * Returns 0 or the error code
505 */
506 static int
xmlParse3986Authority(xmlURIPtr uri,const char ** str)507 xmlParse3986Authority(xmlURIPtr uri, const char **str)
508 {
509 const char *cur;
510 int ret;
511
512 cur = *str;
513 /*
514 * try to parse an userinfo and check for the trailing @
515 */
516 ret = xmlParse3986Userinfo(uri, &cur);
517 if ((ret != 0) || (*cur != '@'))
518 cur = *str;
519 else
520 cur++;
521 ret = xmlParse3986Host(uri, &cur);
522 if (ret != 0) return(ret);
523 if (*cur == ':') {
524 cur++;
525 ret = xmlParse3986Port(uri, &cur);
526 if (ret != 0) return(ret);
527 }
528 *str = cur;
529 return(0);
530 }
531
/**
 * xmlParse3986Segment:
 * @str:  the string to analyze, moved past the segment
 * @forbid: an optional forbidden character, 0 if there is none
 * @empty: allow an empty segment
 *
 * Skip a path segment. Scanning stops at the first character which is
 * not a pchar or which is equal to @forbid.
 *
 * segment       = *pchar
 * segment-nz    = 1*pchar
 * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
 *               ; non-zero-length segment without any colon ":"
 *
 * Returns 0 on success, 1 if the input does not start with a pchar
 *         while @empty is 0
 */
static int
xmlParse3986Segment(const char **str, char forbid, int empty)
{
    const char *pos = *str;

    if (!ISA_PCHAR(pos))
        return(empty ? 0 : 1);

    /* pos points to a pchar whenever the loop body is entered */
    do {
        if (*pos == forbid)
            break;
        NEXT(pos);
    } while (ISA_PCHAR(pos));

    *str = pos;
    return (0);
}
564
565 /**
566 * xmlParse3986PathAbEmpty:
567 * @uri: pointer to an URI structure
568 * @str: the string to analyze
569 *
570 * Parse an path absolute or empty and fills in the appropriate fields
571 * of the @uri structure
572 *
573 * path-abempty = *( "/" segment )
574 *
575 * Returns 0 or the error code
576 */
577 static int
xmlParse3986PathAbEmpty(xmlURIPtr uri,const char ** str)578 xmlParse3986PathAbEmpty(xmlURIPtr uri, const char **str)
579 {
580 const char *cur;
581 int ret;
582
583 cur = *str;
584
585 while (*cur == '/') {
586 cur++;
587 ret = xmlParse3986Segment(&cur, 0, 1);
588 if (ret != 0) return(ret);
589 }
590 if (uri != NULL) {
591 if (uri->path != NULL) xmlFree(uri->path);
592 if (*str != cur) {
593 if (uri->cleanup & 2)
594 uri->path = STRNDUP(*str, cur - *str);
595 else
596 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
597 } else {
598 uri->path = NULL;
599 }
600 }
601 *str = cur;
602 return (0);
603 }
604
605 /**
606 * xmlParse3986PathAbsolute:
607 * @uri: pointer to an URI structure
608 * @str: the string to analyze
609 *
610 * Parse an path absolute and fills in the appropriate fields
611 * of the @uri structure
612 *
613 * path-absolute = "/" [ segment-nz *( "/" segment ) ]
614 *
615 * Returns 0 or the error code
616 */
617 static int
xmlParse3986PathAbsolute(xmlURIPtr uri,const char ** str)618 xmlParse3986PathAbsolute(xmlURIPtr uri, const char **str)
619 {
620 const char *cur;
621 int ret;
622
623 cur = *str;
624
625 if (*cur != '/')
626 return(1);
627 cur++;
628 ret = xmlParse3986Segment(&cur, 0, 0);
629 if (ret == 0) {
630 while (*cur == '/') {
631 cur++;
632 ret = xmlParse3986Segment(&cur, 0, 1);
633 if (ret != 0) return(ret);
634 }
635 }
636 if (uri != NULL) {
637 if (uri->path != NULL) xmlFree(uri->path);
638 if (cur != *str) {
639 if (uri->cleanup & 2)
640 uri->path = STRNDUP(*str, cur - *str);
641 else
642 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
643 } else {
644 uri->path = NULL;
645 }
646 }
647 *str = cur;
648 return (0);
649 }
650
651 /**
652 * xmlParse3986PathRootless:
653 * @uri: pointer to an URI structure
654 * @str: the string to analyze
655 *
656 * Parse an path without root and fills in the appropriate fields
657 * of the @uri structure
658 *
659 * path-rootless = segment-nz *( "/" segment )
660 *
661 * Returns 0 or the error code
662 */
663 static int
xmlParse3986PathRootless(xmlURIPtr uri,const char ** str)664 xmlParse3986PathRootless(xmlURIPtr uri, const char **str)
665 {
666 const char *cur;
667 int ret;
668
669 cur = *str;
670
671 ret = xmlParse3986Segment(&cur, 0, 0);
672 if (ret != 0) return(ret);
673 while (*cur == '/') {
674 cur++;
675 ret = xmlParse3986Segment(&cur, 0, 1);
676 if (ret != 0) return(ret);
677 }
678 if (uri != NULL) {
679 if (uri->path != NULL) xmlFree(uri->path);
680 if (cur != *str) {
681 if (uri->cleanup & 2)
682 uri->path = STRNDUP(*str, cur - *str);
683 else
684 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
685 } else {
686 uri->path = NULL;
687 }
688 }
689 *str = cur;
690 return (0);
691 }
692
693 /**
694 * xmlParse3986PathNoScheme:
695 * @uri: pointer to an URI structure
696 * @str: the string to analyze
697 *
698 * Parse an path which is not a scheme and fills in the appropriate fields
699 * of the @uri structure
700 *
701 * path-noscheme = segment-nz-nc *( "/" segment )
702 *
703 * Returns 0 or the error code
704 */
705 static int
xmlParse3986PathNoScheme(xmlURIPtr uri,const char ** str)706 xmlParse3986PathNoScheme(xmlURIPtr uri, const char **str)
707 {
708 const char *cur;
709 int ret;
710
711 cur = *str;
712
713 ret = xmlParse3986Segment(&cur, ':', 0);
714 if (ret != 0) return(ret);
715 while (*cur == '/') {
716 cur++;
717 ret = xmlParse3986Segment(&cur, 0, 1);
718 if (ret != 0) return(ret);
719 }
720 if (uri != NULL) {
721 if (uri->path != NULL) xmlFree(uri->path);
722 if (cur != *str) {
723 if (uri->cleanup & 2)
724 uri->path = STRNDUP(*str, cur - *str);
725 else
726 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
727 } else {
728 uri->path = NULL;
729 }
730 }
731 *str = cur;
732 return (0);
733 }
734
735 /**
736 * xmlParse3986HierPart:
737 * @uri: pointer to an URI structure
738 * @str: the string to analyze
739 *
740 * Parse an hierarchical part and fills in the appropriate fields
741 * of the @uri structure
742 *
743 * hier-part = "//" authority path-abempty
744 * / path-absolute
745 * / path-rootless
746 * / path-empty
747 *
748 * Returns 0 or the error code
749 */
750 static int
xmlParse3986HierPart(xmlURIPtr uri,const char ** str)751 xmlParse3986HierPart(xmlURIPtr uri, const char **str)
752 {
753 const char *cur;
754 int ret;
755
756 cur = *str;
757
758 if ((*cur == '/') && (*(cur + 1) == '/')) {
759 cur += 2;
760 ret = xmlParse3986Authority(uri, &cur);
761 if (ret != 0) return(ret);
762 ret = xmlParse3986PathAbEmpty(uri, &cur);
763 if (ret != 0) return(ret);
764 *str = cur;
765 return(0);
766 } else if (*cur == '/') {
767 ret = xmlParse3986PathAbsolute(uri, &cur);
768 if (ret != 0) return(ret);
769 } else if (ISA_PCHAR(cur)) {
770 ret = xmlParse3986PathRootless(uri, &cur);
771 if (ret != 0) return(ret);
772 } else {
773 /* path-empty is effectively empty */
774 if (uri != NULL) {
775 if (uri->path != NULL) xmlFree(uri->path);
776 uri->path = NULL;
777 }
778 }
779 *str = cur;
780 return (0);
781 }
782
783 /**
784 * xmlParse3986RelativeRef:
785 * @uri: pointer to an URI structure
786 * @str: the string to analyze
787 *
788 * Parse an URI string and fills in the appropriate fields
789 * of the @uri structure
790 *
791 * relative-ref = relative-part [ "?" query ] [ "#" fragment ]
792 * relative-part = "//" authority path-abempty
793 * / path-absolute
794 * / path-noscheme
795 * / path-empty
796 *
797 * Returns 0 or the error code
798 */
799 static int
xmlParse3986RelativeRef(xmlURIPtr uri,const char * str)800 xmlParse3986RelativeRef(xmlURIPtr uri, const char *str) {
801 int ret;
802
803 if ((*str == '/') && (*(str + 1) == '/')) {
804 str += 2;
805 ret = xmlParse3986Authority(uri, &str);
806 if (ret != 0) return(ret);
807 ret = xmlParse3986PathAbEmpty(uri, &str);
808 if (ret != 0) return(ret);
809 } else if (*str == '/') {
810 ret = xmlParse3986PathAbsolute(uri, &str);
811 if (ret != 0) return(ret);
812 } else if (ISA_PCHAR(str)) {
813 ret = xmlParse3986PathNoScheme(uri, &str);
814 if (ret != 0) return(ret);
815 } else {
816 /* path-empty is effectively empty */
817 if (uri != NULL) {
818 if (uri->path != NULL) xmlFree(uri->path);
819 uri->path = NULL;
820 }
821 }
822
823 if (*str == '?') {
824 str++;
825 ret = xmlParse3986Query(uri, &str);
826 if (ret != 0) return(ret);
827 }
828 if (*str == '#') {
829 str++;
830 ret = xmlParse3986Fragment(uri, &str);
831 if (ret != 0) return(ret);
832 }
833 if (*str != 0) {
834 xmlCleanURI(uri);
835 return(1);
836 }
837 return(0);
838 }
839
840
841 /**
842 * xmlParse3986URI:
843 * @uri: pointer to an URI structure
844 * @str: the string to analyze
845 *
846 * Parse an URI string and fills in the appropriate fields
847 * of the @uri structure
848 *
849 * scheme ":" hier-part [ "?" query ] [ "#" fragment ]
850 *
851 * Returns 0 or the error code
852 */
853 static int
xmlParse3986URI(xmlURIPtr uri,const char * str)854 xmlParse3986URI(xmlURIPtr uri, const char *str) {
855 int ret;
856
857 ret = xmlParse3986Scheme(uri, &str);
858 if (ret != 0) return(ret);
859 if (*str != ':') {
860 return(1);
861 }
862 str++;
863 ret = xmlParse3986HierPart(uri, &str);
864 if (ret != 0) return(ret);
865 if (*str == '?') {
866 str++;
867 ret = xmlParse3986Query(uri, &str);
868 if (ret != 0) return(ret);
869 }
870 if (*str == '#') {
871 str++;
872 ret = xmlParse3986Fragment(uri, &str);
873 if (ret != 0) return(ret);
874 }
875 if (*str != 0) {
876 xmlCleanURI(uri);
877 return(1);
878 }
879 return(0);
880 }
881
882 /**
883 * xmlParse3986URIReference:
884 * @uri: pointer to an URI structure
885 * @str: the string to analyze
886 *
887 * Parse an URI reference string and fills in the appropriate fields
888 * of the @uri structure
889 *
890 * URI-reference = URI / relative-ref
891 *
892 * Returns 0 or the error code
893 */
894 static int
xmlParse3986URIReference(xmlURIPtr uri,const char * str)895 xmlParse3986URIReference(xmlURIPtr uri, const char *str) {
896 int ret;
897
898 if (str == NULL)
899 return(-1);
900 xmlCleanURI(uri);
901
902 /*
903 * Try first to parse absolute refs, then fallback to relative if
904 * it fails.
905 */
906 ret = xmlParse3986URI(uri, str);
907 if (ret != 0) {
908 xmlCleanURI(uri);
909 ret = xmlParse3986RelativeRef(uri, str);
910 if (ret != 0) {
911 xmlCleanURI(uri);
912 return(ret);
913 }
914 }
915 return(0);
916 }
917
918 /**
919 * xmlParseURI:
920 * @str: the URI string to analyze
921 *
922 * Parse an URI based on RFC 3986
923 *
924 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
925 *
926 * Returns a newly built xmlURIPtr or NULL in case of error
927 */
928 xmlURIPtr
xmlParseURI(const char * str)929 xmlParseURI(const char *str) {
930 xmlURIPtr uri;
931 int ret;
932
933 if (str == NULL)
934 return(NULL);
935 uri = xmlCreateURI();
936 if (uri != NULL) {
937 ret = xmlParse3986URIReference(uri, str);
938 if (ret) {
939 xmlFreeURI(uri);
940 return(NULL);
941 }
942 }
943 return(uri);
944 }
945
946 /**
947 * xmlParseURIReference:
948 * @uri: pointer to an URI structure
949 * @str: the string to analyze
950 *
951 * Parse an URI reference string based on RFC 3986 and fills in the
952 * appropriate fields of the @uri structure
953 *
954 * URI-reference = URI / relative-ref
955 *
956 * Returns 0 or the error code
957 */
958 int
xmlParseURIReference(xmlURIPtr uri,const char * str)959 xmlParseURIReference(xmlURIPtr uri, const char *str) {
960 return(xmlParse3986URIReference(uri, str));
961 }
962
963 /**
964 * xmlParseURIRaw:
965 * @str: the URI string to analyze
966 * @raw: if 1 unescaping of URI pieces are disabled
967 *
968 * Parse an URI but allows to keep intact the original fragments.
969 *
970 * URI-reference = URI / relative-ref
971 *
972 * Returns a newly built xmlURIPtr or NULL in case of error
973 */
974 xmlURIPtr
xmlParseURIRaw(const char * str,int raw)975 xmlParseURIRaw(const char *str, int raw) {
976 xmlURIPtr uri;
977 int ret;
978
979 if (str == NULL)
980 return(NULL);
981 uri = xmlCreateURI();
982 if (uri != NULL) {
983 if (raw) {
984 uri->cleanup |= 2;
985 }
986 ret = xmlParseURIReference(uri, str);
987 if (ret) {
988 xmlFreeURI(uri);
989 return(NULL);
990 }
991 }
992 return(uri);
993 }
994
995 /************************************************************************
996 * *
997 * Generic URI structure functions *
998 * *
999 ************************************************************************/
1000
1001 /**
1002 * xmlCreateURI:
1003 *
1004 * Simply creates an empty xmlURI
1005 *
1006 * Returns the new structure or NULL in case of error
1007 */
1008 xmlURIPtr
xmlCreateURI(void)1009 xmlCreateURI(void) {
1010 xmlURIPtr ret;
1011
1012 ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI));
1013 if (ret == NULL) {
1014 xmlURIErrMemory("creating URI structure\n");
1015 return(NULL);
1016 }
1017 memset(ret, 0, sizeof(xmlURI));
1018 return(ret);
1019 }
1020
1021 /**
1022 * xmlSaveUriRealloc:
1023 *
1024 * Function to handle properly a reallocation when saving an URI
1025 * Also imposes some limit on the length of an URI string output
1026 */
1027 static xmlChar *
xmlSaveUriRealloc(xmlChar * ret,int * max)1028 xmlSaveUriRealloc(xmlChar *ret, int *max) {
1029 xmlChar *temp;
1030 int tmp;
1031
1032 if (*max > MAX_URI_LENGTH) {
1033 xmlURIErrMemory("reaching arbitrary MAX_URI_LENGTH limit\n");
1034 return(NULL);
1035 }
1036 tmp = *max * 2;
1037 temp = (xmlChar *) xmlRealloc(ret, (tmp + 1));
1038 if (temp == NULL) {
1039 xmlURIErrMemory("saving URI\n");
1040 return(NULL);
1041 }
1042 *max = tmp;
1043 return(temp);
1044 }
1045
1046 /**
1047 * xmlSaveUri:
1048 * @uri: pointer to an xmlURI
1049 *
1050 * Save the URI as an escaped string
1051 *
1052 * Returns a new string (to be deallocated by caller)
1053 */
1054 xmlChar *
xmlSaveUri(xmlURIPtr uri)1055 xmlSaveUri(xmlURIPtr uri) {
1056 xmlChar *ret = NULL;
1057 xmlChar *temp;
1058 const char *p;
1059 int len;
1060 int max;
1061
1062 if (uri == NULL) return(NULL);
1063
1064
1065 max = 80;
1066 ret = (xmlChar *) xmlMallocAtomic((max + 1) * sizeof(xmlChar));
1067 if (ret == NULL) {
1068 xmlURIErrMemory("saving URI\n");
1069 return(NULL);
1070 }
1071 len = 0;
1072
1073 if (uri->scheme != NULL) {
1074 p = uri->scheme;
1075 while (*p != 0) {
1076 if (len >= max) {
1077 temp = xmlSaveUriRealloc(ret, &max);
1078 if (temp == NULL) goto mem_error;
1079 ret = temp;
1080 }
1081 ret[len++] = *p++;
1082 }
1083 if (len >= max) {
1084 temp = xmlSaveUriRealloc(ret, &max);
1085 if (temp == NULL) goto mem_error;
1086 ret = temp;
1087 }
1088 ret[len++] = ':';
1089 }
1090 if (uri->opaque != NULL) {
1091 p = uri->opaque;
1092 while (*p != 0) {
1093 if (len + 3 >= max) {
1094 temp = xmlSaveUriRealloc(ret, &max);
1095 if (temp == NULL) goto mem_error;
1096 ret = temp;
1097 }
1098 if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p)))
1099 ret[len++] = *p++;
1100 else {
1101 int val = *(unsigned char *)p++;
1102 int hi = val / 0x10, lo = val % 0x10;
1103 ret[len++] = '%';
1104 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1105 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1106 }
1107 }
1108 } else {
1109 if (uri->server != NULL) {
1110 if (len + 3 >= max) {
1111 temp = xmlSaveUriRealloc(ret, &max);
1112 if (temp == NULL) goto mem_error;
1113 ret = temp;
1114 }
1115 ret[len++] = '/';
1116 ret[len++] = '/';
1117 if (uri->user != NULL) {
1118 p = uri->user;
1119 while (*p != 0) {
1120 if (len + 3 >= max) {
1121 temp = xmlSaveUriRealloc(ret, &max);
1122 if (temp == NULL) goto mem_error;
1123 ret = temp;
1124 }
1125 if ((IS_UNRESERVED(*(p))) ||
1126 ((*(p) == ';')) || ((*(p) == ':')) ||
1127 ((*(p) == '&')) || ((*(p) == '=')) ||
1128 ((*(p) == '+')) || ((*(p) == '$')) ||
1129 ((*(p) == ',')))
1130 ret[len++] = *p++;
1131 else {
1132 int val = *(unsigned char *)p++;
1133 int hi = val / 0x10, lo = val % 0x10;
1134 ret[len++] = '%';
1135 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1136 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1137 }
1138 }
1139 if (len + 3 >= max) {
1140 temp = xmlSaveUriRealloc(ret, &max);
1141 if (temp == NULL) goto mem_error;
1142 ret = temp;
1143 }
1144 ret[len++] = '@';
1145 }
1146 p = uri->server;
1147 while (*p != 0) {
1148 if (len >= max) {
1149 temp = xmlSaveUriRealloc(ret, &max);
1150 if (temp == NULL) goto mem_error;
1151 ret = temp;
1152 }
1153 ret[len++] = *p++;
1154 }
1155 if (uri->port > 0) {
1156 if (len + 10 >= max) {
1157 temp = xmlSaveUriRealloc(ret, &max);
1158 if (temp == NULL) goto mem_error;
1159 ret = temp;
1160 }
1161 len += snprintf((char *) &ret[len], max - len, ":%d", uri->port);
1162 }
1163 } else if (uri->authority != NULL) {
1164 if (len + 3 >= max) {
1165 temp = xmlSaveUriRealloc(ret, &max);
1166 if (temp == NULL) goto mem_error;
1167 ret = temp;
1168 }
1169 ret[len++] = '/';
1170 ret[len++] = '/';
1171 p = uri->authority;
1172 while (*p != 0) {
1173 if (len + 3 >= max) {
1174 temp = xmlSaveUriRealloc(ret, &max);
1175 if (temp == NULL) goto mem_error;
1176 ret = temp;
1177 }
1178 if ((IS_UNRESERVED(*(p))) ||
1179 ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||
1180 ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1181 ((*(p) == '=')) || ((*(p) == '+')))
1182 ret[len++] = *p++;
1183 else {
1184 int val = *(unsigned char *)p++;
1185 int hi = val / 0x10, lo = val % 0x10;
1186 ret[len++] = '%';
1187 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1188 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1189 }
1190 }
1191 } else if (uri->scheme != NULL) {
1192 if (len + 3 >= max) {
1193 temp = xmlSaveUriRealloc(ret, &max);
1194 if (temp == NULL) goto mem_error;
1195 ret = temp;
1196 }
1197 ret[len++] = '/';
1198 ret[len++] = '/';
1199 }
1200 if (uri->path != NULL) {
1201 p = uri->path;
1202 /*
1203 * the colon in file:///d: should not be escaped or
1204 * Windows accesses fail later.
1205 */
1206 if ((uri->scheme != NULL) &&
1207 (p[0] == '/') &&
1208 (((p[1] >= 'a') && (p[1] <= 'z')) ||
1209 ((p[1] >= 'A') && (p[1] <= 'Z'))) &&
1210 (p[2] == ':') &&
1211 (xmlStrEqual(BAD_CAST uri->scheme, BAD_CAST "file"))) {
1212 if (len + 3 >= max) {
1213 temp = xmlSaveUriRealloc(ret, &max);
1214 if (temp == NULL) goto mem_error;
1215 ret = temp;
1216 }
1217 ret[len++] = *p++;
1218 ret[len++] = *p++;
1219 ret[len++] = *p++;
1220 }
1221 while (*p != 0) {
1222 if (len + 3 >= max) {
1223 temp = xmlSaveUriRealloc(ret, &max);
1224 if (temp == NULL) goto mem_error;
1225 ret = temp;
1226 }
1227 if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
1228 ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1229 ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
1230 ((*(p) == ',')))
1231 ret[len++] = *p++;
1232 else {
1233 int val = *(unsigned char *)p++;
1234 int hi = val / 0x10, lo = val % 0x10;
1235 ret[len++] = '%';
1236 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1237 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1238 }
1239 }
1240 }
1241 if (uri->query_raw != NULL) {
1242 if (len + 1 >= max) {
1243 temp = xmlSaveUriRealloc(ret, &max);
1244 if (temp == NULL) goto mem_error;
1245 ret = temp;
1246 }
1247 ret[len++] = '?';
1248 p = uri->query_raw;
1249 while (*p != 0) {
1250 if (len + 1 >= max) {
1251 temp = xmlSaveUriRealloc(ret, &max);
1252 if (temp == NULL) goto mem_error;
1253 ret = temp;
1254 }
1255 ret[len++] = *p++;
1256 }
1257 } else if (uri->query != NULL) {
1258 if (len + 3 >= max) {
1259 temp = xmlSaveUriRealloc(ret, &max);
1260 if (temp == NULL) goto mem_error;
1261 ret = temp;
1262 }
1263 ret[len++] = '?';
1264 p = uri->query;
1265 while (*p != 0) {
1266 if (len + 3 >= max) {
1267 temp = xmlSaveUriRealloc(ret, &max);
1268 if (temp == NULL) goto mem_error;
1269 ret = temp;
1270 }
1271 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
1272 ret[len++] = *p++;
1273 else {
1274 int val = *(unsigned char *)p++;
1275 int hi = val / 0x10, lo = val % 0x10;
1276 ret[len++] = '%';
1277 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1278 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1279 }
1280 }
1281 }
1282 }
1283 if (uri->fragment != NULL) {
1284 if (len + 3 >= max) {
1285 temp = xmlSaveUriRealloc(ret, &max);
1286 if (temp == NULL) goto mem_error;
1287 ret = temp;
1288 }
1289 ret[len++] = '#';
1290 p = uri->fragment;
1291 while (*p != 0) {
1292 if (len + 3 >= max) {
1293 temp = xmlSaveUriRealloc(ret, &max);
1294 if (temp == NULL) goto mem_error;
1295 ret = temp;
1296 }
1297 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
1298 ret[len++] = *p++;
1299 else {
1300 int val = *(unsigned char *)p++;
1301 int hi = val / 0x10, lo = val % 0x10;
1302 ret[len++] = '%';
1303 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1304 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1305 }
1306 }
1307 }
1308 if (len >= max) {
1309 temp = xmlSaveUriRealloc(ret, &max);
1310 if (temp == NULL) goto mem_error;
1311 ret = temp;
1312 }
1313 ret[len] = 0;
1314 return(ret);
1315
1316 mem_error:
1317 xmlFree(ret);
1318 return(NULL);
1319 }
1320
1321 /**
1322 * xmlPrintURI:
1323 * @stream: a FILE* for the output
1324 * @uri: pointer to an xmlURI
1325 *
1326 * Prints the URI in the stream @stream.
1327 */
1328 void
xmlPrintURI(FILE * stream,xmlURIPtr uri)1329 xmlPrintURI(FILE *stream, xmlURIPtr uri) {
1330 xmlChar *out;
1331
1332 out = xmlSaveUri(uri);
1333 if (out != NULL) {
1334 fprintf(stream, "%s", (char *) out);
1335 xmlFree(out);
1336 }
1337 }
1338
1339 /**
1340 * xmlCleanURI:
1341 * @uri: pointer to an xmlURI
1342 *
1343 * Make sure the xmlURI struct is free of content
1344 */
1345 static void
xmlCleanURI(xmlURIPtr uri)1346 xmlCleanURI(xmlURIPtr uri) {
1347 if (uri == NULL) return;
1348
1349 if (uri->scheme != NULL) xmlFree(uri->scheme);
1350 uri->scheme = NULL;
1351 if (uri->server != NULL) xmlFree(uri->server);
1352 uri->server = NULL;
1353 if (uri->user != NULL) xmlFree(uri->user);
1354 uri->user = NULL;
1355 if (uri->path != NULL) xmlFree(uri->path);
1356 uri->path = NULL;
1357 if (uri->fragment != NULL) xmlFree(uri->fragment);
1358 uri->fragment = NULL;
1359 if (uri->opaque != NULL) xmlFree(uri->opaque);
1360 uri->opaque = NULL;
1361 if (uri->authority != NULL) xmlFree(uri->authority);
1362 uri->authority = NULL;
1363 if (uri->query != NULL) xmlFree(uri->query);
1364 uri->query = NULL;
1365 if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1366 uri->query_raw = NULL;
1367 }
1368
1369 /**
1370 * xmlFreeURI:
1371 * @uri: pointer to an xmlURI
1372 *
1373 * Free up the xmlURI struct
1374 */
1375 void
xmlFreeURI(xmlURIPtr uri)1376 xmlFreeURI(xmlURIPtr uri) {
1377 if (uri == NULL) return;
1378
1379 if (uri->scheme != NULL) xmlFree(uri->scheme);
1380 if (uri->server != NULL) xmlFree(uri->server);
1381 if (uri->user != NULL) xmlFree(uri->user);
1382 if (uri->path != NULL) xmlFree(uri->path);
1383 if (uri->fragment != NULL) xmlFree(uri->fragment);
1384 if (uri->opaque != NULL) xmlFree(uri->opaque);
1385 if (uri->authority != NULL) xmlFree(uri->authority);
1386 if (uri->query != NULL) xmlFree(uri->query);
1387 if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1388 xmlFree(uri);
1389 }
1390
1391 /************************************************************************
1392 * *
1393 * Helper functions *
1394 * *
1395 ************************************************************************/
1396
/**
 * xmlNormalizeURIPath:
 * @path:  pointer to the path string
 *
 * Applies the 5 normalization steps to a path string--that is, RFC 2396
 * Section 5.2, steps 6.c through 6.g.
 *
 * Normalization occurs directly on the string, no new allocation is done.
 * Runs of '/' that follow a segment are collapsed to a single '/' along
 * the way; the '/' characters that start the path are not collapsed.
 *
 * Returns 0 on success, or -1 if @path is NULL
 */
int
xmlNormalizeURIPath(char *path) {
    char *cur, *out;

    if (path == NULL)
        return(-1);

    /* Skip all initial "/" chars.  We want to get to the beginning of the
     * first non-empty segment.
     */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /* Keep everything we've seen so far.  */
    out = cur;

    /*
     * Analyze each segment in sequence for cases (c) and (d).
     * "cur" reads the original text, "out" writes the compacted text;
     * "out" never runs ahead of "cur".
     */
    while (cur[0] != '\0') {
        /*
         * c) All occurrences of "./", where "." is a complete path segment,
         *    are removed from the buffer string.
         */
        if ((cur[0] == '.') && (cur[1] == '/')) {
            cur += 2;
            /* '//' normalization should be done at this point too */
            while (cur[0] == '/')
                cur++;
            continue;
        }

        /*
         * d) If the buffer string ends with "." as a complete path segment,
         *    that "." is removed.
         */
        if ((cur[0] == '.') && (cur[1] == '\0'))
            break;

        /* Otherwise keep the segment.  */
        while (cur[0] != '/') {
            if (cur[0] == '\0')
                goto done_cd;
            (out++)[0] = (cur++)[0];
        }
        /* normalize // : only the last '/' of a run is copied */
        while ((cur[0] == '/') && (cur[1] == '/'))
            cur++;

        (out++)[0] = (cur++)[0];
    }
done_cd:
    out[0] = '\0';

    /* Reset to the beginning of the first segment for the next sequence.  */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /*
     * Analyze each segment in sequence for cases (e) and (f).
     *
     * e) All occurrences of "<segment>/../", where <segment> is a
     *    complete path segment not equal to "..", are removed from the
     *    buffer string.  Removal of these path segments is performed
     *    iteratively, removing the leftmost matching pattern on each
     *    iteration, until no matching pattern remains.
     *
     * f) If the buffer string ends with "<segment>/..", where <segment>
     *    is a complete path segment not equal to "..", that
     *    "<segment>/.." is removed.
     *
     * To satisfy the "iterative" clause in (e), we need to collapse the
     * string every time we find something that needs to be removed.  Thus,
     * we don't need to keep two pointers into the string: we only need a
     * "current position" pointer.
     */
    while (1) {
        char *segp, *tmp;

        /* At the beginning of each iteration of this loop, "cur" points to
         * the first character of the segment we want to examine.
         */

        /* Find the end of the current segment.  */
        segp = cur;
        while ((segp[0] != '/') && (segp[0] != '\0'))
            ++segp;

        /* If this is the last segment, we're done (we need at least two
         * segments to meet the criteria for the (e) and (f) cases).
         */
        if (segp[0] == '\0')
            break;

        /* If the first segment is "..", or if the next segment _isn't_ "..",
         * keep this segment and try the next one.
         */
        ++segp;
        if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur+3))
            || ((segp[0] != '.') || (segp[1] != '.')
                || ((segp[2] != '/') && (segp[2] != '\0')))) {
            cur = segp;
            continue;
        }

        /* If we get here, remove this segment and the next one and back up
         * to the previous segment (if there is one), to implement the
         * "iteratively" clause.  It's pretty much impossible to back up
         * while maintaining two pointers into the buffer, so just compact
         * the whole buffer now.
         */

        /* If this is the end of the buffer, we're done.  */
        if (segp[2] == '\0') {
            cur[0] = '\0';
            break;
        }
        /* The source and destination overlap, so copy by hand instead of
         * calling strcpy(cur, segp + 3).
         */
        tmp = cur;
        segp += 3;
        while ((*tmp++ = *segp++) != 0)
            ;

        /* If nothing at all precedes "cur", keep going from here.  */
        if (cur == path)
            continue;

        /* Step back over the separator(s) in front of "cur".  */
        segp = cur - 1;
        while ((segp > path) && (segp[0] == '/'))
            --segp;

        /* Only the leading slashes of the path precede "cur": there is no
         * previous segment, keep going from here.  Testing the character
         * rather than "segp == path" matters: a one-character first
         * segment (as in "x/a/../..") also ends exactly at "path" and must
         * still be revisited.
         */
        if (segp[0] == '/')
            continue;

        /* "segp" is pointing to the end of a previous segment; find its
         * start.  We need to back up to the previous segment and start
         * over with that to handle things like "foo/bar/../..".  If we
         * don't do this, then on the first pass we'll remove the "bar/..",
         * but be pointing at the second ".." so we won't realize we can also
         * remove the "foo/..".
         */
        cur = segp;
        while ((cur > path) && (cur[-1] != '/'))
            --cur;
    }

    /*
     * g) If the resulting buffer string still begins with one or more
     *    complete path segments of "..", then the reference is
     *    considered to be in error. Implementations may handle this
     *    error by retaining these components in the resolved path (i.e.,
     *    treating them as part of the final URI), by removing them from
     *    the resolved path (i.e., discarding relative levels above the
     *    root), or by avoiding traversal of the reference.
     *
     * We discard them from the final path (absolute paths only).
     */
    if (path[0] == '/') {
        cur = path;
        while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.')
               && ((cur[3] == '/') || (cur[3] == '\0')))
            cur += 3;

        if (cur != path) {
            out = path;
            while (cur[0] != '\0')
                (out++)[0] = (cur++)[0];
            out[0] = 0;
        }
    }

    return(0);
}
1585
/*
 * is_hex:
 * @c:  the character to classify
 *
 * Returns 1 if @c is one of 0-9, a-f or A-F, 0 otherwise
 */
static int is_hex(char c) {
    if ((c >= '0') && (c <= '9'))
        return(1);
    if ((c >= 'a') && (c <= 'f'))
        return(1);
    return(((c >= 'A') && (c <= 'F')) ? 1 : 0);
}
1593
/**
 * xmlURIUnescapeString:
 * @str:  the string to unescape
 * @len:  the length in bytes to unescape (or <= 0 to indicate full string)
 * @target:  optional destination buffer
 *
 * Unescaping routine, but does not check that the string is an URI. Each
 * "%XX" triplet made of two hexadecimal digits is replaced by the single
 * byte it encodes (no character encoding is applied); every other byte,
 * including a '%' not followed by two hexadecimal digits, is copied as is.
 * The result is therefore never longer than the input.
 *
 * When @target is NULL a buffer of @len + 1 bytes is allocated for the
 * result, otherwise the result is written to @target, which has to be
 * able to hold @len + 1 bytes.
 *
 * Returns the unescaped, zero terminated string (@target when it was
 *         provided), or NULL when @str is NULL, when its length does not
 *         fit in an int, or when the allocation fails
 */
char *
xmlURIUnescapeString(const char *str, int len, char *target) {
    char *ret, *out;
    const char *in;

    if (str == NULL)
        return(NULL);
    if (len <= 0)
        len = strlen(str);
    if (len < 0)
        return(NULL);

    ret = target;
    if (ret == NULL) {
        ret = (char *) xmlMallocAtomic(len + 1);
        if (ret == NULL) {
            xmlURIErrMemory("unescaping URI value\n");
            return(NULL);
        }
    }

    in = str;
    out = ret;
    while (len > 0) {
        if ((len > 2) && (in[0] == '%') &&
            (is_hex(in[1])) && (is_hex(in[2]))) {
            int value = 0;
            int i;

            /* Fold the two hex digits following the '%' into one byte. */
            for (i = 1; i <= 2; i++) {
                char digit = in[i];

                value *= 16;
                if ((digit >= '0') && (digit <= '9'))
                    value += digit - '0';
                else if ((digit >= 'a') && (digit <= 'f'))
                    value += (digit - 'a') + 10;
                else /* is_hex() only leaves 'A'..'F' here */
                    value += (digit - 'A') + 10;
            }
            *out++ = (char) value;
            in += 3;
            len -= 3;
        } else {
            *out++ = *in++;
            len--;
        }
    }
    *out = 0;
    return(ret);
}
1655
1656 /**
1657 * xmlURIEscapeStr:
1658 * @str: string to escape
1659 * @list: exception list string of chars not to escape
1660 *
1661 * This routine escapes a string to hex, ignoring reserved characters (a-z)
1662 * and the characters in the exception list.
1663 *
1664 * Returns a new escaped string or NULL in case of error.
1665 */
1666 xmlChar *
xmlURIEscapeStr(const xmlChar * str,const xmlChar * list)1667 xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) {
1668 xmlChar *ret, ch;
1669 xmlChar *temp;
1670 const xmlChar *in;
1671 int len, out;
1672
1673 if (str == NULL)
1674 return(NULL);
1675 if (str[0] == 0)
1676 return(xmlStrdup(str));
1677 len = xmlStrlen(str);
1678 if (!(len > 0)) return(NULL);
1679
1680 len += 20;
1681 ret = (xmlChar *) xmlMallocAtomic(len);
1682 if (ret == NULL) {
1683 xmlURIErrMemory("escaping URI value\n");
1684 return(NULL);
1685 }
1686 in = (const xmlChar *) str;
1687 out = 0;
1688 while(*in != 0) {
1689 if (len - out <= 3) {
1690 temp = xmlSaveUriRealloc(ret, &len);
1691 if (temp == NULL) {
1692 xmlURIErrMemory("escaping URI value\n");
1693 xmlFree(ret);
1694 return(NULL);
1695 }
1696 ret = temp;
1697 }
1698
1699 ch = *in;
1700
1701 if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch))) {
1702 unsigned char val;
1703 ret[out++] = '%';
1704 val = ch >> 4;
1705 if (val <= 9)
1706 ret[out++] = '0' + val;
1707 else
1708 ret[out++] = 'A' + val - 0xA;
1709 val = ch & 0xF;
1710 if (val <= 9)
1711 ret[out++] = '0' + val;
1712 else
1713 ret[out++] = 'A' + val - 0xA;
1714 in++;
1715 } else {
1716 ret[out++] = *in++;
1717 }
1718
1719 }
1720 ret[out] = 0;
1721 return(ret);
1722 }
1723
1724 /**
1725 * xmlURIEscape:
1726 * @str: the string of the URI to escape
1727 *
1728 * Escaping routine, does not do validity checks !
1729 * It will try to escape the chars needing this, but this is heuristic
1730 * based it's impossible to be sure.
1731 *
1732 * Returns an copy of the string, but escaped
1733 *
1734 * 25 May 2001
1735 * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly
1736 * according to RFC2396.
1737 * - Carl Douglas
1738 */
1739 xmlChar *
xmlURIEscape(const xmlChar * str)1740 xmlURIEscape(const xmlChar * str)
1741 {
1742 xmlChar *ret, *segment = NULL;
1743 xmlURIPtr uri;
1744 int ret2;
1745
1746 #define NULLCHK(p) if(!p) { \
1747 xmlURIErrMemory("escaping URI value\n"); \
1748 xmlFreeURI(uri); \
1749 return NULL; } \
1750
1751 if (str == NULL)
1752 return (NULL);
1753
1754 uri = xmlCreateURI();
1755 if (uri != NULL) {
1756 /*
1757 * Allow escaping errors in the unescaped form
1758 */
1759 uri->cleanup = 1;
1760 ret2 = xmlParseURIReference(uri, (const char *)str);
1761 if (ret2) {
1762 xmlFreeURI(uri);
1763 return (NULL);
1764 }
1765 }
1766
1767 if (!uri)
1768 return NULL;
1769
1770 ret = NULL;
1771
1772 if (uri->scheme) {
1773 segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-.");
1774 NULLCHK(segment)
1775 ret = xmlStrcat(ret, segment);
1776 ret = xmlStrcat(ret, BAD_CAST ":");
1777 xmlFree(segment);
1778 }
1779
1780 if (uri->authority) {
1781 segment =
1782 xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@");
1783 NULLCHK(segment)
1784 ret = xmlStrcat(ret, BAD_CAST "//");
1785 ret = xmlStrcat(ret, segment);
1786 xmlFree(segment);
1787 }
1788
1789 if (uri->user) {
1790 segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,");
1791 NULLCHK(segment)
1792 ret = xmlStrcat(ret,BAD_CAST "//");
1793 ret = xmlStrcat(ret, segment);
1794 ret = xmlStrcat(ret, BAD_CAST "@");
1795 xmlFree(segment);
1796 }
1797
1798 if (uri->server) {
1799 segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@");
1800 NULLCHK(segment)
1801 if (uri->user == NULL)
1802 ret = xmlStrcat(ret, BAD_CAST "//");
1803 ret = xmlStrcat(ret, segment);
1804 xmlFree(segment);
1805 }
1806
1807 if (uri->port) {
1808 xmlChar port[10];
1809
1810 snprintf((char *) port, 10, "%d", uri->port);
1811 ret = xmlStrcat(ret, BAD_CAST ":");
1812 ret = xmlStrcat(ret, port);
1813 }
1814
1815 if (uri->path) {
1816 segment =
1817 xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;");
1818 NULLCHK(segment)
1819 ret = xmlStrcat(ret, segment);
1820 xmlFree(segment);
1821 }
1822
1823 if (uri->query_raw) {
1824 ret = xmlStrcat(ret, BAD_CAST "?");
1825 ret = xmlStrcat(ret, BAD_CAST uri->query_raw);
1826 }
1827 else if (uri->query) {
1828 segment =
1829 xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$");
1830 NULLCHK(segment)
1831 ret = xmlStrcat(ret, BAD_CAST "?");
1832 ret = xmlStrcat(ret, segment);
1833 xmlFree(segment);
1834 }
1835
1836 if (uri->opaque) {
1837 segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST "");
1838 NULLCHK(segment)
1839 ret = xmlStrcat(ret, segment);
1840 xmlFree(segment);
1841 }
1842
1843 if (uri->fragment) {
1844 segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#");
1845 NULLCHK(segment)
1846 ret = xmlStrcat(ret, BAD_CAST "#");
1847 ret = xmlStrcat(ret, segment);
1848 xmlFree(segment);
1849 }
1850
1851 xmlFreeURI(uri);
1852 #undef NULLCHK
1853
1854 return (ret);
1855 }
1856
1857 /************************************************************************
1858 * *
1859 * Public functions *
1860 * *
1861 ************************************************************************/
1862
1863 /**
1864 * xmlBuildURI:
1865 * @URI: the URI instance found in the document
1866 * @base: the base value
1867 *
1868 * Computes he final URI of the reference done by checking that
1869 * the given URI is valid, and building the final URI using the
1870 * base URI. This is processed according to section 5.2 of the
1871 * RFC 2396
1872 *
1873 * 5.2. Resolving Relative References to Absolute Form
1874 *
1875 * Returns a new URI string (to be freed by the caller) or NULL in case
1876 * of error.
1877 */
1878 xmlChar *
xmlBuildURI(const xmlChar * URI,const xmlChar * base)1879 xmlBuildURI(const xmlChar *URI, const xmlChar *base) {
1880 xmlChar *val = NULL;
1881 int ret, len, indx, cur, out;
1882 xmlURIPtr ref = NULL;
1883 xmlURIPtr bas = NULL;
1884 xmlURIPtr res = NULL;
1885
1886 /*
1887 * 1) The URI reference is parsed into the potential four components and
1888 * fragment identifier, as described in Section 4.3.
1889 *
1890 * NOTE that a completely empty URI is treated by modern browsers
1891 * as a reference to "." rather than as a synonym for the current
1892 * URI. Should we do that here?
1893 */
1894 if (URI == NULL)
1895 ret = -1;
1896 else {
1897 if (*URI) {
1898 ref = xmlCreateURI();
1899 if (ref == NULL)
1900 goto done;
1901 ret = xmlParseURIReference(ref, (const char *) URI);
1902 }
1903 else
1904 ret = 0;
1905 }
1906 if (ret != 0)
1907 goto done;
1908 if ((ref != NULL) && (ref->scheme != NULL)) {
1909 /*
1910 * The URI is absolute don't modify.
1911 */
1912 val = xmlStrdup(URI);
1913 goto done;
1914 }
1915 if (base == NULL)
1916 ret = -1;
1917 else {
1918 bas = xmlCreateURI();
1919 if (bas == NULL)
1920 goto done;
1921 ret = xmlParseURIReference(bas, (const char *) base);
1922 }
1923 if (ret != 0) {
1924 if (ref)
1925 val = xmlSaveUri(ref);
1926 goto done;
1927 }
1928 if (ref == NULL) {
1929 /*
1930 * the base fragment must be ignored
1931 */
1932 if (bas->fragment != NULL) {
1933 xmlFree(bas->fragment);
1934 bas->fragment = NULL;
1935 }
1936 val = xmlSaveUri(bas);
1937 goto done;
1938 }
1939
1940 /*
1941 * 2) If the path component is empty and the scheme, authority, and
1942 * query components are undefined, then it is a reference to the
1943 * current document and we are done. Otherwise, the reference URI's
1944 * query and fragment components are defined as found (or not found)
1945 * within the URI reference and not inherited from the base URI.
1946 *
1947 * NOTE that in modern browsers, the parsing differs from the above
1948 * in the following aspect: the query component is allowed to be
1949 * defined while still treating this as a reference to the current
1950 * document.
1951 */
1952 res = xmlCreateURI();
1953 if (res == NULL)
1954 goto done;
1955 if ((ref->scheme == NULL) && (ref->path == NULL) &&
1956 ((ref->authority == NULL) && (ref->server == NULL))) {
1957 if (bas->scheme != NULL)
1958 res->scheme = xmlMemStrdup(bas->scheme);
1959 if (bas->authority != NULL)
1960 res->authority = xmlMemStrdup(bas->authority);
1961 else if (bas->server != NULL) {
1962 res->server = xmlMemStrdup(bas->server);
1963 if (bas->user != NULL)
1964 res->user = xmlMemStrdup(bas->user);
1965 res->port = bas->port;
1966 }
1967 if (bas->path != NULL)
1968 res->path = xmlMemStrdup(bas->path);
1969 if (ref->query_raw != NULL)
1970 res->query_raw = xmlMemStrdup (ref->query_raw);
1971 else if (ref->query != NULL)
1972 res->query = xmlMemStrdup(ref->query);
1973 else if (bas->query_raw != NULL)
1974 res->query_raw = xmlMemStrdup(bas->query_raw);
1975 else if (bas->query != NULL)
1976 res->query = xmlMemStrdup(bas->query);
1977 if (ref->fragment != NULL)
1978 res->fragment = xmlMemStrdup(ref->fragment);
1979 goto step_7;
1980 }
1981
1982 /*
1983 * 3) If the scheme component is defined, indicating that the reference
1984 * starts with a scheme name, then the reference is interpreted as an
1985 * absolute URI and we are done. Otherwise, the reference URI's
1986 * scheme is inherited from the base URI's scheme component.
1987 */
1988 if (ref->scheme != NULL) {
1989 val = xmlSaveUri(ref);
1990 goto done;
1991 }
1992 if (bas->scheme != NULL)
1993 res->scheme = xmlMemStrdup(bas->scheme);
1994
1995 if (ref->query_raw != NULL)
1996 res->query_raw = xmlMemStrdup(ref->query_raw);
1997 else if (ref->query != NULL)
1998 res->query = xmlMemStrdup(ref->query);
1999 if (ref->fragment != NULL)
2000 res->fragment = xmlMemStrdup(ref->fragment);
2001
2002 /*
2003 * 4) If the authority component is defined, then the reference is a
2004 * network-path and we skip to step 7. Otherwise, the reference
2005 * URI's authority is inherited from the base URI's authority
2006 * component, which will also be undefined if the URI scheme does not
2007 * use an authority component.
2008 */
2009 if ((ref->authority != NULL) || (ref->server != NULL)) {
2010 if (ref->authority != NULL)
2011 res->authority = xmlMemStrdup(ref->authority);
2012 else {
2013 res->server = xmlMemStrdup(ref->server);
2014 if (ref->user != NULL)
2015 res->user = xmlMemStrdup(ref->user);
2016 res->port = ref->port;
2017 }
2018 if (ref->path != NULL)
2019 res->path = xmlMemStrdup(ref->path);
2020 goto step_7;
2021 }
2022 if (bas->authority != NULL)
2023 res->authority = xmlMemStrdup(bas->authority);
2024 else if (bas->server != NULL) {
2025 res->server = xmlMemStrdup(bas->server);
2026 if (bas->user != NULL)
2027 res->user = xmlMemStrdup(bas->user);
2028 res->port = bas->port;
2029 }
2030
2031 /*
2032 * 5) If the path component begins with a slash character ("/"), then
2033 * the reference is an absolute-path and we skip to step 7.
2034 */
2035 if ((ref->path != NULL) && (ref->path[0] == '/')) {
2036 res->path = xmlMemStrdup(ref->path);
2037 goto step_7;
2038 }
2039
2040
2041 /*
2042 * 6) If this step is reached, then we are resolving a relative-path
2043 * reference. The relative path needs to be merged with the base
2044 * URI's path. Although there are many ways to do this, we will
2045 * describe a simple method using a separate string buffer.
2046 *
2047 * Allocate a buffer large enough for the result string.
2048 */
2049 len = 2; /* extra / and 0 */
2050 if (ref->path != NULL)
2051 len += strlen(ref->path);
2052 if (bas->path != NULL)
2053 len += strlen(bas->path);
2054 res->path = (char *) xmlMallocAtomic(len);
2055 if (res->path == NULL) {
2056 xmlURIErrMemory("resolving URI against base\n");
2057 goto done;
2058 }
2059 res->path[0] = 0;
2060
2061 /*
2062 * a) All but the last segment of the base URI's path component is
2063 * copied to the buffer. In other words, any characters after the
2064 * last (right-most) slash character, if any, are excluded.
2065 */
2066 cur = 0;
2067 out = 0;
2068 if (bas->path != NULL) {
2069 while (bas->path[cur] != 0) {
2070 while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
2071 cur++;
2072 if (bas->path[cur] == 0)
2073 break;
2074
2075 cur++;
2076 while (out < cur) {
2077 res->path[out] = bas->path[out];
2078 out++;
2079 }
2080 }
2081 }
2082 res->path[out] = 0;
2083
2084 /*
2085 * b) The reference's path component is appended to the buffer
2086 * string.
2087 */
2088 if (ref->path != NULL && ref->path[0] != 0) {
2089 indx = 0;
2090 /*
2091 * Ensure the path includes a '/'
2092 */
2093 if ((out == 0) && (bas->server != NULL))
2094 res->path[out++] = '/';
2095 while (ref->path[indx] != 0) {
2096 res->path[out++] = ref->path[indx++];
2097 }
2098 }
2099 res->path[out] = 0;
2100
2101 /*
2102 * Steps c) to h) are really path normalization steps
2103 */
2104 xmlNormalizeURIPath(res->path);
2105
2106 step_7:
2107
2108 /*
2109 * 7) The resulting URI components, including any inherited from the
2110 * base URI, are recombined to give the absolute form of the URI
2111 * reference.
2112 */
2113 val = xmlSaveUri(res);
2114
2115 done:
2116 if (ref != NULL)
2117 xmlFreeURI(ref);
2118 if (bas != NULL)
2119 xmlFreeURI(bas);
2120 if (res != NULL)
2121 xmlFreeURI(res);
2122 return(val);
2123 }
2124
2125 /**
2126 * xmlBuildRelativeURI:
2127 * @URI: the URI reference under consideration
2128 * @base: the base value
2129 *
2130 * Expresses the URI of the reference in terms relative to the
2131 * base. Some examples of this operation include:
2132 * base = "http://site1.com/docs/book1.html"
2133 * URI input URI returned
2134 * docs/pic1.gif pic1.gif
2135 * docs/img/pic1.gif img/pic1.gif
2136 * img/pic1.gif ../img/pic1.gif
2137 * http://site1.com/docs/pic1.gif pic1.gif
2138 * http://site2.com/docs/pic1.gif http://site2.com/docs/pic1.gif
2139 *
2140 * base = "docs/book1.html"
2141 * URI input URI returned
2142 * docs/pic1.gif pic1.gif
2143 * docs/img/pic1.gif img/pic1.gif
2144 * img/pic1.gif ../img/pic1.gif
2145 * http://site1.com/docs/pic1.gif http://site1.com/docs/pic1.gif
2146 *
2147 *
2148 * Note: if the URI reference is really wierd or complicated, it may be
2149 * worthwhile to first convert it into a "nice" one by calling
2150 * xmlBuildURI (using 'base') before calling this routine,
2151 * since this routine (for reasonable efficiency) assumes URI has
2152 * already been through some validation.
2153 *
2154 * Returns a new URI string (to be freed by the caller) or NULL in case
2155 * error.
2156 */
2157 xmlChar *
xmlBuildRelativeURI(const xmlChar * URI,const xmlChar * base)2158 xmlBuildRelativeURI (const xmlChar * URI, const xmlChar * base)
2159 {
2160 xmlChar *val = NULL;
2161 int ret;
2162 int ix;
2163 int pos = 0;
2164 int nbslash = 0;
2165 int len;
2166 xmlURIPtr ref = NULL;
2167 xmlURIPtr bas = NULL;
2168 xmlChar *bptr, *uptr, *vptr;
2169 int remove_path = 0;
2170
2171 if ((URI == NULL) || (*URI == 0))
2172 return NULL;
2173
2174 /*
2175 * First parse URI into a standard form
2176 */
2177 ref = xmlCreateURI ();
2178 if (ref == NULL)
2179 return NULL;
2180 /* If URI not already in "relative" form */
2181 if (URI[0] != '.') {
2182 ret = xmlParseURIReference (ref, (const char *) URI);
2183 if (ret != 0)
2184 goto done; /* Error in URI, return NULL */
2185 } else
2186 ref->path = (char *)xmlStrdup(URI);
2187
2188 /*
2189 * Next parse base into the same standard form
2190 */
2191 if ((base == NULL) || (*base == 0)) {
2192 val = xmlStrdup (URI);
2193 goto done;
2194 }
2195 bas = xmlCreateURI ();
2196 if (bas == NULL)
2197 goto done;
2198 if (base[0] != '.') {
2199 ret = xmlParseURIReference (bas, (const char *) base);
2200 if (ret != 0)
2201 goto done; /* Error in base, return NULL */
2202 } else
2203 bas->path = (char *)xmlStrdup(base);
2204
2205 /*
2206 * If the scheme / server on the URI differs from the base,
2207 * just return the URI
2208 */
2209 if ((ref->scheme != NULL) &&
2210 ((bas->scheme == NULL) ||
2211 (xmlStrcmp ((xmlChar *)bas->scheme, (xmlChar *)ref->scheme)) ||
2212 (xmlStrcmp ((xmlChar *)bas->server, (xmlChar *)ref->server)))) {
2213 val = xmlStrdup (URI);
2214 goto done;
2215 }
2216 if (xmlStrEqual((xmlChar *)bas->path, (xmlChar *)ref->path)) {
2217 val = xmlStrdup(BAD_CAST "");
2218 goto done;
2219 }
2220 if (bas->path == NULL) {
2221 val = xmlStrdup((xmlChar *)ref->path);
2222 goto done;
2223 }
2224 if (ref->path == NULL) {
2225 ref->path = (char *) "/";
2226 remove_path = 1;
2227 }
2228
2229 /*
2230 * At this point (at last!) we can compare the two paths
2231 *
2232 * First we take care of the special case where either of the
2233 * two path components may be missing (bug 316224)
2234 */
2235 if (bas->path == NULL) {
2236 if (ref->path != NULL) {
2237 uptr = (xmlChar *) ref->path;
2238 if (*uptr == '/')
2239 uptr++;
2240 /* exception characters from xmlSaveUri */
2241 val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
2242 }
2243 goto done;
2244 }
2245 bptr = (xmlChar *)bas->path;
2246 if (ref->path == NULL) {
2247 for (ix = 0; bptr[ix] != 0; ix++) {
2248 if (bptr[ix] == '/')
2249 nbslash++;
2250 }
2251 uptr = NULL;
2252 len = 1; /* this is for a string terminator only */
2253 } else {
2254 /*
2255 * Next we compare the two strings and find where they first differ
2256 */
2257 if ((ref->path[pos] == '.') && (ref->path[pos+1] == '/'))
2258 pos += 2;
2259 if ((*bptr == '.') && (bptr[1] == '/'))
2260 bptr += 2;
2261 else if ((*bptr == '/') && (ref->path[pos] != '/'))
2262 bptr++;
2263 while ((bptr[pos] == ref->path[pos]) && (bptr[pos] != 0))
2264 pos++;
2265
2266 if (bptr[pos] == ref->path[pos]) {
2267 val = xmlStrdup(BAD_CAST "");
2268 goto done; /* (I can't imagine why anyone would do this) */
2269 }
2270
2271 /*
2272 * In URI, "back up" to the last '/' encountered. This will be the
2273 * beginning of the "unique" suffix of URI
2274 */
2275 ix = pos;
2276 if ((ref->path[ix] == '/') && (ix > 0))
2277 ix--;
2278 else if ((ref->path[ix] == 0) && (ix > 1) && (ref->path[ix - 1] == '/'))
2279 ix -= 2;
2280 for (; ix > 0; ix--) {
2281 if (ref->path[ix] == '/')
2282 break;
2283 }
2284 if (ix == 0) {
2285 uptr = (xmlChar *)ref->path;
2286 } else {
2287 ix++;
2288 uptr = (xmlChar *)&ref->path[ix];
2289 }
2290
2291 /*
2292 * In base, count the number of '/' from the differing point
2293 */
2294 if (bptr[pos] != ref->path[pos]) {/* check for trivial URI == base */
2295 for (; bptr[ix] != 0; ix++) {
2296 if (bptr[ix] == '/')
2297 nbslash++;
2298 }
2299 }
2300 len = xmlStrlen (uptr) + 1;
2301 }
2302
2303 if (nbslash == 0) {
2304 if (uptr != NULL)
2305 /* exception characters from xmlSaveUri */
2306 val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
2307 goto done;
2308 }
2309
2310 /*
2311 * Allocate just enough space for the returned string -
2312 * length of the remainder of the URI, plus enough space
2313 * for the "../" groups, plus one for the terminator
2314 */
2315 val = (xmlChar *) xmlMalloc (len + 3 * nbslash);
2316 if (val == NULL) {
2317 xmlURIErrMemory("building relative URI\n");
2318 goto done;
2319 }
2320 vptr = val;
2321 /*
2322 * Put in as many "../" as needed
2323 */
2324 for (; nbslash>0; nbslash--) {
2325 *vptr++ = '.';
2326 *vptr++ = '.';
2327 *vptr++ = '/';
2328 }
2329 /*
2330 * Finish up with the end of the URI
2331 */
2332 if (uptr != NULL) {
2333 if ((vptr > val) && (len > 0) &&
2334 (uptr[0] == '/') && (vptr[-1] == '/')) {
2335 memcpy (vptr, uptr + 1, len - 1);
2336 vptr[len - 2] = 0;
2337 } else {
2338 memcpy (vptr, uptr, len);
2339 vptr[len - 1] = 0;
2340 }
2341 } else {
2342 vptr[len - 1] = 0;
2343 }
2344
2345 /* escape the freshly-built path */
2346 vptr = val;
2347 /* exception characters from xmlSaveUri */
2348 val = xmlURIEscapeStr(vptr, BAD_CAST "/;&=+$,");
2349 xmlFree(vptr);
2350
2351 done:
2352 /*
2353 * Free the working variables
2354 */
2355 if (remove_path != 0)
2356 ref->path = NULL;
2357 if (ref != NULL)
2358 xmlFreeURI (ref);
2359 if (bas != NULL)
2360 xmlFreeURI (bas);
2361
2362 return val;
2363 }
2364
2365 /**
2366 * xmlCanonicPath:
2367 * @path: the resource locator in a filesystem notation
2368 *
2369 * Constructs a canonic path from the specified path.
2370 *
2371 * Returns a new canonic path, or a duplicate of the path parameter if the
2372 * construction fails. The caller is responsible for freeing the memory occupied
2373 * by the returned string. If there is insufficient memory available, or the
2374 * argument is NULL, the function returns NULL.
2375 */
2376 #define IS_WINDOWS_PATH(p) \
2377 ((p != NULL) && \
2378 (((p[0] >= 'a') && (p[0] <= 'z')) || \
2379 ((p[0] >= 'A') && (p[0] <= 'Z'))) && \
2380 (p[1] == ':') && ((p[2] == '/') || (p[2] == '\\')))
2381 xmlChar *
xmlCanonicPath(const xmlChar * path)2382 xmlCanonicPath(const xmlChar *path)
2383 {
2384 /*
2385 * For Windows implementations, additional work needs to be done to
2386 * replace backslashes in pathnames with "forward slashes"
2387 */
2388 #if defined(_WIN32) && !defined(__CYGWIN__)
2389 int len = 0;
2390 int i = 0;
2391 xmlChar *p = NULL;
2392 #endif
2393 xmlURIPtr uri;
2394 xmlChar *ret;
2395 const xmlChar *absuri;
2396
2397 if (path == NULL)
2398 return(NULL);
2399
2400 #if defined(_WIN32)
2401 /*
2402 * We must not change the backslashes to slashes if the the path
2403 * starts with \\?\
2404 * Those paths can be up to 32k characters long.
2405 * Was added specifically for OpenOffice, those paths can't be converted
2406 * to URIs anyway.
2407 */
2408 if ((path[0] == '\\') && (path[1] == '\\') && (path[2] == '?') &&
2409 (path[3] == '\\') )
2410 return xmlStrdup((const xmlChar *) path);
2411 #endif
2412
2413 /* sanitize filename starting with // so it can be used as URI */
2414 if ((path[0] == '/') && (path[1] == '/') && (path[2] != '/'))
2415 path++;
2416
2417 if ((uri = xmlParseURI((const char *) path)) != NULL) {
2418 xmlFreeURI(uri);
2419 return xmlStrdup(path);
2420 }
2421
2422 /* Check if this is an "absolute uri" */
2423 absuri = xmlStrstr(path, BAD_CAST "://");
2424 if (absuri != NULL) {
2425 int l, j;
2426 unsigned char c;
2427 xmlChar *escURI;
2428
2429 /*
2430 * this looks like an URI where some parts have not been
2431 * escaped leading to a parsing problem. Check that the first
2432 * part matches a protocol.
2433 */
2434 l = absuri - path;
2435 /* Bypass if first part (part before the '://') is > 20 chars */
2436 if ((l <= 0) || (l > 20))
2437 goto path_processing;
2438 /* Bypass if any non-alpha characters are present in first part */
2439 for (j = 0;j < l;j++) {
2440 c = path[j];
2441 if (!(((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z'))))
2442 goto path_processing;
2443 }
2444
2445 /* Escape all except the characters specified in the supplied path */
2446 escURI = xmlURIEscapeStr(path, BAD_CAST ":/?_.#&;=");
2447 if (escURI != NULL) {
2448 /* Try parsing the escaped path */
2449 uri = xmlParseURI((const char *) escURI);
2450 /* If successful, return the escaped string */
2451 if (uri != NULL) {
2452 xmlFreeURI(uri);
2453 return escURI;
2454 }
2455 }
2456 }
2457
2458 path_processing:
2459 /* For Windows implementations, replace backslashes with 'forward slashes' */
2460 #if defined(_WIN32) && !defined(__CYGWIN__)
2461 /*
2462 * Create a URI structure
2463 */
2464 uri = xmlCreateURI();
2465 if (uri == NULL) { /* Guard against 'out of memory' */
2466 return(NULL);
2467 }
2468
2469 len = xmlStrlen(path);
2470 if ((len > 2) && IS_WINDOWS_PATH(path)) {
2471 /* make the scheme 'file' */
2472 uri->scheme = xmlStrdup(BAD_CAST "file");
2473 /* allocate space for leading '/' + path + string terminator */
2474 uri->path = xmlMallocAtomic(len + 2);
2475 if (uri->path == NULL) {
2476 xmlFreeURI(uri); /* Guard agains 'out of memory' */
2477 return(NULL);
2478 }
2479 /* Put in leading '/' plus path */
2480 uri->path[0] = '/';
2481 p = uri->path + 1;
2482 strncpy(p, path, len + 1);
2483 } else {
2484 uri->path = xmlStrdup(path);
2485 if (uri->path == NULL) {
2486 xmlFreeURI(uri);
2487 return(NULL);
2488 }
2489 p = uri->path;
2490 }
2491 /* Now change all occurences of '\' to '/' */
2492 while (*p != '\0') {
2493 if (*p == '\\')
2494 *p = '/';
2495 p++;
2496 }
2497
2498 if (uri->scheme == NULL) {
2499 ret = xmlStrdup((const xmlChar *) uri->path);
2500 } else {
2501 ret = xmlSaveUri(uri);
2502 }
2503
2504 xmlFreeURI(uri);
2505 #else
2506 ret = xmlStrdup((const xmlChar *) path);
2507 #endif
2508 return(ret);
2509 }
2510
2511 /**
2512 * xmlPathToURI:
2513 * @path: the resource locator in a filesystem notation
2514 *
2515 * Constructs an URI expressing the existing path
2516 *
2517 * Returns a new URI, or a duplicate of the path parameter if the
2518 * construction fails. The caller is responsible for freeing the memory
2519 * occupied by the returned string. If there is insufficient memory available,
2520 * or the argument is NULL, the function returns NULL.
2521 */
2522 xmlChar *
xmlPathToURI(const xmlChar * path)2523 xmlPathToURI(const xmlChar *path)
2524 {
2525 xmlURIPtr uri;
2526 xmlURI temp;
2527 xmlChar *ret, *cal;
2528
2529 if (path == NULL)
2530 return(NULL);
2531
2532 if ((uri = xmlParseURI((const char *) path)) != NULL) {
2533 xmlFreeURI(uri);
2534 return xmlStrdup(path);
2535 }
2536 cal = xmlCanonicPath(path);
2537 if (cal == NULL)
2538 return(NULL);
2539 #if defined(_WIN32) && !defined(__CYGWIN__)
2540 /* xmlCanonicPath can return an URI on Windows (is that the intended behaviour?)
2541 If 'cal' is a valid URI allready then we are done here, as continuing would make
2542 it invalid. */
2543 if ((uri = xmlParseURI((const char *) cal)) != NULL) {
2544 xmlFreeURI(uri);
2545 return cal;
2546 }
2547 /* 'cal' can contain a relative path with backslashes. If that is processed
2548 by xmlSaveURI, they will be escaped and the external entity loader machinery
2549 will fail. So convert them to slashes. Misuse 'ret' for walking. */
2550 ret = cal;
2551 while (*ret != '\0') {
2552 if (*ret == '\\')
2553 *ret = '/';
2554 ret++;
2555 }
2556 #endif
2557 memset(&temp, 0, sizeof(temp));
2558 temp.path = (char *) cal;
2559 ret = xmlSaveUri(&temp);
2560 xmlFree(cal);
2561 return(ret);
2562 }
2563 #define bottom_uri
2564 #include "elfgcchack.h"
2565