1 /* ----------------------------------------------------------------------- *
2  *
3  *   Copyright 2009-2011 Intel Corporation; author: H. Peter Anvin
4  *
5  *   Permission is hereby granted, free of charge, to any person
6  *   obtaining a copy of this software and associated documentation
7  *   files (the "Software"), to deal in the Software without
8  *   restriction, including without limitation the rights to use,
9  *   copy, modify, merge, publish, distribute, sublicense, and/or
10  *   sell copies of the Software, and to permit persons to whom
11  *   the Software is furnished to do so, subject to the following
12  *   conditions:
13  *
14  *   The above copyright notice and this permission notice shall
15  *   be included in all copies or substantial portions of the Software.
16  *
17  *   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18  *   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
19  *   OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20  *   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
21  *   HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
22  *   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23  *   FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
24  *   OTHER DEALINGS IN THE SOFTWARE.
25  *
26  * ----------------------------------------------------------------------- */
27 
28 /*
29  * urlparse.c
30  */
31 
32 #include <string.h>
33 #include <stdlib.h>
34 #include <stdio.h>
35 #include "url.h"
36 
37 /*
38  * Return the type of a URL without modifying the string
39  */
url_type(const char * url)40 enum url_type url_type(const char *url)
41 {
42     const char *q;
43 
44     q = strchr(url, ':');
45     if (!q)
46 	return URL_SUFFIX;
47 
48     if (q[1] == '/' && q[2] == '/')
49 	return URL_NORMAL;
50 
51     if (q[1] == ':')
52 	return URL_OLD_TFTP;
53 
54     return URL_SUFFIX;
55 }
56 
57 /*
58  * Decompose a URL into its components.  This is done in-place;
59  * this routine does not allocate any additional storage.  Freeing the
60  * original buffer frees all storage used.
61  */
parse_url(struct url_info * ui,char * url)62 void parse_url(struct url_info *ui, char *url)
63 {
64     char *p = url;
65     char *q, *r, *s;
66     int c;
67 
68     memset(ui, 0, sizeof *ui);
69 
70     q = strchr(p, ':');
71     if (q && (q[1] == '/' && q[2] == '/')) {
72 	ui->type = URL_NORMAL;
73 
74 	ui->scheme = p;
75 	*q = '\0';
76 	p = q+3;
77 
78 	q = strchr(p, '/');
79 	if (q) {
80 	    *q = '\0';
81 	    ui->path = q+1;
82 	    q = strchr(q+1, '#');
83 	    if (q)
84 		*q = '\0';
85 	} else {
86 	    ui->path = "";
87 	}
88 
89 	r = strchr(p, '@');
90 	if (r) {
91 	    ui->user = p;
92 	    *r = '\0';
93 	    s = strchr(p, ':');
94 	    if (s) {
95 		*s = '\0';
96 		ui->passwd = s+1;
97 	    }
98 	    p = r+1;
99 	}
100 
101 	ui->host = p;
102 	r = strchr(p, ':');
103 	if (r) {
104 	    *r++ = '\0';
105 	    ui->port = 0;
106 	    while ((c = *r++)) {
107 		c -= '0';
108 		if (c > 9)
109 		    break;
110 		ui->port = ui->port * 10 + c;
111 	    }
112 	}
113     } else if (q && q[1] == ':') {
114 	*q = '\0';
115 	ui->scheme = "tftp";
116 	ui->host = p;
117 	ui->path = q+2;
118 	ui->type = URL_OLD_TFTP;
119     } else {
120 	ui->path = p;
121 	ui->type = URL_SUFFIX;
122     }
123 }
124 
125 /*
126  * Escapes unsafe characters in a URL.
127  * This does *not* escape things like query characters!
 * Returns the length of the complete escaped string, not counting the
 * terminating NUL, even if it did not fit into bufsize bytes.
129  */
size_t url_escape_unsafe(char *output, const char *input, size_t bufsize)
{
    static const char uchexchar[] = "0123456789ABCDEF";
    const char *p;
    unsigned char c;
    char *q = output;
    size_t n = 0;

    /*
     * n counts every character the escaped string needs, whether or not
     * it is stored; at most bufsize-1 characters are actually written so
     * that room is left for the terminating NUL.  A return value that is
     * >= bufsize therefore means the output was truncated.
     */
    for (p = input; (c = *p); p++) {
	if (c <= ' ' || c > '~') {
	    /* Space, control characters, DEL and bytes >= 0x80: %XX */
	    if (++n < bufsize) *q++ = '%';
	    if (++n < bufsize) *q++ = uchexchar[c >> 4];
	    if (++n < bufsize) *q++ = uchexchar[c & 15];
	} else {
	    if (++n < bufsize) *q++ = c;
	}
    }

    /* A zero-sized buffer has no room even for the terminator */
    if (bufsize)
	*q = '\0';
    return n;
}
152 
/*
 * Convert one hexadecimal digit character (either case) to its value;
 * returns -1 if the character is not a hex digit.
 */
static int hexdigit(char c)
{
    int lower;

    if (c >= '0' && c <= '9')
	return c - '0';

    /* Setting bit 5 maps 'A'..'F' onto 'a'..'f' */
    lower = c | 0x20;
    if (lower < 'a' || lower > 'f')
	return -1;

    return lower - 'a' + 10;
}
162 
163 /*
164  * Unescapes a buffer, optionally ending at an *unescaped* terminator
165  * (like ; for TFTP).  The unescaping is done in-place.
166  *
167  * If a terminator is reached, return a pointer to the first character
168  * after the terminator.
169  */
char *url_unescape(char *buffer, char terminator)
{
    char *p = buffer;		/* read position */
    char *q = buffer;		/* write position, never ahead of p */
    unsigned char c;
    int x, y;

    while ((c = *p)) {
	if (c == terminator) {
	    /*
	     * End the unescaped string here and hand back the text that
	     * follows the terminator.  Returning p itself would be wrong:
	     * when nothing has been unescaped yet q == p, so the
	     * terminator byte has just been overwritten with NUL.
	     */
	    *q = '\0';
	    return p + 1;
	}
	p++;
	if (c == '%') {
	    /* Only a '%' followed by two hex digits is an escape */
	    x = hexdigit(p[0]);
	    if (x >= 0) {
		y = hexdigit(p[1]);
		if (y >= 0) {
		    *q++ = (x << 4) + y;
		    p += 2;
		    continue;
		}
	    }
	}
	/* Ordinary character, or a '%' that does not start an escape */
	*q++ = c;
    }
    *q = '\0';
    /* End of string reached without meeting the terminator */
    return NULL;
}
199 
200 #ifdef URL_TEST
201 
main(int argc,char * argv[])202 int main(int argc, char *argv[])
203 {
204     int i;
205     struct url_info url;
206 
207     for (i = 1; i < argc; i++) {
208 	parse_url(&url, argv[i]);
209 	printf("scheme:  %s\n"
210 	       "user:    %s\n"
211 	       "passwd:  %s\n"
212 	       "host:    %s\n"
213 	       "port:    %d\n"
214 	       "path:    %s\n"
215 	       "type:    %d\n",
216 	       url.scheme, url.user, url.passwd, url.host, url.port,
217 	       url.path, url.type);
218     }
219 
220     return 0;
221 }
222 
223 #endif
224