1 /*
2  * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public License as
6  * published by the Free Software Foundation; either version 2 of the
7  * License, or any later version.
8  *
9  * This program is distributed in the hope that it will be useful, but
10  * WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12  * General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software
16  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17  */
18 
19 FILE_LICENCE ( GPL2_OR_LATER );
20 
21 /** @file
22  *
23  * Uniform Resource Identifiers
24  *
25  */
26 
27 #include <stdint.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <libgen.h>
31 #include <ctype.h>
32 #include <gpxe/vsprintf.h>
33 #include <gpxe/uri.h>
34 
35 /**
36  * Dump URI for debugging
37  *
38  * @v uri		URI
39  */
dump_uri(struct uri * uri)40 static void dump_uri ( struct uri *uri ) {
41 	if ( ! uri )
42 		return;
43 	if ( uri->scheme )
44 		DBG ( " scheme \"%s\"", uri->scheme );
45 	if ( uri->opaque )
46 		DBG ( " opaque \"%s\"", uri->opaque );
47 	if ( uri->user )
48 		DBG ( " user \"%s\"", uri->user );
49 	if ( uri->password )
50 		DBG ( " password \"%s\"", uri->password );
51 	if ( uri->host )
52 		DBG ( " host \"%s\"", uri->host );
53 	if ( uri->port )
54 		DBG ( " port \"%s\"", uri->port );
55 	if ( uri->path )
56 		DBG ( " path \"%s\"", uri->path );
57 	if ( uri->query )
58 		DBG ( " query \"%s\"", uri->query );
59 	if ( uri->fragment )
60 		DBG ( " fragment \"%s\"", uri->fragment );
61 }
62 
63 /**
64  * Parse URI
65  *
66  * @v uri_string	URI as a string
67  * @ret uri		URI
68  *
69  * Splits a URI into its component parts.  The return URI structure is
70  * dynamically allocated and must eventually be freed by calling
71  * uri_put().
72  */
parse_uri(const char * uri_string)73 struct uri * parse_uri ( const char *uri_string ) {
74 	struct uri *uri;
75 	char *raw;
76 	char *tmp;
77 	char *path = NULL;
78 	char *authority = NULL;
79 	int i;
80 	size_t raw_len;
81 
82 	/* Allocate space for URI struct and a copy of the string */
83 	raw_len = ( strlen ( uri_string ) + 1 /* NUL */ );
84 	uri = zalloc ( sizeof ( *uri ) + raw_len );
85 	if ( ! uri )
86 		return NULL;
87 	raw = ( ( ( char * ) uri ) + sizeof ( *uri ) );
88 
89 	/* Copy in the raw string */
90 	memcpy ( raw, uri_string, raw_len );
91 
92 	/* Start by chopping off the fragment, if it exists */
93 	if ( ( tmp = strchr ( raw, '#' ) ) ) {
94 		*(tmp++) = '\0';
95 		uri->fragment = tmp;
96 	}
97 
98 	/* Identify absolute/relative URI.  We ignore schemes that are
99 	 * apparently only a single character long, since otherwise we
100 	 * misinterpret a DOS-style path name ("C:\path\to\file") as a
101 	 * URI with scheme="C",opaque="\path\to\file".
102 	 */
103 	if ( ( tmp = strchr ( raw, ':' ) ) && ( tmp > ( raw + 1 ) ) ) {
104 		/* Absolute URI: identify hierarchical/opaque */
105 		uri->scheme = raw;
106 		*(tmp++) = '\0';
107 		if ( *tmp == '/' ) {
108 			/* Absolute URI with hierarchical part */
109 			path = tmp;
110 		} else {
111 			/* Absolute URI with opaque part */
112 			uri->opaque = tmp;
113 		}
114 	} else {
115 		/* Relative URI */
116 		path = raw;
117 	}
118 
119 	/* If we don't have a path (i.e. we have an absolute URI with
120 	 * an opaque portion, we're already finished processing
121 	 */
122 	if ( ! path )
123 		goto done;
124 
125 	/* Chop off the query, if it exists */
126 	if ( ( tmp = strchr ( path, '?' ) ) ) {
127 		*(tmp++) = '\0';
128 		uri->query = tmp;
129 	}
130 
131 	/* Identify net/absolute/relative path */
132 	if ( strncmp ( path, "//", 2 ) == 0 ) {
133 		/* Net path.  If this is terminated by the first '/'
134 		 * of an absolute path, then we have no space for a
135 		 * terminator after the authority field, so shuffle
136 		 * the authority down by one byte, overwriting one of
137 		 * the two slashes.
138 		 */
139 		authority = ( path + 2 );
140 		if ( ( tmp = strchr ( authority, '/' ) ) ) {
141 			/* Shuffle down */
142 			uri->path = tmp;
143 			memmove ( ( authority - 1 ), authority,
144 				  ( tmp - authority ) );
145 			authority--;
146 			*(--tmp) = '\0';
147 		}
148 	} else {
149 		/* Absolute/relative path */
150 		uri->path = path;
151 	}
152 
153 	/* Split authority into user[:password] and host[:port] portions */
154 	if ( ( tmp = strchr ( authority, '@' ) ) ) {
155 		/* Has user[:password] */
156 		*(tmp++) = '\0';
157 		uri->host = tmp;
158 		uri->user = authority;
159 		if ( ( tmp = strchr ( authority, ':' ) ) ) {
160 			/* Has password */
161 			*(tmp++) = '\0';
162 			uri->password = tmp;
163 		}
164 	} else {
165 		/* No user:password */
166 		uri->host = authority;
167 	}
168 
169 	/* Split host into host[:port] */
170 	if ( ( tmp = strchr ( uri->host, ':' ) ) ) {
171 		*(tmp++) = '\0';
172 		uri->port = tmp;
173 	}
174 
175 	/* Decode fields that should be decoded */
176 	for ( i = URI_FIRST_FIELD; i <= URI_LAST_FIELD; i++ ) {
177 		const char *field = uri_get_field ( uri, i );
178 		if ( field && ( URI_ENCODED & ( 1 << i ) ) )
179 			uri_decode ( field, ( char * ) field,
180 				     strlen ( field ) + 1 /* NUL */ );
181 	}
182 
183  done:
184 	DBG ( "URI \"%s\" split into", uri_string );
185 	dump_uri ( uri );
186 	DBG ( "\n" );
187 
188 	return uri;
189 }
190 
191 /**
192  * Get port from URI
193  *
194  * @v uri		URI, or NULL
195  * @v default_port	Default port to use if none specified in URI
196  * @ret port		Port
197  */
uri_port(struct uri * uri,unsigned int default_port)198 unsigned int uri_port ( struct uri *uri, unsigned int default_port ) {
199 	if ( ( ! uri ) || ( ! uri->port ) )
200 		return default_port;
201 	return ( strtoul ( uri->port, NULL, 0 ) );
202 }
203 
204 /**
205  * Unparse URI
206  *
207  * @v buf		Buffer to fill with URI string
208  * @v size		Size of buffer
209  * @v uri		URI to write into buffer, or NULL
210  * @v fields		Bitmask of fields to include in URI string, or URI_ALL
211  * @ret len		Length of URI string
212  */
unparse_uri(char * buf,size_t size,struct uri * uri,unsigned int fields)213 int unparse_uri ( char *buf, size_t size, struct uri *uri,
214 		  unsigned int fields ) {
215 	/* List of characters that typically go before certain fields */
216 	static char separators[] = { /* scheme */ 0, /* opaque */ ':',
217 				     /* user */ 0, /* password */ ':',
218 				     /* host */ '@', /* port */ ':',
219 				     /* path */ 0, /* query */ '?',
220 				     /* fragment */ '#' };
221 	int used = 0;
222 	int i;
223 
224 	DBG ( "URI unparsing" );
225 	dump_uri ( uri );
226 	DBG ( "\n" );
227 
228 	/* Ensure buffer is NUL-terminated */
229 	if ( size )
230 		buf[0] = '\0';
231 
232 	/* Special-case NULL URI */
233 	if ( ! uri )
234 		return 0;
235 
236 	/* Iterate through requested fields */
237 	for ( i = URI_FIRST_FIELD; i <= URI_LAST_FIELD; i++ ) {
238 		const char *field = uri_get_field ( uri, i );
239 		char sep = separators[i];
240 
241 		/* Ensure `fields' only contains bits for fields that exist */
242 		if ( ! field )
243 			fields &= ~( 1 << i );
244 
245 		/* Store this field if we were asked to */
246 		if ( fields & ( 1 << i ) ) {
247 			/* Print :// if we're non-opaque and had a scheme */
248 			if ( ( fields & URI_SCHEME_BIT ) &&
249 			     ( i > URI_OPAQUE ) ) {
250 				used += ssnprintf ( buf + used, size - used,
251 						    "://" );
252 				/* Only print :// once */
253 				fields &= ~URI_SCHEME_BIT;
254 			}
255 
256 			/* Only print separator if an earlier field exists */
257 			if ( sep && ( fields & ( ( 1 << i ) - 1 ) ) )
258 				used += ssnprintf ( buf + used, size - used,
259 						    "%c", sep );
260 
261 			/* Print contents of field, possibly encoded */
262 			if ( URI_ENCODED & ( 1 << i ) )
263 				used += uri_encode ( field, buf + used,
264 						     size - used, i );
265 			else
266 				used += ssnprintf ( buf + used, size - used,
267 						    "%s", field );
268 		}
269 	}
270 
271 	return used;
272 }
273 
274 /**
275  * Duplicate URI
276  *
277  * @v uri		URI
278  * @ret uri		Duplicate URI
279  *
280  * Creates a modifiable copy of a URI.
281  */
uri_dup(struct uri * uri)282 struct uri * uri_dup ( struct uri *uri ) {
283 	size_t len = ( unparse_uri ( NULL, 0, uri, URI_ALL ) + 1 );
284 	char buf[len];
285 
286 	unparse_uri ( buf, len, uri, URI_ALL );
287 	return parse_uri ( buf );
288 }
289 
/**
 * Resolve base+relative path
 *
 * @v base_path		Base path
 * @v relative_path	Relative path
 * @ret resolved_path	Resolved path, or NULL on allocation failure
 *
 * Takes a base path (e.g. "/var/lib/tftpboot/vmlinuz") and a relative
 * path (e.g. "initrd.gz") and produces a new path
 * (e.g. "/var/lib/tftpboot/initrd.gz").  Note that any non-directory
 * portion of the base path will automatically be stripped; this
 * matches the semantics used when resolving the path component of
 * URIs.
 *
 * The returned string is heap-allocated (via strdup() or asprintf())
 * and is owned by the caller.
 */
char * resolve_path ( const char *base_path,
		      const char *relative_path ) {
	size_t base_len = ( strlen ( base_path ) + 1 );
	char base_path_copy[base_len];
	char *base_tmp = base_path_copy;
	char *resolved;
	size_t dir_len;
	const char *sep;

	/* If relative path is absolute, just re-use it */
	if ( relative_path[0] == '/' )
		return strdup ( relative_path );

	/* Create modifiable copy of path for dirname() */
	memcpy ( base_tmp, base_path, base_len );
	base_tmp = dirname ( base_tmp );

	/* Process "./" and "../" elements */
	while ( *relative_path == '.' ) {
		relative_path++;
		if ( *relative_path == 0 ) {
			/* Do nothing */
		} else if ( *relative_path == '/' ) {
			relative_path++;
		} else if ( *relative_path == '.' ) {
			relative_path++;
			if ( *relative_path == 0 ) {
				base_tmp = dirname ( base_tmp );
			} else if ( *relative_path == '/' ) {
				base_tmp = dirname ( base_tmp );
				relative_path++;
			} else {
				/* Name merely starts with ".."; keep it */
				relative_path -= 2;
				break;
			}
		} else {
			/* Name merely starts with "."; keep it */
			relative_path--;
			break;
		}
	}

	/* Insert a separator unless the directory already ends in
	 * one.  Should dirname() ever hand back an empty string, it
	 * must not be indexed at [-1]; it simply gets the separator.
	 */
	dir_len = strlen ( base_tmp );
	sep = ( ( dir_len && ( base_tmp[ dir_len - 1 ] == '/' ) ) ?
		"" : "/" );

	/* Create and return new path */
	if ( asprintf ( &resolved, "%s%s%s", base_tmp, sep,
			relative_path ) < 0 )
		return NULL;

	return resolved;
}
351 
352 /**
353  * Resolve base+relative URI
354  *
355  * @v base_uri		Base URI, or NULL
356  * @v relative_uri	Relative URI
357  * @ret resolved_uri	Resolved URI
358  *
359  * Takes a base URI (e.g. "http://etherboot.org/kernels/vmlinuz" and a
360  * relative URI (e.g. "../initrds/initrd.gz") and produces a new URI
361  * (e.g. "http://etherboot.org/initrds/initrd.gz").
362  */
resolve_uri(struct uri * base_uri,struct uri * relative_uri)363 struct uri * resolve_uri ( struct uri *base_uri,
364 			   struct uri *relative_uri ) {
365 	struct uri tmp_uri;
366 	char *tmp_path = NULL;
367 	struct uri *new_uri;
368 
369 	/* If relative URI is absolute, just re-use it */
370 	if ( uri_is_absolute ( relative_uri ) || ( ! base_uri ) )
371 		return uri_get ( relative_uri );
372 
373 	/* Mangle URI */
374 	memcpy ( &tmp_uri, base_uri, sizeof ( tmp_uri ) );
375 	if ( relative_uri->path ) {
376 		tmp_path = resolve_path ( ( base_uri->path ?
377 					    base_uri->path : "/" ),
378 					  relative_uri->path );
379 		tmp_uri.path = tmp_path;
380 		tmp_uri.query = relative_uri->query;
381 		tmp_uri.fragment = relative_uri->fragment;
382 	} else if ( relative_uri->query ) {
383 		tmp_uri.query = relative_uri->query;
384 		tmp_uri.fragment = relative_uri->fragment;
385 	} else if ( relative_uri->fragment ) {
386 		tmp_uri.fragment = relative_uri->fragment;
387 	}
388 
389 	/* Create demangled URI */
390 	new_uri = uri_dup ( &tmp_uri );
391 	free ( tmp_path );
392 	return new_uri;
393 }
394 
395 /**
396  * Test for unreserved URI characters
397  *
398  * @v c			Character to test
399  * @v field		Field of URI in which character lies
400  * @ret is_unreserved	Character is an unreserved character
401  */
is_unreserved_uri_char(int c,int field)402 static int is_unreserved_uri_char ( int c, int field ) {
403 	/* According to RFC3986, the unreserved character set is
404 	 *
405 	 * A-Z a-z 0-9 - _ . ~
406 	 *
407 	 * but we also pass & ; = in queries, / in paths,
408 	 * and everything in opaques
409 	 */
410 	int ok = ( isupper ( c ) || islower ( c ) || isdigit ( c ) ||
411 		    ( c == '-' ) || ( c == '_' ) ||
412 		    ( c == '.' ) || ( c == '~' ) );
413 
414 	if ( field == URI_QUERY )
415 		ok = ok || ( c == ';' ) || ( c == '&' ) || ( c == '=' );
416 
417 	if ( field == URI_PATH )
418 		ok = ok || ( c == '/' );
419 
420 	if ( field == URI_OPAQUE )
421 		ok = 1;
422 
423 	return ok;
424 }
425 
/**
 * URI-encode string
 *
 * @v raw_string	String to be URI-encoded
 * @v buf		Buffer to contain encoded string
 * @v len		Length of buffer
 * @v field		Field of URI in which string lies
 * @ret len		Length of encoded string (excluding NUL)
 *
 * Characters accepted by is_unreserved_uri_char() for @c field are
 * copied through; every other byte is written as '%' followed by two
 * uppercase hexadecimal digits.
 */
size_t uri_encode ( const char *raw_string, char *buf, ssize_t len,
		    int field ) {
	const char *src;
	char *dest = buf;
	ssize_t space = len;
	size_t written;
	unsigned char ch;

	/* Leave a valid empty string behind even for empty input */
	if ( len > 0 )
		buf[0] = '\0';

	for ( src = raw_string ; ( ch = *src ) != 0 ; src++ ) {
		/* Literal or percent-escaped, one source byte at a time */
		written = ssnprintf ( dest, space,
				      ( is_unreserved_uri_char ( ch, field ) ?
					"%c" : "%%%02X" ), ch );
		dest += written;
		space -= written;
	}

	/* Total consumed is the difference from the starting space */
	return ( len - space );
}
456 
/**
 * Convert a hexadecimal digit to its value
 *
 * @v c			Character to convert
 * @ret value		Value in the range 0-15, or -1 if not a hex digit
 */
static int uri_hex_value ( char c ) {
	if ( ( c >= '0' ) && ( c <= '9' ) )
		return ( c - '0' );
	if ( ( c >= 'a' ) && ( c <= 'f' ) )
		return ( c - 'a' + 10 );
	if ( ( c >= 'A' ) && ( c <= 'F' ) )
		return ( c - 'A' + 10 );
	return -1;
}

/**
 * Decode URI-encoded string
 *
 * @v encoded_string	URI-encoded string
 * @v buf		Buffer to contain decoded string
 * @v len		Length of buffer
 * @ret len		Length of decoded string (excluding NUL)
 *
 * This function may be used in-place, with @a buf the same as
 * @a encoded_string.
 *
 * Only a '%' followed by exactly two hexadecimal digits is treated as
 * an escape.  A '%' that is not followed by two hexadecimal digits is
 * copied through literally, so malformed input can neither inject a
 * NUL byte into the output nor be misread via signs, whitespace or a
 * "0x" prefix.
 *
 * The returned length counts every decoded character, including any
 * that did not fit in the buffer; the output itself is truncated to
 * fit and NUL-terminated whenever @c len is positive.
 */
size_t uri_decode ( const char *encoded_string, char *buf, ssize_t len ) {
	ssize_t remaining;
	unsigned char c;
	int high;
	int low;

	for ( remaining = len; *encoded_string; remaining-- ) {
		c = *(encoded_string++);
		if ( c == '%' ) {
			/* Look at the second digit only if the first
			 * is valid, so we never read past the NUL.
			 */
			high = uri_hex_value ( encoded_string[0] );
			low = ( ( high >= 0 ) ?
				uri_hex_value ( encoded_string[1] ) : -1 );
			if ( low >= 0 ) {
				c = ( ( high << 4 ) | low );
				encoded_string += 2;
			}
		}
		/* Always keep one byte in reserve for the NUL */
		if ( remaining > 1 )
			*buf++ = c;
	}

	if ( len > 0 )
		*buf = 0;

	return ( len - remaining );
}
493