1 /*
2  *
3  * Copyright 2015 gRPC authors.
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at
8  *
9  *     http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17  */
18 
19 #include <grpc/support/port_platform.h>
20 
21 #include "src/core/ext/filters/client_channel/uri_parser.h"
22 
23 #include <string.h>
24 
25 #include <grpc/slice_buffer.h>
26 #include <grpc/support/alloc.h>
27 #include <grpc/support/log.h>
28 #include <grpc/support/string_util.h>
29 
30 #include "src/core/lib/gpr/string.h"
31 #include "src/core/lib/slice/percent_encoding.h"
32 #include "src/core/lib/slice/slice_internal.h"
33 #include "src/core/lib/slice/slice_string_helpers.h"
34 
/** Sentinel meaning "offset not set": a size_t with every bit set to 1. */
36 #define NOT_SET (~(size_t)0)
37 
bad_uri(const char * uri_text,size_t pos,const char * section,bool suppress_errors)38 static grpc_uri* bad_uri(const char* uri_text, size_t pos, const char* section,
39                          bool suppress_errors) {
40   char* line_prefix;
41   size_t pfx_len;
42 
43   if (!suppress_errors) {
44     gpr_asprintf(&line_prefix, "bad uri.%s: '", section);
45     pfx_len = strlen(line_prefix) + pos;
46     gpr_log(GPR_ERROR, "%s%s'", line_prefix, uri_text);
47     gpr_free(line_prefix);
48 
49     line_prefix = static_cast<char*>(gpr_malloc(pfx_len + 1));
50     memset(line_prefix, ' ', pfx_len);
51     line_prefix[pfx_len] = 0;
52     gpr_log(GPR_ERROR, "%s^ here", line_prefix);
53     gpr_free(line_prefix);
54   }
55 
56   return nullptr;
57 }
58 
59 /** Returns a copy of percent decoded \a src[begin, end) */
decode_and_copy_component(const char * src,size_t begin,size_t end)60 static char* decode_and_copy_component(const char* src, size_t begin,
61                                        size_t end) {
62   grpc_slice component =
63       (begin == NOT_SET || end == NOT_SET)
64           ? grpc_empty_slice()
65           : grpc_slice_from_copied_buffer(src + begin, end - begin);
66   grpc_slice decoded_component =
67       grpc_permissive_percent_decode_slice(component);
68   char* out = grpc_dump_slice(decoded_component, GPR_DUMP_ASCII);
69   grpc_slice_unref_internal(component);
70   grpc_slice_unref_internal(decoded_component);
71   return out;
72 }
73 
/** Returns true iff \a c is an ASCII hexadecimal digit: 0-9, a-f or A-F. */
static bool valid_hex(char c) {
  if (c >= '0' && c <= '9') {
    return true;
  }
  if (c >= 'a' && c <= 'f') {
    return true;
  }
  return c >= 'A' && c <= 'F';
}
78 
79 /** Returns how many chars to advance if \a uri_text[i] begins a valid \a pchar
80  * production. If \a uri_text[i] introduces an invalid \a pchar (such as percent
81  * sign not followed by two hex digits), NOT_SET is returned. */
parse_pchar(const char * uri_text,size_t i)82 static size_t parse_pchar(const char* uri_text, size_t i) {
83   /* pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
84    * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
85    * pct-encoded = "%" HEXDIG HEXDIG
86    * sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
87    / "*" / "+" / "," / ";" / "=" */
88   char c = uri_text[i];
89   switch (c) {
90     default:
91       if (((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z')) ||
92           ((c >= '0') && (c <= '9'))) {
93         return 1;
94       }
95       break;
96     case ':':
97     case '@':
98     case '-':
99     case '.':
100     case '_':
101     case '~':
102     case '!':
103     case '$':
104     case '&':
105     case '\'':
106     case '(':
107     case ')':
108     case '*':
109     case '+':
110     case ',':
111     case ';':
112     case '=':
113       return 1;
114     case '%': /* pct-encoded */
115       if (valid_hex(uri_text[i + 1]) && valid_hex(uri_text[i + 2])) {
116         return 2;
117       }
118       return NOT_SET;
119   }
120   return 0;
121 }
122 
123 /* *( pchar / "?" / "/" ) */
parse_fragment_or_query(const char * uri_text,size_t * i)124 static int parse_fragment_or_query(const char* uri_text, size_t* i) {
125   char c;
126   while ((c = uri_text[*i]) != 0) {
127     const size_t advance = parse_pchar(uri_text, *i); /* pchar */
128     switch (advance) {
129       case 0: /* uri_text[i] isn't in pchar */
130         /* maybe it's ? or / */
131         if (uri_text[*i] == '?' || uri_text[*i] == '/') {
132           (*i)++;
133           break;
134         } else {
135           return 1;
136         }
137         GPR_UNREACHABLE_CODE(return 0);
138       default:
139         (*i) += advance;
140         break;
141       case NOT_SET: /* uri_text[i] introduces an invalid URI */
142         return 0;
143     }
144   }
145   /* *i is the first uri_text position past the \a query production, maybe \0 */
146   return 1;
147 }
148 
parse_query_parts(grpc_uri * uri)149 static void parse_query_parts(grpc_uri* uri) {
150   static const char* QUERY_PARTS_SEPARATOR = "&";
151   static const char* QUERY_PARTS_VALUE_SEPARATOR = "=";
152   GPR_ASSERT(uri->query != nullptr);
153   if (uri->query[0] == '\0') {
154     uri->query_parts = nullptr;
155     uri->query_parts_values = nullptr;
156     uri->num_query_parts = 0;
157     return;
158   }
159 
160   gpr_string_split(uri->query, QUERY_PARTS_SEPARATOR, &uri->query_parts,
161                    &uri->num_query_parts);
162   uri->query_parts_values =
163       static_cast<char**>(gpr_malloc(uri->num_query_parts * sizeof(char**)));
164   for (size_t i = 0; i < uri->num_query_parts; i++) {
165     char** query_param_parts;
166     size_t num_query_param_parts;
167     char* full = uri->query_parts[i];
168     gpr_string_split(full, QUERY_PARTS_VALUE_SEPARATOR, &query_param_parts,
169                      &num_query_param_parts);
170     GPR_ASSERT(num_query_param_parts > 0);
171     uri->query_parts[i] = query_param_parts[0];
172     if (num_query_param_parts > 1) {
173       /* TODO(dgq): only the first value after the separator is considered.
174        * Perhaps all chars after the first separator for the query part should
175        * be included, even if they include the separator. */
176       uri->query_parts_values[i] = query_param_parts[1];
177     } else {
178       uri->query_parts_values[i] = nullptr;
179     }
180     for (size_t j = 2; j < num_query_param_parts; j++) {
181       gpr_free(query_param_parts[j]);
182     }
183     gpr_free(query_param_parts);
184     gpr_free(full);
185   }
186 }
187 
grpc_uri_parse(const char * uri_text,bool suppress_errors)188 grpc_uri* grpc_uri_parse(const char* uri_text, bool suppress_errors) {
189   grpc_uri* uri;
190   size_t scheme_begin = 0;
191   size_t scheme_end = NOT_SET;
192   size_t authority_begin = NOT_SET;
193   size_t authority_end = NOT_SET;
194   size_t path_begin = NOT_SET;
195   size_t path_end = NOT_SET;
196   size_t query_begin = NOT_SET;
197   size_t query_end = NOT_SET;
198   size_t fragment_begin = NOT_SET;
199   size_t fragment_end = NOT_SET;
200   size_t i;
201 
202   for (i = scheme_begin; uri_text[i] != 0; i++) {
203     if (uri_text[i] == ':') {
204       scheme_end = i;
205       break;
206     }
207     if (uri_text[i] >= 'a' && uri_text[i] <= 'z') continue;
208     if (uri_text[i] >= 'A' && uri_text[i] <= 'Z') continue;
209     if (i != scheme_begin) {
210       if (uri_text[i] >= '0' && uri_text[i] <= '9') continue;
211       if (uri_text[i] == '+') continue;
212       if (uri_text[i] == '-') continue;
213       if (uri_text[i] == '.') continue;
214     }
215     break;
216   }
217   if (scheme_end == NOT_SET) {
218     return bad_uri(uri_text, i, "scheme", suppress_errors);
219   }
220 
221   if (uri_text[scheme_end + 1] == '/' && uri_text[scheme_end + 2] == '/') {
222     authority_begin = scheme_end + 3;
223     for (i = authority_begin; uri_text[i] != 0 && authority_end == NOT_SET;
224          i++) {
225       if (uri_text[i] == '/' || uri_text[i] == '?' || uri_text[i] == '#') {
226         authority_end = i;
227       }
228     }
229     if (authority_end == NOT_SET && uri_text[i] == 0) {
230       authority_end = i;
231     }
232     if (authority_end == NOT_SET) {
233       return bad_uri(uri_text, i, "authority", suppress_errors);
234     }
235     /* TODO(ctiller): parse the authority correctly */
236     path_begin = authority_end;
237   } else {
238     path_begin = scheme_end + 1;
239   }
240 
241   for (i = path_begin; uri_text[i] != 0; i++) {
242     if (uri_text[i] == '?' || uri_text[i] == '#') {
243       path_end = i;
244       break;
245     }
246   }
247   if (path_end == NOT_SET && uri_text[i] == 0) {
248     path_end = i;
249   }
250   if (path_end == NOT_SET) {
251     return bad_uri(uri_text, i, "path", suppress_errors);
252   }
253 
254   if (uri_text[i] == '?') {
255     query_begin = ++i;
256     if (!parse_fragment_or_query(uri_text, &i)) {
257       return bad_uri(uri_text, i, "query", suppress_errors);
258     } else if (uri_text[i] != 0 && uri_text[i] != '#') {
259       /* We must be at the end or at the beginning of a fragment */
260       return bad_uri(uri_text, i, "query", suppress_errors);
261     }
262     query_end = i;
263   }
264   if (uri_text[i] == '#') {
265     fragment_begin = ++i;
266     if (!parse_fragment_or_query(uri_text, &i)) {
267       return bad_uri(uri_text, i - fragment_end, "fragment", suppress_errors);
268     } else if (uri_text[i] != 0) {
269       /* We must be at the end */
270       return bad_uri(uri_text, i, "fragment", suppress_errors);
271     }
272     fragment_end = i;
273   }
274 
275   uri = static_cast<grpc_uri*>(gpr_zalloc(sizeof(*uri)));
276   uri->scheme = decode_and_copy_component(uri_text, scheme_begin, scheme_end);
277   uri->authority =
278       decode_and_copy_component(uri_text, authority_begin, authority_end);
279   uri->path = decode_and_copy_component(uri_text, path_begin, path_end);
280   uri->query = decode_and_copy_component(uri_text, query_begin, query_end);
281   uri->fragment =
282       decode_and_copy_component(uri_text, fragment_begin, fragment_end);
283   parse_query_parts(uri);
284 
285   return uri;
286 }
287 
grpc_uri_get_query_arg(const grpc_uri * uri,const char * key)288 const char* grpc_uri_get_query_arg(const grpc_uri* uri, const char* key) {
289   GPR_ASSERT(key != nullptr);
290   if (key[0] == '\0') return nullptr;
291 
292   for (size_t i = 0; i < uri->num_query_parts; ++i) {
293     if (0 == strcmp(key, uri->query_parts[i])) {
294       return uri->query_parts_values[i];
295     }
296   }
297   return nullptr;
298 }
299 
grpc_uri_destroy(grpc_uri * uri)300 void grpc_uri_destroy(grpc_uri* uri) {
301   if (!uri) return;
302   gpr_free(uri->scheme);
303   gpr_free(uri->authority);
304   gpr_free(uri->path);
305   gpr_free(uri->query);
306   for (size_t i = 0; i < uri->num_query_parts; ++i) {
307     gpr_free(uri->query_parts[i]);
308     gpr_free(uri->query_parts_values[i]);
309   }
310   gpr_free(uri->query_parts);
311   gpr_free(uri->query_parts_values);
312   gpr_free(uri->fragment);
313   gpr_free(uri);
314 }
315