1 /***************************************************************************
2  *                                  _   _ ____  _
3  *  Project                     ___| | | |  _ \| |
4  *                             / __| | | | |_) | |
5  *                            | (__| |_| |  _ <| |___
6  *                             \___|\___/|_| \_\_____|
7  *
8  * Copyright (C) 1998 - 2019, Daniel Stenberg, <daniel@haxx.se>, et al.
9  *
10  * This software is licensed as described in the file COPYING, which
11  * you should have received as part of this distribution. The terms
12  * are also available at https://curl.haxx.se/docs/copyright.html.
13  *
14  * You may opt to use, copy, modify, merge, publish, distribute and/or sell
15  * copies of the Software, and permit persons to whom the Software is
16  * furnished to do so, under the terms of the COPYING file.
17  *
18  * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
19  * KIND, either express or implied.
20  *
21  ***************************************************************************/
22 #include "tool_setup.h"
23 
24 #define ENABLE_CURLX_PRINTF
25 /* use our own printf() functions */
26 #include "curlx.h"
27 #include "tool_cfgable.h"
28 #include "tool_doswin.h"
29 #include "tool_urlglob.h"
30 #include "tool_vms.h"
31 
32 #include "memdebug.h" /* keep this as LAST include */
33 
/*
 * GLOBERROR stores an error message and a URL position in the URLGlob that
 * is reachable through a pointer named 'glob' at the point of use, and then
 * evaluates to 'code'.
 *
 * Every argument and the expansion as a whole are parenthesized so that the
 * macro is a single expression regardless of the context it is used in.
 */
#define GLOBERROR(string, column, code) \
  (glob->error = (string), glob->pos = (column), (code))
36 
glob_fixed(URLGlob * glob,char * fixed,size_t len)37 static CURLcode glob_fixed(URLGlob *glob, char *fixed, size_t len)
38 {
39   URLPattern *pat = &glob->pattern[glob->size];
40   pat->type = UPTSet;
41   pat->content.Set.size = 1;
42   pat->content.Set.ptr_s = 0;
43   pat->globindex = -1;
44 
45   pat->content.Set.elements = malloc(sizeof(char *));
46 
47   if(!pat->content.Set.elements)
48     return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY);
49 
50   pat->content.Set.elements[0] = malloc(len + 1);
51   if(!pat->content.Set.elements[0])
52     return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY);
53 
54   memcpy(pat->content.Set.elements[0], fixed, len);
55   pat->content.Set.elements[0][len] = 0;
56 
57   return CURLE_OK;
58 }
59 
/* multiply
 *
 * Scales *amount by 'with' and detects results that do not fit.
 *
 * A zero factor stores 0 in *amount and succeeds. Otherwise the product is
 * stored in *amount and 0 is returned, unless dividing the product by the
 * factor fails to give back the original value: then *amount is left alone
 * and 1 is returned.
 */
static int multiply(unsigned long *amount, long with)
{
  unsigned long before = *amount;
  unsigned long product;

  if(with == 0) {
    *amount = 0;
    return 0;
  }

  product = before * with;
  if(product / with == before) {
    *amount = product;
    return 0;
  }

  return 1; /* didn't fit, bail out */
}
76 
glob_set(URLGlob * glob,char ** patternp,size_t * posp,unsigned long * amount,int globindex)77 static CURLcode glob_set(URLGlob *glob, char **patternp,
78                          size_t *posp, unsigned long *amount,
79                          int globindex)
80 {
81   /* processes a set expression with the point behind the opening '{'
82      ','-separated elements are collected until the next closing '}'
83   */
84   URLPattern *pat;
85   bool done = FALSE;
86   char *buf = glob->glob_buffer;
87   char *pattern = *patternp;
88   char *opattern = pattern;
89   size_t opos = *posp-1;
90 
91   pat = &glob->pattern[glob->size];
92   /* patterns 0,1,2,... correspond to size=1,3,5,... */
93   pat->type = UPTSet;
94   pat->content.Set.size = 0;
95   pat->content.Set.ptr_s = 0;
96   pat->content.Set.elements = NULL;
97   pat->globindex = globindex;
98 
99   while(!done) {
100     switch (*pattern) {
101     case '\0':                  /* URL ended while set was still open */
102       return GLOBERROR("unmatched brace", opos, CURLE_URL_MALFORMAT);
103 
104     case '{':
105     case '[':                   /* no nested expressions at this time */
106       return GLOBERROR("nested brace", *posp, CURLE_URL_MALFORMAT);
107 
108     case '}':                           /* set element completed */
109       if(opattern == pattern)
110         return GLOBERROR("empty string within braces", *posp,
111                          CURLE_URL_MALFORMAT);
112 
113       /* add 1 to size since it'll be incremented below */
114       if(multiply(amount, pat->content.Set.size + 1))
115         return GLOBERROR("range overflow", 0, CURLE_URL_MALFORMAT);
116 
117       /* FALLTHROUGH */
118     case ',':
119 
120       *buf = '\0';
121       if(pat->content.Set.elements) {
122         char **new_arr = realloc(pat->content.Set.elements,
123                                  (pat->content.Set.size + 1) * sizeof(char *));
124         if(!new_arr)
125           return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY);
126 
127         pat->content.Set.elements = new_arr;
128       }
129       else
130         pat->content.Set.elements = malloc(sizeof(char *));
131 
132       if(!pat->content.Set.elements)
133         return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY);
134 
135       pat->content.Set.elements[pat->content.Set.size] =
136         strdup(glob->glob_buffer);
137       if(!pat->content.Set.elements[pat->content.Set.size])
138         return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY);
139       ++pat->content.Set.size;
140 
141       if(*pattern == '}') {
142         pattern++; /* pass the closing brace */
143         done = TRUE;
144         continue;
145       }
146 
147       buf = glob->glob_buffer;
148       ++pattern;
149       ++(*posp);
150       break;
151 
152     case ']':                           /* illegal closing bracket */
153       return GLOBERROR("unexpected close bracket", *posp, CURLE_URL_MALFORMAT);
154 
155     case '\\':                          /* escaped character, skip '\' */
156       if(pattern[1]) {
157         ++pattern;
158         ++(*posp);
159       }
160       /* FALLTHROUGH */
161     default:
162       *buf++ = *pattern++;              /* copy character to set element */
163       ++(*posp);
164     }
165   }
166 
167   *patternp = pattern; /* return with the new position */
168   return CURLE_OK;
169 }
170 
glob_range(URLGlob * glob,char ** patternp,size_t * posp,unsigned long * amount,int globindex)171 static CURLcode glob_range(URLGlob *glob, char **patternp,
172                            size_t *posp, unsigned long *amount,
173                            int globindex)
174 {
175   /* processes a range expression with the point behind the opening '['
176      - char range: e.g. "a-z]", "B-Q]"
177      - num range: e.g. "0-9]", "17-2000]"
178      - num range with leading zeros: e.g. "001-999]"
179      expression is checked for well-formedness and collected until the next ']'
180   */
181   URLPattern *pat;
182   int rc;
183   char *pattern = *patternp;
184   char *c;
185 
186   pat = &glob->pattern[glob->size];
187   pat->globindex = globindex;
188 
189   if(ISALPHA(*pattern)) {
190     /* character range detected */
191     char min_c;
192     char max_c;
193     char end_c;
194     unsigned long step = 1;
195 
196     pat->type = UPTCharRange;
197 
198     rc = sscanf(pattern, "%c-%c%c", &min_c, &max_c, &end_c);
199 
200     if(rc == 3) {
201       if(end_c == ':') {
202         char *endp;
203         errno = 0;
204         step = strtoul(&pattern[4], &endp, 10);
205         if(errno || &pattern[4] == endp || *endp != ']')
206           step = 0;
207         else
208           pattern = endp + 1;
209       }
210       else if(end_c != ']')
211         /* then this is wrong */
212         rc = 0;
213       else
214         /* end_c == ']' */
215         pattern += 4;
216     }
217 
218     *posp += (pattern - *patternp);
219 
220     if(rc != 3 || !step || step > (unsigned)INT_MAX ||
221        (min_c == max_c && step != 1) ||
222        (min_c != max_c && (min_c > max_c || step > (unsigned)(max_c - min_c) ||
223                            (max_c - min_c) > ('z' - 'a'))))
224       /* the pattern is not well-formed */
225       return GLOBERROR("bad range", *posp, CURLE_URL_MALFORMAT);
226 
227     /* if there was a ":[num]" thing, use that as step or else use 1 */
228     pat->content.CharRange.step = (int)step;
229     pat->content.CharRange.ptr_c = pat->content.CharRange.min_c = min_c;
230     pat->content.CharRange.max_c = max_c;
231 
232     if(multiply(amount, ((pat->content.CharRange.max_c -
233                           pat->content.CharRange.min_c) /
234                          pat->content.CharRange.step + 1)))
235       return GLOBERROR("range overflow", *posp, CURLE_URL_MALFORMAT);
236   }
237   else if(ISDIGIT(*pattern)) {
238     /* numeric range detected */
239     unsigned long min_n;
240     unsigned long max_n = 0;
241     unsigned long step_n = 0;
242     char *endp;
243 
244     pat->type = UPTNumRange;
245     pat->content.NumRange.padlength = 0;
246 
247     if(*pattern == '0') {
248       /* leading zero specified, count them! */
249       c = pattern;
250       while(ISDIGIT(*c)) {
251         c++;
252         ++pat->content.NumRange.padlength; /* padding length is set for all
253                                               instances of this pattern */
254       }
255     }
256 
257     errno = 0;
258     min_n = strtoul(pattern, &endp, 10);
259     if(errno || (endp == pattern))
260       endp = NULL;
261     else {
262       if(*endp != '-')
263         endp = NULL;
264       else {
265         pattern = endp + 1;
266         while(*pattern && ISBLANK(*pattern))
267           pattern++;
268         if(!ISDIGIT(*pattern)) {
269           endp = NULL;
270           goto fail;
271         }
272         errno = 0;
273         max_n = strtoul(pattern, &endp, 10);
274         if(errno)
275           /* overflow */
276           endp = NULL;
277         else if(*endp == ':') {
278           pattern = endp + 1;
279           errno = 0;
280           step_n = strtoul(pattern, &endp, 10);
281           if(errno)
282             /* over/underflow situation */
283             endp = NULL;
284         }
285         else
286           step_n = 1;
287         if(endp && (*endp == ']')) {
288           pattern = endp + 1;
289         }
290         else
291           endp = NULL;
292       }
293     }
294 
295     fail:
296     *posp += (pattern - *patternp);
297 
298     if(!endp || !step_n ||
299        (min_n == max_n && step_n != 1) ||
300        (min_n != max_n && (min_n > max_n || step_n > (max_n - min_n))))
301       /* the pattern is not well-formed */
302       return GLOBERROR("bad range", *posp, CURLE_URL_MALFORMAT);
303 
304     /* typecasting to ints are fine here since we make sure above that we
305        are within 31 bits */
306     pat->content.NumRange.ptr_n = pat->content.NumRange.min_n = min_n;
307     pat->content.NumRange.max_n = max_n;
308     pat->content.NumRange.step = step_n;
309 
310     if(multiply(amount, ((pat->content.NumRange.max_n -
311                           pat->content.NumRange.min_n) /
312                          pat->content.NumRange.step + 1)))
313       return GLOBERROR("range overflow", *posp, CURLE_URL_MALFORMAT);
314   }
315   else
316     return GLOBERROR("bad range specification", *posp, CURLE_URL_MALFORMAT);
317 
318   *patternp = pattern;
319   return CURLE_OK;
320 }
321 
peek_ipv6(const char * str,size_t * skip)322 static bool peek_ipv6(const char *str, size_t *skip)
323 {
324   /*
325    * Scan for a potential IPv6 literal.
326    * - Valid globs contain a hyphen and <= 1 colon.
327    * - IPv6 literals contain no hyphens and >= 2 colons.
328    */
329   size_t i = 0;
330   size_t colons = 0;
331   if(str[i++] != '[') {
332     return FALSE;
333   }
334   for(;;) {
335     const char c = str[i++];
336     if(ISALNUM(c) || c == '.' || c == '%') {
337       /* ok */
338     }
339     else if(c == ':') {
340       colons++;
341     }
342     else if(c == ']') {
343       *skip = i;
344       return colons >= 2 ? TRUE : FALSE;
345     }
346     else {
347       return FALSE;
348     }
349   }
350 }
351 
glob_parse(URLGlob * glob,char * pattern,size_t pos,unsigned long * amount)352 static CURLcode glob_parse(URLGlob *glob, char *pattern,
353                            size_t pos, unsigned long *amount)
354 {
355   /* processes a literal string component of a URL
356      special characters '{' and '[' branch to set/range processing functions
357    */
358   CURLcode res = CURLE_OK;
359   int globindex = 0; /* count "actual" globs */
360 
361   *amount = 1;
362 
363   while(*pattern && !res) {
364     char *buf = glob->glob_buffer;
365     size_t sublen = 0;
366     while(*pattern && *pattern != '{') {
367       if(*pattern == '[') {
368         /* skip over IPv6 literals and [] */
369         size_t skip = 0;
370         if(!peek_ipv6(pattern, &skip) && (pattern[1] == ']'))
371           skip = 2;
372         if(skip) {
373           memcpy(buf, pattern, skip);
374           buf += skip;
375           pattern += skip;
376           sublen += skip;
377           continue;
378         }
379         break;
380       }
381       if(*pattern == '}' || *pattern == ']')
382         return GLOBERROR("unmatched close brace/bracket", pos,
383                          CURLE_URL_MALFORMAT);
384 
385       /* only allow \ to escape known "special letters" */
386       if(*pattern == '\\' &&
387          (*(pattern + 1) == '{' || *(pattern + 1) == '[' ||
388           *(pattern + 1) == '}' || *(pattern + 1) == ']') ) {
389 
390         /* escape character, skip '\' */
391         ++pattern;
392         ++pos;
393       }
394       *buf++ = *pattern++; /* copy character to literal */
395       ++pos;
396       sublen++;
397     }
398     if(sublen) {
399       /* we got a literal string, add it as a single-item list */
400       *buf = '\0';
401       res = glob_fixed(glob, glob->glob_buffer, sublen);
402     }
403     else {
404       switch (*pattern) {
405       case '\0': /* done  */
406         break;
407 
408       case '{':
409         /* process set pattern */
410         pattern++;
411         pos++;
412         res = glob_set(glob, &pattern, &pos, amount, globindex++);
413         break;
414 
415       case '[':
416         /* process range pattern */
417         pattern++;
418         pos++;
419         res = glob_range(glob, &pattern, &pos, amount, globindex++);
420         break;
421       }
422     }
423 
424     if(++glob->size >= GLOB_PATTERN_NUM)
425       return GLOBERROR("too many globs", pos, CURLE_URL_MALFORMAT);
426   }
427   return res;
428 }
429 
glob_url(URLGlob ** glob,char * url,unsigned long * urlnum,FILE * error)430 CURLcode glob_url(URLGlob **glob, char *url, unsigned long *urlnum,
431                   FILE *error)
432 {
433   /*
434    * We can deal with any-size, just make a buffer with the same length
435    * as the specified URL!
436    */
437   URLGlob *glob_expand;
438   unsigned long amount = 0;
439   char *glob_buffer;
440   CURLcode res;
441 
442   *glob = NULL;
443 
444   glob_buffer = malloc(strlen(url) + 1);
445   if(!glob_buffer)
446     return CURLE_OUT_OF_MEMORY;
447   glob_buffer[0] = 0;
448 
449   glob_expand = calloc(1, sizeof(URLGlob));
450   if(!glob_expand) {
451     Curl_safefree(glob_buffer);
452     return CURLE_OUT_OF_MEMORY;
453   }
454   glob_expand->urllen = strlen(url);
455   glob_expand->glob_buffer = glob_buffer;
456 
457   res = glob_parse(glob_expand, url, 1, &amount);
458   if(!res)
459     *urlnum = amount;
460   else {
461     if(error && glob_expand->error) {
462       char text[512];
463       const char *t;
464       if(glob_expand->pos) {
465         msnprintf(text, sizeof(text), "%s in URL position %zu:\n%s\n%*s^",
466                   glob_expand->error,
467                   glob_expand->pos, url, glob_expand->pos - 1, " ");
468         t = text;
469       }
470       else
471         t = glob_expand->error;
472 
473       /* send error description to the error-stream */
474       fprintf(error, "curl: (%d) %s\n", res, t);
475     }
476     /* it failed, we cleanup */
477     glob_cleanup(glob_expand);
478     *urlnum = 1;
479     return res;
480   }
481 
482   *glob = glob_expand;
483   return CURLE_OK;
484 }
485 
/*
 * glob_cleanup() frees a glob created by glob_url(): every string of every
 * set pattern, the element arrays, the scratch buffer and the glob itself.
 *
 * Passing NULL is allowed and does nothing.
 */
void glob_cleanup(URLGlob *glob)
{
  size_t i;
  int elem;

  if(!glob)
    return;

  for(i = 0; i < glob->size; i++) {
    /* only set patterns (which include the literal pieces) own memory */
    if((glob->pattern[i].type == UPTSet) &&
       (glob->pattern[i].content.Set.elements)) {
      for(elem = glob->pattern[i].content.Set.size - 1;
          elem >= 0;
          --elem) {
        Curl_safefree(glob->pattern[i].content.Set.elements[elem]);
      }
      Curl_safefree(glob->pattern[i].content.Set.elements);
    }
  }
  Curl_safefree(glob->glob_buffer);
  Curl_safefree(glob);
}
505 
glob_next_url(char ** globbed,URLGlob * glob)506 CURLcode glob_next_url(char **globbed, URLGlob *glob)
507 {
508   URLPattern *pat;
509   size_t i;
510   size_t len;
511   size_t buflen = glob->urllen + 1;
512   char *buf = glob->glob_buffer;
513 
514   *globbed = NULL;
515 
516   if(!glob->beenhere)
517     glob->beenhere = 1;
518   else {
519     bool carry = TRUE;
520 
521     /* implement a counter over the index ranges of all patterns, starting
522        with the rightmost pattern */
523     for(i = 0; carry && (i < glob->size); i++) {
524       carry = FALSE;
525       pat = &glob->pattern[glob->size - 1 - i];
526       switch(pat->type) {
527       case UPTSet:
528         if((pat->content.Set.elements) &&
529            (++pat->content.Set.ptr_s == pat->content.Set.size)) {
530           pat->content.Set.ptr_s = 0;
531           carry = TRUE;
532         }
533         break;
534       case UPTCharRange:
535         pat->content.CharRange.ptr_c =
536           (char)(pat->content.CharRange.step +
537                  (int)((unsigned char)pat->content.CharRange.ptr_c));
538         if(pat->content.CharRange.ptr_c > pat->content.CharRange.max_c) {
539           pat->content.CharRange.ptr_c = pat->content.CharRange.min_c;
540           carry = TRUE;
541         }
542         break;
543       case UPTNumRange:
544         pat->content.NumRange.ptr_n += pat->content.NumRange.step;
545         if(pat->content.NumRange.ptr_n > pat->content.NumRange.max_n) {
546           pat->content.NumRange.ptr_n = pat->content.NumRange.min_n;
547           carry = TRUE;
548         }
549         break;
550       default:
551         printf("internal error: invalid pattern type (%d)\n", (int)pat->type);
552         return CURLE_FAILED_INIT;
553       }
554     }
555     if(carry) {         /* first pattern ptr has run into overflow, done! */
556       /* TODO: verify if this should actually return CURLE_OK. */
557       return CURLE_OK; /* CURLE_OK to match previous behavior */
558     }
559   }
560 
561   for(i = 0; i < glob->size; ++i) {
562     pat = &glob->pattern[i];
563     switch(pat->type) {
564     case UPTSet:
565       if(pat->content.Set.elements) {
566         msnprintf(buf, buflen, "%s",
567                   pat->content.Set.elements[pat->content.Set.ptr_s]);
568         len = strlen(buf);
569         buf += len;
570         buflen -= len;
571       }
572       break;
573     case UPTCharRange:
574       if(buflen) {
575         *buf++ = pat->content.CharRange.ptr_c;
576         *buf = '\0';
577         buflen--;
578       }
579       break;
580     case UPTNumRange:
581       msnprintf(buf, buflen, "%0*lu",
582                 pat->content.NumRange.padlength,
583                 pat->content.NumRange.ptr_n);
584       len = strlen(buf);
585       buf += len;
586       buflen -= len;
587       break;
588     default:
589       printf("internal error: invalid pattern type (%d)\n", (int)pat->type);
590       return CURLE_FAILED_INIT;
591     }
592   }
593 
594   *globbed = strdup(glob->glob_buffer);
595   if(!*globbed)
596     return CURLE_OUT_OF_MEMORY;
597 
598   return CURLE_OK;
599 }
600 
glob_match_url(char ** result,char * filename,URLGlob * glob)601 CURLcode glob_match_url(char **result, char *filename, URLGlob *glob)
602 {
603   char *target;
604   size_t allocsize;
605   char numbuf[18];
606   char *appendthis = (char *)"";
607   size_t appendlen = 0;
608   size_t stringlen = 0;
609 
610   *result = NULL;
611 
612   /* We cannot use the glob_buffer for storage here since the filename may
613    * be longer than the URL we use. We allocate a good start size, then
614    * we need to realloc in case of need.
615    */
616   allocsize = strlen(filename) + 1; /* make it at least one byte to store the
617                                        trailing zero */
618   target = malloc(allocsize);
619   if(!target)
620     return CURLE_OUT_OF_MEMORY;
621 
622   while(*filename) {
623     if(*filename == '#' && ISDIGIT(filename[1])) {
624       char *ptr = filename;
625       unsigned long num = strtoul(&filename[1], &filename, 10);
626       URLPattern *pat = NULL;
627 
628       if(num < glob->size) {
629         unsigned long i;
630         num--; /* make it zero based */
631         /* find the correct glob entry */
632         for(i = 0; i<glob->size; i++) {
633           if(glob->pattern[i].globindex == (int)num) {
634             pat = &glob->pattern[i];
635             break;
636           }
637         }
638       }
639 
640       if(pat) {
641         switch(pat->type) {
642         case UPTSet:
643           if(pat->content.Set.elements) {
644             appendthis = pat->content.Set.elements[pat->content.Set.ptr_s];
645             appendlen =
646               strlen(pat->content.Set.elements[pat->content.Set.ptr_s]);
647           }
648           break;
649         case UPTCharRange:
650           numbuf[0] = pat->content.CharRange.ptr_c;
651           numbuf[1] = 0;
652           appendthis = numbuf;
653           appendlen = 1;
654           break;
655         case UPTNumRange:
656           msnprintf(numbuf, sizeof(numbuf), "%0*lu",
657                     pat->content.NumRange.padlength,
658                     pat->content.NumRange.ptr_n);
659           appendthis = numbuf;
660           appendlen = strlen(numbuf);
661           break;
662         default:
663           fprintf(stderr, "internal error: invalid pattern type (%d)\n",
664                   (int)pat->type);
665           Curl_safefree(target);
666           return CURLE_FAILED_INIT;
667         }
668       }
669       else {
670         /* #[num] out of range, use the #[num] in the output */
671         filename = ptr;
672         appendthis = filename++;
673         appendlen = 1;
674       }
675     }
676     else {
677       appendthis = filename++;
678       appendlen = 1;
679     }
680     if(appendlen + stringlen >= allocsize) {
681       char *newstr;
682       /* we append a single byte to allow for the trailing byte to be appended
683          at the end of this function outside the while() loop */
684       allocsize = (appendlen + stringlen) * 2;
685       newstr = realloc(target, allocsize + 1);
686       if(!newstr) {
687         Curl_safefree(target);
688         return CURLE_OUT_OF_MEMORY;
689       }
690       target = newstr;
691     }
692     memcpy(&target[stringlen], appendthis, appendlen);
693     stringlen += appendlen;
694   }
695   target[stringlen]= '\0';
696 
697 #if defined(MSDOS) || defined(WIN32)
698   {
699     char *sanitized;
700     SANITIZEcode sc = sanitize_file_name(&sanitized, target,
701                                          (SANITIZE_ALLOW_PATH |
702                                           SANITIZE_ALLOW_RESERVED));
703     Curl_safefree(target);
704     if(sc)
705       return CURLE_URL_MALFORMAT;
706     target = sanitized;
707   }
708 #endif /* MSDOS || WIN32 */
709 
710   *result = target;
711   return CURLE_OK;
712 }
713