1 /***************************************************************************
2  *                                  _   _ ____  _
3  *  Project                     ___| | | |  _ \| |
4  *                             / __| | | | |_) | |
5  *                            | (__| |_| |  _ <| |___
6  *                             \___|\___/|_| \_\_____|
7  *
8  * Copyright (C) 1998 - 2020, Daniel Stenberg, <daniel@haxx.se>, et al.
9  *
10  * This software is licensed as described in the file COPYING, which
11  * you should have received as part of this distribution. The terms
12  * are also available at https://curl.haxx.se/docs/copyright.html.
13  *
14  * You may opt to use, copy, modify, merge, publish, distribute and/or sell
15  * copies of the Software, and permit persons to whom the Software is
16  * furnished to do so, under the terms of the COPYING file.
17  *
18  * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
19  * KIND, either express or implied.
20  *
21  ***************************************************************************/
22 #include "tool_setup.h"
23 
24 #define ENABLE_CURLX_PRINTF
25 /* use our own printf() functions */
26 #include "curlx.h"
27 #include "tool_cfgable.h"
28 #include "tool_doswin.h"
29 #include "tool_urlglob.h"
30 #include "tool_vms.h"
31 #include "dynbuf.h"
32 
33 #include "memdebug.h" /* keep this as LAST include */
34 
/*
 * GLOBERROR() records an error description and the URL column it refers to
 * in the struct URLGlob that the local variable 'glob' points to, and then
 * evaluates to 'code'. It is meant to be used as
 * "return GLOBERROR(text, column, code);" and therefore requires a pointer
 * named 'glob' to be in scope where it is expanded.
 *
 * The arguments and the whole expansion are parenthesized so the macro
 * behaves as a single expression wherever it is used.
 */
#define GLOBERROR(string, column, code) \
  (glob->error = (string), glob->pos = (column), (code))
37 
glob_fixed(struct URLGlob * glob,char * fixed,size_t len)38 static CURLcode glob_fixed(struct URLGlob *glob, char *fixed, size_t len)
39 {
40   struct URLPattern *pat = &glob->pattern[glob->size];
41   pat->type = UPTSet;
42   pat->content.Set.size = 1;
43   pat->content.Set.ptr_s = 0;
44   pat->globindex = -1;
45 
46   pat->content.Set.elements = malloc(sizeof(char *));
47 
48   if(!pat->content.Set.elements)
49     return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY);
50 
51   pat->content.Set.elements[0] = malloc(len + 1);
52   if(!pat->content.Set.elements[0])
53     return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY);
54 
55   memcpy(pat->content.Set.elements[0], fixed, len);
56   pat->content.Set.elements[0][len] = 0;
57 
58   return CURLE_OK;
59 }
60 
/*
 * multiply()
 *
 * Replaces *amount with *amount times 'with'.
 *
 * Returns 0 when the product was stored and 1 when it does not fit in an
 * unsigned long, in which case *amount is left as it was. Multiplying by
 * zero always succeeds and stores zero.
 */
static int multiply(unsigned long *amount, long with)
{
  unsigned long product;

  if(with == 0) {
    *amount = 0;
    return 0;
  }

  product = *amount * with;

  /* dividing the product back must yield the original value, otherwise the
     multiplication wrapped around */
  if(product / with != *amount)
    return 1;

  *amount = product;
  return 0;
}
77 
glob_set(struct URLGlob * glob,char ** patternp,size_t * posp,unsigned long * amount,int globindex)78 static CURLcode glob_set(struct URLGlob *glob, char **patternp,
79                          size_t *posp, unsigned long *amount,
80                          int globindex)
81 {
82   /* processes a set expression with the point behind the opening '{'
83      ','-separated elements are collected until the next closing '}'
84   */
85   struct URLPattern *pat;
86   bool done = FALSE;
87   char *buf = glob->glob_buffer;
88   char *pattern = *patternp;
89   char *opattern = pattern;
90   size_t opos = *posp-1;
91 
92   pat = &glob->pattern[glob->size];
93   /* patterns 0,1,2,... correspond to size=1,3,5,... */
94   pat->type = UPTSet;
95   pat->content.Set.size = 0;
96   pat->content.Set.ptr_s = 0;
97   pat->content.Set.elements = NULL;
98   pat->globindex = globindex;
99 
100   while(!done) {
101     switch (*pattern) {
102     case '\0':                  /* URL ended while set was still open */
103       return GLOBERROR("unmatched brace", opos, CURLE_URL_MALFORMAT);
104 
105     case '{':
106     case '[':                   /* no nested expressions at this time */
107       return GLOBERROR("nested brace", *posp, CURLE_URL_MALFORMAT);
108 
109     case '}':                           /* set element completed */
110       if(opattern == pattern)
111         return GLOBERROR("empty string within braces", *posp,
112                          CURLE_URL_MALFORMAT);
113 
114       /* add 1 to size since it'll be incremented below */
115       if(multiply(amount, pat->content.Set.size + 1))
116         return GLOBERROR("range overflow", 0, CURLE_URL_MALFORMAT);
117 
118       /* FALLTHROUGH */
119     case ',':
120 
121       *buf = '\0';
122       if(pat->content.Set.elements) {
123         char **new_arr = realloc(pat->content.Set.elements,
124                                  (pat->content.Set.size + 1) * sizeof(char *));
125         if(!new_arr)
126           return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY);
127 
128         pat->content.Set.elements = new_arr;
129       }
130       else
131         pat->content.Set.elements = malloc(sizeof(char *));
132 
133       if(!pat->content.Set.elements)
134         return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY);
135 
136       pat->content.Set.elements[pat->content.Set.size] =
137         strdup(glob->glob_buffer);
138       if(!pat->content.Set.elements[pat->content.Set.size])
139         return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY);
140       ++pat->content.Set.size;
141 
142       if(*pattern == '}') {
143         pattern++; /* pass the closing brace */
144         done = TRUE;
145         continue;
146       }
147 
148       buf = glob->glob_buffer;
149       ++pattern;
150       ++(*posp);
151       break;
152 
153     case ']':                           /* illegal closing bracket */
154       return GLOBERROR("unexpected close bracket", *posp, CURLE_URL_MALFORMAT);
155 
156     case '\\':                          /* escaped character, skip '\' */
157       if(pattern[1]) {
158         ++pattern;
159         ++(*posp);
160       }
161       /* FALLTHROUGH */
162     default:
163       *buf++ = *pattern++;              /* copy character to set element */
164       ++(*posp);
165     }
166   }
167 
168   *patternp = pattern; /* return with the new position */
169   return CURLE_OK;
170 }
171 
glob_range(struct URLGlob * glob,char ** patternp,size_t * posp,unsigned long * amount,int globindex)172 static CURLcode glob_range(struct URLGlob *glob, char **patternp,
173                            size_t *posp, unsigned long *amount,
174                            int globindex)
175 {
176   /* processes a range expression with the point behind the opening '['
177      - char range: e.g. "a-z]", "B-Q]"
178      - num range: e.g. "0-9]", "17-2000]"
179      - num range with leading zeros: e.g. "001-999]"
180      expression is checked for well-formedness and collected until the next ']'
181   */
182   struct URLPattern *pat;
183   int rc;
184   char *pattern = *patternp;
185   char *c;
186 
187   pat = &glob->pattern[glob->size];
188   pat->globindex = globindex;
189 
190   if(ISALPHA(*pattern)) {
191     /* character range detected */
192     char min_c;
193     char max_c;
194     char end_c;
195     unsigned long step = 1;
196 
197     pat->type = UPTCharRange;
198 
199     rc = sscanf(pattern, "%c-%c%c", &min_c, &max_c, &end_c);
200 
201     if(rc == 3) {
202       if(end_c == ':') {
203         char *endp;
204         errno = 0;
205         step = strtoul(&pattern[4], &endp, 10);
206         if(errno || &pattern[4] == endp || *endp != ']')
207           step = 0;
208         else
209           pattern = endp + 1;
210       }
211       else if(end_c != ']')
212         /* then this is wrong */
213         rc = 0;
214       else
215         /* end_c == ']' */
216         pattern += 4;
217     }
218 
219     *posp += (pattern - *patternp);
220 
221     if(rc != 3 || !step || step > (unsigned)INT_MAX ||
222        (min_c == max_c && step != 1) ||
223        (min_c != max_c && (min_c > max_c || step > (unsigned)(max_c - min_c) ||
224                            (max_c - min_c) > ('z' - 'a'))))
225       /* the pattern is not well-formed */
226       return GLOBERROR("bad range", *posp, CURLE_URL_MALFORMAT);
227 
228     /* if there was a ":[num]" thing, use that as step or else use 1 */
229     pat->content.CharRange.step = (int)step;
230     pat->content.CharRange.ptr_c = pat->content.CharRange.min_c = min_c;
231     pat->content.CharRange.max_c = max_c;
232 
233     if(multiply(amount, ((pat->content.CharRange.max_c -
234                           pat->content.CharRange.min_c) /
235                          pat->content.CharRange.step + 1)))
236       return GLOBERROR("range overflow", *posp, CURLE_URL_MALFORMAT);
237   }
238   else if(ISDIGIT(*pattern)) {
239     /* numeric range detected */
240     unsigned long min_n;
241     unsigned long max_n = 0;
242     unsigned long step_n = 0;
243     char *endp;
244 
245     pat->type = UPTNumRange;
246     pat->content.NumRange.padlength = 0;
247 
248     if(*pattern == '0') {
249       /* leading zero specified, count them! */
250       c = pattern;
251       while(ISDIGIT(*c)) {
252         c++;
253         ++pat->content.NumRange.padlength; /* padding length is set for all
254                                               instances of this pattern */
255       }
256     }
257 
258     errno = 0;
259     min_n = strtoul(pattern, &endp, 10);
260     if(errno || (endp == pattern))
261       endp = NULL;
262     else {
263       if(*endp != '-')
264         endp = NULL;
265       else {
266         pattern = endp + 1;
267         while(*pattern && ISBLANK(*pattern))
268           pattern++;
269         if(!ISDIGIT(*pattern)) {
270           endp = NULL;
271           goto fail;
272         }
273         errno = 0;
274         max_n = strtoul(pattern, &endp, 10);
275         if(errno)
276           /* overflow */
277           endp = NULL;
278         else if(*endp == ':') {
279           pattern = endp + 1;
280           errno = 0;
281           step_n = strtoul(pattern, &endp, 10);
282           if(errno)
283             /* over/underflow situation */
284             endp = NULL;
285         }
286         else
287           step_n = 1;
288         if(endp && (*endp == ']')) {
289           pattern = endp + 1;
290         }
291         else
292           endp = NULL;
293       }
294     }
295 
296     fail:
297     *posp += (pattern - *patternp);
298 
299     if(!endp || !step_n ||
300        (min_n == max_n && step_n != 1) ||
301        (min_n != max_n && (min_n > max_n || step_n > (max_n - min_n))))
302       /* the pattern is not well-formed */
303       return GLOBERROR("bad range", *posp, CURLE_URL_MALFORMAT);
304 
305     /* typecasting to ints are fine here since we make sure above that we
306        are within 31 bits */
307     pat->content.NumRange.ptr_n = pat->content.NumRange.min_n = min_n;
308     pat->content.NumRange.max_n = max_n;
309     pat->content.NumRange.step = step_n;
310 
311     if(multiply(amount, ((pat->content.NumRange.max_n -
312                           pat->content.NumRange.min_n) /
313                          pat->content.NumRange.step + 1)))
314       return GLOBERROR("range overflow", *posp, CURLE_URL_MALFORMAT);
315   }
316   else
317     return GLOBERROR("bad range specification", *posp, CURLE_URL_MALFORMAT);
318 
319   *patternp = pattern;
320   return CURLE_OK;
321 }
322 
323 #define MAX_IP6LEN 128
324 
peek_ipv6(const char * str,size_t * skip)325 static bool peek_ipv6(const char *str, size_t *skip)
326 {
327   /*
328    * Scan for a potential IPv6 literal.
329    * - Valid globs contain a hyphen and <= 1 colon.
330    * - IPv6 literals contain no hyphens and >= 2 colons.
331    */
332   char hostname[MAX_IP6LEN];
333   CURLU *u;
334   char *endbr = strchr(str, ']');
335   size_t hlen;
336   CURLUcode rc;
337   if(!endbr)
338     return FALSE;
339 
340   hlen = endbr - str + 1;
341   if(hlen >= MAX_IP6LEN)
342     return FALSE;
343 
344   u = curl_url();
345   if(!u)
346     return FALSE;
347 
348   memcpy(hostname, str, hlen);
349   hostname[hlen] = 0;
350 
351   /* ask to "guess scheme" as then it works without a https:// prefix */
352   rc = curl_url_set(u, CURLUPART_URL, hostname, CURLU_GUESS_SCHEME);
353 
354   curl_url_cleanup(u);
355   if(!rc)
356     *skip = hlen;
357   return rc ? FALSE : TRUE;
358 }
359 
glob_parse(struct URLGlob * glob,char * pattern,size_t pos,unsigned long * amount)360 static CURLcode glob_parse(struct URLGlob *glob, char *pattern,
361                            size_t pos, unsigned long *amount)
362 {
363   /* processes a literal string component of a URL
364      special characters '{' and '[' branch to set/range processing functions
365    */
366   CURLcode res = CURLE_OK;
367   int globindex = 0; /* count "actual" globs */
368 
369   *amount = 1;
370 
371   while(*pattern && !res) {
372     char *buf = glob->glob_buffer;
373     size_t sublen = 0;
374     while(*pattern && *pattern != '{') {
375       if(*pattern == '[') {
376         /* skip over IPv6 literals and [] */
377         size_t skip = 0;
378         if(!peek_ipv6(pattern, &skip) && (pattern[1] == ']'))
379           skip = 2;
380         if(skip) {
381           memcpy(buf, pattern, skip);
382           buf += skip;
383           pattern += skip;
384           sublen += skip;
385           continue;
386         }
387         break;
388       }
389       if(*pattern == '}' || *pattern == ']')
390         return GLOBERROR("unmatched close brace/bracket", pos,
391                          CURLE_URL_MALFORMAT);
392 
393       /* only allow \ to escape known "special letters" */
394       if(*pattern == '\\' &&
395          (*(pattern + 1) == '{' || *(pattern + 1) == '[' ||
396           *(pattern + 1) == '}' || *(pattern + 1) == ']') ) {
397 
398         /* escape character, skip '\' */
399         ++pattern;
400         ++pos;
401       }
402       *buf++ = *pattern++; /* copy character to literal */
403       ++pos;
404       sublen++;
405     }
406     if(sublen) {
407       /* we got a literal string, add it as a single-item list */
408       *buf = '\0';
409       res = glob_fixed(glob, glob->glob_buffer, sublen);
410     }
411     else {
412       switch (*pattern) {
413       case '\0': /* done  */
414         break;
415 
416       case '{':
417         /* process set pattern */
418         pattern++;
419         pos++;
420         res = glob_set(glob, &pattern, &pos, amount, globindex++);
421         break;
422 
423       case '[':
424         /* process range pattern */
425         pattern++;
426         pos++;
427         res = glob_range(glob, &pattern, &pos, amount, globindex++);
428         break;
429       }
430     }
431 
432     if(++glob->size >= GLOB_PATTERN_NUM)
433       return GLOBERROR("too many globs", pos, CURLE_URL_MALFORMAT);
434   }
435   return res;
436 }
437 
glob_url(struct URLGlob ** glob,char * url,unsigned long * urlnum,FILE * error)438 CURLcode glob_url(struct URLGlob **glob, char *url, unsigned long *urlnum,
439                   FILE *error)
440 {
441   /*
442    * We can deal with any-size, just make a buffer with the same length
443    * as the specified URL!
444    */
445   struct URLGlob *glob_expand;
446   unsigned long amount = 0;
447   char *glob_buffer;
448   CURLcode res;
449 
450   *glob = NULL;
451 
452   glob_buffer = malloc(strlen(url) + 1);
453   if(!glob_buffer)
454     return CURLE_OUT_OF_MEMORY;
455   glob_buffer[0] = 0;
456 
457   glob_expand = calloc(1, sizeof(struct URLGlob));
458   if(!glob_expand) {
459     Curl_safefree(glob_buffer);
460     return CURLE_OUT_OF_MEMORY;
461   }
462   glob_expand->urllen = strlen(url);
463   glob_expand->glob_buffer = glob_buffer;
464 
465   res = glob_parse(glob_expand, url, 1, &amount);
466   if(!res)
467     *urlnum = amount;
468   else {
469     if(error && glob_expand->error) {
470       char text[512];
471       const char *t;
472       if(glob_expand->pos) {
473         msnprintf(text, sizeof(text), "%s in URL position %zu:\n%s\n%*s^",
474                   glob_expand->error,
475                   glob_expand->pos, url, glob_expand->pos - 1, " ");
476         t = text;
477       }
478       else
479         t = glob_expand->error;
480 
481       /* send error description to the error-stream */
482       fprintf(error, "curl: (%d) %s\n", res, t);
483     }
484     /* it failed, we cleanup */
485     glob_cleanup(glob_expand);
486     *urlnum = 1;
487     return res;
488   }
489 
490   *glob = glob_expand;
491   return CURLE_OK;
492 }
493 
glob_cleanup(struct URLGlob * glob)494 void glob_cleanup(struct URLGlob *glob)
495 {
496   size_t i;
497   int elem;
498 
499   if(!glob)
500     return;
501 
502   for(i = 0; i < glob->size; i++) {
503     if((glob->pattern[i].type == UPTSet) &&
504        (glob->pattern[i].content.Set.elements)) {
505       for(elem = glob->pattern[i].content.Set.size - 1;
506           elem >= 0;
507           --elem) {
508         Curl_safefree(glob->pattern[i].content.Set.elements[elem]);
509       }
510       Curl_safefree(glob->pattern[i].content.Set.elements);
511     }
512   }
513   Curl_safefree(glob->glob_buffer);
514   Curl_safefree(glob);
515 }
516 
glob_next_url(char ** globbed,struct URLGlob * glob)517 CURLcode glob_next_url(char **globbed, struct URLGlob *glob)
518 {
519   struct URLPattern *pat;
520   size_t i;
521   size_t len;
522   size_t buflen = glob->urllen + 1;
523   char *buf = glob->glob_buffer;
524 
525   *globbed = NULL;
526 
527   if(!glob->beenhere)
528     glob->beenhere = 1;
529   else {
530     bool carry = TRUE;
531 
532     /* implement a counter over the index ranges of all patterns, starting
533        with the rightmost pattern */
534     for(i = 0; carry && (i < glob->size); i++) {
535       carry = FALSE;
536       pat = &glob->pattern[glob->size - 1 - i];
537       switch(pat->type) {
538       case UPTSet:
539         if((pat->content.Set.elements) &&
540            (++pat->content.Set.ptr_s == pat->content.Set.size)) {
541           pat->content.Set.ptr_s = 0;
542           carry = TRUE;
543         }
544         break;
545       case UPTCharRange:
546         pat->content.CharRange.ptr_c =
547           (char)(pat->content.CharRange.step +
548                  (int)((unsigned char)pat->content.CharRange.ptr_c));
549         if(pat->content.CharRange.ptr_c > pat->content.CharRange.max_c) {
550           pat->content.CharRange.ptr_c = pat->content.CharRange.min_c;
551           carry = TRUE;
552         }
553         break;
554       case UPTNumRange:
555         pat->content.NumRange.ptr_n += pat->content.NumRange.step;
556         if(pat->content.NumRange.ptr_n > pat->content.NumRange.max_n) {
557           pat->content.NumRange.ptr_n = pat->content.NumRange.min_n;
558           carry = TRUE;
559         }
560         break;
561       default:
562         printf("internal error: invalid pattern type (%d)\n", (int)pat->type);
563         return CURLE_FAILED_INIT;
564       }
565     }
566     if(carry) {         /* first pattern ptr has run into overflow, done! */
567       return CURLE_OK;
568     }
569   }
570 
571   for(i = 0; i < glob->size; ++i) {
572     pat = &glob->pattern[i];
573     switch(pat->type) {
574     case UPTSet:
575       if(pat->content.Set.elements) {
576         msnprintf(buf, buflen, "%s",
577                   pat->content.Set.elements[pat->content.Set.ptr_s]);
578         len = strlen(buf);
579         buf += len;
580         buflen -= len;
581       }
582       break;
583     case UPTCharRange:
584       if(buflen) {
585         *buf++ = pat->content.CharRange.ptr_c;
586         *buf = '\0';
587         buflen--;
588       }
589       break;
590     case UPTNumRange:
591       msnprintf(buf, buflen, "%0*lu",
592                 pat->content.NumRange.padlength,
593                 pat->content.NumRange.ptr_n);
594       len = strlen(buf);
595       buf += len;
596       buflen -= len;
597       break;
598     default:
599       printf("internal error: invalid pattern type (%d)\n", (int)pat->type);
600       return CURLE_FAILED_INIT;
601     }
602   }
603 
604   *globbed = strdup(glob->glob_buffer);
605   if(!*globbed)
606     return CURLE_OUT_OF_MEMORY;
607 
608   return CURLE_OK;
609 }
610 
611 #define MAX_OUTPUT_GLOB_LENGTH (10*1024)
612 
glob_match_url(char ** result,char * filename,struct URLGlob * glob)613 CURLcode glob_match_url(char **result, char *filename, struct URLGlob *glob)
614 {
615   char numbuf[18];
616   char *appendthis = (char *)"";
617   size_t appendlen = 0;
618   struct curlx_dynbuf dyn;
619 
620   *result = NULL;
621 
622   /* We cannot use the glob_buffer for storage since the filename may be
623    * longer than the URL we use.
624    */
625   curlx_dyn_init(&dyn, MAX_OUTPUT_GLOB_LENGTH);
626 
627   while(*filename) {
628     if(*filename == '#' && ISDIGIT(filename[1])) {
629       char *ptr = filename;
630       unsigned long num = strtoul(&filename[1], &filename, 10);
631       struct URLPattern *pat = NULL;
632 
633       if(num && (num < glob->size)) {
634         unsigned long i;
635         num--; /* make it zero based */
636         /* find the correct glob entry */
637         for(i = 0; i<glob->size; i++) {
638           if(glob->pattern[i].globindex == (int)num) {
639             pat = &glob->pattern[i];
640             break;
641           }
642         }
643       }
644 
645       if(pat) {
646         switch(pat->type) {
647         case UPTSet:
648           if(pat->content.Set.elements) {
649             appendthis = pat->content.Set.elements[pat->content.Set.ptr_s];
650             appendlen =
651               strlen(pat->content.Set.elements[pat->content.Set.ptr_s]);
652           }
653           break;
654         case UPTCharRange:
655           numbuf[0] = pat->content.CharRange.ptr_c;
656           numbuf[1] = 0;
657           appendthis = numbuf;
658           appendlen = 1;
659           break;
660         case UPTNumRange:
661           msnprintf(numbuf, sizeof(numbuf), "%0*lu",
662                     pat->content.NumRange.padlength,
663                     pat->content.NumRange.ptr_n);
664           appendthis = numbuf;
665           appendlen = strlen(numbuf);
666           break;
667         default:
668           fprintf(stderr, "internal error: invalid pattern type (%d)\n",
669                   (int)pat->type);
670           curlx_dyn_free(&dyn);
671           return CURLE_FAILED_INIT;
672         }
673       }
674       else {
675         /* #[num] out of range, use the #[num] in the output */
676         filename = ptr;
677         appendthis = filename++;
678         appendlen = 1;
679       }
680     }
681     else {
682       appendthis = filename++;
683       appendlen = 1;
684     }
685     if(curlx_dyn_addn(&dyn, appendthis, appendlen))
686       return CURLE_OUT_OF_MEMORY;
687   }
688 
689 #if defined(MSDOS) || defined(WIN32)
690   {
691     char *sanitized;
692     SANITIZEcode sc = sanitize_file_name(&sanitized, curlx_dyn_ptr(&dyn),
693                                          (SANITIZE_ALLOW_PATH |
694                                           SANITIZE_ALLOW_RESERVED));
695     curlx_dyn_free(&dyn);
696     if(sc)
697       return CURLE_URL_MALFORMAT;
698     *result = sanitized;
699     return CURLE_OK;
700   }
701 #else
702   *result = curlx_dyn_ptr(&dyn);
703   return CURLE_OK;
704 #endif /* MSDOS || WIN32 */
705 }
706