1//===-- sanitizer_common_interceptors_format.inc ----------------*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// Scanf/printf implementation for use in *Sanitizer interceptors.
11// Follows http://pubs.opengroup.org/onlinepubs/9699919799/functions/fscanf.html
12// and http://pubs.opengroup.org/onlinepubs/9699919799/functions/fprintf.html
13// with a few common GNU extensions.
14//
15//===----------------------------------------------------------------------===//
16
17#include <stdarg.h>
18
19static const char *parse_number(const char *p, int *out) {
20  *out = internal_atoll(p);
21  while (*p >= '0' && *p <= '9')
22    ++p;
23  return p;
24}
25
26static const char *maybe_parse_param_index(const char *p, int *out) {
27  // n$
28  if (*p >= '0' && *p <= '9') {
29    int number;
30    const char *q = parse_number(p, &number);
31    CHECK(q);
32    if (*q == '$') {
33      *out = number;
34      p = q + 1;
35    }
36  }
37
38  // Otherwise, do not change p. This will be re-parsed later as the field
39  // width.
40  return p;
41}
42
// Returns true if c is one of the characters of the NUL-terminated set s.
// The terminator of s is not a member of the set, so c == '\0' never matches.
// This matters to the format parsers, which use this predicate to decide
// whether to step over the current character: treating the end-of-string
// marker as a match would walk them past the end of the format string.
static bool char_is_one_of(char c, const char *s) {
  for (; *s != '\0'; ++s) {
    if (*s == c)
      return true;
  }
  return false;
}
46
// Parses an optional length modifier at p: one of "j", "z", "t", "L", "q",
// "h", "hh", "l" or "ll". The modifier characters are written to ll (only
// the positions that are present are written; the caller is expected to
// have zeroed ll beforehand). Returns the pointer past the modifier, or p
// unchanged when there is none.
//
// The modifier characters are matched literally so that the terminating NUL
// of the format string can never be mistaken for a modifier and stepped over.
static const char *maybe_parse_length_modifier(const char *p, char ll[2]) {
  switch (*p) {
  case 'j':
  case 'z':
  case 't':
  case 'L':
  case 'q':
    ll[0] = *p++;
    break;
  case 'h':
  case 'l':
    ll[0] = *p++;
    // A repeated 'h' or 'l' forms the two-character modifier "hh" / "ll".
    if (*p == ll[0])
      ll[1] = *p++;
    break;
  default:
    break;
  }
  return p;
}
68
69// Returns true if the character is an integer conversion specifier.
70static bool format_is_integer_conv(char c) {
71  return char_is_one_of(c, "diouxXn");
72}
73
74// Returns true if the character is an floating point conversion specifier.
75static bool format_is_float_conv(char c) {
76  return char_is_one_of(c, "aAeEfFgG");
77}
78
79// Returns string output character size for string-like conversions,
80// or 0 if the conversion is invalid.
81static int format_get_char_size(char convSpecifier,
82                                const char lengthModifier[2]) {
83  if (char_is_one_of(convSpecifier, "CS")) {
84    return sizeof(wchar_t);
85  }
86
87  if (char_is_one_of(convSpecifier, "cs[")) {
88    if (lengthModifier[0] == 'l' && lengthModifier[1] == '\0')
89      return sizeof(wchar_t);
90    else if (lengthModifier[0] == '\0')
91      return sizeof(char);
92  }
93
94  return 0;
95}
96
// Non-positive results of the *_get_value_size() helpers; a positive result
// is a size in bytes.
enum FormatStoreSize {
  // The conversion specifier is invalid.
  FSS_INVALID = 0,
  // The store size is not known in advance; it can be calculated as strlen()
  // of the destination buffer.
  FSS_STRLEN = -1,
  // The store size is not known in advance; it can be calculated as wcslen()
  // of the destination buffer.
  FSS_WCSLEN = -2
};
107
108// Returns the memory size of a format directive (if >0), or a value of
109// FormatStoreSize.
110static int format_get_value_size(char convSpecifier,
111                                 const char lengthModifier[2],
112                                 bool promote_float) {
113  if (format_is_integer_conv(convSpecifier)) {
114    switch (lengthModifier[0]) {
115    case 'h':
116      return lengthModifier[1] == 'h' ? sizeof(char) : sizeof(short);
117    case 'l':
118      return lengthModifier[1] == 'l' ? sizeof(long long) : sizeof(long);
119    case 'q':
120      return sizeof(long long);
121    case 'L':
122      return sizeof(long long);
123    case 'j':
124      return sizeof(INTMAX_T);
125    case 'z':
126      return sizeof(SIZE_T);
127    case 't':
128      return sizeof(PTRDIFF_T);
129    case 0:
130      return sizeof(int);
131    default:
132      return FSS_INVALID;
133    }
134  }
135
136  if (format_is_float_conv(convSpecifier)) {
137    switch (lengthModifier[0]) {
138    case 'L':
139    case 'q':
140      return sizeof(long double);
141    case 'l':
142      return lengthModifier[1] == 'l' ? sizeof(long double)
143                                           : sizeof(double);
144    case 0:
145      // Printf promotes floats to doubles but scanf does not
146      return promote_float ? sizeof(double) : sizeof(float);
147    default:
148      return FSS_INVALID;
149    }
150  }
151
152  if (convSpecifier == 'p') {
153    if (lengthModifier[0] != 0)
154      return FSS_INVALID;
155    return sizeof(void *);
156  }
157
158  return FSS_INVALID;
159}
160
// A single scanf conversion directive, as filled in by scanf_parse_next().
struct ScanfDirective {
  // Argument index from "%n$", or -1 if not specified.
  int argIdx;
  // Field width; stays 0 when the directive does not specify one.
  int fieldWidth;
  // Position of the directive's '%' in the format string.
  const char *begin;
  // Position just past the last character of the directive.
  const char *end;
  // Assignment is suppressed ("*").
  bool suppressed;
  // Space is allocated for the result ("m").
  bool allocate;
  // Length modifier characters; positions that are not used stay '\0'.
  char lengthModifier[2];
  // Conversion specifier character.
  char convSpecifier;
  // The directive is ambiguous between the old GNU %as / %aS / %a[...]
  // allocation extension and POSIX %a followed by s, S or [.
  bool maybeGnuMalloc;
};
172
173// Parse scanf format string. If a valid directive in encountered, it is
174// returned in dir. This function returns the pointer to the first
175// unprocessed character, or 0 in case of error.
176// In case of the end-of-string, a pointer to the closing \0 is returned.
177static const char *scanf_parse_next(const char *p, bool allowGnuMalloc,
178                                    ScanfDirective *dir) {
179  internal_memset(dir, 0, sizeof(*dir));
180  dir->argIdx = -1;
181
182  while (*p) {
183    if (*p != '%') {
184      ++p;
185      continue;
186    }
187    dir->begin = p;
188    ++p;
189    // %%
190    if (*p == '%') {
191      ++p;
192      continue;
193    }
194    if (*p == '\0') {
195      return nullptr;
196    }
197    // %n$
198    p = maybe_parse_param_index(p, &dir->argIdx);
199    CHECK(p);
200    // *
201    if (*p == '*') {
202      dir->suppressed = true;
203      ++p;
204    }
205    // Field width
206    if (*p >= '0' && *p <= '9') {
207      p = parse_number(p, &dir->fieldWidth);
208      CHECK(p);
209      if (dir->fieldWidth <= 0)  // Width if at all must be non-zero
210        return nullptr;
211    }
212    // m
213    if (*p == 'm') {
214      dir->allocate = true;
215      ++p;
216    }
217    // Length modifier.
218    p = maybe_parse_length_modifier(p, dir->lengthModifier);
219    // Conversion specifier.
220    dir->convSpecifier = *p++;
221    // Consume %[...] expression.
222    if (dir->convSpecifier == '[') {
223      if (*p == '^')
224        ++p;
225      if (*p == ']')
226        ++p;
227      while (*p && *p != ']')
228        ++p;
229      if (*p == 0)
230        return nullptr; // unexpected end of string
231                        // Consume the closing ']'.
232      ++p;
233    }
234    // This is unfortunately ambiguous between old GNU extension
235    // of %as, %aS and %a[...] and newer POSIX %a followed by
236    // letters s, S or [.
237    if (allowGnuMalloc && dir->convSpecifier == 'a' &&
238        !dir->lengthModifier[0]) {
239      if (*p == 's' || *p == 'S') {
240        dir->maybeGnuMalloc = true;
241        ++p;
242      } else if (*p == '[') {
243        // Watch for %a[h-j%d], if % appears in the
244        // [...] range, then we need to give up, we don't know
245        // if scanf will parse it as POSIX %a [h-j %d ] or
246        // GNU allocation of string with range dh-j plus %.
247        const char *q = p + 1;
248        if (*q == '^')
249          ++q;
250        if (*q == ']')
251          ++q;
252        while (*q && *q != ']' && *q != '%')
253          ++q;
254        if (*q == 0 || *q == '%')
255          return nullptr;
256        p = q + 1; // Consume the closing ']'.
257        dir->maybeGnuMalloc = true;
258      }
259    }
260    dir->end = p;
261    break;
262  }
263  return p;
264}
265
266static int scanf_get_value_size(ScanfDirective *dir) {
267  if (dir->allocate) {
268    if (!char_is_one_of(dir->convSpecifier, "cCsS["))
269      return FSS_INVALID;
270    return sizeof(char *);
271  }
272
273  if (dir->maybeGnuMalloc) {
274    if (dir->convSpecifier != 'a' || dir->lengthModifier[0])
275      return FSS_INVALID;
276    // This is ambiguous, so check the smaller size of char * (if it is
277    // a GNU extension of %as, %aS or %a[...]) and float (if it is
278    // POSIX %a followed by s, S or [ letters).
279    return sizeof(char *) < sizeof(float) ? sizeof(char *) : sizeof(float);
280  }
281
282  if (char_is_one_of(dir->convSpecifier, "cCsS[")) {
283    bool needsTerminator = char_is_one_of(dir->convSpecifier, "sS[");
284    unsigned charSize =
285        format_get_char_size(dir->convSpecifier, dir->lengthModifier);
286    if (charSize == 0)
287      return FSS_INVALID;
288    if (dir->fieldWidth == 0) {
289      if (!needsTerminator)
290        return charSize;
291      return (charSize == sizeof(char)) ? FSS_STRLEN : FSS_WCSLEN;
292    }
293    return (dir->fieldWidth + needsTerminator) * charSize;
294  }
295
296  return format_get_value_size(dir->convSpecifier, dir->lengthModifier, false);
297}
298
299// Common part of *scanf interceptors.
300// Process format string and va_list, and report all store ranges.
301// Stops when "consuming" n_inputs input items.
302static void scanf_common(void *ctx, int n_inputs, bool allowGnuMalloc,
303                         const char *format, va_list aq) {
304  CHECK_GT(n_inputs, 0);
305  const char *p = format;
306
307  COMMON_INTERCEPTOR_READ_RANGE(ctx, format, internal_strlen(format) + 1);
308
309  while (*p) {
310    ScanfDirective dir;
311    p = scanf_parse_next(p, allowGnuMalloc, &dir);
312    if (!p)
313      break;
314    if (dir.convSpecifier == 0) {
315      // This can only happen at the end of the format string.
316      CHECK_EQ(*p, 0);
317      break;
318    }
319    // Here the directive is valid. Do what it says.
320    if (dir.argIdx != -1) {
321      // Unsupported.
322      break;
323    }
324    if (dir.suppressed)
325      continue;
326    int size = scanf_get_value_size(&dir);
327    if (size == FSS_INVALID) {
328      Report("WARNING: unexpected format specifier in scanf interceptor: "
329        "%.*s\n", dir.end - dir.begin, dir.begin);
330      break;
331    }
332    void *argp = va_arg(aq, void *);
333    if (dir.convSpecifier != 'n')
334      --n_inputs;
335    if (n_inputs < 0)
336      break;
337    if (size == FSS_STRLEN) {
338      size = internal_strlen((const char *)argp) + 1;
339    } else if (size == FSS_WCSLEN) {
340      // FIXME: actually use wcslen() to calculate it.
341      size = 0;
342    }
343    COMMON_INTERCEPTOR_WRITE_RANGE(ctx, argp, size);
344  }
345}
346
347#if SANITIZER_INTERCEPT_PRINTF
348
// A single printf conversion directive, as filled in by printf_parse_next().
struct PrintfDirective {
  // Literal field width; only meaningful when starredWidth is false.
  int fieldWidth;
  // Literal precision; only meaningful when starredPrecision is false.
  int fieldPrecision;
  // Width argument index ("%*n$"), or -1 if not specified.
  int argIdx;
  // Precision argument index (".*n$"), or -1 if not specified.
  int precisionIdx;
  // Position of the directive's '%' in the format string.
  const char *begin;
  // Position just past the last character of the directive.
  const char *end;
  // The width is given as "*", i.e. taken from the argument list.
  bool starredWidth;
  // The precision is given as ".*", i.e. taken from the argument list.
  bool starredPrecision;
  // Length modifier characters; positions that are not used stay '\0'.
  char lengthModifier[2];
  // Conversion specifier character.
  char convSpecifier;
};
361
362static const char *maybe_parse_number(const char *p, int *out) {
363  if (*p >= '0' && *p <= '9')
364    p = parse_number(p, out);
365  return p;
366}
367
368static const char *maybe_parse_number_or_star(const char *p, int *out,
369                                              bool *star) {
370  if (*p == '*') {
371    *star = true;
372    ++p;
373  } else {
374    *star = false;
375    p = maybe_parse_number(p, out);
376  }
377  return p;
378}
379
380// Parse printf format string. Same as scanf_parse_next.
381static const char *printf_parse_next(const char *p, PrintfDirective *dir) {
382  internal_memset(dir, 0, sizeof(*dir));
383  dir->argIdx = -1;
384  dir->precisionIdx = -1;
385
386  while (*p) {
387    if (*p != '%') {
388      ++p;
389      continue;
390    }
391    dir->begin = p;
392    ++p;
393    // %%
394    if (*p == '%') {
395      ++p;
396      continue;
397    }
398    if (*p == '\0') {
399      return nullptr;
400    }
401    // %n$
402    p = maybe_parse_param_index(p, &dir->precisionIdx);
403    CHECK(p);
404    // Flags
405    while (char_is_one_of(*p, "'-+ #0")) {
406      ++p;
407    }
408    // Field width
409    p = maybe_parse_number_or_star(p, &dir->fieldWidth,
410                                   &dir->starredWidth);
411    if (!p)
412      return nullptr;
413    // Precision
414    if (*p == '.') {
415      ++p;
416      // Actual precision is optional (surprise!)
417      p = maybe_parse_number_or_star(p, &dir->fieldPrecision,
418                                     &dir->starredPrecision);
419      if (!p)
420        return nullptr;
421      // m$
422      if (dir->starredPrecision) {
423        p = maybe_parse_param_index(p, &dir->precisionIdx);
424        CHECK(p);
425      }
426    }
427    // Length modifier.
428    p = maybe_parse_length_modifier(p, dir->lengthModifier);
429    // Conversion specifier.
430    dir->convSpecifier = *p++;
431    dir->end = p;
432    break;
433  }
434  return p;
435}
436
437static int printf_get_value_size(PrintfDirective *dir) {
438  if (dir->convSpecifier == 'm') {
439    return sizeof(char *);
440  }
441
442  if (char_is_one_of(dir->convSpecifier, "cCsS")) {
443    unsigned charSize =
444        format_get_char_size(dir->convSpecifier, dir->lengthModifier);
445    if (charSize == 0)
446      return FSS_INVALID;
447    if (char_is_one_of(dir->convSpecifier, "sS")) {
448      return (charSize == sizeof(char)) ? FSS_STRLEN : FSS_WCSLEN;
449    }
450    return charSize;
451  }
452
453  return format_get_value_size(dir->convSpecifier, dir->lengthModifier, true);
454}
455
// Consumes (and discards) one by-value variadic argument.
//   aq            - pointer to the va_list to advance.
//   convSpecifier - conversion specifier; decides between the floating-point
//                   and the integer argument classes.
//   size          - size of the argument in bytes.
// Floating-point arguments of size 8 are read as double and of size 12 or
// 16 as long double; other arguments of size 1, 2 or 4 are read as u32 and
// of size 8 as u64. For any other size a warning is reported and the macro
// executes `return;`, so it can only be used inside a function returning
// void.
// The size is converted to int for the warning because both call-site
// expressions of type int and of type size_t (sizeof) are passed in, while
// the message is formatted with %d.
#define SKIP_SCALAR_ARG(aq, convSpecifier, size)                   \
  do {                                                             \
    if (format_is_float_conv(convSpecifier)) {                     \
      switch (size) {                                              \
      case 8:                                                      \
        (void)va_arg(*(aq), double);                               \
        break;                                                     \
      case 12:                                                     \
      case 16:                                                     \
        (void)va_arg(*(aq), long double);                          \
        break;                                                     \
      default:                                                     \
        Report("WARNING: unexpected floating-point arg size"       \
               " in printf interceptor: %d\n", (int)(size));       \
        return;                                                    \
      }                                                            \
    } else {                                                       \
      switch (size) {                                              \
      case 1:                                                      \
      case 2:                                                      \
      case 4:                                                      \
        (void)va_arg(*(aq), u32);                                  \
        break;                                                     \
      case 8:                                                      \
        (void)va_arg(*(aq), u64);                                  \
        break;                                                     \
      default:                                                     \
        Report("WARNING: unexpected arg size"                      \
               " in printf interceptor: %d\n", (int)(size));       \
        return;                                                    \
      }                                                            \
    }                                                              \
  } while (0)
491
492// Common part of *printf interceptors.
493// Process format string and va_list, and report all load ranges.
494static void printf_common(void *ctx, const char *format, va_list aq) {
495  COMMON_INTERCEPTOR_READ_RANGE(ctx, format, internal_strlen(format) + 1);
496
497  const char *p = format;
498
499  while (*p) {
500    PrintfDirective dir;
501    p = printf_parse_next(p, &dir);
502    if (!p)
503      break;
504    if (dir.convSpecifier == 0) {
505      // This can only happen at the end of the format string.
506      CHECK_EQ(*p, 0);
507      break;
508    }
509    // Here the directive is valid. Do what it says.
510    if (dir.argIdx != -1 || dir.precisionIdx != -1) {
511      // Unsupported.
512      break;
513    }
514    if (dir.starredWidth) {
515      // Dynamic width
516      SKIP_SCALAR_ARG(&aq, 'd', sizeof(int));
517    }
518    if (dir.starredPrecision) {
519      // Dynamic precision
520      SKIP_SCALAR_ARG(&aq, 'd', sizeof(int));
521    }
522    int size = printf_get_value_size(&dir);
523    if (size == FSS_INVALID) {
524      Report("WARNING: unexpected format specifier in printf "
525             "interceptor: %.*s\n", dir.end - dir.begin, dir.begin);
526      break;
527    }
528    if (dir.convSpecifier == 'n') {
529      void *argp = va_arg(aq, void *);
530      COMMON_INTERCEPTOR_WRITE_RANGE(ctx, argp, size);
531      continue;
532    } else if (size == FSS_STRLEN) {
533      if (void *argp = va_arg(aq, void *)) {
534        if (dir.starredPrecision) {
535          // FIXME: properly support starred precision for strings.
536          size = 0;
537        } else if (dir.fieldPrecision > 0) {
538          // Won't read more than "precision" symbols.
539          size = internal_strnlen((const char *)argp, dir.fieldPrecision);
540          if (size < dir.fieldPrecision) size++;
541        } else {
542          // Whole string will be accessed.
543          size = internal_strlen((const char *)argp) + 1;
544        }
545        COMMON_INTERCEPTOR_READ_RANGE(ctx, argp, size);
546      }
547    } else if (size == FSS_WCSLEN) {
548      if (void *argp = va_arg(aq, void *)) {
549        // FIXME: Properly support wide-character strings (via wcsrtombs).
550        size = 0;
551        COMMON_INTERCEPTOR_READ_RANGE(ctx, argp, size);
552      }
553    } else {
554      // Skip non-pointer args
555      SKIP_SCALAR_ARG(&aq, dir.convSpecifier, size);
556    }
557  }
558}
559
560#endif // SANITIZER_INTERCEPT_PRINTF
561