• Home
  • History
  • Annotate
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
//= ScanfFormatString.cpp - Analysis of scanf format strings ---*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
// Handling of format strings in scanf and friends.  The structure of format
// strings for fscanf() is described in C99 7.19.6.2.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "clang/Analysis/Analyses/FormatString.h"
16 #include "FormatStringParsing.h"
17 #include "clang/Basic/TargetInfo.h"
18 
19 using clang::analyze_format_string::ArgType;
20 using clang::analyze_format_string::FormatStringHandler;
21 using clang::analyze_format_string::LengthModifier;
22 using clang::analyze_format_string::OptionalAmount;
23 using clang::analyze_format_string::ConversionSpecifier;
24 using clang::analyze_scanf::ScanfConversionSpecifier;
25 using clang::analyze_scanf::ScanfSpecifier;
26 using clang::UpdateOnReturn;
27 using namespace clang;
28 
29 typedef clang::analyze_format_string::SpecifierResult<ScanfSpecifier>
30         ScanfSpecifierResult;
31 
ParseScanList(FormatStringHandler & H,ScanfConversionSpecifier & CS,const char * & Beg,const char * E)32 static bool ParseScanList(FormatStringHandler &H,
33                           ScanfConversionSpecifier &CS,
34                           const char *&Beg, const char *E) {
35   const char *I = Beg;
36   const char *start = I - 1;
37   UpdateOnReturn <const char*> UpdateBeg(Beg, I);
38 
39   // No more characters?
40   if (I == E) {
41     H.HandleIncompleteScanList(start, I);
42     return true;
43   }
44 
45   // Special case: ']' is the first character.
46   if (*I == ']') {
47     if (++I == E) {
48       H.HandleIncompleteScanList(start, I - 1);
49       return true;
50     }
51   }
52 
53   // Special case: "^]" are the first characters.
54   if (I + 1 != E && I[0] == '^' && I[1] == ']') {
55     I += 2;
56     if (I == E) {
57       H.HandleIncompleteScanList(start, I - 1);
58       return true;
59     }
60   }
61 
62   // Look for a ']' character which denotes the end of the scan list.
63   while (*I != ']') {
64     if (++I == E) {
65       H.HandleIncompleteScanList(start, I - 1);
66       return true;
67     }
68   }
69 
70   CS.setEndScanList(I);
71   return false;
72 }
73 
74 // FIXME: Much of this is copy-paste from ParsePrintfSpecifier.
75 // We can possibly refactor.
ParseScanfSpecifier(FormatStringHandler & H,const char * & Beg,const char * E,unsigned & argIndex,const LangOptions & LO,const TargetInfo & Target)76 static ScanfSpecifierResult ParseScanfSpecifier(FormatStringHandler &H,
77                                                 const char *&Beg,
78                                                 const char *E,
79                                                 unsigned &argIndex,
80                                                 const LangOptions &LO,
81                                                 const TargetInfo &Target) {
82 
83   using namespace clang::analyze_scanf;
84   const char *I = Beg;
85   const char *Start = nullptr;
86   UpdateOnReturn <const char*> UpdateBeg(Beg, I);
87 
88     // Look for a '%' character that indicates the start of a format specifier.
89   for ( ; I != E ; ++I) {
90     char c = *I;
91     if (c == '\0') {
92         // Detect spurious null characters, which are likely errors.
93       H.HandleNullChar(I);
94       return true;
95     }
96     if (c == '%') {
97       Start = I++;  // Record the start of the format specifier.
98       break;
99     }
100   }
101 
102     // No format specifier found?
103   if (!Start)
104     return false;
105 
106   if (I == E) {
107       // No more characters left?
108     H.HandleIncompleteSpecifier(Start, E - Start);
109     return true;
110   }
111 
112   ScanfSpecifier FS;
113   if (ParseArgPosition(H, FS, Start, I, E))
114     return true;
115 
116   if (I == E) {
117       // No more characters left?
118     H.HandleIncompleteSpecifier(Start, E - Start);
119     return true;
120   }
121 
122   // Look for '*' flag if it is present.
123   if (*I == '*') {
124     FS.setSuppressAssignment(I);
125     if (++I == E) {
126       H.HandleIncompleteSpecifier(Start, E - Start);
127       return true;
128     }
129   }
130 
131   // Look for the field width (if any).  Unlike printf, this is either
132   // a fixed integer or isn't present.
133   const OptionalAmount &Amt = clang::analyze_format_string::ParseAmount(I, E);
134   if (Amt.getHowSpecified() != OptionalAmount::NotSpecified) {
135     assert(Amt.getHowSpecified() == OptionalAmount::Constant);
136     FS.setFieldWidth(Amt);
137 
138     if (I == E) {
139       // No more characters left?
140       H.HandleIncompleteSpecifier(Start, E - Start);
141       return true;
142     }
143   }
144 
145   // Look for the length modifier.
146   if (ParseLengthModifier(FS, I, E, LO, /*scanf=*/true) && I == E) {
147       // No more characters left?
148     H.HandleIncompleteSpecifier(Start, E - Start);
149     return true;
150   }
151 
152   // Detect spurious null characters, which are likely errors.
153   if (*I == '\0') {
154     H.HandleNullChar(I);
155     return true;
156   }
157 
158   // Finally, look for the conversion specifier.
159   const char *conversionPosition = I++;
160   ScanfConversionSpecifier::Kind k = ScanfConversionSpecifier::InvalidSpecifier;
161   switch (*conversionPosition) {
162     default:
163       break;
164     case '%': k = ConversionSpecifier::PercentArg;   break;
165     case 'A': k = ConversionSpecifier::AArg; break;
166     case 'E': k = ConversionSpecifier::EArg; break;
167     case 'F': k = ConversionSpecifier::FArg; break;
168     case 'G': k = ConversionSpecifier::GArg; break;
169     case 'X': k = ConversionSpecifier::XArg; break;
170     case 'a': k = ConversionSpecifier::aArg; break;
171     case 'd': k = ConversionSpecifier::dArg; break;
172     case 'e': k = ConversionSpecifier::eArg; break;
173     case 'f': k = ConversionSpecifier::fArg; break;
174     case 'g': k = ConversionSpecifier::gArg; break;
175     case 'i': k = ConversionSpecifier::iArg; break;
176     case 'n': k = ConversionSpecifier::nArg; break;
177     case 'c': k = ConversionSpecifier::cArg; break;
178     case 'C': k = ConversionSpecifier::CArg; break;
179     case 'S': k = ConversionSpecifier::SArg; break;
180     case '[': k = ConversionSpecifier::ScanListArg; break;
181     case 'u': k = ConversionSpecifier::uArg; break;
182     case 'x': k = ConversionSpecifier::xArg; break;
183     case 'o': k = ConversionSpecifier::oArg; break;
184     case 's': k = ConversionSpecifier::sArg; break;
185     case 'p': k = ConversionSpecifier::pArg; break;
186     // Apple extensions
187       // Apple-specific
188     case 'D':
189       if (Target.getTriple().isOSDarwin())
190         k = ConversionSpecifier::DArg;
191       break;
192     case 'O':
193       if (Target.getTriple().isOSDarwin())
194         k = ConversionSpecifier::OArg;
195       break;
196     case 'U':
197       if (Target.getTriple().isOSDarwin())
198         k = ConversionSpecifier::UArg;
199       break;
200   }
201   ScanfConversionSpecifier CS(conversionPosition, k);
202   if (k == ScanfConversionSpecifier::ScanListArg) {
203     if (ParseScanList(H, CS, I, E))
204       return true;
205   }
206   FS.setConversionSpecifier(CS);
207   if (CS.consumesDataArgument() && !FS.getSuppressAssignment()
208       && !FS.usesPositionalArg())
209     FS.setArgIndex(argIndex++);
210 
211   // FIXME: '%' and '*' doesn't make sense.  Issue a warning.
212   // FIXME: 'ConsumedSoFar' and '*' doesn't make sense.
213 
214   if (k == ScanfConversionSpecifier::InvalidSpecifier) {
215     // Assume the conversion takes one argument.
216     return !H.HandleInvalidScanfConversionSpecifier(FS, Beg, I - Beg);
217   }
218   return ScanfSpecifierResult(Start, FS);
219 }
220 
getArgType(ASTContext & Ctx) const221 ArgType ScanfSpecifier::getArgType(ASTContext &Ctx) const {
222   const ScanfConversionSpecifier &CS = getConversionSpecifier();
223 
224   if (!CS.consumesDataArgument())
225     return ArgType::Invalid();
226 
227   switch(CS.getKind()) {
228     // Signed int.
229     case ConversionSpecifier::dArg:
230     case ConversionSpecifier::DArg:
231     case ConversionSpecifier::iArg:
232       switch (LM.getKind()) {
233         case LengthModifier::None:
234           return ArgType::PtrTo(Ctx.IntTy);
235         case LengthModifier::AsChar:
236           return ArgType::PtrTo(ArgType::AnyCharTy);
237         case LengthModifier::AsShort:
238           return ArgType::PtrTo(Ctx.ShortTy);
239         case LengthModifier::AsLong:
240           return ArgType::PtrTo(Ctx.LongTy);
241         case LengthModifier::AsLongLong:
242         case LengthModifier::AsQuad:
243           return ArgType::PtrTo(Ctx.LongLongTy);
244         case LengthModifier::AsInt64:
245           return ArgType::PtrTo(ArgType(Ctx.LongLongTy, "__int64"));
246         case LengthModifier::AsIntMax:
247           return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t"));
248         case LengthModifier::AsSizeT:
249           // FIXME: ssize_t.
250           return ArgType();
251         case LengthModifier::AsPtrDiff:
252           return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t"));
253         case LengthModifier::AsLongDouble:
254           // GNU extension.
255           return ArgType::PtrTo(Ctx.LongLongTy);
256         case LengthModifier::AsAllocate:
257         case LengthModifier::AsMAllocate:
258         case LengthModifier::AsInt32:
259         case LengthModifier::AsInt3264:
260           return ArgType::Invalid();
261       }
262 
263     // Unsigned int.
264     case ConversionSpecifier::oArg:
265     case ConversionSpecifier::OArg:
266     case ConversionSpecifier::uArg:
267     case ConversionSpecifier::UArg:
268     case ConversionSpecifier::xArg:
269     case ConversionSpecifier::XArg:
270       switch (LM.getKind()) {
271         case LengthModifier::None:
272           return ArgType::PtrTo(Ctx.UnsignedIntTy);
273         case LengthModifier::AsChar:
274           return ArgType::PtrTo(Ctx.UnsignedCharTy);
275         case LengthModifier::AsShort:
276           return ArgType::PtrTo(Ctx.UnsignedShortTy);
277         case LengthModifier::AsLong:
278           return ArgType::PtrTo(Ctx.UnsignedLongTy);
279         case LengthModifier::AsLongLong:
280         case LengthModifier::AsQuad:
281           return ArgType::PtrTo(Ctx.UnsignedLongLongTy);
282         case LengthModifier::AsInt64:
283           return ArgType::PtrTo(ArgType(Ctx.UnsignedLongLongTy, "unsigned __int64"));
284         case LengthModifier::AsIntMax:
285           return ArgType::PtrTo(ArgType(Ctx.getUIntMaxType(), "uintmax_t"));
286         case LengthModifier::AsSizeT:
287           return ArgType::PtrTo(ArgType(Ctx.getSizeType(), "size_t"));
288         case LengthModifier::AsPtrDiff:
289           // FIXME: Unsigned version of ptrdiff_t?
290           return ArgType();
291         case LengthModifier::AsLongDouble:
292           // GNU extension.
293           return ArgType::PtrTo(Ctx.UnsignedLongLongTy);
294         case LengthModifier::AsAllocate:
295         case LengthModifier::AsMAllocate:
296         case LengthModifier::AsInt32:
297         case LengthModifier::AsInt3264:
298           return ArgType::Invalid();
299       }
300 
301     // Float.
302     case ConversionSpecifier::aArg:
303     case ConversionSpecifier::AArg:
304     case ConversionSpecifier::eArg:
305     case ConversionSpecifier::EArg:
306     case ConversionSpecifier::fArg:
307     case ConversionSpecifier::FArg:
308     case ConversionSpecifier::gArg:
309     case ConversionSpecifier::GArg:
310       switch (LM.getKind()) {
311         case LengthModifier::None:
312           return ArgType::PtrTo(Ctx.FloatTy);
313         case LengthModifier::AsLong:
314           return ArgType::PtrTo(Ctx.DoubleTy);
315         case LengthModifier::AsLongDouble:
316           return ArgType::PtrTo(Ctx.LongDoubleTy);
317         default:
318           return ArgType::Invalid();
319       }
320 
321     // Char, string and scanlist.
322     case ConversionSpecifier::cArg:
323     case ConversionSpecifier::sArg:
324     case ConversionSpecifier::ScanListArg:
325       switch (LM.getKind()) {
326         case LengthModifier::None:
327           return ArgType::PtrTo(ArgType::AnyCharTy);
328         case LengthModifier::AsLong:
329           return ArgType::PtrTo(ArgType(Ctx.getWideCharType(), "wchar_t"));
330         case LengthModifier::AsAllocate:
331         case LengthModifier::AsMAllocate:
332           return ArgType::PtrTo(ArgType::CStrTy);
333         default:
334           return ArgType::Invalid();
335       }
336     case ConversionSpecifier::CArg:
337     case ConversionSpecifier::SArg:
338       // FIXME: Mac OS X specific?
339       switch (LM.getKind()) {
340         case LengthModifier::None:
341           return ArgType::PtrTo(ArgType(Ctx.getWideCharType(), "wchar_t"));
342         case LengthModifier::AsAllocate:
343         case LengthModifier::AsMAllocate:
344           return ArgType::PtrTo(ArgType(ArgType::WCStrTy, "wchar_t *"));
345         default:
346           return ArgType::Invalid();
347       }
348 
349     // Pointer.
350     case ConversionSpecifier::pArg:
351       return ArgType::PtrTo(ArgType::CPointerTy);
352 
353     // Write-back.
354     case ConversionSpecifier::nArg:
355       switch (LM.getKind()) {
356         case LengthModifier::None:
357           return ArgType::PtrTo(Ctx.IntTy);
358         case LengthModifier::AsChar:
359           return ArgType::PtrTo(Ctx.SignedCharTy);
360         case LengthModifier::AsShort:
361           return ArgType::PtrTo(Ctx.ShortTy);
362         case LengthModifier::AsLong:
363           return ArgType::PtrTo(Ctx.LongTy);
364         case LengthModifier::AsLongLong:
365         case LengthModifier::AsQuad:
366           return ArgType::PtrTo(Ctx.LongLongTy);
367         case LengthModifier::AsInt64:
368           return ArgType::PtrTo(ArgType(Ctx.LongLongTy, "__int64"));
369         case LengthModifier::AsIntMax:
370           return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t"));
371         case LengthModifier::AsSizeT:
372           return ArgType(); // FIXME: ssize_t
373         case LengthModifier::AsPtrDiff:
374           return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t"));
375         case LengthModifier::AsLongDouble:
376           return ArgType(); // FIXME: Is this a known extension?
377         case LengthModifier::AsAllocate:
378         case LengthModifier::AsMAllocate:
379         case LengthModifier::AsInt32:
380         case LengthModifier::AsInt3264:
381           return ArgType::Invalid();
382         }
383 
384     default:
385       break;
386   }
387 
388   return ArgType();
389 }
390 
fixType(QualType QT,QualType RawQT,const LangOptions & LangOpt,ASTContext & Ctx)391 bool ScanfSpecifier::fixType(QualType QT, QualType RawQT,
392                              const LangOptions &LangOpt,
393                              ASTContext &Ctx) {
394 
395   // %n is different from other conversion specifiers; don't try to fix it.
396   if (CS.getKind() == ConversionSpecifier::nArg)
397     return false;
398 
399   if (!QT->isPointerType())
400     return false;
401 
402   QualType PT = QT->getPointeeType();
403 
404   // If it's an enum, get its underlying type.
405   if (const EnumType *ETy = PT->getAs<EnumType>())
406     PT = ETy->getDecl()->getIntegerType();
407 
408   const BuiltinType *BT = PT->getAs<BuiltinType>();
409   if (!BT)
410     return false;
411 
412   // Pointer to a character.
413   if (PT->isAnyCharacterType()) {
414     CS.setKind(ConversionSpecifier::sArg);
415     if (PT->isWideCharType())
416       LM.setKind(LengthModifier::AsWideChar);
417     else
418       LM.setKind(LengthModifier::None);
419 
420     // If we know the target array length, we can use it as a field width.
421     if (const ConstantArrayType *CAT = Ctx.getAsConstantArrayType(RawQT)) {
422       if (CAT->getSizeModifier() == ArrayType::Normal)
423         FieldWidth = OptionalAmount(OptionalAmount::Constant,
424                                     CAT->getSize().getZExtValue() - 1,
425                                     "", 0, false);
426 
427     }
428     return true;
429   }
430 
431   // Figure out the length modifier.
432   switch (BT->getKind()) {
433     // no modifier
434     case BuiltinType::UInt:
435     case BuiltinType::Int:
436     case BuiltinType::Float:
437       LM.setKind(LengthModifier::None);
438       break;
439 
440     // hh
441     case BuiltinType::Char_U:
442     case BuiltinType::UChar:
443     case BuiltinType::Char_S:
444     case BuiltinType::SChar:
445       LM.setKind(LengthModifier::AsChar);
446       break;
447 
448     // h
449     case BuiltinType::Short:
450     case BuiltinType::UShort:
451       LM.setKind(LengthModifier::AsShort);
452       break;
453 
454     // l
455     case BuiltinType::Long:
456     case BuiltinType::ULong:
457     case BuiltinType::Double:
458       LM.setKind(LengthModifier::AsLong);
459       break;
460 
461     // ll
462     case BuiltinType::LongLong:
463     case BuiltinType::ULongLong:
464       LM.setKind(LengthModifier::AsLongLong);
465       break;
466 
467     // L
468     case BuiltinType::LongDouble:
469       LM.setKind(LengthModifier::AsLongDouble);
470       break;
471 
472     // Don't know.
473     default:
474       return false;
475   }
476 
477   // Handle size_t, ptrdiff_t, etc. that have dedicated length modifiers in C99.
478   if (isa<TypedefType>(PT) && (LangOpt.C99 || LangOpt.CPlusPlus11))
479     namedTypeToLengthModifier(PT, LM);
480 
481   // If fixing the length modifier was enough, we are done.
482   if (hasValidLengthModifier(Ctx.getTargetInfo())) {
483     const analyze_scanf::ArgType &AT = getArgType(Ctx);
484     if (AT.isValid() && AT.matchesType(Ctx, QT))
485       return true;
486   }
487 
488   // Figure out the conversion specifier.
489   if (PT->isRealFloatingType())
490     CS.setKind(ConversionSpecifier::fArg);
491   else if (PT->isSignedIntegerType())
492     CS.setKind(ConversionSpecifier::dArg);
493   else if (PT->isUnsignedIntegerType())
494     CS.setKind(ConversionSpecifier::uArg);
495   else
496     llvm_unreachable("Unexpected type");
497 
498   return true;
499 }
500 
toString(raw_ostream & os) const501 void ScanfSpecifier::toString(raw_ostream &os) const {
502   os << "%";
503 
504   if (usesPositionalArg())
505     os << getPositionalArgIndex() << "$";
506   if (SuppressAssignment)
507     os << "*";
508 
509   FieldWidth.toString(os);
510   os << LM.toString();
511   os << CS.toString();
512 }
513 
ParseScanfString(FormatStringHandler & H,const char * I,const char * E,const LangOptions & LO,const TargetInfo & Target)514 bool clang::analyze_format_string::ParseScanfString(FormatStringHandler &H,
515                                                     const char *I,
516                                                     const char *E,
517                                                     const LangOptions &LO,
518                                                     const TargetInfo &Target) {
519 
520   unsigned argIndex = 0;
521 
522   // Keep looking for a format specifier until we have exhausted the string.
523   while (I != E) {
524     const ScanfSpecifierResult &FSR = ParseScanfSpecifier(H, I, E, argIndex,
525                                                           LO, Target);
526     // Did a fail-stop error of any kind occur when parsing the specifier?
527     // If so, don't do any more processing.
528     if (FSR.shouldStop())
529       return true;
530       // Did we exhaust the string or encounter an error that
531       // we can recover from?
532     if (!FSR.hasValue())
533       continue;
534       // We have a format specifier.  Pass it to the callback.
535     if (!H.HandleScanfSpecifier(FSR.getValue(), FSR.getStart(),
536                                 I - FSR.getStart())) {
537       return true;
538     }
539   }
540   assert(I == E && "Format string not exhausted");
541   return false;
542 }
543