//= ScanfFormatString.cpp - Analysis of scanf format strings ---*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
// Handling of format strings in scanf and friends.  The structure of format
// strings for fscanf() is described in C99 7.19.6.2.
12 //
13 //===----------------------------------------------------------------------===//
14
15 #include "clang/Analysis/Analyses/FormatString.h"
16 #include "FormatStringParsing.h"
17 #include "clang/Basic/TargetInfo.h"
18
19 using clang::analyze_format_string::ArgType;
20 using clang::analyze_format_string::FormatStringHandler;
21 using clang::analyze_format_string::LengthModifier;
22 using clang::analyze_format_string::OptionalAmount;
23 using clang::analyze_format_string::ConversionSpecifier;
24 using clang::analyze_scanf::ScanfConversionSpecifier;
25 using clang::analyze_scanf::ScanfSpecifier;
26 using clang::UpdateOnReturn;
27 using namespace clang;
28
29 typedef clang::analyze_format_string::SpecifierResult<ScanfSpecifier>
30 ScanfSpecifierResult;
31
ParseScanList(FormatStringHandler & H,ScanfConversionSpecifier & CS,const char * & Beg,const char * E)32 static bool ParseScanList(FormatStringHandler &H,
33 ScanfConversionSpecifier &CS,
34 const char *&Beg, const char *E) {
35 const char *I = Beg;
36 const char *start = I - 1;
37 UpdateOnReturn <const char*> UpdateBeg(Beg, I);
38
39 // No more characters?
40 if (I == E) {
41 H.HandleIncompleteScanList(start, I);
42 return true;
43 }
44
45 // Special case: ']' is the first character.
46 if (*I == ']') {
47 if (++I == E) {
48 H.HandleIncompleteScanList(start, I - 1);
49 return true;
50 }
51 }
52
53 // Special case: "^]" are the first characters.
54 if (I + 1 != E && I[0] == '^' && I[1] == ']') {
55 I += 2;
56 if (I == E) {
57 H.HandleIncompleteScanList(start, I - 1);
58 return true;
59 }
60 }
61
62 // Look for a ']' character which denotes the end of the scan list.
63 while (*I != ']') {
64 if (++I == E) {
65 H.HandleIncompleteScanList(start, I - 1);
66 return true;
67 }
68 }
69
70 CS.setEndScanList(I);
71 return false;
72 }
73
74 // FIXME: Much of this is copy-paste from ParsePrintfSpecifier.
75 // We can possibly refactor.
ParseScanfSpecifier(FormatStringHandler & H,const char * & Beg,const char * E,unsigned & argIndex,const LangOptions & LO,const TargetInfo & Target)76 static ScanfSpecifierResult ParseScanfSpecifier(FormatStringHandler &H,
77 const char *&Beg,
78 const char *E,
79 unsigned &argIndex,
80 const LangOptions &LO,
81 const TargetInfo &Target) {
82
83 using namespace clang::analyze_scanf;
84 const char *I = Beg;
85 const char *Start = nullptr;
86 UpdateOnReturn <const char*> UpdateBeg(Beg, I);
87
88 // Look for a '%' character that indicates the start of a format specifier.
89 for ( ; I != E ; ++I) {
90 char c = *I;
91 if (c == '\0') {
92 // Detect spurious null characters, which are likely errors.
93 H.HandleNullChar(I);
94 return true;
95 }
96 if (c == '%') {
97 Start = I++; // Record the start of the format specifier.
98 break;
99 }
100 }
101
102 // No format specifier found?
103 if (!Start)
104 return false;
105
106 if (I == E) {
107 // No more characters left?
108 H.HandleIncompleteSpecifier(Start, E - Start);
109 return true;
110 }
111
112 ScanfSpecifier FS;
113 if (ParseArgPosition(H, FS, Start, I, E))
114 return true;
115
116 if (I == E) {
117 // No more characters left?
118 H.HandleIncompleteSpecifier(Start, E - Start);
119 return true;
120 }
121
122 // Look for '*' flag if it is present.
123 if (*I == '*') {
124 FS.setSuppressAssignment(I);
125 if (++I == E) {
126 H.HandleIncompleteSpecifier(Start, E - Start);
127 return true;
128 }
129 }
130
131 // Look for the field width (if any). Unlike printf, this is either
132 // a fixed integer or isn't present.
133 const OptionalAmount &Amt = clang::analyze_format_string::ParseAmount(I, E);
134 if (Amt.getHowSpecified() != OptionalAmount::NotSpecified) {
135 assert(Amt.getHowSpecified() == OptionalAmount::Constant);
136 FS.setFieldWidth(Amt);
137
138 if (I == E) {
139 // No more characters left?
140 H.HandleIncompleteSpecifier(Start, E - Start);
141 return true;
142 }
143 }
144
145 // Look for the length modifier.
146 if (ParseLengthModifier(FS, I, E, LO, /*scanf=*/true) && I == E) {
147 // No more characters left?
148 H.HandleIncompleteSpecifier(Start, E - Start);
149 return true;
150 }
151
152 // Detect spurious null characters, which are likely errors.
153 if (*I == '\0') {
154 H.HandleNullChar(I);
155 return true;
156 }
157
158 // Finally, look for the conversion specifier.
159 const char *conversionPosition = I++;
160 ScanfConversionSpecifier::Kind k = ScanfConversionSpecifier::InvalidSpecifier;
161 switch (*conversionPosition) {
162 default:
163 break;
164 case '%': k = ConversionSpecifier::PercentArg; break;
165 case 'A': k = ConversionSpecifier::AArg; break;
166 case 'E': k = ConversionSpecifier::EArg; break;
167 case 'F': k = ConversionSpecifier::FArg; break;
168 case 'G': k = ConversionSpecifier::GArg; break;
169 case 'X': k = ConversionSpecifier::XArg; break;
170 case 'a': k = ConversionSpecifier::aArg; break;
171 case 'd': k = ConversionSpecifier::dArg; break;
172 case 'e': k = ConversionSpecifier::eArg; break;
173 case 'f': k = ConversionSpecifier::fArg; break;
174 case 'g': k = ConversionSpecifier::gArg; break;
175 case 'i': k = ConversionSpecifier::iArg; break;
176 case 'n': k = ConversionSpecifier::nArg; break;
177 case 'c': k = ConversionSpecifier::cArg; break;
178 case 'C': k = ConversionSpecifier::CArg; break;
179 case 'S': k = ConversionSpecifier::SArg; break;
180 case '[': k = ConversionSpecifier::ScanListArg; break;
181 case 'u': k = ConversionSpecifier::uArg; break;
182 case 'x': k = ConversionSpecifier::xArg; break;
183 case 'o': k = ConversionSpecifier::oArg; break;
184 case 's': k = ConversionSpecifier::sArg; break;
185 case 'p': k = ConversionSpecifier::pArg; break;
186 // Apple extensions
187 // Apple-specific
188 case 'D':
189 if (Target.getTriple().isOSDarwin())
190 k = ConversionSpecifier::DArg;
191 break;
192 case 'O':
193 if (Target.getTriple().isOSDarwin())
194 k = ConversionSpecifier::OArg;
195 break;
196 case 'U':
197 if (Target.getTriple().isOSDarwin())
198 k = ConversionSpecifier::UArg;
199 break;
200 }
201 ScanfConversionSpecifier CS(conversionPosition, k);
202 if (k == ScanfConversionSpecifier::ScanListArg) {
203 if (ParseScanList(H, CS, I, E))
204 return true;
205 }
206 FS.setConversionSpecifier(CS);
207 if (CS.consumesDataArgument() && !FS.getSuppressAssignment()
208 && !FS.usesPositionalArg())
209 FS.setArgIndex(argIndex++);
210
211 // FIXME: '%' and '*' doesn't make sense. Issue a warning.
212 // FIXME: 'ConsumedSoFar' and '*' doesn't make sense.
213
214 if (k == ScanfConversionSpecifier::InvalidSpecifier) {
215 // Assume the conversion takes one argument.
216 return !H.HandleInvalidScanfConversionSpecifier(FS, Beg, I - Beg);
217 }
218 return ScanfSpecifierResult(Start, FS);
219 }
220
getArgType(ASTContext & Ctx) const221 ArgType ScanfSpecifier::getArgType(ASTContext &Ctx) const {
222 const ScanfConversionSpecifier &CS = getConversionSpecifier();
223
224 if (!CS.consumesDataArgument())
225 return ArgType::Invalid();
226
227 switch(CS.getKind()) {
228 // Signed int.
229 case ConversionSpecifier::dArg:
230 case ConversionSpecifier::DArg:
231 case ConversionSpecifier::iArg:
232 switch (LM.getKind()) {
233 case LengthModifier::None:
234 return ArgType::PtrTo(Ctx.IntTy);
235 case LengthModifier::AsChar:
236 return ArgType::PtrTo(ArgType::AnyCharTy);
237 case LengthModifier::AsShort:
238 return ArgType::PtrTo(Ctx.ShortTy);
239 case LengthModifier::AsLong:
240 return ArgType::PtrTo(Ctx.LongTy);
241 case LengthModifier::AsLongLong:
242 case LengthModifier::AsQuad:
243 return ArgType::PtrTo(Ctx.LongLongTy);
244 case LengthModifier::AsInt64:
245 return ArgType::PtrTo(ArgType(Ctx.LongLongTy, "__int64"));
246 case LengthModifier::AsIntMax:
247 return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t"));
248 case LengthModifier::AsSizeT:
249 // FIXME: ssize_t.
250 return ArgType();
251 case LengthModifier::AsPtrDiff:
252 return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t"));
253 case LengthModifier::AsLongDouble:
254 // GNU extension.
255 return ArgType::PtrTo(Ctx.LongLongTy);
256 case LengthModifier::AsAllocate:
257 case LengthModifier::AsMAllocate:
258 case LengthModifier::AsInt32:
259 case LengthModifier::AsInt3264:
260 return ArgType::Invalid();
261 }
262
263 // Unsigned int.
264 case ConversionSpecifier::oArg:
265 case ConversionSpecifier::OArg:
266 case ConversionSpecifier::uArg:
267 case ConversionSpecifier::UArg:
268 case ConversionSpecifier::xArg:
269 case ConversionSpecifier::XArg:
270 switch (LM.getKind()) {
271 case LengthModifier::None:
272 return ArgType::PtrTo(Ctx.UnsignedIntTy);
273 case LengthModifier::AsChar:
274 return ArgType::PtrTo(Ctx.UnsignedCharTy);
275 case LengthModifier::AsShort:
276 return ArgType::PtrTo(Ctx.UnsignedShortTy);
277 case LengthModifier::AsLong:
278 return ArgType::PtrTo(Ctx.UnsignedLongTy);
279 case LengthModifier::AsLongLong:
280 case LengthModifier::AsQuad:
281 return ArgType::PtrTo(Ctx.UnsignedLongLongTy);
282 case LengthModifier::AsInt64:
283 return ArgType::PtrTo(ArgType(Ctx.UnsignedLongLongTy, "unsigned __int64"));
284 case LengthModifier::AsIntMax:
285 return ArgType::PtrTo(ArgType(Ctx.getUIntMaxType(), "uintmax_t"));
286 case LengthModifier::AsSizeT:
287 return ArgType::PtrTo(ArgType(Ctx.getSizeType(), "size_t"));
288 case LengthModifier::AsPtrDiff:
289 // FIXME: Unsigned version of ptrdiff_t?
290 return ArgType();
291 case LengthModifier::AsLongDouble:
292 // GNU extension.
293 return ArgType::PtrTo(Ctx.UnsignedLongLongTy);
294 case LengthModifier::AsAllocate:
295 case LengthModifier::AsMAllocate:
296 case LengthModifier::AsInt32:
297 case LengthModifier::AsInt3264:
298 return ArgType::Invalid();
299 }
300
301 // Float.
302 case ConversionSpecifier::aArg:
303 case ConversionSpecifier::AArg:
304 case ConversionSpecifier::eArg:
305 case ConversionSpecifier::EArg:
306 case ConversionSpecifier::fArg:
307 case ConversionSpecifier::FArg:
308 case ConversionSpecifier::gArg:
309 case ConversionSpecifier::GArg:
310 switch (LM.getKind()) {
311 case LengthModifier::None:
312 return ArgType::PtrTo(Ctx.FloatTy);
313 case LengthModifier::AsLong:
314 return ArgType::PtrTo(Ctx.DoubleTy);
315 case LengthModifier::AsLongDouble:
316 return ArgType::PtrTo(Ctx.LongDoubleTy);
317 default:
318 return ArgType::Invalid();
319 }
320
321 // Char, string and scanlist.
322 case ConversionSpecifier::cArg:
323 case ConversionSpecifier::sArg:
324 case ConversionSpecifier::ScanListArg:
325 switch (LM.getKind()) {
326 case LengthModifier::None:
327 return ArgType::PtrTo(ArgType::AnyCharTy);
328 case LengthModifier::AsLong:
329 return ArgType::PtrTo(ArgType(Ctx.getWideCharType(), "wchar_t"));
330 case LengthModifier::AsAllocate:
331 case LengthModifier::AsMAllocate:
332 return ArgType::PtrTo(ArgType::CStrTy);
333 default:
334 return ArgType::Invalid();
335 }
336 case ConversionSpecifier::CArg:
337 case ConversionSpecifier::SArg:
338 // FIXME: Mac OS X specific?
339 switch (LM.getKind()) {
340 case LengthModifier::None:
341 return ArgType::PtrTo(ArgType(Ctx.getWideCharType(), "wchar_t"));
342 case LengthModifier::AsAllocate:
343 case LengthModifier::AsMAllocate:
344 return ArgType::PtrTo(ArgType(ArgType::WCStrTy, "wchar_t *"));
345 default:
346 return ArgType::Invalid();
347 }
348
349 // Pointer.
350 case ConversionSpecifier::pArg:
351 return ArgType::PtrTo(ArgType::CPointerTy);
352
353 // Write-back.
354 case ConversionSpecifier::nArg:
355 switch (LM.getKind()) {
356 case LengthModifier::None:
357 return ArgType::PtrTo(Ctx.IntTy);
358 case LengthModifier::AsChar:
359 return ArgType::PtrTo(Ctx.SignedCharTy);
360 case LengthModifier::AsShort:
361 return ArgType::PtrTo(Ctx.ShortTy);
362 case LengthModifier::AsLong:
363 return ArgType::PtrTo(Ctx.LongTy);
364 case LengthModifier::AsLongLong:
365 case LengthModifier::AsQuad:
366 return ArgType::PtrTo(Ctx.LongLongTy);
367 case LengthModifier::AsInt64:
368 return ArgType::PtrTo(ArgType(Ctx.LongLongTy, "__int64"));
369 case LengthModifier::AsIntMax:
370 return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t"));
371 case LengthModifier::AsSizeT:
372 return ArgType(); // FIXME: ssize_t
373 case LengthModifier::AsPtrDiff:
374 return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t"));
375 case LengthModifier::AsLongDouble:
376 return ArgType(); // FIXME: Is this a known extension?
377 case LengthModifier::AsAllocate:
378 case LengthModifier::AsMAllocate:
379 case LengthModifier::AsInt32:
380 case LengthModifier::AsInt3264:
381 return ArgType::Invalid();
382 }
383
384 default:
385 break;
386 }
387
388 return ArgType();
389 }
390
fixType(QualType QT,QualType RawQT,const LangOptions & LangOpt,ASTContext & Ctx)391 bool ScanfSpecifier::fixType(QualType QT, QualType RawQT,
392 const LangOptions &LangOpt,
393 ASTContext &Ctx) {
394
395 // %n is different from other conversion specifiers; don't try to fix it.
396 if (CS.getKind() == ConversionSpecifier::nArg)
397 return false;
398
399 if (!QT->isPointerType())
400 return false;
401
402 QualType PT = QT->getPointeeType();
403
404 // If it's an enum, get its underlying type.
405 if (const EnumType *ETy = PT->getAs<EnumType>())
406 PT = ETy->getDecl()->getIntegerType();
407
408 const BuiltinType *BT = PT->getAs<BuiltinType>();
409 if (!BT)
410 return false;
411
412 // Pointer to a character.
413 if (PT->isAnyCharacterType()) {
414 CS.setKind(ConversionSpecifier::sArg);
415 if (PT->isWideCharType())
416 LM.setKind(LengthModifier::AsWideChar);
417 else
418 LM.setKind(LengthModifier::None);
419
420 // If we know the target array length, we can use it as a field width.
421 if (const ConstantArrayType *CAT = Ctx.getAsConstantArrayType(RawQT)) {
422 if (CAT->getSizeModifier() == ArrayType::Normal)
423 FieldWidth = OptionalAmount(OptionalAmount::Constant,
424 CAT->getSize().getZExtValue() - 1,
425 "", 0, false);
426
427 }
428 return true;
429 }
430
431 // Figure out the length modifier.
432 switch (BT->getKind()) {
433 // no modifier
434 case BuiltinType::UInt:
435 case BuiltinType::Int:
436 case BuiltinType::Float:
437 LM.setKind(LengthModifier::None);
438 break;
439
440 // hh
441 case BuiltinType::Char_U:
442 case BuiltinType::UChar:
443 case BuiltinType::Char_S:
444 case BuiltinType::SChar:
445 LM.setKind(LengthModifier::AsChar);
446 break;
447
448 // h
449 case BuiltinType::Short:
450 case BuiltinType::UShort:
451 LM.setKind(LengthModifier::AsShort);
452 break;
453
454 // l
455 case BuiltinType::Long:
456 case BuiltinType::ULong:
457 case BuiltinType::Double:
458 LM.setKind(LengthModifier::AsLong);
459 break;
460
461 // ll
462 case BuiltinType::LongLong:
463 case BuiltinType::ULongLong:
464 LM.setKind(LengthModifier::AsLongLong);
465 break;
466
467 // L
468 case BuiltinType::LongDouble:
469 LM.setKind(LengthModifier::AsLongDouble);
470 break;
471
472 // Don't know.
473 default:
474 return false;
475 }
476
477 // Handle size_t, ptrdiff_t, etc. that have dedicated length modifiers in C99.
478 if (isa<TypedefType>(PT) && (LangOpt.C99 || LangOpt.CPlusPlus11))
479 namedTypeToLengthModifier(PT, LM);
480
481 // If fixing the length modifier was enough, we are done.
482 if (hasValidLengthModifier(Ctx.getTargetInfo())) {
483 const analyze_scanf::ArgType &AT = getArgType(Ctx);
484 if (AT.isValid() && AT.matchesType(Ctx, QT))
485 return true;
486 }
487
488 // Figure out the conversion specifier.
489 if (PT->isRealFloatingType())
490 CS.setKind(ConversionSpecifier::fArg);
491 else if (PT->isSignedIntegerType())
492 CS.setKind(ConversionSpecifier::dArg);
493 else if (PT->isUnsignedIntegerType())
494 CS.setKind(ConversionSpecifier::uArg);
495 else
496 llvm_unreachable("Unexpected type");
497
498 return true;
499 }
500
toString(raw_ostream & os) const501 void ScanfSpecifier::toString(raw_ostream &os) const {
502 os << "%";
503
504 if (usesPositionalArg())
505 os << getPositionalArgIndex() << "$";
506 if (SuppressAssignment)
507 os << "*";
508
509 FieldWidth.toString(os);
510 os << LM.toString();
511 os << CS.toString();
512 }
513
ParseScanfString(FormatStringHandler & H,const char * I,const char * E,const LangOptions & LO,const TargetInfo & Target)514 bool clang::analyze_format_string::ParseScanfString(FormatStringHandler &H,
515 const char *I,
516 const char *E,
517 const LangOptions &LO,
518 const TargetInfo &Target) {
519
520 unsigned argIndex = 0;
521
522 // Keep looking for a format specifier until we have exhausted the string.
523 while (I != E) {
524 const ScanfSpecifierResult &FSR = ParseScanfSpecifier(H, I, E, argIndex,
525 LO, Target);
526 // Did a fail-stop error of any kind occur when parsing the specifier?
527 // If so, don't do any more processing.
528 if (FSR.shouldStop())
529 return true;
530 // Did we exhaust the string or encounter an error that
531 // we can recover from?
532 if (!FSR.hasValue())
533 continue;
534 // We have a format specifier. Pass it to the callback.
535 if (!H.HandleScanfSpecifier(FSR.getValue(), FSR.getStart(),
536 I - FSR.getStart())) {
537 return true;
538 }
539 }
540 assert(I == E && "Format string not exhausted");
541 return false;
542 }
543