1 //== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This checker defines the attack surface for generic taint propagation.
11 //
// The taint information produced by it might be useful to other checkers. For
// example, checkers should report errors which involve tainted data more
// aggressively, even if the involved symbols are under-constrained.
15 //
16 //===----------------------------------------------------------------------===//
17 #include "ClangSACheckers.h"
18 #include "clang/AST/Attr.h"
19 #include "clang/Basic/Builtins.h"
20 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
21 #include "clang/StaticAnalyzer/Core/Checker.h"
22 #include "clang/StaticAnalyzer/Core/CheckerManager.h"
23 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
24 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
25 #include <climits>
26 
27 using namespace clang;
28 using namespace ento;
29 
30 namespace {
31 class GenericTaintChecker : public Checker< check::PostStmt<CallExpr>,
32                                             check::PreStmt<CallExpr> > {
33 public:
getTag()34   static void *getTag() { static int Tag; return &Tag; }
35 
36   void checkPostStmt(const CallExpr *CE, CheckerContext &C) const;
37 
38   void checkPreStmt(const CallExpr *CE, CheckerContext &C) const;
39 
40 private:
41   static const unsigned InvalidArgIndex = UINT_MAX;
42   /// Denotes the return vale.
43   static const unsigned ReturnValueIndex = UINT_MAX - 1;
44 
45   mutable std::unique_ptr<BugType> BT;
initBugType() const46   inline void initBugType() const {
47     if (!BT)
48       BT.reset(new BugType(this, "Use of Untrusted Data", "Untrusted Data"));
49   }
50 
51   /// \brief Catch taint related bugs. Check if tainted data is passed to a
52   /// system call etc.
53   bool checkPre(const CallExpr *CE, CheckerContext &C) const;
54 
55   /// \brief Add taint sources on a pre-visit.
56   void addSourcesPre(const CallExpr *CE, CheckerContext &C) const;
57 
58   /// \brief Propagate taint generated at pre-visit.
59   bool propagateFromPre(const CallExpr *CE, CheckerContext &C) const;
60 
61   /// \brief Add taint sources on a post visit.
62   void addSourcesPost(const CallExpr *CE, CheckerContext &C) const;
63 
64   /// Check if the region the expression evaluates to is the standard input,
65   /// and thus, is tainted.
66   static bool isStdin(const Expr *E, CheckerContext &C);
67 
68   /// \brief Given a pointer argument, get the symbol of the value it contains
69   /// (points to).
70   static SymbolRef getPointedToSymbol(CheckerContext &C, const Expr *Arg);
71 
72   /// Functions defining the attack surface.
73   typedef ProgramStateRef (GenericTaintChecker::*FnCheck)(const CallExpr *,
74                                                        CheckerContext &C) const;
75   ProgramStateRef postScanf(const CallExpr *CE, CheckerContext &C) const;
76   ProgramStateRef postSocket(const CallExpr *CE, CheckerContext &C) const;
77   ProgramStateRef postRetTaint(const CallExpr *CE, CheckerContext &C) const;
78 
79   /// Taint the scanned input if the file is tainted.
80   ProgramStateRef preFscanf(const CallExpr *CE, CheckerContext &C) const;
81 
82   /// Check for CWE-134: Uncontrolled Format String.
83   static const char MsgUncontrolledFormatString[];
84   bool checkUncontrolledFormatString(const CallExpr *CE,
85                                      CheckerContext &C) const;
86 
87   /// Check for:
88   /// CERT/STR02-C. "Sanitize data passed to complex subsystems"
89   /// CWE-78, "Failure to Sanitize Data into an OS Command"
90   static const char MsgSanitizeSystemArgs[];
91   bool checkSystemCall(const CallExpr *CE, StringRef Name,
92                        CheckerContext &C) const;
93 
94   /// Check if tainted data is used as a buffer size ins strn.. functions,
95   /// and allocators.
96   static const char MsgTaintedBufferSize[];
97   bool checkTaintedBufferSize(const CallExpr *CE, const FunctionDecl *FDecl,
98                               CheckerContext &C) const;
99 
100   /// Generate a report if the expression is tainted or points to tainted data.
101   bool generateReportIfTainted(const Expr *E, const char Msg[],
102                                CheckerContext &C) const;
103 
104 
105   typedef SmallVector<unsigned, 2> ArgVector;
106 
107   /// \brief A struct used to specify taint propagation rules for a function.
108   ///
109   /// If any of the possible taint source arguments is tainted, all of the
110   /// destination arguments should also be tainted. Use InvalidArgIndex in the
111   /// src list to specify that all of the arguments can introduce taint. Use
112   /// InvalidArgIndex in the dst arguments to signify that all the non-const
113   /// pointer and reference arguments might be tainted on return. If
114   /// ReturnValueIndex is added to the dst list, the return value will be
115   /// tainted.
116   struct TaintPropagationRule {
117     /// List of arguments which can be taint sources and should be checked.
118     ArgVector SrcArgs;
119     /// List of arguments which should be tainted on function return.
120     ArgVector DstArgs;
121     // TODO: Check if using other data structures would be more optimal.
122 
TaintPropagationRule__anon6a4756a20111::GenericTaintChecker::TaintPropagationRule123     TaintPropagationRule() {}
124 
TaintPropagationRule__anon6a4756a20111::GenericTaintChecker::TaintPropagationRule125     TaintPropagationRule(unsigned SArg,
126                          unsigned DArg, bool TaintRet = false) {
127       SrcArgs.push_back(SArg);
128       DstArgs.push_back(DArg);
129       if (TaintRet)
130         DstArgs.push_back(ReturnValueIndex);
131     }
132 
TaintPropagationRule__anon6a4756a20111::GenericTaintChecker::TaintPropagationRule133     TaintPropagationRule(unsigned SArg1, unsigned SArg2,
134                          unsigned DArg, bool TaintRet = false) {
135       SrcArgs.push_back(SArg1);
136       SrcArgs.push_back(SArg2);
137       DstArgs.push_back(DArg);
138       if (TaintRet)
139         DstArgs.push_back(ReturnValueIndex);
140     }
141 
142     /// Get the propagation rule for a given function.
143     static TaintPropagationRule
144       getTaintPropagationRule(const FunctionDecl *FDecl,
145                               StringRef Name,
146                               CheckerContext &C);
147 
addSrcArg__anon6a4756a20111::GenericTaintChecker::TaintPropagationRule148     inline void addSrcArg(unsigned A) { SrcArgs.push_back(A); }
addDstArg__anon6a4756a20111::GenericTaintChecker::TaintPropagationRule149     inline void addDstArg(unsigned A)  { DstArgs.push_back(A); }
150 
isNull__anon6a4756a20111::GenericTaintChecker::TaintPropagationRule151     inline bool isNull() const { return SrcArgs.empty(); }
152 
isDestinationArgument__anon6a4756a20111::GenericTaintChecker::TaintPropagationRule153     inline bool isDestinationArgument(unsigned ArgNum) const {
154       return (std::find(DstArgs.begin(),
155                         DstArgs.end(), ArgNum) != DstArgs.end());
156     }
157 
isTaintedOrPointsToTainted__anon6a4756a20111::GenericTaintChecker::TaintPropagationRule158     static inline bool isTaintedOrPointsToTainted(const Expr *E,
159                                                   ProgramStateRef State,
160                                                   CheckerContext &C) {
161       return (State->isTainted(E, C.getLocationContext()) || isStdin(E, C) ||
162               (E->getType().getTypePtr()->isPointerType() &&
163                State->isTainted(getPointedToSymbol(C, E))));
164     }
165 
166     /// \brief Pre-process a function which propagates taint according to the
167     /// taint rule.
168     ProgramStateRef process(const CallExpr *CE, CheckerContext &C) const;
169 
170   };
171 };
172 
173 const unsigned GenericTaintChecker::ReturnValueIndex;
174 const unsigned GenericTaintChecker::InvalidArgIndex;
175 
176 const char GenericTaintChecker::MsgUncontrolledFormatString[] =
177   "Untrusted data is used as a format string "
178   "(CWE-134: Uncontrolled Format String)";
179 
180 const char GenericTaintChecker::MsgSanitizeSystemArgs[] =
181   "Untrusted data is passed to a system call "
182   "(CERT/STR02-C. Sanitize data passed to complex subsystems)";
183 
184 const char GenericTaintChecker::MsgTaintedBufferSize[] =
185   "Untrusted data is used to specify the buffer size "
186   "(CERT/STR31-C. Guarantee that storage for strings has sufficient space for "
187   "character data and the null terminator)";
188 
189 } // end of anonymous namespace
190 
/// A set which is used to pass information from the call pre-visit to the
/// call post-visit. The values are unsigned integers, which are either
/// ReturnValueIndex or indexes of the pointer/reference arguments that point
/// to data which should be tainted on return.
REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, unsigned)
196 
197 GenericTaintChecker::TaintPropagationRule
198 GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule(
199                                                      const FunctionDecl *FDecl,
200                                                      StringRef Name,
201                                                      CheckerContext &C) {
202   // TODO: Currently, we might lose precision here: we always mark a return
203   // value as tainted even if it's just a pointer, pointing to tainted data.
204 
205   // Check for exact name match for functions without builtin substitutes.
206   TaintPropagationRule Rule = llvm::StringSwitch<TaintPropagationRule>(Name)
207     .Case("atoi", TaintPropagationRule(0, ReturnValueIndex))
208     .Case("atol", TaintPropagationRule(0, ReturnValueIndex))
209     .Case("atoll", TaintPropagationRule(0, ReturnValueIndex))
210     .Case("getc", TaintPropagationRule(0, ReturnValueIndex))
211     .Case("fgetc", TaintPropagationRule(0, ReturnValueIndex))
212     .Case("getc_unlocked", TaintPropagationRule(0, ReturnValueIndex))
213     .Case("getw", TaintPropagationRule(0, ReturnValueIndex))
214     .Case("toupper", TaintPropagationRule(0, ReturnValueIndex))
215     .Case("tolower", TaintPropagationRule(0, ReturnValueIndex))
216     .Case("strchr", TaintPropagationRule(0, ReturnValueIndex))
217     .Case("strrchr", TaintPropagationRule(0, ReturnValueIndex))
218     .Case("read", TaintPropagationRule(0, 2, 1, true))
219     .Case("pread", TaintPropagationRule(InvalidArgIndex, 1, true))
220     .Case("gets", TaintPropagationRule(InvalidArgIndex, 0, true))
221     .Case("fgets", TaintPropagationRule(2, 0, true))
222     .Case("getline", TaintPropagationRule(2, 0))
223     .Case("getdelim", TaintPropagationRule(3, 0))
224     .Case("fgetln", TaintPropagationRule(0, ReturnValueIndex))
225     .Default(TaintPropagationRule());
226 
227   if (!Rule.isNull())
228     return Rule;
229 
230   // Check if it's one of the memory setting/copying functions.
231   // This check is specialized but faster then calling isCLibraryFunction.
232   unsigned BId = 0;
233   if ( (BId = FDecl->getMemoryFunctionKind()) )
234     switch(BId) {
235     case Builtin::BImemcpy:
236     case Builtin::BImemmove:
237     case Builtin::BIstrncpy:
238     case Builtin::BIstrncat:
239       return TaintPropagationRule(1, 2, 0, true);
240     case Builtin::BIstrlcpy:
241     case Builtin::BIstrlcat:
242       return TaintPropagationRule(1, 2, 0, false);
243     case Builtin::BIstrndup:
244       return TaintPropagationRule(0, 1, ReturnValueIndex);
245 
246     default:
247       break;
248     };
249 
250   // Process all other functions which could be defined as builtins.
251   if (Rule.isNull()) {
252     if (C.isCLibraryFunction(FDecl, "snprintf") ||
253         C.isCLibraryFunction(FDecl, "sprintf"))
254       return TaintPropagationRule(InvalidArgIndex, 0, true);
255     else if (C.isCLibraryFunction(FDecl, "strcpy") ||
256              C.isCLibraryFunction(FDecl, "stpcpy") ||
257              C.isCLibraryFunction(FDecl, "strcat"))
258       return TaintPropagationRule(1, 0, true);
259     else if (C.isCLibraryFunction(FDecl, "bcopy"))
260       return TaintPropagationRule(0, 2, 1, false);
261     else if (C.isCLibraryFunction(FDecl, "strdup") ||
262              C.isCLibraryFunction(FDecl, "strdupa"))
263       return TaintPropagationRule(0, ReturnValueIndex);
264     else if (C.isCLibraryFunction(FDecl, "wcsdup"))
265       return TaintPropagationRule(0, ReturnValueIndex);
266   }
267 
268   // Skipping the following functions, since they might be used for cleansing
269   // or smart memory copy:
270   // - memccpy - copying until hitting a special character.
271 
272   return TaintPropagationRule();
273 }
274 
checkPreStmt(const CallExpr * CE,CheckerContext & C) const275 void GenericTaintChecker::checkPreStmt(const CallExpr *CE,
276                                        CheckerContext &C) const {
277   // Check for errors first.
278   if (checkPre(CE, C))
279     return;
280 
281   // Add taint second.
282   addSourcesPre(CE, C);
283 }
284 
checkPostStmt(const CallExpr * CE,CheckerContext & C) const285 void GenericTaintChecker::checkPostStmt(const CallExpr *CE,
286                                         CheckerContext &C) const {
287   if (propagateFromPre(CE, C))
288     return;
289   addSourcesPost(CE, C);
290 }
291 
addSourcesPre(const CallExpr * CE,CheckerContext & C) const292 void GenericTaintChecker::addSourcesPre(const CallExpr *CE,
293                                         CheckerContext &C) const {
294   ProgramStateRef State = nullptr;
295   const FunctionDecl *FDecl = C.getCalleeDecl(CE);
296   if (!FDecl || FDecl->getKind() != Decl::Function)
297     return;
298 
299   StringRef Name = C.getCalleeName(FDecl);
300   if (Name.empty())
301     return;
302 
303   // First, try generating a propagation rule for this function.
304   TaintPropagationRule Rule =
305     TaintPropagationRule::getTaintPropagationRule(FDecl, Name, C);
306   if (!Rule.isNull()) {
307     State = Rule.process(CE, C);
308     if (!State)
309       return;
310     C.addTransition(State);
311     return;
312   }
313 
314   // Otherwise, check if we have custom pre-processing implemented.
315   FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
316     .Case("fscanf", &GenericTaintChecker::preFscanf)
317     .Default(nullptr);
318   // Check and evaluate the call.
319   if (evalFunction)
320     State = (this->*evalFunction)(CE, C);
321   if (!State)
322     return;
323   C.addTransition(State);
324 
325 }
326 
propagateFromPre(const CallExpr * CE,CheckerContext & C) const327 bool GenericTaintChecker::propagateFromPre(const CallExpr *CE,
328                                            CheckerContext &C) const {
329   ProgramStateRef State = C.getState();
330 
331   // Depending on what was tainted at pre-visit, we determined a set of
332   // arguments which should be tainted after the function returns. These are
333   // stored in the state as TaintArgsOnPostVisit set.
334   TaintArgsOnPostVisitTy TaintArgs = State->get<TaintArgsOnPostVisit>();
335   if (TaintArgs.isEmpty())
336     return false;
337 
338   for (llvm::ImmutableSet<unsigned>::iterator
339          I = TaintArgs.begin(), E = TaintArgs.end(); I != E; ++I) {
340     unsigned ArgNum  = *I;
341 
342     // Special handling for the tainted return value.
343     if (ArgNum == ReturnValueIndex) {
344       State = State->addTaint(CE, C.getLocationContext());
345       continue;
346     }
347 
348     // The arguments are pointer arguments. The data they are pointing at is
349     // tainted after the call.
350     if (CE->getNumArgs() < (ArgNum + 1))
351       return false;
352     const Expr* Arg = CE->getArg(ArgNum);
353     SymbolRef Sym = getPointedToSymbol(C, Arg);
354     if (Sym)
355       State = State->addTaint(Sym);
356   }
357 
358   // Clear up the taint info from the state.
359   State = State->remove<TaintArgsOnPostVisit>();
360 
361   if (State != C.getState()) {
362     C.addTransition(State);
363     return true;
364   }
365   return false;
366 }
367 
addSourcesPost(const CallExpr * CE,CheckerContext & C) const368 void GenericTaintChecker::addSourcesPost(const CallExpr *CE,
369                                          CheckerContext &C) const {
370   // Define the attack surface.
371   // Set the evaluation function by switching on the callee name.
372   const FunctionDecl *FDecl = C.getCalleeDecl(CE);
373   if (!FDecl || FDecl->getKind() != Decl::Function)
374     return;
375 
376   StringRef Name = C.getCalleeName(FDecl);
377   if (Name.empty())
378     return;
379   FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
380     .Case("scanf", &GenericTaintChecker::postScanf)
381     // TODO: Add support for vfscanf & family.
382     .Case("getchar", &GenericTaintChecker::postRetTaint)
383     .Case("getchar_unlocked", &GenericTaintChecker::postRetTaint)
384     .Case("getenv", &GenericTaintChecker::postRetTaint)
385     .Case("fopen", &GenericTaintChecker::postRetTaint)
386     .Case("fdopen", &GenericTaintChecker::postRetTaint)
387     .Case("freopen", &GenericTaintChecker::postRetTaint)
388     .Case("getch", &GenericTaintChecker::postRetTaint)
389     .Case("wgetch", &GenericTaintChecker::postRetTaint)
390     .Case("socket", &GenericTaintChecker::postSocket)
391     .Default(nullptr);
392 
393   // If the callee isn't defined, it is not of security concern.
394   // Check and evaluate the call.
395   ProgramStateRef State = nullptr;
396   if (evalFunction)
397     State = (this->*evalFunction)(CE, C);
398   if (!State)
399     return;
400 
401   C.addTransition(State);
402 }
403 
checkPre(const CallExpr * CE,CheckerContext & C) const404 bool GenericTaintChecker::checkPre(const CallExpr *CE, CheckerContext &C) const{
405 
406   if (checkUncontrolledFormatString(CE, C))
407     return true;
408 
409   const FunctionDecl *FDecl = C.getCalleeDecl(CE);
410   if (!FDecl || FDecl->getKind() != Decl::Function)
411     return false;
412 
413   StringRef Name = C.getCalleeName(FDecl);
414   if (Name.empty())
415     return false;
416 
417   if (checkSystemCall(CE, Name, C))
418     return true;
419 
420   if (checkTaintedBufferSize(CE, FDecl, C))
421     return true;
422 
423   return false;
424 }
425 
getPointedToSymbol(CheckerContext & C,const Expr * Arg)426 SymbolRef GenericTaintChecker::getPointedToSymbol(CheckerContext &C,
427                                                   const Expr* Arg) {
428   ProgramStateRef State = C.getState();
429   SVal AddrVal = State->getSVal(Arg->IgnoreParens(), C.getLocationContext());
430   if (AddrVal.isUnknownOrUndef())
431     return nullptr;
432 
433   Optional<Loc> AddrLoc = AddrVal.getAs<Loc>();
434   if (!AddrLoc)
435     return nullptr;
436 
437   const PointerType *ArgTy =
438     dyn_cast<PointerType>(Arg->getType().getCanonicalType().getTypePtr());
439   SVal Val = State->getSVal(*AddrLoc,
440                             ArgTy ? ArgTy->getPointeeType(): QualType());
441   return Val.getAsSymbol();
442 }
443 
444 ProgramStateRef
process(const CallExpr * CE,CheckerContext & C) const445 GenericTaintChecker::TaintPropagationRule::process(const CallExpr *CE,
446                                                    CheckerContext &C) const {
447   ProgramStateRef State = C.getState();
448 
449   // Check for taint in arguments.
450   bool IsTainted = false;
451   for (ArgVector::const_iterator I = SrcArgs.begin(),
452                                  E = SrcArgs.end(); I != E; ++I) {
453     unsigned ArgNum = *I;
454 
455     if (ArgNum == InvalidArgIndex) {
456       // Check if any of the arguments is tainted, but skip the
457       // destination arguments.
458       for (unsigned int i = 0; i < CE->getNumArgs(); ++i) {
459         if (isDestinationArgument(i))
460           continue;
461         if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(i), State, C)))
462           break;
463       }
464       break;
465     }
466 
467     if (CE->getNumArgs() < (ArgNum + 1))
468       return State;
469     if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(ArgNum), State, C)))
470       break;
471   }
472   if (!IsTainted)
473     return State;
474 
475   // Mark the arguments which should be tainted after the function returns.
476   for (ArgVector::const_iterator I = DstArgs.begin(),
477                                  E = DstArgs.end(); I != E; ++I) {
478     unsigned ArgNum = *I;
479 
480     // Should we mark all arguments as tainted?
481     if (ArgNum == InvalidArgIndex) {
482       // For all pointer and references that were passed in:
483       //   If they are not pointing to const data, mark data as tainted.
484       //   TODO: So far we are just going one level down; ideally we'd need to
485       //         recurse here.
486       for (unsigned int i = 0; i < CE->getNumArgs(); ++i) {
487         const Expr *Arg = CE->getArg(i);
488         // Process pointer argument.
489         const Type *ArgTy = Arg->getType().getTypePtr();
490         QualType PType = ArgTy->getPointeeType();
491         if ((!PType.isNull() && !PType.isConstQualified())
492             || (ArgTy->isReferenceType() && !Arg->getType().isConstQualified()))
493           State = State->add<TaintArgsOnPostVisit>(i);
494       }
495       continue;
496     }
497 
498     // Should mark the return value?
499     if (ArgNum == ReturnValueIndex) {
500       State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex);
501       continue;
502     }
503 
504     // Mark the given argument.
505     assert(ArgNum < CE->getNumArgs());
506     State = State->add<TaintArgsOnPostVisit>(ArgNum);
507   }
508 
509   return State;
510 }
511 
512 
513 // If argument 0 (file descriptor) is tainted, all arguments except for arg 0
514 // and arg 1 should get taint.
preFscanf(const CallExpr * CE,CheckerContext & C) const515 ProgramStateRef GenericTaintChecker::preFscanf(const CallExpr *CE,
516                                                    CheckerContext &C) const {
517   assert(CE->getNumArgs() >= 2);
518   ProgramStateRef State = C.getState();
519 
520   // Check is the file descriptor is tainted.
521   if (State->isTainted(CE->getArg(0), C.getLocationContext()) ||
522       isStdin(CE->getArg(0), C)) {
523     // All arguments except for the first two should get taint.
524     for (unsigned int i = 2; i < CE->getNumArgs(); ++i)
525         State = State->add<TaintArgsOnPostVisit>(i);
526     return State;
527   }
528 
529   return nullptr;
530 }
531 
532 
533 // If argument 0(protocol domain) is network, the return value should get taint.
postSocket(const CallExpr * CE,CheckerContext & C) const534 ProgramStateRef GenericTaintChecker::postSocket(const CallExpr *CE,
535                                                 CheckerContext &C) const {
536   ProgramStateRef State = C.getState();
537   if (CE->getNumArgs() < 3)
538     return State;
539 
540   SourceLocation DomLoc = CE->getArg(0)->getExprLoc();
541   StringRef DomName = C.getMacroNameOrSpelling(DomLoc);
542   // White list the internal communication protocols.
543   if (DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") ||
544       DomName.equals("AF_UNIX") || DomName.equals("AF_RESERVED_36"))
545     return State;
546   State = State->addTaint(CE, C.getLocationContext());
547   return State;
548 }
549 
postScanf(const CallExpr * CE,CheckerContext & C) const550 ProgramStateRef GenericTaintChecker::postScanf(const CallExpr *CE,
551                                                    CheckerContext &C) const {
552   ProgramStateRef State = C.getState();
553   if (CE->getNumArgs() < 2)
554     return State;
555 
556   // All arguments except for the very first one should get taint.
557   for (unsigned int i = 1; i < CE->getNumArgs(); ++i) {
558     // The arguments are pointer arguments. The data they are pointing at is
559     // tainted after the call.
560     const Expr* Arg = CE->getArg(i);
561         SymbolRef Sym = getPointedToSymbol(C, Arg);
562     if (Sym)
563       State = State->addTaint(Sym);
564   }
565   return State;
566 }
567 
postRetTaint(const CallExpr * CE,CheckerContext & C) const568 ProgramStateRef GenericTaintChecker::postRetTaint(const CallExpr *CE,
569                                                   CheckerContext &C) const {
570   return C.getState()->addTaint(CE, C.getLocationContext());
571 }
572 
isStdin(const Expr * E,CheckerContext & C)573 bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) {
574   ProgramStateRef State = C.getState();
575   SVal Val = State->getSVal(E, C.getLocationContext());
576 
577   // stdin is a pointer, so it would be a region.
578   const MemRegion *MemReg = Val.getAsRegion();
579 
580   // The region should be symbolic, we do not know it's value.
581   const SymbolicRegion *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg);
582   if (!SymReg)
583     return false;
584 
585   // Get it's symbol and find the declaration region it's pointing to.
586   const SymbolRegionValue *Sm =dyn_cast<SymbolRegionValue>(SymReg->getSymbol());
587   if (!Sm)
588     return false;
589   const DeclRegion *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion());
590   if (!DeclReg)
591     return false;
592 
593   // This region corresponds to a declaration, find out if it's a global/extern
594   // variable named stdin with the proper type.
595   if (const VarDecl *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) {
596     D = D->getCanonicalDecl();
597     if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC())
598         if (const PointerType * PtrTy =
599               dyn_cast<PointerType>(D->getType().getTypePtr()))
600           if (PtrTy->getPointeeType() == C.getASTContext().getFILEType())
601             return true;
602   }
603   return false;
604 }
605 
getPrintfFormatArgumentNum(const CallExpr * CE,const CheckerContext & C,unsigned int & ArgNum)606 static bool getPrintfFormatArgumentNum(const CallExpr *CE,
607                                        const CheckerContext &C,
608                                        unsigned int &ArgNum) {
609   // Find if the function contains a format string argument.
610   // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf,
611   // vsnprintf, syslog, custom annotated functions.
612   const FunctionDecl *FDecl = C.getCalleeDecl(CE);
613   if (!FDecl)
614     return false;
615   for (const auto *Format : FDecl->specific_attrs<FormatAttr>()) {
616     ArgNum = Format->getFormatIdx() - 1;
617     if ((Format->getType()->getName() == "printf") &&
618          CE->getNumArgs() > ArgNum)
619       return true;
620   }
621 
622   // Or if a function is named setproctitle (this is a heuristic).
623   if (C.getCalleeName(CE).find("setproctitle") != StringRef::npos) {
624     ArgNum = 0;
625     return true;
626   }
627 
628   return false;
629 }
630 
generateReportIfTainted(const Expr * E,const char Msg[],CheckerContext & C) const631 bool GenericTaintChecker::generateReportIfTainted(const Expr *E,
632                                                   const char Msg[],
633                                                   CheckerContext &C) const {
634   assert(E);
635 
636   // Check for taint.
637   ProgramStateRef State = C.getState();
638   if (!State->isTainted(getPointedToSymbol(C, E)) &&
639       !State->isTainted(E, C.getLocationContext()))
640     return false;
641 
642   // Generate diagnostic.
643   if (ExplodedNode *N = C.generateNonFatalErrorNode()) {
644     initBugType();
645     auto report = llvm::make_unique<BugReport>(*BT, Msg, N);
646     report->addRange(E->getSourceRange());
647     C.emitReport(std::move(report));
648     return true;
649   }
650   return false;
651 }
652 
checkUncontrolledFormatString(const CallExpr * CE,CheckerContext & C) const653 bool GenericTaintChecker::checkUncontrolledFormatString(const CallExpr *CE,
654                                                         CheckerContext &C) const{
655   // Check if the function contains a format string argument.
656   unsigned int ArgNum = 0;
657   if (!getPrintfFormatArgumentNum(CE, C, ArgNum))
658     return false;
659 
660   // If either the format string content or the pointer itself are tainted, warn.
661   return generateReportIfTainted(CE->getArg(ArgNum),
662                                  MsgUncontrolledFormatString, C);
663 }
664 
checkSystemCall(const CallExpr * CE,StringRef Name,CheckerContext & C) const665 bool GenericTaintChecker::checkSystemCall(const CallExpr *CE,
666                                           StringRef Name,
667                                           CheckerContext &C) const {
668   // TODO: It might make sense to run this check on demand. In some cases,
669   // we should check if the environment has been cleansed here. We also might
670   // need to know if the user was reset before these calls(seteuid).
671   unsigned ArgNum = llvm::StringSwitch<unsigned>(Name)
672     .Case("system", 0)
673     .Case("popen", 0)
674     .Case("execl", 0)
675     .Case("execle", 0)
676     .Case("execlp", 0)
677     .Case("execv", 0)
678     .Case("execvp", 0)
679     .Case("execvP", 0)
680     .Case("execve", 0)
681     .Case("dlopen", 0)
682     .Default(UINT_MAX);
683 
684   if (ArgNum == UINT_MAX || CE->getNumArgs() < (ArgNum + 1))
685     return false;
686 
687   return generateReportIfTainted(CE->getArg(ArgNum), MsgSanitizeSystemArgs, C);
688 }
689 
690 // TODO: Should this check be a part of the CString checker?
691 // If yes, should taint be a global setting?
checkTaintedBufferSize(const CallExpr * CE,const FunctionDecl * FDecl,CheckerContext & C) const692 bool GenericTaintChecker::checkTaintedBufferSize(const CallExpr *CE,
693                                                  const FunctionDecl *FDecl,
694                                                  CheckerContext &C) const {
695   // If the function has a buffer size argument, set ArgNum.
696   unsigned ArgNum = InvalidArgIndex;
697   unsigned BId = 0;
698   if ( (BId = FDecl->getMemoryFunctionKind()) )
699     switch(BId) {
700     case Builtin::BImemcpy:
701     case Builtin::BImemmove:
702     case Builtin::BIstrncpy:
703       ArgNum = 2;
704       break;
705     case Builtin::BIstrndup:
706       ArgNum = 1;
707       break;
708     default:
709       break;
710     };
711 
712   if (ArgNum == InvalidArgIndex) {
713     if (C.isCLibraryFunction(FDecl, "malloc") ||
714         C.isCLibraryFunction(FDecl, "calloc") ||
715         C.isCLibraryFunction(FDecl, "alloca"))
716       ArgNum = 0;
717     else if (C.isCLibraryFunction(FDecl, "memccpy"))
718       ArgNum = 3;
719     else if (C.isCLibraryFunction(FDecl, "realloc"))
720       ArgNum = 1;
721     else if (C.isCLibraryFunction(FDecl, "bcopy"))
722       ArgNum = 2;
723   }
724 
725   return ArgNum != InvalidArgIndex && CE->getNumArgs() > ArgNum &&
726          generateReportIfTainted(CE->getArg(ArgNum), MsgTaintedBufferSize, C);
727 }
728 
/// Registers GenericTaintChecker with the given checker manager.
void ento::registerGenericTaintChecker(CheckerManager &mgr) {
  mgr.registerChecker<GenericTaintChecker>();
}
732