1 //===- FuzzerInternal.h - Internal header for the Fuzzer --------*- C++ -* ===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 // Define the main class fuzzer::Fuzzer and most functions.
10 //===----------------------------------------------------------------------===//
11 
12 #ifndef LLVM_FUZZER_INTERNAL_H
13 #define LLVM_FUZZER_INTERNAL_H
14 
15 #include <algorithm>
16 #include <atomic>
17 #include <cassert>
18 #include <chrono>
19 #include <climits>
20 #include <cstddef>
21 #include <cstdlib>
22 #include <random>
23 #include <string.h>
24 #include <string>
25 #include <unordered_set>
26 #include <vector>
27 
28 #include "FuzzerExtFunctions.h"
29 #include "FuzzerInterface.h"
30 #include "FuzzerTracePC.h"
31 
32 // Platform detection.
33 #ifdef __linux__
34 #define LIBFUZZER_LINUX 1
35 #define LIBFUZZER_APPLE 0
36 #elif __APPLE__
37 #define LIBFUZZER_LINUX 0
38 #define LIBFUZZER_APPLE 1
39 #else
40 #error "Support for your platform has not been implemented"
41 #endif
42 
43 namespace fuzzer {
44 
45 typedef int (*UserCallback)(const uint8_t *Data, size_t Size);
46 int FuzzerDriver(int *argc, char ***argv, UserCallback Callback);
47 
48 using namespace std::chrono;
49 typedef std::vector<uint8_t> Unit;
50 typedef std::vector<Unit> UnitVector;
51 
52 // A simple POD sized array of bytes.
53 template <size_t kMaxSize> class FixedWord {
54 public:
FixedWord()55   FixedWord() {}
FixedWord(const uint8_t * B,uint8_t S)56   FixedWord(const uint8_t *B, uint8_t S) { Set(B, S); }
57 
Set(const uint8_t * B,uint8_t S)58   void Set(const uint8_t *B, uint8_t S) {
59     assert(S <= kMaxSize);
60     memcpy(Data, B, S);
61     Size = S;
62   }
63 
64   bool operator==(const FixedWord<kMaxSize> &w) const {
65     return Size == w.Size && 0 == memcmp(Data, w.Data, Size);
66   }
67 
68   bool operator<(const FixedWord<kMaxSize> &w) const {
69     if (Size != w.Size)
70       return Size < w.Size;
71     return memcmp(Data, w.Data, Size) < 0;
72   }
73 
GetMaxSize()74   static size_t GetMaxSize() { return kMaxSize; }
data()75   const uint8_t *data() const { return Data; }
size()76   uint8_t size() const { return Size; }
77 
78 private:
79   uint8_t Size = 0;
80   uint8_t Data[kMaxSize];
81 };
82 
83 typedef FixedWord<27> Word; // 28 bytes.
84 
85 bool IsFile(const std::string &Path);
86 std::string FileToString(const std::string &Path);
87 Unit FileToVector(const std::string &Path, size_t MaxSize = 0);
88 void ReadDirToVectorOfUnits(const char *Path, std::vector<Unit> *V,
89                             long *Epoch, size_t MaxSize);
90 void WriteToFile(const Unit &U, const std::string &Path);
91 void CopyFileToErr(const std::string &Path);
92 // Returns "Dir/FileName" or equivalent for the current OS.
93 std::string DirPlusFile(const std::string &DirPath,
94                         const std::string &FileName);
95 
96 void DupAndCloseStderr();
97 void CloseStdout();
98 void Printf(const char *Fmt, ...);
99 void PrintHexArray(const Unit &U, const char *PrintAfter = "");
100 void PrintHexArray(const uint8_t *Data, size_t Size,
101                    const char *PrintAfter = "");
102 void PrintASCII(const uint8_t *Data, size_t Size, const char *PrintAfter = "");
103 void PrintASCII(const Unit &U, const char *PrintAfter = "");
104 void PrintASCII(const Word &W, const char *PrintAfter = "");
105 std::string Hash(const Unit &U);
106 void SetTimer(int Seconds);
107 void SetSigSegvHandler();
108 void SetSigBusHandler();
109 void SetSigAbrtHandler();
110 void SetSigIllHandler();
111 void SetSigFpeHandler();
112 void SetSigIntHandler();
113 void SetSigTermHandler();
114 std::string Base64(const Unit &U);
115 int ExecuteCommand(const std::string &Command);
116 size_t GetPeakRSSMb();
117 
118 // Private copy of SHA1 implementation.
119 static const int kSHA1NumBytes = 20;
120 // Computes SHA1 hash of 'Len' bytes in 'Data', writes kSHA1NumBytes to 'Out'.
121 void ComputeSHA1(const uint8_t *Data, size_t Len, uint8_t *Out);
122 
123 // Changes U to contain only ASCII (isprint+isspace) characters.
124 // Returns true iff U has been changed.
125 bool ToASCII(uint8_t *Data, size_t Size);
126 bool IsASCII(const Unit &U);
127 bool IsASCII(const uint8_t *Data, size_t Size);
128 
129 int NumberOfCpuCores();
130 int GetPid();
131 void SleepSeconds(int Seconds);
132 
133 class Random {
134  public:
Random(unsigned int seed)135   Random(unsigned int seed) : R(seed) {}
Rand()136   size_t Rand() { return R(); }
RandBool()137   size_t RandBool() { return Rand() % 2; }
operator()138   size_t operator()(size_t n) { return n ? Rand() % n : 0; }
Get_mt19937()139   std::mt19937 &Get_mt19937() { return R; }
140  private:
141   std::mt19937 R;
142 };
143 
144 // Dictionary.
145 
146 // Parses one dictionary entry.
147 // If successfull, write the enty to Unit and returns true,
148 // otherwise returns false.
149 bool ParseOneDictionaryEntry(const std::string &Str, Unit *U);
150 // Parses the dictionary file, fills Units, returns true iff all lines
151 // were parsed succesfully.
152 bool ParseDictionaryFile(const std::string &Text, std::vector<Unit> *Units);
153 
154 class DictionaryEntry {
155  public:
DictionaryEntry()156   DictionaryEntry() {}
DictionaryEntry(Word W)157   DictionaryEntry(Word W) : W(W) {}
DictionaryEntry(Word W,size_t PositionHint)158   DictionaryEntry(Word W, size_t PositionHint) : W(W), PositionHint(PositionHint) {}
GetW()159   const Word &GetW() const { return W; }
160 
HasPositionHint()161   bool HasPositionHint() const { return PositionHint != std::numeric_limits<size_t>::max(); }
GetPositionHint()162   size_t GetPositionHint() const {
163     assert(HasPositionHint());
164     return PositionHint;
165   }
IncUseCount()166   void IncUseCount() { UseCount++; }
IncSuccessCount()167   void IncSuccessCount() { SuccessCount++; }
GetUseCount()168   size_t GetUseCount() const { return UseCount; }
GetSuccessCount()169   size_t GetSuccessCount() const {return SuccessCount; }
170 
171 private:
172   Word W;
173   size_t PositionHint = std::numeric_limits<size_t>::max();
174   size_t UseCount = 0;
175   size_t SuccessCount = 0;
176 };
177 
178 class Dictionary {
179  public:
180   static const size_t kMaxDictSize = 1 << 14;
181 
ContainsWord(const Word & W)182   bool ContainsWord(const Word &W) const {
183     return std::any_of(begin(), end(), [&](const DictionaryEntry &DE) {
184       return DE.GetW() == W;
185     });
186   }
begin()187   const DictionaryEntry *begin() const { return &DE[0]; }
end()188   const DictionaryEntry *end() const { return begin() + Size; }
189   DictionaryEntry & operator[] (size_t Idx) {
190     assert(Idx < Size);
191     return DE[Idx];
192   }
push_back(DictionaryEntry DE)193   void push_back(DictionaryEntry DE) {
194     if (Size < kMaxDictSize)
195       this->DE[Size++] = DE;
196   }
clear()197   void clear() { Size = 0; }
empty()198   bool empty() const { return Size == 0; }
size()199   size_t size() const { return Size; }
200 
201 private:
202   DictionaryEntry DE[kMaxDictSize];
203   size_t Size = 0;
204 };
205 
206 struct FuzzingOptions {
207   int Verbosity = 1;
208   size_t MaxLen = 0;
209   int UnitTimeoutSec = 300;
210   int TimeoutExitCode = 77;
211   int ErrorExitCode = 77;
212   int MaxTotalTimeSec = 0;
213   int RssLimitMb = 0;
214   bool DoCrossOver = true;
215   int MutateDepth = 5;
216   bool UseCounters = false;
217   bool UseIndirCalls = true;
218   bool UseTraces = false;
219   bool UseMemcmp = true;
220   bool UseFullCoverageSet = false;
221   bool Reload = true;
222   bool ShuffleAtStartUp = true;
223   bool PreferSmall = true;
224   size_t MaxNumberOfRuns = ULONG_MAX;
225   int ReportSlowUnits = 10;
226   bool OnlyASCII = false;
227   std::string OutputCorpus;
228   std::string ArtifactPrefix = "./";
229   std::string ExactArtifactPath;
230   bool SaveArtifacts = true;
231   bool PrintNEW = true; // Print a status line when new units are found;
232   bool OutputCSV = false;
233   bool PrintNewCovPcs = false;
234   bool PrintFinalStats = false;
235   bool DetectLeaks = true;
236   bool TruncateUnits = false;
237   bool PruneCorpus = true;
238 };
239 
240 class MutationDispatcher {
241 public:
242   MutationDispatcher(Random &Rand, const FuzzingOptions &Options);
~MutationDispatcher()243   ~MutationDispatcher() {}
244   /// Indicate that we are about to start a new sequence of mutations.
245   void StartMutationSequence();
246   /// Print the current sequence of mutations.
247   void PrintMutationSequence();
248   /// Indicate that the current sequence of mutations was successfull.
249   void RecordSuccessfulMutationSequence();
250   /// Mutates data by invoking user-provided mutator.
251   size_t Mutate_Custom(uint8_t *Data, size_t Size, size_t MaxSize);
252   /// Mutates data by invoking user-provided crossover.
253   size_t Mutate_CustomCrossOver(uint8_t *Data, size_t Size, size_t MaxSize);
254   /// Mutates data by shuffling bytes.
255   size_t Mutate_ShuffleBytes(uint8_t *Data, size_t Size, size_t MaxSize);
256   /// Mutates data by erasing a byte.
257   size_t Mutate_EraseByte(uint8_t *Data, size_t Size, size_t MaxSize);
258   /// Mutates data by inserting a byte.
259   size_t Mutate_InsertByte(uint8_t *Data, size_t Size, size_t MaxSize);
260   /// Mutates data by chanding one byte.
261   size_t Mutate_ChangeByte(uint8_t *Data, size_t Size, size_t MaxSize);
262   /// Mutates data by chanding one bit.
263   size_t Mutate_ChangeBit(uint8_t *Data, size_t Size, size_t MaxSize);
264 
265   /// Mutates data by adding a word from the manual dictionary.
266   size_t Mutate_AddWordFromManualDictionary(uint8_t *Data, size_t Size,
267                                             size_t MaxSize);
268 
269   /// Mutates data by adding a word from the temporary automatic dictionary.
270   size_t Mutate_AddWordFromTemporaryAutoDictionary(uint8_t *Data, size_t Size,
271                                                    size_t MaxSize);
272 
273   /// Mutates data by adding a word from the persistent automatic dictionary.
274   size_t Mutate_AddWordFromPersistentAutoDictionary(uint8_t *Data, size_t Size,
275                                                     size_t MaxSize);
276 
277   /// Tries to find an ASCII integer in Data, changes it to another ASCII int.
278   size_t Mutate_ChangeASCIIInteger(uint8_t *Data, size_t Size, size_t MaxSize);
279 
280   /// CrossOver Data with some other element of the corpus.
281   size_t Mutate_CrossOver(uint8_t *Data, size_t Size, size_t MaxSize);
282 
283   /// Applies one of the configured mutations.
284   /// Returns the new size of data which could be up to MaxSize.
285   size_t Mutate(uint8_t *Data, size_t Size, size_t MaxSize);
286   /// Applies one of the default mutations. Provided as a service
287   /// to mutation authors.
288   size_t DefaultMutate(uint8_t *Data, size_t Size, size_t MaxSize);
289 
290   /// Creates a cross-over of two pieces of Data, returns its size.
291   size_t CrossOver(const uint8_t *Data1, size_t Size1, const uint8_t *Data2,
292                    size_t Size2, uint8_t *Out, size_t MaxOutSize);
293 
294   void AddWordToManualDictionary(const Word &W);
295 
296   void AddWordToAutoDictionary(const Word &W, size_t PositionHint);
297   void ClearAutoDictionary();
298   void PrintRecommendedDictionary();
299 
SetCorpus(const std::vector<Unit> * Corpus)300   void SetCorpus(const std::vector<Unit> *Corpus) { this->Corpus = Corpus; }
301 
GetRand()302   Random &GetRand() { return Rand; }
303 
304 private:
305 
306   struct Mutator {
307     size_t (MutationDispatcher::*Fn)(uint8_t *Data, size_t Size, size_t Max);
308     const char *Name;
309   };
310 
311   size_t AddWordFromDictionary(Dictionary &D, uint8_t *Data, size_t Size,
312                                size_t MaxSize);
313   size_t MutateImpl(uint8_t *Data, size_t Size, size_t MaxSize,
314                     const std::vector<Mutator> &Mutators);
315 
316   Random &Rand;
317   const FuzzingOptions Options;
318 
319   // Dictionary provided by the user via -dict=DICT_FILE.
320   Dictionary ManualDictionary;
321   // Temporary dictionary modified by the fuzzer itself,
322   // recreated periodically.
323   Dictionary TempAutoDictionary;
324   // Persistent dictionary modified by the fuzzer, consists of
325   // entries that led to successfull discoveries in the past mutations.
326   Dictionary PersistentAutoDictionary;
327   std::vector<Mutator> CurrentMutatorSequence;
328   std::vector<DictionaryEntry *> CurrentDictionaryEntrySequence;
329   const std::vector<Unit> *Corpus = nullptr;
330   std::vector<uint8_t> MutateInPlaceHere;
331 
332   std::vector<Mutator> Mutators;
333   std::vector<Mutator> DefaultMutators;
334 };
335 
336 class Fuzzer {
337 public:
338 
339   // Aggregates all available coverage measurements.
340   struct Coverage {
CoverageCoverage341     Coverage() { Reset(); }
342 
ResetCoverage343     void Reset() {
344       BlockCoverage = 0;
345       CallerCalleeCoverage = 0;
346       PcMapBits = 0;
347       CounterBitmapBits = 0;
348       PcBufferLen = 0;
349       CounterBitmap.clear();
350       PCMap.Reset();
351     }
352 
353     std::string DebugString() const;
354 
355     size_t BlockCoverage;
356     size_t CallerCalleeCoverage;
357 
358     size_t PcBufferLen;
359     // Precalculated number of bits in CounterBitmap.
360     size_t CounterBitmapBits;
361     std::vector<uint8_t> CounterBitmap;
362     // Precalculated number of bits in PCMap.
363     size_t PcMapBits;
364     PcCoverageMap PCMap;
365   };
366 
367   Fuzzer(UserCallback CB, MutationDispatcher &MD, FuzzingOptions Options);
AddToCorpus(const Unit & U)368   void AddToCorpus(const Unit &U) {
369     Corpus.push_back(U);
370     UpdateCorpusDistribution();
371   }
372   size_t ChooseUnitIdxToMutate();
ChooseUnitToMutate()373   const Unit &ChooseUnitToMutate() { return Corpus[ChooseUnitIdxToMutate()]; };
374   void TruncateUnits(std::vector<Unit> *NewCorpus);
375   void Loop();
376   void Drill();
377   void ShuffleAndMinimize();
378   void InitializeTraceState();
379   void AssignTaintLabels(uint8_t *Data, size_t Size);
CorpusSize()380   size_t CorpusSize() const { return Corpus.size(); }
381   size_t MaxUnitSizeInCorpus() const;
ReadDir(const std::string & Path,long * Epoch,size_t MaxSize)382   void ReadDir(const std::string &Path, long *Epoch, size_t MaxSize) {
383     Printf("Loading corpus: %s\n", Path.c_str());
384     ReadDirToVectorOfUnits(Path.c_str(), &Corpus, Epoch, MaxSize);
385   }
386   void RereadOutputCorpus(size_t MaxSize);
387   // Save the current corpus to OutputCorpus.
388   void SaveCorpus();
389 
secondsSinceProcessStartUp()390   size_t secondsSinceProcessStartUp() {
391     return duration_cast<seconds>(system_clock::now() - ProcessStartTime)
392         .count();
393   }
execPerSec()394   size_t execPerSec() {
395     size_t Seconds = secondsSinceProcessStartUp();
396     return Seconds ? TotalNumberOfRuns / Seconds : 0;
397   }
398 
getTotalNumberOfRuns()399   size_t getTotalNumberOfRuns() { return TotalNumberOfRuns; }
400 
401   static void StaticAlarmCallback();
402   static void StaticCrashSignalCallback();
403   static void StaticInterruptCallback();
404 
405   void ExecuteCallback(const uint8_t *Data, size_t Size);
406   bool RunOne(const uint8_t *Data, size_t Size);
407 
408   // Merge Corpora[1:] into Corpora[0].
409   void Merge(const std::vector<std::string> &Corpora);
410   // Returns a subset of 'Extra' that adds coverage to 'Initial'.
411   UnitVector FindExtraUnits(const UnitVector &Initial, const UnitVector &Extra);
GetMD()412   MutationDispatcher &GetMD() { return MD; }
413   void PrintFinalStats();
414   void SetMaxLen(size_t MaxLen);
415   void RssLimitCallback();
416 
417   // Public for tests.
418   void ResetCoverage();
419 
InFuzzingThread()420   bool InFuzzingThread() const { return IsMyThread; }
421   size_t GetCurrentUnitInFuzzingThead(const uint8_t **Data) const;
422 
423 private:
424   void AlarmCallback();
425   void CrashCallback();
426   void InterruptCallback();
427   void MutateAndTestOne();
428   void ReportNewCoverage(const Unit &U);
RunOne(const Unit & U)429   bool RunOne(const Unit &U) { return RunOne(U.data(), U.size()); }
430   void RunOneAndUpdateCorpus(const uint8_t *Data, size_t Size);
431   void WriteToOutputCorpus(const Unit &U);
432   void WriteUnitToFileWithPrefix(const Unit &U, const char *Prefix);
433   void PrintStats(const char *Where, const char *End = "\n");
434   void PrintStatusForNewUnit(const Unit &U);
435   void ShuffleCorpus(UnitVector *V);
436   void TryDetectingAMemoryLeak(const uint8_t *Data, size_t Size,
437                                bool DuringInitialCorpusExecution);
438 
439   // Updates the probability distribution for the units in the corpus.
440   // Must be called whenever the corpus or unit weights are changed.
441   void UpdateCorpusDistribution();
442 
443   bool UpdateMaxCoverage();
444 
445   // Trace-based fuzzing: we run a unit with some kind of tracing
446   // enabled and record potentially useful mutations. Then
447   // We apply these mutations one by one to the unit and run it again.
448 
449   // Start tracing; forget all previously proposed mutations.
450   void StartTraceRecording();
451   // Stop tracing.
452   void StopTraceRecording();
453 
454   void SetDeathCallback();
455   static void StaticDeathCallback();
456   void DumpCurrentUnit(const char *Prefix);
457   void DeathCallback();
458 
459   void LazyAllocateCurrentUnitData();
460   uint8_t *CurrentUnitData = nullptr;
461   std::atomic<size_t> CurrentUnitSize;
462 
463   size_t TotalNumberOfRuns = 0;
464   size_t NumberOfNewUnitsAdded = 0;
465 
466   bool HasMoreMallocsThanFrees = false;
467   size_t NumberOfLeakDetectionAttempts = 0;
468 
469   std::vector<Unit> Corpus;
470   std::unordered_set<std::string> UnitHashesAddedToCorpus;
471 
472   std::piecewise_constant_distribution<double> CorpusDistribution;
473   UserCallback CB;
474   MutationDispatcher &MD;
475   FuzzingOptions Options;
476   system_clock::time_point ProcessStartTime = system_clock::now();
477   system_clock::time_point UnitStartTime;
478   long TimeOfLongestUnitInSeconds = 0;
479   long EpochOfLastReadOfOutputCorpus = 0;
480 
481   // Maximum recorded coverage.
482   Coverage MaxCoverage;
483 
484   // Need to know our own thread.
485   static thread_local bool IsMyThread;
486 };
487 
488 // Global interface to functions that may or may not be available.
489 extern ExternalFunctions *EF;
490 
491 }; // namespace fuzzer
492 
493 #endif // LLVM_FUZZER_INTERNAL_H
494