1 //===-- EfficiencySanitizer.cpp - performance tuner -----------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file is a part of EfficiencySanitizer, a family of performance tuners
11 // that detects multiple performance issues via separate sub-tools.
12 //
13 // The instrumentation phase is straightforward:
14 //   - Take action on every memory access: either inlined instrumentation,
15 //     or Inserted calls to our run-time library.
16 //   - Optimizations may apply to avoid instrumenting some of the accesses.
17 //   - Turn mem{set,cpy,move} instrinsics into library calls.
18 // The rest is handled by the run-time library.
19 //===----------------------------------------------------------------------===//
20 
21 #include "llvm/Transforms/Instrumentation.h"
22 #include "llvm/ADT/SmallString.h"
23 #include "llvm/ADT/SmallVector.h"
24 #include "llvm/ADT/Statistic.h"
25 #include "llvm/ADT/StringExtras.h"
26 #include "llvm/Analysis/TargetLibraryInfo.h"
27 #include "llvm/IR/Function.h"
28 #include "llvm/IR/IRBuilder.h"
29 #include "llvm/IR/IntrinsicInst.h"
30 #include "llvm/IR/Module.h"
31 #include "llvm/IR/Type.h"
32 #include "llvm/Support/CommandLine.h"
33 #include "llvm/Support/Debug.h"
34 #include "llvm/Support/raw_ostream.h"
35 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
36 #include "llvm/Transforms/Utils/Local.h"
37 #include "llvm/Transforms/Utils/ModuleUtils.h"
38 
39 using namespace llvm;
40 
41 #define DEBUG_TYPE "esan"
42 
43 // The tool type must be just one of these ClTool* options, as the tools
44 // cannot be combined due to shadow memory constraints.
45 static cl::opt<bool>
46     ClToolCacheFrag("esan-cache-frag", cl::init(false),
47                     cl::desc("Detect data cache fragmentation"), cl::Hidden);
48 static cl::opt<bool>
49     ClToolWorkingSet("esan-working-set", cl::init(false),
50                     cl::desc("Measure the working set size"), cl::Hidden);
51 // Each new tool will get its own opt flag here.
52 // These are converted to EfficiencySanitizerOptions for use
53 // in the code.
54 
55 static cl::opt<bool> ClInstrumentLoadsAndStores(
56     "esan-instrument-loads-and-stores", cl::init(true),
57     cl::desc("Instrument loads and stores"), cl::Hidden);
58 static cl::opt<bool> ClInstrumentMemIntrinsics(
59     "esan-instrument-memintrinsics", cl::init(true),
60     cl::desc("Instrument memintrinsics (memset/memcpy/memmove)"), cl::Hidden);
61 static cl::opt<bool> ClInstrumentFastpath(
62     "esan-instrument-fastpath", cl::init(true),
63     cl::desc("Instrument fastpath"), cl::Hidden);
64 static cl::opt<bool> ClAuxFieldInfo(
65     "esan-aux-field-info", cl::init(true),
66     cl::desc("Generate binary with auxiliary struct field information"),
67     cl::Hidden);
68 
69 // Experiments show that the performance difference can be 2x or more,
70 // and accuracy loss is typically negligible, so we turn this on by default.
71 static cl::opt<bool> ClAssumeIntraCacheLine(
72     "esan-assume-intra-cache-line", cl::init(true),
73     cl::desc("Assume each memory access touches just one cache line, for "
74              "better performance but with a potential loss of accuracy."),
75     cl::Hidden);
76 
77 STATISTIC(NumInstrumentedLoads, "Number of instrumented loads");
78 STATISTIC(NumInstrumentedStores, "Number of instrumented stores");
79 STATISTIC(NumFastpaths, "Number of instrumented fastpaths");
80 STATISTIC(NumAccessesWithIrregularSize,
81           "Number of accesses with a size outside our targeted callout sizes");
82 STATISTIC(NumIgnoredStructs, "Number of ignored structs");
83 STATISTIC(NumIgnoredGEPs, "Number of ignored GEP instructions");
84 STATISTIC(NumInstrumentedGEPs, "Number of instrumented GEP instructions");
85 STATISTIC(NumAssumedIntraCacheLine,
86           "Number of accesses assumed to be intra-cache-line");
87 
88 static const uint64_t EsanCtorAndDtorPriority = 0;
89 static const char *const EsanModuleCtorName = "esan.module_ctor";
90 static const char *const EsanModuleDtorName = "esan.module_dtor";
91 static const char *const EsanInitName = "__esan_init";
92 static const char *const EsanExitName = "__esan_exit";
93 
94 // We need to specify the tool to the runtime earlier than
95 // the ctor is called in some cases, so we set a global variable.
96 static const char *const EsanWhichToolName = "__esan_which_tool";
97 
98 // We must keep these Shadow* constants consistent with the esan runtime.
99 // FIXME: Try to place these shadow constants, the names of the __esan_*
100 // interface functions, and the ToolType enum into a header shared between
101 // llvm and compiler-rt.
102 static const uint64_t ShadowMask = 0x00000fffffffffffull;
103 static const uint64_t ShadowOffs[3] = { // Indexed by scale
104   0x0000130000000000ull,
105   0x0000220000000000ull,
106   0x0000440000000000ull,
107 };
108 // This array is indexed by the ToolType enum.
109 static const int ShadowScale[] = {
110   0, // ESAN_None.
111   2, // ESAN_CacheFrag: 4B:1B, so 4 to 1 == >>2.
112   6, // ESAN_WorkingSet: 64B:1B, so 64 to 1 == >>6.
113 };
114 
115 // MaxStructCounterNameSize is a soft size limit to avoid insanely long
116 // names for those extremely large structs.
117 static const unsigned MaxStructCounterNameSize = 512;
118 
119 namespace {
120 
121 static EfficiencySanitizerOptions
OverrideOptionsFromCL(EfficiencySanitizerOptions Options)122 OverrideOptionsFromCL(EfficiencySanitizerOptions Options) {
123   if (ClToolCacheFrag)
124     Options.ToolType = EfficiencySanitizerOptions::ESAN_CacheFrag;
125   else if (ClToolWorkingSet)
126     Options.ToolType = EfficiencySanitizerOptions::ESAN_WorkingSet;
127 
128   // Direct opt invocation with no params will have the default ESAN_None.
129   // We run the default tool in that case.
130   if (Options.ToolType == EfficiencySanitizerOptions::ESAN_None)
131     Options.ToolType = EfficiencySanitizerOptions::ESAN_CacheFrag;
132 
133   return Options;
134 }
135 
136 // Create a constant for Str so that we can pass it to the run-time lib.
createPrivateGlobalForString(Module & M,StringRef Str,bool AllowMerging)137 static GlobalVariable *createPrivateGlobalForString(Module &M, StringRef Str,
138                                                     bool AllowMerging) {
139   Constant *StrConst = ConstantDataArray::getString(M.getContext(), Str);
140   // We use private linkage for module-local strings. If they can be merged
141   // with another one, we set the unnamed_addr attribute.
142   GlobalVariable *GV =
143     new GlobalVariable(M, StrConst->getType(), true,
144                        GlobalValue::PrivateLinkage, StrConst, "");
145   if (AllowMerging)
146     GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
147   GV->setAlignment(1);  // Strings may not be merged w/o setting align 1.
148   return GV;
149 }
150 
151 /// EfficiencySanitizer: instrument each module to find performance issues.
152 class EfficiencySanitizer : public ModulePass {
153 public:
EfficiencySanitizer(const EfficiencySanitizerOptions & Opts=EfficiencySanitizerOptions ())154   EfficiencySanitizer(
155       const EfficiencySanitizerOptions &Opts = EfficiencySanitizerOptions())
156       : ModulePass(ID), Options(OverrideOptionsFromCL(Opts)) {}
157   const char *getPassName() const override;
158   void getAnalysisUsage(AnalysisUsage &AU) const override;
159   bool runOnModule(Module &M) override;
160   static char ID;
161 
162 private:
163   bool initOnModule(Module &M);
164   void initializeCallbacks(Module &M);
165   bool shouldIgnoreStructType(StructType *StructTy);
166   void createStructCounterName(
167       StructType *StructTy, SmallString<MaxStructCounterNameSize> &NameStr);
168   void createCacheFragAuxGV(
169     Module &M, const DataLayout &DL, StructType *StructTy,
170     GlobalVariable *&TypeNames, GlobalVariable *&Offsets, GlobalVariable *&Size);
171   GlobalVariable *createCacheFragInfoGV(Module &M, const DataLayout &DL,
172                                         Constant *UnitName);
173   Constant *createEsanInitToolInfoArg(Module &M, const DataLayout &DL);
174   void createDestructor(Module &M, Constant *ToolInfoArg);
175   bool runOnFunction(Function &F, Module &M);
176   bool instrumentLoadOrStore(Instruction *I, const DataLayout &DL);
177   bool instrumentMemIntrinsic(MemIntrinsic *MI);
178   bool instrumentGetElementPtr(Instruction *I, Module &M);
179   bool insertCounterUpdate(Instruction *I, StructType *StructTy,
180                            unsigned CounterIdx);
getFieldCounterIdx(StructType * StructTy)181   unsigned getFieldCounterIdx(StructType *StructTy) {
182     return 0;
183   }
getArrayCounterIdx(StructType * StructTy)184   unsigned getArrayCounterIdx(StructType *StructTy) {
185     return StructTy->getNumElements();
186   }
getStructCounterSize(StructType * StructTy)187   unsigned getStructCounterSize(StructType *StructTy) {
188     // The struct counter array includes:
189     // - one counter for each struct field,
190     // - one counter for the struct access within an array.
191     return (StructTy->getNumElements()/*field*/ + 1/*array*/);
192   }
193   bool shouldIgnoreMemoryAccess(Instruction *I);
194   int getMemoryAccessFuncIndex(Value *Addr, const DataLayout &DL);
195   Value *appToShadow(Value *Shadow, IRBuilder<> &IRB);
196   bool instrumentFastpath(Instruction *I, const DataLayout &DL, bool IsStore,
197                           Value *Addr, unsigned Alignment);
198   // Each tool has its own fastpath routine:
199   bool instrumentFastpathCacheFrag(Instruction *I, const DataLayout &DL,
200                                    Value *Addr, unsigned Alignment);
201   bool instrumentFastpathWorkingSet(Instruction *I, const DataLayout &DL,
202                                     Value *Addr, unsigned Alignment);
203 
204   EfficiencySanitizerOptions Options;
205   LLVMContext *Ctx;
206   Type *IntptrTy;
207   // Our slowpath involves callouts to the runtime library.
208   // Access sizes are powers of two: 1, 2, 4, 8, 16.
209   static const size_t NumberOfAccessSizes = 5;
210   Function *EsanAlignedLoad[NumberOfAccessSizes];
211   Function *EsanAlignedStore[NumberOfAccessSizes];
212   Function *EsanUnalignedLoad[NumberOfAccessSizes];
213   Function *EsanUnalignedStore[NumberOfAccessSizes];
214   // For irregular sizes of any alignment:
215   Function *EsanUnalignedLoadN, *EsanUnalignedStoreN;
216   Function *MemmoveFn, *MemcpyFn, *MemsetFn;
217   Function *EsanCtorFunction;
218   Function *EsanDtorFunction;
219   // Remember the counter variable for each struct type to avoid
220   // recomputing the variable name later during instrumentation.
221   std::map<Type *, GlobalVariable *> StructTyMap;
222 };
223 } // namespace
224 
225 char EfficiencySanitizer::ID = 0;
226 INITIALIZE_PASS_BEGIN(
227     EfficiencySanitizer, "esan",
228     "EfficiencySanitizer: finds performance issues.", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)229 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
230 INITIALIZE_PASS_END(
231     EfficiencySanitizer, "esan",
232     "EfficiencySanitizer: finds performance issues.", false, false)
233 
234 const char *EfficiencySanitizer::getPassName() const {
235   return "EfficiencySanitizer";
236 }
237 
getAnalysisUsage(AnalysisUsage & AU) const238 void EfficiencySanitizer::getAnalysisUsage(AnalysisUsage &AU) const {
239   AU.addRequired<TargetLibraryInfoWrapperPass>();
240 }
241 
242 ModulePass *
createEfficiencySanitizerPass(const EfficiencySanitizerOptions & Options)243 llvm::createEfficiencySanitizerPass(const EfficiencySanitizerOptions &Options) {
244   return new EfficiencySanitizer(Options);
245 }
246 
initializeCallbacks(Module & M)247 void EfficiencySanitizer::initializeCallbacks(Module &M) {
248   IRBuilder<> IRB(M.getContext());
249   // Initialize the callbacks.
250   for (size_t Idx = 0; Idx < NumberOfAccessSizes; ++Idx) {
251     const unsigned ByteSize = 1U << Idx;
252     std::string ByteSizeStr = utostr(ByteSize);
253     // We'll inline the most common (i.e., aligned and frequent sizes)
254     // load + store instrumentation: these callouts are for the slowpath.
255     SmallString<32> AlignedLoadName("__esan_aligned_load" + ByteSizeStr);
256     EsanAlignedLoad[Idx] =
257         checkSanitizerInterfaceFunction(M.getOrInsertFunction(
258             AlignedLoadName, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr));
259     SmallString<32> AlignedStoreName("__esan_aligned_store" + ByteSizeStr);
260     EsanAlignedStore[Idx] =
261         checkSanitizerInterfaceFunction(M.getOrInsertFunction(
262             AlignedStoreName, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr));
263     SmallString<32> UnalignedLoadName("__esan_unaligned_load" + ByteSizeStr);
264     EsanUnalignedLoad[Idx] =
265         checkSanitizerInterfaceFunction(M.getOrInsertFunction(
266             UnalignedLoadName, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr));
267     SmallString<32> UnalignedStoreName("__esan_unaligned_store" + ByteSizeStr);
268     EsanUnalignedStore[Idx] =
269         checkSanitizerInterfaceFunction(M.getOrInsertFunction(
270             UnalignedStoreName, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr));
271   }
272   EsanUnalignedLoadN = checkSanitizerInterfaceFunction(
273       M.getOrInsertFunction("__esan_unaligned_loadN", IRB.getVoidTy(),
274                             IRB.getInt8PtrTy(), IntptrTy, nullptr));
275   EsanUnalignedStoreN = checkSanitizerInterfaceFunction(
276       M.getOrInsertFunction("__esan_unaligned_storeN", IRB.getVoidTy(),
277                             IRB.getInt8PtrTy(), IntptrTy, nullptr));
278   MemmoveFn = checkSanitizerInterfaceFunction(
279       M.getOrInsertFunction("memmove", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
280                             IRB.getInt8PtrTy(), IntptrTy, nullptr));
281   MemcpyFn = checkSanitizerInterfaceFunction(
282       M.getOrInsertFunction("memcpy", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
283                             IRB.getInt8PtrTy(), IntptrTy, nullptr));
284   MemsetFn = checkSanitizerInterfaceFunction(
285       M.getOrInsertFunction("memset", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
286                             IRB.getInt32Ty(), IntptrTy, nullptr));
287 }
288 
shouldIgnoreStructType(StructType * StructTy)289 bool EfficiencySanitizer::shouldIgnoreStructType(StructType *StructTy) {
290   if (StructTy == nullptr || StructTy->isOpaque() /* no struct body */)
291     return true;
292   return false;
293 }
294 
createStructCounterName(StructType * StructTy,SmallString<MaxStructCounterNameSize> & NameStr)295 void EfficiencySanitizer::createStructCounterName(
296     StructType *StructTy, SmallString<MaxStructCounterNameSize> &NameStr) {
297   // Append NumFields and field type ids to avoid struct conflicts
298   // with the same name but different fields.
299   if (StructTy->hasName())
300     NameStr += StructTy->getName();
301   else
302     NameStr += "struct.anon";
303   // We allow the actual size of the StructCounterName to be larger than
304   // MaxStructCounterNameSize and append #NumFields and at least one
305   // field type id.
306   // Append #NumFields.
307   NameStr += "#";
308   Twine(StructTy->getNumElements()).toVector(NameStr);
309   // Append struct field type ids in the reverse order.
310   for (int i = StructTy->getNumElements() - 1; i >= 0; --i) {
311     NameStr += "#";
312     Twine(StructTy->getElementType(i)->getTypeID()).toVector(NameStr);
313     if (NameStr.size() >= MaxStructCounterNameSize)
314       break;
315   }
316   if (StructTy->isLiteral()) {
317     // End with # for literal struct.
318     NameStr += "#";
319   }
320 }
321 
322 // Create global variables with auxiliary information (e.g., struct field size,
323 // offset, and type name) for better user report.
createCacheFragAuxGV(Module & M,const DataLayout & DL,StructType * StructTy,GlobalVariable * & TypeName,GlobalVariable * & Offset,GlobalVariable * & Size)324 void EfficiencySanitizer::createCacheFragAuxGV(
325     Module &M, const DataLayout &DL, StructType *StructTy,
326     GlobalVariable *&TypeName, GlobalVariable *&Offset,
327     GlobalVariable *&Size) {
328   auto *Int8PtrTy = Type::getInt8PtrTy(*Ctx);
329   auto *Int32Ty = Type::getInt32Ty(*Ctx);
330   // FieldTypeName.
331   auto *TypeNameArrayTy = ArrayType::get(Int8PtrTy, StructTy->getNumElements());
332   TypeName = new GlobalVariable(M, TypeNameArrayTy, true,
333                                  GlobalVariable::InternalLinkage, nullptr);
334   SmallVector<Constant *, 16> TypeNameVec;
335   // FieldOffset.
336   auto *OffsetArrayTy = ArrayType::get(Int32Ty, StructTy->getNumElements());
337   Offset = new GlobalVariable(M, OffsetArrayTy, true,
338                               GlobalVariable::InternalLinkage, nullptr);
339   SmallVector<Constant *, 16> OffsetVec;
340   // FieldSize
341   auto *SizeArrayTy = ArrayType::get(Int32Ty, StructTy->getNumElements());
342   Size = new GlobalVariable(M, SizeArrayTy, true,
343                             GlobalVariable::InternalLinkage, nullptr);
344   SmallVector<Constant *, 16> SizeVec;
345   for (unsigned i = 0; i < StructTy->getNumElements(); ++i) {
346     Type *Ty = StructTy->getElementType(i);
347     std::string Str;
348     raw_string_ostream StrOS(Str);
349     Ty->print(StrOS);
350     TypeNameVec.push_back(
351         ConstantExpr::getPointerCast(
352             createPrivateGlobalForString(M, StrOS.str(), true),
353             Int8PtrTy));
354     OffsetVec.push_back(
355         ConstantInt::get(Int32Ty,
356                          DL.getStructLayout(StructTy)->getElementOffset(i)));
357     SizeVec.push_back(ConstantInt::get(Int32Ty,
358                                        DL.getTypeAllocSize(Ty)));
359     }
360   TypeName->setInitializer(ConstantArray::get(TypeNameArrayTy, TypeNameVec));
361   Offset->setInitializer(ConstantArray::get(OffsetArrayTy, OffsetVec));
362   Size->setInitializer(ConstantArray::get(SizeArrayTy, SizeVec));
363 }
364 
365 // Create the global variable for the cache-fragmentation tool.
createCacheFragInfoGV(Module & M,const DataLayout & DL,Constant * UnitName)366 GlobalVariable *EfficiencySanitizer::createCacheFragInfoGV(
367     Module &M, const DataLayout &DL, Constant *UnitName) {
368   assert(Options.ToolType == EfficiencySanitizerOptions::ESAN_CacheFrag);
369 
370   auto *Int8PtrTy = Type::getInt8PtrTy(*Ctx);
371   auto *Int8PtrPtrTy = Int8PtrTy->getPointerTo();
372   auto *Int32Ty = Type::getInt32Ty(*Ctx);
373   auto *Int32PtrTy = Type::getInt32PtrTy(*Ctx);
374   auto *Int64Ty = Type::getInt64Ty(*Ctx);
375   auto *Int64PtrTy = Type::getInt64PtrTy(*Ctx);
376   // This structure should be kept consistent with the StructInfo struct
377   // in the runtime library.
378   // struct StructInfo {
379   //   const char *StructName;
380   //   u32 Size;
381   //   u32 NumFields;
382   //   u32 *FieldOffset;           // auxiliary struct field info.
383   //   u32 *FieldSize;             // auxiliary struct field info.
384   //   const char **FieldTypeName; // auxiliary struct field info.
385   //   u64 *FieldCounters;
386   //   u64 *ArrayCounter;
387   // };
388   auto *StructInfoTy =
389     StructType::get(Int8PtrTy, Int32Ty, Int32Ty, Int32PtrTy, Int32PtrTy,
390                     Int8PtrPtrTy, Int64PtrTy, Int64PtrTy, nullptr);
391   auto *StructInfoPtrTy = StructInfoTy->getPointerTo();
392   // This structure should be kept consistent with the CacheFragInfo struct
393   // in the runtime library.
394   // struct CacheFragInfo {
395   //   const char *UnitName;
396   //   u32 NumStructs;
397   //   StructInfo *Structs;
398   // };
399   auto *CacheFragInfoTy =
400     StructType::get(Int8PtrTy, Int32Ty, StructInfoPtrTy, nullptr);
401 
402   std::vector<StructType *> Vec = M.getIdentifiedStructTypes();
403   unsigned NumStructs = 0;
404   SmallVector<Constant *, 16> Initializers;
405 
406   for (auto &StructTy : Vec) {
407     if (shouldIgnoreStructType(StructTy)) {
408       ++NumIgnoredStructs;
409       continue;
410     }
411     ++NumStructs;
412 
413     // StructName.
414     SmallString<MaxStructCounterNameSize> CounterNameStr;
415     createStructCounterName(StructTy, CounterNameStr);
416     GlobalVariable *StructCounterName = createPrivateGlobalForString(
417         M, CounterNameStr, /*AllowMerging*/true);
418 
419     // Counters.
420     // We create the counter array with StructCounterName and weak linkage
421     // so that the structs with the same name and layout from different
422     // compilation units will be merged into one.
423     auto *CounterArrayTy = ArrayType::get(Int64Ty,
424                                           getStructCounterSize(StructTy));
425     GlobalVariable *Counters =
426       new GlobalVariable(M, CounterArrayTy, false,
427                          GlobalVariable::WeakAnyLinkage,
428                          ConstantAggregateZero::get(CounterArrayTy),
429                          CounterNameStr);
430 
431     // Remember the counter variable for each struct type.
432     StructTyMap.insert(std::pair<Type *, GlobalVariable *>(StructTy, Counters));
433 
434     // We pass the field type name array, offset array, and size array to
435     // the runtime for better reporting.
436     GlobalVariable *TypeName = nullptr, *Offset = nullptr, *Size = nullptr;
437     if (ClAuxFieldInfo)
438       createCacheFragAuxGV(M, DL, StructTy, TypeName, Offset, Size);
439 
440     Constant *FieldCounterIdx[2];
441     FieldCounterIdx[0] = ConstantInt::get(Int32Ty, 0);
442     FieldCounterIdx[1] = ConstantInt::get(Int32Ty,
443                                           getFieldCounterIdx(StructTy));
444     Constant *ArrayCounterIdx[2];
445     ArrayCounterIdx[0] = ConstantInt::get(Int32Ty, 0);
446     ArrayCounterIdx[1] = ConstantInt::get(Int32Ty,
447                                           getArrayCounterIdx(StructTy));
448     Initializers.push_back(
449         ConstantStruct::get(
450             StructInfoTy,
451             ConstantExpr::getPointerCast(StructCounterName, Int8PtrTy),
452             ConstantInt::get(Int32Ty,
453                              DL.getStructLayout(StructTy)->getSizeInBytes()),
454             ConstantInt::get(Int32Ty, StructTy->getNumElements()),
455             Offset == nullptr ? ConstantPointerNull::get(Int32PtrTy) :
456                 ConstantExpr::getPointerCast(Offset, Int32PtrTy),
457             Size == nullptr ? ConstantPointerNull::get(Int32PtrTy) :
458                 ConstantExpr::getPointerCast(Size, Int32PtrTy),
459             TypeName == nullptr ? ConstantPointerNull::get(Int8PtrPtrTy) :
460                 ConstantExpr::getPointerCast(TypeName, Int8PtrPtrTy),
461             ConstantExpr::getGetElementPtr(CounterArrayTy, Counters,
462                                            FieldCounterIdx),
463             ConstantExpr::getGetElementPtr(CounterArrayTy, Counters,
464                                            ArrayCounterIdx),
465             nullptr));
466   }
467   // Structs.
468   Constant *StructInfo;
469   if (NumStructs == 0) {
470     StructInfo = ConstantPointerNull::get(StructInfoPtrTy);
471   } else {
472     auto *StructInfoArrayTy = ArrayType::get(StructInfoTy, NumStructs);
473     StructInfo = ConstantExpr::getPointerCast(
474         new GlobalVariable(M, StructInfoArrayTy, false,
475                            GlobalVariable::InternalLinkage,
476                            ConstantArray::get(StructInfoArrayTy, Initializers)),
477         StructInfoPtrTy);
478   }
479 
480   auto *CacheFragInfoGV = new GlobalVariable(
481       M, CacheFragInfoTy, true, GlobalVariable::InternalLinkage,
482       ConstantStruct::get(CacheFragInfoTy,
483                           UnitName,
484                           ConstantInt::get(Int32Ty, NumStructs),
485                           StructInfo,
486                           nullptr));
487   return CacheFragInfoGV;
488 }
489 
490 // Create the tool-specific argument passed to EsanInit and EsanExit.
createEsanInitToolInfoArg(Module & M,const DataLayout & DL)491 Constant *EfficiencySanitizer::createEsanInitToolInfoArg(Module &M,
492                                                          const DataLayout &DL) {
493   // This structure contains tool-specific information about each compilation
494   // unit (module) and is passed to the runtime library.
495   GlobalVariable *ToolInfoGV = nullptr;
496 
497   auto *Int8PtrTy = Type::getInt8PtrTy(*Ctx);
498   // Compilation unit name.
499   auto *UnitName = ConstantExpr::getPointerCast(
500       createPrivateGlobalForString(M, M.getModuleIdentifier(), true),
501       Int8PtrTy);
502 
503   // Create the tool-specific variable.
504   if (Options.ToolType == EfficiencySanitizerOptions::ESAN_CacheFrag)
505     ToolInfoGV = createCacheFragInfoGV(M, DL, UnitName);
506 
507   if (ToolInfoGV != nullptr)
508     return ConstantExpr::getPointerCast(ToolInfoGV, Int8PtrTy);
509 
510   // Create the null pointer if no tool-specific variable created.
511   return ConstantPointerNull::get(Int8PtrTy);
512 }
513 
createDestructor(Module & M,Constant * ToolInfoArg)514 void EfficiencySanitizer::createDestructor(Module &M, Constant *ToolInfoArg) {
515   PointerType *Int8PtrTy = Type::getInt8PtrTy(*Ctx);
516   EsanDtorFunction = Function::Create(FunctionType::get(Type::getVoidTy(*Ctx),
517                                                         false),
518                                       GlobalValue::InternalLinkage,
519                                       EsanModuleDtorName, &M);
520   ReturnInst::Create(*Ctx, BasicBlock::Create(*Ctx, "", EsanDtorFunction));
521   IRBuilder<> IRB_Dtor(EsanDtorFunction->getEntryBlock().getTerminator());
522   Function *EsanExit = checkSanitizerInterfaceFunction(
523       M.getOrInsertFunction(EsanExitName, IRB_Dtor.getVoidTy(),
524                             Int8PtrTy, nullptr));
525   EsanExit->setLinkage(Function::ExternalLinkage);
526   IRB_Dtor.CreateCall(EsanExit, {ToolInfoArg});
527   appendToGlobalDtors(M, EsanDtorFunction, EsanCtorAndDtorPriority);
528 }
529 
initOnModule(Module & M)530 bool EfficiencySanitizer::initOnModule(Module &M) {
531   Ctx = &M.getContext();
532   const DataLayout &DL = M.getDataLayout();
533   IRBuilder<> IRB(M.getContext());
534   IntegerType *OrdTy = IRB.getInt32Ty();
535   PointerType *Int8PtrTy = Type::getInt8PtrTy(*Ctx);
536   IntptrTy = DL.getIntPtrType(M.getContext());
537   // Create the variable passed to EsanInit and EsanExit.
538   Constant *ToolInfoArg = createEsanInitToolInfoArg(M, DL);
539   // Constructor
540   // We specify the tool type both in the EsanWhichToolName global
541   // and as an arg to the init routine as a sanity check.
542   std::tie(EsanCtorFunction, std::ignore) = createSanitizerCtorAndInitFunctions(
543       M, EsanModuleCtorName, EsanInitName, /*InitArgTypes=*/{OrdTy, Int8PtrTy},
544       /*InitArgs=*/{
545         ConstantInt::get(OrdTy, static_cast<int>(Options.ToolType)),
546         ToolInfoArg});
547   appendToGlobalCtors(M, EsanCtorFunction, EsanCtorAndDtorPriority);
548 
549   createDestructor(M, ToolInfoArg);
550 
551   new GlobalVariable(M, OrdTy, true,
552                      GlobalValue::WeakAnyLinkage,
553                      ConstantInt::get(OrdTy,
554                                       static_cast<int>(Options.ToolType)),
555                      EsanWhichToolName);
556 
557   return true;
558 }
559 
appToShadow(Value * Shadow,IRBuilder<> & IRB)560 Value *EfficiencySanitizer::appToShadow(Value *Shadow, IRBuilder<> &IRB) {
561   // Shadow = ((App & Mask) + Offs) >> Scale
562   Shadow = IRB.CreateAnd(Shadow, ConstantInt::get(IntptrTy, ShadowMask));
563   uint64_t Offs;
564   int Scale = ShadowScale[Options.ToolType];
565   if (Scale <= 2)
566     Offs = ShadowOffs[Scale];
567   else
568     Offs = ShadowOffs[0] << Scale;
569   Shadow = IRB.CreateAdd(Shadow, ConstantInt::get(IntptrTy, Offs));
570   if (Scale > 0)
571     Shadow = IRB.CreateLShr(Shadow, Scale);
572   return Shadow;
573 }
574 
shouldIgnoreMemoryAccess(Instruction * I)575 bool EfficiencySanitizer::shouldIgnoreMemoryAccess(Instruction *I) {
576   if (Options.ToolType == EfficiencySanitizerOptions::ESAN_CacheFrag) {
577     // We'd like to know about cache fragmentation in vtable accesses and
578     // constant data references, so we do not currently ignore anything.
579     return false;
580   } else if (Options.ToolType == EfficiencySanitizerOptions::ESAN_WorkingSet) {
581     // TODO: the instrumentation disturbs the data layout on the stack, so we
582     // may want to add an option to ignore stack references (if we can
583     // distinguish them) to reduce overhead.
584   }
585   // TODO(bruening): future tools will be returning true for some cases.
586   return false;
587 }
588 
runOnModule(Module & M)589 bool EfficiencySanitizer::runOnModule(Module &M) {
590   bool Res = initOnModule(M);
591   initializeCallbacks(M);
592   for (auto &F : M) {
593     Res |= runOnFunction(F, M);
594   }
595   return Res;
596 }
597 
runOnFunction(Function & F,Module & M)598 bool EfficiencySanitizer::runOnFunction(Function &F, Module &M) {
599   // This is required to prevent instrumenting the call to __esan_init from
600   // within the module constructor.
601   if (&F == EsanCtorFunction)
602     return false;
603   SmallVector<Instruction *, 8> LoadsAndStores;
604   SmallVector<Instruction *, 8> MemIntrinCalls;
605   SmallVector<Instruction *, 8> GetElementPtrs;
606   bool Res = false;
607   const DataLayout &DL = M.getDataLayout();
608   const TargetLibraryInfo *TLI =
609       &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
610 
611   for (auto &BB : F) {
612     for (auto &Inst : BB) {
613       if ((isa<LoadInst>(Inst) || isa<StoreInst>(Inst) ||
614            isa<AtomicRMWInst>(Inst) || isa<AtomicCmpXchgInst>(Inst)) &&
615           !shouldIgnoreMemoryAccess(&Inst))
616         LoadsAndStores.push_back(&Inst);
617       else if (isa<MemIntrinsic>(Inst))
618         MemIntrinCalls.push_back(&Inst);
619       else if (isa<GetElementPtrInst>(Inst))
620         GetElementPtrs.push_back(&Inst);
621       else if (CallInst *CI = dyn_cast<CallInst>(&Inst))
622         maybeMarkSanitizerLibraryCallNoBuiltin(CI, TLI);
623     }
624   }
625 
626   if (ClInstrumentLoadsAndStores) {
627     for (auto Inst : LoadsAndStores) {
628       Res |= instrumentLoadOrStore(Inst, DL);
629     }
630   }
631 
632   if (ClInstrumentMemIntrinsics) {
633     for (auto Inst : MemIntrinCalls) {
634       Res |= instrumentMemIntrinsic(cast<MemIntrinsic>(Inst));
635     }
636   }
637 
638   if (Options.ToolType == EfficiencySanitizerOptions::ESAN_CacheFrag) {
639     for (auto Inst : GetElementPtrs) {
640       Res |= instrumentGetElementPtr(Inst, M);
641     }
642   }
643 
644   return Res;
645 }
646 
instrumentLoadOrStore(Instruction * I,const DataLayout & DL)647 bool EfficiencySanitizer::instrumentLoadOrStore(Instruction *I,
648                                                 const DataLayout &DL) {
649   IRBuilder<> IRB(I);
650   bool IsStore;
651   Value *Addr;
652   unsigned Alignment;
653   if (LoadInst *Load = dyn_cast<LoadInst>(I)) {
654     IsStore = false;
655     Alignment = Load->getAlignment();
656     Addr = Load->getPointerOperand();
657   } else if (StoreInst *Store = dyn_cast<StoreInst>(I)) {
658     IsStore = true;
659     Alignment = Store->getAlignment();
660     Addr = Store->getPointerOperand();
661   } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
662     IsStore = true;
663     Alignment = 0;
664     Addr = RMW->getPointerOperand();
665   } else if (AtomicCmpXchgInst *Xchg = dyn_cast<AtomicCmpXchgInst>(I)) {
666     IsStore = true;
667     Alignment = 0;
668     Addr = Xchg->getPointerOperand();
669   } else
670     llvm_unreachable("Unsupported mem access type");
671 
672   Type *OrigTy = cast<PointerType>(Addr->getType())->getElementType();
673   const uint32_t TypeSizeBytes = DL.getTypeStoreSizeInBits(OrigTy) / 8;
674   Value *OnAccessFunc = nullptr;
675 
676   // Convert 0 to the default alignment.
677   if (Alignment == 0)
678     Alignment = DL.getPrefTypeAlignment(OrigTy);
679 
680   if (IsStore)
681     NumInstrumentedStores++;
682   else
683     NumInstrumentedLoads++;
684   int Idx = getMemoryAccessFuncIndex(Addr, DL);
685   if (Idx < 0) {
686     OnAccessFunc = IsStore ? EsanUnalignedStoreN : EsanUnalignedLoadN;
687     IRB.CreateCall(OnAccessFunc,
688                    {IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()),
689                     ConstantInt::get(IntptrTy, TypeSizeBytes)});
690   } else {
691     if (ClInstrumentFastpath &&
692         instrumentFastpath(I, DL, IsStore, Addr, Alignment)) {
693       NumFastpaths++;
694       return true;
695     }
696     if (Alignment == 0 || (Alignment % TypeSizeBytes) == 0)
697       OnAccessFunc = IsStore ? EsanAlignedStore[Idx] : EsanAlignedLoad[Idx];
698     else
699       OnAccessFunc = IsStore ? EsanUnalignedStore[Idx] : EsanUnalignedLoad[Idx];
700     IRB.CreateCall(OnAccessFunc,
701                    IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()));
702   }
703   return true;
704 }
705 
706 // It's simplest to replace the memset/memmove/memcpy intrinsics with
707 // calls that the runtime library intercepts.
708 // Our pass is late enough that calls should not turn back into intrinsics.
instrumentMemIntrinsic(MemIntrinsic * MI)709 bool EfficiencySanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) {
710   IRBuilder<> IRB(MI);
711   bool Res = false;
712   if (isa<MemSetInst>(MI)) {
713     IRB.CreateCall(
714         MemsetFn,
715         {IRB.CreatePointerCast(MI->getArgOperand(0), IRB.getInt8PtrTy()),
716          IRB.CreateIntCast(MI->getArgOperand(1), IRB.getInt32Ty(), false),
717          IRB.CreateIntCast(MI->getArgOperand(2), IntptrTy, false)});
718     MI->eraseFromParent();
719     Res = true;
720   } else if (isa<MemTransferInst>(MI)) {
721     IRB.CreateCall(
722         isa<MemCpyInst>(MI) ? MemcpyFn : MemmoveFn,
723         {IRB.CreatePointerCast(MI->getArgOperand(0), IRB.getInt8PtrTy()),
724          IRB.CreatePointerCast(MI->getArgOperand(1), IRB.getInt8PtrTy()),
725          IRB.CreateIntCast(MI->getArgOperand(2), IntptrTy, false)});
726     MI->eraseFromParent();
727     Res = true;
728   } else
729     llvm_unreachable("Unsupported mem intrinsic type");
730   return Res;
731 }
732 
instrumentGetElementPtr(Instruction * I,Module & M)733 bool EfficiencySanitizer::instrumentGetElementPtr(Instruction *I, Module &M) {
734   GetElementPtrInst *GepInst = dyn_cast<GetElementPtrInst>(I);
735   bool Res = false;
736   if (GepInst == nullptr || GepInst->getNumIndices() == 1) {
737     ++NumIgnoredGEPs;
738     return false;
739   }
740   Type *SourceTy = GepInst->getSourceElementType();
741   StructType *StructTy;
742   ConstantInt *Idx;
743   // Check if GEP calculates address from a struct array.
744   if (isa<StructType>(SourceTy)) {
745     StructTy = cast<StructType>(SourceTy);
746     Idx = dyn_cast<ConstantInt>(GepInst->getOperand(1));
747     if ((Idx == nullptr || Idx->getSExtValue() != 0) &&
748         !shouldIgnoreStructType(StructTy) && StructTyMap.count(StructTy) != 0)
749       Res |= insertCounterUpdate(I, StructTy, getArrayCounterIdx(StructTy));
750   }
751   // Iterate all (except the first and the last) idx within each GEP instruction
752   // for possible nested struct field address calculation.
753   for (unsigned i = 1; i < GepInst->getNumIndices(); ++i) {
754     SmallVector<Value *, 8> IdxVec(GepInst->idx_begin(),
755                                    GepInst->idx_begin() + i);
756     Type *Ty = GetElementPtrInst::getIndexedType(SourceTy, IdxVec);
757     unsigned CounterIdx = 0;
758     if (isa<ArrayType>(Ty)) {
759       ArrayType *ArrayTy = cast<ArrayType>(Ty);
760       StructTy = dyn_cast<StructType>(ArrayTy->getElementType());
761       if (shouldIgnoreStructType(StructTy) || StructTyMap.count(StructTy) == 0)
762         continue;
763       // The last counter for struct array access.
764       CounterIdx = getArrayCounterIdx(StructTy);
765     } else if (isa<StructType>(Ty)) {
766       StructTy = cast<StructType>(Ty);
767       if (shouldIgnoreStructType(StructTy) || StructTyMap.count(StructTy) == 0)
768         continue;
769       // Get the StructTy's subfield index.
770       Idx = cast<ConstantInt>(GepInst->getOperand(i+1));
771       assert(Idx->getSExtValue() >= 0 &&
772              Idx->getSExtValue() < StructTy->getNumElements());
773       CounterIdx = getFieldCounterIdx(StructTy) + Idx->getSExtValue();
774     }
775     Res |= insertCounterUpdate(I, StructTy, CounterIdx);
776   }
777   if (Res)
778     ++NumInstrumentedGEPs;
779   else
780     ++NumIgnoredGEPs;
781   return Res;
782 }
783 
insertCounterUpdate(Instruction * I,StructType * StructTy,unsigned CounterIdx)784 bool EfficiencySanitizer::insertCounterUpdate(Instruction *I,
785                                               StructType *StructTy,
786                                               unsigned CounterIdx) {
787   GlobalVariable *CounterArray = StructTyMap[StructTy];
788   if (CounterArray == nullptr)
789     return false;
790   IRBuilder<> IRB(I);
791   Constant *Indices[2];
792   // Xref http://llvm.org/docs/LangRef.html#i-getelementptr and
793   // http://llvm.org/docs/GetElementPtr.html.
794   // The first index of the GEP instruction steps through the first operand,
795   // i.e., the array itself.
796   Indices[0] = ConstantInt::get(IRB.getInt32Ty(), 0);
797   // The second index is the index within the array.
798   Indices[1] = ConstantInt::get(IRB.getInt32Ty(), CounterIdx);
799   Constant *Counter =
800     ConstantExpr::getGetElementPtr(
801         ArrayType::get(IRB.getInt64Ty(), getStructCounterSize(StructTy)),
802         CounterArray, Indices);
803   Value *Load = IRB.CreateLoad(Counter);
804   IRB.CreateStore(IRB.CreateAdd(Load, ConstantInt::get(IRB.getInt64Ty(), 1)),
805                   Counter);
806   return true;
807 }
808 
getMemoryAccessFuncIndex(Value * Addr,const DataLayout & DL)809 int EfficiencySanitizer::getMemoryAccessFuncIndex(Value *Addr,
810                                                   const DataLayout &DL) {
811   Type *OrigPtrTy = Addr->getType();
812   Type *OrigTy = cast<PointerType>(OrigPtrTy)->getElementType();
813   assert(OrigTy->isSized());
814   // The size is always a multiple of 8.
815   uint32_t TypeSizeBytes = DL.getTypeStoreSizeInBits(OrigTy) / 8;
816   if (TypeSizeBytes != 1 && TypeSizeBytes != 2 && TypeSizeBytes != 4 &&
817       TypeSizeBytes != 8 && TypeSizeBytes != 16) {
818     // Irregular sizes do not have per-size call targets.
819     NumAccessesWithIrregularSize++;
820     return -1;
821   }
822   size_t Idx = countTrailingZeros(TypeSizeBytes);
823   assert(Idx < NumberOfAccessSizes);
824   return Idx;
825 }
826 
instrumentFastpath(Instruction * I,const DataLayout & DL,bool IsStore,Value * Addr,unsigned Alignment)827 bool EfficiencySanitizer::instrumentFastpath(Instruction *I,
828                                              const DataLayout &DL, bool IsStore,
829                                              Value *Addr, unsigned Alignment) {
830   if (Options.ToolType == EfficiencySanitizerOptions::ESAN_CacheFrag) {
831     return instrumentFastpathCacheFrag(I, DL, Addr, Alignment);
832   } else if (Options.ToolType == EfficiencySanitizerOptions::ESAN_WorkingSet) {
833     return instrumentFastpathWorkingSet(I, DL, Addr, Alignment);
834   }
835   return false;
836 }
837 
instrumentFastpathCacheFrag(Instruction * I,const DataLayout & DL,Value * Addr,unsigned Alignment)838 bool EfficiencySanitizer::instrumentFastpathCacheFrag(Instruction *I,
839                                                       const DataLayout &DL,
840                                                       Value *Addr,
841                                                       unsigned Alignment) {
842   // Do nothing.
843   return true; // Return true to avoid slowpath instrumentation.
844 }
845 
instrumentFastpathWorkingSet(Instruction * I,const DataLayout & DL,Value * Addr,unsigned Alignment)846 bool EfficiencySanitizer::instrumentFastpathWorkingSet(
847     Instruction *I, const DataLayout &DL, Value *Addr, unsigned Alignment) {
848   assert(ShadowScale[Options.ToolType] == 6); // The code below assumes this
849   IRBuilder<> IRB(I);
850   Type *OrigTy = cast<PointerType>(Addr->getType())->getElementType();
851   const uint32_t TypeSize = DL.getTypeStoreSizeInBits(OrigTy);
852   // Bail to the slowpath if the access might touch multiple cache lines.
853   // An access aligned to its size is guaranteed to be intra-cache-line.
854   // getMemoryAccessFuncIndex has already ruled out a size larger than 16
855   // and thus larger than a cache line for platforms this tool targets
856   // (and our shadow memory setup assumes 64-byte cache lines).
857   assert(TypeSize <= 128);
858   if (!(TypeSize == 8 ||
859         (Alignment % (TypeSize / 8)) == 0)) {
860     if (ClAssumeIntraCacheLine)
861       ++NumAssumedIntraCacheLine;
862     else
863       return false;
864   }
865 
866   // We inline instrumentation to set the corresponding shadow bits for
867   // each cache line touched by the application.  Here we handle a single
868   // load or store where we've already ruled out the possibility that it
869   // might touch more than one cache line and thus we simply update the
870   // shadow memory for a single cache line.
871   // Our shadow memory model is fine with races when manipulating shadow values.
872   // We generate the following code:
873   //
874   //   const char BitMask = 0x81;
875   //   char *ShadowAddr = appToShadow(AppAddr);
876   //   if ((*ShadowAddr & BitMask) != BitMask)
877   //     *ShadowAddr |= Bitmask;
878   //
879   Value *AddrPtr = IRB.CreatePointerCast(Addr, IntptrTy);
880   Value *ShadowPtr = appToShadow(AddrPtr, IRB);
881   Type *ShadowTy = IntegerType::get(*Ctx, 8U);
882   Type *ShadowPtrTy = PointerType::get(ShadowTy, 0);
883   // The bottom bit is used for the current sampling period's working set.
884   // The top bit is used for the total working set.  We set both on each
885   // memory access, if they are not already set.
886   Value *ValueMask = ConstantInt::get(ShadowTy, 0x81); // 10000001B
887 
888   Value *OldValue = IRB.CreateLoad(IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy));
889   // The AND and CMP will be turned into a TEST instruction by the compiler.
890   Value *Cmp = IRB.CreateICmpNE(IRB.CreateAnd(OldValue, ValueMask), ValueMask);
891   TerminatorInst *CmpTerm = SplitBlockAndInsertIfThen(Cmp, I, false);
892   // FIXME: do I need to call SetCurrentDebugLocation?
893   IRB.SetInsertPoint(CmpTerm);
894   // We use OR to set the shadow bits to avoid corrupting the middle 6 bits,
895   // which are used by the runtime library.
896   Value *NewVal = IRB.CreateOr(OldValue, ValueMask);
897   IRB.CreateStore(NewVal, IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy));
898   IRB.SetInsertPoint(I);
899 
900   return true;
901 }
902