//===- AArch64StackTagging.cpp - Stack tagging in IR --===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AArch64.h"
#include "AArch64InstrInfo.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/LiveRegUnits.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/Metadata.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
#include <iterator>
#include <map>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "stack-tagging"

static cl::opt<bool> ClMergeInit(
    "stack-tagging-merge-init", cl::Hidden, cl::init(true), cl::ZeroOrMore,
    cl::desc("merge stack variable initializers with tagging when possible"));

static cl::opt<unsigned> ClScanLimit("stack-tagging-merge-init-scan-limit",
                                     cl::init(40), cl::Hidden);

static const Align kTagGranuleSize = Align(16);

namespace {

class InitializerBuilder {
  uint64_t Size;
  const DataLayout *DL;
  Value *BasePtr;
  Function *SetTagFn;
  Function *SetTagZeroFn;
  Function *StgpFn;

  // List of initializers sorted by start offset.
  struct Range {
    uint64_t Start, End;
    Instruction *Inst;
  };
  SmallVector<Range, 4> Ranges;
  // 8-aligned offset => 8-byte initializer
  // Missing keys are zero initialized.
  std::map<uint64_t, Value *> Out;

public:
  InitializerBuilder(uint64_t Size, const DataLayout *DL, Value *BasePtr,
                     Function *SetTagFn, Function *SetTagZeroFn,
                     Function *StgpFn)
      : Size(Size), DL(DL), BasePtr(BasePtr), SetTagFn(SetTagFn),
        SetTagZeroFn(SetTagZeroFn), StgpFn(StgpFn) {}

  bool addRange(uint64_t Start, uint64_t End, Instruction *Inst) {
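    // Ranges is kept sorted, so the first range whose End is greater than
    // Start is the only candidate that could overlap [Start, End).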
    auto I = std::lower_bound(
        Ranges.begin(), Ranges.end(), Start,
        [](const Range &LHS, uint64_t RHS) { return LHS.End <= RHS; });
    if (I != Ranges.end() && End > I->Start) {
      // Overlap - bail.
      return false;
    }
    Ranges.insert(I, {Start, End, Inst});
    return true;
  }

  bool addStore(uint64_t Offset, StoreInst *SI, const DataLayout *DL) {
    int64_t StoreSize = DL->getTypeStoreSize(SI->getOperand(0)->getType());
    if (!addRange(Offset, Offset + StoreSize, SI))
      return false;
    IRBuilder<> IRB(SI);
    applyStore(IRB, Offset, Offset + StoreSize, SI->getOperand(0));
    return true;
  }

  bool addMemSet(uint64_t Offset, MemSetInst *MSI) {
    uint64_t StoreSize = cast<ConstantInt>(MSI->getLength())->getZExtValue();
    if (!addRange(Offset, Offset + StoreSize, MSI))
      return false;
    IRBuilder<> IRB(MSI);
    applyMemSet(IRB, Offset, Offset + StoreSize,
                cast<ConstantInt>(MSI->getValue()));
    return true;
  }

  void applyMemSet(IRBuilder<> &IRB, int64_t Start, int64_t End,
                   ConstantInt *V) {
    // Out[] does not distinguish between zero and undef, and we already know
    // that this memset does not overlap with any other initializer. Nothing to
    // do for memset(0).
    if (V->isZero())
      return;
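    // Build each 8-byte word by replicating the memset byte with a
    // 0x0101010101010101 multiply, then mask off the bytes outside
    // [Start, End). Little-endian: lower addresses occupy lower bits, so a
    // partial word at the front clears LowBits and one at the back clears
    // HighBits. E.g. a memset over [3, 13) emits words at offsets 0 and 8
    // with bytes 0-2 and 13-15 zeroed, respectively.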
    for (int64_t Offset = Start - Start % 8; Offset < End; Offset += 8) {
      uint64_t Cst = 0x0101010101010101UL;
      int LowBits = Offset < Start ? (Start - Offset) * 8 : 0;
      if (LowBits)
        Cst = (Cst >> LowBits) << LowBits;
      int HighBits = End - Offset < 8 ? (8 - (End - Offset)) * 8 : 0;
      if (HighBits)
        Cst = (Cst << HighBits) >> HighBits;
      ConstantInt *C =
          ConstantInt::get(IRB.getInt64Ty(), Cst * V->getZExtValue());

      Value *&CurrentV = Out[Offset];
      if (!CurrentV) {
        CurrentV = C;
      } else {
        CurrentV = IRB.CreateOr(CurrentV, C);
      }
    }
  }

  // Take a 64-bit slice of the value starting at the given offset (in bytes).
  // Offset can be negative. Pad with zeroes on both sides when necessary.
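  // E.g. an i64 V stored at byte 4 of an 8-aligned region contributes
  // sliceValue(V, -4) (low half of V, shifted into the high bytes) to the
  // word at offset 0, and sliceValue(V, 4) (high half of V, in the low
  // bytes) to the word at offset 8.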
  Value *sliceValue(IRBuilder<> &IRB, Value *V, int64_t Offset) {
    if (Offset > 0) {
      V = IRB.CreateLShr(V, Offset * 8);
      V = IRB.CreateZExtOrTrunc(V, IRB.getInt64Ty());
    } else if (Offset < 0) {
      V = IRB.CreateZExtOrTrunc(V, IRB.getInt64Ty());
      V = IRB.CreateShl(V, -Offset * 8);
    } else {
      V = IRB.CreateZExtOrTrunc(V, IRB.getInt64Ty());
    }
    return V;
  }

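  // Flatten the stored value to an integer and OR its 8-byte slices into the
  // affected words of Out[].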
  void applyStore(IRBuilder<> &IRB, int64_t Start, int64_t End,
                  Value *StoredValue) {
    StoredValue = flatten(IRB, StoredValue);
    for (int64_t Offset = Start - Start % 8; Offset < End; Offset += 8) {
      Value *V = sliceValue(IRB, StoredValue, Offset - Start);
      Value *&CurrentV = Out[Offset];
      if (!CurrentV) {
        CurrentV = V;
      } else {
        CurrentV = IRB.CreateOr(CurrentV, V);
      }
    }
  }

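  // Emit the combined initializer: STGP for 16-byte chunks that carry data,
  // settag_zero for zero runs, settag when the whole allocation is undef;
  // then erase the original stores and memsets.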
  void generate(IRBuilder<> &IRB) {
    LLVM_DEBUG(dbgs() << "Combined initializer\n");
    // No initializers => the entire allocation is undef.
    if (Ranges.empty()) {
      emitUndef(IRB, 0, Size);
      return;
    }

    // Look through the 8-byte initializer list 16 bytes at a time;
    // if one of the two 8-byte halves is non-zero non-undef, emit STGP.
    // Otherwise, emit zeroes up to the next available item.
    uint64_t LastOffset = 0;
    for (uint64_t Offset = 0; Offset < Size; Offset += 16) {
      auto I1 = Out.find(Offset);
      auto I2 = Out.find(Offset + 8);
      if (I1 == Out.end() && I2 == Out.end())
        continue;

      if (Offset > LastOffset)
        emitZeroes(IRB, LastOffset, Offset - LastOffset);

      Value *Store1 = I1 == Out.end() ? Constant::getNullValue(IRB.getInt64Ty())
                                      : I1->second;
      Value *Store2 = I2 == Out.end() ? Constant::getNullValue(IRB.getInt64Ty())
                                      : I2->second;
      emitPair(IRB, Offset, Store1, Store2);
      LastOffset = Offset + 16;
    }

    // memset(0) does not update Out[], therefore the tail can be either undef
    // or zero.
    if (LastOffset < Size)
      emitZeroes(IRB, LastOffset, Size - LastOffset);

    for (const auto &R : Ranges) {
      R.Inst->eraseFromParent();
    }
  }

  void emitZeroes(IRBuilder<> &IRB, uint64_t Offset, uint64_t Size) {
    LLVM_DEBUG(dbgs() << "  [" << Offset << ", " << Offset + Size
                      << ") zero\n");
    Value *Ptr = BasePtr;
    if (Offset)
      Ptr = IRB.CreateConstGEP1_32(Ptr, Offset);
    IRB.CreateCall(SetTagZeroFn,
                   {Ptr, ConstantInt::get(IRB.getInt64Ty(), Size)});
  }

  void emitUndef(IRBuilder<> &IRB, uint64_t Offset, uint64_t Size) {
    LLVM_DEBUG(dbgs() << "  [" << Offset << ", " << Offset + Size
                      << ") undef\n");
    Value *Ptr = BasePtr;
    if (Offset)
      Ptr = IRB.CreateConstGEP1_32(Ptr, Offset);
    IRB.CreateCall(SetTagFn, {Ptr, ConstantInt::get(IRB.getInt64Ty(), Size)});
  }

  void emitPair(IRBuilder<> &IRB, uint64_t Offset, Value *A, Value *B) {
    LLVM_DEBUG(dbgs() << "  [" << Offset << ", " << Offset + 16 << "):\n");
    LLVM_DEBUG(dbgs() << "    " << *A << "\n    " << *B << "\n");
    Value *Ptr = BasePtr;
    if (Offset)
      Ptr = IRB.CreateConstGEP1_32(Ptr, Offset);
    IRB.CreateCall(StgpFn, {Ptr, A, B});
  }

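  // Convert an arbitrary value to an integer of the same store size so it
  // can be sliced into 64-bit words.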
  Value *flatten(IRBuilder<> &IRB, Value *V) {
    if (V->getType()->isIntegerTy())
      return V;
    // vector of pointers -> vector of ints
    if (VectorType *VecTy = dyn_cast<VectorType>(V->getType())) {
      LLVMContext &Ctx = IRB.getContext();
      Type *EltTy = VecTy->getElementType();
      if (EltTy->isPointerTy()) {
        uint32_t EltSize = DL->getTypeSizeInBits(EltTy);
        Type *NewTy = VectorType::get(IntegerType::get(Ctx, EltSize),
                                      VecTy->getNumElements());
        V = IRB.CreatePointerCast(V, NewTy);
      }
    }
    return IRB.CreateBitOrPointerCast(
        V, IRB.getIntNTy(DL->getTypeStoreSize(V->getType()) * 8));
  }
};

class AArch64StackTagging : public FunctionPass {
  struct AllocaInfo {
    AllocaInst *AI;
    SmallVector<IntrinsicInst *, 2> LifetimeStart;
    SmallVector<IntrinsicInst *, 2> LifetimeEnd;
    SmallVector<DbgVariableIntrinsic *, 2> DbgVariableIntrinsics;
    int Tag; // -1 for non-tagged allocations
  };

  bool MergeInit;

public:
  static char ID; // Pass ID, replacement for typeid

  AArch64StackTagging(bool MergeInit = true)
      : FunctionPass(ID),
        MergeInit(ClMergeInit.getNumOccurrences() > 0 ? ClMergeInit
                                                      : MergeInit) {
    initializeAArch64StackTaggingPass(*PassRegistry::getPassRegistry());
  }

  bool isInterestingAlloca(const AllocaInst &AI);
  void alignAndPadAlloca(AllocaInfo &Info);

  void tagAlloca(AllocaInst *AI, Instruction *InsertBefore, Value *Ptr,
                 uint64_t Size);
  void untagAlloca(AllocaInst *AI, Instruction *InsertBefore, uint64_t Size);

  Instruction *collectInitializers(Instruction *StartInst, Value *StartPtr,
                                   uint64_t Size, InitializerBuilder &IB);

  Instruction *
  insertBaseTaggedPointer(const MapVector<AllocaInst *, AllocaInfo> &Allocas,
                          const DominatorTree *DT);
  bool runOnFunction(Function &F) override;

  StringRef getPassName() const override { return "AArch64 Stack Tagging"; }

private:
  Function *F;
  Function *SetTagFunc;
  const DataLayout *DL;
  AAResults *AA;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    if (MergeInit)
      AU.addRequired<AAResultsWrapperPass>();
  }
};

} // end anonymous namespace

char AArch64StackTagging::ID = 0;

INITIALIZE_PASS_BEGIN(AArch64StackTagging, DEBUG_TYPE, "AArch64 Stack Tagging",
                      false, false)
INITIALIZE_PASS_END(AArch64StackTagging, DEBUG_TYPE, "AArch64 Stack Tagging",
                    false, false)

FunctionPass *llvm::createAArch64StackTaggingPass(bool MergeInit) {
  return new AArch64StackTagging(MergeInit);
}

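// Walk forward from StartInst collecting stores and constant memsets at
// constant offsets from StartPtr and feed them to the InitializerBuilder.
// Stops at the first instruction that may read or write the alloca in an
// unsupported way, or after ClScanLimit non-debug instructions; returns the
// last instruction merged into the initializer.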
Instruction *AArch64StackTagging::collectInitializers(Instruction *StartInst,
                                                      Value *StartPtr,
                                                      uint64_t Size,
                                                      InitializerBuilder &IB) {
  MemoryLocation AllocaLoc{StartPtr, Size};
  Instruction *LastInst = StartInst;
  BasicBlock::iterator BI(StartInst);

  unsigned Count = 0;
  for (; Count < ClScanLimit && !BI->isTerminator(); ++BI) {
    if (!isa<DbgInfoIntrinsic>(*BI))
      ++Count;

    if (isNoModRef(AA->getModRefInfo(&*BI, AllocaLoc)))
      continue;

    if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) {
      // If the instruction is readnone, ignore it, otherwise bail out. We
      // don't even allow readonly here because we don't want something like:
      // A[1] = 2; strlen(A); A[2] = 2; -> memcpy(A, ...); strlen(A).
      if (BI->mayWriteToMemory() || BI->mayReadFromMemory())
        break;
      continue;
    }

    if (StoreInst *NextStore = dyn_cast<StoreInst>(BI)) {
      if (!NextStore->isSimple())
        break;

      // Check to see if this store is to a constant offset from the start ptr.
      Optional<int64_t> Offset =
          isPointerOffset(StartPtr, NextStore->getPointerOperand(), *DL);
      if (!Offset)
        break;

      if (!IB.addStore(*Offset, NextStore, DL))
        break;
      LastInst = NextStore;
    } else {
      MemSetInst *MSI = cast<MemSetInst>(BI);

      if (MSI->isVolatile() || !isa<ConstantInt>(MSI->getLength()))
        break;

      if (!isa<ConstantInt>(MSI->getValue()))
        break;

      // Check to see if this store is to a constant offset from the start ptr.
      Optional<int64_t> Offset = isPointerOffset(StartPtr, MSI->getDest(), *DL);
      if (!Offset)
        break;

      if (!IB.addMemSet(*Offset, MSI))
        break;
      LastInst = MSI;
    }
  }
  return LastInst;
}

bool AArch64StackTagging::isInterestingAlloca(const AllocaInst &AI) {
  // FIXME: support dynamic allocas
  bool IsInteresting =
      AI.getAllocatedType()->isSized() && AI.isStaticAlloca() &&
      // alloca() may be called with 0 size, ignore it.
      AI.getAllocationSizeInBits(*DL).getValue() > 0 &&
      // inalloca allocas are not treated as static, and we don't want
      // dynamic alloca instrumentation for them either.
      !AI.isUsedWithInAlloca() &&
      // swifterror allocas are register promoted by ISel
      !AI.isSwiftError();
  return IsInteresting;
}

void AArch64StackTagging::tagAlloca(AllocaInst *AI, Instruction *InsertBefore,
                                    Value *Ptr, uint64_t Size) {
  auto SetTagZeroFunc =
      Intrinsic::getDeclaration(F->getParent(), Intrinsic::aarch64_settag_zero);
  auto StgpFunc =
      Intrinsic::getDeclaration(F->getParent(), Intrinsic::aarch64_stgp);

  InitializerBuilder IB(Size, DL, Ptr, SetTagFunc, SetTagZeroFunc, StgpFunc);
  bool LittleEndian =
      Triple(AI->getModule()->getTargetTriple()).isLittleEndian();
  // Current implementation of initializer merging assumes little endianness.
  if (MergeInit && !F->hasOptNone() && LittleEndian) {
    LLVM_DEBUG(dbgs() << "collecting initializers for " << *AI
                      << ", size = " << Size << "\n");
    InsertBefore = collectInitializers(InsertBefore, Ptr, Size, IB);
  }

  IRBuilder<> IRB(InsertBefore);
  IB.generate(IRB);
}

void AArch64StackTagging::untagAlloca(AllocaInst *AI, Instruction *InsertBefore,
                                      uint64_t Size) {
  IRBuilder<> IRB(InsertBefore);
  IRB.CreateCall(SetTagFunc, {IRB.CreatePointerCast(AI, IRB.getInt8PtrTy()),
                              ConstantInt::get(IRB.getInt64Ty(), Size)});
}

Instruction *AArch64StackTagging::insertBaseTaggedPointer(
    const MapVector<AllocaInst *, AllocaInfo> &Allocas,
    const DominatorTree *DT) {
  BasicBlock *PrologueBB = nullptr;
  // Try sinking IRG as deep as possible to avoid hurting shrink wrap.
  for (auto &I : Allocas) {
    const AllocaInfo &Info = I.second;
    AllocaInst *AI = Info.AI;
    if (Info.Tag < 0)
      continue;
    if (!PrologueBB) {
      PrologueBB = AI->getParent();
      continue;
    }
    PrologueBB = DT->findNearestCommonDominator(PrologueBB, AI->getParent());
  }
  assert(PrologueBB);

  IRBuilder<> IRB(&PrologueBB->front());
  Function *IRG_SP =
      Intrinsic::getDeclaration(F->getParent(), Intrinsic::aarch64_irg_sp);
  Instruction *Base =
      IRB.CreateCall(IRG_SP, {Constant::getNullValue(IRB.getInt64Ty())});
  Base->setName("basetag");
  return Base;
}

void AArch64StackTagging::alignAndPadAlloca(AllocaInfo &Info) {
  const Align NewAlignment =
      max(MaybeAlign(Info.AI->getAlignment()), kTagGranuleSize);
  Info.AI->setAlignment(NewAlignment);

  uint64_t Size = Info.AI->getAllocationSizeInBits(*DL).getValue() / 8;
  uint64_t AlignedSize = alignTo(Size, kTagGranuleSize);
  if (Size == AlignedSize)
    return;

  // Add padding to the alloca.
  Type *AllocatedType =
      Info.AI->isArrayAllocation()
          ? ArrayType::get(
                Info.AI->getAllocatedType(),
                cast<ConstantInt>(Info.AI->getArraySize())->getZExtValue())
          : Info.AI->getAllocatedType();
  Type *PaddingType =
      ArrayType::get(Type::getInt8Ty(F->getContext()), AlignedSize - Size);
  Type *TypeWithPadding = StructType::get(AllocatedType, PaddingType);
  auto *NewAI = new AllocaInst(
      TypeWithPadding, Info.AI->getType()->getAddressSpace(), nullptr, "",
      Info.AI);
  NewAI->takeName(Info.AI);
  NewAI->setAlignment(MaybeAlign(Info.AI->getAlignment()));
  NewAI->setUsedWithInAlloca(Info.AI->isUsedWithInAlloca());
  NewAI->setSwiftError(Info.AI->isSwiftError());
  NewAI->copyMetadata(*Info.AI);

  auto *NewPtr = new BitCastInst(NewAI, Info.AI->getType(), "", Info.AI);
  Info.AI->replaceAllUsesWith(NewPtr);
  Info.AI->eraseFromParent();
  Info.AI = NewAI;
}

// Helper function to check for post-dominance.
static bool postDominates(const PostDominatorTree *PDT, const IntrinsicInst *A,
                          const IntrinsicInst *B) {
  const BasicBlock *ABB = A->getParent();
  const BasicBlock *BBB = B->getParent();

  if (ABB != BBB)
    return PDT->dominates(ABB, BBB);

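  // Same basic block: A post-dominates B iff B comes first in the
  // instruction list.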
  for (const Instruction &I : *ABB) {
    if (&I == B)
      return true;
    if (&I == A)
      return false;
  }
  llvm_unreachable("Corrupt instruction list");
}

// FIXME: check for MTE extension
bool AArch64StackTagging::runOnFunction(Function &Fn) {
  if (!Fn.hasFnAttribute(Attribute::SanitizeMemTag))
    return false;

  F = &Fn;
  DL = &Fn.getParent()->getDataLayout();
  if (MergeInit)
    AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();

  MapVector<AllocaInst *, AllocaInfo> Allocas; // need stable iteration order
  SmallVector<Instruction *, 8> RetVec;
  DenseMap<Value *, AllocaInst *> AllocaForValue;
  SmallVector<Instruction *, 4> UnrecognizedLifetimes;

  for (auto &BB : *F) {
    for (BasicBlock::iterator IT = BB.begin(); IT != BB.end(); ++IT) {
      Instruction *I = &*IT;
      if (auto *AI = dyn_cast<AllocaInst>(I)) {
        Allocas[AI].AI = AI;
        continue;
      }

      if (auto *DVI = dyn_cast<DbgVariableIntrinsic>(I)) {
        if (auto *AI =
                dyn_cast_or_null<AllocaInst>(DVI->getVariableLocation())) {
          Allocas[AI].DbgVariableIntrinsics.push_back(DVI);
        }
        continue;
      }

      auto *II = dyn_cast<IntrinsicInst>(I);
      if (II && (II->getIntrinsicID() == Intrinsic::lifetime_start ||
                 II->getIntrinsicID() == Intrinsic::lifetime_end)) {
        AllocaInst *AI =
            llvm::findAllocaForValue(II->getArgOperand(1), AllocaForValue);
        if (!AI) {
          UnrecognizedLifetimes.push_back(I);
          continue;
        }
        if (II->getIntrinsicID() == Intrinsic::lifetime_start)
          Allocas[AI].LifetimeStart.push_back(II);
        else
          Allocas[AI].LifetimeEnd.push_back(II);
      }

      if (isa<ReturnInst>(I) || isa<ResumeInst>(I) || isa<CleanupReturnInst>(I))
        RetVec.push_back(I);
    }
  }

  if (Allocas.empty())
    return false;

  int NextTag = 0;
  int NumInterestingAllocas = 0;
  for (auto &I : Allocas) {
    AllocaInfo &Info = I.second;
    assert(Info.AI);

    if (!isInterestingAlloca(*Info.AI)) {
      Info.Tag = -1;
      continue;
    }

    alignAndPadAlloca(Info);
    NumInterestingAllocas++;
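    // MTE tags are 4 bits; cycle through the 16 possible values.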
    Info.Tag = NextTag;
    NextTag = (NextTag + 1) % 16;
  }

  if (NumInterestingAllocas == 0)
    return true;

  std::unique_ptr<DominatorTree> DeleteDT;
  DominatorTree *DT = nullptr;
  if (auto *P = getAnalysisIfAvailable<DominatorTreeWrapperPass>())
    DT = &P->getDomTree();

  if (DT == nullptr && (NumInterestingAllocas > 1 ||
                        !F->hasFnAttribute(Attribute::OptimizeNone))) {
    DeleteDT = std::make_unique<DominatorTree>(*F);
    DT = DeleteDT.get();
  }

  std::unique_ptr<PostDominatorTree> DeletePDT;
  PostDominatorTree *PDT = nullptr;
  if (auto *P = getAnalysisIfAvailable<PostDominatorTreeWrapperPass>())
    PDT = &P->getPostDomTree();

  if (PDT == nullptr && !F->hasFnAttribute(Attribute::OptimizeNone)) {
    DeletePDT = std::make_unique<PostDominatorTree>(*F);
    PDT = DeletePDT.get();
  }

  SetTagFunc =
      Intrinsic::getDeclaration(F->getParent(), Intrinsic::aarch64_settag);

  Instruction *Base = insertBaseTaggedPointer(Allocas, DT);

  for (auto &I : Allocas) {
    const AllocaInfo &Info = I.second;
    AllocaInst *AI = Info.AI;
    if (Info.Tag < 0)
      continue;

    // Replace alloca with tagp(alloca).
    IRBuilder<> IRB(Info.AI->getNextNode());
    Function *TagP = Intrinsic::getDeclaration(
        F->getParent(), Intrinsic::aarch64_tagp, {Info.AI->getType()});
    Instruction *TagPCall =
        IRB.CreateCall(TagP, {Constant::getNullValue(Info.AI->getType()), Base,
                              ConstantInt::get(IRB.getInt64Ty(), Info.Tag)});
    if (Info.AI->hasName())
      TagPCall->setName(Info.AI->getName() + ".tag");
    Info.AI->replaceAllUsesWith(TagPCall);
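    // replaceAllUsesWith above also rewrote the first operand of the tagp
    // call itself; point it back at the untagged alloca.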
    TagPCall->setOperand(0, Info.AI);

    if (UnrecognizedLifetimes.empty() && Info.LifetimeStart.size() == 1 &&
        Info.LifetimeEnd.size() == 1) {
      IntrinsicInst *Start = Info.LifetimeStart[0];
      IntrinsicInst *End = Info.LifetimeEnd[0];
      uint64_t Size =
          cast<ConstantInt>(Start->getArgOperand(0))->getZExtValue();
      Size = alignTo(Size, kTagGranuleSize);
      tagAlloca(AI, Start->getNextNode(), Start->getArgOperand(1), Size);
      // We need to ensure that if we tag some object, we certainly untag it
      // before the function exits.
      if (PDT != nullptr && postDominates(PDT, End, Start)) {
        untagAlloca(AI, End, Size);
      } else {
        SmallVector<Instruction *, 8> ReachableRetVec;
        unsigned NumCoveredExits = 0;
        for (auto &RI : RetVec) {
          if (!isPotentiallyReachable(Start, RI, nullptr, DT))
            continue;
          ReachableRetVec.push_back(RI);
          if (DT != nullptr && DT->dominates(End, RI))
            ++NumCoveredExits;
        }
        // If there's a mix of covered and non-covered exits, just put the untag
        // on exits, so we avoid the redundancy of untagging twice.
        if (NumCoveredExits == ReachableRetVec.size()) {
          untagAlloca(AI, End, Size);
        } else {
          for (auto &RI : ReachableRetVec)
            untagAlloca(AI, RI, Size);
          // We may have inserted untag outside of the lifetime interval.
          // Remove the lifetime end call for this alloca.
          End->eraseFromParent();
        }
      }
    } else {
      uint64_t Size = Info.AI->getAllocationSizeInBits(*DL).getValue() / 8;
      Value *Ptr = IRB.CreatePointerCast(TagPCall, IRB.getInt8PtrTy());
      tagAlloca(AI, &*IRB.GetInsertPoint(), Ptr, Size);
      for (auto &RI : RetVec) {
        untagAlloca(AI, RI, Size);
      }
      // We may have inserted tag/untag outside of any lifetime interval.
      // Remove all lifetime intrinsics for this alloca.
      for (auto &II : Info.LifetimeStart)
        II->eraseFromParent();
      for (auto &II : Info.LifetimeEnd)
        II->eraseFromParent();
    }

    // Fixup debug intrinsics to point to the new alloca.
    for (auto DVI : Info.DbgVariableIntrinsics)
      DVI->setArgOperand(
          0,
          MetadataAsValue::get(F->getContext(), LocalAsMetadata::get(Info.AI)));
  }

  // If we have instrumented at least one alloca, all unrecognized lifetime
  // intrinsics have to go.
  for (auto &I : UnrecognizedLifetimes)
    I->eraseFromParent();

  return true;
}