1 //===- PoisonChecking.cpp - -----------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Implements a transform pass which instruments IR such that poison semantics
10 // are made explicit. That is, it provides a (possibly partial) executable
11 // semantics for every instruction w.r.t. poison as specified in the LLVM
12 // LangRef. There are obvious parallels to the sanitizer tools, but this pass
13 // is focused purely on the semantics of LLVM IR, not any particular source
14 // language. If you're looking for something to see if your C/C++ contains
15 // UB, this is not it.
16 //
17 // The rewritten semantics of each instruction will include the following
18 // components:
19 //
20 // 1) The original instruction, unmodified.
21 // 2) A propagation rule which translates dynamic information about the poison
22 // state of each input to whether the dynamic output of the instruction
23 // produces poison.
24 // 3) A flag validation rule which validates any poison producing flags on the
25 // instruction itself (e.g. checks for overflow on nsw).
26 // 4) A check rule which traps (to a handler function) if this instruction must
27 // execute undefined behavior given the poison state of it's inputs.
28 //
29 // At the moment, the UB detection is done in a best effort manner; that is,
30 // the resulting code may produce a false negative result (not report UB when
31 // it actually exists according to the LangRef spec), but should never produce
32 // a false positive (report UB where it doesn't exist). The intention is to
33 // eventually support a "strict" mode which never dynamically reports a false
34 // negative at the cost of rejecting some valid inputs to translation.
35 //
36 // Use cases for this pass include:
37 // - Understanding (and testing!) the implications of the definition of poison
38 // from the LangRef.
39 // - Validating the output of a IR fuzzer to ensure that all programs produced
40 // are well defined on the specific input used.
41 // - Finding/confirming poison specific miscompiles by checking the poison
42 // status of an input/IR pair is the same before and after an optimization
43 // transform.
44 // - Checking that a bugpoint reduction does not introduce UB which didn't
45 // exist in the original program being reduced.
46 //
47 // The major sources of inaccuracy are currently:
48 // - Most validation rules not yet implemented for instructions with poison
49 // relavant flags. At the moment, only nsw/nuw on add/sub are supported.
50 // - UB which is control dependent on a branch on poison is not yet
51 // reported. Currently, only data flow dependence is modeled.
52 // - Poison which is propagated through memory is not modeled. As such,
53 // storing poison to memory and then reloading it will cause a false negative
54 // as we consider the reloaded value to not be poisoned.
55 // - Poison propagation across function boundaries is not modeled. At the
56 // moment, all arguments and return values are assumed not to be poison.
57 // - Undef is not modeled. In particular, the optimizer's freedom to pick
58 // concrete values for undef bits so as to maximize potential for producing
59 // poison is not modeled.
60 //
61 //===----------------------------------------------------------------------===//
62
63 #include "llvm/Transforms/Instrumentation/PoisonChecking.h"
64 #include "llvm/ADT/DenseMap.h"
65 #include "llvm/ADT/Statistic.h"
66 #include "llvm/Analysis/MemoryBuiltins.h"
67 #include "llvm/Analysis/ValueTracking.h"
68 #include "llvm/IR/IRBuilder.h"
69 #include "llvm/IR/InstVisitor.h"
70 #include "llvm/IR/IntrinsicInst.h"
71 #include "llvm/IR/PatternMatch.h"
72 #include "llvm/Support/CommandLine.h"
73 #include "llvm/Support/Debug.h"
74
75 using namespace llvm;
76
77 #define DEBUG_TYPE "poison-checking"
78
79 static cl::opt<bool>
80 LocalCheck("poison-checking-function-local",
81 cl::init(false),
82 cl::desc("Check that returns are non-poison (for testing)"));
83
84
isConstantFalse(Value * V)85 static bool isConstantFalse(Value* V) {
86 assert(V->getType()->isIntegerTy(1));
87 if (auto *CI = dyn_cast<ConstantInt>(V))
88 return CI->isZero();
89 return false;
90 }
91
buildOrChain(IRBuilder<> & B,ArrayRef<Value * > Ops)92 static Value *buildOrChain(IRBuilder<> &B, ArrayRef<Value*> Ops) {
93 if (Ops.size() == 0)
94 return B.getFalse();
95 unsigned i = 0;
96 for (; i < Ops.size() && isConstantFalse(Ops[i]); i++) {}
97 if (i == Ops.size())
98 return B.getFalse();
99 Value *Accum = Ops[i++];
100 for (; i < Ops.size(); i++)
101 if (!isConstantFalse(Ops[i]))
102 Accum = B.CreateOr(Accum, Ops[i]);
103 return Accum;
104 }
105
generatePoisonChecksForBinOp(Instruction & I,SmallVector<Value *,2> & Checks)106 static void generatePoisonChecksForBinOp(Instruction &I,
107 SmallVector<Value*, 2> &Checks) {
108 assert(isa<BinaryOperator>(I));
109
110 IRBuilder<> B(&I);
111 Value *LHS = I.getOperand(0);
112 Value *RHS = I.getOperand(1);
113 switch (I.getOpcode()) {
114 default:
115 return;
116 case Instruction::Add: {
117 if (I.hasNoSignedWrap()) {
118 auto *OverflowOp =
119 B.CreateBinaryIntrinsic(Intrinsic::sadd_with_overflow, LHS, RHS);
120 Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
121 }
122 if (I.hasNoUnsignedWrap()) {
123 auto *OverflowOp =
124 B.CreateBinaryIntrinsic(Intrinsic::uadd_with_overflow, LHS, RHS);
125 Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
126 }
127 break;
128 }
129 case Instruction::Sub: {
130 if (I.hasNoSignedWrap()) {
131 auto *OverflowOp =
132 B.CreateBinaryIntrinsic(Intrinsic::ssub_with_overflow, LHS, RHS);
133 Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
134 }
135 if (I.hasNoUnsignedWrap()) {
136 auto *OverflowOp =
137 B.CreateBinaryIntrinsic(Intrinsic::usub_with_overflow, LHS, RHS);
138 Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
139 }
140 break;
141 }
142 case Instruction::Mul: {
143 if (I.hasNoSignedWrap()) {
144 auto *OverflowOp =
145 B.CreateBinaryIntrinsic(Intrinsic::smul_with_overflow, LHS, RHS);
146 Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
147 }
148 if (I.hasNoUnsignedWrap()) {
149 auto *OverflowOp =
150 B.CreateBinaryIntrinsic(Intrinsic::umul_with_overflow, LHS, RHS);
151 Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
152 }
153 break;
154 }
155 case Instruction::UDiv: {
156 if (I.isExact()) {
157 auto *Check =
158 B.CreateICmp(ICmpInst::ICMP_NE, B.CreateURem(LHS, RHS),
159 ConstantInt::get(LHS->getType(), 0));
160 Checks.push_back(Check);
161 }
162 break;
163 }
164 case Instruction::SDiv: {
165 if (I.isExact()) {
166 auto *Check =
167 B.CreateICmp(ICmpInst::ICMP_NE, B.CreateSRem(LHS, RHS),
168 ConstantInt::get(LHS->getType(), 0));
169 Checks.push_back(Check);
170 }
171 break;
172 }
173 case Instruction::AShr:
174 case Instruction::LShr:
175 case Instruction::Shl: {
176 Value *ShiftCheck =
177 B.CreateICmp(ICmpInst::ICMP_UGE, RHS,
178 ConstantInt::get(RHS->getType(),
179 LHS->getType()->getScalarSizeInBits()));
180 Checks.push_back(ShiftCheck);
181 break;
182 }
183 };
184 }
185
generatePoisonChecks(Instruction & I)186 static Value* generatePoisonChecks(Instruction &I) {
187 IRBuilder<> B(&I);
188 SmallVector<Value*, 2> Checks;
189 if (isa<BinaryOperator>(I) && !I.getType()->isVectorTy())
190 generatePoisonChecksForBinOp(I, Checks);
191
192 // Handle non-binops seperately
193 switch (I.getOpcode()) {
194 default:
195 break;
196 case Instruction::ExtractElement: {
197 Value *Vec = I.getOperand(0);
198 if (Vec->getType()->getVectorIsScalable())
199 break;
200 Value *Idx = I.getOperand(1);
201 unsigned NumElts = Vec->getType()->getVectorNumElements();
202 Value *Check =
203 B.CreateICmp(ICmpInst::ICMP_UGE, Idx,
204 ConstantInt::get(Idx->getType(), NumElts));
205 Checks.push_back(Check);
206 break;
207 }
208 case Instruction::InsertElement: {
209 Value *Vec = I.getOperand(0);
210 if (Vec->getType()->getVectorIsScalable())
211 break;
212 Value *Idx = I.getOperand(2);
213 unsigned NumElts = Vec->getType()->getVectorNumElements();
214 Value *Check =
215 B.CreateICmp(ICmpInst::ICMP_UGE, Idx,
216 ConstantInt::get(Idx->getType(), NumElts));
217 Checks.push_back(Check);
218 break;
219 }
220 };
221 return buildOrChain(B, Checks);
222 }
223
getPoisonFor(DenseMap<Value *,Value * > & ValToPoison,Value * V)224 static Value *getPoisonFor(DenseMap<Value *, Value *> &ValToPoison, Value *V) {
225 auto Itr = ValToPoison.find(V);
226 if (Itr != ValToPoison.end())
227 return Itr->second;
228 if (isa<Constant>(V)) {
229 return ConstantInt::getFalse(V->getContext());
230 }
231 // Return false for unknwon values - this implements a non-strict mode where
232 // unhandled IR constructs are simply considered to never produce poison. At
233 // some point in the future, we probably want a "strict mode" for testing if
234 // nothing else.
235 return ConstantInt::getFalse(V->getContext());
236 }
237
CreateAssert(IRBuilder<> & B,Value * Cond)238 static void CreateAssert(IRBuilder<> &B, Value *Cond) {
239 assert(Cond->getType()->isIntegerTy(1));
240 if (auto *CI = dyn_cast<ConstantInt>(Cond))
241 if (CI->isAllOnesValue())
242 return;
243
244 Module *M = B.GetInsertBlock()->getModule();
245 M->getOrInsertFunction("__poison_checker_assert",
246 Type::getVoidTy(M->getContext()),
247 Type::getInt1Ty(M->getContext()));
248 Function *TrapFunc = M->getFunction("__poison_checker_assert");
249 B.CreateCall(TrapFunc, Cond);
250 }
251
CreateAssertNot(IRBuilder<> & B,Value * Cond)252 static void CreateAssertNot(IRBuilder<> &B, Value *Cond) {
253 assert(Cond->getType()->isIntegerTy(1));
254 CreateAssert(B, B.CreateNot(Cond));
255 }
256
rewrite(Function & F)257 static bool rewrite(Function &F) {
258 auto * const Int1Ty = Type::getInt1Ty(F.getContext());
259
260 DenseMap<Value *, Value *> ValToPoison;
261
262 for (BasicBlock &BB : F)
263 for (auto I = BB.begin(); isa<PHINode>(&*I); I++) {
264 auto *OldPHI = cast<PHINode>(&*I);
265 auto *NewPHI = PHINode::Create(Int1Ty,
266 OldPHI->getNumIncomingValues());
267 for (unsigned i = 0; i < OldPHI->getNumIncomingValues(); i++)
268 NewPHI->addIncoming(UndefValue::get(Int1Ty),
269 OldPHI->getIncomingBlock(i));
270 NewPHI->insertBefore(OldPHI);
271 ValToPoison[OldPHI] = NewPHI;
272 }
273
274 for (BasicBlock &BB : F)
275 for (Instruction &I : BB) {
276 if (isa<PHINode>(I)) continue;
277
278 IRBuilder<> B(cast<Instruction>(&I));
279
280 // Note: There are many more sources of documented UB, but this pass only
281 // attempts to find UB triggered by propagation of poison.
282 if (Value *Op = const_cast<Value*>(getGuaranteedNonFullPoisonOp(&I)))
283 CreateAssertNot(B, getPoisonFor(ValToPoison, Op));
284
285 if (LocalCheck)
286 if (auto *RI = dyn_cast<ReturnInst>(&I))
287 if (RI->getNumOperands() != 0) {
288 Value *Op = RI->getOperand(0);
289 CreateAssertNot(B, getPoisonFor(ValToPoison, Op));
290 }
291
292 SmallVector<Value*, 4> Checks;
293 if (propagatesFullPoison(&I))
294 for (Value *V : I.operands())
295 Checks.push_back(getPoisonFor(ValToPoison, V));
296
297 if (auto *Check = generatePoisonChecks(I))
298 Checks.push_back(Check);
299 ValToPoison[&I] = buildOrChain(B, Checks);
300 }
301
302 for (BasicBlock &BB : F)
303 for (auto I = BB.begin(); isa<PHINode>(&*I); I++) {
304 auto *OldPHI = cast<PHINode>(&*I);
305 if (!ValToPoison.count(OldPHI))
306 continue; // skip the newly inserted phis
307 auto *NewPHI = cast<PHINode>(ValToPoison[OldPHI]);
308 for (unsigned i = 0; i < OldPHI->getNumIncomingValues(); i++) {
309 auto *OldVal = OldPHI->getIncomingValue(i);
310 NewPHI->setIncomingValue(i, getPoisonFor(ValToPoison, OldVal));
311 }
312 }
313 return true;
314 }
315
316
run(Module & M,ModuleAnalysisManager & AM)317 PreservedAnalyses PoisonCheckingPass::run(Module &M,
318 ModuleAnalysisManager &AM) {
319 bool Changed = false;
320 for (auto &F : M)
321 Changed |= rewrite(F);
322
323 return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
324 }
325
run(Function & F,FunctionAnalysisManager & AM)326 PreservedAnalyses PoisonCheckingPass::run(Function &F,
327 FunctionAnalysisManager &AM) {
328 return rewrite(F) ? PreservedAnalyses::none() : PreservedAnalyses::all();
329 }
330
331
332 /* Major TODO Items:
333 - Control dependent poison UB
334 - Strict mode - (i.e. must analyze every operand)
335 - Poison through memory
336 - Function ABIs
337 - Full coverage of intrinsics, etc.. (ouch)
338
339 Instructions w/Unclear Semantics:
340 - shufflevector - It would seem reasonable for an out of bounds mask element
341 to produce poison, but the LangRef does not state.
342 - and/or - It would seem reasonable for poison to propagate from both
343 arguments, but LangRef doesn't state and propagatesFullPoison doesn't
344 include these two.
345 - all binary ops w/vector operands - The likely interpretation would be that
346 any element overflowing should produce poison for the entire result, but
347 the LangRef does not state.
348 - Floating point binary ops w/fmf flags other than (nnan, noinfs). It seems
349 strange that only certian flags should be documented as producing poison.
350
351 Cases of clear poison semantics not yet implemented:
352 - Exact flags on ashr/lshr produce poison
353 - NSW/NUW flags on shl produce poison
354 - Inbounds flag on getelementptr produce poison
355 - fptosi/fptoui (out of bounds input) produce poison
356 - Scalable vector types for insertelement/extractelement
357 - Floating point binary ops w/fmf nnan/noinfs flags produce poison
358 */
359