//
// The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief Implements the TargetLoweringMIPS32 class, which consists almost
/// entirely of the lowering sequence for each high-level instruction.
///
//===----------------------------------------------------------------------===//

#include "IceTargetLoweringMIPS32.h"

#include "IceCfg.h"
#include "IceCfgNode.h"
#include "IceClFlags.h"
#include "IceDefs.h"
#include "IceELFObjectWriter.h"
#include "IceGlobalInits.h"
#include "IceInstMIPS32.h"
#include "IceInstVarIter.h"
#include "IceLiveness.h"
#include "IceOperand.h"
#include "IcePhiLoweringImpl.h"
#include "IceRegistersMIPS32.h"
#include "IceTargetLoweringMIPS32.def"
#include "IceUtils.h"
#include "llvm/Support/MathExtras.h"

namespace MIPS32 {
std::unique_ptr<::Ice::TargetLowering> createTargetLowering(::Ice::Cfg *Func) {
  return ::Ice::MIPS32::TargetMIPS32::create(Func);
}

std::unique_ptr<::Ice::TargetDataLowering>
createTargetDataLowering(::Ice::GlobalContext *Ctx) {
  return ::Ice::MIPS32::TargetDataMIPS32::create(Ctx);
}

std::unique_ptr<::Ice::TargetHeaderLowering>
createTargetHeaderLowering(::Ice::GlobalContext *Ctx) {
  return ::Ice::MIPS32::TargetHeaderMIPS32::create(Ctx);
}

void staticInit(::Ice::GlobalContext *Ctx) {
  ::Ice::MIPS32::TargetMIPS32::staticInit(Ctx);
}

bool shouldBePooled(const ::Ice::Constant *C) {
  return ::Ice::MIPS32::TargetMIPS32::shouldBePooled(C);
}

::Ice::Type getPointerType() {
  return ::Ice::MIPS32::TargetMIPS32::getPointerType();
}

} // end of namespace MIPS32

namespace Ice {
namespace MIPS32 {

using llvm::isInt;

namespace {

// The maximum number of arguments to pass in GPR registers.
constexpr uint32_t MIPS32_MAX_GPR_ARG = 4;

std::array<RegNumT, MIPS32_MAX_GPR_ARG> GPRArgInitializer;
std::array<RegNumT, MIPS32_MAX_GPR_ARG / 2> I64ArgInitializer;

constexpr uint32_t MIPS32_MAX_FP_ARG = 2;

std::array<RegNumT, MIPS32_MAX_FP_ARG> FP32ArgInitializer;
std::array<RegNumT, MIPS32_MAX_FP_ARG> FP64ArgInitializer;
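// In the O32 ABI the first two floating-point arguments go in $f12 and $f14
// (the pairs f12:f13 and f14:f15 for f64), which is why staticInit() below
// steps the FP32 initializer by two single-precision registers at a time.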

const char *getRegClassName(RegClass C) {
  auto ClassNum = static_cast<RegClassMIPS32>(C);
  assert(ClassNum < RCMIPS32_NUM);
  switch (ClassNum) {
  default:
    assert(C < RC_Target);
    return regClassString(C);
    // Add handling of new register classes below.
  }
}

// Stack alignment
constexpr uint32_t MIPS32_STACK_ALIGNMENT_BYTES = 16;

// Value is in bytes. Return Value adjusted to the next highest multiple of the
// stack alignment required for the given type.
uint32_t applyStackAlignmentTy(uint32_t Value, Type Ty) {
  size_t typeAlignInBytes = typeWidthInBytes(Ty);
  // Vectors are stored on the stack with the same alignment as an i64.
  if (isVectorType(Ty))
    typeAlignInBytes = typeWidthInBytes(IceType_i64);
  return Utils::applyAlignment(Value, typeAlignInBytes);
}

// Value is in bytes. Return Value adjusted to the next highest multiple of the
// stack alignment.
uint32_t applyStackAlignment(uint32_t Value) {
  return Utils::applyAlignment(Value, MIPS32_STACK_ALIGNMENT_BYTES);
}
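// For example, with MIPS32_STACK_ALIGNMENT_BYTES == 16, applyStackAlignment
// rounds 4 up to 16 and 20 up to 32, while applyStackAlignmentTy rounds a
// 4-byte offset up to 8 for an i64 (or vector) operand.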

} // end of anonymous namespace

TargetMIPS32::TargetMIPS32(Cfg *Func)
    : TargetLowering(Func), NeedSandboxing(SandboxingType == ST_NaCl) {}

void TargetMIPS32::assignVarStackSlots(VarList &SortedSpilledVariables,
                                       size_t SpillAreaPaddingBytes,
                                       size_t SpillAreaSizeBytes,
                                       size_t GlobalsAndSubsequentPaddingSize) {
  const VariablesMetadata *VMetadata = Func->getVMetadata();
  size_t GlobalsSpaceUsed = SpillAreaPaddingBytes;
  size_t NextStackOffset = SpillAreaPaddingBytes;
  CfgVector<size_t> LocalsSize(Func->getNumNodes());
  const bool SimpleCoalescing = !callsReturnsTwice();
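  // With SimpleCoalescing, multi-block (global) variables get unique slots at
  // the start of the spill area, while a single-block variable shares its
  // slot with single-block variables of other nodes: each node's locals are
  // laid out from the start of the locals area. Coalescing is disabled for
  // functions that call a returns-twice routine such as setjmp.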
  for (Variable *Var : SortedSpilledVariables) {
    size_t Increment = typeWidthInBytesOnStack(Var->getType());
    if (SimpleCoalescing && VMetadata->isTracked(Var)) {
      if (VMetadata->isMultiBlock(Var)) {
        GlobalsSpaceUsed += Increment;
        NextStackOffset = GlobalsSpaceUsed;
      } else {
        SizeT NodeIndex = VMetadata->getLocalUseNode(Var)->getIndex();
        LocalsSize[NodeIndex] += Increment;
        NextStackOffset = SpillAreaPaddingBytes +
                          GlobalsAndSubsequentPaddingSize +
                          LocalsSize[NodeIndex];
      }
    } else {
      NextStackOffset += Increment;
    }
    Var->setStackOffset(SpillAreaSizeBytes - NextStackOffset);
  }
}

void TargetMIPS32::staticInit(GlobalContext *Ctx) {
  (void)Ctx;
  RegNumT::setLimit(RegMIPS32::Reg_NUM);
  SmallBitVector IntegerRegisters(RegMIPS32::Reg_NUM);
  SmallBitVector I64PairRegisters(RegMIPS32::Reg_NUM);
  SmallBitVector Float32Registers(RegMIPS32::Reg_NUM);
  SmallBitVector Float64Registers(RegMIPS32::Reg_NUM);
  SmallBitVector VectorRegisters(RegMIPS32::Reg_NUM);
  SmallBitVector InvalidRegisters(RegMIPS32::Reg_NUM);
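  // Populate the per-class register bit vectors and the alias sets from the
  // MIPS32 register table; the X macro below expands once per register.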
#define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt,    \
          isI64Pair, isFP32, isFP64, isVec128, alias_init)                     \
  IntegerRegisters[RegMIPS32::val] = isInt;                                    \
  I64PairRegisters[RegMIPS32::val] = isI64Pair;                                \
  Float32Registers[RegMIPS32::val] = isFP32;                                   \
  Float64Registers[RegMIPS32::val] = isFP64;                                   \
  VectorRegisters[RegMIPS32::val] = isVec128;                                  \
  RegisterAliases[RegMIPS32::val].resize(RegMIPS32::Reg_NUM);                  \
  for (SizeT RegAlias : alias_init) {                                          \
    assert(!RegisterAliases[RegMIPS32::val][RegAlias] &&                       \
           "Duplicate alias for " #val);                                       \
    RegisterAliases[RegMIPS32::val].set(RegAlias);                             \
  }                                                                            \
  RegisterAliases[RegMIPS32::val].resize(RegMIPS32::Reg_NUM);                  \
  assert(RegisterAliases[RegMIPS32::val][RegMIPS32::val]);
  REGMIPS32_TABLE;
#undef X

  // TODO(mohit.bhakkad): Change these inits once we provide argument-related
  // fields in the register tables.
  for (size_t i = 0; i < MIPS32_MAX_GPR_ARG; i++)
    GPRArgInitializer[i] = RegNumT::fixme(RegMIPS32::Reg_A0 + i);
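  // i.e. GPRArgInitializer = {$a0, $a1, $a2, $a3}.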

  for (size_t i = 0; i < MIPS32_MAX_GPR_ARG / 2; i++)
    I64ArgInitializer[i] = RegNumT::fixme(RegMIPS32::Reg_A0A1 + i);

  for (size_t i = 0; i < MIPS32_MAX_FP_ARG; i++) {
    FP32ArgInitializer[i] = RegNumT::fixme(RegMIPS32::Reg_F12 + i * 2);
    FP64ArgInitializer[i] = RegNumT::fixme(RegMIPS32::Reg_F12F13 + i);
  }

  TypeToRegisterSet[IceType_void] = InvalidRegisters;
  TypeToRegisterSet[IceType_i1] = IntegerRegisters;
  TypeToRegisterSet[IceType_i8] = IntegerRegisters;
  TypeToRegisterSet[IceType_i16] = IntegerRegisters;
  TypeToRegisterSet[IceType_i32] = IntegerRegisters;
  TypeToRegisterSet[IceType_i64] = IntegerRegisters;
  TypeToRegisterSet[IceType_f32] = Float32Registers;
  TypeToRegisterSet[IceType_f64] = Float64Registers;
  TypeToRegisterSet[IceType_v4i1] = VectorRegisters;
  TypeToRegisterSet[IceType_v8i1] = VectorRegisters;
  TypeToRegisterSet[IceType_v16i1] = VectorRegisters;
  TypeToRegisterSet[IceType_v16i8] = VectorRegisters;
  TypeToRegisterSet[IceType_v8i16] = VectorRegisters;
  TypeToRegisterSet[IceType_v4i32] = VectorRegisters;
  TypeToRegisterSet[IceType_v4f32] = VectorRegisters;

  for (size_t i = 0; i < llvm::array_lengthof(TypeToRegisterSet); ++i)
    TypeToRegisterSetUnfiltered[i] = TypeToRegisterSet[i];

  filterTypeToRegisterSet(Ctx, RegMIPS32::Reg_NUM, TypeToRegisterSet,
                          llvm::array_lengthof(TypeToRegisterSet),
                          RegMIPS32::getRegName, getRegClassName);
}

void TargetMIPS32::unsetIfNonLeafFunc() {
  for (CfgNode *Node : Func->getNodes()) {
    for (Inst &Instr : Node->getInsts()) {
      if (llvm::isa<InstCall>(&Instr)) {
        // Unset MaybeLeafFunc if a call instruction exists.
        MaybeLeafFunc = false;
        return;
      }
    }
  }
}

uint32_t TargetMIPS32::getStackAlignment() const {
  return MIPS32_STACK_ALIGNMENT_BYTES;
}

uint32_t TargetMIPS32::getCallStackArgumentsSizeBytes(const InstCall *Call) {
  TargetMIPS32::CallingConv CC;
  RegNumT DummyReg;
  size_t OutArgsSizeBytes = 0;
  Variable *Dest = Call->getDest();
  bool PartialOnStack = false;
  if (Dest != nullptr && isVectorFloatingType(Dest->getType())) {
    CC.discardReg(RegMIPS32::Reg_A0);
    // The next vector argument is passed partially on the stack.
    PartialOnStack = true;
  }
  for (SizeT i = 0, NumArgs = Call->getNumArgs(); i < NumArgs; ++i) {
    Operand *Arg = legalizeUndef(Call->getArg(i));
    const Type Ty = Arg->getType();
    RegNumT RegNum;
    if (CC.argInReg(Ty, i, &RegNum)) {
      // If PartialOnStack is true and this is a vector type, then the last
      // two elements are passed on the stack.
      if (PartialOnStack && isVectorType(Ty)) {
        OutArgsSizeBytes = applyStackAlignmentTy(OutArgsSizeBytes, IceType_i64);
        OutArgsSizeBytes += typeWidthInBytesOnStack(IceType_i32) * 2;
      }
      continue;
    }
    OutArgsSizeBytes = applyStackAlignmentTy(OutArgsSizeBytes, Ty);
    OutArgsSizeBytes += typeWidthInBytesOnStack(Ty);
  }
  // Add the size of the argument save area.
  constexpr int BytesPerStackArg = 4;
  OutArgsSizeBytes += MIPS32_MAX_GPR_ARG * BytesPerStackArg;
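  // The O32 ABI requires the caller to reserve this 16-byte save area (one
  // word per argument register $a0-$a3) even when the first arguments are
  // passed in registers.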
  return applyStackAlignment(OutArgsSizeBytes);
}

namespace {
inline uint64_t getConstantMemoryOrder(Operand *Opnd) {
  if (auto *Integer = llvm::dyn_cast<ConstantInteger32>(Opnd))
    return Integer->getValue();
  return Intrinsics::MemoryOrderInvalid;
}
} // namespace

void TargetMIPS32::genTargetHelperCallFor(Inst *Instr) {
  constexpr bool NoTailCall = false;
  constexpr bool IsTargetHelperCall = true;
  Variable *Dest = Instr->getDest();
  const Type DestTy = Dest ? Dest->getType() : IceType_void;

  switch (Instr->getKind()) {
  default:
    return;
  case Inst::Select: {
    if (isVectorType(DestTy)) {
      Operand *SrcT = llvm::cast<InstSelect>(Instr)->getTrueOperand();
      Operand *SrcF = llvm::cast<InstSelect>(Instr)->getFalseOperand();
      Operand *Cond = llvm::cast<InstSelect>(Instr)->getCondition();
      Variable *T = Func->makeVariable(DestTy);
      auto *Undef = ConstantUndef::create(Ctx, DestTy);
      Context.insert<InstAssign>(T, Undef);
      auto *VarVecOn32 = llvm::cast<VariableVecOn32>(T);
      VarVecOn32->initVecElement(Func);
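      // Scalarize the vector select: extract each lane of the condition and
      // of both sources, select on the scalars, and insert the result into an
      // accumulator vector that is threaded through the loop. The Fcmp, Icmp,
      // and Cast cases below follow the same pattern.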
      for (SizeT I = 0; I < typeNumElements(DestTy); ++I) {
        auto *Index = Ctx->getConstantInt32(I);
        auto *OpC = Func->makeVariable(typeElementType(Cond->getType()));
        Context.insert<InstExtractElement>(OpC, Cond, Index);
        auto *OpT = Func->makeVariable(typeElementType(DestTy));
        Context.insert<InstExtractElement>(OpT, SrcT, Index);
        auto *OpF = Func->makeVariable(typeElementType(DestTy));
        Context.insert<InstExtractElement>(OpF, SrcF, Index);
        auto *Dst = Func->makeVariable(typeElementType(DestTy));
        Variable *DestT = Func->makeVariable(DestTy);
        Context.insert<InstSelect>(Dst, OpC, OpT, OpF);
        Context.insert<InstInsertElement>(DestT, T, Dst, Index);
        T = DestT;
      }
      Context.insert<InstAssign>(Dest, T);
      Instr->setDeleted();
    }
    return;
  }
  case Inst::Fcmp: {
    if (isVectorType(DestTy)) {
      InstFcmp::FCond Cond = llvm::cast<InstFcmp>(Instr)->getCondition();
      Operand *Src0 = Instr->getSrc(0);
      Operand *Src1 = Instr->getSrc(1);
      Variable *T = Func->makeVariable(IceType_v4f32);
      auto *Undef = ConstantUndef::create(Ctx, IceType_v4f32);
      Context.insert<InstAssign>(T, Undef);
      auto *VarVecOn32 = llvm::cast<VariableVecOn32>(T);
      VarVecOn32->initVecElement(Func);
      for (SizeT I = 0; I < typeNumElements(IceType_v4f32); ++I) {
        auto *Index = Ctx->getConstantInt32(I);
        auto *Op0 = Func->makeVariable(IceType_f32);
        Context.insert<InstExtractElement>(Op0, Src0, Index);
        auto *Op1 = Func->makeVariable(IceType_f32);
        Context.insert<InstExtractElement>(Op1, Src1, Index);
        auto *Dst = Func->makeVariable(IceType_f32);
        Variable *DestT = Func->makeVariable(IceType_v4f32);
        Context.insert<InstFcmp>(Cond, Dst, Op0, Op1);
        Context.insert<InstInsertElement>(DestT, T, Dst, Index);
        T = DestT;
      }
      Context.insert<InstAssign>(Dest, T);
      Instr->setDeleted();
    }
    return;
  }
  case Inst::Icmp: {
    if (isVectorType(DestTy)) {
      InstIcmp::ICond Cond = llvm::cast<InstIcmp>(Instr)->getCondition();
      Operand *Src0 = Instr->getSrc(0);
      Operand *Src1 = Instr->getSrc(1);
      const Type SrcType = Src0->getType();
      Variable *T = Func->makeVariable(DestTy);
      auto *Undef = ConstantUndef::create(Ctx, DestTy);
      Context.insert<InstAssign>(T, Undef);
      auto *VarVecOn32 = llvm::cast<VariableVecOn32>(T);
      VarVecOn32->initVecElement(Func);
      for (SizeT I = 0; I < typeNumElements(SrcType); ++I) {
        auto *Index = Ctx->getConstantInt32(I);
        auto *Op0 = Func->makeVariable(typeElementType(SrcType));
        Context.insert<InstExtractElement>(Op0, Src0, Index);
        auto *Op1 = Func->makeVariable(typeElementType(SrcType));
        Context.insert<InstExtractElement>(Op1, Src1, Index);
        auto *Dst = Func->makeVariable(typeElementType(DestTy));
        Variable *DestT = Func->makeVariable(DestTy);
        Context.insert<InstIcmp>(Cond, Dst, Op0, Op1);
        Context.insert<InstInsertElement>(DestT, T, Dst, Index);
        T = DestT;
      }
      Context.insert<InstAssign>(Dest, T);
      Instr->setDeleted();
    }
    return;
  }
  case Inst::Arithmetic: {
    const InstArithmetic::OpKind Op =
        llvm::cast<InstArithmetic>(Instr)->getOp();
    if (isVectorType(DestTy)) {
      scalarizeArithmetic(Op, Dest, Instr->getSrc(0), Instr->getSrc(1));
      Instr->setDeleted();
      return;
    }
    switch (DestTy) {
    default:
      return;
    case IceType_i64: {
      RuntimeHelper HelperID = RuntimeHelper::H_Num;
      switch (Op) {
      default:
        return;
      case InstArithmetic::Udiv:
        HelperID = RuntimeHelper::H_udiv_i64;
        break;
      case InstArithmetic::Sdiv:
        HelperID = RuntimeHelper::H_sdiv_i64;
        break;
      case InstArithmetic::Urem:
        HelperID = RuntimeHelper::H_urem_i64;
        break;
      case InstArithmetic::Srem:
        HelperID = RuntimeHelper::H_srem_i64;
        break;
      }

      if (HelperID == RuntimeHelper::H_Num) {
        return;
      }

      Operand *TargetHelper = Ctx->getRuntimeHelperFunc(HelperID);
      constexpr SizeT MaxArgs = 2;
      auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(Instr->getSrc(0));
      Call->addArg(Instr->getSrc(1));
      Instr->setDeleted();
      return;
    }
    case IceType_f32:
    case IceType_f64: {
      if (Op != InstArithmetic::Frem) {
        return;
      }
      constexpr SizeT MaxArgs = 2;
      Operand *TargetHelper = Ctx->getRuntimeHelperFunc(
          DestTy == IceType_f32 ? RuntimeHelper::H_frem_f32
                                : RuntimeHelper::H_frem_f64);
      auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(Instr->getSrc(0));
      Call->addArg(Instr->getSrc(1));
      Instr->setDeleted();
      return;
    }
    }
    llvm::report_fatal_error("Control flow should never have reached here.");
  }
  case Inst::Cast: {
    Operand *Src0 = Instr->getSrc(0);
    const Type SrcTy = Src0->getType();
    auto *CastInstr = llvm::cast<InstCast>(Instr);
    const InstCast::OpKind CastKind = CastInstr->getCastKind();

    if (isVectorType(DestTy)) {
      Variable *T = Func->makeVariable(DestTy);
      auto *VarVecOn32 = llvm::cast<VariableVecOn32>(T);
      VarVecOn32->initVecElement(Func);
      auto *Undef = ConstantUndef::create(Ctx, DestTy);
      Context.insert<InstAssign>(T, Undef);
      for (SizeT I = 0; I < typeNumElements(DestTy); ++I) {
        auto *Index = Ctx->getConstantInt32(I);
        auto *Op = Func->makeVariable(typeElementType(SrcTy));
        Context.insert<InstExtractElement>(Op, Src0, Index);
        auto *Dst = Func->makeVariable(typeElementType(DestTy));
        Variable *DestT = Func->makeVariable(DestTy);
        Context.insert<InstCast>(CastKind, Dst, Op);
        Context.insert<InstInsertElement>(DestT, T, Dst, Index);
        T = DestT;
      }
      Context.insert<InstAssign>(Dest, T);
      Instr->setDeleted();
      return;
    }

    switch (CastKind) {
    default:
      return;
    case InstCast::Fptosi:
    case InstCast::Fptoui: {
      if ((DestTy != IceType_i32) && (DestTy != IceType_i64)) {
        return;
      }
      const bool DestIs32 = DestTy == IceType_i32;
      const bool DestIsSigned = CastKind == InstCast::Fptosi;
      const bool Src0IsF32 = isFloat32Asserting32Or64(SrcTy);
      RuntimeHelper RTHFunc = RuntimeHelper::H_Num;
      if (DestIsSigned) {
        if (DestIs32) {
          return;
        }
        RTHFunc = Src0IsF32 ? RuntimeHelper::H_fptosi_f32_i64
                            : RuntimeHelper::H_fptosi_f64_i64;
      } else {
        RTHFunc = Src0IsF32 ? (DestIs32 ? RuntimeHelper::H_fptoui_f32_i32
                                        : RuntimeHelper::H_fptoui_f32_i64)
                            : (DestIs32 ? RuntimeHelper::H_fptoui_f64_i32
                                        : RuntimeHelper::H_fptoui_f64_i64);
      }
      Operand *TargetHelper = Ctx->getRuntimeHelperFunc(RTHFunc);
      static constexpr SizeT MaxArgs = 1;
      auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(Src0);
      Instr->setDeleted();
      return;
    }
    case InstCast::Sitofp:
    case InstCast::Uitofp: {
      if ((SrcTy != IceType_i32) && (SrcTy != IceType_i64)) {
        return;
      }
      const bool SourceIs32 = SrcTy == IceType_i32;
      const bool SourceIsSigned = CastKind == InstCast::Sitofp;
      const bool DestIsF32 = isFloat32Asserting32Or64(DestTy);
      RuntimeHelper RTHFunc = RuntimeHelper::H_Num;
      if (SourceIsSigned) {
        if (SourceIs32) {
          return;
        }
        RTHFunc = DestIsF32 ? RuntimeHelper::H_sitofp_i64_f32
                            : RuntimeHelper::H_sitofp_i64_f64;
      } else {
        RTHFunc = DestIsF32 ? (SourceIs32 ? RuntimeHelper::H_uitofp_i32_f32
                                          : RuntimeHelper::H_uitofp_i64_f32)
                            : (SourceIs32 ? RuntimeHelper::H_uitofp_i32_f64
                                          : RuntimeHelper::H_uitofp_i64_f64);
      }
      Operand *TargetHelper = Ctx->getRuntimeHelperFunc(RTHFunc);
      static constexpr SizeT MaxArgs = 1;
      auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(Src0);
      Instr->setDeleted();
      return;
    }
    case InstCast::Bitcast: {
      if (DestTy == SrcTy) {
        return;
      }
      Variable *CallDest = Dest;
      RuntimeHelper HelperID = RuntimeHelper::H_Num;
      switch (DestTy) {
      default:
        return;
      case IceType_i8:
        assert(SrcTy == IceType_v8i1);
        HelperID = RuntimeHelper::H_bitcast_8xi1_i8;
        CallDest = Func->makeVariable(IceType_i32);
        break;
      case IceType_i16:
        assert(SrcTy == IceType_v16i1);
        HelperID = RuntimeHelper::H_bitcast_16xi1_i16;
        CallDest = Func->makeVariable(IceType_i32);
        break;
      case IceType_v8i1: {
        assert(SrcTy == IceType_i8);
        HelperID = RuntimeHelper::H_bitcast_i8_8xi1;
        Variable *Src0AsI32 = Func->makeVariable(stackSlotType());
        // Arguments to functions are required to be at least 32 bits wide.
        Context.insert<InstCast>(InstCast::Zext, Src0AsI32, Src0);
        Src0 = Src0AsI32;
      } break;
      case IceType_v16i1: {
        assert(SrcTy == IceType_i16);
        HelperID = RuntimeHelper::H_bitcast_i16_16xi1;
        Variable *Src0AsI32 = Func->makeVariable(stackSlotType());
        // Arguments to functions are required to be at least 32 bits wide.
        Context.insert<InstCast>(InstCast::Zext, Src0AsI32, Src0);
        Src0 = Src0AsI32;
      } break;
      }
      constexpr SizeT MaxSrcs = 1;
      InstCall *Call = makeHelperCall(HelperID, CallDest, MaxSrcs);
      Call->addArg(Src0);
      Context.insert(Call);
      // The PNaCl ABI disallows i8/i16 return types, so truncate the helper
      // call result to the appropriate type as necessary.
      if (CallDest->getType() != DestTy)
        Context.insert<InstCast>(InstCast::Trunc, Dest, CallDest);
      Instr->setDeleted();
      return;
    }
    case InstCast::Trunc: {
      if (DestTy == SrcTy) {
        return;
      }
      if (!isVectorType(SrcTy)) {
        return;
      }
      assert(typeNumElements(DestTy) == typeNumElements(SrcTy));
      assert(typeElementType(DestTy) == IceType_i1);
      assert(isVectorIntegerType(SrcTy));
      return;
    }
    case InstCast::Sext:
    case InstCast::Zext: {
      if (DestTy == SrcTy) {
        return;
      }
      if (!isVectorType(DestTy)) {
        return;
      }
      assert(typeNumElements(DestTy) == typeNumElements(SrcTy));
      assert(typeElementType(SrcTy) == IceType_i1);
      assert(isVectorIntegerType(DestTy));
      return;
    }
    }
    llvm::report_fatal_error("Control flow should never have reached here.");
  }
  case Inst::Intrinsic: {
    auto *Intrinsic = llvm::cast<InstIntrinsic>(Instr);
    Intrinsics::IntrinsicID ID = Intrinsic->getIntrinsicID();
    if (isVectorType(DestTy) && ID == Intrinsics::Fabs) {
      Operand *Src0 = Intrinsic->getArg(0);
      Intrinsics::IntrinsicInfo Info = Intrinsic->getIntrinsicInfo();

      Variable *T = Func->makeVariable(IceType_v4f32);
      auto *Undef = ConstantUndef::create(Ctx, IceType_v4f32);
      Context.insert<InstAssign>(T, Undef);
      auto *VarVecOn32 = llvm::cast<VariableVecOn32>(T);
      VarVecOn32->initVecElement(Func);

      for (SizeT i = 0; i < typeNumElements(IceType_v4f32); ++i) {
        auto *Index = Ctx->getConstantInt32(i);
        auto *Op = Func->makeVariable(IceType_f32);
        Context.insert<InstExtractElement>(Op, Src0, Index);
        auto *Res = Func->makeVariable(IceType_f32);
        Variable *DestT = Func->makeVariable(IceType_v4f32);
        auto *Intrinsic = Context.insert<InstIntrinsic>(1, Res, Info);
        Intrinsic->addArg(Op);
        Context.insert<InstInsertElement>(DestT, T, Res, Index);
        T = DestT;
      }

      Context.insert<InstAssign>(Dest, T);

      Instr->setDeleted();
      return;
    }
    switch (ID) {
    default:
      return;
    case Intrinsics::AtomicLoad: {
      if (DestTy != IceType_i64)
        return;
      if (!Intrinsics::isMemoryOrderValid(
              ID, getConstantMemoryOrder(Intrinsic->getArg(1)))) {
        Func->setError("Unexpected memory ordering for AtomicLoad");
        return;
      }
      Operand *Addr = Intrinsic->getArg(0);
      Operand *TargetHelper = Ctx->getConstantExternSym(
          Ctx->getGlobalString("__sync_val_compare_and_swap_8"));
      static constexpr SizeT MaxArgs = 3;
      auto *_0 = Ctx->getConstantZero(IceType_i64);
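      // A 64-bit atomic load is emulated as compare-and-swap(Addr, 0, 0): the
      // helper returns the current memory value, and it only stores (the
      // same) zero back when the location already held zero, so memory is
      // never visibly modified.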
      auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(Addr);
      Call->addArg(_0);
      Call->addArg(_0);
      Context.insert<InstMIPS32Sync>();
      Instr->setDeleted();
      return;
    }
    case Intrinsics::AtomicStore: {
      Operand *Val = Intrinsic->getArg(0);
      if (Val->getType() != IceType_i64)
        return;
      if (!Intrinsics::isMemoryOrderValid(
              ID, getConstantMemoryOrder(Intrinsic->getArg(2)))) {
        Func->setError("Unexpected memory ordering for AtomicStore");
        return;
      }
      Operand *Addr = Intrinsic->getArg(1);
      Variable *NoDest = nullptr;
      Operand *TargetHelper = Ctx->getConstantExternSym(
          Ctx->getGlobalString("__sync_lock_test_and_set_8"));
      Context.insert<InstMIPS32Sync>();
      static constexpr SizeT MaxArgs = 2;
      auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(Addr);
      Call->addArg(Val);
      Context.insert<InstMIPS32Sync>();
      Instr->setDeleted();
      return;
    }
    case Intrinsics::AtomicCmpxchg: {
      if (DestTy != IceType_i64)
        return;
      if (!Intrinsics::isMemoryOrderValid(
              ID, getConstantMemoryOrder(Intrinsic->getArg(3)),
              getConstantMemoryOrder(Intrinsic->getArg(4)))) {
        Func->setError("Unexpected memory ordering for AtomicCmpxchg");
        return;
      }
      Operand *Addr = Intrinsic->getArg(0);
      Operand *Oldval = Intrinsic->getArg(1);
      Operand *Newval = Intrinsic->getArg(2);
      Operand *TargetHelper = Ctx->getConstantExternSym(
          Ctx->getGlobalString("__sync_val_compare_and_swap_8"));
      Context.insert<InstMIPS32Sync>();
      static constexpr SizeT MaxArgs = 3;
      auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(Addr);
      Call->addArg(Oldval);
      Call->addArg(Newval);
      Context.insert<InstMIPS32Sync>();
      Instr->setDeleted();
      return;
    }
    case Intrinsics::AtomicRMW: {
      if (DestTy != IceType_i64)
        return;
      if (!Intrinsics::isMemoryOrderValid(
              ID, getConstantMemoryOrder(Intrinsic->getArg(3)))) {
        Func->setError("Unexpected memory ordering for AtomicRMW");
        return;
      }
      auto Operation = static_cast<Intrinsics::AtomicRMWOperation>(
          llvm::cast<ConstantInteger32>(Intrinsic->getArg(0))->getValue());
      auto *Addr = Intrinsic->getArg(1);
      auto *Newval = Intrinsic->getArg(2);
      Operand *TargetHelper;
      switch (Operation) {
      case Intrinsics::AtomicAdd:
        TargetHelper = Ctx->getConstantExternSym(
            Ctx->getGlobalString("__sync_fetch_and_add_8"));
        break;
      case Intrinsics::AtomicSub:
        TargetHelper = Ctx->getConstantExternSym(
            Ctx->getGlobalString("__sync_fetch_and_sub_8"));
        break;
      case Intrinsics::AtomicOr:
        TargetHelper = Ctx->getConstantExternSym(
            Ctx->getGlobalString("__sync_fetch_and_or_8"));
        break;
      case Intrinsics::AtomicAnd:
        TargetHelper = Ctx->getConstantExternSym(
            Ctx->getGlobalString("__sync_fetch_and_and_8"));
        break;
      case Intrinsics::AtomicXor:
        TargetHelper = Ctx->getConstantExternSym(
            Ctx->getGlobalString("__sync_fetch_and_xor_8"));
        break;
      case Intrinsics::AtomicExchange:
        TargetHelper = Ctx->getConstantExternSym(
            Ctx->getGlobalString("__sync_lock_test_and_set_8"));
        break;
      default:
        llvm::report_fatal_error("Unknown AtomicRMW operation");
        return;
      }
      Context.insert<InstMIPS32Sync>();
      static constexpr SizeT MaxArgs = 2;
      auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(Addr);
      Call->addArg(Newval);
      Context.insert<InstMIPS32Sync>();
      Instr->setDeleted();
      return;
    }
    case Intrinsics::Ctpop: {
      Operand *Src0 = Intrinsic->getArg(0);
      Operand *TargetHelper =
          Ctx->getRuntimeHelperFunc(isInt32Asserting32Or64(Src0->getType())
                                        ? RuntimeHelper::H_call_ctpop_i32
                                        : RuntimeHelper::H_call_ctpop_i64);
      static constexpr SizeT MaxArgs = 1;
      auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(Src0);
      Instr->setDeleted();
      return;
    }
    case Intrinsics::Longjmp: {
      static constexpr SizeT MaxArgs = 2;
      static constexpr Variable *NoDest = nullptr;
      Operand *TargetHelper =
          Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_longjmp);
      auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(Intrinsic->getArg(0));
      Call->addArg(Intrinsic->getArg(1));
      Instr->setDeleted();
      return;
    }
    case Intrinsics::Memcpy: {
      static constexpr SizeT MaxArgs = 3;
      static constexpr Variable *NoDest = nullptr;
      Operand *TargetHelper =
          Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_memcpy);
      auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(Intrinsic->getArg(0));
      Call->addArg(Intrinsic->getArg(1));
      Call->addArg(Intrinsic->getArg(2));
      Instr->setDeleted();
      return;
    }
    case Intrinsics::Memmove: {
      static constexpr SizeT MaxArgs = 3;
      static constexpr Variable *NoDest = nullptr;
      Operand *TargetHelper =
          Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_memmove);
      auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(Intrinsic->getArg(0));
      Call->addArg(Intrinsic->getArg(1));
      Call->addArg(Intrinsic->getArg(2));
      Instr->setDeleted();
      return;
    }
    case Intrinsics::Memset: {
      Operand *ValOp = Intrinsic->getArg(1);
      assert(ValOp->getType() == IceType_i8);
      Variable *ValExt = Func->makeVariable(stackSlotType());
      Context.insert<InstCast>(InstCast::Zext, ValExt, ValOp);

      static constexpr SizeT MaxArgs = 3;
      static constexpr Variable *NoDest = nullptr;
      Operand *TargetHelper =
          Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_memset);
      auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(Intrinsic->getArg(0));
      Call->addArg(ValExt);
      Call->addArg(Intrinsic->getArg(2));
      Instr->setDeleted();
      return;
    }
    case Intrinsics::NaClReadTP: {
      if (SandboxingType == ST_NaCl) {
        return;
      }
      static constexpr SizeT MaxArgs = 0;
      assert(SandboxingType != ST_Nonsfi);
      Operand *TargetHelper =
          Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_read_tp);
      Context.insert<InstCall>(MaxArgs, Dest, TargetHelper, NoTailCall,
                               IsTargetHelperCall);
      Instr->setDeleted();
      return;
    }
    case Intrinsics::Setjmp: {
      static constexpr SizeT MaxArgs = 1;
      Operand *TargetHelper =
          Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_setjmp);
      auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(Intrinsic->getArg(0));
      Instr->setDeleted();
      return;
    }
    }
    llvm::report_fatal_error("Control flow should never have reached here.");
  }
  }
}

void TargetMIPS32::findMaxStackOutArgsSize() {
  // MinNeededOutArgsBytes should be updated if the Target ever creates a
  // high-level InstCall that requires more stack bytes.
  size_t MinNeededOutArgsBytes = 0;
  if (!MaybeLeafFunc)
    MinNeededOutArgsBytes = MIPS32_MAX_GPR_ARG * 4;
  MaxOutArgsSizeBytes = MinNeededOutArgsBytes;
  for (CfgNode *Node : Func->getNodes()) {
    Context.init(Node);
    while (!Context.atEnd()) {
      PostIncrLoweringContext PostIncrement(Context);
      Inst *CurInstr = iteratorToInst(Context.getCur());
      if (auto *Call = llvm::dyn_cast<InstCall>(CurInstr)) {
        SizeT OutArgsSizeBytes = getCallStackArgumentsSizeBytes(Call);
        MaxOutArgsSizeBytes = std::max(MaxOutArgsSizeBytes, OutArgsSizeBytes);
      }
    }
  }
  CurrentAllocaOffset = MaxOutArgsSizeBytes;
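  // Allocas sit above the out-args area (see the frame layout in addProlog),
  // so dynamic allocation starts at MaxOutArgsSizeBytes from the stack
  // pointer, keeping the bottom of the frame reserved for outgoing arguments.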
}

void TargetMIPS32::translateO2() {
  TimerMarker T(TimerStack::TT_O2, Func);

  // TODO(stichnot): share passes with X86?
  // https://code.google.com/p/nativeclient/issues/detail?id=4094
  genTargetHelperCalls();

  unsetIfNonLeafFunc();

  findMaxStackOutArgsSize();

  // Merge Alloca instructions, and lay out the stack.
  static constexpr bool SortAndCombineAllocas = true;
  Func->processAllocas(SortAndCombineAllocas);
  Func->dump("After Alloca processing");

  if (!getFlags().getEnablePhiEdgeSplit()) {
    // Lower Phi instructions.
    Func->placePhiLoads();
    if (Func->hasError())
      return;
    Func->placePhiStores();
    if (Func->hasError())
      return;
    Func->deletePhis();
    if (Func->hasError())
      return;
    Func->dump("After Phi lowering");
  }

  // Address mode optimization.
  Func->getVMetadata()->init(VMK_SingleDefs);
  Func->doAddressOpt();

  // Argument lowering
  Func->doArgLowering();

  // Target lowering. This requires liveness analysis for some parts of the
  // lowering decisions, such as compare/branch fusing. If non-lightweight
  // liveness analysis is used, the instructions need to be renumbered first.
  // TODO: This renumbering should only be necessary if we're actually
  // calculating live intervals, which we only do for register allocation.
  Func->renumberInstructions();
  if (Func->hasError())
    return;

  // TODO: It should be sufficient to use the fastest liveness calculation,
  // i.e. livenessLightweight(). However, for some reason that slows down the
  // rest of the translation. Investigate.
  Func->liveness(Liveness_Basic);
  if (Func->hasError())
    return;
  Func->dump("After MIPS32 address mode opt");

  Func->genCode();
  if (Func->hasError())
    return;
  Func->dump("After MIPS32 codegen");

  // Register allocation. This requires instruction renumbering and full
  // liveness analysis.
  Func->renumberInstructions();
  if (Func->hasError())
    return;
  Func->liveness(Liveness_Intervals);
  if (Func->hasError())
    return;
  // The post-codegen dump is done here, after liveness analysis and associated
  // cleanup, to make the dump cleaner and more useful.
  Func->dump("After initial MIPS32 codegen");
  // Validate the live range computations. The expensive validation call is
  // deliberately only made when assertions are enabled.
  assert(Func->validateLiveness());
  Func->getVMetadata()->init(VMK_All);
  regAlloc(RAK_Global);
  if (Func->hasError())
    return;
  Func->dump("After linear scan regalloc");

  if (getFlags().getEnablePhiEdgeSplit()) {
    Func->advancedPhiLowering();
    Func->dump("After advanced Phi lowering");
  }

  // Stack frame mapping.
  Func->genFrame();
  if (Func->hasError())
    return;
  Func->dump("After stack frame mapping");

  postLowerLegalization();
  if (Func->hasError())
    return;
  Func->dump("After postLowerLegalization");

  Func->contractEmptyNodes();
  Func->reorderNodes();

  // Branch optimization. This needs to be done just before code emission. In
  // particular, no transformations that insert or reorder CfgNodes should be
  // done after branch optimization. We go ahead and do it before nop insertion
  // to reduce the amount of work needed for searching for opportunities.
  Func->doBranchOpt();
  Func->dump("After branch optimization");
}

void TargetMIPS32::translateOm1() {
  TimerMarker T(TimerStack::TT_Om1, Func);

  // TODO: share passes with X86?
  genTargetHelperCalls();

  unsetIfNonLeafFunc();

  findMaxStackOutArgsSize();

  // Do not merge Alloca instructions, and lay out the stack.
  static constexpr bool SortAndCombineAllocas = false;
  Func->processAllocas(SortAndCombineAllocas);
  Func->dump("After Alloca processing");

  Func->placePhiLoads();
  if (Func->hasError())
    return;
  Func->placePhiStores();
  if (Func->hasError())
    return;
  Func->deletePhis();
  if (Func->hasError())
    return;
  Func->dump("After Phi lowering");

  Func->doArgLowering();

  Func->genCode();
  if (Func->hasError())
    return;
  Func->dump("After initial MIPS32 codegen");

  regAlloc(RAK_InfOnly);
  if (Func->hasError())
    return;
  Func->dump("After regalloc of infinite-weight variables");

  Func->genFrame();
  if (Func->hasError())
    return;
  Func->dump("After stack frame mapping");

  postLowerLegalization();
  if (Func->hasError())
    return;
  Func->dump("After postLowerLegalization");
}

bool TargetMIPS32::doBranchOpt(Inst *Instr, const CfgNode *NextNode) {
  if (auto *Br = llvm::dyn_cast<InstMIPS32Br>(Instr)) {
    return Br->optimizeBranch(NextNode);
  }
  return false;
}

namespace {

const char *RegNames[RegMIPS32::Reg_NUM] = {
#define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt,    \
          isI64Pair, isFP32, isFP64, isVec128, alias_init)                     \
  name,
    REGMIPS32_TABLE
#undef X
};

} // end of anonymous namespace

const char *RegMIPS32::getRegName(RegNumT RegNum) {
  RegNum.assertIsValid();
  return RegNames[RegNum];
}

const char *TargetMIPS32::getRegName(RegNumT RegNum, Type Ty) const {
  (void)Ty;
  return RegMIPS32::getRegName(RegNum);
}

Variable *TargetMIPS32::getPhysicalRegister(RegNumT RegNum, Type Ty) {
  if (Ty == IceType_void)
    Ty = IceType_i32;
  if (PhysicalRegisters[Ty].empty())
    PhysicalRegisters[Ty].resize(RegMIPS32::Reg_NUM);
  RegNum.assertIsValid();
  Variable *Reg = PhysicalRegisters[Ty][RegNum];
  if (Reg == nullptr) {
    Reg = Func->makeVariable(Ty);
    Reg->setRegNum(RegNum);
    PhysicalRegisters[Ty][RegNum] = Reg;
    // Specially mark a named physical register as an "argument" so that it is
    // considered live upon function entry. Otherwise it's possible to get
    // liveness validation errors for saving callee-save registers.
    Func->addImplicitArg(Reg);
    // Don't bother tracking the live range of a named physical register.
    Reg->setIgnoreLiveness();
  }
  return Reg;
}

void TargetMIPS32::emitJumpTable(const Cfg *Func,
                                 const InstJumpTable *JumpTable) const {
  (void)Func;
  (void)JumpTable;
  UnimplementedError(getFlags());
}

/// Provide a trivial wrapper to legalize() for this common usage.
Variable *TargetMIPS32::legalizeToReg(Operand *From, RegNumT RegNum) {
  return llvm::cast<Variable>(legalize(From, Legal_Reg, RegNum));
}

/// Legalize undef values to concrete values.
Operand *TargetMIPS32::legalizeUndef(Operand *From, RegNumT RegNum) {
  (void)RegNum;
  Type Ty = From->getType();
  if (llvm::isa<ConstantUndef>(From)) {
    // Lower undefs to zero. Another option is to lower undefs to an
    // uninitialized register; however, using an uninitialized register
    // results in less predictable code.
    //
    // If in the future the implementation is changed to lower undef
    // values to uninitialized registers, a FakeDef will be needed:
    // Context.insert(InstFakeDef::create(Func, Reg));
    // This is in order to ensure that the live range of Reg is not
    // overestimated. If the constant being lowered is a 64 bit value,
    // then the result should be split and the lo and hi components will
    // need to go in uninitialized registers.
    if (isVectorType(Ty)) {
      Variable *Var = makeReg(Ty, RegNum);
      auto *Reg = llvm::cast<VariableVecOn32>(Var);
      Reg->initVecElement(Func);
      auto *Zero = getZero();
      for (Variable *Var : Reg->getContainers()) {
        _mov(Var, Zero);
      }
      return Reg;
    }
    return Ctx->getConstantZero(Ty);
  }
  return From;
}

Variable *TargetMIPS32::makeReg(Type Type, RegNumT RegNum) {
  // There aren't any 64-bit integer registers for Mips32.
  assert(Type != IceType_i64);
  Variable *Reg = Func->makeVariable(Type);
  if (RegNum.hasValue())
    Reg->setRegNum(RegNum);
  else
    Reg->setMustHaveReg();
  return Reg;
}

OperandMIPS32Mem *TargetMIPS32::formMemoryOperand(Operand *Operand, Type Ty) {
  // It may be the case that address mode optimization already creates an
  // OperandMIPS32Mem, so in that case it wouldn't need another level of
  // transformation.
  if (auto *Mem = llvm::dyn_cast<OperandMIPS32Mem>(Operand)) {
    return llvm::cast<OperandMIPS32Mem>(legalize(Mem));
  }

  // If we didn't do address mode optimization, then we only have a base/offset
  // to work with. MIPS always requires a base register, so just use that to
  // hold the operand.
  auto *Base = llvm::cast<Variable>(
      legalize(Operand, Legal_Reg | Legal_Rematerializable));
  const int32_t Offset = Base->hasStackOffset() ? Base->getStackOffset() : 0;
  return OperandMIPS32Mem::create(
      Func, Ty, Base,
      llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(Offset)));
}

void TargetMIPS32::emitVariable(const Variable *Var) const {
  if (!BuildDefs::dump())
    return;
  Ostream &Str = Ctx->getStrEmit();
  const Type FrameSPTy = IceType_i32;
  if (Var->hasReg()) {
    Str << '$' << getRegName(Var->getRegNum(), Var->getType());
    return;
  }
  if (Var->mustHaveReg()) {
    llvm::report_fatal_error("Infinite-weight Variable (" + Var->getName() +
                             ") has no register assigned - function " +
                             Func->getFunctionName());
  }
  const int32_t Offset = Var->getStackOffset();
  Str << Offset;
  Str << "($" << getRegName(getFrameOrStackReg(), FrameSPTy);
  Str << ")";
}

TargetMIPS32::CallingConv::CallingConv()
    : GPRegsUsed(RegMIPS32::Reg_NUM),
      GPRArgs(GPRArgInitializer.rbegin(), GPRArgInitializer.rend()),
      I64Args(I64ArgInitializer.rbegin(), I64ArgInitializer.rend()),
      VFPRegsUsed(RegMIPS32::Reg_NUM),
      FP32Args(FP32ArgInitializer.rbegin(), FP32ArgInitializer.rend()),
      FP64Args(FP64ArgInitializer.rbegin(), FP64ArgInitializer.rend()) {}

// In the MIPS O32 ABI, FP argument registers can be used only if the first
// argument is of type float/double. The UseFPRegs flag takes care of that.
// Also, FP argument registers can be used only for the first two arguments,
// so we need the argument number to make register allocation decisions.
bool TargetMIPS32::CallingConv::argInReg(Type Ty, uint32_t ArgNo,
                                         RegNumT *Reg) {
  if (isScalarIntegerType(Ty) || isVectorType(Ty))
    return argInGPR(Ty, Reg);
  if (isScalarFloatingType(Ty)) {
    if (ArgNo == 0) {
      UseFPRegs = true;
      return argInVFP(Ty, Reg);
    }
    if (UseFPRegs && ArgNo == 1) {
      UseFPRegs = false;
      return argInVFP(Ty, Reg);
    }
    return argInGPR(Ty, Reg);
  }
  llvm::report_fatal_error("argInReg: Invalid type.");
  return false;
}

bool TargetMIPS32::CallingConv::argInGPR(Type Ty, RegNumT *Reg) {
  CfgVector<RegNumT> *Source;

  switch (Ty) {
  default: {
    llvm::report_fatal_error("argInGPR: Invalid type.");
    return false;
  } break;
  case IceType_v4i1:
  case IceType_v8i1:
  case IceType_v16i1:
  case IceType_v16i8:
  case IceType_v8i16:
  case IceType_v4i32:
  case IceType_v4f32:
  case IceType_i32:
  case IceType_f32: {
    Source = &GPRArgs;
  } break;
  case IceType_i64:
  case IceType_f64: {
    Source = &I64Args;
  } break;
  }

  discardUnavailableGPRsAndTheirAliases(Source);

  // If $4 is used for any scalar type (or for returning v4f32) then the next
  // vector argument is passed in $6:$7:stack:stack.
  if (isVectorType(Ty)) {
    alignGPR(Source);
  }

  if (Source->empty()) {
    GPRegsUsed.set();
    return false;
  }

  *Reg = Source->back();
  // Note that we don't Source->pop_back() here. This is intentional. Notice
  // how we mark all of Reg's aliases as used. So, for the next argument,
  // Source->back() is marked as unavailable, and it is thus implicitly popped
  // from the stack.
  GPRegsUsed |= RegisterAliases[*Reg];

  // All vector arguments, irrespective of their base type, are passed in GP
  // registers. The first vector argument is passed in $4:$5:$6:$7 and the
  // second is passed in $6:$7:stack:stack. If it is the first argument then
  // discard $4:$5:$6:$7, otherwise discard $6:$7 only.
  if (isVectorType(Ty)) {
    if (((unsigned)*Reg) == RegMIPS32::Reg_A0) {
      GPRegsUsed |= RegisterAliases[RegMIPS32::Reg_A1];
      GPRegsUsed |= RegisterAliases[RegMIPS32::Reg_A2];
      GPRegsUsed |= RegisterAliases[RegMIPS32::Reg_A3];
    } else {
      GPRegsUsed |= RegisterAliases[RegMIPS32::Reg_A3];
    }
  }

  return true;
}

inline void TargetMIPS32::CallingConv::discardNextGPRAndItsAliases(
    CfgVector<RegNumT> *Regs) {
  GPRegsUsed |= RegisterAliases[Regs->back()];
  Regs->pop_back();
}

inline void TargetMIPS32::CallingConv::alignGPR(CfgVector<RegNumT> *Regs) {
  if (Regs->back() == RegMIPS32::Reg_A1 || Regs->back() == RegMIPS32::Reg_A3)
    discardNextGPRAndItsAliases(Regs);
}
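// For example, if the next available GPR is $a1, alignGPR discards it so that
// a following i64/f64 pair starts at the even register pair $a2:$a3, as the
// O32 ABI requires.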

// GPRs are not packed when passing parameters. Thus, a function foo(i32, i64,
// i32) will have the first argument in a0, the second in a2-a3, and the third
// on the stack. To model this behavior, whenever we pop a register from Regs,
// we remove all of its aliases from the pool of available GPRs. This has the
// effect of computing the "closure" on the GPR registers.
void TargetMIPS32::CallingConv::discardUnavailableGPRsAndTheirAliases(
    CfgVector<RegNumT> *Regs) {
  while (!Regs->empty() && GPRegsUsed[Regs->back()]) {
    discardNextGPRAndItsAliases(Regs);
  }
}

bool TargetMIPS32::CallingConv::argInVFP(Type Ty, RegNumT *Reg) {
  CfgVector<RegNumT> *Source;

  switch (Ty) {
  default: {
    llvm::report_fatal_error("argInVFP: Invalid type.");
    return false;
  } break;
  case IceType_f32: {
    Source = &FP32Args;
  } break;
  case IceType_f64: {
    Source = &FP64Args;
  } break;
  }

  discardUnavailableVFPRegsAndTheirAliases(Source);

  if (Source->empty()) {
    VFPRegsUsed.set();
    return false;
  }

  *Reg = Source->back();
  VFPRegsUsed |= RegisterAliases[*Reg];

  // In the MIPS O32 ABI, if the function arguments are (f32, i32) then reg_a0
  // cannot be used for the second argument even though it is free. The f32
  // argument goes in reg_f12 and the i32 argument goes in reg_a1. Similarly,
  // if the arguments are (f64, i32), the second argument goes in reg_a3 and
  // a0, a1 are not used.
  Source = &GPRArgs;
  // Discard one GPR for f32 (4 bytes), two for f64 (4 + 4 bytes).
  if (Ty == IceType_f64) {
    // In the MIPS O32 ABI, when GPR argument pairs are used to hold f64
    // values, the pair must start at an even register. Likewise, when we
    // discard GPRs because some of the first 16 bytes of arguments go in
    // FPRs, we must take care of alignment. For example, if the function
    // arguments are (f32, f64, f32), for the first f32 we discard a0; for the
    // f64 argument, which will go in F14F15, we must first align the GPR
    // vector to an even register by discarding a1, then discard the two GPRs
    // a2 and a3. The last f32 argument then goes on the stack.
    alignGPR(Source);
    discardNextGPRAndItsAliases(Source);
  }
  discardNextGPRAndItsAliases(Source);
  return true;
}

void TargetMIPS32::CallingConv::discardUnavailableVFPRegsAndTheirAliases(
    CfgVector<RegNumT> *Regs) {
  while (!Regs->empty() && VFPRegsUsed[Regs->back()]) {
    Regs->pop_back();
  }
}

void TargetMIPS32::lowerArguments() {
  VarList &Args = Func->getArgs();
  TargetMIPS32::CallingConv CC;

  // For each register argument, replace Arg in the argument list with the home
  // register. Then generate an instruction in the prolog to copy the home
  // register to the assigned location of Arg.
  Context.init(Func->getEntryNode());
  Context.setInsertPoint(Context.getCur());

  // v4f32 is returned through the stack. $4 is set up by the caller and passed
  // as the first argument implicitly. The callee then copies the return vector
  // to the address in $4.
  Variable *ImplicitRetVec = nullptr;
  if (isVectorFloatingType(Func->getReturnType())) {
    ImplicitRetVec = Func->makeVariable(IceType_i32);
    ImplicitRetVec->setName(Func, "ImplicitRet_v4f32");
    ImplicitRetVec->setIsArg();
    Args.insert(Args.begin(), ImplicitRetVec);
    setImplicitRet(ImplicitRetVec);
  }

  for (SizeT i = 0, E = Args.size(); i < E; ++i) {
    Variable *Arg = Args[i];
    Type Ty = Arg->getType();
    RegNumT RegNum;
    if (!CC.argInReg(Ty, i, &RegNum)) {
      continue;
    }
    Variable *RegisterArg = Func->makeVariable(Ty);
    if (BuildDefs::dump()) {
      RegisterArg->setName(Func, "home_reg:" + Arg->getName());
    }
    RegisterArg->setIsArg();
    Arg->setIsArg(false);
    Args[i] = RegisterArg;

    if (isVectorType(Ty)) {
      auto *RegisterArgVec = llvm::cast<VariableVecOn32>(RegisterArg);
      RegisterArgVec->initVecElement(Func);
      RegisterArgVec->getContainers()[0]->setRegNum(
          RegNumT::fixme((unsigned)RegNum + 0));
      RegisterArgVec->getContainers()[1]->setRegNum(
          RegNumT::fixme((unsigned)RegNum + 1));
      // The first two elements of the second vector argument are passed in
      // $6:$7 and the remaining two on the stack. Do not assign registers to
      // the last two elements if this is the second vector argument.
      if (i == 0) {
        RegisterArgVec->getContainers()[2]->setRegNum(
            RegNumT::fixme((unsigned)RegNum + 2));
        RegisterArgVec->getContainers()[3]->setRegNum(
            RegNumT::fixme((unsigned)RegNum + 3));
      } else {
        RegisterArgVec->getContainers()[2]->setRegNum(
            RegNumT::fixme(RegNumT()));
        RegisterArgVec->getContainers()[3]->setRegNum(
            RegNumT::fixme(RegNumT()));
      }
    } else {
      switch (Ty) {
      default: {
        RegisterArg->setRegNum(RegNum);
      } break;
      case IceType_i64: {
        auto *RegisterArg64 = llvm::cast<Variable64On32>(RegisterArg);
        RegisterArg64->initHiLo(Func);
        RegisterArg64->getLo()->setRegNum(
            RegNumT::fixme(RegMIPS32::get64PairFirstRegNum(RegNum)));
        RegisterArg64->getHi()->setRegNum(
            RegNumT::fixme(RegMIPS32::get64PairSecondRegNum(RegNum)));
      } break;
      }
    }
    Context.insert<InstAssign>(Arg, RegisterArg);
  }

  // Insert a fake use of ImplicitRet_v4f32 to keep it live.
  if (ImplicitRetVec) {
    for (CfgNode *Node : Func->getNodes()) {
      for (Inst &Instr : Node->getInsts()) {
        if (llvm::isa<InstRet>(&Instr)) {
          Context.setInsertPoint(instToIterator(&Instr));
          Context.insert<InstFakeUse>(ImplicitRetVec);
          break;
        }
      }
    }
  }
}

Type TargetMIPS32::stackSlotType() { return IceType_i32; }

// Helper function for addProlog().
//
// This assumes Arg is an argument passed on the stack. This sets the frame
// offset for Arg and updates InArgsSizeBytes according to Arg's width. For an
// I64 arg that has been split into Lo and Hi components, it calls itself
// recursively on the components, taking care to handle Lo first because of the
// little-endian architecture. Lastly, this function generates an instruction
// to copy Arg into its assigned register if applicable.
void TargetMIPS32::finishArgumentLowering(Variable *Arg, bool PartialOnStack,
                                          Variable *FramePtr,
                                          size_t BasicFrameOffset,
                                          size_t *InArgsSizeBytes) {
  const Type Ty = Arg->getType();
  *InArgsSizeBytes = applyStackAlignmentTy(*InArgsSizeBytes, Ty);

  // If $4 is used for any scalar type (or for returning v4f32) then the next
  // vector argument is passed in $6:$7:stack:stack. Load the 3rd and 4th
  // elements from the argument stack.
  if (auto *ArgVecOn32 = llvm::dyn_cast<VariableVecOn32>(Arg)) {
    if (PartialOnStack == false) {
      auto *Elem0 = ArgVecOn32->getContainers()[0];
      auto *Elem1 = ArgVecOn32->getContainers()[1];
      finishArgumentLowering(Elem0, PartialOnStack, FramePtr, BasicFrameOffset,
                             InArgsSizeBytes);
      finishArgumentLowering(Elem1, PartialOnStack, FramePtr, BasicFrameOffset,
                             InArgsSizeBytes);
    }
    auto *Elem2 = ArgVecOn32->getContainers()[2];
    auto *Elem3 = ArgVecOn32->getContainers()[3];
    finishArgumentLowering(Elem2, PartialOnStack, FramePtr, BasicFrameOffset,
                           InArgsSizeBytes);
    finishArgumentLowering(Elem3, PartialOnStack, FramePtr, BasicFrameOffset,
                           InArgsSizeBytes);
    return;
  }

  if (auto *Arg64On32 = llvm::dyn_cast<Variable64On32>(Arg)) {
    Variable *const Lo = Arg64On32->getLo();
    Variable *const Hi = Arg64On32->getHi();
    finishArgumentLowering(Lo, PartialOnStack, FramePtr, BasicFrameOffset,
                           InArgsSizeBytes);
    finishArgumentLowering(Hi, PartialOnStack, FramePtr, BasicFrameOffset,
                           InArgsSizeBytes);
    return;
  }

  assert(Ty != IceType_i64);
  assert(!isVectorType(Ty));

  const int32_t ArgStackOffset = BasicFrameOffset + *InArgsSizeBytes;
  *InArgsSizeBytes += typeWidthInBytesOnStack(Ty);

  if (!Arg->hasReg()) {
    Arg->setStackOffset(ArgStackOffset);
    return;
  }

  // If the argument variable has been assigned a register, we need to copy the
  // value from the stack slot.
  Variable *Parameter = Func->makeVariable(Ty);
  Parameter->setMustNotHaveReg();
  Parameter->setStackOffset(ArgStackOffset);
  _mov(Arg, Parameter);
}
1480
addProlog(CfgNode * Node)1481 void TargetMIPS32::addProlog(CfgNode *Node) {
1482 // Stack frame layout:
1483 //
1484 // +------------------------+
1485 // | 1. preserved registers |
1486 // +------------------------+
1487 // | 2. padding |
1488 // +------------------------+
1489 // | 3. global spill area |
1490 // +------------------------+
1491 // | 4. padding |
1492 // +------------------------+
1493 // | 5. local spill area |
1494 // +------------------------+
1495 // | 6. padding |
1496 // +------------------------+
1497 // | 7. allocas |
1498 // +------------------------+
1499 // | 8. padding |
1500 // +------------------------+
1501 // | 9. out args |
1502 // +------------------------+ <--- StackPointer
1503 //
1504 // The following variables record the size in bytes of the given areas:
1505 // * PreservedRegsSizeBytes: area 1
1506 // * SpillAreaPaddingBytes: area 2
1507 // * GlobalsSize: area 3
1508 // * GlobalsAndSubsequentPaddingSize: areas 3 - 4
1509 // * LocalsSpillAreaSize: area 5
1510 // * SpillAreaSizeBytes: areas 2 - 9
1511 // * maxOutArgsSizeBytes(): area 9
1512
1513 Context.init(Node);
1514 Context.setInsertPoint(Context.getCur());
1515
1516 SmallBitVector CalleeSaves = getRegisterSet(RegSet_CalleeSave, RegSet_None);
1517 RegsUsed = SmallBitVector(CalleeSaves.size());
1518
1519 VarList SortedSpilledVariables;
1520
1521 size_t GlobalsSize = 0;
1522 // If there is a separate locals area, this represents that area. Otherwise
1523 // it counts any variable not counted by GlobalsSize.
1524 SpillAreaSizeBytes = 0;
1525 // If there is a separate locals area, this specifies the alignment for it.
1526 uint32_t LocalsSlotsAlignmentBytes = 0;
1527 // The entire spill locations area gets aligned to largest natural alignment
1528 // of the variables that have a spill slot.
1529 uint32_t SpillAreaAlignmentBytes = 0;
1530 // For now, we don't have target-specific variables that need special
1531 // treatment (no stack-slot-linked SpillVariable type).
1532 std::function<bool(Variable *)> TargetVarHook = [](Variable *Var) {
1533 static constexpr bool AssignStackSlot = false;
1534 static constexpr bool DontAssignStackSlot = !AssignStackSlot;
1535 if (llvm::isa<Variable64On32>(Var)) {
1536 return DontAssignStackSlot;
1537 }
1538 return AssignStackSlot;
1539 };
1540
1541 // Compute the list of spilled variables and bounds for GlobalsSize, etc.
1542 getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize,
1543 &SpillAreaSizeBytes, &SpillAreaAlignmentBytes,
1544 &LocalsSlotsAlignmentBytes, TargetVarHook);
1545 uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes;
1546 SpillAreaSizeBytes += GlobalsSize;
1547
1548 PreservedGPRs.reserve(CalleeSaves.size());
1549
1550 // Consider FP and RA as callee-save / used as needed.
1551 if (UsesFramePointer) {
1552 if (RegsUsed[RegMIPS32::Reg_FP]) {
1553 llvm::report_fatal_error("Frame pointer has been used.");
1554 }
1555 CalleeSaves[RegMIPS32::Reg_FP] = true;
1556 RegsUsed[RegMIPS32::Reg_FP] = true;
1557 }
1558 if (!MaybeLeafFunc) {
1559 CalleeSaves[RegMIPS32::Reg_RA] = true;
1560 RegsUsed[RegMIPS32::Reg_RA] = true;
1561 }
1562
1563 // Make two passes over the used registers. The first pass records all the
1564 // used registers -- and their aliases. Then, we figure out which GPR
1565 // registers should be saved.
1566 SmallBitVector ToPreserve(RegMIPS32::Reg_NUM);
1567 for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
1568 if (CalleeSaves[i] && RegsUsed[i]) {
1569 ToPreserve |= RegisterAliases[i];
1570 }
1571 }
1572
1573 uint32_t NumCallee = 0;
1574
1575 // RegClass is a tuple of
1576 //
1577 // <First Register in Class, Last Register in Class, Vector of Save Registers>
1578 //
1579 // We use this tuple to figure out which registers we should save/restore
1580 // during prolog/epilog.
1581
1582 using RegClassType = std::tuple<uint32_t, uint32_t, VarList *>;
1583 const RegClassType RegClass = RegClassType(
1584 RegMIPS32::Reg_GPR_First, RegMIPS32::Reg_FPR_Last, &PreservedGPRs);
1585 const uint32_t FirstRegInClass = std::get<0>(RegClass);
1586 const uint32_t LastRegInClass = std::get<1>(RegClass);
1587 VarList *const PreservedRegsInClass = std::get<2>(RegClass);
1588 for (uint32_t Reg = LastRegInClass; Reg > FirstRegInClass; Reg--) {
1589 if (!ToPreserve[Reg]) {
1590 continue;
1591 }
1592 ++NumCallee;
1593 Variable *PhysicalRegister = getPhysicalRegister(RegNumT::fromInt(Reg));
1594 PreservedRegsSizeBytes +=
1595 typeWidthInBytesOnStack(PhysicalRegister->getType());
1596 PreservedRegsInClass->push_back(PhysicalRegister);
1597 }
1598
1599 Ctx->statsUpdateRegistersSaved(NumCallee);
1600
1601 // Align the variables area. SpillAreaPaddingBytes is the size of the region
1602 // after the preserved registers and before the spill areas.
1603 // LocalsSlotsPaddingBytes is the amount of padding between the globals and
1604 // locals area if they are separate.
1605 assert(SpillAreaAlignmentBytes <= MIPS32_STACK_ALIGNMENT_BYTES);
1606 (void)MIPS32_STACK_ALIGNMENT_BYTES;
1607 assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes);
1608 uint32_t SpillAreaPaddingBytes = 0;
1609 uint32_t LocalsSlotsPaddingBytes = 0;
1610 alignStackSpillAreas(PreservedRegsSizeBytes, SpillAreaAlignmentBytes,
1611 GlobalsSize, LocalsSlotsAlignmentBytes,
1612 &SpillAreaPaddingBytes, &LocalsSlotsPaddingBytes);
1613 SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes;
1614 uint32_t GlobalsAndSubsequentPaddingSize =
1615 GlobalsSize + LocalsSlotsPaddingBytes;
1616
1617 // Add the out-args space to the stack, and align SP if necessary.
1618 if (!NeedsStackAlignment) {
1619 SpillAreaSizeBytes += MaxOutArgsSizeBytes * (VariableAllocaUsed ? 0 : 1);
1620 } else {
1621 SpillAreaSizeBytes = applyStackAlignment(
1622 SpillAreaSizeBytes +
1623 (VariableAllocaUsed ? VariableAllocaAlignBytes : MaxOutArgsSizeBytes));
1624 }
1625
1626 // Combine fixed alloca with SpillAreaSize.
1627 SpillAreaSizeBytes += FixedAllocaSizeBytes;
1628
1629 TotalStackSizeBytes =
1630 applyStackAlignment(PreservedRegsSizeBytes + SpillAreaSizeBytes);
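// For example, with the 16-byte MIPS32 stack alignment, 8 bytes of preserved
// registers plus a 36-byte spill area rounds applyStackAlignment(44) up to
// 48.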
1631
1632 // Generate "addiu sp, sp, -TotalStackSizeBytes"
1633 if (TotalStackSizeBytes) {
1634 // Use the scratch register if needed to legalize the immediate.
1635 Sandboxer(this).addiu_sp(-TotalStackSizeBytes);
1636 }
1637
1638 Ctx->statsUpdateFrameBytes(TotalStackSizeBytes);
1639
1640 if (!PreservedGPRs.empty()) {
1641 uint32_t StackOffset = TotalStackSizeBytes;
1642 for (Variable *Var : *PreservedRegsInClass) {
1643 Type RegType;
1644 if (RegMIPS32::isFPRReg(Var->getRegNum()))
1645 RegType = IceType_f32;
1646 else
1647 RegType = IceType_i32;
1648 auto *PhysicalRegister = makeReg(RegType, Var->getRegNum());
1649 StackOffset -= typeWidthInBytesOnStack(RegType);
1650 Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP);
1651 OperandMIPS32Mem *MemoryLocation = OperandMIPS32Mem::create(
1652 Func, RegType, SP,
1653 llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(StackOffset)));
1654 Sandboxer(this).sw(PhysicalRegister, MemoryLocation);
1655 }
1656 }
1657
1658 Variable *FP = getPhysicalRegister(RegMIPS32::Reg_FP);
1659
1660 // Generate "mov FP, SP" if needed.
1661 if (UsesFramePointer) {
1662 Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP);
1663 _mov(FP, SP);
1664 // Keep FP live for late-stage liveness analysis (e.g. asm-verbose mode).
1665 Context.insert<InstFakeUse>(FP);
1666 }
1667
1668 // Fill in stack offsets for stack args, and copy args into registers for
1669 // those that were register-allocated. Args are pushed right to left, so
1670 // Arg[0] is closest to the stack/frame pointer.
1671 const VarList &Args = Func->getArgs();
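// Per the MIPS O32 calling convention, the caller always reserves stack
// slots for the four GPR argument registers, so in-memory arguments start
// MIPS32_MAX_GPR_ARG * 4 == 16 bytes into the incoming argument area.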
1672 size_t InArgsSizeBytes = MIPS32_MAX_GPR_ARG * 4;
1673 TargetMIPS32::CallingConv CC;
1674 uint32_t ArgNo = 0;
1675
1676 for (Variable *Arg : Args) {
1677 RegNumT DummyReg;
1678 const Type Ty = Arg->getType();
1679 bool PartialOnStack;
1680 // Skip arguments passed in registers.
1681 if (CC.argInReg(Ty, ArgNo, &DummyReg)) {
1682 // Load the argument from the stack when:
1683 // 1. This is the first vector argument and the return type is v4f32; $4
1684 //    then implicitly carries the stack address, so the 3rd and 4th
1685 //    elements of the vector argument are passed through the stack.
1686 // 2. This is the second vector argument.
1687 if (ArgNo != 0 && isVectorType(Ty)) {
1688 PartialOnStack = true;
1689 finishArgumentLowering(Arg, PartialOnStack, FP, TotalStackSizeBytes,
1690 &InArgsSizeBytes);
1691 }
1692 } else {
1693 PartialOnStack = false;
1694 finishArgumentLowering(Arg, PartialOnStack, FP, TotalStackSizeBytes,
1695 &InArgsSizeBytes);
1696 }
1697 ++ArgNo;
1698 }
1699
1700 // Fill in stack offsets for locals.
1701 assignVarStackSlots(SortedSpilledVariables, SpillAreaPaddingBytes,
1702 SpillAreaSizeBytes, GlobalsAndSubsequentPaddingSize);
1703 this->HasComputedFrame = true;
1704
1705 if (BuildDefs::dump() && Func->isVerbose(IceV_Frame)) {
1706 OstreamLocker _(Func->getContext());
1707 Ostream &Str = Func->getContext()->getStrDump();
1708
1709 Str << "Stack layout:\n";
1710 uint32_t SPAdjustmentPaddingSize =
1711 SpillAreaSizeBytes - LocalsSpillAreaSize -
1712 GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes -
1713 MaxOutArgsSizeBytes;
1714 Str << " in-args = " << InArgsSizeBytes << " bytes\n"
1715 << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n"
1716 << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n"
1717 << " globals spill area = " << GlobalsSize << " bytes\n"
1718 << " globals-locals spill areas intermediate padding = "
1719 << GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n"
1720 << " locals spill area = " << LocalsSpillAreaSize << " bytes\n"
1721 << " SP alignment padding = " << SPAdjustmentPaddingSize << " bytes\n";
1722
1723 Str << "Stack details:\n"
1724 << " SP adjustment = " << SpillAreaSizeBytes << " bytes\n"
1725 << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n"
1726 << " outgoing args size = " << MaxOutArgsSizeBytes << " bytes\n"
1727 << " locals spill area alignment = " << LocalsSlotsAlignmentBytes
1728 << " bytes\n"
1729 << " is FP based = " << 1 << "\n";
1730 }
1731 return;
1732 }
1733
1734 void TargetMIPS32::addEpilog(CfgNode *Node) {
1735 InstList &Insts = Node->getInsts();
1736 InstList::reverse_iterator RI, E;
1737 for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) {
1738 if (llvm::isa<InstMIPS32Ret>(*RI))
1739 break;
1740 }
1741 if (RI == E)
1742 return;
1743
1744 // Convert the reverse_iterator position into its corresponding (forward)
1745 // iterator position.
1746 InstList::iterator InsertPoint = reverseToForwardIterator(RI);
1747 --InsertPoint;
1748 Context.init(Node);
1749 Context.setInsertPoint(InsertPoint);
1750
1751 Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP);
1752 if (UsesFramePointer) {
1753 Variable *FP = getPhysicalRegister(RegMIPS32::Reg_FP);
1754 // For late-stage liveness analysis (e.g. asm-verbose mode), adding a fake
1755 // use of SP before the assignment of SP=FP keeps previous SP adjustments
1756 // from being dead-code eliminated.
1757 Context.insert<InstFakeUse>(SP);
1758 Sandboxer(this).reset_sp(FP);
1759 }
1760
1761 VarList::reverse_iterator RIter, END;
1762
1763 if (!PreservedGPRs.empty()) {
1764 uint32_t StackOffset = TotalStackSizeBytes - PreservedRegsSizeBytes;
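// addProlog saved the registers top-down starting at TotalStackSizeBytes,
// so restore them bottom-up from the same slots, walking the saved list in
// reverse.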
1765 for (RIter = PreservedGPRs.rbegin(), END = PreservedGPRs.rend();
1766 RIter != END; ++RIter) {
1767 Type RegType;
1768 if (RegMIPS32::isFPRReg((*RIter)->getRegNum()))
1769 RegType = IceType_f32;
1770 else
1771 RegType = IceType_i32;
1772 auto *PhysicalRegister = makeReg(RegType, (*RIter)->getRegNum());
1773 Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP);
1774 OperandMIPS32Mem *MemoryLocation = OperandMIPS32Mem::create(
1775 Func, RegType, SP,
1776 llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(StackOffset)));
1777 _lw(PhysicalRegister, MemoryLocation);
1778 StackOffset += typeWidthInBytesOnStack(PhysicalRegister->getType());
1779 }
1780 }
1781
1782 if (TotalStackSizeBytes) {
1783 Sandboxer(this).addiu_sp(TotalStackSizeBytes);
1784 }
1785 if (!getFlags().getUseSandboxing())
1786 return;
1787
1788 Variable *RA = getPhysicalRegister(RegMIPS32::Reg_RA);
1789 Variable *RetValue = nullptr;
1790 if (RI->getSrcSize())
1791 RetValue = llvm::cast<Variable>(RI->getSrc(0));
1792
1793 Sandboxer(this).ret(RA, RetValue);
1794
1795 RI->setDeleted();
1796 }
1797
1798 Variable *TargetMIPS32::PostLoweringLegalizer::newBaseRegister(
1799 Variable *Base, int32_t Offset, RegNumT ScratchRegNum) {
1800 // Legalizing the offset will likely need a lui/ori combination, but if the
1801 // negated offset fits in 16 bits we can subtract it with a single addi.
1802 const bool ShouldSub = Offset != 0 && (-Offset & 0xFFFF0000) == 0;
1803 Variable *ScratchReg = Target->makeReg(IceType_i32, ScratchRegNum);
1804 if (ShouldSub) {
1805 Target->_addi(ScratchReg, Base, -Offset);
1806 } else {
1807 constexpr bool SignExt = true;
1808 if (!OperandMIPS32Mem::canHoldOffset(Base->getType(), SignExt, Offset)) {
1809 const uint32_t UpperBits = (Offset >> 16) & 0xFFFF;
1810 const uint32_t LowerBits = Offset & 0xFFFF;
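// e.g. Offset = 0x12348765 emits:
//   lui  ScratchReg, 0x1234
//   ori  ScratchReg, ScratchReg, 0x8765
//   addu ScratchReg, ScratchReg, Base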
1811 Target->_lui(ScratchReg, Target->Ctx->getConstantInt32(UpperBits));
1812 if (LowerBits)
1813 Target->_ori(ScratchReg, ScratchReg, LowerBits);
1814 Target->_addu(ScratchReg, ScratchReg, Base);
1815 } else {
1816 Target->_addiu(ScratchReg, Base, Offset);
1817 }
1818 }
1819
1820 return ScratchReg;
1821 }
1822
1823 void TargetMIPS32::PostLoweringLegalizer::legalizeMovFp(
1824 InstMIPS32MovFP64ToI64 *MovInstr) {
1825 Variable *Dest = MovInstr->getDest();
1826 Operand *Src = MovInstr->getSrc(0);
1827 const Type SrcTy = Src->getType();
1828
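// This legalization stores one 32-bit half of an f64 register pair into the
// stack slot of the i64 destination: getInt64Part() selects the half, and
// the corresponding single-precision register of the source pair supplies
// the data.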
1829 if (Dest != nullptr && SrcTy == IceType_f64) {
1830 int32_t Offset = Dest->getStackOffset();
1831 auto *Base = Target->getPhysicalRegister(Target->getFrameOrStackReg());
1832 OperandMIPS32Mem *TAddr = OperandMIPS32Mem::create(
1833 Target->Func, IceType_f32, Base,
1834 llvm::cast<ConstantInteger32>(Target->Ctx->getConstantInt32(Offset)));
1835 OperandMIPS32Mem *Addr = legalizeMemOperand(TAddr);
1836 auto *SrcV = llvm::cast<Variable>(Src);
1837 Variable *SrcR;
1838 if (MovInstr->getInt64Part() == Int64_Lo) {
1839 SrcR = Target->makeReg(
1840 IceType_f32, RegMIPS32::get64PairFirstRegNum(SrcV->getRegNum()));
1841 } else {
1842 SrcR = Target->makeReg(
1843 IceType_f32, RegMIPS32::get64PairSecondRegNum(SrcV->getRegNum()));
1844 }
1845 Sandboxer(Target).sw(SrcR, Addr);
1846 if (MovInstr->isDestRedefined()) {
1847 Target->_set_dest_redefined();
1848 }
1849 MovInstr->setDeleted();
1850 return;
1851 }
1852
1853 llvm::report_fatal_error("legalizeMovFp: Invalid operands");
1854 }
1855
1856 void TargetMIPS32::PostLoweringLegalizer::legalizeMov(InstMIPS32Mov *MovInstr) {
1857 Variable *Dest = MovInstr->getDest();
1858 assert(Dest != nullptr);
1859 const Type DestTy = Dest->getType();
1860 assert(DestTy != IceType_i64);
1861
1862 Operand *Src = MovInstr->getSrc(0);
1863 const Type SrcTy = Src->getType();
1864 (void)SrcTy;
1865 assert(SrcTy != IceType_i64);
1866
1867 bool Legalized = false;
1868 auto *SrcR = llvm::cast<Variable>(Src);
1869 if (Dest->hasReg() && SrcR->hasReg()) {
1870 // This might be a GP to/from FP move generated due to argument passing.
1871 // Use mtc1/mfc1 instead of mov.[s/d] if src and dst registers are of
1872 // different types.
1873 const bool IsDstGPR = RegMIPS32::isGPRReg(Dest->getRegNum());
1874 const bool IsSrcGPR = RegMIPS32::isGPRReg(SrcR->getRegNum());
1875 const RegNumT SRegNum = SrcR->getRegNum();
1876 const RegNumT DRegNum = Dest->getRegNum();
1877 if (IsDstGPR != IsSrcGPR) {
1878 if (IsDstGPR) {
1879 // Dest is GPR and SrcR is FPR. Use mfc1.
1880 int32_t TypeWidth = typeWidthInBytes(DestTy);
1881 if (MovInstr->getDestHi() != nullptr)
1882 TypeWidth += typeWidthInBytes(MovInstr->getDestHi()->getType());
1883 if (TypeWidth == 8) {
1884 // Split it into two mfc1 instructions
1885 Variable *SrcGPRHi = Target->makeReg(
1886 IceType_f32, RegMIPS32::get64PairFirstRegNum(SRegNum));
1887 Variable *SrcGPRLo = Target->makeReg(
1888 IceType_f32, RegMIPS32::get64PairSecondRegNum(SRegNum));
1889 Variable *DstFPRHi, *DstFPRLo;
1890 if (MovInstr->getDestHi() != nullptr && Dest != nullptr) {
1891 DstFPRHi = Target->makeReg(IceType_i32,
1892 MovInstr->getDestHi()->getRegNum());
1893 DstFPRLo = Target->makeReg(IceType_i32, Dest->getRegNum());
1894 } else {
1895 DstFPRHi = Target->makeReg(
1896 IceType_i32, RegMIPS32::get64PairFirstRegNum(DRegNum));
1897 DstFPRLo = Target->makeReg(
1898 IceType_i32, RegMIPS32::get64PairSecondRegNum(DRegNum));
1899 }
1900 Target->_mov(DstFPRHi, SrcGPRHi);
1901 Target->_mov(DstFPRLo, SrcGPRLo);
1902 Legalized = true;
1903 } else {
1904 Variable *SrcGPR = Target->makeReg(IceType_f32, SRegNum);
1905 Variable *DstFPR = Target->makeReg(IceType_i32, DRegNum);
1906 Target->_mov(DstFPR, SrcGPR);
1907 Legalized = true;
1908 }
1909 } else {
1910 // Dest is FPR and SrcR is GPR. Use mtc1.
1911 if (typeWidthInBytes(Dest->getType()) == 8) {
1912 Variable *SrcGPRHi, *SrcGPRLo;
1913 // SrcR could be $zero which is i32
1914 if (SRegNum == RegMIPS32::Reg_ZERO) {
1915 SrcGPRHi = Target->makeReg(IceType_i32, SRegNum);
1916 SrcGPRLo = SrcGPRHi;
1917 } else {
1918 // Split it into two mtc1 instructions
1919 if (MovInstr->getSrcSize() == 2) {
1920 const auto FirstReg =
1921 (llvm::cast<Variable>(MovInstr->getSrc(0)))->getRegNum();
1922 const auto SecondReg =
1923 (llvm::cast<Variable>(MovInstr->getSrc(1)))->getRegNum();
1924 SrcGPRHi = Target->makeReg(IceType_i32, FirstReg);
1925 SrcGPRLo = Target->makeReg(IceType_i32, SecondReg);
1926 } else {
1927 SrcGPRLo = Target->makeReg(
1928 IceType_i32, RegMIPS32::get64PairFirstRegNum(SRegNum));
1929 SrcGPRHi = Target->makeReg(
1930 IceType_i32, RegMIPS32::get64PairSecondRegNum(SRegNum));
1931 }
1932 }
1933 Variable *DstFPRHi = Target->makeReg(
1934 IceType_f32, RegMIPS32::get64PairFirstRegNum(DRegNum));
1935 Variable *DstFPRLo = Target->makeReg(
1936 IceType_f32, RegMIPS32::get64PairSecondRegNum(DRegNum));
1937 Target->_mov(DstFPRHi, SrcGPRLo);
1938 Target->_mov(DstFPRLo, SrcGPRHi);
1939 Legalized = true;
1940 } else {
1941 Variable *SrcGPR = Target->makeReg(IceType_i32, SRegNum);
1942 Variable *DstFPR = Target->makeReg(IceType_f32, DRegNum);
1943 Target->_mov(DstFPR, SrcGPR);
1944 Legalized = true;
1945 }
1946 }
1947 }
1948 if (Legalized) {
1949 if (MovInstr->isDestRedefined()) {
1950 Target->_set_dest_redefined();
1951 }
1952 MovInstr->setDeleted();
1953 return;
1954 }
1955 }
1956
1957 if (!Dest->hasReg()) {
1958 auto *SrcR = llvm::cast<Variable>(Src);
1959 assert(SrcR->hasReg());
1960 assert(!SrcR->isRematerializable());
1961 int32_t Offset = Dest->getStackOffset();
1962
1963 // This is a _mov(Mem(), Variable), i.e., a store.
1964 auto *Base = Target->getPhysicalRegister(Target->getFrameOrStackReg());
1965
1966 OperandMIPS32Mem *TAddr = OperandMIPS32Mem::create(
1967 Target->Func, DestTy, Base,
1968 llvm::cast<ConstantInteger32>(Target->Ctx->getConstantInt32(Offset)));
1969 OperandMIPS32Mem *TAddrHi = OperandMIPS32Mem::create(
1970 Target->Func, DestTy, Base,
1971 llvm::cast<ConstantInteger32>(
1972 Target->Ctx->getConstantInt32(Offset + 4)));
1973 OperandMIPS32Mem *Addr = legalizeMemOperand(TAddr);
1974
1975 // FP arguments are passed in GP registers if the first argument is in a
1976 // GP register. In this case the type of SrcR is still FP, so we must
1977 // explicitly generate sw instead of swc1.
1978 const RegNumT RegNum = SrcR->getRegNum();
1979 const bool IsSrcGPReg = RegMIPS32::isGPRReg(SrcR->getRegNum());
1980 if (SrcTy == IceType_f32 && IsSrcGPReg) {
1981 Variable *SrcGPR = Target->makeReg(IceType_i32, RegNum);
1982 Sandboxer(Target).sw(SrcGPR, Addr);
1983 } else if (SrcTy == IceType_f64 && IsSrcGPReg) {
1984 Variable *SrcGPRHi =
1985 Target->makeReg(IceType_i32, RegMIPS32::get64PairFirstRegNum(RegNum));
1986 Variable *SrcGPRLo = Target->makeReg(
1987 IceType_i32, RegMIPS32::get64PairSecondRegNum(RegNum));
1988 Sandboxer(Target).sw(SrcGPRHi, Addr);
1989 OperandMIPS32Mem *AddrHi = legalizeMemOperand(TAddrHi);
1990 Sandboxer(Target).sw(SrcGPRLo, AddrHi);
1991 } else if (DestTy == IceType_f64 && IsSrcGPReg) {
1992 const auto FirstReg =
1993 (llvm::cast<Variable>(MovInstr->getSrc(0)))->getRegNum();
1994 const auto SecondReg =
1995 (llvm::cast<Variable>(MovInstr->getSrc(1)))->getRegNum();
1996 Variable *SrcGPRHi = Target->makeReg(IceType_i32, FirstReg);
1997 Variable *SrcGPRLo = Target->makeReg(IceType_i32, SecondReg);
1998 Sandboxer(Target).sw(SrcGPRLo, Addr);
1999 OperandMIPS32Mem *AddrHi = legalizeMemOperand(TAddrHi);
2000 Sandboxer(Target).sw(SrcGPRHi, AddrHi);
2001 } else {
2002 Sandboxer(Target).sw(SrcR, Addr);
2003 }
2004
2005 Target->Context.insert<InstFakeDef>(Dest);
2006 Legalized = true;
2007 } else if (auto *Var = llvm::dyn_cast<Variable>(Src)) {
2008 if (Var->isRematerializable()) {
2009 // This is equivalent to an x86 _lea(RematOffset(%esp/%ebp), Variable).
2010
2011 // ExtraOffset is only needed for stack-pointer based frames as we have
2012 // to account for spill storage.
2013 const int32_t ExtraOffset =
2014 (Var->getRegNum() == Target->getFrameOrStackReg())
2015 ? Target->getFrameFixedAllocaOffset()
2016 : 0;
2017
2018 const int32_t Offset = Var->getStackOffset() + ExtraOffset;
2019 Variable *Base = Target->getPhysicalRegister(Var->getRegNum());
2020 Variable *T = newBaseRegister(Base, Offset, Dest->getRegNum());
2021 Target->_mov(Dest, T);
2022 Legalized = true;
2023 } else {
2024 if (!Var->hasReg()) {
2025 // This is a _mov(Variable, Mem()), i.e., a load.
2026 const int32_t Offset = Var->getStackOffset();
2027 auto *Base = Target->getPhysicalRegister(Target->getFrameOrStackReg());
2028 const RegNumT RegNum = Dest->getRegNum();
2029 const bool IsDstGPReg = RegMIPS32::isGPRReg(Dest->getRegNum());
2030 // If we are moving an i64 to a double through the stack, the address may
2031 // not be aligned to an 8-byte boundary: the i64 was split into Hi-Lo
2032 // parts stored individually with 4-byte alignment. Load the Hi-Lo parts
2033 // into a temporary GPR and move them to the dest using mtc1.
2034 if (DestTy == IceType_f64 && !Utils::IsAligned(Offset, 8) &&
2035 !IsDstGPReg) {
2036 auto *Reg = Target->makeReg(IceType_i32, Target->getReservedTmpReg());
2037 const RegNumT RegNum = Dest->getRegNum();
2038 Variable *DestLo = Target->makeReg(
2039 IceType_f32, RegMIPS32::get64PairFirstRegNum(RegNum));
2040 Variable *DestHi = Target->makeReg(
2041 IceType_f32, RegMIPS32::get64PairSecondRegNum(RegNum));
2042 OperandMIPS32Mem *AddrLo = OperandMIPS32Mem::create(
2043 Target->Func, IceType_i32, Base,
2044 llvm::cast<ConstantInteger32>(
2045 Target->Ctx->getConstantInt32(Offset)));
2046 OperandMIPS32Mem *AddrHi = OperandMIPS32Mem::create(
2047 Target->Func, IceType_i32, Base,
2048 llvm::cast<ConstantInteger32>(
2049 Target->Ctx->getConstantInt32(Offset + 4)));
2050 Sandboxer(Target).lw(Reg, AddrLo);
2051 Target->_mov(DestLo, Reg);
2052 Sandboxer(Target).lw(Reg, AddrHi);
2053 Target->_mov(DestHi, Reg);
2054 } else {
2055 OperandMIPS32Mem *TAddr = OperandMIPS32Mem::create(
2056 Target->Func, DestTy, Base,
2057 llvm::cast<ConstantInteger32>(
2058 Target->Ctx->getConstantInt32(Offset)));
2059 OperandMIPS32Mem *Addr = legalizeMemOperand(TAddr);
2060 OperandMIPS32Mem *TAddrHi = OperandMIPS32Mem::create(
2061 Target->Func, DestTy, Base,
2062 llvm::cast<ConstantInteger32>(
2063 Target->Ctx->getConstantInt32(Offset + 4)));
2064 // FP arguments are passed in GP registers if the first argument is
2065 // in a GP register. In this case the type of Dest is still FP, so we
2066 // must explicitly generate lw instead of lwc1.
2067 if (DestTy == IceType_f32 && IsDstGPReg) {
2068 Variable *DstGPR = Target->makeReg(IceType_i32, RegNum);
2069 Sandboxer(Target).lw(DstGPR, Addr);
2070 } else if (DestTy == IceType_f64 && IsDstGPReg) {
2071 Variable *DstGPRHi = Target->makeReg(
2072 IceType_i32, RegMIPS32::get64PairFirstRegNum(RegNum));
2073 Variable *DstGPRLo = Target->makeReg(
2074 IceType_i32, RegMIPS32::get64PairSecondRegNum(RegNum));
2075 Sandboxer(Target).lw(DstGPRHi, Addr);
2076 OperandMIPS32Mem *AddrHi = legalizeMemOperand(TAddrHi);
2077 Sandboxer(Target).lw(DstGPRLo, AddrHi);
2078 } else if (DestTy == IceType_f64 && IsDstGPReg) { // TODO: unreachable, duplicates the guard above.
2079 const auto FirstReg =
2080 (llvm::cast<Variable>(MovInstr->getSrc(0)))->getRegNum();
2081 const auto SecondReg =
2082 (llvm::cast<Variable>(MovInstr->getSrc(1)))->getRegNum();
2083 Variable *DstGPRHi = Target->makeReg(IceType_i32, FirstReg);
2084 Variable *DstGPRLo = Target->makeReg(IceType_i32, SecondReg);
2085 Sandboxer(Target).lw(DstGPRLo, Addr);
2086 OperandMIPS32Mem *AddrHi = legalizeMemOperand(TAddrHi);
2087 Sandboxer(Target).lw(DstGPRHi, AddrHi);
2088 } else {
2089 Sandboxer(Target).lw(Dest, Addr);
2090 }
2091 }
2092 Legalized = true;
2093 }
2094 }
2095 }
2096
2097 if (Legalized) {
2098 if (MovInstr->isDestRedefined()) {
2099 Target->_set_dest_redefined();
2100 }
2101 MovInstr->setDeleted();
2102 }
2103 }
2104
2105 OperandMIPS32Mem *
2106 TargetMIPS32::PostLoweringLegalizer::legalizeMemOperand(OperandMIPS32Mem *Mem) {
2107 if (llvm::isa<ConstantRelocatable>(Mem->getOffset())) {
2108 return nullptr;
2109 }
2110 Variable *Base = Mem->getBase();
2111 auto *Ci32 = llvm::cast<ConstantInteger32>(Mem->getOffset());
2112 int32_t Offset = Ci32->getValue();
2113
2114 if (Base->isRematerializable()) {
2115 const int32_t ExtraOffset =
2116 (Base->getRegNum() == Target->getFrameOrStackReg())
2117 ? Target->getFrameFixedAllocaOffset()
2118 : 0;
2119 Offset += Base->getStackOffset() + ExtraOffset;
2120 Base = Target->getPhysicalRegister(Base->getRegNum());
2121 }
2122
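// MIPS32 loads and stores encode a signed 16-bit offset; any offset that
// does not fit is folded into a fresh base register so the final access can
// use offset 0.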
2123 constexpr bool SignExt = true;
2124 if (!OperandMIPS32Mem::canHoldOffset(Mem->getType(), SignExt, Offset)) {
2125 Base = newBaseRegister(Base, Offset, Target->getReservedTmpReg());
2126 Offset = 0;
2127 }
2128
2129 return OperandMIPS32Mem::create(
2130 Target->Func, Mem->getType(), Base,
2131 llvm::cast<ConstantInteger32>(Target->Ctx->getConstantInt32(Offset)));
2132 }
2133
2134 Variable *TargetMIPS32::PostLoweringLegalizer::legalizeImmediate(int32_t Imm) {
2135 Variable *Reg = nullptr;
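// addiu encodes a signed 16-bit immediate, so anything outside
// [-32768, 32767] must be materialized with a lui/ori pair, e.g.
// Imm = 0x00050008 -> lui TReg, 0x5; ori Reg, TReg, 0x8.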
2136 if (!((std::numeric_limits<int16_t>::min() <= Imm) &&
2137 (Imm <= std::numeric_limits<int16_t>::max()))) {
2138 const uint32_t UpperBits = (Imm >> 16) & 0xFFFF;
2139 const uint32_t LowerBits = Imm & 0xFFFF;
2140 Variable *TReg = Target->makeReg(IceType_i32, Target->getReservedTmpReg());
2141 Reg = Target->makeReg(IceType_i32, Target->getReservedTmpReg());
2142 if (LowerBits) {
2143 Target->_lui(TReg, Target->Ctx->getConstantInt32(UpperBits));
2144 Target->_ori(Reg, TReg, LowerBits);
2145 } else {
2146 Target->_lui(Reg, Target->Ctx->getConstantInt32(UpperBits));
2147 }
2148 }
2149 return Reg;
2150 }
2151
2152 void TargetMIPS32::postLowerLegalization() {
2153 Func->dump("Before postLowerLegalization");
2154 assert(hasComputedFrame());
2155 for (CfgNode *Node : Func->getNodes()) {
2156 Context.init(Node);
2157 PostLoweringLegalizer Legalizer(this);
2158 while (!Context.atEnd()) {
2159 PostIncrLoweringContext PostIncrement(Context);
2160 Inst *CurInstr = iteratorToInst(Context.getCur());
2161 const SizeT NumSrcs = CurInstr->getSrcSize();
2162 Operand *Src0 = NumSrcs < 1 ? nullptr : CurInstr->getSrc(0);
2163 Operand *Src1 = NumSrcs < 2 ? nullptr : CurInstr->getSrc(1);
2164 auto *Src0V = llvm::dyn_cast_or_null<Variable>(Src0);
2165 auto *Src0M = llvm::dyn_cast_or_null<OperandMIPS32Mem>(Src0);
2166 auto *Src1M = llvm::dyn_cast_or_null<OperandMIPS32Mem>(Src1);
2167 Variable *Dst = CurInstr->getDest();
2168 if (auto *MovInstr = llvm::dyn_cast<InstMIPS32Mov>(CurInstr)) {
2169 Legalizer.legalizeMov(MovInstr);
2170 continue;
2171 }
2172 if (auto *MovInstr = llvm::dyn_cast<InstMIPS32MovFP64ToI64>(CurInstr)) {
2173 Legalizer.legalizeMovFp(MovInstr);
2174 continue;
2175 }
2176 if (llvm::isa<InstMIPS32Sw>(CurInstr)) {
2177 if (auto *LegalMem = Legalizer.legalizeMemOperand(Src1M)) {
2178 Sandboxer(this).sw(Src0V, LegalMem);
2179 CurInstr->setDeleted();
2180 }
2181 continue;
2182 }
2183 if (llvm::isa<InstMIPS32Swc1>(CurInstr)) {
2184 if (auto *LegalMem = Legalizer.legalizeMemOperand(Src1M)) {
2185 _swc1(Src0V, LegalMem);
2186 CurInstr->setDeleted();
2187 }
2188 continue;
2189 }
2190 if (llvm::isa<InstMIPS32Sdc1>(CurInstr)) {
2191 if (auto *LegalMem = Legalizer.legalizeMemOperand(Src1M)) {
2192 _sdc1(Src0V, LegalMem);
2193 CurInstr->setDeleted();
2194 }
2195 continue;
2196 }
2197 if (llvm::isa<InstMIPS32Lw>(CurInstr)) {
2198 if (auto *LegalMem = Legalizer.legalizeMemOperand(Src0M)) {
2199 Sandboxer(this).lw(Dst, LegalMem);
2200 CurInstr->setDeleted();
2201 }
2202 continue;
2203 }
2204 if (llvm::isa<InstMIPS32Lwc1>(CurInstr)) {
2205 if (auto *LegalMem = Legalizer.legalizeMemOperand(Src0M)) {
2206 _lwc1(Dst, LegalMem);
2207 CurInstr->setDeleted();
2208 }
2209 continue;
2210 }
2211 if (llvm::isa<InstMIPS32Ldc1>(CurInstr)) {
2212 if (auto *LegalMem = Legalizer.legalizeMemOperand(Src0M)) {
2213 _ldc1(Dst, LegalMem);
2214 CurInstr->setDeleted();
2215 }
2216 continue;
2217 }
2218 if (auto *AddiuInstr = llvm::dyn_cast<InstMIPS32Addiu>(CurInstr)) {
2219 if (auto *LegalImm = Legalizer.legalizeImmediate(
2220 static_cast<int32_t>(AddiuInstr->getImmediateValue()))) {
2221 _addu(Dst, Src0V, LegalImm);
2222 CurInstr->setDeleted();
2223 }
2224 continue;
2225 }
2226 }
2227 }
2228 }
2229
2230 Operand *TargetMIPS32::loOperand(Operand *Operand) {
2231 assert(Operand->getType() == IceType_i64);
2232 if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Operand))
2233 return Var64On32->getLo();
2234 if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
2235 return Ctx->getConstantInt32(static_cast<uint32_t>(Const->getValue()));
2236 }
2237 if (auto *Mem = llvm::dyn_cast<OperandMIPS32Mem>(Operand)) {
2238 // Conservatively disallow memory operands with side-effects (pre/post
2239 // increment) in case of duplication.
2240 assert(Mem->getAddrMode() == OperandMIPS32Mem::Offset);
2241 return OperandMIPS32Mem::create(Func, IceType_i32, Mem->getBase(),
2242 Mem->getOffset(), Mem->getAddrMode());
2243 }
2244 llvm_unreachable("Unsupported operand type");
2245 return nullptr;
2246 }
2247
2248 Operand *TargetMIPS32::getOperandAtIndex(Operand *Operand, Type BaseType,
2249 uint32_t Index) {
2250 if (!isVectorType(Operand->getType())) {
2251 llvm::report_fatal_error("getOperandAtIndex: Operand is not vector");
2252 return nullptr;
2253 }
2254
2255 if (auto *Mem = llvm::dyn_cast<OperandMIPS32Mem>(Operand)) {
2256 assert(Mem->getAddrMode() == OperandMIPS32Mem::Offset);
2257 Variable *Base = Mem->getBase();
2258 auto *Offset = llvm::cast<ConstantInteger32>(Mem->getOffset());
2259 assert(!Utils::WouldOverflowAdd(Offset->getValue(), 4));
2260 int32_t NextOffsetVal =
2261 Offset->getValue() + (Index * typeWidthInBytes(BaseType));
2262 constexpr bool NoSignExt = false;
2263 if (!OperandMIPS32Mem::canHoldOffset(BaseType, NoSignExt, NextOffsetVal)) {
2264 Constant *_4 = Ctx->getConstantInt32(4);
2265 Variable *NewBase = Func->makeVariable(Base->getType());
2266 lowerArithmetic(
2267 InstArithmetic::create(Func, InstArithmetic::Add, NewBase, Base, _4));
2268 Base = NewBase;
2269 } else {
2270 Offset =
2271 llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(NextOffsetVal));
2272 }
2273 return OperandMIPS32Mem::create(Func, BaseType, Base, Offset,
2274 Mem->getAddrMode());
2275 }
2276
2277 if (auto *VarVecOn32 = llvm::dyn_cast<VariableVecOn32>(Operand))
2278 return VarVecOn32->getContainers()[Index];
2279
2280 llvm_unreachable("Unsupported operand type");
2281 return nullptr;
2282 }
2283
2284 Operand *TargetMIPS32::hiOperand(Operand *Operand) {
2285 assert(Operand->getType() == IceType_i64);
2286 if (Operand->getType() != IceType_i64)
2287 return Operand;
2288 if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Operand))
2289 return Var64On32->getHi();
2290 if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
2291 return Ctx->getConstantInt32(
2292 static_cast<uint32_t>(Const->getValue() >> 32));
2293 }
2294 if (auto *Mem = llvm::dyn_cast<OperandMIPS32Mem>(Operand)) {
2295 // Conservatively disallow memory operands with side-effects
2296 // in case of duplication.
2297 assert(Mem->getAddrMode() == OperandMIPS32Mem::Offset);
2298 const Type SplitType = IceType_i32;
2299 Variable *Base = Mem->getBase();
2300 auto *Offset = llvm::cast<ConstantInteger32>(Mem->getOffset());
2301 assert(!Utils::WouldOverflowAdd(Offset->getValue(), 4));
2302 int32_t NextOffsetVal = Offset->getValue() + 4;
2303 constexpr bool SignExt = false;
2304 if (!OperandMIPS32Mem::canHoldOffset(SplitType, SignExt, NextOffsetVal)) {
2305 // We have to make a temp variable and add 4 to either Base or Offset.
2306 // If we add 4 to Offset, this will convert a non-RegReg addressing
2307 // mode into a RegReg addressing mode. Since NaCl sandboxing disallows
2308 // RegReg addressing modes, prefer adding to base and replacing instead.
2309 // Thus we leave the old offset alone.
2310 Constant *Four = Ctx->getConstantInt32(4);
2311 Variable *NewBase = Func->makeVariable(Base->getType());
2312 lowerArithmetic(InstArithmetic::create(Func, InstArithmetic::Add, NewBase,
2313 Base, Four));
2314 Base = NewBase;
2315 } else {
2316 Offset =
2317 llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(NextOffsetVal));
2318 }
2319 return OperandMIPS32Mem::create(Func, SplitType, Base, Offset,
2320 Mem->getAddrMode());
2321 }
2322 llvm_unreachable("Unsupported operand type");
2323 return nullptr;
2324 }
2325
2326 SmallBitVector TargetMIPS32::getRegisterSet(RegSetMask Include,
2327 RegSetMask Exclude) const {
2328 SmallBitVector Registers(RegMIPS32::Reg_NUM);
2329
2330 #define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt, \
2331 isI64Pair, isFP32, isFP64, isVec128, alias_init) \
2332 if (scratch && (Include & RegSet_CallerSave)) \
2333 Registers[RegMIPS32::val] = true; \
2334 if (preserved && (Include & RegSet_CalleeSave)) \
2335 Registers[RegMIPS32::val] = true; \
2336 if (stackptr && (Include & RegSet_StackPointer)) \
2337 Registers[RegMIPS32::val] = true; \
2338 if (frameptr && (Include & RegSet_FramePointer)) \
2339 Registers[RegMIPS32::val] = true; \
2340 if (scratch && (Exclude & RegSet_CallerSave)) \
2341 Registers[RegMIPS32::val] = false; \
2342 if (preserved && (Exclude & RegSet_CalleeSave)) \
2343 Registers[RegMIPS32::val] = false; \
2344 if (stackptr && (Exclude & RegSet_StackPointer)) \
2345 Registers[RegMIPS32::val] = false; \
2346 if (frameptr && (Exclude & RegSet_FramePointer)) \
2347 Registers[RegMIPS32::val] = false;
2348
2349 REGMIPS32_TABLE
2350
2351 #undef X
2352
2353 if (NeedSandboxing) {
2354 Registers[RegMIPS32::Reg_T6] = false;
2355 Registers[RegMIPS32::Reg_T7] = false;
2356 Registers[RegMIPS32::Reg_T8] = false;
2357 }
2358 return Registers;
2359 }
2360
2361 void TargetMIPS32::lowerAlloca(const InstAlloca *Instr) {
2362 // Conservatively require the stack to be aligned. Some stack adjustment
2363 // operations implemented below assume that the stack is aligned before the
2364 // alloca. All the alloca code ensures that the stack alignment is preserved
2365 // after the alloca. The stack alignment restriction can be relaxed in some
2366 // cases.
2367 NeedsStackAlignment = true;
2368
2369 // For default align=0, set it to the real value 1, to avoid any
2370 // bit-manipulation problems below.
2371 const uint32_t AlignmentParam = std::max(1u, Instr->getAlignInBytes());
2372
2373 // LLVM enforces power of 2 alignment.
2374 assert(llvm::isPowerOf2_32(AlignmentParam));
2375 assert(llvm::isPowerOf2_32(MIPS32_STACK_ALIGNMENT_BYTES));
2376
2377 const uint32_t Alignment =
2378 std::max(AlignmentParam, MIPS32_STACK_ALIGNMENT_BYTES);
2379 const bool OverAligned = Alignment > MIPS32_STACK_ALIGNMENT_BYTES;
2380 const bool OptM1 = Func->getOptLevel() == Opt_m1;
2381 const bool AllocaWithKnownOffset = Instr->getKnownFrameOffset();
2382 const bool UseFramePointer =
2383 hasFramePointer() || OverAligned || !AllocaWithKnownOffset || OptM1;
2384
2385 if (UseFramePointer)
2386 setHasFramePointer();
2387
2388 Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP);
2389
2390 Variable *Dest = Instr->getDest();
2391 Operand *TotalSize = Instr->getSizeInBytes();
2392
2393 if (const auto *ConstantTotalSize =
2394 llvm::dyn_cast<ConstantInteger32>(TotalSize)) {
2395 const uint32_t Value =
2396 Utils::applyAlignment(ConstantTotalSize->getValue(), Alignment);
2397 FixedAllocaSizeBytes += Value;
2398 // Constant size alloca.
2399 if (!UseFramePointer) {
2400 // If we don't need a Frame Pointer, this alloca has a known offset to the
2401 // stack pointer. We don't need to adjust the stack pointer, nor assign any
2402 // value to Dest, as Dest is rematerializable.
2403 assert(Dest->isRematerializable());
2404 Context.insert<InstFakeDef>(Dest);
2405 return;
2406 }
2407
2408 if (Alignment > MIPS32_STACK_ALIGNMENT_BYTES) {
2409 CurrentAllocaOffset =
2410 Utils::applyAlignment(CurrentAllocaOffset, Alignment);
2411 }
2412 auto *T = I32Reg();
2413 _addiu(T, SP, CurrentAllocaOffset);
2414 _mov(Dest, T);
2415 CurrentAllocaOffset += Value;
2416 return;
2417
2418 } else {
2419 // Non-constant sizes need to be adjusted to the next highest multiple of
2420 // the required alignment at runtime.
2421 VariableAllocaUsed = true;
2422 VariableAllocaAlignBytes = AlignmentParam;
2423 Variable *AlignAmount;
2424 auto *TotalSizeR = legalizeToReg(TotalSize, Legal_Reg);
2425 auto *T1 = I32Reg();
2426 auto *T2 = I32Reg();
2427 auto *T3 = I32Reg();
2428 auto *T4 = I32Reg();
2429 auto *T5 = I32Reg();
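// Round the runtime size up to the 16-byte stack alignment,
// T3 = (size + 15) & ~15, then carve the block out of the stack
// (T4 = SP - T3) and, if extra alignment was requested, mask the resulting
// address down to it.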
2430 _addiu(T1, TotalSizeR, MIPS32_STACK_ALIGNMENT_BYTES - 1);
2431 _addiu(T2, getZero(), -MIPS32_STACK_ALIGNMENT_BYTES);
2432 _and(T3, T1, T2);
2433 _subu(T4, SP, T3);
2434 if (Instr->getAlignInBytes()) {
2435 AlignAmount =
2436 legalizeToReg(Ctx->getConstantInt32(-AlignmentParam), Legal_Reg);
2437 _and(T5, T4, AlignAmount);
2438 _mov(Dest, T5);
2439 } else {
2440 _mov(Dest, T4);
2441 }
2442 if (OptM1)
2443 _mov(SP, Dest);
2444 else
2445 Sandboxer(this).reset_sp(Dest);
2446 return;
2447 }
2448 }
2449
2450 void TargetMIPS32::lowerInt64Arithmetic(const InstArithmetic *Instr,
2451 Variable *Dest, Operand *Src0,
2452 Operand *Src1) {
2453 InstArithmetic::OpKind Op = Instr->getOp();
2454 auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
2455 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
2456 Variable *Src0LoR = nullptr;
2457 Variable *Src1LoR = nullptr;
2458 Variable *Src0HiR = nullptr;
2459 Variable *Src1HiR = nullptr;
2460
2461 switch (Op) {
2462 case InstArithmetic::_num:
2463 llvm::report_fatal_error("Unknown arithmetic operator");
2464 return;
2465 case InstArithmetic::Add: {
2466 Src0LoR = legalizeToReg(loOperand(Src0));
2467 Src1LoR = legalizeToReg(loOperand(Src1));
2468 Src0HiR = legalizeToReg(hiOperand(Src0));
2469 Src1HiR = legalizeToReg(hiOperand(Src1));
2470 auto *T_Carry = I32Reg(), *T_Lo = I32Reg(), *T_Hi = I32Reg(),
2471 *T_Hi2 = I32Reg();
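// Classic 32-bit carry computation: the low-word sum wraps iff it is
// unsigned-less-than either addend, so T_Carry = (Lo0 + Lo1) <u Lo0 and the
// high word is Hi0 + Hi1 + T_Carry.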
2472 _addu(T_Lo, Src0LoR, Src1LoR);
2473 _mov(DestLo, T_Lo);
2474 _sltu(T_Carry, T_Lo, Src0LoR);
2475 _addu(T_Hi, T_Carry, Src0HiR);
2476 _addu(T_Hi2, Src1HiR, T_Hi);
2477 _mov(DestHi, T_Hi2);
2478 return;
2479 }
2480 case InstArithmetic::And: {
2481 Src0LoR = legalizeToReg(loOperand(Src0));
2482 Src1LoR = legalizeToReg(loOperand(Src1));
2483 Src0HiR = legalizeToReg(hiOperand(Src0));
2484 Src1HiR = legalizeToReg(hiOperand(Src1));
2485 auto *T_Lo = I32Reg(), *T_Hi = I32Reg();
2486 _and(T_Lo, Src0LoR, Src1LoR);
2487 _mov(DestLo, T_Lo);
2488 _and(T_Hi, Src0HiR, Src1HiR);
2489 _mov(DestHi, T_Hi);
2490 return;
2491 }
2492 case InstArithmetic::Sub: {
2493 Src0LoR = legalizeToReg(loOperand(Src0));
2494 Src1LoR = legalizeToReg(loOperand(Src1));
2495 Src0HiR = legalizeToReg(hiOperand(Src0));
2496 Src1HiR = legalizeToReg(hiOperand(Src1));
2497 auto *T_Borrow = I32Reg(), *T_Lo = I32Reg(), *T_Hi = I32Reg(),
2498 *T_Hi2 = I32Reg();
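// T_Borrow = Lo0 <u Lo1, and the high word is Hi0 - Hi1 - T_Borrow,
// computed here as Hi0 - (T_Borrow + Hi1).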
2499 _subu(T_Lo, Src0LoR, Src1LoR);
2500 _mov(DestLo, T_Lo);
2501 _sltu(T_Borrow, Src0LoR, Src1LoR);
2502 _addu(T_Hi, T_Borrow, Src1HiR);
2503 _subu(T_Hi2, Src0HiR, T_Hi);
2504 _mov(DestHi, T_Hi2);
2505 return;
2506 }
2507 case InstArithmetic::Or: {
2508 Src0LoR = legalizeToReg(loOperand(Src0));
2509 Src1LoR = legalizeToReg(loOperand(Src1));
2510 Src0HiR = legalizeToReg(hiOperand(Src0));
2511 Src1HiR = legalizeToReg(hiOperand(Src1));
2512 auto *T_Lo = I32Reg(), *T_Hi = I32Reg();
2513 _or(T_Lo, Src0LoR, Src1LoR);
2514 _mov(DestLo, T_Lo);
2515 _or(T_Hi, Src0HiR, Src1HiR);
2516 _mov(DestHi, T_Hi);
2517 return;
2518 }
2519 case InstArithmetic::Xor: {
2520 Src0LoR = legalizeToReg(loOperand(Src0));
2521 Src1LoR = legalizeToReg(loOperand(Src1));
2522 Src0HiR = legalizeToReg(hiOperand(Src0));
2523 Src1HiR = legalizeToReg(hiOperand(Src1));
2524 auto *T_Lo = I32Reg(), *T_Hi = I32Reg();
2525 _xor(T_Lo, Src0LoR, Src1LoR);
2526 _mov(DestLo, T_Lo);
2527 _xor(T_Hi, Src0HiR, Src1HiR);
2528 _mov(DestHi, T_Hi);
2529 return;
2530 }
2531 case InstArithmetic::Mul: {
2532 // TODO(rkotler): Make sure that mul has the side effect of clobbering
2533 // LO, HI. Check for any other LO, HI quirkiness in this section.
2534 Src0LoR = legalizeToReg(loOperand(Src0));
2535 Src1LoR = legalizeToReg(loOperand(Src1));
2536 Src0HiR = legalizeToReg(hiOperand(Src0));
2537 Src1HiR = legalizeToReg(hiOperand(Src1));
2538 auto *T_Lo = I32Reg(RegMIPS32::Reg_LO), *T_Hi = I32Reg(RegMIPS32::Reg_HI);
2539 auto *T1 = I32Reg(), *T2 = I32Reg();
2540 auto *TM1 = I32Reg(), *TM2 = I32Reg(), *TM3 = I32Reg(), *TM4 = I32Reg();
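// Build the 64-bit product from 32-bit pieces:
//   lo(result) = lo(Lo0 * Lo1)
//   hi(result) = hi(Lo0 * Lo1) + Hi0 * Lo1 + Lo0 * Hi1
// multu leaves the full 64-bit Lo0 * Lo1 product in the HI:LO register pair.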
2541 _multu(T_Lo, Src0LoR, Src1LoR);
2542 Context.insert<InstFakeDef>(T_Hi, T_Lo);
2543 _mflo(T1, T_Lo);
2544 _mfhi(T2, T_Hi);
2545 _mov(DestLo, T1);
2546 _mul(TM1, Src0HiR, Src1LoR);
2547 _mul(TM2, Src0LoR, Src1HiR);
2548 _addu(TM3, TM1, T2);
2549 _addu(TM4, TM3, TM2);
2550 _mov(DestHi, TM4);
2551 return;
2552 }
2553 case InstArithmetic::Shl: {
2554 auto *T_Lo = I32Reg();
2555 auto *T_Hi = I32Reg();
2556 auto *T1_Lo = I32Reg();
2557 auto *T1_Hi = I32Reg();
2558 auto *T1 = I32Reg();
2559 auto *T2 = I32Reg();
2560 auto *T3 = I32Reg();
2561 auto *T4 = I32Reg();
2562 auto *T5 = I32Reg();
2563
2564 if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Src1)) {
2565 Src0LoR = legalizeToReg(loOperand(Src0));
2566 int64_t ShiftAmount = Const->getValue();
2567 if (ShiftAmount == 1) {
2568 Src0HiR = legalizeToReg(hiOperand(Src0));
2569 _addu(T_Lo, Src0LoR, Src0LoR);
2570 _sltu(T1, T_Lo, Src0LoR);
2571 _addu(T2, T1, Src0HiR);
2572 _addu(T_Hi, Src0HiR, T2);
2573 } else if (ShiftAmount < INT32_BITS) {
2574 Src0HiR = legalizeToReg(hiOperand(Src0));
2575 _srl(T1, Src0LoR, INT32_BITS - ShiftAmount);
2576 _sll(T2, Src0HiR, ShiftAmount);
2577 _or(T_Hi, T1, T2);
2578 _sll(T_Lo, Src0LoR, ShiftAmount);
2579 } else if (ShiftAmount == INT32_BITS) {
2580 _addiu(T_Lo, getZero(), 0);
2581 _mov(T_Hi, Src0LoR);
2582 } else if (ShiftAmount > INT32_BITS && ShiftAmount < 64) {
2583 _sll(T_Hi, Src0LoR, ShiftAmount - INT32_BITS);
2584 _addiu(T_Lo, getZero(), 0);
2585 }
2586 _mov(DestLo, T_Lo);
2587 _mov(DestHi, T_Hi);
2588 return;
2589 }
2590
2591 Src0LoR = legalizeToReg(loOperand(Src0));
2592 Src1LoR = legalizeToReg(loOperand(Src1));
2593 Src0HiR = legalizeToReg(hiOperand(Src0));
2594
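// Variable shifts on MIPS32 use only the low 5 bits of the amount. For
// s in [0, 31]: Hi = (Hi << s) | (Lo >> (32 - s)), where the
// (Lo >> 1) >> ~s form avoids an undefined shift by 32 when s == 0. Bit 5
// of the amount (andi with 32) then selects the s >= 32 case via movn:
// there Hi = Lo << (s - 32) (sllv already masks to 5 bits) and Lo = 0.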
2595 _sllv(T1, Src0HiR, Src1LoR);
2596 _not(T2, Src1LoR);
2597 _srl(T3, Src0LoR, 1);
2598 _srlv(T4, T3, T2);
2599 _or(T_Hi, T1, T4);
2600 _sllv(T_Lo, Src0LoR, Src1LoR);
2601
2602 _mov(T1_Hi, T_Hi);
2603 _mov(T1_Lo, T_Lo);
2604 _andi(T5, Src1LoR, INT32_BITS);
2605 _movn(T1_Hi, T_Lo, T5);
2606 _movn(T1_Lo, getZero(), T5);
2607 _mov(DestHi, T1_Hi);
2608 _mov(DestLo, T1_Lo);
2609 return;
2610 }
2611 case InstArithmetic::Lshr: {
2612
2613 auto *T_Lo = I32Reg();
2614 auto *T_Hi = I32Reg();
2615 auto *T1_Lo = I32Reg();
2616 auto *T1_Hi = I32Reg();
2617 auto *T1 = I32Reg();
2618 auto *T2 = I32Reg();
2619 auto *T3 = I32Reg();
2620 auto *T4 = I32Reg();
2621 auto *T5 = I32Reg();
2622
2623 if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Src1)) {
2624 Src0HiR = legalizeToReg(hiOperand(Src0));
2625 int64_t ShiftAmount = Const->getValue();
2626 if (ShiftAmount < INT32_BITS) {
2627 Src0LoR = legalizeToReg(loOperand(Src0));
2628 _sll(T1, Src0HiR, INT32_BITS - ShiftAmount);
2629 _srl(T2, Src0LoR, ShiftAmount);
2630 _or(T_Lo, T1, T2);
2631 _srl(T_Hi, Src0HiR, ShiftAmount);
2632 } else if (ShiftAmount == INT32_BITS) {
2633 _mov(T_Lo, Src0HiR);
2634 _addiu(T_Hi, getZero(), 0);
2635 } else if (ShiftAmount > INT32_BITS && ShiftAmount < 64) {
2636 _srl(T_Lo, Src0HiR, ShiftAmount - INT32_BITS);
2637 _addiu(T_Hi, getZero(), 0);
2638 }
2639 _mov(DestLo, T_Lo);
2640 _mov(DestHi, T_Hi);
2641 return;
2642 }
2643
2644 Src0LoR = legalizeToReg(loOperand(Src0));
2645 Src1LoR = legalizeToReg(loOperand(Src1));
2646 Src0HiR = legalizeToReg(hiOperand(Src0));
2647
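// Mirror of the Shl sequence: for s in [0, 31],
// Lo = (Lo >> s) | (Hi << (32 - s)) via (Hi << 1) << ~s, and Hi = Hi >> s.
// When bit 5 of the amount is set (s >= 32), movn selects
// Lo = Hi >> (s - 32) and Hi = 0.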
2648 _srlv(T1, Src0LoR, Src1LoR);
2649 _not(T2, Src1LoR);
2650 _sll(T3, Src0HiR, 1);
2651 _sllv(T4, T3, T2);
2652 _or(T_Lo, T1, T4);
2653 _srlv(T_Hi, Src0HiR, Src1LoR);
2654
2655 _mov(T1_Hi, T_Hi);
2656 _mov(T1_Lo, T_Lo);
2657 _andi(T5, Src1LoR, INT32_BITS);
2658 _movn(T1_Lo, T_Hi, T5);
2659 _movn(T1_Hi, getZero(), T5);
2660 _mov(DestHi, T1_Hi);
2661 _mov(DestLo, T1_Lo);
2662 return;
2663 }
2664 case InstArithmetic::Ashr: {
2665
2666 auto *T_Lo = I32Reg();
2667 auto *T_Hi = I32Reg();
2668 auto *T1_Lo = I32Reg();
2669 auto *T1_Hi = I32Reg();
2670 auto *T1 = I32Reg();
2671 auto *T2 = I32Reg();
2672 auto *T3 = I32Reg();
2673 auto *T4 = I32Reg();
2674 auto *T5 = I32Reg();
2675 auto *T6 = I32Reg();
2676
2677 if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Src1)) {
2678 Src0HiR = legalizeToReg(hiOperand(Src0));
2679 int64_t ShiftAmount = Const->getValue();
2680 if (ShiftAmount < INT32_BITS) {
2681 Src0LoR = legalizeToReg(loOperand(Src0));
2682 _sll(T1, Src0HiR, INT32_BITS - ShiftAmount);
2683 _srl(T2, Src0LoR, ShiftAmount);
2684 _or(T_Lo, T1, T2);
2685 _sra(T_Hi, Src0HiR, ShiftAmount);
2686 } else if (ShiftAmount == INT32_BITS) {
2687 _sra(T_Hi, Src0HiR, INT32_BITS - 1);
2688 _mov(T_Lo, Src0HiR);
2689 } else if (ShiftAmount > INT32_BITS && ShiftAmount < 64) {
2690 _sra(T_Lo, Src0HiR, ShiftAmount - INT32_BITS);
2691 _sra(T_Hi, Src0HiR, INT32_BITS - 1);
2692 }
2693 _mov(DestLo, T_Lo);
2694 _mov(DestHi, T_Hi);
2695 return;
2696 }
2697
2698 Src0LoR = legalizeToReg(loOperand(Src0));
2699 Src1LoR = legalizeToReg(loOperand(Src1));
2700 Src0HiR = legalizeToReg(hiOperand(Src0));
2701
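// Same structure as Lshr, but with arithmetic shifts: Hi = Hi >>a s, and
// for s >= 32 the movn pair selects Lo = Hi >>a (s - 32) and
// Hi = Hi >>a 31 (the sign fill).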
2702 _srlv(T1, Src0LoR, Src1LoR);
2703 _not(T2, Src1LoR);
2704 _sll(T3, Src0HiR, 1);
2705 _sllv(T4, T3, T2);
2706 _or(T_Lo, T1, T4);
2707 _srav(T_Hi, Src0HiR, Src1LoR);
2708
2709 _mov(T1_Hi, T_Hi);
2710 _mov(T1_Lo, T_Lo);
2711 _andi(T5, Src1LoR, INT32_BITS);
2712 _movn(T1_Lo, T_Hi, T5);
2713 _sra(T6, Src0HiR, INT32_BITS - 1);
2714 _movn(T1_Hi, T6, T5);
2715 _mov(DestHi, T1_Hi);
2716 _mov(DestLo, T1_Lo);
2717 return;
2718 }
2719 case InstArithmetic::Fadd:
2720 case InstArithmetic::Fsub:
2721 case InstArithmetic::Fmul:
2722 case InstArithmetic::Fdiv:
2723 case InstArithmetic::Frem:
2724 llvm::report_fatal_error("FP instruction with i64 type");
2725 return;
2726 case InstArithmetic::Udiv:
2727 case InstArithmetic::Sdiv:
2728 case InstArithmetic::Urem:
2729 case InstArithmetic::Srem:
2730 llvm::report_fatal_error("64-bit div and rem should have been prelowered");
2731 return;
2732 }
2733 }
2734
2735 void TargetMIPS32::lowerArithmetic(const InstArithmetic *Instr) {
2736 Variable *Dest = Instr->getDest();
2737
2738 if (Dest->isRematerializable()) {
2739 Context.insert<InstFakeDef>(Dest);
2740 return;
2741 }
2742
2743 // We need to signal all the UnimplementedLoweringError errors before any
2744 // legalization into new variables; otherwise Om1 register allocation may fail
2745 // when it sees variables that are defined but not used.
2746 Type DestTy = Dest->getType();
2747 Operand *Src0 = legalizeUndef(Instr->getSrc(0));
2748 Operand *Src1 = legalizeUndef(Instr->getSrc(1));
2749 if (DestTy == IceType_i64) {
2750 lowerInt64Arithmetic(Instr, Instr->getDest(), Src0, Src1);
2751 return;
2752 }
2753 if (isVectorType(Dest->getType())) {
2754 llvm::report_fatal_error("Arithmetic: Destination type is vector");
2755 return;
2756 }
2757
2758 Variable *T = makeReg(Dest->getType());
2759 Variable *Src0R = legalizeToReg(Src0);
2760 Variable *Src1R = nullptr;
2761 uint32_t Value = 0;
2762 bool IsSrc1Imm16 = false;
2763
2764 switch (Instr->getOp()) {
2765 case InstArithmetic::Add:
2766 case InstArithmetic::Sub: {
2767 auto *Const32 = llvm::dyn_cast<ConstantInteger32>(Src1);
2768 if (Const32 != nullptr && isInt<16>(int32_t(Const32->getValue()))) {
2769 IsSrc1Imm16 = true;
2770 Value = Const32->getValue();
2771 } else {
2772 Src1R = legalizeToReg(Src1);
2773 }
2774 break;
2775 }
2776 case InstArithmetic::And:
2777 case InstArithmetic::Or:
2778 case InstArithmetic::Xor:
2779 case InstArithmetic::Shl:
2780 case InstArithmetic::Lshr:
2781 case InstArithmetic::Ashr: {
2782 auto *Const32 = llvm::dyn_cast<ConstantInteger32>(Src1);
2783 if (Const32 != nullptr && llvm::isUInt<16>(uint32_t(Const32->getValue()))) {
2784 IsSrc1Imm16 = true;
2785 Value = Const32->getValue();
2786 } else {
2787 Src1R = legalizeToReg(Src1);
2788 }
2789 break;
2790 }
2791 default:
2792 Src1R = legalizeToReg(Src1);
2793 break;
2794 }
2795 constexpr uint32_t DivideByZeroTrapCode = 7;
2796
2797 switch (Instr->getOp()) {
2798 case InstArithmetic::_num:
2799 break;
2800 case InstArithmetic::Add: {
2801 auto *T0R = Src0R;
2802 auto *T1R = Src1R;
2803 if (Dest->getType() != IceType_i32) {
2804 T0R = makeReg(IceType_i32);
2805 lowerCast(InstCast::create(Func, InstCast::Sext, T0R, Src0R));
2806 if (!IsSrc1Imm16) {
2807 T1R = makeReg(IceType_i32);
2808 lowerCast(InstCast::create(Func, InstCast::Sext, T1R, Src1R));
2809 }
2810 }
2811 if (IsSrc1Imm16) {
2812 _addiu(T, T0R, Value);
2813 } else {
2814 _addu(T, T0R, T1R);
2815 }
2816 _mov(Dest, T);
2817 return;
2818 }
2819 case InstArithmetic::And:
2820 if (IsSrc1Imm16) {
2821 _andi(T, Src0R, Value);
2822 } else {
2823 _and(T, Src0R, Src1R);
2824 }
2825 _mov(Dest, T);
2826 return;
2827 case InstArithmetic::Or:
2828 if (IsSrc1Imm16) {
2829 _ori(T, Src0R, Value);
2830 } else {
2831 _or(T, Src0R, Src1R);
2832 }
2833 _mov(Dest, T);
2834 return;
2835 case InstArithmetic::Xor:
2836 if (IsSrc1Imm16) {
2837 _xori(T, Src0R, Value);
2838 } else {
2839 _xor(T, Src0R, Src1R);
2840 }
2841 _mov(Dest, T);
2842 return;
2843 case InstArithmetic::Sub: {
2844 auto *T0R = Src0R;
2845 auto *T1R = Src1R;
2846 if (Dest->getType() != IceType_i32) {
2847 T0R = makeReg(IceType_i32);
2848 lowerCast(InstCast::create(Func, InstCast::Sext, T0R, Src0R));
2849 if (!IsSrc1Imm16) {
2850 T1R = makeReg(IceType_i32);
2851 lowerCast(InstCast::create(Func, InstCast::Sext, T1R, Src1R));
2852 }
2853 }
2854 if (IsSrc1Imm16) {
2855 _addiu(T, T0R, -Value);
2856 } else {
2857 _subu(T, T0R, T1R);
2858 }
2859 _mov(Dest, T);
2860 return;
2861 }
2862 case InstArithmetic::Mul: {
2863 _mul(T, Src0R, Src1R);
2864 _mov(Dest, T);
2865 return;
2866 }
2867 case InstArithmetic::Shl: {
2868 if (IsSrc1Imm16) {
2869 _sll(T, Src0R, Value);
2870 } else {
2871 _sllv(T, Src0R, Src1R);
2872 }
2873 _mov(Dest, T);
2874 return;
2875 }
2876 case InstArithmetic::Lshr: {
2877 auto *T0R = Src0R;
2878 auto *T1R = Src1R;
2879 if (Dest->getType() != IceType_i32) {
2880 T0R = makeReg(IceType_i32);
2881 lowerCast(InstCast::create(Func, InstCast::Zext, T0R, Src0R));
2882 if (!IsSrc1Imm16) {
2883 T1R = makeReg(IceType_i32);
2884 lowerCast(InstCast::create(Func, InstCast::Zext, T1R, Src1R));
2885 }
2886 }
2887 if (IsSrc1Imm16) {
2888 _srl(T, T0R, Value);
2889 } else {
2890 _srlv(T, T0R, T1R);
2891 }
2892 _mov(Dest, T);
2893 return;
2894 }
2895 case InstArithmetic::Ashr: {
2896 auto *T0R = Src0R;
2897 auto *T1R = Src1R;
2898 if (Dest->getType() != IceType_i32) {
2899 T0R = makeReg(IceType_i32);
2900 lowerCast(InstCast::create(Func, InstCast::Sext, T0R, Src0R));
2901 if (!IsSrc1Imm16) {
2902 T1R = makeReg(IceType_i32);
2903 lowerCast(InstCast::create(Func, InstCast::Sext, T1R, Src1R));
2904 }
2905 }
2906 if (IsSrc1Imm16) {
2907 _sra(T, T0R, Value);
2908 } else {
2909 _srav(T, T0R, T1R);
2910 }
2911 _mov(Dest, T);
2912 return;
2913 }
2914 case InstArithmetic::Udiv: {
2915 auto *T_Zero = I32Reg(RegMIPS32::Reg_ZERO);
2916 auto *T0R = Src0R;
2917 auto *T1R = Src1R;
2918 if (Dest->getType() != IceType_i32) {
2919 T0R = makeReg(IceType_i32);
2920 lowerCast(InstCast::create(Func, InstCast::Zext, T0R, Src0R));
2921 T1R = makeReg(IceType_i32);
2922 lowerCast(InstCast::create(Func, InstCast::Zext, T1R, Src1R));
2923 }
2924 _divu(T_Zero, T0R, T1R);
2925 _teq(T1R, T_Zero, DivideByZeroTrapCode); // Trap if divide-by-zero
2926 _mflo(T, T_Zero);
2927 _mov(Dest, T);
2928 return;
2929 }
2930 case InstArithmetic::Sdiv: {
2931 auto *T_Zero = I32Reg(RegMIPS32::Reg_ZERO);
2932 auto *T0R = Src0R;
2933 auto *T1R = Src1R;
2934 if (Dest->getType() != IceType_i32) {
2935 T0R = makeReg(IceType_i32);
2936 lowerCast(InstCast::create(Func, InstCast::Sext, T0R, Src0R));
2937 T1R = makeReg(IceType_i32);
2938 lowerCast(InstCast::create(Func, InstCast::Sext, T1R, Src1R));
2939 }
2940 _div(T_Zero, T0R, T1R);
2941 _teq(T1R, T_Zero, DivideByZeroTrapCode); // Trap if divide-by-zero
2942 _mflo(T, T_Zero);
2943 _mov(Dest, T);
2944 return;
2945 }
2946 case InstArithmetic::Urem: {
2947 auto *T_Zero = I32Reg(RegMIPS32::Reg_ZERO);
2948 auto *T0R = Src0R;
2949 auto *T1R = Src1R;
2950 if (Dest->getType() != IceType_i32) {
2951 T0R = makeReg(IceType_i32);
2952 lowerCast(InstCast::create(Func, InstCast::Zext, T0R, Src0R));
2953 T1R = makeReg(IceType_i32);
2954 lowerCast(InstCast::create(Func, InstCast::Zext, T1R, Src1R));
2955 }
2956 _divu(T_Zero, T0R, T1R);
2957 _teq(T1R, T_Zero, DivideByZeroTrapCode); // Trap if divide-by-zero
2958 _mfhi(T, T_Zero);
2959 _mov(Dest, T);
2960 return;
2961 }
2962 case InstArithmetic::Srem: {
2963 auto *T_Zero = I32Reg(RegMIPS32::Reg_ZERO);
2964 auto *T0R = Src0R;
2965 auto *T1R = Src1R;
2966 if (Dest->getType() != IceType_i32) {
2967 T0R = makeReg(IceType_i32);
2968 lowerCast(InstCast::create(Func, InstCast::Sext, T0R, Src0R));
2969 T1R = makeReg(IceType_i32);
2970 lowerCast(InstCast::create(Func, InstCast::Sext, T1R, Src1R));
2971 }
2972 _div(T_Zero, T0R, T1R);
2973 _teq(T1R, T_Zero, DivideByZeroTrapCode); // Trap if divide-by-zero
2974 _mfhi(T, T_Zero);
2975 _mov(Dest, T);
2976 return;
2977 }
2978 case InstArithmetic::Fadd: {
2979 if (DestTy == IceType_f32) {
2980 _add_s(T, Src0R, Src1R);
2981 _mov(Dest, T);
2982 return;
2983 }
2984 if (DestTy == IceType_f64) {
2985 _add_d(T, Src0R, Src1R);
2986 _mov(Dest, T);
2987 return;
2988 }
2989 break;
2990 }
2991 case InstArithmetic::Fsub:
2992 if (DestTy == IceType_f32) {
2993 _sub_s(T, Src0R, Src1R);
2994 _mov(Dest, T);
2995 return;
2996 }
2997 if (DestTy == IceType_f64) {
2998 _sub_d(T, Src0R, Src1R);
2999 _mov(Dest, T);
3000 return;
3001 }
3002 break;
3003 case InstArithmetic::Fmul:
3004 if (DestTy == IceType_f32) {
3005 _mul_s(T, Src0R, Src1R);
3006 _mov(Dest, T);
3007 return;
3008 }
3009 if (DestTy == IceType_f64) {
3010 _mul_d(T, Src0R, Src1R);
3011 _mov(Dest, T);
3012 return;
3013 }
3014 break;
3015 case InstArithmetic::Fdiv:
3016 if (DestTy == IceType_f32) {
3017 _div_s(T, Src0R, Src1R);
3018 _mov(Dest, T);
3019 return;
3020 }
3021 if (DestTy == IceType_f64) {
3022 _div_d(T, Src0R, Src1R);
3023 _mov(Dest, T);
3024 return;
3025 }
3026 break;
3027 case InstArithmetic::Frem:
3028 llvm::report_fatal_error("frem should have been prelowered.");
3029 break;
3030 }
3031 llvm::report_fatal_error("Unknown arithmetic operator");
3032 }
3033
3034 void TargetMIPS32::lowerAssign(const InstAssign *Instr) {
3035 Variable *Dest = Instr->getDest();
3036
3037 if (Dest->isRematerializable()) {
3038 Context.insert<InstFakeDef>(Dest);
3039 return;
3040 }
3041
3042 // The source type may not be the same as the destination's.
3043 if (isVectorType(Dest->getType())) {
3044 Operand *Src0 = legalizeUndef(Instr->getSrc(0));
3045 auto *DstVec = llvm::dyn_cast<VariableVecOn32>(Dest);
3046 for (SizeT i = 0; i < DstVec->ContainersPerVector; ++i) {
3047 auto *DCont = DstVec->getContainers()[i];
3048 auto *SCont =
3049 legalize(getOperandAtIndex(Src0, IceType_i32, i), Legal_Reg);
3050 auto *TReg = makeReg(IceType_i32);
3051 _mov(TReg, SCont);
3052 _mov(DCont, TReg);
3053 }
3054 return;
3055 }
3056 Operand *Src0 = Instr->getSrc(0);
3057 assert(Dest->getType() == Src0->getType());
3058 if (Dest->getType() == IceType_i64) {
3059 Src0 = legalizeUndef(Src0);
3060 Operand *Src0Lo = legalize(loOperand(Src0), Legal_Reg);
3061 Operand *Src0Hi = legalize(hiOperand(Src0), Legal_Reg);
3062 auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
3063 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3064 auto *T_Lo = I32Reg(), *T_Hi = I32Reg();
3065 _mov(T_Lo, Src0Lo);
3066 _mov(DestLo, T_Lo);
3067 _mov(T_Hi, Src0Hi);
3068 _mov(DestHi, T_Hi);
3069 return;
3070 }
3071 Operand *SrcR;
3072 if (Dest->hasReg()) {
3073 // If Dest already has a physical register, then legalize the Src operand
3074 // into a Variable with the same register assignment. This especially
3075 // helps allow the use of Flex operands.
3076 SrcR = legalize(Src0, Legal_Reg, Dest->getRegNum());
3077 } else {
3078 // Dest could be a stack operand. Since we could potentially need
3079 // to do a Store (and store can only have Register operands),
3080 // legalize this to a register.
3081 SrcR = legalize(Src0, Legal_Reg);
3082 }
3083 _mov(Dest, SrcR);
3084 }
3085
3086 void TargetMIPS32::lowerBr(const InstBr *Instr) {
3087 if (Instr->isUnconditional()) {
3088 _br(Instr->getTargetUnconditional());
3089 return;
3090 }
3091 CfgNode *TargetTrue = Instr->getTargetTrue();
3092 CfgNode *TargetFalse = Instr->getTargetFalse();
3093 Operand *Boolean = Instr->getCondition();
3094 const Inst *Producer = Computations.getProducerOf(Boolean);
3095 if (Producer == nullptr) {
3096 // Since we don't know the producer of this boolean, we assume it is kept
3097 // in positive logic and simply emit beqz with the boolean itself as the
3098 // operand.
3099 auto *BooleanR = legalizeToReg(Boolean);
3100 _br(TargetTrue, TargetFalse, BooleanR, CondMIPS32::Cond::EQZ);
3101 return;
3102 }
3103 if (Producer->getKind() == Inst::Icmp) {
3104 const InstIcmp *CompareInst = llvm::cast<InstIcmp>(Producer);
3105 Operand *Src0 = CompareInst->getSrc(0);
3106 Operand *Src1 = CompareInst->getSrc(1);
3107 const Type Src0Ty = Src0->getType();
3108 assert(Src0Ty == Src1->getType());
3109
3110 Variable *Src0R = nullptr;
3111 Variable *Src1R = nullptr;
3112 Variable *Src0HiR = nullptr;
3113 Variable *Src1HiR = nullptr;
3114 if (Src0Ty == IceType_i64) {
3115 Src0R = legalizeToReg(loOperand(Src0));
3116 Src1R = legalizeToReg(loOperand(Src1));
3117 Src0HiR = legalizeToReg(hiOperand(Src0));
3118 Src1HiR = legalizeToReg(hiOperand(Src1));
3119 } else {
3120 auto *Src0RT = legalizeToReg(Src0);
3121 auto *Src1RT = legalizeToReg(Src1);
3122 // Sign/Zero extend the source operands
3123 if (Src0Ty != IceType_i32) {
3124 InstCast::OpKind CastKind;
3125 switch (CompareInst->getCondition()) {
3126 case InstIcmp::Eq:
3127 case InstIcmp::Ne:
3128 case InstIcmp::Sgt:
3129 case InstIcmp::Sge:
3130 case InstIcmp::Slt:
3131 case InstIcmp::Sle:
3132 CastKind = InstCast::Sext;
3133 break;
3134 default:
3135 CastKind = InstCast::Zext;
3136 break;
3137 }
3138 Src0R = makeReg(IceType_i32);
3139 Src1R = makeReg(IceType_i32);
3140 lowerCast(InstCast::create(Func, CastKind, Src0R, Src0RT));
3141 lowerCast(InstCast::create(Func, CastKind, Src1R, Src1RT));
3142 } else {
3143 Src0R = Src0RT;
3144 Src1R = Src1RT;
3145 }
3146 }
3147 auto *DestT = makeReg(IceType_i32);
3148
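    // Each i64 case below follows the same pattern: compare the high words,
    // compare the low words, and use movz keyed on (Src0Hi ^ Src1Hi) to
    // substitute the low-word result when the high words are equal. For
    // example, the unsigned "a > b" (Ugt) case is emitted roughly as
    // (a/b and t1..t5 are illustrative names):
    //   xor  t1, a_hi, b_hi   # t1 == 0 iff the high words are equal
    //   sltu t2, b_hi, a_hi   # high-word result
    //   xori t3, t2, 1        # inverted high-word result
    //   sltu t4, b_lo, a_lo   # low-word result
    //   xori t5, t4, 1        # inverted low-word result
    //   movz t3, t5, t1       # take the low-word result if highs are equal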
3149 switch (CompareInst->getCondition()) {
3150 default:
3151 llvm_unreachable("unexpected condition");
3152 return;
3153 case InstIcmp::Eq: {
3154 if (Src0Ty == IceType_i64) {
3155 auto *T1 = I32Reg();
3156 auto *T2 = I32Reg();
3157 auto *T3 = I32Reg();
3158 _xor(T1, Src0HiR, Src1HiR);
3159 _xor(T2, Src0R, Src1R);
3160 _or(T3, T1, T2);
3161 _mov(DestT, T3);
3162 _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
3163 } else {
3164 _br(TargetTrue, TargetFalse, Src0R, Src1R, CondMIPS32::Cond::NE);
3165 }
3166 return;
3167 }
3168 case InstIcmp::Ne: {
3169 if (Src0Ty == IceType_i64) {
3170 auto *T1 = I32Reg();
3171 auto *T2 = I32Reg();
3172 auto *T3 = I32Reg();
3173 _xor(T1, Src0HiR, Src1HiR);
3174 _xor(T2, Src0R, Src1R);
3175 _or(T3, T1, T2);
3176 _mov(DestT, T3);
3177 _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::EQZ);
3178 } else {
3179 _br(TargetTrue, TargetFalse, Src0R, Src1R, CondMIPS32::Cond::EQ);
3180 }
3181 return;
3182 }
3183 case InstIcmp::Ugt: {
3184 if (Src0Ty == IceType_i64) {
3185 auto *T1 = I32Reg();
3186 auto *T2 = I32Reg();
3187 auto *T3 = I32Reg();
3188 auto *T4 = I32Reg();
3189 auto *T5 = I32Reg();
3190 _xor(T1, Src0HiR, Src1HiR);
3191 _sltu(T2, Src1HiR, Src0HiR);
3192 _xori(T3, T2, 1);
3193 _sltu(T4, Src1R, Src0R);
3194 _xori(T5, T4, 1);
3195 _movz(T3, T5, T1);
3196 _mov(DestT, T3);
3197 _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
3198 } else {
3199 _sltu(DestT, Src1R, Src0R);
3200 _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::EQZ);
3201 }
3202 return;
3203 }
3204 case InstIcmp::Uge: {
3205 if (Src0Ty == IceType_i64) {
3206 auto *T1 = I32Reg();
3207 auto *T2 = I32Reg();
3208 auto *T3 = I32Reg();
3209 _xor(T1, Src0HiR, Src1HiR);
3210 _sltu(T2, Src0HiR, Src1HiR);
3211 _sltu(T3, Src0R, Src1R);
3212 _movz(T2, T3, T1);
3213 _mov(DestT, T2);
3214 _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
3215 } else {
3216 _sltu(DestT, Src0R, Src1R);
3217 _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
3218 }
3219 return;
3220 }
3221 case InstIcmp::Ult: {
3222 if (Src0Ty == IceType_i64) {
3223 auto *T1 = I32Reg();
3224 auto *T2 = I32Reg();
3225 auto *T3 = I32Reg();
3226 auto *T4 = I32Reg();
3227 auto *T5 = I32Reg();
3228 _xor(T1, Src0HiR, Src1HiR);
3229 _sltu(T2, Src0HiR, Src1HiR);
3230 _xori(T3, T2, 1);
3231 _sltu(T4, Src0R, Src1R);
3232 _xori(T5, T4, 1);
3233 _movz(T3, T5, T1);
3234 _mov(DestT, T3);
3235 _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
3236 } else {
3237 _sltu(DestT, Src0R, Src1R);
3238 _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::EQZ);
3239 }
3240 return;
3241 }
3242 case InstIcmp::Ule: {
3243 if (Src0Ty == IceType_i64) {
3244 auto *T1 = I32Reg();
3245 auto *T2 = I32Reg();
3246 auto *T3 = I32Reg();
3247 _xor(T1, Src0HiR, Src1HiR);
3248 _sltu(T2, Src1HiR, Src0HiR);
3249 _sltu(T3, Src1R, Src0R);
3250 _movz(T2, T3, T1);
3251 _mov(DestT, T2);
3252 _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
3253 } else {
3254 _sltu(DestT, Src1R, Src0R);
3255 _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
3256 }
3257 return;
3258 }
3259 case InstIcmp::Sgt: {
3260 if (Src0Ty == IceType_i64) {
3261 auto *T1 = I32Reg();
3262 auto *T2 = I32Reg();
3263 auto *T3 = I32Reg();
3264 auto *T4 = I32Reg();
3265 auto *T5 = I32Reg();
3266 _xor(T1, Src0HiR, Src1HiR);
3267 _slt(T2, Src1HiR, Src0HiR);
3268 _xori(T3, T2, 1);
3269 _sltu(T4, Src1R, Src0R);
3270 _xori(T5, T4, 1);
3271 _movz(T3, T5, T1);
3272 _mov(DestT, T3);
3273 _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
3274 } else {
3275 _slt(DestT, Src1R, Src0R);
3276 _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::EQZ);
3277 }
3278 return;
3279 }
3280 case InstIcmp::Sge: {
3281 if (Src0Ty == IceType_i64) {
3282 auto *T1 = I32Reg();
3283 auto *T2 = I32Reg();
3284 auto *T3 = I32Reg();
3285 _xor(T1, Src0HiR, Src1HiR);
3286 _slt(T2, Src0HiR, Src1HiR);
3287 _sltu(T3, Src0R, Src1R);
3288 _movz(T2, T3, T1);
3289 _mov(DestT, T2);
3290 _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
3291 } else {
3292 _slt(DestT, Src0R, Src1R);
3293 _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
3294 }
3295 return;
3296 }
3297 case InstIcmp::Slt: {
3298 if (Src0Ty == IceType_i64) {
3299 auto *T1 = I32Reg();
3300 auto *T2 = I32Reg();
3301 auto *T3 = I32Reg();
3302 auto *T4 = I32Reg();
3303 auto *T5 = I32Reg();
3304 _xor(T1, Src0HiR, Src1HiR);
3305 _slt(T2, Src0HiR, Src1HiR);
3306 _xori(T3, T2, 1);
3307 _sltu(T4, Src0R, Src1R);
3308 _xori(T5, T4, 1);
3309 _movz(T3, T5, T1);
3310 _mov(DestT, T3);
3311 _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
3312 } else {
3313 _slt(DestT, Src0R, Src1R);
3314 _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::EQZ);
3315 }
3316 return;
3317 }
3318 case InstIcmp::Sle: {
3319 if (Src0Ty == IceType_i64) {
3320 auto *T1 = I32Reg();
3321 auto *T2 = I32Reg();
3322 auto *T3 = I32Reg();
3323 _xor(T1, Src0HiR, Src1HiR);
3324 _slt(T2, Src1HiR, Src0HiR);
3325 _sltu(T3, Src1R, Src0R);
3326 _movz(T2, T3, T1);
3327 _mov(DestT, T2);
3328 _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
3329 } else {
3330 _slt(DestT, Src1R, Src0R);
3331 _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
3332 }
3333 return;
3334 }
3335 }
3336 }
3337 }
3338
void TargetMIPS32::lowerCall(const InstCall *Instr) {
3340 CfgVector<Variable *> RegArgs;
3341 NeedsStackAlignment = true;
3342
3343 // Assign arguments to registers and stack. Also reserve stack.
3344 TargetMIPS32::CallingConv CC;
3345
3346 // Pair of Arg Operand -> GPR number assignments.
3347 llvm::SmallVector<std::pair<Operand *, RegNumT>, MIPS32_MAX_GPR_ARG> GPRArgs;
3348 llvm::SmallVector<std::pair<Operand *, RegNumT>, MIPS32_MAX_FP_ARG> FPArgs;
3349 // Pair of Arg Operand -> stack offset.
3350 llvm::SmallVector<std::pair<Operand *, int32_t>, 8> StackArgs;
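  // Per the MIPS O32 calling convention, a 16-byte argument save area for
  // $a0..$a3 is always reserved, so the outgoing parameter area starts at
  // offset 16 even when all register arguments stay in registers.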
3351 size_t ParameterAreaSizeBytes = 16;
3352
3353 // Classify each argument operand according to the location where the
3354 // argument is passed.
3355
  // v4f32 is returned through the stack. $4 is set up by the caller and
  // passed implicitly as the first argument. The callee then copies the
  // return vector to the memory pointed to by $4.
3358 SizeT ArgNum = 0;
3359 Variable *Dest = Instr->getDest();
3360 Variable *RetVecFloat = nullptr;
3361 if (Dest && isVectorFloatingType(Dest->getType())) {
3362 ArgNum = 1;
3363 CC.discardReg(RegMIPS32::Reg_A0);
3364 RetVecFloat = Func->makeVariable(IceType_i32);
3365 auto *ByteCount = ConstantInteger32::create(Ctx, IceType_i32, 16);
3366 constexpr SizeT Alignment = 4;
3367 lowerAlloca(InstAlloca::create(Func, RetVecFloat, ByteCount, Alignment));
3368 RegArgs.emplace_back(
3369 legalizeToReg(RetVecFloat, RegNumT::fixme(RegMIPS32::Reg_A0)));
3370 }
3371
3372 for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {
3373 Operand *Arg = legalizeUndef(Instr->getArg(i));
3374 const Type Ty = Arg->getType();
3375 bool InReg = false;
3376 RegNumT Reg;
3377
3378 InReg = CC.argInReg(Ty, i, &Reg);
3379
3380 if (!InReg) {
3381 if (isVectorType(Ty)) {
3382 auto *ArgVec = llvm::cast<VariableVecOn32>(Arg);
3383 ParameterAreaSizeBytes =
3384 applyStackAlignmentTy(ParameterAreaSizeBytes, IceType_i64);
3385 for (Variable *Elem : ArgVec->getContainers()) {
3386 StackArgs.push_back(std::make_pair(Elem, ParameterAreaSizeBytes));
3387 ParameterAreaSizeBytes += typeWidthInBytesOnStack(IceType_i32);
3388 }
3389 } else {
3390 ParameterAreaSizeBytes =
3391 applyStackAlignmentTy(ParameterAreaSizeBytes, Ty);
3392 StackArgs.push_back(std::make_pair(Arg, ParameterAreaSizeBytes));
3393 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Ty);
3394 }
3395 ++ArgNum;
3396 continue;
3397 }
3398
3399 if (isVectorType(Ty)) {
3400 auto *ArgVec = llvm::cast<VariableVecOn32>(Arg);
3401 Operand *Elem0 = ArgVec->getContainers()[0];
3402 Operand *Elem1 = ArgVec->getContainers()[1];
3403 GPRArgs.push_back(
3404 std::make_pair(Elem0, RegNumT::fixme((unsigned)Reg + 0)));
3405 GPRArgs.push_back(
3406 std::make_pair(Elem1, RegNumT::fixme((unsigned)Reg + 1)));
3407 Operand *Elem2 = ArgVec->getContainers()[2];
3408 Operand *Elem3 = ArgVec->getContainers()[3];
      // The first vector argument is passed in $4:$5:$6:$7; the second and
      // later vector arguments are passed in $6:$7:stack:stack.
3411 if (ArgNum == 0) {
3412 GPRArgs.push_back(
3413 std::make_pair(Elem2, RegNumT::fixme((unsigned)Reg + 2)));
3414 GPRArgs.push_back(
3415 std::make_pair(Elem3, RegNumT::fixme((unsigned)Reg + 3)));
3416 } else {
3417 ParameterAreaSizeBytes =
3418 applyStackAlignmentTy(ParameterAreaSizeBytes, IceType_i64);
3419 StackArgs.push_back(std::make_pair(Elem2, ParameterAreaSizeBytes));
3420 ParameterAreaSizeBytes += typeWidthInBytesOnStack(IceType_i32);
3421 StackArgs.push_back(std::make_pair(Elem3, ParameterAreaSizeBytes));
3422 ParameterAreaSizeBytes += typeWidthInBytesOnStack(IceType_i32);
3423 }
3424 } else if (Ty == IceType_i64) {
3425 Operand *Lo = loOperand(Arg);
3426 Operand *Hi = hiOperand(Arg);
3427 GPRArgs.push_back(
3428 std::make_pair(Lo, RegMIPS32::get64PairFirstRegNum(Reg)));
3429 GPRArgs.push_back(
3430 std::make_pair(Hi, RegMIPS32::get64PairSecondRegNum(Reg)));
3431 } else if (isScalarIntegerType(Ty)) {
3432 GPRArgs.push_back(std::make_pair(Arg, Reg));
3433 } else {
3434 FPArgs.push_back(std::make_pair(Arg, Reg));
3435 }
3436 ++ArgNum;
3437 }
3438
3439 // Adjust the parameter area so that the stack is aligned. It is assumed that
3440 // the stack is already aligned at the start of the calling sequence.
3441 ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes);
3442
3443 // Copy arguments that are passed on the stack to the appropriate stack
3444 // locations.
3445 Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP);
3446 for (auto &StackArg : StackArgs) {
3447 ConstantInteger32 *Loc =
3448 llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(StackArg.second));
3449 Type Ty = StackArg.first->getType();
3450 OperandMIPS32Mem *Addr;
3451 constexpr bool SignExt = false;
3452 if (OperandMIPS32Mem::canHoldOffset(Ty, SignExt, StackArg.second)) {
3453 Addr = OperandMIPS32Mem::create(Func, Ty, SP, Loc);
3454 } else {
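      // The offset does not fit in the signed 16-bit immediate field of a
      // MIPS load/store, so materialize SP + offset in a register and use
      // that as the base.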
3455 Variable *NewBase = Func->makeVariable(SP->getType());
3456 lowerArithmetic(
3457 InstArithmetic::create(Func, InstArithmetic::Add, NewBase, SP, Loc));
3458 Addr = formMemoryOperand(NewBase, Ty);
3459 }
3460 lowerStore(InstStore::create(Func, StackArg.first, Addr));
3461 }
3462
3463 // Generate the call instruction. Assign its result to a temporary with high
3464 // register allocation weight.
3465
3466 // ReturnReg doubles as ReturnRegLo as necessary.
3467 Variable *ReturnReg = nullptr;
3468 Variable *ReturnRegHi = nullptr;
3469 if (Dest) {
3470 switch (Dest->getType()) {
3471 case IceType_NUM:
3472 llvm_unreachable("Invalid Call dest type");
3473 return;
3474 case IceType_void:
3475 break;
3476 case IceType_i1:
3477 case IceType_i8:
3478 case IceType_i16:
3479 case IceType_i32:
3480 ReturnReg = makeReg(Dest->getType(), RegMIPS32::Reg_V0);
3481 break;
3482 case IceType_i64:
3483 ReturnReg = I32Reg(RegMIPS32::Reg_V0);
3484 ReturnRegHi = I32Reg(RegMIPS32::Reg_V1);
3485 break;
3486 case IceType_f32:
3487 ReturnReg = makeReg(Dest->getType(), RegMIPS32::Reg_F0);
3488 break;
3489 case IceType_f64:
3490 ReturnReg = makeReg(IceType_f64, RegMIPS32::Reg_F0);
3491 break;
3492 case IceType_v4i1:
3493 case IceType_v8i1:
3494 case IceType_v16i1:
3495 case IceType_v16i8:
3496 case IceType_v8i16:
3497 case IceType_v4i32: {
3498 ReturnReg = makeReg(Dest->getType(), RegMIPS32::Reg_V0);
3499 auto *RetVec = llvm::dyn_cast<VariableVecOn32>(ReturnReg);
3500 RetVec->initVecElement(Func);
3501 for (SizeT i = 0; i < RetVec->ContainersPerVector; ++i) {
3502 auto *Var = RetVec->getContainers()[i];
3503 Var->setRegNum(RegNumT::fixme(RegMIPS32::Reg_V0 + i));
3504 }
3505 break;
3506 }
3507 case IceType_v4f32:
3508 ReturnReg = makeReg(IceType_i32, RegMIPS32::Reg_V0);
3509 break;
3510 }
3511 }
3512 Operand *CallTarget = Instr->getCallTarget();
3513 // Allow ConstantRelocatable to be left alone as a direct call,
3514 // but force other constants like ConstantInteger32 to be in
3515 // a register and make it an indirect call.
3516 if (!llvm::isa<ConstantRelocatable>(CallTarget)) {
3517 CallTarget = legalize(CallTarget, Legal_Reg);
3518 }
3519
3520 // Copy arguments to be passed in registers to the appropriate registers.
3521 for (auto &FPArg : FPArgs) {
3522 RegArgs.emplace_back(legalizeToReg(FPArg.first, FPArg.second));
3523 }
3524 for (auto &GPRArg : GPRArgs) {
3525 RegArgs.emplace_back(legalizeToReg(GPRArg.first, GPRArg.second));
3526 }
3527
  // Generate a FakeUse of register arguments so that they do not get
  // dead-code eliminated as a result of the FakeKill of scratch registers
  // after the call. These fake-uses need to be placed here so that the
  // argument registers are not reused during the legalizeToReg() calls
  // above.
3532 for (auto *RegArg : RegArgs) {
3533 Context.insert<InstFakeUse>(RegArg);
3534 }
3535
  // If a variable-sized alloca is used, the extra 16 bytes for the argument
  // build area are allocated on the stack before the call.
3538 if (VariableAllocaUsed)
3539 Sandboxer(this).addiu_sp(-MaxOutArgsSizeBytes);
3540
3541 Inst *NewCall;
3542
  // We do not need to define the return register if it is a vector; fake
  // defs of its containers are inserted just after the call.
3545 if (ReturnReg && isVectorIntegerType(ReturnReg->getType())) {
3546 Variable *RetReg = nullptr;
3547 NewCall = InstMIPS32Call::create(Func, RetReg, CallTarget);
3548 Context.insert(NewCall);
3549 } else {
3550 NewCall = Sandboxer(this, InstBundleLock::Opt_AlignToEnd)
3551 .jal(ReturnReg, CallTarget);
3552 }
3553
3554 if (VariableAllocaUsed)
3555 Sandboxer(this).addiu_sp(MaxOutArgsSizeBytes);
3556
  // Insert a fake use of the stack pointer to prevent dead-code elimination
  // of the addiu instruction that adjusts it.
3559 Context.insert<InstFakeUse>(SP);
3560
3561 if (ReturnRegHi)
3562 Context.insert(InstFakeDef::create(Func, ReturnRegHi));
3563
3564 if (ReturnReg) {
3565 if (auto *RetVec = llvm::dyn_cast<VariableVecOn32>(ReturnReg)) {
3566 for (Variable *Var : RetVec->getContainers()) {
3567 Context.insert(InstFakeDef::create(Func, Var));
3568 }
3569 }
3570 }
3571
3572 // Insert a register-kill pseudo instruction.
3573 Context.insert(InstFakeKill::create(Func, NewCall));
3574
3575 // Generate a FakeUse to keep the call live if necessary.
3576 if (Instr->hasSideEffects() && ReturnReg) {
3577 if (auto *RetVec = llvm::dyn_cast<VariableVecOn32>(ReturnReg)) {
3578 for (Variable *Var : RetVec->getContainers()) {
3579 Context.insert<InstFakeUse>(Var);
3580 }
3581 } else {
3582 Context.insert<InstFakeUse>(ReturnReg);
3583 }
3584 }
3585
3586 if (Dest == nullptr)
3587 return;
3588
3589 // Assign the result of the call to Dest.
3590 if (ReturnReg) {
3591 if (RetVecFloat) {
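      // The callee wrote the v4f32 result into the buffer whose address was
      // passed in $4; load it back one 32-bit container at a time.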
3592 auto *DestVecOn32 = llvm::cast<VariableVecOn32>(Dest);
3593 auto *TBase = legalizeToReg(RetVecFloat);
3594 for (SizeT i = 0; i < DestVecOn32->ContainersPerVector; ++i) {
3595 auto *Var = DestVecOn32->getContainers()[i];
3596 auto *TVar = makeReg(IceType_i32);
3597 OperandMIPS32Mem *Mem = OperandMIPS32Mem::create(
3598 Func, IceType_i32, TBase,
3599 llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(i * 4)));
3600 _lw(TVar, Mem);
3601 _mov(Var, TVar);
3602 }
3603 } else if (auto *RetVec = llvm::dyn_cast<VariableVecOn32>(ReturnReg)) {
3604 auto *DestVecOn32 = llvm::cast<VariableVecOn32>(Dest);
3605 for (SizeT i = 0; i < DestVecOn32->ContainersPerVector; ++i) {
3606 _mov(DestVecOn32->getContainers()[i], RetVec->getContainers()[i]);
3607 }
3608 } else if (ReturnRegHi) {
3609 assert(Dest->getType() == IceType_i64);
3610 auto *Dest64On32 = llvm::cast<Variable64On32>(Dest);
3611 Variable *DestLo = Dest64On32->getLo();
3612 Variable *DestHi = Dest64On32->getHi();
3613 _mov(DestLo, ReturnReg);
3614 _mov(DestHi, ReturnRegHi);
3615 } else {
3616 assert(Dest->getType() == IceType_i32 || Dest->getType() == IceType_i16 ||
3617 Dest->getType() == IceType_i8 || Dest->getType() == IceType_i1 ||
3618 isScalarFloatingType(Dest->getType()) ||
3619 isVectorType(Dest->getType()));
3620 _mov(Dest, ReturnReg);
3621 }
3622 }
3623 }
3624
void TargetMIPS32::lowerCast(const InstCast *Instr) {
3626 InstCast::OpKind CastKind = Instr->getCastKind();
3627 Variable *Dest = Instr->getDest();
3628 Operand *Src0 = legalizeUndef(Instr->getSrc(0));
3629 const Type DestTy = Dest->getType();
3630 const Type Src0Ty = Src0->getType();
  const uint32_t SrcBits = CHAR_BITS * typeWidthInBytes(Src0Ty);
  const uint32_t ShiftAmount =
      (Src0Ty == IceType_i1 ? INT32_BITS - 1 : INT32_BITS - SrcBits);
  // The mask is only meaningful for i1/i8/i16 sources; clamp the shift so
  // that computing it for wider types is not undefined behavior.
  const uint32_t Mask =
      (Src0Ty == IceType_i1
           ? 1
           : (SrcBits >= INT32_BITS ? 0xFFFFFFFF : (1 << SrcBits) - 1));
3639
3640 if (isVectorType(DestTy)) {
3641 llvm::report_fatal_error("Cast: Destination type is vector");
3642 return;
3643 }
3644 switch (CastKind) {
3645 default:
3646 Func->setError("Cast type not supported");
3647 return;
3648 case InstCast::Sext: {
3649 if (DestTy == IceType_i64) {
3650 auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
3651 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3652 Variable *Src0R = legalizeToReg(Src0);
3653 Variable *T1_Lo = I32Reg();
3654 Variable *T2_Lo = I32Reg();
3655 Variable *T_Hi = I32Reg();
3656 if (Src0Ty == IceType_i1) {
3657 _sll(T1_Lo, Src0R, INT32_BITS - 1);
3658 _sra(T2_Lo, T1_Lo, INT32_BITS - 1);
3659 _mov(DestHi, T2_Lo);
3660 _mov(DestLo, T2_Lo);
3661 } else if (Src0Ty == IceType_i8 || Src0Ty == IceType_i16) {
3662 _sll(T1_Lo, Src0R, ShiftAmount);
3663 _sra(T2_Lo, T1_Lo, ShiftAmount);
3664 _sra(T_Hi, T2_Lo, INT32_BITS - 1);
3665 _mov(DestHi, T_Hi);
3666 _mov(DestLo, T2_Lo);
3667 } else if (Src0Ty == IceType_i32) {
3668 _mov(T1_Lo, Src0R);
3669 _sra(T_Hi, T1_Lo, INT32_BITS - 1);
3670 _mov(DestHi, T_Hi);
3671 _mov(DestLo, T1_Lo);
3672 }
3673 } else {
3674 Variable *Src0R = legalizeToReg(Src0);
3675 Variable *T1 = makeReg(DestTy);
3676 Variable *T2 = makeReg(DestTy);
3677 if (Src0Ty == IceType_i1 || Src0Ty == IceType_i8 ||
3678 Src0Ty == IceType_i16) {
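        // Shift the significant bits to the top of the register, then
        // arithmetic-shift them back down to replicate the sign bit; e.g.
        // for i8 this is sll t, src, 24 followed by sra t, t, 24.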
3679 _sll(T1, Src0R, ShiftAmount);
3680 _sra(T2, T1, ShiftAmount);
3681 _mov(Dest, T2);
3682 }
3683 }
3684 break;
3685 }
3686 case InstCast::Zext: {
3687 if (DestTy == IceType_i64) {
3688 auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
3689 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3690 Variable *Src0R = legalizeToReg(Src0);
3691 Variable *T_Lo = I32Reg();
3692 Variable *T_Hi = I32Reg();
3693
3694 if (Src0Ty == IceType_i1 || Src0Ty == IceType_i8 || Src0Ty == IceType_i16)
3695 _andi(T_Lo, Src0R, Mask);
3696 else if (Src0Ty == IceType_i32)
3697 _mov(T_Lo, Src0R);
3698 else
3699 assert(Src0Ty != IceType_i64);
3700 _mov(DestLo, T_Lo);
3701
3702 auto *Zero = getZero();
3703 _addiu(T_Hi, Zero, 0);
3704 _mov(DestHi, T_Hi);
3705 } else {
3706 Variable *Src0R = legalizeToReg(Src0);
3707 Variable *T = makeReg(DestTy);
3708 if (Src0Ty == IceType_i1 || Src0Ty == IceType_i8 ||
3709 Src0Ty == IceType_i16) {
3710 _andi(T, Src0R, Mask);
3711 _mov(Dest, T);
3712 }
3713 }
3714 break;
3715 }
3716 case InstCast::Trunc: {
3717 if (Src0Ty == IceType_i64)
3718 Src0 = loOperand(Src0);
3719 Variable *Src0R = legalizeToReg(Src0);
3720 Variable *T = makeReg(DestTy);
3721 switch (DestTy) {
3722 case IceType_i1:
3723 _andi(T, Src0R, 0x1);
3724 break;
3725 case IceType_i8:
3726 _andi(T, Src0R, 0xff);
3727 break;
3728 case IceType_i16:
3729 _andi(T, Src0R, 0xffff);
3730 break;
3731 default:
3732 _mov(T, Src0R);
3733 break;
3734 }
3735 _mov(Dest, T);
3736 break;
3737 }
3738 case InstCast::Fptrunc: {
3739 assert(Dest->getType() == IceType_f32);
3740 assert(Src0->getType() == IceType_f64);
3741 auto *DestR = legalizeToReg(Dest);
3742 auto *Src0R = legalizeToReg(Src0);
3743 _cvt_s_d(DestR, Src0R);
3744 _mov(Dest, DestR);
3745 break;
3746 }
3747 case InstCast::Fpext: {
3748 assert(Dest->getType() == IceType_f64);
3749 assert(Src0->getType() == IceType_f32);
3750 auto *DestR = legalizeToReg(Dest);
3751 auto *Src0R = legalizeToReg(Src0);
3752 _cvt_d_s(DestR, Src0R);
3753 _mov(Dest, DestR);
3754 break;
3755 }
3756 case InstCast::Fptosi:
3757 case InstCast::Fptoui: {
3758 if (llvm::isa<Variable64On32>(Dest)) {
3759 llvm::report_fatal_error("fp-to-i64 should have been prelowered.");
3760 return;
3761 }
3762 if (DestTy != IceType_i64) {
3763 if (Src0Ty == IceType_f32 && isScalarIntegerType(DestTy)) {
3764 Variable *Src0R = legalizeToReg(Src0);
3765 Variable *FTmp = makeReg(IceType_f32);
3766 _trunc_w_s(FTmp, Src0R);
3767 _mov(Dest, FTmp);
3768 return;
3769 }
3770 if (Src0Ty == IceType_f64 && isScalarIntegerType(DestTy)) {
3771 Variable *Src0R = legalizeToReg(Src0);
3772 Variable *FTmp = makeReg(IceType_f64);
3773 _trunc_w_d(FTmp, Src0R);
3774 _mov(Dest, FTmp);
3775 return;
3776 }
3777 }
3778 llvm::report_fatal_error("Destination is i64 in fp-to-i32");
3779 break;
3780 }
3781 case InstCast::Sitofp:
3782 case InstCast::Uitofp: {
3783 if (llvm::isa<Variable64On32>(Dest)) {
3784 llvm::report_fatal_error("i64-to-fp should have been prelowered.");
3785 return;
3786 }
3787 if (Src0Ty != IceType_i64) {
3788 Variable *Src0R = legalizeToReg(Src0);
3789 auto *T0R = Src0R;
3790 if (Src0Ty != IceType_i32) {
3791 T0R = makeReg(IceType_i32);
3792 if (CastKind == InstCast::Uitofp)
3793 lowerCast(InstCast::create(Func, InstCast::Zext, T0R, Src0R));
3794 else
3795 lowerCast(InstCast::create(Func, InstCast::Sext, T0R, Src0R));
3796 }
3797 if (isScalarIntegerType(Src0Ty) && DestTy == IceType_f32) {
3798 Variable *FTmp1 = makeReg(IceType_f32);
3799 Variable *FTmp2 = makeReg(IceType_f32);
3800 _mtc1(FTmp1, T0R);
3801 _cvt_s_w(FTmp2, FTmp1);
3802 _mov(Dest, FTmp2);
3803 return;
3804 }
3805 if (isScalarIntegerType(Src0Ty) && DestTy == IceType_f64) {
3806 Variable *FTmp1 = makeReg(IceType_f64);
3807 Variable *FTmp2 = makeReg(IceType_f64);
3808 _mtc1(FTmp1, T0R);
3809 _cvt_d_w(FTmp2, FTmp1);
3810 _mov(Dest, FTmp2);
3811 return;
3812 }
3813 }
3814 llvm::report_fatal_error("Source is i64 in i32-to-fp");
3815 break;
3816 }
3817 case InstCast::Bitcast: {
3818 Operand *Src0 = Instr->getSrc(0);
3819 if (DestTy == Src0->getType()) {
3820 auto *Assign = InstAssign::create(Func, Dest, Src0);
3821 lowerAssign(Assign);
3822 return;
3823 }
3824 if (isVectorType(DestTy) || isVectorType(Src0->getType())) {
3825 llvm::report_fatal_error(
3826 "Bitcast: vector type should have been prelowered.");
3827 return;
3828 }
3829 switch (DestTy) {
3830 case IceType_NUM:
3831 case IceType_void:
3832 llvm::report_fatal_error("Unexpected bitcast.");
3833 case IceType_i1:
3834 UnimplementedLoweringError(this, Instr);
3835 break;
3836 case IceType_i8:
3837 assert(Src0->getType() == IceType_v8i1);
    llvm::report_fatal_error(
        "v8i1 to i8 conversion should have been prelowered.");
3840 break;
3841 case IceType_i16:
3842 assert(Src0->getType() == IceType_v16i1);
    llvm::report_fatal_error(
        "v16i1 to i16 conversion should have been prelowered.");
3845 break;
3846 case IceType_i32:
3847 case IceType_f32: {
3848 Variable *Src0R = legalizeToReg(Src0);
3849 _mov(Dest, Src0R);
3850 break;
3851 }
3852 case IceType_i64: {
3853 assert(Src0->getType() == IceType_f64);
3854 Variable *Src0R = legalizeToReg(Src0);
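    // The i64 temporary's halves are forced onto the stack
    // (setMustNotHaveReg), and the fake defs mark them as defined before the
    // partial writes performed by mov_fp64_to_i64.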
3855 auto *T = llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64));
3856 T->initHiLo(Func);
3857 T->getHi()->setMustNotHaveReg();
3858 T->getLo()->setMustNotHaveReg();
3859 Context.insert<InstFakeDef>(T->getHi());
3860 Context.insert<InstFakeDef>(T->getLo());
3861 _mov_fp64_to_i64(T->getHi(), Src0R, Int64_Hi);
3862 _mov_fp64_to_i64(T->getLo(), Src0R, Int64_Lo);
3863 lowerAssign(InstAssign::create(Func, Dest, T));
3864 break;
3865 }
3866 case IceType_f64: {
3867 assert(Src0->getType() == IceType_i64);
3868 const uint32_t Mask = 0xFFFFFFFF;
3869 if (auto *C64 = llvm::dyn_cast<ConstantInteger64>(Src0)) {
3870 Variable *RegHi, *RegLo;
3871 const uint64_t Value = C64->getValue();
3872 uint64_t Upper32Bits = (Value >> INT32_BITS) & Mask;
3873 uint64_t Lower32Bits = Value & Mask;
3874 RegLo = legalizeToReg(Ctx->getConstantInt32(Lower32Bits));
3875 RegHi = legalizeToReg(Ctx->getConstantInt32(Upper32Bits));
3876 _mov(Dest, RegHi, RegLo);
3877 } else {
3878 auto *Var64On32 = llvm::cast<Variable64On32>(Src0);
3879 auto *RegLo = legalizeToReg(loOperand(Var64On32));
3880 auto *RegHi = legalizeToReg(hiOperand(Var64On32));
3881 _mov(Dest, RegHi, RegLo);
3882 }
3883 break;
3884 }
3885 default:
3886 llvm::report_fatal_error("Unexpected bitcast.");
3887 }
3888 break;
3889 }
3890 }
3891 }
3892
void TargetMIPS32::lowerExtractElement(const InstExtractElement *Instr) {
3894 Variable *Dest = Instr->getDest();
3895 const Type DestTy = Dest->getType();
3896 Operand *Src1 = Instr->getSrc(1);
3897 if (const auto *Imm = llvm::dyn_cast<ConstantInteger32>(Src1)) {
3898 const uint32_t Index = Imm->getValue();
3899 Variable *TDest = makeReg(DestTy);
3900 Variable *TReg = makeReg(DestTy);
3901 auto *Src0 = legalizeUndef(Instr->getSrc(0));
3902 auto *Src0R = llvm::dyn_cast<VariableVecOn32>(Src0);
3903 // Number of elements in each container
3904 uint32_t ElemPerCont =
3905 typeNumElements(Src0->getType()) / Src0R->ContainersPerVector;
3906 auto *Src = Src0R->getContainers()[Index / ElemPerCont];
3907 auto *SrcE = legalizeToReg(Src);
3908 // Position of the element in the container
3909 uint32_t PosInCont = Index % ElemPerCont;
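    // For example, extracting element 5 of a v16i8 reads container 1
    // (5 / 4) and byte 1 within it (5 % 4), i.e. srl by 8 then andi 0xff.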
3910 if (ElemPerCont == 1) {
3911 _mov(TDest, SrcE);
3912 } else if (ElemPerCont == 2) {
3913 switch (PosInCont) {
3914 case 0:
3915 _andi(TDest, SrcE, 0xffff);
3916 break;
3917 case 1:
3918 _srl(TDest, SrcE, 16);
3919 break;
3920 default:
3921 llvm::report_fatal_error("ExtractElement: Invalid PosInCont");
3922 break;
3923 }
3924 } else if (ElemPerCont == 4) {
3925 switch (PosInCont) {
3926 case 0:
3927 _andi(TDest, SrcE, 0xff);
3928 break;
3929 case 1:
3930 _srl(TReg, SrcE, 8);
3931 _andi(TDest, TReg, 0xff);
3932 break;
3933 case 2:
3934 _srl(TReg, SrcE, 16);
3935 _andi(TDest, TReg, 0xff);
3936 break;
3937 case 3:
3938 _srl(TDest, SrcE, 24);
3939 break;
3940 default:
3941 llvm::report_fatal_error("ExtractElement: Invalid PosInCont");
3942 break;
3943 }
3944 }
3945 if (typeElementType(Src0R->getType()) == IceType_i1) {
3946 Variable *TReg1 = makeReg(DestTy);
3947 _andi(TReg1, TDest, 0x1);
3948 _mov(Dest, TReg1);
3949 } else {
3950 _mov(Dest, TDest);
3951 }
3952 return;
3953 }
3954 llvm::report_fatal_error("ExtractElement requires a constant index");
3955 }
3956
void TargetMIPS32::lowerFcmp(const InstFcmp *Instr) {
3958 Variable *Dest = Instr->getDest();
3959 if (isVectorType(Dest->getType())) {
3960 llvm::report_fatal_error("Fcmp: Destination type is vector");
3961 return;
3962 }
3963
3964 auto *Src0 = Instr->getSrc(0);
3965 auto *Src1 = Instr->getSrc(1);
3966 auto *Zero = getZero();
3967
3968 InstFcmp::FCond Cond = Instr->getCondition();
3969 auto *DestR = makeReg(IceType_i32);
3970 auto *Src0R = legalizeToReg(Src0);
3971 auto *Src1R = legalizeToReg(Src1);
3972 const Type Src0Ty = Src0->getType();
3973
3974 Operand *FCC0 = OperandMIPS32FCC::create(getFunc(), OperandMIPS32FCC::FCC0);
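  // Every case below follows the same pattern: c.<cond>.{s,d} sets the FCC0
  // flag, DestR is preloaded with 1, and movf/movt conditionally overwrites
  // it with $zero, leaving the boolean result in DestR. Conditions without a
  // direct c.<cond> encoding (e.g. Ogt) test the complementary condition and
  // use the opposite conditional move.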
3975
3976 switch (Cond) {
3977 default: {
3978 llvm::report_fatal_error("Unhandled fp comparison.");
3979 return;
3980 }
3981 case InstFcmp::False: {
3982 Context.insert<InstFakeUse>(Src0R);
3983 Context.insert<InstFakeUse>(Src1R);
3984 _addiu(DestR, Zero, 0);
3985 _mov(Dest, DestR);
3986 break;
3987 }
3988 case InstFcmp::Oeq: {
3989 if (Src0Ty == IceType_f32) {
3990 _c_eq_s(Src0R, Src1R);
3991 } else {
3992 _c_eq_d(Src0R, Src1R);
3993 }
3994 _addiu(DestR, Zero, 1);
3995 _movf(DestR, Zero, FCC0);
3996 _mov(Dest, DestR);
3997 break;
3998 }
3999 case InstFcmp::Ogt: {
4000 if (Src0Ty == IceType_f32) {
4001 _c_ule_s(Src0R, Src1R);
4002 } else {
4003 _c_ule_d(Src0R, Src1R);
4004 }
4005 _addiu(DestR, Zero, 1);
4006 _movt(DestR, Zero, FCC0);
4007 _mov(Dest, DestR);
4008 break;
4009 }
4010 case InstFcmp::Oge: {
4011 if (Src0Ty == IceType_f32) {
4012 _c_ult_s(Src0R, Src1R);
4013 } else {
4014 _c_ult_d(Src0R, Src1R);
4015 }
4016 _addiu(DestR, Zero, 1);
4017 _movt(DestR, Zero, FCC0);
4018 _mov(Dest, DestR);
4019 break;
4020 }
4021 case InstFcmp::Olt: {
4022 if (Src0Ty == IceType_f32) {
4023 _c_olt_s(Src0R, Src1R);
4024 } else {
4025 _c_olt_d(Src0R, Src1R);
4026 }
4027 _addiu(DestR, Zero, 1);
4028 _movf(DestR, Zero, FCC0);
4029 _mov(Dest, DestR);
4030 break;
4031 }
4032 case InstFcmp::Ole: {
4033 if (Src0Ty == IceType_f32) {
4034 _c_ole_s(Src0R, Src1R);
4035 } else {
4036 _c_ole_d(Src0R, Src1R);
4037 }
4038 _addiu(DestR, Zero, 1);
4039 _movf(DestR, Zero, FCC0);
4040 _mov(Dest, DestR);
4041 break;
4042 }
4043 case InstFcmp::One: {
4044 if (Src0Ty == IceType_f32) {
4045 _c_ueq_s(Src0R, Src1R);
4046 } else {
4047 _c_ueq_d(Src0R, Src1R);
4048 }
4049 _addiu(DestR, Zero, 1);
4050 _movt(DestR, Zero, FCC0);
4051 _mov(Dest, DestR);
4052 break;
4053 }
4054 case InstFcmp::Ord: {
4055 if (Src0Ty == IceType_f32) {
4056 _c_un_s(Src0R, Src1R);
4057 } else {
4058 _c_un_d(Src0R, Src1R);
4059 }
4060 _addiu(DestR, Zero, 1);
4061 _movt(DestR, Zero, FCC0);
4062 _mov(Dest, DestR);
4063 break;
4064 }
4065 case InstFcmp::Ueq: {
4066 if (Src0Ty == IceType_f32) {
4067 _c_ueq_s(Src0R, Src1R);
4068 } else {
4069 _c_ueq_d(Src0R, Src1R);
4070 }
4071 _addiu(DestR, Zero, 1);
4072 _movf(DestR, Zero, FCC0);
4073 _mov(Dest, DestR);
4074 break;
4075 }
4076 case InstFcmp::Ugt: {
4077 if (Src0Ty == IceType_f32) {
4078 _c_ole_s(Src0R, Src1R);
4079 } else {
4080 _c_ole_d(Src0R, Src1R);
4081 }
4082 _addiu(DestR, Zero, 1);
4083 _movt(DestR, Zero, FCC0);
4084 _mov(Dest, DestR);
4085 break;
4086 }
4087 case InstFcmp::Uge: {
4088 if (Src0Ty == IceType_f32) {
4089 _c_olt_s(Src0R, Src1R);
4090 } else {
4091 _c_olt_d(Src0R, Src1R);
4092 }
4093 _addiu(DestR, Zero, 1);
4094 _movt(DestR, Zero, FCC0);
4095 _mov(Dest, DestR);
4096 break;
4097 }
4098 case InstFcmp::Ult: {
4099 if (Src0Ty == IceType_f32) {
4100 _c_ult_s(Src0R, Src1R);
4101 } else {
4102 _c_ult_d(Src0R, Src1R);
4103 }
4104 _addiu(DestR, Zero, 1);
4105 _movf(DestR, Zero, FCC0);
4106 _mov(Dest, DestR);
4107 break;
4108 }
4109 case InstFcmp::Ule: {
4110 if (Src0Ty == IceType_f32) {
4111 _c_ule_s(Src0R, Src1R);
4112 } else {
4113 _c_ule_d(Src0R, Src1R);
4114 }
4115 _addiu(DestR, Zero, 1);
4116 _movf(DestR, Zero, FCC0);
4117 _mov(Dest, DestR);
4118 break;
4119 }
4120 case InstFcmp::Une: {
4121 if (Src0Ty == IceType_f32) {
4122 _c_eq_s(Src0R, Src1R);
4123 } else {
4124 _c_eq_d(Src0R, Src1R);
4125 }
4126 _addiu(DestR, Zero, 1);
4127 _movt(DestR, Zero, FCC0);
4128 _mov(Dest, DestR);
4129 break;
4130 }
4131 case InstFcmp::Uno: {
4132 if (Src0Ty == IceType_f32) {
4133 _c_un_s(Src0R, Src1R);
4134 } else {
4135 _c_un_d(Src0R, Src1R);
4136 }
4137 _addiu(DestR, Zero, 1);
4138 _movf(DestR, Zero, FCC0);
4139 _mov(Dest, DestR);
4140 break;
4141 }
4142 case InstFcmp::True: {
4143 Context.insert<InstFakeUse>(Src0R);
4144 Context.insert<InstFakeUse>(Src1R);
4145 _addiu(DestR, Zero, 1);
4146 _mov(Dest, DestR);
4147 break;
4148 }
4149 }
4150 }
4151
void TargetMIPS32::lower64Icmp(const InstIcmp *Instr) {
4153 Operand *Src0 = legalize(Instr->getSrc(0));
4154 Operand *Src1 = legalize(Instr->getSrc(1));
4155 Variable *Dest = Instr->getDest();
4156 InstIcmp::ICond Condition = Instr->getCondition();
4157
4158 Variable *Src0LoR = legalizeToReg(loOperand(Src0));
4159 Variable *Src0HiR = legalizeToReg(hiOperand(Src0));
4160 Variable *Src1LoR = legalizeToReg(loOperand(Src1));
4161 Variable *Src1HiR = legalizeToReg(hiOperand(Src1));
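  // As in lowerBr(), each case combines a high-word compare with a low-word
  // compare via movz: when the high words are equal (their xor is zero), the
  // low-word result is selected, otherwise the high-word result stands.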
4162
4163 switch (Condition) {
4164 default:
4165 llvm_unreachable("unexpected condition");
4166 return;
4167 case InstIcmp::Eq: {
4168 auto *T1 = I32Reg();
4169 auto *T2 = I32Reg();
4170 auto *T3 = I32Reg();
4171 auto *T4 = I32Reg();
4172 _xor(T1, Src0HiR, Src1HiR);
4173 _xor(T2, Src0LoR, Src1LoR);
4174 _or(T3, T1, T2);
4175 _sltiu(T4, T3, 1);
4176 _mov(Dest, T4);
4177 return;
4178 }
4179 case InstIcmp::Ne: {
4180 auto *T1 = I32Reg();
4181 auto *T2 = I32Reg();
4182 auto *T3 = I32Reg();
4183 auto *T4 = I32Reg();
4184 _xor(T1, Src0HiR, Src1HiR);
4185 _xor(T2, Src0LoR, Src1LoR);
4186 _or(T3, T1, T2);
4187 _sltu(T4, getZero(), T3);
4188 _mov(Dest, T4);
4189 return;
4190 }
4191 case InstIcmp::Sgt: {
4192 auto *T1 = I32Reg();
4193 auto *T2 = I32Reg();
4194 auto *T3 = I32Reg();
4195 _xor(T1, Src0HiR, Src1HiR);
4196 _slt(T2, Src1HiR, Src0HiR);
4197 _sltu(T3, Src1LoR, Src0LoR);
4198 _movz(T2, T3, T1);
4199 _mov(Dest, T2);
4200 return;
4201 }
4202 case InstIcmp::Ugt: {
4203 auto *T1 = I32Reg();
4204 auto *T2 = I32Reg();
4205 auto *T3 = I32Reg();
4206 _xor(T1, Src0HiR, Src1HiR);
4207 _sltu(T2, Src1HiR, Src0HiR);
4208 _sltu(T3, Src1LoR, Src0LoR);
4209 _movz(T2, T3, T1);
4210 _mov(Dest, T2);
4211 return;
4212 }
4213 case InstIcmp::Sge: {
4214 auto *T1 = I32Reg();
4215 auto *T2 = I32Reg();
4216 auto *T3 = I32Reg();
4217 auto *T4 = I32Reg();
4218 auto *T5 = I32Reg();
4219 _xor(T1, Src0HiR, Src1HiR);
4220 _slt(T2, Src0HiR, Src1HiR);
4221 _xori(T3, T2, 1);
4222 _sltu(T4, Src0LoR, Src1LoR);
4223 _xori(T5, T4, 1);
4224 _movz(T3, T5, T1);
4225 _mov(Dest, T3);
4226 return;
4227 }
4228 case InstIcmp::Uge: {
4229 auto *T1 = I32Reg();
4230 auto *T2 = I32Reg();
4231 auto *T3 = I32Reg();
4232 auto *T4 = I32Reg();
4233 auto *T5 = I32Reg();
4234 _xor(T1, Src0HiR, Src1HiR);
4235 _sltu(T2, Src0HiR, Src1HiR);
4236 _xori(T3, T2, 1);
4237 _sltu(T4, Src0LoR, Src1LoR);
4238 _xori(T5, T4, 1);
4239 _movz(T3, T5, T1);
4240 _mov(Dest, T3);
4241 return;
4242 }
4243 case InstIcmp::Slt: {
4244 auto *T1 = I32Reg();
4245 auto *T2 = I32Reg();
4246 auto *T3 = I32Reg();
4247 _xor(T1, Src0HiR, Src1HiR);
4248 _slt(T2, Src0HiR, Src1HiR);
4249 _sltu(T3, Src0LoR, Src1LoR);
4250 _movz(T2, T3, T1);
4251 _mov(Dest, T2);
4252 return;
4253 }
4254 case InstIcmp::Ult: {
4255 auto *T1 = I32Reg();
4256 auto *T2 = I32Reg();
4257 auto *T3 = I32Reg();
4258 _xor(T1, Src0HiR, Src1HiR);
4259 _sltu(T2, Src0HiR, Src1HiR);
4260 _sltu(T3, Src0LoR, Src1LoR);
4261 _movz(T2, T3, T1);
4262 _mov(Dest, T2);
4263 return;
4264 }
4265 case InstIcmp::Sle: {
4266 auto *T1 = I32Reg();
4267 auto *T2 = I32Reg();
4268 auto *T3 = I32Reg();
4269 auto *T4 = I32Reg();
4270 auto *T5 = I32Reg();
4271 _xor(T1, Src0HiR, Src1HiR);
4272 _slt(T2, Src1HiR, Src0HiR);
4273 _xori(T3, T2, 1);
4274 _sltu(T4, Src1LoR, Src0LoR);
4275 _xori(T5, T4, 1);
4276 _movz(T3, T5, T1);
4277 _mov(Dest, T3);
4278 return;
4279 }
4280 case InstIcmp::Ule: {
4281 auto *T1 = I32Reg();
4282 auto *T2 = I32Reg();
4283 auto *T3 = I32Reg();
4284 auto *T4 = I32Reg();
4285 auto *T5 = I32Reg();
4286 _xor(T1, Src0HiR, Src1HiR);
4287 _sltu(T2, Src1HiR, Src0HiR);
4288 _xori(T3, T2, 1);
4289 _sltu(T4, Src1LoR, Src0LoR);
4290 _xori(T5, T4, 1);
4291 _movz(T3, T5, T1);
4292 _mov(Dest, T3);
4293 return;
4294 }
4295 }
4296 }
4297
void TargetMIPS32::lowerIcmp(const InstIcmp *Instr) {
4299 auto *Src0 = Instr->getSrc(0);
4300 auto *Src1 = Instr->getSrc(1);
4301 if (Src0->getType() == IceType_i64) {
4302 lower64Icmp(Instr);
4303 return;
4304 }
4305 Variable *Dest = Instr->getDest();
4306 if (isVectorType(Dest->getType())) {
4307 llvm::report_fatal_error("Icmp: Destination type is vector");
4308 return;
4309 }
4310 InstIcmp::ICond Cond = Instr->getCondition();
4311 auto *Src0R = legalizeToReg(Src0);
4312 auto *Src1R = legalizeToReg(Src1);
4313 const Type Src0Ty = Src0R->getType();
4314 const uint32_t ShAmt = INT32_BITS - getScalarIntBitWidth(Src0->getType());
4315 Variable *Src0RT = I32Reg();
4316 Variable *Src1RT = I32Reg();
4317
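  // For types narrower than i32, shift both operands left so that their
  // significant bits occupy the top of the register. This preserves equality
  // and both signed and unsigned ordering, so no explicit sign or zero
  // extension is needed before comparing.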
4318 if (Src0Ty != IceType_i32) {
4319 _sll(Src0RT, Src0R, ShAmt);
4320 _sll(Src1RT, Src1R, ShAmt);
4321 } else {
4322 _mov(Src0RT, Src0R);
4323 _mov(Src1RT, Src1R);
4324 }
4325
4326 switch (Cond) {
4327 case InstIcmp::Eq: {
4328 auto *DestT = I32Reg();
4329 auto *T = I32Reg();
4330 _xor(T, Src0RT, Src1RT);
4331 _sltiu(DestT, T, 1);
4332 _mov(Dest, DestT);
4333 return;
4334 }
4335 case InstIcmp::Ne: {
4336 auto *DestT = I32Reg();
4337 auto *T = I32Reg();
4338 auto *Zero = getZero();
4339 _xor(T, Src0RT, Src1RT);
4340 _sltu(DestT, Zero, T);
4341 _mov(Dest, DestT);
4342 return;
4343 }
4344 case InstIcmp::Ugt: {
4345 auto *DestT = I32Reg();
4346 _sltu(DestT, Src1RT, Src0RT);
4347 _mov(Dest, DestT);
4348 return;
4349 }
4350 case InstIcmp::Uge: {
4351 auto *DestT = I32Reg();
4352 auto *T = I32Reg();
4353 _sltu(T, Src0RT, Src1RT);
4354 _xori(DestT, T, 1);
4355 _mov(Dest, DestT);
4356 return;
4357 }
4358 case InstIcmp::Ult: {
4359 auto *DestT = I32Reg();
4360 _sltu(DestT, Src0RT, Src1RT);
4361 _mov(Dest, DestT);
4362 return;
4363 }
4364 case InstIcmp::Ule: {
4365 auto *DestT = I32Reg();
4366 auto *T = I32Reg();
4367 _sltu(T, Src1RT, Src0RT);
4368 _xori(DestT, T, 1);
4369 _mov(Dest, DestT);
4370 return;
4371 }
4372 case InstIcmp::Sgt: {
4373 auto *DestT = I32Reg();
4374 _slt(DestT, Src1RT, Src0RT);
4375 _mov(Dest, DestT);
4376 return;
4377 }
4378 case InstIcmp::Sge: {
4379 auto *DestT = I32Reg();
4380 auto *T = I32Reg();
4381 _slt(T, Src0RT, Src1RT);
4382 _xori(DestT, T, 1);
4383 _mov(Dest, DestT);
4384 return;
4385 }
4386 case InstIcmp::Slt: {
4387 auto *DestT = I32Reg();
4388 _slt(DestT, Src0RT, Src1RT);
4389 _mov(Dest, DestT);
4390 return;
4391 }
4392 case InstIcmp::Sle: {
4393 auto *DestT = I32Reg();
4394 auto *T = I32Reg();
4395 _slt(T, Src1RT, Src0RT);
4396 _xori(DestT, T, 1);
4397 _mov(Dest, DestT);
4398 return;
4399 }
4400 default:
4401 llvm_unreachable("Invalid ICmp operator");
4402 return;
4403 }
4404 }
4405
void TargetMIPS32::lowerInsertElement(const InstInsertElement *Instr) {
4407 Variable *Dest = Instr->getDest();
4408 const Type DestTy = Dest->getType();
4409 Operand *Src2 = Instr->getSrc(2);
4410 if (const auto *Imm = llvm::dyn_cast<ConstantInteger32>(Src2)) {
4411 const uint32_t Index = Imm->getValue();
4412 // Vector to insert in
4413 auto *Src0 = legalizeUndef(Instr->getSrc(0));
4414 auto *Src0R = llvm::dyn_cast<VariableVecOn32>(Src0);
4415 // Number of elements in each container
4416 uint32_t ElemPerCont =
4417 typeNumElements(Src0->getType()) / Src0R->ContainersPerVector;
4418 // Source Element
4419 auto *Src = Src0R->getContainers()[Index / ElemPerCont];
4420 auto *SrcE = Src;
4421 if (ElemPerCont > 1)
4422 SrcE = legalizeToReg(Src);
4423 // Dest is a vector
4424 auto *VDest = llvm::dyn_cast<VariableVecOn32>(Dest);
4425 VDest->initVecElement(Func);
4426 // Temp vector variable
4427 auto *TDest = makeReg(DestTy);
4428 auto *TVDest = llvm::dyn_cast<VariableVecOn32>(TDest);
4429 TVDest->initVecElement(Func);
4430 // Destination element
4431 auto *DstE = TVDest->getContainers()[Index / ElemPerCont];
4432 // Element to insert
4433 auto *Src1R = legalizeToReg(Instr->getSrc(1));
4434 auto *TReg1 = makeReg(IceType_i32);
4435 auto *TReg2 = makeReg(IceType_i32);
4436 auto *TReg3 = makeReg(IceType_i32);
4437 auto *TReg4 = makeReg(IceType_i32);
4438 auto *TReg5 = makeReg(IceType_i32);
4439 auto *TDReg = makeReg(IceType_i32);
4440 // Position of the element in the container
4441 uint32_t PosInCont = Index % ElemPerCont;
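    // Strategy: copy the source vector into a temporary vector, rebuild the
    // one container that holds the target element by masking out its old
    // lane and OR-ing in the new value, then assign the temporary to Dest.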
4442 // Load source vector in a temporary vector
4443 for (SizeT i = 0; i < TVDest->ContainersPerVector; ++i) {
4444 auto *DCont = TVDest->getContainers()[i];
      // Do not copy into DstE, since it is fully redefined below.
4446 if (DCont == DstE)
4447 continue;
4448 auto *SCont = Src0R->getContainers()[i];
4449 auto *TReg = makeReg(IceType_i32);
4450 _mov(TReg, SCont);
4451 _mov(DCont, TReg);
4452 }
4453 // Insert the element
4454 if (ElemPerCont == 1) {
4455 _mov(DstE, Src1R);
4456 } else if (ElemPerCont == 2) {
4457 switch (PosInCont) {
4458 case 0:
4459 _andi(TReg1, Src1R, 0xffff); // Clear upper 16-bits of source
4460 _srl(TReg2, SrcE, 16);
4461 _sll(TReg3, TReg2, 16); // Clear lower 16-bits of element
4462 _or(TDReg, TReg1, TReg3);
4463 _mov(DstE, TDReg);
4464 break;
4465 case 1:
4466 _sll(TReg1, Src1R, 16); // Clear lower 16-bits of source
4467 _sll(TReg2, SrcE, 16);
4468 _srl(TReg3, TReg2, 16); // Clear upper 16-bits of element
4469 _or(TDReg, TReg1, TReg3);
4470 _mov(DstE, TDReg);
4471 break;
4472 default:
4473 llvm::report_fatal_error("InsertElement: Invalid PosInCont");
4474 break;
4475 }
4476 } else if (ElemPerCont == 4) {
4477 switch (PosInCont) {
4478 case 0:
4479 _andi(TReg1, Src1R, 0xff); // Clear bits[31:8] of source
4480 _srl(TReg2, SrcE, 8);
4481 _sll(TReg3, TReg2, 8); // Clear bits[7:0] of element
4482 _or(TDReg, TReg1, TReg3);
4483 _mov(DstE, TDReg);
4484 break;
4485 case 1:
4486 _andi(TReg1, Src1R, 0xff); // Clear bits[31:8] of source
4487 _sll(TReg5, TReg1, 8); // Position in the destination
4488 _lui(TReg2, Ctx->getConstantInt32(0xffff));
4489 _ori(TReg3, TReg2, 0x00ff);
4490 _and(TReg4, SrcE, TReg3); // Clear bits[15:8] of element
4491 _or(TDReg, TReg5, TReg4);
4492 _mov(DstE, TDReg);
4493 break;
4494 case 2:
4495 _andi(TReg1, Src1R, 0xff); // Clear bits[31:8] of source
4496 _sll(TReg5, TReg1, 16); // Position in the destination
4497 _lui(TReg2, Ctx->getConstantInt32(0xff00));
4498 _ori(TReg3, TReg2, 0xffff);
        _and(TReg4, SrcE, TReg3); // Clear bits[23:16] of element
4500 _or(TDReg, TReg5, TReg4);
4501 _mov(DstE, TDReg);
4502 break;
4503 case 3:
4504 _sll(TReg1, Src1R, 24); // Position in the destination
4505 _sll(TReg2, SrcE, 8);
4506 _srl(TReg3, TReg2, 8); // Clear bits[31:24] of element
4507 _or(TDReg, TReg1, TReg3);
4508 _mov(DstE, TDReg);
4509 break;
4510 default:
4511 llvm::report_fatal_error("InsertElement: Invalid PosInCont");
4512 break;
4513 }
4514 }
4515 // Write back temporary vector to the destination
4516 auto *Assign = InstAssign::create(Func, Dest, TDest);
4517 lowerAssign(Assign);
4518 return;
4519 }
4520 llvm::report_fatal_error("InsertElement requires a constant index");
4521 }
4522
void TargetMIPS32::createArithInst(Intrinsics::AtomicRMWOperation Operation,
                                   Variable *Dest, Variable *Src0,
                                   Variable *Src1) {
4526 switch (Operation) {
4527 default:
4528 llvm::report_fatal_error("Unknown AtomicRMW operation");
4529 case Intrinsics::AtomicExchange:
4530 llvm::report_fatal_error("Can't handle Atomic xchg operation");
4531 case Intrinsics::AtomicAdd:
4532 _addu(Dest, Src0, Src1);
4533 break;
4534 case Intrinsics::AtomicAnd:
4535 _and(Dest, Src0, Src1);
4536 break;
4537 case Intrinsics::AtomicSub:
4538 _subu(Dest, Src0, Src1);
4539 break;
4540 case Intrinsics::AtomicOr:
4541 _or(Dest, Src0, Src1);
4542 break;
4543 case Intrinsics::AtomicXor:
4544 _xor(Dest, Src0, Src1);
4545 break;
4546 }
4547 }
4548
void TargetMIPS32::lowerIntrinsic(const InstIntrinsic *Instr) {
4550 Variable *Dest = Instr->getDest();
4551 Type DestTy = (Dest == nullptr) ? IceType_void : Dest->getType();
4552
4553 Intrinsics::IntrinsicID ID = Instr->getIntrinsicID();
4554 switch (ID) {
4555 case Intrinsics::AtomicLoad: {
4556 assert(isScalarIntegerType(DestTy));
    // We require the memory address to be naturally aligned; given that,
    // normal loads are atomic.
4559 if (!Intrinsics::isMemoryOrderValid(
4560 ID, getConstantMemoryOrder(Instr->getArg(1)))) {
4561 Func->setError("Unexpected memory ordering for AtomicLoad");
4562 return;
4563 }
4564 if (DestTy == IceType_i64) {
4565 llvm::report_fatal_error("AtomicLoad.i64 should have been prelowered.");
4566 return;
4567 } else if (DestTy == IceType_i32) {
4568 auto *T1 = makeReg(DestTy);
4569 auto *RegAt = getPhysicalRegister(RegMIPS32::Reg_AT);
4570 auto *Base = legalizeToReg(Instr->getArg(0));
4571 auto *Addr = formMemoryOperand(Base, DestTy);
4572 InstMIPS32Label *Retry = InstMIPS32Label::create(Func, this);
4573 InstMIPS32Label *Exit = InstMIPS32Label::create(Func, this);
4574 constexpr CfgNode *NoTarget = nullptr;
4575 _sync();
4576 Context.insert(Retry);
4577 Sandboxer(this).ll(T1, Addr);
4578 _br(NoTarget, NoTarget, T1, getZero(), Exit, CondMIPS32::Cond::NE);
      _addiu(RegAt, getZero(), 0); // Loaded value was zero; write back zero
4580 Sandboxer(this).sc(RegAt, Addr);
4581 _br(NoTarget, NoTarget, RegAt, getZero(), Retry, CondMIPS32::Cond::EQ);
4582 Context.insert(Exit);
4583 _sync();
4584 _mov(Dest, T1);
4585 Context.insert<InstFakeUse>(T1);
4586 } else {
4587 const uint32_t Mask = (1 << (CHAR_BITS * typeWidthInBytes(DestTy))) - 1;
4588 auto *Base = legalizeToReg(Instr->getArg(0));
4589 auto *T1 = makeReg(IceType_i32);
4590 auto *T2 = makeReg(IceType_i32);
4591 auto *T3 = makeReg(IceType_i32);
4592 auto *T4 = makeReg(IceType_i32);
4593 auto *T5 = makeReg(IceType_i32);
4594 auto *T6 = makeReg(IceType_i32);
4595 auto *SrcMask = makeReg(IceType_i32);
4596 auto *Tdest = makeReg(IceType_i32);
4597 auto *RegAt = getPhysicalRegister(RegMIPS32::Reg_AT);
4598 InstMIPS32Label *Retry = InstMIPS32Label::create(Func, this);
4599 InstMIPS32Label *Exit = InstMIPS32Label::create(Func, this);
4600 constexpr CfgNode *NoTarget = nullptr;
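      // Sub-word atomic loads operate on the naturally aligned word that
      // contains the addressed bytes: align the address down, build a lane
      // mask from the low address bits, LL/SC on the containing word, then
      // shift and mask the lane out of the loaded value.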
4601 _sync();
4602 _addiu(T1, getZero(), -4); // Address mask 0xFFFFFFFC
4603 _andi(T2, Base, 3); // Last two bits of the address
4604 _and(T3, Base, T1); // Align the address
4605 _sll(T4, T2, 3);
4606 _ori(T5, getZero(), Mask);
4607 _sllv(SrcMask, T5, T4); // Source mask
4608 auto *Addr = formMemoryOperand(T3, IceType_i32);
4609 Context.insert(Retry);
4610 Sandboxer(this).ll(T6, Addr);
4611 _and(Tdest, T6, SrcMask);
4612 _br(NoTarget, NoTarget, T6, getZero(), Exit, CondMIPS32::Cond::NE);
      _addiu(RegAt, getZero(), 0); // Loaded value was zero; write back zero
4614 Sandboxer(this).sc(RegAt, Addr);
4615 _br(NoTarget, NoTarget, RegAt, getZero(), Retry, CondMIPS32::Cond::EQ);
4616 Context.insert(Exit);
4617 auto *T7 = makeReg(IceType_i32);
4618 auto *T8 = makeReg(IceType_i32);
4619 _srlv(T7, Tdest, T4);
4620 _andi(T8, T7, Mask);
4621 _sync();
4622 _mov(Dest, T8);
4623 Context.insert<InstFakeUse>(T6);
4624 Context.insert<InstFakeUse>(SrcMask);
4625 }
4626 return;
4627 }
4628 case Intrinsics::AtomicStore: {
    // We require the memory address to be naturally aligned; given that,
    // normal stores are atomic.
4631 if (!Intrinsics::isMemoryOrderValid(
4632 ID, getConstantMemoryOrder(Instr->getArg(2)))) {
4633 Func->setError("Unexpected memory ordering for AtomicStore");
4634 return;
4635 }
4636 auto *Val = Instr->getArg(0);
4637 auto Ty = Val->getType();
4638 if (Ty == IceType_i64) {
4639 llvm::report_fatal_error("AtomicStore.i64 should have been prelowered.");
4640 return;
4641 } else if (Ty == IceType_i32) {
4642 auto *Val = legalizeToReg(Instr->getArg(0));
4643 auto *Base = legalizeToReg(Instr->getArg(1));
4644 auto *Addr = formMemoryOperand(Base, Ty);
4645 InstMIPS32Label *Retry = InstMIPS32Label::create(Func, this);
4646 constexpr CfgNode *NoTarget = nullptr;
4647 auto *T1 = makeReg(IceType_i32);
4648 auto *RegAt = getPhysicalRegister(RegMIPS32::Reg_AT);
4649 _sync();
4650 Context.insert(Retry);
4651 Sandboxer(this).ll(T1, Addr);
4652 _mov(RegAt, Val);
4653 Sandboxer(this).sc(RegAt, Addr);
4654 _br(NoTarget, NoTarget, RegAt, getZero(), Retry, CondMIPS32::Cond::EQ);
4655 Context.insert<InstFakeUse>(T1); // To keep LL alive
4656 _sync();
4657 } else {
4658 auto *Val = legalizeToReg(Instr->getArg(0));
4659 auto *Base = legalizeToReg(Instr->getArg(1));
4660 InstMIPS32Label *Retry = InstMIPS32Label::create(Func, this);
4661 constexpr CfgNode *NoTarget = nullptr;
4662 auto *T1 = makeReg(IceType_i32);
4663 auto *T2 = makeReg(IceType_i32);
4664 auto *T3 = makeReg(IceType_i32);
4665 auto *T4 = makeReg(IceType_i32);
4666 auto *T5 = makeReg(IceType_i32);
4667 auto *T6 = makeReg(IceType_i32);
4668 auto *T7 = makeReg(IceType_i32);
4669 auto *RegAt = getPhysicalRegister(RegMIPS32::Reg_AT);
4670 auto *SrcMask = makeReg(IceType_i32);
4671 auto *DstMask = makeReg(IceType_i32);
4672 const uint32_t Mask = (1 << (CHAR_BITS * typeWidthInBytes(Ty))) - 1;
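      // As with sub-word atomic loads, operate on the containing aligned
      // word: SrcMask keeps the bytes outside the lane, DstMask holds the
      // new value shifted into the lane, and the LL/SC loop merges the two.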
4673 _sync();
4674 _addiu(T1, getZero(), -4);
4675 _and(T7, Base, T1);
4676 auto *Addr = formMemoryOperand(T7, Ty);
4677 _andi(T2, Base, 3);
4678 _sll(T3, T2, 3);
4679 _ori(T4, getZero(), Mask);
4680 _sllv(T5, T4, T3);
4681 _sllv(T6, Val, T3);
4682 _nor(SrcMask, getZero(), T5);
4683 _and(DstMask, T6, T5);
4684 Context.insert(Retry);
4685 Sandboxer(this).ll(RegAt, Addr);
4686 _and(RegAt, RegAt, SrcMask);
4687 _or(RegAt, RegAt, DstMask);
4688 Sandboxer(this).sc(RegAt, Addr);
4689 _br(NoTarget, NoTarget, RegAt, getZero(), Retry, CondMIPS32::Cond::EQ);
4690 Context.insert<InstFakeUse>(SrcMask);
4691 Context.insert<InstFakeUse>(DstMask);
4692 _sync();
4693 }
4694 return;
4695 }
4696 case Intrinsics::AtomicCmpxchg: {
4697 assert(isScalarIntegerType(DestTy));
    // We require the memory address to be naturally aligned; given that,
    // normal loads are atomic.
4700 if (!Intrinsics::isMemoryOrderValid(
4701 ID, getConstantMemoryOrder(Instr->getArg(3)),
4702 getConstantMemoryOrder(Instr->getArg(4)))) {
4703 Func->setError("Unexpected memory ordering for AtomicCmpxchg");
4704 return;
4705 }
4706
4707 InstMIPS32Label *Exit = InstMIPS32Label::create(Func, this);
4708 InstMIPS32Label *Retry = InstMIPS32Label::create(Func, this);
4709 constexpr CfgNode *NoTarget = nullptr;
4710 auto *New = Instr->getArg(2);
4711 auto *Expected = Instr->getArg(1);
4712 auto *ActualAddress = Instr->getArg(0);
4713
4714 if (DestTy == IceType_i64) {
4715 llvm::report_fatal_error(
4716 "AtomicCmpxchg.i64 should have been prelowered.");
4717 return;
4718 } else if (DestTy == IceType_i8 || DestTy == IceType_i16) {
4719 auto *NewR = legalizeToReg(New);
4720 auto *ExpectedR = legalizeToReg(Expected);
4721 auto *ActualAddressR = legalizeToReg(ActualAddress);
4722 const uint32_t ShiftAmount =
4723 (INT32_BITS - CHAR_BITS * typeWidthInBytes(DestTy));
4724 const uint32_t Mask = (1 << (CHAR_BITS * typeWidthInBytes(DestTy))) - 1;
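      // Same containing-word technique: T3 is the lane mask, T4 its
      // complement, T5 the expected value shifted into the lane, and T6 the
      // new value shifted into the lane. The LL/SC loop compares only the
      // lane and, on a match, splices T6 into the loaded word.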
4725 auto *RegAt = getPhysicalRegister(RegMIPS32::Reg_AT);
4726 auto *T1 = I32Reg();
4727 auto *T2 = I32Reg();
4728 auto *T3 = I32Reg();
4729 auto *T4 = I32Reg();
4730 auto *T5 = I32Reg();
4731 auto *T6 = I32Reg();
4732 auto *T7 = I32Reg();
4733 auto *T8 = I32Reg();
4734 auto *T9 = I32Reg();
4735 _addiu(RegAt, getZero(), -4);
4736 _and(T1, ActualAddressR, RegAt);
4737 auto *Addr = formMemoryOperand(T1, DestTy);
4738 _andi(RegAt, ActualAddressR, 3);
4739 _sll(T2, RegAt, 3);
4740 _ori(RegAt, getZero(), Mask);
4741 _sllv(T3, RegAt, T2);
4742 _nor(T4, getZero(), T3);
4743 _andi(RegAt, ExpectedR, Mask);
4744 _sllv(T5, RegAt, T2);
4745 _andi(RegAt, NewR, Mask);
4746 _sllv(T6, RegAt, T2);
4747 _sync();
4748 Context.insert(Retry);
4749 Sandboxer(this).ll(T7, Addr);
4750 _and(T8, T7, T3);
4751 _br(NoTarget, NoTarget, T8, T5, Exit, CondMIPS32::Cond::NE);
4752 _and(RegAt, T7, T4);
4753 _or(T9, RegAt, T6);
4754 Sandboxer(this).sc(T9, Addr);
4755 _br(NoTarget, NoTarget, getZero(), T9, Retry, CondMIPS32::Cond::EQ);
4756 Context.insert<InstFakeUse>(getZero());
4757 Context.insert(Exit);
4758 _srlv(RegAt, T8, T2);
4759 _sll(RegAt, RegAt, ShiftAmount);
4760 _sra(RegAt, RegAt, ShiftAmount);
4761 _mov(Dest, RegAt);
4762 _sync();
4763 Context.insert<InstFakeUse>(T3);
4764 Context.insert<InstFakeUse>(T4);
4765 Context.insert<InstFakeUse>(T5);
4766 Context.insert<InstFakeUse>(T6);
4767 Context.insert<InstFakeUse>(T8);
4768 Context.insert<InstFakeUse>(ExpectedR);
4769 Context.insert<InstFakeUse>(NewR);
4770 } else {
4771 auto *T1 = I32Reg();
4772 auto *T2 = I32Reg();
4773 auto *NewR = legalizeToReg(New);
4774 auto *ExpectedR = legalizeToReg(Expected);
4775 auto *ActualAddressR = legalizeToReg(ActualAddress);
4776 _sync();
4777 Context.insert(Retry);
4778 Sandboxer(this).ll(T1, formMemoryOperand(ActualAddressR, DestTy));
4779 _br(NoTarget, NoTarget, T1, ExpectedR, Exit, CondMIPS32::Cond::NE);
4780 _mov(T2, NewR);
4781 Sandboxer(this).sc(T2, formMemoryOperand(ActualAddressR, DestTy));
4782 _br(NoTarget, NoTarget, T2, getZero(), Retry, CondMIPS32::Cond::EQ);
4783 Context.insert<InstFakeUse>(getZero());
4784 Context.insert(Exit);
4785 _mov(Dest, T1);
4786 _sync();
4787 Context.insert<InstFakeUse>(ExpectedR);
4788 Context.insert<InstFakeUse>(NewR);
4789 }
4790 return;
4791 }
4792 case Intrinsics::AtomicRMW: {
4793 assert(isScalarIntegerType(DestTy));
    // We require the memory address to be naturally aligned; given that,
    // normal loads are atomic.
4796 if (!Intrinsics::isMemoryOrderValid(
4797 ID, getConstantMemoryOrder(Instr->getArg(3)))) {
4798 Func->setError("Unexpected memory ordering for AtomicRMW");
4799 return;
4800 }
4801
4802 constexpr CfgNode *NoTarget = nullptr;
4803 InstMIPS32Label *Retry = InstMIPS32Label::create(Func, this);
4804 auto Operation = static_cast<Intrinsics::AtomicRMWOperation>(
4805 llvm::cast<ConstantInteger32>(Instr->getArg(0))->getValue());
4806 auto *New = Instr->getArg(2);
4807 auto *ActualAddress = Instr->getArg(1);
4808
4809 if (DestTy == IceType_i64) {
4810 llvm::report_fatal_error("AtomicRMW.i64 should have been prelowered.");
4811 return;
4812 } else if (DestTy == IceType_i8 || DestTy == IceType_i16) {
4813 const uint32_t ShiftAmount =
4814 INT32_BITS - (CHAR_BITS * typeWidthInBytes(DestTy));
4815 const uint32_t Mask = (1 << (CHAR_BITS * typeWidthInBytes(DestTy))) - 1;
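      // Sub-word RMW also operates on the containing aligned word: the
      // operation combines the loaded word with the new value shifted into
      // its lane, the surrounding bytes are preserved via the inverse mask
      // T4, and since AtomicRMW returns the old value, its lane is extracted
      // from T6 and sign-extended after the loop.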
4816 auto *NewR = legalizeToReg(New);
4817 auto *ActualAddressR = legalizeToReg(ActualAddress);
4818 auto *RegAt = getPhysicalRegister(RegMIPS32::Reg_AT);
4819 auto *T1 = I32Reg();
4820 auto *T2 = I32Reg();
4821 auto *T3 = I32Reg();
4822 auto *T4 = I32Reg();
4823 auto *T5 = I32Reg();
4824 auto *T6 = I32Reg();
4825 auto *T7 = I32Reg();
4826 _sync();
4827 _addiu(RegAt, getZero(), -4);
4828 _and(T1, ActualAddressR, RegAt);
4829 _andi(RegAt, ActualAddressR, 3);
4830 _sll(T2, RegAt, 3);
4831 _ori(RegAt, getZero(), Mask);
4832 _sllv(T3, RegAt, T2);
4833 _nor(T4, getZero(), T3);
4834 _sllv(T5, NewR, T2);
4835 Context.insert(Retry);
4836 Sandboxer(this).ll(T6, formMemoryOperand(T1, DestTy));
4837 if (Operation != Intrinsics::AtomicExchange) {
4838 createArithInst(Operation, RegAt, T6, T5);
4839 _and(RegAt, RegAt, T3);
4840 }
4841 _and(T7, T6, T4);
4842 if (Operation == Intrinsics::AtomicExchange) {
4843 _or(RegAt, T7, T5);
4844 } else {
4845 _or(RegAt, T7, RegAt);
4846 }
4847 Sandboxer(this).sc(RegAt, formMemoryOperand(T1, DestTy));
4848 _br(NoTarget, NoTarget, RegAt, getZero(), Retry, CondMIPS32::Cond::EQ);
4849 Context.insert<InstFakeUse>(getZero());
4850 _and(RegAt, T6, T3);
4851 _srlv(RegAt, RegAt, T2);
4852 _sll(RegAt, RegAt, ShiftAmount);
4853 _sra(RegAt, RegAt, ShiftAmount);
4854 _mov(Dest, RegAt);
4855 _sync();
4856 Context.insert<InstFakeUse>(NewR);
4857 Context.insert<InstFakeUse>(Dest);
4858 } else {
4859 auto *T1 = I32Reg();
4860 auto *T2 = I32Reg();
4861 auto *NewR = legalizeToReg(New);
4862 auto *ActualAddressR = legalizeToReg(ActualAddress);
4863 _sync();
4864 Context.insert(Retry);
4865 Sandboxer(this).ll(T1, formMemoryOperand(ActualAddressR, DestTy));
4866 if (Operation == Intrinsics::AtomicExchange) {
4867 _mov(T2, NewR);
4868 } else {
4869 createArithInst(Operation, T2, T1, NewR);
4870 }
4871 Sandboxer(this).sc(T2, formMemoryOperand(ActualAddressR, DestTy));
4872 _br(NoTarget, NoTarget, T2, getZero(), Retry, CondMIPS32::Cond::EQ);
4873 Context.insert<InstFakeUse>(getZero());
4874 _mov(Dest, T1);
4875 _sync();
4876 Context.insert<InstFakeUse>(NewR);
4877 Context.insert<InstFakeUse>(Dest);
4878 }
4879 return;
4880 }
4881 case Intrinsics::AtomicFence:
4882 case Intrinsics::AtomicFenceAll:
4883 assert(Dest == nullptr);
4884 _sync();
4885 return;
4886 case Intrinsics::AtomicIsLockFree: {
4887 Operand *ByteSize = Instr->getArg(0);
4888 auto *CI = llvm::dyn_cast<ConstantInteger32>(ByteSize);
4889 auto *T = I32Reg();
4890 if (CI == nullptr) {
4891 // The PNaCl ABI requires the byte size to be a compile-time constant.
4892 Func->setError("AtomicIsLockFree byte size should be compile-time const");
4893 return;
4894 }
4895 static constexpr int32_t NotLockFree = 0;
4896 static constexpr int32_t LockFree = 1;
4897 int32_t Result = NotLockFree;
4898 switch (CI->getValue()) {
4899 case 1:
4900 case 2:
4901 case 4:
4902 Result = LockFree;
4903 break;
4904 }
4905 _addiu(T, getZero(), Result);
4906 _mov(Dest, T);
4907 return;
4908 }
4909 case Intrinsics::Bswap: {
4910 auto *Src = Instr->getArg(0);
4911 const Type SrcTy = Src->getType();
4912 assert(SrcTy == IceType_i16 || SrcTy == IceType_i32 ||
4913 SrcTy == IceType_i64);
4914 switch (SrcTy) {
4915 case IceType_i16: {
4916 auto *T1 = I32Reg();
4917 auto *T2 = I32Reg();
4918 auto *T3 = I32Reg();
4919 auto *T4 = I32Reg();
4920 auto *SrcR = legalizeToReg(Src);
4921 _sll(T1, SrcR, 8);
4922 _lui(T2, Ctx->getConstantInt32(255));
4923 _and(T1, T1, T2);
4924 _sll(T3, SrcR, 24);
4925 _or(T1, T3, T1);
4926 _srl(T4, T1, 16);
4927 _mov(Dest, T4);
4928 return;
4929 }
4930 case IceType_i32: {
4931 auto *T1 = I32Reg();
4932 auto *T2 = I32Reg();
4933 auto *T3 = I32Reg();
4934 auto *T4 = I32Reg();
4935 auto *T5 = I32Reg();
4936 auto *SrcR = legalizeToReg(Src);
4937 _srl(T1, SrcR, 24);
4938 _srl(T2, SrcR, 8);
4939 _andi(T2, T2, 0xFF00);
4940 _or(T1, T2, T1);
4941 _sll(T4, SrcR, 8);
4942 _lui(T3, Ctx->getConstantInt32(255));
4943 _and(T4, T4, T3);
4944 _sll(T5, SrcR, 24);
4945 _or(T4, T5, T4);
4946 _or(T4, T4, T1);
4947 _mov(Dest, T4);
4948 return;
4949 }
4950 case IceType_i64: {
4951 auto *T1 = I32Reg();
4952 auto *T2 = I32Reg();
4953 auto *T3 = I32Reg();
4954 auto *T4 = I32Reg();
4955 auto *T5 = I32Reg();
4956 auto *T6 = I32Reg();
4957 auto *T7 = I32Reg();
4958 auto *T8 = I32Reg();
4959 auto *T9 = I32Reg();
4960 auto *T10 = I32Reg();
4961 auto *T11 = I32Reg();
4962 auto *T12 = I32Reg();
4963 auto *T13 = I32Reg();
4964 auto *T14 = I32Reg();
4965 auto *T15 = I32Reg();
4966 auto *T16 = I32Reg();
4967 auto *T17 = I32Reg();
4968 auto *T18 = I32Reg();
4969 auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
4970 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
4971 Src = legalizeUndef(Src);
4972 auto *SrcLoR = legalizeToReg(loOperand(Src));
4973 auto *SrcHiR = legalizeToReg(hiOperand(Src));
4974 _sll(T1, SrcHiR, 8);
4975 _srl(T2, SrcHiR, 24);
4976 _srl(T3, SrcHiR, 8);
4977 _andi(T3, T3, 0xFF00);
4978 _lui(T4, Ctx->getConstantInt32(255));
4979 _or(T5, T3, T2);
4980 _and(T6, T1, T4);
4981 _sll(T7, SrcHiR, 24);
4982 _or(T8, T7, T6);
4983 _srl(T9, SrcLoR, 24);
4984 _srl(T10, SrcLoR, 8);
4985 _andi(T11, T10, 0xFF00);
4986 _or(T12, T8, T5);
4987 _or(T13, T11, T9);
4988 _sll(T14, SrcLoR, 8);
4989 _and(T15, T14, T4);
4990 _sll(T16, SrcLoR, 24);
4991 _or(T17, T16, T15);
4992 _or(T18, T17, T13);
4993 _mov(DestLo, T12);
4994 _mov(DestHi, T18);
4995 return;
4996 }
4997 default:
4998 llvm::report_fatal_error("Control flow should never have reached here.");
4999 }
5000 return;
5001 }
5002 case Intrinsics::Ctpop: {
5003 llvm::report_fatal_error("Ctpop should have been prelowered.");
5004 return;
5005 }
5006 case Intrinsics::Ctlz: {
5007 auto *Src = Instr->getArg(0);
5008 const Type SrcTy = Src->getType();
5009 assert(SrcTy == IceType_i32 || SrcTy == IceType_i64);
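// ctlz.i32 maps directly onto clz. For i64 the result is, illustratively,
// (hi != 0) ? clz(hi) : 32 + clz(lo), selected branchlessly with movn; the
// high word of the destination is always zero.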
5010 switch (SrcTy) {
5011 case IceType_i32: {
5012 auto *T = I32Reg();
5013 auto *SrcR = legalizeToReg(Src);
5014 _clz(T, SrcR);
5015 _mov(Dest, T);
5016 break;
5017 }
5018 case IceType_i64: {
5019 auto *T1 = I32Reg();
5020 auto *T2 = I32Reg();
5021 auto *T3 = I32Reg();
5022 auto *T4 = I32Reg();
5023 auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
5024 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
5025 Variable *SrcHiR = legalizeToReg(hiOperand(Src));
5026 Variable *SrcLoR = legalizeToReg(loOperand(Src));
5027 _clz(T1, SrcHiR);
5028 _clz(T2, SrcLoR);
5029 _addiu(T3, T2, 32);
5030 _movn(T3, T1, SrcHiR);
5031 _addiu(T4, getZero(), 0);
5032 _mov(DestHi, T4);
5033 _mov(DestLo, T3);
5034 break;
5035 }
5036 default:
5037 llvm::report_fatal_error("Control flow should never have reached here.");
5038 }
5039 break;
5040 }
5041 case Intrinsics::Cttz: {
5042 auto *Src = Instr->getArg(0);
5043 const Type SrcTy = Src->getType();
5044 assert(SrcTy == IceType_i32 || SrcTy == IceType_i64);
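// cttz uses the identity cttz(x) == 32 - clz(~x & (x - 1)): ~x & (x - 1)
// sets exactly the bits below the lowest set bit of x. For i64, compute
// 32 + cttz(hi) and cttz(lo) separately and select on lo != 0 via movn.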
5045 switch (SrcTy) {
5046 case IceType_i32: {
5047 auto *T1 = I32Reg();
5048 auto *T2 = I32Reg();
5049 auto *T3 = I32Reg();
5050 auto *T4 = I32Reg();
5051 auto *T5 = I32Reg();
5052 auto *T6 = I32Reg();
5053 auto *SrcR = legalizeToReg(Src);
5054 _addiu(T1, SrcR, -1);
5055 _not(T2, SrcR);
5056 _and(T3, T2, T1);
5057 _clz(T4, T3);
5058 _addiu(T5, getZero(), 32);
5059 _subu(T6, T5, T4);
5060 _mov(Dest, T6);
5061 break;
5062 }
5063 case IceType_i64: {
5064 auto *THi1 = I32Reg();
5065 auto *THi2 = I32Reg();
5066 auto *THi3 = I32Reg();
5067 auto *THi4 = I32Reg();
5068 auto *THi5 = I32Reg();
5069 auto *THi6 = I32Reg();
5070 auto *TLo1 = I32Reg();
5071 auto *TLo2 = I32Reg();
5072 auto *TLo3 = I32Reg();
5073 auto *TLo4 = I32Reg();
5074 auto *TLo5 = I32Reg();
5075 auto *TLo6 = I32Reg();
5076 auto *TResHi = I32Reg();
5077 auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
5078 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
5079 Variable *SrcHiR = legalizeToReg(hiOperand(Src));
5080 Variable *SrcLoR = legalizeToReg(loOperand(Src));
5081 _addiu(THi1, SrcHiR, -1);
5082 _not(THi2, SrcHiR);
5083 _and(THi3, THi2, THi1);
5084 _clz(THi4, THi3);
5085 _addiu(THi5, getZero(), 64);
5086 _subu(THi6, THi5, THi4);
5087 _addiu(TLo1, SrcLoR, -1);
5088 _not(TLo2, SrcLoR);
5089 _and(TLo3, TLo2, TLo1);
5090 _clz(TLo4, TLo3);
5091 _addiu(TLo5, getZero(), 32);
5092 _subu(TLo6, TLo5, TLo4);
5093 _movn(THi6, TLo6, SrcLoR);
5094 _addiu(TResHi, getZero(), 0);
5095 _mov(DestHi, TResHi);
5096 _mov(DestLo, THi6);
5097 break;
5098 }
5099 default:
5100 llvm::report_fatal_error("Control flow should never have reached here.");
5101 }
5102 return;
5103 }
5104 case Intrinsics::Fabs: {
5105 if (isScalarFloatingType(DestTy)) {
5106 Variable *T = makeReg(DestTy);
5107 if (DestTy == IceType_f32) {
5108 _abs_s(T, legalizeToReg(Instr->getArg(0)));
5109 } else {
5110 _abs_d(T, legalizeToReg(Instr->getArg(0)));
5111 }
5112 _mov(Dest, T);
5113 }
5114 return;
5115 }
5116 case Intrinsics::Longjmp: {
5117 llvm::report_fatal_error("longjmp should have been prelowered.");
5118 return;
5119 }
5120 case Intrinsics::Memcpy: {
5121 llvm::report_fatal_error("memcpy should have been prelowered.");
5122 return;
5123 }
5124 case Intrinsics::Memmove: {
5125 llvm::report_fatal_error("memmove should have been prelowered.");
5126 return;
5127 }
5128 case Intrinsics::Memset: {
5129 llvm::report_fatal_error("memset should have been prelowered.");
5130 return;
5131 }
5132 case Intrinsics::NaClReadTP: {
5133 if (SandboxingType != ST_NaCl)
5134 llvm::report_fatal_error("nacl-read-tp should have been prelowered.");
5135 else {
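// Under the MIPS NaCl sandbox model assumed here, t8 is reserved for the
// thread pointer, so nacl-read-tp reduces to a load from 0(t8).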
5136 auto *T8 = makeReg(IceType_i32, RegMIPS32::Reg_T8);
5137 Context.insert<InstFakeDef>(T8);
5138 Variable *TP = legalizeToReg(OperandMIPS32Mem::create(
5139 Func, getPointerType(), T8,
5140 llvm::cast<ConstantInteger32>(Ctx->getConstantZero(IceType_i32))));
5141 _mov(Dest, TP);
5142 }
5143 return;
5144 }
5145 case Intrinsics::Setjmp: {
5146 llvm::report_fatal_error("setjmp should have been prelowered.");
5147 return;
5148 }
5149 case Intrinsics::Sqrt: {
5150 if (isScalarFloatingType(DestTy)) {
5151 Variable *T = makeReg(DestTy);
5152 if (DestTy == IceType_f32) {
5153 _sqrt_s(T, legalizeToReg(Instr->getArg(0)));
5154 } else {
5155 _sqrt_d(T, legalizeToReg(Instr->getArg(0)));
5156 }
5157 _mov(Dest, T);
5158 } else {
5159 assert(getFlags().getApplicationBinaryInterface() != ::Ice::ABI_PNaCl);
5160 UnimplementedLoweringError(this, Instr); // Not required for PNaCl
5161 }
5162 return;
5163 }
5164 case Intrinsics::Stacksave: {
5165 Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP);
5166 _mov(Dest, SP);
5167 return;
5168 }
5169 case Intrinsics::Stackrestore: {
5170 Variable *Val = legalizeToReg(Instr->getArg(0));
5171 Sandboxer(this).reset_sp(Val);
5172 return;
5173 }
5174 case Intrinsics::Trap: {
5175 const uint32_t TrapCodeZero = 0;
5176 _teq(getZero(), getZero(), TrapCodeZero);
5177 return;
5178 }
5179 case Intrinsics::LoadSubVector: {
5180 UnimplementedLoweringError(this, Instr); // Not required for PNaCl
5181 return;
5182 }
5183 case Intrinsics::StoreSubVector: {
5184 UnimplementedLoweringError(this, Instr); // Not required for PNaCl
5185 return;
5186 }
5187 default: // UnknownIntrinsic
5188 Func->setError("Unexpected intrinsic");
5189 return;
5190 }
5191 return;
5192 }
5193
5194 void TargetMIPS32::lowerLoad(const InstLoad *Instr) {
5195 // A Load instruction can be treated the same as an Assign instruction, after
5196 // the source operand is transformed into an OperandMIPS32Mem operand.
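// Illustratively, "Dest = load Ty, Addr" becomes "Dest = assign Mem(Addr)",
// and the assign lowering then emits the actual load through legalize().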
5197 Type Ty = Instr->getDest()->getType();
5198 Operand *Src0 = formMemoryOperand(Instr->getLoadAddress(), Ty);
5199 Variable *DestLoad = Instr->getDest();
5200 auto *Assign = InstAssign::create(Func, DestLoad, Src0);
5201 lowerAssign(Assign);
5202 }
5203
5204 namespace {
5205 void dumpAddressOpt(const Cfg *Func, const Variable *Base, int32_t Offset,
5206 const Inst *Reason) {
5207 if (!BuildDefs::dump())
5208 return;
5209 if (!Func->isVerbose(IceV_AddrOpt))
5210 return;
5211 OstreamLocker _(Func->getContext());
5212 Ostream &Str = Func->getContext()->getStrDump();
5213 Str << "Instruction: ";
5214 Reason->dumpDecorated(Func);
5215 Str << " results in Base=";
5216 if (Base)
5217 Base->dump(Func);
5218 else
5219 Str << "<null>";
5220 Str << ", Offset=" << Offset << "\n";
5221 }
5222
5223 bool matchAssign(const VariablesMetadata *VMetadata, Variable **Var,
5224 int32_t *Offset, const Inst **Reason) {
5225 // Var originates from Var=SrcVar ==> set Var:=SrcVar
5226 if (*Var == nullptr)
5227 return false;
5228 const Inst *VarAssign = VMetadata->getSingleDefinition(*Var);
5229 if (!VarAssign)
5230 return false;
5231 assert(!VMetadata->isMultiDef(*Var));
5232 if (!llvm::isa<InstAssign>(VarAssign))
5233 return false;
5234
5235 Operand *SrcOp = VarAssign->getSrc(0);
5236 bool Optimized = false;
5237 if (auto *SrcVar = llvm::dyn_cast<Variable>(SrcOp)) {
5238 if (!VMetadata->isMultiDef(SrcVar) ||
5239 // TODO: ensure SrcVar stays single-BB
5240 false) {
5241 Optimized = true;
5242 *Var = SrcVar;
5243 }
5244 } else if (auto *Const = llvm::dyn_cast<ConstantInteger32>(SrcOp)) {
5245 // A constant source is never a Variable, so this is a sibling case.
5246 int32_t MoreOffset = Const->getValue();
5247 if (Utils::WouldOverflowAdd(*Offset, MoreOffset))
5248 return false;
5249 *Var = nullptr;
5250 *Offset += MoreOffset;
5251 Optimized = true;
5252 }
5253
5254 if (Optimized) {
5255 *Reason = VarAssign;
5256 }
5257
5258 return Optimized;
5259 }
5260
5261 bool isAddOrSub(const Inst *Instr, InstArithmetic::OpKind *Kind) {
5262 if (const auto *Arith = llvm::dyn_cast<InstArithmetic>(Instr)) {
5263 switch (Arith->getOp()) {
5264 default:
5265 return false;
5266 case InstArithmetic::Add:
5267 case InstArithmetic::Sub:
5268 *Kind = Arith->getOp();
5269 return true;
5270 }
5271 }
5272 return false;
5273 }
5274
5275 bool matchOffsetBase(const VariablesMetadata *VMetadata, Variable **Base,
5276 int32_t *Offset, const Inst **Reason) {
5277 // Base is Base=Var+Const || Base is Base=Const+Var ==>
5278 // set Base=Var, Offset+=Const
5279 // Base is Base=Var-Const ==>
5280 // set Base=Var, Offset-=Const
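// Illustrative example: with Base = t, t = b + 12 and Offset = 4, one match
// rewrites the access to Base = b, Offset = 16.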
5281 if (*Base == nullptr)
5282 return false;
5283 const Inst *BaseInst = VMetadata->getSingleDefinition(*Base);
5284 if (BaseInst == nullptr) {
5285 return false;
5286 }
5287 assert(!VMetadata->isMultiDef(*Base));
5288
5289 auto *ArithInst = llvm::dyn_cast<const InstArithmetic>(BaseInst);
5290 if (ArithInst == nullptr)
5291 return false;
5292 InstArithmetic::OpKind Kind;
5293 if (!isAddOrSub(ArithInst, &Kind))
5294 return false;
5295 bool IsAdd = Kind == InstArithmetic::Add;
5296 Operand *Src0 = ArithInst->getSrc(0);
5297 Operand *Src1 = ArithInst->getSrc(1);
5298 auto *Var0 = llvm::dyn_cast<Variable>(Src0);
5299 auto *Var1 = llvm::dyn_cast<Variable>(Src1);
5300 auto *Const0 = llvm::dyn_cast<ConstantInteger32>(Src0);
5301 auto *Const1 = llvm::dyn_cast<ConstantInteger32>(Src1);
5302 Variable *NewBase = nullptr;
5303 int32_t NewOffset = *Offset;
5304
5305 if (Var0 == nullptr && Const0 == nullptr) {
5306 assert(llvm::isa<ConstantRelocatable>(Src0));
5307 return false;
5308 }
5309
5310 if (Var1 == nullptr && Const1 == nullptr) {
5311 assert(llvm::isa<ConstantRelocatable>(Src1));
5312 return false;
5313 }
5314
5315 if (Var0 && Var1)
5316 // TODO(jpp): merge base/index splitting into here.
5317 return false;
5318 if (!IsAdd && Var1)
5319 return false;
5320 if (Var0)
5321 NewBase = Var0;
5322 else if (Var1)
5323 NewBase = Var1;
5324 // Compute the updated constant offset.
5325 if (Const0) {
5326 int32_t MoreOffset = IsAdd ? Const0->getValue() : -Const0->getValue();
5327 if (Utils::WouldOverflowAdd(NewOffset, MoreOffset))
5328 return false;
5329 NewOffset += MoreOffset;
5330 }
5331 if (Const1) {
5332 int32_t MoreOffset = IsAdd ? Const1->getValue() : -Const1->getValue();
5333 if (Utils::WouldOverflowAdd(NewOffset, MoreOffset))
5334 return false;
5335 NewOffset += MoreOffset;
5336 }
5337
5338 // Update the computed address parameters once we are sure optimization
5339 // is valid.
5340 *Base = NewBase;
5341 *Offset = NewOffset;
5342 *Reason = BaseInst;
5343 return true;
5344 }
5345 } // end of anonymous namespace
5346
5347 OperandMIPS32Mem *TargetMIPS32::formAddressingMode(Type Ty, Cfg *Func,
5348 const Inst *LdSt,
5349 Operand *Base) {
5350 assert(Base != nullptr);
5351 int32_t OffsetImm = 0;
5352
5353 Func->resetCurrentNode();
5354 if (Func->isVerbose(IceV_AddrOpt)) {
5355 OstreamLocker _(Func->getContext());
5356 Ostream &Str = Func->getContext()->getStrDump();
5357 Str << "\nAddress mode formation:\t";
5358 LdSt->dumpDecorated(Func);
5359 }
5360
5361 if (isVectorType(Ty)) {
5362 return nullptr;
5363 }
5364
5365 auto *BaseVar = llvm::dyn_cast<Variable>(Base);
5366 if (BaseVar == nullptr)
5367 return nullptr;
5368
5369 const VariablesMetadata *VMetadata = Func->getVMetadata();
5370 const Inst *Reason = nullptr;
5371
5372 do {
5373 if (Reason != nullptr) {
5374 dumpAddressOpt(Func, BaseVar, OffsetImm, Reason);
5375 Reason = nullptr;
5376 }
5377
5378 if (matchAssign(VMetadata, &BaseVar, &OffsetImm, &Reason)) {
5379 continue;
5380 }
5381
5382 if (matchOffsetBase(VMetadata, &BaseVar, &OffsetImm, &Reason)) {
5383 continue;
5384 }
5385 } while (Reason);
5386
5387 if (BaseVar == nullptr) {
5388 // We need a base register rather than just an immediate. Move OffsetImm
5389 // into BaseVar and form 0(BaseVar) addressing.
5390 const Type PointerType = getPointerType();
5391 BaseVar = makeReg(PointerType);
5392 Context.insert<InstAssign>(BaseVar, Ctx->getConstantInt32(OffsetImm));
5393 OffsetImm = 0;
5394 } else if (OffsetImm != 0) {
5395 // If OffsetImm does not fit in a signed 16-bit immediate field, fold it
5396 // into BaseVar and form 0(BaseVar) addressing.
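// Illustrative example: 0x20000(b) cannot be encoded in the signed 16-bit
// offset field of lw/sw, so emit T = b + 0x20000 and address as 0(T).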
5397 const int32_t PositiveOffset = OffsetImm > 0 ? OffsetImm : -OffsetImm;
5398 const InstArithmetic::OpKind Op =
5399 OffsetImm > 0 ? InstArithmetic::Add : InstArithmetic::Sub;
5400 constexpr bool ZeroExt = false;
5401 if (!OperandMIPS32Mem::canHoldOffset(Ty, ZeroExt, OffsetImm)) {
5402 const Type PointerType = getPointerType();
5403 Variable *T = makeReg(PointerType);
5404 Context.insert<InstArithmetic>(Op, T, BaseVar,
5405 Ctx->getConstantInt32(PositiveOffset));
5406 BaseVar = T;
5407 OffsetImm = 0;
5408 }
5409 }
5410
5411 assert(BaseVar != nullptr);
5412 assert(OffsetImm < 0 ? (-OffsetImm & 0x0000ffff) == -OffsetImm
5413 : (OffsetImm & 0x0000ffff) == OffsetImm);
5414
5415 return OperandMIPS32Mem::create(
5416 Func, Ty, BaseVar,
5417 llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(OffsetImm)));
5418 }
5419
5420 void TargetMIPS32::doAddressOptLoad() {
5421 Inst *Instr = iteratorToInst(Context.getCur());
5422 assert(llvm::isa<InstLoad>(Instr));
5423 Variable *Dest = Instr->getDest();
5424 Operand *Addr = Instr->getSrc(0);
5425 if (OperandMIPS32Mem *Mem =
5426 formAddressingMode(Dest->getType(), Func, Instr, Addr)) {
5427 Instr->setDeleted();
5428 Context.insert<InstLoad>(Dest, Mem);
5429 }
5430 }
5431
5432 void TargetMIPS32::lowerPhi(const InstPhi * /*Instr*/) {
5433 Func->setError("Phi found in regular instruction list");
5434 }
5435
5436 void TargetMIPS32::lowerRet(const InstRet *Instr) {
5437 Variable *Reg = nullptr;
5438 if (Instr->hasRetValue()) {
5439 Operand *Src0 = Instr->getRetValue();
5440 switch (Src0->getType()) {
5441 case IceType_f32: {
5442 Operand *Src0F = legalizeToReg(Src0);
5443 Reg = makeReg(Src0F->getType(), RegMIPS32::Reg_F0);
5444 _mov(Reg, Src0F);
5445 break;
5446 }
5447 case IceType_f64: {
5448 Operand *Src0F = legalizeToReg(Src0);
5449 Reg = makeReg(Src0F->getType(), RegMIPS32::Reg_F0F1);
5450 _mov(Reg, Src0F);
5451 break;
5452 }
5453 case IceType_i1:
5454 case IceType_i8:
5455 case IceType_i16:
5456 case IceType_i32: {
5457 Operand *Src0F = legalizeToReg(Src0);
5458 Reg = makeReg(Src0F->getType(), RegMIPS32::Reg_V0);
5459 _mov(Reg, Src0F);
5460 break;
5461 }
5462 case IceType_i64: {
5463 Src0 = legalizeUndef(Src0);
5464 Variable *R0 = legalizeToReg(loOperand(Src0), RegMIPS32::Reg_V0);
5465 Variable *R1 = legalizeToReg(hiOperand(Src0), RegMIPS32::Reg_V1);
5466 Reg = R0;
5467 Context.insert<InstFakeUse>(R1);
5468 break;
5469 }
5470 case IceType_v4i1:
5471 case IceType_v8i1:
5472 case IceType_v16i1:
5473 case IceType_v16i8:
5474 case IceType_v8i16:
5475 case IceType_v4i32: {
5476 auto *SrcVec = llvm::dyn_cast<VariableVecOn32>(legalizeUndef(Src0));
5477 Variable *V0 =
5478 legalizeToReg(SrcVec->getContainers()[0], RegMIPS32::Reg_V0);
5479 Variable *V1 =
5480 legalizeToReg(SrcVec->getContainers()[1], RegMIPS32::Reg_V1);
5481 Variable *A0 =
5482 legalizeToReg(SrcVec->getContainers()[2], RegMIPS32::Reg_A0);
5483 Variable *A1 =
5484 legalizeToReg(SrcVec->getContainers()[3], RegMIPS32::Reg_A1);
5485 Reg = V0;
5486 Context.insert<InstFakeUse>(V1);
5487 Context.insert<InstFakeUse>(A0);
5488 Context.insert<InstFakeUse>(A1);
5489 break;
5490 }
5491 case IceType_v4f32: {
5492 auto *SrcVec = llvm::dyn_cast<VariableVecOn32>(legalizeUndef(Src0));
5493 Reg = getImplicitRet();
5494 auto *RegT = legalizeToReg(Reg);
5495 // Return the vector through the buffer addressed by the implicit argument a0.
5496 for (SizeT i = 0; i < SrcVec->ContainersPerVector; ++i) {
5497 OperandMIPS32Mem *Mem = OperandMIPS32Mem::create(
5498 Func, IceType_f32, RegT,
5499 llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(i * 4)));
5500 Variable *Var = legalizeToReg(SrcVec->getContainers()[i]);
5501 _sw(Var, Mem);
5502 }
5503 Variable *V0 = makeReg(IceType_i32, RegMIPS32::Reg_V0);
5504 _mov(V0, Reg); // move v0,a0
5505 Context.insert<InstFakeUse>(Reg);
5506 Context.insert<InstFakeUse>(V0);
5507 break;
5508 }
5509 default:
5510 llvm::report_fatal_error("Ret: Invalid type.");
5511 break;
5512 }
5513 }
5514 _ret(getPhysicalRegister(RegMIPS32::Reg_RA), Reg);
5515 }
5516
5517 void TargetMIPS32::lowerSelect(const InstSelect *Instr) {
5518 Variable *Dest = Instr->getDest();
5519 const Type DestTy = Dest->getType();
5520
5521 if (isVectorType(DestTy)) {
5522 llvm::report_fatal_error("Select: Destination type is vector");
5523 return;
5524 }
5525
5526 Variable *DestR = nullptr;
5527 Variable *DestHiR = nullptr;
5528 Variable *SrcTR = nullptr;
5529 Variable *SrcTHiR = nullptr;
5530 Variable *SrcFR = nullptr;
5531 Variable *SrcFHiR = nullptr;
5532
5533 if (DestTy == IceType_i64) {
5534 DestR = llvm::cast<Variable>(loOperand(Dest));
5535 DestHiR = llvm::cast<Variable>(hiOperand(Dest));
5536 SrcTR = legalizeToReg(loOperand(legalizeUndef(Instr->getTrueOperand())));
5537 SrcTHiR = legalizeToReg(hiOperand(legalizeUndef(Instr->getTrueOperand())));
5538 SrcFR = legalizeToReg(loOperand(legalizeUndef(Instr->getFalseOperand())));
5539 SrcFHiR = legalizeToReg(hiOperand(legalizeUndef(Instr->getFalseOperand())));
5540 } else {
5541 SrcTR = legalizeToReg(legalizeUndef(Instr->getTrueOperand()));
5542 SrcFR = legalizeToReg(legalizeUndef(Instr->getFalseOperand()));
5543 }
5544
5545 Variable *ConditionR = legalizeToReg(Instr->getCondition());
5546
5547 assert(Instr->getCondition()->getType() == IceType_i1);
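// movn d, s, c copies s into d when c != 0, so each lowering below loads
// the false operand first and conditionally overwrites it with the true one.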
5548
5549 switch (DestTy) {
5550 case IceType_i1:
5551 case IceType_i8:
5552 case IceType_i16:
5553 case IceType_i32:
5554 _movn(SrcFR, SrcTR, ConditionR);
5555 _mov(Dest, SrcFR);
5556 break;
5557 case IceType_i64:
5558 _movn(SrcFR, SrcTR, ConditionR);
5559 _movn(SrcFHiR, SrcTHiR, ConditionR);
5560 _mov(DestR, SrcFR);
5561 _mov(DestHiR, SrcFHiR);
5562 break;
5563 case IceType_f32:
5564 _movn_s(SrcFR, SrcTR, ConditionR);
5565 _mov(Dest, SrcFR);
5566 break;
5567 case IceType_f64:
5568 _movn_d(SrcFR, SrcTR, ConditionR);
5569 _mov(Dest, SrcFR);
5570 break;
5571 default:
5572 llvm::report_fatal_error("Select: Invalid type.");
5573 }
5574 }
5575
5576 void TargetMIPS32::lowerShuffleVector(const InstShuffleVector *Instr) {
5577 UnimplementedLoweringError(this, Instr);
5578 }
5579
5580 void TargetMIPS32::lowerStore(const InstStore *Instr) {
5581 Operand *Value = Instr->getData();
5582 Operand *Addr = Instr->getStoreAddress();
5583 OperandMIPS32Mem *NewAddr = formMemoryOperand(Addr, Value->getType());
5584 Type Ty = NewAddr->getType();
5585
5586 if (Ty == IceType_i64) {
5587 Value = legalizeUndef(Value);
5588 Variable *ValueHi = legalizeToReg(hiOperand(Value));
5589 Variable *ValueLo = legalizeToReg(loOperand(Value));
5590 _sw(ValueHi, llvm::cast<OperandMIPS32Mem>(hiOperand(NewAddr)));
5591 _sw(ValueLo, llvm::cast<OperandMIPS32Mem>(loOperand(NewAddr)));
5592 } else if (isVectorType(Value->getType())) {
5593 auto *DataVec = llvm::dyn_cast<VariableVecOn32>(Value);
5594 for (SizeT i = 0; i < DataVec->ContainersPerVector; ++i) {
5595 auto *DCont = legalizeToReg(DataVec->getContainers()[i]);
5596 auto *MCont = llvm::cast<OperandMIPS32Mem>(
5597 getOperandAtIndex(NewAddr, IceType_i32, i));
5598 _sw(DCont, MCont);
5599 }
5600 } else {
5601 Variable *ValueR = legalizeToReg(Value);
5602 _sw(ValueR, NewAddr);
5603 }
5604 }
5605
5606 void TargetMIPS32::doAddressOptStore() {
5607 Inst *Instr = iteratorToInst(Context.getCur());
5608 assert(llvm::isa<InstStore>(Instr));
5609 Operand *Src = Instr->getSrc(0);
5610 Operand *Addr = Instr->getSrc(1);
5611 if (OperandMIPS32Mem *Mem =
5612 formAddressingMode(Src->getType(), Func, Instr, Addr)) {
5613 Instr->setDeleted();
5614 Context.insert<InstStore>(Src, Mem);
5615 }
5616 }
5617
5618 void TargetMIPS32::lowerSwitch(const InstSwitch *Instr) {
5619 Operand *Src = Instr->getComparison();
5620 SizeT NumCases = Instr->getNumCases();
5621 if (Src->getType() == IceType_i64) {
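// For i64, each case value is matched in two halves: skip the case if the
// high words differ, then branch to the target if the low words are equal.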
5622 Src = legalizeUndef(Src);
5623 Variable *Src0Lo = legalizeToReg(loOperand(Src));
5624 Variable *Src0Hi = legalizeToReg(hiOperand(Src));
5625 for (SizeT I = 0; I < NumCases; ++I) {
5626 Operand *ValueLo = Ctx->getConstantInt32(Instr->getValue(I));
5627 Operand *ValueHi = Ctx->getConstantInt32(Instr->getValue(I) >> 32);
5628 CfgNode *TargetTrue = Instr->getLabel(I);
5629 constexpr CfgNode *NoTarget = nullptr;
5630 ValueHi = legalizeToReg(ValueHi);
5631 InstMIPS32Label *IntraLabel = InstMIPS32Label::create(Func, this);
5632 _br(NoTarget, NoTarget, Src0Hi, ValueHi, IntraLabel,
5633 CondMIPS32::Cond::NE);
5634 ValueLo = legalizeToReg(ValueLo);
5635 _br(NoTarget, TargetTrue, Src0Lo, ValueLo, CondMIPS32::Cond::EQ);
5636 Context.insert(IntraLabel);
5637 }
5638 _br(Instr->getLabelDefault());
5639 return;
5640 }
5641 Variable *SrcVar = legalizeToReg(Src);
5642 assert(SrcVar->mustHaveReg());
5643 for (SizeT I = 0; I < NumCases; ++I) {
5644 Operand *Value = Ctx->getConstantInt32(Instr->getValue(I));
5645 CfgNode *TargetTrue = Instr->getLabel(I);
5646 constexpr CfgNode *NoTargetFalse = nullptr;
5647 Value = legalizeToReg(Value);
5648 _br(NoTargetFalse, TargetTrue, SrcVar, Value, CondMIPS32::Cond::EQ);
5649 }
5650 _br(Instr->getLabelDefault());
5651 }
5652
5653 void TargetMIPS32::lowerBreakpoint(const InstBreakpoint *Instr) {
5654 UnimplementedLoweringError(this, Instr);
5655 }
5656
5657 void TargetMIPS32::lowerUnreachable(const InstUnreachable *) {
5658 const uint32_t TrapCodeZero = 0;
5659 _teq(getZero(), getZero(), TrapCodeZero);
5660 }
5661
5662 void TargetMIPS32::lowerOther(const Inst *Instr) {
5663 if (llvm::isa<InstMIPS32Sync>(Instr)) {
5664 _sync();
5665 } else {
5666 TargetLowering::lowerOther(Instr);
5667 }
5668 }
5669
5670 // Turn an i64 Phi instruction into a pair of i32 Phi instructions, to preserve
5671 // integrity of liveness analysis. Undef values are also turned into zeroes,
5672 // since loOperand() and hiOperand() don't expect Undef input.
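// Illustratively, "a:i64 = phi [b, %p1], [c, %p2]" becomes the pair
// "a.lo:i32 = phi [b.lo, %p1], [c.lo, %p2]" and
// "a.hi:i32 = phi [b.hi, %p1], [c.hi, %p2]".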
5673 void TargetMIPS32::prelowerPhis() {
5674 PhiLowering::prelowerPhis32Bit<TargetMIPS32>(this, Context.getNode(), Func);
5675 }
5676
5677 void TargetMIPS32::postLower() {
5678 if (Func->getOptLevel() == Opt_m1)
5679 return;
5680 markRedefinitions();
5681 Context.availabilityUpdate();
5682 }
5683
5684 /* TODO(jvoung): avoid duplicate symbols with multiple targets.
5685 void ConstantUndef::emitWithoutDollar(GlobalContext *) const {
5686 llvm_unreachable("Not expecting to emitWithoutDollar undef");
5687 }
5688
5689 void ConstantUndef::emit(GlobalContext *) const {
5690 llvm_unreachable("undef value encountered by emitter.");
5691 }
5692 */
5693
5694 TargetDataMIPS32::TargetDataMIPS32(GlobalContext *Ctx)
5695 : TargetDataLowering(Ctx) {}
5696
5697 // Generate the .MIPS.abiflags section. This section contains a versioned
5698 // data structure with essential information required by the loader to
5699 // determine the requirements of the application.
5700 void TargetDataMIPS32::emitTargetRODataSections() {
5701 struct MipsABIFlagsSection Flags;
5702 ELFObjectWriter *Writer = Ctx->getObjectWriter();
5703 const std::string Name = ".MIPS.abiflags";
5704 const llvm::ELF::Elf64_Word ShType = llvm::ELF::SHT_MIPS_ABIFLAGS;
5705 const llvm::ELF::Elf64_Xword ShFlags = llvm::ELF::SHF_ALLOC;
5706 const llvm::ELF::Elf64_Xword ShAddralign = 8;
5707 const llvm::ELF::Elf64_Xword ShEntsize = sizeof(Flags);
5708 Writer->writeTargetRODataSection(
5709 Name, ShType, ShFlags, ShAddralign, ShEntsize,
5710 llvm::StringRef(reinterpret_cast<const char *>(&Flags), sizeof(Flags)));
5711 }
5712
5713 void TargetDataMIPS32::lowerGlobals(const VariableDeclarationList &Vars,
5714 const std::string &SectionSuffix) {
5715 const bool IsPIC = getFlags().getUseNonsfi();
5716 switch (getFlags().getOutFileType()) {
5717 case FT_Elf: {
5718 ELFObjectWriter *Writer = Ctx->getObjectWriter();
5719 Writer->writeDataSection(Vars, llvm::ELF::R_MIPS_32, SectionSuffix, IsPIC);
5720 } break;
5721 case FT_Asm:
5722 case FT_Iasm: {
5723 OstreamLocker L(Ctx);
5724 for (const VariableDeclaration *Var : Vars) {
5725 if (getFlags().matchTranslateOnly(Var->getName(), 0)) {
5726 emitGlobal(*Var, SectionSuffix);
5727 }
5728 }
5729 } break;
5730 }
5731 }
5732
5733 namespace {
5734 template <typename T> struct ConstantPoolEmitterTraits;
5735
5736 static_assert(sizeof(uint64_t) == 8,
5737 "uint64_t is supposed to be 8 bytes wide.");
5738
5739 // TODO(jaydeep.patil): implement the following when implementing constant
5740 // randomization:
5741 // * template <> struct ConstantPoolEmitterTraits<uint8_t>
5742 // * template <> struct ConstantPoolEmitterTraits<uint16_t>
5743 // * template <> struct ConstantPoolEmitterTraits<uint32_t>
5744 template <> struct ConstantPoolEmitterTraits<float> {
5745 using ConstantType = ConstantFloat;
5746 static constexpr Type IceType = IceType_f32;
5747 // AsmTag and TypeName can't be constexpr because llvm::StringRef is unhappy
5748 // about them being constexpr.
5749 static const char AsmTag[];
5750 static const char TypeName[];
5751 static uint64_t bitcastToUint64(float Value) {
5752 static_assert(sizeof(Value) == sizeof(uint32_t),
5753 "Float should be 4 bytes.");
5754 const uint32_t IntValue = Utils::bitCopy<uint32_t>(Value);
5755 return static_cast<uint64_t>(IntValue);
5756 }
5757 };
5758 const char ConstantPoolEmitterTraits<float>::AsmTag[] = ".word";
5759 const char ConstantPoolEmitterTraits<float>::TypeName[] = "f32";
5760
5761 template <> struct ConstantPoolEmitterTraits<double> {
5762 using ConstantType = ConstantDouble;
5763 static constexpr Type IceType = IceType_f64;
5764 static const char AsmTag[];
5765 static const char TypeName[];
5766 static uint64_t bitcastToUint64(double Value) {
5767 static_assert(sizeof(double) == sizeof(uint64_t),
5768 "Double should be 8 bytes.");
5769 return Utils::bitCopy<uint64_t>(Value);
5770 }
5771 };
5772 const char ConstantPoolEmitterTraits<double>::AsmTag[] = ".quad";
5773 const char ConstantPoolEmitterTraits<double>::TypeName[] = "f64";
5774
5775 template <typename T>
5776 void emitConstant(
5777 Ostream &Str,
5778 const typename ConstantPoolEmitterTraits<T>::ConstantType *Const) {
5779 if (!BuildDefs::dump())
5780 return;
5781 using Traits = ConstantPoolEmitterTraits<T>;
5782 Str << Const->getLabelName();
5783 T Value = Const->getValue();
5784 Str << ":\n\t" << Traits::AsmTag << "\t0x";
5785 Str.write_hex(Traits::bitcastToUint64(Value));
5786 Str << "\t/* " << Traits::TypeName << " " << Value << " */\n";
5787 }
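// An f32 pool entry thus emits roughly the following (label illustrative):
//   .L$f32$0:
//       .word   0x3f800000      /* f32 1.000000e+00 */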
5788
5789 template <typename T> void emitConstantPool(GlobalContext *Ctx) {
5790 if (!BuildDefs::dump())
5791 return;
5792 using Traits = ConstantPoolEmitterTraits<T>;
5793 static constexpr size_t MinimumAlignment = 4;
5794 SizeT Align = std::max(MinimumAlignment, typeAlignInBytes(Traits::IceType));
5795 assert((Align % 4) == 0 && "Constants should be aligned");
5796 Ostream &Str = Ctx->getStrEmit();
5797 ConstantList Pool = Ctx->getConstantPool(Traits::IceType);
5798 Str << "\t.section\t.rodata.cst" << Align << ",\"aM\",%progbits," << Align
5799 << "\n"
5800 << "\t.align\t" << (Align == 4 ? 2 : 3) << "\n";
5801 for (Constant *C : Pool) {
5802 if (!C->getShouldBePooled()) {
5803 continue;
5804 }
5805 emitConstant<T>(Str, llvm::dyn_cast<typename Traits::ConstantType>(C));
5806 }
5807 }
5808 } // end of anonymous namespace
5809
5810 void TargetDataMIPS32::lowerConstants() {
5811 if (getFlags().getDisableTranslation())
5812 return;
5813 switch (getFlags().getOutFileType()) {
5814 case FT_Elf: {
5815 ELFObjectWriter *Writer = Ctx->getObjectWriter();
5816 Writer->writeConstantPool<ConstantFloat>(IceType_f32);
5817 Writer->writeConstantPool<ConstantDouble>(IceType_f64);
5818 } break;
5819 case FT_Asm:
5820 case FT_Iasm: {
5821 OstreamLocker _(Ctx);
5822 emitConstantPool<float>(Ctx);
5823 emitConstantPool<double>(Ctx);
5824 break;
5825 }
5826 }
5827 }
5828
5829 void TargetDataMIPS32::lowerJumpTables() {
5830 if (getFlags().getDisableTranslation())
5831 return;
5832 }
5833
5834 // Helper for legalize() to emit the right code to lower an operand to a
5835 // register of the appropriate type.
5836 Variable *TargetMIPS32::copyToReg(Operand *Src, RegNumT RegNum) {
5837 Type Ty = Src->getType();
5838 Variable *Reg = makeReg(Ty, RegNum);
5839 if (isVectorType(Ty)) {
5840 llvm::report_fatal_error("Invalid copy from vector type.");
5841 } else {
5842 if (auto *Mem = llvm::dyn_cast<OperandMIPS32Mem>(Src)) {
5843 _lw(Reg, Mem);
5844 } else {
5845 _mov(Reg, Src);
5846 }
5847 }
5848 return Reg;
5849 }
5850
5851 Operand *TargetMIPS32::legalize(Operand *From, LegalMask Allowed,
5852 RegNumT RegNum) {
5853 Type Ty = From->getType();
5854 // Assert that a physical register is allowed. To date, all calls
5855 // to legalize() allow a physical register. Legal_Flex converts
5856 // registers to the right type OperandMIPS32FlexReg as needed.
5857 assert(Allowed & Legal_Reg);
5858
5859 if (RegNum.hasNoValue()) {
5860 if (Variable *Subst = getContext().availabilityGet(From)) {
5861 // At this point we know there is a potential substitution available.
5862 if (!Subst->isRematerializable() && Subst->mustHaveReg() &&
5863 !Subst->hasReg()) {
5864 // At this point we know the substitution will have a register.
5865 if (From->getType() == Subst->getType()) {
5866 // At this point we know the substitution's register is compatible.
5867 return Subst;
5868 }
5869 }
5870 }
5871 }
5872
5873 // Go through the various types of operands:
5874 // OperandMIPS32Mem, Constant, and Variable.
5875 // Given the above assertion, if type of operand is not legal
5876 // (e.g., OperandMIPS32Mem and !Legal_Mem), we can always copy
5877 // to a register.
5878 if (auto *Mem = llvm::dyn_cast<OperandMIPS32Mem>(From)) {
5879 // Base must be in a physical register.
5880 Variable *Base = Mem->getBase();
5881 ConstantInteger32 *Offset = llvm::cast<ConstantInteger32>(Mem->getOffset());
5882 Variable *RegBase = nullptr;
5883 assert(Base);
5884
5885 RegBase = llvm::cast<Variable>(
5886 legalize(Base, Legal_Reg | Legal_Rematerializable));
5887
5888 if (Offset != nullptr && Offset->getValue() != 0) {
5889 static constexpr bool ZeroExt = false;
5890 if (!OperandMIPS32Mem::canHoldOffset(Ty, ZeroExt, Offset->getValue())) {
5891 llvm::report_fatal_error("Invalid memory offset.");
5892 }
5893 }
5894
5895 // Create a new operand if there was a change.
5896 if (Base != RegBase) {
5897 Mem = OperandMIPS32Mem::create(Func, Ty, RegBase, Offset,
5898 Mem->getAddrMode());
5899 }
5900
5901 if (Allowed & Legal_Mem) {
5902 From = Mem;
5903 } else {
5904 Variable *Reg = makeReg(Ty, RegNum);
5905 _lw(Reg, Mem);
5906 From = Reg;
5907 }
5908 return From;
5909 }
5910
5911 if (llvm::isa<Constant>(From)) {
5912 if (llvm::isa<ConstantUndef>(From)) {
5913 From = legalizeUndef(From, RegNum);
5914 if (isVectorType(Ty))
5915 return From;
5916 }
5917 if (auto *C = llvm::dyn_cast<ConstantRelocatable>(From)) {
5918 Variable *Reg = makeReg(Ty, RegNum);
5919 Variable *TReg = makeReg(Ty, RegNum);
5920 _lui(TReg, C, RO_Hi);
5921 _addiu(Reg, TReg, C, RO_Lo);
5922 return Reg;
5923 } else if (auto *C32 = llvm::dyn_cast<ConstantInteger32>(From)) {
5924 const uint32_t Value = C32->getValue();
5925 // Use addiu if the immediate fits in a signed 16-bit field. Otherwise load
5926 // it using a lui/ori sequence.
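// Illustrative example: 0x00012345 is materialized as
//   lui reg, 0x0001
//   ori reg, reg, 0x2345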
5927 Variable *Reg = makeReg(Ty, RegNum);
5928 if (isInt<16>(int32_t(Value))) {
5929 Variable *Zero = makeReg(Ty, RegMIPS32::Reg_ZERO);
5930 Context.insert<InstFakeDef>(Zero);
5931 _addiu(Reg, Zero, Value);
5932 } else {
5933 uint32_t UpperBits = (Value >> 16) & 0xFFFF;
5934 uint32_t LowerBits = Value & 0xFFFF;
5935 if (LowerBits) {
5936 Variable *TReg = makeReg(Ty, RegNum);
5937 _lui(TReg, Ctx->getConstantInt32(UpperBits));
5938 _ori(Reg, TReg, LowerBits);
5939 } else {
5940 _lui(Reg, Ctx->getConstantInt32(UpperBits));
5941 }
5942 }
5943 return Reg;
5944 } else if (isScalarFloatingType(Ty)) {
5945 auto *CFrom = llvm::cast<Constant>(From);
5946 Variable *TReg = makeReg(Ty);
5947 if (!CFrom->getShouldBePooled()) {
5948 // Float/Double constant 0 is not pooled.
5949 Context.insert<InstFakeDef>(TReg);
5950 _mov(TReg, getZero());
5951 } else {
5952 // Load floats/doubles from literal pool.
5953 Constant *Offset = Ctx->getConstantSym(0, CFrom->getLabelName());
5954 Variable *TReg1 = makeReg(getPointerType());
5955 _lui(TReg1, Offset, RO_Hi);
5956 OperandMIPS32Mem *Addr =
5957 OperandMIPS32Mem::create(Func, Ty, TReg1, Offset);
5958 if (Ty == IceType_f32)
5959 Sandboxer(this).lwc1(TReg, Addr, RO_Lo);
5960 else
5961 Sandboxer(this).ldc1(TReg, Addr, RO_Lo);
5962 }
5963 return copyToReg(TReg, RegNum);
5964 }
5965 }
5966
5967 if (auto *Var = llvm::dyn_cast<Variable>(From)) {
5968 if (Var->isRematerializable()) {
5969 if (Allowed & Legal_Rematerializable) {
5970 return From;
5971 }
5972
5973 Variable *T = makeReg(Var->getType(), RegNum);
5974 _mov(T, Var);
5975 return T;
5976 }
5977 // Check if the variable is guaranteed a physical register. This
5978 // can happen either when the variable is pre-colored or when it is
5979 // assigned infinite weight.
5980 bool MustHaveRegister = (Var->hasReg() || Var->mustHaveReg());
5981 // We need a new physical register for the operand if:
5982 // Mem is not allowed and Var isn't guaranteed a physical
5983 // register, or
5984 // RegNum is required and Var->getRegNum() doesn't match.
5985 if ((!(Allowed & Legal_Mem) && !MustHaveRegister) ||
5986 (RegNum.hasValue() && RegNum != Var->getRegNum())) {
5987 From = copyToReg(From, RegNum);
5988 }
5989 return From;
5990 }
5991 return From;
5992 }
5993
5994 namespace BoolFolding {
5995 // TODO(sagar.thakur): Add remaining instruction kinds to shouldTrackProducer()
5996 // and isValidConsumer()
5997 bool shouldTrackProducer(const Inst &Instr) {
5998 return Instr.getKind() == Inst::Icmp;
5999 }
6000
6001 bool isValidConsumer(const Inst &Instr) { return Instr.getKind() == Inst::Br; }
6002 } // end of namespace BoolFolding
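// Net effect: an i1 produced by an icmp whose single consumer is a br in
// the same block can have its materialization folded into the branch.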
6003
6004 void TargetMIPS32::ComputationTracker::recordProducers(CfgNode *Node) {
6005 for (Inst &Instr : Node->getInsts()) {
6006 if (Instr.isDeleted())
6007 continue;
6008 // Check whether Instr is a valid producer.
6009 Variable *Dest = Instr.getDest();
6010 if (Dest // only consider instructions with an actual dest var; and
6011 && Dest->getType() == IceType_i1 // only bool-type dest vars; and
6012 && BoolFolding::shouldTrackProducer(Instr)) { // white-listed instr.
6013 KnownComputations.emplace(Dest->getIndex(),
6014 ComputationEntry(&Instr, IceType_i1));
6015 }
6016 // Check each src variable against the map.
6017 FOREACH_VAR_IN_INST(Var, Instr) {
6018 SizeT VarNum = Var->getIndex();
6019 auto ComputationIter = KnownComputations.find(VarNum);
6020 if (ComputationIter == KnownComputations.end()) {
6021 continue;
6022 }
6023
6024 ++ComputationIter->second.NumUses;
6025 switch (ComputationIter->second.ComputationType) {
6026 default:
6027 KnownComputations.erase(VarNum);
6028 continue;
6029 case IceType_i1:
6030 if (!BoolFolding::isValidConsumer(Instr)) {
6031 KnownComputations.erase(VarNum);
6032 continue;
6033 }
6034 break;
6035 }
6036
6037 if (Instr.isLastUse(Var)) {
6038 ComputationIter->second.IsLiveOut = false;
6039 }
6040 }
6041 }
6042
6043 for (auto Iter = KnownComputations.begin(), End = KnownComputations.end();
6044 Iter != End;) {
6045 // Disable the folding if its dest may be live beyond this block.
6046 if (Iter->second.IsLiveOut || Iter->second.NumUses > 1) {
6047 Iter = KnownComputations.erase(Iter);
6048 continue;
6049 }
6050
6051 // Mark as "dead" rather than outright deleting. This is so that other
6052 // peephole style optimizations during or before lowering have access to
6053 // this instruction in undeleted form. See for example
6054 // tryOptimizedCmpxchgCmpBr().
6055 Iter->second.Instr->setDead();
6056 ++Iter;
6057 }
6058 }
6059
6060 TargetHeaderMIPS32::TargetHeaderMIPS32(GlobalContext *Ctx)
6061 : TargetHeaderLowering(Ctx) {}
6062
6063 void TargetHeaderMIPS32::lower() {
6064 if (!BuildDefs::dump())
6065 return;
6066 OstreamLocker L(Ctx);
6067 Ostream &Str = Ctx->getStrEmit();
6068 Str << "\t.set\t"
6069 << "nomicromips\n";
6070 Str << "\t.set\t"
6071 << "nomips16\n";
6072 Str << "\t.set\t"
6073 << "noat\n";
6074 if (getFlags().getUseSandboxing())
6075 Str << "\t.bundle_align_mode 4\n";
6076 }
6077
6078 SmallBitVector TargetMIPS32::TypeToRegisterSet[RCMIPS32_NUM];
6079 SmallBitVector TargetMIPS32::TypeToRegisterSetUnfiltered[RCMIPS32_NUM];
6080 SmallBitVector TargetMIPS32::RegisterAliases[RegMIPS32::Reg_NUM];
6081
6082 TargetMIPS32::Sandboxer::Sandboxer(TargetMIPS32 *Target,
6083 InstBundleLock::Option BundleOption)
6084 : Target(Target), BundleOption(BundleOption) {}
6085
6086 TargetMIPS32::Sandboxer::~Sandboxer() {}
6087
6088 void TargetMIPS32::Sandboxer::createAutoBundle() {
6089 Bundler = makeUnique<AutoBundle>(Target, BundleOption);
6090 }
6091
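// NaCl MIPS sandboxing convention assumed by the Sandboxer helpers below:
// t7 holds the data-address mask and t6 the code-address mask, so memory
// bases, SP updates, return addresses and indirect call targets are masked
// (within an instruction bundle) before being used.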
6092 void TargetMIPS32::Sandboxer::addiu_sp(uint32_t StackOffset) {
6093 Variable *SP = Target->getPhysicalRegister(RegMIPS32::Reg_SP);
6094 if (!Target->NeedSandboxing) {
6095 Target->_addiu(SP, SP, StackOffset);
6096 return;
6097 }
6098 auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
6099 Target->Context.insert<InstFakeDef>(T7);
6100 createAutoBundle();
6101 Target->_addiu(SP, SP, StackOffset);
6102 Target->_and(SP, SP, T7);
6103 }
6104
6105 void TargetMIPS32::Sandboxer::lw(Variable *Dest, OperandMIPS32Mem *Mem) {
6106 Variable *Base = Mem->getBase();
6107 if (Target->NeedSandboxing && (Target->getStackReg() != Base->getRegNum()) &&
6108 (RegMIPS32::Reg_T8 != Base->getRegNum())) {
6109 auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
6110 Target->Context.insert<InstFakeDef>(T7);
6111 createAutoBundle();
6112 Target->_and(Base, Base, T7);
6113 }
6114 Target->_lw(Dest, Mem);
6115 if (Target->NeedSandboxing && (Dest->getRegNum() == Target->getStackReg())) {
6116 auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
6117 Target->Context.insert<InstFakeDef>(T7);
6118 Target->_and(Dest, Dest, T7);
6119 }
6120 }
6121
6122 void TargetMIPS32::Sandboxer::ll(Variable *Dest, OperandMIPS32Mem *Mem) {
6123 Variable *Base = Mem->getBase();
6124 if (Target->NeedSandboxing && (Target->getStackReg() != Base->getRegNum())) {
6125 auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
6126 Target->Context.insert<InstFakeDef>(T7);
6127 createAutoBundle();
6128 Target->_and(Base, Base, T7);
6129 }
6130 Target->_ll(Dest, Mem);
6131 if (Target->NeedSandboxing && (Dest->getRegNum() == Target->getStackReg())) {
6132 auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
6133 Target->Context.insert<InstFakeDef>(T7);
6134 Target->_and(Dest, Dest, T7);
6135 }
6136 }
6137
6138 void TargetMIPS32::Sandboxer::sc(Variable *Dest, OperandMIPS32Mem *Mem) {
6139 Variable *Base = Mem->getBase();
6140 if (Target->NeedSandboxing && (Target->getStackReg() != Base->getRegNum())) {
6141 auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
6142 Target->Context.insert<InstFakeDef>(T7);
6143 createAutoBundle();
6144 Target->_and(Base, Base, T7);
6145 }
6146 Target->_sc(Dest, Mem);
6147 }
6148
6149 void TargetMIPS32::Sandboxer::sw(Variable *Dest, OperandMIPS32Mem *Mem) {
6150 Variable *Base = Mem->getBase();
6151 if (Target->NeedSandboxing && (Target->getStackReg() != Base->getRegNum())) {
6152 auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
6153 Target->Context.insert<InstFakeDef>(T7);
6154 createAutoBundle();
6155 Target->_and(Base, Base, T7);
6156 }
6157 Target->_sw(Dest, Mem);
6158 }
6159
6160 void TargetMIPS32::Sandboxer::lwc1(Variable *Dest, OperandMIPS32Mem *Mem,
6161 RelocOp Reloc) {
6162 Variable *Base = Mem->getBase();
6163 if (Target->NeedSandboxing && (Target->getStackReg() != Base->getRegNum())) {
6164 auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
6165 Target->Context.insert<InstFakeDef>(T7);
6166 createAutoBundle();
6167 Target->_and(Base, Base, T7);
6168 }
6169 Target->_lwc1(Dest, Mem, Reloc);
6170 if (Target->NeedSandboxing && (Dest->getRegNum() == Target->getStackReg())) {
6171 auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
6172 Target->Context.insert<InstFakeDef>(T7);
6173 Target->_and(Dest, Dest, T7);
6174 }
6175 }
6176
6177 void TargetMIPS32::Sandboxer::ldc1(Variable *Dest, OperandMIPS32Mem *Mem,
6178 RelocOp Reloc) {
6179 Variable *Base = Mem->getBase();
6180 if (Target->NeedSandboxing && (Target->getStackReg() != Base->getRegNum())) {
6181 auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
6182 Target->Context.insert<InstFakeDef>(T7);
6183 createAutoBundle();
6184 Target->_and(Base, Base, T7);
6185 }
6186 Target->_ldc1(Dest, Mem, Reloc);
6187 if (Target->NeedSandboxing && (Dest->getRegNum() == Target->getStackReg())) {
6188 auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
6189 Target->Context.insert<InstFakeDef>(T7);
6190 Target->_and(Dest, Dest, T7);
6191 }
6192 }
6193
6194 void TargetMIPS32::Sandboxer::ret(Variable *RetAddr, Variable *RetValue) {
6195 if (!Target->NeedSandboxing) {
6196 Target->_ret(RetAddr, RetValue);
6197 return; // Early return; otherwise a second ret would be emitted below.
}
6198 auto *T6 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T6);
6199 Target->Context.insert<InstFakeDef>(T6);
6200 createAutoBundle();
6201 Target->_and(RetAddr, RetAddr, T6);
6202 Target->_ret(RetAddr, RetValue);
6203 }
6204
6205 void TargetMIPS32::Sandboxer::reset_sp(Variable *Src) {
6206 Variable *SP = Target->getPhysicalRegister(RegMIPS32::Reg_SP);
6207 if (!Target->NeedSandboxing) {
6208 Target->_mov(SP, Src);
6209 return;
6210 }
6211 auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
6212 Target->Context.insert<InstFakeDef>(T7);
6213 createAutoBundle();
6214 Target->_mov(SP, Src);
6215 Target->_and(SP, SP, T7);
6216 Target->getContext().insert<InstFakeUse>(SP);
6217 }
6218
6219 InstMIPS32Call *TargetMIPS32::Sandboxer::jal(Variable *ReturnReg,
6220 Operand *CallTarget) {
6221 if (Target->NeedSandboxing) {
6222 createAutoBundle();
6223 if (auto *CallTargetR = llvm::dyn_cast<Variable>(CallTarget)) {
6224 auto *T6 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T6);
6225 Target->Context.insert<InstFakeDef>(T6);
6226 Target->_and(CallTargetR, CallTargetR, T6);
6227 }
6228 }
6229 return Target->Context.insert<InstMIPS32Call>(ReturnReg, CallTarget);
6230 }
6231
6232 } // end of namespace MIPS32
6233 } // end of namespace Ice
6234