1 //
2 // The Subzero Code Generator
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// \brief Implements the TargetLoweringMIPS32 class, which consists almost
11 /// entirely of the lowering sequence for each high-level instruction.
12 ///
13 //===----------------------------------------------------------------------===//
14
15 #include "IceTargetLoweringMIPS32.h"
16
17 #include "IceCfg.h"
18 #include "IceCfgNode.h"
19 #include "IceClFlags.h"
20 #include "IceDefs.h"
21 #include "IceELFObjectWriter.h"
22 #include "IceGlobalInits.h"
23 #include "IceInstMIPS32.h"
24 #include "IceInstVarIter.h"
25 #include "IceLiveness.h"
26 #include "IceOperand.h"
27 #include "IcePhiLoweringImpl.h"
28 #include "IceRegistersMIPS32.h"
29 #include "IceTargetLoweringMIPS32.def"
30 #include "IceUtils.h"
31 #include "llvm/Support/MathExtras.h"
32
33 namespace MIPS32 {
createTargetLowering(::Ice::Cfg * Func)34 std::unique_ptr<::Ice::TargetLowering> createTargetLowering(::Ice::Cfg *Func) {
35 return ::Ice::MIPS32::TargetMIPS32::create(Func);
36 }
37
38 std::unique_ptr<::Ice::TargetDataLowering>
createTargetDataLowering(::Ice::GlobalContext * Ctx)39 createTargetDataLowering(::Ice::GlobalContext *Ctx) {
40 return ::Ice::MIPS32::TargetDataMIPS32::create(Ctx);
41 }
42
43 std::unique_ptr<::Ice::TargetHeaderLowering>
createTargetHeaderLowering(::Ice::GlobalContext * Ctx)44 createTargetHeaderLowering(::Ice::GlobalContext *Ctx) {
45 return ::Ice::MIPS32::TargetHeaderMIPS32::create(Ctx);
46 }
47
staticInit(::Ice::GlobalContext * Ctx)48 void staticInit(::Ice::GlobalContext *Ctx) {
49 ::Ice::MIPS32::TargetMIPS32::staticInit(Ctx);
50 }
51
shouldBePooled(const::Ice::Constant * C)52 bool shouldBePooled(const ::Ice::Constant *C) {
53 return ::Ice::MIPS32::TargetMIPS32::shouldBePooled(C);
54 }
55
getPointerType()56 ::Ice::Type getPointerType() {
57 return ::Ice::MIPS32::TargetMIPS32::getPointerType();
58 }
59
60 } // end of namespace MIPS32
61
62 namespace Ice {
63 namespace MIPS32 {
64
65 using llvm::isInt;
66
67 namespace {
68
69 // The maximum number of arguments to pass in GPR registers.
70 constexpr uint32_t MIPS32_MAX_GPR_ARG = 4;
71
72 std::array<RegNumT, MIPS32_MAX_GPR_ARG> GPRArgInitializer;
73 std::array<RegNumT, MIPS32_MAX_GPR_ARG / 2> I64ArgInitializer;
74
75 constexpr uint32_t MIPS32_MAX_FP_ARG = 2;
76
77 std::array<RegNumT, MIPS32_MAX_FP_ARG> FP32ArgInitializer;
78 std::array<RegNumT, MIPS32_MAX_FP_ARG> FP64ArgInitializer;
79
getRegClassName(RegClass C)80 const char *getRegClassName(RegClass C) {
81 auto ClassNum = static_cast<RegClassMIPS32>(C);
82 assert(ClassNum < RCMIPS32_NUM);
83 switch (ClassNum) {
84 default:
85 assert(C < RC_Target);
86 return regClassString(C);
87 // Add handling of new register classes below.
88 }
89 }
90
91 // Stack alignment
92 constexpr uint32_t MIPS32_STACK_ALIGNMENT_BYTES = 16;
93
94 // Value is in bytes. Return Value adjusted to the next highest multiple of the
95 // stack alignment required for the given type.
applyStackAlignmentTy(uint32_t Value,Type Ty)96 uint32_t applyStackAlignmentTy(uint32_t Value, Type Ty) {
97 size_t typeAlignInBytes = typeWidthInBytes(Ty);
98 // Vectors are stored on stack with the same alignment as that of int type
99 if (isVectorType(Ty))
100 typeAlignInBytes = typeWidthInBytes(IceType_i64);
101 return Utils::applyAlignment(Value, typeAlignInBytes);
102 }
103
104 // Value is in bytes. Return Value adjusted to the next highest multiple of the
105 // stack alignment.
applyStackAlignment(uint32_t Value)106 uint32_t applyStackAlignment(uint32_t Value) {
107 return Utils::applyAlignment(Value, MIPS32_STACK_ALIGNMENT_BYTES);
108 }
109
110 } // end of anonymous namespace
111
TargetMIPS32(Cfg * Func)112 TargetMIPS32::TargetMIPS32(Cfg *Func)
113 : TargetLowering(Func), NeedSandboxing(SandboxingType == ST_NaCl) {}
114
assignVarStackSlots(VarList & SortedSpilledVariables,size_t SpillAreaPaddingBytes,size_t SpillAreaSizeBytes,size_t GlobalsAndSubsequentPaddingSize)115 void TargetMIPS32::assignVarStackSlots(VarList &SortedSpilledVariables,
116 size_t SpillAreaPaddingBytes,
117 size_t SpillAreaSizeBytes,
118 size_t GlobalsAndSubsequentPaddingSize) {
119 const VariablesMetadata *VMetadata = Func->getVMetadata();
120 size_t GlobalsSpaceUsed = SpillAreaPaddingBytes;
121 size_t NextStackOffset = SpillAreaPaddingBytes;
122 CfgVector<size_t> LocalsSize(Func->getNumNodes());
123 const bool SimpleCoalescing = !callsReturnsTwice();
124 for (Variable *Var : SortedSpilledVariables) {
125 size_t Increment = typeWidthInBytesOnStack(Var->getType());
126 if (SimpleCoalescing && VMetadata->isTracked(Var)) {
127 if (VMetadata->isMultiBlock(Var)) {
128 GlobalsSpaceUsed += Increment;
129 NextStackOffset = GlobalsSpaceUsed;
130 } else {
131 SizeT NodeIndex = VMetadata->getLocalUseNode(Var)->getIndex();
132 LocalsSize[NodeIndex] += Increment;
133 NextStackOffset = SpillAreaPaddingBytes +
134 GlobalsAndSubsequentPaddingSize +
135 LocalsSize[NodeIndex];
136 }
137 } else {
138 NextStackOffset += Increment;
139 }
140 Var->setStackOffset(SpillAreaSizeBytes - NextStackOffset);
141 }
142 }
143
staticInit(GlobalContext * Ctx)144 void TargetMIPS32::staticInit(GlobalContext *Ctx) {
145 (void)Ctx;
146 RegNumT::setLimit(RegMIPS32::Reg_NUM);
147 SmallBitVector IntegerRegisters(RegMIPS32::Reg_NUM);
148 SmallBitVector I64PairRegisters(RegMIPS32::Reg_NUM);
149 SmallBitVector Float32Registers(RegMIPS32::Reg_NUM);
150 SmallBitVector Float64Registers(RegMIPS32::Reg_NUM);
151 SmallBitVector VectorRegisters(RegMIPS32::Reg_NUM);
152 SmallBitVector InvalidRegisters(RegMIPS32::Reg_NUM);
153 #define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt, \
154 isI64Pair, isFP32, isFP64, isVec128, alias_init) \
155 IntegerRegisters[RegMIPS32::val] = isInt; \
156 I64PairRegisters[RegMIPS32::val] = isI64Pair; \
157 Float32Registers[RegMIPS32::val] = isFP32; \
158 Float64Registers[RegMIPS32::val] = isFP64; \
159 VectorRegisters[RegMIPS32::val] = isVec128; \
160 RegisterAliases[RegMIPS32::val].resize(RegMIPS32::Reg_NUM); \
161 for (SizeT RegAlias : alias_init) { \
162 assert(!RegisterAliases[RegMIPS32::val][RegAlias] && \
163 "Duplicate alias for " #val); \
164 RegisterAliases[RegMIPS32::val].set(RegAlias); \
165 } \
166 RegisterAliases[RegMIPS32::val].resize(RegMIPS32::Reg_NUM); \
167 assert(RegisterAliases[RegMIPS32::val][RegMIPS32::val]);
168 REGMIPS32_TABLE;
169 #undef X
170
171 // TODO(mohit.bhakkad): Change these inits once we provide argument related
172 // field in register tables
173 for (size_t i = 0; i < MIPS32_MAX_GPR_ARG; i++)
174 GPRArgInitializer[i] = RegNumT::fixme(RegMIPS32::Reg_A0 + i);
175
176 for (size_t i = 0; i < MIPS32_MAX_GPR_ARG / 2; i++)
177 I64ArgInitializer[i] = RegNumT::fixme(RegMIPS32::Reg_A0A1 + i);
178
179 for (size_t i = 0; i < MIPS32_MAX_FP_ARG; i++) {
180 FP32ArgInitializer[i] = RegNumT::fixme(RegMIPS32::Reg_F12 + i * 2);
181 FP64ArgInitializer[i] = RegNumT::fixme(RegMIPS32::Reg_F12F13 + i);
182 }
183
184 TypeToRegisterSet[IceType_void] = InvalidRegisters;
185 TypeToRegisterSet[IceType_i1] = IntegerRegisters;
186 TypeToRegisterSet[IceType_i8] = IntegerRegisters;
187 TypeToRegisterSet[IceType_i16] = IntegerRegisters;
188 TypeToRegisterSet[IceType_i32] = IntegerRegisters;
189 TypeToRegisterSet[IceType_i64] = IntegerRegisters;
190 TypeToRegisterSet[IceType_f32] = Float32Registers;
191 TypeToRegisterSet[IceType_f64] = Float64Registers;
192 TypeToRegisterSet[IceType_v4i1] = VectorRegisters;
193 TypeToRegisterSet[IceType_v8i1] = VectorRegisters;
194 TypeToRegisterSet[IceType_v16i1] = VectorRegisters;
195 TypeToRegisterSet[IceType_v16i8] = VectorRegisters;
196 TypeToRegisterSet[IceType_v8i16] = VectorRegisters;
197 TypeToRegisterSet[IceType_v4i32] = VectorRegisters;
198 TypeToRegisterSet[IceType_v4f32] = VectorRegisters;
199
200 for (size_t i = 0; i < llvm::array_lengthof(TypeToRegisterSet); ++i)
201 TypeToRegisterSetUnfiltered[i] = TypeToRegisterSet[i];
202
203 filterTypeToRegisterSet(Ctx, RegMIPS32::Reg_NUM, TypeToRegisterSet,
204 llvm::array_lengthof(TypeToRegisterSet),
205 RegMIPS32::getRegName, getRegClassName);
206 }
207
unsetIfNonLeafFunc()208 void TargetMIPS32::unsetIfNonLeafFunc() {
209 for (CfgNode *Node : Func->getNodes()) {
210 for (Inst &Instr : Node->getInsts()) {
211 if (llvm::isa<InstCall>(&Instr)) {
212 // Unset MaybeLeafFunc if call instruction exists.
213 MaybeLeafFunc = false;
214 return;
215 }
216 }
217 }
218 }
219
getStackAlignment() const220 uint32_t TargetMIPS32::getStackAlignment() const {
221 return MIPS32_STACK_ALIGNMENT_BYTES;
222 }
223
getCallStackArgumentsSizeBytes(const InstCall * Call)224 uint32_t TargetMIPS32::getCallStackArgumentsSizeBytes(const InstCall *Call) {
225 TargetMIPS32::CallingConv CC;
226 RegNumT DummyReg;
227 size_t OutArgsSizeBytes = 0;
228 Variable *Dest = Call->getDest();
229 bool PartialOnStack = false;
230 if (Dest != nullptr && isVectorFloatingType(Dest->getType())) {
231 CC.discardReg(RegMIPS32::Reg_A0);
232 // Next vector is partially on stack
233 PartialOnStack = true;
234 }
235 for (SizeT i = 0, NumArgs = Call->getNumArgs(); i < NumArgs; ++i) {
236 Operand *Arg = legalizeUndef(Call->getArg(i));
237 const Type Ty = Arg->getType();
238 RegNumT RegNum;
239 if (CC.argInReg(Ty, i, &RegNum)) {
240 // If PartialOnStack is true and if this is a vector type then last two
241 // elements are on stack
242 if (PartialOnStack && isVectorType(Ty)) {
243 OutArgsSizeBytes = applyStackAlignmentTy(OutArgsSizeBytes, IceType_i64);
244 OutArgsSizeBytes += typeWidthInBytesOnStack(IceType_i32) * 2;
245 }
246 continue;
247 }
248 OutArgsSizeBytes = applyStackAlignmentTy(OutArgsSizeBytes, Ty);
249 OutArgsSizeBytes += typeWidthInBytesOnStack(Ty);
250 }
251 // Add size of argument save area
252 constexpr int BytesPerStackArg = 4;
253 OutArgsSizeBytes += MIPS32_MAX_GPR_ARG * BytesPerStackArg;
254 return applyStackAlignment(OutArgsSizeBytes);
255 }
256
257 namespace {
getConstantMemoryOrder(Operand * Opnd)258 inline uint64_t getConstantMemoryOrder(Operand *Opnd) {
259 if (auto *Integer = llvm::dyn_cast<ConstantInteger32>(Opnd))
260 return Integer->getValue();
261 return Intrinsics::MemoryOrderInvalid;
262 }
263 }
264
genTargetHelperCallFor(Inst * Instr)265 void TargetMIPS32::genTargetHelperCallFor(Inst *Instr) {
266 constexpr bool NoTailCall = false;
267 constexpr bool IsTargetHelperCall = true;
268 Variable *Dest = Instr->getDest();
269 const Type DestTy = Dest ? Dest->getType() : IceType_void;
270
271 switch (Instr->getKind()) {
272 default:
273 return;
274 case Inst::Select: {
275 if (isVectorType(DestTy)) {
276 Operand *SrcT = llvm::cast<InstSelect>(Instr)->getTrueOperand();
277 Operand *SrcF = llvm::cast<InstSelect>(Instr)->getFalseOperand();
278 Operand *Cond = llvm::cast<InstSelect>(Instr)->getCondition();
279 Variable *T = Func->makeVariable(DestTy);
280 auto *Undef = ConstantUndef::create(Ctx, DestTy);
281 Context.insert<InstAssign>(T, Undef);
282 auto *VarVecOn32 = llvm::cast<VariableVecOn32>(T);
283 VarVecOn32->initVecElement(Func);
284 for (SizeT I = 0; I < typeNumElements(DestTy); ++I) {
285 auto *Index = Ctx->getConstantInt32(I);
286 auto *OpC = Func->makeVariable(typeElementType(Cond->getType()));
287 Context.insert<InstExtractElement>(OpC, Cond, Index);
288 auto *OpT = Func->makeVariable(typeElementType(DestTy));
289 Context.insert<InstExtractElement>(OpT, SrcT, Index);
290 auto *OpF = Func->makeVariable(typeElementType(DestTy));
291 Context.insert<InstExtractElement>(OpF, SrcF, Index);
292 auto *Dst = Func->makeVariable(typeElementType(DestTy));
293 Variable *DestT = Func->makeVariable(DestTy);
294 Context.insert<InstSelect>(Dst, OpC, OpT, OpF);
295 Context.insert<InstInsertElement>(DestT, T, Dst, Index);
296 T = DestT;
297 }
298 Context.insert<InstAssign>(Dest, T);
299 Instr->setDeleted();
300 }
301 return;
302 }
303 case Inst::Fcmp: {
304 if (isVectorType(DestTy)) {
305 InstFcmp::FCond Cond = llvm::cast<InstFcmp>(Instr)->getCondition();
306 Operand *Src0 = Instr->getSrc(0);
307 Operand *Src1 = Instr->getSrc(1);
308 Variable *T = Func->makeVariable(IceType_v4f32);
309 auto *Undef = ConstantUndef::create(Ctx, IceType_v4f32);
310 Context.insert<InstAssign>(T, Undef);
311 auto *VarVecOn32 = llvm::cast<VariableVecOn32>(T);
312 VarVecOn32->initVecElement(Func);
313 for (SizeT I = 0; I < typeNumElements(IceType_v4f32); ++I) {
314 auto *Index = Ctx->getConstantInt32(I);
315 auto *Op0 = Func->makeVariable(IceType_f32);
316 Context.insert<InstExtractElement>(Op0, Src0, Index);
317 auto *Op1 = Func->makeVariable(IceType_f32);
318 Context.insert<InstExtractElement>(Op1, Src1, Index);
319 auto *Dst = Func->makeVariable(IceType_f32);
320 Variable *DestT = Func->makeVariable(IceType_v4f32);
321 Context.insert<InstFcmp>(Cond, Dst, Op0, Op1);
322 Context.insert<InstInsertElement>(DestT, T, Dst, Index);
323 T = DestT;
324 }
325 Context.insert<InstAssign>(Dest, T);
326 Instr->setDeleted();
327 }
328 return;
329 }
330 case Inst::Icmp: {
331 if (isVectorType(DestTy)) {
332 InstIcmp::ICond Cond = llvm::cast<InstIcmp>(Instr)->getCondition();
333 Operand *Src0 = Instr->getSrc(0);
334 Operand *Src1 = Instr->getSrc(1);
335 const Type SrcType = Src0->getType();
336 Variable *T = Func->makeVariable(DestTy);
337 auto *Undef = ConstantUndef::create(Ctx, DestTy);
338 Context.insert<InstAssign>(T, Undef);
339 auto *VarVecOn32 = llvm::cast<VariableVecOn32>(T);
340 VarVecOn32->initVecElement(Func);
341 for (SizeT I = 0; I < typeNumElements(SrcType); ++I) {
342 auto *Index = Ctx->getConstantInt32(I);
343 auto *Op0 = Func->makeVariable(typeElementType(SrcType));
344 Context.insert<InstExtractElement>(Op0, Src0, Index);
345 auto *Op1 = Func->makeVariable(typeElementType(SrcType));
346 Context.insert<InstExtractElement>(Op1, Src1, Index);
347 auto *Dst = Func->makeVariable(typeElementType(DestTy));
348 Variable *DestT = Func->makeVariable(DestTy);
349 Context.insert<InstIcmp>(Cond, Dst, Op0, Op1);
350 Context.insert<InstInsertElement>(DestT, T, Dst, Index);
351 T = DestT;
352 }
353 Context.insert<InstAssign>(Dest, T);
354 Instr->setDeleted();
355 }
356 return;
357 }
358 case Inst::Arithmetic: {
359 const InstArithmetic::OpKind Op =
360 llvm::cast<InstArithmetic>(Instr)->getOp();
361 if (isVectorType(DestTy)) {
362 scalarizeArithmetic(Op, Dest, Instr->getSrc(0), Instr->getSrc(1));
363 Instr->setDeleted();
364 return;
365 }
366 switch (DestTy) {
367 default:
368 return;
369 case IceType_i64: {
370 RuntimeHelper HelperID = RuntimeHelper::H_Num;
371 switch (Op) {
372 default:
373 return;
374 case InstArithmetic::Udiv:
375 HelperID = RuntimeHelper::H_udiv_i64;
376 break;
377 case InstArithmetic::Sdiv:
378 HelperID = RuntimeHelper::H_sdiv_i64;
379 break;
380 case InstArithmetic::Urem:
381 HelperID = RuntimeHelper::H_urem_i64;
382 break;
383 case InstArithmetic::Srem:
384 HelperID = RuntimeHelper::H_srem_i64;
385 break;
386 }
387
388 if (HelperID == RuntimeHelper::H_Num) {
389 return;
390 }
391
392 Operand *TargetHelper = Ctx->getRuntimeHelperFunc(HelperID);
393 constexpr SizeT MaxArgs = 2;
394 auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
395 NoTailCall, IsTargetHelperCall);
396 Call->addArg(Instr->getSrc(0));
397 Call->addArg(Instr->getSrc(1));
398 Instr->setDeleted();
399 return;
400 }
401 case IceType_f32:
402 case IceType_f64: {
403 if (Op != InstArithmetic::Frem) {
404 return;
405 }
406 constexpr SizeT MaxArgs = 2;
407 Operand *TargetHelper = Ctx->getRuntimeHelperFunc(
408 DestTy == IceType_f32 ? RuntimeHelper::H_frem_f32
409 : RuntimeHelper::H_frem_f64);
410 auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
411 NoTailCall, IsTargetHelperCall);
412 Call->addArg(Instr->getSrc(0));
413 Call->addArg(Instr->getSrc(1));
414 Instr->setDeleted();
415 return;
416 }
417 }
418 llvm::report_fatal_error("Control flow should never have reached here.");
419 }
420 case Inst::Cast: {
421 Operand *Src0 = Instr->getSrc(0);
422 const Type SrcTy = Src0->getType();
423 auto *CastInstr = llvm::cast<InstCast>(Instr);
424 const InstCast::OpKind CastKind = CastInstr->getCastKind();
425
426 if (isVectorType(DestTy)) {
427 Variable *T = Func->makeVariable(DestTy);
428 auto *VarVecOn32 = llvm::cast<VariableVecOn32>(T);
429 VarVecOn32->initVecElement(Func);
430 auto *Undef = ConstantUndef::create(Ctx, DestTy);
431 Context.insert<InstAssign>(T, Undef);
432 for (SizeT I = 0; I < typeNumElements(DestTy); ++I) {
433 auto *Index = Ctx->getConstantInt32(I);
434 auto *Op = Func->makeVariable(typeElementType(SrcTy));
435 Context.insert<InstExtractElement>(Op, Src0, Index);
436 auto *Dst = Func->makeVariable(typeElementType(DestTy));
437 Variable *DestT = Func->makeVariable(DestTy);
438 Context.insert<InstCast>(CastKind, Dst, Op);
439 Context.insert<InstInsertElement>(DestT, T, Dst, Index);
440 T = DestT;
441 }
442 Context.insert<InstAssign>(Dest, T);
443 Instr->setDeleted();
444 return;
445 }
446
447 switch (CastKind) {
448 default:
449 return;
450 case InstCast::Fptosi:
451 case InstCast::Fptoui: {
452 if ((DestTy != IceType_i32) && (DestTy != IceType_i64)) {
453 return;
454 }
455 const bool DestIs32 = DestTy == IceType_i32;
456 const bool DestIsSigned = CastKind == InstCast::Fptosi;
457 const bool Src0IsF32 = isFloat32Asserting32Or64(SrcTy);
458 RuntimeHelper RTHFunc = RuntimeHelper::H_Num;
459 if (DestIsSigned) {
460 if (DestIs32) {
461 return;
462 }
463 RTHFunc = Src0IsF32 ? RuntimeHelper::H_fptosi_f32_i64
464 : RuntimeHelper::H_fptosi_f64_i64;
465 } else {
466 RTHFunc = Src0IsF32 ? (DestIs32 ? RuntimeHelper::H_fptoui_f32_i32
467 : RuntimeHelper::H_fptoui_f32_i64)
468 : (DestIs32 ? RuntimeHelper::H_fptoui_f64_i32
469 : RuntimeHelper::H_fptoui_f64_i64);
470 }
471 Operand *TargetHelper = Ctx->getRuntimeHelperFunc(RTHFunc);
472 static constexpr SizeT MaxArgs = 1;
473 auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
474 NoTailCall, IsTargetHelperCall);
475 Call->addArg(Src0);
476 Instr->setDeleted();
477 return;
478 }
479 case InstCast::Sitofp:
480 case InstCast::Uitofp: {
481 if ((SrcTy != IceType_i32) && (SrcTy != IceType_i64)) {
482 return;
483 }
484 const bool SourceIs32 = SrcTy == IceType_i32;
485 const bool SourceIsSigned = CastKind == InstCast::Sitofp;
486 const bool DestIsF32 = isFloat32Asserting32Or64(DestTy);
487 RuntimeHelper RTHFunc = RuntimeHelper::H_Num;
488 if (SourceIsSigned) {
489 if (SourceIs32) {
490 return;
491 }
492 RTHFunc = DestIsF32 ? RuntimeHelper::H_sitofp_i64_f32
493 : RuntimeHelper::H_sitofp_i64_f64;
494 } else {
495 RTHFunc = DestIsF32 ? (SourceIs32 ? RuntimeHelper::H_uitofp_i32_f32
496 : RuntimeHelper::H_uitofp_i64_f32)
497 : (SourceIs32 ? RuntimeHelper::H_uitofp_i32_f64
498 : RuntimeHelper::H_uitofp_i64_f64);
499 }
500 Operand *TargetHelper = Ctx->getRuntimeHelperFunc(RTHFunc);
501 static constexpr SizeT MaxArgs = 1;
502 auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
503 NoTailCall, IsTargetHelperCall);
504 Call->addArg(Src0);
505 Instr->setDeleted();
506 return;
507 }
508 case InstCast::Bitcast: {
509 if (DestTy == SrcTy) {
510 return;
511 }
512 Variable *CallDest = Dest;
513 RuntimeHelper HelperID = RuntimeHelper::H_Num;
514 switch (DestTy) {
515 default:
516 return;
517 case IceType_i8:
518 assert(SrcTy == IceType_v8i1);
519 HelperID = RuntimeHelper::H_bitcast_8xi1_i8;
520 CallDest = Func->makeVariable(IceType_i32);
521 break;
522 case IceType_i16:
523 assert(SrcTy == IceType_v16i1);
524 HelperID = RuntimeHelper::H_bitcast_16xi1_i16;
525 CallDest = Func->makeVariable(IceType_i32);
526 break;
527 case IceType_v8i1: {
528 assert(SrcTy == IceType_i8);
529 HelperID = RuntimeHelper::H_bitcast_i8_8xi1;
530 Variable *Src0AsI32 = Func->makeVariable(stackSlotType());
531 // Arguments to functions are required to be at least 32 bits wide.
532 Context.insert<InstCast>(InstCast::Zext, Src0AsI32, Src0);
533 Src0 = Src0AsI32;
534 } break;
535 case IceType_v16i1: {
536 assert(SrcTy == IceType_i16);
537 HelperID = RuntimeHelper::H_bitcast_i16_16xi1;
538 Variable *Src0AsI32 = Func->makeVariable(stackSlotType());
539 // Arguments to functions are required to be at least 32 bits wide.
540 Context.insert<InstCast>(InstCast::Zext, Src0AsI32, Src0);
541 Src0 = Src0AsI32;
542 } break;
543 }
544 constexpr SizeT MaxSrcs = 1;
545 InstCall *Call = makeHelperCall(HelperID, CallDest, MaxSrcs);
546 Call->addArg(Src0);
547 Context.insert(Call);
548 // The PNaCl ABI disallows i8/i16 return types, so truncate the helper
549 // call result to the appropriate type as necessary.
550 if (CallDest->getType() != DestTy)
551 Context.insert<InstCast>(InstCast::Trunc, Dest, CallDest);
552 Instr->setDeleted();
553 return;
554 }
555 case InstCast::Trunc: {
556 if (DestTy == SrcTy) {
557 return;
558 }
559 if (!isVectorType(SrcTy)) {
560 return;
561 }
562 assert(typeNumElements(DestTy) == typeNumElements(SrcTy));
563 assert(typeElementType(DestTy) == IceType_i1);
564 assert(isVectorIntegerType(SrcTy));
565 return;
566 }
567 case InstCast::Sext:
568 case InstCast::Zext: {
569 if (DestTy == SrcTy) {
570 return;
571 }
572 if (!isVectorType(DestTy)) {
573 return;
574 }
575 assert(typeNumElements(DestTy) == typeNumElements(SrcTy));
576 assert(typeElementType(SrcTy) == IceType_i1);
577 assert(isVectorIntegerType(DestTy));
578 return;
579 }
580 }
581 llvm::report_fatal_error("Control flow should never have reached here.");
582 }
583 case Inst::IntrinsicCall: {
584 auto *IntrinsicCall = llvm::cast<InstIntrinsicCall>(Instr);
585 Intrinsics::IntrinsicID ID = IntrinsicCall->getIntrinsicInfo().ID;
586 if (isVectorType(DestTy) && ID == Intrinsics::Fabs) {
587 Operand *Src0 = IntrinsicCall->getArg(0);
588 GlobalString FabsFloat = Ctx->getGlobalString("llvm.fabs.f32");
589 Operand *CallTarget = Ctx->getConstantExternSym(FabsFloat);
590 GlobalString FabsVec = Ctx->getGlobalString("llvm.fabs.v4f32");
591 bool BadIntrinsic = false;
592 const Intrinsics::FullIntrinsicInfo *FullInfo =
593 Ctx->getIntrinsicsInfo().find(FabsVec, BadIntrinsic);
594 Intrinsics::IntrinsicInfo Info = FullInfo->Info;
595
596 Variable *T = Func->makeVariable(IceType_v4f32);
597 auto *Undef = ConstantUndef::create(Ctx, IceType_v4f32);
598 Context.insert<InstAssign>(T, Undef);
599 auto *VarVecOn32 = llvm::cast<VariableVecOn32>(T);
600 VarVecOn32->initVecElement(Func);
601
602 for (SizeT i = 0; i < typeNumElements(IceType_v4f32); ++i) {
603 auto *Index = Ctx->getConstantInt32(i);
604 auto *Op = Func->makeVariable(IceType_f32);
605 Context.insert<InstExtractElement>(Op, Src0, Index);
606 auto *Res = Func->makeVariable(IceType_f32);
607 Variable *DestT = Func->makeVariable(IceType_v4f32);
608 auto *Call =
609 Context.insert<InstIntrinsicCall>(1, Res, CallTarget, Info);
610 Call->addArg(Op);
611 Context.insert<InstInsertElement>(DestT, T, Res, Index);
612 T = DestT;
613 }
614
615 Context.insert<InstAssign>(Dest, T);
616
617 Instr->setDeleted();
618 return;
619 }
620 switch (ID) {
621 default:
622 return;
623 case Intrinsics::AtomicLoad: {
624 if (DestTy != IceType_i64)
625 return;
626 if (!Intrinsics::isMemoryOrderValid(
627 ID, getConstantMemoryOrder(IntrinsicCall->getArg(1)))) {
628 Func->setError("Unexpected memory ordering for AtomicLoad");
629 return;
630 }
631 Operand *Addr = IntrinsicCall->getArg(0);
632 Operand *TargetHelper = Ctx->getConstantExternSym(
633 Ctx->getGlobalString("__sync_val_compare_and_swap_8"));
634 static constexpr SizeT MaxArgs = 3;
635 auto *_0 = Ctx->getConstantZero(IceType_i64);
636 auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
637 NoTailCall, IsTargetHelperCall);
638 Call->addArg(Addr);
639 Call->addArg(_0);
640 Call->addArg(_0);
641 Context.insert<InstMIPS32Sync>();
642 Instr->setDeleted();
643 return;
644 }
645 case Intrinsics::AtomicStore: {
646 Operand *Val = IntrinsicCall->getArg(0);
647 if (Val->getType() != IceType_i64)
648 return;
649 if (!Intrinsics::isMemoryOrderValid(
650 ID, getConstantMemoryOrder(IntrinsicCall->getArg(2)))) {
651 Func->setError("Unexpected memory ordering for AtomicStore");
652 return;
653 }
654 Operand *Addr = IntrinsicCall->getArg(1);
655 Variable *NoDest = nullptr;
656 Operand *TargetHelper = Ctx->getConstantExternSym(
657 Ctx->getGlobalString("__sync_lock_test_and_set_8"));
658 Context.insert<InstMIPS32Sync>();
659 static constexpr SizeT MaxArgs = 2;
660 auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper,
661 NoTailCall, IsTargetHelperCall);
662 Call->addArg(Addr);
663 Call->addArg(Val);
664 Context.insert<InstMIPS32Sync>();
665 Instr->setDeleted();
666 return;
667 }
668 case Intrinsics::AtomicCmpxchg: {
669 if (DestTy != IceType_i64)
670 return;
671 if (!Intrinsics::isMemoryOrderValid(
672 ID, getConstantMemoryOrder(IntrinsicCall->getArg(3)),
673 getConstantMemoryOrder(IntrinsicCall->getArg(4)))) {
674 Func->setError("Unexpected memory ordering for AtomicCmpxchg");
675 return;
676 }
677 Operand *Addr = IntrinsicCall->getArg(0);
678 Operand *Oldval = IntrinsicCall->getArg(1);
679 Operand *Newval = IntrinsicCall->getArg(2);
680 Operand *TargetHelper = Ctx->getConstantExternSym(
681 Ctx->getGlobalString("__sync_val_compare_and_swap_8"));
682 Context.insert<InstMIPS32Sync>();
683 static constexpr SizeT MaxArgs = 3;
684 auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
685 NoTailCall, IsTargetHelperCall);
686 Call->addArg(Addr);
687 Call->addArg(Oldval);
688 Call->addArg(Newval);
689 Context.insert<InstMIPS32Sync>();
690 Instr->setDeleted();
691 return;
692 }
693 case Intrinsics::AtomicRMW: {
694 if (DestTy != IceType_i64)
695 return;
696 if (!Intrinsics::isMemoryOrderValid(
697 ID, getConstantMemoryOrder(IntrinsicCall->getArg(3)))) {
698 Func->setError("Unexpected memory ordering for AtomicRMW");
699 return;
700 }
701 auto Operation = static_cast<Intrinsics::AtomicRMWOperation>(
702 llvm::cast<ConstantInteger32>(IntrinsicCall->getArg(0))->getValue());
703 auto *Addr = IntrinsicCall->getArg(1);
704 auto *Newval = IntrinsicCall->getArg(2);
705 Operand *TargetHelper;
706 switch (Operation) {
707 case Intrinsics::AtomicAdd:
708 TargetHelper = Ctx->getConstantExternSym(
709 Ctx->getGlobalString("__sync_fetch_and_add_8"));
710 break;
711 case Intrinsics::AtomicSub:
712 TargetHelper = Ctx->getConstantExternSym(
713 Ctx->getGlobalString("__sync_fetch_and_sub_8"));
714 break;
715 case Intrinsics::AtomicOr:
716 TargetHelper = Ctx->getConstantExternSym(
717 Ctx->getGlobalString("__sync_fetch_and_or_8"));
718 break;
719 case Intrinsics::AtomicAnd:
720 TargetHelper = Ctx->getConstantExternSym(
721 Ctx->getGlobalString("__sync_fetch_and_and_8"));
722 break;
723 case Intrinsics::AtomicXor:
724 TargetHelper = Ctx->getConstantExternSym(
725 Ctx->getGlobalString("__sync_fetch_and_xor_8"));
726 break;
727 case Intrinsics::AtomicExchange:
728 TargetHelper = Ctx->getConstantExternSym(
729 Ctx->getGlobalString("__sync_lock_test_and_set_8"));
730 break;
731 default:
732 llvm::report_fatal_error("Unknown AtomicRMW operation");
733 return;
734 }
735 Context.insert<InstMIPS32Sync>();
736 static constexpr SizeT MaxArgs = 2;
737 auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
738 NoTailCall, IsTargetHelperCall);
739 Call->addArg(Addr);
740 Call->addArg(Newval);
741 Context.insert<InstMIPS32Sync>();
742 Instr->setDeleted();
743 return;
744 }
745 case Intrinsics::Ctpop: {
746 Operand *Src0 = IntrinsicCall->getArg(0);
747 Operand *TargetHelper =
748 Ctx->getRuntimeHelperFunc(isInt32Asserting32Or64(Src0->getType())
749 ? RuntimeHelper::H_call_ctpop_i32
750 : RuntimeHelper::H_call_ctpop_i64);
751 static constexpr SizeT MaxArgs = 1;
752 auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
753 NoTailCall, IsTargetHelperCall);
754 Call->addArg(Src0);
755 Instr->setDeleted();
756 return;
757 }
758 case Intrinsics::Longjmp: {
759 static constexpr SizeT MaxArgs = 2;
760 static constexpr Variable *NoDest = nullptr;
761 Operand *TargetHelper =
762 Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_longjmp);
763 auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper,
764 NoTailCall, IsTargetHelperCall);
765 Call->addArg(IntrinsicCall->getArg(0));
766 Call->addArg(IntrinsicCall->getArg(1));
767 Instr->setDeleted();
768 return;
769 }
770 case Intrinsics::Memcpy: {
771 static constexpr SizeT MaxArgs = 3;
772 static constexpr Variable *NoDest = nullptr;
773 Operand *TargetHelper =
774 Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_memcpy);
775 auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper,
776 NoTailCall, IsTargetHelperCall);
777 Call->addArg(IntrinsicCall->getArg(0));
778 Call->addArg(IntrinsicCall->getArg(1));
779 Call->addArg(IntrinsicCall->getArg(2));
780 Instr->setDeleted();
781 return;
782 }
783 case Intrinsics::Memmove: {
784 static constexpr SizeT MaxArgs = 3;
785 static constexpr Variable *NoDest = nullptr;
786 Operand *TargetHelper =
787 Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_memmove);
788 auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper,
789 NoTailCall, IsTargetHelperCall);
790 Call->addArg(IntrinsicCall->getArg(0));
791 Call->addArg(IntrinsicCall->getArg(1));
792 Call->addArg(IntrinsicCall->getArg(2));
793 Instr->setDeleted();
794 return;
795 }
796 case Intrinsics::Memset: {
797 Operand *ValOp = IntrinsicCall->getArg(1);
798 assert(ValOp->getType() == IceType_i8);
799 Variable *ValExt = Func->makeVariable(stackSlotType());
800 Context.insert<InstCast>(InstCast::Zext, ValExt, ValOp);
801
802 static constexpr SizeT MaxArgs = 3;
803 static constexpr Variable *NoDest = nullptr;
804 Operand *TargetHelper =
805 Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_memset);
806 auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper,
807 NoTailCall, IsTargetHelperCall);
808 Call->addArg(IntrinsicCall->getArg(0));
809 Call->addArg(ValExt);
810 Call->addArg(IntrinsicCall->getArg(2));
811 Instr->setDeleted();
812 return;
813 }
814 case Intrinsics::NaClReadTP: {
815 if (SandboxingType == ST_NaCl) {
816 return;
817 }
818 static constexpr SizeT MaxArgs = 0;
819 assert(SandboxingType != ST_Nonsfi);
820 Operand *TargetHelper =
821 Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_read_tp);
822 Context.insert<InstCall>(MaxArgs, Dest, TargetHelper, NoTailCall,
823 IsTargetHelperCall);
824 Instr->setDeleted();
825 return;
826 }
827 case Intrinsics::Setjmp: {
828 static constexpr SizeT MaxArgs = 1;
829 Operand *TargetHelper =
830 Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_setjmp);
831 auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
832 NoTailCall, IsTargetHelperCall);
833 Call->addArg(IntrinsicCall->getArg(0));
834 Instr->setDeleted();
835 return;
836 }
837 }
838 llvm::report_fatal_error("Control flow should never have reached here.");
839 }
840 }
841 }
842
findMaxStackOutArgsSize()843 void TargetMIPS32::findMaxStackOutArgsSize() {
844 // MinNeededOutArgsBytes should be updated if the Target ever creates a
845 // high-level InstCall that requires more stack bytes.
846 size_t MinNeededOutArgsBytes = 0;
847 if (!MaybeLeafFunc)
848 MinNeededOutArgsBytes = MIPS32_MAX_GPR_ARG * 4;
849 MaxOutArgsSizeBytes = MinNeededOutArgsBytes;
850 for (CfgNode *Node : Func->getNodes()) {
851 Context.init(Node);
852 while (!Context.atEnd()) {
853 PostIncrLoweringContext PostIncrement(Context);
854 Inst *CurInstr = iteratorToInst(Context.getCur());
855 if (auto *Call = llvm::dyn_cast<InstCall>(CurInstr)) {
856 SizeT OutArgsSizeBytes = getCallStackArgumentsSizeBytes(Call);
857 MaxOutArgsSizeBytes = std::max(MaxOutArgsSizeBytes, OutArgsSizeBytes);
858 }
859 }
860 }
861 CurrentAllocaOffset = MaxOutArgsSizeBytes;
862 }
863
/// Runs the O2 translation pipeline on Func: helper-call generation, alloca
/// processing, Phi lowering, address-mode optimization, argument lowering,
/// code generation, global register allocation, frame generation,
/// post-lowering legalization, node contraction/reordering, branch
/// optimization and optional nop insertion.
///
/// Most stages are followed by a Func->hasError() check; the pipeline returns
/// immediately when an error has been recorded. The order of the stages is
/// significant.
void TargetMIPS32::translateO2() {
  // Times the whole O2 translation under the TT_O2 timer.
  TimerMarker T(TimerStack::TT_O2, Func);

  // TODO(stichnot): share passes with X86?
  // https://code.google.com/p/nativeclient/issues/detail?id=4094
  genTargetHelperCalls();

  unsetIfNonLeafFunc();

  // Must run after helper calls have been generated so that their outgoing
  // argument areas are accounted for.
  findMaxStackOutArgsSize();

  // Merge Alloca instructions, and lay out the stack.
  static constexpr bool SortAndCombineAllocas = true;
  Func->processAllocas(SortAndCombineAllocas);
  Func->dump("After Alloca processing");

  // Simple Phi lowering is only done here when edge splitting is disabled;
  // otherwise advancedPhiLowering() runs after register allocation (below).
  if (!getFlags().getEnablePhiEdgeSplit()) {
    // Lower Phi instructions.
    Func->placePhiLoads();
    if (Func->hasError())
      return;
    Func->placePhiStores();
    if (Func->hasError())
      return;
    Func->deletePhis();
    if (Func->hasError())
      return;
    Func->dump("After Phi lowering");
  }

  // Address mode optimization.
  Func->getVMetadata()->init(VMK_SingleDefs);
  Func->doAddressOpt();

  // Argument lowering
  Func->doArgLowering();

  // Target lowering. This requires liveness analysis for some parts of the
  // lowering decisions, such as compare/branch fusing. If non-lightweight
  // liveness analysis is used, the instructions need to be renumbered first.
  // TODO: This renumbering should only be necessary if we're actually
  // calculating live intervals, which we only do for register allocation.
  Func->renumberInstructions();
  if (Func->hasError())
    return;

  // TODO: It should be sufficient to use the fastest liveness calculation,
  // i.e. livenessLightweight(). However, for some reason that slows down the
  // rest of the translation. Investigate.
  Func->liveness(Liveness_Basic);
  if (Func->hasError())
    return;
  // NOTE(review): this dump is labelled as the address-mode-opt dump but is
  // emitted after argument lowering and basic liveness as well.
  Func->dump("After MIPS32 address mode opt");

  Func->genCode();
  if (Func->hasError())
    return;
  Func->dump("After MIPS32 codegen");

  // Register allocation. This requires instruction renumbering and full
  // liveness analysis.
  Func->renumberInstructions();
  if (Func->hasError())
    return;
  Func->liveness(Liveness_Intervals);
  if (Func->hasError())
    return;
  // The post-codegen dump is done here, after liveness analysis and associated
  // cleanup, to make the dump cleaner and more useful.
  Func->dump("After initial MIPS32 codegen");
  // Validate the live range computations. The expensive validation call is
  // deliberately only made when assertions are enabled.
  assert(Func->validateLiveness());
  Func->getVMetadata()->init(VMK_All);
  regAlloc(RAK_Global);
  if (Func->hasError())
    return;
  Func->dump("After linear scan regalloc");

  // Counterpart of the simple Phi lowering skipped above.
  if (getFlags().getEnablePhiEdgeSplit()) {
    Func->advancedPhiLowering();
    Func->dump("After advanced Phi lowering");
  }

  // Stack frame mapping.
  Func->genFrame();
  if (Func->hasError())
    return;
  Func->dump("After stack frame mapping");

  postLowerLegalization();
  if (Func->hasError())
    return;
  Func->dump("After postLowerLegalization");

  Func->contractEmptyNodes();
  Func->reorderNodes();

  // Branch optimization. This needs to be done just before code emission. In
  // particular, no transformations that insert or reorder CfgNodes should be
  // done after branch optimization. We go ahead and do it before nop insertion
  // to reduce the amount of work needed for searching for opportunities.
  Func->doBranchOpt();
  Func->dump("After branch optimization");

  // Nop insertion
  if (getFlags().getShouldDoNopInsertion()) {
    Func->doNopInsertion();
  }
}
974
/// Runs the Om1 translation pipeline on Func. Compared with translateO2() it
/// does not merge allocas, always uses the simple Phi lowering, performs no
/// address-mode optimization or liveness analysis, only allocates registers
/// for infinite-weight variables, and does no node reordering or branch
/// optimization.
///
/// Most stages are followed by a Func->hasError() check; the pipeline returns
/// immediately when an error has been recorded.
void TargetMIPS32::translateOm1() {
  // Times the whole Om1 translation under the TT_Om1 timer.
  TimerMarker T(TimerStack::TT_Om1, Func);

  // TODO: share passes with X86?
  genTargetHelperCalls();

  unsetIfNonLeafFunc();

  findMaxStackOutArgsSize();

  // Do not merge Alloca instructions, and lay out the stack.
  static constexpr bool SortAndCombineAllocas = false;
  Func->processAllocas(SortAndCombineAllocas);
  Func->dump("After Alloca processing");

  // Lower Phi instructions (unconditionally at this optimization level).
  Func->placePhiLoads();
  if (Func->hasError())
    return;
  Func->placePhiStores();
  if (Func->hasError())
    return;
  Func->deletePhis();
  if (Func->hasError())
    return;
  Func->dump("After Phi lowering");

  // Argument lowering.
  Func->doArgLowering();

  Func->genCode();
  if (Func->hasError())
    return;
  Func->dump("After initial MIPS32 codegen");

  // Only variables that must live in a register are allocated here.
  regAlloc(RAK_InfOnly);
  if (Func->hasError())
    return;
  Func->dump("After regalloc of infinite-weight variables");

  // Stack frame mapping.
  Func->genFrame();
  if (Func->hasError())
    return;
  Func->dump("After stack frame mapping");

  postLowerLegalization();
  if (Func->hasError())
    return;
  Func->dump("After postLowerLegalization");

  // Nop insertion
  if (getFlags().getShouldDoNopInsertion()) {
    Func->doNopInsertion();
  }
}
1028
doBranchOpt(Inst * Instr,const CfgNode * NextNode)1029 bool TargetMIPS32::doBranchOpt(Inst *Instr, const CfgNode *NextNode) {
1030 if (auto *Br = llvm::dyn_cast<InstMIPS32Br>(Instr)) {
1031 return Br->optimizeBranch(NextNode);
1032 }
1033 return false;
1034 }
1035
namespace {

// Table of register names, indexed by register number. Each REGMIPS32_TABLE
// entry is expanded through the X macro below, which keeps only the entry's
// `name` field; the array therefore has one element per table entry, in table
// order, and is sized RegMIPS32::Reg_NUM.
const char *RegNames[RegMIPS32::Reg_NUM] = {
#define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt,    \
          isI64Pair, isFP32, isFP64, isVec128, alias_init)                     \
  name,
    REGMIPS32_TABLE
#undef X
};

} // end of anonymous namespace
1047
getRegName(RegNumT RegNum)1048 const char *RegMIPS32::getRegName(RegNumT RegNum) {
1049 RegNum.assertIsValid();
1050 return RegNames[RegNum];
1051 }
1052
/// Returns the textual name of register RegNum by forwarding to
/// RegMIPS32::getRegName(). The type Ty is ignored by this implementation.
const char *TargetMIPS32::getRegName(RegNumT RegNum, Type Ty) const {
  (void)Ty; // Unused; the name does not depend on the type here.
  return RegMIPS32::getRegName(RegNum);
}
1057
getPhysicalRegister(RegNumT RegNum,Type Ty)1058 Variable *TargetMIPS32::getPhysicalRegister(RegNumT RegNum, Type Ty) {
1059 if (Ty == IceType_void)
1060 Ty = IceType_i32;
1061 if (PhysicalRegisters[Ty].empty())
1062 PhysicalRegisters[Ty].resize(RegMIPS32::Reg_NUM);
1063 RegNum.assertIsValid();
1064 Variable *Reg = PhysicalRegisters[Ty][RegNum];
1065 if (Reg == nullptr) {
1066 Reg = Func->makeVariable(Ty);
1067 Reg->setRegNum(RegNum);
1068 PhysicalRegisters[Ty][RegNum] = Reg;
1069 // Specially mark a named physical register as an "argument" so that it is
1070 // considered live upon function entry. Otherwise it's possible to get
1071 // liveness validation errors for saving callee-save registers.
1072 Func->addImplicitArg(Reg);
1073 // Don't bother tracking the live range of a named physical register.
1074 Reg->setIgnoreLiveness();
1075 }
1076 return Reg;
1077 }
1078
/// Jump table emission is not implemented for MIPS32: both arguments are
/// ignored and UnimplementedError() is invoked with the current flags.
void TargetMIPS32::emitJumpTable(const Cfg *Func,
                                 const InstJumpTable *JumpTable) const {
  (void)Func;      // Unused until jump tables are implemented.
  (void)JumpTable; // Unused until jump tables are implemented.
  UnimplementedError(getFlags());
}
1085
1086 /// Provide a trivial wrapper to legalize() for this common usage.
legalizeToReg(Operand * From,RegNumT RegNum)1087 Variable *TargetMIPS32::legalizeToReg(Operand *From, RegNumT RegNum) {
1088 return llvm::cast<Variable>(legalize(From, Legal_Reg, RegNum));
1089 }
1090
1091 /// Legalize undef values to concrete values.
legalizeUndef(Operand * From,RegNumT RegNum)1092 Operand *TargetMIPS32::legalizeUndef(Operand *From, RegNumT RegNum) {
1093 (void)RegNum;
1094 Type Ty = From->getType();
1095 if (llvm::isa<ConstantUndef>(From)) {
1096 // Lower undefs to zero. Another option is to lower undefs to an
1097 // uninitialized register; however, using an uninitialized register
1098 // results in less predictable code.
1099 //
1100 // If in the future the implementation is changed to lower undef
1101 // values to uninitialized registers, a FakeDef will be needed:
1102 // Context.insert(InstFakeDef::create(Func, Reg));
1103 // This is in order to ensure that the live range of Reg is not
1104 // overestimated. If the constant being lowered is a 64 bit value,
1105 // then the result should be split and the lo and hi components will
1106 // need to go in uninitialized registers.
1107 if (isVectorType(Ty)) {
1108 Variable *Var = makeReg(Ty, RegNum);
1109 auto *Reg = llvm::cast<VariableVecOn32>(Var);
1110 Reg->initVecElement(Func);
1111 auto *Zero = getZero();
1112 for (Variable *Var : Reg->getContainers()) {
1113 _mov(Var, Zero);
1114 }
1115 return Reg;
1116 }
1117 return Ctx->getConstantZero(Ty);
1118 }
1119 return From;
1120 }
1121
makeReg(Type Type,RegNumT RegNum)1122 Variable *TargetMIPS32::makeReg(Type Type, RegNumT RegNum) {
1123 // There aren't any 64-bit integer registers for Mips32.
1124 assert(Type != IceType_i64);
1125 Variable *Reg = Func->makeVariable(Type);
1126 if (RegNum.hasValue())
1127 Reg->setRegNum(RegNum);
1128 else
1129 Reg->setMustHaveReg();
1130 return Reg;
1131 }
1132
formMemoryOperand(Operand * Operand,Type Ty)1133 OperandMIPS32Mem *TargetMIPS32::formMemoryOperand(Operand *Operand, Type Ty) {
1134 // It may be the case that address mode optimization already creates an
1135 // OperandMIPS32Mem, so in that case it wouldn't need another level of
1136 // transformation.
1137 if (auto *Mem = llvm::dyn_cast<OperandMIPS32Mem>(Operand)) {
1138 return llvm::cast<OperandMIPS32Mem>(legalize(Mem));
1139 }
1140
1141 // If we didn't do address mode optimization, then we only have a base/offset
1142 // to work with. MIPS always requires a base register, so just use that to
1143 // hold the operand.
1144 auto *Base = llvm::cast<Variable>(
1145 legalize(Operand, Legal_Reg | Legal_Rematerializable));
1146 const int32_t Offset = Base->hasStackOffset() ? Base->getStackOffset() : 0;
1147 return OperandMIPS32Mem::create(
1148 Func, Ty, Base,
1149 llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(Offset)));
1150 }
1151
emitVariable(const Variable * Var) const1152 void TargetMIPS32::emitVariable(const Variable *Var) const {
1153 if (!BuildDefs::dump())
1154 return;
1155 Ostream &Str = Ctx->getStrEmit();
1156 const Type FrameSPTy = IceType_i32;
1157 if (Var->hasReg()) {
1158 Str << '$' << getRegName(Var->getRegNum(), Var->getType());
1159 return;
1160 }
1161 if (Var->mustHaveReg()) {
1162 llvm::report_fatal_error("Infinite-weight Variable (" + Var->getName() +
1163 ") has no register assigned - function " +
1164 Func->getFunctionName());
1165 }
1166 const int32_t Offset = Var->getStackOffset();
1167 Str << Offset;
1168 Str << "($" << getRegName(getFrameOrStackReg(), FrameSPTy);
1169 Str << ")";
1170 }
1171
/// Sets up the argument-register bookkeeping for one call or function
/// signature.
///
/// The "used" bit vectors are sized to RegMIPS32::Reg_NUM. Each register pool
/// (GPRArgs, I64Args, FP32Args, FP64Args) is filled from the matching
/// *ArgInitializer sequence in reverse order, so that back() yields the
/// initializer's first register and pop_back() moves on to the next one.
TargetMIPS32::CallingConv::CallingConv()
    : GPRegsUsed(RegMIPS32::Reg_NUM),
      GPRArgs(GPRArgInitializer.rbegin(), GPRArgInitializer.rend()),
      I64Args(I64ArgInitializer.rbegin(), I64ArgInitializer.rend()),
      VFPRegsUsed(RegMIPS32::Reg_NUM),
      FP32Args(FP32ArgInitializer.rbegin(), FP32ArgInitializer.rend()),
      FP64Args(FP64ArgInitializer.rbegin(), FP64ArgInitializer.rend()) {}
1179
1180 // In MIPS O32 abi FP argument registers can be used only if first argument is
1181 // of type float/double. UseFPRegs flag is used to care of that. Also FP arg
1182 // registers can be used only for first 2 arguments, so we require argument
1183 // number to make register allocation decisions.
argInReg(Type Ty,uint32_t ArgNo,RegNumT * Reg)1184 bool TargetMIPS32::CallingConv::argInReg(Type Ty, uint32_t ArgNo,
1185 RegNumT *Reg) {
1186 if (isScalarIntegerType(Ty) || isVectorType(Ty))
1187 return argInGPR(Ty, Reg);
1188 if (isScalarFloatingType(Ty)) {
1189 if (ArgNo == 0) {
1190 UseFPRegs = true;
1191 return argInVFP(Ty, Reg);
1192 }
1193 if (UseFPRegs && ArgNo == 1) {
1194 UseFPRegs = false;
1195 return argInVFP(Ty, Reg);
1196 }
1197 return argInGPR(Ty, Reg);
1198 }
1199 llvm::report_fatal_error("argInReg: Invalid type.");
1200 return false;
1201 }
1202
/// Tries to assign a general-purpose argument register (or register pair) to
/// an argument of type Ty.
///
/// \param Ty  argument type; vector types, i32 and f32 draw from GPRArgs,
///            i64 and f64 draw from I64Args. Any other type is fatal.
/// \param Reg receives the chosen register on success.
/// \returns true if a register was assigned. Returns false when the pool is
///          exhausted; in that case every bit of GPRegsUsed is set, so later
///          calls find all GPRs unavailable.
bool TargetMIPS32::CallingConv::argInGPR(Type Ty, RegNumT *Reg) {
  CfgVector<RegNumT> *Source;

  // Pick the register pool that matches the argument type.
  switch (Ty) {
  default: {
    llvm::report_fatal_error("argInGPR: Invalid type.");
    return false;
  } break;
  case IceType_v4i1:
  case IceType_v8i1:
  case IceType_v16i1:
  case IceType_v16i8:
  case IceType_v8i16:
  case IceType_v4i32:
  case IceType_v4f32:
  case IceType_i32:
  case IceType_f32: {
    Source = &GPRArgs;
  } break;
  case IceType_i64:
  case IceType_f64: {
    Source = &I64Args;
  } break;
  }

  // Drop pool entries that earlier arguments have already consumed.
  discardUnavailableGPRsAndTheirAliases(Source);

  // If $4 is used for any scalar type (or for returning v4f32) then the next
  // vector argument is passed in $6:$7:stack:stack.
  if (isVectorType(Ty)) {
    alignGPR(Source);
  }

  if (Source->empty()) {
    GPRegsUsed.set();
    return false;
  }

  *Reg = Source->back();
  // Note that we don't Source->pop_back() here. This is intentional. Notice how
  // we mark all of Reg's aliases as Used. So, for the next argument,
  // Source->back() is marked as unavailable, and it is thus implicitly popped
  // from the stack.
  GPRegsUsed |= RegisterAliases[*Reg];

  // All vector arguments irrespective of their base type are passed in GP
  // registers. First vector argument is passed in $4:$5:$6:$7 and 2nd
  // is passed in $6:$7:stack:stack. If it is 1st argument then discard
  // $4:$5:$6:$7 otherwise discard $6:$7 only.
  if (isVectorType(Ty)) {
    if (((unsigned)*Reg) == RegMIPS32::Reg_A0) {
      GPRegsUsed |= RegisterAliases[RegMIPS32::Reg_A1];
      GPRegsUsed |= RegisterAliases[RegMIPS32::Reg_A2];
      GPRegsUsed |= RegisterAliases[RegMIPS32::Reg_A3];
    } else {
      // *Reg itself was marked above; only A3 is additionally marked here.
      GPRegsUsed |= RegisterAliases[RegMIPS32::Reg_A3];
    }
  }

  return true;
}
1264
discardNextGPRAndItsAliases(CfgVector<RegNumT> * Regs)1265 inline void TargetMIPS32::CallingConv::discardNextGPRAndItsAliases(
1266 CfgVector<RegNumT> *Regs) {
1267 GPRegsUsed |= RegisterAliases[Regs->back()];
1268 Regs->pop_back();
1269 }
1270
alignGPR(CfgVector<RegNumT> * Regs)1271 inline void TargetMIPS32::CallingConv::alignGPR(CfgVector<RegNumT> *Regs) {
1272 if (Regs->back() == RegMIPS32::Reg_A1 || Regs->back() == RegMIPS32::Reg_A3)
1273 discardNextGPRAndItsAliases(Regs);
1274 }
1275
1276 // GPR are not packed when passing parameters. Thus, a function foo(i32, i64,
1277 // i32) will have the first argument in a0, the second in a2-a3, and the third
1278 // on the stack. To model this behavior, whenever we pop a register from Regs,
1279 // we remove all of its aliases from the pool of available GPRs. This has the
1280 // effect of computing the "closure" on the GPR registers.
discardUnavailableGPRsAndTheirAliases(CfgVector<RegNumT> * Regs)1281 void TargetMIPS32::CallingConv::discardUnavailableGPRsAndTheirAliases(
1282 CfgVector<RegNumT> *Regs) {
1283 while (!Regs->empty() && GPRegsUsed[Regs->back()]) {
1284 discardNextGPRAndItsAliases(Regs);
1285 }
1286 }
1287
/// Tries to assign a floating-point argument register to an argument of type
/// Ty and keeps the GPR pool in sync with that choice.
///
/// \param Ty  IceType_f32 (drawn from FP32Args) or IceType_f64 (drawn from
///            FP64Args). Any other type is fatal.
/// \param Reg receives the chosen register on success.
/// \returns true if a register was assigned. Returns false when the pool is
///          exhausted; in that case every bit of VFPRegsUsed is set.
bool TargetMIPS32::CallingConv::argInVFP(Type Ty, RegNumT *Reg) {
  CfgVector<RegNumT> *Source;

  // Pick the register pool that matches the argument type.
  switch (Ty) {
  default: {
    llvm::report_fatal_error("argInVFP: Invalid type.");
    return false;
  } break;
  case IceType_f32: {
    Source = &FP32Args;
  } break;
  case IceType_f64: {
    Source = &FP64Args;
  } break;
  }

  // Drop pool entries that earlier arguments have already consumed.
  discardUnavailableVFPRegsAndTheirAliases(Source);

  if (Source->empty()) {
    VFPRegsUsed.set();
    return false;
  }

  // As in argInGPR(), the register is not popped here; marking its aliases as
  // used makes the next call discard it.
  *Reg = Source->back();
  VFPRegsUsed |= RegisterAliases[*Reg];

  // In the MIPS O32 ABI, if the function arguments are (f32, i32) then reg_a0
  // cannot be used for the second argument even though it is free. The f32
  // argument goes in reg_f12 and the i32 argument goes in reg_a1. Similarly,
  // if the arguments are (f64, i32), a0 and a1 are not used for the second
  // argument.
  // From here on Source refers to the GPR pool, not the FP pool.
  Source = &GPRArgs;
  // Discard one GPR reg for f32 (4 bytes), two for f64 (4 + 4 bytes).
  if (Ty == IceType_f64) {
    // In the MIPS O32 ABI, when GPR argument pairs are used to store f64
    // values, the pair must be aligned at an even register. Similarly, when
    // GPRs are discarded because some of the first 16 bytes of arguments go in
    // FPRs, alignment must be respected. For example, if the function
    // arguments are (f32, f64, f32), a0 is discarded for the first f32; for
    // the f64 argument, which goes in F14F15, the GPR vector is first aligned
    // to an even register by discarding a1, and then the two GPRs a2 and a3
    // are discarded. The last f32 argument then goes on the stack.
    alignGPR(Source);
    discardNextGPRAndItsAliases(Source);
  }
  discardNextGPRAndItsAliases(Source);
  return true;
}
1334
discardUnavailableVFPRegsAndTheirAliases(CfgVector<RegNumT> * Regs)1335 void TargetMIPS32::CallingConv::discardUnavailableVFPRegsAndTheirAliases(
1336 CfgVector<RegNumT> *Regs) {
1337 while (!Regs->empty() && VFPRegsUsed[Regs->back()]) {
1338 Regs->pop_back();
1339 }
1340 }
1341
lowerArguments()1342 void TargetMIPS32::lowerArguments() {
1343 VarList &Args = Func->getArgs();
1344 TargetMIPS32::CallingConv CC;
1345
1346 // For each register argument, replace Arg in the argument list with the home
1347 // register. Then generate an instruction in the prolog to copy the home
1348 // register to the assigned location of Arg.
1349 Context.init(Func->getEntryNode());
1350 Context.setInsertPoint(Context.getCur());
1351
1352 // v4f32 is returned through stack. $4 is setup by the caller and passed as
1353 // first argument implicitly. Callee then copies the return vector at $4.
1354 Variable *ImplicitRetVec = nullptr;
1355 if (isVectorFloatingType(Func->getReturnType())) {
1356 ImplicitRetVec = Func->makeVariable(IceType_i32);
1357 ImplicitRetVec->setName(Func, "ImplicitRet_v4f32");
1358 ImplicitRetVec->setIsArg();
1359 Args.insert(Args.begin(), ImplicitRetVec);
1360 setImplicitRet(ImplicitRetVec);
1361 }
1362
1363 for (SizeT i = 0, E = Args.size(); i < E; ++i) {
1364 Variable *Arg = Args[i];
1365 Type Ty = Arg->getType();
1366 RegNumT RegNum;
1367 if (!CC.argInReg(Ty, i, &RegNum)) {
1368 continue;
1369 }
1370 Variable *RegisterArg = Func->makeVariable(Ty);
1371 if (BuildDefs::dump()) {
1372 RegisterArg->setName(Func, "home_reg:" + Arg->getName());
1373 }
1374 RegisterArg->setIsArg();
1375 Arg->setIsArg(false);
1376 Args[i] = RegisterArg;
1377
1378 if (isVectorType(Ty)) {
1379 auto *RegisterArgVec = llvm::cast<VariableVecOn32>(RegisterArg);
1380 RegisterArgVec->initVecElement(Func);
1381 RegisterArgVec->getContainers()[0]->setRegNum(
1382 RegNumT::fixme((unsigned)RegNum + 0));
1383 RegisterArgVec->getContainers()[1]->setRegNum(
1384 RegNumT::fixme((unsigned)RegNum + 1));
1385 // First two elements of second vector argument are passed
1386 // in $6:$7 and remaining two on stack. Do not assign register
1387 // to this is second vector argument.
1388 if (i == 0) {
1389 RegisterArgVec->getContainers()[2]->setRegNum(
1390 RegNumT::fixme((unsigned)RegNum + 2));
1391 RegisterArgVec->getContainers()[3]->setRegNum(
1392 RegNumT::fixme((unsigned)RegNum + 3));
1393 } else {
1394 RegisterArgVec->getContainers()[2]->setRegNum(
1395 RegNumT::fixme(RegNumT()));
1396 RegisterArgVec->getContainers()[3]->setRegNum(
1397 RegNumT::fixme(RegNumT()));
1398 }
1399 } else {
1400 switch (Ty) {
1401 default: { RegisterArg->setRegNum(RegNum); } break;
1402 case IceType_i64: {
1403 auto *RegisterArg64 = llvm::cast<Variable64On32>(RegisterArg);
1404 RegisterArg64->initHiLo(Func);
1405 RegisterArg64->getLo()->setRegNum(
1406 RegNumT::fixme(RegMIPS32::get64PairFirstRegNum(RegNum)));
1407 RegisterArg64->getHi()->setRegNum(
1408 RegNumT::fixme(RegMIPS32::get64PairSecondRegNum(RegNum)));
1409 } break;
1410 }
1411 }
1412 Context.insert<InstAssign>(Arg, RegisterArg);
1413 }
1414
1415 // Insert fake use of ImplicitRet_v4f32 to keep it live
1416 if (ImplicitRetVec) {
1417 for (CfgNode *Node : Func->getNodes()) {
1418 for (Inst &Instr : Node->getInsts()) {
1419 if (llvm::isa<InstRet>(&Instr)) {
1420 Context.setInsertPoint(instToIterator(&Instr));
1421 Context.insert<InstFakeUse>(ImplicitRetVec);
1422 break;
1423 }
1424 }
1425 }
1426 }
1427 }
1428
/// Returns the type used for a stack slot on MIPS32, which is IceType_i32.
Type TargetMIPS32::stackSlotType() { return IceType_i32; }
1430
1431 // Helper function for addProlog().
1432 //
1433 // This assumes Arg is an argument passed on the stack. This sets the frame
1434 // offset for Arg and updates InArgsSizeBytes according to Arg's width. For an
1435 // I64 arg that has been split into Lo and Hi components, it calls itself
1436 // recursively on the components, taking care to handle Lo first because of the
1437 // little-endian architecture. Lastly, this function generates an instruction
1438 // to copy Arg into its assigned register if applicable.
finishArgumentLowering(Variable * Arg,bool PartialOnStack,Variable * FramePtr,size_t BasicFrameOffset,size_t * InArgsSizeBytes)1439 void TargetMIPS32::finishArgumentLowering(Variable *Arg, bool PartialOnStack,
1440 Variable *FramePtr,
1441 size_t BasicFrameOffset,
1442 size_t *InArgsSizeBytes) {
1443 const Type Ty = Arg->getType();
1444 *InArgsSizeBytes = applyStackAlignmentTy(*InArgsSizeBytes, Ty);
1445
1446 // If $4 is used for any scalar type (or returining v4f32) then the next
1447 // vector type if passed in $6:$7:stack:stack. Load 3nd and 4th element
1448 // from agument stack.
1449 if (auto *ArgVecOn32 = llvm::dyn_cast<VariableVecOn32>(Arg)) {
1450 if (PartialOnStack == false) {
1451 auto *Elem0 = ArgVecOn32->getContainers()[0];
1452 auto *Elem1 = ArgVecOn32->getContainers()[1];
1453 finishArgumentLowering(Elem0, PartialOnStack, FramePtr, BasicFrameOffset,
1454 InArgsSizeBytes);
1455 finishArgumentLowering(Elem1, PartialOnStack, FramePtr, BasicFrameOffset,
1456 InArgsSizeBytes);
1457 }
1458 auto *Elem2 = ArgVecOn32->getContainers()[2];
1459 auto *Elem3 = ArgVecOn32->getContainers()[3];
1460 finishArgumentLowering(Elem2, PartialOnStack, FramePtr, BasicFrameOffset,
1461 InArgsSizeBytes);
1462 finishArgumentLowering(Elem3, PartialOnStack, FramePtr, BasicFrameOffset,
1463 InArgsSizeBytes);
1464 return;
1465 }
1466
1467 if (auto *Arg64On32 = llvm::dyn_cast<Variable64On32>(Arg)) {
1468 Variable *const Lo = Arg64On32->getLo();
1469 Variable *const Hi = Arg64On32->getHi();
1470 finishArgumentLowering(Lo, PartialOnStack, FramePtr, BasicFrameOffset,
1471 InArgsSizeBytes);
1472 finishArgumentLowering(Hi, PartialOnStack, FramePtr, BasicFrameOffset,
1473 InArgsSizeBytes);
1474 return;
1475 }
1476
1477 assert(Ty != IceType_i64);
1478 assert(!isVectorType(Ty));
1479
1480 const int32_t ArgStackOffset = BasicFrameOffset + *InArgsSizeBytes;
1481 *InArgsSizeBytes += typeWidthInBytesOnStack(Ty);
1482
1483 if (!Arg->hasReg()) {
1484 Arg->setStackOffset(ArgStackOffset);
1485 return;
1486 }
1487
1488 // If the argument variable has been assigned a register, we need to copy the
1489 // value from the stack slot.
1490 Variable *Parameter = Func->makeVariable(Ty);
1491 Parameter->setMustNotHaveReg();
1492 Parameter->setStackOffset(ArgStackOffset);
1493 _mov(Arg, Parameter);
1494 }
1495
/// Emits the function prolog at the start of Node and fixes the stack frame
/// layout.
///
/// In order, this function: computes the spill-area parameters, determines
/// the registers to preserve, computes TotalStackSizeBytes, emits the stack
/// pointer adjustment and the stores of the preserved registers, sets up the
/// frame pointer when one is used, assigns stack offsets to (or loads)
/// incoming stack arguments, assigns stack slots to spilled variables, and
/// optionally dumps the resulting layout.
void TargetMIPS32::addProlog(CfgNode *Node) {
  // Stack frame layout:
  //
  // +------------------------+
  // | 1. preserved registers |
  // +------------------------+
  // | 2. padding             |
  // +------------------------+
  // | 3. global spill area   |
  // +------------------------+
  // | 4. padding             |
  // +------------------------+
  // | 5. local spill area    |
  // +------------------------+
  // | 6. padding             |
  // +------------------------+
  // | 7. allocas             |
  // +------------------------+
  // | 8. padding             |
  // +------------------------+
  // | 9. out args            |
  // +------------------------+ <--- StackPointer
  //
  // The following variables record the size in bytes of the given areas:
  //  * PreservedRegsSizeBytes: area 1
  //  * SpillAreaPaddingBytes:  area 2
  //  * GlobalsSize:            area 3
  //  * GlobalsAndSubsequentPaddingSize: areas 3 - 4
  //  * LocalsSpillAreaSize:    area 5
  //  * SpillAreaSizeBytes:     areas 2 - 9
  //  * maxOutArgsSizeBytes():  area 9

  // All prolog instructions are inserted at the top of Node.
  Context.init(Node);
  Context.setInsertPoint(Context.getCur());

  SmallBitVector CalleeSaves = getRegisterSet(RegSet_CalleeSave, RegSet_None);
  RegsUsed = SmallBitVector(CalleeSaves.size());

  VarList SortedSpilledVariables;

  size_t GlobalsSize = 0;
  // If there is a separate locals area, this represents that area. Otherwise
  // it counts any variable not counted by GlobalsSize.
  SpillAreaSizeBytes = 0;
  // If there is a separate locals area, this specifies the alignment for it.
  uint32_t LocalsSlotsAlignmentBytes = 0;
  // The entire spill locations area gets aligned to largest natural alignment
  // of the variables that have a spill slot.
  uint32_t SpillAreaAlignmentBytes = 0;
  // For now, we don't have target-specific variables that need special
  // treatment (no stack-slot-linked SpillVariable type).
  // The hook returns true for Variable64On32 and false for everything else.
  std::function<bool(Variable *)> TargetVarHook = [](Variable *Var) {
    static constexpr bool AssignStackSlot = false;
    static constexpr bool DontAssignStackSlot = !AssignStackSlot;
    if (llvm::isa<Variable64On32>(Var)) {
      return DontAssignStackSlot;
    }
    return AssignStackSlot;
  };

  // Compute the list of spilled variables and bounds for GlobalsSize, etc.
  getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize,
                        &SpillAreaSizeBytes, &SpillAreaAlignmentBytes,
                        &LocalsSlotsAlignmentBytes, TargetVarHook);
  // Remember the locals-only size before the globals are added in.
  uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes;
  SpillAreaSizeBytes += GlobalsSize;

  PreservedGPRs.reserve(CalleeSaves.size());

  // Consider FP and RA as callee-save / used as needed.
  if (UsesFramePointer) {
    if (RegsUsed[RegMIPS32::Reg_FP]) {
      llvm::report_fatal_error("Frame pointer has been used.");
    }
    CalleeSaves[RegMIPS32::Reg_FP] = true;
    RegsUsed[RegMIPS32::Reg_FP] = true;
  }
  if (!MaybeLeafFunc) {
    CalleeSaves[RegMIPS32::Reg_RA] = true;
    RegsUsed[RegMIPS32::Reg_RA] = true;
  }

  // Make two passes over the used registers. The first pass records all the
  // used registers -- and their aliases. Then, we figure out which GPR
  // registers should be saved.
  SmallBitVector ToPreserve(RegMIPS32::Reg_NUM);
  for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
    if (CalleeSaves[i] && RegsUsed[i]) {
      ToPreserve |= RegisterAliases[i];
    }
  }

  uint32_t NumCallee = 0;

  // RegClasses is a tuple of
  //
  // <First Register in Class, Last Register in Class, Vector of Save Registers>
  //
  // We use this tuple to figure out which register we should save/restore
  // during prolog/epilog.
  using RegClassType = std::tuple<uint32_t, uint32_t, VarList *>;
  const RegClassType RegClass = RegClassType(
      RegMIPS32::Reg_GPR_First, RegMIPS32::Reg_FPR_Last, &PreservedGPRs);
  const uint32_t FirstRegInClass = std::get<0>(RegClass);
  const uint32_t LastRegInClass = std::get<1>(RegClass);
  VarList *const PreservedRegsInClass = std::get<2>(RegClass);
  // Registers are visited from the highest number down.
  // NOTE(review): the loop condition is strict, so FirstRegInClass itself is
  // never examined -- confirm that this register can never need preserving.
  // NOTE(review): PreservedRegsSizeBytes is only accumulated here, never
  // reset; presumably it is zero on entry -- confirm.
  for (uint32_t Reg = LastRegInClass; Reg > FirstRegInClass; Reg--) {
    if (!ToPreserve[Reg]) {
      continue;
    }
    ++NumCallee;
    Variable *PhysicalRegister = getPhysicalRegister(RegNumT::fromInt(Reg));
    PreservedRegsSizeBytes +=
        typeWidthInBytesOnStack(PhysicalRegister->getType());
    PreservedRegsInClass->push_back(PhysicalRegister);
  }

  Ctx->statsUpdateRegistersSaved(NumCallee);

  // Align the variables area. SpillAreaPaddingBytes is the size of the region
  // after the preserved registers and before the spill areas.
  // LocalsSlotsPaddingBytes is the amount of padding between the globals and
  // locals area if they are separate.
  assert(SpillAreaAlignmentBytes <= MIPS32_STACK_ALIGNMENT_BYTES);
  (void)MIPS32_STACK_ALIGNMENT_BYTES;
  assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes);
  uint32_t SpillAreaPaddingBytes = 0;
  uint32_t LocalsSlotsPaddingBytes = 0;
  alignStackSpillAreas(PreservedRegsSizeBytes, SpillAreaAlignmentBytes,
                       GlobalsSize, LocalsSlotsAlignmentBytes,
                       &SpillAreaPaddingBytes, &LocalsSlotsPaddingBytes);
  SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes;
  uint32_t GlobalsAndSubsequentPaddingSize =
      GlobalsSize + LocalsSlotsPaddingBytes;

  // Adds the out args space to the stack, and align SP if necessary.
  // When a variable-sized alloca is used, the out-args area is not added
  // here; VariableAllocaAlignBytes is added instead in the aligned case.
  if (!NeedsStackAlignment) {
    SpillAreaSizeBytes += MaxOutArgsSizeBytes * (VariableAllocaUsed ? 0 : 1);
  } else {
    SpillAreaSizeBytes = applyStackAlignment(
        SpillAreaSizeBytes +
        (VariableAllocaUsed ? VariableAllocaAlignBytes : MaxOutArgsSizeBytes));
  }

  // Combine fixed alloca with SpillAreaSize.
  SpillAreaSizeBytes += FixedAllocaSizeBytes;

  TotalStackSizeBytes =
      applyStackAlignment(PreservedRegsSizeBytes + SpillAreaSizeBytes);

  // Generate "addiu sp, sp, -TotalStackSizeBytes"
  if (TotalStackSizeBytes) {
    // Use the scratch register if needed to legalize the immediate.
    Sandboxer(this).addiu_sp(-TotalStackSizeBytes);
  }

  Ctx->statsUpdateFrameBytes(TotalStackSizeBytes);

  // Store the preserved registers, starting at the top of the new frame and
  // moving towards lower addresses.
  if (!PreservedGPRs.empty()) {
    uint32_t StackOffset = TotalStackSizeBytes;
    for (Variable *Var : *PreservedRegsInClass) {
      // FP registers are stored as f32, all other registers as i32.
      Type RegType;
      if (RegMIPS32::isFPRReg(Var->getRegNum()))
        RegType = IceType_f32;
      else
        RegType = IceType_i32;
      auto *PhysicalRegister = makeReg(RegType, Var->getRegNum());
      StackOffset -= typeWidthInBytesOnStack(RegType);
      Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP);
      OperandMIPS32Mem *MemoryLocation = OperandMIPS32Mem::create(
          Func, RegType, SP,
          llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(StackOffset)));
      Sandboxer(this).sw(PhysicalRegister, MemoryLocation);
    }
  }

  Variable *FP = getPhysicalRegister(RegMIPS32::Reg_FP);

  // Generate "mov FP, SP" if needed.
  if (UsesFramePointer) {
    Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP);
    _mov(FP, SP);
    // Keep FP live for late-stage liveness analysis (e.g. asm-verbose mode).
    Context.insert<InstFakeUse>(FP);
  }

  // Fill in stack offsets for stack args, and copy args into registers for
  // those that were register-allocated. Args are pushed right to left, so
  // Arg[0] is closest to the stack/frame pointer.
  const VarList &Args = Func->getArgs();
  // Incoming stack arguments start after MIPS32_MAX_GPR_ARG 4-byte slots.
  size_t InArgsSizeBytes = MIPS32_MAX_GPR_ARG * 4;
  // A fresh CallingConv replays the register assignment of the arguments.
  TargetMIPS32::CallingConv CC;
  uint32_t ArgNo = 0;

  for (Variable *Arg : Args) {
    RegNumT DummyReg;
    const Type Ty = Arg->getType();
    bool PartialOnStack;
    // Skip arguments passed in registers.
    if (CC.argInReg(Ty, ArgNo, &DummyReg)) {
      // Load argument from stack:
      // 1. If this is first vector argument and return type is v4f32.
      //    In this case $4 is used to pass stack address implicitly.
      //    3rd and 4th element of vector argument is passed through stack.
      // 2. If this is second vector argument.
      if (ArgNo != 0 && isVectorType(Ty)) {
        PartialOnStack = true;
        finishArgumentLowering(Arg, PartialOnStack, FP, TotalStackSizeBytes,
                               &InArgsSizeBytes);
      }
    } else {
      PartialOnStack = false;
      finishArgumentLowering(Arg, PartialOnStack, FP, TotalStackSizeBytes,
                             &InArgsSizeBytes);
    }
    ++ArgNo;
  }

  // Fill in stack offsets for locals.
  assignVarStackSlots(SortedSpilledVariables, SpillAreaPaddingBytes,
                      SpillAreaSizeBytes, GlobalsAndSubsequentPaddingSize);
  this->HasComputedFrame = true;

  // Optional human-readable dump of the frame layout.
  if (BuildDefs::dump() && Func->isVerbose(IceV_Frame)) {
    OstreamLocker _(Func->getContext());
    Ostream &Str = Func->getContext()->getStrDump();

    Str << "Stack layout:\n";
    uint32_t SPAdjustmentPaddingSize =
        SpillAreaSizeBytes - LocalsSpillAreaSize -
        GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes -
        MaxOutArgsSizeBytes;
    Str << " in-args = " << InArgsSizeBytes << " bytes\n"
        << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n"
        << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n"
        << " globals spill area = " << GlobalsSize << " bytes\n"
        << " globals-locals spill areas intermediate padding = "
        << GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n"
        << " locals spill area = " << LocalsSpillAreaSize << " bytes\n"
        << " SP alignment padding = " << SPAdjustmentPaddingSize << " bytes\n";

    // NOTE(review): "is FP based" is printed as the constant 1 regardless of
    // UsesFramePointer -- confirm whether this is intended.
    Str << "Stack details:\n"
        << " SP adjustment = " << SpillAreaSizeBytes << " bytes\n"
        << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n"
        << " outgoing args size = " << MaxOutArgsSizeBytes << " bytes\n"
        << " locals spill area alignment = " << LocalsSlotsAlignmentBytes
        << " bytes\n"
        << " is FP based = " << 1 << "\n";
  }
  return;
}
1748
addEpilog(CfgNode * Node)1749 void TargetMIPS32::addEpilog(CfgNode *Node) {
1750 InstList &Insts = Node->getInsts();
1751 InstList::reverse_iterator RI, E;
1752 for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) {
1753 if (llvm::isa<InstMIPS32Ret>(*RI))
1754 break;
1755 }
1756 if (RI == E)
1757 return;
1758
1759 // Convert the reverse_iterator position into its corresponding (forward)
1760 // iterator position.
1761 InstList::iterator InsertPoint = reverseToForwardIterator(RI);
1762 --InsertPoint;
1763 Context.init(Node);
1764 Context.setInsertPoint(InsertPoint);
1765
1766 Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP);
1767 if (UsesFramePointer) {
1768 Variable *FP = getPhysicalRegister(RegMIPS32::Reg_FP);
1769 // For late-stage liveness analysis (e.g. asm-verbose mode), adding a fake
1770 // use of SP before the assignment of SP=FP keeps previous SP adjustments
1771 // from being dead-code eliminated.
1772 Context.insert<InstFakeUse>(SP);
1773 Sandboxer(this).reset_sp(FP);
1774 }
1775
1776 VarList::reverse_iterator RIter, END;
1777
1778 if (!PreservedGPRs.empty()) {
1779 uint32_t StackOffset = TotalStackSizeBytes - PreservedRegsSizeBytes;
1780 for (RIter = PreservedGPRs.rbegin(), END = PreservedGPRs.rend();
1781 RIter != END; ++RIter) {
1782 Type RegType;
1783 if (RegMIPS32::isFPRReg((*RIter)->getRegNum()))
1784 RegType = IceType_f32;
1785 else
1786 RegType = IceType_i32;
1787 auto *PhysicalRegister = makeReg(RegType, (*RIter)->getRegNum());
1788 Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP);
1789 OperandMIPS32Mem *MemoryLocation = OperandMIPS32Mem::create(
1790 Func, RegType, SP,
1791 llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(StackOffset)));
1792 _lw(PhysicalRegister, MemoryLocation);
1793 StackOffset += typeWidthInBytesOnStack(PhysicalRegister->getType());
1794 }
1795 }
1796
1797 if (TotalStackSizeBytes) {
1798 Sandboxer(this).addiu_sp(TotalStackSizeBytes);
1799 }
1800 if (!getFlags().getUseSandboxing())
1801 return;
1802
1803 Variable *RA = getPhysicalRegister(RegMIPS32::Reg_RA);
1804 Variable *RetValue = nullptr;
1805 if (RI->getSrcSize())
1806 RetValue = llvm::cast<Variable>(RI->getSrc(0));
1807
1808 Sandboxer(this).ret(RA, RetValue);
1809
1810 RI->setDeleted();
1811 }
1812
newBaseRegister(Variable * Base,int32_t Offset,RegNumT ScratchRegNum)1813 Variable *TargetMIPS32::PostLoweringLegalizer::newBaseRegister(
1814 Variable *Base, int32_t Offset, RegNumT ScratchRegNum) {
1815 // Legalize will likely need a lui/ori combination, but if the top bits are
1816 // all 0 from negating the offset and subtracting, we could use that instead.
1817 const bool ShouldSub = Offset != 0 && (-Offset & 0xFFFF0000) == 0;
1818 Variable *ScratchReg = Target->makeReg(IceType_i32, ScratchRegNum);
1819 if (ShouldSub) {
1820 Target->_addi(ScratchReg, Base, -Offset);
1821 } else {
1822 constexpr bool SignExt = true;
1823 if (!OperandMIPS32Mem::canHoldOffset(Base->getType(), SignExt, Offset)) {
1824 const uint32_t UpperBits = (Offset >> 16) & 0xFFFF;
1825 const uint32_t LowerBits = Offset & 0xFFFF;
1826 Target->_lui(ScratchReg, Target->Ctx->getConstantInt32(UpperBits));
1827 if (LowerBits)
1828 Target->_ori(ScratchReg, ScratchReg, LowerBits);
1829 Target->_addu(ScratchReg, ScratchReg, Base);
1830 } else {
1831 Target->_addiu(ScratchReg, Base, Offset);
1832 }
1833 }
1834
1835 return ScratchReg;
1836 }
1837
legalizeMovFp(InstMIPS32MovFP64ToI64 * MovInstr)1838 void TargetMIPS32::PostLoweringLegalizer::legalizeMovFp(
1839 InstMIPS32MovFP64ToI64 *MovInstr) {
1840 Variable *Dest = MovInstr->getDest();
1841 Operand *Src = MovInstr->getSrc(0);
1842 const Type SrcTy = Src->getType();
1843
1844 if (Dest != nullptr && SrcTy == IceType_f64) {
1845 int32_t Offset = Dest->getStackOffset();
1846 auto *Base = Target->getPhysicalRegister(Target->getFrameOrStackReg());
1847 OperandMIPS32Mem *TAddr = OperandMIPS32Mem::create(
1848 Target->Func, IceType_f32, Base,
1849 llvm::cast<ConstantInteger32>(Target->Ctx->getConstantInt32(Offset)));
1850 OperandMIPS32Mem *Addr = legalizeMemOperand(TAddr);
1851 auto *SrcV = llvm::cast<Variable>(Src);
1852 Variable *SrcR;
1853 if (MovInstr->getInt64Part() == Int64_Lo) {
1854 SrcR = Target->makeReg(
1855 IceType_f32, RegMIPS32::get64PairFirstRegNum(SrcV->getRegNum()));
1856 } else {
1857 SrcR = Target->makeReg(
1858 IceType_f32, RegMIPS32::get64PairSecondRegNum(SrcV->getRegNum()));
1859 }
1860 Sandboxer(Target).sw(SrcR, Addr);
1861 if (MovInstr->isDestRedefined()) {
1862 Target->_set_dest_redefined();
1863 }
1864 MovInstr->setDeleted();
1865 return;
1866 }
1867
1868 llvm::report_fatal_error("legalizeMovFp: Invalid operands");
1869 }
1870
legalizeMov(InstMIPS32Mov * MovInstr)1871 void TargetMIPS32::PostLoweringLegalizer::legalizeMov(InstMIPS32Mov *MovInstr) {
1872 Variable *Dest = MovInstr->getDest();
1873 assert(Dest != nullptr);
1874 const Type DestTy = Dest->getType();
1875 assert(DestTy != IceType_i64);
1876
1877 Operand *Src = MovInstr->getSrc(0);
1878 const Type SrcTy = Src->getType();
1879 (void)SrcTy;
1880 assert(SrcTy != IceType_i64);
1881
1882 bool Legalized = false;
1883 auto *SrcR = llvm::cast<Variable>(Src);
1884 if (Dest->hasReg() && SrcR->hasReg()) {
1885 // This might be a GP to/from FP move generated due to argument passing.
1886 // Use mtc1/mfc1 instead of mov.[s/d] if src and dst registers are of
1887 // different types.
1888 const bool IsDstGPR = RegMIPS32::isGPRReg(Dest->getRegNum());
1889 const bool IsSrcGPR = RegMIPS32::isGPRReg(SrcR->getRegNum());
1890 const RegNumT SRegNum = SrcR->getRegNum();
1891 const RegNumT DRegNum = Dest->getRegNum();
1892 if (IsDstGPR != IsSrcGPR) {
1893 if (IsDstGPR) {
1894 // Dest is GPR and SrcR is FPR. Use mfc1.
1895 int32_t TypeWidth = typeWidthInBytes(DestTy);
1896 if (MovInstr->getDestHi() != nullptr)
1897 TypeWidth += typeWidthInBytes(MovInstr->getDestHi()->getType());
1898 if (TypeWidth == 8) {
1899 // Split it into two mfc1 instructions
1900 Variable *SrcGPRHi = Target->makeReg(
1901 IceType_f32, RegMIPS32::get64PairFirstRegNum(SRegNum));
1902 Variable *SrcGPRLo = Target->makeReg(
1903 IceType_f32, RegMIPS32::get64PairSecondRegNum(SRegNum));
1904 Variable *DstFPRHi, *DstFPRLo;
1905 if (MovInstr->getDestHi() != nullptr && Dest != nullptr) {
1906 DstFPRHi = Target->makeReg(IceType_i32,
1907 MovInstr->getDestHi()->getRegNum());
1908 DstFPRLo = Target->makeReg(IceType_i32, Dest->getRegNum());
1909 } else {
1910 DstFPRHi = Target->makeReg(
1911 IceType_i32, RegMIPS32::get64PairFirstRegNum(DRegNum));
1912 DstFPRLo = Target->makeReg(
1913 IceType_i32, RegMIPS32::get64PairSecondRegNum(DRegNum));
1914 }
1915 Target->_mov(DstFPRHi, SrcGPRHi);
1916 Target->_mov(DstFPRLo, SrcGPRLo);
1917 Legalized = true;
1918 } else {
1919 Variable *SrcGPR = Target->makeReg(IceType_f32, SRegNum);
1920 Variable *DstFPR = Target->makeReg(IceType_i32, DRegNum);
1921 Target->_mov(DstFPR, SrcGPR);
1922 Legalized = true;
1923 }
1924 } else {
1925 // Dest is FPR and SrcR is GPR. Use mtc1.
1926 if (typeWidthInBytes(Dest->getType()) == 8) {
1927 Variable *SrcGPRHi, *SrcGPRLo;
1928 // SrcR could be $zero which is i32
1929 if (SRegNum == RegMIPS32::Reg_ZERO) {
1930 SrcGPRHi = Target->makeReg(IceType_i32, SRegNum);
1931 SrcGPRLo = SrcGPRHi;
1932 } else {
1933 // Split it into two mtc1 instructions
1934 if (MovInstr->getSrcSize() == 2) {
1935 const auto FirstReg =
1936 (llvm::cast<Variable>(MovInstr->getSrc(0)))->getRegNum();
1937 const auto SecondReg =
1938 (llvm::cast<Variable>(MovInstr->getSrc(1)))->getRegNum();
1939 SrcGPRHi = Target->makeReg(IceType_i32, FirstReg);
1940 SrcGPRLo = Target->makeReg(IceType_i32, SecondReg);
1941 } else {
1942 SrcGPRLo = Target->makeReg(
1943 IceType_i32, RegMIPS32::get64PairFirstRegNum(SRegNum));
1944 SrcGPRHi = Target->makeReg(
1945 IceType_i32, RegMIPS32::get64PairSecondRegNum(SRegNum));
1946 }
1947 }
1948 Variable *DstFPRHi = Target->makeReg(
1949 IceType_f32, RegMIPS32::get64PairFirstRegNum(DRegNum));
1950 Variable *DstFPRLo = Target->makeReg(
1951 IceType_f32, RegMIPS32::get64PairSecondRegNum(DRegNum));
1952 Target->_mov(DstFPRHi, SrcGPRLo);
1953 Target->_mov(DstFPRLo, SrcGPRHi);
1954 Legalized = true;
1955 } else {
1956 Variable *SrcGPR = Target->makeReg(IceType_i32, SRegNum);
1957 Variable *DstFPR = Target->makeReg(IceType_f32, DRegNum);
1958 Target->_mov(DstFPR, SrcGPR);
1959 Legalized = true;
1960 }
1961 }
1962 }
1963 if (Legalized) {
1964 if (MovInstr->isDestRedefined()) {
1965 Target->_set_dest_redefined();
1966 }
1967 MovInstr->setDeleted();
1968 return;
1969 }
1970 }
1971
1972 if (!Dest->hasReg()) {
1973 auto *SrcR = llvm::cast<Variable>(Src);
1974 assert(SrcR->hasReg());
1975 assert(!SrcR->isRematerializable());
1976 int32_t Offset = Dest->getStackOffset();
1977
1978 // This is a _mov(Mem(), Variable), i.e., a store.
1979 auto *Base = Target->getPhysicalRegister(Target->getFrameOrStackReg());
1980
1981 OperandMIPS32Mem *TAddr = OperandMIPS32Mem::create(
1982 Target->Func, DestTy, Base,
1983 llvm::cast<ConstantInteger32>(Target->Ctx->getConstantInt32(Offset)));
1984 OperandMIPS32Mem *TAddrHi = OperandMIPS32Mem::create(
1985 Target->Func, DestTy, Base,
1986 llvm::cast<ConstantInteger32>(
1987 Target->Ctx->getConstantInt32(Offset + 4)));
1988 OperandMIPS32Mem *Addr = legalizeMemOperand(TAddr);
1989
1990 // FP arguments are passed in GP reg if first argument is in GP. In this
1991 // case type of the SrcR is still FP thus we need to explicitly generate sw
1992 // instead of swc1.
1993 const RegNumT RegNum = SrcR->getRegNum();
1994 const bool IsSrcGPReg = RegMIPS32::isGPRReg(SrcR->getRegNum());
1995 if (SrcTy == IceType_f32 && IsSrcGPReg) {
1996 Variable *SrcGPR = Target->makeReg(IceType_i32, RegNum);
1997 Sandboxer(Target).sw(SrcGPR, Addr);
1998 } else if (SrcTy == IceType_f64 && IsSrcGPReg) {
1999 Variable *SrcGPRHi =
2000 Target->makeReg(IceType_i32, RegMIPS32::get64PairFirstRegNum(RegNum));
2001 Variable *SrcGPRLo = Target->makeReg(
2002 IceType_i32, RegMIPS32::get64PairSecondRegNum(RegNum));
2003 Sandboxer(Target).sw(SrcGPRHi, Addr);
2004 OperandMIPS32Mem *AddrHi = legalizeMemOperand(TAddrHi);
2005 Sandboxer(Target).sw(SrcGPRLo, AddrHi);
2006 } else if (DestTy == IceType_f64 && IsSrcGPReg) {
2007 const auto FirstReg =
2008 (llvm::cast<Variable>(MovInstr->getSrc(0)))->getRegNum();
2009 const auto SecondReg =
2010 (llvm::cast<Variable>(MovInstr->getSrc(1)))->getRegNum();
2011 Variable *SrcGPRHi = Target->makeReg(IceType_i32, FirstReg);
2012 Variable *SrcGPRLo = Target->makeReg(IceType_i32, SecondReg);
2013 Sandboxer(Target).sw(SrcGPRLo, Addr);
2014 OperandMIPS32Mem *AddrHi = legalizeMemOperand(TAddrHi);
2015 Sandboxer(Target).sw(SrcGPRHi, AddrHi);
2016 } else {
2017 Sandboxer(Target).sw(SrcR, Addr);
2018 }
2019
2020 Target->Context.insert<InstFakeDef>(Dest);
2021 Legalized = true;
2022 } else if (auto *Var = llvm::dyn_cast<Variable>(Src)) {
2023 if (Var->isRematerializable()) {
2024 // This is equivalent to an x86 _lea(RematOffset(%esp/%ebp), Variable).
2025
2026 // ExtraOffset is only needed for stack-pointer based frames as we have
2027 // to account for spill storage.
2028 const int32_t ExtraOffset =
2029 (Var->getRegNum() == Target->getFrameOrStackReg())
2030 ? Target->getFrameFixedAllocaOffset()
2031 : 0;
2032
2033 const int32_t Offset = Var->getStackOffset() + ExtraOffset;
2034 Variable *Base = Target->getPhysicalRegister(Var->getRegNum());
2035 Variable *T = newBaseRegister(Base, Offset, Dest->getRegNum());
2036 Target->_mov(Dest, T);
2037 Legalized = true;
2038 } else {
2039 if (!Var->hasReg()) {
2040 // This is a _mov(Variable, Mem()), i.e., a load.
2041 const int32_t Offset = Var->getStackOffset();
2042 auto *Base = Target->getPhysicalRegister(Target->getFrameOrStackReg());
2043 const RegNumT RegNum = Dest->getRegNum();
2044 const bool IsDstGPReg = RegMIPS32::isGPRReg(Dest->getRegNum());
2045 // If we are moving i64 to a double using stack then the address may
2046 // not be aligned to 8-byte boundary as we split i64 into Hi-Lo parts
2047 // and store them individually with 4-byte alignment. Load the Hi-Lo
2048 // parts in TmpReg and move them to the dest using mtc1.
2049 if (DestTy == IceType_f64 && !Utils::IsAligned(Offset, 8) &&
2050 !IsDstGPReg) {
2051 auto *Reg = Target->makeReg(IceType_i32, Target->getReservedTmpReg());
2052 const RegNumT RegNum = Dest->getRegNum();
2053 Variable *DestLo = Target->makeReg(
2054 IceType_f32, RegMIPS32::get64PairFirstRegNum(RegNum));
2055 Variable *DestHi = Target->makeReg(
2056 IceType_f32, RegMIPS32::get64PairSecondRegNum(RegNum));
2057 OperandMIPS32Mem *AddrLo = OperandMIPS32Mem::create(
2058 Target->Func, IceType_i32, Base,
2059 llvm::cast<ConstantInteger32>(
2060 Target->Ctx->getConstantInt32(Offset)));
2061 OperandMIPS32Mem *AddrHi = OperandMIPS32Mem::create(
2062 Target->Func, IceType_i32, Base,
2063 llvm::cast<ConstantInteger32>(
2064 Target->Ctx->getConstantInt32(Offset + 4)));
2065 Sandboxer(Target).lw(Reg, AddrLo);
2066 Target->_mov(DestLo, Reg);
2067 Sandboxer(Target).lw(Reg, AddrHi);
2068 Target->_mov(DestHi, Reg);
2069 } else {
2070 OperandMIPS32Mem *TAddr = OperandMIPS32Mem::create(
2071 Target->Func, DestTy, Base,
2072 llvm::cast<ConstantInteger32>(
2073 Target->Ctx->getConstantInt32(Offset)));
2074 OperandMIPS32Mem *Addr = legalizeMemOperand(TAddr);
2075 OperandMIPS32Mem *TAddrHi = OperandMIPS32Mem::create(
2076 Target->Func, DestTy, Base,
2077 llvm::cast<ConstantInteger32>(
2078 Target->Ctx->getConstantInt32(Offset + 4)));
2079 // FP arguments are passed in GP reg if first argument is in GP.
2080 // In this case type of the Dest is still FP thus we need to
2081 // explicitly generate lw instead of lwc1.
2082 if (DestTy == IceType_f32 && IsDstGPReg) {
2083 Variable *DstGPR = Target->makeReg(IceType_i32, RegNum);
2084 Sandboxer(Target).lw(DstGPR, Addr);
2085 } else if (DestTy == IceType_f64 && IsDstGPReg) {
2086 Variable *DstGPRHi = Target->makeReg(
2087 IceType_i32, RegMIPS32::get64PairFirstRegNum(RegNum));
2088 Variable *DstGPRLo = Target->makeReg(
2089 IceType_i32, RegMIPS32::get64PairSecondRegNum(RegNum));
2090 Sandboxer(Target).lw(DstGPRHi, Addr);
2091 OperandMIPS32Mem *AddrHi = legalizeMemOperand(TAddrHi);
2092 Sandboxer(Target).lw(DstGPRLo, AddrHi);
2093 } else if (DestTy == IceType_f64 && IsDstGPReg) {
2094 const auto FirstReg =
2095 (llvm::cast<Variable>(MovInstr->getSrc(0)))->getRegNum();
2096 const auto SecondReg =
2097 (llvm::cast<Variable>(MovInstr->getSrc(1)))->getRegNum();
2098 Variable *DstGPRHi = Target->makeReg(IceType_i32, FirstReg);
2099 Variable *DstGPRLo = Target->makeReg(IceType_i32, SecondReg);
2100 Sandboxer(Target).lw(DstGPRLo, Addr);
2101 OperandMIPS32Mem *AddrHi = legalizeMemOperand(TAddrHi);
2102 Sandboxer(Target).lw(DstGPRHi, AddrHi);
2103 } else {
2104 Sandboxer(Target).lw(Dest, Addr);
2105 }
2106 }
2107 Legalized = true;
2108 }
2109 }
2110 }
2111
2112 if (Legalized) {
2113 if (MovInstr->isDestRedefined()) {
2114 Target->_set_dest_redefined();
2115 }
2116 MovInstr->setDeleted();
2117 }
2118 }
2119
2120 OperandMIPS32Mem *
legalizeMemOperand(OperandMIPS32Mem * Mem)2121 TargetMIPS32::PostLoweringLegalizer::legalizeMemOperand(OperandMIPS32Mem *Mem) {
2122 if (llvm::isa<ConstantRelocatable>(Mem->getOffset())) {
2123 return nullptr;
2124 }
2125 Variable *Base = Mem->getBase();
2126 auto *Ci32 = llvm::cast<ConstantInteger32>(Mem->getOffset());
2127 int32_t Offset = Ci32->getValue();
2128
2129 if (Base->isRematerializable()) {
2130 const int32_t ExtraOffset =
2131 (Base->getRegNum() == Target->getFrameOrStackReg())
2132 ? Target->getFrameFixedAllocaOffset()
2133 : 0;
2134 Offset += Base->getStackOffset() + ExtraOffset;
2135 Base = Target->getPhysicalRegister(Base->getRegNum());
2136 }
2137
2138 constexpr bool SignExt = true;
2139 if (!OperandMIPS32Mem::canHoldOffset(Mem->getType(), SignExt, Offset)) {
2140 Base = newBaseRegister(Base, Offset, Target->getReservedTmpReg());
2141 Offset = 0;
2142 }
2143
2144 return OperandMIPS32Mem::create(
2145 Target->Func, Mem->getType(), Base,
2146 llvm::cast<ConstantInteger32>(Target->Ctx->getConstantInt32(Offset)));
2147 }
2148
legalizeImmediate(int32_t Imm)2149 Variable *TargetMIPS32::PostLoweringLegalizer::legalizeImmediate(int32_t Imm) {
2150 Variable *Reg = nullptr;
2151 if (!((std::numeric_limits<int16_t>::min() <= Imm) &&
2152 (Imm <= std::numeric_limits<int16_t>::max()))) {
2153 const uint32_t UpperBits = (Imm >> 16) & 0xFFFF;
2154 const uint32_t LowerBits = Imm & 0xFFFF;
2155 Variable *TReg = Target->makeReg(IceType_i32, Target->getReservedTmpReg());
2156 Reg = Target->makeReg(IceType_i32, Target->getReservedTmpReg());
2157 if (LowerBits) {
2158 Target->_lui(TReg, Target->Ctx->getConstantInt32(UpperBits));
2159 Target->_ori(Reg, TReg, LowerBits);
2160 } else {
2161 Target->_lui(Reg, Target->Ctx->getConstantInt32(UpperBits));
2162 }
2163 }
2164 return Reg;
2165 }
2166
postLowerLegalization()2167 void TargetMIPS32::postLowerLegalization() {
2168 Func->dump("Before postLowerLegalization");
2169 assert(hasComputedFrame());
2170 for (CfgNode *Node : Func->getNodes()) {
2171 Context.init(Node);
2172 PostLoweringLegalizer Legalizer(this);
2173 while (!Context.atEnd()) {
2174 PostIncrLoweringContext PostIncrement(Context);
2175 Inst *CurInstr = iteratorToInst(Context.getCur());
2176 const SizeT NumSrcs = CurInstr->getSrcSize();
2177 Operand *Src0 = NumSrcs < 1 ? nullptr : CurInstr->getSrc(0);
2178 Operand *Src1 = NumSrcs < 2 ? nullptr : CurInstr->getSrc(1);
2179 auto *Src0V = llvm::dyn_cast_or_null<Variable>(Src0);
2180 auto *Src0M = llvm::dyn_cast_or_null<OperandMIPS32Mem>(Src0);
2181 auto *Src1M = llvm::dyn_cast_or_null<OperandMIPS32Mem>(Src1);
2182 Variable *Dst = CurInstr->getDest();
2183 if (auto *MovInstr = llvm::dyn_cast<InstMIPS32Mov>(CurInstr)) {
2184 Legalizer.legalizeMov(MovInstr);
2185 continue;
2186 }
2187 if (auto *MovInstr = llvm::dyn_cast<InstMIPS32MovFP64ToI64>(CurInstr)) {
2188 Legalizer.legalizeMovFp(MovInstr);
2189 continue;
2190 }
2191 if (llvm::isa<InstMIPS32Sw>(CurInstr)) {
2192 if (auto *LegalMem = Legalizer.legalizeMemOperand(Src1M)) {
2193 Sandboxer(this).sw(Src0V, LegalMem);
2194 CurInstr->setDeleted();
2195 }
2196 continue;
2197 }
2198 if (llvm::isa<InstMIPS32Swc1>(CurInstr)) {
2199 if (auto *LegalMem = Legalizer.legalizeMemOperand(Src1M)) {
2200 _swc1(Src0V, LegalMem);
2201 CurInstr->setDeleted();
2202 }
2203 continue;
2204 }
2205 if (llvm::isa<InstMIPS32Sdc1>(CurInstr)) {
2206 if (auto *LegalMem = Legalizer.legalizeMemOperand(Src1M)) {
2207 _sdc1(Src0V, LegalMem);
2208 CurInstr->setDeleted();
2209 }
2210 continue;
2211 }
2212 if (llvm::isa<InstMIPS32Lw>(CurInstr)) {
2213 if (auto *LegalMem = Legalizer.legalizeMemOperand(Src0M)) {
2214 Sandboxer(this).lw(Dst, LegalMem);
2215 CurInstr->setDeleted();
2216 }
2217 continue;
2218 }
2219 if (llvm::isa<InstMIPS32Lwc1>(CurInstr)) {
2220 if (auto *LegalMem = Legalizer.legalizeMemOperand(Src0M)) {
2221 _lwc1(Dst, LegalMem);
2222 CurInstr->setDeleted();
2223 }
2224 continue;
2225 }
2226 if (llvm::isa<InstMIPS32Ldc1>(CurInstr)) {
2227 if (auto *LegalMem = Legalizer.legalizeMemOperand(Src0M)) {
2228 _ldc1(Dst, LegalMem);
2229 CurInstr->setDeleted();
2230 }
2231 continue;
2232 }
2233 if (auto *AddiuInstr = llvm::dyn_cast<InstMIPS32Addiu>(CurInstr)) {
2234 if (auto *LegalImm = Legalizer.legalizeImmediate(
2235 static_cast<int32_t>(AddiuInstr->getImmediateValue()))) {
2236 _addu(Dst, Src0V, LegalImm);
2237 CurInstr->setDeleted();
2238 }
2239 continue;
2240 }
2241 }
2242 }
2243 }
2244
loOperand(Operand * Operand)2245 Operand *TargetMIPS32::loOperand(Operand *Operand) {
2246 assert(Operand->getType() == IceType_i64);
2247 if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Operand))
2248 return Var64On32->getLo();
2249 if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
2250 return Ctx->getConstantInt32(static_cast<uint32_t>(Const->getValue()));
2251 }
2252 if (auto *Mem = llvm::dyn_cast<OperandMIPS32Mem>(Operand)) {
2253 // Conservatively disallow memory operands with side-effects (pre/post
2254 // increment) in case of duplication.
2255 assert(Mem->getAddrMode() == OperandMIPS32Mem::Offset);
2256 return OperandMIPS32Mem::create(Func, IceType_i32, Mem->getBase(),
2257 Mem->getOffset(), Mem->getAddrMode());
2258 }
2259 llvm_unreachable("Unsupported operand type");
2260 return nullptr;
2261 }
2262
getOperandAtIndex(Operand * Operand,Type BaseType,uint32_t Index)2263 Operand *TargetMIPS32::getOperandAtIndex(Operand *Operand, Type BaseType,
2264 uint32_t Index) {
2265 if (!isVectorType(Operand->getType())) {
2266 llvm::report_fatal_error("getOperandAtIndex: Operand is not vector");
2267 return nullptr;
2268 }
2269
2270 if (auto *Mem = llvm::dyn_cast<OperandMIPS32Mem>(Operand)) {
2271 assert(Mem->getAddrMode() == OperandMIPS32Mem::Offset);
2272 Variable *Base = Mem->getBase();
2273 auto *Offset = llvm::cast<ConstantInteger32>(Mem->getOffset());
2274 assert(!Utils::WouldOverflowAdd(Offset->getValue(), 4));
2275 int32_t NextOffsetVal =
2276 Offset->getValue() + (Index * typeWidthInBytes(BaseType));
2277 constexpr bool NoSignExt = false;
2278 if (!OperandMIPS32Mem::canHoldOffset(BaseType, NoSignExt, NextOffsetVal)) {
2279 Constant *_4 = Ctx->getConstantInt32(4);
2280 Variable *NewBase = Func->makeVariable(Base->getType());
2281 lowerArithmetic(
2282 InstArithmetic::create(Func, InstArithmetic::Add, NewBase, Base, _4));
2283 Base = NewBase;
2284 } else {
2285 Offset =
2286 llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(NextOffsetVal));
2287 }
2288 return OperandMIPS32Mem::create(Func, BaseType, Base, Offset,
2289 Mem->getAddrMode());
2290 }
2291
2292 if (auto *VarVecOn32 = llvm::dyn_cast<VariableVecOn32>(Operand))
2293 return VarVecOn32->getContainers()[Index];
2294
2295 llvm_unreachable("Unsupported operand type");
2296 return nullptr;
2297 }
2298
hiOperand(Operand * Operand)2299 Operand *TargetMIPS32::hiOperand(Operand *Operand) {
2300 assert(Operand->getType() == IceType_i64);
2301 if (Operand->getType() != IceType_i64)
2302 return Operand;
2303 if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Operand))
2304 return Var64On32->getHi();
2305 if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
2306 return Ctx->getConstantInt32(
2307 static_cast<uint32_t>(Const->getValue() >> 32));
2308 }
2309 if (auto *Mem = llvm::dyn_cast<OperandMIPS32Mem>(Operand)) {
2310 // Conservatively disallow memory operands with side-effects
2311 // in case of duplication.
2312 assert(Mem->getAddrMode() == OperandMIPS32Mem::Offset);
2313 const Type SplitType = IceType_i32;
2314 Variable *Base = Mem->getBase();
2315 auto *Offset = llvm::cast<ConstantInteger32>(Mem->getOffset());
2316 assert(!Utils::WouldOverflowAdd(Offset->getValue(), 4));
2317 int32_t NextOffsetVal = Offset->getValue() + 4;
2318 constexpr bool SignExt = false;
2319 if (!OperandMIPS32Mem::canHoldOffset(SplitType, SignExt, NextOffsetVal)) {
2320 // We have to make a temp variable and add 4 to either Base or Offset.
2321 // If we add 4 to Offset, this will convert a non-RegReg addressing
2322 // mode into a RegReg addressing mode. Since NaCl sandboxing disallows
2323 // RegReg addressing modes, prefer adding to base and replacing instead.
2324 // Thus we leave the old offset alone.
2325 Constant *Four = Ctx->getConstantInt32(4);
2326 Variable *NewBase = Func->makeVariable(Base->getType());
2327 lowerArithmetic(InstArithmetic::create(Func, InstArithmetic::Add, NewBase,
2328 Base, Four));
2329 Base = NewBase;
2330 } else {
2331 Offset =
2332 llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(NextOffsetVal));
2333 }
2334 return OperandMIPS32Mem::create(Func, SplitType, Base, Offset,
2335 Mem->getAddrMode());
2336 }
2337 llvm_unreachable("Unsupported operand type");
2338 return nullptr;
2339 }
2340
getRegisterSet(RegSetMask Include,RegSetMask Exclude) const2341 SmallBitVector TargetMIPS32::getRegisterSet(RegSetMask Include,
2342 RegSetMask Exclude) const {
2343 SmallBitVector Registers(RegMIPS32::Reg_NUM);
2344
2345 #define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt, \
2346 isI64Pair, isFP32, isFP64, isVec128, alias_init) \
2347 if (scratch && (Include & RegSet_CallerSave)) \
2348 Registers[RegMIPS32::val] = true; \
2349 if (preserved && (Include & RegSet_CalleeSave)) \
2350 Registers[RegMIPS32::val] = true; \
2351 if (stackptr && (Include & RegSet_StackPointer)) \
2352 Registers[RegMIPS32::val] = true; \
2353 if (frameptr && (Include & RegSet_FramePointer)) \
2354 Registers[RegMIPS32::val] = true; \
2355 if (scratch && (Exclude & RegSet_CallerSave)) \
2356 Registers[RegMIPS32::val] = false; \
2357 if (preserved && (Exclude & RegSet_CalleeSave)) \
2358 Registers[RegMIPS32::val] = false; \
2359 if (stackptr && (Exclude & RegSet_StackPointer)) \
2360 Registers[RegMIPS32::val] = false; \
2361 if (frameptr && (Exclude & RegSet_FramePointer)) \
2362 Registers[RegMIPS32::val] = false;
2363
2364 REGMIPS32_TABLE
2365
2366 #undef X
2367
2368 if (NeedSandboxing) {
2369 Registers[RegMIPS32::Reg_T6] = false;
2370 Registers[RegMIPS32::Reg_T7] = false;
2371 Registers[RegMIPS32::Reg_T8] = false;
2372 }
2373 return Registers;
2374 }
2375
lowerAlloca(const InstAlloca * Instr)2376 void TargetMIPS32::lowerAlloca(const InstAlloca *Instr) {
2377 // Conservatively require the stack to be aligned. Some stack adjustment
2378 // operations implemented below assume that the stack is aligned before the
2379 // alloca. All the alloca code ensures that the stack alignment is preserved
2380 // after the alloca. The stack alignment restriction can be relaxed in some
2381 // cases.
2382 NeedsStackAlignment = true;
2383
2384 // For default align=0, set it to the real value 1, to avoid any
2385 // bit-manipulation problems below.
2386 const uint32_t AlignmentParam = std::max(1u, Instr->getAlignInBytes());
2387
2388 // LLVM enforces power of 2 alignment.
2389 assert(llvm::isPowerOf2_32(AlignmentParam));
2390 assert(llvm::isPowerOf2_32(MIPS32_STACK_ALIGNMENT_BYTES));
2391
2392 const uint32_t Alignment =
2393 std::max(AlignmentParam, MIPS32_STACK_ALIGNMENT_BYTES);
2394 const bool OverAligned = Alignment > MIPS32_STACK_ALIGNMENT_BYTES;
2395 const bool OptM1 = Func->getOptLevel() == Opt_m1;
2396 const bool AllocaWithKnownOffset = Instr->getKnownFrameOffset();
2397 const bool UseFramePointer =
2398 hasFramePointer() || OverAligned || !AllocaWithKnownOffset || OptM1;
2399
2400 if (UseFramePointer)
2401 setHasFramePointer();
2402
2403 Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP);
2404
2405 Variable *Dest = Instr->getDest();
2406 Operand *TotalSize = Instr->getSizeInBytes();
2407
2408 if (const auto *ConstantTotalSize =
2409 llvm::dyn_cast<ConstantInteger32>(TotalSize)) {
2410 const uint32_t Value =
2411 Utils::applyAlignment(ConstantTotalSize->getValue(), Alignment);
2412 FixedAllocaSizeBytes += Value;
2413 // Constant size alloca.
2414 if (!UseFramePointer) {
2415 // If we don't need a Frame Pointer, this alloca has a known offset to the
2416 // stack pointer. We don't need adjust the stack pointer, nor assign any
2417 // value to Dest, as Dest is rematerializable.
2418 assert(Dest->isRematerializable());
2419 Context.insert<InstFakeDef>(Dest);
2420 return;
2421 }
2422
2423 if (Alignment > MIPS32_STACK_ALIGNMENT_BYTES) {
2424 CurrentAllocaOffset =
2425 Utils::applyAlignment(CurrentAllocaOffset, Alignment);
2426 }
2427 auto *T = I32Reg();
2428 _addiu(T, SP, CurrentAllocaOffset);
2429 _mov(Dest, T);
2430 CurrentAllocaOffset += Value;
2431 return;
2432
2433 } else {
2434 // Non-constant sizes need to be adjusted to the next highest multiple of
2435 // the required alignment at runtime.
2436 VariableAllocaUsed = true;
2437 VariableAllocaAlignBytes = AlignmentParam;
2438 Variable *AlignAmount;
2439 auto *TotalSizeR = legalizeToReg(TotalSize, Legal_Reg);
2440 auto *T1 = I32Reg();
2441 auto *T2 = I32Reg();
2442 auto *T3 = I32Reg();
2443 auto *T4 = I32Reg();
2444 auto *T5 = I32Reg();
2445 _addiu(T1, TotalSizeR, MIPS32_STACK_ALIGNMENT_BYTES - 1);
2446 _addiu(T2, getZero(), -MIPS32_STACK_ALIGNMENT_BYTES);
2447 _and(T3, T1, T2);
2448 _subu(T4, SP, T3);
2449 if (Instr->getAlignInBytes()) {
2450 AlignAmount =
2451 legalizeToReg(Ctx->getConstantInt32(-AlignmentParam), Legal_Reg);
2452 _and(T5, T4, AlignAmount);
2453 _mov(Dest, T5);
2454 } else {
2455 _mov(Dest, T4);
2456 }
2457 if (OptM1)
2458 _mov(SP, Dest);
2459 else
2460 Sandboxer(this).reset_sp(Dest);
2461 return;
2462 }
2463 }
2464
lowerInt64Arithmetic(const InstArithmetic * Instr,Variable * Dest,Operand * Src0,Operand * Src1)2465 void TargetMIPS32::lowerInt64Arithmetic(const InstArithmetic *Instr,
2466 Variable *Dest, Operand *Src0,
2467 Operand *Src1) {
2468 InstArithmetic::OpKind Op = Instr->getOp();
2469 auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
2470 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
2471 Variable *Src0LoR = nullptr;
2472 Variable *Src1LoR = nullptr;
2473 Variable *Src0HiR = nullptr;
2474 Variable *Src1HiR = nullptr;
2475
2476 switch (Op) {
2477 case InstArithmetic::_num:
2478 llvm::report_fatal_error("Unknown arithmetic operator");
2479 return;
2480 case InstArithmetic::Add: {
2481 Src0LoR = legalizeToReg(loOperand(Src0));
2482 Src1LoR = legalizeToReg(loOperand(Src1));
2483 Src0HiR = legalizeToReg(hiOperand(Src0));
2484 Src1HiR = legalizeToReg(hiOperand(Src1));
2485 auto *T_Carry = I32Reg(), *T_Lo = I32Reg(), *T_Hi = I32Reg(),
2486 *T_Hi2 = I32Reg();
2487 _addu(T_Lo, Src0LoR, Src1LoR);
2488 _mov(DestLo, T_Lo);
2489 _sltu(T_Carry, T_Lo, Src0LoR);
2490 _addu(T_Hi, T_Carry, Src0HiR);
2491 _addu(T_Hi2, Src1HiR, T_Hi);
2492 _mov(DestHi, T_Hi2);
2493 return;
2494 }
2495 case InstArithmetic::And: {
2496 Src0LoR = legalizeToReg(loOperand(Src0));
2497 Src1LoR = legalizeToReg(loOperand(Src1));
2498 Src0HiR = legalizeToReg(hiOperand(Src0));
2499 Src1HiR = legalizeToReg(hiOperand(Src1));
2500 auto *T_Lo = I32Reg(), *T_Hi = I32Reg();
2501 _and(T_Lo, Src0LoR, Src1LoR);
2502 _mov(DestLo, T_Lo);
2503 _and(T_Hi, Src0HiR, Src1HiR);
2504 _mov(DestHi, T_Hi);
2505 return;
2506 }
2507 case InstArithmetic::Sub: {
2508 Src0LoR = legalizeToReg(loOperand(Src0));
2509 Src1LoR = legalizeToReg(loOperand(Src1));
2510 Src0HiR = legalizeToReg(hiOperand(Src0));
2511 Src1HiR = legalizeToReg(hiOperand(Src1));
2512 auto *T_Borrow = I32Reg(), *T_Lo = I32Reg(), *T_Hi = I32Reg(),
2513 *T_Hi2 = I32Reg();
2514 _subu(T_Lo, Src0LoR, Src1LoR);
2515 _mov(DestLo, T_Lo);
2516 _sltu(T_Borrow, Src0LoR, Src1LoR);
2517 _addu(T_Hi, T_Borrow, Src1HiR);
2518 _subu(T_Hi2, Src0HiR, T_Hi);
2519 _mov(DestHi, T_Hi2);
2520 return;
2521 }
2522 case InstArithmetic::Or: {
2523 Src0LoR = legalizeToReg(loOperand(Src0));
2524 Src1LoR = legalizeToReg(loOperand(Src1));
2525 Src0HiR = legalizeToReg(hiOperand(Src0));
2526 Src1HiR = legalizeToReg(hiOperand(Src1));
2527 auto *T_Lo = I32Reg(), *T_Hi = I32Reg();
2528 _or(T_Lo, Src0LoR, Src1LoR);
2529 _mov(DestLo, T_Lo);
2530 _or(T_Hi, Src0HiR, Src1HiR);
2531 _mov(DestHi, T_Hi);
2532 return;
2533 }
2534 case InstArithmetic::Xor: {
2535 Src0LoR = legalizeToReg(loOperand(Src0));
2536 Src1LoR = legalizeToReg(loOperand(Src1));
2537 Src0HiR = legalizeToReg(hiOperand(Src0));
2538 Src1HiR = legalizeToReg(hiOperand(Src1));
2539 auto *T_Lo = I32Reg(), *T_Hi = I32Reg();
2540 _xor(T_Lo, Src0LoR, Src1LoR);
2541 _mov(DestLo, T_Lo);
2542 _xor(T_Hi, Src0HiR, Src1HiR);
2543 _mov(DestHi, T_Hi);
2544 return;
2545 }
2546 case InstArithmetic::Mul: {
2547 // TODO(rkotler): Make sure that mul has the side effect of clobbering
2548 // LO, HI. Check for any other LO, HI quirkiness in this section.
2549 Src0LoR = legalizeToReg(loOperand(Src0));
2550 Src1LoR = legalizeToReg(loOperand(Src1));
2551 Src0HiR = legalizeToReg(hiOperand(Src0));
2552 Src1HiR = legalizeToReg(hiOperand(Src1));
2553 auto *T_Lo = I32Reg(RegMIPS32::Reg_LO), *T_Hi = I32Reg(RegMIPS32::Reg_HI);
2554 auto *T1 = I32Reg(), *T2 = I32Reg();
2555 auto *TM1 = I32Reg(), *TM2 = I32Reg(), *TM3 = I32Reg(), *TM4 = I32Reg();
2556 _multu(T_Lo, Src0LoR, Src1LoR);
2557 Context.insert<InstFakeDef>(T_Hi, T_Lo);
2558 _mflo(T1, T_Lo);
2559 _mfhi(T2, T_Hi);
2560 _mov(DestLo, T1);
2561 _mul(TM1, Src0HiR, Src1LoR);
2562 _mul(TM2, Src0LoR, Src1HiR);
2563 _addu(TM3, TM1, T2);
2564 _addu(TM4, TM3, TM2);
2565 _mov(DestHi, TM4);
2566 return;
2567 }
2568 case InstArithmetic::Shl: {
2569 auto *T_Lo = I32Reg();
2570 auto *T_Hi = I32Reg();
2571 auto *T1_Lo = I32Reg();
2572 auto *T1_Hi = I32Reg();
2573 auto *T1 = I32Reg();
2574 auto *T2 = I32Reg();
2575 auto *T3 = I32Reg();
2576 auto *T4 = I32Reg();
2577 auto *T5 = I32Reg();
2578
2579 if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Src1)) {
2580 Src0LoR = legalizeToReg(loOperand(Src0));
2581 int64_t ShiftAmount = Const->getValue();
2582 if (ShiftAmount == 1) {
2583 Src0HiR = legalizeToReg(hiOperand(Src0));
2584 _addu(T_Lo, Src0LoR, Src0LoR);
2585 _sltu(T1, T_Lo, Src0LoR);
2586 _addu(T2, T1, Src0HiR);
2587 _addu(T_Hi, Src0HiR, T2);
2588 } else if (ShiftAmount < INT32_BITS) {
2589 Src0HiR = legalizeToReg(hiOperand(Src0));
2590 _srl(T1, Src0LoR, INT32_BITS - ShiftAmount);
2591 _sll(T2, Src0HiR, ShiftAmount);
2592 _or(T_Hi, T1, T2);
2593 _sll(T_Lo, Src0LoR, ShiftAmount);
2594 } else if (ShiftAmount == INT32_BITS) {
2595 _addiu(T_Lo, getZero(), 0);
2596 _mov(T_Hi, Src0LoR);
2597 } else if (ShiftAmount > INT32_BITS && ShiftAmount < 64) {
2598 _sll(T_Hi, Src0LoR, ShiftAmount - INT32_BITS);
2599 _addiu(T_Lo, getZero(), 0);
2600 }
2601 _mov(DestLo, T_Lo);
2602 _mov(DestHi, T_Hi);
2603 return;
2604 }
2605
2606 Src0LoR = legalizeToReg(loOperand(Src0));
2607 Src1LoR = legalizeToReg(loOperand(Src1));
2608 Src0HiR = legalizeToReg(hiOperand(Src0));
2609
2610 _sllv(T1, Src0HiR, Src1LoR);
2611 _not(T2, Src1LoR);
2612 _srl(T3, Src0LoR, 1);
2613 _srlv(T4, T3, T2);
2614 _or(T_Hi, T1, T4);
2615 _sllv(T_Lo, Src0LoR, Src1LoR);
2616
2617 _mov(T1_Hi, T_Hi);
2618 _mov(T1_Lo, T_Lo);
2619 _andi(T5, Src1LoR, INT32_BITS);
2620 _movn(T1_Hi, T_Lo, T5);
2621 _movn(T1_Lo, getZero(), T5);
2622 _mov(DestHi, T1_Hi);
2623 _mov(DestLo, T1_Lo);
2624 return;
2625 }
2626 case InstArithmetic::Lshr: {
2627
2628 auto *T_Lo = I32Reg();
2629 auto *T_Hi = I32Reg();
2630 auto *T1_Lo = I32Reg();
2631 auto *T1_Hi = I32Reg();
2632 auto *T1 = I32Reg();
2633 auto *T2 = I32Reg();
2634 auto *T3 = I32Reg();
2635 auto *T4 = I32Reg();
2636 auto *T5 = I32Reg();
2637
2638 if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Src1)) {
2639 Src0HiR = legalizeToReg(hiOperand(Src0));
2640 int64_t ShiftAmount = Const->getValue();
2641 if (ShiftAmount < INT32_BITS) {
2642 Src0LoR = legalizeToReg(loOperand(Src0));
2643 _sll(T1, Src0HiR, INT32_BITS - ShiftAmount);
2644 _srl(T2, Src0LoR, ShiftAmount);
2645 _or(T_Lo, T1, T2);
2646 _srl(T_Hi, Src0HiR, ShiftAmount);
2647 } else if (ShiftAmount == INT32_BITS) {
2648 _mov(T_Lo, Src0HiR);
2649 _addiu(T_Hi, getZero(), 0);
2650 } else if (ShiftAmount > INT32_BITS && ShiftAmount < 64) {
2651 _srl(T_Lo, Src0HiR, ShiftAmount - INT32_BITS);
2652 _addiu(T_Hi, getZero(), 0);
2653 }
2654 _mov(DestLo, T_Lo);
2655 _mov(DestHi, T_Hi);
2656 return;
2657 }
2658
2659 Src0LoR = legalizeToReg(loOperand(Src0));
2660 Src1LoR = legalizeToReg(loOperand(Src1));
2661 Src0HiR = legalizeToReg(hiOperand(Src0));
2662
2663 _srlv(T1, Src0LoR, Src1LoR);
2664 _not(T2, Src1LoR);
2665 _sll(T3, Src0HiR, 1);
2666 _sllv(T4, T3, T2);
2667 _or(T_Lo, T1, T4);
2668 _srlv(T_Hi, Src0HiR, Src1LoR);
2669
2670 _mov(T1_Hi, T_Hi);
2671 _mov(T1_Lo, T_Lo);
2672 _andi(T5, Src1LoR, INT32_BITS);
2673 _movn(T1_Lo, T_Hi, T5);
2674 _movn(T1_Hi, getZero(), T5);
2675 _mov(DestHi, T1_Hi);
2676 _mov(DestLo, T1_Lo);
2677 return;
2678 }
2679 case InstArithmetic::Ashr: {
2680
2681 auto *T_Lo = I32Reg();
2682 auto *T_Hi = I32Reg();
2683 auto *T1_Lo = I32Reg();
2684 auto *T1_Hi = I32Reg();
2685 auto *T1 = I32Reg();
2686 auto *T2 = I32Reg();
2687 auto *T3 = I32Reg();
2688 auto *T4 = I32Reg();
2689 auto *T5 = I32Reg();
2690 auto *T6 = I32Reg();
2691
2692 if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Src1)) {
2693 Src0HiR = legalizeToReg(hiOperand(Src0));
2694 int64_t ShiftAmount = Const->getValue();
2695 if (ShiftAmount < INT32_BITS) {
2696 Src0LoR = legalizeToReg(loOperand(Src0));
2697 _sll(T1, Src0HiR, INT32_BITS - ShiftAmount);
2698 _srl(T2, Src0LoR, ShiftAmount);
2699 _or(T_Lo, T1, T2);
2700 _sra(T_Hi, Src0HiR, ShiftAmount);
2701 } else if (ShiftAmount == INT32_BITS) {
2702 _sra(T_Hi, Src0HiR, INT32_BITS - 1);
2703 _mov(T_Lo, Src0HiR);
2704 } else if (ShiftAmount > INT32_BITS && ShiftAmount < 64) {
2705 _sra(T_Lo, Src0HiR, ShiftAmount - INT32_BITS);
2706 _sra(T_Hi, Src0HiR, INT32_BITS - 1);
2707 }
2708 _mov(DestLo, T_Lo);
2709 _mov(DestHi, T_Hi);
2710 return;
2711 }
2712
2713 Src0LoR = legalizeToReg(loOperand(Src0));
2714 Src1LoR = legalizeToReg(loOperand(Src1));
2715 Src0HiR = legalizeToReg(hiOperand(Src0));
2716
2717 _srlv(T1, Src0LoR, Src1LoR);
2718 _not(T2, Src1LoR);
2719 _sll(T3, Src0HiR, 1);
2720 _sllv(T4, T3, T2);
2721 _or(T_Lo, T1, T4);
2722 _srav(T_Hi, Src0HiR, Src1LoR);
2723
2724 _mov(T1_Hi, T_Hi);
2725 _mov(T1_Lo, T_Lo);
2726 _andi(T5, Src1LoR, INT32_BITS);
2727 _movn(T1_Lo, T_Hi, T5);
2728 _sra(T6, Src0HiR, INT32_BITS - 1);
2729 _movn(T1_Hi, T6, T5);
2730 _mov(DestHi, T1_Hi);
2731 _mov(DestLo, T1_Lo);
2732 return;
2733 }
2734 case InstArithmetic::Fadd:
2735 case InstArithmetic::Fsub:
2736 case InstArithmetic::Fmul:
2737 case InstArithmetic::Fdiv:
2738 case InstArithmetic::Frem:
2739 llvm::report_fatal_error("FP instruction with i64 type");
2740 return;
2741 case InstArithmetic::Udiv:
2742 case InstArithmetic::Sdiv:
2743 case InstArithmetic::Urem:
2744 case InstArithmetic::Srem:
2745 llvm::report_fatal_error("64-bit div and rem should have been prelowered");
2746 return;
2747 }
2748 }
2749
lowerArithmetic(const InstArithmetic * Instr)2750 void TargetMIPS32::lowerArithmetic(const InstArithmetic *Instr) {
2751 Variable *Dest = Instr->getDest();
2752
2753 if (Dest->isRematerializable()) {
2754 Context.insert<InstFakeDef>(Dest);
2755 return;
2756 }
2757
2758 // We need to signal all the UnimplementedLoweringError errors before any
2759 // legalization into new variables, otherwise Om1 register allocation may fail
2760 // when it sees variables that are defined but not used.
2761 Type DestTy = Dest->getType();
2762 Operand *Src0 = legalizeUndef(Instr->getSrc(0));
2763 Operand *Src1 = legalizeUndef(Instr->getSrc(1));
2764 if (DestTy == IceType_i64) {
2765 lowerInt64Arithmetic(Instr, Instr->getDest(), Src0, Src1);
2766 return;
2767 }
2768 if (isVectorType(Dest->getType())) {
2769 llvm::report_fatal_error("Arithmetic: Destination type is vector");
2770 return;
2771 }
2772
2773 Variable *T = makeReg(Dest->getType());
2774 Variable *Src0R = legalizeToReg(Src0);
2775 Variable *Src1R = nullptr;
2776 uint32_t Value = 0;
2777 bool IsSrc1Imm16 = false;
2778
2779 switch (Instr->getOp()) {
2780 case InstArithmetic::Add:
2781 case InstArithmetic::Sub: {
2782 auto *Const32 = llvm::dyn_cast<ConstantInteger32>(Src1);
2783 if (Const32 != nullptr && isInt<16>(int32_t(Const32->getValue()))) {
2784 IsSrc1Imm16 = true;
2785 Value = Const32->getValue();
2786 } else {
2787 Src1R = legalizeToReg(Src1);
2788 }
2789 break;
2790 }
2791 case InstArithmetic::And:
2792 case InstArithmetic::Or:
2793 case InstArithmetic::Xor:
2794 case InstArithmetic::Shl:
2795 case InstArithmetic::Lshr:
2796 case InstArithmetic::Ashr: {
2797 auto *Const32 = llvm::dyn_cast<ConstantInteger32>(Src1);
2798 if (Const32 != nullptr && llvm::isUInt<16>(uint32_t(Const32->getValue()))) {
2799 IsSrc1Imm16 = true;
2800 Value = Const32->getValue();
2801 } else {
2802 Src1R = legalizeToReg(Src1);
2803 }
2804 break;
2805 }
2806 default:
2807 Src1R = legalizeToReg(Src1);
2808 break;
2809 }
2810 constexpr uint32_t DivideByZeroTrapCode = 7;
2811
2812 switch (Instr->getOp()) {
2813 case InstArithmetic::_num:
2814 break;
2815 case InstArithmetic::Add: {
2816 auto *T0R = Src0R;
2817 auto *T1R = Src1R;
2818 if (Dest->getType() != IceType_i32) {
2819 T0R = makeReg(IceType_i32);
2820 lowerCast(InstCast::create(Func, InstCast::Sext, T0R, Src0R));
2821 if (!IsSrc1Imm16) {
2822 T1R = makeReg(IceType_i32);
2823 lowerCast(InstCast::create(Func, InstCast::Sext, T1R, Src1R));
2824 }
2825 }
2826 if (IsSrc1Imm16) {
2827 _addiu(T, T0R, Value);
2828 } else {
2829 _addu(T, T0R, T1R);
2830 }
2831 _mov(Dest, T);
2832 return;
2833 }
2834 case InstArithmetic::And:
2835 if (IsSrc1Imm16) {
2836 _andi(T, Src0R, Value);
2837 } else {
2838 _and(T, Src0R, Src1R);
2839 }
2840 _mov(Dest, T);
2841 return;
2842 case InstArithmetic::Or:
2843 if (IsSrc1Imm16) {
2844 _ori(T, Src0R, Value);
2845 } else {
2846 _or(T, Src0R, Src1R);
2847 }
2848 _mov(Dest, T);
2849 return;
2850 case InstArithmetic::Xor:
2851 if (IsSrc1Imm16) {
2852 _xori(T, Src0R, Value);
2853 } else {
2854 _xor(T, Src0R, Src1R);
2855 }
2856 _mov(Dest, T);
2857 return;
2858 case InstArithmetic::Sub: {
2859 auto *T0R = Src0R;
2860 auto *T1R = Src1R;
2861 if (Dest->getType() != IceType_i32) {
2862 T0R = makeReg(IceType_i32);
2863 lowerCast(InstCast::create(Func, InstCast::Sext, T0R, Src0R));
2864 if (!IsSrc1Imm16) {
2865 T1R = makeReg(IceType_i32);
2866 lowerCast(InstCast::create(Func, InstCast::Sext, T1R, Src1R));
2867 }
2868 }
2869 if (IsSrc1Imm16) {
2870 _addiu(T, T0R, -Value);
2871 } else {
2872 _subu(T, T0R, T1R);
2873 }
2874 _mov(Dest, T);
2875 return;
2876 }
2877 case InstArithmetic::Mul: {
2878 _mul(T, Src0R, Src1R);
2879 _mov(Dest, T);
2880 return;
2881 }
2882 case InstArithmetic::Shl: {
2883 if (IsSrc1Imm16) {
2884 _sll(T, Src0R, Value);
2885 } else {
2886 _sllv(T, Src0R, Src1R);
2887 }
2888 _mov(Dest, T);
2889 return;
2890 }
2891 case InstArithmetic::Lshr: {
2892 auto *T0R = Src0R;
2893 auto *T1R = Src1R;
2894 if (Dest->getType() != IceType_i32) {
2895 T0R = makeReg(IceType_i32);
2896 lowerCast(InstCast::create(Func, InstCast::Zext, T0R, Src0R));
2897 if (!IsSrc1Imm16) {
2898 T1R = makeReg(IceType_i32);
2899 lowerCast(InstCast::create(Func, InstCast::Zext, T1R, Src1R));
2900 }
2901 }
2902 if (IsSrc1Imm16) {
2903 _srl(T, T0R, Value);
2904 } else {
2905 _srlv(T, T0R, T1R);
2906 }
2907 _mov(Dest, T);
2908 return;
2909 }
2910 case InstArithmetic::Ashr: {
2911 auto *T0R = Src0R;
2912 auto *T1R = Src1R;
2913 if (Dest->getType() != IceType_i32) {
2914 T0R = makeReg(IceType_i32);
2915 lowerCast(InstCast::create(Func, InstCast::Sext, T0R, Src0R));
2916 if (!IsSrc1Imm16) {
2917 T1R = makeReg(IceType_i32);
2918 lowerCast(InstCast::create(Func, InstCast::Sext, T1R, Src1R));
2919 }
2920 }
2921 if (IsSrc1Imm16) {
2922 _sra(T, T0R, Value);
2923 } else {
2924 _srav(T, T0R, T1R);
2925 }
2926 _mov(Dest, T);
2927 return;
2928 }
2929 case InstArithmetic::Udiv: {
2930 auto *T_Zero = I32Reg(RegMIPS32::Reg_ZERO);
2931 auto *T0R = Src0R;
2932 auto *T1R = Src1R;
2933 if (Dest->getType() != IceType_i32) {
2934 T0R = makeReg(IceType_i32);
2935 lowerCast(InstCast::create(Func, InstCast::Zext, T0R, Src0R));
2936 T1R = makeReg(IceType_i32);
2937 lowerCast(InstCast::create(Func, InstCast::Zext, T1R, Src1R));
2938 }
2939 _divu(T_Zero, T0R, T1R);
2940 _teq(T1R, T_Zero, DivideByZeroTrapCode); // Trap if divide-by-zero
2941 _mflo(T, T_Zero);
2942 _mov(Dest, T);
2943 return;
2944 }
2945 case InstArithmetic::Sdiv: {
2946 auto *T_Zero = I32Reg(RegMIPS32::Reg_ZERO);
2947 auto *T0R = Src0R;
2948 auto *T1R = Src1R;
2949 if (Dest->getType() != IceType_i32) {
2950 T0R = makeReg(IceType_i32);
2951 lowerCast(InstCast::create(Func, InstCast::Sext, T0R, Src0R));
2952 T1R = makeReg(IceType_i32);
2953 lowerCast(InstCast::create(Func, InstCast::Sext, T1R, Src1R));
2954 }
2955 _div(T_Zero, T0R, T1R);
2956 _teq(T1R, T_Zero, DivideByZeroTrapCode); // Trap if divide-by-zero
2957 _mflo(T, T_Zero);
2958 _mov(Dest, T);
2959 return;
2960 }
2961 case InstArithmetic::Urem: {
2962 auto *T_Zero = I32Reg(RegMIPS32::Reg_ZERO);
2963 auto *T0R = Src0R;
2964 auto *T1R = Src1R;
2965 if (Dest->getType() != IceType_i32) {
2966 T0R = makeReg(IceType_i32);
2967 lowerCast(InstCast::create(Func, InstCast::Zext, T0R, Src0R));
2968 T1R = makeReg(IceType_i32);
2969 lowerCast(InstCast::create(Func, InstCast::Zext, T1R, Src1R));
2970 }
2971 _divu(T_Zero, T0R, T1R);
2972 _teq(T1R, T_Zero, DivideByZeroTrapCode); // Trap if divide-by-zero
2973 _mfhi(T, T_Zero);
2974 _mov(Dest, T);
2975 return;
2976 }
2977 case InstArithmetic::Srem: {
2978 auto *T_Zero = I32Reg(RegMIPS32::Reg_ZERO);
2979 auto *T0R = Src0R;
2980 auto *T1R = Src1R;
2981 if (Dest->getType() != IceType_i32) {
2982 T0R = makeReg(IceType_i32);
2983 lowerCast(InstCast::create(Func, InstCast::Sext, T0R, Src0R));
2984 T1R = makeReg(IceType_i32);
2985 lowerCast(InstCast::create(Func, InstCast::Sext, T1R, Src1R));
2986 }
2987 _div(T_Zero, T0R, T1R);
2988 _teq(T1R, T_Zero, DivideByZeroTrapCode); // Trap if divide-by-zero
2989 _mfhi(T, T_Zero);
2990 _mov(Dest, T);
2991 return;
2992 }
2993 case InstArithmetic::Fadd: {
2994 if (DestTy == IceType_f32) {
2995 _add_s(T, Src0R, Src1R);
2996 _mov(Dest, T);
2997 return;
2998 }
2999 if (DestTy == IceType_f64) {
3000 _add_d(T, Src0R, Src1R);
3001 _mov(Dest, T);
3002 return;
3003 }
3004 break;
3005 }
3006 case InstArithmetic::Fsub:
3007 if (DestTy == IceType_f32) {
3008 _sub_s(T, Src0R, Src1R);
3009 _mov(Dest, T);
3010 return;
3011 }
3012 if (DestTy == IceType_f64) {
3013 _sub_d(T, Src0R, Src1R);
3014 _mov(Dest, T);
3015 return;
3016 }
3017 break;
3018 case InstArithmetic::Fmul:
3019 if (DestTy == IceType_f32) {
3020 _mul_s(T, Src0R, Src1R);
3021 _mov(Dest, T);
3022 return;
3023 }
3024 if (DestTy == IceType_f64) {
3025 _mul_d(T, Src0R, Src1R);
3026 _mov(Dest, T);
3027 return;
3028 }
3029 break;
3030 case InstArithmetic::Fdiv:
3031 if (DestTy == IceType_f32) {
3032 _div_s(T, Src0R, Src1R);
3033 _mov(Dest, T);
3034 return;
3035 }
3036 if (DestTy == IceType_f64) {
3037 _div_d(T, Src0R, Src1R);
3038 _mov(Dest, T);
3039 return;
3040 }
3041 break;
3042 case InstArithmetic::Frem:
3043 llvm::report_fatal_error("frem should have been prelowered.");
3044 break;
3045 }
3046 llvm::report_fatal_error("Unknown arithmetic operator");
3047 }
3048
lowerAssign(const InstAssign * Instr)3049 void TargetMIPS32::lowerAssign(const InstAssign *Instr) {
3050 Variable *Dest = Instr->getDest();
3051
3052 if (Dest->isRematerializable()) {
3053 Context.insert<InstFakeDef>(Dest);
3054 return;
3055 }
3056
3057 // Source type may not be same as destination
3058 if (isVectorType(Dest->getType())) {
3059 Operand *Src0 = legalizeUndef(Instr->getSrc(0));
3060 auto *DstVec = llvm::dyn_cast<VariableVecOn32>(Dest);
3061 for (SizeT i = 0; i < DstVec->ContainersPerVector; ++i) {
3062 auto *DCont = DstVec->getContainers()[i];
3063 auto *SCont =
3064 legalize(getOperandAtIndex(Src0, IceType_i32, i), Legal_Reg);
3065 auto *TReg = makeReg(IceType_i32);
3066 _mov(TReg, SCont);
3067 _mov(DCont, TReg);
3068 }
3069 return;
3070 }
3071 Operand *Src0 = Instr->getSrc(0);
3072 assert(Dest->getType() == Src0->getType());
3073 if (Dest->getType() == IceType_i64) {
3074 Src0 = legalizeUndef(Src0);
3075 Operand *Src0Lo = legalize(loOperand(Src0), Legal_Reg);
3076 Operand *Src0Hi = legalize(hiOperand(Src0), Legal_Reg);
3077 auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
3078 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3079 auto *T_Lo = I32Reg(), *T_Hi = I32Reg();
3080 _mov(T_Lo, Src0Lo);
3081 _mov(DestLo, T_Lo);
3082 _mov(T_Hi, Src0Hi);
3083 _mov(DestHi, T_Hi);
3084 return;
3085 }
3086 Operand *SrcR;
3087 if (Dest->hasReg()) {
3088 // If Dest already has a physical register, then legalize the Src operand
3089 // into a Variable with the same register assignment. This especially
3090 // helps allow the use of Flex operands.
3091 SrcR = legalize(Src0, Legal_Reg, Dest->getRegNum());
3092 } else {
3093 // Dest could be a stack operand. Since we could potentially need
3094 // to do a Store (and store can only have Register operands),
3095 // legalize this to a register.
3096 SrcR = legalize(Src0, Legal_Reg);
3097 }
3098 _mov(Dest, SrcR);
3099 }
3100
lowerBr(const InstBr * Instr)3101 void TargetMIPS32::lowerBr(const InstBr *Instr) {
3102 if (Instr->isUnconditional()) {
3103 _br(Instr->getTargetUnconditional());
3104 return;
3105 }
3106 CfgNode *TargetTrue = Instr->getTargetTrue();
3107 CfgNode *TargetFalse = Instr->getTargetFalse();
3108 Operand *Boolean = Instr->getCondition();
3109 const Inst *Producer = Computations.getProducerOf(Boolean);
3110 if (Producer == nullptr) {
3111 // Since we don't know the producer of this boolean we will assume its
3112 // producer will keep it in positive logic and just emit beqz with this
3113 // Boolean as an operand.
3114 auto *BooleanR = legalizeToReg(Boolean);
3115 _br(TargetTrue, TargetFalse, BooleanR, CondMIPS32::Cond::EQZ);
3116 return;
3117 }
3118 if (Producer->getKind() == Inst::Icmp) {
3119 const InstIcmp *CompareInst = llvm::cast<InstIcmp>(Producer);
3120 Operand *Src0 = CompareInst->getSrc(0);
3121 Operand *Src1 = CompareInst->getSrc(1);
3122 const Type Src0Ty = Src0->getType();
3123 assert(Src0Ty == Src1->getType());
3124
3125 Variable *Src0R = nullptr;
3126 Variable *Src1R = nullptr;
3127 Variable *Src0HiR = nullptr;
3128 Variable *Src1HiR = nullptr;
3129 if (Src0Ty == IceType_i64) {
3130 Src0R = legalizeToReg(loOperand(Src0));
3131 Src1R = legalizeToReg(loOperand(Src1));
3132 Src0HiR = legalizeToReg(hiOperand(Src0));
3133 Src1HiR = legalizeToReg(hiOperand(Src1));
3134 } else {
3135 auto *Src0RT = legalizeToReg(Src0);
3136 auto *Src1RT = legalizeToReg(Src1);
3137 // Sign/Zero extend the source operands
3138 if (Src0Ty != IceType_i32) {
3139 InstCast::OpKind CastKind;
3140 switch (CompareInst->getCondition()) {
3141 case InstIcmp::Eq:
3142 case InstIcmp::Ne:
3143 case InstIcmp::Sgt:
3144 case InstIcmp::Sge:
3145 case InstIcmp::Slt:
3146 case InstIcmp::Sle:
3147 CastKind = InstCast::Sext;
3148 break;
3149 default:
3150 CastKind = InstCast::Zext;
3151 break;
3152 }
3153 Src0R = makeReg(IceType_i32);
3154 Src1R = makeReg(IceType_i32);
3155 lowerCast(InstCast::create(Func, CastKind, Src0R, Src0RT));
3156 lowerCast(InstCast::create(Func, CastKind, Src1R, Src1RT));
3157 } else {
3158 Src0R = Src0RT;
3159 Src1R = Src1RT;
3160 }
3161 }
3162 auto *DestT = makeReg(IceType_i32);
3163
3164 switch (CompareInst->getCondition()) {
3165 default:
3166 llvm_unreachable("unexpected condition");
3167 return;
3168 case InstIcmp::Eq: {
3169 if (Src0Ty == IceType_i64) {
3170 auto *T1 = I32Reg();
3171 auto *T2 = I32Reg();
3172 auto *T3 = I32Reg();
3173 _xor(T1, Src0HiR, Src1HiR);
3174 _xor(T2, Src0R, Src1R);
3175 _or(T3, T1, T2);
3176 _mov(DestT, T3);
3177 _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
3178 } else {
3179 _br(TargetTrue, TargetFalse, Src0R, Src1R, CondMIPS32::Cond::NE);
3180 }
3181 return;
3182 }
3183 case InstIcmp::Ne: {
3184 if (Src0Ty == IceType_i64) {
3185 auto *T1 = I32Reg();
3186 auto *T2 = I32Reg();
3187 auto *T3 = I32Reg();
3188 _xor(T1, Src0HiR, Src1HiR);
3189 _xor(T2, Src0R, Src1R);
3190 _or(T3, T1, T2);
3191 _mov(DestT, T3);
3192 _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::EQZ);
3193 } else {
3194 _br(TargetTrue, TargetFalse, Src0R, Src1R, CondMIPS32::Cond::EQ);
3195 }
3196 return;
3197 }
3198 case InstIcmp::Ugt: {
3199 if (Src0Ty == IceType_i64) {
3200 auto *T1 = I32Reg();
3201 auto *T2 = I32Reg();
3202 auto *T3 = I32Reg();
3203 auto *T4 = I32Reg();
3204 auto *T5 = I32Reg();
3205 _xor(T1, Src0HiR, Src1HiR);
3206 _sltu(T2, Src1HiR, Src0HiR);
3207 _xori(T3, T2, 1);
3208 _sltu(T4, Src1R, Src0R);
3209 _xori(T5, T4, 1);
3210 _movz(T3, T5, T1);
3211 _mov(DestT, T3);
3212 _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
3213 } else {
3214 _sltu(DestT, Src1R, Src0R);
3215 _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::EQZ);
3216 }
3217 return;
3218 }
3219 case InstIcmp::Uge: {
3220 if (Src0Ty == IceType_i64) {
3221 auto *T1 = I32Reg();
3222 auto *T2 = I32Reg();
3223 auto *T3 = I32Reg();
3224 _xor(T1, Src0HiR, Src1HiR);
3225 _sltu(T2, Src0HiR, Src1HiR);
3226 _sltu(T3, Src0R, Src1R);
3227 _movz(T2, T3, T1);
3228 _mov(DestT, T2);
3229 _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
3230 } else {
3231 _sltu(DestT, Src0R, Src1R);
3232 _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
3233 }
3234 return;
3235 }
3236 case InstIcmp::Ult: {
3237 if (Src0Ty == IceType_i64) {
3238 auto *T1 = I32Reg();
3239 auto *T2 = I32Reg();
3240 auto *T3 = I32Reg();
3241 auto *T4 = I32Reg();
3242 auto *T5 = I32Reg();
3243 _xor(T1, Src0HiR, Src1HiR);
3244 _sltu(T2, Src0HiR, Src1HiR);
3245 _xori(T3, T2, 1);
3246 _sltu(T4, Src0R, Src1R);
3247 _xori(T5, T4, 1);
3248 _movz(T3, T5, T1);
3249 _mov(DestT, T3);
3250 _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
3251 } else {
3252 _sltu(DestT, Src0R, Src1R);
3253 _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::EQZ);
3254 }
3255 return;
3256 }
3257 case InstIcmp::Ule: {
3258 if (Src0Ty == IceType_i64) {
3259 auto *T1 = I32Reg();
3260 auto *T2 = I32Reg();
3261 auto *T3 = I32Reg();
3262 _xor(T1, Src0HiR, Src1HiR);
3263 _sltu(T2, Src1HiR, Src0HiR);
3264 _sltu(T3, Src1R, Src0R);
3265 _movz(T2, T3, T1);
3266 _mov(DestT, T2);
3267 _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
3268 } else {
3269 _sltu(DestT, Src1R, Src0R);
3270 _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
3271 }
3272 return;
3273 }
3274 case InstIcmp::Sgt: {
3275 if (Src0Ty == IceType_i64) {
3276 auto *T1 = I32Reg();
3277 auto *T2 = I32Reg();
3278 auto *T3 = I32Reg();
3279 auto *T4 = I32Reg();
3280 auto *T5 = I32Reg();
3281 _xor(T1, Src0HiR, Src1HiR);
3282 _slt(T2, Src1HiR, Src0HiR);
3283 _xori(T3, T2, 1);
3284 _sltu(T4, Src1R, Src0R);
3285 _xori(T5, T4, 1);
3286 _movz(T3, T5, T1);
3287 _mov(DestT, T3);
3288 _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
3289 } else {
3290 _slt(DestT, Src1R, Src0R);
3291 _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::EQZ);
3292 }
3293 return;
3294 }
3295 case InstIcmp::Sge: {
3296 if (Src0Ty == IceType_i64) {
3297 auto *T1 = I32Reg();
3298 auto *T2 = I32Reg();
3299 auto *T3 = I32Reg();
3300 _xor(T1, Src0HiR, Src1HiR);
3301 _slt(T2, Src0HiR, Src1HiR);
3302 _sltu(T3, Src0R, Src1R);
3303 _movz(T2, T3, T1);
3304 _mov(DestT, T2);
3305 _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
3306 } else {
3307 _slt(DestT, Src0R, Src1R);
3308 _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
3309 }
3310 return;
3311 }
3312 case InstIcmp::Slt: {
3313 if (Src0Ty == IceType_i64) {
3314 auto *T1 = I32Reg();
3315 auto *T2 = I32Reg();
3316 auto *T3 = I32Reg();
3317 auto *T4 = I32Reg();
3318 auto *T5 = I32Reg();
3319 _xor(T1, Src0HiR, Src1HiR);
3320 _slt(T2, Src0HiR, Src1HiR);
3321 _xori(T3, T2, 1);
3322 _sltu(T4, Src0R, Src1R);
3323 _xori(T5, T4, 1);
3324 _movz(T3, T5, T1);
3325 _mov(DestT, T3);
3326 _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
3327 } else {
3328 _slt(DestT, Src0R, Src1R);
3329 _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::EQZ);
3330 }
3331 return;
3332 }
3333 case InstIcmp::Sle: {
3334 if (Src0Ty == IceType_i64) {
3335 auto *T1 = I32Reg();
3336 auto *T2 = I32Reg();
3337 auto *T3 = I32Reg();
3338 _xor(T1, Src0HiR, Src1HiR);
3339 _slt(T2, Src1HiR, Src0HiR);
3340 _sltu(T3, Src1R, Src0R);
3341 _movz(T2, T3, T1);
3342 _mov(DestT, T2);
3343 _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
3344 } else {
3345 _slt(DestT, Src1R, Src0R);
3346 _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
3347 }
3348 return;
3349 }
3350 }
3351 }
3352 }
3353
lowerCall(const InstCall * Instr)3354 void TargetMIPS32::lowerCall(const InstCall *Instr) {
3355 CfgVector<Variable *> RegArgs;
3356 NeedsStackAlignment = true;
3357
3358 // Assign arguments to registers and stack. Also reserve stack.
3359 TargetMIPS32::CallingConv CC;
3360
3361 // Pair of Arg Operand -> GPR number assignments.
3362 llvm::SmallVector<std::pair<Operand *, RegNumT>, MIPS32_MAX_GPR_ARG> GPRArgs;
3363 llvm::SmallVector<std::pair<Operand *, RegNumT>, MIPS32_MAX_FP_ARG> FPArgs;
3364 // Pair of Arg Operand -> stack offset.
3365 llvm::SmallVector<std::pair<Operand *, int32_t>, 8> StackArgs;
3366 size_t ParameterAreaSizeBytes = 16;
3367
3368 // Classify each argument operand according to the location where the
3369 // argument is passed.
3370
3371 // v4f32 is returned through stack. $4 is setup by the caller and passed as
3372 // first argument implicitly. Callee then copies the return vector at $4.
3373 SizeT ArgNum = 0;
3374 Variable *Dest = Instr->getDest();
3375 Variable *RetVecFloat = nullptr;
3376 if (Dest && isVectorFloatingType(Dest->getType())) {
3377 ArgNum = 1;
3378 CC.discardReg(RegMIPS32::Reg_A0);
3379 RetVecFloat = Func->makeVariable(IceType_i32);
3380 auto *ByteCount = ConstantInteger32::create(Ctx, IceType_i32, 16);
3381 constexpr SizeT Alignment = 4;
3382 lowerAlloca(InstAlloca::create(Func, RetVecFloat, ByteCount, Alignment));
3383 RegArgs.emplace_back(
3384 legalizeToReg(RetVecFloat, RegNumT::fixme(RegMIPS32::Reg_A0)));
3385 }
3386
3387 for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {
3388 Operand *Arg = legalizeUndef(Instr->getArg(i));
3389 const Type Ty = Arg->getType();
3390 bool InReg = false;
3391 RegNumT Reg;
3392
3393 InReg = CC.argInReg(Ty, i, &Reg);
3394
3395 if (!InReg) {
3396 if (isVectorType(Ty)) {
3397 auto *ArgVec = llvm::cast<VariableVecOn32>(Arg);
3398 ParameterAreaSizeBytes =
3399 applyStackAlignmentTy(ParameterAreaSizeBytes, IceType_i64);
3400 for (Variable *Elem : ArgVec->getContainers()) {
3401 StackArgs.push_back(std::make_pair(Elem, ParameterAreaSizeBytes));
3402 ParameterAreaSizeBytes += typeWidthInBytesOnStack(IceType_i32);
3403 }
3404 } else {
3405 ParameterAreaSizeBytes =
3406 applyStackAlignmentTy(ParameterAreaSizeBytes, Ty);
3407 StackArgs.push_back(std::make_pair(Arg, ParameterAreaSizeBytes));
3408 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Ty);
3409 }
3410 ++ArgNum;
3411 continue;
3412 }
3413
3414 if (isVectorType(Ty)) {
3415 auto *ArgVec = llvm::cast<VariableVecOn32>(Arg);
3416 Operand *Elem0 = ArgVec->getContainers()[0];
3417 Operand *Elem1 = ArgVec->getContainers()[1];
3418 GPRArgs.push_back(
3419 std::make_pair(Elem0, RegNumT::fixme((unsigned)Reg + 0)));
3420 GPRArgs.push_back(
3421 std::make_pair(Elem1, RegNumT::fixme((unsigned)Reg + 1)));
3422 Operand *Elem2 = ArgVec->getContainers()[2];
3423 Operand *Elem3 = ArgVec->getContainers()[3];
3424 // First argument is passed in $4:$5:$6:$7
3425 // Second and rest arguments are passed in $6:$7:stack:stack
3426 if (ArgNum == 0) {
3427 GPRArgs.push_back(
3428 std::make_pair(Elem2, RegNumT::fixme((unsigned)Reg + 2)));
3429 GPRArgs.push_back(
3430 std::make_pair(Elem3, RegNumT::fixme((unsigned)Reg + 3)));
3431 } else {
3432 ParameterAreaSizeBytes =
3433 applyStackAlignmentTy(ParameterAreaSizeBytes, IceType_i64);
3434 StackArgs.push_back(std::make_pair(Elem2, ParameterAreaSizeBytes));
3435 ParameterAreaSizeBytes += typeWidthInBytesOnStack(IceType_i32);
3436 StackArgs.push_back(std::make_pair(Elem3, ParameterAreaSizeBytes));
3437 ParameterAreaSizeBytes += typeWidthInBytesOnStack(IceType_i32);
3438 }
3439 } else if (Ty == IceType_i64) {
3440 Operand *Lo = loOperand(Arg);
3441 Operand *Hi = hiOperand(Arg);
3442 GPRArgs.push_back(
3443 std::make_pair(Lo, RegMIPS32::get64PairFirstRegNum(Reg)));
3444 GPRArgs.push_back(
3445 std::make_pair(Hi, RegMIPS32::get64PairSecondRegNum(Reg)));
3446 } else if (isScalarIntegerType(Ty)) {
3447 GPRArgs.push_back(std::make_pair(Arg, Reg));
3448 } else {
3449 FPArgs.push_back(std::make_pair(Arg, Reg));
3450 }
3451 ++ArgNum;
3452 }
3453
3454 // Adjust the parameter area so that the stack is aligned. It is assumed that
3455 // the stack is already aligned at the start of the calling sequence.
3456 ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes);
3457
3458 // Copy arguments that are passed on the stack to the appropriate stack
3459 // locations.
3460 Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP);
3461 for (auto &StackArg : StackArgs) {
3462 ConstantInteger32 *Loc =
3463 llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(StackArg.second));
3464 Type Ty = StackArg.first->getType();
3465 OperandMIPS32Mem *Addr;
3466 constexpr bool SignExt = false;
3467 if (OperandMIPS32Mem::canHoldOffset(Ty, SignExt, StackArg.second)) {
3468 Addr = OperandMIPS32Mem::create(Func, Ty, SP, Loc);
3469 } else {
3470 Variable *NewBase = Func->makeVariable(SP->getType());
3471 lowerArithmetic(
3472 InstArithmetic::create(Func, InstArithmetic::Add, NewBase, SP, Loc));
3473 Addr = formMemoryOperand(NewBase, Ty);
3474 }
3475 lowerStore(InstStore::create(Func, StackArg.first, Addr));
3476 }
3477
3478 // Generate the call instruction. Assign its result to a temporary with high
3479 // register allocation weight.
3480
3481 // ReturnReg doubles as ReturnRegLo as necessary.
3482 Variable *ReturnReg = nullptr;
3483 Variable *ReturnRegHi = nullptr;
3484 if (Dest) {
3485 switch (Dest->getType()) {
3486 case IceType_NUM:
3487 llvm_unreachable("Invalid Call dest type");
3488 return;
3489 case IceType_void:
3490 break;
3491 case IceType_i1:
3492 case IceType_i8:
3493 case IceType_i16:
3494 case IceType_i32:
3495 ReturnReg = makeReg(Dest->getType(), RegMIPS32::Reg_V0);
3496 break;
3497 case IceType_i64:
3498 ReturnReg = I32Reg(RegMIPS32::Reg_V0);
3499 ReturnRegHi = I32Reg(RegMIPS32::Reg_V1);
3500 break;
3501 case IceType_f32:
3502 ReturnReg = makeReg(Dest->getType(), RegMIPS32::Reg_F0);
3503 break;
3504 case IceType_f64:
3505 ReturnReg = makeReg(IceType_f64, RegMIPS32::Reg_F0);
3506 break;
3507 case IceType_v4i1:
3508 case IceType_v8i1:
3509 case IceType_v16i1:
3510 case IceType_v16i8:
3511 case IceType_v8i16:
3512 case IceType_v4i32: {
3513 ReturnReg = makeReg(Dest->getType(), RegMIPS32::Reg_V0);
3514 auto *RetVec = llvm::dyn_cast<VariableVecOn32>(ReturnReg);
3515 RetVec->initVecElement(Func);
3516 for (SizeT i = 0; i < RetVec->ContainersPerVector; ++i) {
3517 auto *Var = RetVec->getContainers()[i];
3518 Var->setRegNum(RegNumT::fixme(RegMIPS32::Reg_V0 + i));
3519 }
3520 break;
3521 }
3522 case IceType_v4f32:
3523 ReturnReg = makeReg(IceType_i32, RegMIPS32::Reg_V0);
3524 break;
3525 }
3526 }
3527 Operand *CallTarget = Instr->getCallTarget();
3528 // Allow ConstantRelocatable to be left alone as a direct call,
3529 // but force other constants like ConstantInteger32 to be in
3530 // a register and make it an indirect call.
3531 if (!llvm::isa<ConstantRelocatable>(CallTarget)) {
3532 CallTarget = legalize(CallTarget, Legal_Reg);
3533 }
3534
3535 // Copy arguments to be passed in registers to the appropriate registers.
3536 for (auto &FPArg : FPArgs) {
3537 RegArgs.emplace_back(legalizeToReg(FPArg.first, FPArg.second));
3538 }
3539 for (auto &GPRArg : GPRArgs) {
3540 RegArgs.emplace_back(legalizeToReg(GPRArg.first, GPRArg.second));
3541 }
3542
3543 // Generate a FakeUse of register arguments so that they do not get dead code
3544 // eliminated as a result of the FakeKill of scratch registers after the call.
3545 // These fake-uses need to be placed here to avoid argument registers from
3546 // being used during the legalizeToReg() calls above.
3547 for (auto *RegArg : RegArgs) {
3548 Context.insert<InstFakeUse>(RegArg);
3549 }
3550
3551 // If variable alloca is used the extra 16 bytes for argument build area
3552 // will be allocated on stack before a call.
3553 if (VariableAllocaUsed)
3554 Sandboxer(this).addiu_sp(-MaxOutArgsSizeBytes);
3555
3556 Inst *NewCall;
3557
3558 // We don't need to define the return register if it is a vector.
3559 // We have inserted fake defs of it just after the call.
3560 if (ReturnReg && isVectorIntegerType(ReturnReg->getType())) {
3561 Variable *RetReg = nullptr;
3562 NewCall = InstMIPS32Call::create(Func, RetReg, CallTarget);
3563 Context.insert(NewCall);
3564 } else {
3565 NewCall = Sandboxer(this, InstBundleLock::Opt_AlignToEnd)
3566 .jal(ReturnReg, CallTarget);
3567 }
3568
3569 if (VariableAllocaUsed)
3570 Sandboxer(this).addiu_sp(MaxOutArgsSizeBytes);
3571
3572 // Insert a fake use of stack pointer to avoid dead code elimination of addiu
3573 // instruction.
3574 Context.insert<InstFakeUse>(SP);
3575
3576 if (ReturnRegHi)
3577 Context.insert(InstFakeDef::create(Func, ReturnRegHi));
3578
3579 if (ReturnReg) {
3580 if (auto *RetVec = llvm::dyn_cast<VariableVecOn32>(ReturnReg)) {
3581 for (Variable *Var : RetVec->getContainers()) {
3582 Context.insert(InstFakeDef::create(Func, Var));
3583 }
3584 }
3585 }
3586
3587 // Insert a register-kill pseudo instruction.
3588 Context.insert(InstFakeKill::create(Func, NewCall));
3589
3590 // Generate a FakeUse to keep the call live if necessary.
3591 if (Instr->hasSideEffects() && ReturnReg) {
3592 if (auto *RetVec = llvm::dyn_cast<VariableVecOn32>(ReturnReg)) {
3593 for (Variable *Var : RetVec->getContainers()) {
3594 Context.insert<InstFakeUse>(Var);
3595 }
3596 } else {
3597 Context.insert<InstFakeUse>(ReturnReg);
3598 }
3599 }
3600
3601 if (Dest == nullptr)
3602 return;
3603
3604 // Assign the result of the call to Dest.
3605 if (ReturnReg) {
3606 if (RetVecFloat) {
3607 auto *DestVecOn32 = llvm::cast<VariableVecOn32>(Dest);
3608 auto *TBase = legalizeToReg(RetVecFloat);
3609 for (SizeT i = 0; i < DestVecOn32->ContainersPerVector; ++i) {
3610 auto *Var = DestVecOn32->getContainers()[i];
3611 auto *TVar = makeReg(IceType_i32);
3612 OperandMIPS32Mem *Mem = OperandMIPS32Mem::create(
3613 Func, IceType_i32, TBase,
3614 llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(i * 4)));
3615 _lw(TVar, Mem);
3616 _mov(Var, TVar);
3617 }
3618 } else if (auto *RetVec = llvm::dyn_cast<VariableVecOn32>(ReturnReg)) {
3619 auto *DestVecOn32 = llvm::cast<VariableVecOn32>(Dest);
3620 for (SizeT i = 0; i < DestVecOn32->ContainersPerVector; ++i) {
3621 _mov(DestVecOn32->getContainers()[i], RetVec->getContainers()[i]);
3622 }
3623 } else if (ReturnRegHi) {
3624 assert(Dest->getType() == IceType_i64);
3625 auto *Dest64On32 = llvm::cast<Variable64On32>(Dest);
3626 Variable *DestLo = Dest64On32->getLo();
3627 Variable *DestHi = Dest64On32->getHi();
3628 _mov(DestLo, ReturnReg);
3629 _mov(DestHi, ReturnRegHi);
3630 } else {
3631 assert(Dest->getType() == IceType_i32 || Dest->getType() == IceType_i16 ||
3632 Dest->getType() == IceType_i8 || Dest->getType() == IceType_i1 ||
3633 isScalarFloatingType(Dest->getType()) ||
3634 isVectorType(Dest->getType()));
3635 _mov(Dest, ReturnReg);
3636 }
3637 }
3638 }
3639
lowerCast(const InstCast * Instr)3640 void TargetMIPS32::lowerCast(const InstCast *Instr) {
3641 InstCast::OpKind CastKind = Instr->getCastKind();
3642 Variable *Dest = Instr->getDest();
3643 Operand *Src0 = legalizeUndef(Instr->getSrc(0));
3644 const Type DestTy = Dest->getType();
3645 const Type Src0Ty = Src0->getType();
3646 const uint32_t ShiftAmount =
3647 (Src0Ty == IceType_i1
3648 ? INT32_BITS - 1
3649 : INT32_BITS - (CHAR_BITS * typeWidthInBytes(Src0Ty)));
3650 const uint32_t Mask =
3651 (Src0Ty == IceType_i1
3652 ? 1
3653 : (1 << (CHAR_BITS * typeWidthInBytes(Src0Ty))) - 1);
3654
3655 if (isVectorType(DestTy)) {
3656 llvm::report_fatal_error("Cast: Destination type is vector");
3657 return;
3658 }
3659 switch (CastKind) {
3660 default:
3661 Func->setError("Cast type not supported");
3662 return;
3663 case InstCast::Sext: {
3664 if (DestTy == IceType_i64) {
3665 auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
3666 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3667 Variable *Src0R = legalizeToReg(Src0);
3668 Variable *T1_Lo = I32Reg();
3669 Variable *T2_Lo = I32Reg();
3670 Variable *T_Hi = I32Reg();
3671 if (Src0Ty == IceType_i1) {
3672 _sll(T1_Lo, Src0R, INT32_BITS - 1);
3673 _sra(T2_Lo, T1_Lo, INT32_BITS - 1);
3674 _mov(DestHi, T2_Lo);
3675 _mov(DestLo, T2_Lo);
3676 } else if (Src0Ty == IceType_i8 || Src0Ty == IceType_i16) {
3677 _sll(T1_Lo, Src0R, ShiftAmount);
3678 _sra(T2_Lo, T1_Lo, ShiftAmount);
3679 _sra(T_Hi, T2_Lo, INT32_BITS - 1);
3680 _mov(DestHi, T_Hi);
3681 _mov(DestLo, T2_Lo);
3682 } else if (Src0Ty == IceType_i32) {
3683 _mov(T1_Lo, Src0R);
3684 _sra(T_Hi, T1_Lo, INT32_BITS - 1);
3685 _mov(DestHi, T_Hi);
3686 _mov(DestLo, T1_Lo);
3687 }
3688 } else {
3689 Variable *Src0R = legalizeToReg(Src0);
3690 Variable *T1 = makeReg(DestTy);
3691 Variable *T2 = makeReg(DestTy);
3692 if (Src0Ty == IceType_i1 || Src0Ty == IceType_i8 ||
3693 Src0Ty == IceType_i16) {
3694 _sll(T1, Src0R, ShiftAmount);
3695 _sra(T2, T1, ShiftAmount);
3696 _mov(Dest, T2);
3697 }
3698 }
3699 break;
3700 }
3701 case InstCast::Zext: {
3702 if (DestTy == IceType_i64) {
3703 auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
3704 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3705 Variable *Src0R = legalizeToReg(Src0);
3706 Variable *T_Lo = I32Reg();
3707 Variable *T_Hi = I32Reg();
3708
3709 if (Src0Ty == IceType_i1 || Src0Ty == IceType_i8 || Src0Ty == IceType_i16)
3710 _andi(T_Lo, Src0R, Mask);
3711 else if (Src0Ty == IceType_i32)
3712 _mov(T_Lo, Src0R);
3713 else
3714 assert(Src0Ty != IceType_i64);
3715 _mov(DestLo, T_Lo);
3716
3717 auto *Zero = getZero();
3718 _addiu(T_Hi, Zero, 0);
3719 _mov(DestHi, T_Hi);
3720 } else {
3721 Variable *Src0R = legalizeToReg(Src0);
3722 Variable *T = makeReg(DestTy);
3723 if (Src0Ty == IceType_i1 || Src0Ty == IceType_i8 ||
3724 Src0Ty == IceType_i16) {
3725 _andi(T, Src0R, Mask);
3726 _mov(Dest, T);
3727 }
3728 }
3729 break;
3730 }
3731 case InstCast::Trunc: {
3732 if (Src0Ty == IceType_i64)
3733 Src0 = loOperand(Src0);
3734 Variable *Src0R = legalizeToReg(Src0);
3735 Variable *T = makeReg(DestTy);
3736 switch (DestTy) {
3737 case IceType_i1:
3738 _andi(T, Src0R, 0x1);
3739 break;
3740 case IceType_i8:
3741 _andi(T, Src0R, 0xff);
3742 break;
3743 case IceType_i16:
3744 _andi(T, Src0R, 0xffff);
3745 break;
3746 default:
3747 _mov(T, Src0R);
3748 break;
3749 }
3750 _mov(Dest, T);
3751 break;
3752 }
3753 case InstCast::Fptrunc: {
3754 assert(Dest->getType() == IceType_f32);
3755 assert(Src0->getType() == IceType_f64);
3756 auto *DestR = legalizeToReg(Dest);
3757 auto *Src0R = legalizeToReg(Src0);
3758 _cvt_s_d(DestR, Src0R);
3759 _mov(Dest, DestR);
3760 break;
3761 }
3762 case InstCast::Fpext: {
3763 assert(Dest->getType() == IceType_f64);
3764 assert(Src0->getType() == IceType_f32);
3765 auto *DestR = legalizeToReg(Dest);
3766 auto *Src0R = legalizeToReg(Src0);
3767 _cvt_d_s(DestR, Src0R);
3768 _mov(Dest, DestR);
3769 break;
3770 }
3771 case InstCast::Fptosi:
3772 case InstCast::Fptoui: {
3773 if (llvm::isa<Variable64On32>(Dest)) {
3774 llvm::report_fatal_error("fp-to-i64 should have been prelowered.");
3775 return;
3776 }
3777 if (DestTy != IceType_i64) {
3778 if (Src0Ty == IceType_f32 && isScalarIntegerType(DestTy)) {
3779 Variable *Src0R = legalizeToReg(Src0);
3780 Variable *FTmp = makeReg(IceType_f32);
3781 _trunc_w_s(FTmp, Src0R);
3782 _mov(Dest, FTmp);
3783 return;
3784 }
3785 if (Src0Ty == IceType_f64 && isScalarIntegerType(DestTy)) {
3786 Variable *Src0R = legalizeToReg(Src0);
3787 Variable *FTmp = makeReg(IceType_f64);
3788 _trunc_w_d(FTmp, Src0R);
3789 _mov(Dest, FTmp);
3790 return;
3791 }
3792 }
3793 llvm::report_fatal_error("Destination is i64 in fp-to-i32");
3794 break;
3795 }
3796 case InstCast::Sitofp:
3797 case InstCast::Uitofp: {
3798 if (llvm::isa<Variable64On32>(Dest)) {
3799 llvm::report_fatal_error("i64-to-fp should have been prelowered.");
3800 return;
3801 }
3802 if (Src0Ty != IceType_i64) {
3803 Variable *Src0R = legalizeToReg(Src0);
3804 auto *T0R = Src0R;
3805 if (Src0Ty != IceType_i32) {
3806 T0R = makeReg(IceType_i32);
3807 if (CastKind == InstCast::Uitofp)
3808 lowerCast(InstCast::create(Func, InstCast::Zext, T0R, Src0R));
3809 else
3810 lowerCast(InstCast::create(Func, InstCast::Sext, T0R, Src0R));
3811 }
3812 if (isScalarIntegerType(Src0Ty) && DestTy == IceType_f32) {
3813 Variable *FTmp1 = makeReg(IceType_f32);
3814 Variable *FTmp2 = makeReg(IceType_f32);
3815 _mtc1(FTmp1, T0R);
3816 _cvt_s_w(FTmp2, FTmp1);
3817 _mov(Dest, FTmp2);
3818 return;
3819 }
3820 if (isScalarIntegerType(Src0Ty) && DestTy == IceType_f64) {
3821 Variable *FTmp1 = makeReg(IceType_f64);
3822 Variable *FTmp2 = makeReg(IceType_f64);
3823 _mtc1(FTmp1, T0R);
3824 _cvt_d_w(FTmp2, FTmp1);
3825 _mov(Dest, FTmp2);
3826 return;
3827 }
3828 }
3829 llvm::report_fatal_error("Source is i64 in i32-to-fp");
3830 break;
3831 }
3832 case InstCast::Bitcast: {
3833 Operand *Src0 = Instr->getSrc(0);
3834 if (DestTy == Src0->getType()) {
3835 auto *Assign = InstAssign::create(Func, Dest, Src0);
3836 lowerAssign(Assign);
3837 return;
3838 }
3839 if (isVectorType(DestTy) || isVectorType(Src0->getType())) {
3840 llvm::report_fatal_error(
3841 "Bitcast: vector type should have been prelowered.");
3842 return;
3843 }
3844 switch (DestTy) {
3845 case IceType_NUM:
3846 case IceType_void:
3847 llvm::report_fatal_error("Unexpected bitcast.");
3848 case IceType_i1:
3849 UnimplementedLoweringError(this, Instr);
3850 break;
3851 case IceType_i8:
3852 assert(Src0->getType() == IceType_v8i1);
3853 llvm::report_fatal_error(
3854 "i8 to v8i1 conversion should have been prelowered.");
3855 break;
3856 case IceType_i16:
3857 assert(Src0->getType() == IceType_v16i1);
3858 llvm::report_fatal_error(
3859 "i16 to v16i1 conversion should have been prelowered.");
3860 break;
3861 case IceType_i32:
3862 case IceType_f32: {
3863 Variable *Src0R = legalizeToReg(Src0);
3864 _mov(Dest, Src0R);
3865 break;
3866 }
3867 case IceType_i64: {
3868 assert(Src0->getType() == IceType_f64);
3869 Variable *Src0R = legalizeToReg(Src0);
3870 auto *T = llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64));
3871 T->initHiLo(Func);
3872 T->getHi()->setMustNotHaveReg();
3873 T->getLo()->setMustNotHaveReg();
3874 Context.insert<InstFakeDef>(T->getHi());
3875 Context.insert<InstFakeDef>(T->getLo());
3876 _mov_fp64_to_i64(T->getHi(), Src0R, Int64_Hi);
3877 _mov_fp64_to_i64(T->getLo(), Src0R, Int64_Lo);
3878 lowerAssign(InstAssign::create(Func, Dest, T));
3879 break;
3880 }
3881 case IceType_f64: {
3882 assert(Src0->getType() == IceType_i64);
3883 const uint32_t Mask = 0xFFFFFFFF;
3884 if (auto *C64 = llvm::dyn_cast<ConstantInteger64>(Src0)) {
3885 Variable *RegHi, *RegLo;
3886 const uint64_t Value = C64->getValue();
3887 uint64_t Upper32Bits = (Value >> INT32_BITS) & Mask;
3888 uint64_t Lower32Bits = Value & Mask;
3889 RegLo = legalizeToReg(Ctx->getConstantInt32(Lower32Bits));
3890 RegHi = legalizeToReg(Ctx->getConstantInt32(Upper32Bits));
3891 _mov(Dest, RegHi, RegLo);
3892 } else {
3893 auto *Var64On32 = llvm::cast<Variable64On32>(Src0);
3894 auto *RegLo = legalizeToReg(loOperand(Var64On32));
3895 auto *RegHi = legalizeToReg(hiOperand(Var64On32));
3896 _mov(Dest, RegHi, RegLo);
3897 }
3898 break;
3899 }
3900 default:
3901 llvm::report_fatal_error("Unexpected bitcast.");
3902 }
3903 break;
3904 }
3905 }
3906 }
3907
lowerExtractElement(const InstExtractElement * Instr)3908 void TargetMIPS32::lowerExtractElement(const InstExtractElement *Instr) {
3909 Variable *Dest = Instr->getDest();
3910 const Type DestTy = Dest->getType();
3911 Operand *Src1 = Instr->getSrc(1);
3912 if (const auto *Imm = llvm::dyn_cast<ConstantInteger32>(Src1)) {
3913 const uint32_t Index = Imm->getValue();
3914 Variable *TDest = makeReg(DestTy);
3915 Variable *TReg = makeReg(DestTy);
3916 auto *Src0 = legalizeUndef(Instr->getSrc(0));
3917 auto *Src0R = llvm::dyn_cast<VariableVecOn32>(Src0);
3918 // Number of elements in each container
3919 uint32_t ElemPerCont =
3920 typeNumElements(Src0->getType()) / Src0R->ContainersPerVector;
3921 auto *Src = Src0R->getContainers()[Index / ElemPerCont];
3922 auto *SrcE = legalizeToReg(Src);
3923 // Position of the element in the container
3924 uint32_t PosInCont = Index % ElemPerCont;
3925 if (ElemPerCont == 1) {
3926 _mov(TDest, SrcE);
3927 } else if (ElemPerCont == 2) {
3928 switch (PosInCont) {
3929 case 0:
3930 _andi(TDest, SrcE, 0xffff);
3931 break;
3932 case 1:
3933 _srl(TDest, SrcE, 16);
3934 break;
3935 default:
3936 llvm::report_fatal_error("ExtractElement: Invalid PosInCont");
3937 break;
3938 }
3939 } else if (ElemPerCont == 4) {
3940 switch (PosInCont) {
3941 case 0:
3942 _andi(TDest, SrcE, 0xff);
3943 break;
3944 case 1:
3945 _srl(TReg, SrcE, 8);
3946 _andi(TDest, TReg, 0xff);
3947 break;
3948 case 2:
3949 _srl(TReg, SrcE, 16);
3950 _andi(TDest, TReg, 0xff);
3951 break;
3952 case 3:
3953 _srl(TDest, SrcE, 24);
3954 break;
3955 default:
3956 llvm::report_fatal_error("ExtractElement: Invalid PosInCont");
3957 break;
3958 }
3959 }
3960 if (typeElementType(Src0R->getType()) == IceType_i1) {
3961 Variable *TReg1 = makeReg(DestTy);
3962 _andi(TReg1, TDest, 0x1);
3963 _mov(Dest, TReg1);
3964 } else {
3965 _mov(Dest, TDest);
3966 }
3967 return;
3968 }
3969 llvm::report_fatal_error("ExtractElement requires a constant index");
3970 }
3971
lowerFcmp(const InstFcmp * Instr)3972 void TargetMIPS32::lowerFcmp(const InstFcmp *Instr) {
3973 Variable *Dest = Instr->getDest();
3974 if (isVectorType(Dest->getType())) {
3975 llvm::report_fatal_error("Fcmp: Destination type is vector");
3976 return;
3977 }
3978
3979 auto *Src0 = Instr->getSrc(0);
3980 auto *Src1 = Instr->getSrc(1);
3981 auto *Zero = getZero();
3982
3983 InstFcmp::FCond Cond = Instr->getCondition();
3984 auto *DestR = makeReg(IceType_i32);
3985 auto *Src0R = legalizeToReg(Src0);
3986 auto *Src1R = legalizeToReg(Src1);
3987 const Type Src0Ty = Src0->getType();
3988
3989 Operand *FCC0 = OperandMIPS32FCC::create(getFunc(), OperandMIPS32FCC::FCC0);
3990
3991 switch (Cond) {
3992 default: {
3993 llvm::report_fatal_error("Unhandled fp comparison.");
3994 return;
3995 }
3996 case InstFcmp::False: {
3997 Context.insert<InstFakeUse>(Src0R);
3998 Context.insert<InstFakeUse>(Src1R);
3999 _addiu(DestR, Zero, 0);
4000 _mov(Dest, DestR);
4001 break;
4002 }
4003 case InstFcmp::Oeq: {
4004 if (Src0Ty == IceType_f32) {
4005 _c_eq_s(Src0R, Src1R);
4006 } else {
4007 _c_eq_d(Src0R, Src1R);
4008 }
4009 _addiu(DestR, Zero, 1);
4010 _movf(DestR, Zero, FCC0);
4011 _mov(Dest, DestR);
4012 break;
4013 }
4014 case InstFcmp::Ogt: {
4015 if (Src0Ty == IceType_f32) {
4016 _c_ule_s(Src0R, Src1R);
4017 } else {
4018 _c_ule_d(Src0R, Src1R);
4019 }
4020 _addiu(DestR, Zero, 1);
4021 _movt(DestR, Zero, FCC0);
4022 _mov(Dest, DestR);
4023 break;
4024 }
4025 case InstFcmp::Oge: {
4026 if (Src0Ty == IceType_f32) {
4027 _c_ult_s(Src0R, Src1R);
4028 } else {
4029 _c_ult_d(Src0R, Src1R);
4030 }
4031 _addiu(DestR, Zero, 1);
4032 _movt(DestR, Zero, FCC0);
4033 _mov(Dest, DestR);
4034 break;
4035 }
4036 case InstFcmp::Olt: {
4037 if (Src0Ty == IceType_f32) {
4038 _c_olt_s(Src0R, Src1R);
4039 } else {
4040 _c_olt_d(Src0R, Src1R);
4041 }
4042 _addiu(DestR, Zero, 1);
4043 _movf(DestR, Zero, FCC0);
4044 _mov(Dest, DestR);
4045 break;
4046 }
4047 case InstFcmp::Ole: {
4048 if (Src0Ty == IceType_f32) {
4049 _c_ole_s(Src0R, Src1R);
4050 } else {
4051 _c_ole_d(Src0R, Src1R);
4052 }
4053 _addiu(DestR, Zero, 1);
4054 _movf(DestR, Zero, FCC0);
4055 _mov(Dest, DestR);
4056 break;
4057 }
4058 case InstFcmp::One: {
4059 if (Src0Ty == IceType_f32) {
4060 _c_ueq_s(Src0R, Src1R);
4061 } else {
4062 _c_ueq_d(Src0R, Src1R);
4063 }
4064 _addiu(DestR, Zero, 1);
4065 _movt(DestR, Zero, FCC0);
4066 _mov(Dest, DestR);
4067 break;
4068 }
4069 case InstFcmp::Ord: {
4070 if (Src0Ty == IceType_f32) {
4071 _c_un_s(Src0R, Src1R);
4072 } else {
4073 _c_un_d(Src0R, Src1R);
4074 }
4075 _addiu(DestR, Zero, 1);
4076 _movt(DestR, Zero, FCC0);
4077 _mov(Dest, DestR);
4078 break;
4079 }
4080 case InstFcmp::Ueq: {
4081 if (Src0Ty == IceType_f32) {
4082 _c_ueq_s(Src0R, Src1R);
4083 } else {
4084 _c_ueq_d(Src0R, Src1R);
4085 }
4086 _addiu(DestR, Zero, 1);
4087 _movf(DestR, Zero, FCC0);
4088 _mov(Dest, DestR);
4089 break;
4090 }
4091 case InstFcmp::Ugt: {
4092 if (Src0Ty == IceType_f32) {
4093 _c_ole_s(Src0R, Src1R);
4094 } else {
4095 _c_ole_d(Src0R, Src1R);
4096 }
4097 _addiu(DestR, Zero, 1);
4098 _movt(DestR, Zero, FCC0);
4099 _mov(Dest, DestR);
4100 break;
4101 }
4102 case InstFcmp::Uge: {
4103 if (Src0Ty == IceType_f32) {
4104 _c_olt_s(Src0R, Src1R);
4105 } else {
4106 _c_olt_d(Src0R, Src1R);
4107 }
4108 _addiu(DestR, Zero, 1);
4109 _movt(DestR, Zero, FCC0);
4110 _mov(Dest, DestR);
4111 break;
4112 }
4113 case InstFcmp::Ult: {
4114 if (Src0Ty == IceType_f32) {
4115 _c_ult_s(Src0R, Src1R);
4116 } else {
4117 _c_ult_d(Src0R, Src1R);
4118 }
4119 _addiu(DestR, Zero, 1);
4120 _movf(DestR, Zero, FCC0);
4121 _mov(Dest, DestR);
4122 break;
4123 }
4124 case InstFcmp::Ule: {
4125 if (Src0Ty == IceType_f32) {
4126 _c_ule_s(Src0R, Src1R);
4127 } else {
4128 _c_ule_d(Src0R, Src1R);
4129 }
4130 _addiu(DestR, Zero, 1);
4131 _movf(DestR, Zero, FCC0);
4132 _mov(Dest, DestR);
4133 break;
4134 }
4135 case InstFcmp::Une: {
4136 if (Src0Ty == IceType_f32) {
4137 _c_eq_s(Src0R, Src1R);
4138 } else {
4139 _c_eq_d(Src0R, Src1R);
4140 }
4141 _addiu(DestR, Zero, 1);
4142 _movt(DestR, Zero, FCC0);
4143 _mov(Dest, DestR);
4144 break;
4145 }
4146 case InstFcmp::Uno: {
4147 if (Src0Ty == IceType_f32) {
4148 _c_un_s(Src0R, Src1R);
4149 } else {
4150 _c_un_d(Src0R, Src1R);
4151 }
4152 _addiu(DestR, Zero, 1);
4153 _movf(DestR, Zero, FCC0);
4154 _mov(Dest, DestR);
4155 break;
4156 }
4157 case InstFcmp::True: {
4158 Context.insert<InstFakeUse>(Src0R);
4159 Context.insert<InstFakeUse>(Src1R);
4160 _addiu(DestR, Zero, 1);
4161 _mov(Dest, DestR);
4162 break;
4163 }
4164 }
4165 }
4166
lower64Icmp(const InstIcmp * Instr)4167 void TargetMIPS32::lower64Icmp(const InstIcmp *Instr) {
4168 Operand *Src0 = legalize(Instr->getSrc(0));
4169 Operand *Src1 = legalize(Instr->getSrc(1));
4170 Variable *Dest = Instr->getDest();
4171 InstIcmp::ICond Condition = Instr->getCondition();
4172
4173 Variable *Src0LoR = legalizeToReg(loOperand(Src0));
4174 Variable *Src0HiR = legalizeToReg(hiOperand(Src0));
4175 Variable *Src1LoR = legalizeToReg(loOperand(Src1));
4176 Variable *Src1HiR = legalizeToReg(hiOperand(Src1));
4177
4178 switch (Condition) {
4179 default:
4180 llvm_unreachable("unexpected condition");
4181 return;
4182 case InstIcmp::Eq: {
4183 auto *T1 = I32Reg();
4184 auto *T2 = I32Reg();
4185 auto *T3 = I32Reg();
4186 auto *T4 = I32Reg();
4187 _xor(T1, Src0HiR, Src1HiR);
4188 _xor(T2, Src0LoR, Src1LoR);
4189 _or(T3, T1, T2);
4190 _sltiu(T4, T3, 1);
4191 _mov(Dest, T4);
4192 return;
4193 }
4194 case InstIcmp::Ne: {
4195 auto *T1 = I32Reg();
4196 auto *T2 = I32Reg();
4197 auto *T3 = I32Reg();
4198 auto *T4 = I32Reg();
4199 _xor(T1, Src0HiR, Src1HiR);
4200 _xor(T2, Src0LoR, Src1LoR);
4201 _or(T3, T1, T2);
4202 _sltu(T4, getZero(), T3);
4203 _mov(Dest, T4);
4204 return;
4205 }
4206 case InstIcmp::Sgt: {
4207 auto *T1 = I32Reg();
4208 auto *T2 = I32Reg();
4209 auto *T3 = I32Reg();
4210 _xor(T1, Src0HiR, Src1HiR);
4211 _slt(T2, Src1HiR, Src0HiR);
4212 _sltu(T3, Src1LoR, Src0LoR);
4213 _movz(T2, T3, T1);
4214 _mov(Dest, T2);
4215 return;
4216 }
4217 case InstIcmp::Ugt: {
4218 auto *T1 = I32Reg();
4219 auto *T2 = I32Reg();
4220 auto *T3 = I32Reg();
4221 _xor(T1, Src0HiR, Src1HiR);
4222 _sltu(T2, Src1HiR, Src0HiR);
4223 _sltu(T3, Src1LoR, Src0LoR);
4224 _movz(T2, T3, T1);
4225 _mov(Dest, T2);
4226 return;
4227 }
4228 case InstIcmp::Sge: {
4229 auto *T1 = I32Reg();
4230 auto *T2 = I32Reg();
4231 auto *T3 = I32Reg();
4232 auto *T4 = I32Reg();
4233 auto *T5 = I32Reg();
4234 _xor(T1, Src0HiR, Src1HiR);
4235 _slt(T2, Src0HiR, Src1HiR);
4236 _xori(T3, T2, 1);
4237 _sltu(T4, Src0LoR, Src1LoR);
4238 _xori(T5, T4, 1);
4239 _movz(T3, T5, T1);
4240 _mov(Dest, T3);
4241 return;
4242 }
4243 case InstIcmp::Uge: {
4244 auto *T1 = I32Reg();
4245 auto *T2 = I32Reg();
4246 auto *T3 = I32Reg();
4247 auto *T4 = I32Reg();
4248 auto *T5 = I32Reg();
4249 _xor(T1, Src0HiR, Src1HiR);
4250 _sltu(T2, Src0HiR, Src1HiR);
4251 _xori(T3, T2, 1);
4252 _sltu(T4, Src0LoR, Src1LoR);
4253 _xori(T5, T4, 1);
4254 _movz(T3, T5, T1);
4255 _mov(Dest, T3);
4256 return;
4257 }
4258 case InstIcmp::Slt: {
4259 auto *T1 = I32Reg();
4260 auto *T2 = I32Reg();
4261 auto *T3 = I32Reg();
4262 _xor(T1, Src0HiR, Src1HiR);
4263 _slt(T2, Src0HiR, Src1HiR);
4264 _sltu(T3, Src0LoR, Src1LoR);
4265 _movz(T2, T3, T1);
4266 _mov(Dest, T2);
4267 return;
4268 }
4269 case InstIcmp::Ult: {
4270 auto *T1 = I32Reg();
4271 auto *T2 = I32Reg();
4272 auto *T3 = I32Reg();
4273 _xor(T1, Src0HiR, Src1HiR);
4274 _sltu(T2, Src0HiR, Src1HiR);
4275 _sltu(T3, Src0LoR, Src1LoR);
4276 _movz(T2, T3, T1);
4277 _mov(Dest, T2);
4278 return;
4279 }
4280 case InstIcmp::Sle: {
4281 auto *T1 = I32Reg();
4282 auto *T2 = I32Reg();
4283 auto *T3 = I32Reg();
4284 auto *T4 = I32Reg();
4285 auto *T5 = I32Reg();
4286 _xor(T1, Src0HiR, Src1HiR);
4287 _slt(T2, Src1HiR, Src0HiR);
4288 _xori(T3, T2, 1);
4289 _sltu(T4, Src1LoR, Src0LoR);
4290 _xori(T5, T4, 1);
4291 _movz(T3, T5, T1);
4292 _mov(Dest, T3);
4293 return;
4294 }
4295 case InstIcmp::Ule: {
4296 auto *T1 = I32Reg();
4297 auto *T2 = I32Reg();
4298 auto *T3 = I32Reg();
4299 auto *T4 = I32Reg();
4300 auto *T5 = I32Reg();
4301 _xor(T1, Src0HiR, Src1HiR);
4302 _sltu(T2, Src1HiR, Src0HiR);
4303 _xori(T3, T2, 1);
4304 _sltu(T4, Src1LoR, Src0LoR);
4305 _xori(T5, T4, 1);
4306 _movz(T3, T5, T1);
4307 _mov(Dest, T3);
4308 return;
4309 }
4310 }
4311 }
4312
lowerIcmp(const InstIcmp * Instr)4313 void TargetMIPS32::lowerIcmp(const InstIcmp *Instr) {
4314 auto *Src0 = Instr->getSrc(0);
4315 auto *Src1 = Instr->getSrc(1);
4316 if (Src0->getType() == IceType_i64) {
4317 lower64Icmp(Instr);
4318 return;
4319 }
4320 Variable *Dest = Instr->getDest();
4321 if (isVectorType(Dest->getType())) {
4322 llvm::report_fatal_error("Icmp: Destination type is vector");
4323 return;
4324 }
4325 InstIcmp::ICond Cond = Instr->getCondition();
4326 auto *Src0R = legalizeToReg(Src0);
4327 auto *Src1R = legalizeToReg(Src1);
4328 const Type Src0Ty = Src0R->getType();
4329 const uint32_t ShAmt = INT32_BITS - getScalarIntBitWidth(Src0->getType());
4330 Variable *Src0RT = I32Reg();
4331 Variable *Src1RT = I32Reg();
4332
4333 if (Src0Ty != IceType_i32) {
4334 _sll(Src0RT, Src0R, ShAmt);
4335 _sll(Src1RT, Src1R, ShAmt);
4336 } else {
4337 _mov(Src0RT, Src0R);
4338 _mov(Src1RT, Src1R);
4339 }
4340
4341 switch (Cond) {
4342 case InstIcmp::Eq: {
4343 auto *DestT = I32Reg();
4344 auto *T = I32Reg();
4345 _xor(T, Src0RT, Src1RT);
4346 _sltiu(DestT, T, 1);
4347 _mov(Dest, DestT);
4348 return;
4349 }
4350 case InstIcmp::Ne: {
4351 auto *DestT = I32Reg();
4352 auto *T = I32Reg();
4353 auto *Zero = getZero();
4354 _xor(T, Src0RT, Src1RT);
4355 _sltu(DestT, Zero, T);
4356 _mov(Dest, DestT);
4357 return;
4358 }
4359 case InstIcmp::Ugt: {
4360 auto *DestT = I32Reg();
4361 _sltu(DestT, Src1RT, Src0RT);
4362 _mov(Dest, DestT);
4363 return;
4364 }
4365 case InstIcmp::Uge: {
4366 auto *DestT = I32Reg();
4367 auto *T = I32Reg();
4368 _sltu(T, Src0RT, Src1RT);
4369 _xori(DestT, T, 1);
4370 _mov(Dest, DestT);
4371 return;
4372 }
4373 case InstIcmp::Ult: {
4374 auto *DestT = I32Reg();
4375 _sltu(DestT, Src0RT, Src1RT);
4376 _mov(Dest, DestT);
4377 return;
4378 }
4379 case InstIcmp::Ule: {
4380 auto *DestT = I32Reg();
4381 auto *T = I32Reg();
4382 _sltu(T, Src1RT, Src0RT);
4383 _xori(DestT, T, 1);
4384 _mov(Dest, DestT);
4385 return;
4386 }
4387 case InstIcmp::Sgt: {
4388 auto *DestT = I32Reg();
4389 _slt(DestT, Src1RT, Src0RT);
4390 _mov(Dest, DestT);
4391 return;
4392 }
4393 case InstIcmp::Sge: {
4394 auto *DestT = I32Reg();
4395 auto *T = I32Reg();
4396 _slt(T, Src0RT, Src1RT);
4397 _xori(DestT, T, 1);
4398 _mov(Dest, DestT);
4399 return;
4400 }
4401 case InstIcmp::Slt: {
4402 auto *DestT = I32Reg();
4403 _slt(DestT, Src0RT, Src1RT);
4404 _mov(Dest, DestT);
4405 return;
4406 }
4407 case InstIcmp::Sle: {
4408 auto *DestT = I32Reg();
4409 auto *T = I32Reg();
4410 _slt(T, Src1RT, Src0RT);
4411 _xori(DestT, T, 1);
4412 _mov(Dest, DestT);
4413 return;
4414 }
4415 default:
4416 llvm_unreachable("Invalid ICmp operator");
4417 return;
4418 }
4419 }
4420
lowerInsertElement(const InstInsertElement * Instr)4421 void TargetMIPS32::lowerInsertElement(const InstInsertElement *Instr) {
4422 Variable *Dest = Instr->getDest();
4423 const Type DestTy = Dest->getType();
4424 Operand *Src2 = Instr->getSrc(2);
4425 if (const auto *Imm = llvm::dyn_cast<ConstantInteger32>(Src2)) {
4426 const uint32_t Index = Imm->getValue();
4427 // Vector to insert in
4428 auto *Src0 = legalizeUndef(Instr->getSrc(0));
4429 auto *Src0R = llvm::dyn_cast<VariableVecOn32>(Src0);
4430 // Number of elements in each container
4431 uint32_t ElemPerCont =
4432 typeNumElements(Src0->getType()) / Src0R->ContainersPerVector;
4433 // Source Element
4434 auto *Src = Src0R->getContainers()[Index / ElemPerCont];
4435 auto *SrcE = Src;
4436 if (ElemPerCont > 1)
4437 SrcE = legalizeToReg(Src);
4438 // Dest is a vector
4439 auto *VDest = llvm::dyn_cast<VariableVecOn32>(Dest);
4440 VDest->initVecElement(Func);
4441 // Temp vector variable
4442 auto *TDest = makeReg(DestTy);
4443 auto *TVDest = llvm::dyn_cast<VariableVecOn32>(TDest);
4444 TVDest->initVecElement(Func);
4445 // Destination element
4446 auto *DstE = TVDest->getContainers()[Index / ElemPerCont];
4447 // Element to insert
4448 auto *Src1R = legalizeToReg(Instr->getSrc(1));
4449 auto *TReg1 = makeReg(IceType_i32);
4450 auto *TReg2 = makeReg(IceType_i32);
4451 auto *TReg3 = makeReg(IceType_i32);
4452 auto *TReg4 = makeReg(IceType_i32);
4453 auto *TReg5 = makeReg(IceType_i32);
4454 auto *TDReg = makeReg(IceType_i32);
4455 // Position of the element in the container
4456 uint32_t PosInCont = Index % ElemPerCont;
4457 // Load source vector in a temporary vector
4458 for (SizeT i = 0; i < TVDest->ContainersPerVector; ++i) {
4459 auto *DCont = TVDest->getContainers()[i];
4460 // Do not define DstE as we are going to redefine it
4461 if (DCont == DstE)
4462 continue;
4463 auto *SCont = Src0R->getContainers()[i];
4464 auto *TReg = makeReg(IceType_i32);
4465 _mov(TReg, SCont);
4466 _mov(DCont, TReg);
4467 }
4468 // Insert the element
4469 if (ElemPerCont == 1) {
4470 _mov(DstE, Src1R);
4471 } else if (ElemPerCont == 2) {
4472 switch (PosInCont) {
4473 case 0:
4474 _andi(TReg1, Src1R, 0xffff); // Clear upper 16-bits of source
4475 _srl(TReg2, SrcE, 16);
4476 _sll(TReg3, TReg2, 16); // Clear lower 16-bits of element
4477 _or(TDReg, TReg1, TReg3);
4478 _mov(DstE, TDReg);
4479 break;
4480 case 1:
4481 _sll(TReg1, Src1R, 16); // Clear lower 16-bits of source
4482 _sll(TReg2, SrcE, 16);
4483 _srl(TReg3, TReg2, 16); // Clear upper 16-bits of element
4484 _or(TDReg, TReg1, TReg3);
4485 _mov(DstE, TDReg);
4486 break;
4487 default:
4488 llvm::report_fatal_error("InsertElement: Invalid PosInCont");
4489 break;
4490 }
4491 } else if (ElemPerCont == 4) {
4492 switch (PosInCont) {
4493 case 0:
4494 _andi(TReg1, Src1R, 0xff); // Clear bits[31:8] of source
4495 _srl(TReg2, SrcE, 8);
4496 _sll(TReg3, TReg2, 8); // Clear bits[7:0] of element
4497 _or(TDReg, TReg1, TReg3);
4498 _mov(DstE, TDReg);
4499 break;
4500 case 1:
4501 _andi(TReg1, Src1R, 0xff); // Clear bits[31:8] of source
4502 _sll(TReg5, TReg1, 8); // Position in the destination
4503 _lui(TReg2, Ctx->getConstantInt32(0xffff));
4504 _ori(TReg3, TReg2, 0x00ff);
4505 _and(TReg4, SrcE, TReg3); // Clear bits[15:8] of element
4506 _or(TDReg, TReg5, TReg4);
4507 _mov(DstE, TDReg);
4508 break;
4509 case 2:
4510 _andi(TReg1, Src1R, 0xff); // Clear bits[31:8] of source
4511 _sll(TReg5, TReg1, 16); // Position in the destination
4512 _lui(TReg2, Ctx->getConstantInt32(0xff00));
4513 _ori(TReg3, TReg2, 0xffff);
4514 _and(TReg4, SrcE, TReg3); // Clear bits[15:8] of element
4515 _or(TDReg, TReg5, TReg4);
4516 _mov(DstE, TDReg);
4517 break;
4518 case 3:
4519 _sll(TReg1, Src1R, 24); // Position in the destination
4520 _sll(TReg2, SrcE, 8);
4521 _srl(TReg3, TReg2, 8); // Clear bits[31:24] of element
4522 _or(TDReg, TReg1, TReg3);
4523 _mov(DstE, TDReg);
4524 break;
4525 default:
4526 llvm::report_fatal_error("InsertElement: Invalid PosInCont");
4527 break;
4528 }
4529 }
4530 // Write back temporary vector to the destination
4531 auto *Assign = InstAssign::create(Func, Dest, TDest);
4532 lowerAssign(Assign);
4533 return;
4534 }
4535 llvm::report_fatal_error("InsertElement requires a constant index");
4536 }
4537
createArithInst(Intrinsics::AtomicRMWOperation Operation,Variable * Dest,Variable * Src0,Variable * Src1)4538 void TargetMIPS32::createArithInst(Intrinsics::AtomicRMWOperation Operation,
4539 Variable *Dest, Variable *Src0,
4540 Variable *Src1) {
4541 switch (Operation) {
4542 default:
4543 llvm::report_fatal_error("Unknown AtomicRMW operation");
4544 case Intrinsics::AtomicExchange:
4545 llvm::report_fatal_error("Can't handle Atomic xchg operation");
4546 case Intrinsics::AtomicAdd:
4547 _addu(Dest, Src0, Src1);
4548 break;
4549 case Intrinsics::AtomicAnd:
4550 _and(Dest, Src0, Src1);
4551 break;
4552 case Intrinsics::AtomicSub:
4553 _subu(Dest, Src0, Src1);
4554 break;
4555 case Intrinsics::AtomicOr:
4556 _or(Dest, Src0, Src1);
4557 break;
4558 case Intrinsics::AtomicXor:
4559 _xor(Dest, Src0, Src1);
4560 break;
4561 }
4562 }
4563
lowerIntrinsicCall(const InstIntrinsicCall * Instr)4564 void TargetMIPS32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
4565 Variable *Dest = Instr->getDest();
4566 Type DestTy = (Dest == nullptr) ? IceType_void : Dest->getType();
4567
4568 Intrinsics::IntrinsicID ID = Instr->getIntrinsicInfo().ID;
4569 switch (ID) {
4570 case Intrinsics::AtomicLoad: {
4571 assert(isScalarIntegerType(DestTy));
4572 // We require the memory address to be naturally aligned. Given that is the
4573 // case, then normal loads are atomic.
4574 if (!Intrinsics::isMemoryOrderValid(
4575 ID, getConstantMemoryOrder(Instr->getArg(1)))) {
4576 Func->setError("Unexpected memory ordering for AtomicLoad");
4577 return;
4578 }
4579 if (DestTy == IceType_i64) {
4580 llvm::report_fatal_error("AtomicLoad.i64 should have been prelowered.");
4581 return;
4582 } else if (DestTy == IceType_i32) {
4583 auto *T1 = makeReg(DestTy);
4584 auto *RegAt = getPhysicalRegister(RegMIPS32::Reg_AT);
4585 auto *Base = legalizeToReg(Instr->getArg(0));
4586 auto *Addr = formMemoryOperand(Base, DestTy);
4587 InstMIPS32Label *Retry = InstMIPS32Label::create(Func, this);
4588 InstMIPS32Label *Exit = InstMIPS32Label::create(Func, this);
4589 constexpr CfgNode *NoTarget = nullptr;
4590 _sync();
4591 Context.insert(Retry);
4592 Sandboxer(this).ll(T1, Addr);
4593 _br(NoTarget, NoTarget, T1, getZero(), Exit, CondMIPS32::Cond::NE);
4594 _addiu(RegAt, getZero(), 0); // Loaded value is zero here, writeback zero
4595 Sandboxer(this).sc(RegAt, Addr);
4596 _br(NoTarget, NoTarget, RegAt, getZero(), Retry, CondMIPS32::Cond::EQ);
4597 Context.insert(Exit);
4598 _sync();
4599 _mov(Dest, T1);
4600 Context.insert<InstFakeUse>(T1);
4601 } else {
4602 const uint32_t Mask = (1 << (CHAR_BITS * typeWidthInBytes(DestTy))) - 1;
4603 auto *Base = legalizeToReg(Instr->getArg(0));
4604 auto *T1 = makeReg(IceType_i32);
4605 auto *T2 = makeReg(IceType_i32);
4606 auto *T3 = makeReg(IceType_i32);
4607 auto *T4 = makeReg(IceType_i32);
4608 auto *T5 = makeReg(IceType_i32);
4609 auto *T6 = makeReg(IceType_i32);
4610 auto *SrcMask = makeReg(IceType_i32);
4611 auto *Tdest = makeReg(IceType_i32);
4612 auto *RegAt = getPhysicalRegister(RegMIPS32::Reg_AT);
4613 InstMIPS32Label *Retry = InstMIPS32Label::create(Func, this);
4614 InstMIPS32Label *Exit = InstMIPS32Label::create(Func, this);
4615 constexpr CfgNode *NoTarget = nullptr;
4616 _sync();
4617 _addiu(T1, getZero(), -4); // Address mask 0xFFFFFFFC
4618 _andi(T2, Base, 3); // Last two bits of the address
4619 _and(T3, Base, T1); // Align the address
4620 _sll(T4, T2, 3);
4621 _ori(T5, getZero(), Mask);
4622 _sllv(SrcMask, T5, T4); // Source mask
4623 auto *Addr = formMemoryOperand(T3, IceType_i32);
4624 Context.insert(Retry);
4625 Sandboxer(this).ll(T6, Addr);
4626 _and(Tdest, T6, SrcMask);
4627 _br(NoTarget, NoTarget, T6, getZero(), Exit, CondMIPS32::Cond::NE);
4628 _addiu(RegAt, getZero(), 0); // Loaded value is zero here, writeback zero
4629 Sandboxer(this).sc(RegAt, Addr);
4630 _br(NoTarget, NoTarget, RegAt, getZero(), Retry, CondMIPS32::Cond::EQ);
4631 Context.insert(Exit);
4632 auto *T7 = makeReg(IceType_i32);
4633 auto *T8 = makeReg(IceType_i32);
4634 _srlv(T7, Tdest, T4);
4635 _andi(T8, T7, Mask);
4636 _sync();
4637 _mov(Dest, T8);
4638 Context.insert<InstFakeUse>(T6);
4639 Context.insert<InstFakeUse>(SrcMask);
4640 }
4641 return;
4642 }
4643 case Intrinsics::AtomicStore: {
4644 // We require the memory address to be naturally aligned. Given that is the
4645 // case, then normal stores are atomic.
4646 if (!Intrinsics::isMemoryOrderValid(
4647 ID, getConstantMemoryOrder(Instr->getArg(2)))) {
4648 Func->setError("Unexpected memory ordering for AtomicStore");
4649 return;
4650 }
4651 auto *Val = Instr->getArg(0);
4652 auto Ty = Val->getType();
4653 if (Ty == IceType_i64) {
4654 llvm::report_fatal_error("AtomicStore.i64 should have been prelowered.");
4655 return;
4656 } else if (Ty == IceType_i32) {
4657 auto *Val = legalizeToReg(Instr->getArg(0));
4658 auto *Base = legalizeToReg(Instr->getArg(1));
4659 auto *Addr = formMemoryOperand(Base, Ty);
4660 InstMIPS32Label *Retry = InstMIPS32Label::create(Func, this);
4661 constexpr CfgNode *NoTarget = nullptr;
4662 auto *T1 = makeReg(IceType_i32);
4663 auto *RegAt = getPhysicalRegister(RegMIPS32::Reg_AT);
4664 _sync();
4665 Context.insert(Retry);
4666 Sandboxer(this).ll(T1, Addr);
4667 _mov(RegAt, Val);
4668 Sandboxer(this).sc(RegAt, Addr);
4669 _br(NoTarget, NoTarget, RegAt, getZero(), Retry, CondMIPS32::Cond::EQ);
4670 Context.insert<InstFakeUse>(T1); // To keep LL alive
4671 _sync();
4672 } else {
4673 auto *Val = legalizeToReg(Instr->getArg(0));
4674 auto *Base = legalizeToReg(Instr->getArg(1));
4675 InstMIPS32Label *Retry = InstMIPS32Label::create(Func, this);
4676 constexpr CfgNode *NoTarget = nullptr;
4677 auto *T1 = makeReg(IceType_i32);
4678 auto *T2 = makeReg(IceType_i32);
4679 auto *T3 = makeReg(IceType_i32);
4680 auto *T4 = makeReg(IceType_i32);
4681 auto *T5 = makeReg(IceType_i32);
4682 auto *T6 = makeReg(IceType_i32);
4683 auto *T7 = makeReg(IceType_i32);
4684 auto *RegAt = getPhysicalRegister(RegMIPS32::Reg_AT);
4685 auto *SrcMask = makeReg(IceType_i32);
4686 auto *DstMask = makeReg(IceType_i32);
4687 const uint32_t Mask = (1 << (CHAR_BITS * typeWidthInBytes(Ty))) - 1;
4688 _sync();
4689 _addiu(T1, getZero(), -4);
4690 _and(T7, Base, T1);
4691 auto *Addr = formMemoryOperand(T7, Ty);
4692 _andi(T2, Base, 3);
4693 _sll(T3, T2, 3);
4694 _ori(T4, getZero(), Mask);
4695 _sllv(T5, T4, T3);
4696 _sllv(T6, Val, T3);
4697 _nor(SrcMask, getZero(), T5);
4698 _and(DstMask, T6, T5);
4699 Context.insert(Retry);
4700 Sandboxer(this).ll(RegAt, Addr);
4701 _and(RegAt, RegAt, SrcMask);
4702 _or(RegAt, RegAt, DstMask);
4703 Sandboxer(this).sc(RegAt, Addr);
4704 _br(NoTarget, NoTarget, RegAt, getZero(), Retry, CondMIPS32::Cond::EQ);
4705 Context.insert<InstFakeUse>(SrcMask);
4706 Context.insert<InstFakeUse>(DstMask);
4707 _sync();
4708 }
4709 return;
4710 }
4711 case Intrinsics::AtomicCmpxchg: {
4712 assert(isScalarIntegerType(DestTy));
4713 // We require the memory address to be naturally aligned. Given that is the
4714 // case, then normal loads are atomic.
4715 if (!Intrinsics::isMemoryOrderValid(
4716 ID, getConstantMemoryOrder(Instr->getArg(3)),
4717 getConstantMemoryOrder(Instr->getArg(4)))) {
4718 Func->setError("Unexpected memory ordering for AtomicCmpxchg");
4719 return;
4720 }
4721
4722 InstMIPS32Label *Exit = InstMIPS32Label::create(Func, this);
4723 InstMIPS32Label *Retry = InstMIPS32Label::create(Func, this);
4724 constexpr CfgNode *NoTarget = nullptr;
4725 auto *New = Instr->getArg(2);
4726 auto *Expected = Instr->getArg(1);
4727 auto *ActualAddress = Instr->getArg(0);
4728
4729 if (DestTy == IceType_i64) {
4730 llvm::report_fatal_error(
4731 "AtomicCmpxchg.i64 should have been prelowered.");
4732 return;
4733 } else if (DestTy == IceType_i8 || DestTy == IceType_i16) {
4734 auto *NewR = legalizeToReg(New);
4735 auto *ExpectedR = legalizeToReg(Expected);
4736 auto *ActualAddressR = legalizeToReg(ActualAddress);
4737 const uint32_t ShiftAmount =
4738 (INT32_BITS - CHAR_BITS * typeWidthInBytes(DestTy));
4739 const uint32_t Mask = (1 << (CHAR_BITS * typeWidthInBytes(DestTy))) - 1;
4740 auto *RegAt = getPhysicalRegister(RegMIPS32::Reg_AT);
4741 auto *T1 = I32Reg();
4742 auto *T2 = I32Reg();
4743 auto *T3 = I32Reg();
4744 auto *T4 = I32Reg();
4745 auto *T5 = I32Reg();
4746 auto *T6 = I32Reg();
4747 auto *T7 = I32Reg();
4748 auto *T8 = I32Reg();
4749 auto *T9 = I32Reg();
4750 _addiu(RegAt, getZero(), -4);
4751 _and(T1, ActualAddressR, RegAt);
4752 auto *Addr = formMemoryOperand(T1, DestTy);
4753 _andi(RegAt, ActualAddressR, 3);
4754 _sll(T2, RegAt, 3);
4755 _ori(RegAt, getZero(), Mask);
4756 _sllv(T3, RegAt, T2);
4757 _nor(T4, getZero(), T3);
4758 _andi(RegAt, ExpectedR, Mask);
4759 _sllv(T5, RegAt, T2);
4760 _andi(RegAt, NewR, Mask);
4761 _sllv(T6, RegAt, T2);
4762 _sync();
4763 Context.insert(Retry);
4764 Sandboxer(this).ll(T7, Addr);
4765 _and(T8, T7, T3);
4766 _br(NoTarget, NoTarget, T8, T5, Exit, CondMIPS32::Cond::NE);
4767 _and(RegAt, T7, T4);
4768 _or(T9, RegAt, T6);
4769 Sandboxer(this).sc(T9, Addr);
4770 _br(NoTarget, NoTarget, getZero(), T9, Retry, CondMIPS32::Cond::EQ);
4771 Context.insert<InstFakeUse>(getZero());
4772 Context.insert(Exit);
4773 _srlv(RegAt, T8, T2);
4774 _sll(RegAt, RegAt, ShiftAmount);
4775 _sra(RegAt, RegAt, ShiftAmount);
4776 _mov(Dest, RegAt);
4777 _sync();
4778 Context.insert<InstFakeUse>(T3);
4779 Context.insert<InstFakeUse>(T4);
4780 Context.insert<InstFakeUse>(T5);
4781 Context.insert<InstFakeUse>(T6);
4782 Context.insert<InstFakeUse>(T8);
4783 Context.insert<InstFakeUse>(ExpectedR);
4784 Context.insert<InstFakeUse>(NewR);
4785 } else {
4786 auto *T1 = I32Reg();
4787 auto *T2 = I32Reg();
4788 auto *NewR = legalizeToReg(New);
4789 auto *ExpectedR = legalizeToReg(Expected);
4790 auto *ActualAddressR = legalizeToReg(ActualAddress);
4791 _sync();
4792 Context.insert(Retry);
4793 Sandboxer(this).ll(T1, formMemoryOperand(ActualAddressR, DestTy));
4794 _br(NoTarget, NoTarget, T1, ExpectedR, Exit, CondMIPS32::Cond::NE);
4795 _mov(T2, NewR);
4796 Sandboxer(this).sc(T2, formMemoryOperand(ActualAddressR, DestTy));
4797 _br(NoTarget, NoTarget, T2, getZero(), Retry, CondMIPS32::Cond::EQ);
4798 Context.insert<InstFakeUse>(getZero());
4799 Context.insert(Exit);
4800 _mov(Dest, T1);
4801 _sync();
4802 Context.insert<InstFakeUse>(ExpectedR);
4803 Context.insert<InstFakeUse>(NewR);
4804 }
4805 return;
4806 }
4807 case Intrinsics::AtomicRMW: {
4808 assert(isScalarIntegerType(DestTy));
4809 // We require the memory address to be naturally aligned. Given that is the
4810 // case, then normal loads are atomic.
4811 if (!Intrinsics::isMemoryOrderValid(
4812 ID, getConstantMemoryOrder(Instr->getArg(3)))) {
4813 Func->setError("Unexpected memory ordering for AtomicRMW");
4814 return;
4815 }
4816
4817 constexpr CfgNode *NoTarget = nullptr;
4818 InstMIPS32Label *Retry = InstMIPS32Label::create(Func, this);
4819 auto Operation = static_cast<Intrinsics::AtomicRMWOperation>(
4820 llvm::cast<ConstantInteger32>(Instr->getArg(0))->getValue());
4821 auto *New = Instr->getArg(2);
4822 auto *ActualAddress = Instr->getArg(1);
4823
4824 if (DestTy == IceType_i64) {
4825 llvm::report_fatal_error("AtomicRMW.i64 should have been prelowered.");
4826 return;
4827 } else if (DestTy == IceType_i8 || DestTy == IceType_i16) {
4828 const uint32_t ShiftAmount =
4829 INT32_BITS - (CHAR_BITS * typeWidthInBytes(DestTy));
4830 const uint32_t Mask = (1 << (CHAR_BITS * typeWidthInBytes(DestTy))) - 1;
4831 auto *NewR = legalizeToReg(New);
4832 auto *ActualAddressR = legalizeToReg(ActualAddress);
4833 auto *RegAt = getPhysicalRegister(RegMIPS32::Reg_AT);
4834 auto *T1 = I32Reg();
4835 auto *T2 = I32Reg();
4836 auto *T3 = I32Reg();
4837 auto *T4 = I32Reg();
4838 auto *T5 = I32Reg();
4839 auto *T6 = I32Reg();
4840 auto *T7 = I32Reg();
4841 _sync();
4842 _addiu(RegAt, getZero(), -4);
4843 _and(T1, ActualAddressR, RegAt);
4844 _andi(RegAt, ActualAddressR, 3);
4845 _sll(T2, RegAt, 3);
4846 _ori(RegAt, getZero(), Mask);
4847 _sllv(T3, RegAt, T2);
4848 _nor(T4, getZero(), T3);
4849 _sllv(T5, NewR, T2);
4850 Context.insert(Retry);
4851 Sandboxer(this).ll(T6, formMemoryOperand(T1, DestTy));
4852 if (Operation != Intrinsics::AtomicExchange) {
4853 createArithInst(Operation, RegAt, T6, T5);
4854 _and(RegAt, RegAt, T3);
4855 }
4856 _and(T7, T6, T4);
4857 if (Operation == Intrinsics::AtomicExchange) {
4858 _or(RegAt, T7, T5);
4859 } else {
4860 _or(RegAt, T7, RegAt);
4861 }
4862 Sandboxer(this).sc(RegAt, formMemoryOperand(T1, DestTy));
4863 _br(NoTarget, NoTarget, RegAt, getZero(), Retry, CondMIPS32::Cond::EQ);
4864 Context.insert<InstFakeUse>(getZero());
4865 _and(RegAt, T6, T3);
4866 _srlv(RegAt, RegAt, T2);
4867 _sll(RegAt, RegAt, ShiftAmount);
4868 _sra(RegAt, RegAt, ShiftAmount);
4869 _mov(Dest, RegAt);
4870 _sync();
4871 Context.insert<InstFakeUse>(NewR);
4872 Context.insert<InstFakeUse>(Dest);
4873 } else {
4874 auto *T1 = I32Reg();
4875 auto *T2 = I32Reg();
4876 auto *NewR = legalizeToReg(New);
4877 auto *ActualAddressR = legalizeToReg(ActualAddress);
4878 _sync();
4879 Context.insert(Retry);
4880 Sandboxer(this).ll(T1, formMemoryOperand(ActualAddressR, DestTy));
4881 if (Operation == Intrinsics::AtomicExchange) {
4882 _mov(T2, NewR);
4883 } else {
4884 createArithInst(Operation, T2, T1, NewR);
4885 }
4886 Sandboxer(this).sc(T2, formMemoryOperand(ActualAddressR, DestTy));
4887 _br(NoTarget, NoTarget, T2, getZero(), Retry, CondMIPS32::Cond::EQ);
4888 Context.insert<InstFakeUse>(getZero());
4889 _mov(Dest, T1);
4890 _sync();
4891 Context.insert<InstFakeUse>(NewR);
4892 Context.insert<InstFakeUse>(Dest);
4893 }
4894 return;
4895 }
4896 case Intrinsics::AtomicFence:
4897 case Intrinsics::AtomicFenceAll:
4898 assert(Dest == nullptr);
4899 _sync();
4900 return;
4901 case Intrinsics::AtomicIsLockFree: {
4902 Operand *ByteSize = Instr->getArg(0);
4903 auto *CI = llvm::dyn_cast<ConstantInteger32>(ByteSize);
4904 auto *T = I32Reg();
4905 if (CI == nullptr) {
4906 // The PNaCl ABI requires the byte size to be a compile-time constant.
4907 Func->setError("AtomicIsLockFree byte size should be compile-time const");
4908 return;
4909 }
4910 static constexpr int32_t NotLockFree = 0;
4911 static constexpr int32_t LockFree = 1;
4912 int32_t Result = NotLockFree;
4913 switch (CI->getValue()) {
4914 case 1:
4915 case 2:
4916 case 4:
4917 Result = LockFree;
4918 break;
4919 }
4920 _addiu(T, getZero(), Result);
4921 _mov(Dest, T);
4922 return;
4923 }
4924 case Intrinsics::Bswap: {
4925 auto *Src = Instr->getArg(0);
4926 const Type SrcTy = Src->getType();
4927 assert(SrcTy == IceType_i16 || SrcTy == IceType_i32 ||
4928 SrcTy == IceType_i64);
4929 switch (SrcTy) {
4930 case IceType_i16: {
4931 auto *T1 = I32Reg();
4932 auto *T2 = I32Reg();
4933 auto *T3 = I32Reg();
4934 auto *T4 = I32Reg();
4935 auto *SrcR = legalizeToReg(Src);
4936 _sll(T1, SrcR, 8);
4937 _lui(T2, Ctx->getConstantInt32(255));
4938 _and(T1, T1, T2);
4939 _sll(T3, SrcR, 24);
4940 _or(T1, T3, T1);
4941 _srl(T4, T1, 16);
4942 _mov(Dest, T4);
4943 return;
4944 }
4945 case IceType_i32: {
4946 auto *T1 = I32Reg();
4947 auto *T2 = I32Reg();
4948 auto *T3 = I32Reg();
4949 auto *T4 = I32Reg();
4950 auto *T5 = I32Reg();
4951 auto *SrcR = legalizeToReg(Src);
4952 _srl(T1, SrcR, 24);
4953 _srl(T2, SrcR, 8);
4954 _andi(T2, T2, 0xFF00);
4955 _or(T1, T2, T1);
4956 _sll(T4, SrcR, 8);
4957 _lui(T3, Ctx->getConstantInt32(255));
4958 _and(T4, T4, T3);
4959 _sll(T5, SrcR, 24);
4960 _or(T4, T5, T4);
4961 _or(T4, T4, T1);
4962 _mov(Dest, T4);
4963 return;
4964 }
4965 case IceType_i64: {
4966 auto *T1 = I32Reg();
4967 auto *T2 = I32Reg();
4968 auto *T3 = I32Reg();
4969 auto *T4 = I32Reg();
4970 auto *T5 = I32Reg();
4971 auto *T6 = I32Reg();
4972 auto *T7 = I32Reg();
4973 auto *T8 = I32Reg();
4974 auto *T9 = I32Reg();
4975 auto *T10 = I32Reg();
4976 auto *T11 = I32Reg();
4977 auto *T12 = I32Reg();
4978 auto *T13 = I32Reg();
4979 auto *T14 = I32Reg();
4980 auto *T15 = I32Reg();
4981 auto *T16 = I32Reg();
4982 auto *T17 = I32Reg();
4983 auto *T18 = I32Reg();
4984 auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
4985 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
4986 Src = legalizeUndef(Src);
4987 auto *SrcLoR = legalizeToReg(loOperand(Src));
4988 auto *SrcHiR = legalizeToReg(hiOperand(Src));
4989 _sll(T1, SrcHiR, 8);
4990 _srl(T2, SrcHiR, 24);
4991 _srl(T3, SrcHiR, 8);
4992 _andi(T3, T3, 0xFF00);
4993 _lui(T4, Ctx->getConstantInt32(255));
4994 _or(T5, T3, T2);
4995 _and(T6, T1, T4);
4996 _sll(T7, SrcHiR, 24);
4997 _or(T8, T7, T6);
4998 _srl(T9, SrcLoR, 24);
4999 _srl(T10, SrcLoR, 8);
5000 _andi(T11, T10, 0xFF00);
5001 _or(T12, T8, T5);
5002 _or(T13, T11, T9);
5003 _sll(T14, SrcLoR, 8);
5004 _and(T15, T14, T4);
5005 _sll(T16, SrcLoR, 24);
5006 _or(T17, T16, T15);
5007 _or(T18, T17, T13);
5008 _mov(DestLo, T12);
5009 _mov(DestHi, T18);
5010 return;
5011 }
5012 default:
5013 llvm::report_fatal_error("Control flow should never have reached here.");
5014 }
5015 return;
5016 }
5017 case Intrinsics::Ctpop: {
5018 llvm::report_fatal_error("Ctpop should have been prelowered.");
5019 return;
5020 }
5021 case Intrinsics::Ctlz: {
5022 auto *Src = Instr->getArg(0);
5023 const Type SrcTy = Src->getType();
5024 assert(SrcTy == IceType_i32 || SrcTy == IceType_i64);
5025 switch (SrcTy) {
5026 case IceType_i32: {
5027 auto *T = I32Reg();
5028 auto *SrcR = legalizeToReg(Src);
5029 _clz(T, SrcR);
5030 _mov(Dest, T);
5031 break;
5032 }
5033 case IceType_i64: {
5034 auto *T1 = I32Reg();
5035 auto *T2 = I32Reg();
5036 auto *T3 = I32Reg();
5037 auto *T4 = I32Reg();
5038 auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
5039 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
5040 Variable *SrcHiR = legalizeToReg(hiOperand(Src));
5041 Variable *SrcLoR = legalizeToReg(loOperand(Src));
5042 _clz(T1, SrcHiR);
5043 _clz(T2, SrcLoR);
5044 _addiu(T3, T2, 32);
5045 _movn(T3, T1, SrcHiR);
5046 _addiu(T4, getZero(), 0);
5047 _mov(DestHi, T4);
5048 _mov(DestLo, T3);
5049 break;
5050 }
5051 default:
5052 llvm::report_fatal_error("Control flow should never have reached here.");
5053 }
5054 break;
5055 }
5056 case Intrinsics::Cttz: {
5057 auto *Src = Instr->getArg(0);
5058 const Type SrcTy = Src->getType();
5059 assert(SrcTy == IceType_i32 || SrcTy == IceType_i64);
5060 switch (SrcTy) {
5061 case IceType_i32: {
5062 auto *T1 = I32Reg();
5063 auto *T2 = I32Reg();
5064 auto *T3 = I32Reg();
5065 auto *T4 = I32Reg();
5066 auto *T5 = I32Reg();
5067 auto *T6 = I32Reg();
5068 auto *SrcR = legalizeToReg(Src);
5069 _addiu(T1, SrcR, -1);
5070 _not(T2, SrcR);
5071 _and(T3, T2, T1);
5072 _clz(T4, T3);
5073 _addiu(T5, getZero(), 32);
5074 _subu(T6, T5, T4);
5075 _mov(Dest, T6);
5076 break;
5077 }
5078 case IceType_i64: {
5079 auto *THi1 = I32Reg();
5080 auto *THi2 = I32Reg();
5081 auto *THi3 = I32Reg();
5082 auto *THi4 = I32Reg();
5083 auto *THi5 = I32Reg();
5084 auto *THi6 = I32Reg();
5085 auto *TLo1 = I32Reg();
5086 auto *TLo2 = I32Reg();
5087 auto *TLo3 = I32Reg();
5088 auto *TLo4 = I32Reg();
5089 auto *TLo5 = I32Reg();
5090 auto *TLo6 = I32Reg();
5091 auto *TResHi = I32Reg();
5092 auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
5093 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
5094 Variable *SrcHiR = legalizeToReg(hiOperand(Src));
5095 Variable *SrcLoR = legalizeToReg(loOperand(Src));
5096 _addiu(THi1, SrcHiR, -1);
5097 _not(THi2, SrcHiR);
5098 _and(THi3, THi2, THi1);
5099 _clz(THi4, THi3);
5100 _addiu(THi5, getZero(), 64);
5101 _subu(THi6, THi5, THi4);
5102 _addiu(TLo1, SrcLoR, -1);
5103 _not(TLo2, SrcLoR);
5104 _and(TLo3, TLo2, TLo1);
5105 _clz(TLo4, TLo3);
5106 _addiu(TLo5, getZero(), 32);
5107 _subu(TLo6, TLo5, TLo4);
5108 _movn(THi6, TLo6, SrcLoR);
5109 _addiu(TResHi, getZero(), 0);
5110 _mov(DestHi, TResHi);
5111 _mov(DestLo, THi6);
5112 break;
5113 }
5114 default:
5115 llvm::report_fatal_error("Control flow should never have reached here.");
5116 }
5117 return;
5118 }
5119 case Intrinsics::Fabs: {
5120 if (isScalarFloatingType(DestTy)) {
5121 Variable *T = makeReg(DestTy);
5122 if (DestTy == IceType_f32) {
5123 _abs_s(T, legalizeToReg(Instr->getArg(0)));
5124 } else {
5125 _abs_d(T, legalizeToReg(Instr->getArg(0)));
5126 }
5127 _mov(Dest, T);
5128 }
5129 return;
5130 }
5131 case Intrinsics::Longjmp: {
5132 llvm::report_fatal_error("longjmp should have been prelowered.");
5133 return;
5134 }
5135 case Intrinsics::Memcpy: {
5136 llvm::report_fatal_error("memcpy should have been prelowered.");
5137 return;
5138 }
5139 case Intrinsics::Memmove: {
5140 llvm::report_fatal_error("memmove should have been prelowered.");
5141 return;
5142 }
5143 case Intrinsics::Memset: {
5144 llvm::report_fatal_error("memset should have been prelowered.");
5145 return;
5146 }
5147 case Intrinsics::NaClReadTP: {
5148 if (SandboxingType != ST_NaCl)
5149 llvm::report_fatal_error("nacl-read-tp should have been prelowered.");
5150 else {
5151 auto *T8 = makeReg(IceType_i32, RegMIPS32::Reg_T8);
5152 Context.insert<InstFakeDef>(T8);
5153 Variable *TP = legalizeToReg(OperandMIPS32Mem::create(
5154 Func, getPointerType(), T8,
5155 llvm::cast<ConstantInteger32>(Ctx->getConstantZero(IceType_i32))));
5156 _mov(Dest, TP);
5157 }
5158 return;
5159 }
5160 case Intrinsics::Setjmp: {
5161 llvm::report_fatal_error("setjmp should have been prelowered.");
5162 return;
5163 }
5164 case Intrinsics::Sqrt: {
5165 if (isScalarFloatingType(DestTy)) {
5166 Variable *T = makeReg(DestTy);
5167 if (DestTy == IceType_f32) {
5168 _sqrt_s(T, legalizeToReg(Instr->getArg(0)));
5169 } else {
5170 _sqrt_d(T, legalizeToReg(Instr->getArg(0)));
5171 }
5172 _mov(Dest, T);
5173 } else {
5174 assert(getFlags().getApplicationBinaryInterface() != ::Ice::ABI_PNaCl);
5175 UnimplementedLoweringError(this, Instr); // Not required for PNaCl
5176 }
5177 return;
5178 }
5179 case Intrinsics::Stacksave: {
5180 Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP);
5181 _mov(Dest, SP);
5182 return;
5183 }
5184 case Intrinsics::Stackrestore: {
5185 Variable *Val = legalizeToReg(Instr->getArg(0));
5186 Sandboxer(this).reset_sp(Val);
5187 return;
5188 }
5189 case Intrinsics::Trap: {
5190 const uint32_t TrapCodeZero = 0;
5191 _teq(getZero(), getZero(), TrapCodeZero);
5192 return;
5193 }
5194 case Intrinsics::LoadSubVector: {
5195 UnimplementedLoweringError(this, Instr); // Not required for PNaCl
5196 return;
5197 }
5198 case Intrinsics::StoreSubVector: {
5199 UnimplementedLoweringError(this, Instr); // Not required for PNaCl
5200 return;
5201 }
5202 default: // UnknownIntrinsic
5203 Func->setError("Unexpected intrinsic");
5204 return;
5205 }
5206 return;
5207 }
5208
lowerLoad(const InstLoad * Instr)5209 void TargetMIPS32::lowerLoad(const InstLoad *Instr) {
5210 // A Load instruction can be treated the same as an Assign instruction, after
5211 // the source operand is transformed into an OperandMIPS32Mem operand.
5212 Type Ty = Instr->getDest()->getType();
5213 Operand *Src0 = formMemoryOperand(Instr->getSourceAddress(), Ty);
5214 Variable *DestLoad = Instr->getDest();
5215 auto *Assign = InstAssign::create(Func, DestLoad, Src0);
5216 lowerAssign(Assign);
5217 }
5218
5219 namespace {
dumpAddressOpt(const Cfg * Func,const Variable * Base,int32_t Offset,const Inst * Reason)5220 void dumpAddressOpt(const Cfg *Func, const Variable *Base, int32_t Offset,
5221 const Inst *Reason) {
5222 if (!BuildDefs::dump())
5223 return;
5224 if (!Func->isVerbose(IceV_AddrOpt))
5225 return;
5226 OstreamLocker _(Func->getContext());
5227 Ostream &Str = Func->getContext()->getStrDump();
5228 Str << "Instruction: ";
5229 Reason->dumpDecorated(Func);
5230 Str << " results in Base=";
5231 if (Base)
5232 Base->dump(Func);
5233 else
5234 Str << "<null>";
5235 Str << ", Offset=" << Offset << "\n";
5236 }
5237
matchAssign(const VariablesMetadata * VMetadata,Variable ** Var,int32_t * Offset,const Inst ** Reason)5238 bool matchAssign(const VariablesMetadata *VMetadata, Variable **Var,
5239 int32_t *Offset, const Inst **Reason) {
5240 // Var originates from Var=SrcVar ==> set Var:=SrcVar
5241 if (*Var == nullptr)
5242 return false;
5243 const Inst *VarAssign = VMetadata->getSingleDefinition(*Var);
5244 if (!VarAssign)
5245 return false;
5246 assert(!VMetadata->isMultiDef(*Var));
5247 if (!llvm::isa<InstAssign>(VarAssign))
5248 return false;
5249
5250 Operand *SrcOp = VarAssign->getSrc(0);
5251 bool Optimized = false;
5252 if (auto *SrcVar = llvm::dyn_cast<Variable>(SrcOp)) {
5253 if (!VMetadata->isMultiDef(SrcVar) ||
5254 // TODO: ensure SrcVar stays single-BB
5255 false) {
5256 Optimized = true;
5257 *Var = SrcVar;
5258 } else if (auto *Const = llvm::dyn_cast<ConstantInteger32>(SrcOp)) {
5259 int32_t MoreOffset = Const->getValue();
5260 int32_t NewOffset = MoreOffset + *Offset;
5261 if (Utils::WouldOverflowAdd(*Offset, MoreOffset))
5262 return false;
5263 *Var = nullptr;
5264 *Offset += NewOffset;
5265 Optimized = true;
5266 }
5267 }
5268
5269 if (Optimized) {
5270 *Reason = VarAssign;
5271 }
5272
5273 return Optimized;
5274 }
5275
isAddOrSub(const Inst * Instr,InstArithmetic::OpKind * Kind)5276 bool isAddOrSub(const Inst *Instr, InstArithmetic::OpKind *Kind) {
5277 if (const auto *Arith = llvm::dyn_cast<InstArithmetic>(Instr)) {
5278 switch (Arith->getOp()) {
5279 default:
5280 return false;
5281 case InstArithmetic::Add:
5282 case InstArithmetic::Sub:
5283 *Kind = Arith->getOp();
5284 return true;
5285 }
5286 }
5287 return false;
5288 }
5289
matchOffsetBase(const VariablesMetadata * VMetadata,Variable ** Base,int32_t * Offset,const Inst ** Reason)5290 bool matchOffsetBase(const VariablesMetadata *VMetadata, Variable **Base,
5291 int32_t *Offset, const Inst **Reason) {
5292 // Base is Base=Var+Const || Base is Base=Const+Var ==>
5293 // set Base=Var, Offset+=Const
5294 // Base is Base=Var-Const ==>
5295 // set Base=Var, Offset-=Const
5296 if (*Base == nullptr)
5297 return false;
5298 const Inst *BaseInst = VMetadata->getSingleDefinition(*Base);
5299 if (BaseInst == nullptr) {
5300 return false;
5301 }
5302 assert(!VMetadata->isMultiDef(*Base));
5303
5304 auto *ArithInst = llvm::dyn_cast<const InstArithmetic>(BaseInst);
5305 if (ArithInst == nullptr)
5306 return false;
5307 InstArithmetic::OpKind Kind;
5308 if (!isAddOrSub(ArithInst, &Kind))
5309 return false;
5310 bool IsAdd = Kind == InstArithmetic::Add;
5311 Operand *Src0 = ArithInst->getSrc(0);
5312 Operand *Src1 = ArithInst->getSrc(1);
5313 auto *Var0 = llvm::dyn_cast<Variable>(Src0);
5314 auto *Var1 = llvm::dyn_cast<Variable>(Src1);
5315 auto *Const0 = llvm::dyn_cast<ConstantInteger32>(Src0);
5316 auto *Const1 = llvm::dyn_cast<ConstantInteger32>(Src1);
5317 Variable *NewBase = nullptr;
5318 int32_t NewOffset = *Offset;
5319
5320 if (Var0 == nullptr && Const0 == nullptr) {
5321 assert(llvm::isa<ConstantRelocatable>(Src0));
5322 return false;
5323 }
5324
5325 if (Var1 == nullptr && Const1 == nullptr) {
5326 assert(llvm::isa<ConstantRelocatable>(Src1));
5327 return false;
5328 }
5329
5330 if (Var0 && Var1)
5331 // TODO(jpp): merge base/index splitting into here.
5332 return false;
5333 if (!IsAdd && Var1)
5334 return false;
5335 if (Var0)
5336 NewBase = Var0;
5337 else if (Var1)
5338 NewBase = Var1;
5339 // Compute the updated constant offset.
5340 if (Const0) {
5341 int32_t MoreOffset = IsAdd ? Const0->getValue() : -Const0->getValue();
5342 if (Utils::WouldOverflowAdd(NewOffset, MoreOffset))
5343 return false;
5344 NewOffset += MoreOffset;
5345 }
5346 if (Const1) {
5347 int32_t MoreOffset = IsAdd ? Const1->getValue() : -Const1->getValue();
5348 if (Utils::WouldOverflowAdd(NewOffset, MoreOffset))
5349 return false;
5350 NewOffset += MoreOffset;
5351 }
5352
5353 // Update the computed address parameters once we are sure optimization
5354 // is valid.
5355 *Base = NewBase;
5356 *Offset = NewOffset;
5357 *Reason = BaseInst;
5358 return true;
5359 }
5360 } // end of anonymous namespace
5361
formAddressingMode(Type Ty,Cfg * Func,const Inst * LdSt,Operand * Base)5362 OperandMIPS32Mem *TargetMIPS32::formAddressingMode(Type Ty, Cfg *Func,
5363 const Inst *LdSt,
5364 Operand *Base) {
5365 assert(Base != nullptr);
5366 int32_t OffsetImm = 0;
5367
5368 Func->resetCurrentNode();
5369 if (Func->isVerbose(IceV_AddrOpt)) {
5370 OstreamLocker _(Func->getContext());
5371 Ostream &Str = Func->getContext()->getStrDump();
5372 Str << "\nAddress mode formation:\t";
5373 LdSt->dumpDecorated(Func);
5374 }
5375
5376 if (isVectorType(Ty)) {
5377 return nullptr;
5378 }
5379
5380 auto *BaseVar = llvm::dyn_cast<Variable>(Base);
5381 if (BaseVar == nullptr)
5382 return nullptr;
5383
5384 const VariablesMetadata *VMetadata = Func->getVMetadata();
5385 const Inst *Reason = nullptr;
5386
5387 do {
5388 if (Reason != nullptr) {
5389 dumpAddressOpt(Func, BaseVar, OffsetImm, Reason);
5390 Reason = nullptr;
5391 }
5392
5393 if (matchAssign(VMetadata, &BaseVar, &OffsetImm, &Reason)) {
5394 continue;
5395 }
5396
5397 if (matchOffsetBase(VMetadata, &BaseVar, &OffsetImm, &Reason)) {
5398 continue;
5399 }
5400 } while (Reason);
5401
5402 if (BaseVar == nullptr) {
5403 // We need base register rather than just OffsetImm. Move the OffsetImm to
5404 // BaseVar and form 0(BaseVar) addressing.
5405 const Type PointerType = getPointerType();
5406 BaseVar = makeReg(PointerType);
5407 Context.insert<InstAssign>(BaseVar, Ctx->getConstantInt32(OffsetImm));
5408 OffsetImm = 0;
5409 } else if (OffsetImm != 0) {
5410 // If the OffsetImm is more than signed 16-bit value then add it in the
5411 // BaseVar and form 0(BaseVar) addressing.
5412 const int32_t PositiveOffset = OffsetImm > 0 ? OffsetImm : -OffsetImm;
5413 const InstArithmetic::OpKind Op =
5414 OffsetImm > 0 ? InstArithmetic::Add : InstArithmetic::Sub;
5415 constexpr bool ZeroExt = false;
5416 if (!OperandMIPS32Mem::canHoldOffset(Ty, ZeroExt, OffsetImm)) {
5417 const Type PointerType = getPointerType();
5418 Variable *T = makeReg(PointerType);
5419 Context.insert<InstArithmetic>(Op, T, BaseVar,
5420 Ctx->getConstantInt32(PositiveOffset));
5421 BaseVar = T;
5422 OffsetImm = 0;
5423 }
5424 }
5425
5426 assert(BaseVar != nullptr);
5427 assert(OffsetImm < 0 ? (-OffsetImm & 0x0000ffff) == -OffsetImm
5428 : (OffsetImm & 0x0000ffff) == OffsetImm);
5429
5430 return OperandMIPS32Mem::create(
5431 Func, Ty, BaseVar,
5432 llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(OffsetImm)));
5433 }
5434
doAddressOptLoad()5435 void TargetMIPS32::doAddressOptLoad() {
5436 Inst *Instr = iteratorToInst(Context.getCur());
5437 assert(llvm::isa<InstLoad>(Instr));
5438 Variable *Dest = Instr->getDest();
5439 Operand *Addr = Instr->getSrc(0);
5440 if (OperandMIPS32Mem *Mem =
5441 formAddressingMode(Dest->getType(), Func, Instr, Addr)) {
5442 Instr->setDeleted();
5443 Context.insert<InstLoad>(Dest, Mem);
5444 }
5445 }
5446
randomlyInsertNop(float Probability,RandomNumberGenerator & RNG)5447 void TargetMIPS32::randomlyInsertNop(float Probability,
5448 RandomNumberGenerator &RNG) {
5449 RandomNumberGeneratorWrapper RNGW(RNG);
5450 if (RNGW.getTrueWithProbability(Probability)) {
5451 _nop();
5452 }
5453 }
5454
lowerPhi(const InstPhi *)5455 void TargetMIPS32::lowerPhi(const InstPhi * /*Instr*/) {
5456 Func->setError("Phi found in regular instruction list");
5457 }
5458
lowerRet(const InstRet * Instr)5459 void TargetMIPS32::lowerRet(const InstRet *Instr) {
5460 Variable *Reg = nullptr;
5461 if (Instr->hasRetValue()) {
5462 Operand *Src0 = Instr->getRetValue();
5463 switch (Src0->getType()) {
5464 case IceType_f32: {
5465 Operand *Src0F = legalizeToReg(Src0);
5466 Reg = makeReg(Src0F->getType(), RegMIPS32::Reg_F0);
5467 _mov(Reg, Src0F);
5468 break;
5469 }
5470 case IceType_f64: {
5471 Operand *Src0F = legalizeToReg(Src0);
5472 Reg = makeReg(Src0F->getType(), RegMIPS32::Reg_F0F1);
5473 _mov(Reg, Src0F);
5474 break;
5475 }
5476 case IceType_i1:
5477 case IceType_i8:
5478 case IceType_i16:
5479 case IceType_i32: {
5480 Operand *Src0F = legalizeToReg(Src0);
5481 Reg = makeReg(Src0F->getType(), RegMIPS32::Reg_V0);
5482 _mov(Reg, Src0F);
5483 break;
5484 }
5485 case IceType_i64: {
5486 Src0 = legalizeUndef(Src0);
5487 Variable *R0 = legalizeToReg(loOperand(Src0), RegMIPS32::Reg_V0);
5488 Variable *R1 = legalizeToReg(hiOperand(Src0), RegMIPS32::Reg_V1);
5489 Reg = R0;
5490 Context.insert<InstFakeUse>(R1);
5491 break;
5492 }
5493 case IceType_v4i1:
5494 case IceType_v8i1:
5495 case IceType_v16i1:
5496 case IceType_v16i8:
5497 case IceType_v8i16:
5498 case IceType_v4i32: {
5499 auto *SrcVec = llvm::dyn_cast<VariableVecOn32>(legalizeUndef(Src0));
5500 Variable *V0 =
5501 legalizeToReg(SrcVec->getContainers()[0], RegMIPS32::Reg_V0);
5502 Variable *V1 =
5503 legalizeToReg(SrcVec->getContainers()[1], RegMIPS32::Reg_V1);
5504 Variable *A0 =
5505 legalizeToReg(SrcVec->getContainers()[2], RegMIPS32::Reg_A0);
5506 Variable *A1 =
5507 legalizeToReg(SrcVec->getContainers()[3], RegMIPS32::Reg_A1);
5508 Reg = V0;
5509 Context.insert<InstFakeUse>(V1);
5510 Context.insert<InstFakeUse>(A0);
5511 Context.insert<InstFakeUse>(A1);
5512 break;
5513 }
5514 case IceType_v4f32: {
5515 auto *SrcVec = llvm::dyn_cast<VariableVecOn32>(legalizeUndef(Src0));
5516 Reg = getImplicitRet();
5517 auto *RegT = legalizeToReg(Reg);
5518 // Return the vector through buffer in implicit argument a0
5519 for (SizeT i = 0; i < SrcVec->ContainersPerVector; ++i) {
5520 OperandMIPS32Mem *Mem = OperandMIPS32Mem::create(
5521 Func, IceType_f32, RegT,
5522 llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(i * 4)));
5523 Variable *Var = legalizeToReg(SrcVec->getContainers()[i]);
5524 _sw(Var, Mem);
5525 }
5526 Variable *V0 = makeReg(IceType_i32, RegMIPS32::Reg_V0);
5527 _mov(V0, Reg); // move v0,a0
5528 Context.insert<InstFakeUse>(Reg);
5529 Context.insert<InstFakeUse>(V0);
5530 break;
5531 }
5532 default:
5533 llvm::report_fatal_error("Ret: Invalid type.");
5534 break;
5535 }
5536 }
5537 _ret(getPhysicalRegister(RegMIPS32::Reg_RA), Reg);
5538 }
5539
lowerSelect(const InstSelect * Instr)5540 void TargetMIPS32::lowerSelect(const InstSelect *Instr) {
5541 Variable *Dest = Instr->getDest();
5542 const Type DestTy = Dest->getType();
5543
5544 if (isVectorType(DestTy)) {
5545 llvm::report_fatal_error("Select: Destination type is vector");
5546 return;
5547 }
5548
5549 Variable *DestR = nullptr;
5550 Variable *DestHiR = nullptr;
5551 Variable *SrcTR = nullptr;
5552 Variable *SrcTHiR = nullptr;
5553 Variable *SrcFR = nullptr;
5554 Variable *SrcFHiR = nullptr;
5555
5556 if (DestTy == IceType_i64) {
5557 DestR = llvm::cast<Variable>(loOperand(Dest));
5558 DestHiR = llvm::cast<Variable>(hiOperand(Dest));
5559 SrcTR = legalizeToReg(loOperand(legalizeUndef(Instr->getTrueOperand())));
5560 SrcTHiR = legalizeToReg(hiOperand(legalizeUndef(Instr->getTrueOperand())));
5561 SrcFR = legalizeToReg(loOperand(legalizeUndef(Instr->getFalseOperand())));
5562 SrcFHiR = legalizeToReg(hiOperand(legalizeUndef(Instr->getFalseOperand())));
5563 } else {
5564 SrcTR = legalizeToReg(legalizeUndef(Instr->getTrueOperand()));
5565 SrcFR = legalizeToReg(legalizeUndef(Instr->getFalseOperand()));
5566 }
5567
5568 Variable *ConditionR = legalizeToReg(Instr->getCondition());
5569
5570 assert(Instr->getCondition()->getType() == IceType_i1);
5571
5572 switch (DestTy) {
5573 case IceType_i1:
5574 case IceType_i8:
5575 case IceType_i16:
5576 case IceType_i32:
5577 _movn(SrcFR, SrcTR, ConditionR);
5578 _mov(Dest, SrcFR);
5579 break;
5580 case IceType_i64:
5581 _movn(SrcFR, SrcTR, ConditionR);
5582 _movn(SrcFHiR, SrcTHiR, ConditionR);
5583 _mov(DestR, SrcFR);
5584 _mov(DestHiR, SrcFHiR);
5585 break;
5586 case IceType_f32:
5587 _movn_s(SrcFR, SrcTR, ConditionR);
5588 _mov(Dest, SrcFR);
5589 break;
5590 case IceType_f64:
5591 _movn_d(SrcFR, SrcTR, ConditionR);
5592 _mov(Dest, SrcFR);
5593 break;
5594 default:
5595 llvm::report_fatal_error("Select: Invalid type.");
5596 }
5597 }
5598
// Shuffle-vector lowering is not implemented for MIPS32; the instruction is
// reported through UnimplementedLoweringError.
void TargetMIPS32::lowerShuffleVector(const InstShuffleVector *Instr) {
  UnimplementedLoweringError(this, Instr);
}
5602
lowerStore(const InstStore * Instr)5603 void TargetMIPS32::lowerStore(const InstStore *Instr) {
5604 Operand *Value = Instr->getData();
5605 Operand *Addr = Instr->getAddr();
5606 OperandMIPS32Mem *NewAddr = formMemoryOperand(Addr, Value->getType());
5607 Type Ty = NewAddr->getType();
5608
5609 if (Ty == IceType_i64) {
5610 Value = legalizeUndef(Value);
5611 Variable *ValueHi = legalizeToReg(hiOperand(Value));
5612 Variable *ValueLo = legalizeToReg(loOperand(Value));
5613 _sw(ValueHi, llvm::cast<OperandMIPS32Mem>(hiOperand(NewAddr)));
5614 _sw(ValueLo, llvm::cast<OperandMIPS32Mem>(loOperand(NewAddr)));
5615 } else if (isVectorType(Value->getType())) {
5616 auto *DataVec = llvm::dyn_cast<VariableVecOn32>(Value);
5617 for (SizeT i = 0; i < DataVec->ContainersPerVector; ++i) {
5618 auto *DCont = legalizeToReg(DataVec->getContainers()[i]);
5619 auto *MCont = llvm::cast<OperandMIPS32Mem>(
5620 getOperandAtIndex(NewAddr, IceType_i32, i));
5621 _sw(DCont, MCont);
5622 }
5623 } else {
5624 Variable *ValueR = legalizeToReg(Value);
5625 _sw(ValueR, NewAddr);
5626 }
5627 }
5628
doAddressOptStore()5629 void TargetMIPS32::doAddressOptStore() {
5630 Inst *Instr = iteratorToInst(Context.getCur());
5631 assert(llvm::isa<InstStore>(Instr));
5632 Operand *Src = Instr->getSrc(0);
5633 Operand *Addr = Instr->getSrc(1);
5634 if (OperandMIPS32Mem *Mem =
5635 formAddressingMode(Src->getType(), Func, Instr, Addr)) {
5636 Instr->setDeleted();
5637 Context.insert<InstStore>(Src, Mem);
5638 }
5639 }
5640
lowerSwitch(const InstSwitch * Instr)5641 void TargetMIPS32::lowerSwitch(const InstSwitch *Instr) {
5642 Operand *Src = Instr->getComparison();
5643 SizeT NumCases = Instr->getNumCases();
5644 if (Src->getType() == IceType_i64) {
5645 Src = legalizeUndef(Src);
5646 Variable *Src0Lo = legalizeToReg(loOperand(Src));
5647 Variable *Src0Hi = legalizeToReg(hiOperand(Src));
5648 for (SizeT I = 0; I < NumCases; ++I) {
5649 Operand *ValueLo = Ctx->getConstantInt32(Instr->getValue(I));
5650 Operand *ValueHi = Ctx->getConstantInt32(Instr->getValue(I) >> 32);
5651 CfgNode *TargetTrue = Instr->getLabel(I);
5652 constexpr CfgNode *NoTarget = nullptr;
5653 ValueHi = legalizeToReg(ValueHi);
5654 InstMIPS32Label *IntraLabel = InstMIPS32Label::create(Func, this);
5655 _br(NoTarget, NoTarget, Src0Hi, ValueHi, IntraLabel,
5656 CondMIPS32::Cond::NE);
5657 ValueLo = legalizeToReg(ValueLo);
5658 _br(NoTarget, TargetTrue, Src0Lo, ValueLo, CondMIPS32::Cond::EQ);
5659 Context.insert(IntraLabel);
5660 }
5661 _br(Instr->getLabelDefault());
5662 return;
5663 }
5664 Variable *SrcVar = legalizeToReg(Src);
5665 assert(SrcVar->mustHaveReg());
5666 for (SizeT I = 0; I < NumCases; ++I) {
5667 Operand *Value = Ctx->getConstantInt32(Instr->getValue(I));
5668 CfgNode *TargetTrue = Instr->getLabel(I);
5669 constexpr CfgNode *NoTargetFalse = nullptr;
5670 Value = legalizeToReg(Value);
5671 _br(NoTargetFalse, TargetTrue, SrcVar, Value, CondMIPS32::Cond::EQ);
5672 }
5673 _br(Instr->getLabelDefault());
5674 }
5675
// Breakpoint lowering is not implemented for MIPS32; the instruction is
// reported through UnimplementedLoweringError.
void TargetMIPS32::lowerBreakpoint(const InstBreakpoint *Instr) {
  UnimplementedLoweringError(this, Instr);
}
5679
lowerUnreachable(const InstUnreachable *)5680 void TargetMIPS32::lowerUnreachable(const InstUnreachable *) {
5681 const uint32_t TrapCodeZero = 0;
5682 _teq(getZero(), getZero(), TrapCodeZero);
5683 }
5684
lowerOther(const Inst * Instr)5685 void TargetMIPS32::lowerOther(const Inst *Instr) {
5686 if (llvm::isa<InstMIPS32Sync>(Instr)) {
5687 _sync();
5688 } else {
5689 TargetLowering::lowerOther(Instr);
5690 }
5691 }
5692
// Turn an i64 Phi instruction into a pair of i32 Phi instructions, to preserve
// integrity of liveness analysis. Undef values are also turned into zeroes,
// since loOperand() and hiOperand() don't expect Undef input.
// The work is delegated to the shared PhiLowering::prelowerPhis32Bit helper,
// applied to the node currently held by the lowering context.
void TargetMIPS32::prelowerPhis() {
  PhiLowering::prelowerPhis32Bit<TargetMIPS32>(this, Context.getNode(), Func);
}
5699
postLower()5700 void TargetMIPS32::postLower() {
5701 if (Func->getOptLevel() == Opt_m1)
5702 return;
5703 markRedefinitions();
5704 Context.availabilityUpdate();
5705 }
5706
// Register randomization is not implemented for MIPS32. All parameters are
// ignored (the void casts silence unused-parameter warnings) and the call is
// reported through UnimplementedError.
void TargetMIPS32::makeRandomRegisterPermutation(
    llvm::SmallVectorImpl<RegNumT> &Permutation,
    const SmallBitVector &ExcludeRegisters, uint64_t Salt) const {
  (void)Permutation;
  (void)ExcludeRegisters;
  (void)Salt;
  UnimplementedError(getFlags());
}
5715
5716 /* TODO(jvoung): avoid duplicate symbols with multiple targets.
5717 void ConstantUndef::emitWithoutDollar(GlobalContext *) const {
5718 llvm_unreachable("Not expecting to emitWithoutDollar undef");
5719 }
5720
5721 void ConstantUndef::emit(GlobalContext *) const {
5722 llvm_unreachable("undef value encountered by emitter.");
5723 }
5724 */
5725
// Constructs the MIPS32 data-lowering object; the global context is simply
// forwarded to the TargetDataLowering base class.
TargetDataMIPS32::TargetDataMIPS32(GlobalContext *Ctx)
    : TargetDataLowering(Ctx) {}
5728
5729 // Generate .MIPS.abiflags section. This section contains a versioned data
5730 // structure with essential information required for loader to determine the
5731 // requirements of the application.
emitTargetRODataSections()5732 void TargetDataMIPS32::emitTargetRODataSections() {
5733 struct MipsABIFlagsSection Flags;
5734 ELFObjectWriter *Writer = Ctx->getObjectWriter();
5735 const std::string Name = ".MIPS.abiflags";
5736 const llvm::ELF::Elf64_Word ShType = llvm::ELF::SHT_MIPS_ABIFLAGS;
5737 const llvm::ELF::Elf64_Xword ShFlags = llvm::ELF::SHF_ALLOC;
5738 const llvm::ELF::Elf64_Xword ShAddralign = 8;
5739 const llvm::ELF::Elf64_Xword ShEntsize = sizeof(Flags);
5740 Writer->writeTargetRODataSection(
5741 Name, ShType, ShFlags, ShAddralign, ShEntsize,
5742 llvm::StringRef(reinterpret_cast<const char *>(&Flags), sizeof(Flags)));
5743 }
5744
lowerGlobals(const VariableDeclarationList & Vars,const std::string & SectionSuffix)5745 void TargetDataMIPS32::lowerGlobals(const VariableDeclarationList &Vars,
5746 const std::string &SectionSuffix) {
5747 const bool IsPIC = getFlags().getUseNonsfi();
5748 switch (getFlags().getOutFileType()) {
5749 case FT_Elf: {
5750 ELFObjectWriter *Writer = Ctx->getObjectWriter();
5751 Writer->writeDataSection(Vars, llvm::ELF::R_MIPS_32, SectionSuffix, IsPIC);
5752 } break;
5753 case FT_Asm:
5754 case FT_Iasm: {
5755 OstreamLocker L(Ctx);
5756 for (const VariableDeclaration *Var : Vars) {
5757 if (getFlags().matchTranslateOnly(Var->getName(), 0)) {
5758 emitGlobal(*Var, SectionSuffix);
5759 }
5760 }
5761 } break;
5762 }
5763 }
5764
5765 namespace {
5766 template <typename T> struct ConstantPoolEmitterTraits;
5767
5768 static_assert(sizeof(uint64_t) == 8,
5769 "uint64_t is supposed to be 8 bytes wide.");
5770
5771 // TODO(jaydeep.patil): implement the following when implementing constant
5772 // randomization:
5773 // * template <> struct ConstantPoolEmitterTraits<uint8_t>
5774 // * template <> struct ConstantPoolEmitterTraits<uint16_t>
5775 // * template <> struct ConstantPoolEmitterTraits<uint32_t>
5776 template <> struct ConstantPoolEmitterTraits<float> {
5777 using ConstantType = ConstantFloat;
5778 static constexpr Type IceType = IceType_f32;
5779 // AsmTag and TypeName can't be constexpr because llvm::StringRef is unhappy
5780 // about them being constexpr.
5781 static const char AsmTag[];
5782 static const char TypeName[];
bitcastToUint64Ice::MIPS32::__anon2d79189b0611::ConstantPoolEmitterTraits5783 static uint64_t bitcastToUint64(float Value) {
5784 static_assert(sizeof(Value) == sizeof(uint32_t),
5785 "Float should be 4 bytes.");
5786 const uint32_t IntValue = Utils::bitCopy<uint32_t>(Value);
5787 return static_cast<uint64_t>(IntValue);
5788 }
5789 };
5790 const char ConstantPoolEmitterTraits<float>::AsmTag[] = ".word";
5791 const char ConstantPoolEmitterTraits<float>::TypeName[] = "f32";
5792
5793 template <> struct ConstantPoolEmitterTraits<double> {
5794 using ConstantType = ConstantDouble;
5795 static constexpr Type IceType = IceType_f64;
5796 static const char AsmTag[];
5797 static const char TypeName[];
bitcastToUint64Ice::MIPS32::__anon2d79189b0611::ConstantPoolEmitterTraits5798 static uint64_t bitcastToUint64(double Value) {
5799 static_assert(sizeof(double) == sizeof(uint64_t),
5800 "Double should be 8 bytes.");
5801 return Utils::bitCopy<uint64_t>(Value);
5802 }
5803 };
5804 const char ConstantPoolEmitterTraits<double>::AsmTag[] = ".quad";
5805 const char ConstantPoolEmitterTraits<double>::TypeName[] = "f64";
5806
5807 template <typename T>
emitConstant(Ostream & Str,const typename ConstantPoolEmitterTraits<T>::ConstantType * Const)5808 void emitConstant(
5809 Ostream &Str,
5810 const typename ConstantPoolEmitterTraits<T>::ConstantType *Const) {
5811 if (!BuildDefs::dump())
5812 return;
5813 using Traits = ConstantPoolEmitterTraits<T>;
5814 Str << Const->getLabelName();
5815 T Value = Const->getValue();
5816 Str << ":\n\t" << Traits::AsmTag << "\t0x";
5817 Str.write_hex(Traits::bitcastToUint64(Value));
5818 Str << "\t/* " << Traits::TypeName << " " << Value << " */\n";
5819 }
5820
emitConstantPool(GlobalContext * Ctx)5821 template <typename T> void emitConstantPool(GlobalContext *Ctx) {
5822 if (!BuildDefs::dump())
5823 return;
5824 using Traits = ConstantPoolEmitterTraits<T>;
5825 static constexpr size_t MinimumAlignment = 4;
5826 SizeT Align = std::max(MinimumAlignment, typeAlignInBytes(Traits::IceType));
5827 assert((Align % 4) == 0 && "Constants should be aligned");
5828 Ostream &Str = Ctx->getStrEmit();
5829 ConstantList Pool = Ctx->getConstantPool(Traits::IceType);
5830 Str << "\t.section\t.rodata.cst" << Align << ",\"aM\",%progbits," << Align
5831 << "\n"
5832 << "\t.align\t" << (Align == 4 ? 2 : 3) << "\n";
5833 if (getFlags().getReorderPooledConstants()) {
5834 // TODO(jaydeep.patil): add constant pooling.
5835 UnimplementedError(getFlags());
5836 }
5837 for (Constant *C : Pool) {
5838 if (!C->getShouldBePooled()) {
5839 continue;
5840 }
5841 emitConstant<T>(Str, llvm::dyn_cast<typename Traits::ConstantType>(C));
5842 }
5843 }
5844 } // end of anonymous namespace
5845
lowerConstants()5846 void TargetDataMIPS32::lowerConstants() {
5847 if (getFlags().getDisableTranslation())
5848 return;
5849 switch (getFlags().getOutFileType()) {
5850 case FT_Elf: {
5851 ELFObjectWriter *Writer = Ctx->getObjectWriter();
5852 Writer->writeConstantPool<ConstantFloat>(IceType_f32);
5853 Writer->writeConstantPool<ConstantDouble>(IceType_f64);
5854 } break;
5855 case FT_Asm:
5856 case FT_Iasm: {
5857 OstreamLocker _(Ctx);
5858 emitConstantPool<float>(Ctx);
5859 emitConstantPool<double>(Ctx);
5860 break;
5861 }
5862 }
5863 }
5864
// Jump-table lowering hook. Nothing is emitted here: the function returns
// immediately when translation is disabled and otherwise falls off the end
// without doing any work.
void TargetDataMIPS32::lowerJumpTables() {
  if (getFlags().getDisableTranslation())
    return;
}
5869
5870 // Helper for legalize() to emit the right code to lower an operand to a
5871 // register of the appropriate type.
copyToReg(Operand * Src,RegNumT RegNum)5872 Variable *TargetMIPS32::copyToReg(Operand *Src, RegNumT RegNum) {
5873 Type Ty = Src->getType();
5874 Variable *Reg = makeReg(Ty, RegNum);
5875 if (isVectorType(Ty)) {
5876 llvm::report_fatal_error("Invalid copy from vector type.");
5877 } else {
5878 if (auto *Mem = llvm::dyn_cast<OperandMIPS32Mem>(Src)) {
5879 _lw(Reg, Mem);
5880 } else {
5881 _mov(Reg, Src);
5882 }
5883 }
5884 return Reg;
5885 }
5886
// Converts an operand into a form permitted by the Allowed mask, emitting
// whatever instructions are needed, and returns the resulting operand (which
// may be From itself).
//   From    - the operand to legalize.
//   Allowed - bitmask of acceptable operand kinds; Legal_Reg must be set.
//   RegNum  - if it has a value, the physical register requested for newly
//             created register variables.
// Memory operands, constants and variables are each handled by a dedicated
// section below; an operand matching none of them is returned unchanged.
Operand *TargetMIPS32::legalize(Operand *From, LegalMask Allowed,
                                RegNumT RegNum) {
  Type Ty = From->getType();
  // Assert that a physical register is allowed. To date, all calls
  // to legalize() allow a physical register. Legal_Flex converts
  // registers to the right type OperandMIPS32FlexReg as needed.
  assert(Allowed & Legal_Reg);

  // When no specific register is requested, try to reuse a variable that the
  // availability tracker reports as holding the same value as From.
  if (RegNum.hasNoValue()) {
    if (Variable *Subst = getContext().availabilityGet(From)) {
      // At this point we know there is a potential substitution available.
      if (!Subst->isRematerializable() && Subst->mustHaveReg() &&
          !Subst->hasReg()) {
        // At this point we know the substitution will have a register.
        if (From->getType() == Subst->getType()) {
          // At this point we know the substitution's register is compatible.
          return Subst;
        }
      }
    }
  }

  // Go through the various types of operands:
  // OperandMIPS32Mem, Constant, and Variable.
  // Given the above assertion, if type of operand is not legal
  // (e.g., OperandMIPS32Mem and !Legal_Mem), we can always copy
  // to a register.
  if (auto *Mem = llvm::dyn_cast<OperandMIPS32Mem>(From)) {
    // Base must be in a physical register.
    Variable *Base = Mem->getBase();
    ConstantInteger32 *Offset = llvm::cast<ConstantInteger32>(Mem->getOffset());
    Variable *RegBase = nullptr;
    assert(Base);

    // A rematerializable base is accepted as-is; anything else ends up in a
    // register.
    RegBase = llvm::cast<Variable>(
        legalize(Base, Legal_Reg | Legal_Rematerializable));

    // A non-zero offset must be encodable for this access type; there is no
    // fallback sequence here, so an out-of-range offset is fatal.
    if (Offset != nullptr && Offset->getValue() != 0) {
      static constexpr bool ZeroExt = false;
      if (!OperandMIPS32Mem::canHoldOffset(Ty, ZeroExt, Offset->getValue())) {
        llvm::report_fatal_error("Invalid memory offset.");
      }
    }

    // Create a new operand if there was a change.
    if (Base != RegBase) {
      Mem = OperandMIPS32Mem::create(Func, Ty, RegBase, Offset,
                                     Mem->getAddrMode());
    }

    // Return the memory operand itself when that is allowed; otherwise load
    // it into a new register with lw.
    if (Allowed & Legal_Mem) {
      From = Mem;
    } else {
      Variable *Reg = makeReg(Ty, RegNum);
      _lw(Reg, Mem);
      From = Reg;
    }
    return From;
  }

  if (llvm::isa<Constant>(From)) {
    // Undef is replaced first; a vector-typed replacement is returned
    // directly without further processing.
    if (llvm::isa<ConstantUndef>(From)) {
      From = legalizeUndef(From, RegNum);
      if (isVectorType(Ty))
        return From;
    }
    if (auto *C = llvm::dyn_cast<ConstantRelocatable>(From)) {
      // Relocatable symbol: lui with the RO_Hi relocation followed by addiu
      // with the RO_Lo relocation.
      Variable *Reg = makeReg(Ty, RegNum);
      Variable *TReg = makeReg(Ty, RegNum);
      _lui(TReg, C, RO_Hi);
      _addiu(Reg, TReg, C, RO_Lo);
      return Reg;
    } else if (auto *C32 = llvm::dyn_cast<ConstantInteger32>(From)) {
      const uint32_t Value = C32->getValue();
      // Use addiu if the immediate is a 16bit value. Otherwise load it
      // using a lui-ori instructions.
      Variable *Reg = makeReg(Ty, RegNum);
      if (isInt<16>(int32_t(Value))) {
        Variable *Zero = makeReg(Ty, RegMIPS32::Reg_ZERO);
        Context.insert<InstFakeDef>(Zero);
        _addiu(Reg, Zero, Value);
      } else {
        uint32_t UpperBits = (Value >> 16) & 0xFFFF;
        uint32_t LowerBits = Value & 0xFFFF;
        // The ori is only emitted when the low half is non-zero; otherwise a
        // single lui into the result register is enough.
        if (LowerBits) {
          Variable *TReg = makeReg(Ty, RegNum);
          _lui(TReg, Ctx->getConstantInt32(UpperBits));
          _ori(Reg, TReg, LowerBits);
        } else {
          _lui(Reg, Ctx->getConstantInt32(UpperBits));
        }
      }
      return Reg;
    } else if (isScalarFloatingType(Ty)) {
      auto *CFrom = llvm::cast<Constant>(From);
      Variable *TReg = makeReg(Ty);
      if (!CFrom->getShouldBePooled()) {
        // Float/Double constant 0 is not pooled.
        Context.insert<InstFakeDef>(TReg);
        _mov(TReg, getZero());
      } else {
        // Load floats/doubles from literal pool.
        // The address is formed from the constant's label: lui supplies the
        // RO_Hi part and the load itself carries the RO_Lo relocation.
        Constant *Offset = Ctx->getConstantSym(0, CFrom->getLabelName());
        Variable *TReg1 = makeReg(getPointerType());
        _lui(TReg1, Offset, RO_Hi);
        OperandMIPS32Mem *Addr =
            OperandMIPS32Mem::create(Func, Ty, TReg1, Offset);
        if (Ty == IceType_f32)
          Sandboxer(this).lwc1(TReg, Addr, RO_Lo);
        else
          Sandboxer(this).ldc1(TReg, Addr, RO_Lo);
      }
      return copyToReg(TReg, RegNum);
    }
    // Any other constant kind falls through to the code below.
  }

  if (auto *Var = llvm::dyn_cast<Variable>(From)) {
    if (Var->isRematerializable()) {
      if (Allowed & Legal_Rematerializable) {
        return From;
      }

      // Rematerializable variables are not acceptable to the caller: copy
      // the value into a fresh register.
      Variable *T = makeReg(Var->getType(), RegNum);
      _mov(T, Var);
      return T;
    }
    // Check if the variable is guaranteed a physical register. This
    // can happen either when the variable is pre-colored or when it is
    // assigned infinite weight.
    bool MustHaveRegister = (Var->hasReg() || Var->mustHaveReg());
    // We need a new physical register for the operand if:
    //   Mem is not allowed and Var isn't guaranteed a physical
    //   register, or
    //   RegNum is required and Var->getRegNum() doesn't match.
    if ((!(Allowed & Legal_Mem) && !MustHaveRegister) ||
        (RegNum.hasValue() && RegNum != Var->getRegNum())) {
      From = copyToReg(From, RegNum);
    }
    return From;
  }
  // Neither memory operand, handled constant, nor variable: return unchanged.
  return From;
}
6029
// Predicates that decide which instructions take part in bool folding, i.e.
// which i1-producing instructions are tracked and which instructions may
// consume their results.
namespace BoolFolding {
// TODO(sagar.thakur): Add remaining instruction kinds to shouldTrackProducer()
// and isValidConsumer()

// Returns true if Instr is a kind of instruction whose result is tracked as
// a folding candidate. Currently only Icmp qualifies.
bool shouldTrackProducer(const Inst &Instr) {
  return Instr.getKind() == Inst::Icmp;
}

// Returns true if Instr is a kind of instruction that may consume a tracked
// producer. Currently only Br qualifies.
bool isValidConsumer(const Inst &Instr) { return Instr.getKind() == Inst::Br; }
} // end of namespace BoolFolding
6039
// Scans the instructions of Node and populates KnownComputations with the
// i1-producing instructions that can be folded into their consumer. An entry
// survives only if every use seen is by a valid consumer, it is used exactly
// once at most, and its last use was observed in this node. Surviving
// producers are marked dead so that normal lowering skips them.
void TargetMIPS32::ComputationTracker::recordProducers(CfgNode *Node) {
  for (Inst &Instr : Node->getInsts()) {
    if (Instr.isDeleted())
      continue;
    // Check whether Instr is a valid producer.
    Variable *Dest = Instr.getDest();
    if (Dest // only consider instructions with an actual dest var; and
        && Dest->getType() == IceType_i1 // only bool-type dest vars; and
        && BoolFolding::shouldTrackProducer(Instr)) { // white-listed instr.
      KnownComputations.emplace(Dest->getIndex(),
                                ComputationEntry(&Instr, IceType_i1));
    }
    // Check each src variable against the map.
    FOREACH_VAR_IN_INST(Var, Instr) {
      SizeT VarNum = Var->getIndex();
      auto ComputationIter = KnownComputations.find(VarNum);
      if (ComputationIter == KnownComputations.end()) {
        continue;
      }

      // Count the use, then drop the entry if this consumer cannot have the
      // producer folded into it.
      ++ComputationIter->second.NumUses;
      switch (ComputationIter->second.ComputationType) {
      default:
        KnownComputations.erase(VarNum);
        continue;
      case IceType_i1:
        if (!BoolFolding::isValidConsumer(Instr)) {
          KnownComputations.erase(VarNum);
          continue;
        }
        break;
      }

      // Seeing the last use inside this node means the value does not
      // escape it.
      if (Instr.isLastUse(Var)) {
        ComputationIter->second.IsLiveOut = false;
      }
    }
  }

  // erase() returns the next iterator, so the loop only advances explicitly
  // when the entry is kept.
  for (auto Iter = KnownComputations.begin(), End = KnownComputations.end();
       Iter != End;) {
    // Disable the folding if its dest may be live beyond this block.
    if (Iter->second.IsLiveOut || Iter->second.NumUses > 1) {
      Iter = KnownComputations.erase(Iter);
      continue;
    }

    // Mark as "dead" rather than outright deleting. This is so that other
    // peephole style optimizations during or before lowering have access to
    // this instruction in undeleted form. See for example
    // tryOptimizedCmpxchgCmpBr().
    Iter->second.Instr->setDead();
    ++Iter;
  }
}
6095
// Constructs the MIPS32 header-lowering object; the global context is simply
// forwarded to the TargetHeaderLowering base class.
TargetHeaderMIPS32::TargetHeaderMIPS32(GlobalContext *Ctx)
    : TargetHeaderLowering(Ctx) {}
6098
lower()6099 void TargetHeaderMIPS32::lower() {
6100 if (!BuildDefs::dump())
6101 return;
6102 OstreamLocker L(Ctx);
6103 Ostream &Str = Ctx->getStrEmit();
6104 Str << "\t.set\t"
6105 << "nomicromips\n";
6106 Str << "\t.set\t"
6107 << "nomips16\n";
6108 Str << "\t.set\t"
6109 << "noat\n";
6110 if (getFlags().getUseSandboxing())
6111 Str << "\t.bundle_align_mode 4\n";
6112 }
6113
// Out-of-class definitions of TargetMIPS32's static register tables: two
// bit-vector sets indexed by register class (RCMIPS32_NUM entries each) and
// one alias set per register (RegMIPS32::Reg_NUM entries).
SmallBitVector TargetMIPS32::TypeToRegisterSet[RCMIPS32_NUM];
SmallBitVector TargetMIPS32::TypeToRegisterSetUnfiltered[RCMIPS32_NUM];
SmallBitVector TargetMIPS32::RegisterAliases[RegMIPS32::Reg_NUM];
6117
// Records the lowering target and the bundle-lock option that
// createAutoBundle() later uses when opening a bundle.
TargetMIPS32::Sandboxer::Sandboxer(TargetMIPS32 *Target,
                                   InstBundleLock::Option BundleOption)
    : Target(Target), BundleOption(BundleOption) {}
6121
// Nothing to do explicitly; members are destroyed automatically.
TargetMIPS32::Sandboxer::~Sandboxer() {}
6123
// Creates the AutoBundle for this sandboxer, using the target and bundle
// option supplied at construction, and stores it in Bundler.
void TargetMIPS32::Sandboxer::createAutoBundle() {
  Bundler = makeUnique<AutoBundle>(Target, BundleOption);
}
6127
addiu_sp(uint32_t StackOffset)6128 void TargetMIPS32::Sandboxer::addiu_sp(uint32_t StackOffset) {
6129 Variable *SP = Target->getPhysicalRegister(RegMIPS32::Reg_SP);
6130 if (!Target->NeedSandboxing) {
6131 Target->_addiu(SP, SP, StackOffset);
6132 return;
6133 }
6134 auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
6135 Target->Context.insert<InstFakeDef>(T7);
6136 createAutoBundle();
6137 Target->_addiu(SP, SP, StackOffset);
6138 Target->_and(SP, SP, T7);
6139 }
6140
lw(Variable * Dest,OperandMIPS32Mem * Mem)6141 void TargetMIPS32::Sandboxer::lw(Variable *Dest, OperandMIPS32Mem *Mem) {
6142 Variable *Base = Mem->getBase();
6143 if (Target->NeedSandboxing && (Target->getStackReg() != Base->getRegNum()) &&
6144 (RegMIPS32::Reg_T8 != Base->getRegNum())) {
6145 auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
6146 Target->Context.insert<InstFakeDef>(T7);
6147 createAutoBundle();
6148 Target->_and(Base, Base, T7);
6149 }
6150 Target->_lw(Dest, Mem);
6151 if (Target->NeedSandboxing && (Dest->getRegNum() == Target->getStackReg())) {
6152 auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
6153 Target->Context.insert<InstFakeDef>(T7);
6154 Target->_and(Dest, Dest, T7);
6155 }
6156 }
6157
ll(Variable * Dest,OperandMIPS32Mem * Mem)6158 void TargetMIPS32::Sandboxer::ll(Variable *Dest, OperandMIPS32Mem *Mem) {
6159 Variable *Base = Mem->getBase();
6160 if (Target->NeedSandboxing && (Target->getStackReg() != Base->getRegNum())) {
6161 auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
6162 Target->Context.insert<InstFakeDef>(T7);
6163 createAutoBundle();
6164 Target->_and(Base, Base, T7);
6165 }
6166 Target->_ll(Dest, Mem);
6167 if (Target->NeedSandboxing && (Dest->getRegNum() == Target->getStackReg())) {
6168 auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
6169 Target->Context.insert<InstFakeDef>(T7);
6170 Target->_and(Dest, Dest, T7);
6171 }
6172 }
6173
sc(Variable * Dest,OperandMIPS32Mem * Mem)6174 void TargetMIPS32::Sandboxer::sc(Variable *Dest, OperandMIPS32Mem *Mem) {
6175 Variable *Base = Mem->getBase();
6176 if (Target->NeedSandboxing && (Target->getStackReg() != Base->getRegNum())) {
6177 auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
6178 Target->Context.insert<InstFakeDef>(T7);
6179 createAutoBundle();
6180 Target->_and(Base, Base, T7);
6181 }
6182 Target->_sc(Dest, Mem);
6183 }
6184
sw(Variable * Dest,OperandMIPS32Mem * Mem)6185 void TargetMIPS32::Sandboxer::sw(Variable *Dest, OperandMIPS32Mem *Mem) {
6186 Variable *Base = Mem->getBase();
6187 if (Target->NeedSandboxing && (Target->getStackReg() != Base->getRegNum())) {
6188 auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
6189 Target->Context.insert<InstFakeDef>(T7);
6190 createAutoBundle();
6191 Target->_and(Base, Base, T7);
6192 }
6193 Target->_sw(Dest, Mem);
6194 }
6195
lwc1(Variable * Dest,OperandMIPS32Mem * Mem,RelocOp Reloc)6196 void TargetMIPS32::Sandboxer::lwc1(Variable *Dest, OperandMIPS32Mem *Mem,
6197 RelocOp Reloc) {
6198 Variable *Base = Mem->getBase();
6199 if (Target->NeedSandboxing && (Target->getStackReg() != Base->getRegNum())) {
6200 auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
6201 Target->Context.insert<InstFakeDef>(T7);
6202 createAutoBundle();
6203 Target->_and(Base, Base, T7);
6204 }
6205 Target->_lwc1(Dest, Mem, Reloc);
6206 if (Target->NeedSandboxing && (Dest->getRegNum() == Target->getStackReg())) {
6207 auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
6208 Target->Context.insert<InstFakeDef>(T7);
6209 Target->_and(Dest, Dest, T7);
6210 }
6211 }
6212
ldc1(Variable * Dest,OperandMIPS32Mem * Mem,RelocOp Reloc)6213 void TargetMIPS32::Sandboxer::ldc1(Variable *Dest, OperandMIPS32Mem *Mem,
6214 RelocOp Reloc) {
6215 Variable *Base = Mem->getBase();
6216 if (Target->NeedSandboxing && (Target->getStackReg() != Base->getRegNum())) {
6217 auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
6218 Target->Context.insert<InstFakeDef>(T7);
6219 createAutoBundle();
6220 Target->_and(Base, Base, T7);
6221 }
6222 Target->_ldc1(Dest, Mem, Reloc);
6223 if (Target->NeedSandboxing && (Dest->getRegNum() == Target->getStackReg())) {
6224 auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
6225 Target->Context.insert<InstFakeDef>(T7);
6226 Target->_and(Dest, Dest, T7);
6227 }
6228 }
6229
ret(Variable * RetAddr,Variable * RetValue)6230 void TargetMIPS32::Sandboxer::ret(Variable *RetAddr, Variable *RetValue) {
6231 if (!Target->NeedSandboxing) {
6232 Target->_ret(RetAddr, RetValue);
6233 }
6234 auto *T6 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T6);
6235 Target->Context.insert<InstFakeDef>(T6);
6236 createAutoBundle();
6237 Target->_and(RetAddr, RetAddr, T6);
6238 Target->_ret(RetAddr, RetValue);
6239 }
6240
reset_sp(Variable * Src)6241 void TargetMIPS32::Sandboxer::reset_sp(Variable *Src) {
6242 Variable *SP = Target->getPhysicalRegister(RegMIPS32::Reg_SP);
6243 if (!Target->NeedSandboxing) {
6244 Target->_mov(SP, Src);
6245 return;
6246 }
6247 auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
6248 Target->Context.insert<InstFakeDef>(T7);
6249 createAutoBundle();
6250 Target->_mov(SP, Src);
6251 Target->_and(SP, SP, T7);
6252 Target->getContext().insert<InstFakeUse>(SP);
6253 }
6254
jal(Variable * ReturnReg,Operand * CallTarget)6255 InstMIPS32Call *TargetMIPS32::Sandboxer::jal(Variable *ReturnReg,
6256 Operand *CallTarget) {
6257 if (Target->NeedSandboxing) {
6258 createAutoBundle();
6259 if (auto *CallTargetR = llvm::dyn_cast<Variable>(CallTarget)) {
6260 auto *T6 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T6);
6261 Target->Context.insert<InstFakeDef>(T6);
6262 Target->_and(CallTargetR, CallTargetR, T6);
6263 }
6264 }
6265 return Target->Context.insert<InstMIPS32Call>(ReturnReg, CallTarget);
6266 }
6267
6268 } // end of namespace MIPS32
6269 } // end of namespace Ice
6270