1 //
2 //                        The Subzero Code Generator
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// \brief Implements the TargetLoweringMIPS32 class, which consists almost
11 /// entirely of the lowering sequence for each high-level instruction.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "IceTargetLoweringMIPS32.h"
16 
17 #include "IceCfg.h"
18 #include "IceCfgNode.h"
19 #include "IceClFlags.h"
20 #include "IceDefs.h"
21 #include "IceELFObjectWriter.h"
22 #include "IceGlobalInits.h"
23 #include "IceInstMIPS32.h"
24 #include "IceInstVarIter.h"
25 #include "IceLiveness.h"
26 #include "IceOperand.h"
27 #include "IcePhiLoweringImpl.h"
28 #include "IceRegistersMIPS32.h"
29 #include "IceTargetLoweringMIPS32.def"
30 #include "IceUtils.h"
31 #include "llvm/Support/MathExtras.h"
32 
namespace MIPS32 {

// Factory entry points used by the generic Subzero driver to instantiate the
// MIPS32 backend without depending on its concrete target classes.

std::unique_ptr<::Ice::TargetLowering> createTargetLowering(::Ice::Cfg *Func) {
  return ::Ice::MIPS32::TargetMIPS32::create(Func);
}

std::unique_ptr<::Ice::TargetDataLowering>
createTargetDataLowering(::Ice::GlobalContext *Ctx) {
  return ::Ice::MIPS32::TargetDataMIPS32::create(Ctx);
}

std::unique_ptr<::Ice::TargetHeaderLowering>
createTargetHeaderLowering(::Ice::GlobalContext *Ctx) {
  return ::Ice::MIPS32::TargetHeaderMIPS32::create(Ctx);
}

// One-time, process-wide initialization of MIPS32 static tables; forwards to
// TargetMIPS32::staticInit.
void staticInit(::Ice::GlobalContext *Ctx) {
  ::Ice::MIPS32::TargetMIPS32::staticInit(Ctx);
}

// Returns whether constant C should be emitted in a constant pool rather than
// inline; forwards to the target's policy.
bool shouldBePooled(const ::Ice::Constant *C) {
  return ::Ice::MIPS32::TargetMIPS32::shouldBePooled(C);
}

// Returns the Ice type used to represent pointers on MIPS32.
::Ice::Type getPointerType() {
  return ::Ice::MIPS32::TargetMIPS32::getPointerType();
}

} // end of namespace MIPS32
61 
62 namespace Ice {
63 namespace MIPS32 {
64 
65 using llvm::isInt;
66 
67 namespace {
68 
69 // The maximum number of arguments to pass in GPR registers.
70 constexpr uint32_t MIPS32_MAX_GPR_ARG = 4;
71 
72 std::array<RegNumT, MIPS32_MAX_GPR_ARG> GPRArgInitializer;
73 std::array<RegNumT, MIPS32_MAX_GPR_ARG / 2> I64ArgInitializer;
74 
75 constexpr uint32_t MIPS32_MAX_FP_ARG = 2;
76 
77 std::array<RegNumT, MIPS32_MAX_FP_ARG> FP32ArgInitializer;
78 std::array<RegNumT, MIPS32_MAX_FP_ARG> FP64ArgInitializer;
79 
getRegClassName(RegClass C)80 const char *getRegClassName(RegClass C) {
81   auto ClassNum = static_cast<RegClassMIPS32>(C);
82   assert(ClassNum < RCMIPS32_NUM);
83   switch (ClassNum) {
84   default:
85     assert(C < RC_Target);
86     return regClassString(C);
87     // Add handling of new register classes below.
88   }
89 }
90 
91 // Stack alignment
92 constexpr uint32_t MIPS32_STACK_ALIGNMENT_BYTES = 16;
93 
94 // Value is in bytes. Return Value adjusted to the next highest multiple of the
95 // stack alignment required for the given type.
applyStackAlignmentTy(uint32_t Value,Type Ty)96 uint32_t applyStackAlignmentTy(uint32_t Value, Type Ty) {
97   size_t typeAlignInBytes = typeWidthInBytes(Ty);
98   // Vectors are stored on stack with the same alignment as that of int type
99   if (isVectorType(Ty))
100     typeAlignInBytes = typeWidthInBytes(IceType_i64);
101   return Utils::applyAlignment(Value, typeAlignInBytes);
102 }
103 
104 // Value is in bytes. Return Value adjusted to the next highest multiple of the
105 // stack alignment.
applyStackAlignment(uint32_t Value)106 uint32_t applyStackAlignment(uint32_t Value) {
107   return Utils::applyAlignment(Value, MIPS32_STACK_ALIGNMENT_BYTES);
108 }
109 
110 } // end of anonymous namespace
111 
// Constructs the MIPS32 lowering for a single function. Sandboxing is enabled
// only when the global sandboxing mode is Native Client.
TargetMIPS32::TargetMIPS32(Cfg *Func)
    : TargetLowering(Func), NeedSandboxing(SandboxingType == ST_NaCl) {}
114 
assignVarStackSlots(VarList & SortedSpilledVariables,size_t SpillAreaPaddingBytes,size_t SpillAreaSizeBytes,size_t GlobalsAndSubsequentPaddingSize)115 void TargetMIPS32::assignVarStackSlots(VarList &SortedSpilledVariables,
116                                        size_t SpillAreaPaddingBytes,
117                                        size_t SpillAreaSizeBytes,
118                                        size_t GlobalsAndSubsequentPaddingSize) {
119   const VariablesMetadata *VMetadata = Func->getVMetadata();
120   size_t GlobalsSpaceUsed = SpillAreaPaddingBytes;
121   size_t NextStackOffset = SpillAreaPaddingBytes;
122   CfgVector<size_t> LocalsSize(Func->getNumNodes());
123   const bool SimpleCoalescing = !callsReturnsTwice();
124   for (Variable *Var : SortedSpilledVariables) {
125     size_t Increment = typeWidthInBytesOnStack(Var->getType());
126     if (SimpleCoalescing && VMetadata->isTracked(Var)) {
127       if (VMetadata->isMultiBlock(Var)) {
128         GlobalsSpaceUsed += Increment;
129         NextStackOffset = GlobalsSpaceUsed;
130       } else {
131         SizeT NodeIndex = VMetadata->getLocalUseNode(Var)->getIndex();
132         LocalsSize[NodeIndex] += Increment;
133         NextStackOffset = SpillAreaPaddingBytes +
134                           GlobalsAndSubsequentPaddingSize +
135                           LocalsSize[NodeIndex];
136       }
137     } else {
138       NextStackOffset += Increment;
139     }
140     Var->setStackOffset(SpillAreaSizeBytes - NextStackOffset);
141   }
142 }
143 
// One-time initialization of the register tables and the type-to-register-set
// mapping. Expands REGMIPS32_TABLE via the X macro to classify each register
// (integer, i64 pair, f32, f64, vector) and to build its alias bit-vector.
// Ctx is used only by filterTypeToRegisterSet at the end.
void TargetMIPS32::staticInit(GlobalContext *Ctx) {
  (void)Ctx;
  RegNumT::setLimit(RegMIPS32::Reg_NUM);
  SmallBitVector IntegerRegisters(RegMIPS32::Reg_NUM);
  SmallBitVector I64PairRegisters(RegMIPS32::Reg_NUM);
  SmallBitVector Float32Registers(RegMIPS32::Reg_NUM);
  SmallBitVector Float64Registers(RegMIPS32::Reg_NUM);
  SmallBitVector VectorRegisters(RegMIPS32::Reg_NUM);
  SmallBitVector InvalidRegisters(RegMIPS32::Reg_NUM);
#define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt,    \
          isI64Pair, isFP32, isFP64, isVec128, alias_init)                     \
  IntegerRegisters[RegMIPS32::val] = isInt;                                    \
  I64PairRegisters[RegMIPS32::val] = isI64Pair;                                \
  Float32Registers[RegMIPS32::val] = isFP32;                                   \
  Float64Registers[RegMIPS32::val] = isFP64;                                   \
  VectorRegisters[RegMIPS32::val] = isVec128;                                  \
  RegisterAliases[RegMIPS32::val].resize(RegMIPS32::Reg_NUM);                  \
  for (SizeT RegAlias : alias_init) {                                          \
    assert(!RegisterAliases[RegMIPS32::val][RegAlias] &&                       \
           "Duplicate alias for " #val);                                       \
    RegisterAliases[RegMIPS32::val].set(RegAlias);                             \
  }                                                                            \
  RegisterAliases[RegMIPS32::val].resize(RegMIPS32::Reg_NUM);                  \
  assert(RegisterAliases[RegMIPS32::val][RegMIPS32::val]);
  REGMIPS32_TABLE;
#undef X

  // TODO(mohit.bhakkad): Change these inits once we provide argument related
  // field in register tables
  // Argument registers: A0..A3 for scalars, A0A1/A2A3 pairs for i64, and
  // F12/F14 (or F12F13/F14F15) for floating-point arguments.
  for (size_t i = 0; i < MIPS32_MAX_GPR_ARG; i++)
    GPRArgInitializer[i] = RegNumT::fixme(RegMIPS32::Reg_A0 + i);

  for (size_t i = 0; i < MIPS32_MAX_GPR_ARG / 2; i++)
    I64ArgInitializer[i] = RegNumT::fixme(RegMIPS32::Reg_A0A1 + i);

  for (size_t i = 0; i < MIPS32_MAX_FP_ARG; i++) {
    FP32ArgInitializer[i] = RegNumT::fixme(RegMIPS32::Reg_F12 + i * 2);
    FP64ArgInitializer[i] = RegNumT::fixme(RegMIPS32::Reg_F12F13 + i);
  }

  // Map every Ice type to the set of registers that can hold it. All integer
  // types (including i64) use the scalar integer set; all vector types share
  // the vector set.
  TypeToRegisterSet[IceType_void] = InvalidRegisters;
  TypeToRegisterSet[IceType_i1] = IntegerRegisters;
  TypeToRegisterSet[IceType_i8] = IntegerRegisters;
  TypeToRegisterSet[IceType_i16] = IntegerRegisters;
  TypeToRegisterSet[IceType_i32] = IntegerRegisters;
  TypeToRegisterSet[IceType_i64] = IntegerRegisters;
  TypeToRegisterSet[IceType_f32] = Float32Registers;
  TypeToRegisterSet[IceType_f64] = Float64Registers;
  TypeToRegisterSet[IceType_v4i1] = VectorRegisters;
  TypeToRegisterSet[IceType_v8i1] = VectorRegisters;
  TypeToRegisterSet[IceType_v16i1] = VectorRegisters;
  TypeToRegisterSet[IceType_v16i8] = VectorRegisters;
  TypeToRegisterSet[IceType_v8i16] = VectorRegisters;
  TypeToRegisterSet[IceType_v4i32] = VectorRegisters;
  TypeToRegisterSet[IceType_v4f32] = VectorRegisters;

  // Keep an unfiltered copy before applying the command-line register
  // include/exclude filters.
  for (size_t i = 0; i < llvm::array_lengthof(TypeToRegisterSet); ++i)
    TypeToRegisterSetUnfiltered[i] = TypeToRegisterSet[i];

  filterTypeToRegisterSet(Ctx, RegMIPS32::Reg_NUM, TypeToRegisterSet,
                          llvm::array_lengthof(TypeToRegisterSet),
                          RegMIPS32::getRegName, getRegClassName);
}
207 
unsetIfNonLeafFunc()208 void TargetMIPS32::unsetIfNonLeafFunc() {
209   for (CfgNode *Node : Func->getNodes()) {
210     for (Inst &Instr : Node->getInsts()) {
211       if (llvm::isa<InstCall>(&Instr)) {
212         // Unset MaybeLeafFunc if call instruction exists.
213         MaybeLeafFunc = false;
214         return;
215       }
216     }
217   }
218 }
219 
getStackAlignment() const220 uint32_t TargetMIPS32::getStackAlignment() const {
221   return MIPS32_STACK_ALIGNMENT_BYTES;
222 }
223 
getCallStackArgumentsSizeBytes(const InstCall * Call)224 uint32_t TargetMIPS32::getCallStackArgumentsSizeBytes(const InstCall *Call) {
225   TargetMIPS32::CallingConv CC;
226   RegNumT DummyReg;
227   size_t OutArgsSizeBytes = 0;
228   Variable *Dest = Call->getDest();
229   bool PartialOnStack = false;
230   if (Dest != nullptr && isVectorFloatingType(Dest->getType())) {
231     CC.discardReg(RegMIPS32::Reg_A0);
232     // Next vector is partially on stack
233     PartialOnStack = true;
234   }
235   for (SizeT i = 0, NumArgs = Call->getNumArgs(); i < NumArgs; ++i) {
236     Operand *Arg = legalizeUndef(Call->getArg(i));
237     const Type Ty = Arg->getType();
238     RegNumT RegNum;
239     if (CC.argInReg(Ty, i, &RegNum)) {
240       // If PartialOnStack is true and if this is a vector type then last two
241       // elements are on stack
242       if (PartialOnStack && isVectorType(Ty)) {
243         OutArgsSizeBytes = applyStackAlignmentTy(OutArgsSizeBytes, IceType_i64);
244         OutArgsSizeBytes += typeWidthInBytesOnStack(IceType_i32) * 2;
245       }
246       continue;
247     }
248     OutArgsSizeBytes = applyStackAlignmentTy(OutArgsSizeBytes, Ty);
249     OutArgsSizeBytes += typeWidthInBytesOnStack(Ty);
250   }
251   // Add size of argument save area
252   constexpr int BytesPerStackArg = 4;
253   OutArgsSizeBytes += MIPS32_MAX_GPR_ARG * BytesPerStackArg;
254   return applyStackAlignment(OutArgsSizeBytes);
255 }
256 
257 namespace {
getConstantMemoryOrder(Operand * Opnd)258 inline uint64_t getConstantMemoryOrder(Operand *Opnd) {
259   if (auto *Integer = llvm::dyn_cast<ConstantInteger32>(Opnd))
260     return Integer->getValue();
261   return Intrinsics::MemoryOrderInvalid;
262 }
263 } // namespace
264 
genTargetHelperCallFor(Inst * Instr)265 void TargetMIPS32::genTargetHelperCallFor(Inst *Instr) {
266   constexpr bool NoTailCall = false;
267   constexpr bool IsTargetHelperCall = true;
268   Variable *Dest = Instr->getDest();
269   const Type DestTy = Dest ? Dest->getType() : IceType_void;
270 
271   switch (Instr->getKind()) {
272   default:
273     return;
274   case Inst::Select: {
275     if (isVectorType(DestTy)) {
276       Operand *SrcT = llvm::cast<InstSelect>(Instr)->getTrueOperand();
277       Operand *SrcF = llvm::cast<InstSelect>(Instr)->getFalseOperand();
278       Operand *Cond = llvm::cast<InstSelect>(Instr)->getCondition();
279       Variable *T = Func->makeVariable(DestTy);
280       auto *Undef = ConstantUndef::create(Ctx, DestTy);
281       Context.insert<InstAssign>(T, Undef);
282       auto *VarVecOn32 = llvm::cast<VariableVecOn32>(T);
283       VarVecOn32->initVecElement(Func);
284       for (SizeT I = 0; I < typeNumElements(DestTy); ++I) {
285         auto *Index = Ctx->getConstantInt32(I);
286         auto *OpC = Func->makeVariable(typeElementType(Cond->getType()));
287         Context.insert<InstExtractElement>(OpC, Cond, Index);
288         auto *OpT = Func->makeVariable(typeElementType(DestTy));
289         Context.insert<InstExtractElement>(OpT, SrcT, Index);
290         auto *OpF = Func->makeVariable(typeElementType(DestTy));
291         Context.insert<InstExtractElement>(OpF, SrcF, Index);
292         auto *Dst = Func->makeVariable(typeElementType(DestTy));
293         Variable *DestT = Func->makeVariable(DestTy);
294         Context.insert<InstSelect>(Dst, OpC, OpT, OpF);
295         Context.insert<InstInsertElement>(DestT, T, Dst, Index);
296         T = DestT;
297       }
298       Context.insert<InstAssign>(Dest, T);
299       Instr->setDeleted();
300     }
301     return;
302   }
303   case Inst::Fcmp: {
304     if (isVectorType(DestTy)) {
305       InstFcmp::FCond Cond = llvm::cast<InstFcmp>(Instr)->getCondition();
306       Operand *Src0 = Instr->getSrc(0);
307       Operand *Src1 = Instr->getSrc(1);
308       Variable *T = Func->makeVariable(IceType_v4f32);
309       auto *Undef = ConstantUndef::create(Ctx, IceType_v4f32);
310       Context.insert<InstAssign>(T, Undef);
311       auto *VarVecOn32 = llvm::cast<VariableVecOn32>(T);
312       VarVecOn32->initVecElement(Func);
313       for (SizeT I = 0; I < typeNumElements(IceType_v4f32); ++I) {
314         auto *Index = Ctx->getConstantInt32(I);
315         auto *Op0 = Func->makeVariable(IceType_f32);
316         Context.insert<InstExtractElement>(Op0, Src0, Index);
317         auto *Op1 = Func->makeVariable(IceType_f32);
318         Context.insert<InstExtractElement>(Op1, Src1, Index);
319         auto *Dst = Func->makeVariable(IceType_f32);
320         Variable *DestT = Func->makeVariable(IceType_v4f32);
321         Context.insert<InstFcmp>(Cond, Dst, Op0, Op1);
322         Context.insert<InstInsertElement>(DestT, T, Dst, Index);
323         T = DestT;
324       }
325       Context.insert<InstAssign>(Dest, T);
326       Instr->setDeleted();
327     }
328     return;
329   }
330   case Inst::Icmp: {
331     if (isVectorType(DestTy)) {
332       InstIcmp::ICond Cond = llvm::cast<InstIcmp>(Instr)->getCondition();
333       Operand *Src0 = Instr->getSrc(0);
334       Operand *Src1 = Instr->getSrc(1);
335       const Type SrcType = Src0->getType();
336       Variable *T = Func->makeVariable(DestTy);
337       auto *Undef = ConstantUndef::create(Ctx, DestTy);
338       Context.insert<InstAssign>(T, Undef);
339       auto *VarVecOn32 = llvm::cast<VariableVecOn32>(T);
340       VarVecOn32->initVecElement(Func);
341       for (SizeT I = 0; I < typeNumElements(SrcType); ++I) {
342         auto *Index = Ctx->getConstantInt32(I);
343         auto *Op0 = Func->makeVariable(typeElementType(SrcType));
344         Context.insert<InstExtractElement>(Op0, Src0, Index);
345         auto *Op1 = Func->makeVariable(typeElementType(SrcType));
346         Context.insert<InstExtractElement>(Op1, Src1, Index);
347         auto *Dst = Func->makeVariable(typeElementType(DestTy));
348         Variable *DestT = Func->makeVariable(DestTy);
349         Context.insert<InstIcmp>(Cond, Dst, Op0, Op1);
350         Context.insert<InstInsertElement>(DestT, T, Dst, Index);
351         T = DestT;
352       }
353       Context.insert<InstAssign>(Dest, T);
354       Instr->setDeleted();
355     }
356     return;
357   }
358   case Inst::Arithmetic: {
359     const InstArithmetic::OpKind Op =
360         llvm::cast<InstArithmetic>(Instr)->getOp();
361     if (isVectorType(DestTy)) {
362       scalarizeArithmetic(Op, Dest, Instr->getSrc(0), Instr->getSrc(1));
363       Instr->setDeleted();
364       return;
365     }
366     switch (DestTy) {
367     default:
368       return;
369     case IceType_i64: {
370       RuntimeHelper HelperID = RuntimeHelper::H_Num;
371       switch (Op) {
372       default:
373         return;
374       case InstArithmetic::Udiv:
375         HelperID = RuntimeHelper::H_udiv_i64;
376         break;
377       case InstArithmetic::Sdiv:
378         HelperID = RuntimeHelper::H_sdiv_i64;
379         break;
380       case InstArithmetic::Urem:
381         HelperID = RuntimeHelper::H_urem_i64;
382         break;
383       case InstArithmetic::Srem:
384         HelperID = RuntimeHelper::H_srem_i64;
385         break;
386       }
387 
388       if (HelperID == RuntimeHelper::H_Num) {
389         return;
390       }
391 
392       Operand *TargetHelper = Ctx->getRuntimeHelperFunc(HelperID);
393       constexpr SizeT MaxArgs = 2;
394       auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
395                                             NoTailCall, IsTargetHelperCall);
396       Call->addArg(Instr->getSrc(0));
397       Call->addArg(Instr->getSrc(1));
398       Instr->setDeleted();
399       return;
400     }
401     case IceType_f32:
402     case IceType_f64: {
403       if (Op != InstArithmetic::Frem) {
404         return;
405       }
406       constexpr SizeT MaxArgs = 2;
407       Operand *TargetHelper = Ctx->getRuntimeHelperFunc(
408           DestTy == IceType_f32 ? RuntimeHelper::H_frem_f32
409                                 : RuntimeHelper::H_frem_f64);
410       auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
411                                             NoTailCall, IsTargetHelperCall);
412       Call->addArg(Instr->getSrc(0));
413       Call->addArg(Instr->getSrc(1));
414       Instr->setDeleted();
415       return;
416     }
417     }
418     llvm::report_fatal_error("Control flow should never have reached here.");
419   }
420   case Inst::Cast: {
421     Operand *Src0 = Instr->getSrc(0);
422     const Type SrcTy = Src0->getType();
423     auto *CastInstr = llvm::cast<InstCast>(Instr);
424     const InstCast::OpKind CastKind = CastInstr->getCastKind();
425 
426     if (isVectorType(DestTy)) {
427       Variable *T = Func->makeVariable(DestTy);
428       auto *VarVecOn32 = llvm::cast<VariableVecOn32>(T);
429       VarVecOn32->initVecElement(Func);
430       auto *Undef = ConstantUndef::create(Ctx, DestTy);
431       Context.insert<InstAssign>(T, Undef);
432       for (SizeT I = 0; I < typeNumElements(DestTy); ++I) {
433         auto *Index = Ctx->getConstantInt32(I);
434         auto *Op = Func->makeVariable(typeElementType(SrcTy));
435         Context.insert<InstExtractElement>(Op, Src0, Index);
436         auto *Dst = Func->makeVariable(typeElementType(DestTy));
437         Variable *DestT = Func->makeVariable(DestTy);
438         Context.insert<InstCast>(CastKind, Dst, Op);
439         Context.insert<InstInsertElement>(DestT, T, Dst, Index);
440         T = DestT;
441       }
442       Context.insert<InstAssign>(Dest, T);
443       Instr->setDeleted();
444       return;
445     }
446 
447     switch (CastKind) {
448     default:
449       return;
450     case InstCast::Fptosi:
451     case InstCast::Fptoui: {
452       if ((DestTy != IceType_i32) && (DestTy != IceType_i64)) {
453         return;
454       }
455       const bool DestIs32 = DestTy == IceType_i32;
456       const bool DestIsSigned = CastKind == InstCast::Fptosi;
457       const bool Src0IsF32 = isFloat32Asserting32Or64(SrcTy);
458       RuntimeHelper RTHFunc = RuntimeHelper::H_Num;
459       if (DestIsSigned) {
460         if (DestIs32) {
461           return;
462         }
463         RTHFunc = Src0IsF32 ? RuntimeHelper::H_fptosi_f32_i64
464                             : RuntimeHelper::H_fptosi_f64_i64;
465       } else {
466         RTHFunc = Src0IsF32 ? (DestIs32 ? RuntimeHelper::H_fptoui_f32_i32
467                                         : RuntimeHelper::H_fptoui_f32_i64)
468                             : (DestIs32 ? RuntimeHelper::H_fptoui_f64_i32
469                                         : RuntimeHelper::H_fptoui_f64_i64);
470       }
471       Operand *TargetHelper = Ctx->getRuntimeHelperFunc(RTHFunc);
472       static constexpr SizeT MaxArgs = 1;
473       auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
474                                             NoTailCall, IsTargetHelperCall);
475       Call->addArg(Src0);
476       Instr->setDeleted();
477       return;
478     }
479     case InstCast::Sitofp:
480     case InstCast::Uitofp: {
481       if ((SrcTy != IceType_i32) && (SrcTy != IceType_i64)) {
482         return;
483       }
484       const bool SourceIs32 = SrcTy == IceType_i32;
485       const bool SourceIsSigned = CastKind == InstCast::Sitofp;
486       const bool DestIsF32 = isFloat32Asserting32Or64(DestTy);
487       RuntimeHelper RTHFunc = RuntimeHelper::H_Num;
488       if (SourceIsSigned) {
489         if (SourceIs32) {
490           return;
491         }
492         RTHFunc = DestIsF32 ? RuntimeHelper::H_sitofp_i64_f32
493                             : RuntimeHelper::H_sitofp_i64_f64;
494       } else {
495         RTHFunc = DestIsF32 ? (SourceIs32 ? RuntimeHelper::H_uitofp_i32_f32
496                                           : RuntimeHelper::H_uitofp_i64_f32)
497                             : (SourceIs32 ? RuntimeHelper::H_uitofp_i32_f64
498                                           : RuntimeHelper::H_uitofp_i64_f64);
499       }
500       Operand *TargetHelper = Ctx->getRuntimeHelperFunc(RTHFunc);
501       static constexpr SizeT MaxArgs = 1;
502       auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
503                                             NoTailCall, IsTargetHelperCall);
504       Call->addArg(Src0);
505       Instr->setDeleted();
506       return;
507     }
508     case InstCast::Bitcast: {
509       if (DestTy == SrcTy) {
510         return;
511       }
512       Variable *CallDest = Dest;
513       RuntimeHelper HelperID = RuntimeHelper::H_Num;
514       switch (DestTy) {
515       default:
516         return;
517       case IceType_i8:
518         assert(SrcTy == IceType_v8i1);
519         HelperID = RuntimeHelper::H_bitcast_8xi1_i8;
520         CallDest = Func->makeVariable(IceType_i32);
521         break;
522       case IceType_i16:
523         assert(SrcTy == IceType_v16i1);
524         HelperID = RuntimeHelper::H_bitcast_16xi1_i16;
525         CallDest = Func->makeVariable(IceType_i32);
526         break;
527       case IceType_v8i1: {
528         assert(SrcTy == IceType_i8);
529         HelperID = RuntimeHelper::H_bitcast_i8_8xi1;
530         Variable *Src0AsI32 = Func->makeVariable(stackSlotType());
531         // Arguments to functions are required to be at least 32 bits wide.
532         Context.insert<InstCast>(InstCast::Zext, Src0AsI32, Src0);
533         Src0 = Src0AsI32;
534       } break;
535       case IceType_v16i1: {
536         assert(SrcTy == IceType_i16);
537         HelperID = RuntimeHelper::H_bitcast_i16_16xi1;
538         Variable *Src0AsI32 = Func->makeVariable(stackSlotType());
539         // Arguments to functions are required to be at least 32 bits wide.
540         Context.insert<InstCast>(InstCast::Zext, Src0AsI32, Src0);
541         Src0 = Src0AsI32;
542       } break;
543       }
544       constexpr SizeT MaxSrcs = 1;
545       InstCall *Call = makeHelperCall(HelperID, CallDest, MaxSrcs);
546       Call->addArg(Src0);
547       Context.insert(Call);
548       // The PNaCl ABI disallows i8/i16 return types, so truncate the helper
549       // call result to the appropriate type as necessary.
550       if (CallDest->getType() != DestTy)
551         Context.insert<InstCast>(InstCast::Trunc, Dest, CallDest);
552       Instr->setDeleted();
553       return;
554     }
555     case InstCast::Trunc: {
556       if (DestTy == SrcTy) {
557         return;
558       }
559       if (!isVectorType(SrcTy)) {
560         return;
561       }
562       assert(typeNumElements(DestTy) == typeNumElements(SrcTy));
563       assert(typeElementType(DestTy) == IceType_i1);
564       assert(isVectorIntegerType(SrcTy));
565       return;
566     }
567     case InstCast::Sext:
568     case InstCast::Zext: {
569       if (DestTy == SrcTy) {
570         return;
571       }
572       if (!isVectorType(DestTy)) {
573         return;
574       }
575       assert(typeNumElements(DestTy) == typeNumElements(SrcTy));
576       assert(typeElementType(SrcTy) == IceType_i1);
577       assert(isVectorIntegerType(DestTy));
578       return;
579     }
580     }
581     llvm::report_fatal_error("Control flow should never have reached here.");
582   }
583   case Inst::Intrinsic: {
584     auto *Intrinsic = llvm::cast<InstIntrinsic>(Instr);
585     Intrinsics::IntrinsicID ID = Intrinsic->getIntrinsicID();
586     if (isVectorType(DestTy) && ID == Intrinsics::Fabs) {
587       Operand *Src0 = Intrinsic->getArg(0);
588       Intrinsics::IntrinsicInfo Info = Intrinsic->getIntrinsicInfo();
589 
590       Variable *T = Func->makeVariable(IceType_v4f32);
591       auto *Undef = ConstantUndef::create(Ctx, IceType_v4f32);
592       Context.insert<InstAssign>(T, Undef);
593       auto *VarVecOn32 = llvm::cast<VariableVecOn32>(T);
594       VarVecOn32->initVecElement(Func);
595 
596       for (SizeT i = 0; i < typeNumElements(IceType_v4f32); ++i) {
597         auto *Index = Ctx->getConstantInt32(i);
598         auto *Op = Func->makeVariable(IceType_f32);
599         Context.insert<InstExtractElement>(Op, Src0, Index);
600         auto *Res = Func->makeVariable(IceType_f32);
601         Variable *DestT = Func->makeVariable(IceType_v4f32);
602         auto *Intrinsic = Context.insert<InstIntrinsic>(1, Res, Info);
603         Intrinsic->addArg(Op);
604         Context.insert<InstInsertElement>(DestT, T, Res, Index);
605         T = DestT;
606       }
607 
608       Context.insert<InstAssign>(Dest, T);
609 
610       Instr->setDeleted();
611       return;
612     }
613     switch (ID) {
614     default:
615       return;
616     case Intrinsics::AtomicLoad: {
617       if (DestTy != IceType_i64)
618         return;
619       if (!Intrinsics::isMemoryOrderValid(
620               ID, getConstantMemoryOrder(Intrinsic->getArg(1)))) {
621         Func->setError("Unexpected memory ordering for AtomicLoad");
622         return;
623       }
624       Operand *Addr = Intrinsic->getArg(0);
625       Operand *TargetHelper = Ctx->getConstantExternSym(
626           Ctx->getGlobalString("__sync_val_compare_and_swap_8"));
627       static constexpr SizeT MaxArgs = 3;
628       auto *_0 = Ctx->getConstantZero(IceType_i64);
629       auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
630                                             NoTailCall, IsTargetHelperCall);
631       Call->addArg(Addr);
632       Call->addArg(_0);
633       Call->addArg(_0);
634       Context.insert<InstMIPS32Sync>();
635       Instr->setDeleted();
636       return;
637     }
638     case Intrinsics::AtomicStore: {
639       Operand *Val = Intrinsic->getArg(0);
640       if (Val->getType() != IceType_i64)
641         return;
642       if (!Intrinsics::isMemoryOrderValid(
643               ID, getConstantMemoryOrder(Intrinsic->getArg(2)))) {
644         Func->setError("Unexpected memory ordering for AtomicStore");
645         return;
646       }
647       Operand *Addr = Intrinsic->getArg(1);
648       Variable *NoDest = nullptr;
649       Operand *TargetHelper = Ctx->getConstantExternSym(
650           Ctx->getGlobalString("__sync_lock_test_and_set_8"));
651       Context.insert<InstMIPS32Sync>();
652       static constexpr SizeT MaxArgs = 2;
653       auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper,
654                                             NoTailCall, IsTargetHelperCall);
655       Call->addArg(Addr);
656       Call->addArg(Val);
657       Context.insert<InstMIPS32Sync>();
658       Instr->setDeleted();
659       return;
660     }
661     case Intrinsics::AtomicCmpxchg: {
662       if (DestTy != IceType_i64)
663         return;
664       if (!Intrinsics::isMemoryOrderValid(
665               ID, getConstantMemoryOrder(Intrinsic->getArg(3)),
666               getConstantMemoryOrder(Intrinsic->getArg(4)))) {
667         Func->setError("Unexpected memory ordering for AtomicCmpxchg");
668         return;
669       }
670       Operand *Addr = Intrinsic->getArg(0);
671       Operand *Oldval = Intrinsic->getArg(1);
672       Operand *Newval = Intrinsic->getArg(2);
673       Operand *TargetHelper = Ctx->getConstantExternSym(
674           Ctx->getGlobalString("__sync_val_compare_and_swap_8"));
675       Context.insert<InstMIPS32Sync>();
676       static constexpr SizeT MaxArgs = 3;
677       auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
678                                             NoTailCall, IsTargetHelperCall);
679       Call->addArg(Addr);
680       Call->addArg(Oldval);
681       Call->addArg(Newval);
682       Context.insert<InstMIPS32Sync>();
683       Instr->setDeleted();
684       return;
685     }
686     case Intrinsics::AtomicRMW: {
687       if (DestTy != IceType_i64)
688         return;
689       if (!Intrinsics::isMemoryOrderValid(
690               ID, getConstantMemoryOrder(Intrinsic->getArg(3)))) {
691         Func->setError("Unexpected memory ordering for AtomicRMW");
692         return;
693       }
694       auto Operation = static_cast<Intrinsics::AtomicRMWOperation>(
695           llvm::cast<ConstantInteger32>(Intrinsic->getArg(0))->getValue());
696       auto *Addr = Intrinsic->getArg(1);
697       auto *Newval = Intrinsic->getArg(2);
698       Operand *TargetHelper;
699       switch (Operation) {
700       case Intrinsics::AtomicAdd:
701         TargetHelper = Ctx->getConstantExternSym(
702             Ctx->getGlobalString("__sync_fetch_and_add_8"));
703         break;
704       case Intrinsics::AtomicSub:
705         TargetHelper = Ctx->getConstantExternSym(
706             Ctx->getGlobalString("__sync_fetch_and_sub_8"));
707         break;
708       case Intrinsics::AtomicOr:
709         TargetHelper = Ctx->getConstantExternSym(
710             Ctx->getGlobalString("__sync_fetch_and_or_8"));
711         break;
712       case Intrinsics::AtomicAnd:
713         TargetHelper = Ctx->getConstantExternSym(
714             Ctx->getGlobalString("__sync_fetch_and_and_8"));
715         break;
716       case Intrinsics::AtomicXor:
717         TargetHelper = Ctx->getConstantExternSym(
718             Ctx->getGlobalString("__sync_fetch_and_xor_8"));
719         break;
720       case Intrinsics::AtomicExchange:
721         TargetHelper = Ctx->getConstantExternSym(
722             Ctx->getGlobalString("__sync_lock_test_and_set_8"));
723         break;
724       default:
725         llvm::report_fatal_error("Unknown AtomicRMW operation");
726         return;
727       }
728       Context.insert<InstMIPS32Sync>();
729       static constexpr SizeT MaxArgs = 2;
730       auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
731                                             NoTailCall, IsTargetHelperCall);
732       Call->addArg(Addr);
733       Call->addArg(Newval);
734       Context.insert<InstMIPS32Sync>();
735       Instr->setDeleted();
736       return;
737     }
738     case Intrinsics::Ctpop: {
739       Operand *Src0 = Intrinsic->getArg(0);
740       Operand *TargetHelper =
741           Ctx->getRuntimeHelperFunc(isInt32Asserting32Or64(Src0->getType())
742                                         ? RuntimeHelper::H_call_ctpop_i32
743                                         : RuntimeHelper::H_call_ctpop_i64);
744       static constexpr SizeT MaxArgs = 1;
745       auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
746                                             NoTailCall, IsTargetHelperCall);
747       Call->addArg(Src0);
748       Instr->setDeleted();
749       return;
750     }
751     case Intrinsics::Longjmp: {
752       static constexpr SizeT MaxArgs = 2;
753       static constexpr Variable *NoDest = nullptr;
754       Operand *TargetHelper =
755           Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_longjmp);
756       auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper,
757                                             NoTailCall, IsTargetHelperCall);
758       Call->addArg(Intrinsic->getArg(0));
759       Call->addArg(Intrinsic->getArg(1));
760       Instr->setDeleted();
761       return;
762     }
763     case Intrinsics::Memcpy: {
764       static constexpr SizeT MaxArgs = 3;
765       static constexpr Variable *NoDest = nullptr;
766       Operand *TargetHelper =
767           Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_memcpy);
768       auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper,
769                                             NoTailCall, IsTargetHelperCall);
770       Call->addArg(Intrinsic->getArg(0));
771       Call->addArg(Intrinsic->getArg(1));
772       Call->addArg(Intrinsic->getArg(2));
773       Instr->setDeleted();
774       return;
775     }
776     case Intrinsics::Memmove: {
777       static constexpr SizeT MaxArgs = 3;
778       static constexpr Variable *NoDest = nullptr;
779       Operand *TargetHelper =
780           Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_memmove);
781       auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper,
782                                             NoTailCall, IsTargetHelperCall);
783       Call->addArg(Intrinsic->getArg(0));
784       Call->addArg(Intrinsic->getArg(1));
785       Call->addArg(Intrinsic->getArg(2));
786       Instr->setDeleted();
787       return;
788     }
789     case Intrinsics::Memset: {
790       Operand *ValOp = Intrinsic->getArg(1);
791       assert(ValOp->getType() == IceType_i8);
792       Variable *ValExt = Func->makeVariable(stackSlotType());
793       Context.insert<InstCast>(InstCast::Zext, ValExt, ValOp);
794 
795       static constexpr SizeT MaxArgs = 3;
796       static constexpr Variable *NoDest = nullptr;
797       Operand *TargetHelper =
798           Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_memset);
799       auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper,
800                                             NoTailCall, IsTargetHelperCall);
801       Call->addArg(Intrinsic->getArg(0));
802       Call->addArg(ValExt);
803       Call->addArg(Intrinsic->getArg(2));
804       Instr->setDeleted();
805       return;
806     }
807     case Intrinsics::NaClReadTP: {
808       if (SandboxingType == ST_NaCl) {
809         return;
810       }
811       static constexpr SizeT MaxArgs = 0;
812       assert(SandboxingType != ST_Nonsfi);
813       Operand *TargetHelper =
814           Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_read_tp);
815       Context.insert<InstCall>(MaxArgs, Dest, TargetHelper, NoTailCall,
816                                IsTargetHelperCall);
817       Instr->setDeleted();
818       return;
819     }
820     case Intrinsics::Setjmp: {
821       static constexpr SizeT MaxArgs = 1;
822       Operand *TargetHelper =
823           Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_setjmp);
824       auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
825                                             NoTailCall, IsTargetHelperCall);
826       Call->addArg(Intrinsic->getArg(0));
827       Instr->setDeleted();
828       return;
829     }
830     }
831     llvm::report_fatal_error("Control flow should never have reached here.");
832   }
833   }
834 }
835 
// Computes MaxOutArgsSizeBytes, the largest stack space any call in this
// function needs for outgoing arguments, and seeds CurrentAllocaOffset so
// that allocas are laid out above the out-args area.
void TargetMIPS32::findMaxStackOutArgsSize() {
  // MinNeededOutArgsBytes should be updated if the Target ever creates a
  // high-level InstCall that requires more stack bytes.
  size_t MinNeededOutArgsBytes = 0;
  // Non-leaf functions always reserve at least the four GPR argument slots
  // (presumably the O32 ABI home area -- see MIPS32_MAX_GPR_ARG).
  if (!MaybeLeafFunc)
    MinNeededOutArgsBytes = MIPS32_MAX_GPR_ARG * 4;
  MaxOutArgsSizeBytes = MinNeededOutArgsBytes;
  // Scan every instruction for calls and take the max over their stack
  // argument requirements.
  for (CfgNode *Node : Func->getNodes()) {
    Context.init(Node);
    while (!Context.atEnd()) {
      // PostIncrement advances Context to the next instruction when it goes
      // out of scope at the end of each loop iteration.
      PostIncrLoweringContext PostIncrement(Context);
      Inst *CurInstr = iteratorToInst(Context.getCur());
      if (auto *Call = llvm::dyn_cast<InstCall>(CurInstr)) {
        SizeT OutArgsSizeBytes = getCallStackArgumentsSizeBytes(Call);
        MaxOutArgsSizeBytes = std::max(MaxOutArgsSizeBytes, OutArgsSizeBytes);
      }
    }
  }
  CurrentAllocaOffset = MaxOutArgsSizeBytes;
}
856 
/// O2 translation pipeline: lowers high-level ICE instructions to MIPS32,
/// running liveness analysis, global register allocation, and the late
/// frame/branch passes. Returns early if any pass records an error on Func.
void TargetMIPS32::translateO2() {
  TimerMarker T(TimerStack::TT_O2, Func);

  // TODO(stichnot): share passes with X86?
  // https://code.google.com/p/nativeclient/issues/detail?id=4094
  genTargetHelperCalls();

  // NOTE(review): presumably drops the leaf-function assumption if the
  // helper-call pass above introduced calls -- confirm against its definition.
  unsetIfNonLeafFunc();

  // Fix the size of the outgoing-arguments area before stack layout.
  findMaxStackOutArgsSize();

  // Merge Alloca instructions, and lay out the stack.
  static constexpr bool SortAndCombineAllocas = true;
  Func->processAllocas(SortAndCombineAllocas);
  Func->dump("After Alloca processing");

  if (!getFlags().getEnablePhiEdgeSplit()) {
    // Lower Phi instructions.
    Func->placePhiLoads();
    if (Func->hasError())
      return;
    Func->placePhiStores();
    if (Func->hasError())
      return;
    Func->deletePhis();
    if (Func->hasError())
      return;
    Func->dump("After Phi lowering");
  }

  // Address mode optimization.
  Func->getVMetadata()->init(VMK_SingleDefs);
  Func->doAddressOpt();

  // Argument lowering
  Func->doArgLowering();

  // Target lowering. This requires liveness analysis for some parts of the
  // lowering decisions, such as compare/branch fusing. If non-lightweight
  // liveness analysis is used, the instructions need to be renumbered first.
  // TODO: This renumbering should only be necessary if we're actually
  // calculating live intervals, which we only do for register allocation.
  Func->renumberInstructions();
  if (Func->hasError())
    return;

  // TODO: It should be sufficient to use the fastest liveness calculation,
  // i.e. livenessLightweight(). However, for some reason that slows down the
  // rest of the translation. Investigate.
  Func->liveness(Liveness_Basic);
  if (Func->hasError())
    return;
  Func->dump("After MIPS32 address mode opt");

  Func->genCode();
  if (Func->hasError())
    return;
  Func->dump("After MIPS32 codegen");

  // Register allocation. This requires instruction renumbering and full
  // liveness analysis.
  Func->renumberInstructions();
  if (Func->hasError())
    return;
  Func->liveness(Liveness_Intervals);
  if (Func->hasError())
    return;
  // The post-codegen dump is done here, after liveness analysis and associated
  // cleanup, to make the dump cleaner and more useful.
  Func->dump("After initial MIPS32 codegen");
  // Validate the live range computations. The expensive validation call is
  // deliberately only made when assertions are enabled.
  assert(Func->validateLiveness());
  Func->getVMetadata()->init(VMK_All);
  regAlloc(RAK_Global);
  if (Func->hasError())
    return;
  Func->dump("After linear scan regalloc");

  if (getFlags().getEnablePhiEdgeSplit()) {
    Func->advancedPhiLowering();
    Func->dump("After advanced Phi lowering");
  }

  // Stack frame mapping.
  Func->genFrame();
  if (Func->hasError())
    return;
  Func->dump("After stack frame mapping");

  // Legalization that can only happen once frame offsets are known.
  postLowerLegalization();
  if (Func->hasError())
    return;
  Func->dump("After postLowerLegalization");

  Func->contractEmptyNodes();
  Func->reorderNodes();

  // Branch optimization. This needs to be done just before code emission. In
  // particular, no transformations that insert or reorder CfgNodes should be
  // done after branch optimization. We go ahead and do it before nop insertion
  // to reduce the amount of work needed for searching for opportunities.
  Func->doBranchOpt();
  Func->dump("After branch optimization");
}
962 
/// Om1 ("minimal optimization") translation pipeline: same lowering as O2 but
/// with no alloca merging, no address-mode optimization, no liveness-based
/// passes, and register allocation only for infinite-weight variables.
/// Returns early if any pass records an error on Func.
void TargetMIPS32::translateOm1() {
  TimerMarker T(TimerStack::TT_Om1, Func);

  // TODO: share passes with X86?
  genTargetHelperCalls();

  // NOTE(review): presumably drops the leaf-function assumption if helper
  // calls were introduced above -- confirm against its definition.
  unsetIfNonLeafFunc();

  // Fix the size of the outgoing-arguments area before stack layout.
  findMaxStackOutArgsSize();

  // Do not merge Alloca instructions, and lay out the stack.
  static constexpr bool SortAndCombineAllocas = false;
  Func->processAllocas(SortAndCombineAllocas);
  Func->dump("After Alloca processing");

  // Phi lowering is unconditional at Om1 (no edge splitting).
  Func->placePhiLoads();
  if (Func->hasError())
    return;
  Func->placePhiStores();
  if (Func->hasError())
    return;
  Func->deletePhis();
  if (Func->hasError())
    return;
  Func->dump("After Phi lowering");

  Func->doArgLowering();

  Func->genCode();
  if (Func->hasError())
    return;
  Func->dump("After initial MIPS32 codegen");

  regAlloc(RAK_InfOnly);
  if (Func->hasError())
    return;
  Func->dump("After regalloc of infinite-weight variables");

  Func->genFrame();
  if (Func->hasError())
    return;
  Func->dump("After stack frame mapping");

  // Legalization that can only happen once frame offsets are known.
  postLowerLegalization();
  if (Func->hasError())
    return;
  Func->dump("After postLowerLegalization");
}
1011 
doBranchOpt(Inst * Instr,const CfgNode * NextNode)1012 bool TargetMIPS32::doBranchOpt(Inst *Instr, const CfgNode *NextNode) {
1013   if (auto *Br = llvm::dyn_cast<InstMIPS32Br>(Instr)) {
1014     return Br->optimizeBranch(NextNode);
1015   }
1016   return false;
1017 }
1018 
namespace {

// Printable register names, indexed by register number. The table is
// expanded from the REGMIPS32_TABLE X-macro (see IceRegistersMIPS32.def);
// only the `name` field of each entry is kept.
const char *RegNames[RegMIPS32::Reg_NUM] = {
#define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt,    \
          isI64Pair, isFP32, isFP64, isVec128, alias_init)                     \
  name,
    REGMIPS32_TABLE
#undef X
};

} // end of anonymous namespace
1030 
// Returns the printable name of a physical register. RegNum must be a valid
// register index (checked in debug builds by assertIsValid).
const char *RegMIPS32::getRegName(RegNumT RegNum) {
  RegNum.assertIsValid();
  return RegNames[RegNum];
}
1035 
getRegName(RegNumT RegNum,Type Ty) const1036 const char *TargetMIPS32::getRegName(RegNumT RegNum, Type Ty) const {
1037   (void)Ty;
1038   return RegMIPS32::getRegName(RegNum);
1039 }
1040 
getPhysicalRegister(RegNumT RegNum,Type Ty)1041 Variable *TargetMIPS32::getPhysicalRegister(RegNumT RegNum, Type Ty) {
1042   if (Ty == IceType_void)
1043     Ty = IceType_i32;
1044   if (PhysicalRegisters[Ty].empty())
1045     PhysicalRegisters[Ty].resize(RegMIPS32::Reg_NUM);
1046   RegNum.assertIsValid();
1047   Variable *Reg = PhysicalRegisters[Ty][RegNum];
1048   if (Reg == nullptr) {
1049     Reg = Func->makeVariable(Ty);
1050     Reg->setRegNum(RegNum);
1051     PhysicalRegisters[Ty][RegNum] = Reg;
1052     // Specially mark a named physical register as an "argument" so that it is
1053     // considered live upon function entry.  Otherwise it's possible to get
1054     // liveness validation errors for saving callee-save registers.
1055     Func->addImplicitArg(Reg);
1056     // Don't bother tracking the live range of a named physical register.
1057     Reg->setIgnoreLiveness();
1058   }
1059   return Reg;
1060 }
1061 
emitJumpTable(const Cfg * Func,const InstJumpTable * JumpTable) const1062 void TargetMIPS32::emitJumpTable(const Cfg *Func,
1063                                  const InstJumpTable *JumpTable) const {
1064   (void)Func;
1065   (void)JumpTable;
1066   UnimplementedError(getFlags());
1067 }
1068 
1069 /// Provide a trivial wrapper to legalize() for this common usage.
legalizeToReg(Operand * From,RegNumT RegNum)1070 Variable *TargetMIPS32::legalizeToReg(Operand *From, RegNumT RegNum) {
1071   return llvm::cast<Variable>(legalize(From, Legal_Reg, RegNum));
1072 }
1073 
1074 /// Legalize undef values to concrete values.
legalizeUndef(Operand * From,RegNumT RegNum)1075 Operand *TargetMIPS32::legalizeUndef(Operand *From, RegNumT RegNum) {
1076   (void)RegNum;
1077   Type Ty = From->getType();
1078   if (llvm::isa<ConstantUndef>(From)) {
1079     // Lower undefs to zero.  Another option is to lower undefs to an
1080     // uninitialized register; however, using an uninitialized register
1081     // results in less predictable code.
1082     //
1083     // If in the future the implementation is changed to lower undef
1084     // values to uninitialized registers, a FakeDef will be needed:
1085     //     Context.insert(InstFakeDef::create(Func, Reg));
1086     // This is in order to ensure that the live range of Reg is not
1087     // overestimated.  If the constant being lowered is a 64 bit value,
1088     // then the result should be split and the lo and hi components will
1089     // need to go in uninitialized registers.
1090     if (isVectorType(Ty)) {
1091       Variable *Var = makeReg(Ty, RegNum);
1092       auto *Reg = llvm::cast<VariableVecOn32>(Var);
1093       Reg->initVecElement(Func);
1094       auto *Zero = getZero();
1095       for (Variable *Var : Reg->getContainers()) {
1096         _mov(Var, Zero);
1097       }
1098       return Reg;
1099     }
1100     return Ctx->getConstantZero(Ty);
1101   }
1102   return From;
1103 }
1104 
makeReg(Type Type,RegNumT RegNum)1105 Variable *TargetMIPS32::makeReg(Type Type, RegNumT RegNum) {
1106   // There aren't any 64-bit integer registers for Mips32.
1107   assert(Type != IceType_i64);
1108   Variable *Reg = Func->makeVariable(Type);
1109   if (RegNum.hasValue())
1110     Reg->setRegNum(RegNum);
1111   else
1112     Reg->setMustHaveReg();
1113   return Reg;
1114 }
1115 
formMemoryOperand(Operand * Operand,Type Ty)1116 OperandMIPS32Mem *TargetMIPS32::formMemoryOperand(Operand *Operand, Type Ty) {
1117   // It may be the case that address mode optimization already creates an
1118   // OperandMIPS32Mem, so in that case it wouldn't need another level of
1119   // transformation.
1120   if (auto *Mem = llvm::dyn_cast<OperandMIPS32Mem>(Operand)) {
1121     return llvm::cast<OperandMIPS32Mem>(legalize(Mem));
1122   }
1123 
1124   // If we didn't do address mode optimization, then we only have a base/offset
1125   // to work with. MIPS always requires a base register, so just use that to
1126   // hold the operand.
1127   auto *Base = llvm::cast<Variable>(
1128       legalize(Operand, Legal_Reg | Legal_Rematerializable));
1129   const int32_t Offset = Base->hasStackOffset() ? Base->getStackOffset() : 0;
1130   return OperandMIPS32Mem::create(
1131       Func, Ty, Base,
1132       llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(Offset)));
1133 }
1134 
emitVariable(const Variable * Var) const1135 void TargetMIPS32::emitVariable(const Variable *Var) const {
1136   if (!BuildDefs::dump())
1137     return;
1138   Ostream &Str = Ctx->getStrEmit();
1139   const Type FrameSPTy = IceType_i32;
1140   if (Var->hasReg()) {
1141     Str << '$' << getRegName(Var->getRegNum(), Var->getType());
1142     return;
1143   }
1144   if (Var->mustHaveReg()) {
1145     llvm::report_fatal_error("Infinite-weight Variable (" + Var->getName() +
1146                              ") has no register assigned - function " +
1147                              Func->getFunctionName());
1148   }
1149   const int32_t Offset = Var->getStackOffset();
1150   Str << Offset;
1151   Str << "($" << getRegName(getFrameOrStackReg(), FrameSPTy);
1152   Str << ")";
1153 }
1154 
// Seeds the argument-register pools. The *Initializer sequences are loaded in
// reverse so that the first register in calling-convention order ends up at
// back(), matching the back()/pop_back() consumption style used by argInGPR
// and argInVFP.
TargetMIPS32::CallingConv::CallingConv()
    : GPRegsUsed(RegMIPS32::Reg_NUM),
      GPRArgs(GPRArgInitializer.rbegin(), GPRArgInitializer.rend()),
      I64Args(I64ArgInitializer.rbegin(), I64ArgInitializer.rend()),
      VFPRegsUsed(RegMIPS32::Reg_NUM),
      FP32Args(FP32ArgInitializer.rbegin(), FP32ArgInitializer.rend()),
      FP64Args(FP64ArgInitializer.rbegin(), FP64ArgInitializer.rend()) {}
1162 
1163 // In MIPS O32 abi FP argument registers can be used only if first argument is
1164 // of type float/double. UseFPRegs flag is used to care of that. Also FP arg
1165 // registers can be used only for first 2 arguments, so we require argument
1166 // number to make register allocation decisions.
argInReg(Type Ty,uint32_t ArgNo,RegNumT * Reg)1167 bool TargetMIPS32::CallingConv::argInReg(Type Ty, uint32_t ArgNo,
1168                                          RegNumT *Reg) {
1169   if (isScalarIntegerType(Ty) || isVectorType(Ty))
1170     return argInGPR(Ty, Reg);
1171   if (isScalarFloatingType(Ty)) {
1172     if (ArgNo == 0) {
1173       UseFPRegs = true;
1174       return argInVFP(Ty, Reg);
1175     }
1176     if (UseFPRegs && ArgNo == 1) {
1177       UseFPRegs = false;
1178       return argInVFP(Ty, Reg);
1179     }
1180     return argInGPR(Ty, Reg);
1181   }
1182   llvm::report_fatal_error("argInReg: Invalid type.");
1183   return false;
1184 }
1185 
argInGPR(Type Ty,RegNumT * Reg)1186 bool TargetMIPS32::CallingConv::argInGPR(Type Ty, RegNumT *Reg) {
1187   CfgVector<RegNumT> *Source;
1188 
1189   switch (Ty) {
1190   default: {
1191     llvm::report_fatal_error("argInGPR: Invalid type.");
1192     return false;
1193   } break;
1194   case IceType_v4i1:
1195   case IceType_v8i1:
1196   case IceType_v16i1:
1197   case IceType_v16i8:
1198   case IceType_v8i16:
1199   case IceType_v4i32:
1200   case IceType_v4f32:
1201   case IceType_i32:
1202   case IceType_f32: {
1203     Source = &GPRArgs;
1204   } break;
1205   case IceType_i64:
1206   case IceType_f64: {
1207     Source = &I64Args;
1208   } break;
1209   }
1210 
1211   discardUnavailableGPRsAndTheirAliases(Source);
1212 
1213   // If $4 is used for any scalar type (or returining v4f32) then the next
1214   // vector type if passed in $6:$7:stack:stack
1215   if (isVectorType(Ty)) {
1216     alignGPR(Source);
1217   }
1218 
1219   if (Source->empty()) {
1220     GPRegsUsed.set();
1221     return false;
1222   }
1223 
1224   *Reg = Source->back();
1225   // Note that we don't Source->pop_back() here. This is intentional. Notice how
1226   // we mark all of Reg's aliases as Used. So, for the next argument,
1227   // Source->back() is marked as unavailable, and it is thus implicitly popped
1228   // from the stack.
1229   GPRegsUsed |= RegisterAliases[*Reg];
1230 
1231   // All vector arguments irrespective of their base type are passed in GP
1232   // registers. First vector argument is passed in $4:$5:$6:$7 and 2nd
1233   // is passed in $6:$7:stack:stack. If it is 1st argument then discard
1234   // $4:$5:$6:$7 otherwise discard $6:$7 only.
1235   if (isVectorType(Ty)) {
1236     if (((unsigned)*Reg) == RegMIPS32::Reg_A0) {
1237       GPRegsUsed |= RegisterAliases[RegMIPS32::Reg_A1];
1238       GPRegsUsed |= RegisterAliases[RegMIPS32::Reg_A2];
1239       GPRegsUsed |= RegisterAliases[RegMIPS32::Reg_A3];
1240     } else {
1241       GPRegsUsed |= RegisterAliases[RegMIPS32::Reg_A3];
1242     }
1243   }
1244 
1245   return true;
1246 }
1247 
discardNextGPRAndItsAliases(CfgVector<RegNumT> * Regs)1248 inline void TargetMIPS32::CallingConv::discardNextGPRAndItsAliases(
1249     CfgVector<RegNumT> *Regs) {
1250   GPRegsUsed |= RegisterAliases[Regs->back()];
1251   Regs->pop_back();
1252 }
1253 
alignGPR(CfgVector<RegNumT> * Regs)1254 inline void TargetMIPS32::CallingConv::alignGPR(CfgVector<RegNumT> *Regs) {
1255   if (Regs->back() == RegMIPS32::Reg_A1 || Regs->back() == RegMIPS32::Reg_A3)
1256     discardNextGPRAndItsAliases(Regs);
1257 }
1258 
1259 // GPR are not packed when passing parameters. Thus, a function foo(i32, i64,
1260 // i32) will have the first argument in a0, the second in a2-a3, and the third
1261 // on the stack. To model this behavior, whenever we pop a register from Regs,
1262 // we remove all of its aliases from the pool of available GPRs. This has the
1263 // effect of computing the "closure" on the GPR registers.
discardUnavailableGPRsAndTheirAliases(CfgVector<RegNumT> * Regs)1264 void TargetMIPS32::CallingConv::discardUnavailableGPRsAndTheirAliases(
1265     CfgVector<RegNumT> *Regs) {
1266   while (!Regs->empty() && GPRegsUsed[Regs->back()]) {
1267     discardNextGPRAndItsAliases(Regs);
1268   }
1269 }
1270 
argInVFP(Type Ty,RegNumT * Reg)1271 bool TargetMIPS32::CallingConv::argInVFP(Type Ty, RegNumT *Reg) {
1272   CfgVector<RegNumT> *Source;
1273 
1274   switch (Ty) {
1275   default: {
1276     llvm::report_fatal_error("argInVFP: Invalid type.");
1277     return false;
1278   } break;
1279   case IceType_f32: {
1280     Source = &FP32Args;
1281   } break;
1282   case IceType_f64: {
1283     Source = &FP64Args;
1284   } break;
1285   }
1286 
1287   discardUnavailableVFPRegsAndTheirAliases(Source);
1288 
1289   if (Source->empty()) {
1290     VFPRegsUsed.set();
1291     return false;
1292   }
1293 
1294   *Reg = Source->back();
1295   VFPRegsUsed |= RegisterAliases[*Reg];
1296 
1297   // In MIPS O32 abi if fun arguments are (f32, i32) then one can not use reg_a0
1298   // for second argument even though it's free. f32 arg goes in reg_f12, i32 arg
1299   // goes in reg_a1. Similarly if arguments are (f64, i32) second argument goes
1300   // in reg_a3 and a0, a1 are not used.
1301   Source = &GPRArgs;
1302   // Discard one GPR reg for f32(4 bytes), two for f64(4 + 4 bytes)
1303   if (Ty == IceType_f64) {
1304     // In MIPS o32 abi, when we use GPR argument pairs to store F64 values, pair
1305     // must be aligned at even register. Similarly when we discard GPR registers
1306     // when some arguments from starting 16 bytes goes in FPR, we must take care
1307     // of alignment. For example if fun args are (f32, f64, f32), for first f32
1308     // we discard a0, now for f64 argument, which will go in F14F15, we must
1309     // first align GPR vector to even register by discarding a1, then discard
1310     // two GPRs a2 and a3. Now last f32 argument will go on stack.
1311     alignGPR(Source);
1312     discardNextGPRAndItsAliases(Source);
1313   }
1314   discardNextGPRAndItsAliases(Source);
1315   return true;
1316 }
1317 
discardUnavailableVFPRegsAndTheirAliases(CfgVector<RegNumT> * Regs)1318 void TargetMIPS32::CallingConv::discardUnavailableVFPRegsAndTheirAliases(
1319     CfgVector<RegNumT> *Regs) {
1320   while (!Regs->empty() && VFPRegsUsed[Regs->back()]) {
1321     Regs->pop_back();
1322   }
1323 }
1324 
// Rewrites register-passed arguments: each such argument is replaced in the
// argument list by a "home register" variable pinned to its ABI register, and
// a prolog assignment copies the home register into the original variable.
// Stack-passed arguments are left alone (handled later in addProlog via
// finishArgumentLowering).
void TargetMIPS32::lowerArguments() {
  VarList &Args = Func->getArgs();
  TargetMIPS32::CallingConv CC;

  // For each register argument, replace Arg in the argument list with the home
  // register. Then generate an instruction in the prolog to copy the home
  // register to the assigned location of Arg.
  Context.init(Func->getEntryNode());
  Context.setInsertPoint(Context.getCur());

  // v4f32 is returned through stack. $4 is setup by the caller and passed as
  // first argument implicitly. Callee then copies the return vector at $4.
  Variable *ImplicitRetVec = nullptr;
  if (isVectorFloatingType(Func->getReturnType())) {
    ImplicitRetVec = Func->makeVariable(IceType_i32);
    ImplicitRetVec->setName(Func, "ImplicitRet_v4f32");
    ImplicitRetVec->setIsArg();
    // The hidden return pointer becomes argument 0, shifting user arguments.
    Args.insert(Args.begin(), ImplicitRetVec);
    setImplicitRet(ImplicitRetVec);
  }

  for (SizeT i = 0, E = Args.size(); i < E; ++i) {
    Variable *Arg = Args[i];
    Type Ty = Arg->getType();
    RegNumT RegNum;
    if (!CC.argInReg(Ty, i, &RegNum)) {
      // Passed on the stack; nothing to do here.
      continue;
    }
    Variable *RegisterArg = Func->makeVariable(Ty);
    if (BuildDefs::dump()) {
      RegisterArg->setName(Func, "home_reg:" + Arg->getName());
    }
    RegisterArg->setIsArg();
    Arg->setIsArg(false);
    Args[i] = RegisterArg;

    if (isVectorType(Ty)) {
      // Vectors occupy consecutive GPRs starting at RegNum, one 32-bit
      // container per element.
      auto *RegisterArgVec = llvm::cast<VariableVecOn32>(RegisterArg);
      RegisterArgVec->initVecElement(Func);
      RegisterArgVec->getContainers()[0]->setRegNum(
          RegNumT::fixme((unsigned)RegNum + 0));
      RegisterArgVec->getContainers()[1]->setRegNum(
          RegNumT::fixme((unsigned)RegNum + 1));
      // The first two elements of a second vector argument are passed in
      // $6:$7 and the remaining two on the stack, so no registers are
      // assigned to elements 2 and 3 in that case.
      if (i == 0) {
        RegisterArgVec->getContainers()[2]->setRegNum(
            RegNumT::fixme((unsigned)RegNum + 2));
        RegisterArgVec->getContainers()[3]->setRegNum(
            RegNumT::fixme((unsigned)RegNum + 3));
      } else {
        RegisterArgVec->getContainers()[2]->setRegNum(
            RegNumT::fixme(RegNumT()));
        RegisterArgVec->getContainers()[3]->setRegNum(
            RegNumT::fixme(RegNumT()));
      }
    } else {
      switch (Ty) {
      default: {
        RegisterArg->setRegNum(RegNum);
      } break;
      case IceType_i64: {
        // i64 occupies a GPR pair: lo in the first register, hi in the second.
        auto *RegisterArg64 = llvm::cast<Variable64On32>(RegisterArg);
        RegisterArg64->initHiLo(Func);
        RegisterArg64->getLo()->setRegNum(
            RegNumT::fixme(RegMIPS32::get64PairFirstRegNum(RegNum)));
        RegisterArg64->getHi()->setRegNum(
            RegNumT::fixme(RegMIPS32::get64PairSecondRegNum(RegNum)));
      } break;
      }
    }
    Context.insert<InstAssign>(Arg, RegisterArg);
  }

  // Insert fake use of ImplicitRet_v4f32 before each return to keep it live.
  if (ImplicitRetVec) {
    for (CfgNode *Node : Func->getNodes()) {
      for (Inst &Instr : Node->getInsts()) {
        if (llvm::isa<InstRet>(&Instr)) {
          Context.setInsertPoint(instToIterator(&Instr));
          Context.insert<InstFakeUse>(ImplicitRetVec);
          break;
        }
      }
    }
  }
}
1413 
stackSlotType()1414 Type TargetMIPS32::stackSlotType() { return IceType_i32; }
1415 
// Helper function for addProlog().
//
// This assumes Arg is an argument passed on the stack. This sets the frame
// offset for Arg and updates InArgsSizeBytes according to Arg's width. For an
// I64 arg that has been split into Lo and Hi components, it calls itself
// recursively on the components, taking care to handle Lo first because of the
// little-endian architecture. Lastly, this function generates an instruction
// to copy Arg into its assigned register if applicable.
void TargetMIPS32::finishArgumentLowering(Variable *Arg, bool PartialOnStack,
                                          Variable *FramePtr,
                                          size_t BasicFrameOffset,
                                          size_t *InArgsSizeBytes) {
  const Type Ty = Arg->getType();
  // Keep the running in-args size aligned for this argument's type.
  *InArgsSizeBytes = applyStackAlignmentTy(*InArgsSizeBytes, Ty);

  // If $4 is used for any scalar type (or returning v4f32) then the next
  // vector type is passed in $6:$7:stack:stack, and its 3rd and 4th elements
  // are loaded from the argument stack. When PartialOnStack is true, only
  // elements 2 and 3 are processed here (0 and 1 arrived in registers);
  // otherwise all four elements are handled recursively.
  if (auto *ArgVecOn32 = llvm::dyn_cast<VariableVecOn32>(Arg)) {
    if (PartialOnStack == false) {
      auto *Elem0 = ArgVecOn32->getContainers()[0];
      auto *Elem1 = ArgVecOn32->getContainers()[1];
      finishArgumentLowering(Elem0, PartialOnStack, FramePtr, BasicFrameOffset,
                             InArgsSizeBytes);
      finishArgumentLowering(Elem1, PartialOnStack, FramePtr, BasicFrameOffset,
                             InArgsSizeBytes);
    }
    auto *Elem2 = ArgVecOn32->getContainers()[2];
    auto *Elem3 = ArgVecOn32->getContainers()[3];
    finishArgumentLowering(Elem2, PartialOnStack, FramePtr, BasicFrameOffset,
                           InArgsSizeBytes);
    finishArgumentLowering(Elem3, PartialOnStack, FramePtr, BasicFrameOffset,
                           InArgsSizeBytes);
    return;
  }

  // Split i64: recurse on Lo before Hi (little-endian stack layout).
  if (auto *Arg64On32 = llvm::dyn_cast<Variable64On32>(Arg)) {
    Variable *const Lo = Arg64On32->getLo();
    Variable *const Hi = Arg64On32->getHi();
    finishArgumentLowering(Lo, PartialOnStack, FramePtr, BasicFrameOffset,
                           InArgsSizeBytes);
    finishArgumentLowering(Hi, PartialOnStack, FramePtr, BasicFrameOffset,
                           InArgsSizeBytes);
    return;
  }

  // From this point on, Arg is a scalar of at most 32 bits.
  assert(Ty != IceType_i64);
  assert(!isVectorType(Ty));

  const int32_t ArgStackOffset = BasicFrameOffset + *InArgsSizeBytes;
  *InArgsSizeBytes += typeWidthInBytesOnStack(Ty);

  if (!Arg->hasReg()) {
    // No home register: the argument lives in its incoming stack slot.
    Arg->setStackOffset(ArgStackOffset);
    return;
  }

  // If the argument variable has been assigned a register, we need to copy the
  // value from the stack slot.
  Variable *Parameter = Func->makeVariable(Ty);
  Parameter->setMustNotHaveReg();
  Parameter->setStackOffset(ArgStackOffset);
  _mov(Arg, Parameter);
}
1480 
addProlog(CfgNode * Node)1481 void TargetMIPS32::addProlog(CfgNode *Node) {
1482   // Stack frame layout:
1483   //
1484   // +------------------------+
1485   // | 1. preserved registers |
1486   // +------------------------+
1487   // | 2. padding             |
1488   // +------------------------+
1489   // | 3. global spill area   |
1490   // +------------------------+
1491   // | 4. padding             |
1492   // +------------------------+
1493   // | 5. local spill area    |
1494   // +------------------------+
1495   // | 6. padding             |
1496   // +------------------------+
1497   // | 7. allocas             |
1498   // +------------------------+
1499   // | 8. padding             |
1500   // +------------------------+
1501   // | 9. out args            |
1502   // +------------------------+ <--- StackPointer
1503   //
1504   // The following variables record the size in bytes of the given areas:
1505   //  * PreservedRegsSizeBytes: area 1
1506   //  * SpillAreaPaddingBytes:  area 2
1507   //  * GlobalsSize:            area 3
1508   //  * GlobalsAndSubsequentPaddingSize: areas 3 - 4
1509   //  * LocalsSpillAreaSize:    area 5
1510   //  * SpillAreaSizeBytes:     areas 2 - 9
1511   //  * maxOutArgsSizeBytes():  area 9
1512 
1513   Context.init(Node);
1514   Context.setInsertPoint(Context.getCur());
1515 
1516   SmallBitVector CalleeSaves = getRegisterSet(RegSet_CalleeSave, RegSet_None);
1517   RegsUsed = SmallBitVector(CalleeSaves.size());
1518 
1519   VarList SortedSpilledVariables;
1520 
1521   size_t GlobalsSize = 0;
1522   // If there is a separate locals area, this represents that area. Otherwise
1523   // it counts any variable not counted by GlobalsSize.
1524   SpillAreaSizeBytes = 0;
1525   // If there is a separate locals area, this specifies the alignment for it.
1526   uint32_t LocalsSlotsAlignmentBytes = 0;
1527   // The entire spill locations area gets aligned to largest natural alignment
1528   // of the variables that have a spill slot.
1529   uint32_t SpillAreaAlignmentBytes = 0;
1530   // For now, we don't have target-specific variables that need special
1531   // treatment (no stack-slot-linked SpillVariable type).
1532   std::function<bool(Variable *)> TargetVarHook = [](Variable *Var) {
1533     static constexpr bool AssignStackSlot = false;
1534     static constexpr bool DontAssignStackSlot = !AssignStackSlot;
1535     if (llvm::isa<Variable64On32>(Var)) {
1536       return DontAssignStackSlot;
1537     }
1538     return AssignStackSlot;
1539   };
1540 
1541   // Compute the list of spilled variables and bounds for GlobalsSize, etc.
1542   getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize,
1543                         &SpillAreaSizeBytes, &SpillAreaAlignmentBytes,
1544                         &LocalsSlotsAlignmentBytes, TargetVarHook);
1545   uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes;
1546   SpillAreaSizeBytes += GlobalsSize;
1547 
1548   PreservedGPRs.reserve(CalleeSaves.size());
1549 
1550   // Consider FP and RA as callee-save / used as needed.
1551   if (UsesFramePointer) {
1552     if (RegsUsed[RegMIPS32::Reg_FP]) {
1553       llvm::report_fatal_error("Frame pointer has been used.");
1554     }
1555     CalleeSaves[RegMIPS32::Reg_FP] = true;
1556     RegsUsed[RegMIPS32::Reg_FP] = true;
1557   }
1558   if (!MaybeLeafFunc) {
1559     CalleeSaves[RegMIPS32::Reg_RA] = true;
1560     RegsUsed[RegMIPS32::Reg_RA] = true;
1561   }
1562 
1563   // Make two passes over the used registers. The first pass records all the
1564   // used registers -- and their aliases. Then, we figure out which GPR
1565   // registers should be saved.
1566   SmallBitVector ToPreserve(RegMIPS32::Reg_NUM);
1567   for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
1568     if (CalleeSaves[i] && RegsUsed[i]) {
1569       ToPreserve |= RegisterAliases[i];
1570     }
1571   }
1572 
1573   uint32_t NumCallee = 0;
1574 
1575   // RegClasses is a tuple of
1576   //
1577   // <First Register in Class, Last Register in Class, Vector of Save Registers>
1578   //
1579   // We use this tuple to figure out which register we should save/restore
1580   // during
1581   // prolog/epilog.
1582   using RegClassType = std::tuple<uint32_t, uint32_t, VarList *>;
1583   const RegClassType RegClass = RegClassType(
1584       RegMIPS32::Reg_GPR_First, RegMIPS32::Reg_FPR_Last, &PreservedGPRs);
1585   const uint32_t FirstRegInClass = std::get<0>(RegClass);
1586   const uint32_t LastRegInClass = std::get<1>(RegClass);
1587   VarList *const PreservedRegsInClass = std::get<2>(RegClass);
1588   for (uint32_t Reg = LastRegInClass; Reg > FirstRegInClass; Reg--) {
1589     if (!ToPreserve[Reg]) {
1590       continue;
1591     }
1592     ++NumCallee;
1593     Variable *PhysicalRegister = getPhysicalRegister(RegNumT::fromInt(Reg));
1594     PreservedRegsSizeBytes +=
1595         typeWidthInBytesOnStack(PhysicalRegister->getType());
1596     PreservedRegsInClass->push_back(PhysicalRegister);
1597   }
1598 
1599   Ctx->statsUpdateRegistersSaved(NumCallee);
1600 
1601   // Align the variables area. SpillAreaPaddingBytes is the size of the region
1602   // after the preserved registers and before the spill areas.
1603   // LocalsSlotsPaddingBytes is the amount of padding between the globals and
1604   // locals area if they are separate.
1605   assert(SpillAreaAlignmentBytes <= MIPS32_STACK_ALIGNMENT_BYTES);
1606   (void)MIPS32_STACK_ALIGNMENT_BYTES;
1607   assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes);
1608   uint32_t SpillAreaPaddingBytes = 0;
1609   uint32_t LocalsSlotsPaddingBytes = 0;
1610   alignStackSpillAreas(PreservedRegsSizeBytes, SpillAreaAlignmentBytes,
1611                        GlobalsSize, LocalsSlotsAlignmentBytes,
1612                        &SpillAreaPaddingBytes, &LocalsSlotsPaddingBytes);
1613   SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes;
1614   uint32_t GlobalsAndSubsequentPaddingSize =
1615       GlobalsSize + LocalsSlotsPaddingBytes;
1616 
1617   // Adds the out args space to the stack, and align SP if necessary.
1618   if (!NeedsStackAlignment) {
1619     SpillAreaSizeBytes += MaxOutArgsSizeBytes * (VariableAllocaUsed ? 0 : 1);
1620   } else {
1621     SpillAreaSizeBytes = applyStackAlignment(
1622         SpillAreaSizeBytes +
1623         (VariableAllocaUsed ? VariableAllocaAlignBytes : MaxOutArgsSizeBytes));
1624   }
1625 
1626   // Combine fixed alloca with SpillAreaSize.
1627   SpillAreaSizeBytes += FixedAllocaSizeBytes;
1628 
1629   TotalStackSizeBytes =
1630       applyStackAlignment(PreservedRegsSizeBytes + SpillAreaSizeBytes);
1631 
1632   // Generate "addiu sp, sp, -TotalStackSizeBytes"
1633   if (TotalStackSizeBytes) {
1634     // Use the scratch register if needed to legalize the immediate.
1635     Sandboxer(this).addiu_sp(-TotalStackSizeBytes);
1636   }
1637 
1638   Ctx->statsUpdateFrameBytes(TotalStackSizeBytes);
1639 
1640   if (!PreservedGPRs.empty()) {
1641     uint32_t StackOffset = TotalStackSizeBytes;
1642     for (Variable *Var : *PreservedRegsInClass) {
1643       Type RegType;
1644       if (RegMIPS32::isFPRReg(Var->getRegNum()))
1645         RegType = IceType_f32;
1646       else
1647         RegType = IceType_i32;
1648       auto *PhysicalRegister = makeReg(RegType, Var->getRegNum());
1649       StackOffset -= typeWidthInBytesOnStack(RegType);
1650       Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP);
1651       OperandMIPS32Mem *MemoryLocation = OperandMIPS32Mem::create(
1652           Func, RegType, SP,
1653           llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(StackOffset)));
1654       Sandboxer(this).sw(PhysicalRegister, MemoryLocation);
1655     }
1656   }
1657 
1658   Variable *FP = getPhysicalRegister(RegMIPS32::Reg_FP);
1659 
1660   // Generate "mov FP, SP" if needed.
1661   if (UsesFramePointer) {
1662     Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP);
1663     _mov(FP, SP);
1664     // Keep FP live for late-stage liveness analysis (e.g. asm-verbose mode).
1665     Context.insert<InstFakeUse>(FP);
1666   }
1667 
1668   // Fill in stack offsets for stack args, and copy args into registers for
1669   // those that were register-allocated. Args are pushed right to left, so
1670   // Arg[0] is closest to the stack/frame pointer.
1671   const VarList &Args = Func->getArgs();
1672   size_t InArgsSizeBytes = MIPS32_MAX_GPR_ARG * 4;
1673   TargetMIPS32::CallingConv CC;
1674   uint32_t ArgNo = 0;
1675 
1676   for (Variable *Arg : Args) {
1677     RegNumT DummyReg;
1678     const Type Ty = Arg->getType();
1679     bool PartialOnStack;
1680     // Skip arguments passed in registers.
1681     if (CC.argInReg(Ty, ArgNo, &DummyReg)) {
1682       // Load argument from stack:
1683       // 1. If this is first vector argument and return type is v4f32.
1684       //    In this case $4 is used to pass stack address implicitly.
1685       //    3rd and 4th element of vector argument is passed through stack.
1686       // 2. If this is second vector argument.
1687       if (ArgNo != 0 && isVectorType(Ty)) {
1688         PartialOnStack = true;
1689         finishArgumentLowering(Arg, PartialOnStack, FP, TotalStackSizeBytes,
1690                                &InArgsSizeBytes);
1691       }
1692     } else {
1693       PartialOnStack = false;
1694       finishArgumentLowering(Arg, PartialOnStack, FP, TotalStackSizeBytes,
1695                              &InArgsSizeBytes);
1696     }
1697     ++ArgNo;
1698   }
1699 
1700   // Fill in stack offsets for locals.
1701   assignVarStackSlots(SortedSpilledVariables, SpillAreaPaddingBytes,
1702                       SpillAreaSizeBytes, GlobalsAndSubsequentPaddingSize);
1703   this->HasComputedFrame = true;
1704 
1705   if (BuildDefs::dump() && Func->isVerbose(IceV_Frame)) {
1706     OstreamLocker _(Func->getContext());
1707     Ostream &Str = Func->getContext()->getStrDump();
1708 
1709     Str << "Stack layout:\n";
1710     uint32_t SPAdjustmentPaddingSize =
1711         SpillAreaSizeBytes - LocalsSpillAreaSize -
1712         GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes -
1713         MaxOutArgsSizeBytes;
1714     Str << " in-args = " << InArgsSizeBytes << " bytes\n"
1715         << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n"
1716         << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n"
1717         << " globals spill area = " << GlobalsSize << " bytes\n"
1718         << " globals-locals spill areas intermediate padding = "
1719         << GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n"
1720         << " locals spill area = " << LocalsSpillAreaSize << " bytes\n"
1721         << " SP alignment padding = " << SPAdjustmentPaddingSize << " bytes\n";
1722 
1723     Str << "Stack details:\n"
1724         << " SP adjustment = " << SpillAreaSizeBytes << " bytes\n"
1725         << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n"
1726         << " outgoing args size = " << MaxOutArgsSizeBytes << " bytes\n"
1727         << " locals spill area alignment = " << LocalsSlotsAlignmentBytes
1728         << " bytes\n"
1729         << " is FP based = " << 1 << "\n";
1730   }
1731   return;
1732 }
1733 
addEpilog(CfgNode * Node)1734 void TargetMIPS32::addEpilog(CfgNode *Node) {
1735   InstList &Insts = Node->getInsts();
1736   InstList::reverse_iterator RI, E;
1737   for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) {
1738     if (llvm::isa<InstMIPS32Ret>(*RI))
1739       break;
1740   }
1741   if (RI == E)
1742     return;
1743 
1744   // Convert the reverse_iterator position into its corresponding (forward)
1745   // iterator position.
1746   InstList::iterator InsertPoint = reverseToForwardIterator(RI);
1747   --InsertPoint;
1748   Context.init(Node);
1749   Context.setInsertPoint(InsertPoint);
1750 
1751   Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP);
1752   if (UsesFramePointer) {
1753     Variable *FP = getPhysicalRegister(RegMIPS32::Reg_FP);
1754     // For late-stage liveness analysis (e.g. asm-verbose mode), adding a fake
1755     // use of SP before the assignment of SP=FP keeps previous SP adjustments
1756     // from being dead-code eliminated.
1757     Context.insert<InstFakeUse>(SP);
1758     Sandboxer(this).reset_sp(FP);
1759   }
1760 
1761   VarList::reverse_iterator RIter, END;
1762 
1763   if (!PreservedGPRs.empty()) {
1764     uint32_t StackOffset = TotalStackSizeBytes - PreservedRegsSizeBytes;
1765     for (RIter = PreservedGPRs.rbegin(), END = PreservedGPRs.rend();
1766          RIter != END; ++RIter) {
1767       Type RegType;
1768       if (RegMIPS32::isFPRReg((*RIter)->getRegNum()))
1769         RegType = IceType_f32;
1770       else
1771         RegType = IceType_i32;
1772       auto *PhysicalRegister = makeReg(RegType, (*RIter)->getRegNum());
1773       Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP);
1774       OperandMIPS32Mem *MemoryLocation = OperandMIPS32Mem::create(
1775           Func, RegType, SP,
1776           llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(StackOffset)));
1777       _lw(PhysicalRegister, MemoryLocation);
1778       StackOffset += typeWidthInBytesOnStack(PhysicalRegister->getType());
1779     }
1780   }
1781 
1782   if (TotalStackSizeBytes) {
1783     Sandboxer(this).addiu_sp(TotalStackSizeBytes);
1784   }
1785   if (!getFlags().getUseSandboxing())
1786     return;
1787 
1788   Variable *RA = getPhysicalRegister(RegMIPS32::Reg_RA);
1789   Variable *RetValue = nullptr;
1790   if (RI->getSrcSize())
1791     RetValue = llvm::cast<Variable>(RI->getSrc(0));
1792 
1793   Sandboxer(this).ret(RA, RetValue);
1794 
1795   RI->setDeleted();
1796 }
1797 
newBaseRegister(Variable * Base,int32_t Offset,RegNumT ScratchRegNum)1798 Variable *TargetMIPS32::PostLoweringLegalizer::newBaseRegister(
1799     Variable *Base, int32_t Offset, RegNumT ScratchRegNum) {
1800   // Legalize will likely need a lui/ori combination, but if the top bits are
1801   // all 0 from negating the offset and subtracting, we could use that instead.
1802   const bool ShouldSub = Offset != 0 && (-Offset & 0xFFFF0000) == 0;
1803   Variable *ScratchReg = Target->makeReg(IceType_i32, ScratchRegNum);
1804   if (ShouldSub) {
1805     Target->_addi(ScratchReg, Base, -Offset);
1806   } else {
1807     constexpr bool SignExt = true;
1808     if (!OperandMIPS32Mem::canHoldOffset(Base->getType(), SignExt, Offset)) {
1809       const uint32_t UpperBits = (Offset >> 16) & 0xFFFF;
1810       const uint32_t LowerBits = Offset & 0xFFFF;
1811       Target->_lui(ScratchReg, Target->Ctx->getConstantInt32(UpperBits));
1812       if (LowerBits)
1813         Target->_ori(ScratchReg, ScratchReg, LowerBits);
1814       Target->_addu(ScratchReg, ScratchReg, Base);
1815     } else {
1816       Target->_addiu(ScratchReg, Base, Offset);
1817     }
1818   }
1819 
1820   return ScratchReg;
1821 }
1822 
legalizeMovFp(InstMIPS32MovFP64ToI64 * MovInstr)1823 void TargetMIPS32::PostLoweringLegalizer::legalizeMovFp(
1824     InstMIPS32MovFP64ToI64 *MovInstr) {
1825   Variable *Dest = MovInstr->getDest();
1826   Operand *Src = MovInstr->getSrc(0);
1827   const Type SrcTy = Src->getType();
1828 
1829   if (Dest != nullptr && SrcTy == IceType_f64) {
1830     int32_t Offset = Dest->getStackOffset();
1831     auto *Base = Target->getPhysicalRegister(Target->getFrameOrStackReg());
1832     OperandMIPS32Mem *TAddr = OperandMIPS32Mem::create(
1833         Target->Func, IceType_f32, Base,
1834         llvm::cast<ConstantInteger32>(Target->Ctx->getConstantInt32(Offset)));
1835     OperandMIPS32Mem *Addr = legalizeMemOperand(TAddr);
1836     auto *SrcV = llvm::cast<Variable>(Src);
1837     Variable *SrcR;
1838     if (MovInstr->getInt64Part() == Int64_Lo) {
1839       SrcR = Target->makeReg(
1840           IceType_f32, RegMIPS32::get64PairFirstRegNum(SrcV->getRegNum()));
1841     } else {
1842       SrcR = Target->makeReg(
1843           IceType_f32, RegMIPS32::get64PairSecondRegNum(SrcV->getRegNum()));
1844     }
1845     Sandboxer(Target).sw(SrcR, Addr);
1846     if (MovInstr->isDestRedefined()) {
1847       Target->_set_dest_redefined();
1848     }
1849     MovInstr->setDeleted();
1850     return;
1851   }
1852 
1853   llvm::report_fatal_error("legalizeMovFp: Invalid operands");
1854 }
1855 
legalizeMov(InstMIPS32Mov * MovInstr)1856 void TargetMIPS32::PostLoweringLegalizer::legalizeMov(InstMIPS32Mov *MovInstr) {
1857   Variable *Dest = MovInstr->getDest();
1858   assert(Dest != nullptr);
1859   const Type DestTy = Dest->getType();
1860   assert(DestTy != IceType_i64);
1861 
1862   Operand *Src = MovInstr->getSrc(0);
1863   const Type SrcTy = Src->getType();
1864   (void)SrcTy;
1865   assert(SrcTy != IceType_i64);
1866 
1867   bool Legalized = false;
1868   auto *SrcR = llvm::cast<Variable>(Src);
1869   if (Dest->hasReg() && SrcR->hasReg()) {
1870     // This might be a GP to/from FP move generated due to argument passing.
1871     // Use mtc1/mfc1 instead of mov.[s/d] if src and dst registers are of
1872     // different types.
1873     const bool IsDstGPR = RegMIPS32::isGPRReg(Dest->getRegNum());
1874     const bool IsSrcGPR = RegMIPS32::isGPRReg(SrcR->getRegNum());
1875     const RegNumT SRegNum = SrcR->getRegNum();
1876     const RegNumT DRegNum = Dest->getRegNum();
1877     if (IsDstGPR != IsSrcGPR) {
1878       if (IsDstGPR) {
1879         // Dest is GPR and SrcR is FPR. Use mfc1.
1880         int32_t TypeWidth = typeWidthInBytes(DestTy);
1881         if (MovInstr->getDestHi() != nullptr)
1882           TypeWidth += typeWidthInBytes(MovInstr->getDestHi()->getType());
1883         if (TypeWidth == 8) {
1884           // Split it into two mfc1 instructions
1885           Variable *SrcGPRHi = Target->makeReg(
1886               IceType_f32, RegMIPS32::get64PairFirstRegNum(SRegNum));
1887           Variable *SrcGPRLo = Target->makeReg(
1888               IceType_f32, RegMIPS32::get64PairSecondRegNum(SRegNum));
1889           Variable *DstFPRHi, *DstFPRLo;
1890           if (MovInstr->getDestHi() != nullptr && Dest != nullptr) {
1891             DstFPRHi = Target->makeReg(IceType_i32,
1892                                        MovInstr->getDestHi()->getRegNum());
1893             DstFPRLo = Target->makeReg(IceType_i32, Dest->getRegNum());
1894           } else {
1895             DstFPRHi = Target->makeReg(
1896                 IceType_i32, RegMIPS32::get64PairFirstRegNum(DRegNum));
1897             DstFPRLo = Target->makeReg(
1898                 IceType_i32, RegMIPS32::get64PairSecondRegNum(DRegNum));
1899           }
1900           Target->_mov(DstFPRHi, SrcGPRHi);
1901           Target->_mov(DstFPRLo, SrcGPRLo);
1902           Legalized = true;
1903         } else {
1904           Variable *SrcGPR = Target->makeReg(IceType_f32, SRegNum);
1905           Variable *DstFPR = Target->makeReg(IceType_i32, DRegNum);
1906           Target->_mov(DstFPR, SrcGPR);
1907           Legalized = true;
1908         }
1909       } else {
1910         // Dest is FPR and SrcR is GPR. Use mtc1.
1911         if (typeWidthInBytes(Dest->getType()) == 8) {
1912           Variable *SrcGPRHi, *SrcGPRLo;
1913           // SrcR could be $zero which is i32
1914           if (SRegNum == RegMIPS32::Reg_ZERO) {
1915             SrcGPRHi = Target->makeReg(IceType_i32, SRegNum);
1916             SrcGPRLo = SrcGPRHi;
1917           } else {
1918             // Split it into two mtc1 instructions
1919             if (MovInstr->getSrcSize() == 2) {
1920               const auto FirstReg =
1921                   (llvm::cast<Variable>(MovInstr->getSrc(0)))->getRegNum();
1922               const auto SecondReg =
1923                   (llvm::cast<Variable>(MovInstr->getSrc(1)))->getRegNum();
1924               SrcGPRHi = Target->makeReg(IceType_i32, FirstReg);
1925               SrcGPRLo = Target->makeReg(IceType_i32, SecondReg);
1926             } else {
1927               SrcGPRLo = Target->makeReg(
1928                   IceType_i32, RegMIPS32::get64PairFirstRegNum(SRegNum));
1929               SrcGPRHi = Target->makeReg(
1930                   IceType_i32, RegMIPS32::get64PairSecondRegNum(SRegNum));
1931             }
1932           }
1933           Variable *DstFPRHi = Target->makeReg(
1934               IceType_f32, RegMIPS32::get64PairFirstRegNum(DRegNum));
1935           Variable *DstFPRLo = Target->makeReg(
1936               IceType_f32, RegMIPS32::get64PairSecondRegNum(DRegNum));
1937           Target->_mov(DstFPRHi, SrcGPRLo);
1938           Target->_mov(DstFPRLo, SrcGPRHi);
1939           Legalized = true;
1940         } else {
1941           Variable *SrcGPR = Target->makeReg(IceType_i32, SRegNum);
1942           Variable *DstFPR = Target->makeReg(IceType_f32, DRegNum);
1943           Target->_mov(DstFPR, SrcGPR);
1944           Legalized = true;
1945         }
1946       }
1947     }
1948     if (Legalized) {
1949       if (MovInstr->isDestRedefined()) {
1950         Target->_set_dest_redefined();
1951       }
1952       MovInstr->setDeleted();
1953       return;
1954     }
1955   }
1956 
1957   if (!Dest->hasReg()) {
1958     auto *SrcR = llvm::cast<Variable>(Src);
1959     assert(SrcR->hasReg());
1960     assert(!SrcR->isRematerializable());
1961     int32_t Offset = Dest->getStackOffset();
1962 
1963     // This is a _mov(Mem(), Variable), i.e., a store.
1964     auto *Base = Target->getPhysicalRegister(Target->getFrameOrStackReg());
1965 
1966     OperandMIPS32Mem *TAddr = OperandMIPS32Mem::create(
1967         Target->Func, DestTy, Base,
1968         llvm::cast<ConstantInteger32>(Target->Ctx->getConstantInt32(Offset)));
1969     OperandMIPS32Mem *TAddrHi = OperandMIPS32Mem::create(
1970         Target->Func, DestTy, Base,
1971         llvm::cast<ConstantInteger32>(
1972             Target->Ctx->getConstantInt32(Offset + 4)));
1973     OperandMIPS32Mem *Addr = legalizeMemOperand(TAddr);
1974 
1975     // FP arguments are passed in GP reg if first argument is in GP. In this
1976     // case type of the SrcR is still FP thus we need to explicitly generate sw
1977     // instead of swc1.
1978     const RegNumT RegNum = SrcR->getRegNum();
1979     const bool IsSrcGPReg = RegMIPS32::isGPRReg(SrcR->getRegNum());
1980     if (SrcTy == IceType_f32 && IsSrcGPReg) {
1981       Variable *SrcGPR = Target->makeReg(IceType_i32, RegNum);
1982       Sandboxer(Target).sw(SrcGPR, Addr);
1983     } else if (SrcTy == IceType_f64 && IsSrcGPReg) {
1984       Variable *SrcGPRHi =
1985           Target->makeReg(IceType_i32, RegMIPS32::get64PairFirstRegNum(RegNum));
1986       Variable *SrcGPRLo = Target->makeReg(
1987           IceType_i32, RegMIPS32::get64PairSecondRegNum(RegNum));
1988       Sandboxer(Target).sw(SrcGPRHi, Addr);
1989       OperandMIPS32Mem *AddrHi = legalizeMemOperand(TAddrHi);
1990       Sandboxer(Target).sw(SrcGPRLo, AddrHi);
1991     } else if (DestTy == IceType_f64 && IsSrcGPReg) {
1992       const auto FirstReg =
1993           (llvm::cast<Variable>(MovInstr->getSrc(0)))->getRegNum();
1994       const auto SecondReg =
1995           (llvm::cast<Variable>(MovInstr->getSrc(1)))->getRegNum();
1996       Variable *SrcGPRHi = Target->makeReg(IceType_i32, FirstReg);
1997       Variable *SrcGPRLo = Target->makeReg(IceType_i32, SecondReg);
1998       Sandboxer(Target).sw(SrcGPRLo, Addr);
1999       OperandMIPS32Mem *AddrHi = legalizeMemOperand(TAddrHi);
2000       Sandboxer(Target).sw(SrcGPRHi, AddrHi);
2001     } else {
2002       Sandboxer(Target).sw(SrcR, Addr);
2003     }
2004 
2005     Target->Context.insert<InstFakeDef>(Dest);
2006     Legalized = true;
2007   } else if (auto *Var = llvm::dyn_cast<Variable>(Src)) {
2008     if (Var->isRematerializable()) {
2009       // This is equivalent to an x86 _lea(RematOffset(%esp/%ebp), Variable).
2010 
2011       // ExtraOffset is only needed for stack-pointer based frames as we have
2012       // to account for spill storage.
2013       const int32_t ExtraOffset =
2014           (Var->getRegNum() == Target->getFrameOrStackReg())
2015               ? Target->getFrameFixedAllocaOffset()
2016               : 0;
2017 
2018       const int32_t Offset = Var->getStackOffset() + ExtraOffset;
2019       Variable *Base = Target->getPhysicalRegister(Var->getRegNum());
2020       Variable *T = newBaseRegister(Base, Offset, Dest->getRegNum());
2021       Target->_mov(Dest, T);
2022       Legalized = true;
2023     } else {
2024       if (!Var->hasReg()) {
2025         // This is a _mov(Variable, Mem()), i.e., a load.
2026         const int32_t Offset = Var->getStackOffset();
2027         auto *Base = Target->getPhysicalRegister(Target->getFrameOrStackReg());
2028         const RegNumT RegNum = Dest->getRegNum();
2029         const bool IsDstGPReg = RegMIPS32::isGPRReg(Dest->getRegNum());
2030         // If we are moving i64 to a double using stack then the address may
2031         // not be aligned to 8-byte boundary as we split i64 into Hi-Lo parts
2032         // and store them individually with 4-byte alignment. Load the Hi-Lo
2033         // parts in TmpReg and move them to the dest using mtc1.
2034         if (DestTy == IceType_f64 && !Utils::IsAligned(Offset, 8) &&
2035             !IsDstGPReg) {
2036           auto *Reg = Target->makeReg(IceType_i32, Target->getReservedTmpReg());
2037           const RegNumT RegNum = Dest->getRegNum();
2038           Variable *DestLo = Target->makeReg(
2039               IceType_f32, RegMIPS32::get64PairFirstRegNum(RegNum));
2040           Variable *DestHi = Target->makeReg(
2041               IceType_f32, RegMIPS32::get64PairSecondRegNum(RegNum));
2042           OperandMIPS32Mem *AddrLo = OperandMIPS32Mem::create(
2043               Target->Func, IceType_i32, Base,
2044               llvm::cast<ConstantInteger32>(
2045                   Target->Ctx->getConstantInt32(Offset)));
2046           OperandMIPS32Mem *AddrHi = OperandMIPS32Mem::create(
2047               Target->Func, IceType_i32, Base,
2048               llvm::cast<ConstantInteger32>(
2049                   Target->Ctx->getConstantInt32(Offset + 4)));
2050           Sandboxer(Target).lw(Reg, AddrLo);
2051           Target->_mov(DestLo, Reg);
2052           Sandboxer(Target).lw(Reg, AddrHi);
2053           Target->_mov(DestHi, Reg);
2054         } else {
2055           OperandMIPS32Mem *TAddr = OperandMIPS32Mem::create(
2056               Target->Func, DestTy, Base,
2057               llvm::cast<ConstantInteger32>(
2058                   Target->Ctx->getConstantInt32(Offset)));
2059           OperandMIPS32Mem *Addr = legalizeMemOperand(TAddr);
2060           OperandMIPS32Mem *TAddrHi = OperandMIPS32Mem::create(
2061               Target->Func, DestTy, Base,
2062               llvm::cast<ConstantInteger32>(
2063                   Target->Ctx->getConstantInt32(Offset + 4)));
2064           // FP arguments are passed in GP reg if first argument is in GP.
2065           // In this case type of the Dest is still FP thus we need to
2066           // explicitly generate lw instead of lwc1.
2067           if (DestTy == IceType_f32 && IsDstGPReg) {
2068             Variable *DstGPR = Target->makeReg(IceType_i32, RegNum);
2069             Sandboxer(Target).lw(DstGPR, Addr);
2070           } else if (DestTy == IceType_f64 && IsDstGPReg) {
2071             Variable *DstGPRHi = Target->makeReg(
2072                 IceType_i32, RegMIPS32::get64PairFirstRegNum(RegNum));
2073             Variable *DstGPRLo = Target->makeReg(
2074                 IceType_i32, RegMIPS32::get64PairSecondRegNum(RegNum));
2075             Sandboxer(Target).lw(DstGPRHi, Addr);
2076             OperandMIPS32Mem *AddrHi = legalizeMemOperand(TAddrHi);
2077             Sandboxer(Target).lw(DstGPRLo, AddrHi);
2078           } else if (DestTy == IceType_f64 && IsDstGPReg) {
2079             const auto FirstReg =
2080                 (llvm::cast<Variable>(MovInstr->getSrc(0)))->getRegNum();
2081             const auto SecondReg =
2082                 (llvm::cast<Variable>(MovInstr->getSrc(1)))->getRegNum();
2083             Variable *DstGPRHi = Target->makeReg(IceType_i32, FirstReg);
2084             Variable *DstGPRLo = Target->makeReg(IceType_i32, SecondReg);
2085             Sandboxer(Target).lw(DstGPRLo, Addr);
2086             OperandMIPS32Mem *AddrHi = legalizeMemOperand(TAddrHi);
2087             Sandboxer(Target).lw(DstGPRHi, AddrHi);
2088           } else {
2089             Sandboxer(Target).lw(Dest, Addr);
2090           }
2091         }
2092         Legalized = true;
2093       }
2094     }
2095   }
2096 
2097   if (Legalized) {
2098     if (MovInstr->isDestRedefined()) {
2099       Target->_set_dest_redefined();
2100     }
2101     MovInstr->setDeleted();
2102   }
2103 }
2104 
2105 OperandMIPS32Mem *
legalizeMemOperand(OperandMIPS32Mem * Mem)2106 TargetMIPS32::PostLoweringLegalizer::legalizeMemOperand(OperandMIPS32Mem *Mem) {
2107   if (llvm::isa<ConstantRelocatable>(Mem->getOffset())) {
2108     return nullptr;
2109   }
2110   Variable *Base = Mem->getBase();
2111   auto *Ci32 = llvm::cast<ConstantInteger32>(Mem->getOffset());
2112   int32_t Offset = Ci32->getValue();
2113 
2114   if (Base->isRematerializable()) {
2115     const int32_t ExtraOffset =
2116         (Base->getRegNum() == Target->getFrameOrStackReg())
2117             ? Target->getFrameFixedAllocaOffset()
2118             : 0;
2119     Offset += Base->getStackOffset() + ExtraOffset;
2120     Base = Target->getPhysicalRegister(Base->getRegNum());
2121   }
2122 
2123   constexpr bool SignExt = true;
2124   if (!OperandMIPS32Mem::canHoldOffset(Mem->getType(), SignExt, Offset)) {
2125     Base = newBaseRegister(Base, Offset, Target->getReservedTmpReg());
2126     Offset = 0;
2127   }
2128 
2129   return OperandMIPS32Mem::create(
2130       Target->Func, Mem->getType(), Base,
2131       llvm::cast<ConstantInteger32>(Target->Ctx->getConstantInt32(Offset)));
2132 }
2133 
legalizeImmediate(int32_t Imm)2134 Variable *TargetMIPS32::PostLoweringLegalizer::legalizeImmediate(int32_t Imm) {
2135   Variable *Reg = nullptr;
2136   if (!((std::numeric_limits<int16_t>::min() <= Imm) &&
2137         (Imm <= std::numeric_limits<int16_t>::max()))) {
2138     const uint32_t UpperBits = (Imm >> 16) & 0xFFFF;
2139     const uint32_t LowerBits = Imm & 0xFFFF;
2140     Variable *TReg = Target->makeReg(IceType_i32, Target->getReservedTmpReg());
2141     Reg = Target->makeReg(IceType_i32, Target->getReservedTmpReg());
2142     if (LowerBits) {
2143       Target->_lui(TReg, Target->Ctx->getConstantInt32(UpperBits));
2144       Target->_ori(Reg, TReg, LowerBits);
2145     } else {
2146       Target->_lui(Reg, Target->Ctx->getConstantInt32(UpperBits));
2147     }
2148   }
2149   return Reg;
2150 }
2151 
postLowerLegalization()2152 void TargetMIPS32::postLowerLegalization() {
2153   Func->dump("Before postLowerLegalization");
2154   assert(hasComputedFrame());
2155   for (CfgNode *Node : Func->getNodes()) {
2156     Context.init(Node);
2157     PostLoweringLegalizer Legalizer(this);
2158     while (!Context.atEnd()) {
2159       PostIncrLoweringContext PostIncrement(Context);
2160       Inst *CurInstr = iteratorToInst(Context.getCur());
2161       const SizeT NumSrcs = CurInstr->getSrcSize();
2162       Operand *Src0 = NumSrcs < 1 ? nullptr : CurInstr->getSrc(0);
2163       Operand *Src1 = NumSrcs < 2 ? nullptr : CurInstr->getSrc(1);
2164       auto *Src0V = llvm::dyn_cast_or_null<Variable>(Src0);
2165       auto *Src0M = llvm::dyn_cast_or_null<OperandMIPS32Mem>(Src0);
2166       auto *Src1M = llvm::dyn_cast_or_null<OperandMIPS32Mem>(Src1);
2167       Variable *Dst = CurInstr->getDest();
2168       if (auto *MovInstr = llvm::dyn_cast<InstMIPS32Mov>(CurInstr)) {
2169         Legalizer.legalizeMov(MovInstr);
2170         continue;
2171       }
2172       if (auto *MovInstr = llvm::dyn_cast<InstMIPS32MovFP64ToI64>(CurInstr)) {
2173         Legalizer.legalizeMovFp(MovInstr);
2174         continue;
2175       }
2176       if (llvm::isa<InstMIPS32Sw>(CurInstr)) {
2177         if (auto *LegalMem = Legalizer.legalizeMemOperand(Src1M)) {
2178           Sandboxer(this).sw(Src0V, LegalMem);
2179           CurInstr->setDeleted();
2180         }
2181         continue;
2182       }
2183       if (llvm::isa<InstMIPS32Swc1>(CurInstr)) {
2184         if (auto *LegalMem = Legalizer.legalizeMemOperand(Src1M)) {
2185           _swc1(Src0V, LegalMem);
2186           CurInstr->setDeleted();
2187         }
2188         continue;
2189       }
2190       if (llvm::isa<InstMIPS32Sdc1>(CurInstr)) {
2191         if (auto *LegalMem = Legalizer.legalizeMemOperand(Src1M)) {
2192           _sdc1(Src0V, LegalMem);
2193           CurInstr->setDeleted();
2194         }
2195         continue;
2196       }
2197       if (llvm::isa<InstMIPS32Lw>(CurInstr)) {
2198         if (auto *LegalMem = Legalizer.legalizeMemOperand(Src0M)) {
2199           Sandboxer(this).lw(Dst, LegalMem);
2200           CurInstr->setDeleted();
2201         }
2202         continue;
2203       }
2204       if (llvm::isa<InstMIPS32Lwc1>(CurInstr)) {
2205         if (auto *LegalMem = Legalizer.legalizeMemOperand(Src0M)) {
2206           _lwc1(Dst, LegalMem);
2207           CurInstr->setDeleted();
2208         }
2209         continue;
2210       }
2211       if (llvm::isa<InstMIPS32Ldc1>(CurInstr)) {
2212         if (auto *LegalMem = Legalizer.legalizeMemOperand(Src0M)) {
2213           _ldc1(Dst, LegalMem);
2214           CurInstr->setDeleted();
2215         }
2216         continue;
2217       }
2218       if (auto *AddiuInstr = llvm::dyn_cast<InstMIPS32Addiu>(CurInstr)) {
2219         if (auto *LegalImm = Legalizer.legalizeImmediate(
2220                 static_cast<int32_t>(AddiuInstr->getImmediateValue()))) {
2221           _addu(Dst, Src0V, LegalImm);
2222           CurInstr->setDeleted();
2223         }
2224         continue;
2225       }
2226     }
2227   }
2228 }
2229 
loOperand(Operand * Operand)2230 Operand *TargetMIPS32::loOperand(Operand *Operand) {
2231   assert(Operand->getType() == IceType_i64);
2232   if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Operand))
2233     return Var64On32->getLo();
2234   if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
2235     return Ctx->getConstantInt32(static_cast<uint32_t>(Const->getValue()));
2236   }
2237   if (auto *Mem = llvm::dyn_cast<OperandMIPS32Mem>(Operand)) {
2238     // Conservatively disallow memory operands with side-effects (pre/post
2239     // increment) in case of duplication.
2240     assert(Mem->getAddrMode() == OperandMIPS32Mem::Offset);
2241     return OperandMIPS32Mem::create(Func, IceType_i32, Mem->getBase(),
2242                                     Mem->getOffset(), Mem->getAddrMode());
2243   }
2244   llvm_unreachable("Unsupported operand type");
2245   return nullptr;
2246 }
2247 
/// Returns the i32-typed sub-operand at position Index of a vector operand
/// that has been split into 32-bit containers.  BaseType is the container
/// element type and scales the memory offset.
Operand *TargetMIPS32::getOperandAtIndex(Operand *Operand, Type BaseType,
                                         uint32_t Index) {
  if (!isVectorType(Operand->getType())) {
    llvm::report_fatal_error("getOperandAtIndex: Operand is not vector");
    return nullptr;
  }

  if (auto *Mem = llvm::dyn_cast<OperandMIPS32Mem>(Operand)) {
    // Only plain base+offset addressing is handled here.
    assert(Mem->getAddrMode() == OperandMIPS32Mem::Offset);
    Variable *Base = Mem->getBase();
    auto *Offset = llvm::cast<ConstantInteger32>(Mem->getOffset());
    // NOTE(review): the assert guards against overflowing by 4, but the
    // increment actually applied below is Index * typeWidthInBytes(BaseType);
    // confirm the two cannot diverge for the inputs this is called with.
    assert(!Utils::WouldOverflowAdd(Offset->getValue(), 4));
    int32_t NextOffsetVal =
        Offset->getValue() + (Index * typeWidthInBytes(BaseType));
    constexpr bool NoSignExt = false;
    if (!OperandMIPS32Mem::canHoldOffset(BaseType, NoSignExt, NextOffsetVal)) {
      // The bumped offset no longer fits the addressing mode: materialize a
      // new base register holding Base + 4 and keep the original offset.
      // NOTE(review): the fixed 4 is added regardless of Index; presumably
      // only the one-container-past-the-limit case can reach here — confirm.
      Constant *_4 = Ctx->getConstantInt32(4);
      Variable *NewBase = Func->makeVariable(Base->getType());
      lowerArithmetic(
          InstArithmetic::create(Func, InstArithmetic::Add, NewBase, Base, _4));
      Base = NewBase;
    } else {
      Offset =
          llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(NextOffsetVal));
    }
    return OperandMIPS32Mem::create(Func, BaseType, Base, Offset,
                                    Mem->getAddrMode());
  }

  // Register-class vectors are held as a list of i32 containers.
  if (auto *VarVecOn32 = llvm::dyn_cast<VariableVecOn32>(Operand))
    return VarVecOn32->getContainers()[Index];

  llvm_unreachable("Unsupported operand type");
  return nullptr;
}
2283 
hiOperand(Operand * Operand)2284 Operand *TargetMIPS32::hiOperand(Operand *Operand) {
2285   assert(Operand->getType() == IceType_i64);
2286   if (Operand->getType() != IceType_i64)
2287     return Operand;
2288   if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Operand))
2289     return Var64On32->getHi();
2290   if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
2291     return Ctx->getConstantInt32(
2292         static_cast<uint32_t>(Const->getValue() >> 32));
2293   }
2294   if (auto *Mem = llvm::dyn_cast<OperandMIPS32Mem>(Operand)) {
2295     // Conservatively disallow memory operands with side-effects
2296     // in case of duplication.
2297     assert(Mem->getAddrMode() == OperandMIPS32Mem::Offset);
2298     const Type SplitType = IceType_i32;
2299     Variable *Base = Mem->getBase();
2300     auto *Offset = llvm::cast<ConstantInteger32>(Mem->getOffset());
2301     assert(!Utils::WouldOverflowAdd(Offset->getValue(), 4));
2302     int32_t NextOffsetVal = Offset->getValue() + 4;
2303     constexpr bool SignExt = false;
2304     if (!OperandMIPS32Mem::canHoldOffset(SplitType, SignExt, NextOffsetVal)) {
2305       // We have to make a temp variable and add 4 to either Base or Offset.
2306       // If we add 4 to Offset, this will convert a non-RegReg addressing
2307       // mode into a RegReg addressing mode. Since NaCl sandboxing disallows
2308       // RegReg addressing modes, prefer adding to base and replacing instead.
2309       // Thus we leave the old offset alone.
2310       Constant *Four = Ctx->getConstantInt32(4);
2311       Variable *NewBase = Func->makeVariable(Base->getType());
2312       lowerArithmetic(InstArithmetic::create(Func, InstArithmetic::Add, NewBase,
2313                                              Base, Four));
2314       Base = NewBase;
2315     } else {
2316       Offset =
2317           llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(NextOffsetVal));
2318     }
2319     return OperandMIPS32Mem::create(Func, SplitType, Base, Offset,
2320                                     Mem->getAddrMode());
2321   }
2322   llvm_unreachable("Unsupported operand type");
2323   return nullptr;
2324 }
2325 
/// Builds the bit-vector of allocatable registers: each register whose
/// property matches a set in Include is added, then each whose property
/// matches a set in Exclude is removed.  Register properties come from the
/// REGMIPS32_TABLE X-macro.
SmallBitVector TargetMIPS32::getRegisterSet(RegSetMask Include,
                                            RegSetMask Exclude) const {
  SmallBitVector Registers(RegMIPS32::Reg_NUM);

// The macro body is kept comment-free: a // comment would swallow the
// trailing line-continuation backslashes.
#define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt,    \
          isI64Pair, isFP32, isFP64, isVec128, alias_init)                     \
  if (scratch && (Include & RegSet_CallerSave))                                \
    Registers[RegMIPS32::val] = true;                                          \
  if (preserved && (Include & RegSet_CalleeSave))                              \
    Registers[RegMIPS32::val] = true;                                          \
  if (stackptr && (Include & RegSet_StackPointer))                             \
    Registers[RegMIPS32::val] = true;                                          \
  if (frameptr && (Include & RegSet_FramePointer))                             \
    Registers[RegMIPS32::val] = true;                                          \
  if (scratch && (Exclude & RegSet_CallerSave))                                \
    Registers[RegMIPS32::val] = false;                                         \
  if (preserved && (Exclude & RegSet_CalleeSave))                              \
    Registers[RegMIPS32::val] = false;                                         \
  if (stackptr && (Exclude & RegSet_StackPointer))                             \
    Registers[RegMIPS32::val] = false;                                         \
  if (frameptr && (Exclude & RegSet_FramePointer))                             \
    Registers[RegMIPS32::val] = false;

  REGMIPS32_TABLE

#undef X

  // T6-T8 are withheld from allocation under sandboxing; presumably they are
  // reserved for the Sandboxer's masking sequences — confirm against the
  // Sandboxer implementation.
  if (NeedSandboxing) {
    Registers[RegMIPS32::Reg_T6] = false;
    Registers[RegMIPS32::Reg_T7] = false;
    Registers[RegMIPS32::Reg_T8] = false;
  }
  return Registers;
}
2360 
/// Lowers an alloca.  Constant-size allocas are folded into the frame
/// (tracked via FixedAllocaSizeBytes / CurrentAllocaOffset); variable-size
/// allocas adjust SP at runtime.
void TargetMIPS32::lowerAlloca(const InstAlloca *Instr) {
  // Conservatively require the stack to be aligned. Some stack adjustment
  // operations implemented below assume that the stack is aligned before the
  // alloca. All the alloca code ensures that the stack alignment is preserved
  // after the alloca. The stack alignment restriction can be relaxed in some
  // cases.
  NeedsStackAlignment = true;

  // For default align=0, set it to the real value 1, to avoid any
  // bit-manipulation problems below.
  const uint32_t AlignmentParam = std::max(1u, Instr->getAlignInBytes());

  // LLVM enforces power of 2 alignment.
  assert(llvm::isPowerOf2_32(AlignmentParam));
  assert(llvm::isPowerOf2_32(MIPS32_STACK_ALIGNMENT_BYTES));

  // The effective alignment is never below the ABI stack alignment.
  const uint32_t Alignment =
      std::max(AlignmentParam, MIPS32_STACK_ALIGNMENT_BYTES);
  const bool OverAligned = Alignment > MIPS32_STACK_ALIGNMENT_BYTES;
  const bool OptM1 = Func->getOptLevel() == Opt_m1;
  const bool AllocaWithKnownOffset = Instr->getKnownFrameOffset();
  // A frame pointer is needed whenever the alloca cannot be addressed at a
  // fixed offset from SP.
  const bool UseFramePointer =
      hasFramePointer() || OverAligned || !AllocaWithKnownOffset || OptM1;

  if (UseFramePointer)
    setHasFramePointer();

  Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP);

  Variable *Dest = Instr->getDest();
  Operand *TotalSize = Instr->getSizeInBytes();

  if (const auto *ConstantTotalSize =
          llvm::dyn_cast<ConstantInteger32>(TotalSize)) {
    // Round the requested size up to the required alignment and account for
    // it in the fixed portion of the frame.
    const uint32_t Value =
        Utils::applyAlignment(ConstantTotalSize->getValue(), Alignment);
    FixedAllocaSizeBytes += Value;
    // Constant size alloca.
    if (!UseFramePointer) {
      // If we don't need a Frame Pointer, this alloca has a known offset to the
      // stack pointer. We don't need adjust the stack pointer, nor assign any
      // value to Dest, as Dest is rematerializable.
      assert(Dest->isRematerializable());
      Context.insert<InstFakeDef>(Dest);
      return;
    }

    // Over-aligned allocas must start at an aligned offset within the area.
    if (Alignment > MIPS32_STACK_ALIGNMENT_BYTES) {
      CurrentAllocaOffset =
          Utils::applyAlignment(CurrentAllocaOffset, Alignment);
    }
    // Dest = SP + CurrentAllocaOffset, then bump the running offset.
    auto *T = I32Reg();
    _addiu(T, SP, CurrentAllocaOffset);
    _mov(Dest, T);
    CurrentAllocaOffset += Value;
    return;

  } else {
    // Non-constant sizes need to be adjusted to the next highest multiple of
    // the required alignment at runtime.
    VariableAllocaUsed = true;
    VariableAllocaAlignBytes = AlignmentParam;
    Variable *AlignAmount;
    auto *TotalSizeR = legalizeToReg(TotalSize, Legal_Reg);
    auto *T1 = I32Reg();
    auto *T2 = I32Reg();
    auto *T3 = I32Reg();
    auto *T4 = I32Reg();
    auto *T5 = I32Reg();
    // T3 = (size + ALIGN-1) & -ALIGN: round the size up to the stack
    // alignment, then T4 = SP - T3 is the new (grown-downward) stack top.
    _addiu(T1, TotalSizeR, MIPS32_STACK_ALIGNMENT_BYTES - 1);
    _addiu(T2, getZero(), -MIPS32_STACK_ALIGNMENT_BYTES);
    _and(T3, T1, T2);
    _subu(T4, SP, T3);
    if (Instr->getAlignInBytes()) {
      // An explicit alignment request additionally masks the result down to
      // the requested alignment.
      AlignAmount =
          legalizeToReg(Ctx->getConstantInt32(-AlignmentParam), Legal_Reg);
      _and(T5, T4, AlignAmount);
      _mov(Dest, T5);
    } else {
      _mov(Dest, T4);
    }
    // Commit the new stack pointer (through the sandboxer unless at Om1).
    if (OptM1)
      _mov(SP, Dest);
    else
      Sandboxer(this).reset_sp(Dest);
    return;
  }
}
2449 
/// Lowers an i64 arithmetic instruction by operating on the two 32-bit halves
/// of the operands (via loOperand/hiOperand) and writing DestLo/DestHi.
/// 64-bit div/rem and all FP operations must have been prelowered.
void TargetMIPS32::lowerInt64Arithmetic(const InstArithmetic *Instr,
                                        Variable *Dest, Operand *Src0,
                                        Operand *Src1) {
  InstArithmetic::OpKind Op = Instr->getOp();
  auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
  auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
  Variable *Src0LoR = nullptr;
  Variable *Src1LoR = nullptr;
  Variable *Src0HiR = nullptr;
  Variable *Src1HiR = nullptr;

  switch (Op) {
  case InstArithmetic::_num:
    llvm::report_fatal_error("Unknown arithmetic operator");
    return;
  case InstArithmetic::Add: {
    Src0LoR = legalizeToReg(loOperand(Src0));
    Src1LoR = legalizeToReg(loOperand(Src1));
    Src0HiR = legalizeToReg(hiOperand(Src0));
    Src1HiR = legalizeToReg(hiOperand(Src1));
    // DestLo = Lo0 + Lo1; the carry is detected via sltu (unsigned wrap:
    // sum < Lo0); DestHi = Hi0 + Hi1 + carry.
    auto *T_Carry = I32Reg(), *T_Lo = I32Reg(), *T_Hi = I32Reg(),
         *T_Hi2 = I32Reg();
    _addu(T_Lo, Src0LoR, Src1LoR);
    _mov(DestLo, T_Lo);
    _sltu(T_Carry, T_Lo, Src0LoR);
    _addu(T_Hi, T_Carry, Src0HiR);
    _addu(T_Hi2, Src1HiR, T_Hi);
    _mov(DestHi, T_Hi2);
    return;
  }
  case InstArithmetic::And: {
    // Bitwise ops act on the two halves independently.
    Src0LoR = legalizeToReg(loOperand(Src0));
    Src1LoR = legalizeToReg(loOperand(Src1));
    Src0HiR = legalizeToReg(hiOperand(Src0));
    Src1HiR = legalizeToReg(hiOperand(Src1));
    auto *T_Lo = I32Reg(), *T_Hi = I32Reg();
    _and(T_Lo, Src0LoR, Src1LoR);
    _mov(DestLo, T_Lo);
    _and(T_Hi, Src0HiR, Src1HiR);
    _mov(DestHi, T_Hi);
    return;
  }
  case InstArithmetic::Sub: {
    Src0LoR = legalizeToReg(loOperand(Src0));
    Src1LoR = legalizeToReg(loOperand(Src1));
    Src0HiR = legalizeToReg(hiOperand(Src0));
    Src1HiR = legalizeToReg(hiOperand(Src1));
    // DestLo = Lo0 - Lo1; borrow = (Lo0 < Lo1) via sltu;
    // DestHi = Hi0 - (Hi1 + borrow).
    auto *T_Borrow = I32Reg(), *T_Lo = I32Reg(), *T_Hi = I32Reg(),
         *T_Hi2 = I32Reg();
    _subu(T_Lo, Src0LoR, Src1LoR);
    _mov(DestLo, T_Lo);
    _sltu(T_Borrow, Src0LoR, Src1LoR);
    _addu(T_Hi, T_Borrow, Src1HiR);
    _subu(T_Hi2, Src0HiR, T_Hi);
    _mov(DestHi, T_Hi2);
    return;
  }
  case InstArithmetic::Or: {
    Src0LoR = legalizeToReg(loOperand(Src0));
    Src1LoR = legalizeToReg(loOperand(Src1));
    Src0HiR = legalizeToReg(hiOperand(Src0));
    Src1HiR = legalizeToReg(hiOperand(Src1));
    auto *T_Lo = I32Reg(), *T_Hi = I32Reg();
    _or(T_Lo, Src0LoR, Src1LoR);
    _mov(DestLo, T_Lo);
    _or(T_Hi, Src0HiR, Src1HiR);
    _mov(DestHi, T_Hi);
    return;
  }
  case InstArithmetic::Xor: {
    Src0LoR = legalizeToReg(loOperand(Src0));
    Src1LoR = legalizeToReg(loOperand(Src1));
    Src0HiR = legalizeToReg(hiOperand(Src0));
    Src1HiR = legalizeToReg(hiOperand(Src1));
    auto *T_Lo = I32Reg(), *T_Hi = I32Reg();
    _xor(T_Lo, Src0LoR, Src1LoR);
    _mov(DestLo, T_Lo);
    _xor(T_Hi, Src0HiR, Src1HiR);
    _mov(DestHi, T_Hi);
    return;
  }
  case InstArithmetic::Mul: {
    // TODO(rkotler): Make sure that mul has the side effect of clobbering
    // LO, HI. Check for any other LO, HI quirkiness in this section.
    Src0LoR = legalizeToReg(loOperand(Src0));
    Src1LoR = legalizeToReg(loOperand(Src1));
    Src0HiR = legalizeToReg(hiOperand(Src0));
    Src1HiR = legalizeToReg(hiOperand(Src1));
    // Full 32x32->64 product of the low halves via multu (result in LO/HI),
    // then the two cross products Hi0*Lo1 and Lo0*Hi1 are added into the high
    // word.  (Hi0*Hi1 shifts entirely out of 64 bits.)
    auto *T_Lo = I32Reg(RegMIPS32::Reg_LO), *T_Hi = I32Reg(RegMIPS32::Reg_HI);
    auto *T1 = I32Reg(), *T2 = I32Reg();
    auto *TM1 = I32Reg(), *TM2 = I32Reg(), *TM3 = I32Reg(), *TM4 = I32Reg();
    _multu(T_Lo, Src0LoR, Src1LoR);
    // Fake def ties T_Hi to the multu so liveness sees HI being written.
    Context.insert<InstFakeDef>(T_Hi, T_Lo);
    _mflo(T1, T_Lo);
    _mfhi(T2, T_Hi);
    _mov(DestLo, T1);
    _mul(TM1, Src0HiR, Src1LoR);
    _mul(TM2, Src0LoR, Src1HiR);
    _addu(TM3, TM1, T2);
    _addu(TM4, TM3, TM2);
    _mov(DestHi, TM4);
    return;
  }
  case InstArithmetic::Shl: {
    auto *T_Lo = I32Reg();
    auto *T_Hi = I32Reg();
    auto *T1_Lo = I32Reg();
    auto *T1_Hi = I32Reg();
    auto *T1 = I32Reg();
    auto *T2 = I32Reg();
    auto *T3 = I32Reg();
    auto *T4 = I32Reg();
    auto *T5 = I32Reg();

    if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Src1)) {
      // Constant shift amount: pick the sequence by range.
      Src0LoR = legalizeToReg(loOperand(Src0));
      int64_t ShiftAmount = Const->getValue();
      if (ShiftAmount == 1) {
        // Shift-by-one as an add-with-carry of the value with itself.
        Src0HiR = legalizeToReg(hiOperand(Src0));
        _addu(T_Lo, Src0LoR, Src0LoR);
        _sltu(T1, T_Lo, Src0LoR);
        _addu(T2, T1, Src0HiR);
        _addu(T_Hi, Src0HiR, T2);
      } else if (ShiftAmount < INT32_BITS) {
        // hi = (lo >> (32-s)) | (hi << s); lo = lo << s.
        // NOTE(review): a constant shift amount of 0 would emit srl by 32
        // here (MIPS encodes shift amounts mod 32); presumably the front end
        // never produces an i64 shift by constant 0 — confirm.
        Src0HiR = legalizeToReg(hiOperand(Src0));
        _srl(T1, Src0LoR, INT32_BITS - ShiftAmount);
        _sll(T2, Src0HiR, ShiftAmount);
        _or(T_Hi, T1, T2);
        _sll(T_Lo, Src0LoR, ShiftAmount);
      } else if (ShiftAmount == INT32_BITS) {
        // hi = old lo; lo = 0.
        _addiu(T_Lo, getZero(), 0);
        _mov(T_Hi, Src0LoR);
      } else if (ShiftAmount > INT32_BITS && ShiftAmount < 64) {
        // hi = lo << (s-32); lo = 0.
        _sll(T_Hi, Src0LoR, ShiftAmount - INT32_BITS);
        _addiu(T_Lo, getZero(), 0);
      }
      _mov(DestLo, T_Lo);
      _mov(DestHi, T_Hi);
      return;
    }

    // Variable shift amount: compute the s<32 result, then use bit 5 of the
    // shift amount (andi with 32) to movn-select the s>=32 result
    // (hi = lo << (s mod 32), lo = 0).
    Src0LoR = legalizeToReg(loOperand(Src0));
    Src1LoR = legalizeToReg(loOperand(Src1));
    Src0HiR = legalizeToReg(hiOperand(Src0));

    _sllv(T1, Src0HiR, Src1LoR);
    _not(T2, Src1LoR);
    _srl(T3, Src0LoR, 1);
    _srlv(T4, T3, T2);
    _or(T_Hi, T1, T4);
    _sllv(T_Lo, Src0LoR, Src1LoR);

    _mov(T1_Hi, T_Hi);
    _mov(T1_Lo, T_Lo);
    _andi(T5, Src1LoR, INT32_BITS);
    _movn(T1_Hi, T_Lo, T5);
    _movn(T1_Lo, getZero(), T5);
    _mov(DestHi, T1_Hi);
    _mov(DestLo, T1_Lo);
    return;
  }
  case InstArithmetic::Lshr: {

    auto *T_Lo = I32Reg();
    auto *T_Hi = I32Reg();
    auto *T1_Lo = I32Reg();
    auto *T1_Hi = I32Reg();
    auto *T1 = I32Reg();
    auto *T2 = I32Reg();
    auto *T3 = I32Reg();
    auto *T4 = I32Reg();
    auto *T5 = I32Reg();

    if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Src1)) {
      // Constant shift amount: mirror of the Shl constant paths, with the
      // high word zero-filled.  NOTE(review): as with Shl, a constant shift
      // of 0 would emit sll by 32 — presumably unreachable; confirm.
      Src0HiR = legalizeToReg(hiOperand(Src0));
      int64_t ShiftAmount = Const->getValue();
      if (ShiftAmount < INT32_BITS) {
        // lo = (hi << (32-s)) | (lo >> s); hi = hi >> s.
        Src0LoR = legalizeToReg(loOperand(Src0));
        _sll(T1, Src0HiR, INT32_BITS - ShiftAmount);
        _srl(T2, Src0LoR, ShiftAmount);
        _or(T_Lo, T1, T2);
        _srl(T_Hi, Src0HiR, ShiftAmount);
      } else if (ShiftAmount == INT32_BITS) {
        // lo = old hi; hi = 0.
        _mov(T_Lo, Src0HiR);
        _addiu(T_Hi, getZero(), 0);
      } else if (ShiftAmount > INT32_BITS && ShiftAmount < 64) {
        // lo = hi >> (s-32); hi = 0.
        _srl(T_Lo, Src0HiR, ShiftAmount - INT32_BITS);
        _addiu(T_Hi, getZero(), 0);
      }
      _mov(DestLo, T_Lo);
      _mov(DestHi, T_Hi);
      return;
    }

    // Variable shift amount: compute the s<32 result, then movn-select on
    // bit 5 of the shift amount for s>=32 (lo = hi >> (s mod 32), hi = 0).
    Src0LoR = legalizeToReg(loOperand(Src0));
    Src1LoR = legalizeToReg(loOperand(Src1));
    Src0HiR = legalizeToReg(hiOperand(Src0));

    _srlv(T1, Src0LoR, Src1LoR);
    _not(T2, Src1LoR);
    _sll(T3, Src0HiR, 1);
    _sllv(T4, T3, T2);
    _or(T_Lo, T1, T4);
    _srlv(T_Hi, Src0HiR, Src1LoR);

    _mov(T1_Hi, T_Hi);
    _mov(T1_Lo, T_Lo);
    _andi(T5, Src1LoR, INT32_BITS);
    _movn(T1_Lo, T_Hi, T5);
    _movn(T1_Hi, getZero(), T5);
    _mov(DestHi, T1_Hi);
    _mov(DestLo, T1_Lo);
    return;
  }
  case InstArithmetic::Ashr: {

    auto *T_Lo = I32Reg();
    auto *T_Hi = I32Reg();
    auto *T1_Lo = I32Reg();
    auto *T1_Hi = I32Reg();
    auto *T1 = I32Reg();
    auto *T2 = I32Reg();
    auto *T3 = I32Reg();
    auto *T4 = I32Reg();
    auto *T5 = I32Reg();
    auto *T6 = I32Reg();

    if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Src1)) {
      // Constant shift amount: like Lshr but the high word is filled with
      // copies of the sign bit (sra by 31).
      Src0HiR = legalizeToReg(hiOperand(Src0));
      int64_t ShiftAmount = Const->getValue();
      if (ShiftAmount < INT32_BITS) {
        Src0LoR = legalizeToReg(loOperand(Src0));
        _sll(T1, Src0HiR, INT32_BITS - ShiftAmount);
        _srl(T2, Src0LoR, ShiftAmount);
        _or(T_Lo, T1, T2);
        _sra(T_Hi, Src0HiR, ShiftAmount);
      } else if (ShiftAmount == INT32_BITS) {
        _sra(T_Hi, Src0HiR, INT32_BITS - 1);
        _mov(T_Lo, Src0HiR);
      } else if (ShiftAmount > INT32_BITS && ShiftAmount < 64) {
        _sra(T_Lo, Src0HiR, ShiftAmount - INT32_BITS);
        _sra(T_Hi, Src0HiR, INT32_BITS - 1);
      }
      _mov(DestLo, T_Lo);
      _mov(DestHi, T_Hi);
      return;
    }

    // Variable shift amount: compute the s<32 result, then movn-select on
    // bit 5 of the shift amount for s>=32 (lo = hi >> (s mod 32) arithmetic,
    // hi = sign extension of the original high word).
    Src0LoR = legalizeToReg(loOperand(Src0));
    Src1LoR = legalizeToReg(loOperand(Src1));
    Src0HiR = legalizeToReg(hiOperand(Src0));

    _srlv(T1, Src0LoR, Src1LoR);
    _not(T2, Src1LoR);
    _sll(T3, Src0HiR, 1);
    _sllv(T4, T3, T2);
    _or(T_Lo, T1, T4);
    _srav(T_Hi, Src0HiR, Src1LoR);

    _mov(T1_Hi, T_Hi);
    _mov(T1_Lo, T_Lo);
    _andi(T5, Src1LoR, INT32_BITS);
    _movn(T1_Lo, T_Hi, T5);
    _sra(T6, Src0HiR, INT32_BITS - 1);
    _movn(T1_Hi, T6, T5);
    _mov(DestHi, T1_Hi);
    _mov(DestLo, T1_Lo);
    return;
  }
  case InstArithmetic::Fadd:
  case InstArithmetic::Fsub:
  case InstArithmetic::Fmul:
  case InstArithmetic::Fdiv:
  case InstArithmetic::Frem:
    llvm::report_fatal_error("FP instruction with i64 type");
    return;
  case InstArithmetic::Udiv:
  case InstArithmetic::Sdiv:
  case InstArithmetic::Urem:
  case InstArithmetic::Srem:
    llvm::report_fatal_error("64-bit div and rem should have been prelowered");
    return;
  }
}
2734 
/// Lowers a scalar arithmetic instruction.  i64 destinations are delegated to
/// lowerInt64Arithmetic; vector destinations are unsupported.  For integer
/// ops, a 16-bit-representable constant Src1 selects the immediate-form MIPS
/// instruction (addiu/andi/ori/xori/sll/srl/sra).
void TargetMIPS32::lowerArithmetic(const InstArithmetic *Instr) {
  Variable *Dest = Instr->getDest();

  if (Dest->isRematerializable()) {
    // No code needed; just keep liveness happy.
    Context.insert<InstFakeDef>(Dest);
    return;
  }

  // We need to signal all the UnimplementedLoweringError errors before any
  // legalization into new variables, otherwise Om1 register allocation may fail
  // when it sees variables that are defined but not used.
  Type DestTy = Dest->getType();
  Operand *Src0 = legalizeUndef(Instr->getSrc(0));
  Operand *Src1 = legalizeUndef(Instr->getSrc(1));
  if (DestTy == IceType_i64) {
    lowerInt64Arithmetic(Instr, Instr->getDest(), Src0, Src1);
    return;
  }
  if (isVectorType(Dest->getType())) {
    llvm::report_fatal_error("Arithmetic: Destination type is vector");
    return;
  }

  Variable *T = makeReg(Dest->getType());
  Variable *Src0R = legalizeToReg(Src0);
  Variable *Src1R = nullptr;
  uint32_t Value = 0;
  bool IsSrc1Imm16 = false;

  // First pass: decide immediate form vs. register form for Src1.  Add/Sub
  // use a signed 16-bit check (addiu sign-extends its immediate); the logical
  // and shift ops use an unsigned 16-bit check (andi/ori/xori zero-extend).
  switch (Instr->getOp()) {
  case InstArithmetic::Add:
  case InstArithmetic::Sub: {
    auto *Const32 = llvm::dyn_cast<ConstantInteger32>(Src1);
    if (Const32 != nullptr && isInt<16>(int32_t(Const32->getValue()))) {
      IsSrc1Imm16 = true;
      Value = Const32->getValue();
    } else {
      Src1R = legalizeToReg(Src1);
    }
    break;
  }
  case InstArithmetic::And:
  case InstArithmetic::Or:
  case InstArithmetic::Xor:
  case InstArithmetic::Shl:
  case InstArithmetic::Lshr:
  case InstArithmetic::Ashr: {
    auto *Const32 = llvm::dyn_cast<ConstantInteger32>(Src1);
    if (Const32 != nullptr && llvm::isUInt<16>(uint32_t(Const32->getValue()))) {
      IsSrc1Imm16 = true;
      Value = Const32->getValue();
    } else {
      Src1R = legalizeToReg(Src1);
    }
    break;
  }
  default:
    Src1R = legalizeToReg(Src1);
    break;
  }
  // Trap code used with teq for the divide-by-zero checks below.
  constexpr uint32_t DivideByZeroTrapCode = 7;

  switch (Instr->getOp()) {
  case InstArithmetic::_num:
    break;
  case InstArithmetic::Add: {
    // Narrow (i8/i16) operands are sign-extended to i32 first.
    auto *T0R = Src0R;
    auto *T1R = Src1R;
    if (Dest->getType() != IceType_i32) {
      T0R = makeReg(IceType_i32);
      lowerCast(InstCast::create(Func, InstCast::Sext, T0R, Src0R));
      if (!IsSrc1Imm16) {
        T1R = makeReg(IceType_i32);
        lowerCast(InstCast::create(Func, InstCast::Sext, T1R, Src1R));
      }
    }
    if (IsSrc1Imm16) {
      _addiu(T, T0R, Value);
    } else {
      _addu(T, T0R, T1R);
    }
    _mov(Dest, T);
    return;
  }
  case InstArithmetic::And:
    if (IsSrc1Imm16) {
      _andi(T, Src0R, Value);
    } else {
      _and(T, Src0R, Src1R);
    }
    _mov(Dest, T);
    return;
  case InstArithmetic::Or:
    if (IsSrc1Imm16) {
      _ori(T, Src0R, Value);
    } else {
      _or(T, Src0R, Src1R);
    }
    _mov(Dest, T);
    return;
  case InstArithmetic::Xor:
    if (IsSrc1Imm16) {
      _xori(T, Src0R, Value);
    } else {
      _xor(T, Src0R, Src1R);
    }
    _mov(Dest, T);
    return;
  case InstArithmetic::Sub: {
    auto *T0R = Src0R;
    auto *T1R = Src1R;
    if (Dest->getType() != IceType_i32) {
      T0R = makeReg(IceType_i32);
      lowerCast(InstCast::create(Func, InstCast::Sext, T0R, Src0R));
      if (!IsSrc1Imm16) {
        T1R = makeReg(IceType_i32);
        lowerCast(InstCast::create(Func, InstCast::Sext, T1R, Src1R));
      }
    }
    if (IsSrc1Imm16) {
      // Subtraction by immediate is addiu of the negated value.
      // NOTE(review): Value == -32768 passes the isInt<16> check above but
      // -Value (+32768) does not fit addiu's signed 16-bit immediate —
      // confirm whether legalization rules this case out.
      _addiu(T, T0R, -Value);
    } else {
      _subu(T, T0R, T1R);
    }
    _mov(Dest, T);
    return;
  }
  case InstArithmetic::Mul: {
    _mul(T, Src0R, Src1R);
    _mov(Dest, T);
    return;
  }
  case InstArithmetic::Shl: {
    if (IsSrc1Imm16) {
      _sll(T, Src0R, Value);
    } else {
      _sllv(T, Src0R, Src1R);
    }
    _mov(Dest, T);
    return;
  }
  case InstArithmetic::Lshr: {
    // Logical shift right: zero-extend narrow operands first.
    auto *T0R = Src0R;
    auto *T1R = Src1R;
    if (Dest->getType() != IceType_i32) {
      T0R = makeReg(IceType_i32);
      lowerCast(InstCast::create(Func, InstCast::Zext, T0R, Src0R));
      if (!IsSrc1Imm16) {
        T1R = makeReg(IceType_i32);
        lowerCast(InstCast::create(Func, InstCast::Zext, T1R, Src1R));
      }
    }
    if (IsSrc1Imm16) {
      _srl(T, T0R, Value);
    } else {
      _srlv(T, T0R, T1R);
    }
    _mov(Dest, T);
    return;
  }
  case InstArithmetic::Ashr: {
    // Arithmetic shift right: sign-extend narrow operands first.
    auto *T0R = Src0R;
    auto *T1R = Src1R;
    if (Dest->getType() != IceType_i32) {
      T0R = makeReg(IceType_i32);
      lowerCast(InstCast::create(Func, InstCast::Sext, T0R, Src0R));
      if (!IsSrc1Imm16) {
        T1R = makeReg(IceType_i32);
        lowerCast(InstCast::create(Func, InstCast::Sext, T1R, Src1R));
      }
    }
    if (IsSrc1Imm16) {
      _sra(T, T0R, Value);
    } else {
      _srav(T, T0R, T1R);
    }
    _mov(Dest, T);
    return;
  }
  case InstArithmetic::Udiv: {
    // Quotient comes from LO; teq traps when the divisor is zero.
    auto *T_Zero = I32Reg(RegMIPS32::Reg_ZERO);
    auto *T0R = Src0R;
    auto *T1R = Src1R;
    if (Dest->getType() != IceType_i32) {
      T0R = makeReg(IceType_i32);
      lowerCast(InstCast::create(Func, InstCast::Zext, T0R, Src0R));
      T1R = makeReg(IceType_i32);
      lowerCast(InstCast::create(Func, InstCast::Zext, T1R, Src1R));
    }
    _divu(T_Zero, T0R, T1R);
    _teq(T1R, T_Zero, DivideByZeroTrapCode); // Trap if divide-by-zero
    _mflo(T, T_Zero);
    _mov(Dest, T);
    return;
  }
  case InstArithmetic::Sdiv: {
    auto *T_Zero = I32Reg(RegMIPS32::Reg_ZERO);
    auto *T0R = Src0R;
    auto *T1R = Src1R;
    if (Dest->getType() != IceType_i32) {
      T0R = makeReg(IceType_i32);
      lowerCast(InstCast::create(Func, InstCast::Sext, T0R, Src0R));
      T1R = makeReg(IceType_i32);
      lowerCast(InstCast::create(Func, InstCast::Sext, T1R, Src1R));
    }
    _div(T_Zero, T0R, T1R);
    _teq(T1R, T_Zero, DivideByZeroTrapCode); // Trap if divide-by-zero
    _mflo(T, T_Zero);
    _mov(Dest, T);
    return;
  }
  case InstArithmetic::Urem: {
    // Remainder comes from HI.
    auto *T_Zero = I32Reg(RegMIPS32::Reg_ZERO);
    auto *T0R = Src0R;
    auto *T1R = Src1R;
    if (Dest->getType() != IceType_i32) {
      T0R = makeReg(IceType_i32);
      lowerCast(InstCast::create(Func, InstCast::Zext, T0R, Src0R));
      T1R = makeReg(IceType_i32);
      lowerCast(InstCast::create(Func, InstCast::Zext, T1R, Src1R));
    }
    _divu(T_Zero, T0R, T1R);
    _teq(T1R, T_Zero, DivideByZeroTrapCode); // Trap if divide-by-zero
    _mfhi(T, T_Zero);
    _mov(Dest, T);
    return;
  }
  case InstArithmetic::Srem: {
    auto *T_Zero = I32Reg(RegMIPS32::Reg_ZERO);
    auto *T0R = Src0R;
    auto *T1R = Src1R;
    if (Dest->getType() != IceType_i32) {
      T0R = makeReg(IceType_i32);
      lowerCast(InstCast::create(Func, InstCast::Sext, T0R, Src0R));
      T1R = makeReg(IceType_i32);
      lowerCast(InstCast::create(Func, InstCast::Sext, T1R, Src1R));
    }
    _div(T_Zero, T0R, T1R);
    _teq(T1R, T_Zero, DivideByZeroTrapCode); // Trap if divide-by-zero
    _mfhi(T, T_Zero);
    _mov(Dest, T);
    return;
  }
  case InstArithmetic::Fadd: {
    if (DestTy == IceType_f32) {
      _add_s(T, Src0R, Src1R);
      _mov(Dest, T);
      return;
    }
    if (DestTy == IceType_f64) {
      _add_d(T, Src0R, Src1R);
      _mov(Dest, T);
      return;
    }
    break;
  }
  case InstArithmetic::Fsub:
    if (DestTy == IceType_f32) {
      _sub_s(T, Src0R, Src1R);
      _mov(Dest, T);
      return;
    }
    if (DestTy == IceType_f64) {
      _sub_d(T, Src0R, Src1R);
      _mov(Dest, T);
      return;
    }
    break;
  case InstArithmetic::Fmul:
    if (DestTy == IceType_f32) {
      _mul_s(T, Src0R, Src1R);
      _mov(Dest, T);
      return;
    }
    if (DestTy == IceType_f64) {
      _mul_d(T, Src0R, Src1R);
      _mov(Dest, T);
      return;
    }
    break;
  case InstArithmetic::Fdiv:
    if (DestTy == IceType_f32) {
      _div_s(T, Src0R, Src1R);
      _mov(Dest, T);
      return;
    }
    if (DestTy == IceType_f64) {
      _div_d(T, Src0R, Src1R);
      _mov(Dest, T);
      return;
    }
    break;
  case InstArithmetic::Frem:
    llvm::report_fatal_error("frem should have been prelowered.");
    break;
  }
  llvm::report_fatal_error("Unknown arithmetic operator");
}
3033 
lowerAssign(const InstAssign * Instr)3034 void TargetMIPS32::lowerAssign(const InstAssign *Instr) {
3035   Variable *Dest = Instr->getDest();
3036 
3037   if (Dest->isRematerializable()) {
3038     Context.insert<InstFakeDef>(Dest);
3039     return;
3040   }
3041 
3042   // Source type may not be same as destination
3043   if (isVectorType(Dest->getType())) {
3044     Operand *Src0 = legalizeUndef(Instr->getSrc(0));
3045     auto *DstVec = llvm::dyn_cast<VariableVecOn32>(Dest);
3046     for (SizeT i = 0; i < DstVec->ContainersPerVector; ++i) {
3047       auto *DCont = DstVec->getContainers()[i];
3048       auto *SCont =
3049           legalize(getOperandAtIndex(Src0, IceType_i32, i), Legal_Reg);
3050       auto *TReg = makeReg(IceType_i32);
3051       _mov(TReg, SCont);
3052       _mov(DCont, TReg);
3053     }
3054     return;
3055   }
3056   Operand *Src0 = Instr->getSrc(0);
3057   assert(Dest->getType() == Src0->getType());
3058   if (Dest->getType() == IceType_i64) {
3059     Src0 = legalizeUndef(Src0);
3060     Operand *Src0Lo = legalize(loOperand(Src0), Legal_Reg);
3061     Operand *Src0Hi = legalize(hiOperand(Src0), Legal_Reg);
3062     auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
3063     auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3064     auto *T_Lo = I32Reg(), *T_Hi = I32Reg();
3065     _mov(T_Lo, Src0Lo);
3066     _mov(DestLo, T_Lo);
3067     _mov(T_Hi, Src0Hi);
3068     _mov(DestHi, T_Hi);
3069     return;
3070   }
3071   Operand *SrcR;
3072   if (Dest->hasReg()) {
3073     // If Dest already has a physical register, then legalize the Src operand
3074     // into a Variable with the same register assignment.  This especially
3075     // helps allow the use of Flex operands.
3076     SrcR = legalize(Src0, Legal_Reg, Dest->getRegNum());
3077   } else {
3078     // Dest could be a stack operand. Since we could potentially need
3079     // to do a Store (and store can only have Register operands),
3080     // legalize this to a register.
3081     SrcR = legalize(Src0, Legal_Reg);
3082   }
3083   _mov(Dest, SrcR);
3084 }
3085 
// Lowers a branch instruction. An unconditional branch becomes a plain jump.
// For a conditional branch whose boolean is produced by an icmp tracked by
// Computations, the compare is fused with the branch so the boolean is never
// materialized. Each fused case below computes a value whose zero/non-zero
// state encodes the predicate and then pairs it with EQZ or NEZ so the branch
// condition matches the positive-logic EQZ convention used for an unknown
// producer.
void TargetMIPS32::lowerBr(const InstBr *Instr) {
  if (Instr->isUnconditional()) {
    _br(Instr->getTargetUnconditional());
    return;
  }
  CfgNode *TargetTrue = Instr->getTargetTrue();
  CfgNode *TargetFalse = Instr->getTargetFalse();
  Operand *Boolean = Instr->getCondition();
  const Inst *Producer = Computations.getProducerOf(Boolean);
  if (Producer == nullptr) {
    // Since we don't know the producer of this boolean we will assume its
    // producer will keep it in positive logic and just emit beqz with this
    // Boolean as an operand.
    auto *BooleanR = legalizeToReg(Boolean);
    _br(TargetTrue, TargetFalse, BooleanR, CondMIPS32::Cond::EQZ);
    return;
  }
  // Fuse the compare with the branch: emit the comparison sequence inline and
  // branch on its result directly.
  if (Producer->getKind() == Inst::Icmp) {
    const InstIcmp *CompareInst = llvm::cast<InstIcmp>(Producer);
    Operand *Src0 = CompareInst->getSrc(0);
    Operand *Src1 = CompareInst->getSrc(1);
    const Type Src0Ty = Src0->getType();
    assert(Src0Ty == Src1->getType());

    // For i64 operands, Src0R/Src1R hold the low words and Src0HiR/Src1HiR
    // the high words; for narrower operands only Src0R/Src1R are used.
    Variable *Src0R = nullptr;
    Variable *Src1R = nullptr;
    Variable *Src0HiR = nullptr;
    Variable *Src1HiR = nullptr;
    if (Src0Ty == IceType_i64) {
      Src0R = legalizeToReg(loOperand(Src0));
      Src1R = legalizeToReg(loOperand(Src1));
      Src0HiR = legalizeToReg(hiOperand(Src0));
      Src1HiR = legalizeToReg(hiOperand(Src1));
    } else {
      auto *Src0RT = legalizeToReg(Src0);
      auto *Src1RT = legalizeToReg(Src1);
      // Sign/Zero extend the source operands
      if (Src0Ty != IceType_i32) {
        // Signed predicates (and equality) widen with sign extension;
        // unsigned predicates widen with zero extension. Both operands get
        // the same treatment, so equality is preserved either way.
        InstCast::OpKind CastKind;
        switch (CompareInst->getCondition()) {
        case InstIcmp::Eq:
        case InstIcmp::Ne:
        case InstIcmp::Sgt:
        case InstIcmp::Sge:
        case InstIcmp::Slt:
        case InstIcmp::Sle:
          CastKind = InstCast::Sext;
          break;
        default:
          CastKind = InstCast::Zext;
          break;
        }
        Src0R = makeReg(IceType_i32);
        Src1R = makeReg(IceType_i32);
        lowerCast(InstCast::create(Func, CastKind, Src0R, Src0RT));
        lowerCast(InstCast::create(Func, CastKind, Src1R, Src1RT));
      } else {
        Src0R = Src0RT;
        Src1R = Src1RT;
      }
    }
    auto *DestT = makeReg(IceType_i32);

    // The 64-bit sequences below share one pattern: T1 = hi0 ^ hi1 is
    // non-zero exactly when the high words differ, and `movz X, Y, T1`
    // overwrites the high-word verdict X with the low-word verdict Y only
    // when the high words are equal (T1 == 0).
    switch (CompareInst->getCondition()) {
    default:
      llvm_unreachable("unexpected condition");
      return;
    case InstIcmp::Eq: {
      if (Src0Ty == IceType_i64) {
        // (hi0^hi1) | (lo0^lo1) is zero iff the operands are equal.
        auto *T1 = I32Reg();
        auto *T2 = I32Reg();
        auto *T3 = I32Reg();
        _xor(T1, Src0HiR, Src1HiR);
        _xor(T2, Src0R, Src1R);
        _or(T3, T1, T2);
        _mov(DestT, T3);
        _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
      } else {
        _br(TargetTrue, TargetFalse, Src0R, Src1R, CondMIPS32::Cond::NE);
      }
      return;
    }
    case InstIcmp::Ne: {
      if (Src0Ty == IceType_i64) {
        auto *T1 = I32Reg();
        auto *T2 = I32Reg();
        auto *T3 = I32Reg();
        _xor(T1, Src0HiR, Src1HiR);
        _xor(T2, Src0R, Src1R);
        _or(T3, T1, T2);
        _mov(DestT, T3);
        _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::EQZ);
      } else {
        _br(TargetTrue, TargetFalse, Src0R, Src1R, CondMIPS32::Cond::EQ);
      }
      return;
    }
    case InstIcmp::Ugt: {
      if (Src0Ty == IceType_i64) {
        // T3 = !(hi1 <u hi0); when the high words are equal, replace with
        // T5 = !(lo1 <u lo0). Result is zero iff Src0 >u Src1.
        auto *T1 = I32Reg();
        auto *T2 = I32Reg();
        auto *T3 = I32Reg();
        auto *T4 = I32Reg();
        auto *T5 = I32Reg();
        _xor(T1, Src0HiR, Src1HiR);
        _sltu(T2, Src1HiR, Src0HiR);
        _xori(T3, T2, 1);
        _sltu(T4, Src1R, Src0R);
        _xori(T5, T4, 1);
        _movz(T3, T5, T1);
        _mov(DestT, T3);
        _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
      } else {
        _sltu(DestT, Src1R, Src0R);
        _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::EQZ);
      }
      return;
    }
    case InstIcmp::Uge: {
      if (Src0Ty == IceType_i64) {
        // T2 = (hi0 <u hi1); low-word verdict substituted when high words
        // match. Result is zero iff Src0 >=u Src1.
        auto *T1 = I32Reg();
        auto *T2 = I32Reg();
        auto *T3 = I32Reg();
        _xor(T1, Src0HiR, Src1HiR);
        _sltu(T2, Src0HiR, Src1HiR);
        _sltu(T3, Src0R, Src1R);
        _movz(T2, T3, T1);
        _mov(DestT, T2);
        _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
      } else {
        _sltu(DestT, Src0R, Src1R);
        _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
      }
      return;
    }
    case InstIcmp::Ult: {
      if (Src0Ty == IceType_i64) {
        auto *T1 = I32Reg();
        auto *T2 = I32Reg();
        auto *T3 = I32Reg();
        auto *T4 = I32Reg();
        auto *T5 = I32Reg();
        _xor(T1, Src0HiR, Src1HiR);
        _sltu(T2, Src0HiR, Src1HiR);
        _xori(T3, T2, 1);
        _sltu(T4, Src0R, Src1R);
        _xori(T5, T4, 1);
        _movz(T3, T5, T1);
        _mov(DestT, T3);
        _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
      } else {
        _sltu(DestT, Src0R, Src1R);
        _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::EQZ);
      }
      return;
    }
    case InstIcmp::Ule: {
      if (Src0Ty == IceType_i64) {
        auto *T1 = I32Reg();
        auto *T2 = I32Reg();
        auto *T3 = I32Reg();
        _xor(T1, Src0HiR, Src1HiR);
        _sltu(T2, Src1HiR, Src0HiR);
        _sltu(T3, Src1R, Src0R);
        _movz(T2, T3, T1);
        _mov(DestT, T2);
        _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
      } else {
        _sltu(DestT, Src1R, Src0R);
        _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
      }
      return;
    }
    case InstIcmp::Sgt: {
      if (Src0Ty == IceType_i64) {
        // High words compare signed (slt); low words compare unsigned (sltu),
        // as the low word of a signed 64-bit value is unsigned.
        auto *T1 = I32Reg();
        auto *T2 = I32Reg();
        auto *T3 = I32Reg();
        auto *T4 = I32Reg();
        auto *T5 = I32Reg();
        _xor(T1, Src0HiR, Src1HiR);
        _slt(T2, Src1HiR, Src0HiR);
        _xori(T3, T2, 1);
        _sltu(T4, Src1R, Src0R);
        _xori(T5, T4, 1);
        _movz(T3, T5, T1);
        _mov(DestT, T3);
        _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
      } else {
        _slt(DestT, Src1R, Src0R);
        _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::EQZ);
      }
      return;
    }
    case InstIcmp::Sge: {
      if (Src0Ty == IceType_i64) {
        auto *T1 = I32Reg();
        auto *T2 = I32Reg();
        auto *T3 = I32Reg();
        _xor(T1, Src0HiR, Src1HiR);
        _slt(T2, Src0HiR, Src1HiR);
        _sltu(T3, Src0R, Src1R);
        _movz(T2, T3, T1);
        _mov(DestT, T2);
        _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
      } else {
        _slt(DestT, Src0R, Src1R);
        _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
      }
      return;
    }
    case InstIcmp::Slt: {
      if (Src0Ty == IceType_i64) {
        auto *T1 = I32Reg();
        auto *T2 = I32Reg();
        auto *T3 = I32Reg();
        auto *T4 = I32Reg();
        auto *T5 = I32Reg();
        _xor(T1, Src0HiR, Src1HiR);
        _slt(T2, Src0HiR, Src1HiR);
        _xori(T3, T2, 1);
        _sltu(T4, Src0R, Src1R);
        _xori(T5, T4, 1);
        _movz(T3, T5, T1);
        _mov(DestT, T3);
        _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
      } else {
        _slt(DestT, Src0R, Src1R);
        _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::EQZ);
      }
      return;
    }
    case InstIcmp::Sle: {
      if (Src0Ty == IceType_i64) {
        auto *T1 = I32Reg();
        auto *T2 = I32Reg();
        auto *T3 = I32Reg();
        _xor(T1, Src0HiR, Src1HiR);
        _slt(T2, Src1HiR, Src0HiR);
        _sltu(T3, Src1R, Src0R);
        _movz(T2, T3, T1);
        _mov(DestT, T2);
        _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
      } else {
        _slt(DestT, Src1R, Src0R);
        _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
      }
      return;
    }
    }
  }
}
3338 
// Lowers a call instruction: classifies each argument into GPRs, FPRs, or
// stack slots via TargetMIPS32::CallingConv, copies stack-passed arguments
// into the out-args area, emits the (possibly sandboxed) call, and finally
// copies the return value out of the conventional return registers
// ($v0/$v1 for integers, $f0 for floats) into the instruction's Dest.
void TargetMIPS32::lowerCall(const InstCall *Instr) {
  CfgVector<Variable *> RegArgs;
  NeedsStackAlignment = true;

  //  Assign arguments to registers and stack. Also reserve stack.
  TargetMIPS32::CallingConv CC;

  // Pair of Arg Operand -> GPR number assignments.
  llvm::SmallVector<std::pair<Operand *, RegNumT>, MIPS32_MAX_GPR_ARG> GPRArgs;
  llvm::SmallVector<std::pair<Operand *, RegNumT>, MIPS32_MAX_FP_ARG> FPArgs;
  // Pair of Arg Operand -> stack offset.
  llvm::SmallVector<std::pair<Operand *, int32_t>, 8> StackArgs;
  // The first 16 bytes are the argument build area backing the four argument
  // registers, so stack-passed arguments start at offset 16.
  size_t ParameterAreaSizeBytes = 16;

  // Classify each argument operand according to the location where the
  // argument is passed.

  // v4f32 is returned through stack. $4 is setup by the caller and passed as
  // first argument implicitly. Callee then copies the return vector at $4.
  SizeT ArgNum = 0;
  Variable *Dest = Instr->getDest();
  Variable *RetVecFloat = nullptr;
  if (Dest && isVectorFloatingType(Dest->getType())) {
    // Allocate a 16-byte stack buffer for the returned vector and pass its
    // address as the implicit first argument in $a0.
    ArgNum = 1;
    CC.discardReg(RegMIPS32::Reg_A0);
    RetVecFloat = Func->makeVariable(IceType_i32);
    auto *ByteCount = ConstantInteger32::create(Ctx, IceType_i32, 16);
    constexpr SizeT Alignment = 4;
    lowerAlloca(InstAlloca::create(Func, RetVecFloat, ByteCount, Alignment));
    RegArgs.emplace_back(
        legalizeToReg(RetVecFloat, RegNumT::fixme(RegMIPS32::Reg_A0)));
  }

  for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {
    Operand *Arg = legalizeUndef(Instr->getArg(i));
    const Type Ty = Arg->getType();
    bool InReg = false;
    RegNumT Reg;

    InReg = CC.argInReg(Ty, i, &Reg);

    if (!InReg) {
      // Stack-passed argument: record each 32-bit piece with its offset.
      if (isVectorType(Ty)) {
        auto *ArgVec = llvm::cast<VariableVecOn32>(Arg);
        ParameterAreaSizeBytes =
            applyStackAlignmentTy(ParameterAreaSizeBytes, IceType_i64);
        for (Variable *Elem : ArgVec->getContainers()) {
          StackArgs.push_back(std::make_pair(Elem, ParameterAreaSizeBytes));
          ParameterAreaSizeBytes += typeWidthInBytesOnStack(IceType_i32);
        }
      } else {
        ParameterAreaSizeBytes =
            applyStackAlignmentTy(ParameterAreaSizeBytes, Ty);
        StackArgs.push_back(std::make_pair(Arg, ParameterAreaSizeBytes));
        ParameterAreaSizeBytes += typeWidthInBytesOnStack(Ty);
      }
      ++ArgNum;
      continue;
    }

    // Register-passed argument: vectors are split across consecutive GPRs
    // (possibly spilling the tail to stack), i64 takes a GPR pair, other
    // integers one GPR, and floats an FPR.
    if (isVectorType(Ty)) {
      auto *ArgVec = llvm::cast<VariableVecOn32>(Arg);
      Operand *Elem0 = ArgVec->getContainers()[0];
      Operand *Elem1 = ArgVec->getContainers()[1];
      GPRArgs.push_back(
          std::make_pair(Elem0, RegNumT::fixme((unsigned)Reg + 0)));
      GPRArgs.push_back(
          std::make_pair(Elem1, RegNumT::fixme((unsigned)Reg + 1)));
      Operand *Elem2 = ArgVec->getContainers()[2];
      Operand *Elem3 = ArgVec->getContainers()[3];
      // First argument is passed in $4:$5:$6:$7
      // Second and rest arguments are passed in $6:$7:stack:stack
      if (ArgNum == 0) {
        GPRArgs.push_back(
            std::make_pair(Elem2, RegNumT::fixme((unsigned)Reg + 2)));
        GPRArgs.push_back(
            std::make_pair(Elem3, RegNumT::fixme((unsigned)Reg + 3)));
      } else {
        ParameterAreaSizeBytes =
            applyStackAlignmentTy(ParameterAreaSizeBytes, IceType_i64);
        StackArgs.push_back(std::make_pair(Elem2, ParameterAreaSizeBytes));
        ParameterAreaSizeBytes += typeWidthInBytesOnStack(IceType_i32);
        StackArgs.push_back(std::make_pair(Elem3, ParameterAreaSizeBytes));
        ParameterAreaSizeBytes += typeWidthInBytesOnStack(IceType_i32);
      }
    } else if (Ty == IceType_i64) {
      Operand *Lo = loOperand(Arg);
      Operand *Hi = hiOperand(Arg);
      GPRArgs.push_back(
          std::make_pair(Lo, RegMIPS32::get64PairFirstRegNum(Reg)));
      GPRArgs.push_back(
          std::make_pair(Hi, RegMIPS32::get64PairSecondRegNum(Reg)));
    } else if (isScalarIntegerType(Ty)) {
      GPRArgs.push_back(std::make_pair(Arg, Reg));
    } else {
      FPArgs.push_back(std::make_pair(Arg, Reg));
    }
    ++ArgNum;
  }

  // Adjust the parameter area so that the stack is aligned. It is assumed that
  // the stack is already aligned at the start of the calling sequence.
  ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes);

  // Copy arguments that are passed on the stack to the appropriate stack
  // locations.
  Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP);
  for (auto &StackArg : StackArgs) {
    ConstantInteger32 *Loc =
        llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(StackArg.second));
    Type Ty = StackArg.first->getType();
    OperandMIPS32Mem *Addr;
    constexpr bool SignExt = false;
    // If the offset does not fit the memory operand's immediate field,
    // compute SP + offset into a temporary base register first.
    if (OperandMIPS32Mem::canHoldOffset(Ty, SignExt, StackArg.second)) {
      Addr = OperandMIPS32Mem::create(Func, Ty, SP, Loc);
    } else {
      Variable *NewBase = Func->makeVariable(SP->getType());
      lowerArithmetic(
          InstArithmetic::create(Func, InstArithmetic::Add, NewBase, SP, Loc));
      Addr = formMemoryOperand(NewBase, Ty);
    }
    lowerStore(InstStore::create(Func, StackArg.first, Addr));
  }

  // Generate the call instruction.  Assign its result to a temporary with high
  // register allocation weight.

  // ReturnReg doubles as ReturnRegLo as necessary.
  Variable *ReturnReg = nullptr;
  Variable *ReturnRegHi = nullptr;
  if (Dest) {
    switch (Dest->getType()) {
    case IceType_NUM:
      llvm_unreachable("Invalid Call dest type");
      return;
    case IceType_void:
      break;
    case IceType_i1:
    case IceType_i8:
    case IceType_i16:
    case IceType_i32:
      ReturnReg = makeReg(Dest->getType(), RegMIPS32::Reg_V0);
      break;
    case IceType_i64:
      // 64-bit integer results come back in the $v0:$v1 pair.
      ReturnReg = I32Reg(RegMIPS32::Reg_V0);
      ReturnRegHi = I32Reg(RegMIPS32::Reg_V1);
      break;
    case IceType_f32:
      ReturnReg = makeReg(Dest->getType(), RegMIPS32::Reg_F0);
      break;
    case IceType_f64:
      ReturnReg = makeReg(IceType_f64, RegMIPS32::Reg_F0);
      break;
    case IceType_v4i1:
    case IceType_v8i1:
    case IceType_v16i1:
    case IceType_v16i8:
    case IceType_v8i16:
    case IceType_v4i32: {
      // Integer vector results come back in consecutive registers starting
      // at $v0; pin each container to its register.
      ReturnReg = makeReg(Dest->getType(), RegMIPS32::Reg_V0);
      auto *RetVec = llvm::dyn_cast<VariableVecOn32>(ReturnReg);
      RetVec->initVecElement(Func);
      for (SizeT i = 0; i < RetVec->ContainersPerVector; ++i) {
        auto *Var = RetVec->getContainers()[i];
        Var->setRegNum(RegNumT::fixme(RegMIPS32::Reg_V0 + i));
      }
      break;
    }
    case IceType_v4f32:
      // Float vectors are returned through the caller-provided buffer (see
      // RetVecFloat above); $v0 is used as a plain i32 here.
      ReturnReg = makeReg(IceType_i32, RegMIPS32::Reg_V0);
      break;
    }
  }
  Operand *CallTarget = Instr->getCallTarget();
  // Allow ConstantRelocatable to be left alone as a direct call,
  // but force other constants like ConstantInteger32 to be in
  // a register and make it an indirect call.
  if (!llvm::isa<ConstantRelocatable>(CallTarget)) {
    CallTarget = legalize(CallTarget, Legal_Reg);
  }

  // Copy arguments to be passed in registers to the appropriate registers.
  for (auto &FPArg : FPArgs) {
    RegArgs.emplace_back(legalizeToReg(FPArg.first, FPArg.second));
  }
  for (auto &GPRArg : GPRArgs) {
    RegArgs.emplace_back(legalizeToReg(GPRArg.first, GPRArg.second));
  }

  // Generate a FakeUse of register arguments so that they do not get dead code
  // eliminated as a result of the FakeKill of scratch registers after the call.
  // These fake-uses need to be placed here to avoid argument registers from
  // being used during the legalizeToReg() calls above.
  for (auto *RegArg : RegArgs) {
    Context.insert<InstFakeUse>(RegArg);
  }

  // If variable alloca is used the extra 16 bytes for argument build area
  // will be allocated on stack before a call.
  if (VariableAllocaUsed)
    Sandboxer(this).addiu_sp(-MaxOutArgsSizeBytes);

  Inst *NewCall;

  // We don't need to define the return register if it is a vector.
  // We have inserted fake defs of it just after the call.
  if (ReturnReg && isVectorIntegerType(ReturnReg->getType())) {
    Variable *RetReg = nullptr;
    NewCall = InstMIPS32Call::create(Func, RetReg, CallTarget);
    Context.insert(NewCall);
  } else {
    NewCall = Sandboxer(this, InstBundleLock::Opt_AlignToEnd)
                  .jal(ReturnReg, CallTarget);
  }

  if (VariableAllocaUsed)
    Sandboxer(this).addiu_sp(MaxOutArgsSizeBytes);

  // Insert a fake use of stack pointer to avoid dead code elimination of addiu
  // instruction.
  Context.insert<InstFakeUse>(SP);

  if (ReturnRegHi)
    Context.insert(InstFakeDef::create(Func, ReturnRegHi));

  if (ReturnReg) {
    if (auto *RetVec = llvm::dyn_cast<VariableVecOn32>(ReturnReg)) {
      for (Variable *Var : RetVec->getContainers()) {
        Context.insert(InstFakeDef::create(Func, Var));
      }
    }
  }

  // Insert a register-kill pseudo instruction.
  Context.insert(InstFakeKill::create(Func, NewCall));

  // Generate a FakeUse to keep the call live if necessary.
  if (Instr->hasSideEffects() && ReturnReg) {
    if (auto *RetVec = llvm::dyn_cast<VariableVecOn32>(ReturnReg)) {
      for (Variable *Var : RetVec->getContainers()) {
        Context.insert<InstFakeUse>(Var);
      }
    } else {
      Context.insert<InstFakeUse>(ReturnReg);
    }
  }

  if (Dest == nullptr)
    return;

  // Assign the result of the call to Dest.
  if (ReturnReg) {
    if (RetVecFloat) {
      // Float-vector return: load the result back from the stack buffer whose
      // address was passed in $a0.
      auto *DestVecOn32 = llvm::cast<VariableVecOn32>(Dest);
      auto *TBase = legalizeToReg(RetVecFloat);
      for (SizeT i = 0; i < DestVecOn32->ContainersPerVector; ++i) {
        auto *Var = DestVecOn32->getContainers()[i];
        auto *TVar = makeReg(IceType_i32);
        OperandMIPS32Mem *Mem = OperandMIPS32Mem::create(
            Func, IceType_i32, TBase,
            llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(i * 4)));
        _lw(TVar, Mem);
        _mov(Var, TVar);
      }
    } else if (auto *RetVec = llvm::dyn_cast<VariableVecOn32>(ReturnReg)) {
      auto *DestVecOn32 = llvm::cast<VariableVecOn32>(Dest);
      for (SizeT i = 0; i < DestVecOn32->ContainersPerVector; ++i) {
        _mov(DestVecOn32->getContainers()[i], RetVec->getContainers()[i]);
      }
    } else if (ReturnRegHi) {
      assert(Dest->getType() == IceType_i64);
      auto *Dest64On32 = llvm::cast<Variable64On32>(Dest);
      Variable *DestLo = Dest64On32->getLo();
      Variable *DestHi = Dest64On32->getHi();
      _mov(DestLo, ReturnReg);
      _mov(DestHi, ReturnRegHi);
    } else {
      assert(Dest->getType() == IceType_i32 || Dest->getType() == IceType_i16 ||
             Dest->getType() == IceType_i8 || Dest->getType() == IceType_i1 ||
             isScalarFloatingType(Dest->getType()) ||
             isVectorType(Dest->getType()));
      _mov(Dest, ReturnReg);
    }
  }
}
3624 
lowerCast(const InstCast * Instr)3625 void TargetMIPS32::lowerCast(const InstCast *Instr) {
3626   InstCast::OpKind CastKind = Instr->getCastKind();
3627   Variable *Dest = Instr->getDest();
3628   Operand *Src0 = legalizeUndef(Instr->getSrc(0));
3629   const Type DestTy = Dest->getType();
3630   const Type Src0Ty = Src0->getType();
3631   const uint32_t ShiftAmount =
3632       (Src0Ty == IceType_i1
3633            ? INT32_BITS - 1
3634            : INT32_BITS - (CHAR_BITS * typeWidthInBytes(Src0Ty)));
3635   const uint32_t Mask =
3636       (Src0Ty == IceType_i1
3637            ? 1
3638            : (1 << (CHAR_BITS * typeWidthInBytes(Src0Ty))) - 1);
3639 
3640   if (isVectorType(DestTy)) {
3641     llvm::report_fatal_error("Cast: Destination type is vector");
3642     return;
3643   }
3644   switch (CastKind) {
3645   default:
3646     Func->setError("Cast type not supported");
3647     return;
3648   case InstCast::Sext: {
3649     if (DestTy == IceType_i64) {
3650       auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
3651       auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3652       Variable *Src0R = legalizeToReg(Src0);
3653       Variable *T1_Lo = I32Reg();
3654       Variable *T2_Lo = I32Reg();
3655       Variable *T_Hi = I32Reg();
3656       if (Src0Ty == IceType_i1) {
3657         _sll(T1_Lo, Src0R, INT32_BITS - 1);
3658         _sra(T2_Lo, T1_Lo, INT32_BITS - 1);
3659         _mov(DestHi, T2_Lo);
3660         _mov(DestLo, T2_Lo);
3661       } else if (Src0Ty == IceType_i8 || Src0Ty == IceType_i16) {
3662         _sll(T1_Lo, Src0R, ShiftAmount);
3663         _sra(T2_Lo, T1_Lo, ShiftAmount);
3664         _sra(T_Hi, T2_Lo, INT32_BITS - 1);
3665         _mov(DestHi, T_Hi);
3666         _mov(DestLo, T2_Lo);
3667       } else if (Src0Ty == IceType_i32) {
3668         _mov(T1_Lo, Src0R);
3669         _sra(T_Hi, T1_Lo, INT32_BITS - 1);
3670         _mov(DestHi, T_Hi);
3671         _mov(DestLo, T1_Lo);
3672       }
3673     } else {
3674       Variable *Src0R = legalizeToReg(Src0);
3675       Variable *T1 = makeReg(DestTy);
3676       Variable *T2 = makeReg(DestTy);
3677       if (Src0Ty == IceType_i1 || Src0Ty == IceType_i8 ||
3678           Src0Ty == IceType_i16) {
3679         _sll(T1, Src0R, ShiftAmount);
3680         _sra(T2, T1, ShiftAmount);
3681         _mov(Dest, T2);
3682       }
3683     }
3684     break;
3685   }
3686   case InstCast::Zext: {
3687     if (DestTy == IceType_i64) {
3688       auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
3689       auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3690       Variable *Src0R = legalizeToReg(Src0);
3691       Variable *T_Lo = I32Reg();
3692       Variable *T_Hi = I32Reg();
3693 
3694       if (Src0Ty == IceType_i1 || Src0Ty == IceType_i8 || Src0Ty == IceType_i16)
3695         _andi(T_Lo, Src0R, Mask);
3696       else if (Src0Ty == IceType_i32)
3697         _mov(T_Lo, Src0R);
3698       else
3699         assert(Src0Ty != IceType_i64);
3700       _mov(DestLo, T_Lo);
3701 
3702       auto *Zero = getZero();
3703       _addiu(T_Hi, Zero, 0);
3704       _mov(DestHi, T_Hi);
3705     } else {
3706       Variable *Src0R = legalizeToReg(Src0);
3707       Variable *T = makeReg(DestTy);
3708       if (Src0Ty == IceType_i1 || Src0Ty == IceType_i8 ||
3709           Src0Ty == IceType_i16) {
3710         _andi(T, Src0R, Mask);
3711         _mov(Dest, T);
3712       }
3713     }
3714     break;
3715   }
3716   case InstCast::Trunc: {
3717     if (Src0Ty == IceType_i64)
3718       Src0 = loOperand(Src0);
3719     Variable *Src0R = legalizeToReg(Src0);
3720     Variable *T = makeReg(DestTy);
3721     switch (DestTy) {
3722     case IceType_i1:
3723       _andi(T, Src0R, 0x1);
3724       break;
3725     case IceType_i8:
3726       _andi(T, Src0R, 0xff);
3727       break;
3728     case IceType_i16:
3729       _andi(T, Src0R, 0xffff);
3730       break;
3731     default:
3732       _mov(T, Src0R);
3733       break;
3734     }
3735     _mov(Dest, T);
3736     break;
3737   }
3738   case InstCast::Fptrunc: {
3739     assert(Dest->getType() == IceType_f32);
3740     assert(Src0->getType() == IceType_f64);
3741     auto *DestR = legalizeToReg(Dest);
3742     auto *Src0R = legalizeToReg(Src0);
3743     _cvt_s_d(DestR, Src0R);
3744     _mov(Dest, DestR);
3745     break;
3746   }
3747   case InstCast::Fpext: {
3748     assert(Dest->getType() == IceType_f64);
3749     assert(Src0->getType() == IceType_f32);
3750     auto *DestR = legalizeToReg(Dest);
3751     auto *Src0R = legalizeToReg(Src0);
3752     _cvt_d_s(DestR, Src0R);
3753     _mov(Dest, DestR);
3754     break;
3755   }
3756   case InstCast::Fptosi:
3757   case InstCast::Fptoui: {
3758     if (llvm::isa<Variable64On32>(Dest)) {
3759       llvm::report_fatal_error("fp-to-i64 should have been prelowered.");
3760       return;
3761     }
3762     if (DestTy != IceType_i64) {
3763       if (Src0Ty == IceType_f32 && isScalarIntegerType(DestTy)) {
3764         Variable *Src0R = legalizeToReg(Src0);
3765         Variable *FTmp = makeReg(IceType_f32);
3766         _trunc_w_s(FTmp, Src0R);
3767         _mov(Dest, FTmp);
3768         return;
3769       }
3770       if (Src0Ty == IceType_f64 && isScalarIntegerType(DestTy)) {
3771         Variable *Src0R = legalizeToReg(Src0);
3772         Variable *FTmp = makeReg(IceType_f64);
3773         _trunc_w_d(FTmp, Src0R);
3774         _mov(Dest, FTmp);
3775         return;
3776       }
3777     }
3778     llvm::report_fatal_error("Destination is i64 in fp-to-i32");
3779     break;
3780   }
3781   case InstCast::Sitofp:
3782   case InstCast::Uitofp: {
3783     if (llvm::isa<Variable64On32>(Dest)) {
3784       llvm::report_fatal_error("i64-to-fp should have been prelowered.");
3785       return;
3786     }
3787     if (Src0Ty != IceType_i64) {
3788       Variable *Src0R = legalizeToReg(Src0);
3789       auto *T0R = Src0R;
3790       if (Src0Ty != IceType_i32) {
3791         T0R = makeReg(IceType_i32);
3792         if (CastKind == InstCast::Uitofp)
3793           lowerCast(InstCast::create(Func, InstCast::Zext, T0R, Src0R));
3794         else
3795           lowerCast(InstCast::create(Func, InstCast::Sext, T0R, Src0R));
3796       }
3797       if (isScalarIntegerType(Src0Ty) && DestTy == IceType_f32) {
3798         Variable *FTmp1 = makeReg(IceType_f32);
3799         Variable *FTmp2 = makeReg(IceType_f32);
3800         _mtc1(FTmp1, T0R);
3801         _cvt_s_w(FTmp2, FTmp1);
3802         _mov(Dest, FTmp2);
3803         return;
3804       }
3805       if (isScalarIntegerType(Src0Ty) && DestTy == IceType_f64) {
3806         Variable *FTmp1 = makeReg(IceType_f64);
3807         Variable *FTmp2 = makeReg(IceType_f64);
3808         _mtc1(FTmp1, T0R);
3809         _cvt_d_w(FTmp2, FTmp1);
3810         _mov(Dest, FTmp2);
3811         return;
3812       }
3813     }
3814     llvm::report_fatal_error("Source is i64 in i32-to-fp");
3815     break;
3816   }
3817   case InstCast::Bitcast: {
3818     Operand *Src0 = Instr->getSrc(0);
3819     if (DestTy == Src0->getType()) {
3820       auto *Assign = InstAssign::create(Func, Dest, Src0);
3821       lowerAssign(Assign);
3822       return;
3823     }
3824     if (isVectorType(DestTy) || isVectorType(Src0->getType())) {
3825       llvm::report_fatal_error(
3826           "Bitcast: vector type should have been prelowered.");
3827       return;
3828     }
3829     switch (DestTy) {
3830     case IceType_NUM:
3831     case IceType_void:
3832       llvm::report_fatal_error("Unexpected bitcast.");
3833     case IceType_i1:
3834       UnimplementedLoweringError(this, Instr);
3835       break;
3836     case IceType_i8:
3837       assert(Src0->getType() == IceType_v8i1);
3838       llvm::report_fatal_error(
3839           "i8 to v8i1 conversion should have been prelowered.");
3840       break;
3841     case IceType_i16:
3842       assert(Src0->getType() == IceType_v16i1);
3843       llvm::report_fatal_error(
3844           "i16 to v16i1 conversion should have been prelowered.");
3845       break;
3846     case IceType_i32:
3847     case IceType_f32: {
3848       Variable *Src0R = legalizeToReg(Src0);
3849       _mov(Dest, Src0R);
3850       break;
3851     }
3852     case IceType_i64: {
3853       assert(Src0->getType() == IceType_f64);
3854       Variable *Src0R = legalizeToReg(Src0);
3855       auto *T = llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64));
3856       T->initHiLo(Func);
3857       T->getHi()->setMustNotHaveReg();
3858       T->getLo()->setMustNotHaveReg();
3859       Context.insert<InstFakeDef>(T->getHi());
3860       Context.insert<InstFakeDef>(T->getLo());
3861       _mov_fp64_to_i64(T->getHi(), Src0R, Int64_Hi);
3862       _mov_fp64_to_i64(T->getLo(), Src0R, Int64_Lo);
3863       lowerAssign(InstAssign::create(Func, Dest, T));
3864       break;
3865     }
3866     case IceType_f64: {
3867       assert(Src0->getType() == IceType_i64);
3868       const uint32_t Mask = 0xFFFFFFFF;
3869       if (auto *C64 = llvm::dyn_cast<ConstantInteger64>(Src0)) {
3870         Variable *RegHi, *RegLo;
3871         const uint64_t Value = C64->getValue();
3872         uint64_t Upper32Bits = (Value >> INT32_BITS) & Mask;
3873         uint64_t Lower32Bits = Value & Mask;
3874         RegLo = legalizeToReg(Ctx->getConstantInt32(Lower32Bits));
3875         RegHi = legalizeToReg(Ctx->getConstantInt32(Upper32Bits));
3876         _mov(Dest, RegHi, RegLo);
3877       } else {
3878         auto *Var64On32 = llvm::cast<Variable64On32>(Src0);
3879         auto *RegLo = legalizeToReg(loOperand(Var64On32));
3880         auto *RegHi = legalizeToReg(hiOperand(Var64On32));
3881         _mov(Dest, RegHi, RegLo);
3882       }
3883       break;
3884     }
3885     default:
3886       llvm::report_fatal_error("Unexpected bitcast.");
3887     }
3888     break;
3889   }
3890   }
3891 }
3892 
/// \brief Lowers extractelement for vectors, which this target models as a
/// group of 32-bit containers (VariableVecOn32). The container holding the
/// requested lane is selected, then the element is shifted/masked out of it.
/// Only compile-time constant indices are supported; anything else is a
/// fatal error.
lowerExtractElement(const InstExtractElement * Instr)3893 void TargetMIPS32::lowerExtractElement(const InstExtractElement *Instr) {
3894   Variable *Dest = Instr->getDest();
3895   const Type DestTy = Dest->getType();
3896   Operand *Src1 = Instr->getSrc(1);
3897   if (const auto *Imm = llvm::dyn_cast<ConstantInteger32>(Src1)) {
3898     const uint32_t Index = Imm->getValue();
3899     Variable *TDest = makeReg(DestTy);
       // Scratch register used by the two-step shift+mask extractions below.
3900     Variable *TReg = makeReg(DestTy);
3901     auto *Src0 = legalizeUndef(Instr->getSrc(0));
       // NOTE(review): dyn_cast result is dereferenced below without a null
       // check -- vector sources are presumably always VariableVecOn32 at
       // this point; confirm, or this would crash on a non-vector operand.
3902     auto *Src0R = llvm::dyn_cast<VariableVecOn32>(Src0);
3903     // Number of elements in each container
3904     uint32_t ElemPerCont =
3905         typeNumElements(Src0->getType()) / Src0R->ContainersPerVector;
       // Container that holds the requested lane.
3906     auto *Src = Src0R->getContainers()[Index / ElemPerCont];
3907     auto *SrcE = legalizeToReg(Src);
3908     // Position of the element in the container
3909     uint32_t PosInCont = Index % ElemPerCont;
3910     if (ElemPerCont == 1) {
       // 32-bit elements: the container is the element itself.
3911       _mov(TDest, SrcE);
3912     } else if (ElemPerCont == 2) {
       // 16-bit elements: mask off the low half, or logically shift down the
       // high half.
3913       switch (PosInCont) {
3914       case 0:
3915         _andi(TDest, SrcE, 0xffff);
3916         break;
3917       case 1:
3918         _srl(TDest, SrcE, 16);
3919         break;
3920       default:
3921         llvm::report_fatal_error("ExtractElement: Invalid PosInCont");
3922         break;
3923       }
3924     } else if (ElemPerCont == 4) {
       // 8-bit elements: shift the byte down into position, then mask it
       // (the shift alone suffices for the topmost byte).
3925       switch (PosInCont) {
3926       case 0:
3927         _andi(TDest, SrcE, 0xff);
3928         break;
3929       case 1:
3930         _srl(TReg, SrcE, 8);
3931         _andi(TDest, TReg, 0xff);
3932         break;
3933       case 2:
3934         _srl(TReg, SrcE, 16);
3935         _andi(TDest, TReg, 0xff);
3936         break;
3937       case 3:
3938         _srl(TDest, SrcE, 24);
3939         break;
3940       default:
3941         llvm::report_fatal_error("ExtractElement: Invalid PosInCont");
3942         break;
3943       }
3944     }
       // i1 vector elements may carry garbage above bit 0; keep only bit 0.
3945     if (typeElementType(Src0R->getType()) == IceType_i1) {
3946       Variable *TReg1 = makeReg(DestTy);
3947       _andi(TReg1, TDest, 0x1);
3948       _mov(Dest, TReg1);
3949     } else {
3950       _mov(Dest, TDest);
3951     }
3952     return;
3953   }
3954   llvm::report_fatal_error("ExtractElement requires a constant index");
3955 }
3956 
/// \brief Lowers a scalar f32/f64 fcmp. Each LLVM predicate is implemented
/// with a single MIPS32 c.<cond>.fmt compare that sets condition flag FCC0,
/// after which the i32 result register is seeded with 1 and conditionally
/// cleared back to 0 with either movf (clear when FCC0 is false) or movt
/// (clear when FCC0 is true). movt is used whenever the available hardware
/// condition is the *negation* of the LLVM predicate. False/True need no
/// compare at all; their FakeUses just keep the operand registers live.
lowerFcmp(const InstFcmp * Instr)3957 void TargetMIPS32::lowerFcmp(const InstFcmp *Instr) {
3958   Variable *Dest = Instr->getDest();
3959   if (isVectorType(Dest->getType())) {
3960     llvm::report_fatal_error("Fcmp: Destination type is vector");
3961     return;
3962   }
3963 
3964   auto *Src0 = Instr->getSrc(0);
3965   auto *Src1 = Instr->getSrc(1);
3966   auto *Zero = getZero();
3967 
3968   InstFcmp::FCond Cond = Instr->getCondition();
3969   auto *DestR = makeReg(IceType_i32);
3970   auto *Src0R = legalizeToReg(Src0);
3971   auto *Src1R = legalizeToReg(Src1);
3972   const Type Src0Ty = Src0->getType();
3973 
3974   Operand *FCC0 = OperandMIPS32FCC::create(getFunc(), OperandMIPS32FCC::FCC0);
3975 
3976   switch (Cond) {
3977   default: {
3978     llvm::report_fatal_error("Unhandled fp comparison.");
3979     return;
3980   }
       // false: result is the constant 0.
3981   case InstFcmp::False: {
3982     Context.insert<InstFakeUse>(Src0R);
3983     Context.insert<InstFakeUse>(Src1R);
3984     _addiu(DestR, Zero, 0);
3985     _mov(Dest, DestR);
3986     break;
3987   }
       // oeq: c.eq sets FCC0 on ordered-equal; movf clears when FCC0 false.
3988   case InstFcmp::Oeq: {
3989     if (Src0Ty == IceType_f32) {
3990       _c_eq_s(Src0R, Src1R);
3991     } else {
3992       _c_eq_d(Src0R, Src1R);
3993     }
3994     _addiu(DestR, Zero, 1);
3995     _movf(DestR, Zero, FCC0);
3996     _mov(Dest, DestR);
3997     break;
3998   }
       // ogt = !ule: movt clears when src0 <= src1 or unordered.
3999   case InstFcmp::Ogt: {
4000     if (Src0Ty == IceType_f32) {
4001       _c_ule_s(Src0R, Src1R);
4002     } else {
4003       _c_ule_d(Src0R, Src1R);
4004     }
4005     _addiu(DestR, Zero, 1);
4006     _movt(DestR, Zero, FCC0);
4007     _mov(Dest, DestR);
4008     break;
4009   }
       // oge = !ult: movt clears when src0 < src1 or unordered.
4010   case InstFcmp::Oge: {
4011     if (Src0Ty == IceType_f32) {
4012       _c_ult_s(Src0R, Src1R);
4013     } else {
4014       _c_ult_d(Src0R, Src1R);
4015     }
4016     _addiu(DestR, Zero, 1);
4017     _movt(DestR, Zero, FCC0);
4018     _mov(Dest, DestR);
4019     break;
4020   }
       // olt: c.olt sets FCC0 on ordered less-than; movf clears on !FCC0.
4021   case InstFcmp::Olt: {
4022     if (Src0Ty == IceType_f32) {
4023       _c_olt_s(Src0R, Src1R);
4024     } else {
4025       _c_olt_d(Src0R, Src1R);
4026     }
4027     _addiu(DestR, Zero, 1);
4028     _movf(DestR, Zero, FCC0);
4029     _mov(Dest, DestR);
4030     break;
4031   }
       // ole: c.ole sets FCC0 on ordered less-or-equal; movf clears on !FCC0.
4032   case InstFcmp::Ole: {
4033     if (Src0Ty == IceType_f32) {
4034       _c_ole_s(Src0R, Src1R);
4035     } else {
4036       _c_ole_d(Src0R, Src1R);
4037     }
4038     _addiu(DestR, Zero, 1);
4039     _movf(DestR, Zero, FCC0);
4040     _mov(Dest, DestR);
4041     break;
4042   }
       // one = !ueq: movt clears when equal or unordered.
4043   case InstFcmp::One: {
4044     if (Src0Ty == IceType_f32) {
4045       _c_ueq_s(Src0R, Src1R);
4046     } else {
4047       _c_ueq_d(Src0R, Src1R);
4048     }
4049     _addiu(DestR, Zero, 1);
4050     _movt(DestR, Zero, FCC0);
4051     _mov(Dest, DestR);
4052     break;
4053   }
       // ord = !un: movt clears when the operands are unordered (NaN).
4054   case InstFcmp::Ord: {
4055     if (Src0Ty == IceType_f32) {
4056       _c_un_s(Src0R, Src1R);
4057     } else {
4058       _c_un_d(Src0R, Src1R);
4059     }
4060     _addiu(DestR, Zero, 1);
4061     _movt(DestR, Zero, FCC0);
4062     _mov(Dest, DestR);
4063     break;
4064   }
       // ueq: c.ueq sets FCC0 on equal-or-unordered; movf clears on !FCC0.
4065   case InstFcmp::Ueq: {
4066     if (Src0Ty == IceType_f32) {
4067       _c_ueq_s(Src0R, Src1R);
4068     } else {
4069       _c_ueq_d(Src0R, Src1R);
4070     }
4071     _addiu(DestR, Zero, 1);
4072     _movf(DestR, Zero, FCC0);
4073     _mov(Dest, DestR);
4074     break;
4075   }
       // ugt = !ole: movt clears when ordered src0 <= src1.
4076   case InstFcmp::Ugt: {
4077     if (Src0Ty == IceType_f32) {
4078       _c_ole_s(Src0R, Src1R);
4079     } else {
4080       _c_ole_d(Src0R, Src1R);
4081     }
4082     _addiu(DestR, Zero, 1);
4083     _movt(DestR, Zero, FCC0);
4084     _mov(Dest, DestR);
4085     break;
4086   }
       // uge = !olt: movt clears when ordered src0 < src1.
4087   case InstFcmp::Uge: {
4088     if (Src0Ty == IceType_f32) {
4089       _c_olt_s(Src0R, Src1R);
4090     } else {
4091       _c_olt_d(Src0R, Src1R);
4092     }
4093     _addiu(DestR, Zero, 1);
4094     _movt(DestR, Zero, FCC0);
4095     _mov(Dest, DestR);
4096     break;
4097   }
       // ult: c.ult sets FCC0 on unordered-or-less; movf clears on !FCC0.
4098   case InstFcmp::Ult: {
4099     if (Src0Ty == IceType_f32) {
4100       _c_ult_s(Src0R, Src1R);
4101     } else {
4102       _c_ult_d(Src0R, Src1R);
4103     }
4104     _addiu(DestR, Zero, 1);
4105     _movf(DestR, Zero, FCC0);
4106     _mov(Dest, DestR);
4107     break;
4108   }
       // ule: c.ule sets FCC0 on unordered-or-less-or-equal; movf on !FCC0.
4109   case InstFcmp::Ule: {
4110     if (Src0Ty == IceType_f32) {
4111       _c_ule_s(Src0R, Src1R);
4112     } else {
4113       _c_ule_d(Src0R, Src1R);
4114     }
4115     _addiu(DestR, Zero, 1);
4116     _movf(DestR, Zero, FCC0);
4117     _mov(Dest, DestR);
4118     break;
4119   }
       // une = !eq: movt clears when ordered-equal.
4120   case InstFcmp::Une: {
4121     if (Src0Ty == IceType_f32) {
4122       _c_eq_s(Src0R, Src1R);
4123     } else {
4124       _c_eq_d(Src0R, Src1R);
4125     }
4126     _addiu(DestR, Zero, 1);
4127     _movt(DestR, Zero, FCC0);
4128     _mov(Dest, DestR);
4129     break;
4130   }
       // uno: c.un sets FCC0 on unordered; movf clears on !FCC0.
4131   case InstFcmp::Uno: {
4132     if (Src0Ty == IceType_f32) {
4133       _c_un_s(Src0R, Src1R);
4134     } else {
4135       _c_un_d(Src0R, Src1R);
4136     }
4137     _addiu(DestR, Zero, 1);
4138     _movf(DestR, Zero, FCC0);
4139     _mov(Dest, DestR);
4140     break;
4141   }
       // true: result is the constant 1.
4142   case InstFcmp::True: {
4143     Context.insert<InstFakeUse>(Src0R);
4144     Context.insert<InstFakeUse>(Src1R);
4145     _addiu(DestR, Zero, 1);
4146     _mov(Dest, DestR);
4147     break;
4148   }
4149   }
4150 }
4151 
/// \brief Lowers a 64-bit icmp using 32-bit register pairs (lo/hi halves).
/// Equality/inequality OR the hi and lo XORs together and test for zero.
/// The ordered compares first compare the high words (signed or unsigned as
/// the predicate requires), then use movz keyed on the hi-word XOR so that,
/// when the high words are equal, the unsigned comparison of the low words
/// decides the result instead.
lower64Icmp(const InstIcmp * Instr)4152 void TargetMIPS32::lower64Icmp(const InstIcmp *Instr) {
4153   Operand *Src0 = legalize(Instr->getSrc(0));
4154   Operand *Src1 = legalize(Instr->getSrc(1));
4155   Variable *Dest = Instr->getDest();
4156   InstIcmp::ICond Condition = Instr->getCondition();
4157 
4158   Variable *Src0LoR = legalizeToReg(loOperand(Src0));
4159   Variable *Src0HiR = legalizeToReg(hiOperand(Src0));
4160   Variable *Src1LoR = legalizeToReg(loOperand(Src1));
4161   Variable *Src1HiR = legalizeToReg(hiOperand(Src1));
4162 
4163   switch (Condition) {
4164   default:
4165     llvm_unreachable("unexpected condition");
4166     return;
       // eq: ((hi0^hi1) | (lo0^lo1)) == 0; sltiu x, 1 tests for zero.
4167   case InstIcmp::Eq: {
4168     auto *T1 = I32Reg();
4169     auto *T2 = I32Reg();
4170     auto *T3 = I32Reg();
4171     auto *T4 = I32Reg();
4172     _xor(T1, Src0HiR, Src1HiR);
4173     _xor(T2, Src0LoR, Src1LoR);
4174     _or(T3, T1, T2);
4175     _sltiu(T4, T3, 1);
4176     _mov(Dest, T4);
4177     return;
4178   }
       // ne: ((hi0^hi1) | (lo0^lo1)) != 0; 0 <u x tests for nonzero.
4179   case InstIcmp::Ne: {
4180     auto *T1 = I32Reg();
4181     auto *T2 = I32Reg();
4182     auto *T3 = I32Reg();
4183     auto *T4 = I32Reg();
4184     _xor(T1, Src0HiR, Src1HiR);
4185     _xor(T2, Src0LoR, Src1LoR);
4186     _or(T3, T1, T2);
4187     _sltu(T4, getZero(), T3);
4188     _mov(Dest, T4);
4189     return;
4190   }
       // sgt: signed compare of hi words decides; if they are equal
       // (T1 == 0), movz substitutes the unsigned lo-word compare.
4191   case InstIcmp::Sgt: {
4192     auto *T1 = I32Reg();
4193     auto *T2 = I32Reg();
4194     auto *T3 = I32Reg();
4195     _xor(T1, Src0HiR, Src1HiR);
4196     _slt(T2, Src1HiR, Src0HiR);
4197     _sltu(T3, Src1LoR, Src0LoR);
4198     _movz(T2, T3, T1);
4199     _mov(Dest, T2);
4200     return;
4201   }
       // ugt: same shape as sgt, but the hi compare is unsigned.
4202   case InstIcmp::Ugt: {
4203     auto *T1 = I32Reg();
4204     auto *T2 = I32Reg();
4205     auto *T3 = I32Reg();
4206     _xor(T1, Src0HiR, Src1HiR);
4207     _sltu(T2, Src1HiR, Src0HiR);
4208     _sltu(T3, Src1LoR, Src0LoR);
4209     _movz(T2, T3, T1);
4210     _mov(Dest, T2);
4211     return;
4212   }
       // sge = !slt: each strict compare is negated with xori ..., 1 before
       // the movz selection.
4213   case InstIcmp::Sge: {
4214     auto *T1 = I32Reg();
4215     auto *T2 = I32Reg();
4216     auto *T3 = I32Reg();
4217     auto *T4 = I32Reg();
4218     auto *T5 = I32Reg();
4219     _xor(T1, Src0HiR, Src1HiR);
4220     _slt(T2, Src0HiR, Src1HiR);
4221     _xori(T3, T2, 1);
4222     _sltu(T4, Src0LoR, Src1LoR);
4223     _xori(T5, T4, 1);
4224     _movz(T3, T5, T1);
4225     _mov(Dest, T3);
4226     return;
4227   }
       // uge = !ult, negated strict compares as in sge.
4228   case InstIcmp::Uge: {
4229     auto *T1 = I32Reg();
4230     auto *T2 = I32Reg();
4231     auto *T3 = I32Reg();
4232     auto *T4 = I32Reg();
4233     auto *T5 = I32Reg();
4234     _xor(T1, Src0HiR, Src1HiR);
4235     _sltu(T2, Src0HiR, Src1HiR);
4236     _xori(T3, T2, 1);
4237     _sltu(T4, Src0LoR, Src1LoR);
4238     _xori(T5, T4, 1);
4239     _movz(T3, T5, T1);
4240     _mov(Dest, T3);
4241     return;
4242   }
       // slt: mirror of sgt with the operands swapped in the compares.
4243   case InstIcmp::Slt: {
4244     auto *T1 = I32Reg();
4245     auto *T2 = I32Reg();
4246     auto *T3 = I32Reg();
4247     _xor(T1, Src0HiR, Src1HiR);
4248     _slt(T2, Src0HiR, Src1HiR);
4249     _sltu(T3, Src0LoR, Src1LoR);
4250     _movz(T2, T3, T1);
4251     _mov(Dest, T2);
4252     return;
4253   }
       // ult: mirror of ugt with the operands swapped in the compares.
4254   case InstIcmp::Ult: {
4255     auto *T1 = I32Reg();
4256     auto *T2 = I32Reg();
4257     auto *T3 = I32Reg();
4258     _xor(T1, Src0HiR, Src1HiR);
4259     _sltu(T2, Src0HiR, Src1HiR);
4260     _sltu(T3, Src0LoR, Src1LoR);
4261     _movz(T2, T3, T1);
4262     _mov(Dest, T2);
4263     return;
4264   }
       // sle = !sgt, negated strict compares as in sge.
4265   case InstIcmp::Sle: {
4266     auto *T1 = I32Reg();
4267     auto *T2 = I32Reg();
4268     auto *T3 = I32Reg();
4269     auto *T4 = I32Reg();
4270     auto *T5 = I32Reg();
4271     _xor(T1, Src0HiR, Src1HiR);
4272     _slt(T2, Src1HiR, Src0HiR);
4273     _xori(T3, T2, 1);
4274     _sltu(T4, Src1LoR, Src0LoR);
4275     _xori(T5, T4, 1);
4276     _movz(T3, T5, T1);
4277     _mov(Dest, T3);
4278     return;
4279   }
       // ule = !ugt, negated strict compares as in uge.
4280   case InstIcmp::Ule: {
4281     auto *T1 = I32Reg();
4282     auto *T2 = I32Reg();
4283     auto *T3 = I32Reg();
4284     auto *T4 = I32Reg();
4285     auto *T5 = I32Reg();
4286     _xor(T1, Src0HiR, Src1HiR);
4287     _sltu(T2, Src1HiR, Src0HiR);
4288     _xori(T3, T2, 1);
4289     _sltu(T4, Src1LoR, Src0LoR);
4290     _xori(T5, T4, 1);
4291     _movz(T3, T5, T1);
4292     _mov(Dest, T3);
4293     return;
4294   }
4295   }
4296 }
4297 
/// \brief Lowers a scalar icmp. 64-bit compares are delegated to
/// lower64Icmp; vector destinations are unsupported. For i8/i16 (and i1)
/// operands, both sources are shifted left so the significant bits occupy
/// the top of the 32-bit register -- this makes the 32-bit slt/sltu results
/// correct for both signed and unsigned predicates regardless of whatever
/// garbage sits in the operands' upper bits, since shifting both operands by
/// the same amount preserves their relative order. Each predicate then maps
/// onto slt/sltu (strict compares) or its xori-negation (>= / <=), with
/// sltiu x, 1 / sltu 0, x used as zero/nonzero tests for eq/ne.
lowerIcmp(const InstIcmp * Instr)4298 void TargetMIPS32::lowerIcmp(const InstIcmp *Instr) {
4299   auto *Src0 = Instr->getSrc(0);
4300   auto *Src1 = Instr->getSrc(1);
4301   if (Src0->getType() == IceType_i64) {
4302     lower64Icmp(Instr);
4303     return;
4304   }
4305   Variable *Dest = Instr->getDest();
4306   if (isVectorType(Dest->getType())) {
4307     llvm::report_fatal_error("Icmp: Destination type is vector");
4308     return;
4309   }
4310   InstIcmp::ICond Cond = Instr->getCondition();
4311   auto *Src0R = legalizeToReg(Src0);
4312   auto *Src1R = legalizeToReg(Src1);
4313   const Type Src0Ty = Src0R->getType();
       // Shift amount that moves a sub-32-bit value's MSB up to bit 31.
4314   const uint32_t ShAmt = INT32_BITS - getScalarIntBitWidth(Src0->getType());
4315   Variable *Src0RT = I32Reg();
4316   Variable *Src1RT = I32Reg();
4317 
       // Normalize narrow operands into the top bits; i32 operands are used
       // as-is.
4318   if (Src0Ty != IceType_i32) {
4319     _sll(Src0RT, Src0R, ShAmt);
4320     _sll(Src1RT, Src1R, ShAmt);
4321   } else {
4322     _mov(Src0RT, Src0R);
4323     _mov(Src1RT, Src1R);
4324   }
4325 
4326   switch (Cond) {
       // eq: (x ^ y) == 0, tested with sltiu x, 1.
4327   case InstIcmp::Eq: {
4328     auto *DestT = I32Reg();
4329     auto *T = I32Reg();
4330     _xor(T, Src0RT, Src1RT);
4331     _sltiu(DestT, T, 1);
4332     _mov(Dest, DestT);
4333     return;
4334   }
       // ne: (x ^ y) != 0, tested with 0 <u (x ^ y).
4335   case InstIcmp::Ne: {
4336     auto *DestT = I32Reg();
4337     auto *T = I32Reg();
4338     auto *Zero = getZero();
4339     _xor(T, Src0RT, Src1RT);
4340     _sltu(DestT, Zero, T);
4341     _mov(Dest, DestT);
4342     return;
4343   }
       // ugt: src1 <u src0.
4344   case InstIcmp::Ugt: {
4345     auto *DestT = I32Reg();
4346     _sltu(DestT, Src1RT, Src0RT);
4347     _mov(Dest, DestT);
4348     return;
4349   }
       // uge = !(src0 <u src1), negated with xori ..., 1.
4350   case InstIcmp::Uge: {
4351     auto *DestT = I32Reg();
4352     auto *T = I32Reg();
4353     _sltu(T, Src0RT, Src1RT);
4354     _xori(DestT, T, 1);
4355     _mov(Dest, DestT);
4356     return;
4357   }
       // ult: src0 <u src1.
4358   case InstIcmp::Ult: {
4359     auto *DestT = I32Reg();
4360     _sltu(DestT, Src0RT, Src1RT);
4361     _mov(Dest, DestT);
4362     return;
4363   }
       // ule = !(src1 <u src0).
4364   case InstIcmp::Ule: {
4365     auto *DestT = I32Reg();
4366     auto *T = I32Reg();
4367     _sltu(T, Src1RT, Src0RT);
4368     _xori(DestT, T, 1);
4369     _mov(Dest, DestT);
4370     return;
4371   }
       // sgt: src1 <s src0.
4372   case InstIcmp::Sgt: {
4373     auto *DestT = I32Reg();
4374     _slt(DestT, Src1RT, Src0RT);
4375     _mov(Dest, DestT);
4376     return;
4377   }
       // sge = !(src0 <s src1).
4378   case InstIcmp::Sge: {
4379     auto *DestT = I32Reg();
4380     auto *T = I32Reg();
4381     _slt(T, Src0RT, Src1RT);
4382     _xori(DestT, T, 1);
4383     _mov(Dest, DestT);
4384     return;
4385   }
       // slt: src0 <s src1.
4386   case InstIcmp::Slt: {
4387     auto *DestT = I32Reg();
4388     _slt(DestT, Src0RT, Src1RT);
4389     _mov(Dest, DestT);
4390     return;
4391   }
       // sle = !(src1 <s src0).
4392   case InstIcmp::Sle: {
4393     auto *DestT = I32Reg();
4394     auto *T = I32Reg();
4395     _slt(T, Src1RT, Src0RT);
4396     _xori(DestT, T, 1);
4397     _mov(Dest, DestT);
4398     return;
4399   }
4400   default:
4401     llvm_unreachable("Invalid ICmp operator");
4402     return;
4403   }
4404 }
4405 
/// \brief Lowers insertelement for vectors modeled as groups of 32-bit
/// containers (VariableVecOn32). All containers except the one being
/// modified are copied into a temporary vector; the target container is then
/// rebuilt by masking out the destination lane and OR-ing in the (shifted)
/// new element, and the temporary vector is assigned back to Dest. Only
/// compile-time constant indices are supported.
lowerInsertElement(const InstInsertElement * Instr)4406 void TargetMIPS32::lowerInsertElement(const InstInsertElement *Instr) {
4407   Variable *Dest = Instr->getDest();
4408   const Type DestTy = Dest->getType();
4409   Operand *Src2 = Instr->getSrc(2);
4410   if (const auto *Imm = llvm::dyn_cast<ConstantInteger32>(Src2)) {
4411     const uint32_t Index = Imm->getValue();
4412     // Vector to insert in
       // NOTE(review): the dyn_cast results below (Src0R, VDest, TVDest) are
       // dereferenced without null checks -- these operands are presumably
       // always VariableVecOn32 at this point; confirm.
4413     auto *Src0 = legalizeUndef(Instr->getSrc(0));
4414     auto *Src0R = llvm::dyn_cast<VariableVecOn32>(Src0);
4415     // Number of elements in each container
4416     uint32_t ElemPerCont =
4417         typeNumElements(Src0->getType()) / Src0R->ContainersPerVector;
4418     // Source Element
4419     auto *Src = Src0R->getContainers()[Index / ElemPerCont];
4420     auto *SrcE = Src;
       // The whole-container case overwrites SrcE, so only materialize it in
       // a register when parts of it must be preserved.
4421     if (ElemPerCont > 1)
4422       SrcE = legalizeToReg(Src);
4423     // Dest is a vector
4424     auto *VDest = llvm::dyn_cast<VariableVecOn32>(Dest);
4425     VDest->initVecElement(Func);
4426     // Temp vector variable
4427     auto *TDest = makeReg(DestTy);
4428     auto *TVDest = llvm::dyn_cast<VariableVecOn32>(TDest);
4429     TVDest->initVecElement(Func);
4430     // Destination element
4431     auto *DstE = TVDest->getContainers()[Index / ElemPerCont];
4432     // Element to insert
4433     auto *Src1R = legalizeToReg(Instr->getSrc(1));
4434     auto *TReg1 = makeReg(IceType_i32);
4435     auto *TReg2 = makeReg(IceType_i32);
4436     auto *TReg3 = makeReg(IceType_i32);
4437     auto *TReg4 = makeReg(IceType_i32);
4438     auto *TReg5 = makeReg(IceType_i32);
4439     auto *TDReg = makeReg(IceType_i32);
4440     // Position of the element in the container
4441     uint32_t PosInCont = Index % ElemPerCont;
4442     // Load source vector in a temporary vector
4443     for (SizeT i = 0; i < TVDest->ContainersPerVector; ++i) {
4444       auto *DCont = TVDest->getContainers()[i];
4445       // Do not define DstE as we are going to redefine it
4446       if (DCont == DstE)
4447         continue;
4448       auto *SCont = Src0R->getContainers()[i];
4449       auto *TReg = makeReg(IceType_i32);
4450       _mov(TReg, SCont);
4451       _mov(DCont, TReg);
4452     }
4453     // Insert the element
4454     if (ElemPerCont == 1) {
       // 32-bit element: the new value replaces the whole container.
4455       _mov(DstE, Src1R);
4456     } else if (ElemPerCont == 2) {
       // 16-bit elements: clear the target halfword in both source and
       // element, then combine with OR.
4457       switch (PosInCont) {
4458       case 0:
4459         _andi(TReg1, Src1R, 0xffff); // Clear upper 16-bits of source
4460         _srl(TReg2, SrcE, 16);
4461         _sll(TReg3, TReg2, 16); // Clear lower 16-bits of element
4462         _or(TDReg, TReg1, TReg3);
4463         _mov(DstE, TDReg);
4464         break;
4465       case 1:
4466         _sll(TReg1, Src1R, 16); // Clear lower 16-bits  of source
4467         _sll(TReg2, SrcE, 16);
4468         _srl(TReg3, TReg2, 16); // Clear upper 16-bits of element
4469         _or(TDReg, TReg1, TReg3);
4470         _mov(DstE, TDReg);
4471         break;
4472       default:
4473         llvm::report_fatal_error("InsertElement: Invalid PosInCont");
4474         break;
4475       }
4476     } else if (ElemPerCont == 4) {
       // 8-bit elements: shift the new byte into position, punch a hole in
       // the container with an inverse byte mask, then OR the two together.
4477       switch (PosInCont) {
4478       case 0:
4479         _andi(TReg1, Src1R, 0xff); // Clear bits[31:8] of source
4480         _srl(TReg2, SrcE, 8);
4481         _sll(TReg3, TReg2, 8); // Clear bits[7:0] of element
4482         _or(TDReg, TReg1, TReg3);
4483         _mov(DstE, TDReg);
4484         break;
4485       case 1:
4486         _andi(TReg1, Src1R, 0xff); // Clear bits[31:8] of source
4487         _sll(TReg5, TReg1, 8);     // Position in the destination
4488         _lui(TReg2, Ctx->getConstantInt32(0xffff));
4489         _ori(TReg3, TReg2, 0x00ff); // Mask 0xFFFF00FF
4490         _and(TReg4, SrcE, TReg3); // Clear bits[15:8] of element
4491         _or(TDReg, TReg5, TReg4);
4492         _mov(DstE, TDReg);
4493         break;
4494       case 2:
4495         _andi(TReg1, Src1R, 0xff); // Clear bits[31:8] of source
4496         _sll(TReg5, TReg1, 16);    // Position in the destination
4497         _lui(TReg2, Ctx->getConstantInt32(0xff00));
4498         _ori(TReg3, TReg2, 0xffff); // Mask 0xFF00FFFF
4499         _and(TReg4, SrcE, TReg3); // Clear bits[23:16] of element
4500         _or(TDReg, TReg5, TReg4);
4501         _mov(DstE, TDReg);
4502         break;
4503       case 3:
4504         _sll(TReg1, Src1R, 24); // Position in the destination
4505         _sll(TReg2, SrcE, 8);
4506         _srl(TReg3, TReg2, 8); // Clear bits[31:24] of element
4507         _or(TDReg, TReg1, TReg3);
4508         _mov(DstE, TDReg);
4509         break;
4510       default:
4511         llvm::report_fatal_error("InsertElement: Invalid PosInCont");
4512         break;
4513       }
4514     }
4515     // Write back temporary vector to the destination
4516     auto *Assign = InstAssign::create(Func, Dest, TDest);
4517     lowerAssign(Assign);
4518     return;
4519   }
4520   llvm::report_fatal_error("InsertElement requires a constant index");
4521 }
4522 
createArithInst(Intrinsics::AtomicRMWOperation Operation,Variable * Dest,Variable * Src0,Variable * Src1)4523 void TargetMIPS32::createArithInst(Intrinsics::AtomicRMWOperation Operation,
4524                                    Variable *Dest, Variable *Src0,
4525                                    Variable *Src1) {
4526   switch (Operation) {
4527   default:
4528     llvm::report_fatal_error("Unknown AtomicRMW operation");
4529   case Intrinsics::AtomicExchange:
4530     llvm::report_fatal_error("Can't handle Atomic xchg operation");
4531   case Intrinsics::AtomicAdd:
4532     _addu(Dest, Src0, Src1);
4533     break;
4534   case Intrinsics::AtomicAnd:
4535     _and(Dest, Src0, Src1);
4536     break;
4537   case Intrinsics::AtomicSub:
4538     _subu(Dest, Src0, Src1);
4539     break;
4540   case Intrinsics::AtomicOr:
4541     _or(Dest, Src0, Src1);
4542     break;
4543   case Intrinsics::AtomicXor:
4544     _xor(Dest, Src0, Src1);
4545     break;
4546   }
4547 }
4548 
lowerIntrinsic(const InstIntrinsic * Instr)4549 void TargetMIPS32::lowerIntrinsic(const InstIntrinsic *Instr) {
4550   Variable *Dest = Instr->getDest();
4551   Type DestTy = (Dest == nullptr) ? IceType_void : Dest->getType();
4552 
4553   Intrinsics::IntrinsicID ID = Instr->getIntrinsicID();
4554   switch (ID) {
4555   case Intrinsics::AtomicLoad: {
4556     assert(isScalarIntegerType(DestTy));
4557     // We require the memory address to be naturally aligned. Given that is the
4558     // case, then normal loads are atomic.
4559     if (!Intrinsics::isMemoryOrderValid(
4560             ID, getConstantMemoryOrder(Instr->getArg(1)))) {
4561       Func->setError("Unexpected memory ordering for AtomicLoad");
4562       return;
4563     }
4564     if (DestTy == IceType_i64) {
4565       llvm::report_fatal_error("AtomicLoad.i64 should have been prelowered.");
4566       return;
4567     } else if (DestTy == IceType_i32) {
4568       auto *T1 = makeReg(DestTy);
4569       auto *RegAt = getPhysicalRegister(RegMIPS32::Reg_AT);
4570       auto *Base = legalizeToReg(Instr->getArg(0));
4571       auto *Addr = formMemoryOperand(Base, DestTy);
4572       InstMIPS32Label *Retry = InstMIPS32Label::create(Func, this);
4573       InstMIPS32Label *Exit = InstMIPS32Label::create(Func, this);
4574       constexpr CfgNode *NoTarget = nullptr;
4575       _sync();
4576       Context.insert(Retry);
4577       Sandboxer(this).ll(T1, Addr);
4578       _br(NoTarget, NoTarget, T1, getZero(), Exit, CondMIPS32::Cond::NE);
4579       _addiu(RegAt, getZero(), 0); // Loaded value is zero here, writeback zero
4580       Sandboxer(this).sc(RegAt, Addr);
4581       _br(NoTarget, NoTarget, RegAt, getZero(), Retry, CondMIPS32::Cond::EQ);
4582       Context.insert(Exit);
4583       _sync();
4584       _mov(Dest, T1);
4585       Context.insert<InstFakeUse>(T1);
4586     } else {
4587       const uint32_t Mask = (1 << (CHAR_BITS * typeWidthInBytes(DestTy))) - 1;
4588       auto *Base = legalizeToReg(Instr->getArg(0));
4589       auto *T1 = makeReg(IceType_i32);
4590       auto *T2 = makeReg(IceType_i32);
4591       auto *T3 = makeReg(IceType_i32);
4592       auto *T4 = makeReg(IceType_i32);
4593       auto *T5 = makeReg(IceType_i32);
4594       auto *T6 = makeReg(IceType_i32);
4595       auto *SrcMask = makeReg(IceType_i32);
4596       auto *Tdest = makeReg(IceType_i32);
4597       auto *RegAt = getPhysicalRegister(RegMIPS32::Reg_AT);
4598       InstMIPS32Label *Retry = InstMIPS32Label::create(Func, this);
4599       InstMIPS32Label *Exit = InstMIPS32Label::create(Func, this);
4600       constexpr CfgNode *NoTarget = nullptr;
4601       _sync();
4602       _addiu(T1, getZero(), -4); // Address mask 0xFFFFFFFC
4603       _andi(T2, Base, 3);        // Last two bits of the address
4604       _and(T3, Base, T1);        // Align the address
4605       _sll(T4, T2, 3);
4606       _ori(T5, getZero(), Mask);
4607       _sllv(SrcMask, T5, T4); // Source mask
4608       auto *Addr = formMemoryOperand(T3, IceType_i32);
4609       Context.insert(Retry);
4610       Sandboxer(this).ll(T6, Addr);
4611       _and(Tdest, T6, SrcMask);
4612       _br(NoTarget, NoTarget, T6, getZero(), Exit, CondMIPS32::Cond::NE);
4613       _addiu(RegAt, getZero(), 0); // Loaded value is zero here, writeback zero
4614       Sandboxer(this).sc(RegAt, Addr);
4615       _br(NoTarget, NoTarget, RegAt, getZero(), Retry, CondMIPS32::Cond::EQ);
4616       Context.insert(Exit);
4617       auto *T7 = makeReg(IceType_i32);
4618       auto *T8 = makeReg(IceType_i32);
4619       _srlv(T7, Tdest, T4);
4620       _andi(T8, T7, Mask);
4621       _sync();
4622       _mov(Dest, T8);
4623       Context.insert<InstFakeUse>(T6);
4624       Context.insert<InstFakeUse>(SrcMask);
4625     }
4626     return;
4627   }
4628   case Intrinsics::AtomicStore: {
4629     // We require the memory address to be naturally aligned. Given that is the
4630     // case, then normal stores are atomic.
4631     if (!Intrinsics::isMemoryOrderValid(
4632             ID, getConstantMemoryOrder(Instr->getArg(2)))) {
4633       Func->setError("Unexpected memory ordering for AtomicStore");
4634       return;
4635     }
4636     auto *Val = Instr->getArg(0);
4637     auto Ty = Val->getType();
4638     if (Ty == IceType_i64) {
4639       llvm::report_fatal_error("AtomicStore.i64 should have been prelowered.");
4640       return;
4641     } else if (Ty == IceType_i32) {
4642       auto *Val = legalizeToReg(Instr->getArg(0));
4643       auto *Base = legalizeToReg(Instr->getArg(1));
4644       auto *Addr = formMemoryOperand(Base, Ty);
4645       InstMIPS32Label *Retry = InstMIPS32Label::create(Func, this);
4646       constexpr CfgNode *NoTarget = nullptr;
4647       auto *T1 = makeReg(IceType_i32);
4648       auto *RegAt = getPhysicalRegister(RegMIPS32::Reg_AT);
4649       _sync();
4650       Context.insert(Retry);
4651       Sandboxer(this).ll(T1, Addr);
4652       _mov(RegAt, Val);
4653       Sandboxer(this).sc(RegAt, Addr);
4654       _br(NoTarget, NoTarget, RegAt, getZero(), Retry, CondMIPS32::Cond::EQ);
4655       Context.insert<InstFakeUse>(T1); // To keep LL alive
4656       _sync();
4657     } else {
4658       auto *Val = legalizeToReg(Instr->getArg(0));
4659       auto *Base = legalizeToReg(Instr->getArg(1));
4660       InstMIPS32Label *Retry = InstMIPS32Label::create(Func, this);
4661       constexpr CfgNode *NoTarget = nullptr;
4662       auto *T1 = makeReg(IceType_i32);
4663       auto *T2 = makeReg(IceType_i32);
4664       auto *T3 = makeReg(IceType_i32);
4665       auto *T4 = makeReg(IceType_i32);
4666       auto *T5 = makeReg(IceType_i32);
4667       auto *T6 = makeReg(IceType_i32);
4668       auto *T7 = makeReg(IceType_i32);
4669       auto *RegAt = getPhysicalRegister(RegMIPS32::Reg_AT);
4670       auto *SrcMask = makeReg(IceType_i32);
4671       auto *DstMask = makeReg(IceType_i32);
4672       const uint32_t Mask = (1 << (CHAR_BITS * typeWidthInBytes(Ty))) - 1;
4673       _sync();
4674       _addiu(T1, getZero(), -4);
4675       _and(T7, Base, T1);
4676       auto *Addr = formMemoryOperand(T7, Ty);
4677       _andi(T2, Base, 3);
4678       _sll(T3, T2, 3);
4679       _ori(T4, getZero(), Mask);
4680       _sllv(T5, T4, T3);
4681       _sllv(T6, Val, T3);
4682       _nor(SrcMask, getZero(), T5);
4683       _and(DstMask, T6, T5);
4684       Context.insert(Retry);
4685       Sandboxer(this).ll(RegAt, Addr);
4686       _and(RegAt, RegAt, SrcMask);
4687       _or(RegAt, RegAt, DstMask);
4688       Sandboxer(this).sc(RegAt, Addr);
4689       _br(NoTarget, NoTarget, RegAt, getZero(), Retry, CondMIPS32::Cond::EQ);
4690       Context.insert<InstFakeUse>(SrcMask);
4691       Context.insert<InstFakeUse>(DstMask);
4692       _sync();
4693     }
4694     return;
4695   }
4696   case Intrinsics::AtomicCmpxchg: {
4697     assert(isScalarIntegerType(DestTy));
4698     // We require the memory address to be naturally aligned. Given that is the
4699     // case, then normal loads are atomic.
4700     if (!Intrinsics::isMemoryOrderValid(
4701             ID, getConstantMemoryOrder(Instr->getArg(3)),
4702             getConstantMemoryOrder(Instr->getArg(4)))) {
4703       Func->setError("Unexpected memory ordering for AtomicCmpxchg");
4704       return;
4705     }
4706 
4707     InstMIPS32Label *Exit = InstMIPS32Label::create(Func, this);
4708     InstMIPS32Label *Retry = InstMIPS32Label::create(Func, this);
4709     constexpr CfgNode *NoTarget = nullptr;
4710     auto *New = Instr->getArg(2);
4711     auto *Expected = Instr->getArg(1);
4712     auto *ActualAddress = Instr->getArg(0);
4713 
4714     if (DestTy == IceType_i64) {
4715       llvm::report_fatal_error(
4716           "AtomicCmpxchg.i64 should have been prelowered.");
4717       return;
4718     } else if (DestTy == IceType_i8 || DestTy == IceType_i16) {
4719       auto *NewR = legalizeToReg(New);
4720       auto *ExpectedR = legalizeToReg(Expected);
4721       auto *ActualAddressR = legalizeToReg(ActualAddress);
4722       const uint32_t ShiftAmount =
4723           (INT32_BITS - CHAR_BITS * typeWidthInBytes(DestTy));
4724       const uint32_t Mask = (1 << (CHAR_BITS * typeWidthInBytes(DestTy))) - 1;
4725       auto *RegAt = getPhysicalRegister(RegMIPS32::Reg_AT);
4726       auto *T1 = I32Reg();
4727       auto *T2 = I32Reg();
4728       auto *T3 = I32Reg();
4729       auto *T4 = I32Reg();
4730       auto *T5 = I32Reg();
4731       auto *T6 = I32Reg();
4732       auto *T7 = I32Reg();
4733       auto *T8 = I32Reg();
4734       auto *T9 = I32Reg();
4735       _addiu(RegAt, getZero(), -4);
4736       _and(T1, ActualAddressR, RegAt);
4737       auto *Addr = formMemoryOperand(T1, DestTy);
4738       _andi(RegAt, ActualAddressR, 3);
4739       _sll(T2, RegAt, 3);
4740       _ori(RegAt, getZero(), Mask);
4741       _sllv(T3, RegAt, T2);
4742       _nor(T4, getZero(), T3);
4743       _andi(RegAt, ExpectedR, Mask);
4744       _sllv(T5, RegAt, T2);
4745       _andi(RegAt, NewR, Mask);
4746       _sllv(T6, RegAt, T2);
4747       _sync();
4748       Context.insert(Retry);
4749       Sandboxer(this).ll(T7, Addr);
4750       _and(T8, T7, T3);
4751       _br(NoTarget, NoTarget, T8, T5, Exit, CondMIPS32::Cond::NE);
4752       _and(RegAt, T7, T4);
4753       _or(T9, RegAt, T6);
4754       Sandboxer(this).sc(T9, Addr);
4755       _br(NoTarget, NoTarget, getZero(), T9, Retry, CondMIPS32::Cond::EQ);
4756       Context.insert<InstFakeUse>(getZero());
4757       Context.insert(Exit);
4758       _srlv(RegAt, T8, T2);
4759       _sll(RegAt, RegAt, ShiftAmount);
4760       _sra(RegAt, RegAt, ShiftAmount);
4761       _mov(Dest, RegAt);
4762       _sync();
4763       Context.insert<InstFakeUse>(T3);
4764       Context.insert<InstFakeUse>(T4);
4765       Context.insert<InstFakeUse>(T5);
4766       Context.insert<InstFakeUse>(T6);
4767       Context.insert<InstFakeUse>(T8);
4768       Context.insert<InstFakeUse>(ExpectedR);
4769       Context.insert<InstFakeUse>(NewR);
4770     } else {
4771       auto *T1 = I32Reg();
4772       auto *T2 = I32Reg();
4773       auto *NewR = legalizeToReg(New);
4774       auto *ExpectedR = legalizeToReg(Expected);
4775       auto *ActualAddressR = legalizeToReg(ActualAddress);
4776       _sync();
4777       Context.insert(Retry);
4778       Sandboxer(this).ll(T1, formMemoryOperand(ActualAddressR, DestTy));
4779       _br(NoTarget, NoTarget, T1, ExpectedR, Exit, CondMIPS32::Cond::NE);
4780       _mov(T2, NewR);
4781       Sandboxer(this).sc(T2, formMemoryOperand(ActualAddressR, DestTy));
4782       _br(NoTarget, NoTarget, T2, getZero(), Retry, CondMIPS32::Cond::EQ);
4783       Context.insert<InstFakeUse>(getZero());
4784       Context.insert(Exit);
4785       _mov(Dest, T1);
4786       _sync();
4787       Context.insert<InstFakeUse>(ExpectedR);
4788       Context.insert<InstFakeUse>(NewR);
4789     }
4790     return;
4791   }
4792   case Intrinsics::AtomicRMW: {
4793     assert(isScalarIntegerType(DestTy));
4794     // We require the memory address to be naturally aligned. Given that is the
4795     // case, then normal loads are atomic.
4796     if (!Intrinsics::isMemoryOrderValid(
4797             ID, getConstantMemoryOrder(Instr->getArg(3)))) {
4798       Func->setError("Unexpected memory ordering for AtomicRMW");
4799       return;
4800     }
4801 
4802     constexpr CfgNode *NoTarget = nullptr;
4803     InstMIPS32Label *Retry = InstMIPS32Label::create(Func, this);
4804     auto Operation = static_cast<Intrinsics::AtomicRMWOperation>(
4805         llvm::cast<ConstantInteger32>(Instr->getArg(0))->getValue());
4806     auto *New = Instr->getArg(2);
4807     auto *ActualAddress = Instr->getArg(1);
4808 
4809     if (DestTy == IceType_i64) {
4810       llvm::report_fatal_error("AtomicRMW.i64 should have been prelowered.");
4811       return;
4812     } else if (DestTy == IceType_i8 || DestTy == IceType_i16) {
4813       const uint32_t ShiftAmount =
4814           INT32_BITS - (CHAR_BITS * typeWidthInBytes(DestTy));
4815       const uint32_t Mask = (1 << (CHAR_BITS * typeWidthInBytes(DestTy))) - 1;
4816       auto *NewR = legalizeToReg(New);
4817       auto *ActualAddressR = legalizeToReg(ActualAddress);
4818       auto *RegAt = getPhysicalRegister(RegMIPS32::Reg_AT);
4819       auto *T1 = I32Reg();
4820       auto *T2 = I32Reg();
4821       auto *T3 = I32Reg();
4822       auto *T4 = I32Reg();
4823       auto *T5 = I32Reg();
4824       auto *T6 = I32Reg();
4825       auto *T7 = I32Reg();
4826       _sync();
4827       _addiu(RegAt, getZero(), -4);
4828       _and(T1, ActualAddressR, RegAt);
4829       _andi(RegAt, ActualAddressR, 3);
4830       _sll(T2, RegAt, 3);
4831       _ori(RegAt, getZero(), Mask);
4832       _sllv(T3, RegAt, T2);
4833       _nor(T4, getZero(), T3);
4834       _sllv(T5, NewR, T2);
4835       Context.insert(Retry);
4836       Sandboxer(this).ll(T6, formMemoryOperand(T1, DestTy));
4837       if (Operation != Intrinsics::AtomicExchange) {
4838         createArithInst(Operation, RegAt, T6, T5);
4839         _and(RegAt, RegAt, T3);
4840       }
4841       _and(T7, T6, T4);
4842       if (Operation == Intrinsics::AtomicExchange) {
4843         _or(RegAt, T7, T5);
4844       } else {
4845         _or(RegAt, T7, RegAt);
4846       }
4847       Sandboxer(this).sc(RegAt, formMemoryOperand(T1, DestTy));
4848       _br(NoTarget, NoTarget, RegAt, getZero(), Retry, CondMIPS32::Cond::EQ);
4849       Context.insert<InstFakeUse>(getZero());
4850       _and(RegAt, T6, T3);
4851       _srlv(RegAt, RegAt, T2);
4852       _sll(RegAt, RegAt, ShiftAmount);
4853       _sra(RegAt, RegAt, ShiftAmount);
4854       _mov(Dest, RegAt);
4855       _sync();
4856       Context.insert<InstFakeUse>(NewR);
4857       Context.insert<InstFakeUse>(Dest);
4858     } else {
4859       auto *T1 = I32Reg();
4860       auto *T2 = I32Reg();
4861       auto *NewR = legalizeToReg(New);
4862       auto *ActualAddressR = legalizeToReg(ActualAddress);
4863       _sync();
4864       Context.insert(Retry);
4865       Sandboxer(this).ll(T1, formMemoryOperand(ActualAddressR, DestTy));
4866       if (Operation == Intrinsics::AtomicExchange) {
4867         _mov(T2, NewR);
4868       } else {
4869         createArithInst(Operation, T2, T1, NewR);
4870       }
4871       Sandboxer(this).sc(T2, formMemoryOperand(ActualAddressR, DestTy));
4872       _br(NoTarget, NoTarget, T2, getZero(), Retry, CondMIPS32::Cond::EQ);
4873       Context.insert<InstFakeUse>(getZero());
4874       _mov(Dest, T1);
4875       _sync();
4876       Context.insert<InstFakeUse>(NewR);
4877       Context.insert<InstFakeUse>(Dest);
4878     }
4879     return;
4880   }
4881   case Intrinsics::AtomicFence:
4882   case Intrinsics::AtomicFenceAll:
4883     assert(Dest == nullptr);
4884     _sync();
4885     return;
4886   case Intrinsics::AtomicIsLockFree: {
4887     Operand *ByteSize = Instr->getArg(0);
4888     auto *CI = llvm::dyn_cast<ConstantInteger32>(ByteSize);
4889     auto *T = I32Reg();
4890     if (CI == nullptr) {
4891       // The PNaCl ABI requires the byte size to be a compile-time constant.
4892       Func->setError("AtomicIsLockFree byte size should be compile-time const");
4893       return;
4894     }
4895     static constexpr int32_t NotLockFree = 0;
4896     static constexpr int32_t LockFree = 1;
4897     int32_t Result = NotLockFree;
4898     switch (CI->getValue()) {
4899     case 1:
4900     case 2:
4901     case 4:
4902       Result = LockFree;
4903       break;
4904     }
4905     _addiu(T, getZero(), Result);
4906     _mov(Dest, T);
4907     return;
4908   }
4909   case Intrinsics::Bswap: {
4910     auto *Src = Instr->getArg(0);
4911     const Type SrcTy = Src->getType();
4912     assert(SrcTy == IceType_i16 || SrcTy == IceType_i32 ||
4913            SrcTy == IceType_i64);
4914     switch (SrcTy) {
4915     case IceType_i16: {
4916       auto *T1 = I32Reg();
4917       auto *T2 = I32Reg();
4918       auto *T3 = I32Reg();
4919       auto *T4 = I32Reg();
4920       auto *SrcR = legalizeToReg(Src);
4921       _sll(T1, SrcR, 8);
4922       _lui(T2, Ctx->getConstantInt32(255));
4923       _and(T1, T1, T2);
4924       _sll(T3, SrcR, 24);
4925       _or(T1, T3, T1);
4926       _srl(T4, T1, 16);
4927       _mov(Dest, T4);
4928       return;
4929     }
4930     case IceType_i32: {
4931       auto *T1 = I32Reg();
4932       auto *T2 = I32Reg();
4933       auto *T3 = I32Reg();
4934       auto *T4 = I32Reg();
4935       auto *T5 = I32Reg();
4936       auto *SrcR = legalizeToReg(Src);
4937       _srl(T1, SrcR, 24);
4938       _srl(T2, SrcR, 8);
4939       _andi(T2, T2, 0xFF00);
4940       _or(T1, T2, T1);
4941       _sll(T4, SrcR, 8);
4942       _lui(T3, Ctx->getConstantInt32(255));
4943       _and(T4, T4, T3);
4944       _sll(T5, SrcR, 24);
4945       _or(T4, T5, T4);
4946       _or(T4, T4, T1);
4947       _mov(Dest, T4);
4948       return;
4949     }
4950     case IceType_i64: {
4951       auto *T1 = I32Reg();
4952       auto *T2 = I32Reg();
4953       auto *T3 = I32Reg();
4954       auto *T4 = I32Reg();
4955       auto *T5 = I32Reg();
4956       auto *T6 = I32Reg();
4957       auto *T7 = I32Reg();
4958       auto *T8 = I32Reg();
4959       auto *T9 = I32Reg();
4960       auto *T10 = I32Reg();
4961       auto *T11 = I32Reg();
4962       auto *T12 = I32Reg();
4963       auto *T13 = I32Reg();
4964       auto *T14 = I32Reg();
4965       auto *T15 = I32Reg();
4966       auto *T16 = I32Reg();
4967       auto *T17 = I32Reg();
4968       auto *T18 = I32Reg();
4969       auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
4970       auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
4971       Src = legalizeUndef(Src);
4972       auto *SrcLoR = legalizeToReg(loOperand(Src));
4973       auto *SrcHiR = legalizeToReg(hiOperand(Src));
4974       _sll(T1, SrcHiR, 8);
4975       _srl(T2, SrcHiR, 24);
4976       _srl(T3, SrcHiR, 8);
4977       _andi(T3, T3, 0xFF00);
4978       _lui(T4, Ctx->getConstantInt32(255));
4979       _or(T5, T3, T2);
4980       _and(T6, T1, T4);
4981       _sll(T7, SrcHiR, 24);
4982       _or(T8, T7, T6);
4983       _srl(T9, SrcLoR, 24);
4984       _srl(T10, SrcLoR, 8);
4985       _andi(T11, T10, 0xFF00);
4986       _or(T12, T8, T5);
4987       _or(T13, T11, T9);
4988       _sll(T14, SrcLoR, 8);
4989       _and(T15, T14, T4);
4990       _sll(T16, SrcLoR, 24);
4991       _or(T17, T16, T15);
4992       _or(T18, T17, T13);
4993       _mov(DestLo, T12);
4994       _mov(DestHi, T18);
4995       return;
4996     }
4997     default:
4998       llvm::report_fatal_error("Control flow should never have reached here.");
4999     }
5000     return;
5001   }
5002   case Intrinsics::Ctpop: {
5003     llvm::report_fatal_error("Ctpop should have been prelowered.");
5004     return;
5005   }
5006   case Intrinsics::Ctlz: {
5007     auto *Src = Instr->getArg(0);
5008     const Type SrcTy = Src->getType();
5009     assert(SrcTy == IceType_i32 || SrcTy == IceType_i64);
5010     switch (SrcTy) {
5011     case IceType_i32: {
5012       auto *T = I32Reg();
5013       auto *SrcR = legalizeToReg(Src);
5014       _clz(T, SrcR);
5015       _mov(Dest, T);
5016       break;
5017     }
5018     case IceType_i64: {
5019       auto *T1 = I32Reg();
5020       auto *T2 = I32Reg();
5021       auto *T3 = I32Reg();
5022       auto *T4 = I32Reg();
5023       auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
5024       auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
5025       Variable *SrcHiR = legalizeToReg(hiOperand(Src));
5026       Variable *SrcLoR = legalizeToReg(loOperand(Src));
5027       _clz(T1, SrcHiR);
5028       _clz(T2, SrcLoR);
5029       _addiu(T3, T2, 32);
5030       _movn(T3, T1, SrcHiR);
5031       _addiu(T4, getZero(), 0);
5032       _mov(DestHi, T4);
5033       _mov(DestLo, T3);
5034       break;
5035     }
5036     default:
5037       llvm::report_fatal_error("Control flow should never have reached here.");
5038     }
5039     break;
5040   }
5041   case Intrinsics::Cttz: {
5042     auto *Src = Instr->getArg(0);
5043     const Type SrcTy = Src->getType();
5044     assert(SrcTy == IceType_i32 || SrcTy == IceType_i64);
5045     switch (SrcTy) {
5046     case IceType_i32: {
5047       auto *T1 = I32Reg();
5048       auto *T2 = I32Reg();
5049       auto *T3 = I32Reg();
5050       auto *T4 = I32Reg();
5051       auto *T5 = I32Reg();
5052       auto *T6 = I32Reg();
5053       auto *SrcR = legalizeToReg(Src);
5054       _addiu(T1, SrcR, -1);
5055       _not(T2, SrcR);
5056       _and(T3, T2, T1);
5057       _clz(T4, T3);
5058       _addiu(T5, getZero(), 32);
5059       _subu(T6, T5, T4);
5060       _mov(Dest, T6);
5061       break;
5062     }
5063     case IceType_i64: {
5064       auto *THi1 = I32Reg();
5065       auto *THi2 = I32Reg();
5066       auto *THi3 = I32Reg();
5067       auto *THi4 = I32Reg();
5068       auto *THi5 = I32Reg();
5069       auto *THi6 = I32Reg();
5070       auto *TLo1 = I32Reg();
5071       auto *TLo2 = I32Reg();
5072       auto *TLo3 = I32Reg();
5073       auto *TLo4 = I32Reg();
5074       auto *TLo5 = I32Reg();
5075       auto *TLo6 = I32Reg();
5076       auto *TResHi = I32Reg();
5077       auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
5078       auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
5079       Variable *SrcHiR = legalizeToReg(hiOperand(Src));
5080       Variable *SrcLoR = legalizeToReg(loOperand(Src));
5081       _addiu(THi1, SrcHiR, -1);
5082       _not(THi2, SrcHiR);
5083       _and(THi3, THi2, THi1);
5084       _clz(THi4, THi3);
5085       _addiu(THi5, getZero(), 64);
5086       _subu(THi6, THi5, THi4);
5087       _addiu(TLo1, SrcLoR, -1);
5088       _not(TLo2, SrcLoR);
5089       _and(TLo3, TLo2, TLo1);
5090       _clz(TLo4, TLo3);
5091       _addiu(TLo5, getZero(), 32);
5092       _subu(TLo6, TLo5, TLo4);
5093       _movn(THi6, TLo6, SrcLoR);
5094       _addiu(TResHi, getZero(), 0);
5095       _mov(DestHi, TResHi);
5096       _mov(DestLo, THi6);
5097       break;
5098     }
5099     default:
5100       llvm::report_fatal_error("Control flow should never have reached here.");
5101     }
5102     return;
5103   }
5104   case Intrinsics::Fabs: {
5105     if (isScalarFloatingType(DestTy)) {
5106       Variable *T = makeReg(DestTy);
5107       if (DestTy == IceType_f32) {
5108         _abs_s(T, legalizeToReg(Instr->getArg(0)));
5109       } else {
5110         _abs_d(T, legalizeToReg(Instr->getArg(0)));
5111       }
5112       _mov(Dest, T);
5113     }
5114     return;
5115   }
5116   case Intrinsics::Longjmp: {
5117     llvm::report_fatal_error("longjmp should have been prelowered.");
5118     return;
5119   }
5120   case Intrinsics::Memcpy: {
5121     llvm::report_fatal_error("memcpy should have been prelowered.");
5122     return;
5123   }
5124   case Intrinsics::Memmove: {
5125     llvm::report_fatal_error("memmove should have been prelowered.");
5126     return;
5127   }
5128   case Intrinsics::Memset: {
5129     llvm::report_fatal_error("memset should have been prelowered.");
5130     return;
5131   }
5132   case Intrinsics::NaClReadTP: {
5133     if (SandboxingType != ST_NaCl)
5134       llvm::report_fatal_error("nacl-read-tp should have been prelowered.");
5135     else {
5136       auto *T8 = makeReg(IceType_i32, RegMIPS32::Reg_T8);
5137       Context.insert<InstFakeDef>(T8);
5138       Variable *TP = legalizeToReg(OperandMIPS32Mem::create(
5139           Func, getPointerType(), T8,
5140           llvm::cast<ConstantInteger32>(Ctx->getConstantZero(IceType_i32))));
5141       _mov(Dest, TP);
5142     }
5143     return;
5144   }
5145   case Intrinsics::Setjmp: {
5146     llvm::report_fatal_error("setjmp should have been prelowered.");
5147     return;
5148   }
5149   case Intrinsics::Sqrt: {
5150     if (isScalarFloatingType(DestTy)) {
5151       Variable *T = makeReg(DestTy);
5152       if (DestTy == IceType_f32) {
5153         _sqrt_s(T, legalizeToReg(Instr->getArg(0)));
5154       } else {
5155         _sqrt_d(T, legalizeToReg(Instr->getArg(0)));
5156       }
5157       _mov(Dest, T);
5158     } else {
5159       assert(getFlags().getApplicationBinaryInterface() != ::Ice::ABI_PNaCl);
5160       UnimplementedLoweringError(this, Instr); // Not required for PNaCl
5161     }
5162     return;
5163   }
5164   case Intrinsics::Stacksave: {
5165     Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP);
5166     _mov(Dest, SP);
5167     return;
5168   }
5169   case Intrinsics::Stackrestore: {
5170     Variable *Val = legalizeToReg(Instr->getArg(0));
5171     Sandboxer(this).reset_sp(Val);
5172     return;
5173   }
5174   case Intrinsics::Trap: {
5175     const uint32_t TrapCodeZero = 0;
5176     _teq(getZero(), getZero(), TrapCodeZero);
5177     return;
5178   }
5179   case Intrinsics::LoadSubVector: {
5180     UnimplementedLoweringError(this, Instr); // Not required for PNaCl
5181     return;
5182   }
5183   case Intrinsics::StoreSubVector: {
5184     UnimplementedLoweringError(this, Instr); // Not required for PNaCl
5185     return;
5186   }
5187   default: // UnknownIntrinsic
5188     Func->setError("Unexpected intrinsic");
5189     return;
5190   }
5191   return;
5192 }
5193 
lowerLoad(const InstLoad * Instr)5194 void TargetMIPS32::lowerLoad(const InstLoad *Instr) {
5195   // A Load instruction can be treated the same as an Assign instruction, after
5196   // the source operand is transformed into an OperandMIPS32Mem operand.
5197   Type Ty = Instr->getDest()->getType();
5198   Operand *Src0 = formMemoryOperand(Instr->getLoadAddress(), Ty);
5199   Variable *DestLoad = Instr->getDest();
5200   auto *Assign = InstAssign::create(Func, DestLoad, Src0);
5201   lowerAssign(Assign);
5202 }
5203 
5204 namespace {
dumpAddressOpt(const Cfg * Func,const Variable * Base,int32_t Offset,const Inst * Reason)5205 void dumpAddressOpt(const Cfg *Func, const Variable *Base, int32_t Offset,
5206                     const Inst *Reason) {
5207   if (!BuildDefs::dump())
5208     return;
5209   if (!Func->isVerbose(IceV_AddrOpt))
5210     return;
5211   OstreamLocker _(Func->getContext());
5212   Ostream &Str = Func->getContext()->getStrDump();
5213   Str << "Instruction: ";
5214   Reason->dumpDecorated(Func);
5215   Str << "  results in Base=";
5216   if (Base)
5217     Base->dump(Func);
5218   else
5219     Str << "<null>";
5220   Str << ", Offset=" << Offset << "\n";
5221 }
5222 
matchAssign(const VariablesMetadata * VMetadata,Variable ** Var,int32_t * Offset,const Inst ** Reason)5223 bool matchAssign(const VariablesMetadata *VMetadata, Variable **Var,
5224                  int32_t *Offset, const Inst **Reason) {
5225   // Var originates from Var=SrcVar ==> set Var:=SrcVar
5226   if (*Var == nullptr)
5227     return false;
5228   const Inst *VarAssign = VMetadata->getSingleDefinition(*Var);
5229   if (!VarAssign)
5230     return false;
5231   assert(!VMetadata->isMultiDef(*Var));
5232   if (!llvm::isa<InstAssign>(VarAssign))
5233     return false;
5234 
5235   Operand *SrcOp = VarAssign->getSrc(0);
5236   bool Optimized = false;
5237   if (auto *SrcVar = llvm::dyn_cast<Variable>(SrcOp)) {
5238     if (!VMetadata->isMultiDef(SrcVar) ||
5239         // TODO: ensure SrcVar stays single-BB
5240         false) {
5241       Optimized = true;
5242       *Var = SrcVar;
5243     } else if (auto *Const = llvm::dyn_cast<ConstantInteger32>(SrcOp)) {
5244       int32_t MoreOffset = Const->getValue();
5245       int32_t NewOffset = MoreOffset + *Offset;
5246       if (Utils::WouldOverflowAdd(*Offset, MoreOffset))
5247         return false;
5248       *Var = nullptr;
5249       *Offset += NewOffset;
5250       Optimized = true;
5251     }
5252   }
5253 
5254   if (Optimized) {
5255     *Reason = VarAssign;
5256   }
5257 
5258   return Optimized;
5259 }
5260 
isAddOrSub(const Inst * Instr,InstArithmetic::OpKind * Kind)5261 bool isAddOrSub(const Inst *Instr, InstArithmetic::OpKind *Kind) {
5262   if (const auto *Arith = llvm::dyn_cast<InstArithmetic>(Instr)) {
5263     switch (Arith->getOp()) {
5264     default:
5265       return false;
5266     case InstArithmetic::Add:
5267     case InstArithmetic::Sub:
5268       *Kind = Arith->getOp();
5269       return true;
5270     }
5271   }
5272   return false;
5273 }
5274 
bool matchOffsetBase(const VariablesMetadata *VMetadata, Variable **Base,
                     int32_t *Offset, const Inst **Reason) {
  // Base is Base=Var+Const || Base is Base=Const+Var ==>
  //   set Base=Var, Offset+=Const
  // Base is Base=Var-Const ==>
  //   set Base=Var, Offset-=Const
  // Returns true (recording the folded instruction in *Reason) on success.
  if (*Base == nullptr)
    return false;
  // Only fold when Base has exactly one reaching definition.
  const Inst *BaseInst = VMetadata->getSingleDefinition(*Base);
  if (BaseInst == nullptr) {
    return false;
  }
  assert(!VMetadata->isMultiDef(*Base));

  // That single definition must be an integer add or subtract.
  auto *ArithInst = llvm::dyn_cast<const InstArithmetic>(BaseInst);
  if (ArithInst == nullptr)
    return false;
  InstArithmetic::OpKind Kind;
  if (!isAddOrSub(ArithInst, &Kind))
    return false;
  bool IsAdd = Kind == InstArithmetic::Add;
  Operand *Src0 = ArithInst->getSrc(0);
  Operand *Src1 = ArithInst->getSrc(1);
  auto *Var0 = llvm::dyn_cast<Variable>(Src0);
  auto *Var1 = llvm::dyn_cast<Variable>(Src1);
  auto *Const0 = llvm::dyn_cast<ConstantInteger32>(Src0);
  auto *Const1 = llvm::dyn_cast<ConstantInteger32>(Src1);
  Variable *NewBase = nullptr;
  int32_t NewOffset = *Offset;

  // Each source must be either a Variable or a 32-bit integer constant;
  // anything else (expected to be a relocatable constant) cannot be folded.
  if (Var0 == nullptr && Const0 == nullptr) {
    assert(llvm::isa<ConstantRelocatable>(Src0));
    return false;
  }

  if (Var1 == nullptr && Const1 == nullptr) {
    assert(llvm::isa<ConstantRelocatable>(Src1));
    return false;
  }

  if (Var0 && Var1)
    // TODO(jpp): merge base/index splitting into here.
    return false;
  // A variable on the right of a subtract means Const-Var, which is not a
  // base+offset form we can represent.
  if (!IsAdd && Var1)
    return false;
  if (Var0)
    NewBase = Var0;
  else if (Var1)
    NewBase = Var1;
  // Compute the updated constant offset.
  if (Const0) {
    int32_t MoreOffset = IsAdd ? Const0->getValue() : -Const0->getValue();
    if (Utils::WouldOverflowAdd(NewOffset, MoreOffset))
      return false;
    NewOffset += MoreOffset;
  }
  if (Const1) {
    int32_t MoreOffset = IsAdd ? Const1->getValue() : -Const1->getValue();
    if (Utils::WouldOverflowAdd(NewOffset, MoreOffset))
      return false;
    NewOffset += MoreOffset;
  }

  // Update the computed address parameters once we are sure optimization
  // is valid.
  *Base = NewBase;
  *Offset = NewOffset;
  *Reason = BaseInst;
  return true;
}
5345 } // end of anonymous namespace
5346 
OperandMIPS32Mem *TargetMIPS32::formAddressingMode(Type Ty, Cfg *Func,
                                                   const Inst *LdSt,
                                                   Operand *Base) {
  // Fold the address computation feeding load/store LdSt into a single
  // base+offset OperandMIPS32Mem. Returns nullptr when no memory operand can
  // be formed (vector types, or a non-Variable base operand).
  assert(Base != nullptr);
  int32_t OffsetImm = 0;

  Func->resetCurrentNode();
  if (Func->isVerbose(IceV_AddrOpt)) {
    OstreamLocker _(Func->getContext());
    Ostream &Str = Func->getContext()->getStrDump();
    Str << "\nAddress mode formation:\t";
    LdSt->dumpDecorated(Func);
  }

  if (isVectorType(Ty)) {
    return nullptr;
  }

  auto *BaseVar = llvm::dyn_cast<Variable>(Base);
  if (BaseVar == nullptr)
    return nullptr;

  const VariablesMetadata *VMetadata = Func->getVMetadata();
  const Inst *Reason = nullptr;

  // Iterate to a fixed point, folding assignments and add/sub chains into
  // (BaseVar, OffsetImm). Each successful match records the justifying
  // instruction in Reason, which is dumped at the top of the next iteration.
  do {
    if (Reason != nullptr) {
      dumpAddressOpt(Func, BaseVar, OffsetImm, Reason);
      Reason = nullptr;
    }

    if (matchAssign(VMetadata, &BaseVar, &OffsetImm, &Reason)) {
      continue;
    }

    if (matchOffsetBase(VMetadata, &BaseVar, &OffsetImm, &Reason)) {
      continue;
    }
  } while (Reason);

  if (BaseVar == nullptr) {
    // We need base register rather than just OffsetImm. Move the OffsetImm to
    // BaseVar and form 0(BaseVar) addressing.
    const Type PointerType = getPointerType();
    BaseVar = makeReg(PointerType);
    Context.insert<InstAssign>(BaseVar, Ctx->getConstantInt32(OffsetImm));
    OffsetImm = 0;
  } else if (OffsetImm != 0) {
    // If the OffsetImm is more than signed 16-bit value then add it in the
    // BaseVar and form 0(BaseVar) addressing.
    const int32_t PositiveOffset = OffsetImm > 0 ? OffsetImm : -OffsetImm;
    const InstArithmetic::OpKind Op =
        OffsetImm > 0 ? InstArithmetic::Add : InstArithmetic::Sub;
    constexpr bool ZeroExt = false;
    if (!OperandMIPS32Mem::canHoldOffset(Ty, ZeroExt, OffsetImm)) {
      const Type PointerType = getPointerType();
      Variable *T = makeReg(PointerType);
      Context.insert<InstArithmetic>(Op, T, BaseVar,
                                     Ctx->getConstantInt32(PositiveOffset));
      BaseVar = T;
      OffsetImm = 0;
    }
  }

  assert(BaseVar != nullptr);
  // The remaining offset must fit the 16-bit immediate field of a MIPS32
  // load/store; the oversized cases were folded into BaseVar above.
  assert(OffsetImm < 0 ? (-OffsetImm & 0x0000ffff) == -OffsetImm
                       : (OffsetImm & 0x0000ffff) == OffsetImm);

  return OperandMIPS32Mem::create(
      Func, Ty, BaseVar,
      llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(OffsetImm)));
}
5419 
doAddressOptLoad()5420 void TargetMIPS32::doAddressOptLoad() {
5421   Inst *Instr = iteratorToInst(Context.getCur());
5422   assert(llvm::isa<InstLoad>(Instr));
5423   Variable *Dest = Instr->getDest();
5424   Operand *Addr = Instr->getSrc(0);
5425   if (OperandMIPS32Mem *Mem =
5426           formAddressingMode(Dest->getType(), Func, Instr, Addr)) {
5427     Instr->setDeleted();
5428     Context.insert<InstLoad>(Dest, Mem);
5429   }
5430 }
5431 
void TargetMIPS32::lowerPhi(const InstPhi * /*Instr*/) {
  // Phis are expected to have been prelowered (see prelowerPhis()) before
  // target lowering runs, so encountering one here is an internal error.
  Func->setError("Phi found in regular instruction list");
}
5435 
void TargetMIPS32::lowerRet(const InstRet *Instr) {
  // Move the return value (if any) into the registers this lowering uses for
  // returns, then emit the return through $ra. FakeUse instructions keep the
  // secondary result registers live until the ret.
  Variable *Reg = nullptr;
  if (Instr->hasRetValue()) {
    Operand *Src0 = Instr->getRetValue();
    switch (Src0->getType()) {
    case IceType_f32: {
      // f32 result is placed in $f0.
      Operand *Src0F = legalizeToReg(Src0);
      Reg = makeReg(Src0F->getType(), RegMIPS32::Reg_F0);
      _mov(Reg, Src0F);
      break;
    }
    case IceType_f64: {
      // f64 result is placed in the $f0/$f1 pair.
      Operand *Src0F = legalizeToReg(Src0);
      Reg = makeReg(Src0F->getType(), RegMIPS32::Reg_F0F1);
      _mov(Reg, Src0F);
      break;
    }
    case IceType_i1:
    case IceType_i8:
    case IceType_i16:
    case IceType_i32: {
      // Scalar integer results up to 32 bits go in $v0.
      Operand *Src0F = legalizeToReg(Src0);
      Reg = makeReg(Src0F->getType(), RegMIPS32::Reg_V0);
      _mov(Reg, Src0F);
      break;
    }
    case IceType_i64: {
      // i64 results are split: lo word in $v0, hi word in $v1.
      Src0 = legalizeUndef(Src0);
      Variable *R0 = legalizeToReg(loOperand(Src0), RegMIPS32::Reg_V0);
      Variable *R1 = legalizeToReg(hiOperand(Src0), RegMIPS32::Reg_V1);
      Reg = R0;
      Context.insert<InstFakeUse>(R1);
      break;
    }
    case IceType_v4i1:
    case IceType_v8i1:
    case IceType_v16i1:
    case IceType_v16i8:
    case IceType_v8i16:
    case IceType_v4i32: {
      // Integer vectors return their four 32-bit containers in
      // $v0/$v1/$a0/$a1.
      auto *SrcVec = llvm::dyn_cast<VariableVecOn32>(legalizeUndef(Src0));
      Variable *V0 =
          legalizeToReg(SrcVec->getContainers()[0], RegMIPS32::Reg_V0);
      Variable *V1 =
          legalizeToReg(SrcVec->getContainers()[1], RegMIPS32::Reg_V1);
      Variable *A0 =
          legalizeToReg(SrcVec->getContainers()[2], RegMIPS32::Reg_A0);
      Variable *A1 =
          legalizeToReg(SrcVec->getContainers()[3], RegMIPS32::Reg_A1);
      Reg = V0;
      Context.insert<InstFakeUse>(V1);
      Context.insert<InstFakeUse>(A0);
      Context.insert<InstFakeUse>(A1);
      break;
    }
    case IceType_v4f32: {
      // f32 vectors are stored to the buffer addressed by the implicit
      // return register; that address is then also copied into $v0.
      auto *SrcVec = llvm::dyn_cast<VariableVecOn32>(legalizeUndef(Src0));
      Reg = getImplicitRet();
      auto *RegT = legalizeToReg(Reg);
      // Return the vector through buffer in implicit argument a0
      for (SizeT i = 0; i < SrcVec->ContainersPerVector; ++i) {
        OperandMIPS32Mem *Mem = OperandMIPS32Mem::create(
            Func, IceType_f32, RegT,
            llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(i * 4)));
        Variable *Var = legalizeToReg(SrcVec->getContainers()[i]);
        _sw(Var, Mem);
      }
      Variable *V0 = makeReg(IceType_i32, RegMIPS32::Reg_V0);
      _mov(V0, Reg); // move v0,a0
      Context.insert<InstFakeUse>(Reg);
      Context.insert<InstFakeUse>(V0);
      break;
    }
    default:
      llvm::report_fatal_error("Ret: Invalid type.");
      break;
    }
  }
  _ret(getPhysicalRegister(RegMIPS32::Reg_RA), Reg);
}
5516 
void TargetMIPS32::lowerSelect(const InstSelect *Instr) {
  // Lower select with conditional moves: the false value is loaded into a
  // register and conditionally overwritten by the true value when the
  // condition register is non-zero (movn / movn_s / movn_d), so the
  // false-value register ends up holding the selected result.
  Variable *Dest = Instr->getDest();
  const Type DestTy = Dest->getType();

  if (isVectorType(DestTy)) {
    llvm::report_fatal_error("Select: Destination type is vector");
    return;
  }

  Variable *DestR = nullptr;
  Variable *DestHiR = nullptr;
  Variable *SrcTR = nullptr;
  Variable *SrcTHiR = nullptr;
  Variable *SrcFR = nullptr;
  Variable *SrcFHiR = nullptr;

  if (DestTy == IceType_i64) {
    // i64 operands are handled as lo/hi pairs of 32-bit registers.
    DestR = llvm::cast<Variable>(loOperand(Dest));
    DestHiR = llvm::cast<Variable>(hiOperand(Dest));
    SrcTR = legalizeToReg(loOperand(legalizeUndef(Instr->getTrueOperand())));
    SrcTHiR = legalizeToReg(hiOperand(legalizeUndef(Instr->getTrueOperand())));
    SrcFR = legalizeToReg(loOperand(legalizeUndef(Instr->getFalseOperand())));
    SrcFHiR = legalizeToReg(hiOperand(legalizeUndef(Instr->getFalseOperand())));
  } else {
    SrcTR = legalizeToReg(legalizeUndef(Instr->getTrueOperand()));
    SrcFR = legalizeToReg(legalizeUndef(Instr->getFalseOperand()));
  }

  Variable *ConditionR = legalizeToReg(Instr->getCondition());

  assert(Instr->getCondition()->getType() == IceType_i1);

  switch (DestTy) {
  case IceType_i1:
  case IceType_i8:
  case IceType_i16:
  case IceType_i32:
    // movn writes SrcFR, which then holds the selected value.
    _movn(SrcFR, SrcTR, ConditionR);
    _mov(Dest, SrcFR);
    break;
  case IceType_i64:
    // Both halves are selected under the same condition.
    _movn(SrcFR, SrcTR, ConditionR);
    _movn(SrcFHiR, SrcTHiR, ConditionR);
    _mov(DestR, SrcFR);
    _mov(DestHiR, SrcFHiR);
    break;
  case IceType_f32:
    _movn_s(SrcFR, SrcTR, ConditionR);
    _mov(Dest, SrcFR);
    break;
  case IceType_f64:
    _movn_d(SrcFR, SrcTR, ConditionR);
    _mov(Dest, SrcFR);
    break;
  default:
    llvm::report_fatal_error("Select: Invalid type.");
  }
}
5575 
void TargetMIPS32::lowerShuffleVector(const InstShuffleVector *Instr) {
  // Vector shuffle lowering is not implemented for MIPS32.
  UnimplementedLoweringError(this, Instr);
}
5579 
lowerStore(const InstStore * Instr)5580 void TargetMIPS32::lowerStore(const InstStore *Instr) {
5581   Operand *Value = Instr->getData();
5582   Operand *Addr = Instr->getStoreAddress();
5583   OperandMIPS32Mem *NewAddr = formMemoryOperand(Addr, Value->getType());
5584   Type Ty = NewAddr->getType();
5585 
5586   if (Ty == IceType_i64) {
5587     Value = legalizeUndef(Value);
5588     Variable *ValueHi = legalizeToReg(hiOperand(Value));
5589     Variable *ValueLo = legalizeToReg(loOperand(Value));
5590     _sw(ValueHi, llvm::cast<OperandMIPS32Mem>(hiOperand(NewAddr)));
5591     _sw(ValueLo, llvm::cast<OperandMIPS32Mem>(loOperand(NewAddr)));
5592   } else if (isVectorType(Value->getType())) {
5593     auto *DataVec = llvm::dyn_cast<VariableVecOn32>(Value);
5594     for (SizeT i = 0; i < DataVec->ContainersPerVector; ++i) {
5595       auto *DCont = legalizeToReg(DataVec->getContainers()[i]);
5596       auto *MCont = llvm::cast<OperandMIPS32Mem>(
5597           getOperandAtIndex(NewAddr, IceType_i32, i));
5598       _sw(DCont, MCont);
5599     }
5600   } else {
5601     Variable *ValueR = legalizeToReg(Value);
5602     _sw(ValueR, NewAddr);
5603   }
5604 }
5605 
doAddressOptStore()5606 void TargetMIPS32::doAddressOptStore() {
5607   Inst *Instr = iteratorToInst(Context.getCur());
5608   assert(llvm::isa<InstStore>(Instr));
5609   Operand *Src = Instr->getSrc(0);
5610   Operand *Addr = Instr->getSrc(1);
5611   if (OperandMIPS32Mem *Mem =
5612           formAddressingMode(Src->getType(), Func, Instr, Addr)) {
5613     Instr->setDeleted();
5614     Context.insert<InstStore>(Src, Mem);
5615   }
5616 }
5617 
void TargetMIPS32::lowerSwitch(const InstSwitch *Instr) {
  // Lower a switch as a chain of compare-and-branch instructions, ending
  // with an unconditional branch to the default label.
  Operand *Src = Instr->getComparison();
  SizeT NumCases = Instr->getNumCases();
  if (Src->getType() == IceType_i64) {
    // 64-bit comparison: for each case, compare the hi words first. If they
    // differ, branch over the lo-word compare to a local label and fall
    // through to the next case; otherwise compare the lo words against the
    // case target.
    Src = legalizeUndef(Src);
    Variable *Src0Lo = legalizeToReg(loOperand(Src));
    Variable *Src0Hi = legalizeToReg(hiOperand(Src));
    for (SizeT I = 0; I < NumCases; ++I) {
      Operand *ValueLo = Ctx->getConstantInt32(Instr->getValue(I));
      Operand *ValueHi = Ctx->getConstantInt32(Instr->getValue(I) >> 32);
      CfgNode *TargetTrue = Instr->getLabel(I);
      constexpr CfgNode *NoTarget = nullptr;
      ValueHi = legalizeToReg(ValueHi);
      InstMIPS32Label *IntraLabel = InstMIPS32Label::create(Func, this);
      _br(NoTarget, NoTarget, Src0Hi, ValueHi, IntraLabel,
          CondMIPS32::Cond::NE);
      ValueLo = legalizeToReg(ValueLo);
      _br(NoTarget, TargetTrue, Src0Lo, ValueLo, CondMIPS32::Cond::EQ);
      Context.insert(IntraLabel);
    }
    _br(Instr->getLabelDefault());
    return;
  }
  // 32-bit (or narrower) comparison: one equality branch per case.
  Variable *SrcVar = legalizeToReg(Src);
  assert(SrcVar->mustHaveReg());
  for (SizeT I = 0; I < NumCases; ++I) {
    Operand *Value = Ctx->getConstantInt32(Instr->getValue(I));
    CfgNode *TargetTrue = Instr->getLabel(I);
    constexpr CfgNode *NoTargetFalse = nullptr;
    Value = legalizeToReg(Value);
    _br(NoTargetFalse, TargetTrue, SrcVar, Value, CondMIPS32::Cond::EQ);
  }
  _br(Instr->getLabelDefault());
}
5652 
void TargetMIPS32::lowerBreakpoint(const InstBreakpoint *Instr) {
  // Breakpoint lowering is not implemented for MIPS32.
  UnimplementedLoweringError(this, Instr);
}
5656 
lowerUnreachable(const InstUnreachable *)5657 void TargetMIPS32::lowerUnreachable(const InstUnreachable *) {
5658   const uint32_t TrapCodeZero = 0;
5659   _teq(getZero(), getZero(), TrapCodeZero);
5660 }
5661 
lowerOther(const Inst * Instr)5662 void TargetMIPS32::lowerOther(const Inst *Instr) {
5663   if (llvm::isa<InstMIPS32Sync>(Instr)) {
5664     _sync();
5665   } else {
5666     TargetLowering::lowerOther(Instr);
5667   }
5668 }
5669 
5670 // Turn an i64 Phi instruction into a pair of i32 Phi instructions, to preserve
5671 // integrity of liveness analysis. Undef values are also turned into zeroes,
5672 // since loOperand() and hiOperand() don't expect Undef input.
void TargetMIPS32::prelowerPhis() {
  // Delegate to the shared 32-bit phi prelowering, which splits i64 phis
  // into lo/hi i32 pairs and replaces undef incoming values with zero.
  PhiLowering::prelowerPhis32Bit<TargetMIPS32>(this, Context.getNode(), Func);
}
5676 
postLower()5677 void TargetMIPS32::postLower() {
5678   if (Func->getOptLevel() == Opt_m1)
5679     return;
5680   markRedefinitions();
5681   Context.availabilityUpdate();
5682 }
5683 
5684 /* TODO(jvoung): avoid duplicate symbols with multiple targets.
5685 void ConstantUndef::emitWithoutDollar(GlobalContext *) const {
5686   llvm_unreachable("Not expecting to emitWithoutDollar undef");
5687 }
5688 
5689 void ConstantUndef::emit(GlobalContext *) const {
5690   llvm_unreachable("undef value encountered by emitter.");
5691 }
5692 */
5693 
// Data-lowering driver for MIPS32; simply forwards the global context to the
// base class.
TargetDataMIPS32::TargetDataMIPS32(GlobalContext *Ctx)
    : TargetDataLowering(Ctx) {}
5696 
// Generate .MIPS.abiflags section. This section contains a versioned data
// structure with essential information required for loader to determine the
// requirements of the application.
void TargetDataMIPS32::emitTargetRODataSections() {
  // Default-constructed flags record; its raw bytes become the section
  // payload below.
  struct MipsABIFlagsSection Flags;
  ELFObjectWriter *Writer = Ctx->getObjectWriter();
  const std::string Name = ".MIPS.abiflags";
  const llvm::ELF::Elf64_Word ShType = llvm::ELF::SHT_MIPS_ABIFLAGS;
  const llvm::ELF::Elf64_Xword ShFlags = llvm::ELF::SHF_ALLOC;
  // 8-byte section alignment; one entry of sizeof(Flags) bytes.
  const llvm::ELF::Elf64_Xword ShAddralign = 8;
  const llvm::ELF::Elf64_Xword ShEntsize = sizeof(Flags);
  Writer->writeTargetRODataSection(
      Name, ShType, ShFlags, ShAddralign, ShEntsize,
      llvm::StringRef(reinterpret_cast<const char *>(&Flags), sizeof(Flags)));
}
5712 
lowerGlobals(const VariableDeclarationList & Vars,const std::string & SectionSuffix)5713 void TargetDataMIPS32::lowerGlobals(const VariableDeclarationList &Vars,
5714                                     const std::string &SectionSuffix) {
5715   const bool IsPIC = getFlags().getUseNonsfi();
5716   switch (getFlags().getOutFileType()) {
5717   case FT_Elf: {
5718     ELFObjectWriter *Writer = Ctx->getObjectWriter();
5719     Writer->writeDataSection(Vars, llvm::ELF::R_MIPS_32, SectionSuffix, IsPIC);
5720   } break;
5721   case FT_Asm:
5722   case FT_Iasm: {
5723     OstreamLocker L(Ctx);
5724     for (const VariableDeclaration *Var : Vars) {
5725       if (getFlags().matchTranslateOnly(Var->getName(), 0)) {
5726         emitGlobal(*Var, SectionSuffix);
5727       }
5728     }
5729   } break;
5730   }
5731 }
5732 
5733 namespace {
5734 template <typename T> struct ConstantPoolEmitterTraits;
5735 
5736 static_assert(sizeof(uint64_t) == 8,
5737               "uint64_t is supposed to be 8 bytes wide.");
5738 
5739 // TODO(jaydeep.patil): implement the following when implementing constant
5740 // randomization:
5741 //  * template <> struct ConstantPoolEmitterTraits<uint8_t>
5742 //  * template <> struct ConstantPoolEmitterTraits<uint16_t>
5743 //  * template <> struct ConstantPoolEmitterTraits<uint32_t>
// Emitter traits for f32 pool constants: the backing Ice constant class, the
// assembler directive for the raw bits, and the bit-pattern widening used
// when printing.
template <> struct ConstantPoolEmitterTraits<float> {
  using ConstantType = ConstantFloat;
  static constexpr Type IceType = IceType_f32;
  // AsmTag and TypeName can't be constexpr because llvm::StringRef is unhappy
  // about them being constexpr.
  static const char AsmTag[];
  static const char TypeName[];
  // Reinterpret the float's 4 bytes as an integer (no value conversion),
  // then zero-extend to 64 bits for uniform hex printing.
  static uint64_t bitcastToUint64(float Value) {
    static_assert(sizeof(Value) == sizeof(uint32_t),
                  "Float should be 4 bytes.");
    const uint32_t IntValue = Utils::bitCopy<uint32_t>(Value);
    return static_cast<uint64_t>(IntValue);
  }
};
// ".word" emits a 4-byte value, matching the f32 payload.
const char ConstantPoolEmitterTraits<float>::AsmTag[] = ".word";
const char ConstantPoolEmitterTraits<float>::TypeName[] = "f32";
5760 
// Emitter traits for f64 pool constants; see the f32 specialization above
// (this block is self-contained and mirrors its structure).
template <> struct ConstantPoolEmitterTraits<double> {
  using ConstantType = ConstantDouble;
  static constexpr Type IceType = IceType_f64;
  // Not constexpr for the same llvm::StringRef reason as the f32 traits.
  static const char AsmTag[];
  static const char TypeName[];
  // Reinterpret the double's 8 bytes directly as a uint64_t.
  static uint64_t bitcastToUint64(double Value) {
    static_assert(sizeof(double) == sizeof(uint64_t),
                  "Double should be 8 bytes.");
    return Utils::bitCopy<uint64_t>(Value);
  }
};
// ".quad" emits an 8-byte value, matching the f64 payload.
const char ConstantPoolEmitterTraits<double>::AsmTag[] = ".quad";
const char ConstantPoolEmitterTraits<double>::TypeName[] = "f64";
5774 
5775 template <typename T>
emitConstant(Ostream & Str,const typename ConstantPoolEmitterTraits<T>::ConstantType * Const)5776 void emitConstant(
5777     Ostream &Str,
5778     const typename ConstantPoolEmitterTraits<T>::ConstantType *Const) {
5779   if (!BuildDefs::dump())
5780     return;
5781   using Traits = ConstantPoolEmitterTraits<T>;
5782   Str << Const->getLabelName();
5783   T Value = Const->getValue();
5784   Str << ":\n\t" << Traits::AsmTag << "\t0x";
5785   Str.write_hex(Traits::bitcastToUint64(Value));
5786   Str << "\t/* " << Traits::TypeName << " " << Value << " */\n";
5787 }
5788 
emitConstantPool(GlobalContext * Ctx)5789 template <typename T> void emitConstantPool(GlobalContext *Ctx) {
5790   if (!BuildDefs::dump())
5791     return;
5792   using Traits = ConstantPoolEmitterTraits<T>;
5793   static constexpr size_t MinimumAlignment = 4;
5794   SizeT Align = std::max(MinimumAlignment, typeAlignInBytes(Traits::IceType));
5795   assert((Align % 4) == 0 && "Constants should be aligned");
5796   Ostream &Str = Ctx->getStrEmit();
5797   ConstantList Pool = Ctx->getConstantPool(Traits::IceType);
5798   Str << "\t.section\t.rodata.cst" << Align << ",\"aM\",%progbits," << Align
5799       << "\n"
5800       << "\t.align\t" << (Align == 4 ? 2 : 3) << "\n";
5801   for (Constant *C : Pool) {
5802     if (!C->getShouldBePooled()) {
5803       continue;
5804     }
5805     emitConstant<T>(Str, llvm::dyn_cast<typename Traits::ConstantType>(C));
5806   }
5807 }
5808 } // end of anonymous namespace
5809 
lowerConstants()5810 void TargetDataMIPS32::lowerConstants() {
5811   if (getFlags().getDisableTranslation())
5812     return;
5813   switch (getFlags().getOutFileType()) {
5814   case FT_Elf: {
5815     ELFObjectWriter *Writer = Ctx->getObjectWriter();
5816     Writer->writeConstantPool<ConstantFloat>(IceType_f32);
5817     Writer->writeConstantPool<ConstantDouble>(IceType_f64);
5818   } break;
5819   case FT_Asm:
5820   case FT_Iasm: {
5821     OstreamLocker _(Ctx);
5822     emitConstantPool<float>(Ctx);
5823     emitConstantPool<double>(Ctx);
5824     break;
5825   }
5826   }
5827 }
5828 
// Jump tables are not emitted for MIPS32 (lowerSwitch produces compare/branch
// chains instead), so there is nothing to do beyond the translation-disabled
// check.
void TargetDataMIPS32::lowerJumpTables() {
  if (getFlags().getDisableTranslation())
    return;
}
5833 
5834 // Helper for legalize() to emit the right code to lower an operand to a
5835 // register of the appropriate type.
copyToReg(Operand * Src,RegNumT RegNum)5836 Variable *TargetMIPS32::copyToReg(Operand *Src, RegNumT RegNum) {
5837   Type Ty = Src->getType();
5838   Variable *Reg = makeReg(Ty, RegNum);
5839   if (isVectorType(Ty)) {
5840     llvm::report_fatal_error("Invalid copy from vector type.");
5841   } else {
5842     if (auto *Mem = llvm::dyn_cast<OperandMIPS32Mem>(Src)) {
5843       _lw(Reg, Mem);
5844     } else {
5845       _mov(Reg, Src);
5846     }
5847   }
5848   return Reg;
5849 }
5850 
// Legalize From into an operand form permitted by the Allowed mask,
// materializing it into a register (optionally the specific register RegNum)
// when necessary, and return the resulting legal operand.
Operand *TargetMIPS32::legalize(Operand *From, LegalMask Allowed,
                                RegNumT RegNum) {
  Type Ty = From->getType();
  // Assert that a physical register is allowed.  To date, all calls
  // to legalize() allow a physical register. Legal_Flex converts
  // registers to the right type OperandMIPS32FlexReg as needed.
  assert(Allowed & Legal_Reg);

  // When no specific register was requested, try to reuse a variable that the
  // availability tracker says already holds this value.
  if (RegNum.hasNoValue()) {
    if (Variable *Subst = getContext().availabilityGet(From)) {
      // At this point we know there is a potential substitution available.
      if (!Subst->isRematerializable() && Subst->mustHaveReg() &&
          !Subst->hasReg()) {
        // At this point we know the substitution will have a register.
        if (From->getType() == Subst->getType()) {
          // At this point we know the substitution's register is compatible.
          return Subst;
        }
      }
    }
  }

  // Go through the various types of operands:
  // OperandMIPS32Mem, Constant, and Variable.
  // Given the above assertion, if type of operand is not legal
  // (e.g., OperandMIPS32Mem and !Legal_Mem), we can always copy
  // to a register.
  if (auto *Mem = llvm::dyn_cast<OperandMIPS32Mem>(From)) {
    // Base must be in a physical register.
    Variable *Base = Mem->getBase();
    ConstantInteger32 *Offset = llvm::cast<ConstantInteger32>(Mem->getOffset());
    Variable *RegBase = nullptr;
    assert(Base);

    RegBase = llvm::cast<Variable>(
        legalize(Base, Legal_Reg | Legal_Rematerializable));

    // Reject offsets that don't fit the load/store immediate field
    // (sign-extended: ZeroExt is false).
    if (Offset != nullptr && Offset->getValue() != 0) {
      static constexpr bool ZeroExt = false;
      if (!OperandMIPS32Mem::canHoldOffset(Ty, ZeroExt, Offset->getValue())) {
        llvm::report_fatal_error("Invalid memory offset.");
      }
    }

    // Create a new operand if there was a change.
    if (Base != RegBase) {
      Mem = OperandMIPS32Mem::create(Func, Ty, RegBase, Offset,
                                     Mem->getAddrMode());
    }

    if (Allowed & Legal_Mem) {
      From = Mem;
    } else {
      // Memory operands are not allowed here: load the value into a register.
      Variable *Reg = makeReg(Ty, RegNum);
      _lw(Reg, Mem);
      From = Reg;
    }
    return From;
  }

  if (llvm::isa<Constant>(From)) {
    if (llvm::isa<ConstantUndef>(From)) {
      // Replace Undef first so the constant cases below never see it.
      From = legalizeUndef(From, RegNum);
      if (isVectorType(Ty))
        return From;
    }
    if (auto *C = llvm::dyn_cast<ConstantRelocatable>(From)) {
      // Relocatable symbol: materialize the address as %hi via lui followed
      // by %lo via addiu.
      Variable *Reg = makeReg(Ty, RegNum);
      Variable *TReg = makeReg(Ty, RegNum);
      _lui(TReg, C, RO_Hi);
      _addiu(Reg, TReg, C, RO_Lo);
      return Reg;
    } else if (auto *C32 = llvm::dyn_cast<ConstantInteger32>(From)) {
      const uint32_t Value = C32->getValue();
      // Use addiu if the immediate is a 16bit value. Otherwise load it
      // using a lui-ori instructions.
      Variable *Reg = makeReg(Ty, RegNum);
      if (isInt<16>(int32_t(Value))) {
        // addiu off $zero; the FakeDef tells liveness $zero is "defined".
        Variable *Zero = makeReg(Ty, RegMIPS32::Reg_ZERO);
        Context.insert<InstFakeDef>(Zero);
        _addiu(Reg, Zero, Value);
      } else {
        uint32_t UpperBits = (Value >> 16) & 0xFFFF;
        uint32_t LowerBits = Value & 0xFFFF;
        if (LowerBits) {
          Variable *TReg = makeReg(Ty, RegNum);
          _lui(TReg, Ctx->getConstantInt32(UpperBits));
          _ori(Reg, TReg, LowerBits);
        } else {
          // Low half is zero, so lui alone suffices.
          _lui(Reg, Ctx->getConstantInt32(UpperBits));
        }
      }
      return Reg;
    } else if (isScalarFloatingType(Ty)) {
      auto *CFrom = llvm::cast<Constant>(From);
      Variable *TReg = makeReg(Ty);
      if (!CFrom->getShouldBePooled()) {
        // Float/Double constant 0 is not pooled.
        Context.insert<InstFakeDef>(TReg);
        _mov(TReg, getZero());
      } else {
        // Load floats/doubles from literal pool.
        Constant *Offset = Ctx->getConstantSym(0, CFrom->getLabelName());
        Variable *TReg1 = makeReg(getPointerType());
        _lui(TReg1, Offset, RO_Hi);
        OperandMIPS32Mem *Addr =
            OperandMIPS32Mem::create(Func, Ty, TReg1, Offset);
        // Use the sandbox-aware FP loads so the base gets masked if needed.
        if (Ty == IceType_f32)
          Sandboxer(this).lwc1(TReg, Addr, RO_Lo);
        else
          Sandboxer(this).ldc1(TReg, Addr, RO_Lo);
      }
      return copyToReg(TReg, RegNum);
    }
  }

  if (auto *Var = llvm::dyn_cast<Variable>(From)) {
    // Rematerializable variables may be passed through only when the caller
    // explicitly allows them; otherwise copy into a fresh register.
    if (Var->isRematerializable()) {
      if (Allowed & Legal_Rematerializable) {
        return From;
      }

      Variable *T = makeReg(Var->getType(), RegNum);
      _mov(T, Var);
      return T;
    }
    // Check if the variable is guaranteed a physical register.  This
    // can happen either when the variable is pre-colored or when it is
    // assigned infinite weight.
    bool MustHaveRegister = (Var->hasReg() || Var->mustHaveReg());
    // We need a new physical register for the operand if:
    //   Mem is not allowed and Var isn't guaranteed a physical
    //   register, or
    //   RegNum is required and Var->getRegNum() doesn't match.
    if ((!(Allowed & Legal_Mem) && !MustHaveRegister) ||
        (RegNum.hasValue() && RegNum != Var->getRegNum())) {
      From = copyToReg(From, RegNum);
    }
    return From;
  }
  return From;
}
5993 
namespace BoolFolding {
// TODO(sagar.thakur): Add remaining instruction kinds to shouldTrackProducer()
// and isValidConsumer()

// A bool "producer" worth tracking for folding: currently only icmp.
bool shouldTrackProducer(const Inst &Instr) {
  return Instr.getKind() == Inst::Icmp;
}

// A valid consumer of a tracked bool: currently only a conditional branch.
bool isValidConsumer(const Inst &Instr) { return Instr.getKind() == Inst::Br; }
} // end of namespace BoolFolding
6003 
// Scan Node and record i1-producing instructions (those passing
// BoolFolding::shouldTrackProducer) whose single use is a foldable consumer
// (BoolFolding::isValidConsumer) within this block. Surviving producers are
// marked dead (not deleted) so the consumer's lowering can fold them.
void TargetMIPS32::ComputationTracker::recordProducers(CfgNode *Node) {
  for (Inst &Instr : Node->getInsts()) {
    if (Instr.isDeleted())
      continue;
    // Check whether Instr is a valid producer.
    Variable *Dest = Instr.getDest();
    if (Dest // only consider instructions with an actual dest var; and
        && Dest->getType() == IceType_i1 // only bool-type dest vars; and
        && BoolFolding::shouldTrackProducer(Instr)) { // white-listed instr.
      KnownComputations.emplace(Dest->getIndex(),
                                ComputationEntry(&Instr, IceType_i1));
    }
    // Check each src variable against the map.
    FOREACH_VAR_IN_INST(Var, Instr) {
      SizeT VarNum = Var->getIndex();
      auto ComputationIter = KnownComputations.find(VarNum);
      if (ComputationIter == KnownComputations.end()) {
        continue;
      }

      // Count this use; drop the entry if this consumer can't fold the bool.
      ++ComputationIter->second.NumUses;
      switch (ComputationIter->second.ComputationType) {
      default:
        KnownComputations.erase(VarNum);
        continue;
      case IceType_i1:
        if (!BoolFolding::isValidConsumer(Instr)) {
          KnownComputations.erase(VarNum);
          continue;
        }
        break;
      }

      // The producer is only foldable if its value dies at this consumer.
      if (Instr.isLastUse(Var)) {
        ComputationIter->second.IsLiveOut = false;
      }
    }
  }

  for (auto Iter = KnownComputations.begin(), End = KnownComputations.end();
       Iter != End;) {
    // Disable the folding if its dest may be live beyond this block.
    if (Iter->second.IsLiveOut || Iter->second.NumUses > 1) {
      Iter = KnownComputations.erase(Iter);
      continue;
    }

    // Mark as "dead" rather than outright deleting. This is so that other
    // peephole style optimizations during or before lowering have access to
    // this instruction in undeleted form. See for example
    // tryOptimizedCmpxchgCmpBr().
    Iter->second.Instr->setDead();
    ++Iter;
  }
}
6059 
// Header-lowering driver for MIPS32; simply forwards the global context to
// the base class.
TargetHeaderMIPS32::TargetHeaderMIPS32(GlobalContext *Ctx)
    : TargetHeaderLowering(Ctx) {}
6062 
lower()6063 void TargetHeaderMIPS32::lower() {
6064   if (!BuildDefs::dump())
6065     return;
6066   OstreamLocker L(Ctx);
6067   Ostream &Str = Ctx->getStrEmit();
6068   Str << "\t.set\t"
6069       << "nomicromips\n";
6070   Str << "\t.set\t"
6071       << "nomips16\n";
6072   Str << "\t.set\t"
6073       << "noat\n";
6074   if (getFlags().getUseSandboxing())
6075     Str << "\t.bundle_align_mode 4\n";
6076 }
6077 
// Static register-class tables (presumably populated by
// TargetMIPS32::staticInit() — see the top of this file).
SmallBitVector TargetMIPS32::TypeToRegisterSet[RCMIPS32_NUM];
SmallBitVector TargetMIPS32::TypeToRegisterSetUnfiltered[RCMIPS32_NUM];
SmallBitVector TargetMIPS32::RegisterAliases[RegMIPS32::Reg_NUM];
6081 
// Sandboxer wraps the sandboxing-aware variants of loads, stores, calls and
// returns. BundleOption controls how bundles opened by createAutoBundle()
// are locked.
TargetMIPS32::Sandboxer::Sandboxer(TargetMIPS32 *Target,
                                   InstBundleLock::Option BundleOption)
    : Target(Target), BundleOption(BundleOption) {}

TargetMIPS32::Sandboxer::~Sandboxer() {}
6087 
// Open a bundle that is held (via the Bundler member) for the rest of this
// Sandboxer's lifetime, keeping the masking instruction and the instruction
// it guards in the same bundle.
void TargetMIPS32::Sandboxer::createAutoBundle() {
  Bundler = makeUnique<AutoBundle>(Target, BundleOption);
}
6091 
// Adjust SP by StackOffset. Under sandboxing the add is followed, inside one
// bundle, by masking SP with T7; T7 is assumed to hold the sandbox address
// mask (the InstFakeDef only marks it live for the register allocator).
void TargetMIPS32::Sandboxer::addiu_sp(uint32_t StackOffset) {
  Variable *SP = Target->getPhysicalRegister(RegMIPS32::Reg_SP);
  if (!Target->NeedSandboxing) {
    Target->_addiu(SP, SP, StackOffset);
    return;
  }
  auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
  Target->Context.insert<InstFakeDef>(T7);
  createAutoBundle();
  Target->_addiu(SP, SP, StackOffset);
  Target->_and(SP, SP, T7);
}
6104 
// Sandboxed load word: mask the base register with T7 (bundled with the load)
// unless the base is SP or T8, then re-mask the destination if the load wrote
// SP. NOTE(review): unlike ll/sc/sw below, T8 bases are also exempt here —
// confirm the asymmetry is intentional.
void TargetMIPS32::Sandboxer::lw(Variable *Dest, OperandMIPS32Mem *Mem) {
  Variable *Base = Mem->getBase();
  if (Target->NeedSandboxing && (Target->getStackReg() != Base->getRegNum()) &&
      (RegMIPS32::Reg_T8 != Base->getRegNum())) {
    auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
    Target->Context.insert<InstFakeDef>(T7);
    createAutoBundle();
    Target->_and(Base, Base, T7);
  }
  Target->_lw(Dest, Mem);
  if (Target->NeedSandboxing && (Dest->getRegNum() == Target->getStackReg())) {
    auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
    Target->Context.insert<InstFakeDef>(T7);
    Target->_and(Dest, Dest, T7);
  }
}
6121 
// Sandboxed load-linked: mask non-SP bases with T7 (bundled with the ll),
// then re-mask the destination if it wrote SP.
void TargetMIPS32::Sandboxer::ll(Variable *Dest, OperandMIPS32Mem *Mem) {
  Variable *Base = Mem->getBase();
  if (Target->NeedSandboxing && (Target->getStackReg() != Base->getRegNum())) {
    auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
    Target->Context.insert<InstFakeDef>(T7);
    createAutoBundle();
    Target->_and(Base, Base, T7);
  }
  Target->_ll(Dest, Mem);
  if (Target->NeedSandboxing && (Dest->getRegNum() == Target->getStackReg())) {
    auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
    Target->Context.insert<InstFakeDef>(T7);
    Target->_and(Dest, Dest, T7);
  }
}
6137 
// Sandboxed store-conditional: mask non-SP bases with T7 (bundled with the
// sc). No destination re-mask: sc's result is a success flag, not an address.
void TargetMIPS32::Sandboxer::sc(Variable *Dest, OperandMIPS32Mem *Mem) {
  Variable *Base = Mem->getBase();
  if (Target->NeedSandboxing && (Target->getStackReg() != Base->getRegNum())) {
    auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
    Target->Context.insert<InstFakeDef>(T7);
    createAutoBundle();
    Target->_and(Base, Base, T7);
  }
  Target->_sc(Dest, Mem);
}
6148 
// Sandboxed store word: mask non-SP bases with T7 (bundled with the sw).
// Note: despite the name, Dest here is the value being stored.
void TargetMIPS32::Sandboxer::sw(Variable *Dest, OperandMIPS32Mem *Mem) {
  Variable *Base = Mem->getBase();
  if (Target->NeedSandboxing && (Target->getStackReg() != Base->getRegNum())) {
    auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
    Target->Context.insert<InstFakeDef>(T7);
    createAutoBundle();
    Target->_and(Base, Base, T7);
  }
  Target->_sw(Dest, Mem);
}
6159 
// Sandboxed FP single-precision load: mask non-SP bases with T7 (bundled with
// the lwc1), then re-mask the destination if it wrote SP.
void TargetMIPS32::Sandboxer::lwc1(Variable *Dest, OperandMIPS32Mem *Mem,
                                   RelocOp Reloc) {
  Variable *Base = Mem->getBase();
  if (Target->NeedSandboxing && (Target->getStackReg() != Base->getRegNum())) {
    auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
    Target->Context.insert<InstFakeDef>(T7);
    createAutoBundle();
    Target->_and(Base, Base, T7);
  }
  Target->_lwc1(Dest, Mem, Reloc);
  if (Target->NeedSandboxing && (Dest->getRegNum() == Target->getStackReg())) {
    auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
    Target->Context.insert<InstFakeDef>(T7);
    Target->_and(Dest, Dest, T7);
  }
}
6176 
// Sandboxed FP double-precision load: mask non-SP bases with T7 (bundled with
// the ldc1), then re-mask the destination if it wrote SP.
void TargetMIPS32::Sandboxer::ldc1(Variable *Dest, OperandMIPS32Mem *Mem,
                                   RelocOp Reloc) {
  Variable *Base = Mem->getBase();
  if (Target->NeedSandboxing && (Target->getStackReg() != Base->getRegNum())) {
    auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
    Target->Context.insert<InstFakeDef>(T7);
    createAutoBundle();
    Target->_and(Base, Base, T7);
  }
  Target->_ldc1(Dest, Mem, Reloc);
  if (Target->NeedSandboxing && (Dest->getRegNum() == Target->getStackReg())) {
    auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
    Target->Context.insert<InstFakeDef>(T7);
    Target->_and(Dest, Dest, T7);
  }
}
6193 
ret(Variable * RetAddr,Variable * RetValue)6194 void TargetMIPS32::Sandboxer::ret(Variable *RetAddr, Variable *RetValue) {
6195   if (!Target->NeedSandboxing) {
6196     Target->_ret(RetAddr, RetValue);
6197   }
6198   auto *T6 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T6);
6199   Target->Context.insert<InstFakeDef>(T6);
6200   createAutoBundle();
6201   Target->_and(RetAddr, RetAddr, T6);
6202   Target->_ret(RetAddr, RetValue);
6203 }
6204 
// Reset SP from Src. Under sandboxing the copy and the T7 mask share one
// bundle, and the trailing InstFakeUse marks the masked SP as used so
// liveness keeps the result.
void TargetMIPS32::Sandboxer::reset_sp(Variable *Src) {
  Variable *SP = Target->getPhysicalRegister(RegMIPS32::Reg_SP);
  if (!Target->NeedSandboxing) {
    Target->_mov(SP, Src);
    return;
  }
  auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
  Target->Context.insert<InstFakeDef>(T7);
  createAutoBundle();
  Target->_mov(SP, Src);
  Target->_and(SP, SP, T7);
  Target->getContext().insert<InstFakeUse>(SP);
}
6218 
// Emit a call, returning the created call instruction. Under sandboxing the
// call is bundled, and for indirect calls (register target) the target
// register is first masked with T6.
InstMIPS32Call *TargetMIPS32::Sandboxer::jal(Variable *ReturnReg,
                                             Operand *CallTarget) {
  if (Target->NeedSandboxing) {
    createAutoBundle();
    if (auto *CallTargetR = llvm::dyn_cast<Variable>(CallTarget)) {
      auto *T6 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T6);
      Target->Context.insert<InstFakeDef>(T6);
      Target->_and(CallTargetR, CallTargetR, T6);
    }
  }
  return Target->Context.insert<InstMIPS32Call>(ReturnReg, CallTarget);
}
6231 
6232 } // end of namespace MIPS32
6233 } // end of namespace Ice
6234