1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===//
2 //
3 //                        The Subzero Code Generator
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief Implements the TargetLoweringARM32 class, which consists almost
12 /// entirely of the lowering sequence for each high-level instruction.
13 ///
14 //===----------------------------------------------------------------------===//
15 #include "IceTargetLoweringARM32.h"
16 
17 #include "IceCfg.h"
18 #include "IceCfgNode.h"
19 #include "IceClFlags.h"
20 #include "IceDefs.h"
21 #include "IceELFObjectWriter.h"
22 #include "IceGlobalInits.h"
23 #include "IceInstARM32.def"
24 #include "IceInstARM32.h"
25 #include "IceInstVarIter.h"
26 #include "IceLiveness.h"
27 #include "IceOperand.h"
28 #include "IcePhiLoweringImpl.h"
29 #include "IceRegistersARM32.h"
30 #include "IceTargetLoweringARM32.def"
31 #include "IceUtils.h"
32 #include "llvm/Support/MathExtras.h"
33 
34 #include <algorithm>
35 #include <array>
36 #include <utility>
37 
38 namespace ARM32 {
createTargetLowering(::Ice::Cfg * Func)39 std::unique_ptr<::Ice::TargetLowering> createTargetLowering(::Ice::Cfg *Func) {
40   return ::Ice::ARM32::TargetARM32::create(Func);
41 }
42 
43 std::unique_ptr<::Ice::TargetDataLowering>
createTargetDataLowering(::Ice::GlobalContext * Ctx)44 createTargetDataLowering(::Ice::GlobalContext *Ctx) {
45   return ::Ice::ARM32::TargetDataARM32::create(Ctx);
46 }
47 
48 std::unique_ptr<::Ice::TargetHeaderLowering>
createTargetHeaderLowering(::Ice::GlobalContext * Ctx)49 createTargetHeaderLowering(::Ice::GlobalContext *Ctx) {
50   return ::Ice::ARM32::TargetHeaderARM32::create(Ctx);
51 }
52 
staticInit(::Ice::GlobalContext * Ctx)53 void staticInit(::Ice::GlobalContext *Ctx) {
54   ::Ice::ARM32::TargetARM32::staticInit(Ctx);
55   if (Ice::getFlags().getUseNonsfi()) {
56     // In nonsfi, we need to reference the _GLOBAL_OFFSET_TABLE_ for accessing
57     // globals. The GOT is an external symbol (i.e., it is not defined in the
58     // pexe) so we need to register it as such so that ELF emission won't barf
59     // on an "unknown" symbol. The GOT is added to the External symbols list
60     // here because staticInit() is invoked in a single-thread context.
61     Ctx->getConstantExternSym(Ctx->getGlobalString(::Ice::GlobalOffsetTable));
62   }
63 }
64 
shouldBePooled(const::Ice::Constant * C)65 bool shouldBePooled(const ::Ice::Constant *C) {
66   return ::Ice::ARM32::TargetARM32::shouldBePooled(C);
67 }
68 
getPointerType()69 ::Ice::Type getPointerType() {
70   return ::Ice::ARM32::TargetARM32::getPointerType();
71 }
72 
73 } // end of namespace ARM32
74 
75 namespace Ice {
76 namespace ARM32 {
77 
78 namespace {
79 
80 /// SizeOf is used to obtain the size of an initializer list as a constexpr
81 /// expression. This is only needed until our C++ library is updated to
82 /// C++ 14 -- which defines constexpr members to std::initializer_list.
83 class SizeOf {
84   SizeOf(const SizeOf &) = delete;
85   SizeOf &operator=(const SizeOf &) = delete;
86 
87 public:
SizeOf()88   constexpr SizeOf() : Size(0) {}
89   template <typename... T>
SizeOf(T...)90   explicit constexpr SizeOf(T...)
91       : Size(__length<T...>::value) {}
size() const92   constexpr SizeT size() const { return Size; }
93 
94 private:
95   template <typename T, typename... U> struct __length {
96     static constexpr std::size_t value = 1 + __length<U...>::value;
97   };
98 
99   template <typename T> struct __length<T> {
100     static constexpr std::size_t value = 1;
101   };
102 
103   const std::size_t Size;
104 };
105 
106 } // end of anonymous namespace
107 
108 // Defines the RegARM32::Table table with register information.
109 RegARM32::RegTableType RegARM32::RegTable[RegARM32::Reg_NUM] = {
110 #define X(val, encode, name, cc_arg, scratch, preserved, stackptr, frameptr,   \
111           isGPR, isInt, isI64Pair, isFP32, isFP64, isVec128, alias_init)       \
112   {                                                                            \
113     name, encode, cc_arg, scratch, preserved, stackptr, frameptr, isGPR,       \
114         isInt, isI64Pair, isFP32, isFP64, isVec128,                            \
115         (SizeOf alias_init).size(), alias_init                                 \
116   }                                                                            \
117   ,
118     REGARM32_TABLE
119 #undef X
120 };
121 
122 namespace {
123 
124 // The following table summarizes the logic for lowering the icmp instruction
125 // for i32 and narrower types. Each icmp condition has a clear mapping to an
126 // ARM32 conditional move instruction.
127 
128 const struct TableIcmp32_ {
129   CondARM32::Cond Mapping;
130 } TableIcmp32[] = {
131 #define X(val, is_signed, swapped64, C_32, C1_64, C2_64, C_V, INV_V, NEG_V)    \
132   { CondARM32::C_32 }                                                          \
133   ,
134     ICMPARM32_TABLE
135 #undef X
136 };
137 
138 // The following table summarizes the logic for lowering the icmp instruction
139 // for the i64 type. Two conditional moves are needed for setting to 1 or 0.
140 // The operands may need to be swapped, and there is a slight difference for
141 // signed vs unsigned (comparing hi vs lo first, and using cmp vs sbc).
142 const struct TableIcmp64_ {
143   bool IsSigned;
144   bool Swapped;
145   CondARM32::Cond C1, C2;
146 } TableIcmp64[] = {
147 #define X(val, is_signed, swapped64, C_32, C1_64, C2_64, C_V, INV_V, NEG_V)    \
148   { is_signed, swapped64, CondARM32::C1_64, CondARM32::C2_64 }                 \
149   ,
150     ICMPARM32_TABLE
151 #undef X
152 };
153 
getIcmp32Mapping(InstIcmp::ICond Cond)154 CondARM32::Cond getIcmp32Mapping(InstIcmp::ICond Cond) {
155   assert(Cond < llvm::array_lengthof(TableIcmp32));
156   return TableIcmp32[Cond].Mapping;
157 }
158 
159 // In some cases, there are x-macros tables for both high-level and low-level
160 // instructions/operands that use the same enum key value. The tables are kept
161 // separate to maintain a proper separation between abstraction layers. There
162 // is a risk that the tables could get out of sync if enum values are reordered
163 // or if entries are added or deleted. The following anonymous namespaces use
164 // static_asserts to ensure everything is kept in sync.
165 
166 // Validate the enum values in ICMPARM32_TABLE.
167 namespace {
168 // Define a temporary set of enum values based on low-level table entries.
169 enum _icmp_ll_enum {
170 #define X(val, is_signed, swapped64, C_32, C1_64, C2_64, C_V, INV_V, NEG_V)    \
171   _icmp_ll_##val,
172   ICMPARM32_TABLE
173 #undef X
174       _num
175 };
176 // Define a set of constants based on high-level table entries.
177 #define X(tag, reverse, str)                                                   \
178   static constexpr int _icmp_hl_##tag = InstIcmp::tag;
179 ICEINSTICMP_TABLE
180 #undef X
181 // Define a set of constants based on low-level table entries, and ensure the
182 // table entry keys are consistent.
183 #define X(val, is_signed, swapped64, C_32, C1_64, C2_64, C_V, INV_V, NEG_V)    \
184   static_assert(                                                               \
185       _icmp_ll_##val == _icmp_hl_##val,                                        \
186       "Inconsistency between ICMPARM32_TABLE and ICEINSTICMP_TABLE: " #val);
187 ICMPARM32_TABLE
188 #undef X
189 // Repeat the static asserts with respect to the high-level table entries in
190 // case the high-level table has extra entries.
191 #define X(tag, reverse, str)                                                   \
192   static_assert(                                                               \
193       _icmp_hl_##tag == _icmp_ll_##tag,                                        \
194       "Inconsistency between ICMPARM32_TABLE and ICEINSTICMP_TABLE: " #tag);
195 ICEINSTICMP_TABLE
196 #undef X
197 } // end of anonymous namespace
198 
// Stack alignment, in bytes, applied by applyStackAlignment().
constexpr uint32_t ARM32_STACK_ALIGNMENT_BYTES = 16;
201 
202 // Value is in bytes. Return Value adjusted to the next highest multiple of the
203 // stack alignment.
applyStackAlignment(uint32_t Value)204 uint32_t applyStackAlignment(uint32_t Value) {
205   return Utils::applyAlignment(Value, ARM32_STACK_ALIGNMENT_BYTES);
206 }
207 
208 // Value is in bytes. Return Value adjusted to the next highest multiple of the
209 // stack alignment required for the given type.
applyStackAlignmentTy(uint32_t Value,Type Ty)210 uint32_t applyStackAlignmentTy(uint32_t Value, Type Ty) {
211   // Use natural alignment, except that normally (non-NaCl) ARM only aligns
212   // vectors to 8 bytes.
213   // TODO(jvoung): Check this ...
214   size_t typeAlignInBytes = typeWidthInBytes(Ty);
215   if (isVectorType(Ty))
216     typeAlignInBytes = 8;
217   return Utils::applyAlignment(Value, typeAlignInBytes);
218 }
219 
220 // Conservatively check if at compile time we know that the operand is
221 // definitely a non-zero integer.
isGuaranteedNonzeroInt(const Operand * Op)222 bool isGuaranteedNonzeroInt(const Operand *Op) {
223   if (auto *Const = llvm::dyn_cast_or_null<ConstantInteger32>(Op)) {
224     return Const->getValue() != 0;
225   }
226   return false;
227 }
228 
229 } // end of anonymous namespace
230 
TargetARM32Features(const ClFlags & Flags)231 TargetARM32Features::TargetARM32Features(const ClFlags &Flags) {
232   static_assert(
233       (ARM32InstructionSet::End - ARM32InstructionSet::Begin) ==
234           (TargetInstructionSet::ARM32InstructionSet_End -
235            TargetInstructionSet::ARM32InstructionSet_Begin),
236       "ARM32InstructionSet range different from TargetInstructionSet");
237   if (Flags.getTargetInstructionSet() !=
238       TargetInstructionSet::BaseInstructionSet) {
239     InstructionSet = static_cast<ARM32InstructionSet>(
240         (Flags.getTargetInstructionSet() -
241          TargetInstructionSet::ARM32InstructionSet_Begin) +
242         ARM32InstructionSet::Begin);
243   }
244 }
245 
246 namespace {
247 constexpr SizeT NumGPRArgs =
248 #define X(val, encode, name, cc_arg, scratch, preserved, stackptr, frameptr,   \
249           isGPR, isInt, isI64Pair, isFP32, isFP64, isVec128, alias_init)       \
250   +(((cc_arg) > 0) ? 1 : 0)
251     REGARM32_GPR_TABLE
252 #undef X
253     ;
254 std::array<RegNumT, NumGPRArgs> GPRArgInitializer;
255 
256 constexpr SizeT NumI64Args =
257 #define X(val, encode, name, cc_arg, scratch, preserved, stackptr, frameptr,   \
258           isGPR, isInt, isI64Pair, isFP32, isFP64, isVec128, alias_init)       \
259   +(((cc_arg) > 0) ? 1 : 0)
260     REGARM32_I64PAIR_TABLE
261 #undef X
262     ;
263 std::array<RegNumT, NumI64Args> I64ArgInitializer;
264 
265 constexpr SizeT NumFP32Args =
266 #define X(val, encode, name, cc_arg, scratch, preserved, stackptr, frameptr,   \
267           isGPR, isInt, isI64Pair, isFP32, isFP64, isVec128, alias_init)       \
268   +(((cc_arg) > 0) ? 1 : 0)
269     REGARM32_FP32_TABLE
270 #undef X
271     ;
272 std::array<RegNumT, NumFP32Args> FP32ArgInitializer;
273 
274 constexpr SizeT NumFP64Args =
275 #define X(val, encode, name, cc_arg, scratch, preserved, stackptr, frameptr,   \
276           isGPR, isInt, isI64Pair, isFP32, isFP64, isVec128, alias_init)       \
277   +(((cc_arg) > 0) ? 1 : 0)
278     REGARM32_FP64_TABLE
279 #undef X
280     ;
281 std::array<RegNumT, NumFP64Args> FP64ArgInitializer;
282 
283 constexpr SizeT NumVec128Args =
284 #define X(val, encode, name, cc_arg, scratch, preserved, stackptr, frameptr,   \
285           isGPR, isInt, isI64Pair, isFP32, isFP64, isVec128, alias_init)       \
286   +(((cc_arg > 0)) ? 1 : 0)
287     REGARM32_VEC128_TABLE
288 #undef X
289     ;
290 std::array<RegNumT, NumVec128Args> Vec128ArgInitializer;
291 
getRegClassName(RegClass C)292 const char *getRegClassName(RegClass C) {
293   auto ClassNum = static_cast<RegARM32::RegClassARM32>(C);
294   assert(ClassNum < RegARM32::RCARM32_NUM);
295   switch (ClassNum) {
296   default:
297     assert(C < RC_Target);
298     return regClassString(C);
299   // Add handling of new register classes below.
300   case RegARM32::RCARM32_QtoS:
301     return "QtoS";
302   }
303 }
304 
305 } // end of anonymous namespace
306 
TargetARM32(Cfg * Func)307 TargetARM32::TargetARM32(Cfg *Func)
308     : TargetLowering(Func), NeedSandboxing(SandboxingType == ST_NaCl),
309       CPUFeatures(getFlags()) {}
310 
staticInit(GlobalContext * Ctx)311 void TargetARM32::staticInit(GlobalContext *Ctx) {
312   RegNumT::setLimit(RegARM32::Reg_NUM);
313   // Limit this size (or do all bitsets need to be the same width)???
314   SmallBitVector IntegerRegisters(RegARM32::Reg_NUM);
315   SmallBitVector I64PairRegisters(RegARM32::Reg_NUM);
316   SmallBitVector Float32Registers(RegARM32::Reg_NUM);
317   SmallBitVector Float64Registers(RegARM32::Reg_NUM);
318   SmallBitVector VectorRegisters(RegARM32::Reg_NUM);
319   SmallBitVector QtoSRegisters(RegARM32::Reg_NUM);
320   SmallBitVector InvalidRegisters(RegARM32::Reg_NUM);
321   const unsigned EncodedReg_q8 = RegARM32::RegTable[RegARM32::Reg_q8].Encoding;
322   for (int i = 0; i < RegARM32::Reg_NUM; ++i) {
323     const auto &Entry = RegARM32::RegTable[i];
324     IntegerRegisters[i] = Entry.IsInt;
325     I64PairRegisters[i] = Entry.IsI64Pair;
326     Float32Registers[i] = Entry.IsFP32;
327     Float64Registers[i] = Entry.IsFP64;
328     VectorRegisters[i] = Entry.IsVec128;
329     RegisterAliases[i].resize(RegARM32::Reg_NUM);
330     // TODO(eholk): It would be better to store a QtoS flag in the
331     // IceRegistersARM32 table than to compare their encodings here.
332     QtoSRegisters[i] = Entry.IsVec128 && Entry.Encoding < EncodedReg_q8;
333     for (int j = 0; j < Entry.NumAliases; ++j) {
334       assert(i == j || !RegisterAliases[i][Entry.Aliases[j]]);
335       RegisterAliases[i].set(Entry.Aliases[j]);
336     }
337     assert(RegisterAliases[i][i]);
338     if (Entry.CCArg <= 0) {
339       continue;
340     }
341     const auto RegNum = RegNumT::fromInt(i);
342     if (Entry.IsGPR) {
343       GPRArgInitializer[Entry.CCArg - 1] = RegNum;
344     } else if (Entry.IsI64Pair) {
345       I64ArgInitializer[Entry.CCArg - 1] = RegNum;
346     } else if (Entry.IsFP32) {
347       FP32ArgInitializer[Entry.CCArg - 1] = RegNum;
348     } else if (Entry.IsFP64) {
349       FP64ArgInitializer[Entry.CCArg - 1] = RegNum;
350     } else if (Entry.IsVec128) {
351       Vec128ArgInitializer[Entry.CCArg - 1] = RegNum;
352     }
353   }
354   TypeToRegisterSet[IceType_void] = InvalidRegisters;
355   TypeToRegisterSet[IceType_i1] = IntegerRegisters;
356   TypeToRegisterSet[IceType_i8] = IntegerRegisters;
357   TypeToRegisterSet[IceType_i16] = IntegerRegisters;
358   TypeToRegisterSet[IceType_i32] = IntegerRegisters;
359   TypeToRegisterSet[IceType_i64] = I64PairRegisters;
360   TypeToRegisterSet[IceType_f32] = Float32Registers;
361   TypeToRegisterSet[IceType_f64] = Float64Registers;
362   TypeToRegisterSet[IceType_v4i1] = VectorRegisters;
363   TypeToRegisterSet[IceType_v8i1] = VectorRegisters;
364   TypeToRegisterSet[IceType_v16i1] = VectorRegisters;
365   TypeToRegisterSet[IceType_v16i8] = VectorRegisters;
366   TypeToRegisterSet[IceType_v8i16] = VectorRegisters;
367   TypeToRegisterSet[IceType_v4i32] = VectorRegisters;
368   TypeToRegisterSet[IceType_v4f32] = VectorRegisters;
369   TypeToRegisterSet[RegARM32::RCARM32_QtoS] = QtoSRegisters;
370 
371   for (size_t i = 0; i < llvm::array_lengthof(TypeToRegisterSet); ++i)
372     TypeToRegisterSetUnfiltered[i] = TypeToRegisterSet[i];
373 
374   filterTypeToRegisterSet(Ctx, RegARM32::Reg_NUM, TypeToRegisterSet,
375                           llvm::array_lengthof(TypeToRegisterSet),
376                           [](RegNumT RegNum) -> std::string {
377                             // This function simply removes ", " from the
378                             // register name.
379                             std::string Name = RegARM32::getRegName(RegNum);
380                             constexpr const char RegSeparator[] = ", ";
381                             constexpr size_t RegSeparatorWidth =
382                                 llvm::array_lengthof(RegSeparator) - 1;
383                             for (size_t Pos = Name.find(RegSeparator);
384                                  Pos != std::string::npos;
385                                  Pos = Name.find(RegSeparator)) {
386                               Name.replace(Pos, RegSeparatorWidth, "");
387                             }
388                             return Name;
389                           },
390                           getRegClassName);
391 }
392 
393 namespace {
copyRegAllocFromInfWeightVariable64On32(const VarList & Vars)394 void copyRegAllocFromInfWeightVariable64On32(const VarList &Vars) {
395   for (Variable *Var : Vars) {
396     auto *Var64 = llvm::dyn_cast<Variable64On32>(Var);
397     if (!Var64) {
398       // This is not the variable we are looking for.
399       continue;
400     }
401     // only allow infinite-weight i64 temporaries to be register allocated.
402     assert(!Var64->hasReg() || Var64->mustHaveReg());
403     if (!Var64->hasReg()) {
404       continue;
405     }
406     const auto FirstReg =
407         RegNumT::fixme(RegARM32::getI64PairFirstGPRNum(Var->getRegNum()));
408     // This assumes little endian.
409     Variable *Lo = Var64->getLo();
410     Variable *Hi = Var64->getHi();
411     assert(Lo->hasReg() == Hi->hasReg());
412     if (Lo->hasReg()) {
413       continue;
414     }
415     Lo->setRegNum(FirstReg);
416     Lo->setMustHaveReg();
417     Hi->setRegNum(RegNumT::fixme(FirstReg + 1));
418     Hi->setMustHaveReg();
419   }
420 }
421 } // end of anonymous namespace
422 
getCallStackArgumentsSizeBytes(const InstCall * Call)423 uint32_t TargetARM32::getCallStackArgumentsSizeBytes(const InstCall *Call) {
424   TargetARM32::CallingConv CC;
425   RegNumT DummyReg;
426   size_t OutArgsSizeBytes = 0;
427   for (SizeT i = 0, NumArgs = Call->getNumArgs(); i < NumArgs; ++i) {
428     Operand *Arg = legalizeUndef(Call->getArg(i));
429     const Type Ty = Arg->getType();
430     if (isScalarIntegerType(Ty)) {
431       if (CC.argInGPR(Ty, &DummyReg)) {
432         continue;
433       }
434     } else {
435       if (CC.argInVFP(Ty, &DummyReg)) {
436         continue;
437       }
438     }
439 
440     OutArgsSizeBytes = applyStackAlignmentTy(OutArgsSizeBytes, Ty);
441     OutArgsSizeBytes += typeWidthInBytesOnStack(Ty);
442   }
443 
444   return applyStackAlignment(OutArgsSizeBytes);
445 }
446 
genTargetHelperCallFor(Inst * Instr)447 void TargetARM32::genTargetHelperCallFor(Inst *Instr) {
448   constexpr bool NoTailCall = false;
449   constexpr bool IsTargetHelperCall = true;
450 
451   switch (Instr->getKind()) {
452   default:
453     return;
454   case Inst::Arithmetic: {
455     Variable *Dest = Instr->getDest();
456     const Type DestTy = Dest->getType();
457     const InstArithmetic::OpKind Op =
458         llvm::cast<InstArithmetic>(Instr)->getOp();
459     if (isVectorType(DestTy)) {
460       switch (Op) {
461       default:
462         break;
463       case InstArithmetic::Fdiv:
464       case InstArithmetic::Frem:
465       case InstArithmetic::Sdiv:
466       case InstArithmetic::Srem:
467       case InstArithmetic::Udiv:
468       case InstArithmetic::Urem:
469         scalarizeArithmetic(Op, Dest, Instr->getSrc(0), Instr->getSrc(1));
470         Instr->setDeleted();
471         return;
472       }
473     }
474     switch (DestTy) {
475     default:
476       return;
477     case IceType_i64: {
478       // Technically, ARM has its own aeabi routines, but we can use the
479       // non-aeabi routine as well. LLVM uses __aeabi_ldivmod for div, but uses
480       // the more standard __moddi3 for rem.
481       RuntimeHelper HelperID = RuntimeHelper::H_Num;
482       switch (Op) {
483       default:
484         return;
485       case InstArithmetic::Udiv:
486         HelperID = RuntimeHelper::H_udiv_i64;
487         break;
488       case InstArithmetic::Sdiv:
489         HelperID = RuntimeHelper::H_sdiv_i64;
490         break;
491       case InstArithmetic::Urem:
492         HelperID = RuntimeHelper::H_urem_i64;
493         break;
494       case InstArithmetic::Srem:
495         HelperID = RuntimeHelper::H_srem_i64;
496         break;
497       }
498       Operand *TargetHelper = Ctx->getRuntimeHelperFunc(HelperID);
499       ARM32HelpersPreamble[TargetHelper] = &TargetARM32::preambleDivRem;
500       constexpr SizeT MaxArgs = 2;
501       auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
502                                             NoTailCall, IsTargetHelperCall);
503       Call->addArg(Instr->getSrc(0));
504       Call->addArg(Instr->getSrc(1));
505       Instr->setDeleted();
506       return;
507     }
508     case IceType_i32:
509     case IceType_i16:
510     case IceType_i8: {
511       const bool HasHWDiv = hasCPUFeature(TargetARM32Features::HWDivArm);
512       InstCast::OpKind CastKind;
513       RuntimeHelper HelperID = RuntimeHelper::H_Num;
514       switch (Op) {
515       default:
516         return;
517       case InstArithmetic::Udiv:
518         HelperID = HasHWDiv ? RuntimeHelper::H_Num : RuntimeHelper::H_udiv_i32;
519         CastKind = InstCast::Zext;
520         break;
521       case InstArithmetic::Sdiv:
522         HelperID = HasHWDiv ? RuntimeHelper::H_Num : RuntimeHelper::H_sdiv_i32;
523         CastKind = InstCast::Sext;
524         break;
525       case InstArithmetic::Urem:
526         HelperID = HasHWDiv ? RuntimeHelper::H_Num : RuntimeHelper::H_urem_i32;
527         CastKind = InstCast::Zext;
528         break;
529       case InstArithmetic::Srem:
530         HelperID = HasHWDiv ? RuntimeHelper::H_Num : RuntimeHelper::H_srem_i32;
531         CastKind = InstCast::Sext;
532         break;
533       }
534       if (HelperID == RuntimeHelper::H_Num) {
535         // HelperID should only ever be undefined when the processor does not
536         // have a hardware divider. If any other helpers are ever introduced,
537         // the following assert will have to be modified.
538         assert(HasHWDiv);
539         return;
540       }
541       Operand *Src0 = Instr->getSrc(0);
542       Operand *Src1 = Instr->getSrc(1);
543       if (DestTy != IceType_i32) {
544         // Src0 and Src1 have to be zero-, or signed-extended to i32. For Src0,
545         // we just insert a InstCast right before the call to the helper.
546         Variable *Src0_32 = Func->makeVariable(IceType_i32);
547         Context.insert<InstCast>(CastKind, Src0_32, Src0);
548         Src0 = Src0_32;
549 
550         // For extending Src1, we will just insert an InstCast if Src1 is not a
551         // Constant. If it is, then we extend it here, and not during program
552         // runtime. This allows preambleDivRem to optimize-out the div-by-0
553         // check.
554         if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) {
555           const int32_t ShAmt = (DestTy == IceType_i16) ? 16 : 24;
556           int32_t NewC = C->getValue();
557           if (CastKind == InstCast::Zext) {
558             NewC &= ~(0x80000000l >> ShAmt);
559           } else {
560             NewC = (NewC << ShAmt) >> ShAmt;
561           }
562           Src1 = Ctx->getConstantInt32(NewC);
563         } else {
564           Variable *Src1_32 = Func->makeVariable(IceType_i32);
565           Context.insert<InstCast>(CastKind, Src1_32, Src1);
566           Src1 = Src1_32;
567         }
568       }
569       Operand *TargetHelper = Ctx->getRuntimeHelperFunc(HelperID);
570       ARM32HelpersPreamble[TargetHelper] = &TargetARM32::preambleDivRem;
571       constexpr SizeT MaxArgs = 2;
572       auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
573                                             NoTailCall, IsTargetHelperCall);
574       assert(Src0->getType() == IceType_i32);
575       Call->addArg(Src0);
576       assert(Src1->getType() == IceType_i32);
577       Call->addArg(Src1);
578       Instr->setDeleted();
579       return;
580     }
581     case IceType_f64:
582     case IceType_f32: {
583       if (Op != InstArithmetic::Frem) {
584         return;
585       }
586       constexpr SizeT MaxArgs = 2;
587       Operand *TargetHelper = Ctx->getRuntimeHelperFunc(
588           DestTy == IceType_f32 ? RuntimeHelper::H_frem_f32
589                                 : RuntimeHelper::H_frem_f64);
590       auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
591                                             NoTailCall, IsTargetHelperCall);
592       Call->addArg(Instr->getSrc(0));
593       Call->addArg(Instr->getSrc(1));
594       Instr->setDeleted();
595       return;
596     }
597     }
598     llvm::report_fatal_error("Control flow should never have reached here.");
599   }
600   case Inst::Cast: {
601     Variable *Dest = Instr->getDest();
602     Operand *Src0 = Instr->getSrc(0);
603     const Type DestTy = Dest->getType();
604     const Type SrcTy = Src0->getType();
605     auto *CastInstr = llvm::cast<InstCast>(Instr);
606     const InstCast::OpKind CastKind = CastInstr->getCastKind();
607 
608     switch (CastKind) {
609     default:
610       return;
611     case InstCast::Fptosi:
612     case InstCast::Fptoui: {
613       if (DestTy != IceType_i64) {
614         return;
615       }
616       const bool DestIsSigned = CastKind == InstCast::Fptosi;
617       const bool Src0IsF32 = isFloat32Asserting32Or64(SrcTy);
618       Operand *TargetHelper = Ctx->getRuntimeHelperFunc(
619           Src0IsF32 ? (DestIsSigned ? RuntimeHelper::H_fptosi_f32_i64
620                                     : RuntimeHelper::H_fptoui_f32_i64)
621                     : (DestIsSigned ? RuntimeHelper::H_fptosi_f64_i64
622                                     : RuntimeHelper::H_fptoui_f64_i64));
623       static constexpr SizeT MaxArgs = 1;
624       auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
625                                             NoTailCall, IsTargetHelperCall);
626       Call->addArg(Src0);
627       Instr->setDeleted();
628       return;
629     }
630     case InstCast::Sitofp:
631     case InstCast::Uitofp: {
632       if (SrcTy != IceType_i64) {
633         return;
634       }
635       const bool SourceIsSigned = CastKind == InstCast::Sitofp;
636       const bool DestIsF32 = isFloat32Asserting32Or64(Dest->getType());
637       Operand *TargetHelper = Ctx->getRuntimeHelperFunc(
638           DestIsF32 ? (SourceIsSigned ? RuntimeHelper::H_sitofp_i64_f32
639                                       : RuntimeHelper::H_uitofp_i64_f32)
640                     : (SourceIsSigned ? RuntimeHelper::H_sitofp_i64_f64
641                                       : RuntimeHelper::H_uitofp_i64_f64));
642       static constexpr SizeT MaxArgs = 1;
643       auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
644                                             NoTailCall, IsTargetHelperCall);
645       Call->addArg(Src0);
646       Instr->setDeleted();
647       return;
648     }
649     case InstCast::Bitcast: {
650       if (DestTy == SrcTy) {
651         return;
652       }
653       Variable *CallDest = Dest;
654       RuntimeHelper HelperID = RuntimeHelper::H_Num;
655       switch (DestTy) {
656       default:
657         return;
658       case IceType_i8:
659         assert(SrcTy == IceType_v8i1);
660         HelperID = RuntimeHelper::H_bitcast_8xi1_i8;
661         CallDest = Func->makeVariable(IceType_i32);
662         break;
663       case IceType_i16:
664         assert(SrcTy == IceType_v16i1);
665         HelperID = RuntimeHelper::H_bitcast_16xi1_i16;
666         CallDest = Func->makeVariable(IceType_i32);
667         break;
668       case IceType_v8i1: {
669         assert(SrcTy == IceType_i8);
670         HelperID = RuntimeHelper::H_bitcast_i8_8xi1;
671         Variable *Src0AsI32 = Func->makeVariable(stackSlotType());
672         // Arguments to functions are required to be at least 32 bits wide.
673         Context.insert<InstCast>(InstCast::Zext, Src0AsI32, Src0);
674         Src0 = Src0AsI32;
675       } break;
676       case IceType_v16i1: {
677         assert(SrcTy == IceType_i16);
678         HelperID = RuntimeHelper::H_bitcast_i16_16xi1;
679         Variable *Src0AsI32 = Func->makeVariable(stackSlotType());
680         // Arguments to functions are required to be at least 32 bits wide.
681         Context.insert<InstCast>(InstCast::Zext, Src0AsI32, Src0);
682         Src0 = Src0AsI32;
683       } break;
684       }
685       constexpr SizeT MaxSrcs = 1;
686       InstCall *Call = makeHelperCall(HelperID, CallDest, MaxSrcs);
687       Call->addArg(Src0);
688       Context.insert(Call);
689       // The PNaCl ABI disallows i8/i16 return types, so truncate the helper
690       // call result to the appropriate type as necessary.
691       if (CallDest->getType() != Dest->getType())
692         Context.insert<InstCast>(InstCast::Trunc, Dest, CallDest);
693       Instr->setDeleted();
694       return;
695     }
696     case InstCast::Trunc: {
697       if (DestTy == SrcTy) {
698         return;
699       }
700       if (!isVectorType(SrcTy)) {
701         return;
702       }
703       assert(typeNumElements(DestTy) == typeNumElements(SrcTy));
704       assert(typeElementType(DestTy) == IceType_i1);
705       assert(isVectorIntegerType(SrcTy));
706       return;
707     }
708     case InstCast::Sext:
709     case InstCast::Zext: {
710       if (DestTy == SrcTy) {
711         return;
712       }
713       if (!isVectorType(DestTy)) {
714         return;
715       }
716       assert(typeNumElements(DestTy) == typeNumElements(SrcTy));
717       assert(typeElementType(SrcTy) == IceType_i1);
718       assert(isVectorIntegerType(DestTy));
719       return;
720     }
721     }
722     llvm::report_fatal_error("Control flow should never have reached here.");
723   }
724   case Inst::IntrinsicCall: {
725     Variable *Dest = Instr->getDest();
726     auto *IntrinsicCall = llvm::cast<InstIntrinsicCall>(Instr);
727     Intrinsics::IntrinsicID ID = IntrinsicCall->getIntrinsicInfo().ID;
728     switch (ID) {
729     default:
730       return;
731     case Intrinsics::Ctpop: {
732       Operand *Src0 = IntrinsicCall->getArg(0);
733       Operand *TargetHelper =
734           Ctx->getRuntimeHelperFunc(isInt32Asserting32Or64(Src0->getType())
735                                         ? RuntimeHelper::H_call_ctpop_i32
736                                         : RuntimeHelper::H_call_ctpop_i64);
737       static constexpr SizeT MaxArgs = 1;
738       auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
739                                             NoTailCall, IsTargetHelperCall);
740       Call->addArg(Src0);
741       Instr->setDeleted();
742       if (Src0->getType() == IceType_i64) {
743         ARM32HelpersPostamble[TargetHelper] = &TargetARM32::postambleCtpop64;
744       }
745       return;
746     }
747     case Intrinsics::Longjmp: {
748       static constexpr SizeT MaxArgs = 2;
749       static constexpr Variable *NoDest = nullptr;
750       Operand *TargetHelper =
751           Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_longjmp);
752       auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper,
753                                             NoTailCall, IsTargetHelperCall);
754       Call->addArg(IntrinsicCall->getArg(0));
755       Call->addArg(IntrinsicCall->getArg(1));
756       Instr->setDeleted();
757       return;
758     }
759     case Intrinsics::Memcpy: {
760       // In the future, we could potentially emit an inline memcpy/memset, etc.
761       // for intrinsic calls w/ a known length.
762       static constexpr SizeT MaxArgs = 3;
763       static constexpr Variable *NoDest = nullptr;
764       Operand *TargetHelper =
765           Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_memcpy);
766       auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper,
767                                             NoTailCall, IsTargetHelperCall);
768       Call->addArg(IntrinsicCall->getArg(0));
769       Call->addArg(IntrinsicCall->getArg(1));
770       Call->addArg(IntrinsicCall->getArg(2));
771       Instr->setDeleted();
772       return;
773     }
774     case Intrinsics::Memmove: {
775       static constexpr SizeT MaxArgs = 3;
776       static constexpr Variable *NoDest = nullptr;
777       Operand *TargetHelper =
778           Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_memmove);
779       auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper,
780                                             NoTailCall, IsTargetHelperCall);
781       Call->addArg(IntrinsicCall->getArg(0));
782       Call->addArg(IntrinsicCall->getArg(1));
783       Call->addArg(IntrinsicCall->getArg(2));
784       Instr->setDeleted();
785       return;
786     }
787     case Intrinsics::Memset: {
788       // The value operand needs to be extended to a stack slot size because the
789       // PNaCl ABI requires arguments to be at least 32 bits wide.
790       Operand *ValOp = IntrinsicCall->getArg(1);
791       assert(ValOp->getType() == IceType_i8);
792       Variable *ValExt = Func->makeVariable(stackSlotType());
793       Context.insert<InstCast>(InstCast::Zext, ValExt, ValOp);
794 
795       // Technically, ARM has its own __aeabi_memset, but we can use plain
796       // memset too. The value and size argument need to be flipped if we ever
797       // decide to use __aeabi_memset.
798       static constexpr SizeT MaxArgs = 3;
799       static constexpr Variable *NoDest = nullptr;
800       Operand *TargetHelper =
801           Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_memset);
802       auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper,
803                                             NoTailCall, IsTargetHelperCall);
804       Call->addArg(IntrinsicCall->getArg(0));
805       Call->addArg(ValExt);
806       Call->addArg(IntrinsicCall->getArg(2));
807       Instr->setDeleted();
808       return;
809     }
810     case Intrinsics::NaClReadTP: {
811       if (SandboxingType == ST_NaCl) {
812         return;
813       }
814       static constexpr SizeT MaxArgs = 0;
815       Operand *TargetHelper =
816           SandboxingType == ST_Nonsfi
817               ? Ctx->getConstantExternSym(
818                     Ctx->getGlobalString("__aeabi_read_tp"))
819               : Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_read_tp);
820       Context.insert<InstCall>(MaxArgs, Dest, TargetHelper, NoTailCall,
821                                IsTargetHelperCall);
822       Instr->setDeleted();
823       return;
824     }
825     case Intrinsics::Setjmp: {
826       static constexpr SizeT MaxArgs = 1;
827       Operand *TargetHelper =
828           Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_setjmp);
829       auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
830                                             NoTailCall, IsTargetHelperCall);
831       Call->addArg(IntrinsicCall->getArg(0));
832       Instr->setDeleted();
833       return;
834     }
835     }
836     llvm::report_fatal_error("Control flow should never have reached here.");
837   }
838   }
839 }
840 
findMaxStackOutArgsSize()841 void TargetARM32::findMaxStackOutArgsSize() {
842   // MinNeededOutArgsBytes should be updated if the Target ever creates a
843   // high-level InstCall that requires more stack bytes.
844   constexpr size_t MinNeededOutArgsBytes = 0;
845   MaxOutArgsSizeBytes = MinNeededOutArgsBytes;
846   for (CfgNode *Node : Func->getNodes()) {
847     Context.init(Node);
848     while (!Context.atEnd()) {
849       PostIncrLoweringContext PostIncrement(Context);
850       Inst *CurInstr = iteratorToInst(Context.getCur());
851       if (auto *Call = llvm::dyn_cast<InstCall>(CurInstr)) {
852         SizeT OutArgsSizeBytes = getCallStackArgumentsSizeBytes(Call);
853         MaxOutArgsSizeBytes = std::max(MaxOutArgsSizeBytes, OutArgsSizeBytes);
854       }
855     }
856   }
857 }
858 
createGotPtr()859 void TargetARM32::createGotPtr() {
860   if (SandboxingType != ST_Nonsfi) {
861     return;
862   }
863   GotPtr = Func->makeVariable(IceType_i32);
864 }
865 
insertGotPtrInitPlaceholder()866 void TargetARM32::insertGotPtrInitPlaceholder() {
867   if (SandboxingType != ST_Nonsfi) {
868     return;
869   }
870   assert(GotPtr != nullptr);
871   // We add the two placeholder instructions here. The first fakedefs T, an
872   // infinite-weight temporary, while the second fakedefs the GotPtr "using" T.
873   // This is needed because the GotPtr initialization, if needed, will require
874   // a register:
875   //
876   //   movw     reg, _GLOBAL_OFFSET_TABLE_ - 16 - .
877   //   movt     reg, _GLOBAL_OFFSET_TABLE_ - 12 - .
878   //   add      reg, pc, reg
879   //   mov      GotPtr, reg
880   //
881   // If GotPtr is not used, then both these pseudo-instructions are dce'd.
882   Variable *T = makeReg(IceType_i32);
883   Context.insert<InstFakeDef>(T);
884   Context.insert<InstFakeDef>(GotPtr, T);
885 }
886 
887 GlobalString
createGotoffRelocation(const ConstantRelocatable * CR)888 TargetARM32::createGotoffRelocation(const ConstantRelocatable *CR) {
889   GlobalString CRName = CR->getName();
890   GlobalString CRGotoffName =
891       Ctx->getGlobalString("GOTOFF$" + Func->getFunctionName() + "$" + CRName);
892   if (KnownGotoffs.count(CRGotoffName) == 0) {
893     constexpr bool SuppressMangling = true;
894     auto *Global =
895         VariableDeclaration::create(Func->getGlobalPool(), SuppressMangling);
896     Global->setIsConstant(true);
897     Global->setName(CRName);
898     Func->getGlobalPool()->willNotBeEmitted(Global);
899 
900     auto *Gotoff =
901         VariableDeclaration::create(Func->getGlobalPool(), SuppressMangling);
902     constexpr auto GotFixup = R_ARM_GOTOFF32;
903     Gotoff->setIsConstant(true);
904     Gotoff->addInitializer(VariableDeclaration::RelocInitializer::create(
905         Func->getGlobalPool(), Global, {RelocOffset::create(Ctx, 0)},
906         GotFixup));
907     Gotoff->setName(CRGotoffName);
908     Func->addGlobal(Gotoff);
909     KnownGotoffs.emplace(CRGotoffName);
910   }
911   return CRGotoffName;
912 }
913 
materializeGotAddr(CfgNode * Node)914 void TargetARM32::materializeGotAddr(CfgNode *Node) {
915   if (SandboxingType != ST_Nonsfi) {
916     return;
917   }
918 
919   // At first, we try to find the
920   //    GotPtr = def T
921   // pseudo-instruction that we placed for defining the got ptr. That
922   // instruction is not just a place-holder for defining the GotPtr (thus
923   // keeping liveness consistent), but it is also located at a point where it is
924   // safe to materialize the got addr -- i.e., before loading parameters to
925   // registers, but after moving register parameters from their home location.
926   InstFakeDef *DefGotPtr = nullptr;
927   for (auto &Inst : Node->getInsts()) {
928     auto *FakeDef = llvm::dyn_cast<InstFakeDef>(&Inst);
929     if (FakeDef != nullptr && FakeDef->getDest() == GotPtr) {
930       DefGotPtr = FakeDef;
931       break;
932     }
933   }
934 
935   if (DefGotPtr == nullptr || DefGotPtr->isDeleted()) {
936     return;
937   }
938 
939   // The got addr needs to be materialized at the same point where DefGotPtr
940   // lives.
941   Context.setInsertPoint(instToIterator(DefGotPtr));
942   assert(DefGotPtr->getSrcSize() == 1);
943   auto *T = llvm::cast<Variable>(DefGotPtr->getSrc(0));
944   loadNamedConstantRelocatablePIC(Ctx->getGlobalString(GlobalOffsetTable), T,
945                                   [this, T](Variable *PC) { _add(T, PC, T); });
946   _mov(GotPtr, T);
947   DefGotPtr->setDeleted();
948 }
949 
loadNamedConstantRelocatablePIC(GlobalString Name,Variable * Register,std::function<void (Variable * PC)> Finish)950 void TargetARM32::loadNamedConstantRelocatablePIC(
951     GlobalString Name, Variable *Register,
952     std::function<void(Variable *PC)> Finish) {
953   assert(SandboxingType == ST_Nonsfi);
954   // We makeReg() here instead of getPhysicalRegister() because the latter ends
955   // up creating multi-blocks temporaries that liveness fails to validate.
956   auto *PC = makeReg(IceType_i32, RegARM32::Reg_pc);
957 
958   auto *AddPcReloc = RelocOffset::create(Ctx);
959   AddPcReloc->setSubtract(true);
960   auto *AddPcLabel = InstARM32Label::create(Func, this);
961   AddPcLabel->setRelocOffset(AddPcReloc);
962 
963   auto *MovwReloc = RelocOffset::create(Ctx);
964   auto *MovwLabel = InstARM32Label::create(Func, this);
965   MovwLabel->setRelocOffset(MovwReloc);
966 
967   auto *MovtReloc = RelocOffset::create(Ctx);
968   auto *MovtLabel = InstARM32Label::create(Func, this);
969   MovtLabel->setRelocOffset(MovtReloc);
970 
971   // The EmitString for these constant relocatables have hardcoded offsets
972   // attached to them. This could be dangerous if, e.g., we ever implemented
973   // instruction scheduling but llvm-mc currently does not support
974   //
975   //   movw reg, #:lower16:(Symbol - Label - Number)
976   //   movt reg, #:upper16:(Symbol - Label - Number)
977   //
978   // relocations.
979   static constexpr RelocOffsetT PcOffset = -8;
980   auto *CRLower = Ctx->getConstantSymWithEmitString(
981       PcOffset, {MovwReloc, AddPcReloc}, Name, Name + " -16");
982   auto *CRUpper = Ctx->getConstantSymWithEmitString(
983       PcOffset, {MovtReloc, AddPcReloc}, Name, Name + " -12");
984 
985   Context.insert(MovwLabel);
986   _movw(Register, CRLower);
987   Context.insert(MovtLabel);
988   _movt(Register, CRUpper);
989   // PC = fake-def to keep liveness consistent.
990   Context.insert<InstFakeDef>(PC);
991   Context.insert(AddPcLabel);
992   Finish(PC);
993 }
994 
translateO2()995 void TargetARM32::translateO2() {
996   TimerMarker T(TimerStack::TT_O2, Func);
997 
998   // TODO(stichnot): share passes with other targets?
999   // https://code.google.com/p/nativeclient/issues/detail?id=4094
1000   if (SandboxingType == ST_Nonsfi) {
1001     createGotPtr();
1002   }
1003   genTargetHelperCalls();
1004   findMaxStackOutArgsSize();
1005 
1006   // Do not merge Alloca instructions, and lay out the stack.
1007   static constexpr bool SortAndCombineAllocas = true;
1008   Func->processAllocas(SortAndCombineAllocas);
1009   Func->dump("After Alloca processing");
1010 
1011   if (!getFlags().getEnablePhiEdgeSplit()) {
1012     // Lower Phi instructions.
1013     Func->placePhiLoads();
1014     if (Func->hasError())
1015       return;
1016     Func->placePhiStores();
1017     if (Func->hasError())
1018       return;
1019     Func->deletePhis();
1020     if (Func->hasError())
1021       return;
1022     Func->dump("After Phi lowering");
1023   }
1024 
1025   // Address mode optimization.
1026   Func->getVMetadata()->init(VMK_SingleDefs);
1027   Func->doAddressOpt();
1028   Func->materializeVectorShuffles();
1029 
1030   // Argument lowering
1031   Func->doArgLowering();
1032 
1033   // Target lowering. This requires liveness analysis for some parts of the
1034   // lowering decisions, such as compare/branch fusing. If non-lightweight
1035   // liveness analysis is used, the instructions need to be renumbered first.
1036   // TODO: This renumbering should only be necessary if we're actually
1037   // calculating live intervals, which we only do for register allocation.
1038   Func->renumberInstructions();
1039   if (Func->hasError())
1040     return;
1041 
1042   // TODO: It should be sufficient to use the fastest liveness calculation,
1043   // i.e. livenessLightweight(). However, for some reason that slows down the
1044   // rest of the translation. Investigate.
1045   Func->liveness(Liveness_Basic);
1046   if (Func->hasError())
1047     return;
1048   Func->dump("After ARM32 address mode opt");
1049 
1050   if (SandboxingType == ST_Nonsfi) {
1051     insertGotPtrInitPlaceholder();
1052   }
1053   Func->genCode();
1054   if (Func->hasError())
1055     return;
1056   Func->dump("After ARM32 codegen");
1057 
1058   // Register allocation. This requires instruction renumbering and full
1059   // liveness analysis.
1060   Func->renumberInstructions();
1061   if (Func->hasError())
1062     return;
1063   Func->liveness(Liveness_Intervals);
1064   if (Func->hasError())
1065     return;
1066   // The post-codegen dump is done here, after liveness analysis and associated
1067   // cleanup, to make the dump cleaner and more useful.
1068   Func->dump("After initial ARM32 codegen");
1069   // Validate the live range computations. The expensive validation call is
1070   // deliberately only made when assertions are enabled.
1071   assert(Func->validateLiveness());
1072   Func->getVMetadata()->init(VMK_All);
1073   regAlloc(RAK_Global);
1074   if (Func->hasError())
1075     return;
1076 
1077   copyRegAllocFromInfWeightVariable64On32(Func->getVariables());
1078   Func->dump("After linear scan regalloc");
1079 
1080   if (getFlags().getEnablePhiEdgeSplit()) {
1081     Func->advancedPhiLowering();
1082     Func->dump("After advanced Phi lowering");
1083   }
1084 
1085   ForbidTemporaryWithoutReg _(this);
1086 
1087   // Stack frame mapping.
1088   Func->genFrame();
1089   if (Func->hasError())
1090     return;
1091   Func->dump("After stack frame mapping");
1092 
1093   postLowerLegalization();
1094   if (Func->hasError())
1095     return;
1096   Func->dump("After postLowerLegalization");
1097 
1098   Func->contractEmptyNodes();
1099   Func->reorderNodes();
1100 
1101   // Branch optimization. This needs to be done just before code emission. In
1102   // particular, no transformations that insert or reorder CfgNodes should be
1103   // done after branch optimization. We go ahead and do it before nop insertion
1104   // to reduce the amount of work needed for searching for opportunities.
1105   Func->doBranchOpt();
1106   Func->dump("After branch optimization");
1107 
1108   // Nop insertion
1109   if (getFlags().getShouldDoNopInsertion()) {
1110     Func->doNopInsertion();
1111   }
1112 }
1113 
translateOm1()1114 void TargetARM32::translateOm1() {
1115   TimerMarker T(TimerStack::TT_Om1, Func);
1116 
1117   // TODO(stichnot): share passes with other targets?
1118   if (SandboxingType == ST_Nonsfi) {
1119     createGotPtr();
1120   }
1121 
1122   genTargetHelperCalls();
1123   findMaxStackOutArgsSize();
1124 
1125   // Do not merge Alloca instructions, and lay out the stack.
1126   static constexpr bool DontSortAndCombineAllocas = false;
1127   Func->processAllocas(DontSortAndCombineAllocas);
1128   Func->dump("After Alloca processing");
1129 
1130   Func->placePhiLoads();
1131   if (Func->hasError())
1132     return;
1133   Func->placePhiStores();
1134   if (Func->hasError())
1135     return;
1136   Func->deletePhis();
1137   if (Func->hasError())
1138     return;
1139   Func->dump("After Phi lowering");
1140 
1141   Func->doArgLowering();
1142 
1143   if (SandboxingType == ST_Nonsfi) {
1144     insertGotPtrInitPlaceholder();
1145   }
1146   Func->genCode();
1147   if (Func->hasError())
1148     return;
1149   Func->dump("After initial ARM32 codegen");
1150 
1151   regAlloc(RAK_InfOnly);
1152   if (Func->hasError())
1153     return;
1154 
1155   copyRegAllocFromInfWeightVariable64On32(Func->getVariables());
1156   Func->dump("After regalloc of infinite-weight variables");
1157 
1158   ForbidTemporaryWithoutReg _(this);
1159 
1160   Func->genFrame();
1161   if (Func->hasError())
1162     return;
1163   Func->dump("After stack frame mapping");
1164 
1165   postLowerLegalization();
1166   if (Func->hasError())
1167     return;
1168   Func->dump("After postLowerLegalization");
1169 
1170   // Nop insertion
1171   if (getFlags().getShouldDoNopInsertion()) {
1172     Func->doNopInsertion();
1173   }
1174 }
1175 
getStackAlignment() const1176 uint32_t TargetARM32::getStackAlignment() const {
1177   return ARM32_STACK_ALIGNMENT_BYTES;
1178 }
1179 
doBranchOpt(Inst * I,const CfgNode * NextNode)1180 bool TargetARM32::doBranchOpt(Inst *I, const CfgNode *NextNode) {
1181   if (auto *Br = llvm::dyn_cast<InstARM32Br>(I)) {
1182     return Br->optimizeBranch(NextNode);
1183   }
1184   return false;
1185 }
1186 
getRegName(RegNumT RegNum,Type Ty) const1187 const char *TargetARM32::getRegName(RegNumT RegNum, Type Ty) const {
1188   (void)Ty;
1189   return RegARM32::getRegName(RegNum);
1190 }
1191 
getPhysicalRegister(RegNumT RegNum,Type Ty)1192 Variable *TargetARM32::getPhysicalRegister(RegNumT RegNum, Type Ty) {
1193   static const Type DefaultType[] = {
1194 #define X(val, encode, name, cc_arg, scratch, preserved, stackptr, frameptr,   \
1195           isGPR, isInt, isI64Pair, isFP32, isFP64, isVec128, alias_init)       \
1196   (isFP32)                                                                     \
1197       ? IceType_f32                                                            \
1198       : ((isFP64) ? IceType_f64 : ((isVec128 ? IceType_v4i32 : IceType_i32))),
1199       REGARM32_TABLE
1200 #undef X
1201   };
1202 
1203   if (Ty == IceType_void) {
1204     assert(unsigned(RegNum) < llvm::array_lengthof(DefaultType));
1205     Ty = DefaultType[RegNum];
1206   }
1207   if (PhysicalRegisters[Ty].empty())
1208     PhysicalRegisters[Ty].resize(RegARM32::Reg_NUM);
1209   assert(unsigned(RegNum) < PhysicalRegisters[Ty].size());
1210   Variable *Reg = PhysicalRegisters[Ty][RegNum];
1211   if (Reg == nullptr) {
1212     Reg = Func->makeVariable(Ty);
1213     Reg->setRegNum(RegNum);
1214     PhysicalRegisters[Ty][RegNum] = Reg;
1215     // Specially mark a named physical register as an "argument" so that it is
1216     // considered live upon function entry.  Otherwise it's possible to get
1217     // liveness validation errors for saving callee-save registers.
1218     Func->addImplicitArg(Reg);
1219     // Don't bother tracking the live range of a named physical register.
1220     Reg->setIgnoreLiveness();
1221   }
1222   return Reg;
1223 }
1224 
emitJumpTable(const Cfg * Func,const InstJumpTable * JumpTable) const1225 void TargetARM32::emitJumpTable(const Cfg *Func,
1226                                 const InstJumpTable *JumpTable) const {
1227   (void)Func;
1228   (void)JumpTable;
1229   UnimplementedError(getFlags());
1230 }
1231 
emitVariable(const Variable * Var) const1232 void TargetARM32::emitVariable(const Variable *Var) const {
1233   if (!BuildDefs::dump())
1234     return;
1235   Ostream &Str = Ctx->getStrEmit();
1236   if (Var->hasReg()) {
1237     Str << getRegName(Var->getRegNum(), Var->getType());
1238     return;
1239   }
1240   if (Var->mustHaveReg()) {
1241     llvm::report_fatal_error("Infinite-weight Variable (" + Var->getName() +
1242                              ") has no register assigned - function " +
1243                              Func->getFunctionName());
1244   }
1245   assert(!Var->isRematerializable());
1246   int32_t Offset = Var->getStackOffset();
1247   auto BaseRegNum = Var->getBaseRegNum();
1248   if (BaseRegNum.hasNoValue()) {
1249     BaseRegNum = getFrameOrStackReg();
1250   }
1251   const Type VarTy = Var->getType();
1252   Str << "[" << getRegName(BaseRegNum, VarTy);
1253   if (Offset != 0) {
1254     Str << ", #" << Offset;
1255   }
1256   Str << "]";
1257 }
1258 
CallingConv()1259 TargetARM32::CallingConv::CallingConv()
1260     : GPRegsUsed(RegARM32::Reg_NUM),
1261       GPRArgs(GPRArgInitializer.rbegin(), GPRArgInitializer.rend()),
1262       I64Args(I64ArgInitializer.rbegin(), I64ArgInitializer.rend()),
1263       VFPRegsUsed(RegARM32::Reg_NUM),
1264       FP32Args(FP32ArgInitializer.rbegin(), FP32ArgInitializer.rend()),
1265       FP64Args(FP64ArgInitializer.rbegin(), FP64ArgInitializer.rend()),
1266       Vec128Args(Vec128ArgInitializer.rbegin(), Vec128ArgInitializer.rend()) {}
1267 
argInGPR(Type Ty,RegNumT * Reg)1268 bool TargetARM32::CallingConv::argInGPR(Type Ty, RegNumT *Reg) {
1269   CfgVector<RegNumT> *Source;
1270 
1271   switch (Ty) {
1272   default: {
1273     assert(isScalarIntegerType(Ty));
1274     Source = &GPRArgs;
1275   } break;
1276   case IceType_i64: {
1277     Source = &I64Args;
1278   } break;
1279   }
1280 
1281   discardUnavailableGPRsAndTheirAliases(Source);
1282 
1283   if (Source->empty()) {
1284     GPRegsUsed.set();
1285     return false;
1286   }
1287 
1288   *Reg = Source->back();
1289   // Note that we don't Source->pop_back() here. This is intentional. Notice how
1290   // we mark all of Reg's aliases as Used. So, for the next argument,
1291   // Source->back() is marked as unavailable, and it is thus implicitly popped
1292   // from the stack.
1293   GPRegsUsed |= RegisterAliases[*Reg];
1294   return true;
1295 }
1296 
1297 // GPR are not packed when passing parameters. Thus, a function foo(i32, i64,
1298 // i32) will have the first argument in r0, the second in r1-r2, and the third
1299 // on the stack. To model this behavior, whenever we pop a register from Regs,
1300 // we remove all of its aliases from the pool of available GPRs. This has the
1301 // effect of computing the "closure" on the GPR registers.
discardUnavailableGPRsAndTheirAliases(CfgVector<RegNumT> * Regs)1302 void TargetARM32::CallingConv::discardUnavailableGPRsAndTheirAliases(
1303     CfgVector<RegNumT> *Regs) {
1304   while (!Regs->empty() && GPRegsUsed[Regs->back()]) {
1305     GPRegsUsed |= RegisterAliases[Regs->back()];
1306     Regs->pop_back();
1307   }
1308 }
1309 
argInVFP(Type Ty,RegNumT * Reg)1310 bool TargetARM32::CallingConv::argInVFP(Type Ty, RegNumT *Reg) {
1311   CfgVector<RegNumT> *Source;
1312 
1313   switch (Ty) {
1314   default: {
1315     assert(isVectorType(Ty));
1316     Source = &Vec128Args;
1317   } break;
1318   case IceType_f32: {
1319     Source = &FP32Args;
1320   } break;
1321   case IceType_f64: {
1322     Source = &FP64Args;
1323   } break;
1324   }
1325 
1326   discardUnavailableVFPRegs(Source);
1327 
1328   if (Source->empty()) {
1329     VFPRegsUsed.set();
1330     return false;
1331   }
1332 
1333   *Reg = Source->back();
1334   VFPRegsUsed |= RegisterAliases[*Reg];
1335   return true;
1336 }
1337 
1338 // Arguments in VFP registers are not packed, so we don't mark the popped
1339 // registers' aliases as unavailable.
discardUnavailableVFPRegs(CfgVector<RegNumT> * Regs)1340 void TargetARM32::CallingConv::discardUnavailableVFPRegs(
1341     CfgVector<RegNumT> *Regs) {
1342   while (!Regs->empty() && VFPRegsUsed[Regs->back()]) {
1343     Regs->pop_back();
1344   }
1345 }
1346 
lowerArguments()1347 void TargetARM32::lowerArguments() {
1348   VarList &Args = Func->getArgs();
1349   TargetARM32::CallingConv CC;
1350 
1351   // For each register argument, replace Arg in the argument list with the home
1352   // register. Then generate an instruction in the prolog to copy the home
1353   // register to the assigned location of Arg.
1354   Context.init(Func->getEntryNode());
1355   Context.setInsertPoint(Context.getCur());
1356 
1357   for (SizeT I = 0, E = Args.size(); I < E; ++I) {
1358     Variable *Arg = Args[I];
1359     Type Ty = Arg->getType();
1360     RegNumT RegNum;
1361     if (isScalarIntegerType(Ty)) {
1362       if (!CC.argInGPR(Ty, &RegNum)) {
1363         continue;
1364       }
1365     } else {
1366       if (!CC.argInVFP(Ty, &RegNum)) {
1367         continue;
1368       }
1369     }
1370 
1371     Variable *RegisterArg = Func->makeVariable(Ty);
1372     if (BuildDefs::dump()) {
1373       RegisterArg->setName(Func, "home_reg:" + Arg->getName());
1374     }
1375     RegisterArg->setIsArg();
1376     Arg->setIsArg(false);
1377     Args[I] = RegisterArg;
1378     switch (Ty) {
1379     default: { RegisterArg->setRegNum(RegNum); } break;
1380     case IceType_i64: {
1381       auto *RegisterArg64 = llvm::cast<Variable64On32>(RegisterArg);
1382       RegisterArg64->initHiLo(Func);
1383       RegisterArg64->getLo()->setRegNum(
1384           RegNumT::fixme(RegARM32::getI64PairFirstGPRNum(RegNum)));
1385       RegisterArg64->getHi()->setRegNum(
1386           RegNumT::fixme(RegARM32::getI64PairSecondGPRNum(RegNum)));
1387     } break;
1388     }
1389     Context.insert<InstAssign>(Arg, RegisterArg);
1390   }
1391 }
1392 
1393 // Helper function for addProlog().
1394 //
1395 // This assumes Arg is an argument passed on the stack. This sets the frame
1396 // offset for Arg and updates InArgsSizeBytes according to Arg's width. For an
1397 // I64 arg that has been split into Lo and Hi components, it calls itself
1398 // recursively on the components, taking care to handle Lo first because of the
1399 // little-endian architecture. Lastly, this function generates an instruction
1400 // to copy Arg into its assigned register if applicable.
finishArgumentLowering(Variable * Arg,Variable * FramePtr,size_t BasicFrameOffset,size_t * InArgsSizeBytes)1401 void TargetARM32::finishArgumentLowering(Variable *Arg, Variable *FramePtr,
1402                                          size_t BasicFrameOffset,
1403                                          size_t *InArgsSizeBytes) {
1404   const Type Ty = Arg->getType();
1405   *InArgsSizeBytes = applyStackAlignmentTy(*InArgsSizeBytes, Ty);
1406 
1407   if (auto *Arg64On32 = llvm::dyn_cast<Variable64On32>(Arg)) {
1408     Variable *const Lo = Arg64On32->getLo();
1409     Variable *const Hi = Arg64On32->getHi();
1410     finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes);
1411     finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes);
1412     return;
1413   }
1414   assert(Ty != IceType_i64);
1415 
1416   const int32_t ArgStackOffset = BasicFrameOffset + *InArgsSizeBytes;
1417   *InArgsSizeBytes += typeWidthInBytesOnStack(Ty);
1418 
1419   if (!Arg->hasReg()) {
1420     Arg->setStackOffset(ArgStackOffset);
1421     return;
1422   }
1423 
1424   // If the argument variable has been assigned a register, we need to copy the
1425   // value from the stack slot.
1426   Variable *Parameter = Func->makeVariable(Ty);
1427   Parameter->setMustNotHaveReg();
1428   Parameter->setStackOffset(ArgStackOffset);
1429   _mov(Arg, Parameter);
1430 }
1431 
stackSlotType()1432 Type TargetARM32::stackSlotType() { return IceType_i32; }
1433 
addProlog(CfgNode * Node)1434 void TargetARM32::addProlog(CfgNode *Node) {
1435   // Stack frame layout:
1436   //
1437   // +------------------------+
1438   // | 1. preserved registers |
1439   // +------------------------+
1440   // | 2. padding             |
1441   // +------------------------+ <--- FramePointer (if used)
1442   // | 3. global spill area   |
1443   // +------------------------+
1444   // | 4. padding             |
1445   // +------------------------+
1446   // | 5. local spill area    |
1447   // +------------------------+
1448   // | 6. padding             |
1449   // +------------------------+
1450   // | 7. allocas (variable)  |
1451   // +------------------------+
1452   // | 8. padding             |
1453   // +------------------------+
1454   // | 9. out args            |
1455   // +------------------------+ <--- StackPointer
1456   //
1457   // The following variables record the size in bytes of the given areas:
1458   //  * PreservedRegsSizeBytes: area 1
1459   //  * SpillAreaPaddingBytes:  area 2
1460   //  * GlobalsSize:            area 3
1461   //  * GlobalsAndSubsequentPaddingSize: areas 3 - 4
1462   //  * LocalsSpillAreaSize:    area 5
1463   //  * SpillAreaSizeBytes:     areas 2 - 6, and 9
1464   //  * MaxOutArgsSizeBytes:    area 9
1465   //
1466   // Determine stack frame offsets for each Variable without a register
1467   // assignment.  This can be done as one variable per stack slot.  Or, do
1468   // coalescing by running the register allocator again with an infinite set of
1469   // registers (as a side effect, this gives variables a second chance at
1470   // physical register assignment).
1471   //
1472   // A middle ground approach is to leverage sparsity and allocate one block of
1473   // space on the frame for globals (variables with multi-block lifetime), and
1474   // one block to share for locals (single-block lifetime).
1475 
1476   Context.init(Node);
1477   Context.setInsertPoint(Context.getCur());
1478 
1479   SmallBitVector CalleeSaves = getRegisterSet(RegSet_CalleeSave, RegSet_None);
1480   RegsUsed = SmallBitVector(CalleeSaves.size());
1481   VarList SortedSpilledVariables;
1482   size_t GlobalsSize = 0;
1483   // If there is a separate locals area, this represents that area. Otherwise
1484   // it counts any variable not counted by GlobalsSize.
1485   SpillAreaSizeBytes = 0;
1486   // If there is a separate locals area, this specifies the alignment for it.
1487   uint32_t LocalsSlotsAlignmentBytes = 0;
1488   // The entire spill locations area gets aligned to largest natural alignment
1489   // of the variables that have a spill slot.
1490   uint32_t SpillAreaAlignmentBytes = 0;
1491   // For now, we don't have target-specific variables that need special
1492   // treatment (no stack-slot-linked SpillVariable type).
1493   std::function<bool(Variable *)> TargetVarHook = [](Variable *Var) {
1494     static constexpr bool AssignStackSlot = false;
1495     static constexpr bool DontAssignStackSlot = !AssignStackSlot;
1496     if (llvm::isa<Variable64On32>(Var)) {
1497       return DontAssignStackSlot;
1498     }
1499     return AssignStackSlot;
1500   };
1501 
1502   // Compute the list of spilled variables and bounds for GlobalsSize, etc.
1503   getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize,
1504                         &SpillAreaSizeBytes, &SpillAreaAlignmentBytes,
1505                         &LocalsSlotsAlignmentBytes, TargetVarHook);
1506   uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes;
1507   SpillAreaSizeBytes += GlobalsSize;
1508 
1509   // Add push instructions for preserved registers. On ARM, "push" can push a
1510   // whole list of GPRs via a bitmask (0-15). Unlike x86, ARM also has
1511   // callee-saved float/vector registers.
1512   //
1513   // The "vpush" instruction can handle a whole list of float/vector registers,
1514   // but it only handles contiguous sequences of registers by specifying the
1515   // start and the length.
1516   PreservedGPRs.reserve(CalleeSaves.size());
1517   PreservedSRegs.reserve(CalleeSaves.size());
1518 
1519   // Consider FP and LR as callee-save / used as needed.
1520   if (UsesFramePointer) {
1521     if (RegsUsed[RegARM32::Reg_fp]) {
1522       llvm::report_fatal_error("Frame pointer has been used.");
1523     }
1524     CalleeSaves[RegARM32::Reg_fp] = true;
1525     RegsUsed[RegARM32::Reg_fp] = true;
1526   }
1527   if (!MaybeLeafFunc) {
1528     CalleeSaves[RegARM32::Reg_lr] = true;
1529     RegsUsed[RegARM32::Reg_lr] = true;
1530   }
1531 
1532   // Make two passes over the used registers. The first pass records all the
1533   // used registers -- and their aliases. Then, we figure out which GPRs and
1534   // VFP S registers should be saved. We don't bother saving D/Q registers
1535   // because their uses are recorded as S regs uses.
1536   SmallBitVector ToPreserve(RegARM32::Reg_NUM);
1537   for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
1538     if (NeedSandboxing && i == RegARM32::Reg_r9) {
1539       // r9 is never updated in sandboxed code.
1540       continue;
1541     }
1542     if (CalleeSaves[i] && RegsUsed[i]) {
1543       ToPreserve |= RegisterAliases[i];
1544     }
1545   }
1546 
1547   uint32_t NumCallee = 0;
1548   size_t PreservedRegsSizeBytes = 0;
1549 
1550   // RegClasses is a tuple of
1551   //
1552   // <First Register in Class, Last Register in Class, Vector of Save Registers>
1553   //
1554   // We use this tuple to figure out which register we should push/pop during
1555   // prolog/epilog.
1556   using RegClassType = std::tuple<uint32_t, uint32_t, VarList *>;
1557   const RegClassType RegClasses[] = {
1558       RegClassType(RegARM32::Reg_GPR_First, RegARM32::Reg_GPR_Last,
1559                    &PreservedGPRs),
1560       RegClassType(RegARM32::Reg_SREG_First, RegARM32::Reg_SREG_Last,
1561                    &PreservedSRegs)};
1562   for (const auto &RegClass : RegClasses) {
1563     const uint32_t FirstRegInClass = std::get<0>(RegClass);
1564     const uint32_t LastRegInClass = std::get<1>(RegClass);
1565     VarList *const PreservedRegsInClass = std::get<2>(RegClass);
1566     for (uint32_t Reg = FirstRegInClass; Reg <= LastRegInClass; ++Reg) {
1567       if (!ToPreserve[Reg]) {
1568         continue;
1569       }
1570       ++NumCallee;
1571       Variable *PhysicalRegister = getPhysicalRegister(RegNumT::fromInt(Reg));
1572       PreservedRegsSizeBytes +=
1573           typeWidthInBytesOnStack(PhysicalRegister->getType());
1574       PreservedRegsInClass->push_back(PhysicalRegister);
1575     }
1576   }
1577 
1578   Ctx->statsUpdateRegistersSaved(NumCallee);
1579   if (!PreservedSRegs.empty())
1580     _push(PreservedSRegs);
1581   if (!PreservedGPRs.empty())
1582     _push(PreservedGPRs);
1583 
1584   // Generate "mov FP, SP" if needed.
1585   if (UsesFramePointer) {
1586     Variable *FP = getPhysicalRegister(RegARM32::Reg_fp);
1587     Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
1588     _mov(FP, SP);
1589     // Keep FP live for late-stage liveness analysis (e.g. asm-verbose mode).
1590     Context.insert<InstFakeUse>(FP);
1591   }
1592 
1593   // Align the variables area. SpillAreaPaddingBytes is the size of the region
1594   // after the preserved registers and before the spill areas.
1595   // LocalsSlotsPaddingBytes is the amount of padding between the globals and
1596   // locals area if they are separate.
1597   assert(SpillAreaAlignmentBytes <= ARM32_STACK_ALIGNMENT_BYTES);
1598   assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes);
1599   uint32_t SpillAreaPaddingBytes = 0;
1600   uint32_t LocalsSlotsPaddingBytes = 0;
1601   alignStackSpillAreas(PreservedRegsSizeBytes, SpillAreaAlignmentBytes,
1602                        GlobalsSize, LocalsSlotsAlignmentBytes,
1603                        &SpillAreaPaddingBytes, &LocalsSlotsPaddingBytes);
1604   SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes;
1605   uint32_t GlobalsAndSubsequentPaddingSize =
1606       GlobalsSize + LocalsSlotsPaddingBytes;
1607 
1608   // Adds the out args space to the stack, and align SP if necessary.
1609   if (!NeedsStackAlignment) {
1610     SpillAreaSizeBytes += MaxOutArgsSizeBytes;
1611   } else {
1612     uint32_t StackOffset = PreservedRegsSizeBytes;
1613     uint32_t StackSize = applyStackAlignment(StackOffset + SpillAreaSizeBytes);
1614     StackSize = applyStackAlignment(StackSize + MaxOutArgsSizeBytes);
1615     SpillAreaSizeBytes = StackSize - StackOffset;
1616   }
1617 
1618   // Combine fixed alloca with SpillAreaSize.
1619   SpillAreaSizeBytes += FixedAllocaSizeBytes;
1620 
1621   // Generate "sub sp, SpillAreaSizeBytes"
1622   if (SpillAreaSizeBytes) {
1623     // Use the scratch register if needed to legalize the immediate.
1624     Operand *SubAmount = legalize(Ctx->getConstantInt32(SpillAreaSizeBytes),
1625                                   Legal_Reg | Legal_Flex, getReservedTmpReg());
1626     Sandboxer(this).sub_sp(SubAmount);
1627     if (FixedAllocaAlignBytes > ARM32_STACK_ALIGNMENT_BYTES) {
1628       Sandboxer(this).align_sp(FixedAllocaAlignBytes);
1629     }
1630   }
1631 
1632   Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes);
1633 
1634   // Fill in stack offsets for stack args, and copy args into registers for
1635   // those that were register-allocated. Args are pushed right to left, so
1636   // Arg[0] is closest to the stack/frame pointer.
1637   Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());
1638   size_t BasicFrameOffset = PreservedRegsSizeBytes;
1639   if (!UsesFramePointer)
1640     BasicFrameOffset += SpillAreaSizeBytes;
1641 
1642   materializeGotAddr(Node);
1643 
1644   const VarList &Args = Func->getArgs();
1645   size_t InArgsSizeBytes = 0;
1646   TargetARM32::CallingConv CC;
1647   for (Variable *Arg : Args) {
1648     RegNumT DummyReg;
1649     const Type Ty = Arg->getType();
1650 
1651     // Skip arguments passed in registers.
1652     if (isScalarIntegerType(Ty)) {
1653       if (CC.argInGPR(Ty, &DummyReg)) {
1654         continue;
1655       }
1656     } else {
1657       if (CC.argInVFP(Ty, &DummyReg)) {
1658         continue;
1659       }
1660     }
1661     finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, &InArgsSizeBytes);
1662   }
1663 
1664   // Fill in stack offsets for locals.
1665   assignVarStackSlots(SortedSpilledVariables, SpillAreaPaddingBytes,
1666                       SpillAreaSizeBytes, GlobalsAndSubsequentPaddingSize,
1667                       UsesFramePointer);
1668   this->HasComputedFrame = true;
1669 
1670   if (BuildDefs::dump() && Func->isVerbose(IceV_Frame)) {
1671     OstreamLocker _(Func->getContext());
1672     Ostream &Str = Func->getContext()->getStrDump();
1673 
1674     Str << "Stack layout:\n";
1675     uint32_t SPAdjustmentPaddingSize =
1676         SpillAreaSizeBytes - LocalsSpillAreaSize -
1677         GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes -
1678         MaxOutArgsSizeBytes;
1679     Str << " in-args = " << InArgsSizeBytes << " bytes\n"
1680         << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n"
1681         << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n"
1682         << " globals spill area = " << GlobalsSize << " bytes\n"
1683         << " globals-locals spill areas intermediate padding = "
1684         << GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n"
1685         << " locals spill area = " << LocalsSpillAreaSize << " bytes\n"
1686         << " SP alignment padding = " << SPAdjustmentPaddingSize << " bytes\n";
1687 
1688     Str << "Stack details:\n"
1689         << " SP adjustment = " << SpillAreaSizeBytes << " bytes\n"
1690         << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n"
1691         << " outgoing args size = " << MaxOutArgsSizeBytes << " bytes\n"
1692         << " locals spill area alignment = " << LocalsSlotsAlignmentBytes
1693         << " bytes\n"
1694         << " is FP based = " << UsesFramePointer << "\n";
1695   }
1696 }
1697 
addEpilog(CfgNode * Node)1698 void TargetARM32::addEpilog(CfgNode *Node) {
1699   InstList &Insts = Node->getInsts();
1700   InstList::reverse_iterator RI, E;
1701   for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) {
1702     if (llvm::isa<InstARM32Ret>(*RI))
1703       break;
1704   }
1705   if (RI == E)
1706     return;
1707 
1708   // Convert the reverse_iterator position into its corresponding (forward)
1709   // iterator position.
1710   InstList::iterator InsertPoint = reverseToForwardIterator(RI);
1711   --InsertPoint;
1712   Context.init(Node);
1713   Context.setInsertPoint(InsertPoint);
1714 
1715   Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
1716   if (UsesFramePointer) {
1717     Variable *FP = getPhysicalRegister(RegARM32::Reg_fp);
1718     // For late-stage liveness analysis (e.g. asm-verbose mode), adding a fake
1719     // use of SP before the assignment of SP=FP keeps previous SP adjustments
1720     // from being dead-code eliminated.
1721     Context.insert<InstFakeUse>(SP);
1722     Sandboxer(this).reset_sp(FP);
1723   } else {
1724     // add SP, SpillAreaSizeBytes
1725     if (SpillAreaSizeBytes) {
1726       // Use the scratch register if needed to legalize the immediate.
1727       Operand *AddAmount =
1728           legalize(Ctx->getConstantInt32(SpillAreaSizeBytes),
1729                    Legal_Reg | Legal_Flex, getReservedTmpReg());
1730       Sandboxer(this).add_sp(AddAmount);
1731     }
1732   }
1733 
1734   if (!PreservedGPRs.empty())
1735     _pop(PreservedGPRs);
1736   if (!PreservedSRegs.empty())
1737     _pop(PreservedSRegs);
1738 
1739   if (!getFlags().getUseSandboxing())
1740     return;
1741 
1742   // Change the original ret instruction into a sandboxed return sequence.
1743   //
1744   // bundle_lock
1745   // bic lr, #0xc000000f
1746   // bx lr
1747   // bundle_unlock
1748   //
1749   // This isn't just aligning to the getBundleAlignLog2Bytes(). It needs to
1750   // restrict to the lower 1GB as well.
1751   Variable *LR = getPhysicalRegister(RegARM32::Reg_lr);
1752   Variable *RetValue = nullptr;
1753   if (RI->getSrcSize())
1754     RetValue = llvm::cast<Variable>(RI->getSrc(0));
1755 
1756   Sandboxer(this).ret(LR, RetValue);
1757 
1758   RI->setDeleted();
1759 }
1760 
isLegalMemOffset(Type Ty,int32_t Offset) const1761 bool TargetARM32::isLegalMemOffset(Type Ty, int32_t Offset) const {
1762   constexpr bool ZeroExt = false;
1763   return OperandARM32Mem::canHoldOffset(Ty, ZeroExt, Offset);
1764 }
1765 
newBaseRegister(Variable * Base,int32_t Offset,RegNumT ScratchRegNum)1766 Variable *TargetARM32::PostLoweringLegalizer::newBaseRegister(
1767     Variable *Base, int32_t Offset, RegNumT ScratchRegNum) {
1768   // Legalize will likely need a movw/movt combination, but if the top bits are
1769   // all 0 from negating the offset and subtracting, we could use that instead.
1770   const bool ShouldSub = Offset != 0 && (-Offset & 0xFFFF0000) == 0;
1771   Variable *ScratchReg = Target->makeReg(IceType_i32, ScratchRegNum);
1772   if (ShouldSub) {
1773     Operand *OffsetVal =
1774         Target->legalize(Target->Ctx->getConstantInt32(-Offset),
1775                          Legal_Reg | Legal_Flex, ScratchRegNum);
1776     Target->_sub(ScratchReg, Base, OffsetVal);
1777   } else {
1778     Operand *OffsetVal =
1779         Target->legalize(Target->Ctx->getConstantInt32(Offset),
1780                          Legal_Reg | Legal_Flex, ScratchRegNum);
1781     Target->_add(ScratchReg, Base, OffsetVal);
1782   }
1783 
1784   if (ScratchRegNum == Target->getReservedTmpReg()) {
1785     const bool BaseIsStackOrFramePtr =
1786         Base->getRegNum() == Target->getFrameOrStackReg();
1787     // There is currently no code path that would trigger this assertion, so we
1788     // leave this assertion here in case it is ever violated. This is not a
1789     // fatal error (thus the use of assert() and not llvm::report_fatal_error)
1790     // as the program compiled by subzero will still work correctly.
1791     assert(BaseIsStackOrFramePtr);
1792     // Side-effect: updates TempBase to reflect the new Temporary.
1793     if (BaseIsStackOrFramePtr) {
1794       TempBaseReg = ScratchReg;
1795       TempBaseOffset = Offset;
1796     } else {
1797       TempBaseReg = nullptr;
1798       TempBaseOffset = 0;
1799     }
1800   }
1801 
1802   return ScratchReg;
1803 }
1804 
createMemOperand(Type Ty,Variable * Base,int32_t Offset,bool AllowOffsets)1805 OperandARM32Mem *TargetARM32::PostLoweringLegalizer::createMemOperand(
1806     Type Ty, Variable *Base, int32_t Offset, bool AllowOffsets) {
1807   assert(!Base->isRematerializable());
1808   if (Offset == 0 || (AllowOffsets && Target->isLegalMemOffset(Ty, Offset))) {
1809     return OperandARM32Mem::create(
1810         Target->Func, Ty, Base,
1811         llvm::cast<ConstantInteger32>(Target->Ctx->getConstantInt32(Offset)),
1812         OperandARM32Mem::Offset);
1813   }
1814 
1815   if (!AllowOffsets || TempBaseReg == nullptr) {
1816     newBaseRegister(Base, Offset, Target->getReservedTmpReg());
1817   }
1818 
1819   int32_t OffsetDiff = Offset - TempBaseOffset;
1820   assert(AllowOffsets || OffsetDiff == 0);
1821 
1822   if (!Target->isLegalMemOffset(Ty, OffsetDiff)) {
1823     newBaseRegister(Base, Offset, Target->getReservedTmpReg());
1824     OffsetDiff = 0;
1825   }
1826 
1827   assert(!TempBaseReg->isRematerializable());
1828   return OperandARM32Mem::create(
1829       Target->Func, Ty, TempBaseReg,
1830       llvm::cast<ConstantInteger32>(Target->Ctx->getConstantInt32(OffsetDiff)),
1831       OperandARM32Mem::Offset);
1832 }
1833 
resetTempBaseIfClobberedBy(const Inst * Instr)1834 void TargetARM32::PostLoweringLegalizer::resetTempBaseIfClobberedBy(
1835     const Inst *Instr) {
1836   bool ClobbersTempBase = false;
1837   if (TempBaseReg != nullptr) {
1838     Variable *Dest = Instr->getDest();
1839     if (llvm::isa<InstARM32Call>(Instr)) {
1840       // The following assertion is an invariant, so we remove it from the if
1841       // test. If the invariant is ever broken/invalidated/changed, remember
1842       // to add it back to the if condition.
1843       assert(TempBaseReg->getRegNum() == Target->getReservedTmpReg());
1844       // The linker may need to clobber IP if the call is too far from PC. Thus,
1845       // we assume IP will be overwritten.
1846       ClobbersTempBase = true;
1847     } else if (Dest != nullptr &&
1848                Dest->getRegNum() == TempBaseReg->getRegNum()) {
1849       // Register redefinition.
1850       ClobbersTempBase = true;
1851     }
1852   }
1853 
1854   if (ClobbersTempBase) {
1855     TempBaseReg = nullptr;
1856     TempBaseOffset = 0;
1857   }
1858 }
1859 
legalizeMov(InstARM32Mov * MovInstr)1860 void TargetARM32::PostLoweringLegalizer::legalizeMov(InstARM32Mov *MovInstr) {
1861   Variable *Dest = MovInstr->getDest();
1862   assert(Dest != nullptr);
1863   Type DestTy = Dest->getType();
1864   assert(DestTy != IceType_i64);
1865 
1866   Operand *Src = MovInstr->getSrc(0);
1867   Type SrcTy = Src->getType();
1868   (void)SrcTy;
1869   assert(SrcTy != IceType_i64);
1870 
1871   if (MovInstr->isMultiDest() || MovInstr->isMultiSource())
1872     return;
1873 
1874   bool Legalized = false;
1875   if (!Dest->hasReg()) {
1876     auto *SrcR = llvm::cast<Variable>(Src);
1877     assert(SrcR->hasReg());
1878     assert(!SrcR->isRematerializable());
1879     const int32_t Offset = Dest->getStackOffset();
1880     // This is a _mov(Mem(), Variable), i.e., a store.
1881     TargetARM32::Sandboxer(Target)
1882         .str(SrcR, createMemOperand(DestTy, StackOrFrameReg, Offset),
1883              MovInstr->getPredicate());
1884     // _str() does not have a Dest, so we add a fake-def(Dest).
1885     Target->Context.insert<InstFakeDef>(Dest);
1886     Legalized = true;
1887   } else if (auto *Var = llvm::dyn_cast<Variable>(Src)) {
1888     if (Var->isRematerializable()) {
1889       // This is equivalent to an x86 _lea(RematOffset(%esp/%ebp), Variable).
1890 
1891       // ExtraOffset is only needed for frame-pointer based frames as we have
1892       // to account for spill storage.
1893       const int32_t ExtraOffset = (Var->getRegNum() == Target->getFrameReg())
1894                                       ? Target->getFrameFixedAllocaOffset()
1895                                       : 0;
1896 
1897       const int32_t Offset = Var->getStackOffset() + ExtraOffset;
1898       Variable *Base = Target->getPhysicalRegister(Var->getRegNum());
1899       Variable *T = newBaseRegister(Base, Offset, Dest->getRegNum());
1900       Target->_mov(Dest, T);
1901       Legalized = true;
1902     } else {
1903       if (!Var->hasReg()) {
1904         // This is a _mov(Variable, Mem()), i.e., a load.
1905         const int32_t Offset = Var->getStackOffset();
1906         TargetARM32::Sandboxer(Target)
1907             .ldr(Dest, createMemOperand(DestTy, StackOrFrameReg, Offset),
1908                  MovInstr->getPredicate());
1909         Legalized = true;
1910       }
1911     }
1912   }
1913 
1914   if (Legalized) {
1915     if (MovInstr->isDestRedefined()) {
1916       Target->_set_dest_redefined();
1917     }
1918     MovInstr->setDeleted();
1919   }
1920 }
1921 
1922 // ARM32 address modes:
1923 //  ld/st i[8|16|32]: [reg], [reg +/- imm12], [pc +/- imm12],
1924 //                    [reg +/- reg << shamt5]
1925 //  ld/st f[32|64]  : [reg], [reg +/- imm8] , [pc +/- imm8]
1926 //  ld/st vectors   : [reg]
1927 //
1928 // For now, we don't handle address modes with Relocatables.
1929 namespace {
1930 // MemTraits contains per-type valid address mode information.
1931 #define X(tag, elementty, int_width, fp_width, uvec_width, svec_width, sbits,  \
1932           ubits, rraddr, shaddr)                                               \
1933   static_assert(!(shaddr) || rraddr, "Check ICETYPEARM32_TABLE::" #tag);
1934 ICETYPEARM32_TABLE
1935 #undef X
1936 
1937 static const struct {
1938   int32_t ValidImmMask;
1939   bool CanHaveImm;
1940   bool CanHaveIndex;
1941   bool CanHaveShiftedIndex;
1942 } MemTraits[] = {
1943 #define X(tag, elementty, int_width, fp_width, uvec_width, svec_width, sbits,  \
1944           ubits, rraddr, shaddr)                                               \
1945   { (1 << ubits) - 1, (ubits) > 0, rraddr, shaddr, }                           \
1946   ,
1947     ICETYPEARM32_TABLE
1948 #undef X
1949 };
1950 static constexpr SizeT MemTraitsSize = llvm::array_lengthof(MemTraits);
1951 } // end of anonymous namespace
1952 
1953 OperandARM32Mem *
legalizeMemOperand(OperandARM32Mem * Mem,bool AllowOffsets)1954 TargetARM32::PostLoweringLegalizer::legalizeMemOperand(OperandARM32Mem *Mem,
1955                                                        bool AllowOffsets) {
1956   assert(!Mem->isRegReg() || !Mem->getIndex()->isRematerializable());
1957   assert(
1958       Mem->isRegReg() ||
1959       Target->isLegalMemOffset(Mem->getType(), Mem->getOffset()->getValue()));
1960 
1961   bool Legalized = false;
1962   Variable *Base = Mem->getBase();
1963   int32_t Offset = Mem->isRegReg() ? 0 : Mem->getOffset()->getValue();
1964   if (Base->isRematerializable()) {
1965     const int32_t ExtraOffset = (Base->getRegNum() == Target->getFrameReg())
1966                                     ? Target->getFrameFixedAllocaOffset()
1967                                     : 0;
1968     Offset += Base->getStackOffset() + ExtraOffset;
1969     Base = Target->getPhysicalRegister(Base->getRegNum());
1970     assert(!Base->isRematerializable());
1971     Legalized = true;
1972   }
1973 
1974   if (!Legalized && !Target->NeedSandboxing) {
1975     return nullptr;
1976   }
1977 
1978   if (!Mem->isRegReg()) {
1979     return createMemOperand(Mem->getType(), Base, Offset, AllowOffsets);
1980   }
1981 
1982   if (Target->NeedSandboxing) {
1983     llvm::report_fatal_error("Reg-Reg address mode is not allowed.");
1984   }
1985 
1986   assert(MemTraits[Mem->getType()].CanHaveIndex);
1987 
1988   if (Offset != 0) {
1989     if (TempBaseReg == nullptr) {
1990       Base = newBaseRegister(Base, Offset, Target->getReservedTmpReg());
1991     } else {
1992       uint32_t Imm8, Rotate;
1993       const int32_t OffsetDiff = Offset - TempBaseOffset;
1994       if (OffsetDiff == 0) {
1995         Base = TempBaseReg;
1996       } else if (OperandARM32FlexImm::canHoldImm(OffsetDiff, &Rotate, &Imm8)) {
1997         auto *OffsetDiffF = OperandARM32FlexImm::create(
1998             Target->Func, IceType_i32, Imm8, Rotate);
1999         Target->_add(TempBaseReg, TempBaseReg, OffsetDiffF);
2000         TempBaseOffset += OffsetDiff;
2001         Base = TempBaseReg;
2002       } else if (OperandARM32FlexImm::canHoldImm(-OffsetDiff, &Rotate, &Imm8)) {
2003         auto *OffsetDiffF = OperandARM32FlexImm::create(
2004             Target->Func, IceType_i32, Imm8, Rotate);
2005         Target->_sub(TempBaseReg, TempBaseReg, OffsetDiffF);
2006         TempBaseOffset += OffsetDiff;
2007         Base = TempBaseReg;
2008       } else {
2009         Base = newBaseRegister(Base, Offset, Target->getReservedTmpReg());
2010       }
2011     }
2012   }
2013 
2014   return OperandARM32Mem::create(Target->Func, Mem->getType(), Base,
2015                                  Mem->getIndex(), Mem->getShiftOp(),
2016                                  Mem->getShiftAmt(), Mem->getAddrMode());
2017 }
2018 
postLowerLegalization()2019 void TargetARM32::postLowerLegalization() {
2020   // If a stack variable's frame offset doesn't fit, convert from:
2021   //   ldr X, OFF[SP]
2022   // to:
2023   //   movw/movt TMP, OFF_PART
2024   //   add TMP, TMP, SP
2025   //   ldr X, OFF_MORE[TMP]
2026   //
2027   // This is safe because we have reserved TMP, and add for ARM does not
2028   // clobber the flags register.
2029   Func->dump("Before postLowerLegalization");
2030   assert(hasComputedFrame());
2031   // Do a fairly naive greedy clustering for now. Pick the first stack slot
2032   // that's out of bounds and make a new base reg using the architecture's temp
2033   // register. If that works for the next slot, then great. Otherwise, create a
2034   // new base register, clobbering the previous base register. Never share a
2035   // base reg across different basic blocks. This isn't ideal if local and
2036   // multi-block variables are far apart and their references are interspersed.
2037   // It may help to be more coordinated about assign stack slot numbers and may
2038   // help to assign smaller offsets to higher-weight variables so that they
2039   // don't depend on this legalization.
2040   for (CfgNode *Node : Func->getNodes()) {
2041     Context.init(Node);
2042     // One legalizer per basic block, otherwise we would share the Temporary
2043     // Base Register between basic blocks.
2044     PostLoweringLegalizer Legalizer(this);
2045     while (!Context.atEnd()) {
2046       PostIncrLoweringContext PostIncrement(Context);
2047       Inst *CurInstr = iteratorToInst(Context.getCur());
2048 
2049       // Check if the previous TempBaseReg is clobbered, and reset if needed.
2050       Legalizer.resetTempBaseIfClobberedBy(CurInstr);
2051 
2052       if (auto *MovInstr = llvm::dyn_cast<InstARM32Mov>(CurInstr)) {
2053         Legalizer.legalizeMov(MovInstr);
2054       } else if (auto *LdrInstr = llvm::dyn_cast<InstARM32Ldr>(CurInstr)) {
2055         if (OperandARM32Mem *LegalMem = Legalizer.legalizeMemOperand(
2056                 llvm::cast<OperandARM32Mem>(LdrInstr->getSrc(0)))) {
2057           Sandboxer(this)
2058               .ldr(CurInstr->getDest(), LegalMem, LdrInstr->getPredicate());
2059           CurInstr->setDeleted();
2060         }
2061       } else if (auto *LdrexInstr = llvm::dyn_cast<InstARM32Ldrex>(CurInstr)) {
2062         constexpr bool DisallowOffsetsBecauseLdrex = false;
2063         if (OperandARM32Mem *LegalMem = Legalizer.legalizeMemOperand(
2064                 llvm::cast<OperandARM32Mem>(LdrexInstr->getSrc(0)),
2065                 DisallowOffsetsBecauseLdrex)) {
2066           Sandboxer(this)
2067               .ldrex(CurInstr->getDest(), LegalMem, LdrexInstr->getPredicate());
2068           CurInstr->setDeleted();
2069         }
2070       } else if (auto *StrInstr = llvm::dyn_cast<InstARM32Str>(CurInstr)) {
2071         if (OperandARM32Mem *LegalMem = Legalizer.legalizeMemOperand(
2072                 llvm::cast<OperandARM32Mem>(StrInstr->getSrc(1)))) {
2073           Sandboxer(this).str(llvm::cast<Variable>(CurInstr->getSrc(0)),
2074                               LegalMem, StrInstr->getPredicate());
2075           CurInstr->setDeleted();
2076         }
2077       } else if (auto *StrexInstr = llvm::dyn_cast<InstARM32Strex>(CurInstr)) {
2078         constexpr bool DisallowOffsetsBecauseStrex = false;
2079         if (OperandARM32Mem *LegalMem = Legalizer.legalizeMemOperand(
2080                 llvm::cast<OperandARM32Mem>(StrexInstr->getSrc(1)),
2081                 DisallowOffsetsBecauseStrex)) {
2082           Sandboxer(this).strex(CurInstr->getDest(),
2083                                 llvm::cast<Variable>(CurInstr->getSrc(0)),
2084                                 LegalMem, StrexInstr->getPredicate());
2085           CurInstr->setDeleted();
2086         }
2087       }
2088 
2089       // Sanity-check: the Legalizer will either have no Temp, or it will be
2090       // bound to IP.
2091       Legalizer.assertNoTempOrAssignedToIP();
2092     }
2093   }
2094 }
2095 
loOperand(Operand * Operand)2096 Operand *TargetARM32::loOperand(Operand *Operand) {
2097   assert(Operand->getType() == IceType_i64);
2098   if (Operand->getType() != IceType_i64)
2099     return Operand;
2100   if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Operand))
2101     return Var64On32->getLo();
2102   if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand))
2103     return Ctx->getConstantInt32(static_cast<uint32_t>(Const->getValue()));
2104   if (auto *Mem = llvm::dyn_cast<OperandARM32Mem>(Operand)) {
2105     // Conservatively disallow memory operands with side-effects (pre/post
2106     // increment) in case of duplication.
2107     assert(Mem->getAddrMode() == OperandARM32Mem::Offset ||
2108            Mem->getAddrMode() == OperandARM32Mem::NegOffset);
2109     if (Mem->isRegReg()) {
2110       Variable *IndexR = legalizeToReg(Mem->getIndex());
2111       return OperandARM32Mem::create(Func, IceType_i32, Mem->getBase(), IndexR,
2112                                      Mem->getShiftOp(), Mem->getShiftAmt(),
2113                                      Mem->getAddrMode());
2114     } else {
2115       return OperandARM32Mem::create(Func, IceType_i32, Mem->getBase(),
2116                                      Mem->getOffset(), Mem->getAddrMode());
2117     }
2118   }
2119   llvm::report_fatal_error("Unsupported operand type");
2120   return nullptr;
2121 }
2122 
hiOperand(Operand * Operand)2123 Operand *TargetARM32::hiOperand(Operand *Operand) {
2124   assert(Operand->getType() == IceType_i64);
2125   if (Operand->getType() != IceType_i64)
2126     return Operand;
2127   if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Operand))
2128     return Var64On32->getHi();
2129   if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
2130     return Ctx->getConstantInt32(
2131         static_cast<uint32_t>(Const->getValue() >> 32));
2132   }
2133   if (auto *Mem = llvm::dyn_cast<OperandARM32Mem>(Operand)) {
2134     // Conservatively disallow memory operands with side-effects in case of
2135     // duplication.
2136     assert(Mem->getAddrMode() == OperandARM32Mem::Offset ||
2137            Mem->getAddrMode() == OperandARM32Mem::NegOffset);
2138     const Type SplitType = IceType_i32;
2139     if (Mem->isRegReg()) {
2140       // We have to make a temp variable T, and add 4 to either Base or Index.
2141       // The Index may be shifted, so adding 4 can mean something else. Thus,
2142       // prefer T := Base + 4, and use T as the new Base.
2143       Variable *Base = Mem->getBase();
2144       Constant *Four = Ctx->getConstantInt32(4);
2145       Variable *NewBase = Func->makeVariable(Base->getType());
2146       lowerArithmetic(InstArithmetic::create(Func, InstArithmetic::Add, NewBase,
2147                                              Base, Four));
2148       Variable *BaseR = legalizeToReg(NewBase);
2149       Variable *IndexR = legalizeToReg(Mem->getIndex());
2150       return OperandARM32Mem::create(Func, SplitType, BaseR, IndexR,
2151                                      Mem->getShiftOp(), Mem->getShiftAmt(),
2152                                      Mem->getAddrMode());
2153     } else {
2154       Variable *Base = Mem->getBase();
2155       ConstantInteger32 *Offset = Mem->getOffset();
2156       assert(!Utils::WouldOverflowAdd(Offset->getValue(), 4));
2157       int32_t NextOffsetVal = Offset->getValue() + 4;
2158       constexpr bool ZeroExt = false;
2159       if (!OperandARM32Mem::canHoldOffset(SplitType, ZeroExt, NextOffsetVal)) {
2160         // We have to make a temp variable and add 4 to either Base or Offset.
2161         // If we add 4 to Offset, this will convert a non-RegReg addressing
2162         // mode into a RegReg addressing mode. Since NaCl sandboxing disallows
2163         // RegReg addressing modes, prefer adding to base and replacing
2164         // instead. Thus we leave the old offset alone.
2165         Constant *_4 = Ctx->getConstantInt32(4);
2166         Variable *NewBase = Func->makeVariable(Base->getType());
2167         lowerArithmetic(InstArithmetic::create(Func, InstArithmetic::Add,
2168                                                NewBase, Base, _4));
2169         Base = NewBase;
2170       } else {
2171         Offset =
2172             llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(NextOffsetVal));
2173       }
2174       Variable *BaseR = legalizeToReg(Base);
2175       return OperandARM32Mem::create(Func, SplitType, BaseR, Offset,
2176                                      Mem->getAddrMode());
2177     }
2178   }
2179   llvm::report_fatal_error("Unsupported operand type");
2180   return nullptr;
2181 }
2182 
getRegisterSet(RegSetMask Include,RegSetMask Exclude) const2183 SmallBitVector TargetARM32::getRegisterSet(RegSetMask Include,
2184                                            RegSetMask Exclude) const {
2185   SmallBitVector Registers(RegARM32::Reg_NUM);
2186 
2187   for (uint32_t i = 0; i < RegARM32::Reg_NUM; ++i) {
2188     const auto &Entry = RegARM32::RegTable[i];
2189     if (Entry.Scratch && (Include & RegSet_CallerSave))
2190       Registers[i] = true;
2191     if (Entry.Preserved && (Include & RegSet_CalleeSave))
2192       Registers[i] = true;
2193     if (Entry.StackPtr && (Include & RegSet_StackPointer))
2194       Registers[i] = true;
2195     if (Entry.FramePtr && (Include & RegSet_FramePointer))
2196       Registers[i] = true;
2197     if (Entry.Scratch && (Exclude & RegSet_CallerSave))
2198       Registers[i] = false;
2199     if (Entry.Preserved && (Exclude & RegSet_CalleeSave))
2200       Registers[i] = false;
2201     if (Entry.StackPtr && (Exclude & RegSet_StackPointer))
2202       Registers[i] = false;
2203     if (Entry.FramePtr && (Exclude & RegSet_FramePointer))
2204       Registers[i] = false;
2205   }
2206 
2207   return Registers;
2208 }
2209 
lowerAlloca(const InstAlloca * Instr)2210 void TargetARM32::lowerAlloca(const InstAlloca *Instr) {
2211   // Conservatively require the stack to be aligned. Some stack adjustment
2212   // operations implemented below assume that the stack is aligned before the
2213   // alloca. All the alloca code ensures that the stack alignment is preserved
2214   // after the alloca. The stack alignment restriction can be relaxed in some
2215   // cases.
2216   NeedsStackAlignment = true;
2217 
2218   // For default align=0, set it to the real value 1, to avoid any
2219   // bit-manipulation problems below.
2220   const uint32_t AlignmentParam = std::max(1u, Instr->getAlignInBytes());
2221 
2222   // LLVM enforces power of 2 alignment.
2223   assert(llvm::isPowerOf2_32(AlignmentParam));
2224   assert(llvm::isPowerOf2_32(ARM32_STACK_ALIGNMENT_BYTES));
2225 
2226   const uint32_t Alignment =
2227       std::max(AlignmentParam, ARM32_STACK_ALIGNMENT_BYTES);
2228   const bool OverAligned = Alignment > ARM32_STACK_ALIGNMENT_BYTES;
2229   const bool OptM1 = Func->getOptLevel() == Opt_m1;
2230   const bool AllocaWithKnownOffset = Instr->getKnownFrameOffset();
2231   const bool UseFramePointer =
2232       hasFramePointer() || OverAligned || !AllocaWithKnownOffset || OptM1;
2233 
2234   if (UseFramePointer)
2235     setHasFramePointer();
2236 
2237   Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
2238   if (OverAligned) {
2239     Sandboxer(this).align_sp(Alignment);
2240   }
2241 
2242   Variable *Dest = Instr->getDest();
2243   Operand *TotalSize = Instr->getSizeInBytes();
2244 
2245   if (const auto *ConstantTotalSize =
2246           llvm::dyn_cast<ConstantInteger32>(TotalSize)) {
2247     const uint32_t Value =
2248         Utils::applyAlignment(ConstantTotalSize->getValue(), Alignment);
2249     // Constant size alloca.
2250     if (!UseFramePointer) {
2251       // If we don't need a Frame Pointer, this alloca has a known offset to the
2252       // stack pointer. We don't need adjust the stack pointer, nor assign any
2253       // value to Dest, as Dest is rematerializable.
2254       assert(Dest->isRematerializable());
2255       FixedAllocaSizeBytes += Value;
2256       Context.insert<InstFakeDef>(Dest);
2257       return;
2258     }
2259 
2260     // If a frame pointer is required, then we need to store the alloca'd result
2261     // in Dest.
2262     Operand *SubAmountRF =
2263         legalize(Ctx->getConstantInt32(Value), Legal_Reg | Legal_Flex);
2264     Sandboxer(this).sub_sp(SubAmountRF);
2265   } else {
2266     // Non-constant sizes need to be adjusted to the next highest multiple of
2267     // the required alignment at runtime.
2268     TotalSize = legalize(TotalSize, Legal_Reg | Legal_Flex);
2269     Variable *T = makeReg(IceType_i32);
2270     _mov(T, TotalSize);
2271     Operand *AddAmount = legalize(Ctx->getConstantInt32(Alignment - 1));
2272     _add(T, T, AddAmount);
2273     alignRegisterPow2(T, Alignment);
2274     Sandboxer(this).sub_sp(T);
2275   }
2276 
2277   // Adds back a few bytes to SP to account for the out args area.
2278   Variable *T = SP;
2279   if (MaxOutArgsSizeBytes != 0) {
2280     T = makeReg(getPointerType());
2281     Operand *OutArgsSizeRF = legalize(
2282         Ctx->getConstantInt32(MaxOutArgsSizeBytes), Legal_Reg | Legal_Flex);
2283     _add(T, SP, OutArgsSizeRF);
2284   }
2285 
2286   _mov(Dest, T);
2287 }
2288 
div0Check(Type Ty,Operand * SrcLo,Operand * SrcHi)2289 void TargetARM32::div0Check(Type Ty, Operand *SrcLo, Operand *SrcHi) {
2290   if (isGuaranteedNonzeroInt(SrcLo) || isGuaranteedNonzeroInt(SrcHi))
2291     return;
2292   Variable *SrcLoReg = legalizeToReg(SrcLo);
2293   switch (Ty) {
2294   default:
2295     llvm_unreachable(
2296         ("Unexpected type in div0Check: " + typeStdString(Ty)).c_str());
2297   case IceType_i8:
2298   case IceType_i16: {
2299     Operand *ShAmtImm = shAmtImm(32 - getScalarIntBitWidth(Ty));
2300     Variable *T = makeReg(IceType_i32);
2301     _lsls(T, SrcLoReg, ShAmtImm);
2302     Context.insert<InstFakeUse>(T);
2303   } break;
2304   case IceType_i32: {
2305     _tst(SrcLoReg, SrcLoReg);
2306     break;
2307   }
2308   case IceType_i64: {
2309     Variable *T = makeReg(IceType_i32);
2310     _orrs(T, SrcLoReg, legalize(SrcHi, Legal_Reg | Legal_Flex));
2311     // T isn't going to be used, but we need the side-effect of setting flags
2312     // from this operation.
2313     Context.insert<InstFakeUse>(T);
2314   }
2315   }
2316   auto *Label = InstARM32Label::create(Func, this);
2317   _br(Label, CondARM32::NE);
2318   _trap();
2319   Context.insert(Label);
2320 }
2321 
lowerIDivRem(Variable * Dest,Variable * T,Variable * Src0R,Operand * Src1,ExtInstr ExtFunc,DivInstr DivFunc,bool IsRemainder)2322 void TargetARM32::lowerIDivRem(Variable *Dest, Variable *T, Variable *Src0R,
2323                                Operand *Src1, ExtInstr ExtFunc,
2324                                DivInstr DivFunc, bool IsRemainder) {
2325   div0Check(Dest->getType(), Src1, nullptr);
2326   Variable *Src1R = legalizeToReg(Src1);
2327   Variable *T0R = Src0R;
2328   Variable *T1R = Src1R;
2329   if (Dest->getType() != IceType_i32) {
2330     T0R = makeReg(IceType_i32);
2331     (this->*ExtFunc)(T0R, Src0R, CondARM32::AL);
2332     T1R = makeReg(IceType_i32);
2333     (this->*ExtFunc)(T1R, Src1R, CondARM32::AL);
2334   }
2335   if (hasCPUFeature(TargetARM32Features::HWDivArm)) {
2336     (this->*DivFunc)(T, T0R, T1R, CondARM32::AL);
2337     if (IsRemainder) {
2338       Variable *T2 = makeReg(IceType_i32);
2339       _mls(T2, T, T1R, T0R);
2340       T = T2;
2341     }
2342     _mov(Dest, T);
2343   } else {
2344     llvm::report_fatal_error("div should have already been turned into a call");
2345   }
2346 }
2347 
2348 TargetARM32::SafeBoolChain
lowerInt1Arithmetic(const InstArithmetic * Instr)2349 TargetARM32::lowerInt1Arithmetic(const InstArithmetic *Instr) {
2350   Variable *Dest = Instr->getDest();
2351   assert(Dest->getType() == IceType_i1);
2352 
2353   // So folding didn't work for Instr. Not a problem: We just need to
2354   // materialize the Sources, and perform the operation. We create regular
2355   // Variables (and not infinite-weight ones) because this call might recurse a
2356   // lot, and we might end up with tons of infinite weight temporaries.
2357   assert(Instr->getSrcSize() == 2);
2358   Variable *Src0 = Func->makeVariable(IceType_i1);
2359   SafeBoolChain Src0Safe = lowerInt1(Src0, Instr->getSrc(0));
2360 
2361   Operand *Src1 = Instr->getSrc(1);
2362   SafeBoolChain Src1Safe = SBC_Yes;
2363 
2364   if (!llvm::isa<Constant>(Src1)) {
2365     Variable *Src1V = Func->makeVariable(IceType_i1);
2366     Src1Safe = lowerInt1(Src1V, Src1);
2367     Src1 = Src1V;
2368   }
2369 
2370   Variable *T = makeReg(IceType_i1);
2371   Src0 = legalizeToReg(Src0);
2372   Operand *Src1RF = legalize(Src1, Legal_Reg | Legal_Flex);
2373   switch (Instr->getOp()) {
2374   default:
2375     // If this Unreachable is ever executed, add the offending operation to
2376     // the list of valid consumers.
2377     llvm::report_fatal_error("Unhandled i1 Op");
2378   case InstArithmetic::And:
2379     _and(T, Src0, Src1RF);
2380     break;
2381   case InstArithmetic::Or:
2382     _orr(T, Src0, Src1RF);
2383     break;
2384   case InstArithmetic::Xor:
2385     _eor(T, Src0, Src1RF);
2386     break;
2387   }
2388   _mov(Dest, T);
2389   return Src0Safe == SBC_Yes && Src1Safe == SBC_Yes ? SBC_Yes : SBC_No;
2390 }
2391 
2392 namespace {
// NumericOperands is used during arithmetic/icmp lowering for constant folding.
// It holds the two source operands, and maintains some state as to whether one
// of them is a constant. If one of the operands is a constant, then it will be
// stored as the operation's second source, with a bit indicating whether the
// operands were swapped.
2398 //
2399 // The class is split into a base class with operand type-independent methods,
2400 // and a derived, templated class, for each type of operand we want to fold
2401 // constants for:
2402 //
// NumericOperandsBase --> NumericOperands<ConstantFloat>
//                     --> NumericOperands<ConstantDouble>
//                     --> NumericOperands<ConstantInteger32>
//
// Int32Operands, which derives from NumericOperands<ConstantInteger32>, also
// exposes helper methods for emitting inverted/negated immediates.
2409 class NumericOperandsBase {
2410   NumericOperandsBase() = delete;
2411   NumericOperandsBase(const NumericOperandsBase &) = delete;
2412   NumericOperandsBase &operator=(const NumericOperandsBase &) = delete;
2413 
2414 public:
NumericOperandsBase(Operand * S0,Operand * S1)2415   NumericOperandsBase(Operand *S0, Operand *S1)
2416       : Src0(NonConstOperand(S0, S1)), Src1(ConstOperand(S0, S1)),
2417         Swapped(Src0 == S1 && S0 != S1) {
2418     assert(Src0 != nullptr);
2419     assert(Src1 != nullptr);
2420     assert(Src0 != Src1 || S0 == S1);
2421   }
2422 
hasConstOperand() const2423   bool hasConstOperand() const {
2424     return llvm::isa<Constant>(Src1) && !llvm::isa<ConstantRelocatable>(Src1);
2425   }
2426 
swappedOperands() const2427   bool swappedOperands() const { return Swapped; }
2428 
src0R(TargetARM32 * Target) const2429   Variable *src0R(TargetARM32 *Target) const {
2430     return legalizeToReg(Target, Src0);
2431   }
2432 
unswappedSrc0R(TargetARM32 * Target) const2433   Variable *unswappedSrc0R(TargetARM32 *Target) const {
2434     return legalizeToReg(Target, Swapped ? Src1 : Src0);
2435   }
2436 
src1RF(TargetARM32 * Target) const2437   Operand *src1RF(TargetARM32 *Target) const {
2438     return legalizeToRegOrFlex(Target, Src1);
2439   }
2440 
unswappedSrc1R(TargetARM32 * Target) const2441   Variable *unswappedSrc1R(TargetARM32 *Target) const {
2442     return legalizeToReg(Target, Swapped ? Src0 : Src1);
2443   }
2444 
src1() const2445   Operand *src1() const { return Src1; }
2446 
2447 protected:
2448   Operand *const Src0;
2449   Operand *const Src1;
2450   const bool Swapped;
2451 
legalizeToReg(TargetARM32 * Target,Operand * Src)2452   static Variable *legalizeToReg(TargetARM32 *Target, Operand *Src) {
2453     return Target->legalizeToReg(Src);
2454   }
2455 
legalizeToRegOrFlex(TargetARM32 * Target,Operand * Src)2456   static Operand *legalizeToRegOrFlex(TargetARM32 *Target, Operand *Src) {
2457     return Target->legalize(Src,
2458                             TargetARM32::Legal_Reg | TargetARM32::Legal_Flex);
2459   }
2460 
2461 private:
NonConstOperand(Operand * S0,Operand * S1)2462   static Operand *NonConstOperand(Operand *S0, Operand *S1) {
2463     if (!llvm::isa<Constant>(S0))
2464       return S0;
2465     if (!llvm::isa<Constant>(S1))
2466       return S1;
2467     if (llvm::isa<ConstantRelocatable>(S1) &&
2468         !llvm::isa<ConstantRelocatable>(S0))
2469       return S1;
2470     return S0;
2471   }
2472 
ConstOperand(Operand * S0,Operand * S1)2473   static Operand *ConstOperand(Operand *S0, Operand *S1) {
2474     if (!llvm::isa<Constant>(S0))
2475       return S1;
2476     if (!llvm::isa<Constant>(S1))
2477       return S0;
2478     if (llvm::isa<ConstantRelocatable>(S1) &&
2479         !llvm::isa<ConstantRelocatable>(S0))
2480       return S0;
2481     return S1;
2482   }
2483 };
2484 
2485 template <typename C> class NumericOperands : public NumericOperandsBase {
2486   NumericOperands() = delete;
2487   NumericOperands(const NumericOperands &) = delete;
2488   NumericOperands &operator=(const NumericOperands &) = delete;
2489 
2490 public:
NumericOperands(Operand * S0,Operand * S1)2491   NumericOperands(Operand *S0, Operand *S1) : NumericOperandsBase(S0, S1) {
2492     assert(!hasConstOperand() || llvm::isa<C>(this->Src1));
2493   }
2494 
getConstantValue() const2495   typename C::PrimType getConstantValue() const {
2496     return llvm::cast<C>(Src1)->getValue();
2497   }
2498 };
2499 
2500 using FloatOperands = NumericOperands<ConstantFloat>;
2501 using DoubleOperands = NumericOperands<ConstantDouble>;
2502 
2503 class Int32Operands : public NumericOperands<ConstantInteger32> {
2504   Int32Operands() = delete;
2505   Int32Operands(const Int32Operands &) = delete;
2506   Int32Operands &operator=(const Int32Operands &) = delete;
2507 
2508 public:
Int32Operands(Operand * S0,Operand * S1)2509   Int32Operands(Operand *S0, Operand *S1) : NumericOperands(S0, S1) {}
2510 
unswappedSrc1RShAmtImm(TargetARM32 * Target) const2511   Operand *unswappedSrc1RShAmtImm(TargetARM32 *Target) const {
2512     if (!swappedOperands() && hasConstOperand()) {
2513       return Target->shAmtImm(getConstantValue() & 0x1F);
2514     }
2515     return legalizeToReg(Target, Swapped ? Src0 : Src1);
2516   }
2517 
isSrc1ImmediateZero() const2518   bool isSrc1ImmediateZero() const {
2519     if (!swappedOperands() && hasConstOperand()) {
2520       return getConstantValue() == 0;
2521     }
2522     return false;
2523   }
2524 
immediateIsFlexEncodable() const2525   bool immediateIsFlexEncodable() const {
2526     uint32_t Rotate, Imm8;
2527     return OperandARM32FlexImm::canHoldImm(getConstantValue(), &Rotate, &Imm8);
2528   }
2529 
negatedImmediateIsFlexEncodable() const2530   bool negatedImmediateIsFlexEncodable() const {
2531     uint32_t Rotate, Imm8;
2532     return OperandARM32FlexImm::canHoldImm(
2533         -static_cast<int32_t>(getConstantValue()), &Rotate, &Imm8);
2534   }
2535 
negatedSrc1F(TargetARM32 * Target) const2536   Operand *negatedSrc1F(TargetARM32 *Target) const {
2537     return legalizeToRegOrFlex(Target,
2538                                Target->getCtx()->getConstantInt32(
2539                                    -static_cast<int32_t>(getConstantValue())));
2540   }
2541 
invertedImmediateIsFlexEncodable() const2542   bool invertedImmediateIsFlexEncodable() const {
2543     uint32_t Rotate, Imm8;
2544     return OperandARM32FlexImm::canHoldImm(
2545         ~static_cast<uint32_t>(getConstantValue()), &Rotate, &Imm8);
2546   }
2547 
invertedSrc1F(TargetARM32 * Target) const2548   Operand *invertedSrc1F(TargetARM32 *Target) const {
2549     return legalizeToRegOrFlex(Target,
2550                                Target->getCtx()->getConstantInt32(
2551                                    ~static_cast<uint32_t>(getConstantValue())));
2552   }
2553 };
2554 } // end of anonymous namespace
2555 
preambleDivRem(const InstCall * Instr)2556 void TargetARM32::preambleDivRem(const InstCall *Instr) {
2557   Operand *Src1 = Instr->getArg(1);
2558 
2559   switch (Src1->getType()) {
2560   default:
2561     llvm::report_fatal_error("Invalid type for idiv.");
2562   case IceType_i64: {
2563     if (auto *C = llvm::dyn_cast<ConstantInteger64>(Src1)) {
2564       if (C->getValue() == 0) {
2565         _trap();
2566         return;
2567       }
2568     }
2569     div0Check(IceType_i64, loOperand(Src1), hiOperand(Src1));
2570     return;
2571   }
2572   case IceType_i32: {
2573     // Src0 and Src1 have already been appropriately extended to an i32, so we
2574     // don't check for i8 and i16.
2575     if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) {
2576       if (C->getValue() == 0) {
2577         _trap();
2578         return;
2579       }
2580     }
2581     div0Check(IceType_i32, Src1, nullptr);
2582     return;
2583   }
2584   }
2585 }
2586 
lowerInt64Arithmetic(InstArithmetic::OpKind Op,Variable * Dest,Operand * Src0,Operand * Src1)2587 void TargetARM32::lowerInt64Arithmetic(InstArithmetic::OpKind Op,
2588                                        Variable *Dest, Operand *Src0,
2589                                        Operand *Src1) {
2590   Int32Operands SrcsLo(loOperand(Src0), loOperand(Src1));
2591   Int32Operands SrcsHi(hiOperand(Src0), hiOperand(Src1));
2592   assert(SrcsLo.swappedOperands() == SrcsHi.swappedOperands());
2593   assert(SrcsLo.hasConstOperand() == SrcsHi.hasConstOperand());
2594 
2595   auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
2596   auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
2597   Variable *T_Lo = makeReg(DestLo->getType());
2598   Variable *T_Hi = makeReg(DestHi->getType());
2599 
2600   switch (Op) {
2601   case InstArithmetic::_num:
2602     llvm::report_fatal_error("Unknown arithmetic operator");
2603     return;
2604   case InstArithmetic::Add: {
2605     Variable *Src0LoR = SrcsLo.src0R(this);
2606     Operand *Src1LoRF = SrcsLo.src1RF(this);
2607     Variable *Src0HiR = SrcsHi.src0R(this);
2608     Operand *Src1HiRF = SrcsHi.src1RF(this);
2609     _adds(T_Lo, Src0LoR, Src1LoRF);
2610     _mov(DestLo, T_Lo);
2611     _adc(T_Hi, Src0HiR, Src1HiRF);
2612     _mov(DestHi, T_Hi);
2613     return;
2614   }
2615   case InstArithmetic::And: {
2616     Variable *Src0LoR = SrcsLo.src0R(this);
2617     Operand *Src1LoRF = SrcsLo.src1RF(this);
2618     Variable *Src0HiR = SrcsHi.src0R(this);
2619     Operand *Src1HiRF = SrcsHi.src1RF(this);
2620     _and(T_Lo, Src0LoR, Src1LoRF);
2621     _mov(DestLo, T_Lo);
2622     _and(T_Hi, Src0HiR, Src1HiRF);
2623     _mov(DestHi, T_Hi);
2624     return;
2625   }
2626   case InstArithmetic::Or: {
2627     Variable *Src0LoR = SrcsLo.src0R(this);
2628     Operand *Src1LoRF = SrcsLo.src1RF(this);
2629     Variable *Src0HiR = SrcsHi.src0R(this);
2630     Operand *Src1HiRF = SrcsHi.src1RF(this);
2631     _orr(T_Lo, Src0LoR, Src1LoRF);
2632     _mov(DestLo, T_Lo);
2633     _orr(T_Hi, Src0HiR, Src1HiRF);
2634     _mov(DestHi, T_Hi);
2635     return;
2636   }
2637   case InstArithmetic::Xor: {
2638     Variable *Src0LoR = SrcsLo.src0R(this);
2639     Operand *Src1LoRF = SrcsLo.src1RF(this);
2640     Variable *Src0HiR = SrcsHi.src0R(this);
2641     Operand *Src1HiRF = SrcsHi.src1RF(this);
2642     _eor(T_Lo, Src0LoR, Src1LoRF);
2643     _mov(DestLo, T_Lo);
2644     _eor(T_Hi, Src0HiR, Src1HiRF);
2645     _mov(DestHi, T_Hi);
2646     return;
2647   }
2648   case InstArithmetic::Sub: {
2649     Variable *Src0LoR = SrcsLo.src0R(this);
2650     Operand *Src1LoRF = SrcsLo.src1RF(this);
2651     Variable *Src0HiR = SrcsHi.src0R(this);
2652     Operand *Src1HiRF = SrcsHi.src1RF(this);
2653     if (SrcsLo.swappedOperands()) {
2654       _rsbs(T_Lo, Src0LoR, Src1LoRF);
2655       _mov(DestLo, T_Lo);
2656       _rsc(T_Hi, Src0HiR, Src1HiRF);
2657       _mov(DestHi, T_Hi);
2658     } else {
2659       _subs(T_Lo, Src0LoR, Src1LoRF);
2660       _mov(DestLo, T_Lo);
2661       _sbc(T_Hi, Src0HiR, Src1HiRF);
2662       _mov(DestHi, T_Hi);
2663     }
2664     return;
2665   }
2666   case InstArithmetic::Mul: {
2667     // GCC 4.8 does:
2668     // a=b*c ==>
2669     //   t_acc =(mul) (b.lo * c.hi)
2670     //   t_acc =(mla) (c.lo * b.hi) + t_acc
2671     //   t.hi,t.lo =(umull) b.lo * c.lo
2672     //   t.hi += t_acc
2673     //   a.lo = t.lo
2674     //   a.hi = t.hi
2675     //
2676     // LLVM does:
2677     //   t.hi,t.lo =(umull) b.lo * c.lo
2678     //   t.hi =(mla) (b.lo * c.hi) + t.hi
2679     //   t.hi =(mla) (b.hi * c.lo) + t.hi
2680     //   a.lo = t.lo
2681     //   a.hi = t.hi
2682     //
2683     // LLVM's lowering has fewer instructions, but more register pressure:
2684     // t.lo is live from beginning to end, while GCC delays the two-dest
2685     // instruction till the end, and kills c.hi immediately.
2686     Variable *T_Acc = makeReg(IceType_i32);
2687     Variable *T_Acc1 = makeReg(IceType_i32);
2688     Variable *T_Hi1 = makeReg(IceType_i32);
2689     Variable *Src0RLo = SrcsLo.unswappedSrc0R(this);
2690     Variable *Src0RHi = SrcsHi.unswappedSrc0R(this);
2691     Variable *Src1RLo = SrcsLo.unswappedSrc1R(this);
2692     Variable *Src1RHi = SrcsHi.unswappedSrc1R(this);
2693     _mul(T_Acc, Src0RLo, Src1RHi);
2694     _mla(T_Acc1, Src1RLo, Src0RHi, T_Acc);
2695     _umull(T_Lo, T_Hi1, Src0RLo, Src1RLo);
2696     _add(T_Hi, T_Hi1, T_Acc1);
2697     _mov(DestLo, T_Lo);
2698     _mov(DestHi, T_Hi);
2699     return;
2700   }
2701   case InstArithmetic::Shl: {
2702     if (!SrcsLo.swappedOperands() && SrcsLo.hasConstOperand()) {
2703       Variable *Src0RLo = SrcsLo.src0R(this);
2704       // Truncating the ShAmt to [0, 63] because that's what ARM does anyway.
2705       const int32_t ShAmtImm = SrcsLo.getConstantValue() & 0x3F;
2706       if (ShAmtImm == 0) {
2707         _mov(DestLo, Src0RLo);
2708         _mov(DestHi, SrcsHi.src0R(this));
2709         return;
2710       }
2711 
2712       if (ShAmtImm >= 32) {
2713         if (ShAmtImm == 32) {
2714           _mov(DestHi, Src0RLo);
2715         } else {
2716           Operand *ShAmtOp = shAmtImm(ShAmtImm - 32);
2717           _lsl(T_Hi, Src0RLo, ShAmtOp);
2718           _mov(DestHi, T_Hi);
2719         }
2720 
2721         Operand *_0 =
2722             legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex);
2723         _mov(T_Lo, _0);
2724         _mov(DestLo, T_Lo);
2725         return;
2726       }
2727 
2728       Variable *Src0RHi = SrcsHi.src0R(this);
2729       Operand *ShAmtOp = shAmtImm(ShAmtImm);
2730       Operand *ComplShAmtOp = shAmtImm(32 - ShAmtImm);
2731       _lsl(T_Hi, Src0RHi, ShAmtOp);
2732       _orr(T_Hi, T_Hi,
2733            OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo,
2734                                        OperandARM32::LSR, ComplShAmtOp));
2735       _mov(DestHi, T_Hi);
2736 
2737       _lsl(T_Lo, Src0RLo, ShAmtOp);
2738       _mov(DestLo, T_Lo);
2739       return;
2740     }
2741 
2742     // a=b<<c ==>
2743     // pnacl-llc does:
2744     // mov     t_b.lo, b.lo
2745     // mov     t_b.hi, b.hi
2746     // mov     t_c.lo, c.lo
2747     // rsb     T0, t_c.lo, #32
2748     // lsr     T1, t_b.lo, T0
2749     // orr     t_a.hi, T1, t_b.hi, lsl t_c.lo
2750     // sub     T2, t_c.lo, #32
2751     // cmp     T2, #0
2752     // lslge   t_a.hi, t_b.lo, T2
2753     // lsl     t_a.lo, t_b.lo, t_c.lo
2754     // mov     a.lo, t_a.lo
2755     // mov     a.hi, t_a.hi
2756     //
2757     // GCC 4.8 does:
2758     // sub t_c1, c.lo, #32
2759     // lsl t_hi, b.hi, c.lo
2760     // orr t_hi, t_hi, b.lo, lsl t_c1
2761     // rsb t_c2, c.lo, #32
2762     // orr t_hi, t_hi, b.lo, lsr t_c2
2763     // lsl t_lo, b.lo, c.lo
2764     // a.lo = t_lo
2765     // a.hi = t_hi
2766     //
2767     // These are incompatible, therefore we mimic pnacl-llc.
2768     // Can be strength-reduced for constant-shifts, but we don't do that for
2769     // now.
2770     // Given the sub/rsb T_C, C.lo, #32, one of the T_C will be negative. On
2771     // ARM, shifts only take the lower 8 bits of the shift register, and
2772     // saturate to the range 0-32, so the negative value will saturate to 32.
2773     Operand *_32 = legalize(Ctx->getConstantInt32(32), Legal_Reg | Legal_Flex);
2774     Operand *_0 =
2775         legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex);
2776     Variable *T0 = makeReg(IceType_i32);
2777     Variable *T1 = makeReg(IceType_i32);
2778     Variable *T2 = makeReg(IceType_i32);
2779     Variable *TA_Hi = makeReg(IceType_i32);
2780     Variable *TA_Lo = makeReg(IceType_i32);
2781     Variable *Src0RLo = SrcsLo.unswappedSrc0R(this);
2782     Variable *Src0RHi = SrcsHi.unswappedSrc0R(this);
2783     Variable *Src1RLo = SrcsLo.unswappedSrc1R(this);
2784     _rsb(T0, Src1RLo, _32);
2785     _lsr(T1, Src0RLo, T0);
2786     _orr(TA_Hi, T1, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
2787                                                 OperandARM32::LSL, Src1RLo));
2788     _sub(T2, Src1RLo, _32);
2789     _cmp(T2, _0);
2790     _lsl(TA_Hi, Src0RLo, T2, CondARM32::GE);
2791     _set_dest_redefined();
2792     _lsl(TA_Lo, Src0RLo, Src1RLo);
2793     _mov(DestLo, TA_Lo);
2794     _mov(DestHi, TA_Hi);
2795     return;
2796   }
2797   case InstArithmetic::Lshr:
2798   case InstArithmetic::Ashr: {
2799     const bool ASR = Op == InstArithmetic::Ashr;
2800     if (!SrcsLo.swappedOperands() && SrcsLo.hasConstOperand()) {
2801       Variable *Src0RHi = SrcsHi.src0R(this);
2802       // Truncating the ShAmt to [0, 63] because that's what ARM does anyway.
2803       const int32_t ShAmt = SrcsLo.getConstantValue() & 0x3F;
2804       if (ShAmt == 0) {
2805         _mov(DestHi, Src0RHi);
2806         _mov(DestLo, SrcsLo.src0R(this));
2807         return;
2808       }
2809 
2810       if (ShAmt >= 32) {
2811         if (ShAmt == 32) {
2812           _mov(DestLo, Src0RHi);
2813         } else {
2814           Operand *ShAmtImm = shAmtImm(ShAmt - 32);
2815           if (ASR) {
2816             _asr(T_Lo, Src0RHi, ShAmtImm);
2817           } else {
2818             _lsr(T_Lo, Src0RHi, ShAmtImm);
2819           }
2820           _mov(DestLo, T_Lo);
2821         }
2822 
2823         if (ASR) {
2824           Operand *_31 = shAmtImm(31);
2825           _asr(T_Hi, Src0RHi, _31);
2826         } else {
2827           Operand *_0 = legalize(Ctx->getConstantZero(IceType_i32),
2828                                  Legal_Reg | Legal_Flex);
2829           _mov(T_Hi, _0);
2830         }
2831         _mov(DestHi, T_Hi);
2832         return;
2833       }
2834 
2835       Variable *Src0RLo = SrcsLo.src0R(this);
2836       Operand *ShAmtImm = shAmtImm(ShAmt);
2837       Operand *ComplShAmtImm = shAmtImm(32 - ShAmt);
2838       _lsr(T_Lo, Src0RLo, ShAmtImm);
2839       _orr(T_Lo, T_Lo,
2840            OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
2841                                        OperandARM32::LSL, ComplShAmtImm));
2842       _mov(DestLo, T_Lo);
2843 
2844       if (ASR) {
2845         _asr(T_Hi, Src0RHi, ShAmtImm);
2846       } else {
2847         _lsr(T_Hi, Src0RHi, ShAmtImm);
2848       }
2849       _mov(DestHi, T_Hi);
2850       return;
2851     }
2852 
2853     // a=b>>c
2854     // pnacl-llc does:
2855     // mov        t_b.lo, b.lo
2856     // mov        t_b.hi, b.hi
2857     // mov        t_c.lo, c.lo
2858     // lsr        T0, t_b.lo, t_c.lo
2859     // rsb        T1, t_c.lo, #32
2860     // orr        t_a.lo, T0, t_b.hi, lsl T1
2861     // sub        T2, t_c.lo, #32
2862     // cmp        T2, #0
2863     // [al]srge   t_a.lo, t_b.hi, T2
2864     // [al]sr     t_a.hi, t_b.hi, t_c.lo
2865     // mov        a.lo, t_a.lo
2866     // mov        a.hi, t_a.hi
2867     //
2868     // GCC 4.8 does (lsr):
2869     // rsb        t_c1, c.lo, #32
2870     // lsr        t_lo, b.lo, c.lo
2871     // orr        t_lo, t_lo, b.hi, lsl t_c1
2872     // sub        t_c2, c.lo, #32
2873     // orr        t_lo, t_lo, b.hi, lsr t_c2
2874     // lsr        t_hi, b.hi, c.lo
2875     // mov        a.lo, t_lo
2876     // mov        a.hi, t_hi
2877     //
2878     // These are incompatible, therefore we mimic pnacl-llc.
2879     Operand *_32 = legalize(Ctx->getConstantInt32(32), Legal_Reg | Legal_Flex);
2880     Operand *_0 =
2881         legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex);
2882     Variable *T0 = makeReg(IceType_i32);
2883     Variable *T1 = makeReg(IceType_i32);
2884     Variable *T2 = makeReg(IceType_i32);
2885     Variable *TA_Lo = makeReg(IceType_i32);
2886     Variable *TA_Hi = makeReg(IceType_i32);
2887     Variable *Src0RLo = SrcsLo.unswappedSrc0R(this);
2888     Variable *Src0RHi = SrcsHi.unswappedSrc0R(this);
2889     Variable *Src1RLo = SrcsLo.unswappedSrc1R(this);
2890     _lsr(T0, Src0RLo, Src1RLo);
2891     _rsb(T1, Src1RLo, _32);
2892     _orr(TA_Lo, T0, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
2893                                                 OperandARM32::LSL, T1));
2894     _sub(T2, Src1RLo, _32);
2895     _cmp(T2, _0);
2896     if (ASR) {
2897       _asr(TA_Lo, Src0RHi, T2, CondARM32::GE);
2898       _set_dest_redefined();
2899       _asr(TA_Hi, Src0RHi, Src1RLo);
2900     } else {
2901       _lsr(TA_Lo, Src0RHi, T2, CondARM32::GE);
2902       _set_dest_redefined();
2903       _lsr(TA_Hi, Src0RHi, Src1RLo);
2904     }
2905     _mov(DestLo, TA_Lo);
2906     _mov(DestHi, TA_Hi);
2907     return;
2908   }
2909   case InstArithmetic::Fadd:
2910   case InstArithmetic::Fsub:
2911   case InstArithmetic::Fmul:
2912   case InstArithmetic::Fdiv:
2913   case InstArithmetic::Frem:
2914     llvm::report_fatal_error("FP instruction with i64 type");
2915     return;
2916   case InstArithmetic::Udiv:
2917   case InstArithmetic::Sdiv:
2918   case InstArithmetic::Urem:
2919   case InstArithmetic::Srem:
2920     llvm::report_fatal_error("Call-helper-involved instruction for i64 type "
2921                              "should have already been handled before");
2922     return;
2923   }
2924 }
2925 
2926 namespace {
2927 // StrengthReduction is a namespace with the strength reduction machinery. The
2928 // entry point is the StrengthReduction::tryToOptimize method. It returns true
2929 // if the optimization can be performed, and false otherwise.
2930 //
2931 // If the optimization can be performed, tryToOptimize sets its NumOperations
2932 // parameter to the number of shifts that are needed to perform the
2933 // multiplication; and it sets the Operations parameter with <ShAmt, AddOrSub>
2934 // tuples that describe how to materialize the multiplication.
2935 //
2936 // The algorithm finds contiguous 1s in the Multiplication source, and uses one
2937 // or two shifts to materialize it. A sequence of 1s, e.g.,
2938 //
2939 //                  M           N
2940 //   ...00000000000011111...111110000000...
2941 //
2942 // is materializable with (1 << (M + 1)) - (1 << N):
2943 //
2944 //   ...00000000000100000...000000000000...      [1 << (M + 1)]
2945 //   ...00000000000000000...000010000000... (-)  [1 << N]
2946 //   --------------------------------------
2947 //   ...00000000000011111...111110000000...
2948 //
2949 // And a single bit set, which is just a left shift.
2950 namespace StrengthReduction {
2951 enum AggregationOperation {
2952   AO_Invalid,
2953   AO_Add,
2954   AO_Sub,
2955 };
2956 
2957 // AggregateElement is a glorified <ShAmt, AddOrSub> tuple.
2958 class AggregationElement {
2959   AggregationElement(const AggregationElement &) = delete;
2960 
2961 public:
2962   AggregationElement() = default;
2963   AggregationElement &operator=(const AggregationElement &) = default;
AggregationElement(AggregationOperation Op,uint32_t ShAmt)2964   AggregationElement(AggregationOperation Op, uint32_t ShAmt)
2965       : Op(Op), ShAmt(ShAmt) {}
2966 
createShiftedOperand(Cfg * Func,Variable * OpR) const2967   Operand *createShiftedOperand(Cfg *Func, Variable *OpR) const {
2968     assert(OpR->mustHaveReg());
2969     if (ShAmt == 0) {
2970       return OpR;
2971     }
2972     return OperandARM32FlexReg::create(
2973         Func, IceType_i32, OpR, OperandARM32::LSL,
2974         OperandARM32ShAmtImm::create(
2975             Func, llvm::cast<ConstantInteger32>(
2976                       Func->getContext()->getConstantInt32(ShAmt))));
2977   }
2978 
aggregateWithAdd() const2979   bool aggregateWithAdd() const {
2980     switch (Op) {
2981     case AO_Invalid:
2982       llvm::report_fatal_error("Invalid Strength Reduction Operations.");
2983     case AO_Add:
2984       return true;
2985     case AO_Sub:
2986       return false;
2987     }
2988     llvm_unreachable("(silence g++ warning)");
2989   }
2990 
shAmt() const2991   uint32_t shAmt() const { return ShAmt; }
2992 
2993 private:
2994   AggregationOperation Op = AO_Invalid;
2995   uint32_t ShAmt;
2996 };
2997 
2998 // [RangeStart, RangeEnd] is a range of 1s in Src.
2999 template <std::size_t N>
addOperations(uint32_t RangeStart,uint32_t RangeEnd,SizeT * NumOperations,std::array<AggregationElement,N> * Operations)3000 bool addOperations(uint32_t RangeStart, uint32_t RangeEnd, SizeT *NumOperations,
3001                    std::array<AggregationElement, N> *Operations) {
3002   assert(*NumOperations < N);
3003   if (RangeStart == RangeEnd) {
3004     // Single bit set:
3005     // Src           : 0...00010...
3006     // RangeStart    :        ^
3007     // RangeEnd      :        ^
3008     // NegSrc        : 0...00001...
3009     (*Operations)[*NumOperations] = AggregationElement(AO_Add, RangeStart);
3010     ++(*NumOperations);
3011     return true;
3012   }
3013 
3014   // Sequence of 1s: (two operations required.)
3015   // Src           : 0...00011...110...
3016   // RangeStart    :        ^
3017   // RangeEnd      :              ^
3018   // NegSrc        : 0...00000...001...
3019   if (*NumOperations + 1 >= N) {
3020     return false;
3021   }
3022   (*Operations)[*NumOperations] = AggregationElement(AO_Add, RangeStart + 1);
3023   ++(*NumOperations);
3024   (*Operations)[*NumOperations] = AggregationElement(AO_Sub, RangeEnd);
3025   ++(*NumOperations);
3026   return true;
3027 }
3028 
3029 // tryToOptmize scans Src looking for sequences of 1s (including the unitary bit
3030 // 1 surrounded by zeroes.
3031 template <std::size_t N>
tryToOptimize(uint32_t Src,SizeT * NumOperations,std::array<AggregationElement,N> * Operations)3032 bool tryToOptimize(uint32_t Src, SizeT *NumOperations,
3033                    std::array<AggregationElement, N> *Operations) {
3034   constexpr uint32_t SrcSizeBits = sizeof(Src) * CHAR_BIT;
3035   uint32_t NegSrc = ~Src;
3036 
3037   *NumOperations = 0;
3038   while (Src != 0 && *NumOperations < N) {
3039     // Each step of the algorithm:
3040     //   * finds L, the last bit set in Src;
3041     //   * clears all the upper bits in NegSrc up to bit L;
3042     //   * finds nL, the last bit set in NegSrc;
3043     //   * clears all the upper bits in Src up to bit nL;
3044     //
3045     // if L == nL + 1, then a unitary 1 was found in Src. Otherwise, a sequence
3046     // of 1s starting at L, and ending at nL + 1, was found.
3047     const uint32_t SrcLastBitSet = llvm::findLastSet(Src);
3048     const uint32_t NegSrcClearMask =
3049         (SrcLastBitSet == 0) ? 0
3050                              : (0xFFFFFFFFu) >> (SrcSizeBits - SrcLastBitSet);
3051     NegSrc &= NegSrcClearMask;
3052     if (NegSrc == 0) {
3053       if (addOperations(SrcLastBitSet, 0, NumOperations, Operations)) {
3054         return true;
3055       }
3056       return false;
3057     }
3058     const uint32_t NegSrcLastBitSet = llvm::findLastSet(NegSrc);
3059     assert(NegSrcLastBitSet < SrcLastBitSet);
3060     const uint32_t SrcClearMask =
3061         (NegSrcLastBitSet == 0) ? 0 : (0xFFFFFFFFu) >>
3062                                           (SrcSizeBits - NegSrcLastBitSet);
3063     Src &= SrcClearMask;
3064     if (!addOperations(SrcLastBitSet, NegSrcLastBitSet + 1, NumOperations,
3065                        Operations)) {
3066       return false;
3067     }
3068   }
3069 
3070   return Src == 0;
3071 }
3072 } // end of namespace StrengthReduction
3073 } // end of anonymous namespace
3074 
lowerArithmetic(const InstArithmetic * Instr)3075 void TargetARM32::lowerArithmetic(const InstArithmetic *Instr) {
3076   Variable *Dest = Instr->getDest();
3077 
3078   if (Dest->isRematerializable()) {
3079     Context.insert<InstFakeDef>(Dest);
3080     return;
3081   }
3082 
3083   Type DestTy = Dest->getType();
3084   if (DestTy == IceType_i1) {
3085     lowerInt1Arithmetic(Instr);
3086     return;
3087   }
3088 
3089   Operand *Src0 = legalizeUndef(Instr->getSrc(0));
3090   Operand *Src1 = legalizeUndef(Instr->getSrc(1));
3091   if (DestTy == IceType_i64) {
3092     lowerInt64Arithmetic(Instr->getOp(), Instr->getDest(), Src0, Src1);
3093     return;
3094   }
3095 
3096   if (isVectorType(DestTy)) {
3097     switch (Instr->getOp()) {
3098     default:
3099       UnimplementedLoweringError(this, Instr);
3100       return;
3101     // Explicitly whitelist vector instructions we have implemented/enabled.
3102     case InstArithmetic::Add:
3103     case InstArithmetic::And:
3104     case InstArithmetic::Ashr:
3105     case InstArithmetic::Fadd:
3106     case InstArithmetic::Fmul:
3107     case InstArithmetic::Fsub:
3108     case InstArithmetic::Lshr:
3109     case InstArithmetic::Mul:
3110     case InstArithmetic::Or:
3111     case InstArithmetic::Shl:
3112     case InstArithmetic::Sub:
3113     case InstArithmetic::Xor:
3114       break;
3115     }
3116   }
3117 
3118   Variable *T = makeReg(DestTy);
3119 
3120   // * Handle div/rem separately. They require a non-legalized Src1 to inspect
3121   // whether or not Src1 is a non-zero constant. Once legalized it is more
3122   // difficult to determine (constant may be moved to a register).
3123   // * Handle floating point arithmetic separately: they require Src1 to be
3124   // legalized to a register.
3125   switch (Instr->getOp()) {
3126   default:
3127     break;
3128   case InstArithmetic::Udiv: {
3129     constexpr bool NotRemainder = false;
3130     Variable *Src0R = legalizeToReg(Src0);
3131     lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_uxt, &TargetARM32::_udiv,
3132                  NotRemainder);
3133     return;
3134   }
3135   case InstArithmetic::Sdiv: {
3136     constexpr bool NotRemainder = false;
3137     Variable *Src0R = legalizeToReg(Src0);
3138     lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_sxt, &TargetARM32::_sdiv,
3139                  NotRemainder);
3140     return;
3141   }
3142   case InstArithmetic::Urem: {
3143     constexpr bool IsRemainder = true;
3144     Variable *Src0R = legalizeToReg(Src0);
3145     lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_uxt, &TargetARM32::_udiv,
3146                  IsRemainder);
3147     return;
3148   }
3149   case InstArithmetic::Srem: {
3150     constexpr bool IsRemainder = true;
3151     Variable *Src0R = legalizeToReg(Src0);
3152     lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_sxt, &TargetARM32::_sdiv,
3153                  IsRemainder);
3154     return;
3155   }
3156   case InstArithmetic::Frem: {
3157     if (!isScalarFloatingType(DestTy)) {
3158       llvm::report_fatal_error("Unexpected type when lowering frem.");
3159     }
3160     llvm::report_fatal_error("Frem should have already been lowered.");
3161   }
3162   case InstArithmetic::Fadd: {
3163     Variable *Src0R = legalizeToReg(Src0);
3164     if (const Inst *Src1Producer = Computations.getProducerOf(Src1)) {
3165       Variable *Src1R = legalizeToReg(Src1Producer->getSrc(0));
3166       Variable *Src2R = legalizeToReg(Src1Producer->getSrc(1));
3167       _vmla(Src0R, Src1R, Src2R);
3168       _mov(Dest, Src0R);
3169       return;
3170     }
3171 
3172     Variable *Src1R = legalizeToReg(Src1);
3173     _vadd(T, Src0R, Src1R);
3174     _mov(Dest, T);
3175     return;
3176   }
3177   case InstArithmetic::Fsub: {
3178     Variable *Src0R = legalizeToReg(Src0);
3179     if (const Inst *Src1Producer = Computations.getProducerOf(Src1)) {
3180       Variable *Src1R = legalizeToReg(Src1Producer->getSrc(0));
3181       Variable *Src2R = legalizeToReg(Src1Producer->getSrc(1));
3182       _vmls(Src0R, Src1R, Src2R);
3183       _mov(Dest, Src0R);
3184       return;
3185     }
3186     Variable *Src1R = legalizeToReg(Src1);
3187     _vsub(T, Src0R, Src1R);
3188     _mov(Dest, T);
3189     return;
3190   }
3191   case InstArithmetic::Fmul: {
3192     Variable *Src0R = legalizeToReg(Src0);
3193     Variable *Src1R = legalizeToReg(Src1);
3194     _vmul(T, Src0R, Src1R);
3195     _mov(Dest, T);
3196     return;
3197   }
3198   case InstArithmetic::Fdiv: {
3199     Variable *Src0R = legalizeToReg(Src0);
3200     Variable *Src1R = legalizeToReg(Src1);
3201     _vdiv(T, Src0R, Src1R);
3202     _mov(Dest, T);
3203     return;
3204   }
3205   }
3206 
3207   // Handle everything else here.
3208   Int32Operands Srcs(Src0, Src1);
3209   switch (Instr->getOp()) {
3210   case InstArithmetic::_num:
3211     llvm::report_fatal_error("Unknown arithmetic operator");
3212     return;
3213   case InstArithmetic::Add: {
3214     if (const Inst *Src1Producer = Computations.getProducerOf(Src1)) {
3215       assert(!isVectorType(DestTy));
3216       Variable *Src0R = legalizeToReg(Src0);
3217       Variable *Src1R = legalizeToReg(Src1Producer->getSrc(0));
3218       Variable *Src2R = legalizeToReg(Src1Producer->getSrc(1));
3219       _mla(T, Src1R, Src2R, Src0R);
3220       _mov(Dest, T);
3221       return;
3222     }
3223 
3224     if (Srcs.hasConstOperand()) {
3225       if (!Srcs.immediateIsFlexEncodable() &&
3226           Srcs.negatedImmediateIsFlexEncodable()) {
3227         assert(!isVectorType(DestTy));
3228         Variable *Src0R = Srcs.src0R(this);
3229         Operand *Src1F = Srcs.negatedSrc1F(this);
3230         if (!Srcs.swappedOperands()) {
3231           _sub(T, Src0R, Src1F);
3232         } else {
3233           _rsb(T, Src0R, Src1F);
3234         }
3235         _mov(Dest, T);
3236         return;
3237       }
3238     }
3239     Variable *Src0R = Srcs.src0R(this);
3240     if (isVectorType(DestTy)) {
3241       Variable *Src1R = legalizeToReg(Src1);
3242       _vadd(T, Src0R, Src1R);
3243     } else {
3244       Operand *Src1RF = Srcs.src1RF(this);
3245       _add(T, Src0R, Src1RF);
3246     }
3247     _mov(Dest, T);
3248     return;
3249   }
3250   case InstArithmetic::And: {
3251     if (Srcs.hasConstOperand()) {
3252       if (!Srcs.immediateIsFlexEncodable() &&
3253           Srcs.invertedImmediateIsFlexEncodable()) {
3254         Variable *Src0R = Srcs.src0R(this);
3255         Operand *Src1F = Srcs.invertedSrc1F(this);
3256         _bic(T, Src0R, Src1F);
3257         _mov(Dest, T);
3258         return;
3259       }
3260     }
3261     assert(isIntegerType(DestTy));
3262     Variable *Src0R = Srcs.src0R(this);
3263     if (isVectorType(DestTy)) {
3264       Variable *Src1R = legalizeToReg(Src1);
3265       _vand(T, Src0R, Src1R);
3266     } else {
3267       Operand *Src1RF = Srcs.src1RF(this);
3268       _and(T, Src0R, Src1RF);
3269     }
3270     _mov(Dest, T);
3271     return;
3272   }
3273   case InstArithmetic::Or: {
3274     Variable *Src0R = Srcs.src0R(this);
3275     assert(isIntegerType(DestTy));
3276     if (isVectorType(DestTy)) {
3277       Variable *Src1R = legalizeToReg(Src1);
3278       _vorr(T, Src0R, Src1R);
3279     } else {
3280       Operand *Src1RF = Srcs.src1RF(this);
3281       _orr(T, Src0R, Src1RF);
3282     }
3283     _mov(Dest, T);
3284     return;
3285   }
3286   case InstArithmetic::Xor: {
3287     Variable *Src0R = Srcs.src0R(this);
3288     assert(isIntegerType(DestTy));
3289     if (isVectorType(DestTy)) {
3290       Variable *Src1R = legalizeToReg(Src1);
3291       _veor(T, Src0R, Src1R);
3292     } else {
3293       Operand *Src1RF = Srcs.src1RF(this);
3294       _eor(T, Src0R, Src1RF);
3295     }
3296     _mov(Dest, T);
3297     return;
3298   }
3299   case InstArithmetic::Sub: {
3300     if (const Inst *Src1Producer = Computations.getProducerOf(Src1)) {
3301       assert(!isVectorType(DestTy));
3302       Variable *Src0R = legalizeToReg(Src0);
3303       Variable *Src1R = legalizeToReg(Src1Producer->getSrc(0));
3304       Variable *Src2R = legalizeToReg(Src1Producer->getSrc(1));
3305       _mls(T, Src1R, Src2R, Src0R);
3306       _mov(Dest, T);
3307       return;
3308     }
3309 
3310     if (Srcs.hasConstOperand()) {
3311       assert(!isVectorType(DestTy));
3312       if (Srcs.immediateIsFlexEncodable()) {
3313         Variable *Src0R = Srcs.src0R(this);
3314         Operand *Src1RF = Srcs.src1RF(this);
3315         if (Srcs.swappedOperands()) {
3316           _rsb(T, Src0R, Src1RF);
3317         } else {
3318           _sub(T, Src0R, Src1RF);
3319         }
3320         _mov(Dest, T);
3321         return;
3322       }
3323       if (!Srcs.swappedOperands() && Srcs.negatedImmediateIsFlexEncodable()) {
3324         Variable *Src0R = Srcs.src0R(this);
3325         Operand *Src1F = Srcs.negatedSrc1F(this);
3326         _add(T, Src0R, Src1F);
3327         _mov(Dest, T);
3328         return;
3329       }
3330     }
3331     Variable *Src0R = Srcs.unswappedSrc0R(this);
3332     Variable *Src1R = Srcs.unswappedSrc1R(this);
3333     if (isVectorType(DestTy)) {
3334       _vsub(T, Src0R, Src1R);
3335     } else {
3336       _sub(T, Src0R, Src1R);
3337     }
3338     _mov(Dest, T);
3339     return;
3340   }
3341   case InstArithmetic::Mul: {
3342     const bool OptM1 = Func->getOptLevel() == Opt_m1;
3343     if (!OptM1 && Srcs.hasConstOperand()) {
3344       constexpr std::size_t MaxShifts = 4;
3345       std::array<StrengthReduction::AggregationElement, MaxShifts> Shifts;
3346       SizeT NumOperations;
3347       int32_t Const = Srcs.getConstantValue();
3348       const bool Invert = Const < 0;
3349       const bool MultiplyByZero = Const == 0;
3350       Operand *_0 =
3351           legalize(Ctx->getConstantZero(DestTy), Legal_Reg | Legal_Flex);
3352 
3353       if (MultiplyByZero) {
3354         _mov(T, _0);
3355         _mov(Dest, T);
3356         return;
3357       }
3358 
3359       if (Invert) {
3360         Const = -Const;
3361       }
3362 
3363       if (StrengthReduction::tryToOptimize(Const, &NumOperations, &Shifts)) {
3364         assert(NumOperations >= 1);
3365         Variable *Src0R = Srcs.src0R(this);
3366         int32_t Start;
3367         int32_t End;
3368         if (NumOperations == 1 || Shifts[NumOperations - 1].shAmt() != 0) {
3369           // Multiplication by a power of 2 (NumOperations == 1); or
3370           // Multiplication by a even number not a power of 2.
3371           Start = 1;
3372           End = NumOperations;
3373           assert(Shifts[0].aggregateWithAdd());
3374           _lsl(T, Src0R, shAmtImm(Shifts[0].shAmt()));
3375         } else {
3376           // Multiplication by an odd number. Put the free barrel shifter to a
3377           // good use.
3378           Start = 0;
3379           End = NumOperations - 2;
3380           const StrengthReduction::AggregationElement &Last =
3381               Shifts[NumOperations - 1];
3382           const StrengthReduction::AggregationElement &SecondToLast =
3383               Shifts[NumOperations - 2];
3384           if (!Last.aggregateWithAdd()) {
3385             assert(SecondToLast.aggregateWithAdd());
3386             _rsb(T, Src0R, SecondToLast.createShiftedOperand(Func, Src0R));
3387           } else if (!SecondToLast.aggregateWithAdd()) {
3388             assert(Last.aggregateWithAdd());
3389             _sub(T, Src0R, SecondToLast.createShiftedOperand(Func, Src0R));
3390           } else {
3391             _add(T, Src0R, SecondToLast.createShiftedOperand(Func, Src0R));
3392           }
3393         }
3394 
3395         // Odd numbers :   S                                 E   I   I
3396         //               +---+---+---+---+---+---+ ... +---+---+---+---+
3397         //     Shifts  = |   |   |   |   |   |   | ... |   |   |   |   |
3398         //               +---+---+---+---+---+---+ ... +---+---+---+---+
3399         // Even numbers:   I   S                                     E
3400         //
3401         // S: Start; E: End; I: Init
3402         for (int32_t I = Start; I < End; ++I) {
3403           const StrengthReduction::AggregationElement &Current = Shifts[I];
3404           Operand *SrcF = Current.createShiftedOperand(Func, Src0R);
3405           if (Current.aggregateWithAdd()) {
3406             _add(T, T, SrcF);
3407           } else {
3408             _sub(T, T, SrcF);
3409           }
3410         }
3411 
3412         if (Invert) {
3413           // T = 0 - T.
3414           _rsb(T, T, _0);
3415         }
3416 
3417         _mov(Dest, T);
3418         return;
3419       }
3420     }
3421     Variable *Src0R = Srcs.unswappedSrc0R(this);
3422     Variable *Src1R = Srcs.unswappedSrc1R(this);
3423     if (isVectorType(DestTy)) {
3424       _vmul(T, Src0R, Src1R);
3425     } else {
3426       _mul(T, Src0R, Src1R);
3427     }
3428     _mov(Dest, T);
3429     return;
3430   }
3431   case InstArithmetic::Shl: {
3432     Variable *Src0R = Srcs.unswappedSrc0R(this);
3433     if (!isVectorType(T->getType())) {
3434       if (Srcs.isSrc1ImmediateZero()) {
3435         _mov(T, Src0R);
3436       } else {
3437         Operand *Src1R = Srcs.unswappedSrc1RShAmtImm(this);
3438         _lsl(T, Src0R, Src1R);
3439       }
3440     } else {
3441       if (Srcs.hasConstOperand()) {
3442         ConstantInteger32 *ShAmt = llvm::cast<ConstantInteger32>(Srcs.src1());
3443         _vshl(T, Src0R, ShAmt);
3444       } else {
3445         auto *Src1R = Srcs.unswappedSrc1R(this);
3446         _vshl(T, Src0R, Src1R)->setSignType(InstARM32::FS_Unsigned);
3447       }
3448     }
3449     _mov(Dest, T);
3450     return;
3451   }
3452   case InstArithmetic::Lshr: {
3453     Variable *Src0R = Srcs.unswappedSrc0R(this);
3454     if (!isVectorType(T->getType())) {
3455       if (DestTy != IceType_i32) {
3456         _uxt(Src0R, Src0R);
3457       }
3458       if (Srcs.isSrc1ImmediateZero()) {
3459         _mov(T, Src0R);
3460       } else {
3461         Operand *Src1R = Srcs.unswappedSrc1RShAmtImm(this);
3462         _lsr(T, Src0R, Src1R);
3463       }
3464     } else {
3465       if (Srcs.hasConstOperand()) {
3466         ConstantInteger32 *ShAmt = llvm::cast<ConstantInteger32>(Srcs.src1());
3467         _vshr(T, Src0R, ShAmt)->setSignType(InstARM32::FS_Unsigned);
3468       } else {
3469         auto *Src1R = Srcs.unswappedSrc1R(this);
3470         auto *Src1RNeg = makeReg(Src1R->getType());
3471         _vneg(Src1RNeg, Src1R);
3472         _vshl(T, Src0R, Src1RNeg)->setSignType(InstARM32::FS_Unsigned);
3473       }
3474     }
3475     _mov(Dest, T);
3476     return;
3477   }
3478   case InstArithmetic::Ashr: {
3479     Variable *Src0R = Srcs.unswappedSrc0R(this);
3480     if (!isVectorType(T->getType())) {
3481       if (DestTy != IceType_i32) {
3482         _sxt(Src0R, Src0R);
3483       }
3484       if (Srcs.isSrc1ImmediateZero()) {
3485         _mov(T, Src0R);
3486       } else {
3487         _asr(T, Src0R, Srcs.unswappedSrc1RShAmtImm(this));
3488       }
3489     } else {
3490       if (Srcs.hasConstOperand()) {
3491         ConstantInteger32 *ShAmt = llvm::cast<ConstantInteger32>(Srcs.src1());
3492         _vshr(T, Src0R, ShAmt)->setSignType(InstARM32::FS_Signed);
3493       } else {
3494         auto *Src1R = Srcs.unswappedSrc1R(this);
3495         auto *Src1RNeg = makeReg(Src1R->getType());
3496         _vneg(Src1RNeg, Src1R);
3497         _vshl(T, Src0R, Src1RNeg)->setSignType(InstARM32::FS_Signed);
3498       }
3499     }
3500     _mov(Dest, T);
3501     return;
3502   }
3503   case InstArithmetic::Udiv:
3504   case InstArithmetic::Sdiv:
3505   case InstArithmetic::Urem:
3506   case InstArithmetic::Srem:
3507     llvm::report_fatal_error(
3508         "Integer div/rem should have been handled earlier.");
3509     return;
3510   case InstArithmetic::Fadd:
3511   case InstArithmetic::Fsub:
3512   case InstArithmetic::Fmul:
3513   case InstArithmetic::Fdiv:
3514   case InstArithmetic::Frem:
3515     llvm::report_fatal_error(
3516         "Floating point arith should have been handled earlier.");
3517     return;
3518   }
3519 }
3520 
lowerAssign(const InstAssign * Instr)3521 void TargetARM32::lowerAssign(const InstAssign *Instr) {
3522   Variable *Dest = Instr->getDest();
3523 
3524   if (Dest->isRematerializable()) {
3525     Context.insert<InstFakeDef>(Dest);
3526     return;
3527   }
3528 
3529   Operand *Src0 = Instr->getSrc(0);
3530   assert(Dest->getType() == Src0->getType());
3531   if (Dest->getType() == IceType_i64) {
3532     Src0 = legalizeUndef(Src0);
3533 
3534     Variable *T_Lo = makeReg(IceType_i32);
3535     auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
3536     Operand *Src0Lo = legalize(loOperand(Src0), Legal_Reg | Legal_Flex);
3537     _mov(T_Lo, Src0Lo);
3538     _mov(DestLo, T_Lo);
3539 
3540     Variable *T_Hi = makeReg(IceType_i32);
3541     auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3542     Operand *Src0Hi = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex);
3543     _mov(T_Hi, Src0Hi);
3544     _mov(DestHi, T_Hi);
3545 
3546     return;
3547   }
3548 
3549   Operand *NewSrc;
3550   if (Dest->hasReg()) {
3551     // If Dest already has a physical register, then legalize the Src operand
3552     // into a Variable with the same register assignment. This especially
3553     // helps allow the use of Flex operands.
3554     NewSrc = legalize(Src0, Legal_Reg | Legal_Flex, Dest->getRegNum());
3555   } else {
3556     // Dest could be a stack operand. Since we could potentially need to do a
3557     // Store (and store can only have Register operands), legalize this to a
3558     // register.
3559     NewSrc = legalize(Src0, Legal_Reg);
3560   }
3561 
3562   if (isVectorType(Dest->getType()) || isScalarFloatingType(Dest->getType())) {
3563     NewSrc = legalize(NewSrc, Legal_Reg | Legal_Mem);
3564   }
3565   _mov(Dest, NewSrc);
3566 }
3567 
lowerInt1ForBranch(Operand * Boolean,const LowerInt1BranchTarget & TargetTrue,const LowerInt1BranchTarget & TargetFalse,uint32_t ShortCircuitable)3568 TargetARM32::ShortCircuitCondAndLabel TargetARM32::lowerInt1ForBranch(
3569     Operand *Boolean, const LowerInt1BranchTarget &TargetTrue,
3570     const LowerInt1BranchTarget &TargetFalse, uint32_t ShortCircuitable) {
3571   InstARM32Label *NewShortCircuitLabel = nullptr;
3572   Operand *_1 = legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex);
3573 
3574   const Inst *Producer = Computations.getProducerOf(Boolean);
3575 
3576   if (Producer == nullptr) {
3577     // No producer, no problem: just do emit code to perform (Boolean & 1) and
3578     // set the flags register. The branch should be taken if the resulting flags
3579     // indicate a non-zero result.
3580     _tst(legalizeToReg(Boolean), _1);
3581     return ShortCircuitCondAndLabel(CondWhenTrue(CondARM32::NE));
3582   }
3583 
3584   switch (Producer->getKind()) {
3585   default:
3586     llvm::report_fatal_error("Unexpected producer.");
3587   case Inst::Icmp: {
3588     return ShortCircuitCondAndLabel(
3589         lowerIcmpCond(llvm::cast<InstIcmp>(Producer)));
3590   } break;
3591   case Inst::Fcmp: {
3592     return ShortCircuitCondAndLabel(
3593         lowerFcmpCond(llvm::cast<InstFcmp>(Producer)));
3594   } break;
3595   case Inst::Cast: {
3596     const auto *CastProducer = llvm::cast<InstCast>(Producer);
3597     assert(CastProducer->getCastKind() == InstCast::Trunc);
3598     Operand *Src = CastProducer->getSrc(0);
3599     if (Src->getType() == IceType_i64)
3600       Src = loOperand(Src);
3601     _tst(legalizeToReg(Src), _1);
3602     return ShortCircuitCondAndLabel(CondWhenTrue(CondARM32::NE));
3603   } break;
3604   case Inst::Arithmetic: {
3605     const auto *ArithProducer = llvm::cast<InstArithmetic>(Producer);
3606     switch (ArithProducer->getOp()) {
3607     default:
3608       llvm::report_fatal_error("Unhandled Arithmetic Producer.");
3609     case InstArithmetic::And: {
3610       if (!(ShortCircuitable & SC_And)) {
3611         NewShortCircuitLabel = InstARM32Label::create(Func, this);
3612       }
3613 
3614       LowerInt1BranchTarget NewTarget =
3615           TargetFalse.createForLabelOrDuplicate(NewShortCircuitLabel);
3616 
3617       ShortCircuitCondAndLabel CondAndLabel = lowerInt1ForBranch(
3618           Producer->getSrc(0), TargetTrue, NewTarget, SC_And);
3619       const CondWhenTrue &Cond = CondAndLabel.Cond;
3620 
3621       _br_short_circuit(NewTarget, Cond.invert());
3622 
3623       InstARM32Label *const ShortCircuitLabel = CondAndLabel.ShortCircuitTarget;
3624       if (ShortCircuitLabel != nullptr)
3625         Context.insert(ShortCircuitLabel);
3626 
3627       return ShortCircuitCondAndLabel(
3628           lowerInt1ForBranch(Producer->getSrc(1), TargetTrue, NewTarget, SC_All)
3629               .assertNoLabelAndReturnCond(),
3630           NewShortCircuitLabel);
3631     } break;
3632     case InstArithmetic::Or: {
3633       if (!(ShortCircuitable & SC_Or)) {
3634         NewShortCircuitLabel = InstARM32Label::create(Func, this);
3635       }
3636 
3637       LowerInt1BranchTarget NewTarget =
3638           TargetTrue.createForLabelOrDuplicate(NewShortCircuitLabel);
3639 
3640       ShortCircuitCondAndLabel CondAndLabel = lowerInt1ForBranch(
3641           Producer->getSrc(0), NewTarget, TargetFalse, SC_Or);
3642       const CondWhenTrue &Cond = CondAndLabel.Cond;
3643 
3644       _br_short_circuit(NewTarget, Cond);
3645 
3646       InstARM32Label *const ShortCircuitLabel = CondAndLabel.ShortCircuitTarget;
3647       if (ShortCircuitLabel != nullptr)
3648         Context.insert(ShortCircuitLabel);
3649 
3650       return ShortCircuitCondAndLabel(lowerInt1ForBranch(Producer->getSrc(1),
3651                                                          NewTarget, TargetFalse,
3652                                                          SC_All)
3653                                           .assertNoLabelAndReturnCond(),
3654                                       NewShortCircuitLabel);
3655     } break;
3656     }
3657   }
3658   }
3659 }
3660 
lowerBr(const InstBr * Instr)3661 void TargetARM32::lowerBr(const InstBr *Instr) {
3662   if (Instr->isUnconditional()) {
3663     _br(Instr->getTargetUnconditional());
3664     return;
3665   }
3666 
3667   CfgNode *TargetTrue = Instr->getTargetTrue();
3668   CfgNode *TargetFalse = Instr->getTargetFalse();
3669   ShortCircuitCondAndLabel CondAndLabel = lowerInt1ForBranch(
3670       Instr->getCondition(), LowerInt1BranchTarget(TargetTrue),
3671       LowerInt1BranchTarget(TargetFalse), SC_All);
3672   assert(CondAndLabel.ShortCircuitTarget == nullptr);
3673 
3674   const CondWhenTrue &Cond = CondAndLabel.Cond;
3675   if (Cond.WhenTrue1 != CondARM32::kNone) {
3676     assert(Cond.WhenTrue0 != CondARM32::AL);
3677     _br(TargetTrue, Cond.WhenTrue1);
3678   }
3679 
3680   switch (Cond.WhenTrue0) {
3681   default:
3682     _br(TargetTrue, TargetFalse, Cond.WhenTrue0);
3683     break;
3684   case CondARM32::kNone:
3685     _br(TargetFalse);
3686     break;
3687   case CondARM32::AL:
3688     _br(TargetTrue);
3689     break;
3690   }
3691 }
3692 
lowerCall(const InstCall * Instr)3693 void TargetARM32::lowerCall(const InstCall *Instr) {
3694   Operand *CallTarget = Instr->getCallTarget();
3695   if (Instr->isTargetHelperCall()) {
3696     auto TargetHelperPreamble = ARM32HelpersPreamble.find(CallTarget);
3697     if (TargetHelperPreamble != ARM32HelpersPreamble.end()) {
3698       (this->*TargetHelperPreamble->second)(Instr);
3699     }
3700   }
3701   MaybeLeafFunc = false;
3702   NeedsStackAlignment = true;
3703 
3704   // Assign arguments to registers and stack. Also reserve stack.
3705   TargetARM32::CallingConv CC;
3706   // Pair of Arg Operand -> GPR number assignments.
3707   llvm::SmallVector<std::pair<Operand *, RegNumT>, NumGPRArgs> GPRArgs;
3708   llvm::SmallVector<std::pair<Operand *, RegNumT>, NumFP32Args> FPArgs;
3709   // Pair of Arg Operand -> stack offset.
3710   llvm::SmallVector<std::pair<Operand *, int32_t>, 8> StackArgs;
3711   size_t ParameterAreaSizeBytes = 0;
3712 
3713   // Classify each argument operand according to the location where the
3714   // argument is passed.
3715   for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {
3716     Operand *Arg = legalizeUndef(Instr->getArg(i));
3717     const Type Ty = Arg->getType();
3718     bool InReg = false;
3719     RegNumT Reg;
3720     if (isScalarIntegerType(Ty)) {
3721       InReg = CC.argInGPR(Ty, &Reg);
3722     } else {
3723       InReg = CC.argInVFP(Ty, &Reg);
3724     }
3725 
3726     if (!InReg) {
3727       ParameterAreaSizeBytes =
3728           applyStackAlignmentTy(ParameterAreaSizeBytes, Ty);
3729       StackArgs.push_back(std::make_pair(Arg, ParameterAreaSizeBytes));
3730       ParameterAreaSizeBytes += typeWidthInBytesOnStack(Ty);
3731       continue;
3732     }
3733 
3734     if (Ty == IceType_i64) {
3735       Operand *Lo = loOperand(Arg);
3736       Operand *Hi = hiOperand(Arg);
3737       GPRArgs.push_back(std::make_pair(
3738           Lo, RegNumT::fixme(RegARM32::getI64PairFirstGPRNum(Reg))));
3739       GPRArgs.push_back(std::make_pair(
3740           Hi, RegNumT::fixme(RegARM32::getI64PairSecondGPRNum(Reg))));
3741     } else if (isScalarIntegerType(Ty)) {
3742       GPRArgs.push_back(std::make_pair(Arg, Reg));
3743     } else {
3744       FPArgs.push_back(std::make_pair(Arg, Reg));
3745     }
3746   }
3747 
3748   // Adjust the parameter area so that the stack is aligned. It is assumed that
3749   // the stack is already aligned at the start of the calling sequence.
3750   ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes);
3751 
3752   if (ParameterAreaSizeBytes > MaxOutArgsSizeBytes) {
3753     llvm::report_fatal_error("MaxOutArgsSizeBytes is not really a max.");
3754   }
3755 
3756   // Copy arguments that are passed on the stack to the appropriate stack
3757   // locations.
3758   Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
3759   for (auto &StackArg : StackArgs) {
3760     ConstantInteger32 *Loc =
3761         llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(StackArg.second));
3762     Type Ty = StackArg.first->getType();
3763     OperandARM32Mem *Addr;
3764     constexpr bool SignExt = false;
3765     if (OperandARM32Mem::canHoldOffset(Ty, SignExt, StackArg.second)) {
3766       Addr = OperandARM32Mem::create(Func, Ty, SP, Loc);
3767     } else {
3768       Variable *NewBase = Func->makeVariable(SP->getType());
3769       lowerArithmetic(
3770           InstArithmetic::create(Func, InstArithmetic::Add, NewBase, SP, Loc));
3771       Addr = formMemoryOperand(NewBase, Ty);
3772     }
3773     lowerStore(InstStore::create(Func, StackArg.first, Addr));
3774   }
3775 
3776   // Generate the call instruction. Assign its result to a temporary with high
3777   // register allocation weight.
3778   Variable *Dest = Instr->getDest();
3779   // ReturnReg doubles as ReturnRegLo as necessary.
3780   Variable *ReturnReg = nullptr;
3781   Variable *ReturnRegHi = nullptr;
3782   if (Dest) {
3783     switch (Dest->getType()) {
3784     case IceType_NUM:
3785       llvm::report_fatal_error("Invalid Call dest type");
3786       break;
3787     case IceType_void:
3788       break;
3789     case IceType_i1:
3790       assert(Computations.getProducerOf(Dest) == nullptr);
3791     // Fall-through intended.
3792     case IceType_i8:
3793     case IceType_i16:
3794     case IceType_i32:
3795       ReturnReg = makeReg(Dest->getType(), RegARM32::Reg_r0);
3796       break;
3797     case IceType_i64:
3798       ReturnReg = makeReg(IceType_i32, RegARM32::Reg_r0);
3799       ReturnRegHi = makeReg(IceType_i32, RegARM32::Reg_r1);
3800       break;
3801     case IceType_f32:
3802       ReturnReg = makeReg(Dest->getType(), RegARM32::Reg_s0);
3803       break;
3804     case IceType_f64:
3805       ReturnReg = makeReg(Dest->getType(), RegARM32::Reg_d0);
3806       break;
3807     case IceType_v4i1:
3808     case IceType_v8i1:
3809     case IceType_v16i1:
3810     case IceType_v16i8:
3811     case IceType_v8i16:
3812     case IceType_v4i32:
3813     case IceType_v4f32:
3814       ReturnReg = makeReg(Dest->getType(), RegARM32::Reg_q0);
3815       break;
3816     }
3817   }
3818 
3819   // Allow ConstantRelocatable to be left alone as a direct call, but force
3820   // other constants like ConstantInteger32 to be in a register and make it an
3821   // indirect call.
3822   if (!llvm::isa<ConstantRelocatable>(CallTarget)) {
3823     CallTarget = legalize(CallTarget, Legal_Reg);
3824   }
3825 
3826   // Copy arguments to be passed in registers to the appropriate registers.
3827   CfgVector<Variable *> RegArgs;
3828   for (auto &FPArg : FPArgs) {
3829     RegArgs.emplace_back(legalizeToReg(FPArg.first, FPArg.second));
3830   }
3831   for (auto &GPRArg : GPRArgs) {
3832     RegArgs.emplace_back(legalizeToReg(GPRArg.first, GPRArg.second));
3833   }
3834 
3835   // Generate a FakeUse of register arguments so that they do not get dead code
3836   // eliminated as a result of the FakeKill of scratch registers after the call.
3837   // These fake-uses need to be placed here to avoid argument registers from
3838   // being used during the legalizeToReg() calls above.
3839   for (auto *RegArg : RegArgs) {
3840     Context.insert<InstFakeUse>(RegArg);
3841   }
3842 
3843   InstARM32Call *NewCall =
3844       Sandboxer(this, InstBundleLock::Opt_AlignToEnd).bl(ReturnReg, CallTarget);
3845 
3846   if (ReturnRegHi)
3847     Context.insert<InstFakeDef>(ReturnRegHi);
3848 
3849   // Insert a register-kill pseudo instruction.
3850   Context.insert<InstFakeKill>(NewCall);
3851 
3852   // Generate a FakeUse to keep the call live if necessary.
3853   if (Instr->hasSideEffects() && ReturnReg) {
3854     Context.insert<InstFakeUse>(ReturnReg);
3855   }
3856 
3857   if (Dest != nullptr) {
3858     // Assign the result of the call to Dest.
3859     if (ReturnReg != nullptr) {
3860       if (ReturnRegHi) {
3861         auto *Dest64On32 = llvm::cast<Variable64On32>(Dest);
3862         Variable *DestLo = Dest64On32->getLo();
3863         Variable *DestHi = Dest64On32->getHi();
3864         _mov(DestLo, ReturnReg);
3865         _mov(DestHi, ReturnRegHi);
3866       } else {
3867         if (isFloatingType(Dest->getType()) || isVectorType(Dest->getType())) {
3868           _mov(Dest, ReturnReg);
3869         } else {
3870           assert(isIntegerType(Dest->getType()) &&
3871                  typeWidthInBytes(Dest->getType()) <= 4);
3872           _mov(Dest, ReturnReg);
3873         }
3874       }
3875     }
3876   }
3877 
3878   if (Instr->isTargetHelperCall()) {
3879     auto TargetHelpersPostamble = ARM32HelpersPostamble.find(CallTarget);
3880     if (TargetHelpersPostamble != ARM32HelpersPostamble.end()) {
3881       (this->*TargetHelpersPostamble->second)(Instr);
3882     }
3883   }
3884 }
3885 
3886 namespace {
configureBitcastTemporary(Variable64On32 * Var)3887 void configureBitcastTemporary(Variable64On32 *Var) {
3888   Var->setMustNotHaveReg();
3889   Var->getHi()->setMustHaveReg();
3890   Var->getLo()->setMustHaveReg();
3891 }
3892 } // end of anonymous namespace
3893 
lowerCast(const InstCast * Instr)3894 void TargetARM32::lowerCast(const InstCast *Instr) {
3895   InstCast::OpKind CastKind = Instr->getCastKind();
3896   Variable *Dest = Instr->getDest();
3897   const Type DestTy = Dest->getType();
3898   Operand *Src0 = legalizeUndef(Instr->getSrc(0));
3899   switch (CastKind) {
3900   default:
3901     Func->setError("Cast type not supported");
3902     return;
3903   case InstCast::Sext: {
3904     if (isVectorType(DestTy)) {
3905       Variable *T0 = makeReg(DestTy);
3906       Variable *T1 = makeReg(DestTy);
3907       ConstantInteger32 *ShAmt = nullptr;
3908       switch (DestTy) {
3909       default:
3910         llvm::report_fatal_error("Unexpected type in vector sext.");
3911       case IceType_v16i8:
3912         ShAmt = llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(7));
3913         break;
3914       case IceType_v8i16:
3915         ShAmt = llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(15));
3916         break;
3917       case IceType_v4i32:
3918         ShAmt = llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(31));
3919         break;
3920       }
3921       auto *Src0R = legalizeToReg(Src0);
3922       _vshl(T0, Src0R, ShAmt);
3923       _vshr(T1, T0, ShAmt)->setSignType(InstARM32::FS_Signed);
3924       _mov(Dest, T1);
3925     } else if (DestTy == IceType_i64) {
3926       // t1=sxtb src; t2= mov t1 asr #31; dst.lo=t1; dst.hi=t2
3927       Constant *ShiftAmt = Ctx->getConstantInt32(31);
3928       auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
3929       auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3930       Variable *T_Lo = makeReg(DestLo->getType());
3931       if (Src0->getType() == IceType_i32) {
3932         Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex);
3933         _mov(T_Lo, Src0RF);
3934       } else if (Src0->getType() != IceType_i1) {
3935         Variable *Src0R = legalizeToReg(Src0);
3936         _sxt(T_Lo, Src0R);
3937       } else {
3938         Operand *_0 = Ctx->getConstantZero(IceType_i32);
3939         Operand *_m1 = Ctx->getConstantInt32(-1);
3940         lowerInt1ForSelect(T_Lo, Src0, _m1, _0);
3941       }
3942       _mov(DestLo, T_Lo);
3943       Variable *T_Hi = makeReg(DestHi->getType());
3944       if (Src0->getType() != IceType_i1) {
3945         _mov(T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, T_Lo,
3946                                                OperandARM32::ASR, ShiftAmt));
3947       } else {
3948         // For i1, the asr instruction is already done above.
3949         _mov(T_Hi, T_Lo);
3950       }
3951       _mov(DestHi, T_Hi);
3952     } else if (Src0->getType() != IceType_i1) {
3953       // t1 = sxt src; dst = t1
3954       Variable *Src0R = legalizeToReg(Src0);
3955       Variable *T = makeReg(DestTy);
3956       _sxt(T, Src0R);
3957       _mov(Dest, T);
3958     } else {
3959       Constant *_0 = Ctx->getConstantZero(IceType_i32);
3960       Operand *_m1 = Ctx->getConstantInt(DestTy, -1);
3961       Variable *T = makeReg(DestTy);
3962       lowerInt1ForSelect(T, Src0, _m1, _0);
3963       _mov(Dest, T);
3964     }
3965     break;
3966   }
3967   case InstCast::Zext: {
3968     if (isVectorType(DestTy)) {
3969       auto *Mask = makeReg(DestTy);
3970       auto *_1 = Ctx->getConstantInt32(1);
3971       auto *T = makeReg(DestTy);
3972       auto *Src0R = legalizeToReg(Src0);
3973       _mov(Mask, _1);
3974       _vand(T, Src0R, Mask);
3975       _mov(Dest, T);
3976     } else if (DestTy == IceType_i64) {
3977       // t1=uxtb src; dst.lo=t1; dst.hi=0
3978       Operand *_0 =
3979           legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex);
3980       auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
3981       auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3982       Variable *T_Lo = makeReg(DestLo->getType());
3983 
3984       switch (Src0->getType()) {
3985       default: {
3986         assert(Src0->getType() != IceType_i64);
3987         _uxt(T_Lo, legalizeToReg(Src0));
3988       } break;
3989       case IceType_i32: {
3990         _mov(T_Lo, legalize(Src0, Legal_Reg | Legal_Flex));
3991       } break;
3992       case IceType_i1: {
3993         SafeBoolChain Safe = lowerInt1(T_Lo, Src0);
3994         if (Safe == SBC_No) {
3995           Operand *_1 =
3996               legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex);
3997           _and(T_Lo, T_Lo, _1);
3998         }
3999       } break;
4000       }
4001 
4002       _mov(DestLo, T_Lo);
4003 
4004       Variable *T_Hi = makeReg(DestLo->getType());
4005       _mov(T_Hi, _0);
4006       _mov(DestHi, T_Hi);
4007     } else if (Src0->getType() == IceType_i1) {
4008       Variable *T = makeReg(DestTy);
4009 
4010       SafeBoolChain Safe = lowerInt1(T, Src0);
4011       if (Safe == SBC_No) {
4012         Operand *_1 = legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex);
4013         _and(T, T, _1);
4014       }
4015 
4016       _mov(Dest, T);
4017     } else {
4018       // t1 = uxt src; dst = t1
4019       Variable *Src0R = legalizeToReg(Src0);
4020       Variable *T = makeReg(DestTy);
4021       _uxt(T, Src0R);
4022       _mov(Dest, T);
4023     }
4024     break;
4025   }
4026   case InstCast::Trunc: {
4027     if (isVectorType(DestTy)) {
4028       auto *T = makeReg(DestTy);
4029       auto *Src0R = legalizeToReg(Src0);
4030       _mov(T, Src0R);
4031       _mov(Dest, T);
4032     } else {
4033       if (Src0->getType() == IceType_i64)
4034         Src0 = loOperand(Src0);
4035       Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex);
4036       // t1 = trunc Src0RF; Dest = t1
4037       Variable *T = makeReg(DestTy);
4038       _mov(T, Src0RF);
4039       if (DestTy == IceType_i1)
4040         _and(T, T, Ctx->getConstantInt1(1));
4041       _mov(Dest, T);
4042     }
4043     break;
4044   }
4045   case InstCast::Fptrunc:
4046   case InstCast::Fpext: {
4047     // fptrunc: dest.f32 = fptrunc src0.fp64
4048     // fpext: dest.f64 = fptrunc src0.fp32
4049     const bool IsTrunc = CastKind == InstCast::Fptrunc;
4050     assert(!isVectorType(DestTy));
4051     assert(DestTy == (IsTrunc ? IceType_f32 : IceType_f64));
4052     assert(Src0->getType() == (IsTrunc ? IceType_f64 : IceType_f32));
4053     Variable *Src0R = legalizeToReg(Src0);
4054     Variable *T = makeReg(DestTy);
4055     _vcvt(T, Src0R, IsTrunc ? InstARM32Vcvt::D2s : InstARM32Vcvt::S2d);
4056     _mov(Dest, T);
4057     break;
4058   }
4059   case InstCast::Fptosi:
4060   case InstCast::Fptoui: {
4061     const bool DestIsSigned = CastKind == InstCast::Fptosi;
4062     Variable *Src0R = legalizeToReg(Src0);
4063 
4064     if (isVectorType(DestTy)) {
4065       assert(typeElementType(Src0->getType()) == IceType_f32);
4066       auto *T = makeReg(DestTy);
4067       _vcvt(T, Src0R,
4068             DestIsSigned ? InstARM32Vcvt::Vs2si : InstARM32Vcvt::Vs2ui);
4069       _mov(Dest, T);
4070       break;
4071     }
4072 
4073     const bool Src0IsF32 = isFloat32Asserting32Or64(Src0->getType());
4074     if (llvm::isa<Variable64On32>(Dest)) {
4075       llvm::report_fatal_error("fp-to-i64 should have been pre-lowered.");
4076     }
4077     // fptosi:
4078     //     t1.fp = vcvt src0.fp
4079     //     t2.i32 = vmov t1.fp
4080     //     dest.int = conv t2.i32     @ Truncates the result if needed.
4081     // fptoui:
4082     //     t1.fp = vcvt src0.fp
4083     //     t2.u32 = vmov t1.fp
4084     //     dest.uint = conv t2.u32    @ Truncates the result if needed.
4085     Variable *T_fp = makeReg(IceType_f32);
4086     const InstARM32Vcvt::VcvtVariant Conversion =
4087         Src0IsF32 ? (DestIsSigned ? InstARM32Vcvt::S2si : InstARM32Vcvt::S2ui)
4088                   : (DestIsSigned ? InstARM32Vcvt::D2si : InstARM32Vcvt::D2ui);
4089     _vcvt(T_fp, Src0R, Conversion);
4090     Variable *T = makeReg(IceType_i32);
4091     _mov(T, T_fp);
4092     if (DestTy != IceType_i32) {
4093       Variable *T_1 = makeReg(DestTy);
4094       lowerCast(InstCast::create(Func, InstCast::Trunc, T_1, T));
4095       T = T_1;
4096     }
4097     _mov(Dest, T);
4098     break;
4099   }
4100   case InstCast::Sitofp:
4101   case InstCast::Uitofp: {
4102     const bool SourceIsSigned = CastKind == InstCast::Sitofp;
4103 
4104     if (isVectorType(DestTy)) {
4105       assert(typeElementType(DestTy) == IceType_f32);
4106       auto *T = makeReg(DestTy);
4107       Variable *Src0R = legalizeToReg(Src0);
4108       _vcvt(T, Src0R,
4109             SourceIsSigned ? InstARM32Vcvt::Vsi2s : InstARM32Vcvt::Vui2s);
4110       _mov(Dest, T);
4111       break;
4112     }
4113 
4114     const bool DestIsF32 = isFloat32Asserting32Or64(DestTy);
4115     if (Src0->getType() == IceType_i64) {
4116       llvm::report_fatal_error("i64-to-fp should have been pre-lowered.");
4117     }
4118     // sitofp:
4119     //     t1.i32 = sext src.int    @ sign-extends src0 if needed.
4120     //     t2.fp32 = vmov t1.i32
4121     //     t3.fp = vcvt.{fp}.s32    @ fp is either f32 or f64
4122     // uitofp:
4123     //     t1.i32 = zext src.int    @ zero-extends src0 if needed.
4124     //     t2.fp32 = vmov t1.i32
4125     //     t3.fp = vcvt.{fp}.s32    @ fp is either f32 or f64
4126     if (Src0->getType() != IceType_i32) {
4127       Variable *Src0R_32 = makeReg(IceType_i32);
4128       lowerCast(InstCast::create(Func, SourceIsSigned ? InstCast::Sext
4129                                                       : InstCast::Zext,
4130                                  Src0R_32, Src0));
4131       Src0 = Src0R_32;
4132     }
4133     Variable *Src0R = legalizeToReg(Src0);
4134     Variable *Src0R_f32 = makeReg(IceType_f32);
4135     _mov(Src0R_f32, Src0R);
4136     Src0R = Src0R_f32;
4137     Variable *T = makeReg(DestTy);
4138     const InstARM32Vcvt::VcvtVariant Conversion =
4139         DestIsF32
4140             ? (SourceIsSigned ? InstARM32Vcvt::Si2s : InstARM32Vcvt::Ui2s)
4141             : (SourceIsSigned ? InstARM32Vcvt::Si2d : InstARM32Vcvt::Ui2d);
4142     _vcvt(T, Src0R, Conversion);
4143     _mov(Dest, T);
4144     break;
4145   }
4146   case InstCast::Bitcast: {
4147     Operand *Src0 = Instr->getSrc(0);
4148     if (DestTy == Src0->getType()) {
4149       auto *Assign = InstAssign::create(Func, Dest, Src0);
4150       lowerAssign(Assign);
4151       return;
4152     }
4153     switch (DestTy) {
4154     case IceType_NUM:
4155     case IceType_void:
4156       llvm::report_fatal_error("Unexpected bitcast.");
4157     case IceType_i1:
4158       UnimplementedLoweringError(this, Instr);
4159       break;
4160     case IceType_i8:
4161       assert(Src0->getType() == IceType_v8i1);
4162       llvm::report_fatal_error(
4163           "i8 to v8i1 conversion should have been prelowered.");
4164       break;
4165     case IceType_i16:
4166       assert(Src0->getType() == IceType_v16i1);
4167       llvm::report_fatal_error(
4168           "i16 to v16i1 conversion should have been prelowered.");
4169       break;
4170     case IceType_i32:
4171     case IceType_f32: {
4172       Variable *Src0R = legalizeToReg(Src0);
4173       Variable *T = makeReg(DestTy);
4174       _mov(T, Src0R);
4175       lowerAssign(InstAssign::create(Func, Dest, T));
4176       break;
4177     }
4178     case IceType_i64: {
4179       // t0, t1 <- src0
4180       // dest[31..0]  = t0
4181       // dest[63..32] = t1
4182       assert(Src0->getType() == IceType_f64);
4183       auto *T = llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64));
4184       T->initHiLo(Func);
4185       configureBitcastTemporary(T);
4186       Variable *Src0R = legalizeToReg(Src0);
4187       _mov(T, Src0R);
4188       Context.insert<InstFakeUse>(T->getHi());
4189       Context.insert<InstFakeUse>(T->getLo());
4190       lowerAssign(InstAssign::create(Func, Dest, T));
4191       break;
4192     }
4193     case IceType_f64: {
4194       // T0 <- lo(src)
4195       // T1 <- hi(src)
4196       // vmov T2, T0, T1
4197       // Dest <- T2
4198       assert(Src0->getType() == IceType_i64);
4199       Variable *T = makeReg(DestTy);
4200       auto *Src64 = llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64));
4201       Src64->initHiLo(Func);
4202       configureBitcastTemporary(Src64);
4203       lowerAssign(InstAssign::create(Func, Src64, Src0));
4204       _mov(T, Src64);
4205       lowerAssign(InstAssign::create(Func, Dest, T));
4206       break;
4207     }
4208     case IceType_v8i1:
4209       assert(Src0->getType() == IceType_i8);
4210       llvm::report_fatal_error(
4211           "v8i1 to i8 conversion should have been prelowered.");
4212       break;
4213     case IceType_v16i1:
4214       assert(Src0->getType() == IceType_i16);
4215       llvm::report_fatal_error(
4216           "v16i1 to i16 conversion should have been prelowered.");
4217       break;
4218     case IceType_v4i1:
4219     case IceType_v8i16:
4220     case IceType_v16i8:
4221     case IceType_v4f32:
4222     case IceType_v4i32: {
4223       assert(typeWidthInBytes(DestTy) == typeWidthInBytes(Src0->getType()));
4224       assert(isVectorType(DestTy) == isVectorType(Src0->getType()));
4225       Variable *T = makeReg(DestTy);
4226       _mov(T, Src0);
4227       _mov(Dest, T);
4228       break;
4229     }
4230     }
4231     break;
4232   }
4233   }
4234 }
4235 
lowerExtractElement(const InstExtractElement * Instr)4236 void TargetARM32::lowerExtractElement(const InstExtractElement *Instr) {
4237   Variable *Dest = Instr->getDest();
4238   Type DestTy = Dest->getType();
4239 
4240   Variable *Src0 = legalizeToReg(Instr->getSrc(0));
4241   Operand *Src1 = Instr->getSrc(1);
4242 
4243   if (const auto *Imm = llvm::dyn_cast<ConstantInteger32>(Src1)) {
4244     const uint32_t Index = Imm->getValue();
4245     Variable *T = makeReg(DestTy);
4246     Variable *TSrc0 = makeReg(Src0->getType());
4247 
4248     if (isFloatingType(DestTy)) {
4249       // We need to make sure the source is in a suitable register.
4250       TSrc0->setRegClass(RegARM32::RCARM32_QtoS);
4251     }
4252 
4253     _mov(TSrc0, Src0);
4254     _extractelement(T, TSrc0, Index);
4255     _mov(Dest, T);
4256     return;
4257   }
4258   assert(false && "extractelement requires a constant index");
4259 }
4260 
4261 namespace {
4262 // Validates FCMPARM32_TABLE's declaration w.r.t. InstFcmp::FCondition ordering
4263 // (and naming).
4264 enum {
4265 #define X(val, CC0, CC1, CC0_V, CC1_V, INV_V, NEG_V) _fcmp_ll_##val,
4266   FCMPARM32_TABLE
4267 #undef X
4268       _fcmp_ll_NUM
4269 };
4270 
4271 enum {
4272 #define X(tag, str) _fcmp_hl_##tag = InstFcmp::tag,
4273   ICEINSTFCMP_TABLE
4274 #undef X
4275       _fcmp_hl_NUM
4276 };
4277 
4278 static_assert((uint32_t)_fcmp_hl_NUM == (uint32_t)_fcmp_ll_NUM,
4279               "Inconsistency between high-level and low-level fcmp tags.");
4280 #define X(tag, str)                                                            \
4281   static_assert(                                                               \
4282       (uint32_t)_fcmp_hl_##tag == (uint32_t)_fcmp_ll_##tag,                    \
4283       "Inconsistency between high-level and low-level fcmp tag " #tag);
4284 ICEINSTFCMP_TABLE
4285 #undef X
4286 
4287 struct {
4288   CondARM32::Cond CC0;
4289   CondARM32::Cond CC1;
4290 } TableFcmp[] = {
4291 #define X(val, CC0, CC1, CC0_V, CC1_V, INV_V, NEG_V)                           \
4292   { CondARM32::CC0, CondARM32::CC1 }                                           \
4293   ,
4294     FCMPARM32_TABLE
4295 #undef X
4296 };
4297 
isFloatingPointZero(const Operand * Src)4298 bool isFloatingPointZero(const Operand *Src) {
4299   if (const auto *F32 = llvm::dyn_cast<const ConstantFloat>(Src)) {
4300     return Utils::isPositiveZero(F32->getValue());
4301   }
4302 
4303   if (const auto *F64 = llvm::dyn_cast<const ConstantDouble>(Src)) {
4304     return Utils::isPositiveZero(F64->getValue());
4305   }
4306 
4307   return false;
4308 }
4309 } // end of anonymous namespace
4310 
lowerFcmpCond(const InstFcmp * Instr)4311 TargetARM32::CondWhenTrue TargetARM32::lowerFcmpCond(const InstFcmp *Instr) {
4312   InstFcmp::FCond Condition = Instr->getCondition();
4313   switch (Condition) {
4314   case InstFcmp::False:
4315     return CondWhenTrue(CondARM32::kNone);
4316   case InstFcmp::True:
4317     return CondWhenTrue(CondARM32::AL);
4318     break;
4319   default: {
4320     Variable *Src0R = legalizeToReg(Instr->getSrc(0));
4321     Operand *Src1 = Instr->getSrc(1);
4322     if (isFloatingPointZero(Src1)) {
4323       _vcmp(Src0R, OperandARM32FlexFpZero::create(Func, Src0R->getType()));
4324     } else {
4325       _vcmp(Src0R, legalizeToReg(Src1));
4326     }
4327     _vmrs();
4328     assert(Condition < llvm::array_lengthof(TableFcmp));
4329     return CondWhenTrue(TableFcmp[Condition].CC0, TableFcmp[Condition].CC1);
4330   }
4331   }
4332 }
4333 
lowerFcmp(const InstFcmp * Instr)4334 void TargetARM32::lowerFcmp(const InstFcmp *Instr) {
4335   Variable *Dest = Instr->getDest();
4336   const Type DestTy = Dest->getType();
4337 
4338   if (isVectorType(DestTy)) {
4339     if (Instr->getCondition() == InstFcmp::False) {
4340       constexpr Type SafeTypeForMovingConstant = IceType_v4i32;
4341       auto *T = makeReg(SafeTypeForMovingConstant);
4342       _mov(T, llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(0)));
4343       _mov(Dest, T);
4344       return;
4345     }
4346 
4347     if (Instr->getCondition() == InstFcmp::True) {
4348       constexpr Type SafeTypeForMovingConstant = IceType_v4i32;
4349       auto *T = makeReg(SafeTypeForMovingConstant);
4350       _mov(T, llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(1)));
4351       _mov(Dest, T);
4352       return;
4353     }
4354 
4355     Variable *T0;
4356     Variable *T1;
4357     bool Negate = false;
4358     auto *Src0 = legalizeToReg(Instr->getSrc(0));
4359     auto *Src1 = legalizeToReg(Instr->getSrc(1));
4360 
4361     switch (Instr->getCondition()) {
4362     default:
4363       llvm::report_fatal_error("Unhandled fp comparison.");
4364 #define _Vcnone(Tptr, S0, S1)                                                  \
4365   do {                                                                         \
4366     *(Tptr) = nullptr;                                                         \
4367   } while (0)
4368 #define _Vceq(Tptr, S0, S1)                                                    \
4369   do {                                                                         \
4370     *(Tptr) = makeReg(DestTy);                                                 \
4371     _vceq(*(Tptr), S0, S1);                                                    \
4372   } while (0)
4373 #define _Vcge(Tptr, S0, S1)                                                    \
4374   do {                                                                         \
4375     *(Tptr) = makeReg(DestTy);                                                 \
4376     _vcge(*(Tptr), S0, S1)->setSignType(InstARM32::FS_Signed);                 \
4377   } while (0)
4378 #define _Vcgt(Tptr, S0, S1)                                                    \
4379   do {                                                                         \
4380     *(Tptr) = makeReg(DestTy);                                                 \
4381     _vcgt(*(Tptr), S0, S1)->setSignType(InstARM32::FS_Signed);                 \
4382   } while (0)
4383 #define X(val, CC0, CC1, CC0_V, CC1_V, INV_V, NEG_V)                           \
4384   case InstFcmp::val: {                                                        \
4385     _Vc##CC0_V(&T0, (INV_V) ? Src1 : Src0, (INV_V) ? Src0 : Src1);             \
4386     _Vc##CC1_V(&T1, (INV_V) ? Src0 : Src1, (INV_V) ? Src1 : Src0);             \
4387     Negate = NEG_V;                                                            \
4388   } break;
4389       FCMPARM32_TABLE
4390 #undef X
4391 #undef _Vcgt
4392 #undef _Vcge
4393 #undef _Vceq
4394 #undef _Vcnone
4395     }
4396     assert(T0 != nullptr);
4397     Variable *T = T0;
4398     if (T1 != nullptr) {
4399       T = makeReg(DestTy);
4400       _vorr(T, T0, T1);
4401     }
4402 
4403     if (Negate) {
4404       auto *TNeg = makeReg(DestTy);
4405       _vmvn(TNeg, T);
4406       T = TNeg;
4407     }
4408 
4409     _mov(Dest, T);
4410     return;
4411   }
4412 
4413   Variable *T = makeReg(IceType_i1);
4414   Operand *_1 = legalize(Ctx->getConstantInt32(1), Legal_Reg | Legal_Flex);
4415   Operand *_0 =
4416       legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex);
4417 
4418   CondWhenTrue Cond = lowerFcmpCond(Instr);
4419 
4420   bool RedefineT = false;
4421   if (Cond.WhenTrue0 != CondARM32::AL) {
4422     _mov(T, _0);
4423     RedefineT = true;
4424   }
4425 
4426   if (Cond.WhenTrue0 == CondARM32::kNone) {
4427     _mov(Dest, T);
4428     return;
4429   }
4430 
4431   if (RedefineT) {
4432     _mov_redefined(T, _1, Cond.WhenTrue0);
4433   } else {
4434     _mov(T, _1, Cond.WhenTrue0);
4435   }
4436 
4437   if (Cond.WhenTrue1 != CondARM32::kNone) {
4438     _mov_redefined(T, _1, Cond.WhenTrue1);
4439   }
4440 
4441   _mov(Dest, T);
4442 }
4443 
4444 TargetARM32::CondWhenTrue
lowerInt64IcmpCond(InstIcmp::ICond Condition,Operand * Src0,Operand * Src1)4445 TargetARM32::lowerInt64IcmpCond(InstIcmp::ICond Condition, Operand *Src0,
4446                                 Operand *Src1) {
4447   assert(Condition < llvm::array_lengthof(TableIcmp64));
4448 
4449   Int32Operands SrcsLo(loOperand(Src0), loOperand(Src1));
4450   Int32Operands SrcsHi(hiOperand(Src0), hiOperand(Src1));
4451   assert(SrcsLo.hasConstOperand() == SrcsHi.hasConstOperand());
4452   assert(SrcsLo.swappedOperands() == SrcsHi.swappedOperands());
4453 
4454   if (SrcsLo.hasConstOperand()) {
4455     const uint32_t ValueLo = SrcsLo.getConstantValue();
4456     const uint32_t ValueHi = SrcsHi.getConstantValue();
4457     const uint64_t Value = (static_cast<uint64_t>(ValueHi) << 32) | ValueLo;
4458     if ((Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) &&
4459         Value == 0) {
4460       Variable *T = makeReg(IceType_i32);
4461       Variable *Src0LoR = SrcsLo.src0R(this);
4462       Variable *Src0HiR = SrcsHi.src0R(this);
4463       _orrs(T, Src0LoR, Src0HiR);
4464       Context.insert<InstFakeUse>(T);
4465       return CondWhenTrue(TableIcmp64[Condition].C1);
4466     }
4467 
4468     Variable *Src0RLo = SrcsLo.src0R(this);
4469     Variable *Src0RHi = SrcsHi.src0R(this);
4470     Operand *Src1RFLo = SrcsLo.src1RF(this);
4471     Operand *Src1RFHi = ValueLo == ValueHi ? Src1RFLo : SrcsHi.src1RF(this);
4472 
4473     const bool UseRsb =
4474         TableIcmp64[Condition].Swapped != SrcsLo.swappedOperands();
4475 
4476     if (UseRsb) {
4477       if (TableIcmp64[Condition].IsSigned) {
4478         Variable *T = makeReg(IceType_i32);
4479         _rsbs(T, Src0RLo, Src1RFLo);
4480         Context.insert<InstFakeUse>(T);
4481 
4482         T = makeReg(IceType_i32);
4483         _rscs(T, Src0RHi, Src1RFHi);
4484         // We need to add a FakeUse here because liveness gets mad at us (Def
4485         // without Use.) Note that flag-setting instructions are considered to
4486         // have side effects and, therefore, are not DCE'ed.
4487         Context.insert<InstFakeUse>(T);
4488       } else {
4489         Variable *T = makeReg(IceType_i32);
4490         _rsbs(T, Src0RHi, Src1RFHi);
4491         Context.insert<InstFakeUse>(T);
4492 
4493         T = makeReg(IceType_i32);
4494         _rsbs(T, Src0RLo, Src1RFLo, CondARM32::EQ);
4495         Context.insert<InstFakeUse>(T);
4496       }
4497     } else {
4498       if (TableIcmp64[Condition].IsSigned) {
4499         _cmp(Src0RLo, Src1RFLo);
4500         Variable *T = makeReg(IceType_i32);
4501         _sbcs(T, Src0RHi, Src1RFHi);
4502         Context.insert<InstFakeUse>(T);
4503       } else {
4504         _cmp(Src0RHi, Src1RFHi);
4505         _cmp(Src0RLo, Src1RFLo, CondARM32::EQ);
4506       }
4507     }
4508 
4509     return CondWhenTrue(TableIcmp64[Condition].C1);
4510   }
4511 
4512   Variable *Src0RLo, *Src0RHi;
4513   Operand *Src1RFLo, *Src1RFHi;
4514   if (TableIcmp64[Condition].Swapped) {
4515     Src0RLo = legalizeToReg(loOperand(Src1));
4516     Src0RHi = legalizeToReg(hiOperand(Src1));
4517     Src1RFLo = legalizeToReg(loOperand(Src0));
4518     Src1RFHi = legalizeToReg(hiOperand(Src0));
4519   } else {
4520     Src0RLo = legalizeToReg(loOperand(Src0));
4521     Src0RHi = legalizeToReg(hiOperand(Src0));
4522     Src1RFLo = legalizeToReg(loOperand(Src1));
4523     Src1RFHi = legalizeToReg(hiOperand(Src1));
4524   }
4525 
4526   // a=icmp cond, b, c ==>
4527   // GCC does:
4528   //   cmp      b.hi, c.hi     or  cmp      b.lo, c.lo
4529   //   cmp.eq   b.lo, c.lo         sbcs t1, b.hi, c.hi
4530   //   mov.<C1> t, #1              mov.<C1> t, #1
4531   //   mov.<C2> t, #0              mov.<C2> t, #0
4532   //   mov      a, t               mov      a, t
4533   // where the "cmp.eq b.lo, c.lo" is used for unsigned and "sbcs t1, hi, hi"
4534   // is used for signed compares. In some cases, b and c need to be swapped as
4535   // well.
4536   //
4537   // LLVM does:
4538   // for EQ and NE:
4539   //   eor  t1, b.hi, c.hi
4540   //   eor  t2, b.lo, c.hi
4541   //   orrs t, t1, t2
4542   //   mov.<C> t, #1
4543   //   mov  a, t
4544   //
4545   // that's nice in that it's just as short but has fewer dependencies for
4546   // better ILP at the cost of more registers.
4547   //
4548   // Otherwise for signed/unsigned <, <=, etc. LLVM uses a sequence with two
4549   // unconditional mov #0, two cmps, two conditional mov #1, and one
4550   // conditional reg mov. That has few dependencies for good ILP, but is a
4551   // longer sequence.
4552   //
4553   // So, we are going with the GCC version since it's usually better (except
4554   // perhaps for eq/ne). We could revisit special-casing eq/ne later.
4555   if (TableIcmp64[Condition].IsSigned) {
4556     Variable *ScratchReg = makeReg(IceType_i32);
4557     _cmp(Src0RLo, Src1RFLo);
4558     _sbcs(ScratchReg, Src0RHi, Src1RFHi);
4559     // ScratchReg isn't going to be used, but we need the side-effect of
4560     // setting flags from this operation.
4561     Context.insert<InstFakeUse>(ScratchReg);
4562   } else {
4563     _cmp(Src0RHi, Src1RFHi);
4564     _cmp(Src0RLo, Src1RFLo, CondARM32::EQ);
4565   }
4566   return CondWhenTrue(TableIcmp64[Condition].C1);
4567 }
4568 
4569 TargetARM32::CondWhenTrue
lowerInt32IcmpCond(InstIcmp::ICond Condition,Operand * Src0,Operand * Src1)4570 TargetARM32::lowerInt32IcmpCond(InstIcmp::ICond Condition, Operand *Src0,
4571                                 Operand *Src1) {
4572   Int32Operands Srcs(Src0, Src1);
4573   if (!Srcs.hasConstOperand()) {
4574 
4575     Variable *Src0R = Srcs.src0R(this);
4576     Operand *Src1RF = Srcs.src1RF(this);
4577     _cmp(Src0R, Src1RF);
4578     return CondWhenTrue(getIcmp32Mapping(Condition));
4579   }
4580 
4581   Variable *Src0R = Srcs.src0R(this);
4582   const int32_t Value = Srcs.getConstantValue();
4583   if ((Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) && Value == 0) {
4584     _tst(Src0R, Src0R);
4585     return CondWhenTrue(getIcmp32Mapping(Condition));
4586   }
4587 
4588   if (!Srcs.swappedOperands() && !Srcs.immediateIsFlexEncodable() &&
4589       Srcs.negatedImmediateIsFlexEncodable()) {
4590     Operand *Src1F = Srcs.negatedSrc1F(this);
4591     _cmn(Src0R, Src1F);
4592     return CondWhenTrue(getIcmp32Mapping(Condition));
4593   }
4594 
4595   Operand *Src1RF = Srcs.src1RF(this);
4596   if (!Srcs.swappedOperands()) {
4597     _cmp(Src0R, Src1RF);
4598   } else {
4599     Variable *T = makeReg(IceType_i32);
4600     _rsbs(T, Src0R, Src1RF);
4601     Context.insert<InstFakeUse>(T);
4602   }
4603   return CondWhenTrue(getIcmp32Mapping(Condition));
4604 }
4605 
4606 TargetARM32::CondWhenTrue
lowerInt8AndInt16IcmpCond(InstIcmp::ICond Condition,Operand * Src0,Operand * Src1)4607 TargetARM32::lowerInt8AndInt16IcmpCond(InstIcmp::ICond Condition, Operand *Src0,
4608                                        Operand *Src1) {
4609   Int32Operands Srcs(Src0, Src1);
4610   const int32_t ShAmt = 32 - getScalarIntBitWidth(Src0->getType());
4611   assert(ShAmt >= 0);
4612 
4613   if (!Srcs.hasConstOperand()) {
4614     Variable *Src0R = makeReg(IceType_i32);
4615     Operand *ShAmtImm = shAmtImm(ShAmt);
4616     _lsl(Src0R, legalizeToReg(Src0), ShAmtImm);
4617 
4618     Variable *Src1R = legalizeToReg(Src1);
4619     auto *Src1F = OperandARM32FlexReg::create(Func, IceType_i32, Src1R,
4620                                               OperandARM32::LSL, ShAmtImm);
4621     _cmp(Src0R, Src1F);
4622     return CondWhenTrue(getIcmp32Mapping(Condition));
4623   }
4624 
4625   const int32_t Value = Srcs.getConstantValue();
4626   if ((Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) && Value == 0) {
4627     Operand *ShAmtImm = shAmtImm(ShAmt);
4628     Variable *T = makeReg(IceType_i32);
4629     _lsls(T, Srcs.src0R(this), ShAmtImm);
4630     Context.insert<InstFakeUse>(T);
4631     return CondWhenTrue(getIcmp32Mapping(Condition));
4632   }
4633 
4634   Variable *ConstR = makeReg(IceType_i32);
4635   _mov(ConstR,
4636        legalize(Ctx->getConstantInt32(Value << ShAmt), Legal_Reg | Legal_Flex));
4637   Operand *NonConstF = OperandARM32FlexReg::create(
4638       Func, IceType_i32, Srcs.src0R(this), OperandARM32::LSL,
4639       Ctx->getConstantInt32(ShAmt));
4640 
4641   if (Srcs.swappedOperands()) {
4642     _cmp(ConstR, NonConstF);
4643   } else {
4644     Variable *T = makeReg(IceType_i32);
4645     _rsbs(T, ConstR, NonConstF);
4646     Context.insert<InstFakeUse>(T);
4647   }
4648   return CondWhenTrue(getIcmp32Mapping(Condition));
4649 }
4650 
lowerIcmpCond(const InstIcmp * Instr)4651 TargetARM32::CondWhenTrue TargetARM32::lowerIcmpCond(const InstIcmp *Instr) {
4652   return lowerIcmpCond(Instr->getCondition(), Instr->getSrc(0),
4653                        Instr->getSrc(1));
4654 }
4655 
lowerIcmpCond(InstIcmp::ICond Condition,Operand * Src0,Operand * Src1)4656 TargetARM32::CondWhenTrue TargetARM32::lowerIcmpCond(InstIcmp::ICond Condition,
4657                                                      Operand *Src0,
4658                                                      Operand *Src1) {
4659   Src0 = legalizeUndef(Src0);
4660   Src1 = legalizeUndef(Src1);
4661 
4662   // a=icmp cond b, c ==>
4663   // GCC does:
4664   //   <u/s>xtb tb, b
4665   //   <u/s>xtb tc, c
4666   //   cmp      tb, tc
4667   //   mov.C1   t, #0
4668   //   mov.C2   t, #1
4669   //   mov      a, t
4670   // where the unsigned/sign extension is not needed for 32-bit. They also have
4671   // special cases for EQ and NE. E.g., for NE:
4672   //   <extend to tb, tc>
4673   //   subs     t, tb, tc
4674   //   movne    t, #1
4675   //   mov      a, t
4676   //
4677   // LLVM does:
4678   //   lsl     tb, b, #<N>
4679   //   mov     t, #0
4680   //   cmp     tb, c, lsl #<N>
4681   //   mov.<C> t, #1
4682   //   mov     a, t
4683   //
4684   // the left shift is by 0, 16, or 24, which allows the comparison to focus on
4685   // the digits that actually matter (for 16-bit or 8-bit signed/unsigned). For
4686   // the unsigned case, for some reason it does similar to GCC and does a uxtb
4687   // first. It's not clear to me why that special-casing is needed.
4688   //
4689   // We'll go with the LLVM way for now, since it's shorter and has just as few
4690   // dependencies.
4691   switch (Src0->getType()) {
4692   default:
4693     llvm::report_fatal_error("Unhandled type in lowerIcmpCond");
4694   case IceType_i1:
4695   case IceType_i8:
4696   case IceType_i16:
4697     return lowerInt8AndInt16IcmpCond(Condition, Src0, Src1);
4698   case IceType_i32:
4699     return lowerInt32IcmpCond(Condition, Src0, Src1);
4700   case IceType_i64:
4701     return lowerInt64IcmpCond(Condition, Src0, Src1);
4702   }
4703 }
4704 
lowerIcmp(const InstIcmp * Instr)4705 void TargetARM32::lowerIcmp(const InstIcmp *Instr) {
4706   Variable *Dest = Instr->getDest();
4707   const Type DestTy = Dest->getType();
4708 
4709   if (isVectorType(DestTy)) {
4710     auto *T = makeReg(DestTy);
4711     auto *Src0 = legalizeToReg(Instr->getSrc(0));
4712     auto *Src1 = legalizeToReg(Instr->getSrc(1));
4713     const Type SrcTy = Src0->getType();
4714 
4715     bool NeedsShl = false;
4716     Type NewTypeAfterShl;
4717     SizeT ShAmt;
4718     switch (SrcTy) {
4719     default:
4720       break;
4721     case IceType_v16i1:
4722       NeedsShl = true;
4723       NewTypeAfterShl = IceType_v16i8;
4724       ShAmt = 7;
4725       break;
4726     case IceType_v8i1:
4727       NeedsShl = true;
4728       NewTypeAfterShl = IceType_v8i16;
4729       ShAmt = 15;
4730       break;
4731     case IceType_v4i1:
4732       NeedsShl = true;
4733       NewTypeAfterShl = IceType_v4i32;
4734       ShAmt = 31;
4735       break;
4736     }
4737 
4738     if (NeedsShl) {
4739       auto *Imm = llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(ShAmt));
4740       auto *Src0T = makeReg(NewTypeAfterShl);
4741       auto *Src0Shl = makeReg(NewTypeAfterShl);
4742       _mov(Src0T, Src0);
4743       _vshl(Src0Shl, Src0T, Imm);
4744       Src0 = Src0Shl;
4745 
4746       auto *Src1T = makeReg(NewTypeAfterShl);
4747       auto *Src1Shl = makeReg(NewTypeAfterShl);
4748       _mov(Src1T, Src1);
4749       _vshl(Src1Shl, Src1T, Imm);
4750       Src1 = Src1Shl;
4751     }
4752 
4753     switch (Instr->getCondition()) {
4754     default:
4755       llvm::report_fatal_error("Unhandled integer comparison.");
4756 #define _Vceq(T, S0, S1, Signed) _vceq(T, S0, S1)
4757 #define _Vcge(T, S0, S1, Signed)                                               \
4758   _vcge(T, S0, S1)                                                             \
4759       ->setSignType(Signed ? InstARM32::FS_Signed : InstARM32::FS_Unsigned)
4760 #define _Vcgt(T, S0, S1, Signed)                                               \
4761   _vcgt(T, S0, S1)                                                             \
4762       ->setSignType(Signed ? InstARM32::FS_Signed : InstARM32::FS_Unsigned)
4763 #define X(val, is_signed, swapped64, C_32, C1_64, C2_64, C_V, INV_V, NEG_V)    \
4764   case InstIcmp::val: {                                                        \
4765     _Vc##C_V(T, (INV_V) ? Src1 : Src0, (INV_V) ? Src0 : Src1, is_signed);      \
4766     if (NEG_V) {                                                               \
4767       auto *TInv = makeReg(DestTy);                                            \
4768       _vmvn(TInv, T);                                                          \
4769       T = TInv;                                                                \
4770     }                                                                          \
4771   } break;
4772       ICMPARM32_TABLE
4773 #undef X
4774 #undef _Vcgt
4775 #undef _Vcge
4776 #undef _Vceq
4777     }
4778     _mov(Dest, T);
4779     return;
4780   }
4781 
4782   Operand *_0 =
4783       legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex);
4784   Operand *_1 = legalize(Ctx->getConstantInt32(1), Legal_Reg | Legal_Flex);
4785   Variable *T = makeReg(IceType_i1);
4786 
4787   _mov(T, _0);
4788   CondWhenTrue Cond = lowerIcmpCond(Instr);
4789   _mov_redefined(T, _1, Cond.WhenTrue0);
4790   _mov(Dest, T);
4791 
4792   assert(Cond.WhenTrue1 == CondARM32::kNone);
4793 
4794   return;
4795 }
4796 
lowerInsertElement(const InstInsertElement * Instr)4797 void TargetARM32::lowerInsertElement(const InstInsertElement *Instr) {
4798   Variable *Dest = Instr->getDest();
4799   Type DestTy = Dest->getType();
4800 
4801   Variable *Src0 = legalizeToReg(Instr->getSrc(0));
4802   Variable *Src1 = legalizeToReg(Instr->getSrc(1));
4803   Operand *Src2 = Instr->getSrc(2);
4804 
4805   if (const auto *Imm = llvm::dyn_cast<ConstantInteger32>(Src2)) {
4806     const uint32_t Index = Imm->getValue();
4807     Variable *T = makeReg(DestTy);
4808 
4809     if (isFloatingType(DestTy)) {
4810       T->setRegClass(RegARM32::RCARM32_QtoS);
4811     }
4812 
4813     _mov(T, Src0);
4814     _insertelement(T, Src1, Index);
4815     _set_dest_redefined();
4816     _mov(Dest, T);
4817     return;
4818   }
4819   assert(false && "insertelement requires a constant index");
4820 }
4821 
4822 namespace {
getConstantMemoryOrder(Operand * Opnd)4823 inline uint64_t getConstantMemoryOrder(Operand *Opnd) {
4824   if (auto *Integer = llvm::dyn_cast<ConstantInteger32>(Opnd))
4825     return Integer->getValue();
4826   return Intrinsics::MemoryOrderInvalid;
4827 }
4828 } // end of anonymous namespace
4829 
/// Emits a load-linked/store-exclusive retry loop:
///
///   retry:
///     ldrex        tmp, [addr]
///     <code emitted by Operation(tmp), yielding the value to store>
///     mov.<!Cond>  success, #0      @ only when Cond != AL
///     strex.<Cond> success, value, [addr]
///     cmp          success, #0
///     bne          retry
///
/// \param Ty        type of the memory access; i64 uses a register pair.
/// \param Addr      address operand; legalized into a register here.
/// \param Operation callback invoked with the register holding the loaded
///                  value. It may emit instructions, and must return the
///                  variable to store, which must be register-allocated.
/// \param Cond      predicate applied to the strex.
void TargetARM32::lowerLoadLinkedStoreExclusive(
    Type Ty, Operand *Addr, std::function<Variable *(Variable *)> Operation,
    CondARM32::Cond Cond) {

  // Loop header: the final conditional branch jumps back here.
  auto *Retry = Context.insert<InstARM32Label>(this);

  { // scoping for loop highlighting.
    Variable *Success = makeReg(IceType_i32);
    Variable *Tmp = (Ty == IceType_i64) ? makeI64RegPair() : makeReg(Ty);
    auto *_0 = Ctx->getConstantZero(IceType_i32);

    // Tmp gets a fake definition and use before the ldrex; the ldrex itself is
    // then flagged as redefining its destination.
    Context.insert<InstFakeDef>(Tmp);
    Context.insert<InstFakeUse>(Tmp);
    Variable *AddrR = legalizeToReg(Addr);
    _ldrex(Tmp, formMemoryOperand(AddrR, Ty))->setDestRedefined();
    // Let the caller compute the value to store from the loaded value.
    auto *StoreValue = Operation(Tmp);
    assert(StoreValue->mustHaveReg());
    // strex requires Dest to be a register other than Value or Addr. This
    // restriction is cleanly represented by adding an "early" definition of
    // Dest (or a later use of all the sources.)
    Context.insert<InstFakeDef>(Success);
    if (Cond != CondARM32::AL) {
      // When the strex is predicated, Success is set to 0 under the opposite
      // condition, so the cmp below sees zero if the strex did not execute.
      _mov_redefined(Success, legalize(_0, Legal_Reg | Legal_Flex),
                     InstARM32::getOppositeCondition(Cond));
    }
    _strex(Success, StoreValue, formMemoryOperand(AddrR, Ty), Cond)
        ->setDestRedefined();
    _cmp(Success, _0);
  }

  // Branch back to Retry while Success is nonzero.
  _br(Retry, CondARM32::NE);
}
4862 
4863 namespace {
createArithInst(Cfg * Func,uint32_t Operation,Variable * Dest,Variable * Src0,Operand * Src1)4864 InstArithmetic *createArithInst(Cfg *Func, uint32_t Operation, Variable *Dest,
4865                                 Variable *Src0, Operand *Src1) {
4866   InstArithmetic::OpKind Oper;
4867   switch (Operation) {
4868   default:
4869     llvm::report_fatal_error("Unknown AtomicRMW operation");
4870   case Intrinsics::AtomicExchange:
4871     llvm::report_fatal_error("Can't handle Atomic xchg operation");
4872   case Intrinsics::AtomicAdd:
4873     Oper = InstArithmetic::Add;
4874     break;
4875   case Intrinsics::AtomicAnd:
4876     Oper = InstArithmetic::And;
4877     break;
4878   case Intrinsics::AtomicSub:
4879     Oper = InstArithmetic::Sub;
4880     break;
4881   case Intrinsics::AtomicOr:
4882     Oper = InstArithmetic::Or;
4883     break;
4884   case Intrinsics::AtomicXor:
4885     Oper = InstArithmetic::Xor;
4886     break;
4887   }
4888   return InstArithmetic::create(Func, Oper, Dest, Src0, Src1);
4889 }
4890 } // end of anonymous namespace
4891 
lowerAtomicRMW(Variable * Dest,uint32_t Operation,Operand * Addr,Operand * Val)4892 void TargetARM32::lowerAtomicRMW(Variable *Dest, uint32_t Operation,
4893                                  Operand *Addr, Operand *Val) {
4894   // retry:
4895   //     ldrex tmp, [addr]
4896   //     mov contents, tmp
4897   //     op result, contents, Val
4898   //     strex success, result, [addr]
4899   //     cmp success, 0
4900   //     jne retry
4901   //     fake-use(addr, operand)  @ prevents undesirable clobbering.
4902   //     mov dest, contents
4903   auto DestTy = Dest->getType();
4904 
4905   if (DestTy == IceType_i64) {
4906     lowerInt64AtomicRMW(Dest, Operation, Addr, Val);
4907     return;
4908   }
4909 
4910   Operand *ValRF = nullptr;
4911   if (llvm::isa<ConstantInteger32>(Val)) {
4912     ValRF = Val;
4913   } else {
4914     ValRF = legalizeToReg(Val);
4915   }
4916   auto *ContentsR = makeReg(DestTy);
4917   auto *ResultR = makeReg(DestTy);
4918 
4919   _dmb();
4920   lowerLoadLinkedStoreExclusive(
4921       DestTy, Addr,
4922       [this, Operation, ResultR, ContentsR, ValRF](Variable *Tmp) {
4923         lowerAssign(InstAssign::create(Func, ContentsR, Tmp));
4924         if (Operation == Intrinsics::AtomicExchange) {
4925           lowerAssign(InstAssign::create(Func, ResultR, ValRF));
4926         } else {
4927           lowerArithmetic(
4928               createArithInst(Func, Operation, ResultR, ContentsR, ValRF));
4929         }
4930         return ResultR;
4931       });
4932   _dmb();
4933   if (auto *ValR = llvm::dyn_cast<Variable>(ValRF)) {
4934     Context.insert<InstFakeUse>(ValR);
4935   }
4936   // Can't dce ContentsR.
4937   Context.insert<InstFakeUse>(ContentsR);
4938   lowerAssign(InstAssign::create(Func, Dest, ContentsR));
4939 }
4940 
lowerInt64AtomicRMW(Variable * Dest,uint32_t Operation,Operand * Addr,Operand * Val)4941 void TargetARM32::lowerInt64AtomicRMW(Variable *Dest, uint32_t Operation,
4942                                       Operand *Addr, Operand *Val) {
4943   assert(Dest->getType() == IceType_i64);
4944 
4945   auto *ResultR = makeI64RegPair();
4946 
4947   Context.insert<InstFakeDef>(ResultR);
4948 
4949   Operand *ValRF = nullptr;
4950   if (llvm::dyn_cast<ConstantInteger64>(Val)) {
4951     ValRF = Val;
4952   } else {
4953     auto *ValR64 = llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64));
4954     ValR64->initHiLo(Func);
4955     ValR64->setMustNotHaveReg();
4956     ValR64->getLo()->setMustHaveReg();
4957     ValR64->getHi()->setMustHaveReg();
4958     lowerAssign(InstAssign::create(Func, ValR64, Val));
4959     ValRF = ValR64;
4960   }
4961 
4962   auto *ContentsR = llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64));
4963   ContentsR->initHiLo(Func);
4964   ContentsR->setMustNotHaveReg();
4965   ContentsR->getLo()->setMustHaveReg();
4966   ContentsR->getHi()->setMustHaveReg();
4967 
4968   _dmb();
4969   lowerLoadLinkedStoreExclusive(
4970       IceType_i64, Addr,
4971       [this, Operation, ResultR, ContentsR, ValRF](Variable *Tmp) {
4972         lowerAssign(InstAssign::create(Func, ContentsR, Tmp));
4973         Context.insert<InstFakeUse>(Tmp);
4974         if (Operation == Intrinsics::AtomicExchange) {
4975           lowerAssign(InstAssign::create(Func, ResultR, ValRF));
4976         } else {
4977           lowerArithmetic(
4978               createArithInst(Func, Operation, ResultR, ContentsR, ValRF));
4979         }
4980         Context.insert<InstFakeUse>(ResultR->getHi());
4981         Context.insert<InstFakeDef>(ResultR, ResultR->getLo())
4982             ->setDestRedefined();
4983         return ResultR;
4984       });
4985   _dmb();
4986   if (auto *ValR64 = llvm::dyn_cast<Variable64On32>(ValRF)) {
4987     Context.insert<InstFakeUse>(ValR64->getLo());
4988     Context.insert<InstFakeUse>(ValR64->getHi());
4989   }
4990   lowerAssign(InstAssign::create(Func, Dest, ContentsR));
4991 }
4992 
postambleCtpop64(const InstCall * Instr)4993 void TargetARM32::postambleCtpop64(const InstCall *Instr) {
4994   Operand *Arg0 = Instr->getArg(0);
4995   if (isInt32Asserting32Or64(Arg0->getType())) {
4996     return;
4997   }
4998   // The popcount helpers always return 32-bit values, while the intrinsic's
4999   // signature matches some 64-bit platform's native instructions and expect to
5000   // fill a 64-bit reg. Thus, clear the upper bits of the dest just in case the
5001   // user doesn't do that in the IR or doesn't toss the bits via truncate.
5002   auto *DestHi = llvm::cast<Variable>(hiOperand(Instr->getDest()));
5003   Variable *T = makeReg(IceType_i32);
5004   Operand *_0 =
5005       legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex);
5006   _mov(T, _0);
5007   _mov(DestHi, T);
5008 }
5009 
lowerIntrinsicCall(const InstIntrinsicCall * Instr)5010 void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
5011   Variable *Dest = Instr->getDest();
5012   Type DestTy = (Dest != nullptr) ? Dest->getType() : IceType_void;
5013   Intrinsics::IntrinsicID ID = Instr->getIntrinsicInfo().ID;
5014   switch (ID) {
5015   case Intrinsics::AtomicFence:
5016   case Intrinsics::AtomicFenceAll:
5017     assert(Dest == nullptr);
5018     _dmb();
5019     return;
5020   case Intrinsics::AtomicIsLockFree: {
5021     Operand *ByteSize = Instr->getArg(0);
5022     auto *CI = llvm::dyn_cast<ConstantInteger32>(ByteSize);
5023     if (CI == nullptr) {
5024       // The PNaCl ABI requires the byte size to be a compile-time constant.
5025       Func->setError("AtomicIsLockFree byte size should be compile-time const");
5026       return;
5027     }
5028     static constexpr int32_t NotLockFree = 0;
5029     static constexpr int32_t LockFree = 1;
5030     int32_t Result = NotLockFree;
5031     switch (CI->getValue()) {
5032     case 1:
5033     case 2:
5034     case 4:
5035     case 8:
5036       Result = LockFree;
5037       break;
5038     }
5039     _mov(Dest, legalizeToReg(Ctx->getConstantInt32(Result)));
5040     return;
5041   }
5042   case Intrinsics::AtomicLoad: {
5043     assert(isScalarIntegerType(DestTy));
5044     // We require the memory address to be naturally aligned. Given that is the
5045     // case, then normal loads are atomic.
5046     if (!Intrinsics::isMemoryOrderValid(
5047             ID, getConstantMemoryOrder(Instr->getArg(1)))) {
5048       Func->setError("Unexpected memory ordering for AtomicLoad");
5049       return;
5050     }
5051     Variable *T;
5052 
5053     if (DestTy == IceType_i64) {
5054       // ldrex is the only arm instruction that is guaranteed to load a 64-bit
5055       // integer atomically. Everything else works with a regular ldr.
5056       T = makeI64RegPair();
5057       _ldrex(T, formMemoryOperand(Instr->getArg(0), IceType_i64));
5058     } else {
5059       T = makeReg(DestTy);
5060       _ldr(T, formMemoryOperand(Instr->getArg(0), DestTy));
5061     }
5062     _dmb();
5063     lowerAssign(InstAssign::create(Func, Dest, T));
5064     // Adding a fake-use T to ensure the atomic load is not removed if Dest is
5065     // unused.
5066     Context.insert<InstFakeUse>(T);
5067     return;
5068   }
5069   case Intrinsics::AtomicStore: {
5070     // We require the memory address to be naturally aligned. Given that is the
5071     // case, then normal loads are atomic.
5072     if (!Intrinsics::isMemoryOrderValid(
5073             ID, getConstantMemoryOrder(Instr->getArg(2)))) {
5074       Func->setError("Unexpected memory ordering for AtomicStore");
5075       return;
5076     }
5077 
5078     auto *Value = Instr->getArg(0);
5079     if (Value->getType() == IceType_i64) {
5080       auto *ValueR = makeI64RegPair();
5081       Context.insert<InstFakeDef>(ValueR);
5082       lowerAssign(InstAssign::create(Func, ValueR, Value));
5083       _dmb();
5084       lowerLoadLinkedStoreExclusive(
5085           IceType_i64, Instr->getArg(1), [this, ValueR](Variable *Tmp) {
5086             // The following fake-use prevents the ldrex instruction from being
5087             // dead code eliminated.
5088             Context.insert<InstFakeUse>(llvm::cast<Variable>(loOperand(Tmp)));
5089             Context.insert<InstFakeUse>(llvm::cast<Variable>(hiOperand(Tmp)));
5090             Context.insert<InstFakeUse>(Tmp);
5091             return ValueR;
5092           });
5093       Context.insert<InstFakeUse>(ValueR);
5094       _dmb();
5095       return;
5096     }
5097 
5098     auto *ValueR = legalizeToReg(Instr->getArg(0));
5099     const auto ValueTy = ValueR->getType();
5100     assert(isScalarIntegerType(ValueTy));
5101     auto *Addr = legalizeToReg(Instr->getArg(1));
5102 
5103     // non-64-bit stores are atomically as long as the address is aligned. This
5104     // is PNaCl, so addresses are aligned.
5105     _dmb();
5106     _str(ValueR, formMemoryOperand(Addr, ValueTy));
5107     _dmb();
5108     return;
5109   }
5110   case Intrinsics::AtomicCmpxchg: {
5111     // retry:
5112     //     ldrex tmp, [addr]
5113     //     cmp tmp, expected
5114     //     mov expected, tmp
5115     //     strexeq success, new, [addr]
5116     //     cmpeq success, #0
5117     //     bne retry
5118     //     mov dest, expected
5119     assert(isScalarIntegerType(DestTy));
5120     // We require the memory address to be naturally aligned. Given that is the
5121     // case, then normal loads are atomic.
5122     if (!Intrinsics::isMemoryOrderValid(
5123             ID, getConstantMemoryOrder(Instr->getArg(3)),
5124             getConstantMemoryOrder(Instr->getArg(4)))) {
5125       Func->setError("Unexpected memory ordering for AtomicCmpxchg");
5126       return;
5127     }
5128 
5129     if (DestTy == IceType_i64) {
5130       Variable *LoadedValue = nullptr;
5131 
5132       auto *New = makeI64RegPair();
5133       Context.insert<InstFakeDef>(New);
5134       lowerAssign(InstAssign::create(Func, New, Instr->getArg(2)));
5135 
5136       auto *Expected = makeI64RegPair();
5137       Context.insert<InstFakeDef>(Expected);
5138       lowerAssign(InstAssign::create(Func, Expected, Instr->getArg(1)));
5139 
5140       _dmb();
5141       lowerLoadLinkedStoreExclusive(
5142           DestTy, Instr->getArg(0),
5143           [this, Expected, New, Instr, DestTy, &LoadedValue](Variable *Tmp) {
5144             auto *ExpectedLoR = llvm::cast<Variable>(loOperand(Expected));
5145             auto *ExpectedHiR = llvm::cast<Variable>(hiOperand(Expected));
5146             auto *TmpLoR = llvm::cast<Variable>(loOperand(Tmp));
5147             auto *TmpHiR = llvm::cast<Variable>(hiOperand(Tmp));
5148             _cmp(TmpLoR, ExpectedLoR);
5149             _cmp(TmpHiR, ExpectedHiR, CondARM32::EQ);
5150             LoadedValue = Tmp;
5151             return New;
5152           },
5153           CondARM32::EQ);
5154       _dmb();
5155 
5156       Context.insert<InstFakeUse>(LoadedValue);
5157       lowerAssign(InstAssign::create(Func, Dest, LoadedValue));
5158       // The fake-use Expected prevents the assignments to Expected (above)
5159       // from being removed if Dest is not used.
5160       Context.insert<InstFakeUse>(Expected);
5161       // New needs to be alive here, or its live range will end in the
5162       // strex instruction.
5163       Context.insert<InstFakeUse>(New);
5164       return;
5165     }
5166 
5167     auto *New = legalizeToReg(Instr->getArg(2));
5168     auto *Expected = legalizeToReg(Instr->getArg(1));
5169     Variable *LoadedValue = nullptr;
5170 
5171     _dmb();
5172     lowerLoadLinkedStoreExclusive(
5173         DestTy, Instr->getArg(0),
5174         [this, Expected, New, Instr, DestTy, &LoadedValue](Variable *Tmp) {
5175           lowerIcmpCond(InstIcmp::Eq, Tmp, Expected);
5176           LoadedValue = Tmp;
5177           return New;
5178         },
5179         CondARM32::EQ);
5180     _dmb();
5181 
5182     lowerAssign(InstAssign::create(Func, Dest, LoadedValue));
5183     Context.insert<InstFakeUse>(Expected);
5184     Context.insert<InstFakeUse>(New);
5185     return;
5186   }
5187   case Intrinsics::AtomicRMW: {
5188     if (!Intrinsics::isMemoryOrderValid(
5189             ID, getConstantMemoryOrder(Instr->getArg(3)))) {
5190       Func->setError("Unexpected memory ordering for AtomicRMW");
5191       return;
5192     }
5193     lowerAtomicRMW(
5194         Dest, static_cast<uint32_t>(
5195                   llvm::cast<ConstantInteger32>(Instr->getArg(0))->getValue()),
5196         Instr->getArg(1), Instr->getArg(2));
5197     return;
5198   }
5199   case Intrinsics::Bswap: {
5200     Operand *Val = Instr->getArg(0);
5201     Type Ty = Val->getType();
5202     if (Ty == IceType_i64) {
5203       Val = legalizeUndef(Val);
5204       Variable *Val_Lo = legalizeToReg(loOperand(Val));
5205       Variable *Val_Hi = legalizeToReg(hiOperand(Val));
5206       Variable *T_Lo = makeReg(IceType_i32);
5207       Variable *T_Hi = makeReg(IceType_i32);
5208       auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
5209       auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
5210       _rev(T_Lo, Val_Lo);
5211       _rev(T_Hi, Val_Hi);
5212       _mov(DestLo, T_Hi);
5213       _mov(DestHi, T_Lo);
5214     } else {
5215       assert(Ty == IceType_i32 || Ty == IceType_i16);
5216       Variable *ValR = legalizeToReg(Val);
5217       Variable *T = makeReg(Ty);
5218       _rev(T, ValR);
5219       if (Val->getType() == IceType_i16) {
5220         Operand *_16 = shAmtImm(16);
5221         _lsr(T, T, _16);
5222       }
5223       _mov(Dest, T);
5224     }
5225     return;
5226   }
5227   case Intrinsics::Ctpop: {
5228     llvm::report_fatal_error("Ctpop should have been prelowered.");
5229   }
5230   case Intrinsics::Ctlz: {
5231     // The "is zero undef" parameter is ignored and we always return a
5232     // well-defined value.
5233     Operand *Val = Instr->getArg(0);
5234     Variable *ValLoR;
5235     Variable *ValHiR = nullptr;
5236     if (Val->getType() == IceType_i64) {
5237       Val = legalizeUndef(Val);
5238       ValLoR = legalizeToReg(loOperand(Val));
5239       ValHiR = legalizeToReg(hiOperand(Val));
5240     } else {
5241       ValLoR = legalizeToReg(Val);
5242     }
5243     lowerCLZ(Dest, ValLoR, ValHiR);
5244     return;
5245   }
5246   case Intrinsics::Cttz: {
5247     // Essentially like Clz, but reverse the bits first.
5248     Operand *Val = Instr->getArg(0);
5249     Variable *ValLoR;
5250     Variable *ValHiR = nullptr;
5251     if (Val->getType() == IceType_i64) {
5252       Val = legalizeUndef(Val);
5253       ValLoR = legalizeToReg(loOperand(Val));
5254       ValHiR = legalizeToReg(hiOperand(Val));
5255       Variable *TLo = makeReg(IceType_i32);
5256       Variable *THi = makeReg(IceType_i32);
5257       _rbit(TLo, ValLoR);
5258       _rbit(THi, ValHiR);
5259       ValLoR = THi;
5260       ValHiR = TLo;
5261     } else {
5262       ValLoR = legalizeToReg(Val);
5263       Variable *T = makeReg(IceType_i32);
5264       _rbit(T, ValLoR);
5265       ValLoR = T;
5266     }
5267     lowerCLZ(Dest, ValLoR, ValHiR);
5268     return;
5269   }
5270   case Intrinsics::Fabs: {
5271     Variable *T = makeReg(DestTy);
5272     _vabs(T, legalizeToReg(Instr->getArg(0)));
5273     _mov(Dest, T);
5274     return;
5275   }
5276   case Intrinsics::Longjmp: {
5277     llvm::report_fatal_error("longjmp should have been prelowered.");
5278   }
5279   case Intrinsics::Memcpy: {
5280     llvm::report_fatal_error("memcpy should have been prelowered.");
5281   }
5282   case Intrinsics::Memmove: {
5283     llvm::report_fatal_error("memmove should have been prelowered.");
5284   }
5285   case Intrinsics::Memset: {
5286     llvm::report_fatal_error("memmove should have been prelowered.");
5287   }
5288   case Intrinsics::NaClReadTP: {
5289     if (SandboxingType != ST_NaCl) {
5290       llvm::report_fatal_error("nacl-read-tp should have been prelowered.");
5291     }
5292     Variable *TP = legalizeToReg(OperandARM32Mem::create(
5293         Func, getPointerType(), getPhysicalRegister(RegARM32::Reg_r9),
5294         llvm::cast<ConstantInteger32>(Ctx->getConstantZero(IceType_i32))));
5295     _mov(Dest, TP);
5296     return;
5297   }
5298   case Intrinsics::Setjmp: {
5299     llvm::report_fatal_error("setjmp should have been prelowered.");
5300   }
5301   case Intrinsics::Sqrt: {
5302     assert(isScalarFloatingType(Dest->getType()) ||
5303            getFlags().getApplicationBinaryInterface() != ::Ice::ABI_PNaCl);
5304     Variable *Src = legalizeToReg(Instr->getArg(0));
5305     Variable *T = makeReg(DestTy);
5306     _vsqrt(T, Src);
5307     _mov(Dest, T);
5308     return;
5309   }
5310   case Intrinsics::Stacksave: {
5311     Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
5312     _mov(Dest, SP);
5313     return;
5314   }
5315   case Intrinsics::Stackrestore: {
5316     Variable *Val = legalizeToReg(Instr->getArg(0));
5317     Sandboxer(this).reset_sp(Val);
5318     return;
5319   }
5320   case Intrinsics::Trap:
5321     _trap();
5322     return;
5323   case Intrinsics::AddSaturateSigned:
5324   case Intrinsics::AddSaturateUnsigned: {
5325     bool Unsigned = (ID == Intrinsics::AddSaturateUnsigned);
5326     Variable *Src0 = legalizeToReg(Instr->getArg(0));
5327     Variable *Src1 = legalizeToReg(Instr->getArg(1));
5328     Variable *T = makeReg(DestTy);
5329     _vqadd(T, Src0, Src1, Unsigned);
5330     _mov(Dest, T);
5331     return;
5332   }
5333   case Intrinsics::LoadSubVector: {
5334     assert(llvm::isa<ConstantInteger32>(Instr->getArg(1)) &&
5335            "LoadSubVector second argument must be a constant");
5336     Variable *Dest = Instr->getDest();
5337     Type Ty = Dest->getType();
5338     auto *SubVectorSize = llvm::cast<ConstantInteger32>(Instr->getArg(1));
5339     Operand *Addr = Instr->getArg(0);
5340     OperandARM32Mem *Src = formMemoryOperand(Addr, Ty);
5341     doMockBoundsCheck(Src);
5342 
5343     if (Dest->isRematerializable()) {
5344       Context.insert<InstFakeDef>(Dest);
5345       return;
5346     }
5347 
5348     auto *T = makeReg(Ty);
5349     switch (SubVectorSize->getValue()) {
5350     case 4:
5351       _vldr1d(T, Src);
5352       break;
5353     case 8:
5354       _vldr1q(T, Src);
5355       break;
5356     default:
5357       Func->setError("Unexpected size for LoadSubVector");
5358       return;
5359     }
5360     _mov(Dest, T);
5361     return;
5362   }
5363   case Intrinsics::StoreSubVector: {
5364     assert(llvm::isa<ConstantInteger32>(Instr->getArg(2)) &&
5365            "StoreSubVector third argument must be a constant");
5366     auto *SubVectorSize = llvm::cast<ConstantInteger32>(Instr->getArg(2));
5367     Variable *Value = legalizeToReg(Instr->getArg(0));
5368     Operand *Addr = Instr->getArg(1);
5369     OperandARM32Mem *NewAddr = formMemoryOperand(Addr, Value->getType());
5370     doMockBoundsCheck(NewAddr);
5371 
5372     Value = legalizeToReg(Value);
5373 
5374     switch (SubVectorSize->getValue()) {
5375     case 4:
5376       _vstr1d(Value, NewAddr);
5377       break;
5378     case 8:
5379       _vstr1q(Value, NewAddr);
5380       break;
5381     default:
5382       Func->setError("Unexpected size for StoreSubVector");
5383       return;
5384     }
5385     return;
5386   }
5387   case Intrinsics::MultiplyAddPairs: {
5388     Variable *Src0 = legalizeToReg(Instr->getArg(0));
5389     Variable *Src1 = legalizeToReg(Instr->getArg(1));
5390     Variable *T = makeReg(DestTy);
5391     _vmlap(T, Src0, Src1);
5392     _mov(Dest, T);
5393     return;
5394   }
5395   case Intrinsics::MultiplyHighSigned:
5396   case Intrinsics::MultiplyHighUnsigned: {
5397     bool Unsigned = (ID == Intrinsics::MultiplyHighUnsigned);
5398     Variable *Src0 = legalizeToReg(Instr->getArg(0));
5399     Variable *Src1 = legalizeToReg(Instr->getArg(1));
5400     Variable *T = makeReg(DestTy);
5401     _vmulh(T, Src0, Src1, Unsigned);
5402     _mov(Dest, T);
5403     return;
5404   }
5405   case Intrinsics::Nearbyint: {
5406     UnimplementedLoweringError(this, Instr);
5407     return;
5408   }
5409   case Intrinsics::Round: {
5410     UnimplementedLoweringError(this, Instr);
5411     return;
5412   }
5413   case Intrinsics::SignMask: {
5414     UnimplementedLoweringError(this, Instr);
5415     return;
5416   }
5417   case Intrinsics::SubtractSaturateSigned:
5418   case Intrinsics::SubtractSaturateUnsigned: {
5419     bool Unsigned = (ID == Intrinsics::SubtractSaturateUnsigned);
5420     Variable *Src0 = legalizeToReg(Instr->getArg(0));
5421     Variable *Src1 = legalizeToReg(Instr->getArg(1));
5422     Variable *T = makeReg(DestTy);
5423     _vqsub(T, Src0, Src1, Unsigned);
5424     _mov(Dest, T);
5425     return;
5426   }
5427   case Intrinsics::VectorPackSigned:
5428   case Intrinsics::VectorPackUnsigned: {
5429     bool Unsigned = (ID == Intrinsics::VectorPackUnsigned);
5430     bool Saturating = true;
5431     Variable *Src0 = legalizeToReg(Instr->getArg(0));
5432     Variable *Src1 = legalizeToReg(Instr->getArg(1));
5433     Variable *T = makeReg(DestTy);
5434     _vqmovn2(T, Src0, Src1, Unsigned, Saturating);
5435     _mov(Dest, T);
5436     return;
5437   }
5438   default: // UnknownIntrinsic
5439     Func->setError("Unexpected intrinsic");
5440     return;
5441   }
5442   return;
5443 }
5444 
lowerCLZ(Variable * Dest,Variable * ValLoR,Variable * ValHiR)5445 void TargetARM32::lowerCLZ(Variable *Dest, Variable *ValLoR, Variable *ValHiR) {
5446   Type Ty = Dest->getType();
5447   assert(Ty == IceType_i32 || Ty == IceType_i64);
5448   Variable *T = makeReg(IceType_i32);
5449   _clz(T, ValLoR);
5450   if (Ty == IceType_i64) {
5451     auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
5452     auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
5453     Operand *Zero =
5454         legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex);
5455     Operand *ThirtyTwo =
5456         legalize(Ctx->getConstantInt32(32), Legal_Reg | Legal_Flex);
5457     _cmp(ValHiR, Zero);
5458     Variable *T2 = makeReg(IceType_i32);
5459     _add(T2, T, ThirtyTwo);
5460     _clz(T2, ValHiR, CondARM32::NE);
5461     // T2 is actually a source as well when the predicate is not AL (since it
5462     // may leave T2 alone). We use _set_dest_redefined to prolong the liveness
5463     // of T2 as if it was used as a source.
5464     _set_dest_redefined();
5465     _mov(DestLo, T2);
5466     Variable *T3 = makeReg(Zero->getType());
5467     _mov(T3, Zero);
5468     _mov(DestHi, T3);
5469     return;
5470   }
5471   _mov(Dest, T);
5472   return;
5473 }
5474 
lowerLoad(const InstLoad * Load)5475 void TargetARM32::lowerLoad(const InstLoad *Load) {
5476   // A Load instruction can be treated the same as an Assign instruction, after
5477   // the source operand is transformed into an OperandARM32Mem operand.
5478   Type Ty = Load->getDest()->getType();
5479   Operand *Src0 = formMemoryOperand(Load->getSourceAddress(), Ty);
5480   Variable *DestLoad = Load->getDest();
5481 
5482   // TODO(jvoung): handled folding opportunities. Sign and zero extension can
5483   // be folded into a load.
5484   auto *Assign = InstAssign::create(Func, DestLoad, Src0);
5485   lowerAssign(Assign);
5486 }
5487 
5488 namespace {
dumpAddressOpt(const Cfg * Func,const Variable * Base,int32_t Offset,const Variable * OffsetReg,int16_t OffsetRegShAmt,const Inst * Reason)5489 void dumpAddressOpt(const Cfg *Func, const Variable *Base, int32_t Offset,
5490                     const Variable *OffsetReg, int16_t OffsetRegShAmt,
5491                     const Inst *Reason) {
5492   if (!BuildDefs::dump())
5493     return;
5494   if (!Func->isVerbose(IceV_AddrOpt))
5495     return;
5496   OstreamLocker _(Func->getContext());
5497   Ostream &Str = Func->getContext()->getStrDump();
5498   Str << "Instruction: ";
5499   Reason->dumpDecorated(Func);
5500   Str << "  results in Base=";
5501   if (Base)
5502     Base->dump(Func);
5503   else
5504     Str << "<null>";
5505   Str << ", OffsetReg=";
5506   if (OffsetReg)
5507     OffsetReg->dump(Func);
5508   else
5509     Str << "<null>";
5510   Str << ", Shift=" << OffsetRegShAmt << ", Offset=" << Offset << "\n";
5511 }
5512 
matchAssign(const VariablesMetadata * VMetadata,Variable ** Var,int32_t * Offset,const Inst ** Reason)5513 bool matchAssign(const VariablesMetadata *VMetadata, Variable **Var,
5514                  int32_t *Offset, const Inst **Reason) {
5515   // Var originates from Var=SrcVar ==> set Var:=SrcVar
5516   if (*Var == nullptr)
5517     return false;
5518   const Inst *VarAssign = VMetadata->getSingleDefinition(*Var);
5519   if (!VarAssign)
5520     return false;
5521   assert(!VMetadata->isMultiDef(*Var));
5522   if (!llvm::isa<InstAssign>(VarAssign))
5523     return false;
5524 
5525   Operand *SrcOp = VarAssign->getSrc(0);
5526   bool Optimized = false;
5527   if (auto *SrcVar = llvm::dyn_cast<Variable>(SrcOp)) {
5528     if (!VMetadata->isMultiDef(SrcVar) ||
5529         // TODO: ensure SrcVar stays single-BB
5530         false) {
5531       Optimized = true;
5532       *Var = SrcVar;
5533     } else if (auto *Const = llvm::dyn_cast<ConstantInteger32>(SrcOp)) {
5534       int32_t MoreOffset = Const->getValue();
5535       int32_t NewOffset = MoreOffset + *Offset;
5536       if (Utils::WouldOverflowAdd(*Offset, MoreOffset))
5537         return false;
5538       *Var = nullptr;
5539       *Offset += NewOffset;
5540       Optimized = true;
5541     }
5542   }
5543 
5544   if (Optimized) {
5545     *Reason = VarAssign;
5546   }
5547 
5548   return Optimized;
5549 }
5550 
isAddOrSub(const Inst * Instr,InstArithmetic::OpKind * Kind)5551 bool isAddOrSub(const Inst *Instr, InstArithmetic::OpKind *Kind) {
5552   if (const auto *Arith = llvm::dyn_cast<InstArithmetic>(Instr)) {
5553     switch (Arith->getOp()) {
5554     default:
5555       return false;
5556     case InstArithmetic::Add:
5557     case InstArithmetic::Sub:
5558       *Kind = Arith->getOp();
5559       return true;
5560     }
5561   }
5562   return false;
5563 }
5564 
matchCombinedBaseIndex(const VariablesMetadata * VMetadata,Variable ** Base,Variable ** OffsetReg,int32_t OffsetRegShamt,const Inst ** Reason)5565 bool matchCombinedBaseIndex(const VariablesMetadata *VMetadata, Variable **Base,
5566                             Variable **OffsetReg, int32_t OffsetRegShamt,
5567                             const Inst **Reason) {
5568   // OffsetReg==nullptr && Base is Base=Var1+Var2 ==>
5569   //   set Base=Var1, OffsetReg=Var2, Shift=0
5570   if (*Base == nullptr)
5571     return false;
5572   if (*OffsetReg != nullptr)
5573     return false;
5574   (void)OffsetRegShamt;
5575   assert(OffsetRegShamt == 0);
5576   const Inst *BaseInst = VMetadata->getSingleDefinition(*Base);
5577   if (BaseInst == nullptr)
5578     return false;
5579   assert(!VMetadata->isMultiDef(*Base));
5580   if (BaseInst->getSrcSize() < 2)
5581     return false;
5582   auto *Var1 = llvm::dyn_cast<Variable>(BaseInst->getSrc(0));
5583   if (!Var1)
5584     return false;
5585   if (VMetadata->isMultiDef(Var1))
5586     return false;
5587   auto *Var2 = llvm::dyn_cast<Variable>(BaseInst->getSrc(1));
5588   if (!Var2)
5589     return false;
5590   if (VMetadata->isMultiDef(Var2))
5591     return false;
5592   InstArithmetic::OpKind _;
5593   if (!isAddOrSub(BaseInst, &_) ||
5594       // TODO: ensure Var1 and Var2 stay single-BB
5595       false)
5596     return false;
5597   *Base = Var1;
5598   *OffsetReg = Var2;
5599   // OffsetRegShamt is already 0.
5600   *Reason = BaseInst;
5601   return true;
5602 }
5603 
matchShiftedOffsetReg(const VariablesMetadata * VMetadata,Variable ** OffsetReg,OperandARM32::ShiftKind * Kind,int32_t * OffsetRegShamt,const Inst ** Reason)5604 bool matchShiftedOffsetReg(const VariablesMetadata *VMetadata,
5605                            Variable **OffsetReg, OperandARM32::ShiftKind *Kind,
5606                            int32_t *OffsetRegShamt, const Inst **Reason) {
5607   // OffsetReg is OffsetReg=Var*Const && log2(Const)+Shift<=32 ==>
5608   //   OffsetReg=Var, Shift+=log2(Const)
5609   // OffsetReg is OffsetReg=Var<<Const && Const+Shift<=32 ==>
5610   //   OffsetReg=Var, Shift+=Const
5611   // OffsetReg is OffsetReg=Var>>Const && Const-Shift>=-32 ==>
5612   //   OffsetReg=Var, Shift-=Const
5613   OperandARM32::ShiftKind NewShiftKind = OperandARM32::kNoShift;
5614   if (*OffsetReg == nullptr)
5615     return false;
5616   auto *IndexInst = VMetadata->getSingleDefinition(*OffsetReg);
5617   if (IndexInst == nullptr)
5618     return false;
5619   assert(!VMetadata->isMultiDef(*OffsetReg));
5620   if (IndexInst->getSrcSize() < 2)
5621     return false;
5622   auto *ArithInst = llvm::dyn_cast<InstArithmetic>(IndexInst);
5623   if (ArithInst == nullptr)
5624     return false;
5625   auto *Var = llvm::dyn_cast<Variable>(ArithInst->getSrc(0));
5626   if (Var == nullptr)
5627     return false;
5628   auto *Const = llvm::dyn_cast<ConstantInteger32>(ArithInst->getSrc(1));
5629   if (Const == nullptr) {
5630     assert(!llvm::isa<ConstantInteger32>(ArithInst->getSrc(0)));
5631     return false;
5632   }
5633   if (VMetadata->isMultiDef(Var) || Const->getType() != IceType_i32)
5634     return false;
5635 
5636   uint32_t NewShamt = -1;
5637   switch (ArithInst->getOp()) {
5638   default:
5639     return false;
5640   case InstArithmetic::Shl: {
5641     NewShiftKind = OperandARM32::LSL;
5642     NewShamt = Const->getValue();
5643     if (NewShamt > 31)
5644       return false;
5645   } break;
5646   case InstArithmetic::Lshr: {
5647     NewShiftKind = OperandARM32::LSR;
5648     NewShamt = Const->getValue();
5649     if (NewShamt > 31)
5650       return false;
5651   } break;
5652   case InstArithmetic::Ashr: {
5653     NewShiftKind = OperandARM32::ASR;
5654     NewShamt = Const->getValue();
5655     if (NewShamt > 31)
5656       return false;
5657   } break;
5658   case InstArithmetic::Udiv:
5659   case InstArithmetic::Mul: {
5660     const uint32_t UnsignedConst = Const->getValue();
5661     NewShamt = llvm::findFirstSet(UnsignedConst);
5662     if (NewShamt != llvm::findLastSet(UnsignedConst)) {
5663       // First bit set is not the same as the last bit set, so Const is not
5664       // a power of 2.
5665       return false;
5666     }
5667     NewShiftKind = ArithInst->getOp() == InstArithmetic::Udiv
5668                        ? OperandARM32::LSR
5669                        : OperandARM32::LSL;
5670   } break;
5671   }
5672   // Allowed "transitions":
5673   //   kNoShift -> * iff NewShamt < 31
5674   //   LSL -> LSL    iff NewShamt + OffsetRegShamt < 31
5675   //   LSR -> LSR    iff NewShamt + OffsetRegShamt < 31
5676   //   ASR -> ASR    iff NewShamt + OffsetRegShamt < 31
5677   if (*Kind != OperandARM32::kNoShift && *Kind != NewShiftKind) {
5678     return false;
5679   }
5680   const int32_t NewOffsetRegShamt = *OffsetRegShamt + NewShamt;
5681   if (NewOffsetRegShamt > 31)
5682     return false;
5683   *OffsetReg = Var;
5684   *OffsetRegShamt = NewOffsetRegShamt;
5685   *Kind = NewShiftKind;
5686   *Reason = IndexInst;
5687   return true;
5688 }
5689 
matchOffsetBase(const VariablesMetadata * VMetadata,Variable ** Base,int32_t * Offset,const Inst ** Reason)5690 bool matchOffsetBase(const VariablesMetadata *VMetadata, Variable **Base,
5691                      int32_t *Offset, const Inst **Reason) {
5692   // Base is Base=Var+Const || Base is Base=Const+Var ==>
5693   //   set Base=Var, Offset+=Const
5694   // Base is Base=Var-Const ==>
5695   //   set Base=Var, Offset-=Const
5696   if (*Base == nullptr)
5697     return false;
5698   const Inst *BaseInst = VMetadata->getSingleDefinition(*Base);
5699   if (BaseInst == nullptr) {
5700     return false;
5701   }
5702   assert(!VMetadata->isMultiDef(*Base));
5703 
5704   auto *ArithInst = llvm::dyn_cast<const InstArithmetic>(BaseInst);
5705   if (ArithInst == nullptr)
5706     return false;
5707   InstArithmetic::OpKind Kind;
5708   if (!isAddOrSub(ArithInst, &Kind))
5709     return false;
5710   bool IsAdd = Kind == InstArithmetic::Add;
5711   Operand *Src0 = ArithInst->getSrc(0);
5712   Operand *Src1 = ArithInst->getSrc(1);
5713   auto *Var0 = llvm::dyn_cast<Variable>(Src0);
5714   auto *Var1 = llvm::dyn_cast<Variable>(Src1);
5715   auto *Const0 = llvm::dyn_cast<ConstantInteger32>(Src0);
5716   auto *Const1 = llvm::dyn_cast<ConstantInteger32>(Src1);
5717   Variable *NewBase = nullptr;
5718   int32_t NewOffset = *Offset;
5719 
5720   if (Var0 == nullptr && Const0 == nullptr) {
5721     assert(llvm::isa<ConstantRelocatable>(Src0));
5722     return false;
5723   }
5724 
5725   if (Var1 == nullptr && Const1 == nullptr) {
5726     assert(llvm::isa<ConstantRelocatable>(Src1));
5727     return false;
5728   }
5729 
5730   if (Var0 && Var1)
5731     // TODO(jpp): merge base/index splitting into here.
5732     return false;
5733   if (!IsAdd && Var1)
5734     return false;
5735   if (Var0)
5736     NewBase = Var0;
5737   else if (Var1)
5738     NewBase = Var1;
5739   // Compute the updated constant offset.
5740   if (Const0) {
5741     int32_t MoreOffset = IsAdd ? Const0->getValue() : -Const0->getValue();
5742     if (Utils::WouldOverflowAdd(NewOffset, MoreOffset))
5743       return false;
5744     NewOffset += MoreOffset;
5745   }
5746   if (Const1) {
5747     int32_t MoreOffset = IsAdd ? Const1->getValue() : -Const1->getValue();
5748     if (Utils::WouldOverflowAdd(NewOffset, MoreOffset))
5749       return false;
5750     NewOffset += MoreOffset;
5751   }
5752 
5753   // Update the computed address parameters once we are sure optimization
5754   // is valid.
5755   *Base = NewBase;
5756   *Offset = NewOffset;
5757   *Reason = BaseInst;
5758   return true;
5759 }
5760 } // end of anonymous namespace
5761 
formAddressingMode(Type Ty,Cfg * Func,const Inst * LdSt,Operand * Base)5762 OperandARM32Mem *TargetARM32::formAddressingMode(Type Ty, Cfg *Func,
5763                                                  const Inst *LdSt,
5764                                                  Operand *Base) {
5765   assert(Base != nullptr);
5766   int32_t OffsetImm = 0;
5767   Variable *OffsetReg = nullptr;
5768   int32_t OffsetRegShamt = 0;
5769   OperandARM32::ShiftKind ShiftKind = OperandARM32::kNoShift;
5770 
5771   Func->resetCurrentNode();
5772   if (Func->isVerbose(IceV_AddrOpt)) {
5773     OstreamLocker _(Func->getContext());
5774     Ostream &Str = Func->getContext()->getStrDump();
5775     Str << "\nAddress mode formation:\t";
5776     LdSt->dumpDecorated(Func);
5777   }
5778 
5779   if (isVectorType(Ty))
5780     // vector loads and stores do not allow offsets, and only support the
5781     // "[reg]" addressing mode (the other supported modes are write back.)
5782     return nullptr;
5783 
5784   auto *BaseVar = llvm::dyn_cast<Variable>(Base);
5785   if (BaseVar == nullptr)
5786     return nullptr;
5787 
5788   (void)MemTraitsSize;
5789   assert(Ty < MemTraitsSize);
5790   auto *TypeTraits = &MemTraits[Ty];
5791   const bool CanHaveIndex = !NeedSandboxing && TypeTraits->CanHaveIndex;
5792   const bool CanHaveShiftedIndex =
5793       !NeedSandboxing && TypeTraits->CanHaveShiftedIndex;
5794   const bool CanHaveImm = TypeTraits->CanHaveImm;
5795   const int32_t ValidImmMask = TypeTraits->ValidImmMask;
5796   (void)ValidImmMask;
5797   assert(!CanHaveImm || ValidImmMask >= 0);
5798 
5799   const VariablesMetadata *VMetadata = Func->getVMetadata();
5800   const Inst *Reason = nullptr;
5801 
5802   do {
5803     if (Reason != nullptr) {
5804       dumpAddressOpt(Func, BaseVar, OffsetImm, OffsetReg, OffsetRegShamt,
5805                      Reason);
5806       Reason = nullptr;
5807     }
5808 
5809     if (matchAssign(VMetadata, &BaseVar, &OffsetImm, &Reason)) {
5810       continue;
5811     }
5812 
5813     if (CanHaveIndex &&
5814         matchAssign(VMetadata, &OffsetReg, &OffsetImm, &Reason)) {
5815       continue;
5816     }
5817 
5818     if (CanHaveIndex && matchCombinedBaseIndex(VMetadata, &BaseVar, &OffsetReg,
5819                                                OffsetRegShamt, &Reason)) {
5820       continue;
5821     }
5822 
5823     if (CanHaveShiftedIndex) {
5824       if (matchShiftedOffsetReg(VMetadata, &OffsetReg, &ShiftKind,
5825                                 &OffsetRegShamt, &Reason)) {
5826         continue;
5827       }
5828 
5829       if ((OffsetRegShamt == 0) &&
5830           matchShiftedOffsetReg(VMetadata, &BaseVar, &ShiftKind,
5831                                 &OffsetRegShamt, &Reason)) {
5832         std::swap(BaseVar, OffsetReg);
5833         continue;
5834       }
5835     }
5836 
5837     if (matchOffsetBase(VMetadata, &BaseVar, &OffsetImm, &Reason)) {
5838       continue;
5839     }
5840   } while (Reason);
5841 
5842   if (BaseVar == nullptr) {
5843     // [OffsetReg{, LSL Shamt}{, #OffsetImm}] is not legal in ARM, so we have to
5844     // legalize the addressing mode to [BaseReg, OffsetReg{, LSL Shamt}].
5845     // Instead of a zeroed BaseReg, we initialize it with OffsetImm:
5846     //
5847     // [OffsetReg{, LSL Shamt}{, #OffsetImm}] ->
5848     //     mov BaseReg, #OffsetImm
5849     //     use of [BaseReg, OffsetReg{, LSL Shamt}]
5850     //
5851     const Type PointerType = getPointerType();
5852     BaseVar = makeReg(PointerType);
5853     Context.insert<InstAssign>(BaseVar, Ctx->getConstantInt32(OffsetImm));
5854     OffsetImm = 0;
5855   } else if (OffsetImm != 0) {
5856     // ARM Ldr/Str instructions have limited range immediates. The formation
5857     // loop above materialized an Immediate carelessly, so we ensure the
5858     // generated offset is sane.
5859     const int32_t PositiveOffset = OffsetImm > 0 ? OffsetImm : -OffsetImm;
5860     const InstArithmetic::OpKind Op =
5861         OffsetImm > 0 ? InstArithmetic::Add : InstArithmetic::Sub;
5862 
5863     if (!CanHaveImm || !isLegalMemOffset(Ty, OffsetImm) ||
5864         OffsetReg != nullptr) {
5865       if (OffsetReg == nullptr) {
5866         // We formed a [Base, #const] addressing mode which is not encodable in
5867         // ARM. There is little point in forming an address mode now if we don't
5868         // have an offset. Effectively, we would end up with something like
5869         //
5870         // [Base, #const] -> add T, Base, #const
5871         //                   use of [T]
5872         //
5873         // Which is exactly what we already have. So we just bite the bullet
5874         // here and don't form any address mode.
5875         return nullptr;
5876       }
5877       // We formed [Base, Offset {, LSL Amnt}, #const]. Oops. Legalize it to
5878       //
5879       // [Base, Offset, {LSL amount}, #const] ->
5880       //      add T, Base, #const
5881       //      use of [T, Offset {, LSL amount}]
5882       const Type PointerType = getPointerType();
5883       Variable *T = makeReg(PointerType);
5884       Context.insert<InstArithmetic>(Op, T, BaseVar,
5885                                      Ctx->getConstantInt32(PositiveOffset));
5886       BaseVar = T;
5887       OffsetImm = 0;
5888     }
5889   }
5890 
5891   assert(BaseVar != nullptr);
5892   assert(OffsetImm == 0 || OffsetReg == nullptr);
5893   assert(OffsetReg == nullptr || CanHaveIndex);
5894   assert(OffsetImm < 0 ? (ValidImmMask & -OffsetImm) == -OffsetImm
5895                        : (ValidImmMask & OffsetImm) == OffsetImm);
5896 
5897   if (OffsetReg != nullptr) {
5898     Variable *OffsetR = makeReg(getPointerType());
5899     Context.insert<InstAssign>(OffsetR, OffsetReg);
5900     return OperandARM32Mem::create(Func, Ty, BaseVar, OffsetR, ShiftKind,
5901                                    OffsetRegShamt);
5902   }
5903 
5904   return OperandARM32Mem::create(
5905       Func, Ty, BaseVar,
5906       llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(OffsetImm)));
5907 }
5908 
doAddressOptLoad()5909 void TargetARM32::doAddressOptLoad() {
5910   Inst *Instr = iteratorToInst(Context.getCur());
5911   assert(llvm::isa<InstLoad>(Instr));
5912   Variable *Dest = Instr->getDest();
5913   Operand *Addr = Instr->getSrc(0);
5914   if (OperandARM32Mem *Mem =
5915           formAddressingMode(Dest->getType(), Func, Instr, Addr)) {
5916     Instr->setDeleted();
5917     Context.insert<InstLoad>(Dest, Mem);
5918   }
5919 }
5920 
randomlyInsertNop(float Probability,RandomNumberGenerator & RNG)5921 void TargetARM32::randomlyInsertNop(float Probability,
5922                                     RandomNumberGenerator &RNG) {
5923   RandomNumberGeneratorWrapper RNGW(RNG);
5924   if (RNGW.getTrueWithProbability(Probability)) {
5925     _nop();
5926   }
5927 }
5928 
lowerPhi(const InstPhi *)5929 void TargetARM32::lowerPhi(const InstPhi * /*Instr*/) {
5930   Func->setError("Phi found in regular instruction list");
5931 }
5932 
lowerRet(const InstRet * Instr)5933 void TargetARM32::lowerRet(const InstRet *Instr) {
5934   Variable *Reg = nullptr;
5935   if (Instr->hasRetValue()) {
5936     Operand *Src0 = Instr->getRetValue();
5937     Type Ty = Src0->getType();
5938     if (Ty == IceType_i64) {
5939       Src0 = legalizeUndef(Src0);
5940       Variable *R0 = legalizeToReg(loOperand(Src0), RegARM32::Reg_r0);
5941       Variable *R1 = legalizeToReg(hiOperand(Src0), RegARM32::Reg_r1);
5942       Reg = R0;
5943       Context.insert<InstFakeUse>(R1);
5944     } else if (Ty == IceType_f32) {
5945       Variable *S0 = legalizeToReg(Src0, RegARM32::Reg_s0);
5946       Reg = S0;
5947     } else if (Ty == IceType_f64) {
5948       Variable *D0 = legalizeToReg(Src0, RegARM32::Reg_d0);
5949       Reg = D0;
5950     } else if (isVectorType(Src0->getType())) {
5951       Variable *Q0 = legalizeToReg(Src0, RegARM32::Reg_q0);
5952       Reg = Q0;
5953     } else {
5954       Operand *Src0F = legalize(Src0, Legal_Reg | Legal_Flex);
5955       Reg = makeReg(Src0F->getType(), RegARM32::Reg_r0);
5956       _mov(Reg, Src0F, CondARM32::AL);
5957     }
5958   }
5959   // Add a ret instruction even if sandboxing is enabled, because addEpilog
5960   // explicitly looks for a ret instruction as a marker for where to insert the
5961   // frame removal instructions. addEpilog is responsible for restoring the
5962   // "lr" register as needed prior to this ret instruction.
5963   _ret(getPhysicalRegister(RegARM32::Reg_lr), Reg);
5964 
5965   // Add a fake use of sp to make sure sp stays alive for the entire function.
5966   // Otherwise post-call sp adjustments get dead-code eliminated.
5967   // TODO: Are there more places where the fake use should be inserted? E.g.
5968   // "void f(int n){while(1) g(n);}" may not have a ret instruction.
5969   Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
5970   Context.insert<InstFakeUse>(SP);
5971 }
5972 
lowerShuffleVector(const InstShuffleVector * Instr)5973 void TargetARM32::lowerShuffleVector(const InstShuffleVector *Instr) {
5974   auto *Dest = Instr->getDest();
5975   const Type DestTy = Dest->getType();
5976 
5977   auto *T = makeReg(DestTy);
5978   auto *Src0 = Instr->getSrc(0);
5979   auto *Src1 = Instr->getSrc(1);
5980   const SizeT NumElements = typeNumElements(DestTy);
5981   const Type ElementType = typeElementType(DestTy);
5982 
5983   bool Replicate = true;
5984   for (SizeT I = 1; Replicate && I < Instr->getNumIndexes(); ++I) {
5985     if (Instr->getIndexValue(I) != Instr->getIndexValue(0)) {
5986       Replicate = false;
5987     }
5988   }
5989 
5990   if (Replicate) {
5991     Variable *Src0Var = legalizeToReg(Src0);
5992     _vdup(T, Src0Var, Instr->getIndexValue(0));
5993     _mov(Dest, T);
5994     return;
5995   }
5996 
5997   switch (DestTy) {
5998   case IceType_v8i1:
5999   case IceType_v8i16: {
6000     static constexpr SizeT ExpectedNumElements = 8;
6001     assert(ExpectedNumElements == Instr->getNumIndexes());
6002     (void)ExpectedNumElements;
6003 
6004     if (Instr->indexesAre(0, 0, 1, 1, 2, 2, 3, 3)) {
6005       Variable *Src0R = legalizeToReg(Src0);
6006       _vzip(T, Src0R, Src0R);
6007       _mov(Dest, T);
6008       return;
6009     }
6010 
6011     if (Instr->indexesAre(0, 8, 1, 9, 2, 10, 3, 11)) {
6012       Variable *Src0R = legalizeToReg(Src0);
6013       Variable *Src1R = legalizeToReg(Src1);
6014       _vzip(T, Src0R, Src1R);
6015       _mov(Dest, T);
6016       return;
6017     }
6018 
6019     if (Instr->indexesAre(0, 2, 4, 6, 0, 2, 4, 6)) {
6020       Variable *Src0R = legalizeToReg(Src0);
6021       _vqmovn2(T, Src0R, Src0R, false, false);
6022       _mov(Dest, T);
6023       return;
6024     }
6025   } break;
6026   case IceType_v16i1:
6027   case IceType_v16i8: {
6028     static constexpr SizeT ExpectedNumElements = 16;
6029     assert(ExpectedNumElements == Instr->getNumIndexes());
6030     (void)ExpectedNumElements;
6031 
6032     if (Instr->indexesAre(0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7)) {
6033       Variable *Src0R = legalizeToReg(Src0);
6034       _vzip(T, Src0R, Src0R);
6035       _mov(Dest, T);
6036       return;
6037     }
6038 
6039     if (Instr->indexesAre(0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7,
6040                           23)) {
6041       Variable *Src0R = legalizeToReg(Src0);
6042       Variable *Src1R = legalizeToReg(Src1);
6043       _vzip(T, Src0R, Src1R);
6044       _mov(Dest, T);
6045       return;
6046     }
6047   } break;
6048   case IceType_v4i1:
6049   case IceType_v4i32:
6050   case IceType_v4f32: {
6051     static constexpr SizeT ExpectedNumElements = 4;
6052     assert(ExpectedNumElements == Instr->getNumIndexes());
6053     (void)ExpectedNumElements;
6054 
6055     if (Instr->indexesAre(0, 0, 1, 1)) {
6056       Variable *Src0R = legalizeToReg(Src0);
6057       _vzip(T, Src0R, Src0R);
6058       _mov(Dest, T);
6059       return;
6060     }
6061 
6062     if (Instr->indexesAre(0, 4, 1, 5)) {
6063       Variable *Src0R = legalizeToReg(Src0);
6064       Variable *Src1R = legalizeToReg(Src1);
6065       _vzip(T, Src0R, Src1R);
6066       _mov(Dest, T);
6067       return;
6068     }
6069 
6070     if (Instr->indexesAre(0, 1, 4, 5)) {
6071       Variable *Src0R = legalizeToReg(Src0);
6072       Variable *Src1R = legalizeToReg(Src1);
6073       _vmovlh(T, Src0R, Src1R);
6074       _mov(Dest, T);
6075       return;
6076     }
6077 
6078     if (Instr->indexesAre(2, 3, 2, 3)) {
6079       Variable *Src0R = legalizeToReg(Src0);
6080       _vmovhl(T, Src0R, Src0R);
6081       _mov(Dest, T);
6082       return;
6083     }
6084 
6085     if (Instr->indexesAre(2, 3, 6, 7)) {
6086       Variable *Src0R = legalizeToReg(Src0);
6087       Variable *Src1R = legalizeToReg(Src1);
6088       _vmovhl(T, Src1R, Src0R);
6089       _mov(Dest, T);
6090       return;
6091     }
6092   } break;
6093   default:
6094     break;
6095     // TODO(jpp): figure out how to properly lower this without scalarization.
6096   }
6097 
6098   // Unoptimized shuffle. Perform a series of inserts and extracts.
6099   Context.insert<InstFakeDef>(T);
6100   for (SizeT I = 0; I < Instr->getNumIndexes(); ++I) {
6101     auto *Index = Instr->getIndex(I);
6102     const SizeT Elem = Index->getValue();
6103     auto *ExtElmt = makeReg(ElementType);
6104     if (Elem < NumElements) {
6105       lowerExtractElement(
6106           InstExtractElement::create(Func, ExtElmt, Src0, Index));
6107     } else {
6108       lowerExtractElement(InstExtractElement::create(
6109           Func, ExtElmt, Src1,
6110           Ctx->getConstantInt32(Index->getValue() - NumElements)));
6111     }
6112     auto *NewT = makeReg(DestTy);
6113     lowerInsertElement(InstInsertElement::create(Func, NewT, T, ExtElmt,
6114                                                  Ctx->getConstantInt32(I)));
6115     T = NewT;
6116   }
6117   _mov(Dest, T);
6118 }
6119 
lowerSelect(const InstSelect * Instr)6120 void TargetARM32::lowerSelect(const InstSelect *Instr) {
6121   Variable *Dest = Instr->getDest();
6122   Type DestTy = Dest->getType();
6123   Operand *SrcT = Instr->getTrueOperand();
6124   Operand *SrcF = Instr->getFalseOperand();
6125   Operand *Condition = Instr->getCondition();
6126 
6127   if (!isVectorType(DestTy)) {
6128     lowerInt1ForSelect(Dest, Condition, legalizeUndef(SrcT),
6129                        legalizeUndef(SrcF));
6130     return;
6131   }
6132 
6133   Type TType = DestTy;
6134   switch (DestTy) {
6135   default:
6136     llvm::report_fatal_error("Unexpected type for vector select.");
6137   case IceType_v4i1:
6138     TType = IceType_v4i32;
6139     break;
6140   case IceType_v8i1:
6141     TType = IceType_v8i16;
6142     break;
6143   case IceType_v16i1:
6144     TType = IceType_v16i8;
6145     break;
6146   case IceType_v4f32:
6147     TType = IceType_v4i32;
6148     break;
6149   case IceType_v4i32:
6150   case IceType_v8i16:
6151   case IceType_v16i8:
6152     break;
6153   }
6154   auto *T = makeReg(TType);
6155   lowerCast(InstCast::create(Func, InstCast::Sext, T, Condition));
6156   auto *SrcTR = legalizeToReg(SrcT);
6157   auto *SrcFR = legalizeToReg(SrcF);
6158   _vbsl(T, SrcTR, SrcFR)->setDestRedefined();
6159   _mov(Dest, T);
6160 }
6161 
lowerStore(const InstStore * Instr)6162 void TargetARM32::lowerStore(const InstStore *Instr) {
6163   Operand *Value = Instr->getData();
6164   Operand *Addr = Instr->getAddr();
6165   OperandARM32Mem *NewAddr = formMemoryOperand(Addr, Value->getType());
6166   Type Ty = NewAddr->getType();
6167 
6168   if (Ty == IceType_i64) {
6169     Value = legalizeUndef(Value);
6170     Variable *ValueHi = legalizeToReg(hiOperand(Value));
6171     Variable *ValueLo = legalizeToReg(loOperand(Value));
6172     _str(ValueHi, llvm::cast<OperandARM32Mem>(hiOperand(NewAddr)));
6173     _str(ValueLo, llvm::cast<OperandARM32Mem>(loOperand(NewAddr)));
6174   } else {
6175     Variable *ValueR = legalizeToReg(Value);
6176     _str(ValueR, NewAddr);
6177   }
6178 }
6179 
doAddressOptStore()6180 void TargetARM32::doAddressOptStore() {
6181   Inst *Instr = iteratorToInst(Context.getCur());
6182   assert(llvm::isa<InstStore>(Instr));
6183   Operand *Src = Instr->getSrc(0);
6184   Operand *Addr = Instr->getSrc(1);
6185   if (OperandARM32Mem *Mem =
6186           formAddressingMode(Src->getType(), Func, Instr, Addr)) {
6187     Instr->setDeleted();
6188     Context.insert<InstStore>(Src, Mem);
6189   }
6190 }
6191 
lowerSwitch(const InstSwitch * Instr)6192 void TargetARM32::lowerSwitch(const InstSwitch *Instr) {
6193   // This implements the most naive possible lowering.
6194   // cmp a,val[0]; jeq label[0]; cmp a,val[1]; jeq label[1]; ... jmp default
6195   Operand *Src0 = Instr->getComparison();
6196   SizeT NumCases = Instr->getNumCases();
6197   if (Src0->getType() == IceType_i64) {
6198     Src0 = legalizeUndef(Src0);
6199     Variable *Src0Lo = legalizeToReg(loOperand(Src0));
6200     Variable *Src0Hi = legalizeToReg(hiOperand(Src0));
6201     for (SizeT I = 0; I < NumCases; ++I) {
6202       Operand *ValueLo = Ctx->getConstantInt32(Instr->getValue(I));
6203       Operand *ValueHi = Ctx->getConstantInt32(Instr->getValue(I) >> 32);
6204       ValueLo = legalize(ValueLo, Legal_Reg | Legal_Flex);
6205       ValueHi = legalize(ValueHi, Legal_Reg | Legal_Flex);
6206       _cmp(Src0Lo, ValueLo);
6207       _cmp(Src0Hi, ValueHi, CondARM32::EQ);
6208       _br(Instr->getLabel(I), CondARM32::EQ);
6209     }
6210     _br(Instr->getLabelDefault());
6211     return;
6212   }
6213 
6214   Variable *Src0Var = legalizeToReg(Src0);
6215   // If Src0 is not an i32, we left shift it -- see the icmp lowering for the
6216   // reason.
6217   assert(Src0Var->mustHaveReg());
6218   const size_t ShiftAmt = 32 - getScalarIntBitWidth(Src0->getType());
6219   assert(ShiftAmt < 32);
6220   if (ShiftAmt > 0) {
6221     Operand *ShAmtImm = shAmtImm(ShiftAmt);
6222     Variable *T = makeReg(IceType_i32);
6223     _lsl(T, Src0Var, ShAmtImm);
6224     Src0Var = T;
6225   }
6226 
6227   for (SizeT I = 0; I < NumCases; ++I) {
6228     Operand *Value = Ctx->getConstantInt32(Instr->getValue(I) << ShiftAmt);
6229     Value = legalize(Value, Legal_Reg | Legal_Flex);
6230     _cmp(Src0Var, Value);
6231     _br(Instr->getLabel(I), CondARM32::EQ);
6232   }
6233   _br(Instr->getLabelDefault());
6234 }
6235 
lowerBreakpoint(const InstBreakpoint * Instr)6236 void TargetARM32::lowerBreakpoint(const InstBreakpoint *Instr) {
6237   UnimplementedLoweringError(this, Instr);
6238 }
6239 
lowerUnreachable(const InstUnreachable *)6240 void TargetARM32::lowerUnreachable(const InstUnreachable * /*Instr*/) {
6241   _trap();
6242 }
6243 
6244 namespace {
6245 // Returns whether Opnd needs the GOT address. Currently, ConstantRelocatables,
6246 // and fp constants will need access to the GOT address.
operandNeedsGot(const Operand * Opnd)6247 bool operandNeedsGot(const Operand *Opnd) {
6248   if (llvm::isa<ConstantRelocatable>(Opnd)) {
6249     return true;
6250   }
6251 
6252   if (llvm::isa<ConstantFloat>(Opnd)) {
6253     uint32_t _;
6254     return !OperandARM32FlexFpImm::canHoldImm(Opnd, &_);
6255   }
6256 
6257   const auto *F64 = llvm::dyn_cast<ConstantDouble>(Opnd);
6258   if (F64 != nullptr) {
6259     uint32_t _;
6260     return !OperandARM32FlexFpImm::canHoldImm(Opnd, &_) &&
6261            !isFloatingPointZero(F64);
6262   }
6263 
6264   return false;
6265 }
6266 
6267 // Returns whether Phi needs the GOT address (which it does if any of its
6268 // operands needs the GOT address.)
phiNeedsGot(const InstPhi * Phi)6269 bool phiNeedsGot(const InstPhi *Phi) {
6270   if (Phi->isDeleted()) {
6271     return false;
6272   }
6273 
6274   for (SizeT I = 0; I < Phi->getSrcSize(); ++I) {
6275     if (operandNeedsGot(Phi->getSrc(I))) {
6276       return true;
6277     }
6278   }
6279 
6280   return false;
6281 }
6282 
6283 // Returns whether **any** phi in Node needs the GOT address.
anyPhiInNodeNeedsGot(CfgNode * Node)6284 bool anyPhiInNodeNeedsGot(CfgNode *Node) {
6285   for (auto &Inst : Node->getPhis()) {
6286     if (phiNeedsGot(llvm::cast<InstPhi>(&Inst))) {
6287       return true;
6288     }
6289   }
6290   return false;
6291 }
6292 
6293 } // end of anonymous namespace
6294 
prelowerPhis()6295 void TargetARM32::prelowerPhis() {
6296   CfgNode *Node = Context.getNode();
6297 
6298   if (SandboxingType == ST_Nonsfi) {
6299     assert(GotPtr != nullptr);
6300     if (anyPhiInNodeNeedsGot(Node)) {
6301       // If any phi instruction needs the GOT address, we place a
6302       //   fake-use GotPtr
6303       // in Node to prevent the GotPtr's initialization from being dead code
6304       // eliminated.
6305       Node->getInsts().push_front(InstFakeUse::create(Func, GotPtr));
6306     }
6307   }
6308 
6309   PhiLowering::prelowerPhis32Bit(this, Node, Func);
6310 }
6311 
makeVectorOfZeros(Type Ty,RegNumT RegNum)6312 Variable *TargetARM32::makeVectorOfZeros(Type Ty, RegNumT RegNum) {
6313   Variable *Reg = makeReg(Ty, RegNum);
6314   Context.insert<InstFakeDef>(Reg);
6315   assert(isVectorType(Ty));
6316   _veor(Reg, Reg, Reg);
6317   return Reg;
6318 }
6319 
6320 // Helper for legalize() to emit the right code to lower an operand to a
6321 // register of the appropriate type.
copyToReg(Operand * Src,RegNumT RegNum)6322 Variable *TargetARM32::copyToReg(Operand *Src, RegNumT RegNum) {
6323   Type Ty = Src->getType();
6324   Variable *Reg = makeReg(Ty, RegNum);
6325   if (auto *Mem = llvm::dyn_cast<OperandARM32Mem>(Src)) {
6326     _ldr(Reg, Mem);
6327   } else {
6328     _mov(Reg, Src);
6329   }
6330   return Reg;
6331 }
6332 
6333 // TODO(jpp): remove unneeded else clauses in legalize.
legalize(Operand * From,LegalMask Allowed,RegNumT RegNum)6334 Operand *TargetARM32::legalize(Operand *From, LegalMask Allowed,
6335                                RegNumT RegNum) {
6336   Type Ty = From->getType();
6337   // Assert that a physical register is allowed. To date, all calls to
6338   // legalize() allow a physical register. Legal_Flex converts registers to the
6339   // right type OperandARM32FlexReg as needed.
6340   assert(Allowed & Legal_Reg);
6341 
6342   // Copied ipsis literis from TargetX86Base<Machine>.
6343   if (RegNum.hasNoValue()) {
6344     if (Variable *Subst = getContext().availabilityGet(From)) {
6345       // At this point we know there is a potential substitution available.
6346       if (!Subst->isRematerializable() && Subst->mustHaveReg() &&
6347           !Subst->hasReg()) {
6348         // At this point we know the substitution will have a register.
6349         if (From->getType() == Subst->getType()) {
6350           // At this point we know the substitution's register is compatible.
6351           return Subst;
6352         }
6353       }
6354     }
6355   }
6356 
6357   // Go through the various types of operands: OperandARM32Mem,
6358   // OperandARM32Flex, Constant, and Variable. Given the above assertion, if
6359   // type of operand is not legal (e.g., OperandARM32Mem and !Legal_Mem), we
6360   // can always copy to a register.
6361   if (auto *Mem = llvm::dyn_cast<OperandARM32Mem>(From)) {
6362     // Before doing anything with a Mem operand, we need to ensure that the
6363     // Base and Index components are in physical registers.
6364     Variable *Base = Mem->getBase();
6365     Variable *Index = Mem->getIndex();
6366     ConstantInteger32 *Offset = Mem->getOffset();
6367     assert(Index == nullptr || Offset == nullptr);
6368     Variable *RegBase = nullptr;
6369     Variable *RegIndex = nullptr;
6370     assert(Base);
6371     RegBase = llvm::cast<Variable>(
6372         legalize(Base, Legal_Reg | Legal_Rematerializable));
6373     assert(Ty < MemTraitsSize);
6374     if (Index) {
6375       assert(Offset == nullptr);
6376       assert(MemTraits[Ty].CanHaveIndex);
6377       RegIndex = legalizeToReg(Index);
6378     }
6379     if (Offset && Offset->getValue() != 0) {
6380       assert(Index == nullptr);
6381       static constexpr bool ZeroExt = false;
6382       assert(MemTraits[Ty].CanHaveImm);
6383       if (!OperandARM32Mem::canHoldOffset(Ty, ZeroExt, Offset->getValue())) {
6384         llvm::report_fatal_error("Invalid memory offset.");
6385       }
6386     }
6387 
6388     // Create a new operand if there was a change.
6389     if (Base != RegBase || Index != RegIndex) {
6390       // There is only a reg +/- reg or reg + imm form.
6391       // Figure out which to re-create.
6392       if (RegIndex) {
6393         Mem = OperandARM32Mem::create(Func, Ty, RegBase, RegIndex,
6394                                       Mem->getShiftOp(), Mem->getShiftAmt(),
6395                                       Mem->getAddrMode());
6396       } else {
6397         Mem = OperandARM32Mem::create(Func, Ty, RegBase, Offset,
6398                                       Mem->getAddrMode());
6399       }
6400     }
6401     if (Allowed & Legal_Mem) {
6402       From = Mem;
6403     } else {
6404       Variable *Reg = makeReg(Ty, RegNum);
6405       _ldr(Reg, Mem);
6406       From = Reg;
6407     }
6408     return From;
6409   }
6410 
6411   if (auto *Flex = llvm::dyn_cast<OperandARM32Flex>(From)) {
6412     if (!(Allowed & Legal_Flex)) {
6413       if (auto *FlexReg = llvm::dyn_cast<OperandARM32FlexReg>(Flex)) {
6414         if (FlexReg->getShiftOp() == OperandARM32::kNoShift) {
6415           From = FlexReg->getReg();
6416           // Fall through and let From be checked as a Variable below, where it
6417           // may or may not need a register.
6418         } else {
6419           return copyToReg(Flex, RegNum);
6420         }
6421       } else {
6422         return copyToReg(Flex, RegNum);
6423       }
6424     } else {
6425       return From;
6426     }
6427   }
6428 
6429   if (llvm::isa<Constant>(From)) {
6430     if (llvm::isa<ConstantUndef>(From)) {
6431       From = legalizeUndef(From, RegNum);
6432       if (isVectorType(Ty))
6433         return From;
6434     }
6435     // There should be no constants of vector type (other than undef).
6436     assert(!isVectorType(Ty));
6437     if (auto *C32 = llvm::dyn_cast<ConstantInteger32>(From)) {
6438       uint32_t RotateAmt;
6439       uint32_t Immed_8;
6440       uint32_t Value = static_cast<uint32_t>(C32->getValue());
6441       if (OperandARM32FlexImm::canHoldImm(Value, &RotateAmt, &Immed_8)) {
6442         // The immediate can be encoded as a Flex immediate. We may return the
6443         // Flex operand if the caller has Allow'ed it.
6444         auto *OpF = OperandARM32FlexImm::create(Func, Ty, Immed_8, RotateAmt);
6445         const bool CanBeFlex = Allowed & Legal_Flex;
6446         if (CanBeFlex)
6447           return OpF;
6448         return copyToReg(OpF, RegNum);
6449       } else if (OperandARM32FlexImm::canHoldImm(~Value, &RotateAmt,
6450                                                  &Immed_8)) {
6451         // Even though the immediate can't be encoded as a Flex operand, its
6452         // inverted bit pattern can, thus we use ARM's mvn to load the 32-bit
6453         // constant with a single instruction.
6454         auto *InvOpF =
6455             OperandARM32FlexImm::create(Func, Ty, Immed_8, RotateAmt);
6456         Variable *Reg = makeReg(Ty, RegNum);
6457         _mvn(Reg, InvOpF);
6458         return Reg;
6459       } else {
6460         // Do a movw/movt to a register.
6461         Variable *Reg = makeReg(Ty, RegNum);
6462         uint32_t UpperBits = (Value >> 16) & 0xFFFF;
6463         _movw(Reg,
6464               UpperBits != 0 ? Ctx->getConstantInt32(Value & 0xFFFF) : C32);
6465         if (UpperBits != 0) {
6466           _movt(Reg, Ctx->getConstantInt32(UpperBits));
6467         }
6468         return Reg;
6469       }
6470     } else if (auto *C = llvm::dyn_cast<ConstantRelocatable>(From)) {
6471       Variable *Reg = makeReg(Ty, RegNum);
6472       if (SandboxingType != ST_Nonsfi) {
6473         _movw(Reg, C);
6474         _movt(Reg, C);
6475       } else {
6476         auto *GotAddr = legalizeToReg(GotPtr);
6477         GlobalString CGotoffName = createGotoffRelocation(C);
6478         loadNamedConstantRelocatablePIC(
6479             CGotoffName, Reg, [this, Reg](Variable *PC) {
6480               _ldr(Reg, OperandARM32Mem::create(Func, IceType_i32, PC, Reg));
6481             });
6482         _add(Reg, GotAddr, Reg);
6483       }
6484       return Reg;
6485     } else {
6486       assert(isScalarFloatingType(Ty));
6487       uint32_t ModifiedImm;
6488       if (OperandARM32FlexFpImm::canHoldImm(From, &ModifiedImm)) {
6489         Variable *T = makeReg(Ty, RegNum);
6490         _mov(T,
6491              OperandARM32FlexFpImm::create(Func, From->getType(), ModifiedImm));
6492         return T;
6493       }
6494 
6495       if (Ty == IceType_f64 && isFloatingPointZero(From)) {
6496         // Use T = T ^ T to load a 64-bit fp zero. This does not work for f32
6497         // because ARM does not have a veor instruction with S registers.
6498         Variable *T = makeReg(IceType_f64, RegNum);
6499         Context.insert<InstFakeDef>(T);
6500         _veor(T, T, T);
6501         return T;
6502       }
6503 
6504       // Load floats/doubles from literal pool.
6505       auto *CFrom = llvm::cast<Constant>(From);
6506       assert(CFrom->getShouldBePooled());
6507       Constant *Offset = Ctx->getConstantSym(0, CFrom->getLabelName());
6508       Variable *BaseReg = nullptr;
6509       if (SandboxingType == ST_Nonsfi) {
6510         // vldr does not support the [base, index] addressing mode, so we need
6511         // to legalize Offset to a register. Otherwise, we could simply
6512         //   vldr dest, [got, reg(Offset)]
6513         BaseReg = legalizeToReg(Offset);
6514       } else {
6515         BaseReg = makeReg(getPointerType());
6516         _movw(BaseReg, Offset);
6517         _movt(BaseReg, Offset);
6518       }
6519       From = formMemoryOperand(BaseReg, Ty);
6520       return copyToReg(From, RegNum);
6521     }
6522   }
6523 
6524   if (auto *Var = llvm::dyn_cast<Variable>(From)) {
6525     if (Var->isRematerializable()) {
6526       if (Allowed & Legal_Rematerializable) {
6527         return From;
6528       }
6529 
6530       Variable *T = makeReg(Var->getType(), RegNum);
6531       _mov(T, Var);
6532       return T;
6533     }
6534     // Check if the variable is guaranteed a physical register. This can happen
6535     // either when the variable is pre-colored or when it is assigned infinite
6536     // weight.
6537     bool MustHaveRegister = (Var->hasReg() || Var->mustHaveReg());
6538     // We need a new physical register for the operand if:
6539     //   Mem is not allowed and Var isn't guaranteed a physical
6540     //   register, or
6541     //   RegNum is required and Var->getRegNum() doesn't match.
6542     if ((!(Allowed & Legal_Mem) && !MustHaveRegister) ||
6543         (RegNum.hasValue() && (RegNum != Var->getRegNum()))) {
6544       From = copyToReg(From, RegNum);
6545     }
6546     return From;
6547   }
6548   llvm::report_fatal_error("Unhandled operand kind in legalize()");
6549 
6550   return From;
6551 }
6552 
6553 /// Provide a trivial wrapper to legalize() for this common usage.
legalizeToReg(Operand * From,RegNumT RegNum)6554 Variable *TargetARM32::legalizeToReg(Operand *From, RegNumT RegNum) {
6555   return llvm::cast<Variable>(legalize(From, Legal_Reg, RegNum));
6556 }
6557 
6558 /// Legalize undef values to concrete values.
legalizeUndef(Operand * From,RegNumT RegNum)6559 Operand *TargetARM32::legalizeUndef(Operand *From, RegNumT RegNum) {
6560   Type Ty = From->getType();
6561   if (llvm::isa<ConstantUndef>(From)) {
6562     // Lower undefs to zero. Another option is to lower undefs to an
6563     // uninitialized register; however, using an uninitialized register results
6564     // in less predictable code.
6565     //
6566     // If in the future the implementation is changed to lower undef values to
6567     // uninitialized registers, a FakeDef will be needed:
6568     // Context.insert(InstFakeDef::create(Func, Reg)); This is in order to
6569     // ensure that the live range of Reg is not overestimated. If the constant
6570     // being lowered is a 64 bit value, then the result should be split and the
6571     // lo and hi components will need to go in uninitialized registers.
6572     if (isVectorType(Ty))
6573       return makeVectorOfZeros(Ty, RegNum);
6574     return Ctx->getConstantZero(Ty);
6575   }
6576   return From;
6577 }
6578 
formMemoryOperand(Operand * Operand,Type Ty)6579 OperandARM32Mem *TargetARM32::formMemoryOperand(Operand *Operand, Type Ty) {
6580   auto *Mem = llvm::dyn_cast<OperandARM32Mem>(Operand);
6581   // It may be the case that address mode optimization already creates an
6582   // OperandARM32Mem, so in that case it wouldn't need another level of
6583   // transformation.
6584   if (Mem) {
6585     return llvm::cast<OperandARM32Mem>(legalize(Mem));
6586   }
6587   // If we didn't do address mode optimization, then we only have a
6588   // base/offset to work with. ARM always requires a base register, so
6589   // just use that to hold the operand.
6590   auto *Base = llvm::cast<Variable>(
6591       legalize(Operand, Legal_Reg | Legal_Rematerializable));
6592   return OperandARM32Mem::create(
6593       Func, Ty, Base,
6594       llvm::cast<ConstantInteger32>(Ctx->getConstantZero(IceType_i32)));
6595 }
6596 
makeI64RegPair()6597 Variable64On32 *TargetARM32::makeI64RegPair() {
6598   Variable64On32 *Reg =
6599       llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64));
6600   Reg->setMustHaveReg();
6601   Reg->initHiLo(Func);
6602   Reg->getLo()->setMustNotHaveReg();
6603   Reg->getHi()->setMustNotHaveReg();
6604   return Reg;
6605 }
6606 
makeReg(Type Type,RegNumT RegNum)6607 Variable *TargetARM32::makeReg(Type Type, RegNumT RegNum) {
6608   // There aren't any 64-bit integer registers for ARM32.
6609   assert(Type != IceType_i64);
6610   assert(AllowTemporaryWithNoReg || RegNum.hasValue());
6611   Variable *Reg = Func->makeVariable(Type);
6612   if (RegNum.hasValue())
6613     Reg->setRegNum(RegNum);
6614   else
6615     Reg->setMustHaveReg();
6616   return Reg;
6617 }
6618 
alignRegisterPow2(Variable * Reg,uint32_t Align,RegNumT TmpRegNum)6619 void TargetARM32::alignRegisterPow2(Variable *Reg, uint32_t Align,
6620                                     RegNumT TmpRegNum) {
6621   assert(llvm::isPowerOf2_32(Align));
6622   uint32_t RotateAmt;
6623   uint32_t Immed_8;
6624   Operand *Mask;
6625   // Use AND or BIC to mask off the bits, depending on which immediate fits (if
6626   // it fits at all). Assume Align is usually small, in which case BIC works
6627   // better. Thus, this rounds down to the alignment.
6628   if (OperandARM32FlexImm::canHoldImm(Align - 1, &RotateAmt, &Immed_8)) {
6629     Mask = legalize(Ctx->getConstantInt32(Align - 1), Legal_Reg | Legal_Flex,
6630                     TmpRegNum);
6631     _bic(Reg, Reg, Mask);
6632   } else {
6633     Mask = legalize(Ctx->getConstantInt32(-Align), Legal_Reg | Legal_Flex,
6634                     TmpRegNum);
6635     _and(Reg, Reg, Mask);
6636   }
6637 }
6638 
postLower()6639 void TargetARM32::postLower() {
6640   if (Func->getOptLevel() == Opt_m1)
6641     return;
6642   markRedefinitions();
6643   Context.availabilityUpdate();
6644 }
6645 
makeRandomRegisterPermutation(llvm::SmallVectorImpl<RegNumT> & Permutation,const SmallBitVector & ExcludeRegisters,uint64_t Salt) const6646 void TargetARM32::makeRandomRegisterPermutation(
6647     llvm::SmallVectorImpl<RegNumT> &Permutation,
6648     const SmallBitVector &ExcludeRegisters, uint64_t Salt) const {
6649   (void)Permutation;
6650   (void)ExcludeRegisters;
6651   (void)Salt;
6652   UnimplementedError(getFlags());
6653 }
6654 
emit(const ConstantInteger32 * C) const6655 void TargetARM32::emit(const ConstantInteger32 *C) const {
6656   if (!BuildDefs::dump())
6657     return;
6658   Ostream &Str = Ctx->getStrEmit();
6659   Str << "#" << C->getValue();
6660 }
6661 
emit(const ConstantInteger64 *) const6662 void TargetARM32::emit(const ConstantInteger64 *) const {
6663   llvm::report_fatal_error("Not expecting to emit 64-bit integers");
6664 }
6665 
emit(const ConstantFloat * C) const6666 void TargetARM32::emit(const ConstantFloat *C) const {
6667   (void)C;
6668   UnimplementedError(getFlags());
6669 }
6670 
emit(const ConstantDouble * C) const6671 void TargetARM32::emit(const ConstantDouble *C) const {
6672   (void)C;
6673   UnimplementedError(getFlags());
6674 }
6675 
emit(const ConstantUndef *) const6676 void TargetARM32::emit(const ConstantUndef *) const {
6677   llvm::report_fatal_error("undef value encountered by emitter.");
6678 }
6679 
emit(const ConstantRelocatable * C) const6680 void TargetARM32::emit(const ConstantRelocatable *C) const {
6681   if (!BuildDefs::dump())
6682     return;
6683   Ostream &Str = Ctx->getStrEmit();
6684   Str << "#";
6685   emitWithoutPrefix(C);
6686 }
6687 
lowerInt1ForSelect(Variable * Dest,Operand * Boolean,Operand * TrueValue,Operand * FalseValue)6688 void TargetARM32::lowerInt1ForSelect(Variable *Dest, Operand *Boolean,
6689                                      Operand *TrueValue, Operand *FalseValue) {
6690   Operand *_1 = legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex);
6691 
6692   assert(Boolean->getType() == IceType_i1);
6693 
6694   bool NeedsAnd1 = false;
6695   if (TrueValue->getType() == IceType_i1) {
6696     assert(FalseValue->getType() == IceType_i1);
6697 
6698     Variable *TrueValueV = Func->makeVariable(IceType_i1);
6699     SafeBoolChain Src0Safe = lowerInt1(TrueValueV, TrueValue);
6700     TrueValue = TrueValueV;
6701 
6702     Variable *FalseValueV = Func->makeVariable(IceType_i1);
6703     SafeBoolChain Src1Safe = lowerInt1(FalseValueV, FalseValue);
6704     FalseValue = FalseValueV;
6705 
6706     NeedsAnd1 = Src0Safe == SBC_No || Src1Safe == SBC_No;
6707   }
6708 
6709   Variable *DestLo = (Dest->getType() == IceType_i64)
6710                          ? llvm::cast<Variable>(loOperand(Dest))
6711                          : Dest;
6712   Variable *DestHi = (Dest->getType() == IceType_i64)
6713                          ? llvm::cast<Variable>(hiOperand(Dest))
6714                          : nullptr;
6715   Operand *FalseValueLo = (FalseValue->getType() == IceType_i64)
6716                               ? loOperand(FalseValue)
6717                               : FalseValue;
6718   Operand *FalseValueHi =
6719       (FalseValue->getType() == IceType_i64) ? hiOperand(FalseValue) : nullptr;
6720 
6721   Operand *TrueValueLo =
6722       (TrueValue->getType() == IceType_i64) ? loOperand(TrueValue) : TrueValue;
6723   Operand *TrueValueHi =
6724       (TrueValue->getType() == IceType_i64) ? hiOperand(TrueValue) : nullptr;
6725 
6726   Variable *T_Lo = makeReg(DestLo->getType());
6727   Variable *T_Hi = (DestHi == nullptr) ? nullptr : makeReg(DestHi->getType());
6728 
6729   _mov(T_Lo, legalize(FalseValueLo, Legal_Reg | Legal_Flex));
6730   if (DestHi) {
6731     _mov(T_Hi, legalize(FalseValueHi, Legal_Reg | Legal_Flex));
6732   }
6733 
6734   CondWhenTrue Cond(CondARM32::kNone);
6735   // FlagsWereSet is used to determine wether Boolean was folded or not. If not,
6736   // add an explicit _tst instruction below.
6737   bool FlagsWereSet = false;
6738   if (const Inst *Producer = Computations.getProducerOf(Boolean)) {
6739     switch (Producer->getKind()) {
6740     default:
6741       llvm::report_fatal_error("Unexpected producer.");
6742     case Inst::Icmp: {
6743       Cond = lowerIcmpCond(llvm::cast<InstIcmp>(Producer));
6744       FlagsWereSet = true;
6745     } break;
6746     case Inst::Fcmp: {
6747       Cond = lowerFcmpCond(llvm::cast<InstFcmp>(Producer));
6748       FlagsWereSet = true;
6749     } break;
6750     case Inst::Cast: {
6751       const auto *CastProducer = llvm::cast<InstCast>(Producer);
6752       assert(CastProducer->getCastKind() == InstCast::Trunc);
6753       Boolean = CastProducer->getSrc(0);
6754       // No flags were set, so a _tst(Src, 1) will be emitted below. Don't
6755       // bother legalizing Src to a Reg because it will be legalized before
6756       // emitting the tst instruction.
6757       FlagsWereSet = false;
6758     } break;
6759     case Inst::Arithmetic: {
6760       // This is a special case: we eagerly assumed Producer could be folded,
6761       // but in reality, it can't. No reason to panic: we just lower it using
6762       // the regular lowerArithmetic helper.
6763       const auto *ArithProducer = llvm::cast<InstArithmetic>(Producer);
6764       lowerArithmetic(ArithProducer);
6765       Boolean = ArithProducer->getDest();
6766       // No flags were set, so a _tst(Dest, 1) will be emitted below. Don't
6767       // bother legalizing Dest to a Reg because it will be legalized before
6768       // emitting  the tst instruction.
6769       FlagsWereSet = false;
6770     } break;
6771     }
6772   }
6773 
6774   if (!FlagsWereSet) {
6775     // No flags have been set, so emit a tst Boolean, 1.
6776     Variable *Src = legalizeToReg(Boolean);
6777     _tst(Src, _1);
6778     Cond = CondWhenTrue(CondARM32::NE); // i.e., CondARM32::NotZero.
6779   }
6780 
6781   if (Cond.WhenTrue0 == CondARM32::kNone) {
6782     assert(Cond.WhenTrue1 == CondARM32::kNone);
6783   } else {
6784     _mov_redefined(T_Lo, legalize(TrueValueLo, Legal_Reg | Legal_Flex),
6785                    Cond.WhenTrue0);
6786     if (DestHi) {
6787       _mov_redefined(T_Hi, legalize(TrueValueHi, Legal_Reg | Legal_Flex),
6788                      Cond.WhenTrue0);
6789     }
6790   }
6791 
6792   if (Cond.WhenTrue1 != CondARM32::kNone) {
6793     _mov_redefined(T_Lo, legalize(TrueValueLo, Legal_Reg | Legal_Flex),
6794                    Cond.WhenTrue1);
6795     if (DestHi) {
6796       _mov_redefined(T_Hi, legalize(TrueValueHi, Legal_Reg | Legal_Flex),
6797                      Cond.WhenTrue1);
6798     }
6799   }
6800 
6801   if (NeedsAnd1) {
6802     // We lowered something that is unsafe (i.e., can't provably be zero or
6803     // one). Truncate the result.
6804     _and(T_Lo, T_Lo, _1);
6805   }
6806 
6807   _mov(DestLo, T_Lo);
6808   if (DestHi) {
6809     _mov(DestHi, T_Hi);
6810   }
6811 }
6812 
lowerInt1(Variable * Dest,Operand * Boolean)6813 TargetARM32::SafeBoolChain TargetARM32::lowerInt1(Variable *Dest,
6814                                                   Operand *Boolean) {
6815   assert(Boolean->getType() == IceType_i1);
6816   Variable *T = makeReg(IceType_i1);
6817   Operand *_0 =
6818       legalize(Ctx->getConstantZero(IceType_i1), Legal_Reg | Legal_Flex);
6819   Operand *_1 = legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex);
6820 
6821   SafeBoolChain Safe = SBC_Yes;
6822   if (const Inst *Producer = Computations.getProducerOf(Boolean)) {
6823     switch (Producer->getKind()) {
6824     default:
6825       llvm::report_fatal_error("Unexpected producer.");
6826     case Inst::Icmp: {
6827       _mov(T, _0);
6828       CondWhenTrue Cond = lowerIcmpCond(llvm::cast<InstIcmp>(Producer));
6829       assert(Cond.WhenTrue0 != CondARM32::AL);
6830       assert(Cond.WhenTrue0 != CondARM32::kNone);
6831       assert(Cond.WhenTrue1 == CondARM32::kNone);
6832       _mov_redefined(T, _1, Cond.WhenTrue0);
6833     } break;
6834     case Inst::Fcmp: {
6835       _mov(T, _0);
6836       Inst *MovZero = Context.getLastInserted();
6837       CondWhenTrue Cond = lowerFcmpCond(llvm::cast<InstFcmp>(Producer));
6838       if (Cond.WhenTrue0 == CondARM32::AL) {
6839         assert(Cond.WhenTrue1 == CondARM32::kNone);
6840         MovZero->setDeleted();
6841         _mov(T, _1);
6842       } else if (Cond.WhenTrue0 != CondARM32::kNone) {
6843         _mov_redefined(T, _1, Cond.WhenTrue0);
6844       }
6845       if (Cond.WhenTrue1 != CondARM32::kNone) {
6846         assert(Cond.WhenTrue0 != CondARM32::kNone);
6847         assert(Cond.WhenTrue0 != CondARM32::AL);
6848         _mov_redefined(T, _1, Cond.WhenTrue1);
6849       }
6850     } break;
6851     case Inst::Cast: {
6852       const auto *CastProducer = llvm::cast<InstCast>(Producer);
6853       assert(CastProducer->getCastKind() == InstCast::Trunc);
6854       Operand *Src = CastProducer->getSrc(0);
6855       if (Src->getType() == IceType_i64)
6856         Src = loOperand(Src);
6857       _mov(T, legalize(Src, Legal_Reg | Legal_Flex));
6858       Safe = SBC_No;
6859     } break;
6860     case Inst::Arithmetic: {
6861       const auto *ArithProducer = llvm::cast<InstArithmetic>(Producer);
6862       Safe = lowerInt1Arithmetic(ArithProducer);
6863       _mov(T, ArithProducer->getDest());
6864     } break;
6865     }
6866   } else {
6867     _mov(T, legalize(Boolean, Legal_Reg | Legal_Flex));
6868   }
6869 
6870   _mov(Dest, T);
6871   return Safe;
6872 }
6873 
6874 namespace {
6875 namespace BoolFolding {
shouldTrackProducer(const Inst & Instr)6876 bool shouldTrackProducer(const Inst &Instr) {
6877   switch (Instr.getKind()) {
6878   default:
6879     return false;
6880   case Inst::Icmp:
6881   case Inst::Fcmp:
6882     return true;
6883   case Inst::Cast: {
6884     switch (llvm::cast<InstCast>(&Instr)->getCastKind()) {
6885     default:
6886       return false;
6887     case InstCast::Trunc:
6888       return true;
6889     }
6890   }
6891   case Inst::Arithmetic: {
6892     switch (llvm::cast<InstArithmetic>(&Instr)->getOp()) {
6893     default:
6894       return false;
6895     case InstArithmetic::And:
6896     case InstArithmetic::Or:
6897       return true;
6898     }
6899   }
6900   }
6901 }
6902 
isValidConsumer(const Inst & Instr)6903 bool isValidConsumer(const Inst &Instr) {
6904   switch (Instr.getKind()) {
6905   default:
6906     return false;
6907   case Inst::Br:
6908     return true;
6909   case Inst::Select:
6910     return !isVectorType(Instr.getDest()->getType());
6911   case Inst::Cast: {
6912     switch (llvm::cast<InstCast>(&Instr)->getCastKind()) {
6913     default:
6914       return false;
6915     case InstCast::Sext:
6916       return !isVectorType(Instr.getDest()->getType());
6917     case InstCast::Zext:
6918       return !isVectorType(Instr.getDest()->getType());
6919     }
6920   }
6921   case Inst::Arithmetic: {
6922     switch (llvm::cast<InstArithmetic>(&Instr)->getOp()) {
6923     default:
6924       return false;
6925     case InstArithmetic::And:
6926       return !isVectorType(Instr.getDest()->getType());
6927     case InstArithmetic::Or:
6928       return !isVectorType(Instr.getDest()->getType());
6929     }
6930   }
6931   }
6932 }
6933 } // end of namespace BoolFolding
6934 
6935 namespace FpFolding {
shouldTrackProducer(const Inst & Instr)6936 bool shouldTrackProducer(const Inst &Instr) {
6937   switch (Instr.getKind()) {
6938   default:
6939     return false;
6940   case Inst::Arithmetic: {
6941     switch (llvm::cast<InstArithmetic>(&Instr)->getOp()) {
6942     default:
6943       return false;
6944     case InstArithmetic::Fmul:
6945       return true;
6946     }
6947   }
6948   }
6949 }
6950 
isValidConsumer(const Inst & Instr)6951 bool isValidConsumer(const Inst &Instr) {
6952   switch (Instr.getKind()) {
6953   default:
6954     return false;
6955   case Inst::Arithmetic: {
6956     switch (llvm::cast<InstArithmetic>(&Instr)->getOp()) {
6957     default:
6958       return false;
6959     case InstArithmetic::Fadd:
6960     case InstArithmetic::Fsub:
6961       return true;
6962     }
6963   }
6964   }
6965 }
6966 } // end of namespace FpFolding
6967 
6968 namespace IntFolding {
shouldTrackProducer(const Inst & Instr)6969 bool shouldTrackProducer(const Inst &Instr) {
6970   switch (Instr.getKind()) {
6971   default:
6972     return false;
6973   case Inst::Arithmetic: {
6974     switch (llvm::cast<InstArithmetic>(&Instr)->getOp()) {
6975     default:
6976       return false;
6977     case InstArithmetic::Mul:
6978       return true;
6979     }
6980   }
6981   }
6982 }
6983 
isValidConsumer(const Inst & Instr)6984 bool isValidConsumer(const Inst &Instr) {
6985   switch (Instr.getKind()) {
6986   default:
6987     return false;
6988   case Inst::Arithmetic: {
6989     switch (llvm::cast<InstArithmetic>(&Instr)->getOp()) {
6990     default:
6991       return false;
6992     case InstArithmetic::Add:
6993     case InstArithmetic::Sub:
6994       return true;
6995     }
6996   }
6997   }
6998 }
6999 } // end of namespace FpFolding
7000 } // end of anonymous namespace
7001 
recordProducers(CfgNode * Node)7002 void TargetARM32::ComputationTracker::recordProducers(CfgNode *Node) {
7003   for (Inst &Instr : Node->getInsts()) {
7004     // Check whether Instr is a valid producer.
7005     Variable *Dest = Instr.getDest();
7006     if (!Instr.isDeleted() // only consider non-deleted instructions; and
7007         && Dest            // only instructions with an actual dest var; and
7008         && Dest->getType() == IceType_i1 // only bool-type dest vars; and
7009         && BoolFolding::shouldTrackProducer(Instr)) { // white-listed instr.
7010       KnownComputations.emplace(Dest->getIndex(),
7011                                 ComputationEntry(&Instr, IceType_i1));
7012     }
7013     if (!Instr.isDeleted() // only consider non-deleted instructions; and
7014         && Dest            // only instructions with an actual dest var; and
7015         && isScalarFloatingType(Dest->getType()) // fp-type only dest vars; and
7016         && FpFolding::shouldTrackProducer(Instr)) { // white-listed instr.
7017       KnownComputations.emplace(Dest->getIndex(),
7018                                 ComputationEntry(&Instr, Dest->getType()));
7019     }
7020     if (!Instr.isDeleted() // only consider non-deleted instructions; and
7021         && Dest            // only instructions with an actual dest var; and
7022         && Dest->getType() == IceType_i32            // i32 only dest vars; and
7023         && IntFolding::shouldTrackProducer(Instr)) { // white-listed instr.
7024       KnownComputations.emplace(Dest->getIndex(),
7025                                 ComputationEntry(&Instr, IceType_i32));
7026     }
7027     // Check each src variable against the map.
7028     FOREACH_VAR_IN_INST(Var, Instr) {
7029       SizeT VarNum = Var->getIndex();
7030       auto ComputationIter = KnownComputations.find(VarNum);
7031       if (ComputationIter == KnownComputations.end()) {
7032         continue;
7033       }
7034 
7035       ++ComputationIter->second.NumUses;
7036       switch (ComputationIter->second.ComputationType) {
7037       default:
7038         KnownComputations.erase(VarNum);
7039         continue;
7040       case IceType_i1:
7041         if (!BoolFolding::isValidConsumer(Instr)) {
7042           KnownComputations.erase(VarNum);
7043           continue;
7044         }
7045         break;
7046       case IceType_i32:
7047         if (IndexOfVarInInst(Var) != 1 || !IntFolding::isValidConsumer(Instr)) {
7048           KnownComputations.erase(VarNum);
7049           continue;
7050         }
7051         break;
7052       case IceType_f32:
7053       case IceType_f64:
7054         if (IndexOfVarInInst(Var) != 1 || !FpFolding::isValidConsumer(Instr)) {
7055           KnownComputations.erase(VarNum);
7056           continue;
7057         }
7058         break;
7059       }
7060 
7061       if (Instr.isLastUse(Var)) {
7062         ComputationIter->second.IsLiveOut = false;
7063       }
7064     }
7065   }
7066 
7067   for (auto Iter = KnownComputations.begin(), End = KnownComputations.end();
7068        Iter != End;) {
7069     // Disable the folding if its dest may be live beyond this block.
7070     if (Iter->second.IsLiveOut || Iter->second.NumUses > 1) {
7071       Iter = KnownComputations.erase(Iter);
7072       continue;
7073     }
7074 
7075     // Mark as "dead" rather than outright deleting. This is so that other
7076     // peephole style optimizations during or before lowering have access to
7077     // this instruction in undeleted form. See for example
7078     // tryOptimizedCmpxchgCmpBr().
7079     Iter->second.Instr->setDead();
7080     ++Iter;
7081   }
7082 }
7083 
Sandboxer(TargetARM32 * Target,InstBundleLock::Option BundleOption)7084 TargetARM32::Sandboxer::Sandboxer(TargetARM32 *Target,
7085                                   InstBundleLock::Option BundleOption)
7086     : Target(Target), BundleOption(BundleOption) {}
7087 
~Sandboxer()7088 TargetARM32::Sandboxer::~Sandboxer() {}
7089 
7090 namespace {
indirectBranchBicMask(Cfg * Func)7091 OperandARM32FlexImm *indirectBranchBicMask(Cfg *Func) {
7092   constexpr uint32_t Imm8 = 0xFC; // 0xC000000F
7093   constexpr uint32_t RotateAmt = 2;
7094   return OperandARM32FlexImm::create(Func, IceType_i32, Imm8, RotateAmt);
7095 }
7096 
memOpBicMask(Cfg * Func)7097 OperandARM32FlexImm *memOpBicMask(Cfg *Func) {
7098   constexpr uint32_t Imm8 = 0x0C; // 0xC0000000
7099   constexpr uint32_t RotateAmt = 2;
7100   return OperandARM32FlexImm::create(Func, IceType_i32, Imm8, RotateAmt);
7101 }
7102 
baseNeedsBic(Variable * Base)7103 static bool baseNeedsBic(Variable *Base) {
7104   return Base->getRegNum() != RegARM32::Reg_r9 &&
7105          Base->getRegNum() != RegARM32::Reg_sp;
7106 }
7107 } // end of anonymous namespace
7108 
createAutoBundle()7109 void TargetARM32::Sandboxer::createAutoBundle() {
7110   Bundler = makeUnique<AutoBundle>(Target, BundleOption);
7111 }
7112 
add_sp(Operand * AddAmount)7113 void TargetARM32::Sandboxer::add_sp(Operand *AddAmount) {
7114   Variable *SP = Target->getPhysicalRegister(RegARM32::Reg_sp);
7115   if (!Target->NeedSandboxing) {
7116     Target->_add(SP, SP, AddAmount);
7117     return;
7118   }
7119   createAutoBundle();
7120   Target->_add(SP, SP, AddAmount);
7121   Target->_bic(SP, SP, memOpBicMask(Target->Func));
7122 }
7123 
align_sp(size_t Alignment)7124 void TargetARM32::Sandboxer::align_sp(size_t Alignment) {
7125   Variable *SP = Target->getPhysicalRegister(RegARM32::Reg_sp);
7126   if (!Target->NeedSandboxing) {
7127     Target->alignRegisterPow2(SP, Alignment);
7128     return;
7129   }
7130   createAutoBundle();
7131   Target->alignRegisterPow2(SP, Alignment);
7132   Target->_bic(SP, SP, memOpBicMask(Target->Func));
7133 }
7134 
bl(Variable * ReturnReg,Operand * CallTarget)7135 InstARM32Call *TargetARM32::Sandboxer::bl(Variable *ReturnReg,
7136                                           Operand *CallTarget) {
7137   if (Target->NeedSandboxing) {
7138     createAutoBundle();
7139     if (auto *CallTargetR = llvm::dyn_cast<Variable>(CallTarget)) {
7140       Target->_bic(CallTargetR, CallTargetR,
7141                    indirectBranchBicMask(Target->Func));
7142     }
7143   }
7144   return Target->Context.insert<InstARM32Call>(ReturnReg, CallTarget);
7145 }
7146 
ldr(Variable * Dest,OperandARM32Mem * Mem,CondARM32::Cond Pred)7147 void TargetARM32::Sandboxer::ldr(Variable *Dest, OperandARM32Mem *Mem,
7148                                  CondARM32::Cond Pred) {
7149   Variable *MemBase = Mem->getBase();
7150   if (Target->NeedSandboxing && baseNeedsBic(MemBase)) {
7151     createAutoBundle();
7152     assert(!Mem->isRegReg());
7153     Target->_bic(MemBase, MemBase, memOpBicMask(Target->Func), Pred);
7154   }
7155   Target->_ldr(Dest, Mem, Pred);
7156 }
7157 
ldrex(Variable * Dest,OperandARM32Mem * Mem,CondARM32::Cond Pred)7158 void TargetARM32::Sandboxer::ldrex(Variable *Dest, OperandARM32Mem *Mem,
7159                                    CondARM32::Cond Pred) {
7160   Variable *MemBase = Mem->getBase();
7161   if (Target->NeedSandboxing && baseNeedsBic(MemBase)) {
7162     createAutoBundle();
7163     assert(!Mem->isRegReg());
7164     Target->_bic(MemBase, MemBase, memOpBicMask(Target->Func), Pred);
7165   }
7166   Target->_ldrex(Dest, Mem, Pred);
7167 }
7168 
reset_sp(Variable * Src)7169 void TargetARM32::Sandboxer::reset_sp(Variable *Src) {
7170   Variable *SP = Target->getPhysicalRegister(RegARM32::Reg_sp);
7171   if (!Target->NeedSandboxing) {
7172     Target->_mov_redefined(SP, Src);
7173     return;
7174   }
7175   createAutoBundle();
7176   Target->_mov_redefined(SP, Src);
7177   Target->_bic(SP, SP, memOpBicMask(Target->Func));
7178 }
7179 
ret(Variable * RetAddr,Variable * RetValue)7180 void TargetARM32::Sandboxer::ret(Variable *RetAddr, Variable *RetValue) {
7181   if (Target->NeedSandboxing) {
7182     createAutoBundle();
7183     Target->_bic(RetAddr, RetAddr, indirectBranchBicMask(Target->Func));
7184   }
7185   Target->_ret(RetAddr, RetValue);
7186 }
7187 
str(Variable * Src,OperandARM32Mem * Mem,CondARM32::Cond Pred)7188 void TargetARM32::Sandboxer::str(Variable *Src, OperandARM32Mem *Mem,
7189                                  CondARM32::Cond Pred) {
7190   Variable *MemBase = Mem->getBase();
7191   if (Target->NeedSandboxing && baseNeedsBic(MemBase)) {
7192     createAutoBundle();
7193     assert(!Mem->isRegReg());
7194     Target->_bic(MemBase, MemBase, memOpBicMask(Target->Func), Pred);
7195   }
7196   Target->_str(Src, Mem, Pred);
7197 }
7198 
strex(Variable * Dest,Variable * Src,OperandARM32Mem * Mem,CondARM32::Cond Pred)7199 void TargetARM32::Sandboxer::strex(Variable *Dest, Variable *Src,
7200                                    OperandARM32Mem *Mem, CondARM32::Cond Pred) {
7201   Variable *MemBase = Mem->getBase();
7202   if (Target->NeedSandboxing && baseNeedsBic(MemBase)) {
7203     createAutoBundle();
7204     assert(!Mem->isRegReg());
7205     Target->_bic(MemBase, MemBase, memOpBicMask(Target->Func), Pred);
7206   }
7207   Target->_strex(Dest, Src, Mem, Pred);
7208 }
7209 
sub_sp(Operand * SubAmount)7210 void TargetARM32::Sandboxer::sub_sp(Operand *SubAmount) {
7211   Variable *SP = Target->getPhysicalRegister(RegARM32::Reg_sp);
7212   if (!Target->NeedSandboxing) {
7213     Target->_sub(SP, SP, SubAmount);
7214     return;
7215   }
7216   createAutoBundle();
7217   Target->_sub(SP, SP, SubAmount);
7218   Target->_bic(SP, SP, memOpBicMask(Target->Func));
7219 }
7220 
TargetDataARM32(GlobalContext * Ctx)7221 TargetDataARM32::TargetDataARM32(GlobalContext *Ctx)
7222     : TargetDataLowering(Ctx) {}
7223 
lowerGlobals(const VariableDeclarationList & Vars,const std::string & SectionSuffix)7224 void TargetDataARM32::lowerGlobals(const VariableDeclarationList &Vars,
7225                                    const std::string &SectionSuffix) {
7226   const bool IsPIC = getFlags().getUseNonsfi();
7227   switch (getFlags().getOutFileType()) {
7228   case FT_Elf: {
7229     ELFObjectWriter *Writer = Ctx->getObjectWriter();
7230     Writer->writeDataSection(Vars, llvm::ELF::R_ARM_ABS32, SectionSuffix,
7231                              IsPIC);
7232   } break;
7233   case FT_Asm:
7234   case FT_Iasm: {
7235     OstreamLocker _(Ctx);
7236     for (const VariableDeclaration *Var : Vars) {
7237       if (getFlags().matchTranslateOnly(Var->getName(), 0)) {
7238         emitGlobal(*Var, SectionSuffix);
7239       }
7240     }
7241   } break;
7242   }
7243 }
7244 
7245 namespace {
7246 template <typename T> struct ConstantPoolEmitterTraits;
7247 
7248 static_assert(sizeof(uint64_t) == 8,
7249               "uint64_t is supposed to be 8 bytes wide.");
7250 
7251 // TODO(jpp): implement the following when implementing constant randomization:
7252 //  * template <> struct ConstantPoolEmitterTraits<uint8_t>
7253 //  * template <> struct ConstantPoolEmitterTraits<uint16_t>
7254 //  * template <> struct ConstantPoolEmitterTraits<uint32_t>
7255 template <> struct ConstantPoolEmitterTraits<float> {
7256   using ConstantType = ConstantFloat;
7257   static constexpr Type IceType = IceType_f32;
7258   // AsmTag and TypeName can't be constexpr because llvm::StringRef is unhappy
7259   // about them being constexpr.
7260   static const char AsmTag[];
7261   static const char TypeName[];
bitcastToUint64Ice::ARM32::__anon6a253d621e11::ConstantPoolEmitterTraits7262   static uint64_t bitcastToUint64(float Value) {
7263     static_assert(sizeof(Value) == sizeof(uint32_t),
7264                   "Float should be 4 bytes.");
7265     const uint32_t IntValue = Utils::bitCopy<uint32_t>(Value);
7266     return static_cast<uint64_t>(IntValue);
7267   }
7268 };
7269 const char ConstantPoolEmitterTraits<float>::AsmTag[] = ".long";
7270 const char ConstantPoolEmitterTraits<float>::TypeName[] = "f32";
7271 
7272 template <> struct ConstantPoolEmitterTraits<double> {
7273   using ConstantType = ConstantDouble;
7274   static constexpr Type IceType = IceType_f64;
7275   static const char AsmTag[];
7276   static const char TypeName[];
bitcastToUint64Ice::ARM32::__anon6a253d621e11::ConstantPoolEmitterTraits7277   static uint64_t bitcastToUint64(double Value) {
7278     static_assert(sizeof(double) == sizeof(uint64_t),
7279                   "Double should be 8 bytes.");
7280     return Utils::bitCopy<uint64_t>(Value);
7281   }
7282 };
7283 const char ConstantPoolEmitterTraits<double>::AsmTag[] = ".quad";
7284 const char ConstantPoolEmitterTraits<double>::TypeName[] = "f64";
7285 
7286 template <typename T>
emitConstant(Ostream & Str,const typename ConstantPoolEmitterTraits<T>::ConstantType * Const)7287 void emitConstant(
7288     Ostream &Str,
7289     const typename ConstantPoolEmitterTraits<T>::ConstantType *Const) {
7290   using Traits = ConstantPoolEmitterTraits<T>;
7291   Str << Const->getLabelName();
7292   Str << ":\n\t" << Traits::AsmTag << "\t0x";
7293   T Value = Const->getValue();
7294   Str.write_hex(Traits::bitcastToUint64(Value));
7295   Str << "\t/* " << Traits::TypeName << " " << Value << " */\n";
7296 }
7297 
emitConstantPool(GlobalContext * Ctx)7298 template <typename T> void emitConstantPool(GlobalContext *Ctx) {
7299   if (!BuildDefs::dump()) {
7300     return;
7301   }
7302 
7303   using Traits = ConstantPoolEmitterTraits<T>;
7304   static constexpr size_t MinimumAlignment = 4;
7305   SizeT Align = std::max(MinimumAlignment, typeAlignInBytes(Traits::IceType));
7306   assert((Align % 4) == 0 && "Constants should be aligned");
7307   Ostream &Str = Ctx->getStrEmit();
7308   ConstantList Pool = Ctx->getConstantPool(Traits::IceType);
7309 
7310   Str << "\t.section\t.rodata.cst" << Align << ",\"aM\",%progbits," << Align
7311       << "\n"
7312       << "\t.align\t" << Align << "\n";
7313 
7314   if (getFlags().getReorderPooledConstants()) {
7315     // TODO(jpp): add constant pooling.
7316     UnimplementedError(getFlags());
7317   }
7318 
7319   for (Constant *C : Pool) {
7320     if (!C->getShouldBePooled()) {
7321       continue;
7322     }
7323 
7324     emitConstant<T>(Str, llvm::dyn_cast<typename Traits::ConstantType>(C));
7325   }
7326 }
7327 } // end of anonymous namespace
7328 
lowerConstants()7329 void TargetDataARM32::lowerConstants() {
7330   if (getFlags().getDisableTranslation())
7331     return;
7332   switch (getFlags().getOutFileType()) {
7333   case FT_Elf: {
7334     ELFObjectWriter *Writer = Ctx->getObjectWriter();
7335     Writer->writeConstantPool<ConstantFloat>(IceType_f32);
7336     Writer->writeConstantPool<ConstantDouble>(IceType_f64);
7337   } break;
7338   case FT_Asm:
7339   case FT_Iasm: {
7340     OstreamLocker _(Ctx);
7341     emitConstantPool<float>(Ctx);
7342     emitConstantPool<double>(Ctx);
7343     break;
7344   }
7345   }
7346 }
7347 
lowerJumpTables()7348 void TargetDataARM32::lowerJumpTables() {
7349   if (getFlags().getDisableTranslation())
7350     return;
7351   switch (getFlags().getOutFileType()) {
7352   case FT_Elf:
7353     if (!Ctx->getJumpTables().empty()) {
7354       llvm::report_fatal_error("ARM32 does not support jump tables yet.");
7355     }
7356     break;
7357   case FT_Asm:
7358     // Already emitted from Cfg
7359     break;
7360   case FT_Iasm: {
7361     // TODO(kschimpf): Fill this in when we get more information.
7362     break;
7363   }
7364   }
7365 }
7366 
TargetHeaderARM32(GlobalContext * Ctx)7367 TargetHeaderARM32::TargetHeaderARM32(GlobalContext *Ctx)
7368     : TargetHeaderLowering(Ctx), CPUFeatures(getFlags()) {}
7369 
lower()7370 void TargetHeaderARM32::lower() {
7371   OstreamLocker _(Ctx);
7372   Ostream &Str = Ctx->getStrEmit();
7373   Str << ".syntax unified\n";
7374   // Emit build attributes in format: .eabi_attribute TAG, VALUE. See Sec. 2 of
7375   // "Addenda to, and Errata in the ABI for the ARM architecture"
7376   // http://infocenter.arm.com
7377   //                  /help/topic/com.arm.doc.ihi0045d/IHI0045D_ABI_addenda.pdf
7378   //
7379   // Tag_conformance should be be emitted first in a file-scope sub-subsection
7380   // of the first public subsection of the attributes.
7381   Str << ".eabi_attribute 67, \"2.09\"      @ Tag_conformance\n";
7382   // Chromebooks are at least A15, but do A9 for higher compat. For some
7383   // reason, the LLVM ARM asm parser has the .cpu directive override the mattr
7384   // specified on the commandline. So to test hwdiv, we need to set the .cpu
7385   // directive higher (can't just rely on --mattr=...).
7386   if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) {
7387     Str << ".cpu    cortex-a15\n";
7388   } else {
7389     Str << ".cpu    cortex-a9\n";
7390   }
7391   Str << ".eabi_attribute 6, 10   @ Tag_CPU_arch: ARMv7\n"
7392       << ".eabi_attribute 7, 65   @ Tag_CPU_arch_profile: App profile\n";
7393   Str << ".eabi_attribute 8, 1    @ Tag_ARM_ISA_use: Yes\n"
7394       << ".eabi_attribute 9, 2    @ Tag_THUMB_ISA_use: Thumb-2\n";
7395   Str << ".fpu    neon\n"
7396       << ".eabi_attribute 17, 1   @ Tag_ABI_PCS_GOT_use: permit directly\n"
7397       << ".eabi_attribute 20, 1   @ Tag_ABI_FP_denormal\n"
7398       << ".eabi_attribute 21, 1   @ Tag_ABI_FP_exceptions\n"
7399       << ".eabi_attribute 23, 3   @ Tag_ABI_FP_number_model: IEEE 754\n"
7400       << ".eabi_attribute 34, 1   @ Tag_CPU_unaligned_access\n"
7401       << ".eabi_attribute 24, 1   @ Tag_ABI_align_needed: 8-byte\n"
7402       << ".eabi_attribute 25, 1   @ Tag_ABI_align_preserved: 8-byte\n"
7403       << ".eabi_attribute 28, 1   @ Tag_ABI_VFP_args\n"
7404       << ".eabi_attribute 36, 1   @ Tag_FP_HP_extension\n"
7405       << ".eabi_attribute 38, 1   @ Tag_ABI_FP_16bit_format\n"
7406       << ".eabi_attribute 42, 1   @ Tag_MPextension_use\n"
7407       << ".eabi_attribute 68, 1   @ Tag_Virtualization_use\n";
7408   if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) {
7409     Str << ".eabi_attribute 44, 2   @ Tag_DIV_use\n";
7410   }
7411   // Technically R9 is used for TLS with Sandboxing, and we reserve it.
7412   // However, for compatibility with current NaCl LLVM, don't claim that.
7413   Str << ".eabi_attribute 14, 3   @ Tag_ABI_PCS_R9_use: Not used\n";
7414 }
7415 
7416 SmallBitVector TargetARM32::TypeToRegisterSet[RegARM32::RCARM32_NUM];
7417 SmallBitVector TargetARM32::TypeToRegisterSetUnfiltered[RegARM32::RCARM32_NUM];
7418 SmallBitVector TargetARM32::RegisterAliases[RegARM32::Reg_NUM];
7419 
7420 } // end of namespace ARM32
7421 } // end of namespace Ice
7422