1 //===- subzero/src/IceTargetLoweringARM32.h - ARM32 lowering ----*- C++ -*-===//
2 //
3 //                        The Subzero Code Generator
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief Declares the TargetLoweringARM32 class, which implements the
12 /// TargetLowering interface for the ARM 32-bit architecture.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #ifndef SUBZERO_SRC_ICETARGETLOWERINGARM32_H
17 #define SUBZERO_SRC_ICETARGETLOWERINGARM32_H
18 
19 #include "IceAssemblerARM32.h"
20 #include "IceDefs.h"
21 #include "IceInstARM32.h"
22 #include "IceRegistersARM32.h"
23 #include "IceTargetLowering.h"
24 
25 #include <utility>
26 
27 namespace Ice {
28 namespace ARM32 {
29 
30 // Class encapsulating ARM cpu features / instruction set.
31 class TargetARM32Features {
32   TargetARM32Features() = delete;
33   TargetARM32Features(const TargetARM32Features &) = delete;
34   TargetARM32Features &operator=(const TargetARM32Features &) = delete;
35 
36 public:
37   explicit TargetARM32Features(const ClFlags &Flags);
38 
39   enum ARM32InstructionSet {
40     Begin,
41     // Neon is the PNaCl baseline instruction set.
42     Neon = Begin,
43     HWDivArm, // HW divide in ARM mode (not just Thumb mode).
44     End
45   };
46 
hasFeature(ARM32InstructionSet I)47   bool hasFeature(ARM32InstructionSet I) const { return I <= InstructionSet; }
48 
49 private:
50   ARM32InstructionSet InstructionSet = ARM32InstructionSet::Begin;
51 };
52 
53 // The target lowering logic for ARM32.
54 class TargetARM32 : public TargetLowering {
55   TargetARM32() = delete;
56   TargetARM32(const TargetARM32 &) = delete;
57   TargetARM32 &operator=(const TargetARM32 &) = delete;
58 
59 public:
60   static void staticInit(GlobalContext *Ctx);
61 
shouldBePooled(const Constant * C)62   static bool shouldBePooled(const Constant *C) {
63     if (auto *ConstDouble = llvm::dyn_cast<ConstantDouble>(C)) {
64       return !Utils::isPositiveZero(ConstDouble->getValue());
65     }
66     if (llvm::isa<ConstantFloat>(C))
67       return true;
68     return false;
69   }
70 
getPointerType()71   static ::Ice::Type getPointerType() { return ::Ice::IceType_i32; }
72 
  // Returns the newly created TargetARM32 for Func, owned by a unique_ptr.
create(Cfg * Func)74   static std::unique_ptr<::Ice::TargetLowering> create(Cfg *Func) {
75     return makeUnique<TargetARM32>(Func);
76   }
77 
createAssembler()78   std::unique_ptr<::Ice::Assembler> createAssembler() const override {
79     const bool IsNonsfi = SandboxingType == ST_Nonsfi;
80     return makeUnique<ARM32::AssemblerARM32>(IsNonsfi);
81   }
82 
initNodeForLowering(CfgNode * Node)83   void initNodeForLowering(CfgNode *Node) override {
84     Computations.forgetProducers();
85     Computations.recordProducers(Node);
86     Computations.dump(Func);
87   }
88 
89   void translateOm1() override;
90   void translateO2() override;
91   bool doBranchOpt(Inst *I, const CfgNode *NextNode) override;
92 
getNumRegisters()93   SizeT getNumRegisters() const override { return RegARM32::Reg_NUM; }
94   Variable *getPhysicalRegister(RegNumT RegNum,
95                                 Type Ty = IceType_void) override;
96   const char *getRegName(RegNumT RegNum, Type Ty) const override;
97   SmallBitVector getRegisterSet(RegSetMask Include,
98                                 RegSetMask Exclude) const override;
99   const SmallBitVector &
getRegistersForVariable(const Variable * Var)100   getRegistersForVariable(const Variable *Var) const override {
101     RegClass RC = Var->getRegClass();
102     switch (RC) {
103     default:
104       assert(RC < RC_Target);
105       return TypeToRegisterSet[RC];
106     case RegARM32::RCARM32_QtoS:
107       return TypeToRegisterSet[RC];
108     }
109   }
110   const SmallBitVector &
getAllRegistersForVariable(const Variable * Var)111   getAllRegistersForVariable(const Variable *Var) const override {
112     RegClass RC = Var->getRegClass();
113     assert((RegARM32::RegClassARM32)RC < RegARM32::RCARM32_NUM);
114     return TypeToRegisterSetUnfiltered[RC];
115   }
getAliasesForRegister(RegNumT Reg)116   const SmallBitVector &getAliasesForRegister(RegNumT Reg) const override {
117     return RegisterAliases[Reg];
118   }
hasFramePointer()119   bool hasFramePointer() const override { return UsesFramePointer; }
setHasFramePointer()120   void setHasFramePointer() override { UsesFramePointer = true; }
getStackReg()121   RegNumT getStackReg() const override { return RegARM32::Reg_sp; }
getFrameReg()122   RegNumT getFrameReg() const override { return RegARM32::Reg_fp; }
getFrameOrStackReg()123   RegNumT getFrameOrStackReg() const override {
124     return UsesFramePointer ? getFrameReg() : getStackReg();
125   }
getReservedTmpReg()126   RegNumT getReservedTmpReg() const { return RegARM32::Reg_ip; }
127 
typeWidthInBytesOnStack(Type Ty)128   size_t typeWidthInBytesOnStack(Type Ty) const override {
129     // Round up to the next multiple of 4 bytes. In particular, i1, i8, and i16
130     // are rounded up to 4 bytes.
131     return (typeWidthInBytes(Ty) + 3) & ~3;
132   }
133   uint32_t getStackAlignment() const override;
reserveFixedAllocaArea(size_t Size,size_t Align)134   void reserveFixedAllocaArea(size_t Size, size_t Align) override {
135     FixedAllocaSizeBytes = Size;
136     assert(llvm::isPowerOf2_32(Align));
137     FixedAllocaAlignBytes = Align;
138     PrologEmitsFixedAllocas = true;
139   }
getFrameFixedAllocaOffset()140   int32_t getFrameFixedAllocaOffset() const override {
141     return FixedAllocaSizeBytes - (SpillAreaSizeBytes - MaxOutArgsSizeBytes);
142   }
maxOutArgsSizeBytes()143   uint32_t maxOutArgsSizeBytes() const override { return MaxOutArgsSizeBytes; }
144 
shouldSplitToVariable64On32(Type Ty)145   bool shouldSplitToVariable64On32(Type Ty) const override {
146     return Ty == IceType_i64;
147   }
148 
149   // TODO(ascull): what size is best for ARM?
getMinJumpTableSize()150   SizeT getMinJumpTableSize() const override { return 3; }
151   void emitJumpTable(const Cfg *Func,
152                      const InstJumpTable *JumpTable) const override;
153 
154   void emitVariable(const Variable *Var) const override;
155 
156   void emit(const ConstantUndef *C) const final;
157   void emit(const ConstantInteger32 *C) const final;
158   void emit(const ConstantInteger64 *C) const final;
159   void emit(const ConstantFloat *C) const final;
160   void emit(const ConstantDouble *C) const final;
161   void emit(const ConstantRelocatable *C) const final;
162 
163   void lowerArguments() override;
164   void addProlog(CfgNode *Node) override;
165   void addEpilog(CfgNode *Node) override;
166 
167   Operand *loOperand(Operand *Operand);
168   Operand *hiOperand(Operand *Operand);
169   void finishArgumentLowering(Variable *Arg, Variable *FramePtr,
170                               size_t BasicFrameOffset, size_t *InArgsSizeBytes);
171 
hasCPUFeature(TargetARM32Features::ARM32InstructionSet I)172   bool hasCPUFeature(TargetARM32Features::ARM32InstructionSet I) const {
173     return CPUFeatures.hasFeature(I);
174   }
175 
176   enum OperandLegalization {
177     Legal_Reg = 1 << 0,  /// physical register, not stack location
178     Legal_Flex = 1 << 1, /// A flexible operand2, which can hold rotated small
179                          /// immediates, shifted registers, or modified fp imm.
180     Legal_Mem = 1 << 2,  /// includes [r0, r1 lsl #2] as well as [sp, #12]
181     Legal_Rematerializable = 1 << 3,
182     Legal_Default = ~Legal_Rematerializable,
183   };
184 
185   using LegalMask = uint32_t;
186   Operand *legalizeUndef(Operand *From, RegNumT RegNum = RegNumT());
187   Operand *legalize(Operand *From, LegalMask Allowed = Legal_Default,
188                     RegNumT RegNum = RegNumT());
189   Variable *legalizeToReg(Operand *From, RegNumT RegNum = RegNumT());
190 
shAmtImm(uint32_t ShAmtImm)191   OperandARM32ShAmtImm *shAmtImm(uint32_t ShAmtImm) const {
192     assert(ShAmtImm < 32);
193     return OperandARM32ShAmtImm::create(
194         Func,
195         llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(ShAmtImm & 0x1F)));
196   }
197 
getCtx()198   GlobalContext *getCtx() const { return Ctx; }
199 
200 protected:
201   explicit TargetARM32(Cfg *Func);
202 
203   void postLower() override;
204 
205   enum SafeBoolChain {
206     SBC_No,
207     SBC_Yes,
208   };
209 
210   void lowerAlloca(const InstAlloca *Instr) override;
211   SafeBoolChain lowerInt1Arithmetic(const InstArithmetic *Instr);
212   void lowerInt64Arithmetic(InstArithmetic::OpKind Op, Variable *Dest,
213                             Operand *Src0, Operand *Src1);
214   void lowerArithmetic(const InstArithmetic *Instr) override;
215   void lowerAssign(const InstAssign *Instr) override;
216   void lowerBr(const InstBr *Instr) override;
217   void lowerCall(const InstCall *Instr) override;
218   void lowerCast(const InstCast *Instr) override;
219   void lowerExtractElement(const InstExtractElement *Instr) override;
220 
221   /// CondWhenTrue is a helper type returned by every method in the lowering
222   /// that emits code to set the condition codes.
223   class CondWhenTrue {
224   public:
225     explicit CondWhenTrue(CondARM32::Cond T0,
226                           CondARM32::Cond T1 = CondARM32::kNone)
WhenTrue0(T0)227         : WhenTrue0(T0), WhenTrue1(T1) {
228       assert(T1 == CondARM32::kNone || T0 != CondARM32::kNone);
229       assert(T1 != T0 || T0 == CondARM32::kNone);
230     }
231     CondARM32::Cond WhenTrue0;
232     CondARM32::Cond WhenTrue1;
233 
234     /// invert returns a new object with WhenTrue0 and WhenTrue1 inverted.
invert()235     CondWhenTrue invert() const {
236       switch (WhenTrue0) {
237       default:
238         if (WhenTrue1 == CondARM32::kNone)
239           return CondWhenTrue(InstARM32::getOppositeCondition(WhenTrue0));
240         return CondWhenTrue(InstARM32::getOppositeCondition(WhenTrue0),
241                             InstARM32::getOppositeCondition(WhenTrue1));
242       case CondARM32::AL:
243         return CondWhenTrue(CondARM32::kNone);
244       case CondARM32::kNone:
245         return CondWhenTrue(CondARM32::AL);
246       }
247     }
248   };
249 
250   CondWhenTrue lowerFcmpCond(const InstFcmp *Instr);
251   void lowerFcmp(const InstFcmp *Instr) override;
252   CondWhenTrue lowerInt8AndInt16IcmpCond(InstIcmp::ICond Condition,
253                                          Operand *Src0, Operand *Src1);
254   CondWhenTrue lowerInt32IcmpCond(InstIcmp::ICond Condition, Operand *Src0,
255                                   Operand *Src1);
256   CondWhenTrue lowerInt64IcmpCond(InstIcmp::ICond Condition, Operand *Src0,
257                                   Operand *Src1);
258   CondWhenTrue lowerIcmpCond(InstIcmp::ICond Condition, Operand *Src0,
259                              Operand *Src1);
260   CondWhenTrue lowerIcmpCond(const InstIcmp *Instr);
261   void lowerIcmp(const InstIcmp *Instr) override;
  /// Emits the basic sequence for load-linked/store-exclusive loops:
263   ///
264   /// retry:
265   ///        ldrex tmp, [Addr]
266   ///        StoreValue = Operation(tmp)
267   ///        strexCond success, StoreValue, [Addr]
268   ///        cmpCond success, #0
269   ///        bne retry
270   ///
271   /// Operation needs to return which value to strex in Addr, it must not change
272   /// the flags if Cond is not AL, and must not emit any instructions that could
273   /// end up writing to memory. Operation also needs to handle fake-defing for
274   /// i64 handling.
275   void
276   lowerLoadLinkedStoreExclusive(Type Ty, Operand *Addr,
277                                 std::function<Variable *(Variable *)> Operation,
278                                 CondARM32::Cond Cond = CondARM32::AL);
279   void lowerInt64AtomicRMW(Variable *Dest, uint32_t Operation, Operand *Ptr,
280                            Operand *Val);
281   void lowerAtomicRMW(Variable *Dest, uint32_t Operation, Operand *Ptr,
282                       Operand *Val);
283   void lowerBreakpoint(const InstBreakpoint *Instr) override;
284   void lowerIntrinsic(const InstIntrinsic *Instr) override;
285   void lowerInsertElement(const InstInsertElement *Instr) override;
286   void lowerLoad(const InstLoad *Instr) override;
287   void lowerPhi(const InstPhi *Instr) override;
288   void lowerRet(const InstRet *Instr) override;
289   void lowerSelect(const InstSelect *Instr) override;
290   void lowerShuffleVector(const InstShuffleVector *Instr) override;
291   void lowerStore(const InstStore *Instr) override;
292   void lowerSwitch(const InstSwitch *Instr) override;
293   void lowerUnreachable(const InstUnreachable *Instr) override;
294   void prelowerPhis() override;
295   uint32_t getCallStackArgumentsSizeBytes(const InstCall *Instr) override;
296   void genTargetHelperCallFor(Inst *Instr) override;
297   void doAddressOptLoad() override;
298   void doAddressOptStore() override;
299 
300   OperandARM32Mem *formMemoryOperand(Operand *Ptr, Type Ty);
301 
302   Variable64On32 *makeI64RegPair();
303   Variable *makeReg(Type Ty, RegNumT RegNum = RegNumT());
304   static Type stackSlotType();
305   Variable *copyToReg(Operand *Src, RegNumT RegNum = RegNumT());
306   void alignRegisterPow2(Variable *Reg, uint32_t Align,
307                          RegNumT TmpRegNum = RegNumT());
308 
309   /// Returns a vector in a register with the given constant entries.
310   Variable *makeVectorOfZeros(Type Ty, RegNumT RegNum = RegNumT());
311 
312   // If a divide-by-zero check is needed, inserts a: test; branch .LSKIP; trap;
313   // .LSKIP: <continuation>. If no check is needed nothing is inserted.
314   void div0Check(Type Ty, Operand *SrcLo, Operand *SrcHi);
315   using ExtInstr = void (TargetARM32::*)(Variable *, Variable *,
316                                          CondARM32::Cond);
317   using DivInstr = void (TargetARM32::*)(Variable *, Variable *, Variable *,
318                                          CondARM32::Cond);
319   void lowerIDivRem(Variable *Dest, Variable *T, Variable *Src0R, Operand *Src1,
320                     ExtInstr ExtFunc, DivInstr DivFunc, bool IsRemainder);
321 
322   void lowerCLZ(Variable *Dest, Variable *ValLo, Variable *ValHi);
323 
324   // The following are helpers that insert lowered ARM32 instructions with
325   // minimal syntactic overhead, so that the lowering code can look as close to
326   // assembly as practical.
327   void _add(Variable *Dest, Variable *Src0, Operand *Src1,
328             CondARM32::Cond Pred = CondARM32::AL) {
329     Context.insert<InstARM32Add>(Dest, Src0, Src1, Pred);
330   }
331   void _adds(Variable *Dest, Variable *Src0, Operand *Src1,
332              CondARM32::Cond Pred = CondARM32::AL) {
333     constexpr bool SetFlags = true;
334     Context.insert<InstARM32Add>(Dest, Src0, Src1, Pred, SetFlags);
335     if (SetFlags) {
336       Context.insert<InstFakeUse>(Dest);
337     }
338   }
339   void _adc(Variable *Dest, Variable *Src0, Operand *Src1,
340             CondARM32::Cond Pred = CondARM32::AL) {
341     Context.insert<InstARM32Adc>(Dest, Src0, Src1, Pred);
342   }
343   void _and(Variable *Dest, Variable *Src0, Operand *Src1,
344             CondARM32::Cond Pred = CondARM32::AL) {
345     Context.insert<InstARM32And>(Dest, Src0, Src1, Pred);
346   }
347   void _asr(Variable *Dest, Variable *Src0, Operand *Src1,
348             CondARM32::Cond Pred = CondARM32::AL) {
349     Context.insert<InstARM32Asr>(Dest, Src0, Src1, Pred);
350   }
351   void _bic(Variable *Dest, Variable *Src0, Operand *Src1,
352             CondARM32::Cond Pred = CondARM32::AL) {
353     Context.insert<InstARM32Bic>(Dest, Src0, Src1, Pred);
354   }
_br(CfgNode * TargetTrue,CfgNode * TargetFalse,CondARM32::Cond Condition)355   void _br(CfgNode *TargetTrue, CfgNode *TargetFalse,
356            CondARM32::Cond Condition) {
357     Context.insert<InstARM32Br>(TargetTrue, TargetFalse, Condition);
358   }
_br(CfgNode * Target)359   void _br(CfgNode *Target) { Context.insert<InstARM32Br>(Target); }
_br(CfgNode * Target,CondARM32::Cond Condition)360   void _br(CfgNode *Target, CondARM32::Cond Condition) {
361     Context.insert<InstARM32Br>(Target, Condition);
362   }
_br(InstARM32Label * Label,CondARM32::Cond Condition)363   void _br(InstARM32Label *Label, CondARM32::Cond Condition) {
364     Context.insert<InstARM32Br>(Label, Condition);
365   }
366   void _cmn(Variable *Src0, Operand *Src1,
367             CondARM32::Cond Pred = CondARM32::AL) {
368     Context.insert<InstARM32Cmn>(Src0, Src1, Pred);
369   }
370   void _cmp(Variable *Src0, Operand *Src1,
371             CondARM32::Cond Pred = CondARM32::AL) {
372     Context.insert<InstARM32Cmp>(Src0, Src1, Pred);
373   }
374   void _clz(Variable *Dest, Variable *Src0,
375             CondARM32::Cond Pred = CondARM32::AL) {
376     Context.insert<InstARM32Clz>(Dest, Src0, Pred);
377   }
_dmb()378   void _dmb() { Context.insert<InstARM32Dmb>(); }
379   void _eor(Variable *Dest, Variable *Src0, Operand *Src1,
380             CondARM32::Cond Pred = CondARM32::AL) {
381     Context.insert<InstARM32Eor>(Dest, Src0, Src1, Pred);
382   }
383   /// _ldr, for all your memory to Variable data moves. It handles all types
384   /// (integer, floating point, and vectors.) Addr needs to be valid for Dest's
385   /// type (e.g., no immediates for vector loads, and no index registers for fp
386   /// loads.)
387   void _ldr(Variable *Dest, OperandARM32Mem *Addr,
388             CondARM32::Cond Pred = CondARM32::AL) {
389     Context.insert<InstARM32Ldr>(Dest, Addr, Pred);
390   }
391   InstARM32Ldrex *_ldrex(Variable *Dest, OperandARM32Mem *Addr,
392                          CondARM32::Cond Pred = CondARM32::AL) {
393     auto *Ldrex = Context.insert<InstARM32Ldrex>(Dest, Addr, Pred);
394     if (auto *Dest64 = llvm::dyn_cast<Variable64On32>(Dest)) {
395       Context.insert<InstFakeDef>(Dest64->getLo(), Dest);
396       Context.insert<InstFakeDef>(Dest64->getHi(), Dest);
397     }
398     return Ldrex;
399   }
400   void _lsl(Variable *Dest, Variable *Src0, Operand *Src1,
401             CondARM32::Cond Pred = CondARM32::AL) {
402     Context.insert<InstARM32Lsl>(Dest, Src0, Src1, Pred);
403   }
404   void _lsls(Variable *Dest, Variable *Src0, Operand *Src1,
405              CondARM32::Cond Pred = CondARM32::AL) {
406     constexpr bool SetFlags = true;
407     Context.insert<InstARM32Lsl>(Dest, Src0, Src1, Pred, SetFlags);
408     if (SetFlags) {
409       Context.insert<InstFakeUse>(Dest);
410     }
411   }
412   void _lsr(Variable *Dest, Variable *Src0, Operand *Src1,
413             CondARM32::Cond Pred = CondARM32::AL) {
414     Context.insert<InstARM32Lsr>(Dest, Src0, Src1, Pred);
415   }
416   void _mla(Variable *Dest, Variable *Src0, Variable *Src1, Variable *Acc,
417             CondARM32::Cond Pred = CondARM32::AL) {
418     Context.insert<InstARM32Mla>(Dest, Src0, Src1, Acc, Pred);
419   }
420   void _mls(Variable *Dest, Variable *Src0, Variable *Src1, Variable *Acc,
421             CondARM32::Cond Pred = CondARM32::AL) {
422     Context.insert<InstARM32Mls>(Dest, Src0, Src1, Acc, Pred);
423   }
424   /// _mov, for all your Variable to Variable data movement needs. It handles
425   /// all types (integer, floating point, and vectors), as well as moves between
426   /// Core and VFP registers. This is not a panacea: you must obey the (weird,
427   /// confusing, non-uniform) rules for data moves in ARM.
428   void _mov(Variable *Dest, Operand *Src0,
429             CondARM32::Cond Pred = CondARM32::AL) {
430     // _mov used to be unique in the sense that it would create a temporary
431     // automagically if Dest was nullptr. It won't do that anymore, so we keep
432     // an assert around just in case there is some untested code path where Dest
433     // is nullptr.
434     assert(Dest != nullptr);
435     assert(!llvm::isa<OperandARM32Mem>(Src0));
436     auto *Instr = Context.insert<InstARM32Mov>(Dest, Src0, Pred);
437 
438     if (Instr->isMultiDest()) {
439       // If Instr is multi-dest, then Dest must be a Variable64On32. We add a
440       // fake-def for Instr.DestHi here.
441       assert(llvm::isa<Variable64On32>(Dest));
442       Context.insert<InstFakeDef>(Instr->getDestHi());
443     }
444   }
445 
446   void _mov_redefined(Variable *Dest, Operand *Src0,
447                       CondARM32::Cond Pred = CondARM32::AL) {
448     auto *Instr = Context.insert<InstARM32Mov>(Dest, Src0, Pred);
449     Instr->setDestRedefined();
450     if (Instr->isMultiDest()) {
451       // If Instr is multi-dest, then Dest must be a Variable64On32. We add a
452       // fake-def for Instr.DestHi here.
453       assert(llvm::isa<Variable64On32>(Dest));
454       Context.insert<InstFakeDef>(Instr->getDestHi());
455     }
456   }
457 
_nop()458   void _nop() { Context.insert<InstARM32Nop>(); }
459 
460   // Generates a vmov instruction to extract the given index from a vector
461   // register.
462   void _extractelement(Variable *Dest, Variable *Src0, uint32_t Index,
463                        CondARM32::Cond Pred = CondARM32::AL) {
464     Context.insert<InstARM32Extract>(Dest, Src0, Index, Pred);
465   }
466 
467   // Generates a vmov instruction to insert a value into the given index of a
468   // vector register.
469   void _insertelement(Variable *Dest, Variable *Src0, uint32_t Index,
470                       CondARM32::Cond Pred = CondARM32::AL) {
471     Context.insert<InstARM32Insert>(Dest, Src0, Index, Pred);
472   }
473 
474   // --------------------------------------------------------------------------
475   // Begin bool folding machinery.
476   //
477   // There are three types of boolean lowerings handled by this target:
478   //
479   // 1) Boolean expressions leading to a boolean Variable definition
480   // ---------------------------------------------------------------
481   //
482   // Whenever a i1 Variable is live out (i.e., its live range extends beyond
483   // the defining basic block) we do not fold the operation. We instead
484   // materialize (i.e., compute) the variable normally, so that it can be used
485   // when needed. We also materialize i1 values that are not single use to
486   // avoid code duplication. These expressions are not short circuited.
487   //
488   // 2) Boolean expressions leading to a select
489   // ------------------------------------------
490   //
491   // These include boolean chains leading to a select instruction, as well as
492   // i1 Sexts. These boolean expressions are lowered to:
493   //
494   // mov T, <false value>
495   // CC <- eval(Boolean Expression)
496   // movCC T, <true value>
497   //
498   // For Sexts, <false value> is 0, and <true value> is -1.
499   //
500   // 3) Boolean expressions leading to a br i1
501   // -----------------------------------------
502   //
503   // These are the boolean chains leading to a branch. These chains are
504   // short-circuited, i.e.:
505   //
506   //   A = or i1 B, C
507   //   br i1 A, label %T, label %F
508   //
509   // becomes
510   //
511   //   tst B
512   //   jne %T
  //   tst C
514   //   jne %T
515   //   j %F
516   //
517   // and
518   //
519   //   A = and i1 B, C
520   //   br i1 A, label %T, label %F
521   //
522   // becomes
523   //
524   //   tst B
525   //   jeq %F
  //   tst C
527   //   jeq %F
528   //   j %T
529   //
530   // Arbitrarily long chains are short circuited, e.g
531   //
532   //   A = or  i1 B, C
533   //   D = and i1 A, E
534   //   F = and i1 G, H
535   //   I = or i1 D, F
536   //   br i1 I, label %True, label %False
537   //
538   // becomes
539   //
540   // Label[A]:
541   //   tst B, 1
542   //   bne Label[D]
543   //   tst C, 1
544   //   beq Label[I]
545   // Label[D]:
546   //   tst E, 1
547   //   bne %True
548   // Label[I]
549   //   tst G, 1
550   //   beq %False
551   //   tst H, 1
552   //   beq %False (bne %True)
553 
554   /// lowerInt1 materializes Boolean to a Variable.
555   SafeBoolChain lowerInt1(Variable *Dest, Operand *Boolean);
556 
557   /// lowerInt1ForSelect generates the following instruction sequence:
558   ///
559   ///   mov T, FalseValue
560   ///   CC <- eval(Boolean)
561   ///   movCC T, TrueValue
562   ///   mov Dest, T
563   ///
564   /// It is used for lowering select i1, as well as i1 Sext.
565   void lowerInt1ForSelect(Variable *Dest, Operand *Boolean, Operand *TrueValue,
566                           Operand *FalseValue);
567 
568   /// LowerInt1BranchTarget is used by lowerIntForBranch. It wraps a CfgNode, or
569   /// an InstARM32Label (but never both) so that, during br i1 lowering, we can
570   /// create auxiliary labels for short circuiting the condition evaluation.
571   class LowerInt1BranchTarget {
572   public:
LowerInt1BranchTarget(CfgNode * const Target)573     explicit LowerInt1BranchTarget(CfgNode *const Target)
574         : NodeTarget(Target) {}
LowerInt1BranchTarget(InstARM32Label * const Target)575     explicit LowerInt1BranchTarget(InstARM32Label *const Target)
576         : LabelTarget(Target) {}
577 
578     /// createForLabelOrDuplicate will return a new LowerInt1BranchTarget that
579     /// is the exact copy of this if Label is nullptr; otherwise, the returned
580     /// object will wrap Label instead.
581     LowerInt1BranchTarget
createForLabelOrDuplicate(InstARM32Label * Label)582     createForLabelOrDuplicate(InstARM32Label *Label) const {
583       if (Label != nullptr)
584         return LowerInt1BranchTarget(Label);
585       if (NodeTarget)
586         return LowerInt1BranchTarget(NodeTarget);
587       return LowerInt1BranchTarget(LabelTarget);
588     }
589 
590     CfgNode *const NodeTarget = nullptr;
591     InstARM32Label *const LabelTarget = nullptr;
592   };
593 
594   /// LowerInt1AllowShortCircuit is a helper type used by lowerInt1ForBranch for
595   /// determining which type arithmetic is allowed to be short circuited. This
596   /// is useful for lowering
597   ///
598   ///   t1 = and i1 A, B
599   ///   t2 = and i1 t1, C
  ///   br i1 t2, label %True, label %False
601   ///
602   /// to
603   ///
604   ///   tst A, 1
605   ///   beq %False
606   ///   tst B, 1
607   ///   beq %False
608   ///   tst C, 1
609   ///   bne %True
610   ///   b %False
611   ///
612   /// Without this information, short circuiting would only allow to short
613   /// circuit a single high level instruction. For example:
614   ///
615   ///   t1 = or i1 A, B
616   ///   t2 = and i1 t1, C
  ///   br i1 t2, label %True, label %False
618   ///
619   /// cannot be lowered to
620   ///
621   ///   tst A, 1
622   ///   bne %True
623   ///   tst B, 1
624   ///   bne %True
625   ///   tst C, 1
626   ///   beq %True
627   ///   b %False
628   ///
629   /// It needs to be lowered to
630   ///
631   ///   tst A, 1
632   ///   bne Aux
633   ///   tst B, 1
634   ///   beq %False
635   /// Aux:
636   ///   tst C, 1
637   ///   bne %True
638   ///   b %False
639   ///
640   /// TODO(jpp): evaluate if this kind of short circuiting hurts performance (it
641   /// might.)
642   enum LowerInt1AllowShortCircuit {
643     SC_And = 1,
644     SC_Or = 2,
645     SC_All = SC_And | SC_Or,
646   };
647 
648   /// ShortCircuitCondAndLabel wraps the condition codes that should be used
649   /// after a lowerInt1ForBranch returns to branch to the
650   /// TrueTarget/FalseTarget. If ShortCircuitLabel is not nullptr, then the
651   /// called lowerInt1forBranch created an internal (i.e., short-circuit) label
652   /// used for short circuiting.
653   class ShortCircuitCondAndLabel {
654   public:
655     explicit ShortCircuitCondAndLabel(CondWhenTrue &&C,
656                                       InstARM32Label *L = nullptr)
Cond(std::move (C))657         : Cond(std::move(C)), ShortCircuitTarget(L) {}
658     const CondWhenTrue Cond;
659     InstARM32Label *const ShortCircuitTarget;
660 
assertNoLabelAndReturnCond()661     CondWhenTrue assertNoLabelAndReturnCond() const {
662       assert(ShortCircuitTarget == nullptr);
663       return Cond;
664     }
665   };
666 
667   /// lowerInt1ForBranch expands Boolean, and returns the condition codes that
668   /// are to be used for branching to the branch's TrueTarget. It may return a
669   /// label that the expansion of Boolean used to short circuit the chain's
670   /// evaluation.
671   ShortCircuitCondAndLabel
672   lowerInt1ForBranch(Operand *Boolean, const LowerInt1BranchTarget &TargetTrue,
673                      const LowerInt1BranchTarget &TargetFalse,
674                      uint32_t ShortCircuitable);
675 
676   // _br is a convenience wrapper that emits br instructions to Target.
677   void _br(const LowerInt1BranchTarget &BrTarget,
678            CondARM32::Cond Cond = CondARM32::AL) {
679     assert((BrTarget.NodeTarget == nullptr) !=
680            (BrTarget.LabelTarget == nullptr));
681     if (BrTarget.NodeTarget != nullptr)
682       _br(BrTarget.NodeTarget, Cond);
683     else
684       _br(BrTarget.LabelTarget, Cond);
685   }
686 
687   // _br_short_circuit is used when lowering InstArithmetic::And and
688   // InstArithmetic::Or and a short circuit branch is needed.
_br_short_circuit(const LowerInt1BranchTarget & Target,const CondWhenTrue & Cond)689   void _br_short_circuit(const LowerInt1BranchTarget &Target,
690                          const CondWhenTrue &Cond) {
691     if (Cond.WhenTrue1 != CondARM32::kNone) {
692       _br(Target, Cond.WhenTrue1);
693     }
694     if (Cond.WhenTrue0 != CondARM32::kNone) {
695       _br(Target, Cond.WhenTrue0);
696     }
697   }
698   // End of bool folding machinery
699   // --------------------------------------------------------------------------
700 
701   /// The Operand can only be a 16-bit immediate or a ConstantRelocatable (with
702   /// an upper16 relocation).
703   void _movt(Variable *Dest, Operand *Src0,
704              CondARM32::Cond Pred = CondARM32::AL) {
705     Context.insert<InstARM32Movt>(Dest, Src0, Pred);
706   }
707   void _movw(Variable *Dest, Operand *Src0,
708              CondARM32::Cond Pred = CondARM32::AL) {
709     Context.insert<InstARM32Movw>(Dest, Src0, Pred);
710   }
711   void _mul(Variable *Dest, Variable *Src0, Variable *Src1,
712             CondARM32::Cond Pred = CondARM32::AL) {
713     Context.insert<InstARM32Mul>(Dest, Src0, Src1, Pred);
714   }
715   void _mvn(Variable *Dest, Operand *Src0,
716             CondARM32::Cond Pred = CondARM32::AL) {
717     Context.insert<InstARM32Mvn>(Dest, Src0, Pred);
718   }
719   void _orr(Variable *Dest, Variable *Src0, Operand *Src1,
720             CondARM32::Cond Pred = CondARM32::AL) {
721     Context.insert<InstARM32Orr>(Dest, Src0, Src1, Pred);
722   }
723   void _orrs(Variable *Dest, Variable *Src0, Operand *Src1,
724              CondARM32::Cond Pred = CondARM32::AL) {
725     constexpr bool SetFlags = true;
726     Context.insert<InstARM32Orr>(Dest, Src0, Src1, Pred, SetFlags);
727     if (SetFlags) {
728       Context.insert<InstFakeUse>(Dest);
729     }
730   }
_push(const VarList & Sources)731   void _push(const VarList &Sources) { Context.insert<InstARM32Push>(Sources); }
_pop(const VarList & Dests)732   void _pop(const VarList &Dests) {
733     Context.insert<InstARM32Pop>(Dests);
734     // Mark dests as modified.
735     for (Variable *Dest : Dests)
736       Context.insert<InstFakeDef>(Dest);
737   }
738   void _rbit(Variable *Dest, Variable *Src0,
739              CondARM32::Cond Pred = CondARM32::AL) {
740     Context.insert<InstARM32Rbit>(Dest, Src0, Pred);
741   }
742   void _rev(Variable *Dest, Variable *Src0,
743             CondARM32::Cond Pred = CondARM32::AL) {
744     Context.insert<InstARM32Rev>(Dest, Src0, Pred);
745   }
746   void _ret(Variable *LR, Variable *Src0 = nullptr) {
747     Context.insert<InstARM32Ret>(LR, Src0);
748   }
749   void _rscs(Variable *Dest, Variable *Src0, Operand *Src1,
750              CondARM32::Cond Pred = CondARM32::AL) {
751     constexpr bool SetFlags = true;
752     Context.insert<InstARM32Rsc>(Dest, Src0, Src1, Pred, SetFlags);
753     if (SetFlags) {
754       Context.insert<InstFakeUse>(Dest);
755     }
756   }
757   void _rsc(Variable *Dest, Variable *Src0, Operand *Src1,
758             CondARM32::Cond Pred = CondARM32::AL) {
759     Context.insert<InstARM32Rsc>(Dest, Src0, Src1, Pred);
760   }
761   void _rsbs(Variable *Dest, Variable *Src0, Operand *Src1,
762              CondARM32::Cond Pred = CondARM32::AL) {
763     constexpr bool SetFlags = true;
764     Context.insert<InstARM32Rsb>(Dest, Src0, Src1, Pred, SetFlags);
765     if (SetFlags) {
766       Context.insert<InstFakeUse>(Dest);
767     }
768   }
769   void _rsb(Variable *Dest, Variable *Src0, Operand *Src1,
770             CondARM32::Cond Pred = CondARM32::AL) {
771     Context.insert<InstARM32Rsb>(Dest, Src0, Src1, Pred);
772   }
773   void _sbc(Variable *Dest, Variable *Src0, Operand *Src1,
774             CondARM32::Cond Pred = CondARM32::AL) {
775     Context.insert<InstARM32Sbc>(Dest, Src0, Src1, Pred);
776   }
777   void _sbcs(Variable *Dest, Variable *Src0, Operand *Src1,
778              CondARM32::Cond Pred = CondARM32::AL) {
779     constexpr bool SetFlags = true;
780     Context.insert<InstARM32Sbc>(Dest, Src0, Src1, Pred, SetFlags);
781     if (SetFlags) {
782       Context.insert<InstFakeUse>(Dest);
783     }
784   }
785   void _sdiv(Variable *Dest, Variable *Src0, Variable *Src1,
786              CondARM32::Cond Pred = CondARM32::AL) {
787     Context.insert<InstARM32Sdiv>(Dest, Src0, Src1, Pred);
788   }
789   /// _str, for all your Variable to memory transfers. Addr has the same
790   /// restrictions that it does in _ldr.
791   void _str(Variable *Value, OperandARM32Mem *Addr,
792             CondARM32::Cond Pred = CondARM32::AL) {
793     Context.insert<InstARM32Str>(Value, Addr, Pred);
794   }
795   InstARM32Strex *_strex(Variable *Dest, Variable *Value, OperandARM32Mem *Addr,
796                          CondARM32::Cond Pred = CondARM32::AL) {
797     if (auto *Value64 = llvm::dyn_cast<Variable64On32>(Value)) {
798       Context.insert<InstFakeUse>(Value64->getLo());
799       Context.insert<InstFakeUse>(Value64->getHi());
800     }
801     return Context.insert<InstARM32Strex>(Dest, Value, Addr, Pred);
802   }
803   void _sub(Variable *Dest, Variable *Src0, Operand *Src1,
804             CondARM32::Cond Pred = CondARM32::AL) {
805     Context.insert<InstARM32Sub>(Dest, Src0, Src1, Pred);
806   }
807   void _subs(Variable *Dest, Variable *Src0, Operand *Src1,
808              CondARM32::Cond Pred = CondARM32::AL) {
809     constexpr bool SetFlags = true;
810     Context.insert<InstARM32Sub>(Dest, Src0, Src1, Pred, SetFlags);
811     if (SetFlags) {
812       Context.insert<InstFakeUse>(Dest);
813     }
814   }
815   void _sxt(Variable *Dest, Variable *Src0,
816             CondARM32::Cond Pred = CondARM32::AL) {
817     Context.insert<InstARM32Sxt>(Dest, Src0, Pred);
818   }
819   void _tst(Variable *Src0, Operand *Src1,
820             CondARM32::Cond Pred = CondARM32::AL) {
821     Context.insert<InstARM32Tst>(Src0, Src1, Pred);
822   }
_trap()823   void _trap() { Context.insert<InstARM32Trap>(); }
824   void _udiv(Variable *Dest, Variable *Src0, Variable *Src1,
825              CondARM32::Cond Pred = CondARM32::AL) {
826     Context.insert<InstARM32Udiv>(Dest, Src0, Src1, Pred);
827   }
828   void _umull(Variable *DestLo, Variable *DestHi, Variable *Src0,
829               Variable *Src1, CondARM32::Cond Pred = CondARM32::AL) {
830     // umull requires DestLo and DestHi to be assigned to different GPRs. The
831     // following lines create overlapping liveness ranges for both variables. If
832     // either one of them is live, then they are both going to be live, and thus
833     // assigned to different registers; if they are both dead, then DCE will
834     // kick in and delete the following three instructions.
835     Context.insert<InstFakeDef>(DestHi);
836     Context.insert<InstARM32Umull>(DestLo, DestHi, Src0, Src1, Pred);
837     Context.insert<InstFakeDef>(DestHi, DestLo)->setDestRedefined();
838     Context.insert<InstFakeUse>(DestHi);
839   }
840   void _uxt(Variable *Dest, Variable *Src0,
841             CondARM32::Cond Pred = CondARM32::AL) {
842     Context.insert<InstARM32Uxt>(Dest, Src0, Pred);
843   }
844   void _vabs(Variable *Dest, Variable *Src,
845              CondARM32::Cond Pred = CondARM32::AL) {
846     Context.insert<InstARM32Vabs>(Dest, Src, Pred);
847   }
_vadd(Variable * Dest,Variable * Src0,Variable * Src1)848   void _vadd(Variable *Dest, Variable *Src0, Variable *Src1) {
849     Context.insert<InstARM32Vadd>(Dest, Src0, Src1);
850   }
_vand(Variable * Dest,Variable * Src0,Variable * Src1)851   void _vand(Variable *Dest, Variable *Src0, Variable *Src1) {
852     Context.insert<InstARM32Vand>(Dest, Src0, Src1);
853   }
_vbsl(Variable * Dest,Variable * Src0,Variable * Src1)854   InstARM32Vbsl *_vbsl(Variable *Dest, Variable *Src0, Variable *Src1) {
855     return Context.insert<InstARM32Vbsl>(Dest, Src0, Src1);
856   }
_vceq(Variable * Dest,Variable * Src0,Variable * Src1)857   void _vceq(Variable *Dest, Variable *Src0, Variable *Src1) {
858     Context.insert<InstARM32Vceq>(Dest, Src0, Src1);
859   }
_vcge(Variable * Dest,Variable * Src0,Variable * Src1)860   InstARM32Vcge *_vcge(Variable *Dest, Variable *Src0, Variable *Src1) {
861     return Context.insert<InstARM32Vcge>(Dest, Src0, Src1);
862   }
_vcgt(Variable * Dest,Variable * Src0,Variable * Src1)863   InstARM32Vcgt *_vcgt(Variable *Dest, Variable *Src0, Variable *Src1) {
864     return Context.insert<InstARM32Vcgt>(Dest, Src0, Src1);
865   }
866   void _vcvt(Variable *Dest, Variable *Src, InstARM32Vcvt::VcvtVariant Variant,
867              CondARM32::Cond Pred = CondARM32::AL) {
868     Context.insert<InstARM32Vcvt>(Dest, Src, Variant, Pred);
869   }
_vdiv(Variable * Dest,Variable * Src0,Variable * Src1)870   void _vdiv(Variable *Dest, Variable *Src0, Variable *Src1) {
871     Context.insert<InstARM32Vdiv>(Dest, Src0, Src1);
872   }
873   void _vcmp(Variable *Src0, Variable *Src1,
874              CondARM32::Cond Pred = CondARM32::AL) {
875     Context.insert<InstARM32Vcmp>(Src0, Src1, Pred);
876   }
877   void _vcmp(Variable *Src0, OperandARM32FlexFpZero *FpZero,
878              CondARM32::Cond Pred = CondARM32::AL) {
879     Context.insert<InstARM32Vcmp>(Src0, FpZero, Pred);
880   }
_vdup(Variable * Dest,Variable * Src,int Idx)881   void _vdup(Variable *Dest, Variable *Src, int Idx) {
882     Context.insert<InstARM32Vdup>(Dest, Src, Idx);
883   }
_veor(Variable * Dest,Variable * Src0,Variable * Src1)884   void _veor(Variable *Dest, Variable *Src0, Variable *Src1) {
885     Context.insert<InstARM32Veor>(Dest, Src0, Src1);
886   }
887   void _vldr1d(Variable *Dest, OperandARM32Mem *Addr,
888                CondARM32::Cond Pred = CondARM32::AL) {
889     Context.insert<InstARM32Vldr1d>(Dest, Addr, Pred);
890   }
891   void _vldr1q(Variable *Dest, OperandARM32Mem *Addr,
892                CondARM32::Cond Pred = CondARM32::AL) {
893     Context.insert<InstARM32Vldr1q>(Dest, Addr, Pred);
894   }
895   void _vmrs(CondARM32::Cond Pred = CondARM32::AL) {
896     Context.insert<InstARM32Vmrs>(Pred);
897   }
_vmla(Variable * Dest,Variable * Src0,Variable * Src1)898   void _vmla(Variable *Dest, Variable *Src0, Variable *Src1) {
899     Context.insert<InstARM32Vmla>(Dest, Src0, Src1);
900   }
_vmlap(Variable * Dest,Variable * Src0,Variable * Src1)901   void _vmlap(Variable *Dest, Variable *Src0, Variable *Src1) {
902     Context.insert<InstARM32Vmlap>(Dest, Src0, Src1);
903   }
_vmls(Variable * Dest,Variable * Src0,Variable * Src1)904   void _vmls(Variable *Dest, Variable *Src0, Variable *Src1) {
905     Context.insert<InstARM32Vmls>(Dest, Src0, Src1);
906   }
_vmovl(Variable * Dest,Variable * Src0,Variable * Src1)907   void _vmovl(Variable *Dest, Variable *Src0, Variable *Src1) {
908     Context.insert<InstARM32Vmovl>(Dest, Src0, Src1);
909   }
_vmovh(Variable * Dest,Variable * Src0,Variable * Src1)910   void _vmovh(Variable *Dest, Variable *Src0, Variable *Src1) {
911     Context.insert<InstARM32Vmovh>(Dest, Src0, Src1);
912   }
_vmovhl(Variable * Dest,Variable * Src0,Variable * Src1)913   void _vmovhl(Variable *Dest, Variable *Src0, Variable *Src1) {
914     Context.insert<InstARM32Vmovhl>(Dest, Src0, Src1);
915   }
_vmovlh(Variable * Dest,Variable * Src0,Variable * Src1)916   void _vmovlh(Variable *Dest, Variable *Src0, Variable *Src1) {
917     Context.insert<InstARM32Vmovlh>(Dest, Src0, Src1);
918   }
_vmul(Variable * Dest,Variable * Src0,Variable * Src1)919   void _vmul(Variable *Dest, Variable *Src0, Variable *Src1) {
920     Context.insert<InstARM32Vmul>(Dest, Src0, Src1);
921   }
_vmulh(Variable * Dest,Variable * Src0,Variable * Src1,bool Unsigned)922   void _vmulh(Variable *Dest, Variable *Src0, Variable *Src1, bool Unsigned) {
923     Context.insert<InstARM32Vmulh>(Dest, Src0, Src1)
924         ->setSignType(Unsigned ? InstARM32::FS_Unsigned : InstARM32::FS_Signed);
925   }
_vmvn(Variable * Dest,Variable * Src0)926   void _vmvn(Variable *Dest, Variable *Src0) {
927     Context.insert<InstARM32Vmvn>(Dest, Src0, CondARM32::AL);
928   }
_vneg(Variable * Dest,Variable * Src0)929   void _vneg(Variable *Dest, Variable *Src0) {
930     Context.insert<InstARM32Vneg>(Dest, Src0, CondARM32::AL)
931         ->setSignType(InstARM32::FS_Signed);
932   }
_vorr(Variable * Dest,Variable * Src0,Variable * Src1)933   void _vorr(Variable *Dest, Variable *Src0, Variable *Src1) {
934     Context.insert<InstARM32Vorr>(Dest, Src0, Src1);
935   }
_vqadd(Variable * Dest,Variable * Src0,Variable * Src1,bool Unsigned)936   void _vqadd(Variable *Dest, Variable *Src0, Variable *Src1, bool Unsigned) {
937     Context.insert<InstARM32Vqadd>(Dest, Src0, Src1)
938         ->setSignType(Unsigned ? InstARM32::FS_Unsigned : InstARM32::FS_Signed);
939   }
_vqmovn2(Variable * Dest,Variable * Src0,Variable * Src1,bool Unsigned,bool Saturating)940   void _vqmovn2(Variable *Dest, Variable *Src0, Variable *Src1, bool Unsigned,
941                 bool Saturating) {
942     Context.insert<InstARM32Vqmovn2>(Dest, Src0, Src1)
943         ->setSignType(Saturating ? (Unsigned ? InstARM32::FS_Unsigned
944                                              : InstARM32::FS_Signed)
945                                  : InstARM32::FS_None);
946   }
_vqsub(Variable * Dest,Variable * Src0,Variable * Src1,bool Unsigned)947   void _vqsub(Variable *Dest, Variable *Src0, Variable *Src1, bool Unsigned) {
948     Context.insert<InstARM32Vqsub>(Dest, Src0, Src1)
949         ->setSignType(Unsigned ? InstARM32::FS_Unsigned : InstARM32::FS_Signed);
950   }
_vshl(Variable * Dest,Variable * Src0,Variable * Src1)951   InstARM32Vshl *_vshl(Variable *Dest, Variable *Src0, Variable *Src1) {
952     return Context.insert<InstARM32Vshl>(Dest, Src0, Src1);
953   }
_vshl(Variable * Dest,Variable * Src0,ConstantInteger32 * Src1)954   void _vshl(Variable *Dest, Variable *Src0, ConstantInteger32 *Src1) {
955     Context.insert<InstARM32Vshl>(Dest, Src0, Src1)
956         ->setSignType(InstARM32::FS_Unsigned);
957   }
_vshr(Variable * Dest,Variable * Src0,ConstantInteger32 * Src1)958   InstARM32Vshr *_vshr(Variable *Dest, Variable *Src0,
959                        ConstantInteger32 *Src1) {
960     return Context.insert<InstARM32Vshr>(Dest, Src0, Src1);
961   }
962   void _vsqrt(Variable *Dest, Variable *Src,
963               CondARM32::Cond Pred = CondARM32::AL) {
964     Context.insert<InstARM32Vsqrt>(Dest, Src, Pred);
965   }
966   void _vstr1d(Variable *Value, OperandARM32Mem *Addr,
967                CondARM32::Cond Pred = CondARM32::AL) {
968     Context.insert<InstARM32Vstr1>(Value, Addr, Pred, 32);
969   }
970   void _vstr1q(Variable *Value, OperandARM32Mem *Addr,
971                CondARM32::Cond Pred = CondARM32::AL) {
972     Context.insert<InstARM32Vstr1>(Value, Addr, Pred, 64);
973   }
_vsub(Variable * Dest,Variable * Src0,Variable * Src1)974   void _vsub(Variable *Dest, Variable *Src0, Variable *Src1) {
975     Context.insert<InstARM32Vsub>(Dest, Src0, Src1);
976   }
_vzip(Variable * Dest,Variable * Src0,Variable * Src1)977   void _vzip(Variable *Dest, Variable *Src0, Variable *Src1) {
978     Context.insert<InstARM32Vzip>(Dest, Src0, Src1);
979   }
980 
981   // Iterates over the CFG and determines the maximum outgoing stack arguments
982   // bytes. This information is later used during addProlog() to pre-allocate
983   // the outargs area.
984   // TODO(jpp): This could live in the Parser, if we provided a Target-specific
985   // method that the Parser could call.
986   void findMaxStackOutArgsSize();
987 
988   /// Returns true if the given Offset can be represented in a Load/Store Mem
989   /// Operand.
990   bool isLegalMemOffset(Type Ty, int32_t Offset) const;
991 
992   void postLowerLegalization();
993 
994   /// Manages the GotPtr variable, which is used for Nonsfi sandboxing.
995   /// @{
996   void createGotPtr();
997   void insertGotPtrInitPlaceholder();
998   VariableDeclaration *createGotRelocation(RelocOffset *AddPcReloc);
999   void materializeGotAddr(CfgNode *Node);
1000   Variable *GotPtr = nullptr;
1001   // TODO(jpp): use CfgLocalAllocator.
1002   /// @}
1003 
1004   /// Manages the Gotoff relocations created during the function lowering. A
1005   /// single Gotoff relocation is created for each global variable used by the
1006   /// function being lowered.
1007   /// @{
1008   // TODO(jpp): if the same global G is used in different functions, then this
1009   // method will emit one G(gotoff) relocation per function.
1010   GlobalString createGotoffRelocation(const ConstantRelocatable *CR);
1011   CfgUnorderedSet<GlobalString> KnownGotoffs;
1012   /// @}
1013 
1014   /// Loads the constant relocatable Name to Register. Then invoke Finish to
1015   /// finish the relocatable lowering. Finish **must** use PC in its first
1016   /// emitted instruction, or the relocatable in Register will contain the wrong
1017   /// value.
1018   //
1019   // Lowered sequence:
1020   //
1021   // Movw:
1022   //     movw Register, #:lower16:Name - (End - Movw) - 8 .
1023   // Movt:
1024   //     movt Register, #:upper16:Name - (End - Movt) - 8 .
1025   //     PC = fake-def
1026   // End:
1027   //     Finish(PC)
1028   //
1029   // The -8 in movw/movt above is to account for the PC value that the first
1030   // instruction emitted by Finish(PC) will read.
1031   void
1032   loadNamedConstantRelocatablePIC(GlobalString Name, Variable *Register,
1033                                   std::function<void(Variable *PC)> Finish);
1034 
1035   /// Sandboxer defines methods for ensuring that "dangerous" operations are
1036   /// masked during sandboxed code emission. For regular, non-sandboxed code
1037   /// emission, its methods are simple pass-through methods.
1038   ///
1039   /// The Sandboxer also emits BundleLock/BundleUnlock pseudo-instructions
1040   /// in the constructor/destructor during sandboxed code emission. Therefore,
1041   /// it is a bad idea to create an object of this type and "keep it around."
1042   /// The recommended usage is:
1043   ///
1044   /// AutoSandboxing(this).<<operation>>(...);
1045   ///
1046   /// This usage ensures that no other instructions are inadvertently added to
1047   /// the bundle.
1048   class Sandboxer {
1049     Sandboxer() = delete;
1050     Sandboxer(const Sandboxer &) = delete;
1051     Sandboxer &operator=(const Sandboxer &) = delete;
1052 
1053   public:
1054     explicit Sandboxer(
1055         TargetARM32 *Target,
1056         InstBundleLock::Option BundleOption = InstBundleLock::Opt_None);
1057     ~Sandboxer();
1058 
1059     /// Increments sp:
1060     ///
1061     ///   add sp, sp, AddAmount
1062     ///   bic sp, sp, 0xc0000000
1063     ///
1064     /// (for the rationale, see the ARM 32-bit Sandbox Specification.)
1065     void add_sp(Operand *AddAmount);
1066 
1067     /// Emits code to align sp to the specified alignment:
1068     ///
1069     ///   bic/and sp, sp, Alignment
1070     ///   bic, sp, sp, 0xc0000000
1071     void align_sp(size_t Alignment);
1072 
1073     /// Emits a call instruction. If CallTarget is a Variable, it emits
1074     ///
1075     ///   bic CallTarget, CallTarget, 0xc000000f
1076     ///   bl CallTarget
1077     ///
1078     /// Otherwise, it emits
1079     ///
1080     ///   bl CallTarget
1081     ///
1082     /// Note: in sandboxed code calls are always emitted in addresses 12 mod 16.
1083     InstARM32Call *bl(Variable *ReturnReg, Operand *CallTarget);
1084 
1085     /// Emits a load:
1086     ///
1087     ///   bic rBase, rBase, 0xc0000000
1088     ///   ldr rDest, [rBase, #Offset]
1089     ///
1090     /// Exception: if rBase is r9 or sp, then the load is emitted as:
1091     ///
1092     ///   ldr rDest, [rBase, #Offset]
1093     ///
1094     /// because the NaCl ARM 32-bit Sandbox Specification guarantees they are
1095     /// always valid.
1096     void ldr(Variable *Dest, OperandARM32Mem *Mem, CondARM32::Cond Pred);
1097 
1098     /// Emits a load exclusive:
1099     ///
1100     ///   bic rBase, rBase, 0xc0000000
1101     ///   ldrex rDest, [rBase]
1102     ///
1103     /// Exception: if rBase is r9 or sp, then the load is emitted as:
1104     ///
1105     ///   ldrex rDest, [rBase]
1106     ///
1107     /// because the NaCl ARM 32-bit Sandbox Specification guarantees they are
1108     /// always valid.
1109     void ldrex(Variable *Dest, OperandARM32Mem *Mem, CondARM32::Cond Pred);
1110 
1111     /// Resets sp to Src:
1112     ///
1113     ///   mov sp, Src
1114     ///   bic sp, sp, 0xc0000000
1115     void reset_sp(Variable *Src);
1116 
1117     /// Emits code to return from a function:
1118     ///
1119     ///   bic lr, lr, 0xc000000f
1120     ///   bx lr
1121     void ret(Variable *RetAddr, Variable *RetValue);
1122 
1123     /// Emits a store:
1124     ///
1125     ///   bic rBase, rBase, 0xc0000000
1126     ///   str rSrc, [rBase, #Offset]
1127     ///
1128     /// Exception: if rBase is r9 or sp, then the store is emitted as:
1129     ///
1130     ///   str rDest, [rBase, #Offset]
1131     ///
1132     /// because the NaCl ARM 32-bit Sandbox Specification guarantees they are
1133     /// always valid.
1134     void str(Variable *Src, OperandARM32Mem *Mem, CondARM32::Cond Pred);
1135 
1136     /// Emits a store exclusive:
1137     ///
1138     ///   bic rBase, rBase, 0xc0000000
1139     ///   strex rDest, rSrc, [rBase]
1140     ///
1141     /// Exception: if rBase is r9 or sp, then the store is emitted as:
1142     ///
1143     ///   strex rDest, rSrc, [rBase]
1144     ///
1145     /// because the NaCl ARM 32-bit Sandbox Specification guarantees they are
1146     /// always valid.
1147     void strex(Variable *Dest, Variable *Src, OperandARM32Mem *Mem,
1148                CondARM32::Cond Pred);
1149 
1150     /// Decrements sp:
1151     ///
1152     ///   sub sp, sp, SubAmount
1153     ///   bic sp, sp, 0xc0000000
1154     void sub_sp(Operand *SubAmount);
1155 
1156   private:
1157     TargetARM32 *const Target;
1158     const InstBundleLock::Option BundleOption;
1159     std::unique_ptr<AutoBundle> Bundler;
1160 
1161     void createAutoBundle();
1162   };
1163 
1164   class PostLoweringLegalizer {
1165     PostLoweringLegalizer() = delete;
1166     PostLoweringLegalizer(const PostLoweringLegalizer &) = delete;
1167     PostLoweringLegalizer &operator=(const PostLoweringLegalizer &) = delete;
1168 
1169   public:
PostLoweringLegalizer(TargetARM32 * Target)1170     explicit PostLoweringLegalizer(TargetARM32 *Target)
1171         : Target(Target), StackOrFrameReg(Target->getPhysicalRegister(
1172                               Target->getFrameOrStackReg())) {}
1173 
1174     void resetTempBaseIfClobberedBy(const Inst *Instr);
1175 
1176     // Ensures that the TempBase register held by the this legalizer (if any) is
1177     // assigned to IP.
assertNoTempOrAssignedToIP()1178     void assertNoTempOrAssignedToIP() const {
1179       assert(TempBaseReg == nullptr ||
1180              TempBaseReg->getRegNum() == Target->getReservedTmpReg());
1181     }
1182 
1183     // Legalizes Mem. if Mem.Base is a Reamaterializable variable, Mem.Offset is
1184     // fixed up.
1185     OperandARM32Mem *legalizeMemOperand(OperandARM32Mem *Mem,
1186                                         bool AllowOffsets = true);
1187 
1188     /// Legalizes Mov if its Source (or Destination) is a spilled Variable, or
1189     /// if its Source is a Rematerializable variable (this form is used in lieu
1190     /// of lea, which is not available in ARM.)
1191     ///
1192     /// Moves to memory become store instructions, and moves from memory, loads.
1193     void legalizeMov(InstARM32Mov *Mov);
1194 
1195   private:
1196     /// Creates a new Base register centered around [Base, +/- Offset].
1197     Variable *newBaseRegister(Variable *Base, int32_t Offset,
1198                               RegNumT ScratchRegNum);
1199 
1200     /// Creates a new, legal OperandARM32Mem for accessing Base + Offset.
1201     /// The returned mem operand is a legal operand for accessing memory that is
1202     /// of type Ty.
1203     ///
1204     /// If [Base, #Offset] is encodable, then the method returns a Mem operand
1205     /// expressing it. Otherwise,
1206     ///
1207     /// if [TempBaseReg, #Offset-TempBaseOffset] is a valid memory operand, the
1208     /// method will return that. Otherwise,
1209     ///
1210     /// a new base register ip=Base+Offset is created, and the method returns a
1211     /// memory operand expressing [ip, #0].
1212     OperandARM32Mem *createMemOperand(Type Ty, Variable *Base, int32_t Offset,
1213                                       bool AllowOffsets = true);
1214     TargetARM32 *const Target;
1215     Variable *const StackOrFrameReg;
1216     Variable *TempBaseReg = nullptr;
1217     int32_t TempBaseOffset = 0;
1218   };
1219 
1220   const bool NeedSandboxing;
1221   TargetARM32Features CPUFeatures;
1222   bool UsesFramePointer = false;
1223   bool NeedsStackAlignment = false;
1224   bool MaybeLeafFunc = true;
1225   size_t SpillAreaSizeBytes = 0;
1226   size_t FixedAllocaSizeBytes = 0;
1227   size_t FixedAllocaAlignBytes = 0;
1228   bool PrologEmitsFixedAllocas = false;
1229   uint32_t MaxOutArgsSizeBytes = 0;
1230   // TODO(jpp): std::array instead of array.
1231   static SmallBitVector TypeToRegisterSet[RegARM32::RCARM32_NUM];
1232   static SmallBitVector TypeToRegisterSetUnfiltered[RegARM32::RCARM32_NUM];
1233   static SmallBitVector RegisterAliases[RegARM32::Reg_NUM];
1234   SmallBitVector RegsUsed;
1235   VarList PhysicalRegisters[IceType_NUM];
1236   VarList PreservedGPRs;
1237   VarList PreservedSRegs;
1238 
1239   /// Helper class that understands the Calling Convention and register
1240   /// assignments. The first few integer type parameters can use r0-r3,
1241   /// regardless of their position relative to the floating-point/vector
1242   /// arguments in the argument list. Floating-point and vector arguments
1243   /// can use q0-q3 (aka d0-d7, s0-s15). For more information on the topic,
1244   /// see the ARM Architecture Procedure Calling Standards (AAPCS).
1245   ///
1246   /// Technically, arguments that can start with registers but extend beyond the
1247   /// available registers can be split between the registers and the stack.
1248   /// However, this is typically  for passing GPR structs by value, and PNaCl
1249   /// transforms expand this out.
1250   ///
1251   /// At (public) function entry, the stack must be 8-byte aligned.
1252   class CallingConv {
1253     CallingConv(const CallingConv &) = delete;
1254     CallingConv &operator=(const CallingConv &) = delete;
1255 
1256   public:
1257     CallingConv();
1258     ~CallingConv() = default;
1259 
1260     /// argInGPR returns true if there is a GPR available for the requested
1261     /// type, and false otherwise. If it returns true, Reg is set to the
1262     /// appropriate register number. Note that, when Ty == IceType_i64, Reg will
1263     /// be an I64 register pair.
1264     bool argInGPR(Type Ty, RegNumT *Reg);
1265 
1266     /// argInVFP is to floating-point/vector types what argInGPR is for integer
1267     /// types.
1268     bool argInVFP(Type Ty, RegNumT *Reg);
1269 
1270   private:
1271     void discardUnavailableGPRsAndTheirAliases(CfgVector<RegNumT> *Regs);
1272     SmallBitVector GPRegsUsed;
1273     CfgVector<RegNumT> GPRArgs;
1274     CfgVector<RegNumT> I64Args;
1275 
1276     void discardUnavailableVFPRegs(CfgVector<RegNumT> *Regs);
1277     SmallBitVector VFPRegsUsed;
1278     CfgVector<RegNumT> FP32Args;
1279     CfgVector<RegNumT> FP64Args;
1280     CfgVector<RegNumT> Vec128Args;
1281   };
1282 
1283 private:
1284   ENABLE_MAKE_UNIQUE;
1285 
1286   OperandARM32Mem *formAddressingMode(Type Ty, Cfg *Func, const Inst *LdSt,
1287                                       Operand *Base);
1288 
1289   void postambleCtpop64(const InstCall *Instr);
1290   void preambleDivRem(const InstCall *Instr);
1291   CfgUnorderedMap<Operand *, void (TargetARM32::*)(const InstCall *Instr)>
1292       ARM32HelpersPreamble;
1293   CfgUnorderedMap<Operand *, void (TargetARM32::*)(const InstCall *Instr)>
1294       ARM32HelpersPostamble;
1295 
1296   class ComputationTracker {
1297   public:
1298     ComputationTracker() = default;
1299     ~ComputationTracker() = default;
1300 
forgetProducers()1301     void forgetProducers() { KnownComputations.clear(); }
1302     void recordProducers(CfgNode *Node);
1303 
getProducerOf(const Operand * Opnd)1304     const Inst *getProducerOf(const Operand *Opnd) const {
1305       auto *Var = llvm::dyn_cast<Variable>(Opnd);
1306       if (Var == nullptr) {
1307         return nullptr;
1308       }
1309 
1310       auto Iter = KnownComputations.find(Var->getIndex());
1311       if (Iter == KnownComputations.end()) {
1312         return nullptr;
1313       }
1314 
1315       return Iter->second.Instr;
1316     }
1317 
dump(const Cfg * Func)1318     void dump(const Cfg *Func) const {
1319       if (!BuildDefs::dump() || !Func->isVerbose(IceV_Folding))
1320         return;
1321       OstreamLocker L(Func->getContext());
1322       Ostream &Str = Func->getContext()->getStrDump();
1323       Str << "foldable producer:\n";
1324       for (const auto &Computation : KnownComputations) {
1325         Str << "    ";
1326         Computation.second.Instr->dump(Func);
1327         Str << "\n";
1328       }
1329       Str << "\n";
1330     }
1331 
1332   private:
1333     class ComputationEntry {
1334     public:
ComputationEntry(Inst * I,Type Ty)1335       ComputationEntry(Inst *I, Type Ty) : Instr(I), ComputationType(Ty) {}
1336       Inst *const Instr;
1337       // Boolean folding is disabled for variables whose live range is multi
1338       // block. We conservatively initialize IsLiveOut to true, and set it to
1339       // false once we find the end of the live range for the variable defined
1340       // by this instruction. If liveness analysis is not performed (e.g., in
1341       // Om1 mode) IsLiveOut will never be set to false, and folding will be
1342       // disabled.
1343       bool IsLiveOut = true;
1344       int32_t NumUses = 0;
1345       Type ComputationType;
1346     };
1347 
1348     // ComputationMap maps a Variable number to a payload identifying which
1349     // instruction defined it.
1350     using ComputationMap = CfgUnorderedMap<SizeT, ComputationEntry>;
1351     ComputationMap KnownComputations;
1352   };
1353 
1354   ComputationTracker Computations;
1355 
1356   // AllowTemporaryWithNoReg indicates if TargetARM32::makeReg() can be invoked
1357   // without specifying a physical register. This is needed for creating unbound
1358   // temporaries during Ice -> ARM lowering, but before register allocation.
1359   // This a safe-guard that no unbound temporaries are created during the
1360   // legalization post-passes.
1361   bool AllowTemporaryWithNoReg = true;
1362   // ForbidTemporaryWithoutReg is a RAII class that manages
1363   // AllowTemporaryWithNoReg.
1364   class ForbidTemporaryWithoutReg {
1365     ForbidTemporaryWithoutReg() = delete;
1366     ForbidTemporaryWithoutReg(const ForbidTemporaryWithoutReg &) = delete;
1367     ForbidTemporaryWithoutReg &
1368     operator=(const ForbidTemporaryWithoutReg &) = delete;
1369 
1370   public:
ForbidTemporaryWithoutReg(TargetARM32 * Target)1371     explicit ForbidTemporaryWithoutReg(TargetARM32 *Target) : Target(Target) {
1372       Target->AllowTemporaryWithNoReg = false;
1373     }
~ForbidTemporaryWithoutReg()1374     ~ForbidTemporaryWithoutReg() { Target->AllowTemporaryWithNoReg = true; }
1375 
1376   private:
1377     TargetARM32 *const Target;
1378   };
1379 };
1380 
1381 class TargetDataARM32 final : public TargetDataLowering {
1382   TargetDataARM32() = delete;
1383   TargetDataARM32(const TargetDataARM32 &) = delete;
1384   TargetDataARM32 &operator=(const TargetDataARM32 &) = delete;
1385 
1386 public:
create(GlobalContext * Ctx)1387   static std::unique_ptr<TargetDataLowering> create(GlobalContext *Ctx) {
1388     return std::unique_ptr<TargetDataLowering>(new TargetDataARM32(Ctx));
1389   }
1390 
1391   void lowerGlobals(const VariableDeclarationList &Vars,
1392                     const std::string &SectionSuffix) override;
1393   void lowerConstants() override;
1394   void lowerJumpTables() override;
1395 
1396 protected:
1397   explicit TargetDataARM32(GlobalContext *Ctx);
1398 
1399 private:
1400   ~TargetDataARM32() override = default;
1401 };
1402 
1403 class TargetHeaderARM32 final : public TargetHeaderLowering {
1404   TargetHeaderARM32() = delete;
1405   TargetHeaderARM32(const TargetHeaderARM32 &) = delete;
1406   TargetHeaderARM32 &operator=(const TargetHeaderARM32 &) = delete;
1407 
1408 public:
create(GlobalContext * Ctx)1409   static std::unique_ptr<TargetHeaderLowering> create(GlobalContext *Ctx) {
1410     return std::unique_ptr<TargetHeaderLowering>(new TargetHeaderARM32(Ctx));
1411   }
1412 
1413   void lower() override;
1414 
1415 protected:
1416   explicit TargetHeaderARM32(GlobalContext *Ctx);
1417 
1418 private:
1419   ~TargetHeaderARM32() = default;
1420 
1421   TargetARM32Features CPUFeatures;
1422 };
1423 
1424 } // end of namespace ARM32
1425 } // end of namespace Ice
1426 
1427 #endif // SUBZERO_SRC_ICETARGETLOWERINGARM32_H
1428