1 //===- subzero/src/IceTargetLowering.h - Lowering interface -----*- C++ -*-===//
2 //
3 //                        The Subzero Code Generator
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief Declares the TargetLowering, LoweringContext, and TargetDataLowering
12 /// classes.
13 ///
14 /// TargetLowering is an abstract class used to drive the translation/lowering
15 /// process. LoweringContext maintains a context for lowering each instruction,
16 /// offering conveniences such as iterating over non-deleted instructions.
17 /// TargetDataLowering is an abstract class used to drive the lowering/emission
18 /// of global initializers, external global declarations, and internal constant
19 /// pools.
20 ///
21 //===----------------------------------------------------------------------===//
22 
23 #ifndef SUBZERO_SRC_ICETARGETLOWERING_H
24 #define SUBZERO_SRC_ICETARGETLOWERING_H
25 
26 #include "IceBitVector.h"
27 #include "IceCfgNode.h"
28 #include "IceDefs.h"
29 #include "IceInst.h" // for the names of the Inst subtypes
30 #include "IceOperand.h"
31 #include "IceRegAlloc.h"
32 #include "IceTypes.h"
33 
34 #include <utility>
35 
36 namespace Ice {
37 
// UnimplementedError stops translation at the point of use unless the
// SkipUnimplemented flag is set in Flags, in which case it does nothing. It is
// defined as a macro so that we can get actual line numbers. llvm_unreachable
// is used rather than report_fatal_error because it gives better stack traces.
#define UnimplementedError(Flags)                                              \
  do {                                                                         \
    if (static_cast<const ClFlags &>(Flags).getSkipUnimplemented())            \
      break;                                                                   \
    llvm_unreachable("Not yet implemented");                                   \
    abort();                                                                   \
  } while (0)
49 
// UnimplementedLoweringError is similar in style to UnimplementedError. Given
// a TargetLowering object pointer and an Inst pointer, it behaves as follows:
//  - if the SkipUnimplemented flag is set, it calls addFakeDefUses() on the
//    target to add appropriate FakeDef and FakeUse instructions to try to
//    maintain liveness consistency;
//  - otherwise it stops with a message that names the instruction.
// Both arguments are parenthesized in the expansion, so any pointer-valued
// expression (e.g. &Obj) may be passed for either of them.
#define UnimplementedLoweringError(Target, Instr)                              \
  do {                                                                         \
    if (getFlags().getSkipUnimplemented()) {                                   \
      (Target)->addFakeDefUses(Instr);                                         \
    } else {                                                                   \
      /* Use llvm_unreachable instead of report_fatal_error, which gives       \
         better stack traces. */                                               \
      llvm_unreachable(                                                        \
          (std::string("Not yet implemented: ") + (Instr)->getInstName())      \
              .c_str());                                                       \
      abort();                                                                 \
    }                                                                          \
  } while (0)
66 
67 /// LoweringContext makes it easy to iterate through non-deleted instructions in
68 /// a node, and insert new (lowered) instructions at the current point. Along
69 /// with the instruction list container and associated iterators, it holds the
70 /// current node, which is needed when inserting new instructions in order to
71 /// track whether variables are used as single-block or multi-block.
72 class LoweringContext {
73   LoweringContext(const LoweringContext &) = delete;
74   LoweringContext &operator=(const LoweringContext &) = delete;
75 
76 public:
77   LoweringContext() = default;
78   ~LoweringContext() = default;
79   void init(CfgNode *Node);
getNextInst()80   Inst *getNextInst() const {
81     if (Next == End)
82       return nullptr;
83     return iteratorToInst(Next);
84   }
getNextInst(InstList::iterator & Iter)85   Inst *getNextInst(InstList::iterator &Iter) const {
86     advanceForward(Iter);
87     if (Iter == End)
88       return nullptr;
89     return iteratorToInst(Iter);
90   }
getNode()91   CfgNode *getNode() const { return Node; }
atEnd()92   bool atEnd() const { return Cur == End; }
getCur()93   InstList::iterator getCur() const { return Cur; }
getNext()94   InstList::iterator getNext() const { return Next; }
getEnd()95   InstList::iterator getEnd() const { return End; }
96   void insert(Inst *Instr);
insert(Args &&...A)97   template <typename Inst, typename... Args> Inst *insert(Args &&... A) {
98     auto *New = Inst::create(Node->getCfg(), std::forward<Args>(A)...);
99     insert(New);
100     return New;
101   }
102   Inst *getLastInserted() const;
advanceCur()103   void advanceCur() { Cur = Next; }
advanceNext()104   void advanceNext() { advanceForward(Next); }
setCur(InstList::iterator C)105   void setCur(InstList::iterator C) { Cur = C; }
setNext(InstList::iterator N)106   void setNext(InstList::iterator N) { Next = N; }
107   void rewind();
setInsertPoint(const InstList::iterator & Position)108   void setInsertPoint(const InstList::iterator &Position) { Next = Position; }
109   void availabilityReset();
110   void availabilityUpdate();
111   Variable *availabilityGet(Operand *Src) const;
112 
113 private:
114   /// Node is the argument to Inst::updateVars().
115   CfgNode *Node = nullptr;
116   Inst *LastInserted = nullptr;
117   /// Cur points to the current instruction being considered. It is guaranteed
118   /// to point to a non-deleted instruction, or to be End.
119   InstList::iterator Cur;
120   /// Next doubles as a pointer to the next valid instruction (if any), and the
121   /// new-instruction insertion point. It is also updated for the caller in case
122   /// the lowering consumes more than one high-level instruction. It is
123   /// guaranteed to point to a non-deleted instruction after Cur, or to be End.
124   // TODO: Consider separating the notion of "next valid instruction" and "new
125   // instruction insertion point", to avoid confusion when previously-deleted
126   // instructions come between the two points.
127   InstList::iterator Next;
128   /// Begin is a copy of Insts.begin(), used if iterators are moved backward.
129   InstList::iterator Begin;
130   /// End is a copy of Insts.end(), used if Next needs to be advanced.
131   InstList::iterator End;
132   /// LastDest and LastSrc capture the parameters of the last "Dest=Src" simple
133   /// assignment inserted (provided Src is a variable).  This is used for simple
134   /// availability analysis.
135   Variable *LastDest = nullptr;
136   Variable *LastSrc = nullptr;
137 
138   void skipDeleted(InstList::iterator &I) const;
139   void advanceForward(InstList::iterator &I) const;
140 };
141 
142 /// A helper class to advance the LoweringContext at each loop iteration.
143 class PostIncrLoweringContext {
144   PostIncrLoweringContext() = delete;
145   PostIncrLoweringContext(const PostIncrLoweringContext &) = delete;
146   PostIncrLoweringContext &operator=(const PostIncrLoweringContext &) = delete;
147 
148 public:
PostIncrLoweringContext(LoweringContext & Context)149   explicit PostIncrLoweringContext(LoweringContext &Context)
150       : Context(Context) {}
~PostIncrLoweringContext()151   ~PostIncrLoweringContext() {
152     Context.advanceCur();
153     Context.advanceNext();
154   }
155 
156 private:
157   LoweringContext &Context;
158 };
159 
160 /// TargetLowering is the base class for all backends in Subzero. In addition to
161 /// implementing the abstract methods in this class, each concrete target must
162 /// also implement a named constructor in its own namespace. For instance, for
163 /// X8632 we have:
164 ///
165 ///  namespace X8632 {
166 ///    void createTargetLowering(Cfg *Func);
167 ///  }
168 class TargetLowering {
169   TargetLowering() = delete;
170   TargetLowering(const TargetLowering &) = delete;
171   TargetLowering &operator=(const TargetLowering &) = delete;
172 
173 public:
174   static void staticInit(GlobalContext *Ctx);
175   // Each target must define a public static method:
176   //   static void staticInit(GlobalContext *Ctx);
177   static bool shouldBePooled(const class Constant *C);
178   static Type getPointerType();
179 
180   static std::unique_ptr<TargetLowering> createLowering(TargetArch Target,
181                                                         Cfg *Func);
182 
183   virtual std::unique_ptr<Assembler> createAssembler() const = 0;
184 
translate()185   void translate() {
186     switch (Func->getOptLevel()) {
187     case Opt_m1:
188       translateOm1();
189       break;
190     case Opt_0:
191       translateO0();
192       break;
193     case Opt_1:
194       translateO1();
195       break;
196     case Opt_2:
197       translateO2();
198       break;
199     }
200   }
translateOm1()201   virtual void translateOm1() {
202     Func->setError("Target doesn't specify Om1 lowering steps.");
203   }
translateO0()204   virtual void translateO0() {
205     Func->setError("Target doesn't specify O0 lowering steps.");
206   }
translateO1()207   virtual void translateO1() {
208     Func->setError("Target doesn't specify O1 lowering steps.");
209   }
translateO2()210   virtual void translateO2() {
211     Func->setError("Target doesn't specify O2 lowering steps.");
212   }
213 
214   /// Generates calls to intrinsics for operations the Target can't handle.
215   void genTargetHelperCalls();
216   /// Tries to do address mode optimization on a single instruction.
217   void doAddressOpt();
218   /// Randomly insert NOPs.
219   void doNopInsertion(RandomNumberGenerator &RNG);
220   /// Lowers a single non-Phi instruction.
221   void lower();
222   /// Inserts and lowers a single high-level instruction at a specific insertion
223   /// point.
224   void lowerInst(CfgNode *Node, InstList::iterator Next, InstHighLevel *Instr);
225   /// Does preliminary lowering of the set of Phi instructions in the current
226   /// node. The main intention is to do what's needed to keep the unlowered Phi
227   /// instructions consistent with the lowered non-Phi instructions, e.g. to
228   /// lower 64-bit operands on a 32-bit target.
prelowerPhis()229   virtual void prelowerPhis() {}
230   /// Tries to do branch optimization on a single instruction. Returns true if
231   /// some optimization was done.
doBranchOpt(Inst *,const CfgNode *)232   virtual bool doBranchOpt(Inst * /*I*/, const CfgNode * /*NextNode*/) {
233     return false;
234   }
235 
236   virtual SizeT getNumRegisters() const = 0;
237   /// Returns a variable pre-colored to the specified physical register. This is
238   /// generally used to get very direct access to the register such as in the
239   /// prolog or epilog or for marking scratch registers as killed by a call. If
240   /// a Type is not provided, a target-specific default type is used.
241   virtual Variable *getPhysicalRegister(RegNumT RegNum,
242                                         Type Ty = IceType_void) = 0;
243   /// Returns a printable name for the register.
244   virtual const char *getRegName(RegNumT RegNum, Type Ty) const = 0;
245 
hasFramePointer()246   virtual bool hasFramePointer() const { return false; }
247   virtual void setHasFramePointer() = 0;
248   virtual RegNumT getStackReg() const = 0;
249   virtual RegNumT getFrameReg() const = 0;
250   virtual RegNumT getFrameOrStackReg() const = 0;
251   virtual size_t typeWidthInBytesOnStack(Type Ty) const = 0;
252   virtual uint32_t getStackAlignment() const = 0;
needsStackPointerAlignment()253   virtual bool needsStackPointerAlignment() const { return false; }
254   virtual void reserveFixedAllocaArea(size_t Size, size_t Align) = 0;
255   virtual int32_t getFrameFixedAllocaOffset() const = 0;
maxOutArgsSizeBytes()256   virtual uint32_t maxOutArgsSizeBytes() const { return 0; }
257   // Addressing relative to frame pointer differs in MIPS compared to X86/ARM
258   // since MIPS decrements its stack pointer prior to saving it in the frame
259   // pointer register.
getFramePointerOffset(uint32_t CurrentOffset,uint32_t Size)260   virtual uint32_t getFramePointerOffset(uint32_t CurrentOffset,
261                                          uint32_t Size) const {
262     return -(CurrentOffset + Size);
263   }
264   /// Return whether a 64-bit Variable should be split into a Variable64On32.
265   virtual bool shouldSplitToVariable64On32(Type Ty) const = 0;
266 
267   /// Return whether a Vector Variable should be split into a VariableVecOn32.
shouldSplitToVariableVecOn32(Type Ty)268   virtual bool shouldSplitToVariableVecOn32(Type Ty) const {
269     (void)Ty;
270     return false;
271   }
272 
hasComputedFrame()273   bool hasComputedFrame() const { return HasComputedFrame; }
274   /// Returns true if this function calls a function that has the "returns
275   /// twice" attribute.
callsReturnsTwice()276   bool callsReturnsTwice() const { return CallsReturnsTwice; }
setCallsReturnsTwice(bool RetTwice)277   void setCallsReturnsTwice(bool RetTwice) { CallsReturnsTwice = RetTwice; }
makeNextLabelNumber()278   SizeT makeNextLabelNumber() { return NextLabelNumber++; }
makeNextJumpTableNumber()279   SizeT makeNextJumpTableNumber() { return NextJumpTableNumber++; }
getContext()280   LoweringContext &getContext() { return Context; }
getFunc()281   Cfg *getFunc() const { return Func; }
getGlobalContext()282   GlobalContext *getGlobalContext() const { return Ctx; }
283 
284   enum RegSet {
285     RegSet_None = 0,
286     RegSet_CallerSave = 1 << 0,
287     RegSet_CalleeSave = 1 << 1,
288     RegSet_StackPointer = 1 << 2,
289     RegSet_FramePointer = 1 << 3,
290     RegSet_All = ~RegSet_None
291   };
292   using RegSetMask = uint32_t;
293 
294   virtual SmallBitVector getRegisterSet(RegSetMask Include,
295                                         RegSetMask Exclude) const = 0;
296   /// Get the set of physical registers available for the specified Variable's
297   /// register class, applying register restrictions from the command line.
298   virtual const SmallBitVector &
299   getRegistersForVariable(const Variable *Var) const = 0;
300   /// Get the set of *all* physical registers available for the specified
301   /// Variable's register class, *not* applying register restrictions from the
302   /// command line.
303   virtual const SmallBitVector &
304   getAllRegistersForVariable(const Variable *Var) const = 0;
305   virtual const SmallBitVector &getAliasesForRegister(RegNumT) const = 0;
306 
307   void regAlloc(RegAllocKind Kind);
308   void postRegallocSplitting(const SmallBitVector &RegMask);
309 
310   virtual void
311   makeRandomRegisterPermutation(llvm::SmallVectorImpl<RegNumT> &Permutation,
312                                 const SmallBitVector &ExcludeRegisters,
313                                 uint64_t Salt) const = 0;
314 
315   /// Get the minimum number of clusters required for a jump table to be
316   /// considered.
317   virtual SizeT getMinJumpTableSize() const = 0;
318   virtual void emitJumpTable(const Cfg *Func,
319                              const InstJumpTable *JumpTable) const = 0;
320 
321   virtual void emitVariable(const Variable *Var) const = 0;
322 
323   void emitWithoutPrefix(const ConstantRelocatable *CR,
324                          const char *Suffix = "") const;
325 
326   virtual void emit(const ConstantInteger32 *C) const = 0;
327   virtual void emit(const ConstantInteger64 *C) const = 0;
328   virtual void emit(const ConstantFloat *C) const = 0;
329   virtual void emit(const ConstantDouble *C) const = 0;
330   virtual void emit(const ConstantUndef *C) const = 0;
331   virtual void emit(const ConstantRelocatable *CR) const = 0;
332 
333   /// Performs target-specific argument lowering.
334   virtual void lowerArguments() = 0;
335 
initNodeForLowering(CfgNode *)336   virtual void initNodeForLowering(CfgNode *) {}
337   virtual void addProlog(CfgNode *Node) = 0;
338   virtual void addEpilog(CfgNode *Node) = 0;
339 
340   /// Create a properly-typed "mov" instruction.  This is primarily for local
341   /// variable splitting.
createLoweredMove(Variable * Dest,Variable * SrcVar)342   virtual Inst *createLoweredMove(Variable *Dest, Variable *SrcVar) {
343     // TODO(stichnot): make pure virtual by implementing for all targets
344     (void)Dest;
345     (void)SrcVar;
346     llvm::report_fatal_error("createLoweredMove() unimplemented");
347     return nullptr;
348   }
349 
350   virtual ~TargetLowering() = default;
351 
352 private:
353   // This control variable is used by AutoBundle (RAII-style bundle
354   // locking/unlocking) to prevent nested bundles.
355   bool AutoBundling = false;
356 
357   /// This indicates whether we are in the genTargetHelperCalls phase, and
358   /// therefore can do things like scalarization.
359   bool GeneratingTargetHelpers = false;
360 
  // _bundle_lock() and _bundle_unlock() were made private to force subtargets
  // to use the AutoBundle helper.
363   void
364   _bundle_lock(InstBundleLock::Option BundleOption = InstBundleLock::Opt_None) {
365     Context.insert<InstBundleLock>(BundleOption);
366   }
_bundle_unlock()367   void _bundle_unlock() { Context.insert<InstBundleUnlock>(); }
368 
369 protected:
  /// AutoBundle provides RAII-style bundling. Sub-targets are expected to use
371   /// it when emitting NaCl Bundles to ensure proper bundle_unlocking, and
372   /// prevent nested bundles.
373   ///
374   /// AutoBundle objects will emit a _bundle_lock during construction (but only
375   /// if sandboxed code generation was requested), and a bundle_unlock() during
376   /// destruction. By carefully scoping objects of this type, Subtargets can
377   /// ensure proper bundle emission.
378   class AutoBundle {
379     AutoBundle() = delete;
380     AutoBundle(const AutoBundle &) = delete;
381     AutoBundle &operator=(const AutoBundle &) = delete;
382 
383   public:
384     explicit AutoBundle(TargetLowering *Target, InstBundleLock::Option Option =
385                                                     InstBundleLock::Opt_None);
386     ~AutoBundle();
387 
388   private:
389     TargetLowering *const Target;
390     const bool NeedSandboxing;
391   };
392 
393   explicit TargetLowering(Cfg *Func);
394   // Applies command line filters to TypeToRegisterSet array.
395   static void filterTypeToRegisterSet(
396       GlobalContext *Ctx, int32_t NumRegs, SmallBitVector TypeToRegisterSet[],
397       size_t TypeToRegisterSetSize,
398       std::function<std::string(RegNumT)> getRegName,
399       std::function<const char *(RegClass)> getRegClassName);
400   virtual void lowerAlloca(const InstAlloca *Instr) = 0;
401   virtual void lowerArithmetic(const InstArithmetic *Instr) = 0;
402   virtual void lowerAssign(const InstAssign *Instr) = 0;
403   virtual void lowerBr(const InstBr *Instr) = 0;
404   virtual void lowerBreakpoint(const InstBreakpoint *Instr) = 0;
405   virtual void lowerCall(const InstCall *Instr) = 0;
406   virtual void lowerCast(const InstCast *Instr) = 0;
407   virtual void lowerFcmp(const InstFcmp *Instr) = 0;
408   virtual void lowerExtractElement(const InstExtractElement *Instr) = 0;
409   virtual void lowerIcmp(const InstIcmp *Instr) = 0;
410   virtual void lowerInsertElement(const InstInsertElement *Instr) = 0;
411   virtual void lowerIntrinsicCall(const InstIntrinsicCall *Instr) = 0;
412   virtual void lowerLoad(const InstLoad *Instr) = 0;
413   virtual void lowerPhi(const InstPhi *Instr) = 0;
414   virtual void lowerRet(const InstRet *Instr) = 0;
415   virtual void lowerSelect(const InstSelect *Instr) = 0;
416   virtual void lowerShuffleVector(const InstShuffleVector *Instr) = 0;
417   virtual void lowerStore(const InstStore *Instr) = 0;
418   virtual void lowerSwitch(const InstSwitch *Instr) = 0;
419   virtual void lowerUnreachable(const InstUnreachable *Instr) = 0;
420   virtual void lowerOther(const Inst *Instr);
421 
422   virtual void genTargetHelperCallFor(Inst *Instr) = 0;
423   virtual uint32_t getCallStackArgumentsSizeBytes(const InstCall *Instr) = 0;
424 
425   /// Opportunity to modify other instructions to help Address Optimization
doAddressOptOther()426   virtual void doAddressOptOther() {}
doAddressOptLoad()427   virtual void doAddressOptLoad() {}
doAddressOptStore()428   virtual void doAddressOptStore() {}
doAddressOptLoadSubVector()429   virtual void doAddressOptLoadSubVector() {}
doAddressOptStoreSubVector()430   virtual void doAddressOptStoreSubVector() {}
doMockBoundsCheck(Operand *)431   virtual void doMockBoundsCheck(Operand *) {}
432   virtual void randomlyInsertNop(float Probability,
433                                  RandomNumberGenerator &RNG) = 0;
434   /// This gives the target an opportunity to post-process the lowered expansion
435   /// before returning.
postLower()436   virtual void postLower() {}
437 
438   /// When the SkipUnimplemented flag is set, addFakeDefUses() gets invoked by
439   /// the UnimplementedLoweringError macro to insert fake uses of all the
440   /// instruction variables and a fake def of the instruction dest, in order to
441   /// preserve integrity of liveness analysis.
442   void addFakeDefUses(const Inst *Instr);
443 
444   /// Find (non-SSA) instructions where the Dest variable appears in some source
445   /// operand, and set the IsDestRedefined flag.  This keeps liveness analysis
446   /// consistent.
447   void markRedefinitions();
448 
449   /// Make a pass over the Cfg to determine which variables need stack slots and
  /// place them in a sorted list (SortedSpilledVariables). Among those vars,
  /// classify the spill variables as local to the basic block vs global
452   /// (multi-block) in order to compute the parameters GlobalsSize and
453   /// SpillAreaSizeBytes (represents locals or general vars if the coalescing of
454   /// locals is disallowed) along with alignments required for variables in each
455   /// area. We rely on accurate VMetadata in order to classify a variable as
456   /// global vs local (otherwise the variable is conservatively global). The
457   /// in-args should be initialized to 0.
458   ///
459   /// This is only a pre-pass and the actual stack slot assignment is handled
460   /// separately.
461   ///
462   /// There may be target-specific Variable types, which will be handled by
463   /// TargetVarHook. If the TargetVarHook returns true, then the variable is
464   /// skipped and not considered with the rest of the spilled variables.
465   void getVarStackSlotParams(VarList &SortedSpilledVariables,
466                              SmallBitVector &RegsUsed, size_t *GlobalsSize,
467                              size_t *SpillAreaSizeBytes,
468                              uint32_t *SpillAreaAlignmentBytes,
469                              uint32_t *LocalsSlotsAlignmentBytes,
470                              std::function<bool(Variable *)> TargetVarHook);
471 
472   /// Calculate the amount of padding needed to align the local and global areas
473   /// to the required alignment. This assumes the globals/locals layout used by
474   /// getVarStackSlotParams and assignVarStackSlots.
475   void alignStackSpillAreas(uint32_t SpillAreaStartOffset,
476                             uint32_t SpillAreaAlignmentBytes,
477                             size_t GlobalsSize,
478                             uint32_t LocalsSlotsAlignmentBytes,
479                             uint32_t *SpillAreaPaddingBytes,
480                             uint32_t *LocalsSlotsPaddingBytes);
481 
482   /// Make a pass through the SortedSpilledVariables and actually assign stack
483   /// slots. SpillAreaPaddingBytes takes into account stack alignment padding.
484   /// The SpillArea starts after that amount of padding. This matches the scheme
485   /// in getVarStackSlotParams, where there may be a separate multi-block global
486   /// var spill area and a local var spill area.
487   void assignVarStackSlots(VarList &SortedSpilledVariables,
488                            size_t SpillAreaPaddingBytes,
489                            size_t SpillAreaSizeBytes,
490                            size_t GlobalsAndSubsequentPaddingSize,
491                            bool UsesFramePointer);
492 
493   /// Sort the variables in Source based on required alignment. The variables
494   /// with the largest alignment need are placed in the front of the Dest list.
495   void sortVarsByAlignment(VarList &Dest, const VarList &Source) const;
496 
497   InstCall *makeHelperCall(RuntimeHelper FuncID, Variable *Dest, SizeT MaxSrcs);
498 
_set_dest_redefined()499   void _set_dest_redefined() { Context.getLastInserted()->setDestRedefined(); }
500 
501   bool shouldOptimizeMemIntrins();
502 
503   void scalarizeArithmetic(InstArithmetic::OpKind K, Variable *Dest,
504                            Operand *Src0, Operand *Src1);
505 
506   /// Generalizes scalarizeArithmetic to support other instruction types.
507   ///
508   /// insertScalarInstruction is a function-like object with signature
509   /// (Variable *Dest, Variable *Src0, Variable *Src1) -> Instr *.
510   template <typename... Operands,
511             typename F = std::function<Inst *(Variable *, Operands *...)>>
scalarizeInstruction(Variable * Dest,F insertScalarInstruction,Operands * ...Srcs)512   void scalarizeInstruction(Variable *Dest, F insertScalarInstruction,
513                             Operands *... Srcs) {
514     assert(GeneratingTargetHelpers &&
515            "scalarizeInstruction called during incorrect phase");
516     const Type DestTy = Dest->getType();
517     assert(isVectorType(DestTy));
518     const Type DestElementTy = typeElementType(DestTy);
519     const SizeT NumElements = typeNumElements(DestTy);
520 
521     Variable *T = Func->makeVariable(DestTy);
522     if (auto *VarVecOn32 = llvm::dyn_cast<VariableVecOn32>(T)) {
523       VarVecOn32->initVecElement(Func);
524       auto *Undef = ConstantUndef::create(Ctx, DestTy);
525       Context.insert<InstAssign>(T, Undef);
526     } else {
527       Context.insert<InstFakeDef>(T);
528     }
529 
530     for (SizeT I = 0; I < NumElements; ++I) {
531       auto *Index = Ctx->getConstantInt32(I);
532 
533       auto makeExtractThunk = [this, Index, NumElements](Operand *Src) {
534         return [this, Index, NumElements, Src]() {
535           (void)NumElements;
536           assert(typeNumElements(Src->getType()) == NumElements);
537 
538           const auto ElementTy = typeElementType(Src->getType());
539           auto *Op = Func->makeVariable(ElementTy);
540           Context.insert<InstExtractElement>(Op, Src, Index);
541           return Op;
542         };
543       };
544 
545       // Perform the operation as a scalar operation.
546       auto *Res = Func->makeVariable(DestElementTy);
547       auto *Arith = applyToThunkedArgs(insertScalarInstruction, Res,
548                                        makeExtractThunk(Srcs)...);
549       genTargetHelperCallFor(Arith);
550 
551       Variable *DestT = Func->makeVariable(DestTy);
552       Context.insert<InstInsertElement>(DestT, T, Res, Index);
553       T = DestT;
554     }
555     Context.insert<InstAssign>(Dest, T);
556   }
557 
558   // applyToThunkedArgs is used by scalarizeInstruction. Ideally, we would just
559   // call insertScalarInstruction(Res, Srcs...), but C++ does not specify
560   // evaluation order which means this leads to an unpredictable final
561   // output. Instead, we wrap each of the Srcs in a thunk and these
562   // applyToThunkedArgs functions apply the thunks in a well defined order so we
563   // still get well-defined output.
applyToThunkedArgs(std::function<Inst * (Variable *,Variable *)> insertScalarInstruction,Variable * Res,std::function<Variable * ()> thunk0)564   Inst *applyToThunkedArgs(
565       std::function<Inst *(Variable *, Variable *)> insertScalarInstruction,
566       Variable *Res, std::function<Variable *()> thunk0) {
567     auto *Src0 = thunk0();
568     return insertScalarInstruction(Res, Src0);
569   }
570 
571   Inst *
applyToThunkedArgs(std::function<Inst * (Variable *,Variable *,Variable *)> insertScalarInstruction,Variable * Res,std::function<Variable * ()> thunk0,std::function<Variable * ()> thunk1)572   applyToThunkedArgs(std::function<Inst *(Variable *, Variable *, Variable *)>
573                          insertScalarInstruction,
574                      Variable *Res, std::function<Variable *()> thunk0,
575                      std::function<Variable *()> thunk1) {
576     auto *Src0 = thunk0();
577     auto *Src1 = thunk1();
578     return insertScalarInstruction(Res, Src0, Src1);
579   }
580 
applyToThunkedArgs(std::function<Inst * (Variable *,Variable *,Variable *,Variable *)> insertScalarInstruction,Variable * Res,std::function<Variable * ()> thunk0,std::function<Variable * ()> thunk1,std::function<Variable * ()> thunk2)581   Inst *applyToThunkedArgs(
582       std::function<Inst *(Variable *, Variable *, Variable *, Variable *)>
583           insertScalarInstruction,
584       Variable *Res, std::function<Variable *()> thunk0,
585       std::function<Variable *()> thunk1, std::function<Variable *()> thunk2) {
586     auto *Src0 = thunk0();
587     auto *Src1 = thunk1();
588     auto *Src2 = thunk2();
589     return insertScalarInstruction(Res, Src0, Src1, Src2);
590   }
591 
  /// SandboxType enumerates all possible sandboxing strategies.
593   enum SandboxType {
594     ST_None,
595     ST_NaCl,
596     ST_Nonsfi,
597   };
598 
599   static SandboxType determineSandboxTypeFromFlags(const ClFlags &Flags);
600 
601   Cfg *Func;
602   GlobalContext *Ctx;
603   bool HasComputedFrame = false;
604   bool CallsReturnsTwice = false;
605   SizeT NextLabelNumber = 0;
606   SizeT NextJumpTableNumber = 0;
607   LoweringContext Context;
608   const SandboxType SandboxingType = ST_None;
609 
610   const static constexpr char *H_getIP_prefix = "__Sz_getIP_";
611 };
612 
613 /// TargetDataLowering is used for "lowering" data including initializers for
614 /// global variables, and the internal constant pools. It is separated out from
615 /// TargetLowering because it does not require a Cfg.
616 class TargetDataLowering {
617   TargetDataLowering() = delete;
618   TargetDataLowering(const TargetDataLowering &) = delete;
619   TargetDataLowering &operator=(const TargetDataLowering &) = delete;
620 
621 public:
622   static std::unique_ptr<TargetDataLowering> createLowering(GlobalContext *Ctx);
623   virtual ~TargetDataLowering();
624 
625   virtual void lowerGlobals(const VariableDeclarationList &Vars,
626                             const std::string &SectionSuffix) = 0;
627   virtual void lowerConstants() = 0;
628   virtual void lowerJumpTables() = 0;
emitTargetRODataSections()629   virtual void emitTargetRODataSections() {}
630 
631 protected:
632   void emitGlobal(const VariableDeclaration &Var,
633                   const std::string &SectionSuffix);
634 
635   /// For now, we assume .long is the right directive for emitting 4 byte emit
636   /// global relocations. However, LLVM MIPS usually uses .4byte instead.
637   /// Perhaps there is some difference when the location is unaligned.
getEmit32Directive()638   static const char *getEmit32Directive() { return ".long"; }
639 
TargetDataLowering(GlobalContext * Ctx)640   explicit TargetDataLowering(GlobalContext *Ctx) : Ctx(Ctx) {}
641   GlobalContext *Ctx;
642 };
643 
644 /// TargetHeaderLowering is used to "lower" the header of an output file. It
645 /// writes out the target-specific header attributes. E.g., for ARM this writes
646 /// out the build attributes (float ABI, etc.).
647 class TargetHeaderLowering {
648   TargetHeaderLowering() = delete;
649   TargetHeaderLowering(const TargetHeaderLowering &) = delete;
650   TargetHeaderLowering &operator=(const TargetHeaderLowering &) = delete;
651 
652 public:
653   static std::unique_ptr<TargetHeaderLowering>
654   createLowering(GlobalContext *Ctx);
655   virtual ~TargetHeaderLowering();
656 
lower()657   virtual void lower() {}
658 
659 protected:
TargetHeaderLowering(GlobalContext * Ctx)660   explicit TargetHeaderLowering(GlobalContext *Ctx) : Ctx(Ctx) {}
661   GlobalContext *Ctx;
662 };
663 
664 } // end of namespace Ice
665 
666 #endif // SUBZERO_SRC_ICETARGETLOWERING_H
667