1 //===- llvm/DataLayout.h - Data size & alignment info -----------*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
// This file defines layout properties related to datatype size/offset/alignment
// information.  It uses lazy annotations to cache information about how
// structure types are laid out and used.
//
// This structure should be created once, filled in if the defaults are not
// correct and then passed around by const&.  None of the member functions
// require modification to the object.
17 //
18 //===----------------------------------------------------------------------===//
19 
20 #ifndef LLVM_IR_DATALAYOUT_H
21 #define LLVM_IR_DATALAYOUT_H
22 
23 #include "llvm/ADT/ArrayRef.h"
24 #include "llvm/ADT/STLExtras.h"
25 #include "llvm/ADT/SmallVector.h"
26 #include "llvm/ADT/StringRef.h"
27 #include "llvm/IR/DerivedTypes.h"
28 #include "llvm/IR/Type.h"
29 #include "llvm/Pass.h"
30 #include "llvm/Support/Casting.h"
31 #include "llvm/Support/ErrorHandling.h"
32 #include "llvm/Support/MathExtras.h"
33 #include <cassert>
34 #include <cstdint>
35 #include <string>
36 
// Opaque handle type for target data. This alias needs to be declared outside
// of the llvm namespace, to avoid a conflict with the llvm-c declaration.
using LLVMTargetDataRef = struct LLVMOpaqueTargetData *;
40 
41 namespace llvm {
42 
// Forward declarations of classes that the declarations in this header refer
// to by pointer or reference.
class GlobalVariable;
class LLVMContext;
class Module;
class StructLayout;
class Triple;
class Value;
49 
/// Enum used to categorize the alignment types stored by LayoutAlignElem.
///
/// Every enumerator other than INVALID_ALIGN has the value of the character
/// literal it is initialized with.
enum AlignTypeEnum {
  INVALID_ALIGN = 0,    ///< No valid alignment type.
  INTEGER_ALIGN = 'i',  ///< Integer alignment entry.
  VECTOR_ALIGN = 'v',   ///< Vector alignment entry.
  FLOAT_ALIGN = 'f',    ///< Floating-point alignment entry.
  AGGREGATE_ALIGN = 'a' ///< Aggregate alignment entry.
};
58 
// FIXME: Currently the DataLayout string carries a "preferred alignment"
// for types. As the DataLayout is module/global, this should likely be
// sunk down to an FTTI element that is queried rather than a global
// preference.
63 
64 /// Layout alignment element.
65 ///
66 /// Stores the alignment data associated with a given alignment type (integer,
67 /// vector, float) and type bit width.
68 ///
69 /// \note The unusual order of elements in the structure attempts to reduce
70 /// padding and make the structure slightly more cache friendly.
71 struct LayoutAlignElem {
72   /// Alignment type from \c AlignTypeEnum
73   unsigned AlignType : 8;
74   unsigned TypeBitWidth : 24;
75   unsigned ABIAlign : 16;
76   unsigned PrefAlign : 16;
77 
78   static LayoutAlignElem get(AlignTypeEnum align_type, unsigned abi_align,
79                              unsigned pref_align, uint32_t bit_width);
80 
81   bool operator==(const LayoutAlignElem &rhs) const;
82 };
83 
/// Layout pointer alignment element.
///
/// Stores the alignment data associated with a given pointer and address space.
///
/// \note The unusual order of elements in the structure attempts to reduce
/// padding and make the structure slightly more cache friendly.
struct PointerAlignElem {
  /// ABI alignment of pointers in this address space.
  unsigned ABIAlign;
  /// Preferred alignment of pointers in this address space.
  unsigned PrefAlign;
  /// Pointer width (presumably in bytes, going by the member name).
  uint32_t TypeByteWidth;
  /// Address space this entry describes.
  uint32_t AddressSpace;
  /// Index width for this address space.
  /// NOTE(review): unit is not visible in this header - confirm against the
  /// implementation.
  uint32_t IndexWidth;

  /// Initializer. Defined out of line.
  static PointerAlignElem get(uint32_t AddressSpace, unsigned ABIAlign,
                              unsigned PrefAlign, uint32_t TypeByteWidth,
                              uint32_t IndexWidth);

  /// Equality comparison against another element. Defined out of line.
  bool operator==(const PointerAlignElem &rhs) const;
};
104 
105 /// A parsed version of the target data layout string in and methods for
106 /// querying it.
107 ///
108 /// The target data layout string is specified *by the target* - a frontend
109 /// generating LLVM IR is required to generate the right target data for the
110 /// target being codegen'd to.
111 class DataLayout {
112 private:
113   /// Defaults to false.
114   bool BigEndian;
115 
116   unsigned AllocaAddrSpace;
117   unsigned StackNaturalAlign;
118   unsigned ProgramAddrSpace;
119 
120   enum ManglingModeT {
121     MM_None,
122     MM_ELF,
123     MM_MachO,
124     MM_WinCOFF,
125     MM_WinCOFFX86,
126     MM_Mips
127   };
128   ManglingModeT ManglingMode;
129 
130   SmallVector<unsigned char, 8> LegalIntWidths;
131 
132   /// Primitive type alignment data. This is sorted by type and bit
133   /// width during construction.
134   using AlignmentsTy = SmallVector<LayoutAlignElem, 16>;
135   AlignmentsTy Alignments;
136 
137   AlignmentsTy::const_iterator
findAlignmentLowerBound(AlignTypeEnum AlignType,uint32_t BitWidth)138   findAlignmentLowerBound(AlignTypeEnum AlignType, uint32_t BitWidth) const {
139     return const_cast<DataLayout *>(this)->findAlignmentLowerBound(AlignType,
140                                                                    BitWidth);
141   }
142 
143   AlignmentsTy::iterator
144   findAlignmentLowerBound(AlignTypeEnum AlignType, uint32_t BitWidth);
145 
146   /// The string representation used to create this DataLayout
147   std::string StringRepresentation;
148 
149   using PointersTy = SmallVector<PointerAlignElem, 8>;
150   PointersTy Pointers;
151 
152   PointersTy::const_iterator
findPointerLowerBound(uint32_t AddressSpace)153   findPointerLowerBound(uint32_t AddressSpace) const {
154     return const_cast<DataLayout *>(this)->findPointerLowerBound(AddressSpace);
155   }
156 
157   PointersTy::iterator findPointerLowerBound(uint32_t AddressSpace);
158 
159   // The StructType -> StructLayout map.
160   mutable void *LayoutMap = nullptr;
161 
162   /// Pointers in these address spaces are non-integral, and don't have a
163   /// well-defined bitwise representation.
164   SmallVector<unsigned, 8> NonIntegralAddressSpaces;
165 
166   void setAlignment(AlignTypeEnum align_type, unsigned abi_align,
167                     unsigned pref_align, uint32_t bit_width);
168   unsigned getAlignmentInfo(AlignTypeEnum align_type, uint32_t bit_width,
169                             bool ABIAlign, Type *Ty) const;
170   void setPointerAlignment(uint32_t AddrSpace, unsigned ABIAlign,
171                            unsigned PrefAlign, uint32_t TypeByteWidth,
172                            uint32_t IndexWidth);
173 
174   /// Internal helper method that returns requested alignment for type.
175   unsigned getAlignment(Type *Ty, bool abi_or_pref) const;
176 
177   /// Parses a target data specification string. Assert if the string is
178   /// malformed.
179   void parseSpecifier(StringRef LayoutDescription);
180 
181   // Free all internal data structures.
182   void clear();
183 
184 public:
185   /// Constructs a DataLayout from a specification string. See reset().
DataLayout(StringRef LayoutDescription)186   explicit DataLayout(StringRef LayoutDescription) {
187     reset(LayoutDescription);
188   }
189 
190   /// Initialize target data from properties stored in the module.
191   explicit DataLayout(const Module *M);
192 
DataLayout(const DataLayout & DL)193   DataLayout(const DataLayout &DL) { *this = DL; }
194 
195   ~DataLayout(); // Not virtual, do not subclass this class
196 
197   DataLayout &operator=(const DataLayout &DL) {
198     clear();
199     StringRepresentation = DL.StringRepresentation;
200     BigEndian = DL.isBigEndian();
201     AllocaAddrSpace = DL.AllocaAddrSpace;
202     StackNaturalAlign = DL.StackNaturalAlign;
203     ProgramAddrSpace = DL.ProgramAddrSpace;
204     ManglingMode = DL.ManglingMode;
205     LegalIntWidths = DL.LegalIntWidths;
206     Alignments = DL.Alignments;
207     Pointers = DL.Pointers;
208     NonIntegralAddressSpaces = DL.NonIntegralAddressSpaces;
209     return *this;
210   }
211 
212   bool operator==(const DataLayout &Other) const;
213   bool operator!=(const DataLayout &Other) const { return !(*this == Other); }
214 
215   void init(const Module *M);
216 
217   /// Parse a data layout string (with fallback to default values).
218   void reset(StringRef LayoutDescription);
219 
220   /// Layout endianness...
isLittleEndian()221   bool isLittleEndian() const { return !BigEndian; }
isBigEndian()222   bool isBigEndian() const { return BigEndian; }
223 
224   /// Returns the string representation of the DataLayout.
225   ///
226   /// This representation is in the same format accepted by the string
227   /// constructor above. This should not be used to compare two DataLayout as
228   /// different string can represent the same layout.
getStringRepresentation()229   const std::string &getStringRepresentation() const {
230     return StringRepresentation;
231   }
232 
233   /// Test if the DataLayout was constructed from an empty string.
isDefault()234   bool isDefault() const { return StringRepresentation.empty(); }
235 
236   /// Returns true if the specified type is known to be a native integer
237   /// type supported by the CPU.
238   ///
239   /// For example, i64 is not native on most 32-bit CPUs and i37 is not native
240   /// on any known one. This returns false if the integer width is not legal.
241   ///
242   /// The width is specified in bits.
isLegalInteger(uint64_t Width)243   bool isLegalInteger(uint64_t Width) const {
244     for (unsigned LegalIntWidth : LegalIntWidths)
245       if (LegalIntWidth == Width)
246         return true;
247     return false;
248   }
249 
isIllegalInteger(uint64_t Width)250   bool isIllegalInteger(uint64_t Width) const { return !isLegalInteger(Width); }
251 
252   /// Returns true if the given alignment exceeds the natural stack alignment.
exceedsNaturalStackAlignment(unsigned Align)253   bool exceedsNaturalStackAlignment(unsigned Align) const {
254     return (StackNaturalAlign != 0) && (Align > StackNaturalAlign);
255   }
256 
getStackAlignment()257   unsigned getStackAlignment() const { return StackNaturalAlign; }
getAllocaAddrSpace()258   unsigned getAllocaAddrSpace() const { return AllocaAddrSpace; }
259 
getProgramAddressSpace()260   unsigned getProgramAddressSpace() const { return ProgramAddrSpace; }
261 
hasMicrosoftFastStdCallMangling()262   bool hasMicrosoftFastStdCallMangling() const {
263     return ManglingMode == MM_WinCOFFX86;
264   }
265 
266   /// Returns true if symbols with leading question marks should not receive IR
267   /// mangling. True for Windows mangling modes.
doNotMangleLeadingQuestionMark()268   bool doNotMangleLeadingQuestionMark() const {
269     return ManglingMode == MM_WinCOFF || ManglingMode == MM_WinCOFFX86;
270   }
271 
hasLinkerPrivateGlobalPrefix()272   bool hasLinkerPrivateGlobalPrefix() const { return ManglingMode == MM_MachO; }
273 
getLinkerPrivateGlobalPrefix()274   StringRef getLinkerPrivateGlobalPrefix() const {
275     if (ManglingMode == MM_MachO)
276       return "l";
277     return "";
278   }
279 
getGlobalPrefix()280   char getGlobalPrefix() const {
281     switch (ManglingMode) {
282     case MM_None:
283     case MM_ELF:
284     case MM_Mips:
285     case MM_WinCOFF:
286       return '\0';
287     case MM_MachO:
288     case MM_WinCOFFX86:
289       return '_';
290     }
291     llvm_unreachable("invalid mangling mode");
292   }
293 
getPrivateGlobalPrefix()294   StringRef getPrivateGlobalPrefix() const {
295     switch (ManglingMode) {
296     case MM_None:
297       return "";
298     case MM_ELF:
299     case MM_WinCOFF:
300       return ".L";
301     case MM_Mips:
302       return "$";
303     case MM_MachO:
304     case MM_WinCOFFX86:
305       return "L";
306     }
307     llvm_unreachable("invalid mangling mode");
308   }
309 
310   static const char *getManglingComponent(const Triple &T);
311 
312   /// Returns true if the specified type fits in a native integer type
313   /// supported by the CPU.
314   ///
315   /// For example, if the CPU only supports i32 as a native integer type, then
316   /// i27 fits in a legal integer type but i45 does not.
fitsInLegalInteger(unsigned Width)317   bool fitsInLegalInteger(unsigned Width) const {
318     for (unsigned LegalIntWidth : LegalIntWidths)
319       if (Width <= LegalIntWidth)
320         return true;
321     return false;
322   }
323 
324   /// Layout pointer alignment
325   unsigned getPointerABIAlignment(unsigned AS) const;
326 
327   /// Return target's alignment for stack-based pointers
328   /// FIXME: The defaults need to be removed once all of
329   /// the backends/clients are updated.
330   unsigned getPointerPrefAlignment(unsigned AS = 0) const;
331 
332   /// Layout pointer size
333   /// FIXME: The defaults need to be removed once all of
334   /// the backends/clients are updated.
335   unsigned getPointerSize(unsigned AS = 0) const;
336 
337   // Index size used for address calculation.
338   unsigned getIndexSize(unsigned AS) const;
339 
340   /// Return the address spaces containing non-integral pointers.  Pointers in
341   /// this address space don't have a well-defined bitwise representation.
getNonIntegralAddressSpaces()342   ArrayRef<unsigned> getNonIntegralAddressSpaces() const {
343     return NonIntegralAddressSpaces;
344   }
345 
isNonIntegralPointerType(PointerType * PT)346   bool isNonIntegralPointerType(PointerType *PT) const {
347     ArrayRef<unsigned> NonIntegralSpaces = getNonIntegralAddressSpaces();
348     return find(NonIntegralSpaces, PT->getAddressSpace()) !=
349            NonIntegralSpaces.end();
350   }
351 
isNonIntegralPointerType(Type * Ty)352   bool isNonIntegralPointerType(Type *Ty) const {
353     auto *PTy = dyn_cast<PointerType>(Ty);
354     return PTy && isNonIntegralPointerType(PTy);
355   }
356 
357   /// Layout pointer size, in bits
358   /// FIXME: The defaults need to be removed once all of
359   /// the backends/clients are updated.
360   unsigned getPointerSizeInBits(unsigned AS = 0) const {
361     return getPointerSize(AS) * 8;
362   }
363 
364   /// Size in bits of index used for address calculation in getelementptr.
getIndexSizeInBits(unsigned AS)365   unsigned getIndexSizeInBits(unsigned AS) const {
366     return getIndexSize(AS) * 8;
367   }
368 
369   /// Layout pointer size, in bits, based on the type.  If this function is
370   /// called with a pointer type, then the type size of the pointer is returned.
371   /// If this function is called with a vector of pointers, then the type size
372   /// of the pointer is returned.  This should only be called with a pointer or
373   /// vector of pointers.
374   unsigned getPointerTypeSizeInBits(Type *) const;
375 
376   /// Layout size of the index used in GEP calculation.
377   /// The function should be called with pointer or vector of pointers type.
378   unsigned getIndexTypeSizeInBits(Type *Ty) const;
379 
getPointerTypeSize(Type * Ty)380   unsigned getPointerTypeSize(Type *Ty) const {
381     return getPointerTypeSizeInBits(Ty) / 8;
382   }
383 
384   /// Size examples:
385   ///
386   /// Type        SizeInBits  StoreSizeInBits  AllocSizeInBits[*]
387   /// ----        ----------  ---------------  ---------------
388   ///  i1            1           8                8
389   ///  i8            8           8                8
390   ///  i19          19          24               32
391   ///  i32          32          32               32
392   ///  i100        100         104              128
393   ///  i128        128         128              128
394   ///  Float        32          32               32
395   ///  Double       64          64               64
396   ///  X86_FP80     80          80               96
397   ///
398   /// [*] The alloc size depends on the alignment, and thus on the target.
399   ///     These values are for x86-32 linux.
400 
401   /// Returns the number of bits necessary to hold the specified type.
402   ///
403   /// For example, returns 36 for i36 and 80 for x86_fp80. The type passed must
404   /// have a size (Type::isSized() must return true).
405   uint64_t getTypeSizeInBits(Type *Ty) const;
406 
407   /// Returns the maximum number of bytes that may be overwritten by
408   /// storing the specified type.
409   ///
410   /// For example, returns 5 for i36 and 10 for x86_fp80.
getTypeStoreSize(Type * Ty)411   uint64_t getTypeStoreSize(Type *Ty) const {
412     return (getTypeSizeInBits(Ty) + 7) / 8;
413   }
414 
415   /// Returns the maximum number of bits that may be overwritten by
416   /// storing the specified type; always a multiple of 8.
417   ///
418   /// For example, returns 40 for i36 and 80 for x86_fp80.
getTypeStoreSizeInBits(Type * Ty)419   uint64_t getTypeStoreSizeInBits(Type *Ty) const {
420     return 8 * getTypeStoreSize(Ty);
421   }
422 
423   /// Returns the offset in bytes between successive objects of the
424   /// specified type, including alignment padding.
425   ///
426   /// This is the amount that alloca reserves for this type. For example,
427   /// returns 12 or 16 for x86_fp80, depending on alignment.
getTypeAllocSize(Type * Ty)428   uint64_t getTypeAllocSize(Type *Ty) const {
429     // Round up to the next alignment boundary.
430     return alignTo(getTypeStoreSize(Ty), getABITypeAlignment(Ty));
431   }
432 
433   /// Returns the offset in bits between successive objects of the
434   /// specified type, including alignment padding; always a multiple of 8.
435   ///
436   /// This is the amount that alloca reserves for this type. For example,
437   /// returns 96 or 128 for x86_fp80, depending on alignment.
getTypeAllocSizeInBits(Type * Ty)438   uint64_t getTypeAllocSizeInBits(Type *Ty) const {
439     return 8 * getTypeAllocSize(Ty);
440   }
441 
442   /// Returns the minimum ABI-required alignment for the specified type.
443   unsigned getABITypeAlignment(Type *Ty) const;
444 
445   /// Returns the minimum ABI-required alignment for an integer type of
446   /// the specified bitwidth.
447   unsigned getABIIntegerTypeAlignment(unsigned BitWidth) const;
448 
449   /// Returns the preferred stack/global alignment for the specified
450   /// type.
451   ///
452   /// This is always at least as good as the ABI alignment.
453   unsigned getPrefTypeAlignment(Type *Ty) const;
454 
455   /// Returns the preferred alignment for the specified type, returned as
456   /// log2 of the value (a shift amount).
457   unsigned getPreferredTypeAlignmentShift(Type *Ty) const;
458 
459   /// Returns an integer type with size at least as big as that of a
460   /// pointer in the given address space.
461   IntegerType *getIntPtrType(LLVMContext &C, unsigned AddressSpace = 0) const;
462 
463   /// Returns an integer (vector of integer) type with size at least as
464   /// big as that of a pointer of the given pointer (vector of pointer) type.
465   Type *getIntPtrType(Type *) const;
466 
467   /// Returns the smallest integer type with size at least as big as
468   /// Width bits.
469   Type *getSmallestLegalIntType(LLVMContext &C, unsigned Width = 0) const;
470 
471   /// Returns the largest legal integer type, or null if none are set.
getLargestLegalIntType(LLVMContext & C)472   Type *getLargestLegalIntType(LLVMContext &C) const {
473     unsigned LargestSize = getLargestLegalIntTypeSizeInBits();
474     return (LargestSize == 0) ? nullptr : Type::getIntNTy(C, LargestSize);
475   }
476 
477   /// Returns the size of largest legal integer type size, or 0 if none
478   /// are set.
479   unsigned getLargestLegalIntTypeSizeInBits() const;
480 
481   /// Returns the type of a GEP index.
482   /// If it was not specified explicitly, it will be the integer type of the
483   /// pointer width - IntPtrType.
484   Type *getIndexType(Type *PtrTy) const;
485 
486   /// Returns the offset from the beginning of the type for the specified
487   /// indices.
488   ///
489   /// Note that this takes the element type, not the pointer type.
490   /// This is used to implement getelementptr.
491   int64_t getIndexedOffsetInType(Type *ElemTy, ArrayRef<Value *> Indices) const;
492 
493   /// Returns a StructLayout object, indicating the alignment of the
494   /// struct, its size, and the offsets of its fields.
495   ///
496   /// Note that this information is lazily cached.
497   const StructLayout *getStructLayout(StructType *Ty) const;
498 
499   /// Returns the preferred alignment of the specified global.
500   ///
501   /// This includes an explicitly requested alignment (if the global has one).
502   unsigned getPreferredAlignment(const GlobalVariable *GV) const;
503 
504   /// Returns the preferred alignment of the specified global, returned
505   /// in log form.
506   ///
507   /// This includes an explicitly requested alignment (if the global has one).
508   unsigned getPreferredAlignmentLog(const GlobalVariable *GV) const;
509 };
510 
unwrap(LLVMTargetDataRef P)511 inline DataLayout *unwrap(LLVMTargetDataRef P) {
512   return reinterpret_cast<DataLayout *>(P);
513 }
514 
wrap(const DataLayout * P)515 inline LLVMTargetDataRef wrap(const DataLayout *P) {
516   return reinterpret_cast<LLVMTargetDataRef>(const_cast<DataLayout *>(P));
517 }
518 
519 /// Used to lazily calculate structure layout information for a target machine,
520 /// based on the DataLayout structure.
521 class StructLayout {
522   uint64_t StructSize;
523   unsigned StructAlignment;
524   unsigned IsPadded : 1;
525   unsigned NumElements : 31;
526   uint64_t MemberOffsets[1]; // variable sized array!
527 
528 public:
getSizeInBytes()529   uint64_t getSizeInBytes() const { return StructSize; }
530 
getSizeInBits()531   uint64_t getSizeInBits() const { return 8 * StructSize; }
532 
getAlignment()533   unsigned getAlignment() const { return StructAlignment; }
534 
535   /// Returns whether the struct has padding or not between its fields.
536   /// NB: Padding in nested element is not taken into account.
hasPadding()537   bool hasPadding() const { return IsPadded; }
538 
539   /// Given a valid byte offset into the structure, returns the structure
540   /// index that contains it.
541   unsigned getElementContainingOffset(uint64_t Offset) const;
542 
getElementOffset(unsigned Idx)543   uint64_t getElementOffset(unsigned Idx) const {
544     assert(Idx < NumElements && "Invalid element idx!");
545     return MemberOffsets[Idx];
546   }
547 
getElementOffsetInBits(unsigned Idx)548   uint64_t getElementOffsetInBits(unsigned Idx) const {
549     return getElementOffset(Idx) * 8;
550   }
551 
552 private:
553   friend class DataLayout; // Only DataLayout can create this class
554 
555   StructLayout(StructType *ST, const DataLayout &DL);
556 };
557 
558 // The implementation of this method is provided inline as it is particularly
559 // well suited to constant folding when called on a specific Type subclass.
getTypeSizeInBits(Type * Ty)560 inline uint64_t DataLayout::getTypeSizeInBits(Type *Ty) const {
561   assert(Ty->isSized() && "Cannot getTypeInfo() on a type that is unsized!");
562   switch (Ty->getTypeID()) {
563   case Type::LabelTyID:
564     return getPointerSizeInBits(0);
565   case Type::PointerTyID:
566     return getPointerSizeInBits(Ty->getPointerAddressSpace());
567   case Type::ArrayTyID: {
568     ArrayType *ATy = cast<ArrayType>(Ty);
569     return ATy->getNumElements() *
570            getTypeAllocSizeInBits(ATy->getElementType());
571   }
572   case Type::StructTyID:
573     // Get the layout annotation... which is lazily created on demand.
574     return getStructLayout(cast<StructType>(Ty))->getSizeInBits();
575   case Type::IntegerTyID:
576     return Ty->getIntegerBitWidth();
577   case Type::HalfTyID:
578     return 16;
579   case Type::FloatTyID:
580     return 32;
581   case Type::DoubleTyID:
582   case Type::X86_MMXTyID:
583     return 64;
584   case Type::PPC_FP128TyID:
585   case Type::FP128TyID:
586     return 128;
587   // In memory objects this is always aligned to a higher boundary, but
588   // only 80 bits contain information.
589   case Type::X86_FP80TyID:
590     return 80;
591   case Type::VectorTyID: {
592     VectorType *VTy = cast<VectorType>(Ty);
593     return VTy->getNumElements() * getTypeSizeInBits(VTy->getElementType());
594   }
595   default:
596     llvm_unreachable("DataLayout::getTypeSizeInBits(): Unsupported type");
597   }
598 }
599 
600 } // end namespace llvm
601 
602 #endif // LLVM_IR_DATALAYOUT_H
603