1 //===- NaClBitCodes.h - Enum values for the bitcode format ------*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
// This header defines Bitcode enum values.
11 //
12 // The enum values defined in this file should be considered permanent.  If
13 // new features are added, they should have values added at the end of the
14 // respective lists.
15 //
16 //===----------------------------------------------------------------------===//
17 
18 #ifndef LLVM_BITCODE_NACL_NACLBITCODES_H
19 #define LLVM_BITCODE_NACL_NACLBITCODES_H
20 
21 #include "llvm/ADT/SmallVector.h"
22 #include "llvm/Support/DataTypes.h"
23 #include "llvm/Support/ErrorHandling.h"
24 #include "llvm/Support/MathExtras.h"
25 #include <climits>
26 
27 namespace llvm {
28 class raw_ostream;
29 
30 namespace naclbitc {
/// Widths and sizes that are fixed by the bitstream format.
enum StandardWidths {
  /// Block IDs are emitted as VBR-8.
  BlockIDWidth = 8,

  /// Code lengths are emitted as VBR-4.
  CodeLenWidth = 4,

  /// Block sizes: up to 2^32 32-bit words = 16GB per block.
  BlockSizeWidth = 32,

  /// Maximum value allowed for Fixed and VBR.
  MaxAbbrevWidth = 32,

  /// Number of bytes in a bitstream word.
  BitstreamWordSize = sizeof(uint32_t),

  /// Minimum number of bits needed to represent a record.
  MinRecordBitSize = 2
};
39 
40   // The standard abbrev namespace always has a way to exit a block, enter a
41   // nested block, define abbrevs, and define an unabbreviated record.
42   enum FixedAbbrevIDs {
43     END_BLOCK = 0,  // Must be zero to guarantee termination for broken bitcode.
44     ENTER_SUBBLOCK = 1,
45 
46     /// DEFINE_ABBREV - Defines an abbrev for the current block.  It consists
47     /// of a vbr5 for # operand infos.  Each operand info is emitted with a
48     /// single bit to indicate if it is a literal encoding.  If so, the value is
49     /// emitted with a vbr8.  If not, the encoding is emitted as 3 bits followed
50     /// by the info value as a vbr5 if needed.
51     DEFINE_ABBREV = 2,
52 
53     // UNABBREV_RECORDs are emitted with a vbr6 for the record code, followed by
54     // a vbr6 for the # operands, followed by vbr6's for each operand.
55     UNABBREV_RECORD = 3,
56 
57     // This is not a code, this is a marker for the first abbrev assignment.
58     // In addition, we assume up to two additional enumerated constants are
59     // added for each extension. These constants are:
60     //
61     //   PREFIX_MAX_FIXED_ABBREV
62     //   PREFIX_MAX_ABBREV
63     //
64     // PREFIX_MAX_ABBREV defines the maximal enumeration value used for
65     // the code selector of a block. If Both PREFIX_MAX_FIXED_ABBREV
66     // and PREFIX_MAX_ABBREV is defined, then PREFIX_MAX_FIXED_ABBREV
67     // defines the last code selector of the block that must be read using
68     // a single read (i.e. a FIXED read, or the first chunk of a VBR read.
69     FIRST_APPLICATION_ABBREV = 4,
70     // Defines default values for code length, if no additional selectors
71     // are added.
72     DEFAULT_MAX_ABBREV = FIRST_APPLICATION_ABBREV-1
73   };
74 
75   /// StandardBlockIDs - All bitcode files can optionally include a BLOCKINFO
76   /// block, which contains metadata about other blocks in the file.
77   enum StandardBlockIDs {
78     /// BLOCKINFO_BLOCK is used to define metadata about blocks, for example,
79     /// standard abbrevs that should be available to all blocks of a specified
80     /// ID.
81     BLOCKINFO_BLOCK_ID = 0,
82     // Block IDs 1-6 are reserved for future expansion.
83     // Dummy block added around all records in a bitcode file. Allows the code
84     // to treat top-level records like all other records (i.e. all records
85     // appear in a block).
86     TOP_LEVEL_BLOCKID = 7,
87     FIRST_APPLICATION_BLOCKID = 8
88   };
89 
90   /// BlockInfoCodes - The blockinfo block contains metadata about user-defined
91   /// blocks.
92   enum BlockInfoCodes {
93     // DEFINE_ABBREV has magic semantics here, applying to the current SETBID'd
94     // block, instead of the BlockInfo block.
95 
96     BLOCKINFO_CODE_SETBID        = 1, // SETBID: [blockid#]
97                                       // The following two codes were removed
98                                       // because the PNaCl reader could read
99                                       // them, but couldn't be generated by
100                                       // the writer.
101     BLOCKINFO_CODE_BLOCKNAME     = 2, // Not used in PNaCl.
102     BLOCKINFO_CODE_SETRECORDNAME = 3  // Not used in PNaCl.
103   };
104 
105 } // End naclbitc namespace
106 
107 /// NaClBitCodeAbbrevOp - This describes one or more operands in an abbreviation.
108 /// This is actually a union of two different things:
109 ///   1. It could be a literal integer value ("the operand is always 17").
110 ///   2. It could be an encoding specification ("this operand encoded like so").
111 ///
112 class NaClBitCodeAbbrevOp {
113 public:
114   enum Encoding {
115     Literal = 0, // Value is literal value.
116     Fixed = 1,   // A fixed width field, Val specifies number of bits.
117     VBR   = 2,   // A VBR field where Val specifies the width of each chunk.
118     Array = 3,   // A sequence of fields, next field species elt encoding.
119     Char6 = 4,   // A 6-bit fixed field which maps to [a-zA-Z0-9._].
120     Encoding_MAX = Char6
121   };
122 
NaClBitCodeAbbrevOp(uint64_t V)123   explicit NaClBitCodeAbbrevOp(uint64_t V) :  Enc(Literal), Val(V) {}
124   explicit NaClBitCodeAbbrevOp(Encoding E, uint64_t Data = 0);
125 
getEncoding()126   Encoding getEncoding() const { return Enc; }
127 
isValidEncoding(uint64_t Enc)128   static bool isValidEncoding(uint64_t Enc) { return Enc <= Encoding_MAX; }
129 
getValue()130   uint64_t getValue() const { return Val; }
131 
hasValue()132   bool hasValue() const {
133     return hasValue(Enc);
134   }
hasValue(Encoding E)135   static bool hasValue(Encoding E) {
136     return E <= Encoding_MAX && HasValueArray[E];
137   }
138 
isValid()139   bool isValid() const { return isValid(Enc, Val); }
140   static bool isValid(Encoding E, uint64_t Val);
isValid(Encoding E)141   static bool isValid(Encoding E) { return isValid(E, 0); }
142 
isLiteral()143   bool isLiteral() const { return Enc == Literal; }
144 
isArrayOp()145   bool isArrayOp() const { return Enc == Array; }
146 
147   /// Returns the number of arguments expected by this abbrevation operator.
NumArguments()148   unsigned NumArguments() const {
149     if (isArrayOp())
150       return 1;
151     else
152       return 0;
153   }
154 
155   // Returns the name of the encoding
getEncodingName(Encoding E)156   static const char *getEncodingName(Encoding E) {
157     if (E > Encoding_MAX)
158       return "???";
159     return EncodingNameArray[E];
160   }
161 
162   /// Prints out the abbreviation operator to the given stream.
163   void Print(raw_ostream &Stream) const;
164 
165   /// isChar6 - Return true if this character is legal in the Char6 encoding.
isChar6(char C)166   static bool isChar6(char C) {
167     if (C >= 'a' && C <= 'z') return true;
168     if (C >= 'A' && C <= 'Z') return true;
169     if (C >= '0' && C <= '9') return true;
170     if (C == '.' || C == '_') return true;
171     return false;
172   }
EncodeChar6(char C)173   static unsigned EncodeChar6(char C) {
174     if (C >= 'a' && C <= 'z') return C-'a';
175     if (C >= 'A' && C <= 'Z') return C-'A'+26;
176     if (C >= '0' && C <= '9') return C-'0'+26+26;
177     if (C == '.')             return 62;
178     if (C == '_')             return 63;
179     llvm_unreachable("Not a value Char6 character!");
180   }
181 
DecodeChar6(unsigned V)182   static char DecodeChar6(unsigned V) {
183     assert((V & ~63) == 0 && "Not a Char6 encoded character!");
184     if (V < 26)       return V+'a';
185     if (V < 26+26)    return V-26+'A';
186     if (V < 26+26+10) return V-26-26+'0';
187     if (V == 62)      return '.';
188     if (V == 63)      return '_';
189     llvm_unreachable("Not a value Char6 character!");
190   }
191 
192   /// \brief Compares this to Op. Returns <0 if this is less than Op,
193   /// Returns 0 if they are equal, and >0 if this is greater than Op.
Compare(const NaClBitCodeAbbrevOp & Op)194   int Compare(const NaClBitCodeAbbrevOp &Op) const {
195     // Compare encoding values.
196     int EncodingDiff = static_cast<int>(Enc) - static_cast<int>(Op.Enc);
197     if (EncodingDiff != 0) return EncodingDiff;
198 
199     // Encodings don't differ, so now base on data associated with the
200     // encoding.
201     return ValCompare(Op);
202   }
203 
204 private:
205   Encoding Enc;           // The encoding to use.
206   uint64_t Val;           // Data associated with encoding (if any).
207 
ValCompare(const NaClBitCodeAbbrevOp & Op)208   int ValCompare(const NaClBitCodeAbbrevOp &Op) const {
209     if (Val < Op.Val)
210       return -1;
211     else if (Val > Op.Val)
212       return 1;
213     else
214       return 0;
215   }
216   static const bool HasValueArray[];
217   static const char *EncodingNameArray[];
218 };
219 
220 template <> struct isPodLike<NaClBitCodeAbbrevOp> {
221   static const bool value=true;
222 };
223 
224 static inline bool operator<(const NaClBitCodeAbbrevOp &Op1,
225                              const NaClBitCodeAbbrevOp &Op2) {
226   return Op1.Compare(Op2) < 0;
227 }
228 
229 static inline bool operator<=(const NaClBitCodeAbbrevOp &Op1,
230                               const NaClBitCodeAbbrevOp &Op2) {
231   return Op1.Compare(Op2) <= 0;
232 }
233 
234 static inline bool operator==(const NaClBitCodeAbbrevOp &Op1,
235                               const NaClBitCodeAbbrevOp &Op2) {
236   return Op1.Compare(Op2) == 0;
237 }
238 
239 static inline bool operator!=(const NaClBitCodeAbbrevOp &Op1,
240                               const NaClBitCodeAbbrevOp &Op2) {
241   return Op1.Compare(Op2) != 0;
242 }
243 
244 static inline bool operator>=(const NaClBitCodeAbbrevOp &Op1,
245                               const NaClBitCodeAbbrevOp &Op2) {
246   return Op1.Compare(Op2) >= 0;
247 }
248 
249 static inline bool operator>(const NaClBitCodeAbbrevOp &Op1,
250                              const NaClBitCodeAbbrevOp &Op2) {
251   return Op1.Compare(Op2) > 0;
252 }
253 
254 /// NaClBitCodeAbbrev - This class represents an abbreviation record.  An
255 /// abbreviation allows a complex record that has redundancy to be stored in a
256 /// specialized format instead of the fully-general, fully-vbr, format.
257 class NaClBitCodeAbbrev {
258   SmallVector<NaClBitCodeAbbrevOp, 8> OperandList;
259   unsigned char RefCount; // Number of things using this.
260   ~NaClBitCodeAbbrev() {}
261 public:
262   NaClBitCodeAbbrev() : RefCount(1) {}
263 
264   void addRef() { ++RefCount; }
265   void dropRef() { if (--RefCount == 0) delete this; }
266 
267   unsigned getNumOperandInfos() const {
268     return static_cast<unsigned>(OperandList.size());
269   }
270   const NaClBitCodeAbbrevOp &getOperandInfo(unsigned N) const {
271     return OperandList[N];
272   }
273 
274   void Add(const NaClBitCodeAbbrevOp &OpInfo) {
275     OperandList.push_back(OpInfo);
276   }
277 
278   // Returns a simplified version of the abbreviation. Used
279   // to recognize equivalent abbrevations.
280   NaClBitCodeAbbrev *Simplify() const;
281 
282   // Returns true if the abbreviation is valid wrt to the bitcode reader.
283   bool isValid() const;
284 
285   int Compare(const NaClBitCodeAbbrev &Abbrev) const {
286     // First order based on number of operands.
287     size_t OperandListSize = OperandList.size();
288     size_t AbbrevOperandListSize = Abbrev.OperandList.size();
289     if (OperandListSize < AbbrevOperandListSize)
290       return -1;
291     else if (OperandListSize > AbbrevOperandListSize)
292       return 1;
293 
294     // Same number of operands, so compare element by element.
295     for (size_t I = 0; I < OperandListSize; ++I) {
296       if (int Diff = OperandList[I].Compare(Abbrev.OperandList[I]))
297         return Diff;
298     }
299     return 0;
300   }
301 
302   // Returns true if all records matching the abbreviation must be
303   // of fixed length.
304   bool IsFixedSize() const {
305     unsigned Size = getNumOperandInfos();
306     if (Size < 2) return true;
307     return !OperandList[Size-2].isArrayOp();
308   }
309 
310   // Returns the smallest record size that will match this
311   // abbreviation.
312   size_t GetMinRecordSize() const {
313     size_t Min = getNumOperandInfos();
314     if (!IsFixedSize()) Min -= 2;
315     return Min;
316   }
317 
318   void Print(raw_ostream &Stream, bool AddNewline=true) const;
319 
320   NaClBitCodeAbbrev *Copy() const {
321     NaClBitCodeAbbrev *AbbrevCopy = new NaClBitCodeAbbrev();
322     for (unsigned I = 0, IEnd = getNumOperandInfos();
323          I != IEnd; ++I) {
324       AbbrevCopy->Add(NaClBitCodeAbbrevOp(getOperandInfo(I)));
325     }
326     return AbbrevCopy;
327   }
328 };
329 
330 static inline bool operator<(const NaClBitCodeAbbrev &A1,
331                              const NaClBitCodeAbbrev &A2) {
332   return A1.Compare(A2) < 0;
333 }
334 
335 static inline bool operator<=(const NaClBitCodeAbbrev &A1,
336                               const NaClBitCodeAbbrev &A2) {
337   return A1.Compare(A2) <= 0;
338 }
339 static inline bool operator==(const NaClBitCodeAbbrev &A1,
340                               const NaClBitCodeAbbrev &A2) {
341   return A1.Compare(A2) == 0;
342 }
343 
344 static inline bool operator!=(const NaClBitCodeAbbrev &A1,
345                               const NaClBitCodeAbbrev &A2) {
346   return A1.Compare(A2) != 0;
347 }
348 static inline bool operator>=(const NaClBitCodeAbbrev &A1,
349                               const NaClBitCodeAbbrev &A2) {
350   return A1.Compare(A2) >= 0;
351 }
352 
353 static inline bool operator>(const NaClBitCodeAbbrev &A1,
354                              const NaClBitCodeAbbrev &A2) {
355   return A1.Compare(A2) > 0;
356 }
357 
358 /// \brief Returns number of bits needed to encode
359 /// value for dense FIXED encoding.
360 inline unsigned NaClBitsNeededForValue(unsigned Value) {
361   // Note: Need to handle case where Value=0xFFFFFFFF as special case,
362   // since we can't add 1 to it.
363   if (Value >= 0x80000000) return 32;
364   return Log2_32_Ceil(Value+1);
365 }
366 
/// \brief Encode a signed value by moving the sign to the LSB for dense
/// VBR encoding.
///
/// The magnitude of V is stored in the bits above bit 0, and bit 0 is set
/// when V is negative. All arithmetic is done on the unsigned bit pattern:
/// negating INT64_MIN or left-shifting a large magnitude as a signed value
/// would be undefined behavior, whereas unsigned arithmetic wraps. As a
/// result INT64_MIN encodes to 1 (i.e. "-0"), which is the pattern that
/// NaClDecodeSignRotatedValue maps back to MININT.
inline uint64_t NaClEncodeSignRotatedValue(int64_t V) {
  const uint64_t Bits = static_cast<uint64_t>(V);
  if (V >= 0)
    return Bits << 1;
  // 0 - Bits is the two's complement negation, i.e. the magnitude of V.
  return ((0 - Bits) << 1) | 1;
}
372 
/// \brief Decode a signed value stored with the sign bit in
/// the LSB for dense VBR encoding.
///
/// The result is returned as the two's complement bit pattern of the
/// decoded value.
inline uint64_t NaClDecodeSignRotatedValue(uint64_t V) {
  const uint64_t Magnitude = V >> 1;
  const bool IsNegative = (V & 1) != 0;
  if (!IsNegative)
    return Magnitude;
  // There is no such thing as -0 with integers.  "-0" really means MININT.
  if (Magnitude == 0)
    return 1ULL << 63;
  return 0 - Magnitude;
}
383 
384 /// \brief This class determines whether a FIXED or VBR
385 /// abbreviation should be used for the selector, and the number of bits
386 /// needed to capture such selectors.
387 class NaClBitcodeSelectorAbbrev {
388 
389 public:
390   // If true, use a FIXED abbreviation. Otherwise, use a VBR abbreviation.
391   bool IsFixed;
392   // Number of bits needed for selector.
393   unsigned NumBits;
394 
395   // Creates a selector range for the given values.
396   NaClBitcodeSelectorAbbrev(bool IF, unsigned NB)
397       : IsFixed(IF), NumBits(NB) {}
398 
399   // Creates a selector range when no abbreviations are defined.
400   NaClBitcodeSelectorAbbrev()
401       : IsFixed(true),
402         NumBits(NaClBitsNeededForValue(naclbitc::DEFAULT_MAX_ABBREV)) {}
403 
404   // Creates a selector range to handle fixed abbrevations up to
405   // the specified value.
406   explicit NaClBitcodeSelectorAbbrev(unsigned MaxAbbrev)
407       : IsFixed(true),
408         NumBits(NaClBitsNeededForValue(MaxAbbrev)) {}
409 };
410 } // End llvm namespace
411 
412 #endif
413