1 //===- BinaryStreamArray.h - Array backed by an arbitrary stream *- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #ifndef LLVM_SUPPORT_BINARYSTREAMARRAY_H
11 #define LLVM_SUPPORT_BINARYSTREAMARRAY_H
12 
13 #include "llvm/ADT/ArrayRef.h"
14 #include "llvm/ADT/iterator.h"
15 #include "llvm/Support/BinaryStreamRef.h"
16 #include "llvm/Support/Error.h"
17 #include <cassert>
18 #include <cstdint>
19 
20 /// Lightweight arrays that are backed by an arbitrary BinaryStream.  This file
21 /// provides two different array implementations.
22 ///
23 ///     VarStreamArray - Arrays of variable length records.  The user specifies
24 ///       an Extractor type that can extract a record from a given offset and
25 ///       return the number of bytes consumed by the record.
26 ///
27 ///     FixedStreamArray - Arrays of fixed length records.  This is similar in
28 ///       spirit to ArrayRef<T>, but since it is backed by a BinaryStream, the
29 ///       elements of the array need not be laid out in contiguous memory.
30 namespace llvm {
31 
32 /// VarStreamArrayExtractor is intended to be specialized to provide customized
33 /// extraction logic.  On input it receives a BinaryStreamRef pointing to the
34 /// beginning of the next record, but where the length of the record is not yet
35 /// known.  Upon completion, it should return an appropriate Error instance if
36 /// a record could not be extracted, or if one could be extracted it should
37 /// return success and set Len to the number of bytes this record occupied in
38 /// the underlying stream, and it should fill out the fields of the value type
39 /// Item appropriately to represent the current record.
40 ///
41 /// You can specialize this template for your own custom value types to avoid
42 /// having to specify a second template argument to VarStreamArray (documented
43 /// below).
44 template <typename T> struct VarStreamArrayExtractor {
45   // Method intentionally deleted.  You must provide an explicit specialization
46   // with the following method implemented.
47   Error operator()(BinaryStreamRef Stream, uint32_t &Len,
48                    T &Item) const = delete;
49 };
50 
51 /// VarStreamArray represents an array of variable length records backed by a
52 /// stream.  This could be a contiguous sequence of bytes in memory, it could
53 /// be a file on disk, or it could be a PDB stream where bytes are stored as
54 /// discontiguous blocks in a file.  Usually it is desirable to treat arrays
55 /// as contiguous blocks of memory, but doing so with large PDB files, for
56 /// example, could mean allocating huge amounts of memory just to allow
57 /// re-ordering of stream data to be contiguous before iterating over it.  By
58 /// abstracting this out, we need not duplicate this memory, and we can
59 /// iterate over arrays in arbitrarily formatted streams.  Elements are parsed
60 /// lazily on iteration, so there is no upfront cost associated with building
61 /// or copying a VarStreamArray, no matter how large it may be.
62 ///
63 /// You create a VarStreamArray by specifying a ValueType and an Extractor type.
64 /// If you do not specify an Extractor type, you are expected to specialize
65 /// VarStreamArrayExtractor<T> for your ValueType.
66 ///
67 /// By default an Extractor is default constructed in the class, but in some
68 /// cases you might find it useful for an Extractor to maintain state across
69 /// extractions.  In this case you can provide your own Extractor through a
70 /// secondary constructor.  The following examples show various ways of
71 /// creating a VarStreamArray.
72 ///
73 ///       // Will use VarStreamArrayExtractor<MyType> as the extractor.
74 ///       VarStreamArray<MyType> MyTypeArray;
75 ///
76 ///       // Will use a default-constructed MyExtractor as the extractor.
77 ///       VarStreamArray<MyType, MyExtractor> MyTypeArray2;
78 ///
79 ///       // Will use the specific instance of MyExtractor provided.
80 ///       // MyExtractor need not be default-constructible in this case.
81 ///       MyExtractor E(SomeContext);
82 ///       VarStreamArray<MyType, MyExtractor> MyTypeArray3(E);
83 ///
84 
85 template <typename ValueType, typename Extractor> class VarStreamArrayIterator;
86 
87 template <typename ValueType,
88           typename Extractor = VarStreamArrayExtractor<ValueType>>
89 class VarStreamArray {
90   friend class VarStreamArrayIterator<ValueType, Extractor>;
91 
92 public:
93   typedef VarStreamArrayIterator<ValueType, Extractor> Iterator;
94 
95   VarStreamArray() = default;
96 
VarStreamArray(const Extractor & E)97   explicit VarStreamArray(const Extractor &E) : E(E) {}
98 
VarStreamArray(BinaryStreamRef Stream)99   explicit VarStreamArray(BinaryStreamRef Stream) : Stream(Stream) {}
100 
VarStreamArray(BinaryStreamRef Stream,const Extractor & E)101   VarStreamArray(BinaryStreamRef Stream, const Extractor &E)
102       : Stream(Stream), E(E) {}
103 
104   Iterator begin(bool *HadError = nullptr) const {
105     return Iterator(*this, E, HadError);
106   }
107 
valid()108   bool valid() const { return Stream.valid(); }
109 
end()110   Iterator end() const { return Iterator(E); }
111 
empty()112   bool empty() const { return Stream.getLength() == 0; }
113 
114   /// given an offset into the array's underlying stream, return an
115   /// iterator to the record at that offset.  This is considered unsafe
116   /// since the behavior is undefined if \p Offset does not refer to the
117   /// beginning of a valid record.
at(uint32_t Offset)118   Iterator at(uint32_t Offset) const {
119     return Iterator(*this, E, Offset, nullptr);
120   }
121 
getExtractor()122   const Extractor &getExtractor() const { return E; }
getExtractor()123   Extractor &getExtractor() { return E; }
124 
getUnderlyingStream()125   BinaryStreamRef getUnderlyingStream() const { return Stream; }
setUnderlyingStream(BinaryStreamRef S)126   void setUnderlyingStream(BinaryStreamRef S) { Stream = S; }
127 
128 private:
129   BinaryStreamRef Stream;
130   Extractor E;
131 };
132 
133 template <typename ValueType, typename Extractor>
134 class VarStreamArrayIterator
135     : public iterator_facade_base<VarStreamArrayIterator<ValueType, Extractor>,
136                                   std::forward_iterator_tag, ValueType> {
137   typedef VarStreamArrayIterator<ValueType, Extractor> IterType;
138   typedef VarStreamArray<ValueType, Extractor> ArrayType;
139 
140 public:
VarStreamArrayIterator(const ArrayType & Array,const Extractor & E,bool * HadError)141   VarStreamArrayIterator(const ArrayType &Array, const Extractor &E,
142                          bool *HadError)
143       : VarStreamArrayIterator(Array, E, 0, HadError) {}
144 
VarStreamArrayIterator(const ArrayType & Array,const Extractor & E,uint32_t Offset,bool * HadError)145   VarStreamArrayIterator(const ArrayType &Array, const Extractor &E,
146                          uint32_t Offset, bool *HadError)
147       : IterRef(Array.Stream.drop_front(Offset)), Extract(E),
148         Array(&Array), AbsOffset(Offset), HadError(HadError) {
149     if (IterRef.getLength() == 0)
150       moveToEnd();
151     else {
152       auto EC = Extract(IterRef, ThisLen, ThisValue);
153       if (EC) {
154         consumeError(std::move(EC));
155         markError();
156       }
157     }
158   }
159 
160   VarStreamArrayIterator() = default;
VarStreamArrayIterator(const Extractor & E)161   explicit VarStreamArrayIterator(const Extractor &E) : Extract(E) {}
162   ~VarStreamArrayIterator() = default;
163 
164   bool operator==(const IterType &R) const {
165     if (Array && R.Array) {
166       // Both have a valid array, make sure they're same.
167       assert(Array == R.Array);
168       return IterRef == R.IterRef;
169     }
170 
171     // Both iterators are at the end.
172     if (!Array && !R.Array)
173       return true;
174 
175     // One is not at the end and one is.
176     return false;
177   }
178 
179   const ValueType &operator*() const {
180     assert(Array && !HasError);
181     return ThisValue;
182   }
183 
184   ValueType &operator*() {
185     assert(Array && !HasError);
186     return ThisValue;
187   }
188 
189   IterType &operator+=(unsigned N) {
190     for (unsigned I = 0; I < N; ++I) {
191       // We are done with the current record, discard it so that we are
192       // positioned at the next record.
193       AbsOffset += ThisLen;
194       IterRef = IterRef.drop_front(ThisLen);
195       if (IterRef.getLength() == 0) {
196         // There is nothing after the current record, we must make this an end
197         // iterator.
198         moveToEnd();
199       } else {
200         // There is some data after the current record.
201         auto EC = Extract(IterRef, ThisLen, ThisValue);
202         if (EC) {
203           consumeError(std::move(EC));
204           markError();
205         } else if (ThisLen == 0) {
206           // An empty record? Make this an end iterator.
207           moveToEnd();
208         }
209       }
210     }
211     return *this;
212   }
213 
offset()214   uint32_t offset() const { return AbsOffset; }
getRecordLength()215   uint32_t getRecordLength() const { return ThisLen; }
216 
217 private:
moveToEnd()218   void moveToEnd() {
219     Array = nullptr;
220     ThisLen = 0;
221   }
markError()222   void markError() {
223     moveToEnd();
224     HasError = true;
225     if (HadError != nullptr)
226       *HadError = true;
227   }
228 
229   ValueType ThisValue;
230   BinaryStreamRef IterRef;
231   Extractor Extract;
232   const ArrayType *Array{nullptr};
233   uint32_t ThisLen{0};
234   uint32_t AbsOffset{0};
235   bool HasError{false};
236   bool *HadError{nullptr};
237 };
238 
239 template <typename T> class FixedStreamArrayIterator;
240 
241 /// FixedStreamArray is similar to VarStreamArray, except with each record
242 /// having a fixed-length.  As with VarStreamArray, there is no upfront
243 /// cost associated with building or copying a FixedStreamArray, as the
244 /// memory for each element is not read from the backing stream until that
245 /// element is iterated.
246 template <typename T> class FixedStreamArray {
247   friend class FixedStreamArrayIterator<T>;
248 
249 public:
250   typedef FixedStreamArrayIterator<T> Iterator;
251 
252   FixedStreamArray() = default;
FixedStreamArray(BinaryStreamRef Stream)253   explicit FixedStreamArray(BinaryStreamRef Stream) : Stream(Stream) {
254     assert(Stream.getLength() % sizeof(T) == 0);
255   }
256 
257   bool operator==(const FixedStreamArray<T> &Other) const {
258     return Stream == Other.Stream;
259   }
260 
261   bool operator!=(const FixedStreamArray<T> &Other) const {
262     return !(*this == Other);
263   }
264 
265   FixedStreamArray &operator=(const FixedStreamArray &) = default;
266 
267   const T &operator[](uint32_t Index) const {
268     assert(Index < size());
269     uint32_t Off = Index * sizeof(T);
270     ArrayRef<uint8_t> Data;
271     if (auto EC = Stream.readBytes(Off, sizeof(T), Data)) {
272       assert(false && "Unexpected failure reading from stream");
273       // This should never happen since we asserted that the stream length was
274       // an exact multiple of the element size.
275       consumeError(std::move(EC));
276     }
277     assert(llvm::alignmentAdjustment(Data.data(), alignof(T)) == 0);
278     return *reinterpret_cast<const T *>(Data.data());
279   }
280 
size()281   uint32_t size() const { return Stream.getLength() / sizeof(T); }
282 
empty()283   bool empty() const { return size() == 0; }
284 
begin()285   FixedStreamArrayIterator<T> begin() const {
286     return FixedStreamArrayIterator<T>(*this, 0);
287   }
288 
end()289   FixedStreamArrayIterator<T> end() const {
290     return FixedStreamArrayIterator<T>(*this, size());
291   }
292 
front()293   const T &front() const { return *begin(); }
back()294   const T &back() const {
295     FixedStreamArrayIterator<T> I = end();
296     return *(--I);
297   }
298 
getUnderlyingStream()299   BinaryStreamRef getUnderlyingStream() const { return Stream; }
300 
301 private:
302   BinaryStreamRef Stream;
303 };
304 
305 template <typename T>
306 class FixedStreamArrayIterator
307     : public iterator_facade_base<FixedStreamArrayIterator<T>,
308                                   std::random_access_iterator_tag, const T> {
309 
310 public:
FixedStreamArrayIterator(const FixedStreamArray<T> & Array,uint32_t Index)311   FixedStreamArrayIterator(const FixedStreamArray<T> &Array, uint32_t Index)
312       : Array(Array), Index(Index) {}
313 
314   FixedStreamArrayIterator<T> &
315   operator=(const FixedStreamArrayIterator<T> &Other) {
316     Array = Other.Array;
317     Index = Other.Index;
318     return *this;
319   }
320 
321   const T &operator*() const { return Array[Index]; }
322   const T &operator*() { return Array[Index]; }
323 
324   bool operator==(const FixedStreamArrayIterator<T> &R) const {
325     assert(Array == R.Array);
326     return (Index == R.Index) && (Array == R.Array);
327   }
328 
329   FixedStreamArrayIterator<T> &operator+=(std::ptrdiff_t N) {
330     Index += N;
331     return *this;
332   }
333 
334   FixedStreamArrayIterator<T> &operator-=(std::ptrdiff_t N) {
335     assert(std::ptrdiff_t(Index) >= N);
336     Index -= N;
337     return *this;
338   }
339 
340   std::ptrdiff_t operator-(const FixedStreamArrayIterator<T> &R) const {
341     assert(Array == R.Array);
342     assert(Index >= R.Index);
343     return Index - R.Index;
344   }
345 
346   bool operator<(const FixedStreamArrayIterator<T> &RHS) const {
347     assert(Array == RHS.Array);
348     return Index < RHS.Index;
349   }
350 
351 private:
352   FixedStreamArray<T> Array;
353   uint32_t Index;
354 };
355 
356 } // namespace llvm
357 
358 #endif // LLVM_SUPPORT_BINARYSTREAMARRAY_H
359