1 /*
2  * Copyright (C) 2018 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef INCLUDE_PERFETTO_PROTOZERO_PROTO_DECODER_H_
18 #define INCLUDE_PERFETTO_PROTOZERO_PROTO_DECODER_H_
19 
20 #include <stdint.h>
21 #include <array>
22 #include <memory>
23 #include <vector>
24 
25 #include "perfetto/base/logging.h"
26 #include "perfetto/base/string_view.h"
27 #include "perfetto/base/utils.h"
28 #include "perfetto/protozero/field.h"
29 #include "perfetto/protozero/proto_utils.h"
30 
31 namespace protozero {
32 
33 // A generic protobuf decoder. Doesn't require any knowledge about the proto
34 // schema. It tokenizes fields, retrieves their ID and type and exposes
35 // accessors to retrieve its values.
36 // It does NOT recurse in nested submessages, instead it just computes their
37 // boundaries, recursion is left to the caller.
38 // This class is designed to be used in perf-sensitive contexts. It does not
39 // allocate and does not perform any proto semantic checks (e.g. repeated /
40 // required / optional). It's supposedly safe wrt out-of-bounds memory accesses
41 // (see proto_decoder_fuzzer.cc).
42 // This class serves also as a building block for TypedProtoDecoder, used when
43 // the schema is known at compile time.
44 class ProtoDecoder {
45  public:
46   // Creates a ProtoDecoder using the given |buffer| with size |length| bytes.
ProtoDecoder(const uint8_t * buffer,size_t length)47   inline ProtoDecoder(const uint8_t* buffer, size_t length)
48       : begin_(buffer), end_(buffer + length), read_ptr_(buffer) {}
49 
50   // Reads the next field from the buffer and advances the read cursor. If a
51   // full field cannot be read, the returned Field will be invalid (i.e.
52   // field.valid() == false).
53   Field ReadField();
54 
55   // Finds the first field with the given id. Doesn't affect the read cursor.
56   Field FindField(uint32_t field_id);
57 
58   // Resets the read cursor to the start of the buffer.
Reset()59   inline void Reset() { read_ptr_ = begin_; }
60 
61   // Resets the read cursor to the given position (must be within the buffer).
Reset(const uint8_t * pos)62   inline void Reset(const uint8_t* pos) {
63     PERFETTO_DCHECK(pos >= begin_ && pos < end_);
64     read_ptr_ = pos;
65   }
66 
67   // Returns the position of read cursor, relative to the start of the buffer.
read_offset()68   inline size_t read_offset() const {
69     return static_cast<size_t>(read_ptr_ - begin_);
70   }
71 
bytes_left()72   inline size_t bytes_left() const {
73     PERFETTO_DCHECK(read_ptr_ <= end_);
74     return static_cast<size_t>(end_ - read_ptr_);
75   }
76 
begin()77   const uint8_t* begin() const { return begin_; }
end()78   const uint8_t* end() const { return end_; }
79 
80  protected:
81   const uint8_t* const begin_;
82   const uint8_t* const end_;
83   const uint8_t* read_ptr_ = nullptr;
84 };
85 
86 // An iterator-like class used to iterate through repeated fields. Used by
87 // TypedProtoDecoder. The iteration sequence is a bit counter-intuitive due to
88 // the fact that fields_[field_id] holds the *last* value of the field, not the
89 // first, but the remaining storage holds repeated fields in FIFO order.
90 // Assume that we push the 10,11,12 into a repeated field with ID=1.
91 //
92 // Decoder memory layout:  [  fields storage  ] [ repeated fields storage]
93 // 1st iteration:           10
94 // 2nd iteration:           11                   10
95 // 3rd iteration:           12                   10 11
96 //
97 // We start the iteration @ fields_[num_fields], which is the start of the
98 // repeated fields storage, proceed until the end and lastly jump @ fields_[id].
99 class RepeatedFieldIterator {
100  public:
RepeatedFieldIterator(uint32_t field_id,const Field * begin,const Field * end,const Field * last)101   RepeatedFieldIterator(uint32_t field_id,
102                         const Field* begin,
103                         const Field* end,
104                         const Field* last)
105       : field_id_(field_id), iter_(begin), end_(end), last_(last) {
106     FindNextMatchingId();
107   }
108 
109   inline const Field* operator->() const { return &*iter_; }
110   inline const Field& operator*() const { return *iter_; }
111   inline explicit operator bool() const { return iter_ != end_; }
112 
113   RepeatedFieldIterator& operator++() {
114     PERFETTO_DCHECK(iter_ != end_);
115     if (iter_ == last_) {
116       iter_ = end_;
117       return *this;
118     }
119     ++iter_;
120     FindNextMatchingId();
121     return *this;
122   }
123 
124  private:
FindNextMatchingId()125   inline void FindNextMatchingId() {
126     PERFETTO_DCHECK(iter_ != last_);
127     for (; iter_ != end_; ++iter_) {
128       if (iter_->id() == field_id_)
129         return;
130     }
131     iter_ = last_->valid() ? last_ : end_;
132   }
133 
134   uint32_t field_id_;
135 
136   // Initially points to the beginning of the repeated field storage, then is
137   // incremented as we call operator++().
138   const Field* iter_;
139 
140   // Always points to fields_[size_], i.e. past the end of the storage.
141   const Field* end_;
142 
143   // Always points to fields_[field_id].
144   const Field* last_;
145 };
146 
147 // This decoder loads all fields upfront, without recursing in nested messages.
148 // It is used as a base class for typed decoders generated by the pbzero plugin.
149 // The split between TypedProtoDecoderBase and TypedProtoDecoder<> is to have
150 // unique definition of functions like ParseAllFields() and ExpandHeapStorage().
151 // The storage (either on-stack or on-heap) for this class is organized as
152 // follows:
153 // |-------------------------- fields_ ----------------------|
154 // [ field 0 (invalid) ] [ fields 1 .. N ] [ repeated fields ]
155 //                                        ^                  ^
156 //                                        num_fields_        size_
157 class TypedProtoDecoderBase : public ProtoDecoder {
158  public:
159   // If the field |id| is known at compile time, prefer the templated
160   // specialization at<kFieldNumber>().
Get(uint32_t id)161   inline const Field& Get(uint32_t id) {
162     return PERFETTO_LIKELY(id < num_fields_) ? fields_[id] : fields_[0];
163   }
164 
165   // Returns an object that allows to iterate over all instances of a repeated
166   // field given its id. Example usage:
167   // for (auto it = decoder.GetRepeated(N); it; ++it) { ... }
GetRepeated(uint32_t field_id)168   inline RepeatedFieldIterator GetRepeated(uint32_t field_id) const {
169     return RepeatedFieldIterator(field_id, &fields_[num_fields_],
170                                  &fields_[size_], &fields_[field_id]);
171   }
172 
173  protected:
TypedProtoDecoderBase(Field * storage,uint32_t num_fields,uint32_t capacity,const uint8_t * buffer,size_t length)174   TypedProtoDecoderBase(Field* storage,
175                         uint32_t num_fields,
176                         uint32_t capacity,
177                         const uint8_t* buffer,
178                         size_t length)
179       : ProtoDecoder(buffer, length),
180         fields_(storage),
181         num_fields_(num_fields),
182         size_(num_fields),
183         capacity_(capacity) {
184     // The reason why Field needs to be trivially de/constructible is to avoid
185     // implicit initializers on all the ~1000 entries. We need it to initialize
186     // only on the first |max_field_id| fields, the remaining capacity doesn't
187     // require initialization.
188     static_assert(PERFETTO_IS_TRIVIALLY_CONSTRUCTIBLE(Field) &&
189                       std::is_trivially_destructible<Field>::value &&
190                       std::is_trivial<Field>::value,
191                   "Field must be a trivial aggregate type");
192     memset(fields_, 0, sizeof(Field) * num_fields_);
193   }
194 
195   void ParseAllFields();
196 
197   // Called when the default on-stack storage is exhausted and new repeated
198   // fields need to be pushed.
199   void ExpandHeapStorage();
200 
201   // Used only in presence of a large number of repeated fields, when the
202   // default on-stack storage is exhausted.
203   std::unique_ptr<Field[]> heap_storage_;
204 
205   // Points to the storage, either on-stack (default, provided by the template
206   // specialization) or |heap_storage_| after ExpandHeapStorage() is called, in
207   // case of a large number of repeated fields.
208   Field* fields_;
209 
210   // Number of fields without accounting repeated storage. This is equal to
211   // MAX_FIELD_ID + 1 (to account for the invalid 0th field).
212   // This value is always <= size_ (and hence <= capacity);
213   uint32_t num_fields_;
214 
215   // Number of active |fields_| entries. This is initially equal to the highest
216   // number of fields for the message (num_fields_ == MAX_FIELD_ID + 1) and can
217   // grow up to |capacity_| in the case of repeated fields.
218   uint32_t size_;
219 
220   // Initially equal to kFieldsCapacity of the TypedProtoDecoder
221   // specialization. Can grow when falling back on heap-based storage, in which
222   // case it represents the size (#fields with each entry of a repeated field
223   // counted individually) of the |heap_storage_| array.
224   uint32_t capacity_;
225 };
226 
227 // Template class instantiated by the auto-generated decoder classes declared in
228 // xxx.pbzero.h files.
229 template <int MAX_FIELD_ID, bool HAS_REPEATED_FIELDS>
230 class TypedProtoDecoder : public TypedProtoDecoderBase {
231  public:
TypedProtoDecoder(const uint8_t * buffer,size_t length)232   TypedProtoDecoder(const uint8_t* buffer, size_t length)
233       : TypedProtoDecoderBase(on_stack_storage_,
234                               /*num_fields=*/MAX_FIELD_ID + 1,
235                               kCapacity,
236                               buffer,
237                               length) {
238     static_assert(MAX_FIELD_ID <= kMaxDecoderFieldId, "Field ordinal too high");
239     TypedProtoDecoderBase::ParseAllFields();
240   }
241 
242   template <uint32_t FIELD_ID>
at()243   inline const Field& at() const {
244     static_assert(FIELD_ID <= MAX_FIELD_ID, "FIELD_ID > MAX_FIELD_ID");
245     return fields_[FIELD_ID];
246   }
247 
248  private:
249   // In the case of non-repeated fields, this constant defines the highest field
250   // id we are able to decode. This is to limit the on-stack storage.
251   // In the case of repeated fields, this constant defines the max number of
252   // repeated fields that we'll be able to store before falling back on the
253   // heap. Keep this value in sync with the one in protozero_generator.cc.
254   static constexpr size_t kMaxDecoderFieldId = 999;
255 
256   // If we the message has no repeated fields we need at most N Field entries
257   // in the on-stack storage, where N is the highest field id.
258   // Otherwise we need some room to store repeated fields.
259   static constexpr size_t kCapacity =
260       1 + (HAS_REPEATED_FIELDS ? kMaxDecoderFieldId : MAX_FIELD_ID);
261 
262   Field on_stack_storage_[kCapacity];
263 };
264 
265 }  // namespace protozero
266 
267 #endif  // INCLUDE_PERFETTO_PROTOZERO_PROTO_DECODER_H_
268