1 /* 2 * Copyright (C) 2018 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef INCLUDE_PERFETTO_PROTOZERO_PROTO_DECODER_H_ 18 #define INCLUDE_PERFETTO_PROTOZERO_PROTO_DECODER_H_ 19 20 #include <stdint.h> 21 #include <array> 22 #include <memory> 23 #include <vector> 24 25 #include "perfetto/base/logging.h" 26 #include "perfetto/base/string_view.h" 27 #include "perfetto/base/utils.h" 28 #include "perfetto/protozero/field.h" 29 #include "perfetto/protozero/proto_utils.h" 30 31 namespace protozero { 32 33 // A generic protobuf decoder. Doesn't require any knowledge about the proto 34 // schema. It tokenizes fields, retrieves their ID and type and exposes 35 // accessors to retrieve its values. 36 // It does NOT recurse in nested submessages, instead it just computes their 37 // boundaries, recursion is left to the caller. 38 // This class is designed to be used in perf-sensitive contexts. It does not 39 // allocate and does not perform any proto semantic checks (e.g. repeated / 40 // required / optional). It's supposedly safe wrt out-of-bounds memory accesses 41 // (see proto_decoder_fuzzer.cc). 42 // This class serves also as a building block for TypedProtoDecoder, used when 43 // the schema is known at compile time. 44 class ProtoDecoder { 45 public: 46 // Creates a ProtoDecoder using the given |buffer| with size |length| bytes. ProtoDecoder(const uint8_t * buffer,size_t length)47 inline ProtoDecoder(const uint8_t* buffer, size_t length) 48 : begin_(buffer), end_(buffer + length), read_ptr_(buffer) {} 49 50 // Reads the next field from the buffer and advances the read cursor. If a 51 // full field cannot be read, the returned Field will be invalid (i.e. 52 // field.valid() == false). 53 Field ReadField(); 54 55 // Finds the first field with the given id. Doesn't affect the read cursor. 56 Field FindField(uint32_t field_id); 57 58 // Resets the read cursor to the start of the buffer. Reset()59 inline void Reset() { read_ptr_ = begin_; } 60 61 // Resets the read cursor to the given position (must be within the buffer). Reset(const uint8_t * pos)62 inline void Reset(const uint8_t* pos) { 63 PERFETTO_DCHECK(pos >= begin_ && pos < end_); 64 read_ptr_ = pos; 65 } 66 67 // Returns the position of read cursor, relative to the start of the buffer. read_offset()68 inline size_t read_offset() const { 69 return static_cast<size_t>(read_ptr_ - begin_); 70 } 71 bytes_left()72 inline size_t bytes_left() const { 73 PERFETTO_DCHECK(read_ptr_ <= end_); 74 return static_cast<size_t>(end_ - read_ptr_); 75 } 76 begin()77 const uint8_t* begin() const { return begin_; } end()78 const uint8_t* end() const { return end_; } 79 80 protected: 81 const uint8_t* const begin_; 82 const uint8_t* const end_; 83 const uint8_t* read_ptr_ = nullptr; 84 }; 85 86 // An iterator-like class used to iterate through repeated fields. Used by 87 // TypedProtoDecoder. The iteration sequence is a bit counter-intuitive due to 88 // the fact that fields_[field_id] holds the *last* value of the field, not the 89 // first, but the remaining storage holds repeated fields in FIFO order. 90 // Assume that we push the 10,11,12 into a repeated field with ID=1. 91 // 92 // Decoder memory layout: [ fields storage ] [ repeated fields storage] 93 // 1st iteration: 10 94 // 2nd iteration: 11 10 95 // 3rd iteration: 12 10 11 96 // 97 // We start the iteration @ fields_[num_fields], which is the start of the 98 // repeated fields storage, proceed until the end and lastly jump @ fields_[id]. 99 class RepeatedFieldIterator { 100 public: RepeatedFieldIterator(uint32_t field_id,const Field * begin,const Field * end,const Field * last)101 RepeatedFieldIterator(uint32_t field_id, 102 const Field* begin, 103 const Field* end, 104 const Field* last) 105 : field_id_(field_id), iter_(begin), end_(end), last_(last) { 106 FindNextMatchingId(); 107 } 108 109 inline const Field* operator->() const { return &*iter_; } 110 inline const Field& operator*() const { return *iter_; } 111 inline explicit operator bool() const { return iter_ != end_; } 112 113 RepeatedFieldIterator& operator++() { 114 PERFETTO_DCHECK(iter_ != end_); 115 if (iter_ == last_) { 116 iter_ = end_; 117 return *this; 118 } 119 ++iter_; 120 FindNextMatchingId(); 121 return *this; 122 } 123 124 private: FindNextMatchingId()125 inline void FindNextMatchingId() { 126 PERFETTO_DCHECK(iter_ != last_); 127 for (; iter_ != end_; ++iter_) { 128 if (iter_->id() == field_id_) 129 return; 130 } 131 iter_ = last_->valid() ? last_ : end_; 132 } 133 134 uint32_t field_id_; 135 136 // Initially points to the beginning of the repeated field storage, then is 137 // incremented as we call operator++(). 138 const Field* iter_; 139 140 // Always points to fields_[size_], i.e. past the end of the storage. 141 const Field* end_; 142 143 // Always points to fields_[field_id]. 144 const Field* last_; 145 }; 146 147 // This decoder loads all fields upfront, without recursing in nested messages. 148 // It is used as a base class for typed decoders generated by the pbzero plugin. 149 // The split between TypedProtoDecoderBase and TypedProtoDecoder<> is to have 150 // unique definition of functions like ParseAllFields() and ExpandHeapStorage(). 151 // The storage (either on-stack or on-heap) for this class is organized as 152 // follows: 153 // |-------------------------- fields_ ----------------------| 154 // [ field 0 (invalid) ] [ fields 1 .. N ] [ repeated fields ] 155 // ^ ^ 156 // num_fields_ size_ 157 class TypedProtoDecoderBase : public ProtoDecoder { 158 public: 159 // If the field |id| is known at compile time, prefer the templated 160 // specialization at<kFieldNumber>(). Get(uint32_t id)161 inline const Field& Get(uint32_t id) { 162 return PERFETTO_LIKELY(id < num_fields_) ? fields_[id] : fields_[0]; 163 } 164 165 // Returns an object that allows to iterate over all instances of a repeated 166 // field given its id. Example usage: 167 // for (auto it = decoder.GetRepeated(N); it; ++it) { ... } GetRepeated(uint32_t field_id)168 inline RepeatedFieldIterator GetRepeated(uint32_t field_id) const { 169 return RepeatedFieldIterator(field_id, &fields_[num_fields_], 170 &fields_[size_], &fields_[field_id]); 171 } 172 173 protected: TypedProtoDecoderBase(Field * storage,uint32_t num_fields,uint32_t capacity,const uint8_t * buffer,size_t length)174 TypedProtoDecoderBase(Field* storage, 175 uint32_t num_fields, 176 uint32_t capacity, 177 const uint8_t* buffer, 178 size_t length) 179 : ProtoDecoder(buffer, length), 180 fields_(storage), 181 num_fields_(num_fields), 182 size_(num_fields), 183 capacity_(capacity) { 184 // The reason why Field needs to be trivially de/constructible is to avoid 185 // implicit initializers on all the ~1000 entries. We need it to initialize 186 // only on the first |max_field_id| fields, the remaining capacity doesn't 187 // require initialization. 188 static_assert(PERFETTO_IS_TRIVIALLY_CONSTRUCTIBLE(Field) && 189 std::is_trivially_destructible<Field>::value && 190 std::is_trivial<Field>::value, 191 "Field must be a trivial aggregate type"); 192 memset(fields_, 0, sizeof(Field) * num_fields_); 193 } 194 195 void ParseAllFields(); 196 197 // Called when the default on-stack storage is exhausted and new repeated 198 // fields need to be pushed. 199 void ExpandHeapStorage(); 200 201 // Used only in presence of a large number of repeated fields, when the 202 // default on-stack storage is exhausted. 203 std::unique_ptr<Field[]> heap_storage_; 204 205 // Points to the storage, either on-stack (default, provided by the template 206 // specialization) or |heap_storage_| after ExpandHeapStorage() is called, in 207 // case of a large number of repeated fields. 208 Field* fields_; 209 210 // Number of fields without accounting repeated storage. This is equal to 211 // MAX_FIELD_ID + 1 (to account for the invalid 0th field). 212 // This value is always <= size_ (and hence <= capacity); 213 uint32_t num_fields_; 214 215 // Number of active |fields_| entries. This is initially equal to the highest 216 // number of fields for the message (num_fields_ == MAX_FIELD_ID + 1) and can 217 // grow up to |capacity_| in the case of repeated fields. 218 uint32_t size_; 219 220 // Initially equal to kFieldsCapacity of the TypedProtoDecoder 221 // specialization. Can grow when falling back on heap-based storage, in which 222 // case it represents the size (#fields with each entry of a repeated field 223 // counted individually) of the |heap_storage_| array. 224 uint32_t capacity_; 225 }; 226 227 // Template class instantiated by the auto-generated decoder classes declared in 228 // xxx.pbzero.h files. 229 template <int MAX_FIELD_ID, bool HAS_REPEATED_FIELDS> 230 class TypedProtoDecoder : public TypedProtoDecoderBase { 231 public: TypedProtoDecoder(const uint8_t * buffer,size_t length)232 TypedProtoDecoder(const uint8_t* buffer, size_t length) 233 : TypedProtoDecoderBase(on_stack_storage_, 234 /*num_fields=*/MAX_FIELD_ID + 1, 235 kCapacity, 236 buffer, 237 length) { 238 static_assert(MAX_FIELD_ID <= kMaxDecoderFieldId, "Field ordinal too high"); 239 TypedProtoDecoderBase::ParseAllFields(); 240 } 241 242 template <uint32_t FIELD_ID> at()243 inline const Field& at() const { 244 static_assert(FIELD_ID <= MAX_FIELD_ID, "FIELD_ID > MAX_FIELD_ID"); 245 return fields_[FIELD_ID]; 246 } 247 248 private: 249 // In the case of non-repeated fields, this constant defines the highest field 250 // id we are able to decode. This is to limit the on-stack storage. 251 // In the case of repeated fields, this constant defines the max number of 252 // repeated fields that we'll be able to store before falling back on the 253 // heap. Keep this value in sync with the one in protozero_generator.cc. 254 static constexpr size_t kMaxDecoderFieldId = 999; 255 256 // If we the message has no repeated fields we need at most N Field entries 257 // in the on-stack storage, where N is the highest field id. 258 // Otherwise we need some room to store repeated fields. 259 static constexpr size_t kCapacity = 260 1 + (HAS_REPEATED_FIELDS ? kMaxDecoderFieldId : MAX_FIELD_ID); 261 262 Field on_stack_storage_[kCapacity]; 263 }; 264 265 } // namespace protozero 266 267 #endif // INCLUDE_PERFETTO_PROTOZERO_PROTO_DECODER_H_ 268