1 /*
2  * Copyright (C) 2017 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef INCLUDE_PERFETTO_PROTOZERO_PROTO_UTILS_H_
18 #define INCLUDE_PERFETTO_PROTOZERO_PROTO_UTILS_H_
19 
20 #include <inttypes.h>
21 #include <stddef.h>
22 
23 #include <type_traits>
24 
25 #include "perfetto/base/logging.h"
26 
27 namespace protozero {
28 namespace proto_utils {
29 
// Wire types of the protobuf binary encoding, see
// https://developers.google.com/protocol-buffers/docs/encoding.
// The wire type is stored in the encoded proto next to the field id and
// carries just enough information to find the length of the value that
// follows it.
enum class ProtoWireType : uint32_t {
  kVarInt = 0u,           // (int|uint|sint)(32|64), bool, enum.
  kFixed64 = 1u,          // 8-byte fixed-size values.
  kLengthDelimited = 2u,  // string, bytes, embedded messages.
  kFixed32 = 5u,          // 4-byte fixed-size values.
};
39 
// Schema-level type of a field, i.e. the type declared for it in the proto
// definition. It is used to decide the translation strategy when writing the
// trace.
// NOTE(review): the numeric values look like they mirror
// FieldDescriptorProto.Type from descriptor.proto; confirm before relying on
// that correspondence.
enum class ProtoSchemaType {
  kUnknown = 0,
  kDouble = 1,
  kFloat = 2,
  kInt64 = 3,
  kUint64 = 4,
  kInt32 = 5,
  kFixed64 = 6,
  kFixed32 = 7,
  kBool = 8,
  kString = 9,
  kGroup = 10,  // Deprecated (proto2 only)
  kMessage = 11,
  kBytes = 12,
  kUint32 = 13,
  kEnum = 14,
  kSfixed32 = 15,
  kSfixed64 = 16,
  kSint32 = 17,
  kSint64 = 18,
};
63 
ProtoSchemaToString(ProtoSchemaType v)64 inline const char* ProtoSchemaToString(ProtoSchemaType v) {
65   switch (v) {
66     case ProtoSchemaType::kUnknown:
67       return "unknown";
68     case ProtoSchemaType::kDouble:
69       return "double";
70     case ProtoSchemaType::kFloat:
71       return "float";
72     case ProtoSchemaType::kInt64:
73       return "int64";
74     case ProtoSchemaType::kUint64:
75       return "uint64";
76     case ProtoSchemaType::kInt32:
77       return "int32";
78     case ProtoSchemaType::kFixed64:
79       return "fixed64";
80     case ProtoSchemaType::kFixed32:
81       return "fixed32";
82     case ProtoSchemaType::kBool:
83       return "bool";
84     case ProtoSchemaType::kString:
85       return "string";
86     case ProtoSchemaType::kGroup:
87       return "group";
88     case ProtoSchemaType::kMessage:
89       return "message";
90     case ProtoSchemaType::kBytes:
91       return "bytes";
92     case ProtoSchemaType::kUint32:
93       return "uint32";
94     case ProtoSchemaType::kEnum:
95       return "enum";
96     case ProtoSchemaType::kSfixed32:
97       return "sfixed32";
98     case ProtoSchemaType::kSfixed64:
99       return "sfixed64";
100     case ProtoSchemaType::kSint32:
101       return "sint32";
102     case ProtoSchemaType::kSint64:
103       return "sint64";
104   }
105   // For gcc:
106   PERFETTO_DCHECK(false);
107   return "";
108 }
109 
// The length of a message is stored in a varint of |kMessageLengthFieldSize|
// bytes. Each varint byte carries 7 payload bits, so 4 bytes give 28 bits and
// the largest encodable message length is 2^28 - 1 bytes (one byte short of
// 256 MiB).
constexpr size_t kMessageLengthFieldSize = 4;
constexpr size_t kMaxMessageLength =
    (1u << (7 * kMessageLengthFieldSize)) - 1u;

// A field tag is encoded as a 32-bit varint, which takes 5 bytes at most.
// The largest simple (i.e. not length-delimited) value is a 64-bit varint,
// which takes 10 bytes at most. Hence a 15 bytes buffer is always enough to
// store a simple field (tag + value).
constexpr size_t kMaxTagEncodedSize = 5;
constexpr size_t kMaxSimpleFieldEncodedSize = 10 + kMaxTagEncodedSize;
119 
120 // Proto types: (int|uint|sint)(32|64), bool, enum.
MakeTagVarInt(uint32_t field_id)121 constexpr uint32_t MakeTagVarInt(uint32_t field_id) {
122   return (field_id << 3) | static_cast<uint32_t>(ProtoWireType::kVarInt);
123 }
124 
125 // Proto types: fixed64, sfixed64, fixed32, sfixed32, double, float.
126 template <typename T>
MakeTagFixed(uint32_t field_id)127 constexpr uint32_t MakeTagFixed(uint32_t field_id) {
128   static_assert(sizeof(T) == 8 || sizeof(T) == 4, "Value must be 4 or 8 bytes");
129   return (field_id << 3) |
130          static_cast<uint32_t>((sizeof(T) == 8 ? ProtoWireType::kFixed64
131                                                : ProtoWireType::kFixed32));
132 }
133 
134 // Proto types: string, bytes, embedded messages.
MakeTagLengthDelimited(uint32_t field_id)135 constexpr uint32_t MakeTagLengthDelimited(uint32_t field_id) {
136   return (field_id << 3) |
137          static_cast<uint32_t>(ProtoWireType::kLengthDelimited);
138 }
139 
// Proto types: sint64, sint32.
// ZigZag-encodes the signed |value| into the unsigned type of the same width,
// interleaving negative and non-negative numbers: 0 -> 0, -1 -> 1, 1 -> 2,
// -2 -> 3, and so on.
template <typename T>
inline typename std::make_unsigned<T>::type ZigZagEncode(T value) {
  using UnsignedType = typename std::make_unsigned<T>::type;

  // Right-shifting a negative value is implementation specific. The code
  // below relies on the shift being arithmetic: shifting a non-negative value
  // by (bit width - 1) must give an all 0 bitmap, while shifting a negative
  // value must give an all 1 bitmap. Verify that assumption at compile time.
  static_assert(static_cast<uint64_t>(int64_t{-1} >> 63) == ~uint64_t{0},
                "implementation does not support assumed rightshift");
  static_assert(static_cast<uint64_t>(int64_t{1} >> 63) == uint64_t{0},
                "implementation does not support assumed rightshift");

  constexpr size_t kSignBitIndex = sizeof(T) * 8 - 1;
  // All ones if |value| is negative, all zeros otherwise.
  const UnsignedType sign_mask =
      static_cast<UnsignedType>(value >> kSignBitIndex);
  // The magnitude bits move up by one, freeing the lowest bit for the sign.
  const UnsignedType doubled =
      static_cast<UnsignedType>(static_cast<UnsignedType>(value) << 1);
  return static_cast<UnsignedType>(doubled ^ sign_mask);
}
160 
// Proto types: sint64, sint32.
// Reverses the ZigZag encoding: takes the encoded |value| and returns the
// signed integer of the same width it stands for (0 -> 0, 1 -> -1, 2 -> 1,
// 3 -> -2, and so on).
template <typename T>
inline typename std::make_signed<T>::type ZigZagDecode(T value) {
  using SignedType = typename std::make_signed<T>::type;
  using UnsignedType = typename std::make_unsigned<T>::type;

  const UnsignedType encoded = static_cast<UnsignedType>(value);
  // The lowest bit carries the sign. Negating it gives 0 or -1, i.e. an all
  // zeros or all ones mask once converted back to unsigned.
  const SignedType sign_bit = static_cast<SignedType>(encoded & 1);
  const UnsignedType flip_mask = static_cast<UnsignedType>(-sign_bit);
  // The remaining bits carry the magnitude, inverted for negative numbers.
  const UnsignedType magnitude = static_cast<UnsignedType>(encoded >> 1);
  return static_cast<SignedType>(magnitude ^ flip_mask);
}
170 
// Writes |value| as a varint starting at |target| and returns a pointer to
// the byte right after the last one written. No bounds checking is done here:
// the caller must provide enough room (a 64-bit varint takes up to 10 bytes).
template <typename T>
inline uint8_t* WriteVarInt(T value, uint8_t* target) {
  // Signed inputs are first widened to int64_t, which sign-extends negative
  // values (see [1]). The result is then converted to the matching unsigned
  // type, to avoid arithmetic (sign expanding) shifts in the loop below.
  // [1]: "If you use int32 or int64 as the type for a negative number, the
  // resulting varint is always ten bytes long".
  // - developers.google.com/protocol-buffers/docs/encoding
  // So for each input type the following casts take place:
  // uintX_t -> uintX_t -> uintX_t
  // int8_t  -> int64_t -> uint64_t
  // int16_t -> int64_t -> uint64_t
  // int32_t -> int64_t -> uint64_t
  // int64_t -> int64_t -> uint64_t
  using MaybeExtendedType =
      typename std::conditional<std::is_unsigned<T>::value, T, int64_t>::type;
  using UnsignedType = typename std::make_unsigned<MaybeExtendedType>::type;

  UnsignedType remaining =
      static_cast<UnsignedType>(static_cast<MaybeExtendedType>(value));
  uint8_t* cursor = target;

  // Every byte but the last one holds 7 payload bits plus the continuation
  // bit (0x80).
  for (; remaining >= 0x80; remaining >>= 7) {
    *cursor++ = static_cast<uint8_t>((remaining & 0x7f) | 0x80);
  }
  // The last byte has the continuation bit cleared.
  *cursor++ = static_cast<uint8_t>(remaining);
  return cursor;
}
199 
200 // Writes a fixed-size redundant encoding of the given |value|. This is
201 // used to backfill fixed-size reservations for the length field using a
202 // non-canonical varint encoding (e.g. \x81\x80\x80\x00 instead of \x01).
203 // See https://github.com/google/protobuf/issues/1530.
204 // This is used mainly in two cases:
205 // 1) At trace writing time, when starting a nested messages. The size of a
206 //    nested message is not known until all its field have been written.
207 //    |kMessageLengthFieldSize| bytes are reserved to encode the size field and
208 //    backfilled at the end.
209 // 2) When rewriting a message at trace filtering time, in protozero/filtering.
210 //    At that point we know only the upper bound of the length (a filtered
211 //    message is <= the original one) and we backfill after the message has been
212 //    filtered.
213 inline void WriteRedundantVarInt(uint32_t value,
214                                  uint8_t* buf,
215                                  size_t size = kMessageLengthFieldSize) {
216   for (size_t i = 0; i < size; ++i) {
217     const uint8_t msb = (i < size - 1) ? 0x80 : 0;
218     buf[i] = static_cast<uint8_t>(value) | msb;
219     value >>= 7;
220   }
221 }
222 
// Compile-time check that |field_id| is at most 15. It has no effect at
// runtime: instantiating it with a bigger id fails the build.
// The tag built by the MakeTag*() functions is (field_id << 3) | wire_type,
// so 15 is the largest id for which that tag stays below 0x80, i.e. fits in a
// single varint byte.
template <uint32_t field_id>
void StaticAssertSingleBytePreamble() {
  static_assert(field_id <= 15u,
                "Proto field id too big to fit in a single byte preamble");
}
228 
// Parses a VarInt from the encoded buffer [start, end). |end| is STL-style and
// points one byte past the end of buffer.
// On success the parsed value is stored in the output arg |out_value| and the
// return value points to the next unconsumed byte (so start < retval <= end).
// On failure |out_value| is set to 0 and |start| is returned. This happens
// when the buffer ends before the VarInt is terminated, or when 10 bytes have
// been consumed and the continuation bit is still set.
inline const uint8_t* ParseVarInt(const uint8_t* start,
                                  const uint8_t* end,
                                  uint64_t* out_value) {
  uint64_t accumulated = 0;
  uint32_t shift = 0;
  const uint8_t* cursor = start;
  while (cursor < end && shift < 64u) {
    // Read the byte once into a local, to prevent that the compiler
    // dereferences the pointer twice (here and in the if() below) due to
    // char* aliasing rules.
    const uint8_t cur_byte = *cursor++;
    accumulated |= static_cast<uint64_t>(cur_byte & 0x7f) << shift;
    if (!(cur_byte & 0x80)) {
      // Continuation bit cleared: this was the last byte. Valid inputs end up
      // here.
      *out_value = accumulated;
      return cursor;
    }
    shift += 7;
  }
  *out_value = 0;
  return start;
}
254 
// Tells whether a field is repeated and, if so, whether it is packed.
enum class RepetitionType {
  kNotRepeated = 0,
  kRepeatedPacked = 1,
  kRepeatedNotPacked = 2,
};
260 
// Common, data-less base of every FieldMetadata<...> instantiation. It gives
// all the templated FieldMetadata types one shared ancestor, which allows
// simple checks of whether a given type is a FieldMetadata or not.
struct FieldMetadataBase {
  constexpr FieldMetadataBase() = default;
};
266 
267 template <uint32_t field_id,
268           RepetitionType repetition_type,
269           ProtoSchemaType proto_schema_type,
270           typename CppFieldType,
271           typename MessageType>
272 struct FieldMetadata : public FieldMetadataBase {
273   constexpr FieldMetadata() = default;
274 
275   static constexpr int kFieldId = field_id;
276   // Whether this field is repeated, packed (repeated [packed-true]) or not
277   // (optional).
278   static constexpr RepetitionType kRepetitionType = repetition_type;
279   // Proto type of this field (e.g. int64, fixed32 or nested message).
280   static constexpr ProtoSchemaType kProtoFieldType = proto_schema_type;
281   // C++ type of this field (for nested messages - C++ protozero class).
282   using cpp_field_type = CppFieldType;
283   // Protozero message which this field belongs to.
284   using message_type = MessageType;
285 };
286 
namespace internal {

// Ideally we would create variables of FieldMetadata<...> type directly.
// However, until C++17's constexpr inline variables can be used, we have to
// resort to pointers to inline functions instead, to avoid having to define
// symbols in *.pbzero.cc files.
//
// Note: protozero bindings will generate a Message::kFieldName variable, which
// can then be passed to the TRACE_EVENT macro for inline writing of typed
// messages. The fact that the former can be passed to the latter is a part of
// the stable API, while the particular type is not and users should not rely
// on it.
//
// FieldMetadataHelper<T> is a pointer to a function that takes no arguments
// and returns a T.
template <typename T>
using FieldMetadataHelper = T (*)();

}  // namespace internal
302 }  // namespace proto_utils
303 }  // namespace protozero
304 
305 #endif  // INCLUDE_PERFETTO_PROTOZERO_PROTO_UTILS_H_
306