1 /* 2 * Copyright (C) 2021 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef SRC_PROTOZERO_FILTERING_FILTER_BYTECODE_PARSER_H_ 18 #define SRC_PROTOZERO_FILTERING_FILTER_BYTECODE_PARSER_H_ 19 20 #include <stddef.h> 21 #include <stdint.h> 22 23 #include <vector> 24 25 namespace protozero { 26 27 // Loads the proto-encoded bytecode in memory and allows fast lookups for tuples 28 // (msg_index, field_id) to tell if a given field should be allowed or not and, 29 // in the case of nested fields, what is the next message index to recurse into. 30 // This class does two things: 31 // 1. Expands the array of varint from the proto into a vector<uint32_t>. This 32 // is to avoid performing varint decoding on every lookup, at the cost of 33 // some extra memory (2KB-4KB). Note that the expanded vector is not just a 34 // 1:1 copy of the proto one (more below). This is to avoid O(Fields) linear 35 // lookup complexity. 36 // 2. Creates an index of offsets to remember the start word for each message. 37 // This is so we can jump to O(1) to the N-th message when recursing into a 38 // nested fields, without having to scan and find the (N-1)-th END_OF_MESSAGE 39 // marker. 40 // Overall lookups are O(1) for field ids < 128 (kDirectlyIndexLimit) and O(N), 41 // with N being the number of allowed field ranges for other fields. 42 // See comments around |word_| below for the structure of the word vector. 43 class FilterBytecodeParser { 44 public: 45 // Result of a Query() operation 46 struct QueryResult { 47 bool allowed; // Whether the field is allowed at all or no. 48 49 // If |allowed|==true && simple_field()==false, this tells the message index 50 // of the nested field that should be used when recursing in the parser. 51 uint32_t nested_msg_index; 52 53 // If |allowed|==true, specifies if the field is of a simple type (varint, 54 // fixed32/64, string or byte) or a nested field that needs recursion. 55 // In the latter case the caller is expected to use |nested_msg_index| for 56 // the next Query() calls. simple_fieldQueryResult57 bool simple_field() const { return nested_msg_index == kSimpleField; } 58 }; 59 60 // Loads a filter. The filter data consists of a sequence of varints which 61 // contains the filter opcodes and a final checksum. 62 bool Load(const void* filter_data, size_t len); 63 64 // Checks wheter a given field is allowed or not. 65 // msg_index = 0 is the index of the root message, where all queries should 66 // start from (typically perfetto.protos.Trace). 67 QueryResult Query(uint32_t msg_index, uint32_t field_id); 68 69 void Reset(); set_suppress_logs_for_fuzzer(bool x)70 void set_suppress_logs_for_fuzzer(bool x) { suppress_logs_for_fuzzer_ = x; } 71 72 private: 73 static constexpr uint32_t kDirectlyIndexLimit = 128; 74 static constexpr uint32_t kAllowed = 1u << 31u; 75 static constexpr uint32_t kSimpleField = 0x7fffffff; 76 77 bool LoadInternal(const uint8_t* filter_data, size_t len); 78 79 // The state of all fields for all messages is stored in one contiguous array. 80 // This is to avoid memory fragmentation and allocator overhead. 81 // We expect a high number of messages (hundreds), but each message is small. 82 // For each message we store two sets of uint32: 83 // 1. A set of "directly indexed" fields, for field ids < 128. 84 // 2. The remainder is a set of ranges. 85 // So each message descriptor consists of a sequence of words as follows: 86 // 87 // [0] -> how many directly indexed fields are stored next (up to 128) 88 // 89 // [1..N] -> One word per field id (See "field state" below). 90 // 91 // [N + 1] -> Start of field id range 1 92 // [N + 2] -> End of field id range 1 (exclusive, STL-style). 93 // [N + 3] -> Field state for fields in range 1 (below) 94 // 95 // [N + 4] -> Start of field id range 2 96 // [N + 5] -> End of field id range 2 (exclusive, STL-style). 97 // [N + 6] -> Field state for fields in range 2 (below) 98 99 // The "field state" word is as follows: 100 // Bit 31: 0 if the field is disallowed, 1 if allowed. 101 // Only directly indexed fields can be 0 (it doesn't make sense to add 102 // a range and then say "btw it's NOT allowed".. don't add it then. 103 // 0 is only used for filling gaps in the directly indexed bucket. 104 // Bits [30..0] (only when MSB == allowed): 105 // 0x7fffffff: The field is "simple" (varint, fixed32/64, string, bytes) and 106 // can be directly passed through in output. No recursion is needed. 107 // [0, 7ffffffe]: The field is a nested submessage. The value is the index 108 // that must be passed as first argument to the next Query() calls. 109 // Note that the message index is purely a monotonic counter in the 110 // filter bytecode, has no proto-equivalent match (unlike field ids). 111 std::vector<uint32_t> words_; 112 113 // One entry for each message index stored in the filter plus a sentinel at 114 // the end. Maps each message index to the offset in |words_| where the the 115 // Nth message start. 116 // message_offset_.size() - 2 == the max message id that can be parsed. 117 std::vector<uint32_t> message_offset_; 118 119 bool suppress_logs_for_fuzzer_ = false; 120 }; 121 122 } // namespace protozero 123 124 #endif // SRC_PROTOZERO_FILTERING_FILTER_BYTECODE_PARSER_H_ 125