1 /* 2 * Copyright (C) 2019 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef SRC_TRACE_PROCESSOR_CONTAINERS_STRING_POOL_H_ 18 #define SRC_TRACE_PROCESSOR_CONTAINERS_STRING_POOL_H_ 19 20 #include <stddef.h> 21 #include <stdint.h> 22 23 #include <limits> 24 #include <unordered_map> 25 #include <vector> 26 27 #include "perfetto/ext/base/optional.h" 28 #include "perfetto/ext/base/paged_memory.h" 29 #include "perfetto/protozero/proto_utils.h" 30 #include "src/trace_processor/containers/null_term_string_view.h" 31 32 namespace perfetto { 33 namespace trace_processor { 34 35 // Interns strings in a string pool and hands out compact StringIds which can 36 // be used to retrieve the string in O(1). 37 class StringPool { 38 public: 39 struct Id { 40 Id() = default; 41 42 bool operator==(const Id& other) const { return other.id == id; } 43 bool operator!=(const Id& other) const { return !(other == *this); } 44 bool operator<(const Id& other) const { return id < other.id; } 45 is_nullId46 bool is_null() const { return id == 0u; } 47 is_large_stringId48 bool is_large_string() const { return id & kLargeStringFlagBitMask; } 49 block_offsetId50 uint32_t block_offset() const { return id & kBlockOffsetBitMask; } 51 block_indexId52 uint32_t block_index() const { 53 return (id & kBlockIndexBitMask) >> kNumBlockOffsetBits; 54 } 55 large_string_indexId56 uint32_t large_string_index() const { 57 PERFETTO_DCHECK(is_large_string()); 58 return id & ~kLargeStringFlagBitMask; 59 } 60 raw_idId61 uint32_t raw_id() const { return id; } 62 LargeStringId63 static Id LargeString(size_t index) { 64 PERFETTO_DCHECK(index <= static_cast<uint32_t>(index)); 65 PERFETTO_DCHECK(!(index & kLargeStringFlagBitMask)); 66 return Id(kLargeStringFlagBitMask | static_cast<uint32_t>(index)); 67 } 68 BlockStringId69 static Id BlockString(size_t index, uint32_t offset) { 70 PERFETTO_DCHECK(index < (1u << (kNumBlockIndexBits + 1))); 71 PERFETTO_DCHECK(offset < (1u << (kNumBlockOffsetBits + 1))); 72 return Id(~kLargeStringFlagBitMask & 73 (static_cast<uint32_t>(index << kNumBlockOffsetBits) | 74 (offset & kBlockOffsetBitMask))); 75 } 76 RawId77 static constexpr Id Raw(uint32_t raw) { return Id(raw); } 78 NullId79 static constexpr Id Null() { return Id(0u); } 80 81 private: IdId82 constexpr Id(uint32_t i) : id(i) {} 83 84 uint32_t id; 85 }; 86 87 // Iterator over the strings in the pool. 88 class Iterator { 89 public: 90 Iterator(const StringPool*); 91 92 explicit operator bool() const; 93 Iterator& operator++(); 94 95 NullTermStringView StringView(); 96 Id StringId(); 97 98 private: 99 const StringPool* pool_ = nullptr; 100 uint32_t block_index_ = 0; 101 uint32_t block_offset_ = 0; 102 uint32_t large_strings_index_ = 0; 103 }; 104 105 StringPool(); 106 ~StringPool(); 107 108 // Allow std::move(). 109 StringPool(StringPool&&); 110 StringPool& operator=(StringPool&&); 111 112 // Disable implicit copy. 113 StringPool(const StringPool&) = delete; 114 StringPool& operator=(const StringPool&) = delete; 115 InternString(base::StringView str)116 Id InternString(base::StringView str) { 117 if (str.data() == nullptr) 118 return Id::Null(); 119 120 auto hash = str.Hash(); 121 auto id_it = string_index_.find(hash); 122 if (id_it != string_index_.end()) { 123 PERFETTO_DCHECK(Get(id_it->second) == str); 124 return id_it->second; 125 } 126 return InsertString(str, hash); 127 } 128 GetId(base::StringView str)129 base::Optional<Id> GetId(base::StringView str) const { 130 if (str.data() == nullptr) 131 return Id::Null(); 132 133 auto hash = str.Hash(); 134 auto id_it = string_index_.find(hash); 135 if (id_it != string_index_.end()) { 136 PERFETTO_DCHECK(Get(id_it->second) == str); 137 return id_it->second; 138 } 139 return base::nullopt; 140 } 141 Get(Id id)142 NullTermStringView Get(Id id) const { 143 if (id.is_null()) 144 return NullTermStringView(); 145 if (id.is_large_string()) 146 return GetLargeString(id); 147 return GetFromBlockPtr(IdToPtr(id)); 148 } 149 CreateIterator()150 Iterator CreateIterator() const { return Iterator(this); } 151 size()152 size_t size() const { return string_index_.size(); } 153 154 private: 155 using StringHash = uint64_t; 156 157 struct Block { BlockBlock158 explicit Block(size_t size) 159 : mem_(base::PagedMemory::Allocate(size, 160 base::PagedMemory::kDontCommit)), 161 size_(size) {} 162 ~Block() = default; 163 164 // Allow std::move(). 165 Block(Block&&) noexcept = default; 166 Block& operator=(Block&&) = default; 167 168 // Disable implicit copy. 169 Block(const Block&) = delete; 170 Block& operator=(const Block&) = delete; 171 GetBlock172 uint8_t* Get(uint32_t offset) const { 173 return static_cast<uint8_t*>(mem_.Get()) + offset; 174 } 175 176 std::pair<bool /*success*/, uint32_t /*offset*/> TryInsert( 177 base::StringView str); 178 OffsetOfBlock179 uint32_t OffsetOf(const uint8_t* ptr) const { 180 PERFETTO_DCHECK(Get(0) < ptr && 181 ptr <= Get(static_cast<uint32_t>(size_ - 1))); 182 return static_cast<uint32_t>(ptr - Get(0)); 183 } 184 posBlock185 uint32_t pos() const { return pos_; } 186 187 private: 188 base::PagedMemory mem_; 189 uint32_t pos_ = 0; 190 size_t size_ = 0; 191 }; 192 193 friend class Iterator; 194 friend class StringPoolTest; 195 196 // StringPool IDs are 32-bit. If the MSB is 1, the remaining bits of the ID 197 // are an index into the |large_strings_| vector. Otherwise, the next 6 bits 198 // are the index of the Block in the pool, and the remaining 25 bits the 199 // offset of the encoded string inside the pool. 200 // 201 // [31] [30:25] [24:0] 202 // | | | 203 // | | +---- offset in block (or LSB of large string index). 204 // | +------------ block index (or MSB of large string index). 205 // +------------------- 1: large string, 0: string in a Block. 206 static constexpr size_t kNumBlockIndexBits = 6; 207 static constexpr size_t kNumBlockOffsetBits = 25; 208 209 static constexpr size_t kLargeStringFlagBitMask = 1u << 31; 210 static constexpr size_t kBlockOffsetBitMask = (1u << kNumBlockOffsetBits) - 1; 211 static constexpr size_t kBlockIndexBitMask = 212 0xffffffff & ~kLargeStringFlagBitMask & ~kBlockOffsetBitMask; 213 214 static constexpr size_t kBlockSizeBytes = kBlockOffsetBitMask + 1; // 32 MB 215 216 // If a string doesn't fit into the current block, we can either start a new 217 // block or insert the string into the |large_strings_| vector. To maximize 218 // the used proportion of each block's memory, we only start a new block if 219 // the string isn't very large. 220 static constexpr size_t kMinLargeStringSizeBytes = kBlockSizeBytes / 8; 221 222 // Number of bytes to reserve for size and null terminator. 223 // This is the upper limit on metadata size: 5 bytes for max uint32, 224 // plus 1 byte for null terminator. The actual size may be lower. 225 static constexpr uint8_t kMaxMetadataSize = 6; 226 227 // Inserts the string with the given hash into the pool and return its Id. 228 Id InsertString(base::StringView, uint64_t hash); 229 230 // Insert a large string into the pool and return its Id. 231 Id InsertLargeString(base::StringView, uint64_t hash); 232 233 // The returned pointer points to the start of the string metadata (i.e. the 234 // first byte of the size). IdToPtr(Id id)235 const uint8_t* IdToPtr(Id id) const { 236 // If the MSB is set, the ID represents an index into |large_strings_|, so 237 // shouldn't be converted into a block pointer. 238 PERFETTO_DCHECK(!id.is_large_string()); 239 240 size_t block_index = id.block_index(); 241 uint32_t block_offset = id.block_offset(); 242 243 PERFETTO_DCHECK(block_index < blocks_.size()); 244 PERFETTO_DCHECK(block_offset < blocks_[block_index].pos()); 245 246 return blocks_[block_index].Get(block_offset); 247 } 248 249 // |ptr| should point to the start of the string metadata (i.e. the first byte 250 // of the size). 251 // Returns pointer to the start of the string. ReadSize(const uint8_t * ptr,uint32_t * size)252 static const uint8_t* ReadSize(const uint8_t* ptr, uint32_t* size) { 253 uint64_t value = 0; 254 const uint8_t* str_ptr = protozero::proto_utils::ParseVarInt( 255 ptr, ptr + kMaxMetadataSize, &value); 256 PERFETTO_DCHECK(str_ptr != ptr); 257 PERFETTO_DCHECK(value < std::numeric_limits<uint32_t>::max()); 258 *size = static_cast<uint32_t>(value); 259 return str_ptr; 260 } 261 262 // |ptr| should point to the start of the string metadata (i.e. the first byte 263 // of the size). GetFromBlockPtr(const uint8_t * ptr)264 static NullTermStringView GetFromBlockPtr(const uint8_t* ptr) { 265 uint32_t size = 0; 266 const uint8_t* str_ptr = ReadSize(ptr, &size); 267 return NullTermStringView(reinterpret_cast<const char*>(str_ptr), size); 268 } 269 270 // Lookup a string in the |large_strings_| vector. |id| should have the MSB 271 // set. GetLargeString(Id id)272 NullTermStringView GetLargeString(Id id) const { 273 PERFETTO_DCHECK(id.is_large_string()); 274 size_t index = id.large_string_index(); 275 PERFETTO_DCHECK(index < large_strings_.size()); 276 const std::string* str = large_strings_[index].get(); 277 return NullTermStringView(str->c_str(), str->size()); 278 } 279 280 // The actual memory storing the strings. 281 std::vector<Block> blocks_; 282 283 // Any string that is too large to fit into a Block is stored separately 284 // (inside a unique_ptr to ensure any references to it remain valid even if 285 // |large_strings_| is resized). 286 std::vector<std::unique_ptr<std::string>> large_strings_; 287 288 // Maps hashes of strings to the Id in the string pool. 289 // TODO(lalitm): At some point we should benchmark just using a static 290 // hashtable of 1M elements, we can afford paying a fixed 8MB here 291 std::unordered_map<StringHash, Id> string_index_; 292 }; 293 294 } // namespace trace_processor 295 } // namespace perfetto 296 297 template <> 298 struct std::hash<::perfetto::trace_processor::StringPool::Id> { 299 using argument_type = ::perfetto::trace_processor::StringPool::Id; 300 using result_type = size_t; 301 302 result_type operator()(const argument_type& r) const { 303 return std::hash<uint32_t>{}(r.raw_id()); 304 } 305 }; 306 307 #endif // SRC_TRACE_PROCESSOR_CONTAINERS_STRING_POOL_H_ 308