1 /*
2  * Copyright (C) 2019 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef SRC_TRACE_PROCESSOR_STRING_POOL_H_
18 #define SRC_TRACE_PROCESSOR_STRING_POOL_H_
19 
20 #include "perfetto/base/paged_memory.h"
21 #include "src/trace_processor/null_term_string_view.h"
22 
23 #include <unordered_map>
24 #include <vector>
25 
26 namespace perfetto {
27 namespace trace_processor {
28 
29 // Interns strings in a string pool and hands out compact StringIds which can
30 // be used to retrieve the string in O(1).
31 // On 64-bit platforms, the string pool is implemented as a mmaped buffer
32 // of 4GB with the id being equal ot the offset into this buffer of the string.
33 // On 32-bit platforms instead, the implementation allocates 32MB blocks of
34 // mmaped memory with the pointer being directly converted to the id.
35 class StringPool {
36  public:
37   using Id = uint32_t;
38 
39   // Iterator over the strings in the pool.
40   class Iterator {
41    public:
42     Iterator(const StringPool*);
43 
44     explicit operator bool() const;
45     Iterator& operator++();
46 
47     NullTermStringView StringView();
48     Id StringId();
49 
50    private:
51     const StringPool* pool_ = nullptr;
52     uint32_t block_id_ = 0;
53     uint32_t block_offset_ = 0;
54   };
55 
56   StringPool();
57   ~StringPool();
58 
59   // Allow std::move().
60   StringPool(StringPool&&) noexcept;
61   StringPool& operator=(StringPool&&);
62 
63   // Disable implicit copy.
64   StringPool(const StringPool&) = delete;
65   StringPool& operator=(const StringPool&) = delete;
66 
InternString(base::StringView str)67   Id InternString(base::StringView str) {
68     if (str.data() == nullptr)
69       return 0;
70 
71     auto hash = str.Hash();
72     auto id_it = string_index_.find(hash);
73     if (id_it != string_index_.end()) {
74       PERFETTO_DCHECK(Get(id_it->second) == str);
75       return id_it->second;
76     }
77     return InsertString(str, hash);
78   }
79 
Get(Id id)80   NullTermStringView Get(Id id) const {
81     if (id == 0)
82       return NullTermStringView();
83     return GetFromPtr(IdToPtr(id));
84   }
85 
CreateIterator()86   Iterator CreateIterator() const { return Iterator(this); }
87 
size()88   size_t size() const { return string_index_.size(); }
89 
90  private:
91   using StringHash = uint64_t;
92   struct Block {
BlockBlock93     Block() : mem_(base::PagedMemory::Allocate(kBlockSize)) {}
94     ~Block() = default;
95 
96     // Allow std::move().
97     Block(Block&&) noexcept = default;
98     Block& operator=(Block&&) = default;
99 
100     // Disable implicit copy.
101     Block(const Block&) = delete;
102     Block& operator=(const Block&) = delete;
103 
GetBlock104     uint8_t* Get(uint32_t offset) const {
105       return static_cast<uint8_t*>(mem_.Get()) + offset;
106     }
107 
108     uint8_t* TryInsert(base::StringView str);
109 
OffsetOfBlock110     uint32_t OffsetOf(uint8_t* ptr) const {
111       PERFETTO_DCHECK(Get(0) < ptr && ptr < Get(kBlockSize - 1));
112       return static_cast<uint32_t>(ptr - Get(0));
113     }
114 
posBlock115     uint32_t pos() const { return pos_; }
116 
117    private:
118     static constexpr size_t kBlockSize =
119         sizeof(void*) == 8
120             ? static_cast<size_t>(4ull * 1024ull * 1024ull * 1024ull) /* 4GB */
121             : 32ull * 1024ull * 1024ull /* 32MB */;
122 
123     base::PagedMemory mem_;
124     uint32_t pos_ = 0;
125   };
126 
127   friend class Iterator;
128 
129   // Number of bytes to reserve for size and null terminator.
130   static constexpr uint8_t kMetadataSize = 3;
131 
132   // Inserts the string with the given hash into the pool
133   Id InsertString(base::StringView, uint64_t hash);
134 
135   // |ptr| should point to the start of the string metadata (i.e. the first byte
136   // of the size).
PtrToId(uint8_t * ptr)137   Id PtrToId(uint8_t* ptr) const {
138     // For a 64 bit architecture, the id is the offset of the pointer inside
139     // the one and only 4GB block.
140     if (sizeof(void*) == 8) {
141       PERFETTO_DCHECK(blocks_.size() == 1);
142       return blocks_.back().OffsetOf(ptr);
143     }
144 
145     // On 32 bit architectures, the size of the pointer is 32-bit so we simply
146     // use the pointer itself as the id.
147     // Double cast needed because, on 64 archs, the compiler complains that we
148     // are losing information.
149     return static_cast<Id>(reinterpret_cast<uintptr_t>(ptr));
150   }
151 
152   // THe returned pointer points to the start of the string metadata (i.e. the
153   // first byte of the size).
IdToPtr(Id id)154   uint8_t* IdToPtr(Id id) const {
155     // For a 64 bit architecture, the pointer is simply the found by taking
156     // the base of the 4GB block and adding the offset given by |id|.
157     if (sizeof(void*) == 8) {
158       PERFETTO_DCHECK(blocks_.size() == 1);
159       return blocks_.back().Get(id);
160     }
161     // On a 32 bit architecture, the pointer is the same as the id.
162     return reinterpret_cast<uint8_t*>(id);
163   }
164 
165   // |ptr| should point to the start of the string metadata (i.e. the first byte
166   // of the size).
GetSize(uint8_t * ptr)167   static uint16_t GetSize(uint8_t* ptr) {
168     // The size is simply memcpyed into the byte buffer when writing.
169     uint16_t size;
170     memcpy(&size, ptr, sizeof(uint16_t));
171     return size;
172   }
173 
174   // |ptr| should point to the start of the string metadata (i.e. the first byte
175   // of the size).
GetFromPtr(uint8_t * ptr)176   static NullTermStringView GetFromPtr(uint8_t* ptr) {
177     // With the first two bytes being used for the size, the string starts from
178     // byte 3.
179     return NullTermStringView(reinterpret_cast<char*>(&ptr[2]), GetSize(ptr));
180   }
181 
182   // The actual memory storing the strings.
183   std::vector<Block> blocks_;
184 
185   // Maps hashes of strings to the Id in the string pool.
186   // TODO(lalitm): At some point we should benchmark just using a static
187   // hashtable of 1M elements, we can afford paying a fixed 8MB here
188   std::unordered_map<StringHash, Id> string_index_;
189 };
190 
191 }  // namespace trace_processor
192 }  // namespace perfetto
193 
194 #endif  // SRC_TRACE_PROCESSOR_STRING_POOL_H_
195