/*
 * Copyright (C) 2018 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef SRC_PROFILING_MEMORY_BOOKKEEPING_H_
#define SRC_PROFILING_MEMORY_BOOKKEEPING_H_

#include <cstdint>
#include <map>
#include <string>
#include <tuple>
#include <unordered_map>
#include <utility>
#include <vector>

#include "perfetto/base/time.h"
#include "src/profiling/common/callstack_trie.h"
#include "src/profiling/common/interner.h"
#include "src/profiling/memory/unwound_messages.h"

// Below is an illustration of the bookkeeping system state where
// PID 1 does the following allocations:
// 0x123: 128 bytes at [bar main]
// 0x234: 128 bytes at [bar main]
// 0xf00: 512 bytes at [foo main]
// PID 1 allocated but previously freed 1024 bytes at [bar main]
//
// PID 2 does the following allocations:
// 0x345: 512 bytes at [foo main]
// 0x456: 32 bytes at [foo main]
// PID 2 allocated but already freed 1235 bytes at [foo main]
// PID 2 allocated and freed 2048 bytes in main.
//
// +---------------------------------+ +-------------------+
// | +---------+ HeapTracker PID 1| | GlobalCallstackTri|
// | |0x123 128+---+ +----------+ | | +---+ |
// | | | +---->alloc:1280+----------------->bar| |
// | |0x234 128+---+ |free: 1024| | | +-^-+ |
// | | | +----------+ | | +---+ ^ |
// | |0xf00 512+---+ | +----->foo| | |
// | +--------+| | +----------+ | | | +-^-+ | |
// | +---->alloc: 512+---+ | | | |
// | |free: 0| | | | +--+----+ |
// | +----------+ | | | | |
// | | | | +-+--+ |
// +---------------------------------+ | | |main| |
// | | +--+-+ |
// +---------------------------------+ | | ^ |
// | +---------+ HeapTracker PID 2| | +-------------------+
// | |0x345 512+---+ +----------+ | | |
// | | | +---->alloc:1779+---+ |
// | |0x456 32+---+ |free: 1235| | |
// | +---------+ +----------+ | |
// | | |
// | +----------+ | |
// | |alloc:2048+---------------+
// | |free: 2048| |
// | +----------+ |
// | |
// +---------------------------------+
// Allocation CallstackAllocations Node
//
// The active allocations are on the leftmost side, modeled as the class
// HeapTracker::Allocation.
//
// The total allocated and freed bytes per callsite are in the middle, modeled
// as the HeapTracker::CallstackAllocations class.
// Note that (1280 - 1024) = 256, so alloc - free is equal to the total of the
// currently active allocations.
// Note in PID 2 there is a CallstackAllocations with 2048 allocated and 2048
// freed bytes. This is not currently referenced by any Allocations (as it
// should, as 2048 - 2048 = 0, which would mean that the total size of the
// allocations referencing it should be 0). This is because we haven't dumped
// this state yet, so the CallstackAllocations will be kept around until the
// next dump, written to the trace, and then destroyed.
83 // 84 // On the right hand side is the GlobalCallstackTrie, with nodes representing 85 // distinct callstacks. They have no information about the currently allocated 86 // or freed bytes, they only contain a reference count to destroy them as 87 // soon as they are no longer referenced by a HeapTracker. 88 89 namespace perfetto { 90 namespace profiling { 91 92 // Snapshot for memory allocations of a particular process. Shares callsites 93 // with other processes. 94 class HeapTracker { 95 public: 96 struct CallstackMaxAllocations { 97 uint64_t max; 98 uint64_t cur; 99 uint64_t max_count; 100 uint64_t cur_count; 101 }; 102 struct CallstackTotalAllocations { 103 uint64_t allocated; 104 uint64_t freed; 105 uint64_t allocation_count; 106 uint64_t free_count; 107 }; 108 109 // Sum of all the allocations for a given callstack. 110 struct CallstackAllocations { CallstackAllocationsCallstackAllocations111 explicit CallstackAllocations(GlobalCallstackTrie::Node* n) : node(n) {} 112 113 uint64_t allocs = 0; 114 115 union { 116 CallstackMaxAllocations retain_max; 117 CallstackTotalAllocations totals; 118 } value = {}; 119 120 GlobalCallstackTrie::Node* const node; 121 ~CallstackAllocationsCallstackAllocations122 ~CallstackAllocations() { GlobalCallstackTrie::DecrementNode(node); } 123 124 bool operator<(const CallstackAllocations& other) const { 125 return node < other.node; 126 } 127 }; 128 129 // Caller needs to ensure that callsites outlives the HeapTracker. 
HeapTracker(GlobalCallstackTrie * callsites,bool dump_at_max_mode)130 explicit HeapTracker(GlobalCallstackTrie* callsites, bool dump_at_max_mode) 131 : callsites_(callsites), dump_at_max_mode_(dump_at_max_mode) {} 132 133 void RecordMalloc(const std::vector<unwindstack::FrameData>& callstack, 134 const std::vector<std::string>& build_ids, 135 uint64_t address, 136 uint64_t sample_size, 137 uint64_t alloc_size, 138 uint64_t sequence_number, 139 uint64_t timestamp); 140 141 template <typename F> GetCallstackAllocations(F fn)142 void GetCallstackAllocations(F fn) { 143 // There are two reasons we remove the unused callstack allocations on the 144 // next iteration of Dump: 145 // * We need to remove them after the callstacks were dumped, which 146 // currently happens after the allocations are dumped. 147 // * This way, we do not destroy and recreate callstacks as frequently. 148 for (auto it_and_alloc : dead_callstack_allocations_) { 149 auto& it = it_and_alloc.first; 150 uint64_t allocated = it_and_alloc.second; 151 const CallstackAllocations& alloc = it->second; 152 // For non-dump-at-max, we need to check, even if there are still no 153 // allocations referencing this callstack, whether there were any 154 // allocations that happened but were freed again. If that was the case, 155 // we need to keep the callsite, because the next dump will indicate a 156 // different self_alloc and self_freed. 157 if (alloc.allocs == 0 && 158 (dump_at_max_mode_ || 159 alloc.value.totals.allocation_count == allocated)) { 160 // TODO(fmayer): We could probably be smarter than throw away 161 // our whole frames cache. 
162 ClearFrameCache(); 163 callstack_allocations_.erase(it); 164 } 165 } 166 dead_callstack_allocations_.clear(); 167 168 for (auto it = callstack_allocations_.begin(); 169 it != callstack_allocations_.end(); ++it) { 170 const CallstackAllocations& alloc = it->second; 171 fn(alloc); 172 173 if (alloc.allocs == 0) 174 dead_callstack_allocations_.emplace_back( 175 it, !dump_at_max_mode_ ? alloc.value.totals.allocation_count : 0); 176 } 177 } 178 179 template <typename F> GetAllocations(F fn)180 void GetAllocations(F fn) { 181 for (const auto& addr_and_allocation : allocations_) { 182 const Allocation& alloc = addr_and_allocation.second; 183 fn(addr_and_allocation.first, alloc.sample_size, alloc.alloc_size, 184 alloc.callstack_allocations()->node->id()); 185 } 186 } 187 RecordFree(uint64_t address,uint64_t sequence_number,uint64_t timestamp)188 void RecordFree(uint64_t address, 189 uint64_t sequence_number, 190 uint64_t timestamp) { 191 RecordOperation(sequence_number, {address, timestamp}); 192 } 193 ClearFrameCache()194 void ClearFrameCache() { frame_cache_.clear(); } 195 committed_timestamp()196 uint64_t committed_timestamp() { return committed_timestamp_; } max_timestamp()197 uint64_t max_timestamp() { return max_timestamp_; } 198 199 uint64_t GetSizeForTesting(const std::vector<unwindstack::FrameData>& stack, 200 std::vector<std::string> build_ids); 201 uint64_t GetMaxForTesting(const std::vector<unwindstack::FrameData>& stack, 202 std::vector<std::string> build_ids); 203 uint64_t GetMaxCountForTesting( 204 const std::vector<unwindstack::FrameData>& stack, 205 std::vector<std::string> build_ids); 206 GetTimestampForTesting()207 uint64_t GetTimestampForTesting() { return committed_timestamp_; } 208 209 private: 210 struct Allocation { AllocationAllocation211 Allocation(uint64_t size, 212 uint64_t asize, 213 uint64_t seq, 214 CallstackAllocations* csa) 215 : sample_size(size), alloc_size(asize), sequence_number(seq) { 216 SetCallstackAllocations(csa); 217 } 218 219 
Allocation() = default; 220 Allocation(const Allocation&) = delete; AllocationAllocation221 Allocation(Allocation&& other) noexcept { 222 sample_size = other.sample_size; 223 alloc_size = other.alloc_size; 224 sequence_number = other.sequence_number; 225 callstack_allocations_ = other.callstack_allocations_; 226 other.callstack_allocations_ = nullptr; 227 } 228 ~AllocationAllocation229 ~Allocation() { SetCallstackAllocations(nullptr); } 230 SetCallstackAllocationsAllocation231 void SetCallstackAllocations(CallstackAllocations* callstack_allocations) { 232 if (callstack_allocations_) 233 callstack_allocations_->allocs--; 234 callstack_allocations_ = callstack_allocations; 235 if (callstack_allocations_) 236 callstack_allocations_->allocs++; 237 } 238 callstack_allocationsAllocation239 CallstackAllocations* callstack_allocations() const { 240 return callstack_allocations_; 241 } 242 243 uint64_t sample_size; 244 uint64_t alloc_size; 245 uint64_t sequence_number; 246 247 private: 248 CallstackAllocations* callstack_allocations_ = nullptr; 249 }; 250 251 struct PendingOperation { 252 uint64_t allocation_address; 253 uint64_t timestamp; 254 }; 255 MaybeCreateCallstackAllocations(GlobalCallstackTrie::Node * node)256 CallstackAllocations* MaybeCreateCallstackAllocations( 257 GlobalCallstackTrie::Node* node) { 258 auto callstack_allocations_it = callstack_allocations_.find(node); 259 if (callstack_allocations_it == callstack_allocations_.end()) { 260 GlobalCallstackTrie::IncrementNode(node); 261 bool inserted; 262 std::tie(callstack_allocations_it, inserted) = 263 callstack_allocations_.emplace(node, node); 264 PERFETTO_DCHECK(inserted); 265 } 266 return &callstack_allocations_it->second; 267 } 268 269 void RecordOperation(uint64_t sequence_number, 270 const PendingOperation& operation); 271 272 // Commits a malloc or free operation. 273 // See comment of pending_operations_ for encoding of malloc and free 274 // operations. 
275 // 276 // Committing a malloc operation: Add the allocations size to 277 // CallstackAllocation::allocated. 278 // Committing a free operation: Add the allocation's size to 279 // CallstackAllocation::freed and delete the allocation. 280 void CommitOperation(uint64_t sequence_number, 281 const PendingOperation& operation); 282 AddToCallstackAllocations(uint64_t ts,const Allocation & alloc)283 void AddToCallstackAllocations(uint64_t ts, const Allocation& alloc) { 284 if (dump_at_max_mode_) { 285 current_unfreed_ += alloc.sample_size; 286 alloc.callstack_allocations()->value.retain_max.cur += alloc.sample_size; 287 alloc.callstack_allocations()->value.retain_max.cur_count++; 288 289 if (current_unfreed_ <= max_unfreed_) 290 return; 291 292 if (max_sequence_number_ == alloc.sequence_number - 1) { 293 // We know the only CallstackAllocation that has max != cur is the 294 // one we just updated. 295 alloc.callstack_allocations()->value.retain_max.max = 296 alloc.callstack_allocations()->value.retain_max.cur; 297 alloc.callstack_allocations()->value.retain_max.max_count = 298 alloc.callstack_allocations()->value.retain_max.cur_count; 299 } else { 300 for (auto& p : callstack_allocations_) { 301 // We need to reset max = cur for every CallstackAllocation, as we 302 // do not know which ones have changed since the last max. 
303 // TODO(fmayer): Add an index to speed this up 304 CallstackAllocations& csa = p.second; 305 csa.value.retain_max.max = csa.value.retain_max.cur; 306 csa.value.retain_max.max_count = csa.value.retain_max.cur_count; 307 } 308 } 309 max_sequence_number_ = alloc.sequence_number; 310 max_unfreed_ = current_unfreed_; 311 max_timestamp_ = ts; 312 } else { 313 alloc.callstack_allocations()->value.totals.allocated += 314 alloc.sample_size; 315 alloc.callstack_allocations()->value.totals.allocation_count++; 316 } 317 } 318 SubtractFromCallstackAllocations(const Allocation & alloc)319 void SubtractFromCallstackAllocations(const Allocation& alloc) { 320 if (dump_at_max_mode_) { 321 current_unfreed_ -= alloc.sample_size; 322 alloc.callstack_allocations()->value.retain_max.cur -= alloc.sample_size; 323 alloc.callstack_allocations()->value.retain_max.cur_count--; 324 } else { 325 alloc.callstack_allocations()->value.totals.freed += alloc.sample_size; 326 alloc.callstack_allocations()->value.totals.free_count++; 327 } 328 } 329 330 // We cannot use an interner here, because after the last allocation goes 331 // away, we still need to keep the CallstackAllocations around until the next 332 // dump. 333 std::map<GlobalCallstackTrie::Node*, CallstackAllocations> 334 callstack_allocations_; 335 336 std::vector<std::pair<decltype(callstack_allocations_)::iterator, uint64_t>> 337 dead_callstack_allocations_; 338 339 std::map<uint64_t /* allocation address */, Allocation> allocations_; 340 341 // An operation is either a commit of an allocation or freeing of an 342 // allocation. An operation is a free if its seq_id is larger than 343 // the sequence_number of the corresponding allocation. It is a commit if its 344 // seq_id is equal to the sequence_number of the corresponding allocation. 345 // 346 // If its seq_id is less than the sequence_number of the corresponding 347 // allocation it could be either, but is ignored either way. 
348 std::map<uint64_t /* seq_id */, PendingOperation /* allocation address */> 349 pending_operations_; 350 351 uint64_t committed_timestamp_ = 0; 352 // The sequence number all mallocs and frees have been handled up to. 353 uint64_t committed_sequence_number_ = 0; 354 GlobalCallstackTrie* callsites_; 355 356 const bool dump_at_max_mode_ = false; 357 // The following members are only used if dump_at_max_mode_ == true. 358 uint64_t max_sequence_number_ = 0; 359 uint64_t current_unfreed_ = 0; 360 uint64_t max_unfreed_ = 0; 361 uint64_t max_timestamp_ = 0; 362 363 // We index by abspc, which is unique as long as the maps do not change. 364 // This is why we ClearFrameCache after we reparsed maps. 365 std::unordered_map<uint64_t /* abs pc */, Interned<Frame>> frame_cache_; 366 }; 367 368 } // namespace profiling 369 } // namespace perfetto 370 371 #endif // SRC_PROFILING_MEMORY_BOOKKEEPING_H_ 372