1 /*
2  * Copyright (C) 2018 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef SRC_PROFILING_MEMORY_BOOKKEEPING_H_
18 #define SRC_PROFILING_MEMORY_BOOKKEEPING_H_
19 
20 #include <map>
21 #include <vector>
22 
23 #include "perfetto/base/time.h"
24 #include "src/profiling/common/callstack_trie.h"
25 #include "src/profiling/common/interner.h"
26 #include "src/profiling/memory/unwound_messages.h"
27 
28 // Below is an illustration of the bookkeeping system state where
29 // PID 1 does the following allocations:
30 // 0x123: 128 bytes at [bar main]
31 // 0x234: 128 bytes at [bar main]
32 // 0xf00: 512 bytes at [foo main]
33 // PID 1 allocated but previously freed 1024 bytes at [bar main]
34 //
35 // PID 2 does the following allocations:
36 // 0x345: 512 bytes at [foo main]
37 // 0x456:  32 bytes at [foo main]
38 // PID 2 allocated but already freed 1235 bytes at [foo main]
39 // PID 2 allocated and freed 2048 bytes in main.
40 //
41 // +---------------------------------+   +-------------------+
42 // | +---------+    HeapTracker PID 1|   | GlobalCallstackTri|
43 // | |0x123 128+---+    +----------+ |   |           +---+   |
44 // | |         |   +---->alloc:1280+----------------->bar|   |
45 // | |0x234 128+---+    |free: 1024| |   |           +-^-+   |
46 // | |         |        +----------+ |   |   +---+     ^     |
47 // | |0xf00 512+---+                 | +----->foo|     |     |
48 // | +--------+|   |    +----------+ | | |   +-^-+     |     |
49 // |               +---->alloc: 512+---+ |     |       |     |
50 // |                    |free:    0| | | |     +--+----+     |
51 // |                    +----------+ | | |        |          |
52 // |                                 | | |      +-+--+       |
53 // +---------------------------------+ | |      |main|       |
54 //                                     | |      +--+-+       |
55 // +---------------------------------+ | |         ^         |
56 // | +---------+    HeapTracker PID 2| | +-------------------+
57 // | |0x345 512+---+    +----------+ | |           |
58 // | |         |   +---->alloc:1779+---+           |
59 // | |0x456  32+---+    |free: 1235| |             |
60 // | +---------+        +----------+ |             |
61 // |                                 |             |
62 // |                    +----------+ |             |
63 // |                    |alloc:2048+---------------+
64 // |                    |free: 2048| |
65 // |                    +----------+ |
66 // |                                 |
67 // +---------------------------------+
68 //   Allocation    CallstackAllocations        Node
69 //
70 // The active allocations are on the leftmost side, modeled as the class
71 // HeapTracker::Allocation.
72 //
73 // The total allocated and freed bytes per callsite are in the middle, modeled
74 // as the HeapTracker::CallstackAllocations class.
75 // Note that (1280 - 1024) = 256, so alloc - free is equal to the total of the
76 // currently active allocations.
77 // Note in PID 2 there is a CallstackAllocations with 2048 allocated and 2048
78 // freed bytes. This is not currently referenced by any Allocations (as it
79 // should, as 2048 - 2048 = 0, which would mean that the total size of the
80 // allocations referencing it should be 0). This is because we haven't dumped
81 // this state yet, so the CallstackAllocations will be kept around until the
82 // next dump, written to the trace, and then destroyed.
83 //
84 // On the right hand side is the GlobalCallstackTrie, with nodes representing
85 // distinct callstacks. They have no information about the currently allocated
86 // or freed bytes, they only contain a reference count to destroy them as
87 // soon as they are no longer referenced by a HeapTracker.
88 
89 namespace perfetto {
90 namespace profiling {
91 
92 // Snapshot for memory allocations of a particular process. Shares callsites
93 // with other processes.
94 class HeapTracker {
95  public:
96   struct CallstackMaxAllocations {
97     uint64_t max;
98     uint64_t cur;
99     uint64_t max_count;
100     uint64_t cur_count;
101   };
102   struct CallstackTotalAllocations {
103     uint64_t allocated;
104     uint64_t freed;
105     uint64_t allocation_count;
106     uint64_t free_count;
107   };
108 
109   // Sum of all the allocations for a given callstack.
110   struct CallstackAllocations {
CallstackAllocationsCallstackAllocations111     explicit CallstackAllocations(GlobalCallstackTrie::Node* n) : node(n) {}
112 
113     uint64_t allocs = 0;
114 
115     union {
116       CallstackMaxAllocations retain_max;
117       CallstackTotalAllocations totals;
118     } value = {};
119 
120     GlobalCallstackTrie::Node* const node;
121 
~CallstackAllocationsCallstackAllocations122     ~CallstackAllocations() { GlobalCallstackTrie::DecrementNode(node); }
123 
124     bool operator<(const CallstackAllocations& other) const {
125       return node < other.node;
126     }
127   };
128 
129   // Caller needs to ensure that callsites outlives the HeapTracker.
HeapTracker(GlobalCallstackTrie * callsites,bool dump_at_max_mode)130   explicit HeapTracker(GlobalCallstackTrie* callsites, bool dump_at_max_mode)
131       : callsites_(callsites), dump_at_max_mode_(dump_at_max_mode) {}
132 
133   void RecordMalloc(const std::vector<unwindstack::FrameData>& callstack,
134                     const std::vector<std::string>& build_ids,
135                     uint64_t address,
136                     uint64_t sample_size,
137                     uint64_t alloc_size,
138                     uint64_t sequence_number,
139                     uint64_t timestamp);
140 
141   template <typename F>
GetCallstackAllocations(F fn)142   void GetCallstackAllocations(F fn) {
143     // There are two reasons we remove the unused callstack allocations on the
144     // next iteration of Dump:
145     // * We need to remove them after the callstacks were dumped, which
146     //   currently happens after the allocations are dumped.
147     // * This way, we do not destroy and recreate callstacks as frequently.
148     for (auto it_and_alloc : dead_callstack_allocations_) {
149       auto& it = it_and_alloc.first;
150       uint64_t allocated = it_and_alloc.second;
151       const CallstackAllocations& alloc = it->second;
152       // For non-dump-at-max, we need to check, even if there are still no
153       // allocations referencing this callstack, whether there were any
154       // allocations that happened but were freed again. If that was the case,
155       // we need to keep the callsite, because the next dump will indicate a
156       // different self_alloc and self_freed.
157       if (alloc.allocs == 0 &&
158           (dump_at_max_mode_ ||
159            alloc.value.totals.allocation_count == allocated)) {
160         // TODO(fmayer): We could probably be smarter than throw away
161         // our whole frames cache.
162         ClearFrameCache();
163         callstack_allocations_.erase(it);
164       }
165     }
166     dead_callstack_allocations_.clear();
167 
168     for (auto it = callstack_allocations_.begin();
169          it != callstack_allocations_.end(); ++it) {
170       const CallstackAllocations& alloc = it->second;
171       fn(alloc);
172 
173       if (alloc.allocs == 0)
174         dead_callstack_allocations_.emplace_back(
175             it, !dump_at_max_mode_ ? alloc.value.totals.allocation_count : 0);
176     }
177   }
178 
179   template <typename F>
GetAllocations(F fn)180   void GetAllocations(F fn) {
181     for (const auto& addr_and_allocation : allocations_) {
182       const Allocation& alloc = addr_and_allocation.second;
183       fn(addr_and_allocation.first, alloc.sample_size, alloc.alloc_size,
184          alloc.callstack_allocations()->node->id());
185     }
186   }
187 
RecordFree(uint64_t address,uint64_t sequence_number,uint64_t timestamp)188   void RecordFree(uint64_t address,
189                   uint64_t sequence_number,
190                   uint64_t timestamp) {
191     RecordOperation(sequence_number, {address, timestamp});
192   }
193 
ClearFrameCache()194   void ClearFrameCache() { frame_cache_.clear(); }
195 
committed_timestamp()196   uint64_t committed_timestamp() { return committed_timestamp_; }
max_timestamp()197   uint64_t max_timestamp() { return max_timestamp_; }
198 
199   uint64_t GetSizeForTesting(const std::vector<unwindstack::FrameData>& stack,
200                              std::vector<std::string> build_ids);
201   uint64_t GetMaxForTesting(const std::vector<unwindstack::FrameData>& stack,
202                             std::vector<std::string> build_ids);
203   uint64_t GetMaxCountForTesting(
204       const std::vector<unwindstack::FrameData>& stack,
205       std::vector<std::string> build_ids);
206 
GetTimestampForTesting()207   uint64_t GetTimestampForTesting() { return committed_timestamp_; }
208 
209  private:
210   struct Allocation {
AllocationAllocation211     Allocation(uint64_t size,
212                uint64_t asize,
213                uint64_t seq,
214                CallstackAllocations* csa)
215         : sample_size(size), alloc_size(asize), sequence_number(seq) {
216       SetCallstackAllocations(csa);
217     }
218 
219     Allocation() = default;
220     Allocation(const Allocation&) = delete;
AllocationAllocation221     Allocation(Allocation&& other) noexcept {
222       sample_size = other.sample_size;
223       alloc_size = other.alloc_size;
224       sequence_number = other.sequence_number;
225       callstack_allocations_ = other.callstack_allocations_;
226       other.callstack_allocations_ = nullptr;
227     }
228 
~AllocationAllocation229     ~Allocation() { SetCallstackAllocations(nullptr); }
230 
SetCallstackAllocationsAllocation231     void SetCallstackAllocations(CallstackAllocations* callstack_allocations) {
232       if (callstack_allocations_)
233         callstack_allocations_->allocs--;
234       callstack_allocations_ = callstack_allocations;
235       if (callstack_allocations_)
236         callstack_allocations_->allocs++;
237     }
238 
callstack_allocationsAllocation239     CallstackAllocations* callstack_allocations() const {
240       return callstack_allocations_;
241     }
242 
243     uint64_t sample_size;
244     uint64_t alloc_size;
245     uint64_t sequence_number;
246 
247    private:
248     CallstackAllocations* callstack_allocations_ = nullptr;
249   };
250 
251   struct PendingOperation {
252     uint64_t allocation_address;
253     uint64_t timestamp;
254   };
255 
MaybeCreateCallstackAllocations(GlobalCallstackTrie::Node * node)256   CallstackAllocations* MaybeCreateCallstackAllocations(
257       GlobalCallstackTrie::Node* node) {
258     auto callstack_allocations_it = callstack_allocations_.find(node);
259     if (callstack_allocations_it == callstack_allocations_.end()) {
260       GlobalCallstackTrie::IncrementNode(node);
261       bool inserted;
262       std::tie(callstack_allocations_it, inserted) =
263           callstack_allocations_.emplace(node, node);
264       PERFETTO_DCHECK(inserted);
265     }
266     return &callstack_allocations_it->second;
267   }
268 
269   void RecordOperation(uint64_t sequence_number,
270                        const PendingOperation& operation);
271 
272   // Commits a malloc or free operation.
273   // See comment of pending_operations_ for encoding of malloc and free
274   // operations.
275   //
276   // Committing a malloc operation: Add the allocations size to
277   // CallstackAllocation::allocated.
278   // Committing a free operation: Add the allocation's size to
279   // CallstackAllocation::freed and delete the allocation.
280   void CommitOperation(uint64_t sequence_number,
281                        const PendingOperation& operation);
282 
AddToCallstackAllocations(uint64_t ts,const Allocation & alloc)283   void AddToCallstackAllocations(uint64_t ts, const Allocation& alloc) {
284     if (dump_at_max_mode_) {
285       current_unfreed_ += alloc.sample_size;
286       alloc.callstack_allocations()->value.retain_max.cur += alloc.sample_size;
287       alloc.callstack_allocations()->value.retain_max.cur_count++;
288 
289       if (current_unfreed_ <= max_unfreed_)
290         return;
291 
292       if (max_sequence_number_ == alloc.sequence_number - 1) {
293         // We know the only CallstackAllocation that has max != cur is the
294         // one we just updated.
295         alloc.callstack_allocations()->value.retain_max.max =
296             alloc.callstack_allocations()->value.retain_max.cur;
297         alloc.callstack_allocations()->value.retain_max.max_count =
298             alloc.callstack_allocations()->value.retain_max.cur_count;
299       } else {
300         for (auto& p : callstack_allocations_) {
301           // We need to reset max = cur for every CallstackAllocation, as we
302           // do not know which ones have changed since the last max.
303           // TODO(fmayer): Add an index to speed this up
304           CallstackAllocations& csa = p.second;
305           csa.value.retain_max.max = csa.value.retain_max.cur;
306           csa.value.retain_max.max_count = csa.value.retain_max.cur_count;
307         }
308       }
309       max_sequence_number_ = alloc.sequence_number;
310       max_unfreed_ = current_unfreed_;
311       max_timestamp_ = ts;
312     } else {
313       alloc.callstack_allocations()->value.totals.allocated +=
314           alloc.sample_size;
315       alloc.callstack_allocations()->value.totals.allocation_count++;
316     }
317   }
318 
SubtractFromCallstackAllocations(const Allocation & alloc)319   void SubtractFromCallstackAllocations(const Allocation& alloc) {
320     if (dump_at_max_mode_) {
321       current_unfreed_ -= alloc.sample_size;
322       alloc.callstack_allocations()->value.retain_max.cur -= alloc.sample_size;
323       alloc.callstack_allocations()->value.retain_max.cur_count--;
324     } else {
325       alloc.callstack_allocations()->value.totals.freed += alloc.sample_size;
326       alloc.callstack_allocations()->value.totals.free_count++;
327     }
328   }
329 
330   // We cannot use an interner here, because after the last allocation goes
331   // away, we still need to keep the CallstackAllocations around until the next
332   // dump.
333   std::map<GlobalCallstackTrie::Node*, CallstackAllocations>
334       callstack_allocations_;
335 
336   std::vector<std::pair<decltype(callstack_allocations_)::iterator, uint64_t>>
337       dead_callstack_allocations_;
338 
339   std::map<uint64_t /* allocation address */, Allocation> allocations_;
340 
341   // An operation is either a commit of an allocation or freeing of an
342   // allocation. An operation is a free if its seq_id is larger than
343   // the sequence_number of the corresponding allocation. It is a commit if its
344   // seq_id is equal to the sequence_number of the corresponding allocation.
345   //
346   // If its seq_id is less than the sequence_number of the corresponding
347   // allocation it could be either, but is ignored either way.
348   std::map<uint64_t /* seq_id */, PendingOperation /* allocation address */>
349       pending_operations_;
350 
351   uint64_t committed_timestamp_ = 0;
352   // The sequence number all mallocs and frees have been handled up to.
353   uint64_t committed_sequence_number_ = 0;
354   GlobalCallstackTrie* callsites_;
355 
356   const bool dump_at_max_mode_ = false;
357   // The following members are only used if dump_at_max_mode_ == true.
358   uint64_t max_sequence_number_ = 0;
359   uint64_t current_unfreed_ = 0;
360   uint64_t max_unfreed_ = 0;
361   uint64_t max_timestamp_ = 0;
362 
363   // We index by abspc, which is unique as long as the maps do not change.
364   // This is why we ClearFrameCache after we reparsed maps.
365   std::unordered_map<uint64_t /* abs pc */, Interned<Frame>> frame_cache_;
366 };
367 
368 }  // namespace profiling
369 }  // namespace perfetto
370 
371 #endif  // SRC_PROFILING_MEMORY_BOOKKEEPING_H_
372