/*
 * Copyright (C) 2018 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "src/profiling/memory/bookkeeping.h"

#include <fcntl.h>
#include <inttypes.h>
#include <sys/stat.h>
#include <sys/types.h>

#include "perfetto/base/file_utils.h"
#include "perfetto/base/logging.h"
#include "perfetto/base/scoped_file.h"

namespace perfetto {
namespace profiling {
namespace {
using ::perfetto::protos::pbzero::ProfilePacket;
// DropBox has a 500k chunk limit, and each chunk needs to parse as a valid
// proto, so every finished packet has to stay below that size. This threshold
// is checked *before* writing another submessage, so it needs to be lower
// than the maximum acceptable chunk size. We conservatively assume
// submessages can be up to 100k, hence 400k for a 500k chunk size.
uint32_t kPacketSizeThreshold = 400000;
}  // namespace

GlobalCallstackTrie::Node* GlobalCallstackTrie::Node::GetOrCreateChild(
    const Interned<Frame>& loc) {
  Node* child = children_.Get(loc);
  if (!child)
    child = children_.Emplace(loc, this);
  return child;
}

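// Records a malloc of |size| bytes at |address|. Operations can reach the
// tracker out of order, so |sequence_number| defines the true ordering: if an
// allocation already exists at the same address with a lower sequence number,
// it must have been freed in the meantime and is replaced here.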
void HeapTracker::RecordMalloc(const std::vector<FrameData>& callstack,
                               uint64_t address,
                               uint64_t size,
                               uint64_t sequence_number,
                               uint64_t timestamp) {
  auto it = allocations_.find(address);
  if (it != allocations_.end()) {
    Allocation& alloc = it->second;
    PERFETTO_DCHECK(alloc.sequence_number != sequence_number);
    if (alloc.sequence_number < sequence_number) {
      // As we are overwriting the previous allocation, the previous allocation
      // must have been freed.
      //
      // This makes the sequencing a bit incorrect. We are overwriting this
      // allocation, so we pretend both the alloc and the free for it have
      // already happened at committed_sequence_number_, while in fact the free
      // might not have happened until right before this operation.

      if (alloc.sequence_number > committed_sequence_number_) {
        // Only count the previous allocation if it hasn't already been
        // committed, to avoid double counting it.
        alloc.AddToCallstackAllocations();
      }

      alloc.SubtractFromCallstackAllocations();
      GlobalCallstackTrie::Node* node = callsites_->CreateCallsite(callstack);
      alloc.total_size = size;
      alloc.sequence_number = sequence_number;
      alloc.callstack_allocations = MaybeCreateCallstackAllocations(node);
    }
  } else {
    GlobalCallstackTrie::Node* node = callsites_->CreateCallsite(callstack);
    allocations_.emplace(address,
                         Allocation(size, sequence_number,
                                    MaybeCreateCallstackAllocations(node)));
  }

  RecordOperation(sequence_number, {address, timestamp});
}

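// Commits |operation| if it is the next one in sequence; otherwise buffers it
// until the gap before it has been filled.
//
// Example: with committed_sequence_number_ == 5, operations arriving with
// sequence numbers 8, then 7, then 6 are handled as follows: 8 and 7 are
// parked in pending_operations_; once 6 arrives it is committed directly, and
// 7 and 8 are then drained from the map in order.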
void HeapTracker::RecordOperation(uint64_t sequence_number,
                                  const PendingOperation& operation) {
  if (sequence_number != committed_sequence_number_ + 1) {
    pending_operations_.emplace(sequence_number, operation);
    return;
  }

  CommitOperation(sequence_number, operation);

  // At this point some other pending operations might be eligible to be
  // committed.
  auto it = pending_operations_.begin();
  while (it != pending_operations_.end() &&
         it->first == committed_sequence_number_ + 1) {
    CommitOperation(it->first, it->second);
    it = pending_operations_.erase(it);
  }
}

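// Applies a single operation to the bookkeeping state. A PendingOperation
// only carries an address, so whether it was a malloc or a free is recovered
// by comparing sequence numbers: if the live allocation at that address has
// this operation's sequence number, this is its malloc; if it has an older
// one, this operation frees it.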
void HeapTracker::CommitOperation(uint64_t sequence_number,
                                  const PendingOperation& operation) {
  committed_sequence_number_++;
  committed_timestamp_ = operation.timestamp;

  uint64_t address = operation.allocation_address;

  // We will see many frees for addresses we do not know about.
  auto leaf_it = allocations_.find(address);
  if (leaf_it == allocations_.end())
    return;

  Allocation& value = leaf_it->second;
  if (value.sequence_number == sequence_number) {
    value.AddToCallstackAllocations();
  } else if (value.sequence_number < sequence_number) {
    value.SubtractFromCallstackAllocations();
    allocations_.erase(leaf_it);
  }
  // else (value.sequence_number > sequence_number):
  //  This allocation has been replaced by a newer one in RecordMalloc, which
  //  already committed the previous allocation's malloc (and the implicit
  //  free that must have happened, as there is now a new allocation at the
  //  same address). This means that this operation, be it a malloc or a free,
  //  must be treated as a no-op.
}

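// Writes this process's heap samples into the ProfilePacket stream owned by
// |dump_state|, starting a new packet whenever the current one approaches
// kPacketSizeThreshold so each finished packet stays within the chunk limit.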
void HeapTracker::Dump(
    std::function<void(ProfilePacket::ProcessHeapSamples*)> fill_process_header,
    DumpState* dump_state) {
  // There are two reasons we remove the unused callstack allocations on the
  // next iteration of Dump:
  // * We need to remove them after the callstacks were dumped, which currently
  //   happens after the allocations are dumped.
  // * This way, we do not destroy and recreate callstacks as frequently.
  for (auto it_and_alloc : dead_callstack_allocations_) {
    auto& it = it_and_alloc.first;
    uint64_t allocated = it_and_alloc.second;
    const CallstackAllocations& alloc = it->second;
    if (alloc.allocs == 0 && alloc.allocation_count == allocated)
      callstack_allocations_.erase(it);
  }
  dead_callstack_allocations_.clear();

  if (dump_state->currently_written() > kPacketSizeThreshold)
    dump_state->NewProfilePacket();

  ProfilePacket::ProcessHeapSamples* proto =
      dump_state->current_profile_packet->add_process_dumps();
  fill_process_header(proto);
  proto->set_timestamp(committed_timestamp_);
  for (auto it = callstack_allocations_.begin();
       it != callstack_allocations_.end(); ++it) {
    if (dump_state->currently_written() > kPacketSizeThreshold) {
      dump_state->NewProfilePacket();
      proto = dump_state->current_profile_packet->add_process_dumps();
      fill_process_header(proto);
      proto->set_timestamp(committed_timestamp_);
    }

    const CallstackAllocations& alloc = it->second;
    dump_state->callstacks_to_dump.emplace(alloc.node);
    ProfilePacket::HeapSample* sample = proto->add_samples();
    sample->set_callstack_id(alloc.node->id());
    sample->set_self_allocated(alloc.allocated);
    sample->set_self_freed(alloc.freed);
    sample->set_alloc_count(alloc.allocation_count);
    sample->set_free_count(alloc.free_count);

    if (alloc.allocs == 0)
      dead_callstack_allocations_.emplace_back(it, alloc.allocation_count);
  }
}

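// Test-only helper: returns the bytes currently live (allocated minus freed)
// for the callstack |stack|, or 0 if it has never been allocated from.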
uint64_t HeapTracker::GetSizeForTesting(const std::vector<FrameData>& stack) {
  GlobalCallstackTrie::Node* node = callsites_->CreateCallsite(stack);
  // Hack to make the node go away again if it wasn't referenced before.
  // This is acceptable only because this function is used exclusively in
  // tests.
  GlobalCallstackTrie::IncrementNode(node);
  GlobalCallstackTrie::DecrementNode(node);
  auto it = callstack_allocations_.find(node);
  if (it == callstack_allocations_.end()) {
    return 0;
  }
  const CallstackAllocations& alloc = it->second;
  return alloc.allocated - alloc.freed;
}

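// Reconstructs the callstack ending in |node| by walking the parent pointers
// up to (but not including) the root. The result is therefore ordered from
// leaf frame to outermost caller.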
std::vector<Interned<Frame>> GlobalCallstackTrie::BuildCallstack(
    const Node* node) const {
  std::vector<Interned<Frame>> res;
  while (node != &root_) {
    res.emplace_back(node->location_);
    node = node->parent_;
  }
  return res;
}

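// Interns each frame of |callstack| and descends the trie from the root,
// creating missing nodes along the way; returns the leaf node for the full
// stack. This does not take a reference on the node, so callers that need it
// to stay alive must pair it with IncrementNode.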
GlobalCallstackTrie::Node* GlobalCallstackTrie::CreateCallsite(
    const std::vector<FrameData>& callstack) {
  Node* node = &root_;
  for (const FrameData& loc : callstack) {
    node = node->GetOrCreateChild(InternCodeLocation(loc));
  }
  return node;
}

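// Takes a reference on |node| and every one of its ancestors, keeping the
// whole chain alive.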
void GlobalCallstackTrie::IncrementNode(Node* node) {
  while (node != nullptr) {
    node->ref_count_ += 1;
    node = node->parent_;
  }
}

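// Drops a reference on |node| and every one of its ancestors. Any node whose
// reference count reaches zero is unlinked from its parent on the way up.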
void GlobalCallstackTrie::DecrementNode(Node* node) {
  PERFETTO_DCHECK(node->ref_count_ >= 1);

  bool delete_prev = false;
  Node* prev = nullptr;
  while (node != nullptr) {
    if (delete_prev)
      node->children_.Remove(*prev);
    node->ref_count_ -= 1;
    delete_prev = node->ref_count_ == 0;
    prev = node;
    node = node->parent_;
  }
}

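// Builds the interned Frame for |loc|: the build id, the function name, and
// each '/'-separated component of the mapping path go through the string
// interner, and the resulting Mapping and Frame are interned as well, so
// identical frames from different stacks share a single representation.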
Interned<Frame> GlobalCallstackTrie::InternCodeLocation(const FrameData& loc) {
  Mapping map(string_interner_.Intern(loc.build_id));
  map.offset = loc.frame.map_elf_start_offset;
  map.start = loc.frame.map_start;
  map.end = loc.frame.map_end;
  map.load_bias = loc.frame.map_load_bias;
  base::StringSplitter sp(loc.frame.map_name, '/');
  while (sp.Next())
    map.path_components.emplace_back(string_interner_.Intern(sp.cur_token()));

  Frame frame(mapping_interner_.Intern(std::move(map)),
              string_interner_.Intern(loc.frame.function_name),
              loc.frame.rel_pc);

  return frame_interner_.Intern(frame);
}

Interned<Frame> GlobalCallstackTrie::MakeRootFrame() {
  Mapping map(string_interner_.Intern(""));

  Frame frame(mapping_interner_.Intern(std::move(map)),
              string_interner_.Intern(""), 0);

  return frame_interner_.Intern(frame);
}

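// Writes |map| into the current profile packet at most once per dump. The
// strings it references (path components and build id) are emitted first, so
// their interning ids are defined before the mapping that uses them.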
void DumpState::WriteMap(const Interned<Mapping> map) {
  auto map_it_and_inserted = dumped_mappings.emplace(map.id());
  if (map_it_and_inserted.second) {
    for (const Interned<std::string>& str : map->path_components)
      WriteString(str);

    WriteString(map->build_id);

    if (currently_written() > kPacketSizeThreshold)
      NewProfilePacket();

    auto mapping = current_profile_packet->add_mappings();
    mapping->set_id(map.id());
    mapping->set_offset(map->offset);
    mapping->set_start(map->start);
    mapping->set_end(map->end);
    mapping->set_load_bias(map->load_bias);
    mapping->set_build_id(map->build_id.id());
    for (const Interned<std::string>& str : map->path_components)
      mapping->add_path_string_ids(str.id());
  }
}

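// Writes |frame|, and transitively its mapping and function name, into the
// current profile packet at most once per dump, keyed on the frame's
// interning id.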
void DumpState::WriteFrame(Interned<Frame> frame) {
  WriteMap(frame->mapping);
  WriteString(frame->function_name);
  bool inserted;
  std::tie(std::ignore, inserted) = dumped_frames.emplace(frame.id());
  if (inserted) {
    if (currently_written() > kPacketSizeThreshold)
      NewProfilePacket();

    auto frame_proto = current_profile_packet->add_frames();
    frame_proto->set_id(frame.id());
    frame_proto->set_function_name_id(frame->function_name.id());
    frame_proto->set_mapping_id(frame->mapping.id());
    frame_proto->set_rel_pc(frame->rel_pc);
  }
}

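// Writes an interned string into the current profile packet at most once per
// dump, keyed on its interning id.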
void DumpState::WriteString(const Interned<std::string>& str) {
  bool inserted;
  std::tie(std::ignore, inserted) = dumped_strings.emplace(str.id());
  if (inserted) {
    if (currently_written() > kPacketSizeThreshold)
      NewProfilePacket();

    auto interned_string = current_profile_packet->add_strings();
    interned_string->set_id(str.id());
    interned_string->set_str(reinterpret_cast<const uint8_t*>(str->c_str()),
                             str->size());
  }
}

}  // namespace profiling
}  // namespace perfetto