1 /*
2  * Copyright (C) 2019 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "src/trace_processor/importers/proto/heap_profile_tracker.h"
18 
19 #include "perfetto/base/logging.h"
20 #include "src/trace_processor/importers/common/process_tracker.h"
21 #include "src/trace_processor/types/trace_processor_context.h"
22 
23 #include "protos/perfetto/trace/profiling/profile_common.pbzero.h"
24 #include "protos/perfetto/trace/profiling/profile_packet.pbzero.h"
25 
26 namespace perfetto {
27 namespace trace_processor {
28 
29 namespace {
30 struct MergedCallsite {
31   StringId frame_name;
32   StringId mapping_name;
33   base::Optional<uint32_t> parent_idx;
operator <perfetto::trace_processor::__anon851f64410111::MergedCallsite34   bool operator<(const MergedCallsite& o) const {
35     return std::tie(frame_name, mapping_name, parent_idx) <
36            std::tie(o.frame_name, o.mapping_name, o.parent_idx);
37   }
38 };
39 
GetMergedCallsites(TraceStorage * storage,uint32_t callstack_row)40 std::vector<MergedCallsite> GetMergedCallsites(TraceStorage* storage,
41                                                uint32_t callstack_row) {
42   const tables::StackProfileCallsiteTable& callsites_tbl =
43       storage->stack_profile_callsite_table();
44   const tables::StackProfileFrameTable& frames_tbl =
45       storage->stack_profile_frame_table();
46   const tables::SymbolTable& symbols_tbl = storage->symbol_table();
47   const tables::StackProfileMappingTable& mapping_tbl =
48       storage->stack_profile_mapping_table();
49 
50   uint32_t frame_idx =
51       *frames_tbl.id().IndexOf(callsites_tbl.frame_id()[callstack_row]);
52 
53   uint32_t mapping_idx =
54       *mapping_tbl.id().IndexOf(frames_tbl.mapping()[frame_idx]);
55   StringId mapping_name = mapping_tbl.name()[mapping_idx];
56 
57   base::Optional<uint32_t> symbol_set_id =
58       frames_tbl.symbol_set_id()[frame_idx];
59 
60   if (!symbol_set_id) {
61     StringId frame_name = frames_tbl.name()[frame_idx];
62     base::Optional<StringId> deobfuscated_name =
63         frames_tbl.deobfuscated_name()[frame_idx];
64     return {{deobfuscated_name ? *deobfuscated_name : frame_name, mapping_name,
65              base::nullopt}};
66   }
67 
68   std::vector<MergedCallsite> result;
69   // id == symbol_set_id for the bottommost frame.
70   // TODO(lalitm): Encode this optimization in the table and remove this
71   // custom optimization.
72   uint32_t symbol_set_idx = *symbols_tbl.id().IndexOf(SymbolId(*symbol_set_id));
73   for (uint32_t i = symbol_set_idx;
74        i < symbols_tbl.row_count() &&
75        symbols_tbl.symbol_set_id()[i] == *symbol_set_id;
76        ++i) {
77     result.emplace_back(
78         MergedCallsite{symbols_tbl.name()[i], mapping_name, base::nullopt});
79   }
80   std::reverse(result.begin(), result.end());
81   return result;
82 }
83 }  // namespace
84 
BuildNativeFlamegraph(TraceStorage * storage,UniquePid upid,int64_t timestamp)85 std::unique_ptr<tables::ExperimentalFlamegraphNodesTable> BuildNativeFlamegraph(
86     TraceStorage* storage,
87     UniquePid upid,
88     int64_t timestamp) {
89   const tables::HeapProfileAllocationTable& allocation_tbl =
90       storage->heap_profile_allocation_table();
91   const tables::StackProfileCallsiteTable& callsites_tbl =
92       storage->stack_profile_callsite_table();
93 
94   StringId profile_type = storage->InternString("native");
95 
96   std::vector<uint32_t> callsite_to_merged_callsite(callsites_tbl.row_count(),
97                                                     0);
98   std::map<MergedCallsite, uint32_t> merged_callsites_to_table_idx;
99 
100   std::unique_ptr<tables::ExperimentalFlamegraphNodesTable> tbl(
101       new tables::ExperimentalFlamegraphNodesTable(
102           storage->mutable_string_pool(), nullptr));
103 
104   // FORWARD PASS:
105   // Aggregate callstacks by frame name / mapping name. Use symbolization
106   // data.
107   for (uint32_t i = 0; i < callsites_tbl.row_count(); ++i) {
108     base::Optional<uint32_t> parent_idx;
109 
110     auto opt_parent_id = callsites_tbl.parent_id()[i];
111     if (opt_parent_id) {
112       parent_idx = callsites_tbl.id().IndexOf(*opt_parent_id);
113       // Make sure what we index into has been populated already.
114       PERFETTO_CHECK(*parent_idx < i);
115       parent_idx = callsite_to_merged_callsite[*parent_idx];
116     }
117 
118     auto callsites = GetMergedCallsites(storage, i);
119     // Loop below needs to run at least once for parent_idx to get updated.
120     PERFETTO_CHECK(!callsites.empty());
121     for (MergedCallsite& merged_callsite : callsites) {
122       merged_callsite.parent_idx = parent_idx;
123       auto it = merged_callsites_to_table_idx.find(merged_callsite);
124       if (it == merged_callsites_to_table_idx.end()) {
125         std::tie(it, std::ignore) = merged_callsites_to_table_idx.emplace(
126             merged_callsite, merged_callsites_to_table_idx.size());
127         tables::ExperimentalFlamegraphNodesTable::Row row{};
128         if (parent_idx) {
129           row.depth = tbl->depth()[*parent_idx] + 1;
130         } else {
131           row.depth = 0;
132         }
133         row.ts = timestamp;
134         row.upid = upid;
135         row.profile_type = profile_type;
136         row.name = merged_callsite.frame_name;
137         row.map_name = merged_callsite.mapping_name;
138         if (parent_idx)
139           row.parent_id = tbl->id()[*parent_idx];
140 
141         parent_idx = tbl->Insert(std::move(row)).row;
142         PERFETTO_CHECK(merged_callsites_to_table_idx.size() ==
143                        tbl->row_count());
144       }
145       parent_idx = it->second;
146     }
147 
148     PERFETTO_CHECK(parent_idx);
149     callsite_to_merged_callsite[i] = *parent_idx;
150   }
151 
152   // PASS OVER ALLOCATIONS:
153   // Aggregate allocations into the newly built tree.
154   auto filtered = allocation_tbl.Filter(
155       {allocation_tbl.ts().le(timestamp), allocation_tbl.upid().eq(upid)});
156 
157   if (filtered.row_count() == 0) {
158     return nullptr;
159   }
160 
161   for (auto it = filtered.IterateRows(); it; it.Next()) {
162     int64_t size =
163         it.Get(static_cast<uint32_t>(
164                    tables::HeapProfileAllocationTable::ColumnIndex::size))
165             .long_value;
166     int64_t count =
167         it.Get(static_cast<uint32_t>(
168                    tables::HeapProfileAllocationTable::ColumnIndex::count))
169             .long_value;
170     int64_t callsite_id =
171         it
172             .Get(static_cast<uint32_t>(
173                 tables::HeapProfileAllocationTable::ColumnIndex::callsite_id))
174             .long_value;
175 
176     PERFETTO_CHECK((size <= 0 && count <= 0) || (size >= 0 && count >= 0));
177     uint32_t merged_idx =
178         callsite_to_merged_callsite[*callsites_tbl.id().IndexOf(
179             CallsiteId(static_cast<uint32_t>(callsite_id)))];
180     // On old heapprofd producers, the count field is incorrectly set and we
181     // zero it in proto_trace_parser.cc.
182     // As such, we cannot depend on count == 0 to imply size == 0, so we check
183     // for both of them separately.
184     if (size > 0) {
185       tbl->mutable_alloc_size()->Set(merged_idx,
186                                      tbl->alloc_size()[merged_idx] + size);
187     }
188     if (count > 0) {
189       tbl->mutable_alloc_count()->Set(merged_idx,
190                                       tbl->alloc_count()[merged_idx] + count);
191     }
192 
193     tbl->mutable_size()->Set(merged_idx, tbl->size()[merged_idx] + size);
194     tbl->mutable_count()->Set(merged_idx, tbl->count()[merged_idx] + count);
195   }
196 
197   // BACKWARD PASS:
198   // Propagate sizes to parents.
199   for (int64_t i = tbl->row_count() - 1; i >= 0; --i) {
200     uint32_t idx = static_cast<uint32_t>(i);
201 
202     tbl->mutable_cumulative_size()->Set(
203         idx, tbl->cumulative_size()[idx] + tbl->size()[idx]);
204     tbl->mutable_cumulative_count()->Set(
205         idx, tbl->cumulative_count()[idx] + tbl->count()[idx]);
206 
207     tbl->mutable_cumulative_alloc_size()->Set(
208         idx, tbl->cumulative_alloc_size()[idx] + tbl->alloc_size()[idx]);
209     tbl->mutable_cumulative_alloc_count()->Set(
210         idx, tbl->cumulative_alloc_count()[idx] + tbl->alloc_count()[idx]);
211 
212     auto parent = tbl->parent_id()[idx];
213     if (parent) {
214       uint32_t parent_idx = *tbl->id().IndexOf(
215           tables::ExperimentalFlamegraphNodesTable::Id(*parent));
216       tbl->mutable_cumulative_size()->Set(
217           parent_idx,
218           tbl->cumulative_size()[parent_idx] + tbl->cumulative_size()[idx]);
219       tbl->mutable_cumulative_count()->Set(
220           parent_idx,
221           tbl->cumulative_count()[parent_idx] + tbl->cumulative_count()[idx]);
222 
223       tbl->mutable_cumulative_alloc_size()->Set(
224           parent_idx, tbl->cumulative_alloc_size()[parent_idx] +
225                           tbl->cumulative_alloc_size()[idx]);
226       tbl->mutable_cumulative_alloc_count()->Set(
227           parent_idx, tbl->cumulative_alloc_count()[parent_idx] +
228                           tbl->cumulative_alloc_count()[idx]);
229     }
230   }
231 
232   return tbl;
233 }
234 
HeapProfileTracker(TraceProcessorContext * context)235 HeapProfileTracker::HeapProfileTracker(TraceProcessorContext* context)
236     : context_(context), empty_(context_->storage->InternString({"", 0})) {}
237 
238 HeapProfileTracker::~HeapProfileTracker() = default;
239 
SetProfilePacketIndex(uint32_t seq_id,uint64_t index)240 void HeapProfileTracker::SetProfilePacketIndex(uint32_t seq_id,
241                                                uint64_t index) {
242   SequenceState& sequence_state = sequence_state_[seq_id];
243   bool dropped_packet = false;
244   // heapprofd starts counting at index = 0.
245   if (!sequence_state.prev_index && index != 0) {
246     dropped_packet = true;
247   }
248 
249   if (sequence_state.prev_index && *sequence_state.prev_index + 1 != index) {
250     dropped_packet = true;
251   }
252 
253   if (dropped_packet) {
254     if (sequence_state.prev_index) {
255       PERFETTO_ELOG("Missing packets between %" PRIu64 " and %" PRIu64,
256                     *sequence_state.prev_index, index);
257     } else {
258       PERFETTO_ELOG("Invalid first packet index %" PRIu64 " (!= 0)", index);
259     }
260 
261     context_->storage->IncrementStats(stats::heapprofd_missing_packet);
262   }
263   sequence_state.prev_index = index;
264 }
265 
AddAllocation(uint32_t seq_id,SequenceStackProfileTracker * sequence_stack_profile_tracker,const SourceAllocation & alloc,const SequenceStackProfileTracker::InternLookup * intern_lookup)266 void HeapProfileTracker::AddAllocation(
267     uint32_t seq_id,
268     SequenceStackProfileTracker* sequence_stack_profile_tracker,
269     const SourceAllocation& alloc,
270     const SequenceStackProfileTracker::InternLookup* intern_lookup) {
271   SequenceState& sequence_state = sequence_state_[seq_id];
272 
273   auto opt_callstack_id = sequence_stack_profile_tracker->FindOrInsertCallstack(
274       alloc.callstack_id, intern_lookup);
275   if (!opt_callstack_id)
276     return;
277 
278   CallsiteId callstack_id = *opt_callstack_id;
279 
280   UniquePid upid = context_->process_tracker->GetOrCreateProcess(
281       static_cast<uint32_t>(alloc.pid));
282 
283   tables::HeapProfileAllocationTable::Row alloc_row{
284       alloc.timestamp,
285       upid,
286       alloc.heap_name,
287       callstack_id,
288       static_cast<int64_t>(alloc.alloc_count),
289       static_cast<int64_t>(alloc.self_allocated)};
290 
291   tables::HeapProfileAllocationTable::Row free_row{
292       alloc.timestamp,
293       upid,
294       alloc.heap_name,
295       callstack_id,
296       -static_cast<int64_t>(alloc.free_count),
297       -static_cast<int64_t>(alloc.self_freed)};
298 
299   auto prev_alloc_it = sequence_state.prev_alloc.find({upid, callstack_id});
300   if (prev_alloc_it == sequence_state.prev_alloc.end()) {
301     std::tie(prev_alloc_it, std::ignore) = sequence_state.prev_alloc.emplace(
302         std::make_pair(upid, callstack_id),
303         tables::HeapProfileAllocationTable::Row{});
304   }
305 
306   tables::HeapProfileAllocationTable::Row& prev_alloc = prev_alloc_it->second;
307 
308   auto prev_free_it = sequence_state.prev_free.find({upid, callstack_id});
309   if (prev_free_it == sequence_state.prev_free.end()) {
310     std::tie(prev_free_it, std::ignore) = sequence_state.prev_free.emplace(
311         std::make_pair(upid, callstack_id),
312         tables::HeapProfileAllocationTable::Row{});
313   }
314 
315   tables::HeapProfileAllocationTable::Row& prev_free = prev_free_it->second;
316 
317   std::set<CallsiteId>& callstacks_for_source_callstack_id =
318       sequence_state.seen_callstacks[SourceAllocationIndex{
319           upid, alloc.callstack_id, alloc.heap_name}];
320   bool new_callstack;
321   std::tie(std::ignore, new_callstack) =
322       callstacks_for_source_callstack_id.emplace(callstack_id);
323 
324   if (new_callstack) {
325     sequence_state.alloc_correction[alloc.callstack_id] = prev_alloc;
326     sequence_state.free_correction[alloc.callstack_id] = prev_free;
327   }
328 
329   auto alloc_correction_it =
330       sequence_state.alloc_correction.find(alloc.callstack_id);
331   if (alloc_correction_it != sequence_state.alloc_correction.end()) {
332     const auto& alloc_correction = alloc_correction_it->second;
333     alloc_row.count += alloc_correction.count;
334     alloc_row.size += alloc_correction.size;
335   }
336 
337   auto free_correction_it =
338       sequence_state.free_correction.find(alloc.callstack_id);
339   if (free_correction_it != sequence_state.free_correction.end()) {
340     const auto& free_correction = free_correction_it->second;
341     free_row.count += free_correction.count;
342     free_row.size += free_correction.size;
343   }
344 
345   tables::HeapProfileAllocationTable::Row alloc_delta = alloc_row;
346   tables::HeapProfileAllocationTable::Row free_delta = free_row;
347 
348   alloc_delta.count -= prev_alloc.count;
349   alloc_delta.size -= prev_alloc.size;
350 
351   free_delta.count -= prev_free.count;
352   free_delta.size -= prev_free.size;
353 
354   if (alloc_delta.count < 0 || alloc_delta.size < 0 || free_delta.count > 0 ||
355       free_delta.size > 0) {
356     PERFETTO_DLOG("Non-monotonous allocation.");
357     context_->storage->IncrementIndexedStats(stats::heapprofd_malformed_packet,
358                                              static_cast<int>(upid));
359     return;
360   }
361 
362   // Dump at max profiles do not have .count set.
363   if (alloc_delta.count || alloc_delta.size) {
364     context_->storage->mutable_heap_profile_allocation_table()->Insert(
365         alloc_delta);
366   }
367   if (free_delta.count || free_delta.size) {
368     context_->storage->mutable_heap_profile_allocation_table()->Insert(
369         free_delta);
370   }
371 
372   prev_alloc = alloc_row;
373   prev_free = free_row;
374 }
375 
StoreAllocation(uint32_t seq_id,SourceAllocation alloc)376 void HeapProfileTracker::StoreAllocation(uint32_t seq_id,
377                                          SourceAllocation alloc) {
378   SequenceState& sequence_state = sequence_state_[seq_id];
379   sequence_state.pending_allocs.emplace_back(std::move(alloc));
380 }
381 
CommitAllocations(uint32_t seq_id,SequenceStackProfileTracker * sequence_stack_profile_tracker,const SequenceStackProfileTracker::InternLookup * intern_lookup)382 void HeapProfileTracker::CommitAllocations(
383     uint32_t seq_id,
384     SequenceStackProfileTracker* sequence_stack_profile_tracker,
385     const SequenceStackProfileTracker::InternLookup* intern_lookup) {
386   SequenceState& sequence_state = sequence_state_[seq_id];
387   for (const auto& p : sequence_state.pending_allocs)
388     AddAllocation(seq_id, sequence_stack_profile_tracker, p, intern_lookup);
389   sequence_state.pending_allocs.clear();
390 }
391 
FinalizeProfile(uint32_t seq_id,SequenceStackProfileTracker * sequence_stack_profile_tracker,const SequenceStackProfileTracker::InternLookup * intern_lookup)392 void HeapProfileTracker::FinalizeProfile(
393     uint32_t seq_id,
394     SequenceStackProfileTracker* sequence_stack_profile_tracker,
395     const SequenceStackProfileTracker::InternLookup* intern_lookup) {
396   CommitAllocations(seq_id, sequence_stack_profile_tracker, intern_lookup);
397   sequence_stack_profile_tracker->ClearIndices();
398 }
399 
NotifyEndOfFile()400 void HeapProfileTracker::NotifyEndOfFile() {
401   for (const auto& key_and_sequence_state : sequence_state_) {
402     const SequenceState& sequence_state = key_and_sequence_state.second;
403     if (!sequence_state.pending_allocs.empty()) {
404       context_->storage->IncrementStats(stats::heapprofd_non_finalized_profile);
405     }
406   }
407 }
408 
409 }  // namespace trace_processor
410 }  // namespace perfetto
411