1 /*
2 * Copyright (C) 2019 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "src/trace_processor/importers/proto/heap_profile_tracker.h"
18
19 #include "perfetto/base/logging.h"
20 #include "src/trace_processor/importers/common/process_tracker.h"
21 #include "src/trace_processor/types/trace_processor_context.h"
22
23 #include "protos/perfetto/trace/profiling/profile_common.pbzero.h"
24 #include "protos/perfetto/trace/profiling/profile_packet.pbzero.h"
25
26 namespace perfetto {
27 namespace trace_processor {
28
29 namespace {
30 struct MergedCallsite {
31 StringId frame_name;
32 StringId mapping_name;
33 base::Optional<uint32_t> parent_idx;
operator <perfetto::trace_processor::__anon851f64410111::MergedCallsite34 bool operator<(const MergedCallsite& o) const {
35 return std::tie(frame_name, mapping_name, parent_idx) <
36 std::tie(o.frame_name, o.mapping_name, o.parent_idx);
37 }
38 };
39
GetMergedCallsites(TraceStorage * storage,uint32_t callstack_row)40 std::vector<MergedCallsite> GetMergedCallsites(TraceStorage* storage,
41 uint32_t callstack_row) {
42 const tables::StackProfileCallsiteTable& callsites_tbl =
43 storage->stack_profile_callsite_table();
44 const tables::StackProfileFrameTable& frames_tbl =
45 storage->stack_profile_frame_table();
46 const tables::SymbolTable& symbols_tbl = storage->symbol_table();
47 const tables::StackProfileMappingTable& mapping_tbl =
48 storage->stack_profile_mapping_table();
49
50 uint32_t frame_idx =
51 *frames_tbl.id().IndexOf(callsites_tbl.frame_id()[callstack_row]);
52
53 uint32_t mapping_idx =
54 *mapping_tbl.id().IndexOf(frames_tbl.mapping()[frame_idx]);
55 StringId mapping_name = mapping_tbl.name()[mapping_idx];
56
57 base::Optional<uint32_t> symbol_set_id =
58 frames_tbl.symbol_set_id()[frame_idx];
59
60 if (!symbol_set_id) {
61 StringId frame_name = frames_tbl.name()[frame_idx];
62 base::Optional<StringId> deobfuscated_name =
63 frames_tbl.deobfuscated_name()[frame_idx];
64 return {{deobfuscated_name ? *deobfuscated_name : frame_name, mapping_name,
65 base::nullopt}};
66 }
67
68 std::vector<MergedCallsite> result;
69 // id == symbol_set_id for the bottommost frame.
70 // TODO(lalitm): Encode this optimization in the table and remove this
71 // custom optimization.
72 uint32_t symbol_set_idx = *symbols_tbl.id().IndexOf(SymbolId(*symbol_set_id));
73 for (uint32_t i = symbol_set_idx;
74 i < symbols_tbl.row_count() &&
75 symbols_tbl.symbol_set_id()[i] == *symbol_set_id;
76 ++i) {
77 result.emplace_back(
78 MergedCallsite{symbols_tbl.name()[i], mapping_name, base::nullopt});
79 }
80 std::reverse(result.begin(), result.end());
81 return result;
82 }
83 } // namespace
84
BuildNativeFlamegraph(TraceStorage * storage,UniquePid upid,int64_t timestamp)85 std::unique_ptr<tables::ExperimentalFlamegraphNodesTable> BuildNativeFlamegraph(
86 TraceStorage* storage,
87 UniquePid upid,
88 int64_t timestamp) {
89 const tables::HeapProfileAllocationTable& allocation_tbl =
90 storage->heap_profile_allocation_table();
91 const tables::StackProfileCallsiteTable& callsites_tbl =
92 storage->stack_profile_callsite_table();
93
94 StringId profile_type = storage->InternString("native");
95
96 std::vector<uint32_t> callsite_to_merged_callsite(callsites_tbl.row_count(),
97 0);
98 std::map<MergedCallsite, uint32_t> merged_callsites_to_table_idx;
99
100 std::unique_ptr<tables::ExperimentalFlamegraphNodesTable> tbl(
101 new tables::ExperimentalFlamegraphNodesTable(
102 storage->mutable_string_pool(), nullptr));
103
104 // FORWARD PASS:
105 // Aggregate callstacks by frame name / mapping name. Use symbolization
106 // data.
107 for (uint32_t i = 0; i < callsites_tbl.row_count(); ++i) {
108 base::Optional<uint32_t> parent_idx;
109
110 auto opt_parent_id = callsites_tbl.parent_id()[i];
111 if (opt_parent_id) {
112 parent_idx = callsites_tbl.id().IndexOf(*opt_parent_id);
113 // Make sure what we index into has been populated already.
114 PERFETTO_CHECK(*parent_idx < i);
115 parent_idx = callsite_to_merged_callsite[*parent_idx];
116 }
117
118 auto callsites = GetMergedCallsites(storage, i);
119 // Loop below needs to run at least once for parent_idx to get updated.
120 PERFETTO_CHECK(!callsites.empty());
121 for (MergedCallsite& merged_callsite : callsites) {
122 merged_callsite.parent_idx = parent_idx;
123 auto it = merged_callsites_to_table_idx.find(merged_callsite);
124 if (it == merged_callsites_to_table_idx.end()) {
125 std::tie(it, std::ignore) = merged_callsites_to_table_idx.emplace(
126 merged_callsite, merged_callsites_to_table_idx.size());
127 tables::ExperimentalFlamegraphNodesTable::Row row{};
128 if (parent_idx) {
129 row.depth = tbl->depth()[*parent_idx] + 1;
130 } else {
131 row.depth = 0;
132 }
133 row.ts = timestamp;
134 row.upid = upid;
135 row.profile_type = profile_type;
136 row.name = merged_callsite.frame_name;
137 row.map_name = merged_callsite.mapping_name;
138 if (parent_idx)
139 row.parent_id = tbl->id()[*parent_idx];
140
141 parent_idx = tbl->Insert(std::move(row)).row;
142 PERFETTO_CHECK(merged_callsites_to_table_idx.size() ==
143 tbl->row_count());
144 }
145 parent_idx = it->second;
146 }
147
148 PERFETTO_CHECK(parent_idx);
149 callsite_to_merged_callsite[i] = *parent_idx;
150 }
151
152 // PASS OVER ALLOCATIONS:
153 // Aggregate allocations into the newly built tree.
154 auto filtered = allocation_tbl.Filter(
155 {allocation_tbl.ts().le(timestamp), allocation_tbl.upid().eq(upid)});
156
157 if (filtered.row_count() == 0) {
158 return nullptr;
159 }
160
161 for (auto it = filtered.IterateRows(); it; it.Next()) {
162 int64_t size =
163 it.Get(static_cast<uint32_t>(
164 tables::HeapProfileAllocationTable::ColumnIndex::size))
165 .long_value;
166 int64_t count =
167 it.Get(static_cast<uint32_t>(
168 tables::HeapProfileAllocationTable::ColumnIndex::count))
169 .long_value;
170 int64_t callsite_id =
171 it
172 .Get(static_cast<uint32_t>(
173 tables::HeapProfileAllocationTable::ColumnIndex::callsite_id))
174 .long_value;
175
176 PERFETTO_CHECK((size <= 0 && count <= 0) || (size >= 0 && count >= 0));
177 uint32_t merged_idx =
178 callsite_to_merged_callsite[*callsites_tbl.id().IndexOf(
179 CallsiteId(static_cast<uint32_t>(callsite_id)))];
180 // On old heapprofd producers, the count field is incorrectly set and we
181 // zero it in proto_trace_parser.cc.
182 // As such, we cannot depend on count == 0 to imply size == 0, so we check
183 // for both of them separately.
184 if (size > 0) {
185 tbl->mutable_alloc_size()->Set(merged_idx,
186 tbl->alloc_size()[merged_idx] + size);
187 }
188 if (count > 0) {
189 tbl->mutable_alloc_count()->Set(merged_idx,
190 tbl->alloc_count()[merged_idx] + count);
191 }
192
193 tbl->mutable_size()->Set(merged_idx, tbl->size()[merged_idx] + size);
194 tbl->mutable_count()->Set(merged_idx, tbl->count()[merged_idx] + count);
195 }
196
197 // BACKWARD PASS:
198 // Propagate sizes to parents.
199 for (int64_t i = tbl->row_count() - 1; i >= 0; --i) {
200 uint32_t idx = static_cast<uint32_t>(i);
201
202 tbl->mutable_cumulative_size()->Set(
203 idx, tbl->cumulative_size()[idx] + tbl->size()[idx]);
204 tbl->mutable_cumulative_count()->Set(
205 idx, tbl->cumulative_count()[idx] + tbl->count()[idx]);
206
207 tbl->mutable_cumulative_alloc_size()->Set(
208 idx, tbl->cumulative_alloc_size()[idx] + tbl->alloc_size()[idx]);
209 tbl->mutable_cumulative_alloc_count()->Set(
210 idx, tbl->cumulative_alloc_count()[idx] + tbl->alloc_count()[idx]);
211
212 auto parent = tbl->parent_id()[idx];
213 if (parent) {
214 uint32_t parent_idx = *tbl->id().IndexOf(
215 tables::ExperimentalFlamegraphNodesTable::Id(*parent));
216 tbl->mutable_cumulative_size()->Set(
217 parent_idx,
218 tbl->cumulative_size()[parent_idx] + tbl->cumulative_size()[idx]);
219 tbl->mutable_cumulative_count()->Set(
220 parent_idx,
221 tbl->cumulative_count()[parent_idx] + tbl->cumulative_count()[idx]);
222
223 tbl->mutable_cumulative_alloc_size()->Set(
224 parent_idx, tbl->cumulative_alloc_size()[parent_idx] +
225 tbl->cumulative_alloc_size()[idx]);
226 tbl->mutable_cumulative_alloc_count()->Set(
227 parent_idx, tbl->cumulative_alloc_count()[parent_idx] +
228 tbl->cumulative_alloc_count()[idx]);
229 }
230 }
231
232 return tbl;
233 }
234
HeapProfileTracker(TraceProcessorContext * context)235 HeapProfileTracker::HeapProfileTracker(TraceProcessorContext* context)
236 : context_(context), empty_(context_->storage->InternString({"", 0})) {}
237
238 HeapProfileTracker::~HeapProfileTracker() = default;
239
SetProfilePacketIndex(uint32_t seq_id,uint64_t index)240 void HeapProfileTracker::SetProfilePacketIndex(uint32_t seq_id,
241 uint64_t index) {
242 SequenceState& sequence_state = sequence_state_[seq_id];
243 bool dropped_packet = false;
244 // heapprofd starts counting at index = 0.
245 if (!sequence_state.prev_index && index != 0) {
246 dropped_packet = true;
247 }
248
249 if (sequence_state.prev_index && *sequence_state.prev_index + 1 != index) {
250 dropped_packet = true;
251 }
252
253 if (dropped_packet) {
254 if (sequence_state.prev_index) {
255 PERFETTO_ELOG("Missing packets between %" PRIu64 " and %" PRIu64,
256 *sequence_state.prev_index, index);
257 } else {
258 PERFETTO_ELOG("Invalid first packet index %" PRIu64 " (!= 0)", index);
259 }
260
261 context_->storage->IncrementStats(stats::heapprofd_missing_packet);
262 }
263 sequence_state.prev_index = index;
264 }
265
AddAllocation(uint32_t seq_id,SequenceStackProfileTracker * sequence_stack_profile_tracker,const SourceAllocation & alloc,const SequenceStackProfileTracker::InternLookup * intern_lookup)266 void HeapProfileTracker::AddAllocation(
267 uint32_t seq_id,
268 SequenceStackProfileTracker* sequence_stack_profile_tracker,
269 const SourceAllocation& alloc,
270 const SequenceStackProfileTracker::InternLookup* intern_lookup) {
271 SequenceState& sequence_state = sequence_state_[seq_id];
272
273 auto opt_callstack_id = sequence_stack_profile_tracker->FindOrInsertCallstack(
274 alloc.callstack_id, intern_lookup);
275 if (!opt_callstack_id)
276 return;
277
278 CallsiteId callstack_id = *opt_callstack_id;
279
280 UniquePid upid = context_->process_tracker->GetOrCreateProcess(
281 static_cast<uint32_t>(alloc.pid));
282
283 tables::HeapProfileAllocationTable::Row alloc_row{
284 alloc.timestamp,
285 upid,
286 alloc.heap_name,
287 callstack_id,
288 static_cast<int64_t>(alloc.alloc_count),
289 static_cast<int64_t>(alloc.self_allocated)};
290
291 tables::HeapProfileAllocationTable::Row free_row{
292 alloc.timestamp,
293 upid,
294 alloc.heap_name,
295 callstack_id,
296 -static_cast<int64_t>(alloc.free_count),
297 -static_cast<int64_t>(alloc.self_freed)};
298
299 auto prev_alloc_it = sequence_state.prev_alloc.find({upid, callstack_id});
300 if (prev_alloc_it == sequence_state.prev_alloc.end()) {
301 std::tie(prev_alloc_it, std::ignore) = sequence_state.prev_alloc.emplace(
302 std::make_pair(upid, callstack_id),
303 tables::HeapProfileAllocationTable::Row{});
304 }
305
306 tables::HeapProfileAllocationTable::Row& prev_alloc = prev_alloc_it->second;
307
308 auto prev_free_it = sequence_state.prev_free.find({upid, callstack_id});
309 if (prev_free_it == sequence_state.prev_free.end()) {
310 std::tie(prev_free_it, std::ignore) = sequence_state.prev_free.emplace(
311 std::make_pair(upid, callstack_id),
312 tables::HeapProfileAllocationTable::Row{});
313 }
314
315 tables::HeapProfileAllocationTable::Row& prev_free = prev_free_it->second;
316
317 std::set<CallsiteId>& callstacks_for_source_callstack_id =
318 sequence_state.seen_callstacks[SourceAllocationIndex{
319 upid, alloc.callstack_id, alloc.heap_name}];
320 bool new_callstack;
321 std::tie(std::ignore, new_callstack) =
322 callstacks_for_source_callstack_id.emplace(callstack_id);
323
324 if (new_callstack) {
325 sequence_state.alloc_correction[alloc.callstack_id] = prev_alloc;
326 sequence_state.free_correction[alloc.callstack_id] = prev_free;
327 }
328
329 auto alloc_correction_it =
330 sequence_state.alloc_correction.find(alloc.callstack_id);
331 if (alloc_correction_it != sequence_state.alloc_correction.end()) {
332 const auto& alloc_correction = alloc_correction_it->second;
333 alloc_row.count += alloc_correction.count;
334 alloc_row.size += alloc_correction.size;
335 }
336
337 auto free_correction_it =
338 sequence_state.free_correction.find(alloc.callstack_id);
339 if (free_correction_it != sequence_state.free_correction.end()) {
340 const auto& free_correction = free_correction_it->second;
341 free_row.count += free_correction.count;
342 free_row.size += free_correction.size;
343 }
344
345 tables::HeapProfileAllocationTable::Row alloc_delta = alloc_row;
346 tables::HeapProfileAllocationTable::Row free_delta = free_row;
347
348 alloc_delta.count -= prev_alloc.count;
349 alloc_delta.size -= prev_alloc.size;
350
351 free_delta.count -= prev_free.count;
352 free_delta.size -= prev_free.size;
353
354 if (alloc_delta.count < 0 || alloc_delta.size < 0 || free_delta.count > 0 ||
355 free_delta.size > 0) {
356 PERFETTO_DLOG("Non-monotonous allocation.");
357 context_->storage->IncrementIndexedStats(stats::heapprofd_malformed_packet,
358 static_cast<int>(upid));
359 return;
360 }
361
362 // Dump at max profiles do not have .count set.
363 if (alloc_delta.count || alloc_delta.size) {
364 context_->storage->mutable_heap_profile_allocation_table()->Insert(
365 alloc_delta);
366 }
367 if (free_delta.count || free_delta.size) {
368 context_->storage->mutable_heap_profile_allocation_table()->Insert(
369 free_delta);
370 }
371
372 prev_alloc = alloc_row;
373 prev_free = free_row;
374 }
375
StoreAllocation(uint32_t seq_id,SourceAllocation alloc)376 void HeapProfileTracker::StoreAllocation(uint32_t seq_id,
377 SourceAllocation alloc) {
378 SequenceState& sequence_state = sequence_state_[seq_id];
379 sequence_state.pending_allocs.emplace_back(std::move(alloc));
380 }
381
CommitAllocations(uint32_t seq_id,SequenceStackProfileTracker * sequence_stack_profile_tracker,const SequenceStackProfileTracker::InternLookup * intern_lookup)382 void HeapProfileTracker::CommitAllocations(
383 uint32_t seq_id,
384 SequenceStackProfileTracker* sequence_stack_profile_tracker,
385 const SequenceStackProfileTracker::InternLookup* intern_lookup) {
386 SequenceState& sequence_state = sequence_state_[seq_id];
387 for (const auto& p : sequence_state.pending_allocs)
388 AddAllocation(seq_id, sequence_stack_profile_tracker, p, intern_lookup);
389 sequence_state.pending_allocs.clear();
390 }
391
FinalizeProfile(uint32_t seq_id,SequenceStackProfileTracker * sequence_stack_profile_tracker,const SequenceStackProfileTracker::InternLookup * intern_lookup)392 void HeapProfileTracker::FinalizeProfile(
393 uint32_t seq_id,
394 SequenceStackProfileTracker* sequence_stack_profile_tracker,
395 const SequenceStackProfileTracker::InternLookup* intern_lookup) {
396 CommitAllocations(seq_id, sequence_stack_profile_tracker, intern_lookup);
397 sequence_stack_profile_tracker->ClearIndices();
398 }
399
NotifyEndOfFile()400 void HeapProfileTracker::NotifyEndOfFile() {
401 for (const auto& key_and_sequence_state : sequence_state_) {
402 const SequenceState& sequence_state = key_and_sequence_state.second;
403 if (!sequence_state.pending_allocs.empty()) {
404 context_->storage->IncrementStats(stats::heapprofd_non_finalized_profile);
405 }
406 }
407 }
408
409 } // namespace trace_processor
410 } // namespace perfetto
411