/*
 * Copyright (C) 2019 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "src/trace_processor/importers/ftrace/sched_event_tracker.h"

#include <math.h>

#include <limits>

#include "perfetto/ext/base/utils.h"
#include "src/trace_processor/importers/common/args_tracker.h"
#include "src/trace_processor/importers/common/event_tracker.h"
#include "src/trace_processor/importers/common/process_tracker.h"
#include "src/trace_processor/importers/common/system_info_tracker.h"
#include "src/trace_processor/importers/ftrace/ftrace_descriptors.h"
#include "src/trace_processor/storage/stats.h"
#include "src/trace_processor/types/task_state.h"
#include "src/trace_processor/types/trace_processor_context.h"
#include "src/trace_processor/types/variadic.h"

#include "protos/perfetto/trace/ftrace/ftrace_event.pbzero.h"
#include "protos/perfetto/trace/ftrace/sched.pbzero.h"

namespace perfetto {
namespace trace_processor {

SchedEventTracker::SchedEventTracker(TraceProcessorContext* context)
    : context_(context) {
  // Pre-intern the event name and field names of sched_switch so they don't
  // have to be interned for every event.
  auto* switch_descriptor = GetMessageDescriptorForId(
      protos::pbzero::FtraceEvent::kSchedSwitchFieldNumber);
  PERFETTO_CHECK(switch_descriptor->max_field_id == kSchedSwitchMaxFieldId);

  for (size_t i = 1; i <= kSchedSwitchMaxFieldId; i++) {
    sched_switch_field_ids_[i] =
        context->storage->InternString(switch_descriptor->fields[i].name);
  }
  sched_switch_id_ = context->storage->InternString(switch_descriptor->name);

  // Do the same for sched_waking.
  auto* waking_descriptor = GetMessageDescriptorForId(
      protos::pbzero::FtraceEvent::kSchedWakingFieldNumber);
  PERFETTO_CHECK(waking_descriptor->max_field_id == kSchedWakingMaxFieldId);

  for (size_t i = 1; i <= kSchedWakingMaxFieldId; i++) {
    sched_waking_field_ids_[i] =
        context->storage->InternString(waking_descriptor->fields[i].name);
  }
  sched_waking_id_ = context->storage->InternString(waking_descriptor->name);
}

SchedEventTracker::~SchedEventTracker() = default;

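// Handles a full (non-compact) sched_switch event: closes the slice opened
// by the previous switch on |cpu| and opens a new one for the incoming task.
// An illustrative ftrace line carrying these arguments:
//   sched_switch: prev_comm=foo prev_pid=10 prev_prio=120 prev_state=S
//   ==> next_comm=bar next_pid=20 next_prio=120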
void SchedEventTracker::PushSchedSwitch(uint32_t cpu,
                                        int64_t ts,
                                        uint32_t prev_pid,
                                        base::StringView prev_comm,
                                        int32_t prev_prio,
                                        int64_t prev_state,
                                        uint32_t next_pid,
                                        base::StringView next_comm,
                                        int32_t next_prio) {
  // At this stage all events should be globally timestamp ordered.
  if (ts < context_->event_tracker->max_timestamp()) {
    PERFETTO_ELOG(
        "sched_switch event out of order by %.4f ms, skipping",
        static_cast<double>(context_->event_tracker->max_timestamp() - ts) /
            1e6);
    context_->storage->IncrementStats(stats::sched_switch_out_of_order);
    return;
  }
  context_->event_tracker->UpdateMaxTimestamp(ts);
  PERFETTO_DCHECK(cpu < kMaxCpus);

  StringId next_comm_id = context_->storage->InternString(next_comm);
  UniqueTid next_utid = context_->process_tracker->UpdateThreadName(
      next_pid, next_comm_id, ThreadNamePriority::kFtrace);

  // First use this data to close the previous slice.
  bool prev_pid_match_prev_next_pid = false;
  auto* pending_sched = &pending_sched_per_cpu_[cpu];
  uint32_t pending_slice_idx = pending_sched->pending_slice_storage_idx;
  if (pending_slice_idx < std::numeric_limits<uint32_t>::max()) {
    prev_pid_match_prev_next_pid = prev_pid == pending_sched->last_pid;
    if (PERFETTO_LIKELY(prev_pid_match_prev_next_pid)) {
      ClosePendingSlice(pending_slice_idx, ts, prev_state);
    } else {
      // If the pids are not consistent, make a note of this.
      context_->storage->IncrementStats(stats::mismatched_sched_switch_tids);
    }
  }

  // We have to intern prev_comm again because our assumption that
  // this event's |prev_comm| == previous event's |next_comm| does not hold
  // if the thread changed its name while scheduled.
  StringId prev_comm_id = context_->storage->InternString(prev_comm);
  UniqueTid prev_utid = context_->process_tracker->UpdateThreadName(
      prev_pid, prev_comm_id, ThreadNamePriority::kFtrace);

  auto new_slice_idx = AddRawEventAndStartSlice(
      cpu, ts, prev_utid, prev_pid, prev_comm_id, prev_prio, prev_state,
      next_utid, next_pid, next_comm_id, next_prio);

  // Finally, update the info for the next sched switch on this CPU.
  pending_sched->pending_slice_storage_idx = new_slice_idx;
  pending_sched->last_pid = next_pid;
  pending_sched->last_utid = next_utid;
  pending_sched->last_prio = next_prio;
}

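// Handles a sched_switch event decoded from the compact encoding, which
// carries only |prev_state| and the next_* fields; the prev_* fields are
// inferred from the state saved by the previous switch on the same cpu.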
void SchedEventTracker::PushSchedSwitchCompact(uint32_t cpu,
                                               int64_t ts,
                                               int64_t prev_state,
                                               uint32_t next_pid,
                                               int32_t next_prio,
                                               StringId next_comm_id) {
  // At this stage all events should be globally timestamp ordered.
  if (ts < context_->event_tracker->max_timestamp()) {
    PERFETTO_ELOG(
        "sched_switch event out of order by %.4f ms, skipping",
        static_cast<double>(context_->event_tracker->max_timestamp() - ts) /
            1e6);
    context_->storage->IncrementStats(stats::sched_switch_out_of_order);
    return;
  }
  context_->event_tracker->UpdateMaxTimestamp(ts);
  PERFETTO_DCHECK(cpu < kMaxCpus);

  UniqueTid next_utid = context_->process_tracker->UpdateThreadName(
      next_pid, next_comm_id, ThreadNamePriority::kFtrace);

  auto* pending_sched = &pending_sched_per_cpu_[cpu];

  // If we're processing the first compact event for this cpu, don't start a
  // slice since we're missing the "prev_*" fields. The successive events will
  // create slices as normal, but the first per-cpu switch is effectively
  // discarded.
  if (pending_sched->last_utid == std::numeric_limits<UniqueTid>::max()) {
    context_->storage->IncrementStats(stats::compact_sched_switch_skipped);

    pending_sched->last_pid = next_pid;
    pending_sched->last_utid = next_utid;
    pending_sched->last_prio = next_prio;
    // Note: no pending slice, so leave |pending_slice_storage_idx| in its
    // invalid state.
    return;
  }

  // Close the pending slice if any (we won't have one when processing the
  // first two compact events for a given cpu).
  uint32_t pending_slice_idx = pending_sched->pending_slice_storage_idx;
  if (pending_slice_idx < std::numeric_limits<uint32_t>::max())
    ClosePendingSlice(pending_slice_idx, ts, prev_state);

  // Use the previous event's values to infer this event's "prev_*" fields.
  // There are edge cases, but this assumption should still produce sensible
  // results in the absence of data loss.
  UniqueTid prev_utid = pending_sched->last_utid;
  uint32_t prev_pid = pending_sched->last_pid;
  int32_t prev_prio = pending_sched->last_prio;

  // Do a fresh task name lookup in case it was updated by a task_rename while
  // scheduled.
  StringId prev_comm_id = context_->storage->thread_table().name()[prev_utid];

  auto new_slice_idx = AddRawEventAndStartSlice(
      cpu, ts, prev_utid, prev_pid, prev_comm_id, prev_prio, prev_state,
      next_utid, next_pid, next_comm_id, next_prio);

  // Finally, update the info for the next sched switch on this CPU.
  pending_sched->pending_slice_storage_idx = new_slice_idx;
  pending_sched->last_pid = next_pid;
  pending_sched->last_utid = next_utid;
  pending_sched->last_prio = next_prio;
}

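// Common tail for both sched_switch paths: optionally records the raw event
// and its args, then opens a zero-duration scheduling slice for the incoming
// task. Returns the row index of the new slice in the sched_slice table so
// that the next switch on this cpu can close it.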
PERFETTO_ALWAYS_INLINE
uint32_t SchedEventTracker::AddRawEventAndStartSlice(uint32_t cpu,
                                                     int64_t ts,
                                                     UniqueTid prev_utid,
                                                     uint32_t prev_pid,
                                                     StringId prev_comm_id,
                                                     int32_t prev_prio,
                                                     int64_t prev_state,
                                                     UniqueTid next_utid,
                                                     uint32_t next_pid,
                                                     StringId next_comm_id,
                                                     int32_t next_prio) {
  if (PERFETTO_LIKELY(context_->config.ingest_ftrace_in_raw_table)) {
    // Push the raw event; this is needed because the generic raw ftrace
    // event codepath does not insert sched_switch.
    RawId id = context_->storage->mutable_raw_table()
                   ->Insert({ts, sched_switch_id_, cpu, prev_utid})
                   .id;

    // Note: this ordering is important. The args must be pushed in the same
    // order as the fields appear in the proto, as the raw table uses the
    // field ids to index them.
    using SS = protos::pbzero::SchedSwitchFtraceEvent;

    auto inserter = context_->args_tracker->AddArgsTo(id);
    auto add_raw_arg = [this, &inserter](int field_num, Variadic var) {
      StringId key = sched_switch_field_ids_[static_cast<size_t>(field_num)];
      inserter.AddArg(key, var);
    };
    add_raw_arg(SS::kPrevCommFieldNumber, Variadic::String(prev_comm_id));
    add_raw_arg(SS::kPrevPidFieldNumber, Variadic::Integer(prev_pid));
    add_raw_arg(SS::kPrevPrioFieldNumber, Variadic::Integer(prev_prio));
    add_raw_arg(SS::kPrevStateFieldNumber, Variadic::Integer(prev_state));
    add_raw_arg(SS::kNextCommFieldNumber, Variadic::String(next_comm_id));
    add_raw_arg(SS::kNextPidFieldNumber, Variadic::Integer(next_pid));
    add_raw_arg(SS::kNextPrioFieldNumber, Variadic::Integer(next_prio));
  }

  // Open a new scheduling slice, corresponding to the task that was
  // just switched to.
  auto* sched = context_->storage->mutable_sched_slice_table();
  auto row_and_id = sched->Insert(
      {ts, 0 /* duration */, cpu, next_utid, kNullStringId, next_prio});
  SchedId sched_id = row_and_id.id;
  return *sched->id().IndexOf(sched_id);
}

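// Completes the slice opened by the previous sched_switch on this cpu by
// setting its duration and its end state (decoded from |prev_state|).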
PERFETTO_ALWAYS_INLINE
void SchedEventTracker::ClosePendingSlice(uint32_t pending_slice_idx,
                                          int64_t ts,
                                          int64_t prev_state) {
  auto* slices = context_->storage->mutable_sched_slice_table();

  int64_t duration = ts - slices->ts()[pending_slice_idx];
  slices->mutable_dur()->Set(pending_slice_idx, duration);

  // We store the state as a uint16_t: only values up to 2048 are considered
  // when unpacking it, and narrowing from int64_t saves 48 bits per slice.
  auto kernel_version =
      SystemInfoTracker::GetOrCreate(context_)->GetKernelVersion();
  auto task_state = ftrace_utils::TaskState(static_cast<uint16_t>(prev_state),
                                            kernel_version);
  if (!task_state.is_valid()) {
    context_->storage->IncrementStats(stats::task_state_invalid);
  }
  auto id = task_state.is_valid()
                ? context_->storage->InternString(task_state.ToString().data())
                : kNullStringId;
  slices->mutable_end_state()->Set(pending_slice_idx, id);
}

// Processes a sched_waking that was decoded from a compact representation,
// adding to the raw and instant tables.
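// An illustrative ftrace line for this event:
//   sched_waking: comm=foo pid=10 prio=120 success=1 target_cpu=002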
void SchedEventTracker::PushSchedWakingCompact(uint32_t cpu,
                                               int64_t ts,
                                               uint32_t wakee_pid,
                                               int32_t target_cpu,
                                               int32_t prio,
                                               StringId comm_id) {
  // At this stage all events should be globally timestamp ordered.
  if (ts < context_->event_tracker->max_timestamp()) {
    PERFETTO_ELOG(
        "sched_waking event out of order by %.4f ms, skipping",
        static_cast<double>(context_->event_tracker->max_timestamp() - ts) /
            1e6);
    context_->storage->IncrementStats(stats::sched_waking_out_of_order);
    return;
  }
  context_->event_tracker->UpdateMaxTimestamp(ts);
  PERFETTO_DCHECK(cpu < kMaxCpus);

  // We infer the task that emitted the event (i.e. common_pid) from the
  // scheduling slices. Drop the event if we haven't seen any sched_switch
  // events for this cpu yet.
  // Note that if sched_switch wasn't enabled, we will have to skip all
  // compact waking events.
  auto* pending_sched = &pending_sched_per_cpu_[cpu];
  if (pending_sched->last_utid == std::numeric_limits<UniqueTid>::max()) {
    context_->storage->IncrementStats(stats::compact_sched_waking_skipped);
    return;
  }
  auto curr_utid = pending_sched->last_utid;

  if (PERFETTO_LIKELY(context_->config.ingest_ftrace_in_raw_table)) {
    // Add an entry to the raw table.
    RawId id = context_->storage->mutable_raw_table()
                   ->Insert({ts, sched_waking_id_, cpu, curr_utid})
                   .id;

    // "success" is hardcoded as always 1 by the kernel, with a TODO to remove
    // it.
    static constexpr int32_t kHardcodedSuccess = 1;

    using SW = protos::pbzero::SchedWakingFtraceEvent;
    auto inserter = context_->args_tracker->AddArgsTo(id);
    auto add_raw_arg = [this, &inserter](int field_num, Variadic var) {
      StringId key = sched_waking_field_ids_[static_cast<size_t>(field_num)];
      inserter.AddArg(key, var);
    };
    add_raw_arg(SW::kCommFieldNumber, Variadic::String(comm_id));
    add_raw_arg(SW::kPidFieldNumber, Variadic::Integer(wakee_pid));
    add_raw_arg(SW::kPrioFieldNumber, Variadic::Integer(prio));
    add_raw_arg(SW::kSuccessFieldNumber, Variadic::Integer(kHardcodedSuccess));
    add_raw_arg(SW::kTargetCpuFieldNumber, Variadic::Integer(target_cpu));
  }

  // Add a waking entry to the instant table.
  auto wakee_utid = context_->process_tracker->GetOrCreateThread(wakee_pid);
  auto* instants = context_->storage->mutable_instant_table();
  auto ref_type_id = context_->storage->InternString(
      GetRefTypeStringMap()[static_cast<size_t>(RefType::kRefUtid)]);
  instants->Insert({ts, sched_waking_id_, wakee_utid, ref_type_id});
}

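// Called at the end of the trace to close the still-open slice on each cpu.
// The end state is recorded as runnable, since the true end state is unknown.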
void SchedEventTracker::FlushPendingEvents() {
  // TODO(lalitm): the day this method is called before end of trace, don't
  // flush the sched events as they will probably be pushed in the next round
  // of ftrace events.
  int64_t end_ts = context_->storage->GetTraceTimestampBoundsNs().second;
  auto* slices = context_->storage->mutable_sched_slice_table();
  for (const auto& pending_sched : pending_sched_per_cpu_) {
    uint32_t row = pending_sched.pending_slice_storage_idx;
    if (row == std::numeric_limits<uint32_t>::max())
      continue;

    int64_t duration = end_ts - slices->ts()[row];
    slices->mutable_dur()->Set(row, duration);

    auto state = ftrace_utils::TaskState(ftrace_utils::TaskState::kRunnable);
    auto id = context_->storage->InternString(state.ToString().data());
    slices->mutable_end_state()->Set(row, id);
  }

  pending_sched_per_cpu_ = {};
}

}  // namespace trace_processor
}  // namespace perfetto