1 /*
2  * time_in_state eBPF program
3  *
4  * Copyright (C) 2018 Google
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License version
8  * 2 as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  *
15  */
16 
17 #include <bpf_helpers.h>
18 #include <bpf_timeinstate.h>
19 
// System-wide time-in-state totals, summed over all uids, indexed by a freq
// index capped at MAX_FREQS_FOR_TOTAL. PERCPU so updates need no locking;
// userspace sums the per-CPU slots.
DEFINE_BPF_MAP_GRW(total_time_in_state_map, PERCPU_ARRAY, uint32_t, uint64_t, MAX_FREQS_FOR_TOTAL,
                   AID_SYSTEM)

// Per-uid time-in-state, keyed by (uid, frequency bucket); each value holds
// FREQS_PER_ENTRY slots.
DEFINE_BPF_MAP_GRW(uid_time_in_state_map, PERCPU_HASH, time_key_t, tis_val_t, 1024, AID_SYSTEM)

// Per-uid time spent at each level of CPU concurrency, keyed by
// (uid, cpu-count bucket); holds both system-wide and per-policy counters.
DEFINE_BPF_MAP_GRW(uid_concurrent_times_map, PERCPU_HASH, time_key_t, concurrent_val_t, 1024, AID_SYSTEM)
// Timestamp (ns, CLOCK_MONOTONIC via bpf_ktime_get_ns) of the most recent
// stats update for each uid.
DEFINE_BPF_MAP_GRW(uid_last_update_map, HASH, uint32_t, uint64_t, 1024, AID_SYSTEM)

// Timestamp (ns) of the previous sched_switch seen on the current CPU;
// single per-CPU slot at index 0. 0 means this CPU has not been seen yet.
DEFINE_BPF_MAP_GWO(cpu_last_update_map, PERCPU_ARRAY, uint32_t, uint64_t, 1, AID_SYSTEM)

// cpu id -> cpufreq policy (cluster) id. Only read here; presumably filled
// in by the system_server side — GWO grants it write access.
DEFINE_BPF_MAP_GWO(cpu_policy_map, ARRAY, uint32_t, uint32_t, 1024, AID_SYSTEM)
// policy id -> current 1-based frequency index (0 == uninitialized; see the
// "- 1" adjustment in tp_sched_switch).
DEFINE_BPF_MAP_GWO(policy_freq_idx_map, ARRAY, uint32_t, uint8_t, 1024, AID_SYSTEM)

// (policy, raw frequency) -> 1-based frequency index, used by tp_cpufreq to
// translate cpu_frequency tracepoint values.
DEFINE_BPF_MAP_GWO(freq_to_idx_map, HASH, freq_idx_key_t, uint8_t, 2048, AID_SYSTEM)

// Number of currently non-idle CPUs, system-wide (single slot at index 0).
DEFINE_BPF_MAP_GWO(nr_active_map, ARRAY, uint32_t, uint32_t, 1, AID_SYSTEM)
// Number of currently non-idle CPUs within each cpufreq policy.
DEFINE_BPF_MAP_GWO(policy_nr_active_map, ARRAY, uint32_t, uint32_t, 1024, AID_SYSTEM)

// Membership hash of tracked pids; entries are deleted on process free.
DEFINE_BPF_MAP_GWO(pid_tracked_hash_map, HASH, uint32_t, pid_t, MAX_TRACKED_PIDS, AID_SYSTEM)
// Ordered array of tracked pids; a TRACKED_PID_STATE_UNUSED entry marks the
// end of the list for the unrolled scans below.
DEFINE_BPF_MAP_GWO(pid_tracked_map, ARRAY, uint32_t, tracked_pid_t, MAX_TRACKED_PIDS, AID_SYSTEM)
// thread pid -> aggregation key, grouping threads of a tracked process.
DEFINE_BPF_MAP_GWO(pid_task_aggregation_map, HASH, pid_t, uint16_t, 1024, AID_SYSTEM)
// Time-in-state aggregated per (tgid, aggregation key, freq bucket) for
// tracked processes.
DEFINE_BPF_MAP_GRO(pid_time_in_state_map, PERCPU_HASH, aggregated_task_tis_key_t, tis_val_t, 1024,
                   AID_SYSTEM)
43 
// Raw argument layout of the sched/sched_switch tracepoint. Field order and
// sizes must match the kernel's event format (see
// /sys/kernel/debug/tracing/events/sched/sched_switch/format); do not
// reorder or resize fields.
struct switch_args {
    unsigned long long ignore;  // common trace event header, unused here
    char prev_comm[16];         // comm of the task being switched out
    int prev_pid;               // 0 when switching away from the idle task
    int prev_prio;
    long long prev_state;
    char next_comm[16];         // comm of the task being switched in
    int next_pid;               // 0 when switching to the idle task
    int next_prio;
};
54 
// Runs on every context switch. Charges the interval since the previous
// switch on this CPU to the outgoing task's uid: per-uid and total
// time-in-state, optional per-process aggregation for tracked pids, and
// per-uid concurrent-CPU-count stats. Also maintains the system-wide and
// per-policy active-CPU counters used for the concurrency buckets.
DEFINE_BPF_PROG("tracepoint/sched/sched_switch", AID_ROOT, AID_SYSTEM, tp_sched_switch)
(struct switch_args* args) {
    const int ALLOW = 1;  // return 1 to avoid blocking simpleperf from receiving events.
    uint32_t zero = 0;
    // Timestamp of the previous sched_switch on this CPU (per-CPU slot 0);
    // 0 means this CPU has not been seen before.
    uint64_t* last = bpf_cpu_last_update_map_lookup_elem(&zero);
    if (!last) return ALLOW;
    uint64_t old_last = *last;
    uint64_t time = bpf_ktime_get_ns();
    *last = time;

    // System-wide count of non-idle CPUs.
    uint32_t* active = bpf_nr_active_map_lookup_elem(&zero);
    if (!active) return ALLOW;

    uint32_t cpu = bpf_get_smp_processor_id();
    uint32_t* policyp = bpf_cpu_policy_map_lookup_elem(&cpu);
    if (!policyp) return ALLOW;
    uint32_t policy = *policyp;

    // Count of non-idle CPUs within this CPU's cpufreq policy (cluster).
    uint32_t* policy_active = bpf_policy_nr_active_map_lookup_elem(&policy);
    if (!policy_active) return ALLOW;

    // The counts include this CPU; subtract 1 to get 0-based bucket indices
    // for the concurrent_times arrays below.
    uint32_t nactive = *active - 1;
    uint32_t policy_nactive = *policy_active - 1;

    // This CPU becomes active when exiting idle (prev_pid == 0), or when it
    // is seen for the first time while already running a real task.
    if (!args->prev_pid || (!old_last && args->next_pid)) {
        __sync_fetch_and_add(active, 1);
        __sync_fetch_and_add(policy_active, 1);
    }

    // Return here in 2 scenarios:
    // 1) prev_pid == 0, so we're exiting idle. No UID stats need updating, and active CPUs can't be
    //    decreasing.
    // 2) old_last == 0, so this is the first time we've seen this CPU. Any delta will be invalid,
    //    and our active CPU counts don't include this CPU yet so we shouldn't decrement them even
    //    if we're going idle.
    if (!args->prev_pid || !old_last) return ALLOW;

    // Switching to the idle task: this CPU is going idle.
    if (!args->next_pid) {
        __sync_fetch_and_add(active, -1);
        __sync_fetch_and_add(policy_active, -1);
    }

    uint8_t* freq_idxp = bpf_policy_freq_idx_map_lookup_elem(&policy);
    if (!freq_idxp || !*freq_idxp) return ALLOW;
    // freq_to_idx_map uses 1 as its minimum index so that *freq_idxp == 0 only when uninitialized
    uint8_t freq_idx = *freq_idxp - 1;

    // bpf_get_current_uid_gid() packs gid in the upper 32 bits and uid in the
    // lower 32; the narrowing assignment keeps only the uid.
    uint32_t uid = bpf_get_current_uid_gid();
    time_key_t key = {.uid = uid, .bucket = freq_idx / FREQS_PER_ENTRY};
    tis_val_t* val = bpf_uid_time_in_state_map_lookup_elem(&key);
    if (!val) {
        // Lookup-or-create: insert a zeroed entry and re-look it up, since a
        // BPF map update does not return a pointer to the stored value.
        tis_val_t zero_val = {.ar = {0}};
        bpf_uid_time_in_state_map_update_elem(&key, &zero_val, BPF_NOEXIST);
        val = bpf_uid_time_in_state_map_lookup_elem(&key);
    }
    uint64_t delta = time - old_last;
    if (val) val->ar[freq_idx % FREQS_PER_ENTRY] += delta;

    // Add delta to total.
    const uint32_t total_freq_idx = freq_idx < MAX_FREQS_FOR_TOTAL ? freq_idx :
                                    MAX_FREQS_FOR_TOTAL - 1;
    uint64_t* total = bpf_total_time_in_state_map_lookup_elem(&total_freq_idx);
    if (total) *total += delta;

    const int pid = args->prev_pid;
    // bpf_get_current_pid_tgid() packs tgid in the upper 32 bits.
    const pid_t tgid = bpf_get_current_pid_tgid() >> 32;
    bool is_tgid_tracked = false;

    // eBPF verifier does not currently allow loops.
    // Instruct the C compiler to unroll the loop into a series of steps.
    #pragma unroll
    for (uint32_t index = 0; index < MAX_TRACKED_PIDS; index++) {
        const uint32_t key = index;
        tracked_pid_t* tracked_pid = bpf_pid_tracked_map_lookup_elem(&key);
        if (!tracked_pid) continue;
        if (tracked_pid->state == TRACKED_PID_STATE_UNUSED) {
            // Reached the end of the list
            break;
        }

        if (tracked_pid->state == TRACKED_PID_STATE_ACTIVE && tracked_pid->pid == tgid) {
            is_tgid_tracked = true;
            break;
        }
    }

    if (is_tgid_tracked) {
        // If this process is marked for time-in-state tracking, aggregate the CPU time-in-state
        // with other threads sharing the same TGID and aggregation key.
        uint16_t* aggregation_key = bpf_pid_task_aggregation_map_lookup_elem(&pid);
        aggregated_task_tis_key_t task_key = {
                .tgid = tgid,
                .aggregation_key = aggregation_key ? *aggregation_key : 0,
                .bucket = freq_idx / FREQS_PER_ENTRY};
        tis_val_t* task_val = bpf_pid_time_in_state_map_lookup_elem(&task_key);
        if (!task_val) {
            tis_val_t zero_val = {.ar = {0}};
            bpf_pid_time_in_state_map_update_elem(&task_key, &zero_val, BPF_NOEXIST);
            task_val = bpf_pid_time_in_state_map_lookup_elem(&task_key);
        }
        if (task_val) task_val->ar[freq_idx % FREQS_PER_ENTRY] += delta;
    }
    // Concurrency accounting: reuse 'key' (key.uid is still this uid) with
    // the bucket for the system-wide active-CPU count.
    key.bucket = nactive / CPUS_PER_ENTRY;
    concurrent_val_t* ct = bpf_uid_concurrent_times_map_lookup_elem(&key);
    if (!ct) {
        concurrent_val_t zero_val = {.active = {0}, .policy = {0}};
        bpf_uid_concurrent_times_map_update_elem(&key, &zero_val, BPF_NOEXIST);
        ct = bpf_uid_concurrent_times_map_lookup_elem(&key);
    }
    if (ct) ct->active[nactive % CPUS_PER_ENTRY] += delta;

    // The per-policy count may fall in a different bucket; only re-look-up
    // when it does, otherwise reuse the entry fetched above.
    if (policy_nactive / CPUS_PER_ENTRY != key.bucket) {
        key.bucket = policy_nactive / CPUS_PER_ENTRY;
        ct = bpf_uid_concurrent_times_map_lookup_elem(&key);
        if (!ct) {
            concurrent_val_t zero_val = {.active = {0}, .policy = {0}};
            bpf_uid_concurrent_times_map_update_elem(&key, &zero_val, BPF_NOEXIST);
            ct = bpf_uid_concurrent_times_map_lookup_elem(&key);
        }
    }
    if (ct) ct->policy[policy_nactive % CPUS_PER_ENTRY] += delta;
    // Record when this uid's stats were last touched so userspace can tell
    // which uids have fresh data.
    uint64_t* uid_last_update = bpf_uid_last_update_map_lookup_elem(&uid);
    if (uid_last_update) {
        *uid_last_update = time;
    } else {
        bpf_uid_last_update_map_update_elem(&uid, &time, BPF_NOEXIST);
    }
    return ALLOW;
}
184 
// Raw argument layout of the power/cpu_frequency tracepoint; must match the
// kernel's event format (see
// /sys/kernel/debug/tracing/events/power/cpu_frequency/format).
struct cpufreq_args {
    unsigned long long ignore;  // common trace event header, unused here
    unsigned int state;         // new frequency, in the kernel's reported units
    unsigned int cpu_id;        // CPU whose frequency changed
};
190 
191 DEFINE_BPF_PROG("tracepoint/power/cpu_frequency", AID_ROOT, AID_SYSTEM, tp_cpufreq)
192 (struct cpufreq_args* args) {
193     uint32_t cpu = args->cpu_id;
194     unsigned int new = args->state;
195     uint32_t* policyp = bpf_cpu_policy_map_lookup_elem(&cpu);
196     if (!policyp) return 0;
197     uint32_t policy = *policyp;
198     freq_idx_key_t key = {.policy = policy, .freq = new};
199     uint8_t* idxp = bpf_freq_to_idx_map_lookup_elem(&key);
200     if (!idxp) return 0;
201     uint8_t idx = *idxp;
202     bpf_policy_freq_idx_map_update_elem(&policy, &idx, BPF_ANY);
203     return 0;
204 }
205 
// The format of the sched/sched_process_free event is described in
// adb shell cat /d/tracing/events/sched/sched_process_free/format
// Field order and sizes must match that format exactly.
struct sched_process_free_args {
    unsigned long long ignore;  // common trace event header, unused here
    char comm[16];              // comm of the task being freed
    pid_t pid;                  // pid of the task being freed
    int prio;
};
214 
// Runs when a task's descriptor is freed. If the dead pid occupies a slot in
// pid_tracked_map, releases that slot and removes the pid from
// pid_tracked_hash_map; always drops the pid's aggregation-key mapping.
DEFINE_BPF_PROG("tracepoint/sched/sched_process_free", AID_ROOT, AID_SYSTEM, tp_sched_process_free)
(struct sched_process_free_args* args) {
    const int ALLOW = 1;

    int pid = args->pid;
    // True while no ACTIVE entry has been seen at a higher index than the
    // current one during the backward scan below.
    bool is_last = true;

    // eBPF verifier does not currently allow loops.
    // Instruct the C compiler to unroll the loop into a series of steps.
    #pragma unroll
    // Scan from the tail so that when the matching slot is found, is_last
    // tells whether any ACTIVE entry follows it. Only a trailing slot may be
    // marked UNUSED, because UNUSED terminates the forward scan in
    // tp_sched_switch; an interior slot is marked EXITED instead.
    for (uint32_t index = 0; index < MAX_TRACKED_PIDS; index++) {
        const uint32_t key = MAX_TRACKED_PIDS - index - 1;
        tracked_pid_t* tracked_pid = bpf_pid_tracked_map_lookup_elem(&key);
        if (!tracked_pid) continue;
        if (tracked_pid->pid == pid) {
            tracked_pid->pid = 0;
            tracked_pid->state = is_last ? TRACKED_PID_STATE_UNUSED : TRACKED_PID_STATE_EXITED;
            bpf_pid_tracked_hash_map_delete_elem(&key);
            break;
        }
        if (tracked_pid->state == TRACKED_PID_STATE_ACTIVE) {
            is_last = false;
        }
    }

    // Remove the thread's aggregation key regardless of tracking state;
    // deleting a nonexistent key is harmless.
    bpf_pid_task_aggregation_map_delete_elem(&pid);
    return ALLOW;
}
243 
244 LICENSE("GPL");
245