1 /*
2  * Copyright 2022 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "include/gpuwork/gpuWork.h"
18 
19 #include <linux/bpf.h>
20 #include <stddef.h>
21 #include <stdint.h>
22 
23 #ifdef MOCK_BPF
24 #include <test/mock_bpf_helpers.h>
25 #else
26 #include <bpf_helpers.h>
27 #endif
28 
29 #define S_IN_NS (1000000000)
30 #define SMALL_TIME_GAP_LIMIT_NS (S_IN_NS)
31 
// A map from |GpuIdUid| (GPU ID and application UID) to |UidTrackingInfo|.
33 DEFINE_BPF_MAP_GRW(gpu_work_map, HASH, GpuIdUid, UidTrackingInfo, kMaxTrackedGpuIdUids,
34                    AID_GRAPHICS);
35 
36 // A map containing a single entry of |GlobalData|.
37 DEFINE_BPF_MAP_GRW(gpu_work_global_data, ARRAY, uint32_t, GlobalData, 1, AID_GRAPHICS);
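
// For reference, this program reads and updates the following fields of the
// types declared in include/gpuwork/gpuWork.h (see that header for the
// authoritative definitions): |GpuIdUid| provides |gpu_id| and |uid|;
// |UidTrackingInfo| provides |error_count|, |total_active_duration_ns|,
// |total_inactive_duration_ns|, and |previous_active_end_time_ns|; and
// |GlobalData| provides |num_map_entries|.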
38 
39 // Defines the structure of the kernel tracepoint:
40 //
41 //  /sys/kernel/tracing/events/power/gpu_work_period/
42 //
// Drivers must define an appropriate gpu_work_period kernel tracepoint (for
// example, using the DECLARE_EVENT_CLASS and DEFINE_EVENT macros; an
// illustrative sketch of such a declaration is given after the
// |GpuWorkPeriodEvent| definition below) such that the arguments/fields match
// the fields of |GpuWorkPeriodEvent|, excluding the initial "common" field.
// Drivers must invoke the tracepoint (also referred to
47 // as emitting the event) as described below. Note that the description below
48 // assumes a single physical GPU and its driver; for devices with multiple GPUs,
49 // each GPU and its driver should emit events independently, using a different
50 // value for |gpu_id| per GPU.
51 //
52 // |GpuWorkPeriodEvent| defines a non-overlapping, non-zero period of time from
53 // |start_time_ns| (inclusive) until |end_time_ns| (exclusive) for a given
54 // |uid|, and includes details of how much work the GPU was performing for |uid|
55 // during the period. When GPU work for a given |uid| runs on the GPU, the
56 // driver must track one or more periods that cover the time where the work was
57 // running, and emit events soon after. The driver should try to emit the event
58 // for a period at most 1 second after |end_time_ns|, and must emit the event at
59 // most 2 seconds after |end_time_ns|. A period's duration (|end_time_ns| -
// |start_time_ns|) must be at most 1 second. Periods for different |uid|s can
61 // overlap, but periods for the same |uid| must not overlap. The driver must
62 // emit events for the same |uid| in strictly increasing order of
63 // |start_time_ns|, such that it is guaranteed that the tracepoint call for a
64 // period for |uid| has returned before the tracepoint call for the next period
65 // for |uid| is made. Note that synchronization may be necessary if the driver
66 // emits events for the same |uid| from different threads/contexts. Note that
67 // |end_time_ns| for a period for a |uid| may equal the |start_time_ns| of the
68 // next period for |uid|. The driver should try to avoid emitting a large number
69 // of events in a short time period (e.g. 1000 events per second) for a given
70 // |uid|.
71 //
72 // The |total_active_duration_ns| must be set to the approximate total amount of
73 // time the GPU spent running work for |uid| within the period, without
74 // "double-counting" parallel GPU work on the same GPU for the same |uid|. Note
75 // that even if the parallel GPU work was submitted from several different
76 // processes (i.e. different PIDs) with the same UID, this overlapping work must
77 // not be double-counted, as it still came from a single |uid|. "GPU work"
78 // should correspond to the "GPU slices" shown in the AGI (Android GPU
79 // Inspector) tool, and so should include work such as fragment and non-fragment
80 // work/shaders running on the shader cores of the GPU. For example, given the
81 // following for a single |uid|:
82 //  - A period has:
83 //    - |start_time_ns|: 100,000,000 ns
84 //    - |end_time_ns|:   800,000,000 ns
85 //  - Some GPU vertex work (A):
86 //    - started at:      200,000,000 ns
87 //    - ended at:        400,000,000 ns
88 //  - Some GPU fragment work (B):
89 //    - started at:      300,000,000 ns
90 //    - ended at:        500,000,000 ns
91 //  - Some GPU fragment work (C):
92 //    - started at:      300,000,000 ns
93 //    - ended at:        400,000,000 ns
94 //  - Some GPU fragment work (D):
95 //    - started at:      600,000,000 ns
96 //    - ended at:        700,000,000 ns
97 //
98 // The |total_active_duration_ns| would be 400,000,000 ns, because GPU work for
99 // |uid| was executing:
100 //  - from 200,000,000 ns to 500,000,000 ns, giving a duration of 300,000,000 ns
101 //    (encompassing GPU work A, B, and C)
102 //  - from 600,000,000 ns to 700,000,000 ns, giving a duration of 100,000,000 ns
103 //    (GPU work D)
104 //
105 // Thus, the |total_active_duration_ns| is the sum of these two
106 // (non-overlapping) durations. Drivers may not have efficient access to the
107 // exact start and end times of all GPU work, as shown above, but drivers should
108 // try to approximate/aggregate the value of |total_active_duration_ns| as
109 // accurately as possible within the limitations of the hardware, without
110 // double-counting parallel GPU work for the same |uid|. The
111 // |total_active_duration_ns| value must be less than or equal to the period
112 // duration (|end_time_ns| - |start_time_ns|); if the aggregation approach might
113 // violate this requirement then the driver must clamp
114 // |total_active_duration_ns| to be at most the period duration.
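//
// For illustration only (this is a sketch of one possible approach, not a
// requirement on drivers), |total_active_duration_ns| corresponds to the
// length of the union of the per-|uid| GPU work intervals within the period.
// Assuming the driver had the intervals available and sorted by start time,
// the union could be computed by merging overlapping intervals:
//
//   struct work_interval { uint64_t start_ns; uint64_t end_ns; };
//
//   // Returns the union duration of |n| intervals sorted by |start_ns|.
//   static uint64_t active_union_ns(const struct work_interval* iv, size_t n) {
//       uint64_t total = 0, cur_start = 0, cur_end = 0;
//       for (size_t i = 0; i < n; ++i) {
//           if (i == 0 || iv[i].start_ns > cur_end) {
//               // Disjoint from the merged interval so far; flush it.
//               total += cur_end - cur_start;
//               cur_start = iv[i].start_ns;
//               cur_end = iv[i].end_ns;
//           } else if (iv[i].end_ns > cur_end) {
//               // Overlapping work; extend the merged interval.
//               cur_end = iv[i].end_ns;
//           }
//       }
//       return total + (cur_end - cur_start);
//   }
//
// Applied to the example above, A, B, and C merge into
// [200,000,000, 500,000,000) (300,000,000 ns) and D contributes
// [600,000,000, 700,000,000) (100,000,000 ns), giving 400,000,000 ns.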
115 //
116 // Protected mode: protected GPU work must not be reported. Periods must be
117 // emitted, and the |total_active_duration_ns| value set, as if the protected
118 // GPU work did not occur.
119 //
120 // Note that the above description allows for a certain amount of flexibility in
121 // how the driver tracks periods and emits the events. We list a few examples of
122 // how drivers might implement the above:
123 //
124 // - 1: The driver could track periods for all |uid| values at fixed intervals
125 //   of 1 second. Thus, every period duration would be exactly 1 second, and
126 //   periods from different |uid|s that overlap would have the same
127 //   |start_time_ns| and |end_time_ns| values.
128 //
129 // - 2: The driver could track periods with many different durations (up to 1
130 //   second), as needed in order to cover the GPU work for each |uid|.
131 //   Overlapping periods for different |uid|s may have very different durations,
132 //   as well as different |start_time_ns| and |end_time_ns| values.
133 //
134 // - 3: The driver could track fine-grained periods with different durations
135 //   that precisely cover the time where GPU work is running for each |uid|.
136 //   Thus, |total_active_duration_ns| would always equal the period duration.
137 //   For example, if a game was running at 60 frames per second, the driver
138 //   would most likely emit _at least_ 60 events per second (probably more, as
139 //   there would likely be multiple "chunks" of GPU work per frame, with gaps
140 //   between each chunk). However, the driver may sometimes need to resort to
141 //   more coarse-grained periods to avoid emitting thousands of events per
142 //   second for a |uid|, where |total_active_duration_ns| would then be less
143 //   than the period duration.
144 typedef struct {
    // The first 8 bytes hold the tracepoint's "common" fields, so the actual
    // event fields start at offset 8.
146     uint64_t common;
147 
148     // A value that uniquely identifies the GPU within the system.
149     uint32_t gpu_id;
150 
151     // The UID of the application (i.e. persistent, unique ID of the Android
152     // app) that submitted work to the GPU.
153     uint32_t uid;
154 
155     // The start time of the period in nanoseconds. The clock must be
156     // CLOCK_MONOTONIC_RAW, as returned by the ktime_get_raw_ns(void) function.
157     uint64_t start_time_ns;
158 
159     // The end time of the period in nanoseconds. The clock must be
160     // CLOCK_MONOTONIC_RAW, as returned by the ktime_get_raw_ns(void) function.
161     uint64_t end_time_ns;
162 
163     // The amount of time the GPU was running GPU work for |uid| during the
164     // period, in nanoseconds, without double-counting parallel GPU work for the
165     // same |uid|. For example, this might include the amount of time the GPU
166     // spent performing shader work (vertex work, fragment work, etc.) for
167     // |uid|.
168     uint64_t total_active_duration_ns;
169 
170 } GpuWorkPeriodEvent;
171 
172 _Static_assert(offsetof(GpuWorkPeriodEvent, gpu_id) == 8 &&
173                        offsetof(GpuWorkPeriodEvent, uid) == 12 &&
174                        offsetof(GpuWorkPeriodEvent, start_time_ns) == 16 &&
175                        offsetof(GpuWorkPeriodEvent, end_time_ns) == 24 &&
176                        offsetof(GpuWorkPeriodEvent, total_active_duration_ns) == 32,
177                "Field offsets of struct GpuWorkPeriodEvent must not be changed because they "
178                "must match the tracepoint field offsets found via adb shell cat "
179                "/sys/kernel/tracing/events/power/gpu_work_period/format");
180 
181 DEFINE_BPF_PROG("tracepoint/power/gpu_work_period", AID_ROOT, AID_GRAPHICS, tp_gpu_work_period)
182 (GpuWorkPeriodEvent* const period) {
183     // Note: In eBPF programs, |__sync_fetch_and_add| is translated to an atomic
184     // add.
185 
186     // Return 1 to avoid blocking simpleperf from receiving events.
187     const int ALLOW = 1;
188 
189     GpuIdUid gpu_id_and_uid;
190     __builtin_memset(&gpu_id_and_uid, 0, sizeof(gpu_id_and_uid));
191     gpu_id_and_uid.gpu_id = period->gpu_id;
192     gpu_id_and_uid.uid = period->uid;
193 
194     // Get |UidTrackingInfo|.
195     UidTrackingInfo* uid_tracking_info = bpf_gpu_work_map_lookup_elem(&gpu_id_and_uid);
196     if (!uid_tracking_info) {
197         // There was no existing entry, so we add a new one.
198         UidTrackingInfo initial_info;
199         __builtin_memset(&initial_info, 0, sizeof(initial_info));
200         if (0 == bpf_gpu_work_map_update_elem(&gpu_id_and_uid, &initial_info, BPF_NOEXIST)) {
201             // We added an entry to the map, so we increment our entry counter in
202             // |GlobalData|.
203             const uint32_t zero = 0;
204             // Get the |GlobalData|.
205             GlobalData* global_data = bpf_gpu_work_global_data_lookup_elem(&zero);
206             // Getting the global data never fails because it is an |ARRAY| map,
207             // but we need to keep the verifier happy.
208             if (global_data) {
209                 __sync_fetch_and_add(&global_data->num_map_entries, 1);
210             }
211         }
212         uid_tracking_info = bpf_gpu_work_map_lookup_elem(&gpu_id_and_uid);
213         if (!uid_tracking_info) {
214             // This should never happen, unless entries are getting deleted at
215             // this moment. If so, we just give up.
216             return ALLOW;
217         }
218     }
219 
220     if (
221             // The period duration must be non-zero.
222             period->start_time_ns >= period->end_time_ns ||
223             // The period duration must be at most 1 second.
224             (period->end_time_ns - period->start_time_ns) > S_IN_NS) {
225         __sync_fetch_and_add(&uid_tracking_info->error_count, 1);
226         return ALLOW;
227     }
228 
229     // If |total_active_duration_ns| is 0 then no GPU work occurred and there is
230     // nothing to do.
231     if (period->total_active_duration_ns == 0) {
232         return ALLOW;
233     }
234 
235     // Update |uid_tracking_info->total_active_duration_ns|.
236     __sync_fetch_and_add(&uid_tracking_info->total_active_duration_ns,
237                          period->total_active_duration_ns);
238 
239     // |small_gap_time_ns| is the time gap between the current and previous
240     // active period, which could be 0. If the gap is more than
241     // |SMALL_TIME_GAP_LIMIT_NS| then |small_gap_time_ns| will be set to 0
242     // because we want to estimate the small gaps between "continuous" GPU work.
243     uint64_t small_gap_time_ns = 0;
244     if (uid_tracking_info->previous_active_end_time_ns > period->start_time_ns) {
245         // The current period appears to have occurred before the previous
246         // active period, which must not happen because per-UID periods must not
247         // overlap and must be emitted in strictly increasing order of
248         // |start_time_ns|.
249         __sync_fetch_and_add(&uid_tracking_info->error_count, 1);
250     } else {
251         // The current period appears to have been emitted after the previous
252         // active period, as expected, so we can calculate the gap between the
253         // current and previous active period.
254         small_gap_time_ns = period->start_time_ns - uid_tracking_info->previous_active_end_time_ns;
255 
256         // Update |previous_active_end_time_ns|.
257         uid_tracking_info->previous_active_end_time_ns = period->end_time_ns;
258 
259         // We want to estimate the small gaps between "continuous" GPU work; if
260         // the gap is more than |SMALL_TIME_GAP_LIMIT_NS| then we don't consider
261         // this "continuous" GPU work.
262         if (small_gap_time_ns > SMALL_TIME_GAP_LIMIT_NS) {
263             small_gap_time_ns = 0;
264         }
265     }
266 
267     uint64_t period_total_inactive_time_ns = 0;
268     const uint64_t period_duration_ns = period->end_time_ns - period->start_time_ns;
269     // |period->total_active_duration_ns| is the active time within the period duration, so
270     // it must not be larger than |period_duration_ns|.
271     if (period->total_active_duration_ns > period_duration_ns) {
272         __sync_fetch_and_add(&uid_tracking_info->error_count, 1);
273     } else {
274         period_total_inactive_time_ns = period_duration_ns - period->total_active_duration_ns;
275     }
276 
277     // Update |uid_tracking_info->total_inactive_duration_ns| by adding the
278     // inactive time from this period, plus the small gap between the current
279     // and previous active period. Either or both of these values could be 0.
280     if (small_gap_time_ns > 0 || period_total_inactive_time_ns > 0) {
281         __sync_fetch_and_add(&uid_tracking_info->total_inactive_duration_ns,
282                              small_gap_time_ns + period_total_inactive_time_ns);
283     }
284 
285     return ALLOW;
286 }
287 
288 LICENSE("Apache 2.0");
289