/*
 * Copyright 2022 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "include/gpuwork/gpuWork.h"

#include <linux/bpf.h>
#include <stddef.h>
#include <stdint.h>

#ifdef MOCK_BPF
#include <test/mock_bpf_helpers.h>
#else
#include <bpf_helpers.h>
#endif

// One second expressed in nanoseconds; also used as the maximum allowed
// period duration and as the threshold for "small" inter-period gaps.
#define S_IN_NS (1000000000)
#define SMALL_TIME_GAP_LIMIT_NS (S_IN_NS)

// A map from GpuIdUid (GPU ID and application UID) to |UidTrackingInfo|.
DEFINE_BPF_MAP_GRW(gpu_work_map, HASH, GpuIdUid, UidTrackingInfo, kMaxTrackedGpuIdUids,
                   AID_GRAPHICS);

// A map containing a single entry of |GlobalData|.
DEFINE_BPF_MAP_GRW(gpu_work_global_data, ARRAY, uint32_t, GlobalData, 1, AID_GRAPHICS);

// Defines the structure of the kernel tracepoint:
//
//   /sys/kernel/tracing/events/power/gpu_work_period/
//
// Drivers must define an appropriate gpu_work_period kernel tracepoint (for
// example, using the DECLARE_EVENT_CLASS and DEFINE_EVENT macros) such that the
// arguments/fields match the fields of |GpuWorkPeriodEvent|, excluding the
// initial "common" field. Drivers must invoke the tracepoint (also referred to
// as emitting the event) as described below. Note that the description below
// assumes a single physical GPU and its driver; for devices with multiple GPUs,
// each GPU and its driver should emit events independently, using a different
// value for |gpu_id| per GPU.
//
// |GpuWorkPeriodEvent| defines a non-overlapping, non-zero period of time from
// |start_time_ns| (inclusive) until |end_time_ns| (exclusive) for a given
// |uid|, and includes details of how much work the GPU was performing for |uid|
// during the period. When GPU work for a given |uid| runs on the GPU, the
// driver must track one or more periods that cover the time where the work was
// running, and emit events soon after. The driver should try to emit the event
// for a period at most 1 second after |end_time_ns|, and must emit the event at
// most 2 seconds after |end_time_ns|. A period's duration (|end_time_ns| -
// |start_time_ns|) must be at most 1 second. Periods for different |uids| can
// overlap, but periods for the same |uid| must not overlap. The driver must
// emit events for the same |uid| in strictly increasing order of
// |start_time_ns|, such that it is guaranteed that the tracepoint call for a
// period for |uid| has returned before the tracepoint call for the next period
// for |uid| is made. Note that synchronization may be necessary if the driver
// emits events for the same |uid| from different threads/contexts. Note that
// |end_time_ns| for a period for a |uid| may equal the |start_time_ns| of the
// next period for |uid|. The driver should try to avoid emitting a large number
// of events in a short time period (e.g. 1000 events per second) for a given
// |uid|.
//
// The |total_active_duration_ns| must be set to the approximate total amount of
// time the GPU spent running work for |uid| within the period, without
// "double-counting" parallel GPU work on the same GPU for the same |uid|. Note
// that even if the parallel GPU work was submitted from several different
// processes (i.e. different PIDs) with the same UID, this overlapping work must
// not be double-counted, as it still came from a single |uid|. "GPU work"
// should correspond to the "GPU slices" shown in the AGI (Android GPU
// Inspector) tool, and so should include work such as fragment and non-fragment
// work/shaders running on the shader cores of the GPU. For example, given the
// following for a single |uid|:
//  - A period has:
//    - |start_time_ns|: 100,000,000 ns
//    - |end_time_ns|:   800,000,000 ns
//  - Some GPU vertex work (A):
//    - started at: 200,000,000 ns
//    - ended at:   400,000,000 ns
//  - Some GPU fragment work (B):
//    - started at: 300,000,000 ns
//    - ended at:   500,000,000 ns
//  - Some GPU fragment work (C):
//    - started at: 300,000,000 ns
//    - ended at:   400,000,000 ns
//  - Some GPU fragment work (D):
//    - started at: 600,000,000 ns
//    - ended at:   700,000,000 ns
//
// The |total_active_duration_ns| would be 400,000,000 ns, because GPU work for
// |uid| was executing:
//  - from 200,000,000 ns to 500,000,000 ns, giving a duration of 300,000,000 ns
//    (encompassing GPU work A, B, and C)
//  - from 600,000,000 ns to 700,000,000 ns, giving a duration of 100,000,000 ns
//    (GPU work D)
//
// Thus, the |total_active_duration_ns| is the sum of these two
// (non-overlapping) durations. Drivers may not have efficient access to the
// exact start and end times of all GPU work, as shown above, but drivers should
// try to approximate/aggregate the value of |total_active_duration_ns| as
// accurately as possible within the limitations of the hardware, without
// double-counting parallel GPU work for the same |uid|. The
// |total_active_duration_ns| value must be less than or equal to the period
// duration (|end_time_ns| - |start_time_ns|); if the aggregation approach might
// violate this requirement then the driver must clamp
// |total_active_duration_ns| to be at most the period duration.
//
// Protected mode: protected GPU work must not be reported. Periods must be
// emitted, and the |total_active_duration_ns| value set, as if the protected
// GPU work did not occur.
//
// Note that the above description allows for a certain amount of flexibility in
// how the driver tracks periods and emits the events. We list a few examples of
// how drivers might implement the above:
//
//  - 1: The driver could track periods for all |uid| values at fixed intervals
//    of 1 second. Thus, every period duration would be exactly 1 second, and
//    periods from different |uid|s that overlap would have the same
//    |start_time_ns| and |end_time_ns| values.
//
//  - 2: The driver could track periods with many different durations (up to 1
//    second), as needed in order to cover the GPU work for each |uid|.
//    Overlapping periods for different |uid|s may have very different durations,
//    as well as different |start_time_ns| and |end_time_ns| values.
//
//  - 3: The driver could track fine-grained periods with different durations
//    that precisely cover the time where GPU work is running for each |uid|.
//    Thus, |total_active_duration_ns| would always equal the period duration.
//    For example, if a game was running at 60 frames per second, the driver
//    would most likely emit _at least_ 60 events per second (probably more, as
//    there would likely be multiple "chunks" of GPU work per frame, with gaps
//    between each chunk).
// However, the driver may sometimes need to resort to
// more coarse-grained periods to avoid emitting thousands of events per
// second for a |uid|, where |total_active_duration_ns| would then be less
// than the period duration.
typedef struct {
    // Actual fields start at offset 8.
    uint64_t common;

    // A value that uniquely identifies the GPU within the system.
    uint32_t gpu_id;

    // The UID of the application (i.e. persistent, unique ID of the Android
    // app) that submitted work to the GPU.
    uint32_t uid;

    // The start time of the period in nanoseconds. The clock must be
    // CLOCK_MONOTONIC_RAW, as returned by the ktime_get_raw_ns(void) function.
    uint64_t start_time_ns;

    // The end time of the period in nanoseconds. The clock must be
    // CLOCK_MONOTONIC_RAW, as returned by the ktime_get_raw_ns(void) function.
    uint64_t end_time_ns;

    // The amount of time the GPU was running GPU work for |uid| during the
    // period, in nanoseconds, without double-counting parallel GPU work for the
    // same |uid|. For example, this might include the amount of time the GPU
    // spent performing shader work (vertex work, fragment work, etc.) for
    // |uid|.
    uint64_t total_active_duration_ns;

} GpuWorkPeriodEvent;

// The layout above is ABI: it must match the kernel tracepoint's field
// offsets exactly, so the struct must not be reordered or repacked.
_Static_assert(offsetof(GpuWorkPeriodEvent, gpu_id) == 8 &&
                       offsetof(GpuWorkPeriodEvent, uid) == 12 &&
                       offsetof(GpuWorkPeriodEvent, start_time_ns) == 16 &&
                       offsetof(GpuWorkPeriodEvent, end_time_ns) == 24 &&
                       offsetof(GpuWorkPeriodEvent, total_active_duration_ns) == 32,
               "Field offsets of struct GpuWorkPeriodEvent must not be changed because they "
               "must match the tracepoint field offsets found via adb shell cat "
               "/sys/kernel/tracing/events/power/gpu_work_period/format");

// BPF program attached to the gpu_work_period tracepoint. For each emitted
// period it accumulates per-(gpu_id, uid) active/inactive durations and error
// counts into |gpu_work_map|, creating the map entry on first sight of a
// (gpu_id, uid) pair.
DEFINE_BPF_PROG("tracepoint/power/gpu_work_period", AID_ROOT, AID_GRAPHICS, tp_gpu_work_period)
(GpuWorkPeriodEvent* const period) {
    // Note: In eBPF programs, |__sync_fetch_and_add| is translated to an atomic
    // add.

    // Return 1 to avoid blocking simpleperf from receiving events.
    const int ALLOW = 1;

    // Build the map key. memset first so any struct padding is zeroed, giving
    // deterministic key bytes for the hash map.
    GpuIdUid gpu_id_and_uid;
    __builtin_memset(&gpu_id_and_uid, 0, sizeof(gpu_id_and_uid));
    gpu_id_and_uid.gpu_id = period->gpu_id;
    gpu_id_and_uid.uid = period->uid;

    // Get |UidTrackingInfo|.
    UidTrackingInfo* uid_tracking_info = bpf_gpu_work_map_lookup_elem(&gpu_id_and_uid);
    if (!uid_tracking_info) {
        // There was no existing entry, so we add a new one.
        UidTrackingInfo initial_info;
        __builtin_memset(&initial_info, 0, sizeof(initial_info));
        // BPF_NOEXIST means only one concurrent inserter "wins" and bumps the
        // counter below; losers fall through to the re-lookup.
        if (0 == bpf_gpu_work_map_update_elem(&gpu_id_and_uid, &initial_info, BPF_NOEXIST)) {
            // We added an entry to the map, so we increment our entry counter in
            // |GlobalData|.
            const uint32_t zero = 0;
            // Get the |GlobalData|.
            GlobalData* global_data = bpf_gpu_work_global_data_lookup_elem(&zero);
            // Getting the global data never fails because it is an |ARRAY| map,
            // but we need to keep the verifier happy.
            if (global_data) {
                __sync_fetch_and_add(&global_data->num_map_entries, 1);
            }
        }
        uid_tracking_info = bpf_gpu_work_map_lookup_elem(&gpu_id_and_uid);
        if (!uid_tracking_info) {
            // This should never happen, unless entries are getting deleted at
            // this moment. If so, we just give up.
            return ALLOW;
        }
    }

    // Validate the period before doing any unsigned subtraction with its
    // timestamps: the start >= end check guards the duration calculation below
    // against underflow.
    if (
            // The period duration must be non-zero.
            period->start_time_ns >= period->end_time_ns ||
            // The period duration must be at most 1 second.
            (period->end_time_ns - period->start_time_ns) > S_IN_NS) {
        __sync_fetch_and_add(&uid_tracking_info->error_count, 1);
        return ALLOW;
    }

    // If |total_active_duration_ns| is 0 then no GPU work occurred and there is
    // nothing to do.
    if (period->total_active_duration_ns == 0) {
        return ALLOW;
    }

    // Update |uid_tracking_info->total_active_duration_ns|.
    __sync_fetch_and_add(&uid_tracking_info->total_active_duration_ns,
                         period->total_active_duration_ns);

    // |small_gap_time_ns| is the time gap between the current and previous
    // active period, which could be 0. If the gap is more than
    // |SMALL_TIME_GAP_LIMIT_NS| then |small_gap_time_ns| will be set to 0
    // because we want to estimate the small gaps between "continuous" GPU work.
    uint64_t small_gap_time_ns = 0;
    if (uid_tracking_info->previous_active_end_time_ns > period->start_time_ns) {
        // The current period appears to have occurred before the previous
        // active period, which must not happen because per-UID periods must not
        // overlap and must be emitted in strictly increasing order of
        // |start_time_ns|.
        __sync_fetch_and_add(&uid_tracking_info->error_count, 1);
    } else {
        // The current period appears to have been emitted after the previous
        // active period, as expected, so we can calculate the gap between the
        // current and previous active period. (For the very first period,
        // |previous_active_end_time_ns| is 0, so the "gap" is huge and is
        // discarded by the limit check below.)
        small_gap_time_ns = period->start_time_ns - uid_tracking_info->previous_active_end_time_ns;

        // Update |previous_active_end_time_ns|.
        uid_tracking_info->previous_active_end_time_ns = period->end_time_ns;

        // We want to estimate the small gaps between "continuous" GPU work; if
        // the gap is more than |SMALL_TIME_GAP_LIMIT_NS| then we don't consider
        // this "continuous" GPU work.
        if (small_gap_time_ns > SMALL_TIME_GAP_LIMIT_NS) {
            small_gap_time_ns = 0;
        }
    }

    uint64_t period_total_inactive_time_ns = 0;
    const uint64_t period_duration_ns = period->end_time_ns - period->start_time_ns;
    // |period->total_active_duration_ns| is the active time within the period duration, so
    // it must not be larger than |period_duration_ns|.
    if (period->total_active_duration_ns > period_duration_ns) {
        __sync_fetch_and_add(&uid_tracking_info->error_count, 1);
    } else {
        period_total_inactive_time_ns = period_duration_ns - period->total_active_duration_ns;
    }

    // Update |uid_tracking_info->total_inactive_duration_ns| by adding the
    // inactive time from this period, plus the small gap between the current
    // and previous active period. Either or both of these values could be 0.
    if (small_gap_time_ns > 0 || period_total_inactive_time_ns > 0) {
        __sync_fetch_and_add(&uid_tracking_info->total_inactive_duration_ns,
                             small_gap_time_ns + period_total_inactive_time_ns);
    }

    return ALLOW;
}

LICENSE("Apache 2.0");