1 /*
2  * Copyright © 2017 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  */
24 
25 #include <stdlib.h>
26 #include <stdio.h>
27 #include <string.h>
28 #include <fcntl.h>
29 #include <inttypes.h>
30 #include <errno.h>
31 #include <sys/stat.h>
32 #include <sys/time.h>
33 #include <sys/times.h>
34 #include <sys/types.h>
35 #include <dirent.h>
36 #include <time.h>
37 #include <poll.h>
38 #include <sched.h>
39 
40 #include "igt.h"
41 #include "igt_core.h"
42 #include "igt_perf.h"
43 #include "igt_sysfs.h"
44 #include "igt_pm.h"
45 #include "sw_sync.h"
46 
IGT_TEST_DESCRIPTION("Test the i915 pmu perf interface");

/*
 * Relative tolerance (a fraction, not a percentage) applied by
 * assert_within_epsilon() when comparing a PMU reading to its reference.
 */
const double tolerance = 0.05f;
/*
 * Nominal length of one measurement window in nanoseconds (500ms). Callers
 * divide by 1000 before handing it to the microsecond based sleep helpers.
 */
const unsigned long batch_duration_ns = 500e6;
51 
/*
 * Open the i915 PMU counter selected by @config.
 *
 * The test is skipped when the open fails with ENODEV; any other failure
 * trips an assertion. On success the perf event file descriptor is returned
 * and the caller is responsible for closing it.
 */
static int open_pmu(uint64_t config)
{
	int fd = perf_i915_open(config);

	/* ENODEV is treated as "not available here", not as a test failure. */
	igt_skip_on(fd < 0 && errno == ENODEV);
	igt_assert(fd >= 0);

	return fd;
}
62 
/*
 * Open the i915 PMU counter selected by @config, forwarding @group to
 * perf_i915_open_group(). Callers in this file pass -1 for the first event
 * and the first event's descriptor for every following one.
 *
 * Skips on ENODEV, asserts on any other failure, and returns the new file
 * descriptor, which the caller must close.
 */
static int open_group(uint64_t config, int group)
{
	int fd = perf_i915_open_group(config, group);

	/* ENODEV is treated as "not available here", not as a test failure. */
	igt_skip_on(fd < 0 && errno == ENODEV);
	igt_assert(fd >= 0);

	return fd;
}
73 
74 static void
init(int gem_fd,const struct intel_execution_engine2 * e,uint8_t sample)75 init(int gem_fd, const struct intel_execution_engine2 *e, uint8_t sample)
76 {
77 	int fd, err = 0;
78 	bool exists;
79 
80 	errno = 0;
81 	fd = perf_i915_open(__I915_PMU_ENGINE(e->class, e->instance, sample));
82 	if (fd < 0)
83 		err = errno;
84 
85 	exists = gem_context_has_engine(gem_fd, 0, e->flags);
86 	if (intel_gen(intel_get_drm_devid(gem_fd)) < 6 &&
87 	    sample == I915_SAMPLE_SEMA)
88 		exists = false;
89 
90 	if (exists) {
91 		igt_assert_eq(err, 0);
92 		igt_assert_fd(fd);
93 		close(fd);
94 	} else {
95 		igt_assert_lt(fd, 0);
96 		igt_assert_eq(err, ENODEV);
97 	}
98 }
99 
/*
 * Read one counter from perf event @fd.
 *
 * The read must return exactly two 64-bit words. The first word is returned
 * as the counter value; the second is stored in @ts when @ts is not NULL
 * (callers use it as the perf-side timestamp of the sample).
 */
static uint64_t __pmu_read_single(int fd, uint64_t *ts)
{
	uint64_t data[2];

	igt_assert_eq(read(fd, data, sizeof(data)), sizeof(data));

	if (ts != NULL)
		ts[0] = data[1];

	return data[0];
}
111 
/* Read one counter from perf event @fd, discarding the timestamp word. */
static uint64_t pmu_read_single(int fd)
{
	uint64_t *const no_timestamp = NULL;

	return __pmu_read_single(fd, no_timestamp);
}
116 
/*
 * Read @num counters in one go from the perf event group led by @fd.
 *
 * The read must return exactly 2 + @num 64-bit words. Words 2.. are copied
 * into @val[0..num-1]; word 1 is returned (presumably the perf timestamp of
 * the read - confirm against the read_format set by the open helper).
 */
static uint64_t pmu_read_multi(int fd, unsigned int num, uint64_t *val)
{
	uint64_t buf[2 + num];
	const uint64_t *counters = &buf[2];
	unsigned int i;

	igt_assert_eq(read(fd, buf, sizeof(buf)), sizeof(buf));

	for (i = 0; i < num; i++)
		val[i] = counters[i];

	return buf[1];
}
129 
/*
 * Assert that @x lies within [(1 - tol_down) * ref, (1 + tol_up) * ref].
 *
 * Tolerances are fractions of @ref (0.05 == 5%), so a @ref of zero only
 * accepts an @x of exactly zero. All arguments are converted to double and
 * are evaluated more than once, so they must be free of side effects.
 */
#define __assert_within_epsilon(x, ref, tol_up, tol_down) \
	igt_assert_f((double)(x) <= (1.0 + (tol_up)) * (double)(ref) && \
		     (double)(x) >= (1.0 - (tol_down)) * (double)(ref), \
		     "'%s' != '%s' (%f not within +%f%%/-%f%% tolerance of %f)\n",\
		     #x, #ref, (double)(x), \
		     (tol_up) * 100.0, (tol_down) * 100.0, \
		     (double)(ref))

/* Symmetric variant: the same fractional tolerance above and below @ref. */
#define assert_within_epsilon(x, ref, tolerance) \
	__assert_within_epsilon(x, ref, tolerance, tolerance)
140 
/*
 * Sleep for at least @usec microseconds and report how long was really
 * spent, so that assertions on time spent sleeping (directly or indirectly)
 * compare against the measured duration rather than the requested one.
 *
 * A short sleep is topped up until the full period has elapsed. The return
 * value is the elapsed time in nanoseconds, truncated to unsigned int.
 */
static unsigned int measured_usleep(unsigned int usec)
{
	struct timespec ts = { };
	unsigned int slept;

	/* The first call on a zeroed timespec starts the clock. */
	slept = igt_nsec_elapsed(&ts);
	igt_assert(slept == 0);

	for (;;) {
		usleep(usec - slept);

		/* Elapsed time so far, converted from ns to us. */
		slept = igt_nsec_elapsed(&ts) / 1000;
		if (slept >= usec)
			break;
	}

	return igt_nsec_elapsed(&ts);
}
160 
/*
 * Bit flags selecting test variants. Only the uses visible in this file are
 * described; flags without a note are not consumed in the visible code.
 */
/* Keep the engine busy with a spinner during the measurement window. */
#define TEST_BUSY (1)
/* end_spin(): wait for the spinner's buffer with gem_sync() after ending it. */
#define FLAG_SYNC (2)
/* End the spinner inside the window and let end_spin() wait out an idle tail. */
#define TEST_TRAILING_IDLE (4)
#define TEST_RUNTIME_PM (8)
#define FLAG_LONG (16)
/* single(): force a GPU reset instead of ending the spinner normally. */
#define FLAG_HANG (32)
167 
__spin_poll(int fd,uint32_t ctx,const struct intel_execution_engine2 * e)168 static igt_spin_t * __spin_poll(int fd, uint32_t ctx,
169 				const struct intel_execution_engine2 *e)
170 {
171 	struct igt_spin_factory opts = {
172 		.ctx = ctx,
173 		.engine = e->flags,
174 	};
175 
176 	if (gem_class_can_store_dword(fd, e->class))
177 		opts.flags |= IGT_SPIN_POLL_RUN;
178 
179 	return __igt_spin_factory(fd, &opts);
180 }
181 
/*
 * Wait until @spin is running and return how long the wait took, in
 * nanoseconds.
 *
 * A pollable spinner is busy-waited on, with a warning for every further
 * 250ms that pass without it starting. A non-pollable spinner cannot be
 * observed, so a fixed 500ms sleep is used instead. @fd is unused.
 */
static unsigned long __spin_wait(int fd, igt_spin_t *spin)
{
	struct timespec start = { };
	unsigned long last_warn = 0;

	igt_nsec_elapsed(&start);

	if (!igt_spin_has_poll(spin)) {
		igt_debug("__spin_wait - usleep mode\n");
		usleep(500e3); /* Better than nothing! */

		return igt_nsec_elapsed(&start);
	}

	while (!igt_spin_has_started(spin)) {
		const unsigned long now = igt_nsec_elapsed(&start);

		if ((now - last_warn) > 250e6) {
			last_warn = now;
			igt_warn("Spinner not running after %.2fms\n",
				 (double)now / 1e6);
		}
	}

	return igt_nsec_elapsed(&start);
}
207 
__spin_sync(int fd,uint32_t ctx,const struct intel_execution_engine2 * e)208 static igt_spin_t * __spin_sync(int fd, uint32_t ctx,
209 				const struct intel_execution_engine2 *e)
210 {
211 	igt_spin_t *spin = __spin_poll(fd, ctx, e);
212 
213 	__spin_wait(fd, spin);
214 
215 	return spin;
216 }
217 
spin_sync(int fd,uint32_t ctx,const struct intel_execution_engine2 * e)218 static igt_spin_t * spin_sync(int fd, uint32_t ctx,
219 			      const struct intel_execution_engine2 *e)
220 {
221 	igt_require_gem(fd);
222 
223 	return __spin_sync(fd, ctx, e);
224 }
225 
spin_sync_flags(int fd,uint32_t ctx,unsigned int flags)226 static igt_spin_t * spin_sync_flags(int fd, uint32_t ctx, unsigned int flags)
227 {
228 	struct intel_execution_engine2 e = { };
229 
230 	e.class = gem_execbuf_flags_to_engine_class(flags);
231 	e.instance = (flags & (I915_EXEC_BSD_MASK | I915_EXEC_RING_MASK)) ==
232 		     (I915_EXEC_BSD | I915_EXEC_BSD_RING2) ? 1 : 0;
233 	e.flags = flags;
234 
235 	return spin_sync(fd, ctx, &e);
236 }
237 
/*
 * End @spin (a NULL @spin is a no-op).
 *
 * FLAG_SYNC:          also wait for the spinner's buffer with gem_sync().
 * TEST_TRAILING_IDLE: afterwards keep polling for batch_duration_ns / 5,
 *                     warning (at most once per 10ms) while the spinner's
 *                     buffer is still busy.
 */
static void end_spin(int fd, igt_spin_t *spin, unsigned int flags)
{
	struct timespec start = { };
	unsigned long last_warn = 0;
	unsigned long now;

	if (!spin)
		return;

	igt_spin_end(spin);

	if (flags & FLAG_SYNC)
		gem_sync(fd, spin->handle);

	if (!(flags & TEST_TRAILING_IDLE))
		return;

	igt_nsec_elapsed(&start);

	for (;;) {
		now = igt_nsec_elapsed(&start);

		if (gem_bo_busy(fd, spin->handle) &&
		    (now - last_warn) > 10e6) {
			last_warn = now;
			igt_warn("Spinner not idle after %.2fms\n",
				 (double)now / 1e6);
		}

		usleep(1e3);

		if (now >= batch_duration_ns / 5)
			break;
	}
}
268 
269 static void
single(int gem_fd,const struct intel_execution_engine2 * e,unsigned int flags)270 single(int gem_fd, const struct intel_execution_engine2 *e, unsigned int flags)
271 {
272 	unsigned long slept;
273 	igt_spin_t *spin;
274 	uint64_t val;
275 	int fd;
276 
277 	fd = open_pmu(I915_PMU_ENGINE_BUSY(e->class, e->instance));
278 
279 	if (flags & TEST_BUSY)
280 		spin = spin_sync(gem_fd, 0, e);
281 	else
282 		spin = NULL;
283 
284 	val = pmu_read_single(fd);
285 	slept = measured_usleep(batch_duration_ns / 1000);
286 	if (flags & TEST_TRAILING_IDLE)
287 		end_spin(gem_fd, spin, flags);
288 	val = pmu_read_single(fd) - val;
289 
290 	if (flags & FLAG_HANG)
291 		igt_force_gpu_reset(gem_fd);
292 	else
293 		end_spin(gem_fd, spin, FLAG_SYNC);
294 
295 	assert_within_epsilon(val, flags & TEST_BUSY ? slept : 0.f, tolerance);
296 
297 	/* Check for idle after hang. */
298 	if (flags & FLAG_HANG) {
299 		gem_quiescent_gpu(gem_fd);
300 		igt_assert(!gem_bo_busy(gem_fd, spin->handle));
301 
302 		val = pmu_read_single(fd);
303 		slept = measured_usleep(batch_duration_ns / 1000);
304 		val = pmu_read_single(fd) - val;
305 
306 		assert_within_epsilon(val, 0, tolerance);
307 	}
308 
309 	igt_spin_free(gem_fd, spin);
310 	close(fd);
311 
312 	gem_quiescent_gpu(gem_fd);
313 }
314 
315 static void
busy_start(int gem_fd,const struct intel_execution_engine2 * e)316 busy_start(int gem_fd, const struct intel_execution_engine2 *e)
317 {
318 	unsigned long slept;
319 	uint64_t val, ts[2];
320 	igt_spin_t *spin;
321 	int fd;
322 
323 	/*
324 	 * Defeat the busy stats delayed disable, we need to guarantee we are
325 	 * the first user.
326 	 */
327 	sleep(2);
328 
329 	spin = __spin_sync(gem_fd, 0, e);
330 
331 	fd = open_pmu(I915_PMU_ENGINE_BUSY(e->class, e->instance));
332 
333 	val = __pmu_read_single(fd, &ts[0]);
334 	slept = measured_usleep(batch_duration_ns / 1000);
335 	val = __pmu_read_single(fd, &ts[1]) - val;
336 	igt_debug("slept=%lu perf=%"PRIu64"\n", slept, ts[1] - ts[0]);
337 
338 	igt_spin_free(gem_fd, spin);
339 	close(fd);
340 
341 	assert_within_epsilon(val, ts[1] - ts[0], tolerance);
342 	gem_quiescent_gpu(gem_fd);
343 }
344 
345 /*
346  * This test has a potentially low rate of catching the issue it is trying to
347  * catch. Or in other words, quite high rate of false negative successes. We
348  * will depend on the CI systems running it a lot to detect issues.
349  */
350 static void
busy_double_start(int gem_fd,const struct intel_execution_engine2 * e)351 busy_double_start(int gem_fd, const struct intel_execution_engine2 *e)
352 {
353 	unsigned long slept;
354 	uint64_t val, val2, ts[2];
355 	igt_spin_t *spin[2];
356 	uint32_t ctx;
357 	int fd;
358 
359 	ctx = gem_context_create(gem_fd);
360 	gem_context_set_all_engines(gem_fd, ctx);
361 
362 	/*
363 	 * Defeat the busy stats delayed disable, we need to guarantee we are
364 	 * the first user.
365 	 */
366 	sleep(2);
367 
368 	/*
369 	 * Submit two contexts, with a pause in between targeting the ELSP
370 	 * re-submission in execlists mode. Make sure busyness is correctly
371 	 * reported with the engine busy, and after the engine went idle.
372 	 */
373 	spin[0] = __spin_sync(gem_fd, 0, e);
374 	usleep(500e3);
375 	spin[1] = __igt_spin_new(gem_fd,
376 				 .ctx = ctx,
377 				 .engine = e->flags);
378 
379 	/*
380 	 * Open PMU as fast as possible after the second spin batch in attempt
381 	 * to be faster than the driver handling lite-restore.
382 	 */
383 	fd = open_pmu(I915_PMU_ENGINE_BUSY(e->class, e->instance));
384 
385 	val = __pmu_read_single(fd, &ts[0]);
386 	slept = measured_usleep(batch_duration_ns / 1000);
387 	val = __pmu_read_single(fd, &ts[1]) - val;
388 	igt_debug("slept=%lu perf=%"PRIu64"\n", slept, ts[1] - ts[0]);
389 
390 	igt_spin_end(spin[0]);
391 	igt_spin_end(spin[1]);
392 
393 	/* Wait for GPU idle to verify PMU reports idle. */
394 	gem_quiescent_gpu(gem_fd);
395 
396 	val2 = pmu_read_single(fd);
397 	usleep(batch_duration_ns / 1000);
398 	val2 = pmu_read_single(fd) - val2;
399 
400 	igt_info("busy=%"PRIu64" idle=%"PRIu64"\n", val, val2);
401 
402 	igt_spin_free(gem_fd, spin[0]);
403 	igt_spin_free(gem_fd, spin[1]);
404 
405 	close(fd);
406 
407 	gem_context_destroy(gem_fd, ctx);
408 
409 	assert_within_epsilon(val, ts[1] - ts[0], tolerance);
410 	igt_assert_eq(val2, 0);
411 
412 	gem_quiescent_gpu(gem_fd);
413 }
414 
/*
 * Log the per-engine values in @val[0..num_engines-1] as one "index=value"
 * line per engine, emitted through a single igt_info() call.
 *
 * The text is built in a fixed 1024 byte buffer. If it does not fit, logging
 * stops at the last entry that could be (partially) formatted instead of
 * writing past the end of the buffer.
 */
static void log_busy(unsigned int num_engines, uint64_t *val)
{
	char buf[1024];
	int rem = sizeof(buf);
	unsigned int i;
	char *p = buf;

	/* Keep the buffer a valid (empty) string when there are no engines. */
	buf[0] = '\0';

	for (i = 0; i < num_engines; i++) {
		int len;

		len = snprintf(p, rem, "%u=%" PRIu64 "\n",  i, val[i]);
		igt_assert(len > 0);

		/*
		 * snprintf() returns the length the full output would have
		 * had. On truncation the buffer is full (and still NUL
		 * terminated), so advancing by len would step outside it.
		 */
		if (len >= rem)
			break;

		rem -= len;
		p += len;
	}

	igt_info("%s", buf);
}
433 
434 static void
busy_check_all(int gem_fd,const struct intel_execution_engine2 * e,const unsigned int num_engines,unsigned int flags)435 busy_check_all(int gem_fd, const struct intel_execution_engine2 *e,
436 	       const unsigned int num_engines, unsigned int flags)
437 {
438 	struct intel_execution_engine2 *e_;
439 	uint64_t tval[2][num_engines];
440 	unsigned int busy_idx = 0, i;
441 	uint64_t val[num_engines];
442 	int fd[num_engines];
443 	unsigned long slept;
444 	igt_spin_t *spin;
445 
446 	i = 0;
447 	fd[0] = -1;
448 	__for_each_physical_engine(gem_fd, e_) {
449 		if (e->class == e_->class && e->instance == e_->instance)
450 			busy_idx = i;
451 
452 		fd[i++] = open_group(I915_PMU_ENGINE_BUSY(e_->class,
453 							  e_->instance),
454 				     fd[0]);
455 	}
456 
457 	igt_assert_eq(i, num_engines);
458 
459 	spin = spin_sync(gem_fd, 0, e);
460 	pmu_read_multi(fd[0], num_engines, tval[0]);
461 	slept = measured_usleep(batch_duration_ns / 1000);
462 	if (flags & TEST_TRAILING_IDLE)
463 		end_spin(gem_fd, spin, flags);
464 	pmu_read_multi(fd[0], num_engines, tval[1]);
465 
466 	end_spin(gem_fd, spin, FLAG_SYNC);
467 	igt_spin_free(gem_fd, spin);
468 	close(fd[0]);
469 
470 	for (i = 0; i < num_engines; i++)
471 		val[i] = tval[1][i] - tval[0][i];
472 
473 	log_busy(num_engines, val);
474 
475 	assert_within_epsilon(val[busy_idx], slept, tolerance);
476 	for (i = 0; i < num_engines; i++) {
477 		if (i == busy_idx)
478 			continue;
479 		assert_within_epsilon(val[i], 0.0f, tolerance);
480 	}
481 	gem_quiescent_gpu(gem_fd);
482 }
483 
484 static void
__submit_spin(int gem_fd,igt_spin_t * spin,const struct intel_execution_engine2 * e,int offset)485 __submit_spin(int gem_fd, igt_spin_t *spin,
486 	      const struct intel_execution_engine2 *e,
487 	      int offset)
488 {
489 	struct drm_i915_gem_execbuffer2 eb = spin->execbuf;
490 
491 	eb.flags &= ~(0x3f | I915_EXEC_BSD_MASK);
492 	eb.flags |= e->flags | I915_EXEC_NO_RELOC;
493 	eb.batch_start_offset += offset;
494 
495 	gem_execbuf(gem_fd, &eb);
496 }
497 
498 static void
most_busy_check_all(int gem_fd,const struct intel_execution_engine2 * e,const unsigned int num_engines,unsigned int flags)499 most_busy_check_all(int gem_fd, const struct intel_execution_engine2 *e,
500 		    const unsigned int num_engines, unsigned int flags)
501 {
502 	struct intel_execution_engine2 *e_;
503 	uint64_t tval[2][num_engines];
504 	uint64_t val[num_engines];
505 	int fd[num_engines];
506 	unsigned long slept;
507 	igt_spin_t *spin = NULL;
508 	unsigned int idle_idx, i;
509 
510 	i = 0;
511 	__for_each_physical_engine(gem_fd, e_) {
512 		if (e->class == e_->class && e->instance == e_->instance)
513 			idle_idx = i;
514 		else if (spin)
515 			__submit_spin(gem_fd, spin, e_, 64);
516 		else
517 			spin = __spin_poll(gem_fd, 0, e_);
518 
519 		val[i++] = I915_PMU_ENGINE_BUSY(e_->class, e_->instance);
520 	}
521 	igt_assert(i == num_engines);
522 	igt_require(spin); /* at least one busy engine */
523 
524 	fd[0] = -1;
525 	for (i = 0; i < num_engines; i++)
526 		fd[i] = open_group(val[i], fd[0]);
527 
528 	/* Small delay to allow engines to start. */
529 	usleep(__spin_wait(gem_fd, spin) * num_engines / 1e3);
530 
531 	pmu_read_multi(fd[0], num_engines, tval[0]);
532 	slept = measured_usleep(batch_duration_ns / 1000);
533 	if (flags & TEST_TRAILING_IDLE)
534 		end_spin(gem_fd, spin, flags);
535 	pmu_read_multi(fd[0], num_engines, tval[1]);
536 
537 	end_spin(gem_fd, spin, FLAG_SYNC);
538 	igt_spin_free(gem_fd, spin);
539 	close(fd[0]);
540 
541 	for (i = 0; i < num_engines; i++)
542 		val[i] = tval[1][i] - tval[0][i];
543 
544 	log_busy(num_engines, val);
545 
546 	for (i = 0; i < num_engines; i++) {
547 		if (i == idle_idx)
548 			assert_within_epsilon(val[i], 0.0f, tolerance);
549 		else
550 			assert_within_epsilon(val[i], slept, tolerance);
551 	}
552 	gem_quiescent_gpu(gem_fd);
553 }
554 
555 static void
all_busy_check_all(int gem_fd,const unsigned int num_engines,unsigned int flags)556 all_busy_check_all(int gem_fd, const unsigned int num_engines,
557 		   unsigned int flags)
558 {
559 	struct intel_execution_engine2 *e;
560 	uint64_t tval[2][num_engines];
561 	uint64_t val[num_engines];
562 	int fd[num_engines];
563 	unsigned long slept;
564 	igt_spin_t *spin = NULL;
565 	unsigned int i;
566 
567 	i = 0;
568 	__for_each_physical_engine(gem_fd, e) {
569 		if (spin)
570 			__submit_spin(gem_fd, spin, e, 64);
571 		else
572 			spin = __spin_poll(gem_fd, 0, e);
573 
574 		val[i++] = I915_PMU_ENGINE_BUSY(e->class, e->instance);
575 	}
576 	igt_assert(i == num_engines);
577 
578 	fd[0] = -1;
579 	for (i = 0; i < num_engines; i++)
580 		fd[i] = open_group(val[i], fd[0]);
581 
582 	/* Small delay to allow engines to start. */
583 	usleep(__spin_wait(gem_fd, spin) * num_engines / 1e3);
584 
585 	pmu_read_multi(fd[0], num_engines, tval[0]);
586 	slept = measured_usleep(batch_duration_ns / 1000);
587 	if (flags & TEST_TRAILING_IDLE)
588 		end_spin(gem_fd, spin, flags);
589 	pmu_read_multi(fd[0], num_engines, tval[1]);
590 
591 	end_spin(gem_fd, spin, FLAG_SYNC);
592 	igt_spin_free(gem_fd, spin);
593 	close(fd[0]);
594 
595 	for (i = 0; i < num_engines; i++)
596 		val[i] = tval[1][i] - tval[0][i];
597 
598 	log_busy(num_engines, val);
599 
600 	for (i = 0; i < num_engines; i++)
601 		assert_within_epsilon(val[i], slept, tolerance);
602 	gem_quiescent_gpu(gem_fd);
603 }
604 
605 static void
no_sema(int gem_fd,const struct intel_execution_engine2 * e,unsigned int flags)606 no_sema(int gem_fd, const struct intel_execution_engine2 *e, unsigned int flags)
607 {
608 	igt_spin_t *spin;
609 	uint64_t val[2][2];
610 	int fd;
611 
612 	fd = open_group(I915_PMU_ENGINE_SEMA(e->class, e->instance), -1);
613 	open_group(I915_PMU_ENGINE_WAIT(e->class, e->instance), fd);
614 
615 	if (flags & TEST_BUSY)
616 		spin = spin_sync(gem_fd, 0, e);
617 	else
618 		spin = NULL;
619 
620 	pmu_read_multi(fd, 2, val[0]);
621 	measured_usleep(batch_duration_ns / 1000);
622 	if (flags & TEST_TRAILING_IDLE)
623 		end_spin(gem_fd, spin, flags);
624 	pmu_read_multi(fd, 2, val[1]);
625 
626 	val[0][0] = val[1][0] - val[0][0];
627 	val[0][1] = val[1][1] - val[0][1];
628 
629 	if (spin) {
630 		end_spin(gem_fd, spin, FLAG_SYNC);
631 		igt_spin_free(gem_fd, spin);
632 	}
633 	close(fd);
634 
635 	assert_within_epsilon(val[0][0], 0.0f, tolerance);
636 	assert_within_epsilon(val[0][1], 0.0f, tolerance);
637 }
638 
/* Build a command dword: opcode in bits 23 and up, OR'ed with @flags. */
#define MI_INSTR(opcode, flags) (((opcode) << 23) | (flags))
/* Command header used by sema_wait() for its polling semaphore wait. */
#define MI_SEMAPHORE_WAIT	MI_INSTR(0x1c, 2) /* GEN8+ */
/* Modifier bits OR'ed into MI_SEMAPHORE_WAIT by sema_wait(). */
#define   MI_SEMAPHORE_POLL		(1<<15)
#define   MI_SEMAPHORE_SAD_GTE_SDD	(1<<12)
643 
644 static void
sema_wait(int gem_fd,const struct intel_execution_engine2 * e,unsigned int flags)645 sema_wait(int gem_fd, const struct intel_execution_engine2 *e,
646 	  unsigned int flags)
647 {
648 	struct drm_i915_gem_relocation_entry reloc[2] = {};
649 	struct drm_i915_gem_exec_object2 obj[2] = {};
650 	struct drm_i915_gem_execbuffer2 eb = {};
651 	uint32_t bb_handle, obj_handle;
652 	unsigned long slept;
653 	uint32_t *obj_ptr;
654 	uint32_t batch[16];
655 	uint64_t val[2], ts[2];
656 	int fd;
657 
658 	igt_require(intel_gen(intel_get_drm_devid(gem_fd)) >= 8);
659 
660 	/**
661 	 * Setup up a batchbuffer with a polling semaphore wait command which
662 	 * will wait on an value in a shared bo to change. This way we are able
663 	 * to control how much time we will spend in this bb.
664 	 */
665 
666 	bb_handle = gem_create(gem_fd, 4096);
667 	obj_handle = gem_create(gem_fd, 4096);
668 
669 	obj_ptr = gem_mmap__wc(gem_fd, obj_handle, 0, 4096, PROT_WRITE);
670 
671 	batch[0] = MI_STORE_DWORD_IMM;
672 	batch[1] = sizeof(*obj_ptr);
673 	batch[2] = 0;
674 	batch[3] = 1;
675 	batch[4] = MI_SEMAPHORE_WAIT |
676 		   MI_SEMAPHORE_POLL |
677 		   MI_SEMAPHORE_SAD_GTE_SDD;
678 	batch[5] = 1;
679 	batch[6] = 0x0;
680 	batch[7] = 0x0;
681 	batch[8] = MI_BATCH_BUFFER_END;
682 
683 	gem_write(gem_fd, bb_handle, 0, batch, sizeof(batch));
684 
685 	reloc[0].target_handle = obj_handle;
686 	reloc[0].offset = 1 * sizeof(uint32_t);
687 	reloc[0].read_domains = I915_GEM_DOMAIN_RENDER;
688 	reloc[0].write_domain = I915_GEM_DOMAIN_RENDER;
689 	reloc[0].delta = sizeof(*obj_ptr);
690 
691 	reloc[1].target_handle = obj_handle;
692 	reloc[1].offset = 6 * sizeof(uint32_t);
693 	reloc[1].read_domains = I915_GEM_DOMAIN_RENDER;
694 
695 	obj[0].handle = obj_handle;
696 
697 	obj[1].handle = bb_handle;
698 	obj[1].relocation_count = 2;
699 	obj[1].relocs_ptr = to_user_pointer(reloc);
700 
701 	eb.buffer_count = 2;
702 	eb.buffers_ptr = to_user_pointer(obj);
703 	eb.flags = e->flags;
704 
705 	/**
706 	 * Start the semaphore wait PMU and after some known time let the above
707 	 * semaphore wait command finish. Then check that the PMU is reporting
708 	 * to expected time spent in semaphore wait state.
709 	 */
710 
711 	fd = open_pmu(I915_PMU_ENGINE_SEMA(e->class, e->instance));
712 
713 	val[0] = pmu_read_single(fd);
714 
715 	gem_execbuf(gem_fd, &eb);
716 	do { /* wait for the batch to start executing */
717 		usleep(5e3);
718 	} while (!obj_ptr[1]);
719 
720 	igt_assert_f(igt_wait(pmu_read_single(fd) != val[0], 10, 1),
721 		     "sampling failed to start withing 10ms\n");
722 
723 	val[0] = __pmu_read_single(fd, &ts[0]);
724 	slept = measured_usleep(batch_duration_ns / 1000);
725 	if (flags & TEST_TRAILING_IDLE)
726 		obj_ptr[0] = 1;
727 	val[1] = __pmu_read_single(fd, &ts[1]);
728 	igt_debug("slept %.3fms (perf %.3fms), sampled %.3fms\n",
729 		  slept * 1e-6,
730 		  (ts[1] - ts[0]) * 1e-6,
731 		  (val[1] - val[0]) * 1e-6);
732 
733 	obj_ptr[0] = 1;
734 	gem_sync(gem_fd, bb_handle);
735 
736 	munmap(obj_ptr, 4096);
737 	gem_close(gem_fd, obj_handle);
738 	gem_close(gem_fd, bb_handle);
739 	close(fd);
740 
741 	assert_within_epsilon(val[1] - val[0], slept, tolerance);
742 }
743 
/*
 * Per-pipe vblank selector bits that event_wait() ORs into
 * MI_WAIT_FOR_EVENT; the same bit positions are also used there to build
 * the DERRMR value.
 */
#define   MI_WAIT_FOR_PIPE_C_VBLANK (1<<21)
#define   MI_WAIT_FOR_PIPE_B_VBLANK (1<<11)
#define   MI_WAIT_FOR_PIPE_A_VBLANK (1<<3)
747 
/* Display state shared between event_wait() and its crtc helpers. */
typedef struct {
	igt_display_t display;		/* set up by igt_display_require() */
	struct igt_fb primary_fb;	/* fb created by prepare_crtc() */
	igt_output_t *output;		/* not used by the code visible here */
	enum pipe pipe;			/* pipe currently under test */
} data_t;
754 
/*
 * Light up @output on @data->pipe: bind the output to the pipe, put a black
 * mode-sized XRGB8888 framebuffer on its primary plane, commit, and wait
 * for one vblank on that pipe.
 *
 * The framebuffer is stored in @data->primary_fb and is removed again by
 * cleanup_crtc().
 */
static void prepare_crtc(data_t *data, int fd, igt_output_t *output)
{
	igt_display_t *display = &data->display;
	igt_plane_t *primary;
	drmModeModeInfo *mode;

	/* select the pipe we want to use */
	igt_output_set_pipe(output, data->pipe);

	/* create the fb matching the output's mode... */
	mode = igt_output_get_mode(output);
	igt_create_color_fb(fd, mode->hdisplay, mode->vdisplay,
			    DRM_FORMAT_XRGB8888,
			    LOCAL_DRM_FORMAT_MOD_NONE,
			    0.0, 0.0, 0.0,
			    &data->primary_fb);

	/* ...and show it on the primary plane */
	primary = igt_output_get_plane_type(output, DRM_PLANE_TYPE_PRIMARY);
	igt_plane_set_fb(primary, &data->primary_fb);

	igt_display_commit(display);

	igt_wait_for_vblank(fd, data->pipe);
}
779 
/*
 * Undo prepare_crtc(): remove the framebuffer, clear the primary plane,
 * reset the output to PIPE_ANY and commit the result.
 */
static void cleanup_crtc(data_t *data, int fd, igt_output_t *output)
{
	igt_plane_t *primary;

	igt_remove_fb(fd, &data->primary_fb);

	primary = igt_output_get_plane_type(output, DRM_PLANE_TYPE_PRIMARY);
	igt_plane_set_fb(primary, NULL);

	igt_output_set_pipe(output, PIPE_ANY);
	igt_display_commit(&data->display);
}
793 
wait_vblank(int fd,union drm_wait_vblank * vbl)794 static int wait_vblank(int fd, union drm_wait_vblank *vbl)
795 {
796 	int err;
797 
798 	err = 0;
799 	if (igt_ioctl(fd, DRM_IOCTL_WAIT_VBLANK, vbl))
800 		err = -errno;
801 
802 	return err;
803 }
804 
805 static void
event_wait(int gem_fd,const struct intel_execution_engine2 * e)806 event_wait(int gem_fd, const struct intel_execution_engine2 *e)
807 {
808 	struct drm_i915_gem_exec_object2 obj = { };
809 	struct drm_i915_gem_execbuffer2 eb = { };
810 	const uint32_t DERRMR = 0x44050;
811 	const uint32_t FORCEWAKE_MT = 0xa188;
812 	unsigned int valid_tests = 0;
813 	uint32_t batch[16], *b;
814 	uint16_t devid;
815 	igt_output_t *output;
816 	data_t data;
817 	enum pipe p;
818 	int fd;
819 
820 	devid = intel_get_drm_devid(gem_fd);
821 	igt_require(intel_gen(devid) >= 7);
822 	igt_skip_on(IS_VALLEYVIEW(devid) || IS_CHERRYVIEW(devid));
823 
824 	kmstest_set_vt_graphics_mode();
825 	igt_display_require(&data.display, gem_fd);
826 
827 	/**
828 	 * We will use the display to render event forwarind so need to
829 	 * program the DERRMR register and restore it at exit.
830 	 * Note we assume that the default/desired value for DERRMR will always
831 	 * be ~0u (all routing disable). To be fancy, we could do a SRM of the
832 	 * reg beforehand and then LRM at the end.
833 	 *
834 	 * We will emit a MI_WAIT_FOR_EVENT listening for vblank events,
835 	 * have a background helper to indirectly enable vblank irqs, and
836 	 * listen to the recorded time spent in engine wait state as reported
837 	 * by the PMU.
838 	 */
839 	obj.handle = gem_create(gem_fd, 4096);
840 
841 	b = batch;
842 	*b++ = MI_LOAD_REGISTER_IMM;
843 	*b++ = FORCEWAKE_MT;
844 	*b++ = 2 << 16 | 2;
845 	*b++ = MI_LOAD_REGISTER_IMM;
846 	*b++ = DERRMR;
847 	*b++ = ~0u;
848 	*b++ = MI_WAIT_FOR_EVENT;
849 	*b++ = MI_LOAD_REGISTER_IMM;
850 	*b++ = DERRMR;
851 	*b++ = ~0u;
852 	*b++ = MI_LOAD_REGISTER_IMM;
853 	*b++ = FORCEWAKE_MT;
854 	*b++ = 2 << 16;
855 	*b++ = MI_BATCH_BUFFER_END;
856 
857 	eb.buffer_count = 1;
858 	eb.buffers_ptr = to_user_pointer(&obj);
859 	eb.flags = e->flags | I915_EXEC_SECURE;
860 
861 	for_each_pipe_with_valid_output(&data.display, p, output) {
862 		struct igt_helper_process waiter = { };
863 		const unsigned int frames = 3;
864 		uint64_t val[2];
865 
866 		batch[6] = MI_WAIT_FOR_EVENT;
867 		switch (p) {
868 		case PIPE_A:
869 			batch[6] |= MI_WAIT_FOR_PIPE_A_VBLANK;
870 			batch[5] = ~(1 << 3);
871 			break;
872 		case PIPE_B:
873 			batch[6] |= MI_WAIT_FOR_PIPE_B_VBLANK;
874 			batch[5] = ~(1 << 11);
875 			break;
876 		case PIPE_C:
877 			batch[6] |= MI_WAIT_FOR_PIPE_C_VBLANK;
878 			batch[5] = ~(1 << 21);
879 			break;
880 		default:
881 			continue;
882 		}
883 
884 		gem_write(gem_fd, obj.handle, 0, batch, sizeof(batch));
885 
886 		data.pipe = p;
887 		prepare_crtc(&data, gem_fd, output);
888 
889 		fd = open_pmu(I915_PMU_ENGINE_WAIT(e->class, e->instance));
890 
891 		val[0] = pmu_read_single(fd);
892 
893 		igt_fork_helper(&waiter) {
894 			const uint32_t pipe_id_flag =
895 					kmstest_get_vbl_flag(data.pipe);
896 
897 			for (;;) {
898 				union drm_wait_vblank vbl = { };
899 
900 				vbl.request.type = DRM_VBLANK_RELATIVE;
901 				vbl.request.type |= pipe_id_flag;
902 				vbl.request.sequence = 1;
903 				igt_assert_eq(wait_vblank(gem_fd, &vbl), 0);
904 			}
905 		}
906 
907 		for (unsigned int frame = 0; frame < frames; frame++) {
908 			gem_execbuf(gem_fd, &eb);
909 			gem_sync(gem_fd, obj.handle);
910 		}
911 
912 		igt_stop_helper(&waiter);
913 
914 		val[1] = pmu_read_single(fd);
915 
916 		close(fd);
917 
918 		cleanup_crtc(&data, gem_fd, output);
919 		valid_tests++;
920 
921 		igt_assert(val[1] - val[0] > 0);
922 	}
923 
924 	gem_close(gem_fd, obj.handle);
925 
926 	igt_require_f(valid_tests,
927 		      "no valid crtc/connector combinations found\n");
928 }
929 
930 static void
multi_client(int gem_fd,const struct intel_execution_engine2 * e)931 multi_client(int gem_fd, const struct intel_execution_engine2 *e)
932 {
933 	uint64_t config = I915_PMU_ENGINE_BUSY(e->class, e->instance);
934 	unsigned long slept[2];
935 	uint64_t val[2], ts[2], perf_slept[2];
936 	igt_spin_t *spin;
937 	int fd[2];
938 
939 	gem_quiescent_gpu(gem_fd);
940 
941 	fd[0] = open_pmu(config);
942 
943 	/*
944 	 * Second PMU client which is initialized after the first one,
945 	 * and exists before it, should not affect accounting as reported
946 	 * in the first client.
947 	 */
948 	fd[1] = open_pmu(config);
949 
950 	spin = spin_sync(gem_fd, 0, e);
951 
952 	val[0] = val[1] = __pmu_read_single(fd[0], &ts[0]);
953 	slept[1] = measured_usleep(batch_duration_ns / 1000);
954 	val[1] = __pmu_read_single(fd[1], &ts[1]) - val[1];
955 	perf_slept[1] = ts[1] - ts[0];
956 	igt_debug("slept=%lu perf=%"PRIu64"\n", slept[1], perf_slept[1]);
957 	close(fd[1]);
958 
959 	slept[0] = measured_usleep(batch_duration_ns / 1000) + slept[1];
960 	val[0] = __pmu_read_single(fd[0], &ts[1]) - val[0];
961 	perf_slept[0] = ts[1] - ts[0];
962 	igt_debug("slept=%lu perf=%"PRIu64"\n", slept[0], perf_slept[0]);
963 
964 	igt_spin_end(spin);
965 	gem_sync(gem_fd, spin->handle);
966 	igt_spin_free(gem_fd, spin);
967 	close(fd[0]);
968 
969 	assert_within_epsilon(val[0], perf_slept[0], tolerance);
970 	assert_within_epsilon(val[1], perf_slept[1], tolerance);
971 }
972 
973 /**
974  * Tests that i915 PMU corectly errors out in invalid initialization.
975  * i915 PMU is uncore PMU, thus:
976  *  - sampling period is not supported
977  *  - pid > 0 is not supported since we can't count per-process (we count
978  *    per whole system)
979  *  - cpu != 0 is not supported since i915 PMU only allows running on one cpu
980  *    and that is normally CPU0.
981  */
invalid_init(void)982 static void invalid_init(void)
983 {
984 	struct perf_event_attr attr;
985 
986 #define ATTR_INIT() \
987 do { \
988 	memset(&attr, 0, sizeof (attr)); \
989 	attr.config = I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_RENDER, 0); \
990 	attr.type = i915_type_id(); \
991 	igt_assert(attr.type != 0); \
992 	errno = 0; \
993 } while(0)
994 
995 	ATTR_INIT();
996 	attr.sample_period = 100;
997 	igt_assert_eq(perf_event_open(&attr, -1, 0, -1, 0), -1);
998 	igt_assert_eq(errno, EINVAL);
999 
1000 	ATTR_INIT();
1001 	igt_assert_eq(perf_event_open(&attr, 0, 0, -1, 0), -1);
1002 	igt_assert_eq(errno, EINVAL);
1003 
1004 	ATTR_INIT();
1005 	igt_assert_eq(perf_event_open(&attr, -1, 1, -1, 0), -1);
1006 	igt_assert_eq(errno, EINVAL);
1007 }
1008 
/*
 * Try to open non-engine PMU counter number @i.
 *
 * The test is skipped when the open fails with ENODEV. Otherwise the open
 * must succeed when @valid is true and must fail when it is false.
 */
static void init_other(unsigned int i, bool valid)
{
	int fd = perf_i915_open(__I915_PMU_OTHER(i));

	igt_require(!(fd < 0 && errno == ENODEV));

	if (!valid) {
		igt_assert(fd < 0);
		return;
	}

	igt_assert(fd >= 0);
	close(fd);
}
1024 
/*
 * Try to open non-engine PMU counter number @i and, when it is expected to
 * be valid, read it once.
 *
 * The test is skipped when the open fails with ENODEV. Otherwise the open
 * must succeed when @valid is true and must fail when it is false. The value
 * read is discarded; only the read itself is checked.
 */
static void read_other(unsigned int i, bool valid)
{
	int fd = perf_i915_open(__I915_PMU_OTHER(i));

	igt_require(!(fd < 0 && errno == ENODEV));

	if (!valid) {
		igt_assert(fd < 0);
		return;
	}

	igt_assert(fd >= 0);

	(void)pmu_read_single(fd);

	close(fd);
}
1042 
/*
 * Report whether CPU0's sysfs "online" control file is writable by this
 * process, which the CPU hotplug test requires before it runs.
 */
static bool cpu0_hotplug_support(void)
{
	static const char cpu0_online[] =
		"/sys/devices/system/cpu/cpu0/online";

	return access(cpu0_online, W_OK) == 0;
}
1047 
cpu_hotplug(int gem_fd)1048 static void cpu_hotplug(int gem_fd)
1049 {
1050 	igt_spin_t *spin[2];
1051 	uint64_t ts[2];
1052 	uint64_t val;
1053 	int link[2];
1054 	int fd, ret;
1055 	int cur = 0;
1056 	char buf;
1057 
1058 	igt_require(cpu0_hotplug_support());
1059 
1060 	fd = open_pmu(I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_RENDER, 0));
1061 
1062 	/*
1063 	 * Create two spinners so test can ensure shorter gaps in engine
1064 	 * busyness as it is terminating one and re-starting the other.
1065 	 */
1066 	spin[0] = igt_spin_new(gem_fd, .engine = I915_EXEC_DEFAULT);
1067 	spin[1] = __igt_spin_new(gem_fd, .engine = I915_EXEC_DEFAULT);
1068 
1069 	val = __pmu_read_single(fd, &ts[0]);
1070 
1071 	ret = pipe2(link, O_NONBLOCK);
1072 	igt_assert_eq(ret, 0);
1073 
1074 	/*
1075 	 * Toggle online status of all the CPUs in a child process and ensure
1076 	 * this has not affected busyness stats in the parent.
1077 	 */
1078 	igt_fork(child, 1) {
1079 		int cpu = 0;
1080 
1081 		close(link[0]);
1082 
1083 		for (;;) {
1084 			char name[128];
1085 			int cpufd;
1086 
1087 			igt_assert_lt(snprintf(name, sizeof(name),
1088 					       "/sys/devices/system/cpu/cpu%d/online",
1089 					       cpu), sizeof(name));
1090 			cpufd = open(name, O_WRONLY);
1091 			if (cpufd == -1) {
1092 				igt_assert(cpu > 0);
1093 				/*
1094 				 * Signal parent that we cycled through all
1095 				 * CPUs and we are done.
1096 				 */
1097 				igt_assert_eq(write(link[1], "*", 1), 1);
1098 				break;
1099 			}
1100 
1101 			/* Offline followed by online a CPU. */
1102 
1103 			ret = write(cpufd, "0", 2);
1104 			if (ret < 0) {
1105 				/*
1106 				 * If we failed to offline a CPU we don't want
1107 				 * to proceed.
1108 				 */
1109 				igt_warn("Failed to offline cpu%u! (%d)\n",
1110 					 cpu, errno);
1111 				igt_assert_eq(write(link[1], "s", 1), 1);
1112 				break;
1113 			}
1114 
1115 			usleep(1e6);
1116 
1117 			ret = write(cpufd, "1", 2);
1118 			if (ret < 0) {
1119 				/*
1120 				 * Failed to bring a CPU back online is fatal
1121 				 * for the sanity of a test run so stop further
1122 				 * testing.
1123 				 */
1124 				igt_warn("Failed to online cpu%u! (%d)\n",
1125 					 cpu, errno);
1126 				igt_fatal_error();
1127 			}
1128 
1129 			close(cpufd);
1130 			cpu++;
1131 		}
1132 	}
1133 
1134 	close(link[1]);
1135 
1136 	/*
1137 	 * Very long batches can be declared as GPU hangs so emit shorter ones
1138 	 * until the CPU core shuffler finishes one loop.
1139 	 */
1140 	for (;;) {
1141 		usleep(500e3);
1142 		end_spin(gem_fd, spin[cur], 0);
1143 
1144 		/* Check if the child is signaling completion. */
1145 		ret = read(link[0], &buf, 1);
1146 		if ( ret == 1 || (ret < 0 && errno != EAGAIN))
1147 			break;
1148 
1149 		igt_spin_free(gem_fd, spin[cur]);
1150 		spin[cur] = __igt_spin_new(gem_fd,
1151 					   .engine = I915_EXEC_DEFAULT);
1152 		cur ^= 1;
1153 	}
1154 
1155 	val = __pmu_read_single(fd, &ts[1]) - val;
1156 
1157 	end_spin(gem_fd, spin[0], FLAG_SYNC);
1158 	end_spin(gem_fd, spin[1], FLAG_SYNC);
1159 	igt_spin_free(gem_fd, spin[0]);
1160 	igt_spin_free(gem_fd, spin[1]);
1161 	igt_waitchildren();
1162 	close(fd);
1163 	close(link[0]);
1164 
1165 	/* Skip if child signals a problem with offlining a CPU. */
1166 	igt_skip_on(buf == 's');
1167 
1168 	assert_within_epsilon(val, ts[1] - ts[0], tolerance);
1169 }
1170 
1171 static void
test_interrupts(int gem_fd)1172 test_interrupts(int gem_fd)
1173 {
1174 	const unsigned int test_duration_ms = 1000;
1175 	const int target = 30;
1176 	igt_spin_t *spin[target];
1177 	struct pollfd pfd;
1178 	uint64_t idle, busy;
1179 	int fence_fd;
1180 	int fd;
1181 
1182 	gem_quiescent_gpu(gem_fd);
1183 
1184 	fd = open_pmu(I915_PMU_INTERRUPTS);
1185 
1186 	/* Queue spinning batches. */
1187 	for (int i = 0; i < target; i++) {
1188 		spin[i] = __igt_spin_new(gem_fd,
1189 					 .engine = I915_EXEC_DEFAULT,
1190 					 .flags = IGT_SPIN_FENCE_OUT);
1191 		if (i == 0) {
1192 			fence_fd = spin[i]->out_fence;
1193 		} else {
1194 			int old_fd = fence_fd;
1195 
1196 			fence_fd = sync_fence_merge(old_fd,
1197 						    spin[i]->out_fence);
1198 			close(old_fd);
1199 		}
1200 
1201 		igt_assert(fence_fd >= 0);
1202 	}
1203 
1204 	/* Wait for idle state. */
1205 	idle = pmu_read_single(fd);
1206 	do {
1207 		busy = idle;
1208 		usleep(1e3);
1209 		idle = pmu_read_single(fd);
1210 	} while (idle != busy);
1211 
1212 	/* Arm batch expiration. */
1213 	for (int i = 0; i < target; i++)
1214 		igt_spin_set_timeout(spin[i],
1215 				     (i + 1) * test_duration_ms * 1e6
1216 				     / target);
1217 
1218 	/* Wait for last batch to finish. */
1219 	pfd.events = POLLIN;
1220 	pfd.fd = fence_fd;
1221 	igt_assert_eq(poll(&pfd, 1, 2 * test_duration_ms), 1);
1222 	close(fence_fd);
1223 
1224 	/* Free batches. */
1225 	for (int i = 0; i < target; i++)
1226 		igt_spin_free(gem_fd, spin[i]);
1227 
1228 	/* Check at least as many interrupts has been generated. */
1229 	busy = pmu_read_single(fd) - idle;
1230 	close(fd);
1231 
1232 	igt_assert_lte(target, busy);
1233 }
1234 
1235 static void
test_interrupts_sync(int gem_fd)1236 test_interrupts_sync(int gem_fd)
1237 {
1238 	const unsigned int test_duration_ms = 1000;
1239 	const int target = 30;
1240 	igt_spin_t *spin[target];
1241 	struct pollfd pfd;
1242 	uint64_t idle, busy;
1243 	int fd;
1244 
1245 	gem_quiescent_gpu(gem_fd);
1246 
1247 	fd = open_pmu(I915_PMU_INTERRUPTS);
1248 
1249 	/* Queue spinning batches. */
1250 	for (int i = 0; i < target; i++)
1251 		spin[i] = __igt_spin_new(gem_fd,
1252 					 .flags = IGT_SPIN_FENCE_OUT);
1253 
1254 	/* Wait for idle state. */
1255 	idle = pmu_read_single(fd);
1256 	do {
1257 		busy = idle;
1258 		usleep(1e3);
1259 		idle = pmu_read_single(fd);
1260 	} while (idle != busy);
1261 
1262 	/* Process the batch queue. */
1263 	pfd.events = POLLIN;
1264 	for (int i = 0; i < target; i++) {
1265 		const unsigned int timeout_ms = test_duration_ms / target;
1266 
1267 		pfd.fd = spin[i]->out_fence;
1268 		igt_spin_set_timeout(spin[i], timeout_ms * 1e6);
1269 		igt_assert_eq(poll(&pfd, 1, 2 * timeout_ms), 1);
1270 		igt_spin_free(gem_fd, spin[i]);
1271 	}
1272 
1273 	/* Check at least as many interrupts has been generated. */
1274 	busy = pmu_read_single(fd) - idle;
1275 	close(fd);
1276 
1277 	igt_assert_lte(target, busy);
1278 }
1279 
1280 static void
test_frequency(int gem_fd)1281 test_frequency(int gem_fd)
1282 {
1283 	uint32_t min_freq, max_freq, boost_freq;
1284 	uint64_t val[2], start[2], slept;
1285 	double min[2], max[2];
1286 	igt_spin_t *spin;
1287 	int fd, sysfs;
1288 
1289 	sysfs = igt_sysfs_open(gem_fd);
1290 	igt_require(sysfs >= 0);
1291 
1292 	min_freq = igt_sysfs_get_u32(sysfs, "gt_RPn_freq_mhz");
1293 	max_freq = igt_sysfs_get_u32(sysfs, "gt_RP0_freq_mhz");
1294 	boost_freq = igt_sysfs_get_u32(sysfs, "gt_boost_freq_mhz");
1295 	igt_info("Frequency: min=%u, max=%u, boost=%u MHz\n",
1296 		 min_freq, max_freq, boost_freq);
1297 	igt_require(min_freq > 0 && max_freq > 0 && boost_freq > 0);
1298 	igt_require(max_freq > min_freq);
1299 	igt_require(boost_freq > min_freq);
1300 
1301 	fd = open_group(I915_PMU_REQUESTED_FREQUENCY, -1);
1302 	open_group(I915_PMU_ACTUAL_FREQUENCY, fd);
1303 
1304 	/*
1305 	 * Set GPU to min frequency and read PMU counters.
1306 	 */
1307 	igt_require(igt_sysfs_set_u32(sysfs, "gt_min_freq_mhz", min_freq));
1308 	igt_require(igt_sysfs_get_u32(sysfs, "gt_min_freq_mhz") == min_freq);
1309 	igt_require(igt_sysfs_set_u32(sysfs, "gt_max_freq_mhz", min_freq));
1310 	igt_require(igt_sysfs_get_u32(sysfs, "gt_max_freq_mhz") == min_freq);
1311 	igt_require(igt_sysfs_set_u32(sysfs, "gt_boost_freq_mhz", min_freq));
1312 	igt_require(igt_sysfs_get_u32(sysfs, "gt_boost_freq_mhz") == min_freq);
1313 
1314 	gem_quiescent_gpu(gem_fd); /* Idle to be sure the change takes effect */
1315 	spin = spin_sync_flags(gem_fd, 0, I915_EXEC_DEFAULT);
1316 
1317 	slept = pmu_read_multi(fd, 2, start);
1318 	measured_usleep(batch_duration_ns / 1000);
1319 	slept = pmu_read_multi(fd, 2, val) - slept;
1320 
1321 	min[0] = 1e9*(val[0] - start[0]) / slept;
1322 	min[1] = 1e9*(val[1] - start[1]) / slept;
1323 
1324 	igt_spin_free(gem_fd, spin);
1325 	gem_quiescent_gpu(gem_fd); /* Don't leak busy bo into the next phase */
1326 
1327 	usleep(1e6);
1328 
1329 	/*
1330 	 * Set GPU to max frequency and read PMU counters.
1331 	 */
1332 	igt_require(igt_sysfs_set_u32(sysfs, "gt_max_freq_mhz", max_freq));
1333 	igt_require(igt_sysfs_get_u32(sysfs, "gt_max_freq_mhz") == max_freq);
1334 	igt_require(igt_sysfs_set_u32(sysfs, "gt_boost_freq_mhz", boost_freq));
1335 	igt_require(igt_sysfs_get_u32(sysfs, "gt_boost_freq_mhz") == boost_freq);
1336 
1337 	igt_require(igt_sysfs_set_u32(sysfs, "gt_min_freq_mhz", max_freq));
1338 	igt_require(igt_sysfs_get_u32(sysfs, "gt_min_freq_mhz") == max_freq);
1339 
1340 	gem_quiescent_gpu(gem_fd);
1341 	spin = spin_sync_flags(gem_fd, 0, I915_EXEC_DEFAULT);
1342 
1343 	slept = pmu_read_multi(fd, 2, start);
1344 	measured_usleep(batch_duration_ns / 1000);
1345 	slept = pmu_read_multi(fd, 2, val) - slept;
1346 
1347 	max[0] = 1e9*(val[0] - start[0]) / slept;
1348 	max[1] = 1e9*(val[1] - start[1]) / slept;
1349 
1350 	igt_spin_free(gem_fd, spin);
1351 	gem_quiescent_gpu(gem_fd);
1352 
1353 	/*
1354 	 * Restore min/max.
1355 	 */
1356 	igt_sysfs_set_u32(sysfs, "gt_min_freq_mhz", min_freq);
1357 	if (igt_sysfs_get_u32(sysfs, "gt_min_freq_mhz") != min_freq)
1358 		igt_warn("Unable to restore min frequency to saved value [%u MHz], now %u MHz\n",
1359 			 min_freq, igt_sysfs_get_u32(sysfs, "gt_min_freq_mhz"));
1360 	close(fd);
1361 
1362 	igt_info("Min frequency: requested %.1f, actual %.1f\n",
1363 		 min[0], min[1]);
1364 	igt_info("Max frequency: requested %.1f, actual %.1f\n",
1365 		 max[0], max[1]);
1366 
1367 	assert_within_epsilon(min[0], min_freq, tolerance);
1368 	/*
1369 	 * On thermally throttled devices we cannot be sure maximum frequency
1370 	 * can be reached so use larger tolerance downards.
1371 	 */
1372 	__assert_within_epsilon(max[0], max_freq, tolerance, 0.15f);
1373 }
1374 
/*
 * Wait for the counter behind @fd to start advancing.
 *
 * @fd: perf descriptor to sample (callers in this file pass the RC6
 *      residency counter)
 *
 * After an initial 160ms pause the counter is sampled every 5ms. Returns
 * true as soon as one 5ms interval shows an increase of more than 1e6
 * counter units, or false once igt_seconds_elapsed() reports that the
 * polling window has run out.
 */
static bool wait_for_rc6(int fd)
{
	struct timespec tv = {};
	uint64_t prev, cur;

	/* First wait for roughly an RC6 Evaluation Interval */
	usleep(160 * 1000);

	/* Then poll for RC6 to start ticking */
	cur = pmu_read_single(fd);
	for (;;) {
		prev = cur;
		usleep(5000);
		cur = pmu_read_single(fd);

		if (cur - prev > 1e6)
			return true;

		if (igt_seconds_elapsed(&tv))
			return false;
	}
}
1395 
1396 static void
test_rc6(int gem_fd,unsigned int flags)1397 test_rc6(int gem_fd, unsigned int flags)
1398 {
1399 	int64_t duration_ns = 2e9;
1400 	uint64_t idle, busy, prev, ts[2];
1401 	unsigned long slept;
1402 	int fd, fw;
1403 
1404 	gem_quiescent_gpu(gem_fd);
1405 
1406 	fd = open_pmu(I915_PMU_RC6_RESIDENCY);
1407 
1408 	if (flags & TEST_RUNTIME_PM) {
1409 		drmModeRes *res;
1410 
1411 		res = drmModeGetResources(gem_fd);
1412 		igt_require(res);
1413 
1414 		/* force all connectors off */
1415 		kmstest_set_vt_graphics_mode();
1416 		kmstest_unset_all_crtcs(gem_fd, res);
1417 		drmModeFreeResources(res);
1418 
1419 		igt_require(igt_setup_runtime_pm());
1420 		igt_require(igt_wait_for_pm_status(IGT_RUNTIME_PM_STATUS_SUSPENDED));
1421 
1422 		/*
1423 		 * Sleep for a bit to see if once woken up estimated RC6 hasn't
1424 		 * drifted to far in advance of real RC6.
1425 		 */
1426 		if (flags & FLAG_LONG) {
1427 			pmu_read_single(fd);
1428 			sleep(5);
1429 			pmu_read_single(fd);
1430 		}
1431 	}
1432 
1433 	igt_require(wait_for_rc6(fd));
1434 
1435 	/* While idle check full RC6. */
1436 	prev = __pmu_read_single(fd, &ts[0]);
1437 	slept = measured_usleep(duration_ns / 1000);
1438 	idle = __pmu_read_single(fd, &ts[1]);
1439 	igt_debug("slept=%lu perf=%"PRIu64"\n", slept, ts[1] - ts[0]);
1440 
1441 	assert_within_epsilon(idle - prev, ts[1] - ts[0], tolerance);
1442 
1443 	/* Wake up device and check no RC6. */
1444 	fw = igt_open_forcewake_handle(gem_fd);
1445 	igt_assert(fw >= 0);
1446 	usleep(1e3); /* wait for the rc6 cycle counter to stop ticking */
1447 
1448 	prev = pmu_read_single(fd);
1449 	usleep(duration_ns / 1000);
1450 	busy = pmu_read_single(fd);
1451 
1452 	close(fw);
1453 	close(fd);
1454 
1455 	if (flags & TEST_RUNTIME_PM)
1456 		igt_restore_runtime_pm();
1457 
1458 	assert_within_epsilon(busy - prev, 0.0, tolerance);
1459 }
1460 
1461 static void
test_enable_race(int gem_fd,const struct intel_execution_engine2 * e)1462 test_enable_race(int gem_fd, const struct intel_execution_engine2 *e)
1463 {
1464 	uint64_t config = I915_PMU_ENGINE_BUSY(e->class, e->instance);
1465 	struct igt_helper_process engine_load = { };
1466 	const uint32_t bbend = MI_BATCH_BUFFER_END;
1467 	struct drm_i915_gem_exec_object2 obj = { };
1468 	struct drm_i915_gem_execbuffer2 eb = { };
1469 	int fd;
1470 
1471 	igt_require(gem_has_execlists(gem_fd));
1472 	igt_require(gem_context_has_engine(gem_fd, 0, e->flags));
1473 
1474 	obj.handle = gem_create(gem_fd, 4096);
1475 	gem_write(gem_fd, obj.handle, 0, &bbend, sizeof(bbend));
1476 
1477 	eb.buffer_count = 1;
1478 	eb.buffers_ptr = to_user_pointer(&obj);
1479 	eb.flags = e->flags;
1480 
1481 	/*
1482 	 * This test is probabilistic so run in a few times to increase the
1483 	 * chance of hitting the race.
1484 	 */
1485 	igt_until_timeout(10) {
1486 		/*
1487 		 * Defeat the busy stats delayed disable, we need to guarantee
1488 		 * we are the first PMU user.
1489 		 */
1490 		gem_quiescent_gpu(gem_fd);
1491 		sleep(2);
1492 
1493 		/* Apply interrupt-heavy load on the engine. */
1494 		igt_fork_helper(&engine_load) {
1495 			for (;;)
1496 				gem_execbuf(gem_fd, &eb);
1497 		}
1498 
1499 		/* Wait a bit to allow engine load to start. */
1500 		usleep(500e3);
1501 
1502 		/* Enable the PMU. */
1503 		fd = open_pmu(config);
1504 
1505 		/* Stop load and close the PMU. */
1506 		igt_stop_helper(&engine_load);
1507 		close(fd);
1508 	}
1509 
1510 	/* Cleanup. */
1511 	gem_close(gem_fd, obj.handle);
1512 	gem_quiescent_gpu(gem_fd);
1513 }
1514 
/*
 * Assert that @val lies in the closed interval
 * [@expect - @below, @expect + @above], comparing as doubles. The bounds
 * are absolute offsets, not fractions of @expect. On failure the message
 * shows the value, both offsets, the reference and the source text of the
 * two compared expressions.
 *
 * Note that @val and @expect are evaluated more than once.
 */
#define __assert_within(val, expect, above, below) \
	igt_assert_f((double)(val) <= ((double)(expect) + (above)) && \
		     (double)(val) >= ((double)(expect) - (below)), \
		     "%f not within +%f/-%f of %f! ('%s' vs '%s')\n", \
		     (double)(val), (double)(above), (double)(below), \
		     (double)(expect), #val, #expect)

/* Symmetric form: the same absolute offset @tol applies in both directions. */
#define assert_within(val, expect, tol) \
	__assert_within(val, expect, tol, tol)
1524 
1525 static void
accuracy(int gem_fd,const struct intel_execution_engine2 * e,unsigned long target_busy_pct,unsigned long target_iters)1526 accuracy(int gem_fd, const struct intel_execution_engine2 *e,
1527 	 unsigned long target_busy_pct,
1528 	 unsigned long target_iters)
1529 {
1530 	const unsigned long min_test_us = 1e6;
1531 	unsigned long pwm_calibration_us;
1532 	unsigned long test_us;
1533 	unsigned long cycle_us, busy_us, idle_us;
1534 	double busy_r, expected;
1535 	uint64_t val[2];
1536 	uint64_t ts[2];
1537 	int link[2];
1538 	int fd;
1539 
1540 	/* Sampling platforms cannot reach the high accuracy criteria. */
1541 	igt_require(gem_has_execlists(gem_fd));
1542 
1543 	/* Aim for approximately 100 iterations for calibration */
1544 	cycle_us = min_test_us / target_iters;
1545 	busy_us = cycle_us * target_busy_pct / 100;
1546 	idle_us = cycle_us - busy_us;
1547 
1548 	while (idle_us < 2500 || busy_us < 2500) {
1549 		busy_us *= 2;
1550 		idle_us *= 2;
1551 	}
1552 	cycle_us = busy_us + idle_us;
1553 	pwm_calibration_us = target_iters * cycle_us / 2;
1554 	test_us = target_iters * cycle_us;
1555 
1556 	igt_info("calibration=%lums, test=%lums, cycle=%lums; ratio=%.2f%% (%luus/%luus)\n",
1557 		 pwm_calibration_us / 1000, test_us / 1000, cycle_us / 1000,
1558 		 (double)busy_us / cycle_us * 100.0,
1559 		 busy_us, idle_us);
1560 
1561 	assert_within_epsilon((double)busy_us / cycle_us,
1562 			      (double)target_busy_pct / 100.0,
1563 			      tolerance);
1564 
1565 	igt_assert(pipe(link) == 0);
1566 
1567 	/* Emit PWM pattern on the engine from a child. */
1568 	igt_fork(child, 1) {
1569 		const unsigned long timeout[] = {
1570 			pwm_calibration_us * 1000, test_us * 1000
1571 		};
1572 		uint64_t total_busy_ns = 0, total_ns = 0;
1573 		igt_spin_t *spin;
1574 
1575 		/* Allocate our spin batch and idle it. */
1576 		spin = igt_spin_new(gem_fd, .engine = e->flags);
1577 		igt_spin_end(spin);
1578 		gem_sync(gem_fd, spin->handle);
1579 
1580 		/* 1st pass is calibration, second pass is the test. */
1581 		for (int pass = 0; pass < ARRAY_SIZE(timeout); pass++) {
1582 			unsigned int target_idle_us = idle_us;
1583 			struct timespec start = { };
1584 			uint64_t busy_ns = 0;
1585 			unsigned long pass_ns = 0;
1586 			double avg = 0.0, var = 0.0;
1587 			unsigned int n = 0;
1588 
1589 			igt_nsec_elapsed(&start);
1590 
1591 			do {
1592 				unsigned long loop_ns, loop_busy;
1593 				struct timespec _ts = { };
1594 				double err, tmp;
1595 				uint64_t now;
1596 
1597 				/* PWM idle sleep. */
1598 				_ts.tv_nsec = target_idle_us * 1000;
1599 				nanosleep(&_ts, NULL);
1600 
1601 				/* Restart the spinbatch. */
1602 				igt_spin_reset(spin);
1603 				__submit_spin(gem_fd, spin, e, 0);
1604 
1605 				/* PWM busy sleep. */
1606 				loop_busy = igt_nsec_elapsed(&start);
1607 				_ts.tv_nsec = busy_us * 1000;
1608 				nanosleep(&_ts, NULL);
1609 				igt_spin_end(spin);
1610 
1611 				/* Time accounting. */
1612 				now = igt_nsec_elapsed(&start);
1613 				loop_busy = now - loop_busy;
1614 				loop_ns = now - pass_ns;
1615 				pass_ns = now;
1616 
1617 				busy_ns += loop_busy;
1618 				total_busy_ns += loop_busy;
1619 				total_ns += loop_ns;
1620 
1621 				/* Re-calibrate. */
1622 				err = (double)total_busy_ns / total_ns -
1623 				      (double)target_busy_pct / 100.0;
1624 				target_idle_us = (double)target_idle_us *
1625 						 (1.0 + err);
1626 
1627 				/* Running average and variance for debug. */
1628 				err = 100.0 * total_busy_ns / total_ns;
1629 				tmp = avg;
1630 				avg += (err - avg) / ++n;
1631 				var += (err - avg) * (err - tmp);
1632 			} while (pass_ns < timeout[pass]);
1633 
1634 			pass_ns = igt_nsec_elapsed(&start);
1635 			expected = (double)busy_ns / pass_ns;
1636 
1637 			igt_info("%u: %d cycles, busy %"PRIu64"us, idle %"PRIu64"us -> %.2f%% (target: %lu%%; average=%.2f±%.3f%%)\n",
1638 				 pass, n,
1639 				 busy_ns / 1000, (pass_ns - busy_ns) / 1000,
1640 				 100 * expected, target_busy_pct,
1641 				 avg, sqrt(var / n));
1642 
1643 			write(link[1], &expected, sizeof(expected));
1644 		}
1645 
1646 		igt_spin_free(gem_fd, spin);
1647 	}
1648 
1649 	fd = open_pmu(I915_PMU_ENGINE_BUSY(e->class, e->instance));
1650 
1651 	/* Let the child run. */
1652 	read(link[0], &expected, sizeof(expected));
1653 	assert_within(100.0 * expected, target_busy_pct, 5);
1654 
1655 	/* Collect engine busyness for an interesting part of child runtime. */
1656 	val[0] = __pmu_read_single(fd, &ts[0]);
1657 	read(link[0], &expected, sizeof(expected));
1658 	val[1] = __pmu_read_single(fd, &ts[1]);
1659 	close(fd);
1660 
1661 	close(link[1]);
1662 	close(link[0]);
1663 
1664 	igt_waitchildren();
1665 
1666 	busy_r = (double)(val[1] - val[0]) / (ts[1] - ts[0]);
1667 
1668 	igt_info("error=%.2f%% (%.2f%% vs %.2f%%)\n",
1669 		 (busy_r - expected) * 100, 100 * busy_r, 100 * expected);
1670 
1671 	assert_within(100.0 * busy_r, 100.0 * expected, 2);
1672 }
1673 
1674 igt_main
1675 {
1676 	const unsigned int num_other_metrics =
1677 				I915_PMU_LAST - __I915_PMU_OTHER(0) + 1;
1678 	unsigned int num_engines = 0;
1679 	int fd = -1;
1680 	struct intel_execution_engine2 *e;
1681 	unsigned int i;
1682 
1683 	igt_fixture {
1684 		fd = drm_open_driver_master(DRIVER_INTEL);
1685 
1686 		igt_require_gem(fd);
1687 		igt_require(i915_type_id() > 0);
1688 
1689 		__for_each_physical_engine(fd, e)
1690 			num_engines++;
1691 	}
1692 
1693 	/**
1694 	 * Test invalid access via perf API is rejected.
1695 	 */
1696 	igt_subtest("invalid-init")
1697 		invalid_init();
1698 
__for_each_physical_engine(fd,e)1699 	__for_each_physical_engine(fd, e) {
1700 		const unsigned int pct[] = { 2, 50, 98 };
1701 
1702 		/**
1703 		 * Test that a single engine metric can be initialized or it
1704 		 * is correctly rejected.
1705 		 */
1706 		igt_subtest_f("init-busy-%s", e->name)
1707 			init(fd, e, I915_SAMPLE_BUSY);
1708 
1709 		igt_subtest_f("init-wait-%s", e->name)
1710 			init(fd, e, I915_SAMPLE_WAIT);
1711 
1712 		igt_subtest_f("init-sema-%s", e->name)
1713 			init(fd, e, I915_SAMPLE_SEMA);
1714 
1715 		/**
1716 		 * Test that engines show no load when idle.
1717 		 */
1718 		igt_subtest_f("idle-%s", e->name)
1719 			single(fd, e, 0);
1720 
1721 		/**
1722 		 * Test that a single engine reports load correctly.
1723 		 */
1724 		igt_subtest_f("busy-%s", e->name)
1725 			single(fd, e, TEST_BUSY);
1726 		igt_subtest_f("busy-idle-%s", e->name)
1727 			single(fd, e, TEST_BUSY | TEST_TRAILING_IDLE);
1728 
1729 		/**
1730 		 * Test that when one engine is loaded other report no
1731 		 * load.
1732 		 */
1733 		igt_subtest_f("busy-check-all-%s", e->name)
1734 			busy_check_all(fd, e, num_engines, TEST_BUSY);
1735 		igt_subtest_f("busy-idle-check-all-%s", e->name)
1736 			busy_check_all(fd, e, num_engines,
1737 				       TEST_BUSY | TEST_TRAILING_IDLE);
1738 
1739 		/**
1740 		 * Test that when all except one engine are loaded all
1741 		 * loads are correctly reported.
1742 		 */
1743 		igt_subtest_f("most-busy-check-all-%s", e->name)
1744 			most_busy_check_all(fd, e, num_engines,
1745 					    TEST_BUSY);
1746 		igt_subtest_f("most-busy-idle-check-all-%s", e->name)
1747 			most_busy_check_all(fd, e, num_engines,
1748 					    TEST_BUSY |
1749 					    TEST_TRAILING_IDLE);
1750 
1751 		/**
1752 		 * Test that semphore counters report no activity on
1753 		 * idle or busy engines.
1754 		 */
1755 		igt_subtest_f("idle-no-semaphores-%s", e->name)
1756 			no_sema(fd, e, 0);
1757 
1758 		igt_subtest_f("busy-no-semaphores-%s", e->name)
1759 			no_sema(fd, e, TEST_BUSY);
1760 
1761 		igt_subtest_f("busy-idle-no-semaphores-%s", e->name)
1762 			no_sema(fd, e, TEST_BUSY | TEST_TRAILING_IDLE);
1763 
1764 		/**
1765 		 * Test that semaphore waits are correctly reported.
1766 		 */
1767 		igt_subtest_f("semaphore-wait-%s", e->name)
1768 			sema_wait(fd, e, TEST_BUSY);
1769 
1770 		igt_subtest_f("semaphore-wait-idle-%s", e->name)
1771 			sema_wait(fd, e,
1772 				  TEST_BUSY | TEST_TRAILING_IDLE);
1773 
1774 		/**
1775 		 * Check that two perf clients do not influence each
1776 		 * others observations.
1777 		 */
1778 		igt_subtest_f("multi-client-%s", e->name)
1779 			multi_client(fd, e);
1780 
1781 		/**
1782 		 * Check that reported usage is correct when PMU is
1783 		 * enabled after the batch is running.
1784 		 */
1785 		igt_subtest_f("busy-start-%s", e->name)
1786 			busy_start(fd, e);
1787 
1788 		/**
1789 		 * Check that reported usage is correct when PMU is
1790 		 * enabled after two batches are running.
1791 		 */
1792 		igt_subtest_f("busy-double-start-%s", e->name) {
1793 			gem_require_contexts(fd);
1794 			busy_double_start(fd, e);
1795 		}
1796 
1797 		/**
1798 		 * Check that the PMU can be safely enabled in face of
1799 		 * interrupt-heavy engine load.
1800 		 */
1801 		igt_subtest_f("enable-race-%s", e->name)
1802 			test_enable_race(fd, e);
1803 
1804 		/**
1805 		 * Check engine busyness accuracy is as expected.
1806 		 */
1807 		for (i = 0; i < ARRAY_SIZE(pct); i++) {
1808 			igt_subtest_f("busy-accuracy-%u-%s",
1809 				      pct[i], e->name)
1810 				accuracy(fd, e, pct[i], 10);
1811 		}
1812 
1813 		igt_subtest_f("busy-hang-%s", e->name) {
1814 			igt_hang_t hang = igt_allow_hang(fd, 0, 0);
1815 
1816 			single(fd, e, TEST_BUSY | FLAG_HANG);
1817 
1818 			igt_disallow_hang(fd, hang);
1819 		}
1820 
1821 		/**
1822 		 * Test that event waits are correctly reported.
1823 		 */
1824 		if (e->class == I915_ENGINE_CLASS_RENDER)
1825 			igt_subtest_f("event-wait-%s", e->name)
1826 				event_wait(fd, e);
1827 	}
1828 
1829 	/**
1830 	 * Test that when all engines are loaded all loads are
1831 	 * correctly reported.
1832 	 */
1833 	igt_subtest("all-busy-check-all")
1834 		all_busy_check_all(fd, num_engines, TEST_BUSY);
1835 	igt_subtest("all-busy-idle-check-all")
1836 		all_busy_check_all(fd, num_engines,
1837 				   TEST_BUSY | TEST_TRAILING_IDLE);
1838 
1839 	/**
1840 	 * Test that non-engine counters can be initialized and read. Apart
1841 	 * from the invalid metric which should fail.
1842 	 */
1843 	for (i = 0; i < num_other_metrics + 1; i++) {
1844 		igt_subtest_f("other-init-%u", i)
1845 			init_other(i, i < num_other_metrics);
1846 
1847 		igt_subtest_f("other-read-%u", i)
1848 			read_other(i, i < num_other_metrics);
1849 	}
1850 
1851 	/**
1852 	 * Test counters are not affected by CPU offline/online events.
1853 	 */
1854 	igt_subtest("cpu-hotplug")
1855 		cpu_hotplug(fd);
1856 
1857 	/**
1858 	 * Test GPU frequency.
1859 	 */
1860 	igt_subtest("frequency")
1861 		test_frequency(fd);
1862 
1863 	/**
1864 	 * Test interrupt count reporting.
1865 	 */
1866 	igt_subtest("interrupts")
1867 		test_interrupts(fd);
1868 
1869 	igt_subtest("interrupts-sync")
1870 		test_interrupts_sync(fd);
1871 
1872 	/**
1873 	 * Test RC6 residency reporting.
1874 	 */
1875 	igt_subtest("rc6")
1876 		test_rc6(fd, 0);
1877 
1878 	igt_subtest("rc6-runtime-pm")
1879 		test_rc6(fd, TEST_RUNTIME_PM);
1880 
1881 	igt_subtest("rc6-runtime-pm-long")
1882 		test_rc6(fd, TEST_RUNTIME_PM | FLAG_LONG);
1883 
1884 	/**
1885 	 * Check render nodes are counted.
1886 	 */
1887 	igt_subtest_group {
1888 		int render_fd = -1;
1889 
1890 		igt_fixture {
1891 			render_fd = drm_open_driver_render(DRIVER_INTEL);
1892 			igt_require_gem(render_fd);
1893 
1894 			gem_quiescent_gpu(fd);
1895 		}
1896 
__for_each_physical_engine(render_fd,e)1897 		__for_each_physical_engine(render_fd, e) {
1898 			igt_subtest_f("render-node-busy-%s", e->name)
1899 				single(render_fd, e, TEST_BUSY);
1900 			igt_subtest_f("render-node-busy-idle-%s",
1901 				      e->name)
1902 				single(render_fd, e,
1903 				       TEST_BUSY | TEST_TRAILING_IDLE);
1904 		}
1905 
1906 		igt_fixture {
1907 			close(render_fd);
1908 		}
1909 	}
1910 }
1911