1 /*
2  * Copyright (c) 2013 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *  Mika Kuoppala <mika.kuoppala@intel.com>
25  *
26  */
27 
28 #include "igt.h"
29 #include "igt_sysfs.h"
30 #include <limits.h>
31 #include <stdbool.h>
32 #include <unistd.h>
33 #include <stdlib.h>
34 #include <stdio.h>
35 #include <string.h>
36 #include <fcntl.h>
37 #include <inttypes.h>
38 #include <errno.h>
39 #include <sys/stat.h>
40 #include <sys/ioctl.h>
41 #include <sys/mman.h>
42 #include <time.h>
43 #include <signal.h>
44 
45 
/*
 * Status values returned by gem_reset_status() and compared against by
 * _assert_reset_status(). status_to_string() maps 0, 1 and 2 to names.
 */
#define RS_NO_ERROR      0
#define RS_BATCH_ACTIVE  (1 << 0)
#define RS_BATCH_PENDING (1 << 1)
#define RS_UNKNOWN       (1 << 2)


/* Device id, stored by the first fixture from intel_get_drm_devid(). */
static uint32_t devid;

/*
 * Argument block for GET_RESET_STATS_IOCTL, declared locally.
 * NOTE(review): presumably mirrors the kernel uapi reset-stats structure --
 * confirm the layout against the i915 uapi header.
 */
struct local_drm_i915_reset_stats {
	__u32 ctx_id;		/* context being queried; the tests pass 0 when they created none */
	__u32 flags;		/* the tests expect -EINVAL when this is non-zero */
	__u32 reset_count;	/* read back by get_reset_count() */
	__u32 batch_active;	/* non-zero is reported as RS_BATCH_ACTIVE */
	__u32 batch_pending;	/* non-zero is reported as RS_BATCH_PENDING */
	__u32 pad;		/* the tests expect -EINVAL when this is non-zero */
};

/* Capacity of the per-test file descriptor arrays. */
#define MAX_FD 32

/* Locally constructed ioctl request number for the reset-stats query. */
#define GET_RESET_STATS_IOCTL DRM_IOWR(DRM_COMMAND_BASE + 0x32, struct local_drm_i915_reset_stats)

/* Not referenced by the code visible in this file. */
#define LOCAL_I915_EXEC_VEBOX	(4 << 0)
68 
sync_gpu(void)69 static void sync_gpu(void)
70 {
71 	int fd = drm_open_driver(DRIVER_INTEL);
72 	gem_quiescent_gpu(fd);
73 	close(fd);
74 }
75 
noop(int fd,uint32_t ctx,const struct intel_execution_engine * e)76 static int noop(int fd, uint32_t ctx, const struct intel_execution_engine *e)
77 {
78 	const uint32_t bbe = MI_BATCH_BUFFER_END;
79 	struct drm_i915_gem_execbuffer2 eb;
80 	struct drm_i915_gem_exec_object2 exec;
81 	int ret;
82 
83 	memset(&exec, 0, sizeof(exec));
84 	exec.handle = gem_create(fd, 4096);
85 	igt_assert((int)exec.handle > 0);
86 	gem_write(fd, exec.handle, 0, &bbe, sizeof(bbe));
87 
88 	memset(&eb, 0, sizeof(eb));
89 	eb.buffers_ptr = to_user_pointer(&exec);
90 	eb.buffer_count = 1;
91 	eb.flags = e->exec_id | e->flags;
92 	i915_execbuffer2_set_context_id(eb, ctx);
93 
94 	ret = __gem_execbuf(fd, &eb);
95 	if (ret < 0) {
96 		gem_close(fd, exec.handle);
97 		return ret;
98 	}
99 
100 	return exec.handle;
101 }
102 
/*
 * Probe whether a no-op batch can be submitted to engine @e with context
 * @ctx. Returns 1 if noop() succeeded (its object is closed again),
 * 0 otherwise.
 */
static int has_engine(int fd,
		      uint32_t ctx,
		      const struct intel_execution_engine *e)
{
	const int bo = noop(fd, ctx, e);

	if (bo >= 0) {
		gem_close(fd, bo);
		return 1;
	}

	return 0;
}
113 
check_context(const struct intel_execution_engine * e)114 static void check_context(const struct intel_execution_engine *e)
115 {
116 	int fd = drm_open_driver(DRIVER_INTEL);
117 
118 	gem_require_contexts(fd);
119 	igt_require(has_engine(fd, gem_context_create(fd), e));
120 
121 	close(fd);
122 }
123 
gem_reset_stats(int fd,int ctx_id,struct local_drm_i915_reset_stats * rs)124 static int gem_reset_stats(int fd, int ctx_id,
125 			   struct local_drm_i915_reset_stats *rs)
126 {
127 	memset(rs, 0, sizeof(*rs));
128 	rs->ctx_id = ctx_id;
129 	rs->reset_count = -1;
130 
131 	if (drmIoctl(fd, GET_RESET_STATS_IOCTL, rs))
132 		return -errno;
133 
134 	igt_assert(rs->reset_count != -1);
135 	return 0;
136 }
137 
gem_reset_status(int fd,int ctx_id)138 static int gem_reset_status(int fd, int ctx_id)
139 {
140 	struct local_drm_i915_reset_stats rs;
141 	int ret;
142 
143 	ret = gem_reset_stats(fd, ctx_id, &rs);
144 	if (ret)
145 		return ret;
146 
147 	if (rs.batch_active)
148 		return RS_BATCH_ACTIVE;
149 	if (rs.batch_pending)
150 		return RS_BATCH_PENDING;
151 
152 	return RS_NO_ERROR;
153 }
154 
155 static struct timespec ts_injected;
156 
157 #define BAN HANG_ALLOW_BAN
158 #define ASYNC 2
inject_hang(int fd,uint32_t ctx,const struct intel_execution_engine * e,unsigned flags)159 static void inject_hang(int fd, uint32_t ctx,
160 			const struct intel_execution_engine *e,
161 			unsigned flags)
162 {
163 	igt_hang_t hang;
164 
165 	clock_gettime(CLOCK_MONOTONIC, &ts_injected);
166 
167 	hang = igt_hang_ctx(fd, ctx, e->exec_id | e->flags, flags & BAN);
168 	if ((flags & ASYNC) == 0)
169 		igt_post_hang_ring(fd, hang);
170 }
171 
/*
 * Translate a reset status value into a human readable name.
 *
 * 0, 1 and 2 (RS_NO_ERROR, RS_BATCH_ACTIVE, RS_BATCH_PENDING) map to
 * "No error", "Guilty" and "Pending". Every other value, including
 * negative ones, yields "Unknown".
 */
static const char *status_to_string(int x)
{
	/* Built once rather than on every call. */
	static const char *const strings[] = {
		"No error",
		"Guilty",
		"Pending",
	};
	const size_t count = sizeof(strings) / sizeof(strings[0]);

	/* Reject negatives explicitly instead of via unsigned conversion. */
	if (x < 0 || (size_t)x >= count)
		return "Unknown";

	return strings[x];
}
183 
/*
 * Compare the reset status of context @ctx on @fd with @status.
 *
 * @idx is only used to label the log output.
 *
 * Returns 0 on a match, 1 on a mismatch (logged), or the negative error
 * from gem_reset_status() if the query itself failed (logged).
 */
static int _assert_reset_status(int idx, int fd, int ctx, int status)
{
	const int found = gem_reset_status(fd, ctx);

	if (found < 0) {
		igt_info("reset status for %d ctx %d returned %d\n",
			 idx, ctx, found);
		return found;
	}

	if (found == status)
		return 0;

	igt_info("%d:%d expected '%s' [%d], found '%s' [%d]\n",
		 idx, ctx,
		 status_to_string(status), status,
		 status_to_string(found), found);

	return 1;
}

/* Fail the test unless _assert_reset_status() reports a match. */
#define assert_reset_status(idx, fd, ctx, status) \
	igt_assert(_assert_reset_status(idx, fd, ctx, status) == 0)
209 
test_rs(const struct intel_execution_engine * e,int num_fds,int hang_index,int rs_assumed_no_hang)210 static void test_rs(const struct intel_execution_engine *e,
211 		    int num_fds, int hang_index, int rs_assumed_no_hang)
212 {
213 	int fd[MAX_FD];
214 	int i;
215 
216 	igt_assert_lte(num_fds, MAX_FD);
217 	igt_assert_lt(hang_index, MAX_FD);
218 
219 	igt_debug("num fds=%d, hang index=%d\n", num_fds, hang_index);
220 
221 	for (i = 0; i < num_fds; i++) {
222 		fd[i] = drm_open_driver(DRIVER_INTEL);
223 		assert_reset_status(i, fd[i], 0, RS_NO_ERROR);
224 	}
225 
226 	sync_gpu();
227 	for (i = 0; i < num_fds; i++) {
228 		if (i == hang_index)
229 			inject_hang(fd[i], 0, e, ASYNC);
230 		else
231 			igt_assert(noop(fd[i], 0, e) > 0);
232 	}
233 	sync_gpu();
234 
235 	for (i = 0; i < num_fds; i++) {
236 		if (hang_index < 0) {
237 			assert_reset_status(i, fd[i], 0, rs_assumed_no_hang);
238 			continue;
239 		}
240 
241 		if (i < hang_index)
242 			assert_reset_status(i, fd[i], 0, RS_NO_ERROR);
243 		if (i == hang_index)
244 			assert_reset_status(i, fd[i], 0, RS_BATCH_ACTIVE);
245 		if (i > hang_index)
246 			assert_reset_status(i, fd[i], 0, RS_NO_ERROR);
247 	}
248 
249 	igt_assert(igt_seconds_elapsed(&ts_injected) <= 30);
250 
251 	for (i = 0; i < num_fds; i++)
252 		close(fd[i]);
253 }
254 
255 #define MAX_CTX 100
test_rs_ctx(const struct intel_execution_engine * e,int num_fds,int num_ctx,int hang_index,int hang_context)256 static void test_rs_ctx(const struct intel_execution_engine *e,
257 			int num_fds, int num_ctx, int hang_index,
258 			int hang_context)
259 {
260 	int i, j;
261 	int fd[MAX_FD];
262 	int ctx[MAX_FD][MAX_CTX];
263 
264 	igt_assert_lte(num_fds, MAX_FD);
265 	igt_assert_lt(hang_index, MAX_FD);
266 
267 	igt_assert_lte(num_ctx, MAX_CTX);
268 	igt_assert_lt(hang_context, MAX_CTX);
269 
270 	test_rs(e, num_fds, -1, RS_NO_ERROR);
271 
272 	for (i = 0; i < num_fds; i++) {
273 		fd[i] = drm_open_driver(DRIVER_INTEL);
274 		igt_assert(fd[i]);
275 		assert_reset_status(i, fd[i], 0, RS_NO_ERROR);
276 
277 		for (j = 0; j < num_ctx; j++) {
278 			ctx[i][j] = gem_context_create(fd[i]);
279 		}
280 
281 		assert_reset_status(i, fd[i], 0, RS_NO_ERROR);
282 	}
283 
284 	for (i = 0; i < num_fds; i++) {
285 		assert_reset_status(i, fd[i], 0, RS_NO_ERROR);
286 
287 		for (j = 0; j < num_ctx; j++)
288 			assert_reset_status(i, fd[i], ctx[i][j], RS_NO_ERROR);
289 
290 		assert_reset_status(i, fd[i], 0, RS_NO_ERROR);
291 	}
292 
293 	for (i = 0; i < num_fds; i++) {
294 		for (j = 0; j < num_ctx; j++) {
295 			if (i == hang_index && j == hang_context)
296 				inject_hang(fd[i], ctx[i][j], e, ASYNC);
297 			else
298 				igt_assert(noop(fd[i], ctx[i][j], e) > 0);
299 		}
300 	}
301 	sync_gpu();
302 
303 	igt_assert(igt_seconds_elapsed(&ts_injected) <= 30);
304 
305 	for (i = 0; i < num_fds; i++)
306 		assert_reset_status(i, fd[i], 0, RS_NO_ERROR);
307 
308 	for (i = 0; i < num_fds; i++) {
309 		for (j = 0; j < num_ctx; j++) {
310 			if (i < hang_index)
311 				assert_reset_status(i, fd[i], ctx[i][j], RS_NO_ERROR);
312 			if (i == hang_index && j < hang_context)
313 				assert_reset_status(i, fd[i], ctx[i][j], RS_NO_ERROR);
314 			if (i == hang_index && j == hang_context)
315 				assert_reset_status(i, fd[i], ctx[i][j],
316 						    RS_BATCH_ACTIVE);
317 			if (i == hang_index && j > hang_context)
318 				assert_reset_status(i, fd[i], ctx[i][j],
319 						    RS_NO_ERROR);
320 			if (i > hang_index)
321 				assert_reset_status(i, fd[i], ctx[i][j],
322 						    RS_NO_ERROR);
323 		}
324 	}
325 
326 	for (i = 0; i < num_fds; i++) {
327 		assert_reset_status(i, fd[i], 0, RS_NO_ERROR);
328 		close(fd[i]);
329 	}
330 }
331 
test_ban(const struct intel_execution_engine * e)332 static void test_ban(const struct intel_execution_engine *e)
333 {
334 	struct local_drm_i915_reset_stats rs_bad, rs_good;
335 	int fd_bad, fd_good;
336 	int ban, retry = 10;
337 	int active_count = 0;
338 
339 	fd_bad = drm_open_driver(DRIVER_INTEL);
340 	fd_good = drm_open_driver(DRIVER_INTEL);
341 
342 	assert_reset_status(fd_bad, fd_bad, 0, RS_NO_ERROR);
343 	assert_reset_status(fd_good, fd_good, 0, RS_NO_ERROR);
344 
345 	noop(fd_bad, 0, e);
346 	noop(fd_good, 0, e);
347 
348 	assert_reset_status(fd_bad, fd_bad, 0, RS_NO_ERROR);
349 	assert_reset_status(fd_good, fd_good, 0, RS_NO_ERROR);
350 
351 	inject_hang(fd_bad, 0, e, BAN | ASYNC);
352 	active_count++;
353 
354 	noop(fd_good, 0, e);
355 	noop(fd_good, 0, e);
356 
357 	while (retry--) {
358 		inject_hang(fd_bad, 0, e, BAN);
359 		active_count++;
360 
361 		ban = noop(fd_bad, 0, e);
362 		if (ban == -EIO)
363 			break;
364 
365 		/* Should not happen often but sometimes hang is declared too
366 		 * slow due to our way of faking hang using loop */
367 		gem_close(fd_bad, ban);
368 
369 		igt_info("retrying for ban (%d)\n", retry);
370 	}
371 	igt_assert_eq(ban, -EIO);
372 	igt_assert_lt(0, noop(fd_good, 0, e));
373 
374 	assert_reset_status(fd_bad, fd_bad, 0, RS_BATCH_ACTIVE);
375 	igt_assert_eq(gem_reset_stats(fd_bad, 0, &rs_bad), 0);
376 	igt_assert_eq(rs_bad.batch_active, active_count);
377 
378 	assert_reset_status(fd_good, fd_good, 0, RS_NO_ERROR);
379 	igt_assert_eq(gem_reset_stats(fd_good, 0, &rs_good), 0);
380 	igt_assert_eq(rs_good.batch_active, 0);
381 
382 	close(fd_bad);
383 	close(fd_good);
384 }
385 
test_ban_ctx(const struct intel_execution_engine * e)386 static void test_ban_ctx(const struct intel_execution_engine *e)
387 {
388 	struct local_drm_i915_reset_stats rs_bad, rs_good;
389 	int fd, ban, retry = 10;
390 	uint32_t ctx_good, ctx_bad;
391 	int active_count = 0;
392 
393 	fd = drm_open_driver(DRIVER_INTEL);
394 
395 	assert_reset_status(fd, fd, 0, RS_NO_ERROR);
396 
397 	ctx_good = gem_context_create(fd);
398 	ctx_bad = gem_context_create(fd);
399 
400 	assert_reset_status(fd, fd, 0, RS_NO_ERROR);
401 	assert_reset_status(fd, fd, ctx_good, RS_NO_ERROR);
402 	assert_reset_status(fd, fd, ctx_bad, RS_NO_ERROR);
403 
404 	noop(fd, ctx_bad, e);
405 	noop(fd, ctx_good, e);
406 
407 	assert_reset_status(fd, fd, ctx_good, RS_NO_ERROR);
408 	assert_reset_status(fd, fd, ctx_bad, RS_NO_ERROR);
409 
410 	inject_hang(fd, ctx_bad, e, BAN | ASYNC);
411 	active_count++;
412 
413 	noop(fd, ctx_good, e);
414 	noop(fd, ctx_good, e);
415 
416 	while (retry--) {
417 		inject_hang(fd, ctx_bad, e, BAN);
418 		active_count++;
419 
420 		ban = noop(fd, ctx_bad, e);
421 		if (ban == -EIO)
422 			break;
423 
424 		/* Should not happen often but sometimes hang is declared too
425 		 * slow due to our way of faking hang using loop */
426 		gem_close(fd, ban);
427 
428 		igt_info("retrying for ban (%d)\n", retry);
429 	}
430 	igt_assert_eq(ban, -EIO);
431 	igt_assert_lt(0, noop(fd, ctx_good, e));
432 
433 	assert_reset_status(fd, fd, ctx_bad, RS_BATCH_ACTIVE);
434 	igt_assert_eq(gem_reset_stats(fd, ctx_bad, &rs_bad), 0);
435 	igt_assert_eq(rs_bad.batch_active, active_count);
436 
437 	assert_reset_status(fd, fd, ctx_good, RS_NO_ERROR);
438 	igt_assert_eq(gem_reset_stats(fd, ctx_good, &rs_good), 0);
439 	igt_assert_eq(rs_good.batch_active, 0);
440 
441 	close(fd);
442 }
443 
test_unrelated_ctx(const struct intel_execution_engine * e)444 static void test_unrelated_ctx(const struct intel_execution_engine *e)
445 {
446 	int fd1,fd2;
447 	int ctx_guilty, ctx_unrelated;
448 
449 	fd1 = drm_open_driver(DRIVER_INTEL);
450 	fd2 = drm_open_driver(DRIVER_INTEL);
451 	assert_reset_status(0, fd1, 0, RS_NO_ERROR);
452 	assert_reset_status(1, fd2, 0, RS_NO_ERROR);
453 	ctx_guilty = gem_context_create(fd1);
454 	ctx_unrelated = gem_context_create(fd2);
455 
456 	assert_reset_status(0, fd1, ctx_guilty, RS_NO_ERROR);
457 	assert_reset_status(1, fd2, ctx_unrelated, RS_NO_ERROR);
458 
459 	inject_hang(fd1, ctx_guilty, e, 0);
460 	assert_reset_status(0, fd1, ctx_guilty, RS_BATCH_ACTIVE);
461 	assert_reset_status(1, fd2, ctx_unrelated, RS_NO_ERROR);
462 
463 	gem_sync(fd2, noop(fd2, ctx_unrelated, e));
464 	assert_reset_status(0, fd1, ctx_guilty, RS_BATCH_ACTIVE);
465 	assert_reset_status(1, fd2, ctx_unrelated, RS_NO_ERROR);
466 
467 	close(fd1);
468 	close(fd2);
469 }
470 
get_reset_count(int fd,int ctx)471 static int get_reset_count(int fd, int ctx)
472 {
473 	int ret;
474 	struct local_drm_i915_reset_stats rs;
475 
476 	ret = gem_reset_stats(fd, ctx, &rs);
477 	if (ret)
478 		return ret;
479 
480 	return rs.reset_count;
481 }
482 
test_close_pending_ctx(const struct intel_execution_engine * e)483 static void test_close_pending_ctx(const struct intel_execution_engine *e)
484 {
485 	int fd = drm_open_driver(DRIVER_INTEL);
486 	uint32_t ctx = gem_context_create(fd);
487 
488 	assert_reset_status(fd, fd, ctx, RS_NO_ERROR);
489 
490 	inject_hang(fd, ctx, e, 0);
491 	gem_context_destroy(fd, ctx);
492 	igt_assert_eq(__gem_context_destroy(fd, ctx), -ENOENT);
493 
494 	close(fd);
495 }
496 
test_close_pending(const struct intel_execution_engine * e)497 static void test_close_pending(const struct intel_execution_engine *e)
498 {
499 	int fd = drm_open_driver(DRIVER_INTEL);
500 
501 	assert_reset_status(fd, fd, 0, RS_NO_ERROR);
502 
503 	inject_hang(fd, 0, e, 0);
504 	close(fd);
505 }
506 
noop_on_each_ring(int fd,const bool reverse)507 static void noop_on_each_ring(int fd, const bool reverse)
508 {
509 	const uint32_t bbe = MI_BATCH_BUFFER_END;
510 	struct drm_i915_gem_execbuffer2 eb;
511 	struct drm_i915_gem_exec_object2 obj;
512 	const struct intel_execution_engine *e;
513 
514 	memset(&obj, 0, sizeof(obj));
515 	obj.handle = gem_create(fd, 4096);
516 	gem_write(fd, obj.handle, 0, &bbe, sizeof(bbe));
517 
518 	memset(&eb, 0, sizeof(eb));
519 	eb.buffers_ptr = to_user_pointer(&obj);
520 	eb.buffer_count = 1;
521 
522 	if (reverse) {
523 		for (e = intel_execution_engines; e->name; e++)
524 			;
525 		while (--e >= intel_execution_engines) {
526 			eb.flags = e->exec_id | e->flags;
527 			__gem_execbuf(fd, &eb);
528 		}
529 	} else {
530 		for (e = intel_execution_engines; e->name; e++) {
531 			eb.flags = e->exec_id | e->flags;
532 			__gem_execbuf(fd, &eb);
533 		}
534 	}
535 
536 	gem_sync(fd, obj.handle);
537 	gem_close(fd, obj.handle);
538 }
539 
test_close_pending_fork(const struct intel_execution_engine * e,const bool reverse)540 static void test_close_pending_fork(const struct intel_execution_engine *e,
541 				    const bool reverse)
542 {
543 	int fd = drm_open_driver(DRIVER_INTEL);
544 	igt_hang_t hang;
545 	int pid;
546 
547 	assert_reset_status(fd, fd, 0, RS_NO_ERROR);
548 
549 	hang = igt_hang_ctx(fd, 0, e->exec_id | e->flags, 0);
550 	sleep(1);
551 
552 	/* Avoid helpers as we need to kill the child
553 	 * without any extra signal handling on behalf of
554 	 * lib/drmtest.c
555 	 */
556 	pid = fork();
557 	if (pid == 0) {
558 		const int fd2 = drm_open_driver(DRIVER_INTEL);
559 		igt_assert_lte(0, fd2);
560 
561 		/* The crucial component is that we schedule the same noop batch
562 		 * on each ring. This exercises batch_obj reference counting,
563 		 * when gpu is reset and ring lists are cleared.
564 		 */
565 		noop_on_each_ring(fd2, reverse);
566 		close(fd2);
567 		pause();
568 		exit(0);
569 	} else {
570 		igt_assert_lt(0, pid);
571 		sleep(1);
572 
573 		/* Kill the child to reduce refcounts on
574 		   batch_objs */
575 		kill(pid, SIGKILL);
576 	}
577 
578 	igt_post_hang_ring(fd, hang);
579 	close(fd);
580 }
581 
test_reset_count(const struct intel_execution_engine * e,const bool create_ctx)582 static void test_reset_count(const struct intel_execution_engine *e,
583 			     const bool create_ctx)
584 {
585 	int fd = drm_open_driver(DRIVER_INTEL);
586 	int ctx;
587 	long c1, c2;
588 
589 	if (create_ctx)
590 		ctx = gem_context_create(fd);
591 	else
592 		ctx = 0;
593 
594 	assert_reset_status(fd, fd, ctx, RS_NO_ERROR);
595 
596 	c1 = get_reset_count(fd, ctx);
597 	igt_assert(c1 >= 0);
598 
599 	inject_hang(fd, ctx, e, 0);
600 
601 	assert_reset_status(fd, fd, ctx, RS_BATCH_ACTIVE);
602 	c2 = get_reset_count(fd, ctx);
603 	igt_assert(c2 >= 0);
604 	igt_assert(c2 == (c1 + 1));
605 
606 	igt_fork(child, 1) {
607 		igt_drop_root();
608 
609 		c2 = get_reset_count(fd, ctx);
610 
611 		igt_assert(c2 == 0);
612 	}
613 
614 	igt_waitchildren();
615 
616 	if (create_ctx)
617 		gem_context_destroy(fd, ctx);
618 
619 	close(fd);
620 }
621 
_test_params(int fd,int ctx,uint32_t flags,uint32_t pad)622 static int _test_params(int fd, int ctx, uint32_t flags, uint32_t pad)
623 {
624 	struct local_drm_i915_reset_stats rs;
625 
626 	memset(&rs, 0, sizeof(rs));
627 	rs.ctx_id = ctx;
628 	rs.flags = flags;
629 	rs.reset_count = rand();
630 	rs.batch_active = rand();
631 	rs.batch_pending = rand();
632 	rs.pad = pad;
633 
634 	if (drmIoctl(fd, GET_RESET_STATS_IOCTL, &rs))
635 		return -errno;
636 
637 	return 0;
638 }
639 
/* Privilege level the parameter checks are being run at. */
typedef enum { root = 0, user } cap_t;

/*
 * Validate ioctl argument checking for context @ctx.
 *
 * For context 0 a fully zeroed flags/pad request must succeed, and when run
 * as @cap != root the reported reset count must be zero. For any context, a
 * non-zero flags and/or pad value must be rejected with -EINVAL.
 */
static void _check_param_ctx(const int fd, const int ctx, const cap_t cap)
{
	/*
	 * Any non-zero value will do. The addition is carried out in
	 * unsigned arithmetic because rand() may return RAND_MAX, which can
	 * equal INT_MAX, and a signed "rand() + 1" would then overflow.
	 */
	const uint32_t bad = (uint32_t)rand() + 1;

	if (ctx == 0) {
		igt_assert_eq(_test_params(fd, ctx, 0, 0), 0);

		if (cap != root) {
			igt_assert(get_reset_count(fd, ctx) == 0);
		}
	}

	igt_assert_eq(_test_params(fd, ctx, 0, bad), -EINVAL);
	igt_assert_eq(_test_params(fd, ctx, bad, 0), -EINVAL);
	igt_assert_eq(_test_params(fd, ctx, bad, bad), -EINVAL);
}
658 
/*
 * Parameter validation common to every context: a null argument pointer
 * must make the ioctl fail, an unknown context id (0xbadbad) must yield
 * -ENOENT, and the per-context checks of _check_param_ctx() must pass.
 */
static void check_params(const int fd, const int ctx, cap_t cap)
{
	/* Raw ioctl() on purpose: no argument block at all. */
	igt_assert(ioctl(fd, GET_RESET_STATS_IOCTL, 0) == -1);

	igt_assert_eq(_test_params(fd, 0xbadbad, 0, 0), -ENOENT);

	_check_param_ctx(fd, ctx, cap);
}
666 
_test_param(const int fd,const int ctx)667 static void _test_param(const int fd, const int ctx)
668 {
669 	check_params(fd, ctx, root);
670 
671 	igt_fork(child, 1) {
672 		check_params(fd, ctx, root);
673 
674 		igt_drop_root();
675 
676 		check_params(fd, ctx, user);
677 	}
678 
679 	check_params(fd, ctx, root);
680 
681 	igt_waitchildren();
682 }
683 
test_params_ctx(void)684 static void test_params_ctx(void)
685 {
686 	int fd;
687 
688 	fd = drm_open_driver(DRIVER_INTEL);
689 	_test_param(fd, gem_context_create(fd));
690 	close(fd);
691 }
692 
test_params(void)693 static void test_params(void)
694 {
695 	int fd;
696 
697 	fd = drm_open_driver(DRIVER_INTEL);
698 	_test_param(fd, 0);
699 	close(fd);
700 }
701 
702 static const struct intel_execution_engine *
next_engine(int fd,const struct intel_execution_engine * e)703 next_engine(int fd, const struct intel_execution_engine *e)
704 {
705 	do {
706 		e++;
707 		if (e->name == NULL)
708 			e = intel_execution_engines;
709 		if (e->exec_id == 0)
710 			e++;
711 	} while (!has_engine(fd, 0, e));
712 
713 	return e;
714 }
715 
defer_hangcheck(const struct intel_execution_engine * engine)716 static void defer_hangcheck(const struct intel_execution_engine *engine)
717 {
718 	const struct intel_execution_engine *next;
719 	int fd, count_start, count_end;
720 	int seconds = 30;
721 
722 	fd = drm_open_driver(DRIVER_INTEL);
723 
724 	next = next_engine(fd, engine);
725 	igt_skip_on(next == engine);
726 
727 	count_start = get_reset_count(fd, 0);
728 	igt_assert_lte(0, count_start);
729 
730 	inject_hang(fd, 0, engine, 0);
731 	while (--seconds) {
732 		noop(fd, 0, next);
733 
734 		count_end = get_reset_count(fd, 0);
735 		igt_assert_lte(0, count_end);
736 
737 		if (count_end > count_start)
738 			break;
739 
740 		sleep(1);
741 	}
742 
743 	igt_assert_lt(count_start, count_end);
744 
745 	close(fd);
746 }
747 
gem_has_reset_stats(int fd)748 static bool gem_has_reset_stats(int fd)
749 {
750 	struct local_drm_i915_reset_stats rs;
751 	int ret;
752 
753 	/* Carefully set flags and pad to zero, otherwise
754 	   we get -EINVAL
755 	*/
756 	memset(&rs, 0, sizeof(rs));
757 
758 	ret = drmIoctl(fd, GET_RESET_STATS_IOCTL, &rs);
759 	if (ret == 0)
760 		return true;
761 
762 	/* If we get EPERM, we have support but did not
763 	   have CAP_SYSADM */
764 	if (ret == -1 && errno == EPERM)
765 		return true;
766 
767 	return false;
768 }
769 
770 #define RUN_TEST(...) do { sync_gpu(); __VA_ARGS__; sync_gpu(); } while (0)
771 #define RUN_CTX_TEST(...) do { check_context(e); RUN_TEST(__VA_ARGS__); } while (0)
772 
773 igt_main
774 {
775 	const struct intel_execution_engine *e;
776 	igt_skip_on_simulation();
777 
778 	igt_fixture {
779 		int fd;
780 
781 		bool has_reset_stats;
782 		bool using_full_reset;
783 		fd = drm_open_driver(DRIVER_INTEL);
784 		devid = intel_get_drm_devid(fd);
785 
786 		has_reset_stats = gem_has_reset_stats(fd);
787 
788 		igt_assert(igt_sysfs_set_parameter
789 			   (fd, "reset", "%d", 1 /* only global reset */));
790 
791 		using_full_reset = !gem_engine_reset_enabled(fd) &&
792 				   gem_gpu_reset_enabled(fd);
793 
794 		close(fd);
795 
796 		igt_require_f(has_reset_stats,
797 			      "No reset stats ioctl support. Too old kernel?\n");
798 		igt_require_f(using_full_reset,
799 			      "Full GPU reset is not enabled. Is enable_hangcheck set?\n");
800 	}
801 
802 	igt_subtest("params")
803 		test_params();
804 
805 	igt_subtest_f("params-ctx")
806 		RUN_TEST(test_params_ctx());
807 
808 	for (e = intel_execution_engines; e->name; e++) {
809 		igt_subtest_f("reset-stats-%s", e->name)
810 			RUN_TEST(test_rs(e, 4, 1, 0));
811 
812 		igt_subtest_f("reset-stats-ctx-%s", e->name)
813 			RUN_CTX_TEST(test_rs_ctx(e, 4, 4, 1, 2));
814 
815 		igt_subtest_f("ban-%s", e->name)
816 			RUN_TEST(test_ban(e));
817 
818 		igt_subtest_f("ban-ctx-%s", e->name)
819 			RUN_CTX_TEST(test_ban_ctx(e));
820 
821 		igt_subtest_f("reset-count-%s", e->name)
822 			RUN_TEST(test_reset_count(e, false));
823 
824 		igt_subtest_f("reset-count-ctx-%s", e->name)
825 			RUN_CTX_TEST(test_reset_count(e, true));
826 
827 		igt_subtest_f("unrelated-ctx-%s", e->name)
828 			RUN_CTX_TEST(test_unrelated_ctx(e));
829 
830 		igt_subtest_f("close-pending-%s", e->name)
831 			RUN_TEST(test_close_pending(e));
832 
833 		igt_subtest_f("close-pending-ctx-%s", e->name)
834 			RUN_CTX_TEST(test_close_pending_ctx(e));
835 
836 		igt_subtest_f("close-pending-fork-%s", e->name)
837 			RUN_TEST(test_close_pending_fork(e, false));
838 
839 		igt_subtest_f("close-pending-fork-reverse-%s", e->name)
840 			RUN_TEST(test_close_pending_fork(e, true));
841 
842 		igt_subtest_f("defer-hangcheck-%s", e->name)
843 			RUN_TEST(defer_hangcheck(e));
844 	}
845 
846 	igt_fixture {
847 		int fd;
848 
849 		fd = drm_open_driver(DRIVER_INTEL);
850 		igt_assert(igt_sysfs_set_parameter
851 			   (fd, "reset", "%d", INT_MAX /* any reset method */));
852 		close(fd);
853 	}
854 }
855