1 /*
2  * Copyright © 2015 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  */
24 
25 /*
 * Testcase: Test that only specific ioctls report a wedged GPU.
27  *
28  */
29 
30 #include <stdlib.h>
31 #include <stdio.h>
32 #include <string.h>
33 #include <unistd.h>
34 #include <fcntl.h>
35 #include <inttypes.h>
36 #include <errno.h>
37 #include <sys/ioctl.h>
38 #include <signal.h>
39 #include <time.h>
40 
41 #include <drm.h>
42 
43 #include "igt.h"
44 #include "igt_device.h"
45 #include "igt_stats.h"
46 #include "igt_sysfs.h"
47 #include "sw_sync.h"
48 #include "i915/gem_ring.h"
49 
50 IGT_TEST_DESCRIPTION("Test that specific ioctls report a wedged GPU (EIO).");
51 
/*
 * Toggle the i915 "reset" module parameter through sysfs.
 *
 * A single character, '1' for @enable and '0' otherwise, is written to
 * /sys/module/i915/parameters/reset. If the parameter file cannot be opened
 * the igt_require() check fails.
 *
 * Returns true when exactly one byte was written, false otherwise.
 */
static bool i915_reset_control(bool enable)
{
	static const char reset_param[] = "/sys/module/i915/parameters/reset";
	const char value = enable ? '1' : '0';
	bool written;
	int fd;

	igt_debug("%s GPU reset\n", enable ? "Enabling" : "Disabling");

	fd = open(reset_param, O_RDWR);
	igt_require(fd >= 0);

	written = (write(fd, &value, 1) == 1);
	close(fd);

	return written;
}
67 
trigger_reset(int fd)68 static void trigger_reset(int fd)
69 {
70 	struct timespec ts = { };
71 
72 	igt_nsec_elapsed(&ts);
73 
74 	igt_kmsg(KMSG_DEBUG "Forcing GPU reset\n");
75 	igt_force_gpu_reset(fd);
76 
77 	/* And just check the gpu is indeed running again */
78 	igt_kmsg(KMSG_DEBUG "Checking that the GPU recovered\n");
79 	gem_test_engine(fd, ALL_ENGINES);
80 	igt_drop_caches_set(fd, DROP_ACTIVE);
81 
82 	/* We expect forced reset and health check to be quick. */
83 	igt_assert(igt_seconds_elapsed(&ts) < 2);
84 }
85 
/*
 * Inject a hang right now by writing "-1" to the i915_wedged debugfs file
 * belonging to @drm_fd.
 *
 * Both the debugfs directory lookup and the write are asserted, in line with
 * hang_after() and hang_handler(). Without these checks a failed injection
 * would pass silently and the caller would continue against a GPU that was
 * never hung.
 */
static void manual_hang(int drm_fd)
{
	int dir = igt_debugfs_dir(drm_fd);

	igt_assert_fd(dir);
	igt_assert(igt_sysfs_set(dir, "i915_wedged", "-1"));

	close(dir);
}
94 
/*
 * Wedge the GPU behind @fd.
 *
 * The GPU is idled, GPU reset is switched off through the module parameter,
 * a hang is injected with manual_hang(), and finally the reset parameter is
 * switched back on. Failing to disable reset fails the igt_require() check;
 * failing to re-enable it is an assertion failure.
 *
 * NOTE(review): callers in this file follow up with trigger_reset(), so the
 * device presumably stays wedged after reset is re-enabled - confirm.
 */
static void wedge_gpu(int fd)
{
	/* First idle the GPU then disable GPU resets before injecting a hang */
	gem_quiescent_gpu(fd);

	igt_require(i915_reset_control(false));
	manual_hang(fd);
	igt_assert(i915_reset_control(true));
}
104 
__gem_throttle(int fd)105 static int __gem_throttle(int fd)
106 {
107 	int err = 0;
108 	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_THROTTLE, NULL))
109 		err = -errno;
110 	return err;
111 }
112 
/*
 * Subtest body: with the GPU wedged, the throttle ioctl must report -EIO.
 * The GPU is reset again before returning.
 */
static void test_throttle(int fd)
{
	wedge_gpu(fd);

	igt_assert_eq(__gem_throttle(fd), -EIO);

	trigger_reset(fd);
}
121 
/*
 * Subtest body: with the GPU wedged, creating a new context must fail with
 * -EIO. Requires context support on @fd; the GPU is reset before returning.
 */
static void test_context_create(int fd)
{
	uint32_t ctx; /* only written by __gem_context_create(); never read here */

	gem_require_contexts(fd);

	wedge_gpu(fd);

	igt_assert_eq(__gem_context_create(fd, &ctx), -EIO);

	trigger_reset(fd);
}
134 
test_execbuf(int fd)135 static void test_execbuf(int fd)
136 {
137 	struct drm_i915_gem_execbuffer2 execbuf;
138 	struct drm_i915_gem_exec_object2 exec;
139 	uint32_t tmp[] = { MI_BATCH_BUFFER_END };
140 
141 	memset(&exec, 0, sizeof(exec));
142 	memset(&execbuf, 0, sizeof(execbuf));
143 
144 	exec.handle = gem_create(fd, 4096);
145 	gem_write(fd, exec.handle, 0, tmp, sizeof(tmp));
146 
147 	execbuf.buffers_ptr = to_user_pointer(&exec);
148 	execbuf.buffer_count = 1;
149 
150 	wedge_gpu(fd);
151 
152 	igt_assert_eq(__gem_execbuf(fd, &execbuf), -EIO);
153 	gem_close(fd, exec.handle);
154 
155 	trigger_reset(fd);
156 }
157 
__gem_wait(int fd,uint32_t handle,int64_t timeout)158 static int __gem_wait(int fd, uint32_t handle, int64_t timeout)
159 {
160 	struct drm_i915_gem_wait wait = {
161 		.bo_handle = handle,
162 		.timeout_ns = timeout,
163 	};
164 	int err;
165 
166 	err = 0;
167 	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_WAIT, &wait))
168 		err = -errno;
169 
170 	errno = 0;
171 	return err;
172 }
173 
__spin_poll(int fd,uint32_t ctx,unsigned long flags)174 static igt_spin_t * __spin_poll(int fd, uint32_t ctx, unsigned long flags)
175 {
176 	struct igt_spin_factory opts = {
177 		.ctx = ctx,
178 		.engine = flags,
179 		.flags = IGT_SPIN_FAST | IGT_SPIN_FENCE_OUT,
180 	};
181 
182 	if (gem_can_store_dword(fd, opts.engine))
183 		opts.flags |= IGT_SPIN_POLL_RUN;
184 
185 	return __igt_spin_factory(fd, &opts);
186 }
187 
/*
 * Wait for @spin to begin executing.
 *
 * Spinners with polling support are busy-waited on until started. Without
 * polling there is nothing to observe, so the function sleeps for 500ms.
 * @fd is unused.
 */
static void __spin_wait(int fd, igt_spin_t *spin)
{
	if (!igt_spin_has_poll(spin)) {
		igt_debug("__spin_wait - usleep mode\n");
		usleep(500e3); /* Better than nothing! */
		return;
	}

	igt_spin_busywait_until_started(spin);
}
197 
spin_sync(int fd,uint32_t ctx,unsigned long flags)198 static igt_spin_t * spin_sync(int fd, uint32_t ctx, unsigned long flags)
199 {
200 	igt_spin_t *spin = __spin_poll(fd, ctx, flags);
201 
202 	__spin_wait(fd, spin);
203 
204 	return spin;
205 }
206 
/*
 * State handed from hang_after() to hang_handler() through the timer's
 * sigval pointer. Allocated in hang_after() and freed by hang_handler().
 */
struct hang_ctx {
	int debugfs;		/* debugfs directory fd; closed by hang_handler() */
	struct timespec delay;	/* started when the timer is armed; used to log the latency */
	struct timespec *ts;	/* caller-owned timestamp, started when the hang is injected */
	timer_t timer;		/* the one-shot timer; deleted by hang_handler() */
};
213 
hang_handler(union sigval arg)214 static void hang_handler(union sigval arg)
215 {
216 	struct hang_ctx *ctx = arg.sival_ptr;
217 
218 	igt_debug("hang delay = %.2fus\n",
219 		  igt_nsec_elapsed(&ctx->delay) / 1000.0);
220 
221 	igt_nsec_elapsed(ctx->ts);
222 	igt_assert(igt_sysfs_set(ctx->debugfs, "i915_wedged", "-1"));
223 
224 	igt_assert_eq(timer_delete(ctx->timer), 0);
225 	close(ctx->debugfs);
226 	free(ctx);
227 }
228 
hang_after(int fd,unsigned int us,struct timespec * ts)229 static void hang_after(int fd, unsigned int us, struct timespec *ts)
230 {
231 	struct sigevent sev = {
232 		.sigev_notify = SIGEV_THREAD,
233 		.sigev_notify_function = hang_handler
234 	};
235 	struct itimerspec its = {
236 		.it_value.tv_sec = us / USEC_PER_SEC,
237 		.it_value.tv_nsec = us % USEC_PER_SEC * 1000,
238 	};
239 	struct hang_ctx *ctx;
240 
241 	ctx = calloc(1, sizeof(*ctx));
242 	igt_assert(ctx);
243 
244 	ctx->debugfs = igt_debugfs_dir(fd);
245 	igt_assert_fd(ctx->debugfs);
246 
247 	sev.sigev_value.sival_ptr = ctx;
248 
249 	igt_assert_eq(timer_create(CLOCK_MONOTONIC, &sev, &ctx->timer), 0);
250 
251 	ctx->ts = ts;
252 	igt_nsec_elapsed(&ctx->delay);
253 
254 	igt_assert_eq(timer_settime(ctx->timer, 0, &its, NULL), 0);
255 }
256 
/*
 * Hang the GPU and wait for object @bo to become idle.
 *
 * With a non-zero @wait the hang is scheduled that many microseconds ahead
 * via hang_after(); with zero it is injected at once by manual_hang(). The
 * time from hang injection to gem_sync() returning is pushed to @st when
 * @st is not NULL.
 */
static void check_wait(int fd, uint32_t bo, unsigned int wait, igt_stats_t *st)
{
	struct timespec ts;

	memset(&ts, 0, sizeof(ts));

	if (!wait) {
		igt_nsec_elapsed(&ts);
		manual_hang(fd);
	} else {
		/* The timer handler starts ts when it injects the hang. */
		hang_after(fd, wait, &ts);
	}

	gem_sync(fd, bo);

	if (!st)
		return;

	igt_stats_push(st, igt_nsec_elapsed(&ts));
}
273 
/*
 * Report the wakeup latencies collected in @st and check them against a
 * limit.
 *
 * A summary line prefixed with @prefix is always printed. With fewer than
 * nine samples no check is made. Otherwise the median must be below the
 * limit and the maximum below five times the limit. The limit is 250ms,
 * plus 300ms on devices older than gen5 (looked up through @fd).
 */
static void check_wait_elapsed(const char *prefix, int fd, igt_stats_t *st)
{
	double med, max, limit;

	igt_info("%s: completed %d resets, wakeups took %.3f+-%.3fms (min:%.3fms, median:%.3fms, max:%.3fms)\n",
		 prefix, st->n_values,
		 igt_stats_get_mean(st)*1e-6,
		 igt_stats_get_std_deviation(st)*1e-6,
		 igt_stats_get_min(st)*1e-6,
		 igt_stats_get_median(st)*1e-6,
		 igt_stats_get_max(st)*1e-6);

	/* Too few samples for a stable median. */
	if (st->n_values < 9)
		return;

	/*
	 * Older platforms have to reset the display as well (modeset off,
	 * then modeset back on) around a GPU reset, so they are given a
	 * guesstimated allowance for 2x worst-case modeset.
	 */
	if (intel_gen(intel_get_drm_devid(fd)) < 5)
		limit = 250e6 + 300e6;
	else
		limit = 250e6;

	med = igt_stats_get_median(st);
	max = igt_stats_get_max(st);
	igt_assert_f(med < limit && max < 5 * limit,
		     "Wake up following reset+wedge took %.3f+-%.3fms (min:%.3fms, median:%.3fms, max:%.3fms); limit set to %.0fms on average and %.0fms maximum\n",
		     igt_stats_get_mean(st)*1e-6,
		     igt_stats_get_std_deviation(st)*1e-6,
		     igt_stats_get_min(st)*1e-6,
		     med*1e-6,
		     max*1e-6,
		     limit*1e-6, limit*5e-6);
}
308 
__test_banned(int fd)309 static void __test_banned(int fd)
310 {
311 	struct drm_i915_gem_exec_object2 obj = {
312 		.handle = gem_create(fd, 4096),
313 	};
314 	struct drm_i915_gem_execbuffer2 execbuf = {
315 		.buffers_ptr = to_user_pointer(&obj),
316 		.buffer_count = 1,
317 	};
318 	const uint32_t bbe = MI_BATCH_BUFFER_END;
319 	unsigned long count = 0;
320 
321 	gem_write(fd, obj.handle, 0, &bbe, sizeof(bbe));
322 
323 	gem_quiescent_gpu(fd);
324 	igt_require(i915_reset_control(true));
325 
326 	igt_until_timeout(5) {
327 		igt_spin_t *hang;
328 
329 		if (__gem_execbuf(fd, &execbuf) == -EIO) {
330 			uint32_t ctx = 0;
331 
332 			igt_info("Banned after causing %lu hangs\n", count);
333 			igt_assert(count > 1);
334 
335 			/* Only this context, not the file, should be banned */
336 			igt_assert_neq(__gem_context_create(fd, &ctx), -EIO);
337 			if (ctx) { /* remember the contextless! */
338 				/* And check it actually works! */
339 				execbuf.rsvd1 = ctx;
340 				gem_execbuf(fd, &execbuf);
341 
342 				gem_context_destroy(fd, ctx);
343 			}
344 			return;
345 		}
346 
347 		/* Trigger a reset, making sure we are detected as guilty */
348 		hang = spin_sync(fd, 0, 0);
349 		trigger_reset(fd);
350 		igt_spin_free(fd, hang);
351 
352 		count++;
353 	}
354 
355 	igt_assert_f(false,
356 		     "Ran for 5s, %lu hangs without being banned\n",
357 		     count);
358 }
359 
/*
 * Subtest body: run __test_banned() on a freshly reopened driver fd and
 * close that fd afterwards.
 */
static void test_banned(int fd)
{
	const int i915 = gem_reopen_driver(fd);

	__test_banned(i915);
	close(i915);
}
366 
367 #define TEST_WEDGE (1)
368 
test_wait(int fd,unsigned int flags,unsigned int wait)369 static void test_wait(int fd, unsigned int flags, unsigned int wait)
370 {
371 	igt_spin_t *hang;
372 
373 	fd = gem_reopen_driver(fd);
374 	igt_require_gem(fd);
375 
376 	/*
377 	 * If the request we wait on completes due to a hang (even for
378 	 * that request), the user expects the return value to 0 (success).
379 	 */
380 
381 	if (flags & TEST_WEDGE)
382 		igt_require(i915_reset_control(false));
383 	else
384 		igt_require(i915_reset_control(true));
385 
386 	hang = spin_sync(fd, 0, I915_EXEC_DEFAULT);
387 
388 	check_wait(fd, hang->handle, wait, NULL);
389 
390 	igt_spin_free(fd, hang);
391 
392 	igt_require(i915_reset_control(true));
393 
394 	trigger_reset(fd);
395 	close(fd);
396 }
397 
test_suspend(int fd,int state)398 static void test_suspend(int fd, int state)
399 {
400 	fd = gem_reopen_driver(fd);
401 	igt_require_gem(fd);
402 
403 	/* Do a suspend first so that we don't skip inside the test */
404 	igt_system_suspend_autoresume(state, SUSPEND_TEST_DEVICES);
405 
406 	/* Check we can suspend when the driver is already wedged */
407 	igt_require(i915_reset_control(false));
408 	manual_hang(fd);
409 
410 	igt_system_suspend_autoresume(state, SUSPEND_TEST_DEVICES);
411 
412 	igt_require(i915_reset_control(true));
413 	trigger_reset(fd);
414 	close(fd);
415 }
416 
test_inflight(int fd,unsigned int wait)417 static void test_inflight(int fd, unsigned int wait)
418 {
419 	int parent_fd = fd;
420 	unsigned int engine;
421 	int fence[64]; /* mostly conservative estimate of ring size */
422 	int max;
423 
424 	igt_require_gem(fd);
425 	igt_require(gem_has_exec_fence(fd));
426 
427 	max = gem_measure_ring_inflight(fd, -1, 0);
428 	igt_require(max > 1);
429 	max = min(max - 1, ARRAY_SIZE(fence));
430 
431 	for_each_engine(parent_fd, engine) {
432 		const uint32_t bbe = MI_BATCH_BUFFER_END;
433 		struct drm_i915_gem_exec_object2 obj[2];
434 		struct drm_i915_gem_execbuffer2 execbuf;
435 		igt_spin_t *hang;
436 
437 		fd = gem_reopen_driver(parent_fd);
438 		igt_require_gem(fd);
439 
440 		memset(obj, 0, sizeof(obj));
441 		obj[0].flags = EXEC_OBJECT_WRITE;
442 		obj[1].handle = gem_create(fd, 4096);
443 		gem_write(fd, obj[1].handle, 0, &bbe, sizeof(bbe));
444 
445 		gem_quiescent_gpu(fd);
446 		igt_debug("Starting %s on engine '%s'\n", __func__, e__->name);
447 		igt_require(i915_reset_control(false));
448 
449 		hang = spin_sync(fd, 0, engine);
450 		obj[0].handle = hang->handle;
451 
452 		memset(&execbuf, 0, sizeof(execbuf));
453 		execbuf.buffers_ptr = to_user_pointer(obj);
454 		execbuf.buffer_count = 2;
455 		execbuf.flags = engine | I915_EXEC_FENCE_OUT;
456 
457 		for (unsigned int n = 0; n < max; n++) {
458 			gem_execbuf_wr(fd, &execbuf);
459 			fence[n] = execbuf.rsvd2 >> 32;
460 			igt_assert(fence[n] != -1);
461 		}
462 
463 		check_wait(fd, obj[1].handle, wait, NULL);
464 
465 		for (unsigned int n = 0; n < max; n++) {
466 			igt_assert_eq(sync_fence_status(fence[n]), -EIO);
467 			close(fence[n]);
468 		}
469 
470 		igt_spin_free(fd, hang);
471 		igt_assert(i915_reset_control(true));
472 		trigger_reset(fd);
473 
474 		gem_close(fd, obj[1].handle);
475 		close(fd);
476 	}
477 }
478 
test_inflight_suspend(int fd)479 static void test_inflight_suspend(int fd)
480 {
481 	struct drm_i915_gem_execbuffer2 execbuf;
482 	struct drm_i915_gem_exec_object2 obj[2];
483 	uint32_t bbe = MI_BATCH_BUFFER_END;
484 	int fence[64]; /* mostly conservative estimate of ring size */
485 	igt_spin_t *hang;
486 	int max;
487 
488 	max = gem_measure_ring_inflight(fd, -1, 0);
489 	igt_require(max > 1);
490 	max = min(max - 1, ARRAY_SIZE(fence));
491 
492 	fd = gem_reopen_driver(fd);
493 	igt_require_gem(fd);
494 	igt_require(gem_has_exec_fence(fd));
495 	igt_require(i915_reset_control(false));
496 
497 	memset(obj, 0, sizeof(obj));
498 	obj[0].flags = EXEC_OBJECT_WRITE;
499 	obj[1].handle = gem_create(fd, 4096);
500 	gem_write(fd, obj[1].handle, 0, &bbe, sizeof(bbe));
501 
502 	hang = spin_sync(fd, 0, 0);
503 	obj[0].handle = hang->handle;
504 
505 	memset(&execbuf, 0, sizeof(execbuf));
506 	execbuf.buffers_ptr = to_user_pointer(obj);
507 	execbuf.buffer_count = 2;
508 	execbuf.flags = I915_EXEC_FENCE_OUT;
509 
510 	for (unsigned int n = 0; n < max; n++) {
511 		gem_execbuf_wr(fd, &execbuf);
512 		fence[n] = execbuf.rsvd2 >> 32;
513 		igt_assert(fence[n] != -1);
514 	}
515 
516 	igt_set_autoresume_delay(30);
517 	igt_system_suspend_autoresume(SUSPEND_STATE_MEM, SUSPEND_TEST_NONE);
518 
519 	check_wait(fd, obj[1].handle, 10, NULL);
520 
521 	for (unsigned int n = 0; n < max; n++) {
522 		igt_assert_eq(sync_fence_status(fence[n]), -EIO);
523 		close(fence[n]);
524 	}
525 
526 	igt_spin_free(fd, hang);
527 	igt_assert(i915_reset_control(true));
528 	trigger_reset(fd);
529 	close(fd);
530 }
531 
context_create_safe(int i915)532 static uint32_t context_create_safe(int i915)
533 {
534 	struct drm_i915_gem_context_param param;
535 
536 	memset(&param, 0, sizeof(param));
537 
538 	param.ctx_id = gem_context_create(i915);
539 	param.param = I915_CONTEXT_PARAM_BANNABLE;
540 	gem_context_set_param(i915, &param);
541 
542 	param.param = I915_CONTEXT_PARAM_NO_ERROR_CAPTURE;
543 	param.value = 1;
544 	gem_context_set_param(i915, &param);
545 
546 	return param.ctx_id;
547 }
548 
test_inflight_contexts(int fd,unsigned int wait)549 static void test_inflight_contexts(int fd, unsigned int wait)
550 {
551 	int parent_fd = fd;
552 	unsigned int engine;
553 
554 	igt_require_gem(fd);
555 	igt_require(gem_has_exec_fence(fd));
556 	gem_require_contexts(fd);
557 
558 	for_each_engine(parent_fd, engine) {
559 		const uint32_t bbe = MI_BATCH_BUFFER_END;
560 		struct drm_i915_gem_exec_object2 obj[2];
561 		struct drm_i915_gem_execbuffer2 execbuf;
562 		unsigned int count;
563 		igt_spin_t *hang;
564 		uint32_t ctx[64];
565 		int fence[64];
566 
567 		fd = gem_reopen_driver(parent_fd);
568 		igt_require_gem(fd);
569 
570 		for (unsigned int n = 0; n < ARRAY_SIZE(ctx); n++)
571 			ctx[n] = context_create_safe(fd);
572 
573 		gem_quiescent_gpu(fd);
574 
575 		igt_debug("Starting %s on engine '%s'\n", __func__, e__->name);
576 		igt_require(i915_reset_control(false));
577 
578 		memset(obj, 0, sizeof(obj));
579 		obj[0].flags = EXEC_OBJECT_WRITE;
580 		obj[1].handle = gem_create(fd, 4096);
581 		gem_write(fd, obj[1].handle, 0, &bbe, sizeof(bbe));
582 
583 		hang = spin_sync(fd, 0, engine);
584 		obj[0].handle = hang->handle;
585 
586 		memset(&execbuf, 0, sizeof(execbuf));
587 		execbuf.buffers_ptr = to_user_pointer(obj);
588 		execbuf.buffer_count = 2;
589 		execbuf.flags = engine | I915_EXEC_FENCE_OUT;
590 
591 		count = 0;
592 		for (unsigned int n = 0; n < ARRAY_SIZE(fence); n++) {
593 			execbuf.rsvd1 = ctx[n];
594 			if (__gem_execbuf_wr(fd, &execbuf))
595 				break; /* small shared ring */
596 			fence[n] = execbuf.rsvd2 >> 32;
597 			igt_assert(fence[n] != -1);
598 			count++;
599 		}
600 
601 		check_wait(fd, obj[1].handle, wait, NULL);
602 
603 		for (unsigned int n = 0; n < count; n++) {
604 			igt_assert_eq(sync_fence_status(fence[n]), -EIO);
605 			close(fence[n]);
606 		}
607 
608 		igt_spin_free(fd, hang);
609 		gem_close(fd, obj[1].handle);
610 		igt_assert(i915_reset_control(true));
611 		trigger_reset(fd);
612 
613 		for (unsigned int n = 0; n < ARRAY_SIZE(ctx); n++)
614 			gem_context_destroy(fd, ctx[n]);
615 
616 		close(fd);
617 	}
618 }
619 
test_inflight_external(int fd)620 static void test_inflight_external(int fd)
621 {
622 	const uint32_t bbe = MI_BATCH_BUFFER_END;
623 	struct drm_i915_gem_execbuffer2 execbuf;
624 	struct drm_i915_gem_exec_object2 obj;
625 	igt_spin_t *hang;
626 	uint32_t fence;
627 	IGT_CORK_FENCE(cork);
628 
629 	igt_require_sw_sync();
630 	igt_require(gem_has_exec_fence(fd));
631 
632 	fd = gem_reopen_driver(fd);
633 	igt_require_gem(fd);
634 
635 	fence = igt_cork_plug(&cork, fd);
636 
637 	igt_require(i915_reset_control(false));
638 	hang = __spin_poll(fd, 0, 0);
639 
640 	memset(&obj, 0, sizeof(obj));
641 	obj.handle = gem_create(fd, 4096);
642 	gem_write(fd, obj.handle, 0, &bbe, sizeof(bbe));
643 
644 	memset(&execbuf, 0, sizeof(execbuf));
645 	execbuf.buffers_ptr = to_user_pointer(&obj);
646 	execbuf.buffer_count = 1;
647 	execbuf.flags = I915_EXEC_FENCE_IN | I915_EXEC_FENCE_OUT;
648 	execbuf.rsvd2 = (uint32_t)fence;
649 
650 	gem_execbuf_wr(fd, &execbuf);
651 	close(fence);
652 
653 	fence = execbuf.rsvd2 >> 32;
654 	igt_assert(fence != -1);
655 
656 	__spin_wait(fd, hang);
657 	manual_hang(fd);
658 
659 	gem_sync(fd, hang->handle); /* wedged, with an unready batch */
660 	igt_assert(!gem_bo_busy(fd, hang->handle));
661 	igt_assert(gem_bo_busy(fd, obj.handle));
662 	igt_cork_unplug(&cork); /* only now submit our batches */
663 
664 	igt_assert_eq(__gem_wait(fd, obj.handle, -1), 0);
665 	igt_assert_eq(sync_fence_status(fence), -EIO);
666 	close(fence);
667 
668 	igt_spin_free(fd, hang);
669 	igt_assert(i915_reset_control(true));
670 	trigger_reset(fd);
671 	close(fd);
672 }
673 
test_inflight_internal(int fd,unsigned int wait)674 static void test_inflight_internal(int fd, unsigned int wait)
675 {
676 	struct drm_i915_gem_execbuffer2 execbuf;
677 	struct drm_i915_gem_exec_object2 obj[2];
678 	uint32_t bbe = MI_BATCH_BUFFER_END;
679 	unsigned engine, nfence = 0;
680 	int fences[16];
681 	igt_spin_t *hang;
682 
683 	igt_require(gem_has_exec_fence(fd));
684 
685 	fd = gem_reopen_driver(fd);
686 	igt_require_gem(fd);
687 
688 	igt_require(i915_reset_control(false));
689 	hang = spin_sync(fd, 0, 0);
690 
691 	memset(obj, 0, sizeof(obj));
692 	obj[0].handle = hang->handle;
693 	obj[0].flags = EXEC_OBJECT_WRITE;
694 	obj[1].handle = gem_create(fd, 4096);
695 	gem_write(fd, obj[1].handle, 0, &bbe, sizeof(bbe));
696 
697 	memset(&execbuf, 0, sizeof(execbuf));
698 	execbuf.buffers_ptr = to_user_pointer(obj);
699 	execbuf.buffer_count = 2;
700 	for_each_engine(fd, engine) {
701 		execbuf.flags = engine | I915_EXEC_FENCE_OUT;
702 
703 		gem_execbuf_wr(fd, &execbuf);
704 
705 		fences[nfence] = execbuf.rsvd2 >> 32;
706 		igt_assert(fences[nfence] != -1);
707 		nfence++;
708 	}
709 
710 	check_wait(fd, obj[1].handle, wait, NULL);
711 
712 	while (nfence--) {
713 		igt_assert_eq(sync_fence_status(fences[nfence]), -EIO);
714 		close(fences[nfence]);
715 	}
716 
717 	igt_spin_free(fd, hang);
718 	igt_assert(i915_reset_control(true));
719 	trigger_reset(fd);
720 	close(fd);
721 }
722 
reset_stress(int fd,uint32_t ctx0,const char * name,unsigned int engine,unsigned int flags)723 static void reset_stress(int fd, uint32_t ctx0,
724 			 const char *name, unsigned int engine,
725 			 unsigned int flags)
726 {
727 	const uint32_t bbe = MI_BATCH_BUFFER_END;
728 	struct drm_i915_gem_exec_object2 obj = {
729 		.handle = gem_create(fd, 4096)
730 	};
731 	struct drm_i915_gem_execbuffer2 execbuf = {
732 		.buffers_ptr = to_user_pointer(&obj),
733 		.buffer_count = 1,
734 		.flags = engine,
735 	};
736 	igt_stats_t stats;
737 	int max;
738 
739 	max = gem_measure_ring_inflight(fd, engine, 0);
740 	max = max / 2 - 1; /* assume !execlists and a shared ring */
741 	igt_require(max > 0);
742 
743 	gem_write(fd, obj.handle, 0, &bbe, sizeof(bbe));
744 
745 	igt_stats_init(&stats);
746 	igt_until_timeout(5) {
747 		uint32_t ctx = context_create_safe(fd);
748 		igt_spin_t *hang;
749 		unsigned int i;
750 
751 		gem_quiescent_gpu(fd);
752 
753 		igt_require(i915_reset_control(flags & TEST_WEDGE ?
754 					       false : true));
755 
756 		/*
757 		 * Start executing a spin batch with some queued batches
758 		 * against a different context after it.
759 		 */
760 		hang = spin_sync(fd, ctx0, engine);
761 
762 		execbuf.rsvd1 = ctx;
763 		for (i = 0; i < max; i++)
764 			gem_execbuf(fd, &execbuf);
765 
766 		execbuf.rsvd1 = ctx0;
767 		for (i = 0; i < max; i++)
768 			gem_execbuf(fd, &execbuf);
769 
770 		/* Wedge after a small delay. */
771 		check_wait(fd, obj.handle, 100e3, &stats);
772 		igt_assert_eq(sync_fence_status(hang->out_fence), -EIO);
773 
774 		/* Unwedge by forcing a reset. */
775 		igt_assert(i915_reset_control(true));
776 		trigger_reset(fd);
777 
778 		gem_quiescent_gpu(fd);
779 
780 		/*
781 		 * Verify that we are able to submit work after unwedging from
782 		 * both contexts.
783 		 */
784 		execbuf.rsvd1 = ctx;
785 		for (i = 0; i < max; i++)
786 			gem_execbuf(fd, &execbuf);
787 
788 		execbuf.rsvd1 = ctx0;
789 		for (i = 0; i < max; i++)
790 			gem_execbuf(fd, &execbuf);
791 
792 		gem_sync(fd, obj.handle);
793 		igt_spin_free(fd, hang);
794 		gem_context_destroy(fd, ctx);
795 	}
796 	check_wait_elapsed(name, fd, &stats);
797 	igt_stats_fini(&stats);
798 
799 	gem_close(fd, obj.handle);
800 }
801 
802 /*
803  * Verify that we can submit and execute work after unwedging the GPU.
804  */
test_reset_stress(int fd,unsigned int flags)805 static void test_reset_stress(int fd, unsigned int flags)
806 {
807 	uint32_t ctx0 = context_create_safe(fd);
808 	unsigned int engine;
809 
810 	for_each_engine(fd, engine)
811 		reset_stress(fd, ctx0, e__->name, engine, flags);
812 
813 	gem_context_destroy(fd, ctx0);
814 }
815 
/* DRM fd shared by all subtests; opened in the igt_main fixture. */
static int fd = -1;

/*
 * Exit handler installed from the igt_main fixture: re-enable GPU reset
 * and force a reset on the shared fd. @sig is unused.
 */
static void
exit_handler(int sig)
{
	i915_reset_control(true);
	igt_force_gpu_reset(fd);
}
824 
/*
 * Test entry point: opens the Intel DRM device, puts it into a known state
 * (reset enabled, GPU reset forced, exit handler installed) and registers
 * the subtests.
 */
igt_main
{
	igt_skip_on_simulation();

	igt_fixture {
		fd = drm_open_driver(DRIVER_INTEL);
		igt_device_drop_master(fd);

		gem_submission_print_method(fd);
		igt_require_gem(fd);

		igt_allow_hang(fd, 0, 0);

		/* Start from a clean state, and restore it again on exit. */
		igt_require(i915_reset_control(true));
		igt_force_gpu_reset(fd);
		igt_install_exit_handler(exit_handler);
	}

	/* ioctls that must report -EIO while the GPU is wedged. */
	igt_subtest("throttle")
		test_throttle(fd);

	igt_subtest("context-create")
		test_context_create(fd);

	igt_subtest("execbuf")
		test_execbuf(fd);

	igt_subtest("banned")
		test_banned(fd);

	igt_subtest("suspend")
		test_suspend(fd, SUSPEND_STATE_MEM);

	igt_subtest("hibernate")
		test_suspend(fd, SUSPEND_STATE_DISK);

	igt_subtest("in-flight-external")
		test_inflight_external(fd);

	igt_subtest("in-flight-suspend")
		test_inflight_suspend(fd);

	/* The stress tests create contexts, so need context support. */
	igt_subtest_group {
		igt_fixture {
			igt_require(gem_has_contexts(fd));
		}

		igt_subtest("reset-stress")
			test_reset_stress(fd, 0);

		igt_subtest("unwedge-stress")
			test_reset_stress(fd, TEST_WEDGE);
	}

	/*
	 * Each of the following subtests is registered once per hang delay;
	 * .wait is in microseconds and 0 injects the hang immediately.
	 */
	igt_subtest_group {
		const struct {
			unsigned int wait;
			const char *name;
		} waits[] = {
			{ .wait = 0, .name = "immediate" },
			{ .wait = 1, .name = "1us" },
			{ .wait = 10000, .name = "10ms" },
		};
		unsigned int i;

		for (i = 0; i < sizeof(waits) / sizeof(waits[0]); i++) {
			igt_subtest_f("wait-%s", waits[i].name)
				test_wait(fd, 0, waits[i].wait);

			igt_subtest_f("wait-wedge-%s", waits[i].name)
				test_wait(fd, TEST_WEDGE, waits[i].wait);

			igt_subtest_f("in-flight-%s", waits[i].name)
				test_inflight(fd, waits[i].wait);

			igt_subtest_f("in-flight-contexts-%s", waits[i].name)
				test_inflight_contexts(fd, waits[i].wait);

			/* Skipped when gem_has_semaphores() reports true. */
			igt_subtest_f("in-flight-internal-%s", waits[i].name) {
				igt_skip_on(gem_has_semaphores(fd));
				test_inflight_internal(fd, waits[i].wait);
			}
		}
	}
}
910