/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include <time.h>
#include <signal.h>
#include <pthread.h>
#include <sys/poll.h>

#include <i915_drm.h>

#include "igt_core.h"
#include "drmtest.h"
#include "igt_device.h"
#include "igt_dummyload.h"
#include "igt_gt.h"
#include "intel_chipset.h"
#include "intel_reg.h"
#include "ioctl_wrappers.h"
#include "sw_sync.h"
#include "igt_vgem.h"
#include "i915/gem_engine_topology.h"
#include "i915/gem_mman.h"

/**
 * SECTION:igt_dummyload
 * @short_description: Library for submitting GPU workloads
 * @title: Dummyload
 * @include: igt.h
 *
 * A lot of igt testcases need some GPU workload to make sure a race window is
 * big enough. Unfortunately, a fixed amount of workload leads to spurious test
 * failures or overly long runtimes on particularly fast or slow platforms.
 * This library provides functionality to submit GPU spin batches that keep the
 * GPU busy until they are explicitly ended, or until a requested timeout
 * elapses.
 */
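
/*
 * Illustrative usage sketch (comment only; assumption: the igt_spin_new() and
 * igt_spin_free() helpers declared in igt_dummyload.h are used, and the render
 * ring is selected via the execbuf flag I915_EXEC_RENDER):
 *
 *	igt_spin_t *spin = igt_spin_new(fd, .engine = I915_EXEC_RENDER);
 *	igt_spin_set_timeout(spin, NSEC_PER_SEC / 2);
 *	... exercise the race while the GPU is kept busy ...
 *	igt_spin_free(fd, spin);
 */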

#define LOCAL_I915_EXEC_BSD_SHIFT      (13)
#define LOCAL_I915_EXEC_BSD_MASK       (3 << LOCAL_I915_EXEC_BSD_SHIFT)

#define ENGINE_MASK  (I915_EXEC_RING_MASK | LOCAL_I915_EXEC_BSD_MASK)

#define MI_ARB_CHK (0x5 << 23)

static const int BATCH_SIZE = 4096;
static const int LOOP_START_OFFSET = 64;

static IGT_LIST(spin_list);
static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;

static int
emit_recursive_batch(igt_spin_t *spin,
		     int fd, const struct igt_spin_factory *opts)
{
#define SCRATCH 0
#define BATCH IGT_SPIN_BATCH
	const int gen = intel_gen(intel_get_drm_devid(fd));
	struct drm_i915_gem_relocation_entry relocs[2], *r;
	struct drm_i915_gem_execbuffer2 *execbuf;
	struct drm_i915_gem_exec_object2 *obj;
	unsigned int flags[GEM_MAX_ENGINES];
	unsigned int nengine;
	int fence_fd = -1;
	uint32_t *cs, *batch;
	int i;

	nengine = 0;
	if (opts->engine == ALL_ENGINES) {
		struct intel_execution_engine2 *engine;

		for_each_context_engine(fd, opts->ctx, engine) {
			if (opts->flags & IGT_SPIN_POLL_RUN &&
			    !gem_class_can_store_dword(fd, engine->class))
				continue;

			flags[nengine++] = engine->flags;
		}
	} else {
		flags[nengine++] = opts->engine;
	}
	igt_require(nengine);

	memset(&spin->execbuf, 0, sizeof(spin->execbuf));
	execbuf = &spin->execbuf;
	memset(spin->obj, 0, sizeof(spin->obj));
	obj = spin->obj;
	memset(relocs, 0, sizeof(relocs));

	obj[BATCH].handle = gem_create(fd, BATCH_SIZE);
	batch = __gem_mmap__wc(fd, obj[BATCH].handle,
			       0, BATCH_SIZE, PROT_WRITE);
	if (!batch)
		batch = gem_mmap__gtt(fd, obj[BATCH].handle,
				      BATCH_SIZE, PROT_WRITE);

	gem_set_domain(fd, obj[BATCH].handle,
		       I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
	execbuf->buffer_count++;
	cs = batch;

	if (opts->dependency) {
		igt_assert(!(opts->flags & IGT_SPIN_POLL_RUN));

		r = &relocs[obj[BATCH].relocation_count++];

		/* dummy write to dependency */
		obj[SCRATCH].handle = opts->dependency;
		r->presumed_offset = 0;
		r->target_handle = obj[SCRATCH].handle;
		r->offset = sizeof(uint32_t) * 1020;
		r->delta = 0;
		r->read_domains = I915_GEM_DOMAIN_RENDER;
		r->write_domain = I915_GEM_DOMAIN_RENDER;

		execbuf->buffer_count++;
	} else if (opts->flags & IGT_SPIN_POLL_RUN) {
		r = &relocs[obj[BATCH].relocation_count++];

		igt_assert(!opts->dependency);

		if (gen == 4 || gen == 5) {
			execbuf->flags |= I915_EXEC_SECURE;
			igt_require(__igt_device_set_master(fd) == 0);
		}

		spin->poll_handle = gem_create(fd, 4096);
		obj[SCRATCH].handle = spin->poll_handle;

		if (__gem_set_caching(fd, spin->poll_handle,
				      I915_CACHING_CACHED) == 0)
			spin->poll = gem_mmap__cpu(fd, spin->poll_handle,
						   0, 4096,
						   PROT_READ | PROT_WRITE);
		else
			spin->poll = gem_mmap__wc(fd, spin->poll_handle,
						  0, 4096,
						  PROT_READ | PROT_WRITE);

		igt_assert_eq(spin->poll[SPIN_POLL_START_IDX], 0);

		/* batch is first */
		r->presumed_offset = 4096;
		r->target_handle = obj[SCRATCH].handle;
		r->offset = sizeof(uint32_t) * 1;
		r->delta = sizeof(uint32_t) * SPIN_POLL_START_IDX;

		*cs++ = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);

		if (gen >= 8) {
			*cs++ = r->presumed_offset + r->delta;
			*cs++ = 0;
		} else if (gen >= 4) {
			*cs++ = 0;
			*cs++ = r->presumed_offset + r->delta;
			r->offset += sizeof(uint32_t);
		} else {
			cs[-1]--;
			*cs++ = r->presumed_offset + r->delta;
		}

		*cs++ = 1;

		execbuf->buffer_count++;
	}

	spin->handle = obj[BATCH].handle;

	igt_assert_lt(cs - batch, LOOP_START_OFFSET / sizeof(*cs));
	spin->condition = batch + LOOP_START_OFFSET / sizeof(*cs);
	cs = spin->condition;

	/* Allow ourselves to be preempted */
	if (!(opts->flags & IGT_SPIN_NO_PREEMPTION))
		*cs++ = MI_ARB_CHK;

	/* Pad with a few nops so that we do not completely hog the system.
	 *
	 * Part of the attraction of using a recursive batch is that it is
	 * hard on the system (executing the "function" call is apparently
	 * quite expensive). However, the GPU may hog the entire system for
	 * a few minutes, preventing even NMI. Quite why this is so is unclear,
	 * but presumably it relates to the PM_INTRMSK workaround on gen6/gen7.
	 * If we give the system a break by having the GPU execute a few nops
	 * between function calls, that appears enough to keep SNB out of
	 * trouble. See https://bugs.freedesktop.org/show_bug.cgi?id=102262
	 */
	if (!(opts->flags & IGT_SPIN_FAST))
		cs += 1000;

	/* recurse */
	r = &relocs[obj[BATCH].relocation_count++];
	r->target_handle = obj[BATCH].handle;
	r->offset = (cs + 1 - batch) * sizeof(*cs);
	r->read_domains = I915_GEM_DOMAIN_COMMAND;
	r->delta = LOOP_START_OFFSET;
	if (gen >= 8) {
		*cs++ = MI_BATCH_BUFFER_START | 1 << 8 | 1;
		*cs++ = r->delta;
		*cs++ = 0;
	} else if (gen >= 6) {
		*cs++ = MI_BATCH_BUFFER_START | 1 << 8;
		*cs++ = r->delta;
	} else {
		*cs++ = MI_BATCH_BUFFER_START | 2 << 6;
		if (gen < 4)
			r->delta |= 1;
		*cs = r->delta;
		cs++;
	}
	obj[BATCH].relocs_ptr = to_user_pointer(relocs);

	execbuf->buffers_ptr = to_user_pointer(obj +
					       (2 - execbuf->buffer_count));
	execbuf->rsvd1 = opts->ctx;

	if (opts->flags & IGT_SPIN_FENCE_OUT)
		execbuf->flags |= I915_EXEC_FENCE_OUT;

	for (i = 0; i < nengine; i++) {
		execbuf->flags &= ~ENGINE_MASK;
		execbuf->flags |= flags[i];

		gem_execbuf_wr(fd, execbuf);

		if (opts->flags & IGT_SPIN_FENCE_OUT) {
			int _fd = execbuf->rsvd2 >> 32;

			igt_assert(_fd >= 0);
			if (fence_fd == -1) {
				fence_fd = _fd;
			} else {
				int old_fd = fence_fd;

				fence_fd = sync_fence_merge(old_fd, _fd);
				close(old_fd);
				close(_fd);
			}
			igt_assert(fence_fd >= 0);
		}
	}

	igt_assert_lt(cs - batch, BATCH_SIZE / sizeof(*cs));

	/* Make it easier for callers to resubmit. */
	for (i = 0; i < ARRAY_SIZE(spin->obj); i++) {
		spin->obj[i].relocation_count = 0;
		spin->obj[i].relocs_ptr = 0;
		spin->obj[i].flags = EXEC_OBJECT_PINNED;
	}

	spin->cmd_precondition = *spin->condition;

	return fence_fd;
}

static igt_spin_t *
spin_create(int fd, const struct igt_spin_factory *opts)
{
	igt_spin_t *spin;

	spin = calloc(1, sizeof(struct igt_spin));
	igt_assert(spin);

	spin->out_fence = emit_recursive_batch(spin, fd, opts);

	pthread_mutex_lock(&list_lock);
	igt_list_add(&spin->link, &spin_list);
	pthread_mutex_unlock(&list_lock);

	return spin;
}

igt_spin_t *
__igt_spin_factory(int fd, const struct igt_spin_factory *opts)
{
	return spin_create(fd, opts);
}

/**
 * igt_spin_factory:
 * @fd: open i915 drm file descriptor
 * @opts: controlling options such as context, engine, dependencies, etc.
 *
 * Start a recursive batch on a ring. Immediately returns an #igt_spin_t that
 * contains the batch handle, which can be waited upon. The returned structure
 * must be passed to igt_spin_free() for post-processing. A short usage sketch
 * follows the function body below.
 *
 * Returns:
 * Structure with helper internal state for igt_spin_free().
 */
igt_spin_t *
igt_spin_factory(int fd, const struct igt_spin_factory *opts)
{
	igt_spin_t *spin;

	igt_require_gem(fd);

	if (opts->engine != ALL_ENGINES) {
		struct intel_execution_engine2 e;
		int class;

		if (!gem_context_lookup_engine(fd, opts->engine,
					       opts->ctx, &e)) {
			class = e.class;
		} else {
			gem_require_ring(fd, opts->engine);
			class = gem_execbuf_flags_to_engine_class(opts->engine);
		}

		if (opts->flags & IGT_SPIN_POLL_RUN)
			igt_require(gem_class_can_store_dword(fd, class));
	}

	spin = spin_create(fd, opts);

	igt_assert(gem_bo_busy(fd, spin->handle));
	if (opts->flags & IGT_SPIN_FENCE_OUT) {
		struct pollfd pfd = { spin->out_fence, POLLIN };

		igt_assert(poll(&pfd, 1, 0) == 0);
	}

	return spin;
}

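/*
 * Usage sketch for the factory above (comment only; assumption: the spinner
 * helpers referenced here are those declared in igt_dummyload.h, and
 * sync_fence_wait() from lib/sw_sync.h returns 0 once the fence signals):
 *
 *	igt_spin_t *spin = igt_spin_new(fd, .flags = IGT_SPIN_FENCE_OUT);
 *	... queue dependent work behind spin->out_fence ...
 *	igt_spin_end(spin);
 *	igt_assert_eq(sync_fence_wait(spin->out_fence, -1), 0);
 *	igt_spin_free(fd, spin);
 */
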
static void notify(union sigval arg)
{
	igt_spin_t *spin = arg.sival_ptr;

	igt_spin_end(spin);
}

/**
 * igt_spin_set_timeout:
 * @spin: spin state from igt_spin_new()
 * @ns: amount of time in nanoseconds the batch continues to execute
 *      before finishing.
 *
 * Specify a timeout. This ends the recursive batch associated with @spin after
 * the timeout has elapsed.
 */
void igt_spin_set_timeout(igt_spin_t *spin, int64_t ns)
{
	timer_t timer;
	struct sigevent sev;
	struct itimerspec its;

	igt_assert(ns > 0);
	if (!spin)
		return;

	igt_assert(!spin->timer);

	memset(&sev, 0, sizeof(sev));
	sev.sigev_notify = SIGEV_THREAD;
	sev.sigev_value.sival_ptr = spin;
	sev.sigev_notify_function = notify;
	igt_assert(timer_create(CLOCK_MONOTONIC, &sev, &timer) == 0);
	igt_assert(timer);

	memset(&its, 0, sizeof(its));
	its.it_value.tv_sec = ns / NSEC_PER_SEC;
	its.it_value.tv_nsec = ns % NSEC_PER_SEC;
	igt_assert(timer_settime(timer, 0, &its, NULL) == 0);

	spin->timer = timer;
}

/**
 * igt_spin_reset:
 * @spin: spin state from igt_spin_new()
 *
 * Reset the state of spin, allowing its reuse.
 */
void igt_spin_reset(igt_spin_t *spin)
{
	if (igt_spin_has_poll(spin))
		spin->poll[SPIN_POLL_START_IDX] = 0;

	*spin->condition = spin->cmd_precondition;
	__sync_synchronize();
}

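/*
 * Resubmission sketch (comment only; assumption: after igt_spin_reset() the
 * execbuf and pinned objects kept in the spinner, as prepared at the end of
 * emit_recursive_batch(), can simply be replayed by the caller):
 *
 *	igt_spin_reset(spin);
 *	gem_execbuf(fd, &spin->execbuf);
 */
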
/**
 * igt_spin_end:
 * @spin: spin state from igt_spin_new()
 *
 * End the spinner associated with @spin manually.
 */
void igt_spin_end(igt_spin_t *spin)
{
	if (!spin)
		return;

	*spin->condition = MI_BATCH_BUFFER_END;
	__sync_synchronize();
}

/**
 * igt_spin_free:
 * @fd: open i915 drm file descriptor
 * @spin: spin state from igt_spin_new()
 *
 * This function does the necessary post-processing after starting a
 * spin with igt_spin_new() and then frees it.
 */
void igt_spin_free(int fd, igt_spin_t *spin)
{
	if (!spin)
		return;

	pthread_mutex_lock(&list_lock);
	igt_list_del(&spin->link);
	pthread_mutex_unlock(&list_lock);

	if (spin->timer)
		timer_delete(spin->timer);

	igt_spin_end(spin);
	gem_munmap((void *)((unsigned long)spin->condition & (~4095UL)),
		   BATCH_SIZE);

	if (spin->poll) {
		gem_munmap(spin->poll, 4096);
		gem_close(fd, spin->poll_handle);
	}

	gem_close(fd, spin->handle);

	if (spin->out_fence >= 0)
		close(spin->out_fence);

	free(spin);
}

void igt_terminate_spins(void)
{
	struct igt_spin *iter;

	pthread_mutex_lock(&list_lock);
	igt_list_for_each(iter, &spin_list, link)
		igt_spin_end(iter);
	pthread_mutex_unlock(&list_lock);
}

void igt_unshare_spins(void)
{
	struct igt_spin *it, *n;

	/* Disable the automatic termination on inherited spinners */
	igt_list_for_each_safe(it, n, &spin_list, link)
		igt_list_init(&it->link);
	igt_list_init(&spin_list);
}

static uint32_t plug_vgem_handle(struct igt_cork *cork, int fd)
{
	struct vgem_bo bo;
	int dmabuf;
	uint32_t handle;

	cork->vgem.device = drm_open_driver(DRIVER_VGEM);
	igt_require(vgem_has_fences(cork->vgem.device));

	bo.width = bo.height = 1;
	bo.bpp = 4;
	vgem_create(cork->vgem.device, &bo);
	cork->vgem.fence = vgem_fence_attach(cork->vgem.device, &bo, VGEM_FENCE_WRITE);

	dmabuf = prime_handle_to_fd(cork->vgem.device, bo.handle);
	handle = prime_fd_to_handle(fd, dmabuf);
	close(dmabuf);

	return handle;
}

static void unplug_vgem_handle(struct igt_cork *cork)
{
	vgem_fence_signal(cork->vgem.device, cork->vgem.fence);
	close(cork->vgem.device);
}

static uint32_t plug_sync_fd(struct igt_cork *cork)
{
	int fence;

	igt_require_sw_sync();

	cork->sw_sync.timeline = sw_sync_timeline_create();
	fence = sw_sync_timeline_create_fence(cork->sw_sync.timeline, 1);

	return fence;
}

static void unplug_sync_fd(struct igt_cork *cork)
{
	sw_sync_timeline_inc(cork->sw_sync.timeline, 1);
	close(cork->sw_sync.timeline);
}

/**
 * igt_cork_plug:
 * @cork: cork structure that will be filled with the state needed to unplug
 *        and clean up later. Note: its type field selects the corking method.
 * @fd: open drm file descriptor
 *
 * This function provides a mechanism to stall submission. It provides two
 * blocking methods:
 *
 * VGEM_BO (CORK_VGEM_HANDLE):
 * Imports a vgem bo with a fence attached to it. This bo can be used as a
 * dependency during submission to stall execution until the fence is signaled.
 *
 * SW_SYNC (CORK_SYNC_FD):
 * Creates a timeline and then a fence on that timeline. The fence can be used
 * as an input fence to a request; the request will be stalled until the fence
 * is signaled.
 *
 * The parameters required to unblock execution and to clean up are stored in
 * the provided cork structure. A short usage sketch follows the function body
 * below.
 *
 * Returns:
 * Handle of the imported vgem bo, or the sw_sync fence fd, depending on the
 * cork type.
 */
uint32_t igt_cork_plug(struct igt_cork *cork, int fd)
{
	igt_assert(cork->fd == -1);

	switch (cork->type) {
	case CORK_SYNC_FD:
		return plug_sync_fd(cork);

	case CORK_VGEM_HANDLE:
		return plug_vgem_handle(cork, fd);

	default:
		igt_assert_f(0, "Invalid cork type!\n");
		return 0;
	}
}

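/*
 * Cork usage sketch (comment only; assumption: the bare initialiser below
 * matches the struct igt_cork layout implied by the cork->type and cork->fd
 * accesses in this file):
 *
 *	struct igt_cork cork = { .type = CORK_VGEM_HANDLE, .fd = -1 };
 *	uint32_t plug = igt_cork_plug(&cork, fd);
 *	... submit requests with 'plug' as a dependency; they stall ...
 *	igt_cork_unplug(&cork);
 *
 * After igt_cork_unplug(), the stalled requests are allowed to execute.
 */
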
/**
 * igt_cork_unplug:
 * @cork: cork state from igt_cork_plug()
 *
 * This function unblocks execution by signaling the fence attached to the
 * imported bo (or by advancing the sw_sync timeline) and does the necessary
 * post-processing.
 *
 * NOTE: the handle returned by igt_cork_plug is not closed during this phase.
 */
void igt_cork_unplug(struct igt_cork *cork)
{
	igt_assert(cork->fd != -1);

	switch (cork->type) {
	case CORK_SYNC_FD:
		unplug_sync_fd(cork);
		break;

	case CORK_VGEM_HANDLE:
		unplug_vgem_handle(cork);
		break;

	default:
		igt_assert_f(0, "Invalid cork type!\n");
	}

	cork->fd = -1; /* Reset cork */
}