/*
 * Copyright © 2011 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Chris Wilson <chris@chris-wilson.co.uk>
 *
 */

#include "igt.h"
#include "igt_device.h"
#include "igt_rand.h"
#include "igt_sysfs.h"

#include <unistd.h>
#include <stdlib.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <inttypes.h>
#include <errno.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <sys/poll.h>
#include <sys/time.h>
#include <time.h>
#include "drm.h"

#define LOCAL_I915_EXEC_NO_RELOC (1<<11)
#define LOCAL_I915_EXEC_HANDLE_LUT (1<<12)

#define LOCAL_I915_EXEC_BSD_SHIFT      (13)
#define LOCAL_I915_EXEC_BSD_MASK       (3 << LOCAL_I915_EXEC_BSD_SHIFT)

#define ENGINE_FLAGS  (I915_EXEC_RING_MASK | LOCAL_I915_EXEC_BSD_MASK)

#define MAX_PRIO LOCAL_I915_CONTEXT_MAX_USER_PRIORITY
#define MIN_PRIO LOCAL_I915_CONTEXT_MIN_USER_PRIORITY

#define FORKED 1
#define CHAINED 2
#define CONTEXT 4

static double elapsed(const struct timespec *start, const struct timespec *end)
{
	return ((end->tv_sec - start->tv_sec) +
		(end->tv_nsec - start->tv_nsec)*1e-9);
}

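/*
 * Submit the empty batch to a single engine in tight 1024-deep bursts for
 * @timeout seconds, returning the elapsed time and storing the number of
 * batches submitted in *out. Falls back to plain execbuf flags if
 * HANDLE_LUT / NO_RELOC are rejected by the kernel.
 */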
static double nop_on_ring(int fd, uint32_t handle, unsigned ring_id,
			  int timeout, unsigned long *out)
{
	struct drm_i915_gem_execbuffer2 execbuf;
	struct drm_i915_gem_exec_object2 obj;
	struct timespec start, now;
	unsigned long count;

	memset(&obj, 0, sizeof(obj));
	obj.handle = handle;

	memset(&execbuf, 0, sizeof(execbuf));
	execbuf.buffers_ptr = to_user_pointer(&obj);
	execbuf.buffer_count = 1;
	execbuf.flags = ring_id;
	execbuf.flags |= LOCAL_I915_EXEC_HANDLE_LUT;
	execbuf.flags |= LOCAL_I915_EXEC_NO_RELOC;
	if (__gem_execbuf(fd, &execbuf)) {
		execbuf.flags = ring_id;
		gem_execbuf(fd, &execbuf);
	}
	intel_detect_and_clear_missed_interrupts(fd);

	count = 0;
	clock_gettime(CLOCK_MONOTONIC, &start);
	do {
		for (int loop = 0; loop < 1024; loop++)
			gem_execbuf(fd, &execbuf);

		count += 1024;
		clock_gettime(CLOCK_MONOTONIC, &now);
	} while (elapsed(&start, &now) < timeout);
	igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0);

	*out = count;
	return elapsed(&start, &now);
}

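/*
 * Measure the latency of restarting a batch: two self-referencing batches
 * at offsets 0 and 128 take turns spinning (an MI_BATCH_BUFFER_START that
 * loops back onto an MI_ARB_CHK) until the CPU terminates the old batch by
 * overwriting its loop with MI_BATCH_BUFFER_END, then busy-polls the dword
 * that the new batch stores at the tail of the buffer to observe when it
 * begins execution.
 */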
static void poll_ring(int fd, unsigned engine, const char *name, int timeout)
{
	const int gen = intel_gen(intel_get_drm_devid(fd));
	const uint32_t MI_ARB_CHK = 0x5 << 23;
	struct drm_i915_gem_execbuffer2 execbuf;
	struct drm_i915_gem_exec_object2 obj;
	struct drm_i915_gem_relocation_entry reloc[4], *r;
	uint32_t *bbe[2], *state, *batch;
	struct timespec tv = {};
	unsigned long cycles;
	unsigned flags;
	uint64_t elapsed;

	flags = I915_EXEC_NO_RELOC;
	if (gen == 4 || gen == 5)
		flags |= I915_EXEC_SECURE;

	gem_require_ring(fd, engine);
	igt_require(gem_can_store_dword(fd, engine));

	memset(&obj, 0, sizeof(obj));
	obj.handle = gem_create(fd, 4096);
	obj.relocs_ptr = to_user_pointer(reloc);
	obj.relocation_count = ARRAY_SIZE(reloc);

	r = memset(reloc, 0, sizeof(reloc));
	batch = gem_mmap__wc(fd, obj.handle, 0, 4096, PROT_WRITE);

	for (unsigned int start_offset = 0;
	     start_offset <= 128;
	     start_offset += 128) {
		uint32_t *b = batch + start_offset / sizeof(*batch);

		r->target_handle = obj.handle;
		r->offset = (b - batch + 1) * sizeof(uint32_t);
		r->delta = 4092;
		r->read_domains = I915_GEM_DOMAIN_RENDER;

		*b = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
		if (gen >= 8) {
			*++b = r->delta;
			*++b = 0;
		} else if (gen >= 4) {
			r->offset += sizeof(uint32_t);
			*++b = 0;
			*++b = r->delta;
		} else {
			*b -= 1;
			*++b = r->delta;
		}
		*++b = start_offset != 0;
		r++;

		b = batch + (start_offset + 64) / sizeof(*batch);
		bbe[start_offset != 0] = b;
		*b++ = MI_ARB_CHK;

		r->target_handle = obj.handle;
		r->offset = (b - batch + 1) * sizeof(uint32_t);
		r->read_domains = I915_GEM_DOMAIN_COMMAND;
		r->delta = start_offset + 64;
		if (gen >= 8) {
			*b++ = MI_BATCH_BUFFER_START | 1 << 8 | 1;
			*b++ = r->delta;
			*b++ = 0;
		} else if (gen >= 6) {
			*b++ = MI_BATCH_BUFFER_START | 1 << 8;
			*b++ = r->delta;
		} else {
			*b++ = MI_BATCH_BUFFER_START | 2 << 6;
			if (gen < 4)
				r->delta |= 1;
			*b++ = r->delta;
		}
		r++;
	}
	igt_assert(r == reloc + ARRAY_SIZE(reloc));
	state = batch + 1023;

	memset(&execbuf, 0, sizeof(execbuf));
	execbuf.buffers_ptr = to_user_pointer(&obj);
	execbuf.buffer_count = 1;
	execbuf.flags = engine | flags;

	cycles = 0;
	do {
		unsigned int idx = ++cycles & 1;

		*bbe[idx] = MI_ARB_CHK;
		execbuf.batch_start_offset =
			(bbe[idx] - batch) * sizeof(*batch) - 64;

		gem_execbuf(fd, &execbuf);

		*bbe[!idx] = MI_BATCH_BUFFER_END;
		__sync_synchronize();

		while (READ_ONCE(*state) != idx)
			;
	} while ((elapsed = igt_nsec_elapsed(&tv)) >> 30 < timeout);
	*bbe[cycles & 1] = MI_BATCH_BUFFER_END;
	gem_sync(fd, obj.handle);

	igt_info("%s completed %ld cycles: %.3f us\n",
		 name, cycles, elapsed*1e-3/cycles);

	munmap(batch, 4096);
	gem_close(fd, obj.handle);
}

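/*
 * As poll_ring(), but rotates the ping-pong batches round-robin across
 * every engine capable of MI_STORE_DWORD, and writes the poll dword into a
 * second scratch object (cacheable, when set-caching is supported).
 */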
static void poll_sequential(int fd, const char *name, int timeout)
{
	const int gen = intel_gen(intel_get_drm_devid(fd));
	const uint32_t MI_ARB_CHK = 0x5 << 23;
	struct drm_i915_gem_execbuffer2 execbuf;
	struct drm_i915_gem_exec_object2 obj[2];
	struct drm_i915_gem_relocation_entry reloc[4], *r;
	uint32_t *bbe[2], *state, *batch;
	unsigned engines[16], nengine, engine, flags;
	struct timespec tv = {};
	unsigned long cycles;
	uint64_t elapsed;
	bool cached;

	flags = I915_EXEC_NO_RELOC;
	if (gen == 4 || gen == 5)
		flags |= I915_EXEC_SECURE;

	nengine = 0;
	for_each_physical_engine(fd, engine) {
		if (!gem_can_store_dword(fd, engine))
			continue;

		engines[nengine++] = engine;
	}
	igt_require(nengine);

	memset(obj, 0, sizeof(obj));
	obj[0].handle = gem_create(fd, 4096);
	obj[0].flags = EXEC_OBJECT_WRITE;
	cached = __gem_set_caching(fd, obj[0].handle, 1) == 0;
	obj[1].handle = gem_create(fd, 4096);
	obj[1].relocs_ptr = to_user_pointer(reloc);
	obj[1].relocation_count = ARRAY_SIZE(reloc);

	r = memset(reloc, 0, sizeof(reloc));
	batch = gem_mmap__wc(fd, obj[1].handle, 0, 4096, PROT_WRITE);

	for (unsigned int start_offset = 0;
	     start_offset <= 128;
	     start_offset += 128) {
		uint32_t *b = batch + start_offset / sizeof(*batch);

		r->target_handle = obj[0].handle;
		r->offset = (b - batch + 1) * sizeof(uint32_t);
		r->delta = 0;
		r->read_domains = I915_GEM_DOMAIN_RENDER;
		r->write_domain = I915_GEM_DOMAIN_RENDER;

		*b = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
		if (gen >= 8) {
			*++b = r->delta;
			*++b = 0;
		} else if (gen >= 4) {
			r->offset += sizeof(uint32_t);
			*++b = 0;
			*++b = r->delta;
		} else {
			*b -= 1;
			*++b = r->delta;
		}
		*++b = start_offset != 0;
		r++;

		b = batch + (start_offset + 64) / sizeof(*batch);
		bbe[start_offset != 0] = b;
		*b++ = MI_ARB_CHK;

		r->target_handle = obj[1].handle;
		r->offset = (b - batch + 1) * sizeof(uint32_t);
		r->read_domains = I915_GEM_DOMAIN_COMMAND;
		r->delta = start_offset + 64;
		if (gen >= 8) {
			*b++ = MI_BATCH_BUFFER_START | 1 << 8 | 1;
			*b++ = r->delta;
			*b++ = 0;
		} else if (gen >= 6) {
			*b++ = MI_BATCH_BUFFER_START | 1 << 8;
			*b++ = r->delta;
		} else {
			*b++ = MI_BATCH_BUFFER_START | 2 << 6;
			if (gen < 4)
				r->delta |= 1;
			*b++ = r->delta;
		}
		r++;
	}
	igt_assert(r == reloc + ARRAY_SIZE(reloc));

	if (cached)
		state = gem_mmap__cpu(fd, obj[0].handle, 0, 4096, PROT_READ);
	else
		state = gem_mmap__wc(fd, obj[0].handle, 0, 4096, PROT_READ);

	memset(&execbuf, 0, sizeof(execbuf));
	execbuf.buffers_ptr = to_user_pointer(obj);
	execbuf.buffer_count = ARRAY_SIZE(obj);

	cycles = 0;
	do {
		unsigned int idx = ++cycles & 1;

		*bbe[idx] = MI_ARB_CHK;
		execbuf.batch_start_offset =
			(bbe[idx] - batch) * sizeof(*batch) - 64;

		execbuf.flags = engines[cycles % nengine] | flags;
		gem_execbuf(fd, &execbuf);

		*bbe[!idx] = MI_BATCH_BUFFER_END;
		__sync_synchronize();

		while (READ_ONCE(*state) != idx)
			;
	} while ((elapsed = igt_nsec_elapsed(&tv)) >> 30 < timeout);
	*bbe[cycles & 1] = MI_BATCH_BUFFER_END;
	gem_sync(fd, obj[1].handle);

	igt_info("%s completed %ld cycles: %.3f us\n",
		 name, cycles, elapsed*1e-3/cycles);

	munmap(state, 4096);
	munmap(batch, 4096);
	gem_close(fd, obj[1].handle);
	gem_close(fd, obj[0].handle);
}

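/* Report the steady-state rate of submitting nops to a single engine. */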
static void single(int fd, uint32_t handle,
		   unsigned ring_id, const char *ring_name)
{
	double time;
	unsigned long count;

	gem_require_ring(fd, ring_id);

	time = nop_on_ring(fd, handle, ring_id, 20, &count);
	igt_info("%s: %'lu cycles: %.3fus\n",
		 ring_name, count, time*1e6 / count);
}

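/*
 * Repeat the nop measurement @reps times (at least 5) and return the
 * median time per batch, to smooth over run-to-run variance.
 */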
static double
stable_nop_on_ring(int fd, uint32_t handle, unsigned int engine,
		   int timeout, int reps)
{
	igt_stats_t s;
	double n;

	igt_assert(reps >= 5);

	igt_stats_init_with_size(&s, reps);
	s.is_float = true;

	while (reps--) {
		unsigned long count;
		double time;

		time = nop_on_ring(fd, handle, engine, timeout, &count);
		igt_stats_push_float(&s, time / count);
	}

	n = igt_stats_get_median(&s);
	igt_stats_fini(&s);

	return n;
}

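/* Assert that x lies within (tolerance * 100)% of the reference value. */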
#define assert_within_epsilon(x, ref, tolerance) \
	igt_assert_f((x) <= (1.0 + tolerance) * ref && \
		     (x) >= (1.0 - tolerance) * ref, \
		     "'%s' != '%s' (%f not within %f%% tolerance of %f)\n",\
		     #x, #ref, x, tolerance * 100.0, ref)

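/*
 * Check that disabling the displays does not change nop throughput:
 * measure the median nop latency with at least one connector active, turn
 * every crtc off, measure again, and require the two results to stay
 * within 10% of each other.
 */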
static void headless(int fd, uint32_t handle)
{
	unsigned int nr_connected = 0;
	drmModeConnector *connector;
	drmModeRes *res;
	double n_display, n_headless;

	res = drmModeGetResources(fd);
	igt_require(res);

	/* require at least one connected connector for the test */
	for (int i = 0; i < res->count_connectors; i++) {
		connector = drmModeGetConnectorCurrent(fd, res->connectors[i]);
		if (connector->connection == DRM_MODE_CONNECTED)
			nr_connected++;
		drmModeFreeConnector(connector);
	}
	igt_require(nr_connected > 0);

	/* set graphics mode to prevent blanking */
	kmstest_set_vt_graphics_mode();

	/* benchmark nops */
	n_display = stable_nop_on_ring(fd, handle, I915_EXEC_DEFAULT, 1, 5);
	igt_info("With one display connected: %.2fus\n",
		 n_display * 1e6);

	/* force all connectors off */
	kmstest_unset_all_crtcs(fd, res);

	/* benchmark nops again */
	n_headless = stable_nop_on_ring(fd, handle, I915_EXEC_DEFAULT, 1, 5);
	igt_info("Without a display connected (headless): %.2fus\n",
		 n_headless * 1e6);

	/* check that the two execution speeds are roughly the same */
	assert_within_epsilon(n_headless, n_display, 0.1f);
}

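/*
 * First measure each engine individually, then fork one submitter per
 * engine and let them all hammer the GPU simultaneously, checking that the
 * engines can be fed independently and in parallel.
 */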
static void parallel(int fd, uint32_t handle, int timeout)
{
	struct drm_i915_gem_execbuffer2 execbuf;
	struct drm_i915_gem_exec_object2 obj;
	unsigned engines[16];
	const char *names[16];
	unsigned nengine;
	unsigned engine;
	unsigned long count;
	double time, sum;

	sum = 0;
	nengine = 0;
	for_each_physical_engine(fd, engine) {
		engines[nengine] = engine;
		names[nengine] = e__->name;
		nengine++;

		time = nop_on_ring(fd, handle, engine, 1, &count) / count;
		sum += time;
		igt_debug("%s: %.3fus\n", e__->name, 1e6*time);
	}
	igt_require(nengine);
	igt_info("average (individually): %.3fus\n", sum/nengine*1e6);

	memset(&obj, 0, sizeof(obj));
	obj.handle = handle;

	memset(&execbuf, 0, sizeof(execbuf));
	execbuf.buffers_ptr = to_user_pointer(&obj);
	execbuf.buffer_count = 1;
	execbuf.flags |= LOCAL_I915_EXEC_HANDLE_LUT;
	execbuf.flags |= LOCAL_I915_EXEC_NO_RELOC;
	if (__gem_execbuf(fd, &execbuf)) {
		execbuf.flags = 0;
		gem_execbuf(fd, &execbuf);
	}
	intel_detect_and_clear_missed_interrupts(fd);

	igt_fork(child, nengine) {
		struct timespec start, now;

		execbuf.flags &= ~ENGINE_FLAGS;
		execbuf.flags |= engines[child];

		count = 0;
		clock_gettime(CLOCK_MONOTONIC, &start);
		do {
			for (int loop = 0; loop < 1024; loop++)
				gem_execbuf(fd, &execbuf);
			count += 1024;
			clock_gettime(CLOCK_MONOTONIC, &now);
		} while (elapsed(&start, &now) < timeout);
		time = elapsed(&start, &now) / count;
		igt_info("%s: %ld cycles, %.3fus\n", names[child], count, 1e6*time);
	}

	igt_waitchildren();
	igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0);
}

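/*
 * Submit nops to every engine in sequence from a single thread. The
 * aggregate rate should be limited by the slowest engine rather than the
 * sum of them all, since the rings fill up and execute in parallel; see
 * the comment before the final assertion below.
 */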
static void series(int fd, uint32_t handle, int timeout)
{
	struct drm_i915_gem_execbuffer2 execbuf;
	struct drm_i915_gem_exec_object2 obj;
	struct timespec start, now, sync;
	unsigned engines[16];
	unsigned nengine;
	unsigned engine;
	unsigned long count;
	double time, max = 0, min = HUGE_VAL, sum = 0;
	const char *name;

	nengine = 0;
	for_each_physical_engine(fd, engine) {
		time = nop_on_ring(fd, handle, engine, 1, &count) / count;
		if (time > max) {
			name = e__->name;
			max = time;
		}
		if (time < min)
			min = time;
		sum += time;
		engines[nengine++] = engine;
	}
	igt_require(nengine);
	igt_info("Maximum execution latency on %s, %.3fus, min %.3fus, total %.3fus per cycle, average %.3fus\n",
		 name, max*1e6, min*1e6, sum*1e6, sum/nengine*1e6);

	memset(&obj, 0, sizeof(obj));
	obj.handle = handle;

	memset(&execbuf, 0, sizeof(execbuf));
	execbuf.buffers_ptr = to_user_pointer(&obj);
	execbuf.buffer_count = 1;
	execbuf.flags |= LOCAL_I915_EXEC_HANDLE_LUT;
	execbuf.flags |= LOCAL_I915_EXEC_NO_RELOC;
	if (__gem_execbuf(fd, &execbuf)) {
		execbuf.flags = 0;
		gem_execbuf(fd, &execbuf);
	}
	intel_detect_and_clear_missed_interrupts(fd);

	count = 0;
	clock_gettime(CLOCK_MONOTONIC, &start);
	do {
		for (int loop = 0; loop < 1024; loop++) {
			for (int n = 0; n < nengine; n++) {
				execbuf.flags &= ~ENGINE_FLAGS;
				execbuf.flags |= engines[n];
				gem_execbuf(fd, &execbuf);
			}
		}
		count += nengine * 1024;
		clock_gettime(CLOCK_MONOTONIC, &now);
	} while (elapsed(&start, &now) < timeout); /* Hang detection ~120s */
	gem_sync(fd, handle);
	clock_gettime(CLOCK_MONOTONIC, &sync);
	igt_debug("sync time: %.3fus\n", elapsed(&now, &sync)*1e6);
	igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0);

	time = elapsed(&start, &now) / count;
	igt_info("All (%d engines): %'lu cycles, average %.3fus per cycle [expected %.3fus]\n",
		 nengine, count, 1e6*time, 1e6*((max-min)/nengine+min));

	/* The rate-limiting step should be how fast the slowest engine can
	 * execute its queue of requests, as when we wait upon a full ring all
	 * dispatch is frozen. So in general we cannot go faster than the
	 * slowest engine (but as all engines are in lockstep, they should all
	 * be executing in parallel and so the average should be max/nengine),
	 * but we should equally not go any slower.
	 *
	 * However, that depends upon being able to submit fast enough, and
	 * that in turn depends upon debugging turned off and no bottlenecks
	 * within the driver. We cannot assert that we hit ideal conditions
	 * across all engines, so we only look for an outrageous error
	 * condition.
	 */
	igt_assert_f(time < 2*sum,
		     "Average time (%.3fus) exceeds expectation for parallel execution (min %.3fus, max %.3fus; limit set at %.3fus)\n",
		     1e6*time, 1e6*min, 1e6*max, 1e6*sum*2);
}

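/* Swap-two-elements callback for igt_permute_array(). */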
static void xchg(void *array, unsigned i, unsigned j)
{
	unsigned *u = array;
	unsigned tmp = u[i];
	u[i] = u[j];
	u[j] = tmp;
}

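/*
 * Submit a write-ordered nop across every engine in a randomised order: a
 * scratch object marked for write forces each request to wait upon its
 * predecessor. Optionally run one submitter per cpu (FORKED), submit all
 * batches to one engine back-to-back before switching (CHAINED), or use a
 * private context per child (CONTEXT).
 */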
static void sequential(int fd, uint32_t handle, unsigned flags, int timeout)
{
	const int ncpus = flags & FORKED ? sysconf(_SC_NPROCESSORS_ONLN) : 1;
	struct drm_i915_gem_execbuffer2 execbuf;
	struct drm_i915_gem_exec_object2 obj[2];
	unsigned engines[16];
	unsigned nengine;
	double *results;
	double time, sum;
	unsigned n;

	gem_require_contexts(fd);

	results = mmap(NULL, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
	igt_assert(results != MAP_FAILED);

	nengine = 0;
	sum = 0;
	for_each_physical_engine(fd, n) {
		unsigned long count;

		time = nop_on_ring(fd, handle, n, 1, &count) / count;
		sum += time;
		igt_debug("%s: %.3fus\n", e__->name, 1e6*time);

		engines[nengine++] = n;
	}
	igt_require(nengine);
	igt_info("Total (individual) execution latency %.3fus per cycle\n",
		 1e6*sum);

	memset(obj, 0, sizeof(obj));
	obj[0].handle = gem_create(fd, 4096);
	obj[0].flags = EXEC_OBJECT_WRITE;
	obj[1].handle = handle;

	memset(&execbuf, 0, sizeof(execbuf));
	execbuf.buffers_ptr = to_user_pointer(obj);
	execbuf.buffer_count = 2;
	execbuf.flags |= LOCAL_I915_EXEC_HANDLE_LUT;
	execbuf.flags |= LOCAL_I915_EXEC_NO_RELOC;
	igt_require(__gem_execbuf(fd, &execbuf) == 0);

	if (flags & CONTEXT) {
		uint32_t id;

		igt_require(__gem_context_create(fd, &id) == 0);
		execbuf.rsvd1 = id;
	}

	for (n = 0; n < nengine; n++) {
		execbuf.flags &= ~ENGINE_FLAGS;
		execbuf.flags |= engines[n];
		igt_require(__gem_execbuf(fd, &execbuf) == 0);
	}

	intel_detect_and_clear_missed_interrupts(fd);

	igt_fork(child, ncpus) {
		struct timespec start, now;
		unsigned long count;

		obj[0].handle = gem_create(fd, 4096);
		gem_execbuf(fd, &execbuf);

		if (flags & CONTEXT)
			execbuf.rsvd1 = gem_context_create(fd);

		hars_petruska_f54_1_random_perturb(child);

		count = 0;
		clock_gettime(CLOCK_MONOTONIC, &start);
		do {
			igt_permute_array(engines, nengine, xchg);
			if (flags & CHAINED) {
				for (n = 0; n < nengine; n++) {
					execbuf.flags &= ~ENGINE_FLAGS;
					execbuf.flags |= engines[n];
					for (int loop = 0; loop < 1024; loop++)
						gem_execbuf(fd, &execbuf);
				}
			} else {
				for (int loop = 0; loop < 1024; loop++) {
					for (n = 0; n < nengine; n++) {
						execbuf.flags &= ~ENGINE_FLAGS;
						execbuf.flags |= engines[n];
						gem_execbuf(fd, &execbuf);
					}
				}
			}
			count += 1024;
			clock_gettime(CLOCK_MONOTONIC, &now);
		} while (elapsed(&start, &now) < timeout); /* Hang detection ~120s */

		gem_sync(fd, obj[0].handle);
		clock_gettime(CLOCK_MONOTONIC, &now);
		results[child] = elapsed(&start, &now) / count;

		if (flags & CONTEXT)
			gem_context_destroy(fd, execbuf.rsvd1);

		gem_close(fd, obj[0].handle);
	}
	igt_waitchildren();
	igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0);

	results[ncpus] = 0;
	for (n = 0; n < ncpus; n++)
		results[ncpus] += results[n];
	results[ncpus] /= ncpus;

	igt_info("Sequential (%d engines, %d processes): average %.3fus per cycle [expected %.3fus]\n",
		 nengine, ncpus, 1e6*results[ncpus], 1e6*sum*ncpus);

	if (flags & CONTEXT)
		gem_context_destroy(fd, execbuf.rsvd1);

	gem_close(fd, obj[0].handle);
	munmap(results, 4096);
}

#define LOCAL_EXEC_FENCE_OUT (1 << 17)
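/*
 * A zero-timeout poll forces the kernel to enable signaling on the fence
 * (returning true if it had not yet signaled); an infinite poll blocks
 * until the fence signals.
 */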
static bool fence_enable_signaling(int fence)
{
	return poll(&(struct pollfd){fence, POLLIN}, 1, 0) == 0;
}

static bool fence_wait(int fence)
{
	return poll(&(struct pollfd){fence, POLLIN}, 1, -1) == 1;
}

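/*
 * Measure nop throughput when every batch also carries an output fence.
 * A rolling window of NFENCES fences is kept: each slot is waited upon and
 * closed before reuse, and signaling is enabled on each new fence with a
 * zero-timeout poll to exercise the interrupt signaling path.
 */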
static void fence_signal(int fd, uint32_t handle,
			 unsigned ring_id, const char *ring_name,
			 int timeout)
{
#define NFENCES 512
	struct drm_i915_gem_execbuffer2 execbuf;
	struct drm_i915_gem_exec_object2 obj;
	struct timespec start, now;
	unsigned engines[16];
	unsigned nengine;
	int *fences, n;
	unsigned long count, signal;

	igt_require(gem_has_exec_fence(fd));

	nengine = 0;
	if (ring_id == ALL_ENGINES) {
		for_each_physical_engine(fd, n)
			engines[nengine++] = n;
	} else {
		gem_require_ring(fd, ring_id);
		engines[nengine++] = ring_id;
	}
	igt_require(nengine);

	fences = malloc(sizeof(*fences) * NFENCES);
	igt_assert(fences);
	memset(fences, -1, sizeof(*fences) * NFENCES);

	memset(&obj, 0, sizeof(obj));
	obj.handle = handle;

	memset(&execbuf, 0, sizeof(execbuf));
	execbuf.buffers_ptr = to_user_pointer(&obj);
	execbuf.buffer_count = 1;
	execbuf.flags = LOCAL_EXEC_FENCE_OUT;

	n = 0;
	count = 0;
	signal = 0;

	intel_detect_and_clear_missed_interrupts(fd);
	clock_gettime(CLOCK_MONOTONIC, &start);
	do {
		for (int loop = 0; loop < 1024; loop++) {
			for (int e = 0; e < nengine; e++) {
				if (fences[n] != -1) {
					igt_assert(fence_wait(fences[n]));
					close(fences[n]);
				}

				execbuf.flags &= ~ENGINE_FLAGS;
				execbuf.flags |= engines[e];
				gem_execbuf_wr(fd, &execbuf);

				/* Enable signaling by doing a poll() */
				fences[n] = execbuf.rsvd2 >> 32;
				signal += fence_enable_signaling(fences[n]);

				n = (n + 1) % NFENCES;
			}
		}

		count += 1024 * nengine;
		clock_gettime(CLOCK_MONOTONIC, &now);
	} while (elapsed(&start, &now) < timeout);
	igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0);

	for (n = 0; n < NFENCES; n++)
		if (fences[n] != -1)
			close(fences[n]);
	free(fences);

	igt_info("Signal %s: %'lu cycles (%'lu signals): %.3fus\n",
		 ring_name, count, signal, elapsed(&start, &now) * 1e6 / count);
}

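/*
 * Measure nop throughput from a maximum-priority context while a
 * minimum-priority spinner occupies the engine, so every burst of batches
 * must preempt the spinner in order to execute.
 */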
static void preempt(int fd, uint32_t handle,
		    unsigned ring_id, const char *ring_name)
{
	struct drm_i915_gem_execbuffer2 execbuf;
	struct drm_i915_gem_exec_object2 obj;
	struct timespec start, now;
	unsigned long count;
	uint32_t ctx[2];

	gem_require_ring(fd, ring_id);

	ctx[0] = gem_context_create(fd);
	gem_context_set_priority(fd, ctx[0], MIN_PRIO);

	ctx[1] = gem_context_create(fd);
	gem_context_set_priority(fd, ctx[1], MAX_PRIO);

	memset(&obj, 0, sizeof(obj));
	obj.handle = handle;

	memset(&execbuf, 0, sizeof(execbuf));
	execbuf.buffers_ptr = to_user_pointer(&obj);
	execbuf.buffer_count = 1;
	execbuf.flags = ring_id;
	execbuf.flags |= LOCAL_I915_EXEC_HANDLE_LUT;
	execbuf.flags |= LOCAL_I915_EXEC_NO_RELOC;
	if (__gem_execbuf(fd, &execbuf)) {
		execbuf.flags = ring_id;
		gem_execbuf(fd, &execbuf);
	}
	execbuf.rsvd1 = ctx[1];
	intel_detect_and_clear_missed_interrupts(fd);

	count = 0;
	clock_gettime(CLOCK_MONOTONIC, &start);
	do {
		igt_spin_t *spin =
			__igt_spin_new(fd,
				       .ctx = ctx[0],
				       .engine = ring_id);

		for (int loop = 0; loop < 1024; loop++)
			gem_execbuf(fd, &execbuf);

		igt_spin_free(fd, spin);

		count += 1024;
		clock_gettime(CLOCK_MONOTONIC, &now);
	} while (elapsed(&start, &now) < 20);
	igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0);

	gem_context_destroy(fd, ctx[1]);
	gem_context_destroy(fd, ctx[0]);

	igt_info("%s: %'lu cycles: %.3fus\n",
		 ring_name, count, elapsed(&start, &now)*1e6 / count);
}

igt_main
{
	const struct intel_execution_engine *e;
	uint32_t handle = 0;
	int device = -1;

	igt_fixture {
		const uint32_t bbe = MI_BATCH_BUFFER_END;

		device = drm_open_driver(DRIVER_INTEL);
		igt_require_gem(device);
		gem_submission_print_method(device);
		gem_scheduler_print_capability(device);

		handle = gem_create(device, 4096);
		gem_write(device, handle, 0, &bbe, sizeof(bbe));

		igt_fork_hang_detector(device);
	}

	igt_subtest("basic-series")
		series(device, handle, 5);

	igt_subtest("basic-parallel")
		parallel(device, handle, 5);

	igt_subtest("basic-sequential")
		sequential(device, handle, 0, 5);

	for (e = intel_execution_engines; e->name; e++) {
		igt_subtest_f("%s", e->name)
			single(device, handle, e->exec_id | e->flags, e->name);
		igt_subtest_f("signal-%s", e->name)
			fence_signal(device, handle, e->exec_id | e->flags, e->name, 5);
	}

	igt_subtest("signal-all")
		fence_signal(device, handle, ALL_ENGINES, "all", 150);

	igt_subtest("series")
		series(device, handle, 150);

	igt_subtest("parallel")
		parallel(device, handle, 150);

	igt_subtest("sequential")
		sequential(device, handle, 0, 150);

	igt_subtest("forked-sequential")
		sequential(device, handle, FORKED, 150);

	igt_subtest("chained-sequential")
		sequential(device, handle, FORKED | CHAINED, 150);

	igt_subtest("context-sequential")
		sequential(device, handle, FORKED | CONTEXT, 150);

	igt_subtest_group {
		igt_fixture {
			gem_require_contexts(device);
			igt_require(gem_scheduler_has_ctx_priority(device));
			igt_require(gem_scheduler_has_preemption(device));
		}

		for (e = intel_execution_engines; e->name; e++) {
			igt_subtest_f("preempt-%s", e->name)
				preempt(device, handle, e->exec_id | e->flags, e->name);
		}
	}

	igt_subtest_group {
		igt_fixture {
			igt_device_set_master(device);
		}

		for (e = intel_execution_engines; e->name; e++) {
			/* Requires master for STORE_DWORD on gen4/5 */
			igt_subtest_f("poll-%s", e->name)
				poll_ring(device,
					  e->exec_id | e->flags, e->name, 20);
		}

		igt_subtest("poll-sequential")
			poll_sequential(device, "Sequential", 20);

		igt_subtest("headless") {
			/* Requires master for changing display modes */
			headless(device, handle);
		}
	}

	igt_fixture {
		igt_stop_hang_detector();
		gem_close(device, handle);
		close(device);
	}
}